Merge branch 'next/dt' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/linux-arm-soc
* 'next/dt' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/linux-arm-soc: (21 commits)
arm/dt: tegra devicetree support
arm/versatile: Add device tree support
dt/irq: add irq_domain_generate_simple() helper
irq: add irq_domain translation infrastructure
dmaengine: imx-sdma: add device tree probe support
dmaengine: imx-sdma: sdma_get_firmware does not need to copy fw_name
dmaengine: imx-sdma: use platform_device_id to identify sdma version
mmc: sdhci-esdhc-imx: add device tree probe support
mmc: sdhci-pltfm: dt device does not pass parent to sdhci_alloc_host
mmc: sdhci-esdhc-imx: get rid of the uses of cpu_is_mx()
mmc: sdhci-esdhc-imx: do not reference platform data after probe
mmc: sdhci-esdhc-imx: extend card_detect and write_protect support for mx5
net/fec: add device tree probe support
net: ibm_newemac: convert it to use of_get_phy_mode
dt/net: add helper function of_get_phy_mode
net/fec: gasket needs to be enabled for some i.mx
serial/imx: add device tree probe support
serial/imx: get rid of the uses of cpu_is_mx1()
arm/dt: Add dtb make rule
arm/dt: Add skeleton dtsi file
...
diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
new file mode 100644
index 0000000..7190c99
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
@@ -0,0 +1,36 @@
+Device-Tree bindings for input/gpio_keys.c keyboard driver
+
+Required properties:
+ - compatible = "gpio-keys";
+
+Optional properties:
+ - autorepeat: Boolean, enable the auto repeat feature of the Linux input
+ subsystem.
+
+Each button (key) is represented as a sub-node of "gpio-keys":
+Subnode properties:
+
+ - gpios: OF device-tree gpio specification.
+ - label: Descriptive name of the key.
+ - linux,code: Keycode to emit.
+
+Optional subnode-properties:
+ - linux,input-type: Specify the event type this button/key generates.
+ If not specified, defaults to <1> == EV_KEY.
+ - debounce-interval: Debouncing interval time in milliseconds.
+ If not specified, defaults to 5.
+ - gpio-key,wakeup: Boolean, button can wake up the system.
+
+Example nodes:
+
+ gpio_keys {
+ compatible = "gpio-keys";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ autorepeat;
+ button@21 {
+ label = "GPIO Key UP";
+ linux,code = <103>;
+ gpios = <&gpio1 0 1>;
+ };
+ ...
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
new file mode 100644
index 0000000..2144af1
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
@@ -0,0 +1,14 @@
+* Freescale i.MX Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible : Should be "fsl,<soc>-wdt"
+- reg : Should contain WDT registers location and length
+- interrupts : Should contain WDT interrupt
+
+Examples:
+
+wdt@73f98000 {
+ compatible = "fsl,imx51-wdt", "fsl,imx21-wdt";
+ reg = <0x73f98000 0x4000>;
+ interrupts = <58>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
new file mode 100644
index 0000000..79ead82
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
@@ -0,0 +1,11 @@
+* Samsung's Watchdog Timer Controller
+
+Samsung's Watchdog controller is used for resuming system operation
+after a preset amount of time during which the WDT reset event has not
+occurred.
+
+Required properties:
+- compatible : should be "samsung,s3c2410-wdt"
+- reg : base physical address of the controller and length of memory mapped
+ region.
+- interrupts : interrupt number to the CPU.
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting
index 87019d2..09994c2 100644
--- a/Documentation/filesystems/nfs/Exporting
+++ b/Documentation/filesystems/nfs/Exporting
@@ -92,7 +92,14 @@
1/ provide the filehandle fragment routines described below.
2/ make sure that d_splice_alias is used rather than d_add
when ->lookup finds an inode for a given parent and name.
- Typically the ->lookup routine will end with a:
+
+ If inode is NULL, d_splice_alias(inode, dentry) is equivalent to
+
+ d_add(dentry, inode), NULL
+
+ Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err)
+
+ Typically the ->lookup routine will simply end with a:
return d_splice_alias(inode, dentry);
}
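+
+ As a hedged illustration (a sketch, not part of the original text), a
+ minimal ->lookup for an exportable filesystem could look like this;
+ myfs_inode_by_name() is a hypothetical directory-search helper:
+
+ static struct dentry *myfs_lookup(struct inode *dir,
+ struct dentry *dentry,
+ struct nameidata *nd)
+ {
+ /* may yield a valid inode, NULL, or an ERR_PTR */
+ struct inode *inode = myfs_inode_by_name(dir, &dentry->d_name);
+
+ /* d_splice_alias() handles all three cases as described above */
+ return d_splice_alias(inode, dentry);
+ }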
diff --git a/Documentation/md.txt b/Documentation/md.txt
index f0eee83..fc94770 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -360,18 +360,20 @@
A file recording the current state of the device in the array
which can be a comma separated list of
faulty - device has been kicked from active use due to
- a detected fault
+ a detected fault or it has unacknowledged bad
+ blocks
in_sync - device is a fully in-sync member of the array
writemostly - device will only be subject to read
requests if there are no other options.
This applies only to raid1 arrays.
- blocked - device has failed, metadata is "external",
- and the failure hasn't been acknowledged yet.
+ blocked - device has failed, and the failure hasn't been
+ acknowledged yet by the metadata handler.
Writes that would write to this device if
it were not faulty are blocked.
spare - device is working, but not a full member.
This includes spares that are in the process
of being recovered to
+ write_error - device has ever seen a write error.
This list may grow in future.
This can be written to.
Writing "faulty" simulates a failure on the device.
@@ -379,9 +381,11 @@
Writing "writemostly" sets the writemostly flag.
Writing "-writemostly" clears the writemostly flag.
Writing "blocked" sets the "blocked" flag.
- Writing "-blocked" clears the "blocked" flag and allows writes
- to complete.
+ Writing "-blocked" clears the "blocked" flags and allows writes
+ to complete and possibly simulates an error.
Writing "in_sync" sets the in_sync flag.
+ Writing "write_error" sets writeerrorseen flag.
+ Writing "-write_error" clears writeerrorseen flag.
This file responds to select/poll. Any change to 'faulty'
or 'blocked' causes an event.
@@ -419,7 +423,6 @@
written, it will be rejected.
recovery_start
-
When the device is not 'in_sync', this records the number of
sectors from the start of the device which are known to be
correct. This is normally zero, but during a recovery
@@ -435,6 +438,20 @@
Setting this to 'none' is equivalent to setting 'in_sync'.
Setting to any other value also clears the 'in_sync' flag.
+ bad_blocks
+ This gives the list of all known bad blocks in the form of
+ start address and length (both in sectors). If output
+ is too big to fit in a page, it will be truncated. Writing
+ "sector length" to this file adds new acknowledged (i.e.
+ recorded to disk safely) bad blocks.
+
+ unacknowledged_bad_blocks
+ This gives the list of known-but-not-yet-saved-to-disk bad
+ blocks in the same form as 'bad_blocks'. If output is too big
+ to fit in a page, it will be truncated. Writing to this file
+ adds bad blocks without acknowledging them. This is largely
+ for testing.
+
An active md device will also contain an entry for each active device
diff --git a/Documentation/security/keys-ecryptfs.txt b/Documentation/security/keys-ecryptfs.txt
new file mode 100644
index 0000000..c3bbeba
--- /dev/null
+++ b/Documentation/security/keys-ecryptfs.txt
@@ -0,0 +1,68 @@
+ Encrypted keys for the eCryptfs filesystem
+
+ECryptfs is a stacked filesystem which transparently encrypts and decrypts each
+file using a randomly generated File Encryption Key (FEK).
+
+Each FEK is in turn encrypted with a File Encryption Key Encryption Key (FEFEK)
+either in kernel space or in user space with a daemon called 'ecryptfsd'. In
+the former case the operation is performed directly by the kernel CryptoAPI
+using a key, the FEFEK, derived from a user prompted passphrase; in the latter
+the FEK is encrypted by 'ecryptfsd' with the help of external libraries in order
+to support other mechanisms like public key cryptography, PKCS#11 and TPM based
+operations.
+
+The data structure defined by eCryptfs to contain information required for the
+FEK decryption is called an authentication token and, currently, can be stored in a
+kernel key of the 'user' type, inserted in the user's session specific keyring
+by the userspace utility 'mount.ecryptfs' shipped with the package
+'ecryptfs-utils'.
+
+The 'encrypted' key type has been extended with the introduction of the new
+format 'ecryptfs' in order to be used in conjunction with the eCryptfs
+filesystem. Encrypted keys of the newly introduced format store an
+authentication token in their payload with a FEFEK randomly generated by the
+kernel and protected by the parent master key.
+
+In order to avoid known-plaintext attacks, the datablob obtained through
+commands 'keyctl print' or 'keyctl pipe' does not contain the overall
+authentication token, whose content is well known, but only the FEFEK in
+encrypted form.
+
+The eCryptfs filesystem may really benefit from using encrypted keys in that the
+required key can be securely generated by an Administrator and provided at boot
+time after the unsealing of a 'trusted' key in order to perform the mount in a
+controlled environment. Another advantage is that the key is not exposed to
+threats of malicious software, because it is available in clear form only at
+kernel level.
+
+Usage:
+ keyctl add encrypted name "new ecryptfs key-type:master-key-name keylen" ring
+ keyctl add encrypted name "load hex_blob" ring
+ keyctl update keyid "update key-type:master-key-name"
+
+name:= '<16 hexadecimal characters>'
+key-type:= 'trusted' | 'user'
+keylen:= 64
+
+
+Example of encrypted key usage with the eCryptfs filesystem:
+
+Create an encrypted key "1000100010001000" of length 64 bytes with format
+'ecryptfs' and save it using a previously loaded user key "test":
+
+ $ keyctl add encrypted 1000100010001000 "new ecryptfs user:test 64" @u
+ 19184530
+
+ $ keyctl print 19184530
+ ecryptfs user:test 64 490045d4bfe48c99f0d465fbbbb79e7500da954178e2de0697
+ dd85091f5450a0511219e9f7cd70dcd498038181466f78ac8d4c19504fcc72402bfc41c2
+ f253a41b7507ccaa4b2b03fff19a69d1cc0b16e71746473f023a95488b6edfd86f7fdd40
+ 9d292e4bacded1258880122dd553a661
+
+ $ keyctl pipe 19184530 > ecryptfs.blob
+
+Mount an eCryptfs filesystem using the created encrypted key "1000100010001000"
+into the '/secret' directory:
+
+ $ mount -i -t ecryptfs -oecryptfs_sig=1000100010001000,\
+ ecryptfs_cipher=aes,ecryptfs_key_bytes=32 /secret /secret
diff --git a/Documentation/security/keys-trusted-encrypted.txt b/Documentation/security/keys-trusted-encrypted.txt
index 8fb79bc..5f50cca 100644
--- a/Documentation/security/keys-trusted-encrypted.txt
+++ b/Documentation/security/keys-trusted-encrypted.txt
@@ -53,12 +53,19 @@
should therefore be loaded in as secure a way as possible, preferably early in
boot.
-Usage:
- keyctl add encrypted name "new key-type:master-key-name keylen" ring
- keyctl add encrypted name "load hex_blob" ring
- keyctl update keyid "update key-type:master-key-name"
+The decrypted portion of encrypted keys can contain either a simple symmetric
+key or a more complex structure. The format of the more complex structure is
+application specific and is identified by 'format'.
-where 'key-type' is either 'trusted' or 'user'.
+Usage:
+ keyctl add encrypted name "new [format] key-type:master-key-name keylen"
+ ring
+ keyctl add encrypted name "load hex_blob" ring
+ keyctl update keyid "update key-type:master-key-name"
+
+format:= 'default' | 'ecryptfs'
+key-type:= 'trusted' | 'user'
+
Examples of trusted and encrypted key usage:
@@ -114,15 +121,25 @@
7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef
df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8
-Create and save an encrypted key "evm" using the above trusted key "kmk":
+The initial consumer of trusted keys is EVM, which at boot time needs a high
+quality symmetric key for HMAC protection of file metadata. The use of a
+trusted key provides strong guarantees that the EVM key has not been
+compromised by a user level problem, and when sealed to specific boot PCR
+values, protects against boot and offline attacks. Create and save an
+encrypted key "evm" using the above trusted key "kmk":
+option 1: omitting 'format'
$ keyctl add encrypted evm "new trusted:kmk 32" @u
159771175
+option 2: explicitly defining 'format' as 'default'
+ $ keyctl add encrypted evm "new default trusted:kmk 32" @u
+ 159771175
+
$ keyctl print 159771175
- trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55
- be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64
- 5972dcb82ab2dde83376d82b2e3c09ffc
+ default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3
+ 82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
+ 24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
$ keyctl pipe 159771175 > evm.blob
@@ -132,14 +149,11 @@
831684262
$ keyctl print 831684262
- trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55
- be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64
- 5972dcb82ab2dde83376d82b2e3c09ffc
+ default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3
+ 82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
+ 24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
-
-The initial consumer of trusted keys is EVM, which at boot time needs a high
-quality symmetric key for HMAC protection of file metadata. The use of a
-trusted key provides strong guarantees that the EVM key has not been
-compromised by a user level problem, and when sealed to specific boot PCR
-values, protects against boot and offline attacks. Other uses for trusted and
-encrypted keys, such as for disk and file encryption are anticipated.
+Other uses for trusted and encrypted keys, such as for disk and file encryption
+are anticipated. In particular the new format 'ecryptfs' has been defined in
+order to use encrypted keys to mount an eCryptfs filesystem. More details
+about the usage can be found in 'Documentation/security/keys-ecryptfs.txt'.
diff --git a/Documentation/watchdog/00-INDEX b/Documentation/watchdog/00-INDEX
index ee99451..fc51128 100644
--- a/Documentation/watchdog/00-INDEX
+++ b/Documentation/watchdog/00-INDEX
@@ -8,6 +8,8 @@
- directory holding watchdog related example programs.
watchdog-api.txt
- description of the Linux Watchdog driver API.
+watchdog-kernel-api.txt
+ - description of the Linux WatchDog Timer Driver Core kernel API.
watchdog-parameters.txt
- information on driver parameters (for drivers other than
the ones that have driver-specific files here)
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
new file mode 100644
index 0000000..4f7c894
--- /dev/null
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -0,0 +1,162 @@
+The Linux WatchDog Timer Driver Core kernel API.
+===============================================
+Last reviewed: 22-Jul-2011
+
+Wim Van Sebroeck <wim@iguana.be>
+
+Introduction
+------------
+This document does not describe what a WatchDog Timer (WDT) Driver or Device is.
+It also does not describe the API which can be used by user space to communicate
+with a WatchDog Timer. If you want to know this then please read the following
+file: Documentation/watchdog/watchdog-api.txt .
+
+So what does this document describe? It describes the API that can be used by
+WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core
+Framework. This framework provides all interfacing towards user space so that
+the same code does not have to be reproduced each time. This also means that
+a watchdog timer driver then only needs to provide the different routines
+(operations) that control the watchdog timer (WDT).
+
+The API
+-------
+Each watchdog timer driver that wants to use the WatchDog Timer Driver Core
+must #include <linux/watchdog.h> (you would have to do this anyway when
+writing a watchdog device driver). This include file contains the following
+register/unregister routines:
+
+extern int watchdog_register_device(struct watchdog_device *);
+extern void watchdog_unregister_device(struct watchdog_device *);
+
+The watchdog_register_device routine registers a watchdog timer device.
+The parameter of this routine is a pointer to a watchdog_device structure.
+This routine returns zero on success and a negative errno code for failure.
+
+The watchdog_unregister_device routine deregisters a registered watchdog timer
+device. The parameter of this routine is the pointer to the registered
+watchdog_device structure.
+
+The watchdog device structure looks like this:
+
+struct watchdog_device {
+ const struct watchdog_info *info;
+ const struct watchdog_ops *ops;
+ unsigned int bootstatus;
+ unsigned int timeout;
+ unsigned int min_timeout;
+ unsigned int max_timeout;
+ void *driver_data;
+ unsigned long status;
+};
+
+It contains the following fields:
+* info: a pointer to a watchdog_info structure. This structure gives some
+ additional information about the watchdog timer itself (like its unique name).
+* ops: a pointer to the list of watchdog operations that the watchdog supports.
+* timeout: the watchdog timer's timeout value (in seconds).
+* min_timeout: the watchdog timer's minimum timeout value (in seconds).
+* max_timeout: the watchdog timer's maximum timeout value (in seconds).
+* bootstatus: status of the device after booting (reported with watchdog
+ WDIOF_* status bits).
+* driver_data: a pointer to the driver's private data of a watchdog device.
+ This data should only be accessed via the watchdog_set_drvadata and
+ watchdog_get_drvdata routines.
+* status: this field contains a number of status bits that give extra
+ information about the status of the device (Like: is the watchdog timer
+ running/active, is the nowayout bit set, is the device opened via
+ the /dev/watchdog interface or not, ...).
+
+The list of watchdog operations is defined as:
+
+struct watchdog_ops {
+ struct module *owner;
+ /* mandatory operations */
+ int (*start)(struct watchdog_device *);
+ int (*stop)(struct watchdog_device *);
+ /* optional operations */
+ int (*ping)(struct watchdog_device *);
+ unsigned int (*status)(struct watchdog_device *);
+ int (*set_timeout)(struct watchdog_device *, unsigned int);
+ long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+};
+
+It is important that you first define the module owner of the watchdog timer
+driver's operations. This module owner will be used to lock the module when
+the watchdog is active. (This is to avoid a system crash when you unload the
+module and /dev/watchdog is still open).
+Some operations are mandatory and some are optional. The mandatory operations
+are:
+* start: this is a pointer to the routine that starts the watchdog timer
+ device.
+ The routine needs a pointer to the watchdog timer device structure as a
+ parameter. It returns zero on success or a negative errno code for failure.
+* stop: with this routine the watchdog timer device is being stopped.
+ The routine needs a pointer to the watchdog timer device structure as a
+ parameter. It returns zero on success or a negative errno code for failure.
+ Some watchdog timer hardware can only be started and not be stopped. A
+ driver supporting such hardware needs to make sure that both a start and
+ a stop routine are provided. This can be done by using a timer in the driver
+ that regularly sends a keepalive ping to the watchdog timer hardware.
+
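+A minimal sketch of a driver that provides only the mandatory operations
+(the "mywdt" names and the hardware accesses are hypothetical; the
+framework calls and structures are the ones described in this document):
+
+#include <linux/module.h>
+#include <linux/watchdog.h>
+
+static int mywdt_start(struct watchdog_device *wdd)
+{
+	/* program the hardware so that it starts counting */
+	return 0;
+}
+
+static int mywdt_stop(struct watchdog_device *wdd)
+{
+	/* program the hardware so that it stops counting */
+	return 0;
+}
+
+static const struct watchdog_info mywdt_info = {
+	.identity = "mywdt watchdog",
+	.options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+};
+
+static const struct watchdog_ops mywdt_ops = {
+	.owner = THIS_MODULE,
+	.start = mywdt_start,
+	.stop = mywdt_stop,
+};
+
+static struct watchdog_device mywdt_dev = {
+	.info = &mywdt_info,
+	.ops = &mywdt_ops,
+	.timeout = 30,
+	.min_timeout = 1,
+	.max_timeout = 255,
+};
+
+static int __init mywdt_init(void)
+{
+	return watchdog_register_device(&mywdt_dev);
+}
+
+static void __exit mywdt_exit(void)
+{
+	watchdog_unregister_device(&mywdt_dev);
+}
+
+module_init(mywdt_init);
+module_exit(mywdt_exit);
+MODULE_LICENSE("GPL");
+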
+Not all watchdog timer hardware supports the same functionality. That's why
+all other routines/operations are optional. They only need to be provided if
+they are supported. These optional routines/operations are:
+* ping: this is the routine that sends a keepalive ping to the watchdog timer
+ hardware.
+ The routine needs a pointer to the watchdog timer device structure as a
+ parameter. It returns zero on success or a negative errno code for failure.
+ Most hardware that does not support this as a separate function uses the
+ start function to restart the watchdog timer hardware. And that's also what
+ the watchdog timer driver core does: to send a keepalive ping to the watchdog
+ timer hardware it will either use the ping operation (when available) or the
+ start operation (when the ping operation is not available).
+ (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the
+ WDIOF_KEEPALIVEPING bit has been set in the options field of the watchdog's
+ info structure).
+* status: this routine checks the status of the watchdog timer device. The
+ status of the device is reported with watchdog WDIOF_* status flags/bits.
+* set_timeout: this routine checks and changes the timeout of the watchdog
+ timer device. It returns 0 on success, -EINVAL for "parameter out of range"
+ and -EIO for "could not write value to the watchdog". On success the timeout
+ value of the watchdog_device will be changed to the value that was just used
+ to re-program the watchdog timer device.
+ (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
+ watchdog's info structure).
+* ioctl: if this routine is present then it will be called first before we do
+ our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
+ if a command is not supported. The parameters that are passed to the ioctl
+ call are: watchdog_device, cmd and arg.
+
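+As a sketch of one optional operation, a set_timeout implementation that
+follows the contract above (range check, then reprogram the hardware; the
+actual register write is hypothetical):
+
+static int mywdt_set_timeout(struct watchdog_device *wdd,
+			unsigned int timeout)
+{
+	if (timeout < wdd->min_timeout || timeout > wdd->max_timeout)
+		return -EINVAL;
+
+	/* reprogram the hardware counter here; return -EIO on failure */
+	wdd->timeout = timeout;
+	return 0;
+}
+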
+The status bits should (preferably) be set with set_bit, clear_bit and
+similar bit-operations. The status bits that are defined are:
+* WDOG_ACTIVE: this status bit indicates whether or not a watchdog timer device
+ is active. When the watchdog is active after booting, you should
+ set this status bit (Note: when you register the watchdog timer device with
+ this bit set, then opening /dev/watchdog will skip the start operation)
+* WDOG_DEV_OPEN: this status bit shows whether or not the watchdog device
+ was opened via /dev/watchdog.
+ (This bit should only be used by the WatchDog Timer Driver Core).
+* WDOG_ALLOW_RELEASE: this bit stores whether or not the magic close character
+ has been sent (so that we can support the magic close feature).
+ (This bit should only be used by the WatchDog Timer Driver Core).
+* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
+ If this bit is set then the watchdog timer will not be able to stop.
+
+Note: The WatchDog Timer Driver Core supports the magic close feature and
+the nowayout feature. To use the magic close feature you must set the
+WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
+The nowayout feature will overrule the magic close feature.
+
+To get or set driver specific data the following two helper functions should be
+used:
+
+static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
+static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+
+The watchdog_set_drvdata function allows you to add driver specific data. The
+arguments of this function are the watchdog device to which you want to add
+the driver specific data, and a pointer to the data itself.
+
+The watchdog_get_drvdata function allows you to retrieve driver specific data.
+The argument of this function is the watchdog device from which you want to
+retrieve data. The function returns the pointer to the driver specific data.
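+
+A short sketch of how these two helpers pair up (struct mywdt_priv and the
+probe context are hypothetical):
+
+struct mywdt_priv {
+	void __iomem *base;	/* hypothetical register window */
+};
+
+static int mywdt_ping(struct watchdog_device *wdd)
+{
+	struct mywdt_priv *priv = watchdog_get_drvdata(wdd);
+
+	/* kick the hardware through priv->base */
+	return 0;
+}
+
+/* At probe time, before calling watchdog_register_device():
+ *	watchdog_set_drvdata(&mywdt_dev, priv);
+ */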
diff --git a/MAINTAINERS b/MAINTAINERS
index 7b2e9e8..1d2e79d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6408,7 +6408,7 @@
L: tomoyo-dev@lists.sourceforge.jp (subscribers-only, for developers in Japanese)
L: tomoyo-users@lists.sourceforge.jp (subscribers-only, for users in Japanese)
W: http://tomoyo.sourceforge.jp/
-T: quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.3.x/tomoyo-lsm/patches/
+T: quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.4.x/tomoyo-lsm/patches/
S: Maintained
F: security/tomoyo/
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 4b8abf9..d82ebab 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -27,14 +27,14 @@
config MACH_HARMONY
bool "Harmony board"
- select MACH_HAS_SND_SOC_TEGRA_WM8903
+ select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
help
Support for nVidia Harmony development platform
config MACH_KAEN
bool "Kaen board"
select MACH_SEABOARD
- select MACH_HAS_SND_SOC_TEGRA_WM8903
+ select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
help
Support for the Kaen version of Seaboard
@@ -45,7 +45,7 @@
config MACH_SEABOARD
bool "Seaboard board"
- select MACH_HAS_SND_SOC_TEGRA_WM8903
+ select MACH_HAS_SND_SOC_TEGRA_WM8903 if SND_SOC
help
Support for nVidia Seaboard development platform. It will
also be included for some of the derivative boards that
diff --git a/arch/arm/mach-zynq/Makefile b/arch/arm/mach-zynq/Makefile
index c550c67..397268c 100644
--- a/arch/arm/mach-zynq/Makefile
+++ b/arch/arm/mach-zynq/Makefile
@@ -3,4 +3,4 @@
#
# Common support
-obj-y := common.o timer.o board_dt.o
+obj-y := common.o timer.o
diff --git a/arch/arm/mach-zynq/board_dt.c b/arch/arm/mach-zynq/board_dt.c
deleted file mode 100644
index e69de29..0000000
--- a/arch/arm/mach-zynq/board_dt.c
+++ /dev/null
diff --git a/arch/microblaze/include/asm/cpuinfo.h b/arch/microblaze/include/asm/cpuinfo.h
index d8f0133..7d6831a 100644
--- a/arch/microblaze/include/asm/cpuinfo.h
+++ b/arch/microblaze/include/asm/cpuinfo.h
@@ -38,6 +38,7 @@
u32 use_exc;
u32 ver_code;
u32 mmu;
+ u32 mmu_privins;
u32 endian;
/* CPU caches */
diff --git a/arch/microblaze/include/asm/irqflags.h b/arch/microblaze/include/asm/irqflags.h
index c4532f0..c9a6262 100644
--- a/arch/microblaze/include/asm/irqflags.h
+++ b/arch/microblaze/include/asm/irqflags.h
@@ -14,7 +14,7 @@
#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
{
unsigned long flags;
asm volatile(" msrclr %0, %1 \n"
@@ -25,7 +25,7 @@
return flags;
}
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
{
/* this uses r0 without declaring it - is that correct? */
asm volatile(" msrclr r0, %0 \n"
@@ -35,7 +35,7 @@
: "memory");
}
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
{
/* this uses r0 without declaring it - is that correct? */
asm volatile(" msrset r0, %0 \n"
@@ -47,7 +47,7 @@
#else /* !CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
{
unsigned long flags, tmp;
asm volatile (" mfs %0, rmsr \n"
@@ -61,7 +61,7 @@
return flags;
}
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
{
unsigned long tmp;
asm volatile(" mfs %0, rmsr \n"
@@ -74,7 +74,7 @@
: "memory");
}
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
{
unsigned long tmp;
asm volatile(" mfs %0, rmsr \n"
@@ -89,7 +89,7 @@
#endif /* CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
{
unsigned long flags;
asm volatile(" mfs %0, rmsr \n"
@@ -100,7 +100,7 @@
return flags;
}
-static inline void arch_local_irq_restore(unsigned long flags)
+static inline notrace void arch_local_irq_restore(unsigned long flags)
{
asm volatile(" mts rmsr, %0 \n"
" nop \n"
@@ -109,12 +109,12 @@
: "memory");
}
-static inline bool arch_irqs_disabled_flags(unsigned long flags)
+static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
{
return (flags & MSR_IE) == 0;
}
-static inline bool arch_irqs_disabled(void)
+static inline notrace bool arch_irqs_disabled(void)
{
return arch_irqs_disabled_flags(arch_local_save_flags());
}
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index aed2a6b..7283bfb 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -125,9 +125,6 @@
.pgdir = swapper_pg_dir, \
}
-/* Do necessary setup to start up a newly executed thread. */
-void start_thread(struct pt_regs *regs,
- unsigned long pc, unsigned long usp);
/* Free all resources held by a thread. */
extern inline void release_thread(struct task_struct *dead_task)
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 9ad567e..20c5e8e 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -26,8 +26,12 @@
#define HAVE_ARCH_DEVTREE_FIXUPS
/* Other Prototypes */
-extern int early_uartlite_console(void);
-extern int early_uart16550_console(void);
+enum early_consoles {
+ UARTLITE = 1,
+ UART16550 = 2,
+};
+
+extern int of_early_console(void *version);
/*
* OF address retrieval & translation
diff --git a/arch/microblaze/include/asm/pvr.h b/arch/microblaze/include/asm/pvr.h
index a10bec6..4bbdb4c 100644
--- a/arch/microblaze/include/asm/pvr.h
+++ b/arch/microblaze/include/asm/pvr.h
@@ -111,16 +111,16 @@
/* Target family PVR mask */
#define PVR10_TARGET_FAMILY_MASK 0xFF000000
-/* MMU descrtiption */
+/* MMU description */
#define PVR11_USE_MMU 0xC0000000
#define PVR11_MMU_ITLB_SIZE 0x38000000
#define PVR11_MMU_DTLB_SIZE 0x07000000
#define PVR11_MMU_TLB_ACCESS 0x00C00000
#define PVR11_MMU_ZONES 0x003C0000
+#define PVR11_MMU_PRIVINS 0x00010000
/* MSR Reset value PVR mask */
#define PVR11_MSR_RESET_VALUE_MASK 0x000007FF
-
/* PVR access macros */
#define PVR_IS_FULL(_pvr) (_pvr.pvr[0] & PVR0_PVR_FULL_MASK)
#define PVR_USE_BARREL(_pvr) (_pvr.pvr[0] & PVR0_USE_BARREL_MASK)
@@ -216,6 +216,7 @@
#define PVR_MMU_DTLB_SIZE(_pvr) (_pvr.pvr[11] & PVR11_MMU_DTLB_SIZE)
#define PVR_MMU_TLB_ACCESS(_pvr) (_pvr.pvr[11] & PVR11_MMU_TLB_ACCESS)
#define PVR_MMU_ZONES(_pvr) (_pvr.pvr[11] & PVR11_MMU_ZONES)
+#define PVR_MMU_PRIVINS(pvr) (pvr.pvr[11] & PVR11_MMU_PRIVINS)
/* endian */
#define PVR_ENDIAN(_pvr) (_pvr.pvr[0] & PVR0_ENDI)
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 8f39689..904e5ef 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -23,6 +23,7 @@
void early_printk(const char *fmt, ...);
int setup_early_printk(char *opt);
+void remap_early_printk(void);
void disable_early_printk(void);
#if defined(CONFIG_EARLY_PRINTK)
diff --git a/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c b/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c
index f70a604..916aaed 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c
@@ -72,6 +72,7 @@
CI(pvr_user2, USER2);
CI(mmu, USE_MMU);
+ CI(mmu_privins, MMU_PRIVINS);
CI(endian, ENDIAN);
CI(use_icache, USE_ICACHE);
diff --git a/arch/microblaze/kernel/cpu/cpuinfo-static.c b/arch/microblaze/kernel/cpu/cpuinfo-static.c
index b16b994..592bb2e 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo-static.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo-static.c
@@ -119,6 +119,7 @@
ci->pvr_user2 = fcpu(cpu, "xlnx,pvr-user2");
ci->mmu = fcpu(cpu, "xlnx,use-mmu");
+ ci->mmu_privins = fcpu(cpu, "xlnx,mmu-privileged-instr");
ci->endian = fcpu(cpu, "xlnx,endianness");
ci->ver_code = 0;
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index c1640c5..44394d8 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -88,4 +88,8 @@
printk(KERN_WARNING "%s: Unsupported PVR setting\n", __func__);
set_cpuinfo_static(&cpuinfo, cpu);
}
+
+ if (cpuinfo.mmu_privins)
+ printk(KERN_WARNING "%s: Stream instructions enabled"
+ " - USERSPACE CAN LOCK THIS KERNEL!\n", __func__);
}
diff --git a/arch/microblaze/kernel/cpu/mb.c b/arch/microblaze/kernel/cpu/mb.c
index b4048af..7b5dca7 100644
--- a/arch/microblaze/kernel/cpu/mb.c
+++ b/arch/microblaze/kernel/cpu/mb.c
@@ -97,6 +97,10 @@
(cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "",
(cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : "");
+ count += seq_printf(m,
+ "Stream-insns:\t%sprivileged\n",
+ cpuinfo.mmu_privins ? "un" : "");
+
if (cpuinfo.use_icache)
count += seq_printf(m,
"Icache:\t\t%ukB\tline length:\t%dB\n",
@@ -110,10 +114,11 @@
"Dcache:\t\t%ukB\tline length:\t%dB\n",
cpuinfo.dcache_size >> 10,
cpuinfo.dcache_line_length);
+ seq_printf(m, "Dcache-Policy:\t");
if (cpuinfo.dcache_wb)
- count += seq_printf(m, "\t\twrite-back\n");
+ count += seq_printf(m, "write-back\n");
else
- count += seq_printf(m, "\t\twrite-through\n");
+ count += seq_printf(m, "write-through\n");
} else
count += seq_printf(m, "Dcache:\t\tno\n");
diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c
index c3616a0..d26d92d 100644
--- a/arch/microblaze/kernel/early_printk.c
+++ b/arch/microblaze/kernel/early_printk.c
@@ -35,7 +35,7 @@
* we'll never timeout on a working UART.
*/
- unsigned retries = 10000;
+ unsigned retries = 1000000;
/* read status bit - 0x8 offset */
while (--retries && (in_be32(base_addr + 8) & (1 << 3)))
;
@@ -60,7 +60,7 @@
static struct console early_serial_uartlite_console = {
.name = "earlyser",
.write = early_printk_uartlite_write,
- .flags = CON_PRINTBUFFER,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1,
};
#endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */
@@ -104,7 +104,7 @@
static struct console early_serial_uart16550_console = {
.name = "earlyser",
.write = early_printk_uart16550_write,
- .flags = CON_PRINTBUFFER,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1,
};
#endif /* CONFIG_SERIAL_8250_CONSOLE */
@@ -127,48 +127,56 @@
int __init setup_early_printk(char *opt)
{
+ int version = 0;
+
if (early_console_initialized)
return 1;
+ base_addr = of_early_console(&version);
+ if (base_addr) {
+#ifdef CONFIG_MMU
+ early_console_reg_tlb_alloc(base_addr);
+#endif
+ switch (version) {
#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE
- base_addr = early_uartlite_console();
- if (base_addr) {
- early_console_initialized = 1;
-#ifdef CONFIG_MMU
- early_console_reg_tlb_alloc(base_addr);
+ case UARTLITE:
+ printk(KERN_INFO "Early console on uartlite "
+ "at 0x%08x\n", base_addr);
+ early_console = &early_serial_uartlite_console;
+ break;
#endif
- early_console = &early_serial_uartlite_console;
- early_printk("early_printk_console is enabled at 0x%08x\n",
- base_addr);
-
- /* register_console(early_console); */
-
- return 0;
- }
-#endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */
-
#ifdef CONFIG_SERIAL_8250_CONSOLE
- base_addr = early_uart16550_console();
- base_addr &= ~3; /* clear register offset */
- if (base_addr) {
- early_console_initialized = 1;
-#ifdef CONFIG_MMU
- early_console_reg_tlb_alloc(base_addr);
+ case UART16550:
+ printk(KERN_INFO "Early console on uart16650 "
+ "at 0x%08x\n", base_addr);
+ early_console = &early_serial_uart16550_console;
+ break;
#endif
- early_console = &early_serial_uart16550_console;
+ default:
+ printk(KERN_INFO "Unsupported early console %d\n",
+ version);
+ return 1;
+ }
- early_printk("early_printk_console is enabled at 0x%08x\n",
- base_addr);
-
- /* register_console(early_console); */
-
+ register_console(early_console);
+ early_console_initialized = 1;
return 0;
}
-#endif /* CONFIG_SERIAL_8250_CONSOLE */
-
return 1;
}
+/* Remap the early console to a virtual address and do not allocate one TLB
+ * entry only for the early console, because of the performance degradation */
+void __init remap_early_printk(void)
+{
+ if (!early_console_initialized || !early_console)
+ return;
+ printk(KERN_INFO "early_printk_console remaping from 0x%x to ",
+ base_addr);
+ base_addr = (u32) ioremap(base_addr, PAGE_SIZE);
+ printk(KERN_CONT "0x%x\n", base_addr);
+}
+
void __init disable_early_printk(void)
{
if (!early_console_initialized || !early_console)
diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S
index 56572e9..e62be83 100644
--- a/arch/microblaze/kernel/hw_exception_handler.S
+++ b/arch/microblaze/kernel/hw_exception_handler.S
@@ -1113,23 +1113,23 @@
lw_r11_vm: R3_TO_LWREG_VM_V (11);
lw_r12_vm: R3_TO_LWREG_VM_V (12);
lw_r13_vm: R3_TO_LWREG_VM_V (13);
-lw_r14_vm: R3_TO_LWREG_VM (14);
+lw_r14_vm: R3_TO_LWREG_VM_V (14);
lw_r15_vm: R3_TO_LWREG_VM_V (15);
-lw_r16_vm: R3_TO_LWREG_VM (16);
+lw_r16_vm: R3_TO_LWREG_VM_V (16);
lw_r17_vm: R3_TO_LWREG_VM_V (17);
lw_r18_vm: R3_TO_LWREG_VM_V (18);
-lw_r19_vm: R3_TO_LWREG_VM (19);
-lw_r20_vm: R3_TO_LWREG_VM (20);
-lw_r21_vm: R3_TO_LWREG_VM (21);
-lw_r22_vm: R3_TO_LWREG_VM (22);
-lw_r23_vm: R3_TO_LWREG_VM (23);
-lw_r24_vm: R3_TO_LWREG_VM (24);
-lw_r25_vm: R3_TO_LWREG_VM (25);
-lw_r26_vm: R3_TO_LWREG_VM (26);
-lw_r27_vm: R3_TO_LWREG_VM (27);
-lw_r28_vm: R3_TO_LWREG_VM (28);
-lw_r29_vm: R3_TO_LWREG_VM (29);
-lw_r30_vm: R3_TO_LWREG_VM (30);
+lw_r19_vm: R3_TO_LWREG_VM_V (19);
+lw_r20_vm: R3_TO_LWREG_VM_V (20);
+lw_r21_vm: R3_TO_LWREG_VM_V (21);
+lw_r22_vm: R3_TO_LWREG_VM_V (22);
+lw_r23_vm: R3_TO_LWREG_VM_V (23);
+lw_r24_vm: R3_TO_LWREG_VM_V (24);
+lw_r25_vm: R3_TO_LWREG_VM_V (25);
+lw_r26_vm: R3_TO_LWREG_VM_V (26);
+lw_r27_vm: R3_TO_LWREG_VM_V (27);
+lw_r28_vm: R3_TO_LWREG_VM_V (28);
+lw_r29_vm: R3_TO_LWREG_VM_V (29);
+lw_r30_vm: R3_TO_LWREG_VM_V (30);
lw_r31_vm: R3_TO_LWREG_VM_V (31);
sw_table_vm:
@@ -1147,23 +1147,23 @@
sw_r11_vm: SWREG_TO_R3_VM_V (11);
sw_r12_vm: SWREG_TO_R3_VM_V (12);
sw_r13_vm: SWREG_TO_R3_VM_V (13);
-sw_r14_vm: SWREG_TO_R3_VM (14);
+sw_r14_vm: SWREG_TO_R3_VM_V (14);
sw_r15_vm: SWREG_TO_R3_VM_V (15);
-sw_r16_vm: SWREG_TO_R3_VM (16);
+sw_r16_vm: SWREG_TO_R3_VM_V (16);
sw_r17_vm: SWREG_TO_R3_VM_V (17);
sw_r18_vm: SWREG_TO_R3_VM_V (18);
-sw_r19_vm: SWREG_TO_R3_VM (19);
-sw_r20_vm: SWREG_TO_R3_VM (20);
-sw_r21_vm: SWREG_TO_R3_VM (21);
-sw_r22_vm: SWREG_TO_R3_VM (22);
-sw_r23_vm: SWREG_TO_R3_VM (23);
-sw_r24_vm: SWREG_TO_R3_VM (24);
-sw_r25_vm: SWREG_TO_R3_VM (25);
-sw_r26_vm: SWREG_TO_R3_VM (26);
-sw_r27_vm: SWREG_TO_R3_VM (27);
-sw_r28_vm: SWREG_TO_R3_VM (28);
-sw_r29_vm: SWREG_TO_R3_VM (29);
-sw_r30_vm: SWREG_TO_R3_VM (30);
+sw_r19_vm: SWREG_TO_R3_VM_V (19);
+sw_r20_vm: SWREG_TO_R3_VM_V (20);
+sw_r21_vm: SWREG_TO_R3_VM_V (21);
+sw_r22_vm: SWREG_TO_R3_VM_V (22);
+sw_r23_vm: SWREG_TO_R3_VM_V (23);
+sw_r24_vm: SWREG_TO_R3_VM_V (24);
+sw_r25_vm: SWREG_TO_R3_VM_V (25);
+sw_r26_vm: SWREG_TO_R3_VM_V (26);
+sw_r27_vm: SWREG_TO_R3_VM_V (27);
+sw_r28_vm: SWREG_TO_R3_VM_V (28);
+sw_r29_vm: SWREG_TO_R3_VM_V (29);
+sw_r30_vm: SWREG_TO_R3_VM_V (30);
sw_r31_vm: SWREG_TO_R3_VM_V (31);
#endif /* CONFIG_MMU */
diff --git a/arch/microblaze/kernel/intc.c b/arch/microblaze/kernel/intc.c
index c88f066..eb41441 100644
--- a/arch/microblaze/kernel/intc.c
+++ b/arch/microblaze/kernel/intc.c
@@ -134,7 +134,7 @@
intr_type =
be32_to_cpup(of_get_property(intc,
"xlnx,kind-of-intr", NULL));
- if (intr_type >= (1 << (nr_irq + 1)))
+ if (intr_type > (u32)((1ULL << nr_irq) - 1))
printk(KERN_INFO " ERROR: Mismatch in kind-of-intr param\n");
#ifdef CONFIG_SELFMOD_INTC
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 968648a..dbb8124 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -237,7 +237,6 @@
/* Set up a thread for executing a new program */
void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
{
- set_fs(USER_DS);
regs->pc = pc;
regs->r1 = usp;
regs->pt_mode = 0;
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index b15cc21..977484a 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -53,69 +53,58 @@
}
#ifdef CONFIG_EARLY_PRINTK
-/* MS this is Microblaze specifig function */
-static int __init early_init_dt_scan_serial(unsigned long node,
+char *stdout;
+
+int __init early_init_dt_scan_chosen_serial(unsigned long node,
const char *uname, int depth, void *data)
{
unsigned long l;
char *p;
- const __be32 *addr;
- pr_debug("search \"serial\", depth: %d, uname: %s\n", depth, uname);
+ pr_debug("%s: depth: %d, uname: %s\n", __func__, depth, uname);
-/* find all serial nodes */
- if (strncmp(uname, "serial", 6) != 0)
- return 0;
+ if (depth == 1 && (strcmp(uname, "chosen") == 0 ||
+ strcmp(uname, "chosen@0") == 0)) {
+ p = of_get_flat_dt_prop(node, "linux,stdout-path", &l);
+ if (p != NULL && l > 0)
+ stdout = p; /* store pointer to stdout-path */
+ }
-/* find compatible node with uartlite */
- p = of_get_flat_dt_prop(node, "compatible", &l);
- if ((strncmp(p, "xlnx,xps-uartlite", 17) != 0) &&
- (strncmp(p, "xlnx,opb-uartlite", 17) != 0) &&
- (strncmp(p, "xlnx,axi-uartlite", 17) != 0))
- return 0;
+ if (stdout && strstr(stdout, uname)) {
+ p = of_get_flat_dt_prop(node, "compatible", &l);
+ pr_debug("Compatible string: %s\n", p);
- addr = of_get_flat_dt_prop(node, "reg", &l);
- return be32_to_cpup(addr); /* return address */
+ if ((strncmp(p, "xlnx,xps-uart16550", 18) == 0) ||
+ (strncmp(p, "xlnx,axi-uart16550", 18) == 0)) {
+ unsigned int addr;
+
+ *(u32 *)data = UART16550;
+
+ addr = *(u32 *)of_get_flat_dt_prop(node, "reg", &l);
+ addr += *(u32 *)of_get_flat_dt_prop(node,
+ "reg-offset", &l);
+ /* clear register offset */
+ return be32_to_cpu(addr) & ~3;
+ }
+ if ((strncmp(p, "xlnx,xps-uartlite", 17) == 0) ||
+ (strncmp(p, "xlnx,opb-uartlite", 17) == 0) ||
+ (strncmp(p, "xlnx,axi-uartlite", 17) == 0) ||
+ (strncmp(p, "xlnx,mdm", 8) == 0)) {
+ unsigned int *addrp;
+
+ *(u32 *)data = UARTLITE;
+
+ addrp = of_get_flat_dt_prop(node, "reg", &l);
+ return be32_to_cpup(addrp); /* return address */
+ }
+ }
+ return 0;
}
-/* this function is looking for early uartlite console - Microblaze specific */
-int __init early_uartlite_console(void)
+/* This function looks for the early console - Microblaze specific */
+int __init of_early_console(void *version)
{
- return of_scan_flat_dt(early_init_dt_scan_serial, NULL);
-}
-
-/* MS this is Microblaze specifig function */
-static int __init early_init_dt_scan_serial_full(unsigned long node,
- const char *uname, int depth, void *data)
-{
- unsigned long l;
- char *p;
- unsigned int addr;
-
- pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
-
-/* find all serial nodes */
- if (strncmp(uname, "serial", 6) != 0)
- return 0;
-
- early_init_dt_check_for_initrd(node);
-
-/* find compatible node with uartlite */
- p = of_get_flat_dt_prop(node, "compatible", &l);
-
- if ((strncmp(p, "xlnx,xps-uart16550", 18) != 0) &&
- (strncmp(p, "xlnx,axi-uart16550", 18) != 0))
- return 0;
-
- addr = *(u32 *)of_get_flat_dt_prop(node, "reg", &l);
- addr += *(u32 *)of_get_flat_dt_prop(node, "reg-offset", &l);
- return be32_to_cpu(addr); /* return address */
-}
-
-/* this function is looking for early uartlite console - Microblaze specific */
-int __init early_uart16550_console(void)
-{
- return of_scan_flat_dt(early_init_dt_scan_serial_full, NULL);
+ return of_scan_flat_dt(early_init_dt_scan_chosen_serial, version);
}
#endif
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 8e2c09b..0e654a1 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -59,6 +59,11 @@
setup_memory();
+#ifdef CONFIG_EARLY_PRINTK
+ /* remap early console to virtual address */
+ remap_early_printk();
+#endif
+
xilinx_pci_init();
#if defined(CONFIG_SELFMOD_INTC) || defined(CONFIG_SELFMOD_TIMER)
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index cfa9cd2..64f7a00 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -177,9 +177,11 @@
cap |= HWCAP_SPARC_ULTRA3;
else if (tlb_type == hypervisor) {
if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
- sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
cap |= HWCAP_SPARC_BLKINIT;
- if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
+ if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
cap |= HWCAP_SPARC_N2;
}
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 7568640..7a5f80d 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2950,6 +2950,7 @@
#define HV_GRP_N2_CPU 0x0202
#define HV_GRP_NIU 0x0204
#define HV_GRP_VF_CPU 0x0205
+#define HV_GRP_KT_CPU 0x0209
#define HV_GRP_DIAG 0x0300
#ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
index f0d0c40c4..55a17c6 100644
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@@ -42,6 +42,7 @@
#define SUN4V_CHIP_INVALID 0x00
#define SUN4V_CHIP_NIAGARA1 0x01
#define SUN4V_CHIP_NIAGARA2 0x02
+#define SUN4V_CHIP_NIAGARA3 0x03
#define SUN4V_CHIP_UNKNOWN 0xff
#ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h
index bee4bf4..9ed6ff6 100644
--- a/arch/sparc/include/asm/xor_64.h
+++ b/arch/sparc/include/asm/xor_64.h
@@ -65,6 +65,7 @@
#define XOR_SELECT_TEMPLATE(FASTEST) \
((tlb_type == hypervisor && \
(sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \
- sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA3)) ? \
&xor_block_niagara : \
&xor_block_VIS)
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 138dbbc..17cf290 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -474,11 +474,18 @@
sparc_pmu_type = "niagara2";
break;
+ case SUN4V_CHIP_NIAGARA3:
+ sparc_cpu_type = "UltraSparc T3 (Niagara3)";
+ sparc_fpu_type = "UltraSparc T3 integrated FPU";
+ sparc_pmu_type = "niagara3";
+ break;
+
default:
printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
prom_cpu_compatible);
sparc_cpu_type = "Unknown SUN4V CPU";
sparc_fpu_type = "Unknown SUN4V FPU";
+ sparc_pmu_type = "Unknown SUN4V PMU";
break;
}
}
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index d91fd78..4197e8d 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -324,6 +324,7 @@
switch (sun4v_chip_type) {
case SUN4V_CHIP_NIAGARA1:
case SUN4V_CHIP_NIAGARA2:
+ case SUN4V_CHIP_NIAGARA3:
rover_inc_table = niagara_iterate_method;
break;
default:
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index aa594c7..c752603 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -132,6 +132,8 @@
.asciz "sun4v"
prom_niagara_prefix:
.asciz "SUNW,UltraSPARC-T"
+prom_sparc_prefix:
+ .asciz "SPARC-T"
.align 4
prom_root_compatible:
.skip 64
@@ -382,6 +384,22 @@
90: ldub [%g7], %g2
ldub [%g1], %g4
cmp %g2, %g4
+ bne,pn %icc, 89f
+ add %g7, 1, %g7
+ subcc %g3, 1, %g3
+ bne,pt %xcc, 90b
+ add %g1, 1, %g1
+ ba,pt %xcc, 91f
+ nop
+
+89: sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ sethi %hi(prom_sparc_prefix), %g7
+ or %g7, %lo(prom_sparc_prefix), %g7
+ mov 7, %g3
+90: ldub [%g7], %g2
+ ldub [%g1], %g4
+ cmp %g2, %g4
bne,pn %icc, 4f
add %g7, 1, %g7
subcc %g3, 1, %g3
@@ -390,6 +408,15 @@
sethi %hi(prom_cpu_compatible), %g1
or %g1, %lo(prom_cpu_compatible), %g1
+ ldub [%g1 + 7], %g2
+ cmp %g2, '3'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA3, %g4
+ ba,pt %xcc, 4f
+ nop
+
+91: sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
ldub [%g1 + 17], %g2
cmp %g2, '1'
be,pt %xcc, 5f
@@ -397,6 +424,7 @@
cmp %g2, '2'
be,pt %xcc, 5f
mov SUN4V_CHIP_NIAGARA2, %g4
+
4:
mov SUN4V_CHIP_UNKNOWN, %g4
5: sethi %hi(sun4v_chip_type), %g2
@@ -514,6 +542,9 @@
cmp %g1, SUN4V_CHIP_NIAGARA2
be,pt %xcc, niagara2_patch
nop
+ cmp %g1, SUN4V_CHIP_NIAGARA3
+ be,pt %xcc, niagara2_patch
+ nop
call generic_patch_copyops
nop
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 7c60afb..d306e64 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -38,6 +38,7 @@
{ .group = HV_GRP_N2_CPU, },
{ .group = HV_GRP_NIU, },
{ .group = HV_GRP_VF_CPU, },
+ { .group = HV_GRP_KT_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
};
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 8ac23e6..343b0f9 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -80,8 +80,11 @@
{
unsigned long ret;
- ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
- if (ret != HV_EOK)
+ if (val & PCR_N2_HTRACE) {
+ ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
+ if (ret != HV_EOK)
+ write_pcr(val);
+ } else
write_pcr(val);
}
@@ -106,6 +109,10 @@
perf_hsvc_group = HV_GRP_N2_CPU;
break;
+ case SUN4V_CHIP_NIAGARA3:
+ perf_hsvc_group = HV_GRP_KT_CPU;
+ break;
+
default:
return -ENODEV;
}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 171e8d8..614da62 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1343,7 +1343,8 @@
sparc_pmu = &niagara1_pmu;
return true;
}
- if (!strcmp(sparc_pmu_type, "niagara2")) {
+ if (!strcmp(sparc_pmu_type, "niagara2") ||
+ !strcmp(sparc_pmu_type, "niagara3")) {
sparc_pmu = &niagara2_pmu;
return true;
}
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 1a371f8..8600eb2 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -55,7 +55,7 @@
}
EXPORT_SYMBOL(atomic_cmpxchg);
-int atomic_add_unless(atomic_t *v, int a, int u)
+int __atomic_add_unless(atomic_t *v, int a, int u)
{
int ret;
unsigned long flags;
@@ -67,7 +67,7 @@
spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
return ret != u;
}
-EXPORT_SYMBOL(atomic_add_unless);
+EXPORT_SYMBOL(__atomic_add_unless);
/* Atomic operations are already serializing */
void atomic_set(atomic_t *v, int i)
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index ac6739e..c3de70d 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -1,6 +1,6 @@
/* n2-drv.c: Niagara-2 RNG driver.
*
- * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2008, 2011 David S. Miller <davem@davemloft.net>
*/
#include <linux/kernel.h>
@@ -22,8 +22,8 @@
#define DRV_MODULE_NAME "n2rng"
#define PFX DRV_MODULE_NAME ": "
-#define DRV_MODULE_VERSION "0.1"
-#define DRV_MODULE_RELDATE "May 15, 2008"
+#define DRV_MODULE_VERSION "0.2"
+#define DRV_MODULE_RELDATE "July 27, 2011"
static char version[] __devinitdata =
DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -623,14 +623,14 @@
static int __devinit n2rng_probe(struct platform_device *op)
{
const struct of_device_id *match;
- int victoria_falls;
+ int multi_capable;
int err = -ENOMEM;
struct n2rng *np;
match = of_match_device(n2rng_match, &op->dev);
if (!match)
return -EINVAL;
- victoria_falls = (match->data != NULL);
+ multi_capable = (match->data != NULL);
n2rng_driver_version();
np = kzalloc(sizeof(*np), GFP_KERNEL);
@@ -640,8 +640,8 @@
INIT_DELAYED_WORK(&np->work, n2rng_work);
- if (victoria_falls)
- np->flags |= N2RNG_FLAG_VF;
+ if (multi_capable)
+ np->flags |= N2RNG_FLAG_MULTI;
err = -ENODEV;
np->hvapi_major = 2;
@@ -658,10 +658,10 @@
}
}
- if (np->flags & N2RNG_FLAG_VF) {
+ if (np->flags & N2RNG_FLAG_MULTI) {
if (np->hvapi_major < 2) {
- dev_err(&op->dev, "VF RNG requires HVAPI major "
- "version 2 or later, got %lu\n",
+ dev_err(&op->dev, "multi-unit-capable RNG requires "
+ "HVAPI major version 2 or later, got %lu\n",
np->hvapi_major);
goto out_hvapi_unregister;
}
@@ -688,8 +688,8 @@
goto out_free_units;
dev_info(&op->dev, "Found %s RNG, units: %d\n",
- ((np->flags & N2RNG_FLAG_VF) ?
- "Victoria Falls" : "Niagara2"),
+ ((np->flags & N2RNG_FLAG_MULTI) ?
+ "multi-unit-capable" : "single-unit"),
np->num_units);
np->hwrng.name = "n2rng";
@@ -751,6 +751,11 @@
.compatible = "SUNW,vf-rng",
.data = (void *) 1,
},
+ {
+ .name = "random-number-generator",
+ .compatible = "SUNW,kt-rng",
+ .data = (void *) 1,
+ },
{},
};
MODULE_DEVICE_TABLE(of, n2rng_match);
diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h
index 4bea07f..f244ac8 100644
--- a/drivers/char/hw_random/n2rng.h
+++ b/drivers/char/hw_random/n2rng.h
@@ -68,7 +68,7 @@
struct platform_device *op;
unsigned long flags;
-#define N2RNG_FLAG_VF 0x00000001 /* Victoria Falls RNG, else N2 */
+#define N2RNG_FLAG_MULTI 0x00000001 /* Multi-unit capable RNG */
#define N2RNG_FLAG_CONTROL 0x00000002 /* Operating in control domain */
#define N2RNG_FLAG_READY 0x00000008 /* Ready for hw-rng layer */
#define N2RNG_FLAG_SHUTDOWN 0x00000010 /* Driver unregistering */
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 7beb0e2..caf8012 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -534,6 +534,7 @@
struct duration_t *duration_cap;
ssize_t rc;
u32 timeout;
+ unsigned int scale = 1;
tpm_cmd.header.in = tpm_getcap_header;
tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
@@ -545,24 +546,30 @@
if (rc)
goto duration;
- if (be32_to_cpu(tpm_cmd.header.out.length)
- != 4 * sizeof(u32))
- goto duration;
+ if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+ be32_to_cpu(tpm_cmd.header.out.length)
+ != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32))
+ return;
timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout;
/* Don't overwrite default if value is 0 */
timeout = be32_to_cpu(timeout_cap->a);
+ if (timeout && timeout < 1000) {
+ /* timeouts in msec rather than usec */
+ scale = 1000;
+ chip->vendor.timeout_adjusted = true;
+ }
if (timeout)
- chip->vendor.timeout_a = usecs_to_jiffies(timeout);
+ chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale);
timeout = be32_to_cpu(timeout_cap->b);
if (timeout)
- chip->vendor.timeout_b = usecs_to_jiffies(timeout);
+ chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale);
timeout = be32_to_cpu(timeout_cap->c);
if (timeout)
- chip->vendor.timeout_c = usecs_to_jiffies(timeout);
+ chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale);
timeout = be32_to_cpu(timeout_cap->d);
if (timeout)
- chip->vendor.timeout_d = usecs_to_jiffies(timeout);
+ chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale);
duration:
tpm_cmd.header.in = tpm_getcap_header;
@@ -575,23 +582,31 @@
if (rc)
return;
- if (be32_to_cpu(tpm_cmd.header.out.return_code)
- != 3 * sizeof(u32))
+ if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+ be32_to_cpu(tpm_cmd.header.out.length)
+ != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
return;
+
duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
chip->vendor.duration[TPM_SHORT] =
usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
- /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
- * value wrong and apparently reports msecs rather than usecs. So we
- * fix up the resulting too-small TPM_SHORT value to make things work.
- */
- if (chip->vendor.duration[TPM_SHORT] < (HZ/100))
- chip->vendor.duration[TPM_SHORT] = HZ;
-
chip->vendor.duration[TPM_MEDIUM] =
usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium));
chip->vendor.duration[TPM_LONG] =
usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long));
+
+ /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
+ * value wrong and apparently reports msecs rather than usecs. So we
+ * fix up the resulting too-small TPM_SHORT value to make things work.
+ * We also scale the TPM_MEDIUM and TPM_LONG values by 1000.
+ */
+ if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) {
+ chip->vendor.duration[TPM_SHORT] = HZ;
+ chip->vendor.duration[TPM_MEDIUM] *= 1000;
+ chip->vendor.duration[TPM_LONG] *= 1000;
+ chip->vendor.duration_adjusted = true;
+ dev_info(chip->dev, "Adjusting TPM timeout parameters.");
+ }
}
EXPORT_SYMBOL_GPL(tpm_get_timeouts);
@@ -600,7 +615,7 @@
u8 data[] = {
0, 193, /* TPM_TAG_RQU_COMMAND */
0, 0, 0, 10, /* length */
- 0, 0, 0, 83, /* TPM_ORD_GetCapability */
+ 0, 0, 0, 83, /* TPM_ORD_ContinueSelfTest */
};
tpm_transmit(chip, data, sizeof(data));
@@ -863,18 +878,24 @@
data = tpm_cmd.params.readpubek_out_buffer;
str +=
sprintf(str,
- "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n"
- "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X"
- " %02X %02X %02X %02X %02X %02X %02X %02X\n"
- "Modulus length: %d\nModulus: \n",
- data[10], data[11], data[12], data[13], data[14],
- data[15], data[16], data[17], data[22], data[23],
- data[24], data[25], data[26], data[27], data[28],
- data[29], data[30], data[31], data[32], data[33],
- be32_to_cpu(*((__be32 *) (data + 34))));
+ "Algorithm: %02X %02X %02X %02X\n"
+ "Encscheme: %02X %02X\n"
+ "Sigscheme: %02X %02X\n"
+ "Parameters: %02X %02X %02X %02X "
+ "%02X %02X %02X %02X "
+ "%02X %02X %02X %02X\n"
+ "Modulus length: %d\n"
+ "Modulus:\n",
+ data[0], data[1], data[2], data[3],
+ data[4], data[5],
+ data[6], data[7],
+ data[12], data[13], data[14], data[15],
+ data[16], data[17], data[18], data[19],
+ data[20], data[21], data[22], data[23],
+ be32_to_cpu(*((__be32 *) (data + 24))));
for (i = 0; i < 256; i++) {
- str += sprintf(str, "%02X ", data[i + 38]);
+ str += sprintf(str, "%02X ", data[i + 28]);
if ((i + 1) % 16 == 0)
str += sprintf(str, "\n");
}
@@ -937,6 +958,35 @@
}
EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
+ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tpm_chip *chip = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d %d %d [%s]\n",
+ jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
+ jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
+ jiffies_to_usecs(chip->vendor.duration[TPM_LONG]),
+ chip->vendor.duration_adjusted
+ ? "adjusted" : "original");
+}
+EXPORT_SYMBOL_GPL(tpm_show_durations);
+
+ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tpm_chip *chip = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d %d %d %d [%s]\n",
+ jiffies_to_usecs(chip->vendor.timeout_a),
+ jiffies_to_usecs(chip->vendor.timeout_b),
+ jiffies_to_usecs(chip->vendor.timeout_c),
+ jiffies_to_usecs(chip->vendor.timeout_d),
+ chip->vendor.timeout_adjusted
+ ? "adjusted" : "original");
+}
+EXPORT_SYMBOL_GPL(tpm_show_timeouts);
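
Each attribute prints the raw values in microseconds followed by a tag saying whether tpm_get_timeouts() rescaled them, so a read of "durations" might return, with illustrative values only, "3000 20000 600000 [adjusted]".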
+
ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 72ddb03..9c4163c 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -56,6 +56,10 @@
char *);
extern ssize_t tpm_show_temp_deactivated(struct device *,
struct device_attribute *attr, char *);
+extern ssize_t tpm_show_durations(struct device *,
+ struct device_attribute *attr, char *);
+extern ssize_t tpm_show_timeouts(struct device *,
+ struct device_attribute *attr, char *);
struct tpm_chip;
@@ -67,6 +71,7 @@
unsigned long base; /* TPM base address */
int irq;
+ int probed_irq;
int region_size;
int have_region;
@@ -81,7 +86,9 @@
struct list_head list;
int locality;
unsigned long timeout_a, timeout_b, timeout_c, timeout_d; /* jiffies */
+ bool timeout_adjusted;
unsigned long duration[3]; /* jiffies */
+ bool duration_adjusted;
wait_queue_head_t read_queue;
wait_queue_head_t int_queue;
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index a605cb7..82facc9 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -330,12 +330,12 @@
pdev->dev.driver = &nsc_drv.driver;
pdev->dev.release = tpm_nsc_remove;
- if ((rc = platform_device_register(pdev)) < 0)
- goto err_free_dev;
+ if ((rc = platform_device_add(pdev)) < 0)
+ goto err_put_dev;
if (request_region(base, 2, "tpm_nsc0") == NULL ) {
rc = -EBUSY;
- goto err_unreg_dev;
+ goto err_del_dev;
}
if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) {
@@ -382,10 +382,10 @@
err_rel_reg:
release_region(base, 2);
-err_unreg_dev:
- platform_device_unregister(pdev);
-err_free_dev:
- kfree(pdev);
+err_del_dev:
+ platform_device_del(pdev);
+err_put_dev:
+ platform_device_put(pdev);
err_unreg_drv:
platform_driver_unregister(&nsc_drv);
return rc;
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index dd21df5..7fc2f10 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -26,6 +26,7 @@
#include <linux/interrupt.h>
#include <linux/wait.h>
#include <linux/acpi.h>
+#include <linux/freezer.h>
#include "tpm.h"
#define TPM_HEADER_SIZE 10
@@ -79,7 +80,7 @@
static LIST_HEAD(tis_chips);
static DEFINE_SPINLOCK(tis_lock);
-#ifdef CONFIG_ACPI
+#ifdef CONFIG_PNP
static int is_itpm(struct pnp_dev *dev)
{
struct acpi_device *acpi = pnp_acpi_device(dev);
@@ -92,11 +93,6 @@
return 0;
}
-#else
-static int is_itpm(struct pnp_dev *dev)
-{
- return 0;
-}
#endif
static int check_locality(struct tpm_chip *chip, int l)
@@ -120,7 +116,7 @@
static int request_locality(struct tpm_chip *chip, int l)
{
- unsigned long stop;
+ unsigned long stop, timeout;
long rc;
if (check_locality(chip, l) >= 0)
@@ -129,17 +125,25 @@
iowrite8(TPM_ACCESS_REQUEST_USE,
chip->vendor.iobase + TPM_ACCESS(l));
+ stop = jiffies + chip->vendor.timeout_a;
+
if (chip->vendor.irq) {
+again:
+ timeout = stop - jiffies;
+ if ((long)timeout <= 0)
+ return -1;
rc = wait_event_interruptible_timeout(chip->vendor.int_queue,
(check_locality
(chip, l) >= 0),
- chip->vendor.timeout_a);
+ timeout);
if (rc > 0)
return l;
-
+ if (rc == -ERESTARTSYS && freezing(current)) {
+ clear_thread_flag(TIF_SIGPENDING);
+ goto again;
+ }
} else {
/* wait for burstcount */
- stop = jiffies + chip->vendor.timeout_a;
do {
if (check_locality(chip, l) >= 0)
return l;
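
Both retry loops added to this file follow the same freezer-aware wait idiom: keep a fixed deadline, recompute the remaining budget on each pass, and if the sleep was broken only because the freezer is about to suspend the system, clear the fake signal and wait again. A condensed sketch (hypothetical condition cond and wait queue q):

	stop = jiffies + timeout_max;			/* fixed deadline */
	for (;;) {
		long remain = stop - jiffies;
		if (remain <= 0)
			return -ETIME;			/* budget exhausted */
		rc = wait_event_interruptible_timeout(q, cond, remain);
		if (rc > 0)
			return 0;			/* condition met */
		if (rc == -ERESTARTSYS && freezing(current))
			clear_thread_flag(TIF_SIGPENDING);	/* retry */
		else
			return rc < 0 ? rc : -ETIME;
	}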
@@ -196,15 +200,24 @@
if ((status & mask) == mask)
return 0;
+ stop = jiffies + timeout;
+
if (chip->vendor.irq) {
+again:
+ timeout = stop - jiffies;
+ if ((long)timeout <= 0)
+ return -ETIME;
rc = wait_event_interruptible_timeout(*queue,
((tpm_tis_status
(chip) & mask) ==
mask), timeout);
if (rc > 0)
return 0;
+ if (rc == -ERESTARTSYS && freezing(current)) {
+ clear_thread_flag(TIF_SIGPENDING);
+ goto again;
+ }
} else {
- stop = jiffies + timeout;
do {
msleep(TPM_TIMEOUT);
status = tpm_tis_status(chip);
@@ -288,11 +301,10 @@
* tpm.c can skip polling for the data to be available as the interrupt is
* waited for here
*/
-static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
+static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len)
{
int rc, status, burstcnt;
size_t count = 0;
- u32 ordinal;
if (request_locality(chip, 0) < 0)
return -EBUSY;
@@ -327,8 +339,7 @@
/* write last byte */
iowrite8(buf[count],
- chip->vendor.iobase +
- TPM_DATA_FIFO(chip->vendor.locality));
+ chip->vendor.iobase + TPM_DATA_FIFO(chip->vendor.locality));
wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c,
&chip->vendor.int_queue);
status = tpm_tis_status(chip);
@@ -337,6 +348,28 @@
goto out_err;
}
+ return 0;
+
+out_err:
+ tpm_tis_ready(chip);
+ release_locality(chip, chip->vendor.locality, 0);
+ return rc;
+}
+
+/*
+ * If interrupts are used (signaled by an irq set in the vendor structure)
+ * tpm.c can skip polling for the data to be available as the interrupt is
+ * waited for here
+ */
+static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+ int rc;
+ u32 ordinal;
+
+ rc = tpm_tis_send_data(chip, buf, len);
+ if (rc < 0)
+ return rc;
+
/* go and do it */
iowrite8(TPM_STS_GO,
chip->vendor.iobase + TPM_STS(chip->vendor.locality));
@@ -358,6 +391,47 @@
return rc;
}
+/*
+ * Early probing for iTPM with STS_DATA_EXPECT flaw.
+ * Try sending command without itpm flag set and if that
+ * fails, repeat with itpm flag set.
+ */
+static int probe_itpm(struct tpm_chip *chip)
+{
+ int rc = 0;
+ u8 cmd_getticks[] = {
+ 0x00, 0xc1, 0x00, 0x00, 0x00, 0x0a,
+ 0x00, 0x00, 0x00, 0xf1
+ };
+ size_t len = sizeof(cmd_getticks);
+ int rem_itpm = itpm;
+
+ itpm = 0;
+
+ rc = tpm_tis_send_data(chip, cmd_getticks, len);
+ if (rc == 0)
+ goto out;
+
+ tpm_tis_ready(chip);
+ release_locality(chip, chip->vendor.locality, 0);
+
+ itpm = 1;
+
+ rc = tpm_tis_send_data(chip, cmd_getticks, len);
+ if (rc == 0) {
+ dev_info(chip->dev, "Detected an iTPM.\n");
+ rc = 1;
+ } else
+ rc = -EFAULT;
+
+out:
+ itpm = rem_itpm;
+ tpm_tis_ready(chip);
+ release_locality(chip, chip->vendor.locality, 0);
+
+ return rc;
+}
+
static const struct file_operations tis_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
@@ -376,6 +450,8 @@
NULL);
static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
static struct attribute *tis_attrs[] = {
&dev_attr_pubek.attr,
@@ -385,7 +461,9 @@
&dev_attr_owned.attr,
&dev_attr_temp_deactivated.attr,
&dev_attr_caps.attr,
- &dev_attr_cancel.attr, NULL,
+ &dev_attr_cancel.attr,
+ &dev_attr_durations.attr,
+ &dev_attr_timeouts.attr, NULL,
};
static struct attribute_group tis_attr_grp = {
@@ -416,7 +494,7 @@
if (interrupt == 0)
return IRQ_NONE;
- chip->vendor.irq = irq;
+ chip->vendor.probed_irq = irq;
/* Clear interrupts handled with TPM_EOI */
iowrite32(interrupt,
@@ -464,7 +542,7 @@
resource_size_t len, unsigned int irq)
{
u32 vendor, intfcaps, intmask;
- int rc, i;
+ int rc, i, irq_s, irq_e;
struct tpm_chip *chip;
if (!(chip = tpm_register_hardware(dev, &tpm_tis)))
@@ -493,6 +571,14 @@
"1.2 TPM (device-id 0x%X, rev-id %d)\n",
vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
+ if (!itpm) {
+ itpm = probe_itpm(chip);
+ if (itpm < 0) {
+ rc = -ENODEV;
+ goto out_err;
+ }
+ }
+
if (itpm)
dev_info(dev, "Intel iTPM workaround enabled\n");
@@ -522,6 +608,9 @@
if (intfcaps & TPM_INTF_DATA_AVAIL_INT)
dev_dbg(dev, "\tData Avail Int Support\n");
+ /* get the timeouts before testing for irqs */
+ tpm_get_timeouts(chip);
+
/* INTERRUPT Setup */
init_waitqueue_head(&chip->vendor.read_queue);
init_waitqueue_head(&chip->vendor.int_queue);
@@ -540,13 +629,19 @@
if (interrupts)
chip->vendor.irq = irq;
if (interrupts && !chip->vendor.irq) {
- chip->vendor.irq =
+ irq_s =
ioread8(chip->vendor.iobase +
TPM_INT_VECTOR(chip->vendor.locality));
+ if (irq_s) {
+ irq_e = irq_s;
+ } else {
+ irq_s = 3;
+ irq_e = 15;
+ }
- for (i = 3; i < 16 && chip->vendor.irq == 0; i++) {
+ for (i = irq_s; i <= irq_e && chip->vendor.irq == 0; i++) {
iowrite8(i, chip->vendor.iobase +
- TPM_INT_VECTOR(chip->vendor.locality));
+ TPM_INT_VECTOR(chip->vendor.locality));
if (request_irq
(i, tis_int_probe, IRQF_SHARED,
chip->vendor.miscdev.name, chip) != 0) {
@@ -568,9 +663,22 @@
chip->vendor.iobase +
TPM_INT_ENABLE(chip->vendor.locality));
+ chip->vendor.probed_irq = 0;
+
/* Generate Interrupts */
tpm_gen_interrupt(chip);
+ chip->vendor.irq = chip->vendor.probed_irq;
+
+ /* free_irq will call into tis_int_probe; clear all irq status
+ bits we did not see while running tpm_gen_interrupt, so the
+ final handler invocation finds nothing pending */
+ iowrite32(ioread32
+ (chip->vendor.iobase +
+ TPM_INT_STATUS(chip->vendor.locality)),
+ chip->vendor.iobase +
+ TPM_INT_STATUS(chip->vendor.locality));
+
/* Turn off */
iowrite32(intmask,
chip->vendor.iobase +
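
Taken together with the tis_int_probe hunk earlier, this is one handshake: the probe ISR records whatever vector fired in vendor.probed_irq instead of publishing vendor.irq directly, tpm_gen_interrupt() provokes the TPM while vendor.irq is still unset, and only afterwards is probed_irq promoted to the live vendor.irq. A stray interrupt therefore cannot flip the driver into interrupt mode while the wait queues and timeouts are still being set up, and the TPM_INT_STATUS write-back acknowledges anything that fired unobserved before free_irq() invokes the handler one last time.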
@@ -609,7 +717,6 @@
list_add(&chip->vendor.list, &tis_chips);
spin_unlock(&tis_lock);
- tpm_get_timeouts(chip);
tpm_continue_selftest(chip);
return 0;
@@ -619,6 +726,29 @@
tpm_remove_hardware(chip->dev);
return rc;
}
+
+static void tpm_tis_reenable_interrupts(struct tpm_chip *chip)
+{
+ u32 intmask;
+
+ /* re-enable interrupts that the device may have lost or that
+ BIOS/firmware may have disabled */
+ iowrite8(chip->vendor.irq, chip->vendor.iobase +
+ TPM_INT_VECTOR(chip->vendor.locality));
+
+ intmask =
+ ioread32(chip->vendor.iobase +
+ TPM_INT_ENABLE(chip->vendor.locality));
+
+ intmask |= TPM_INTF_CMD_READY_INT
+ | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT
+ | TPM_INTF_STS_VALID_INT | TPM_GLOBAL_INT_ENABLE;
+
+ iowrite32(intmask,
+ chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality));
+}
+
#ifdef CONFIG_PNP
static int __devinit tpm_tis_pnp_init(struct pnp_dev *pnp_dev,
const struct pnp_device_id *pnp_id)
@@ -650,6 +780,9 @@
struct tpm_chip *chip = pnp_get_drvdata(dev);
int ret;
+ if (chip->vendor.irq)
+ tpm_tis_reenable_interrupts(chip);
+
ret = tpm_pm_resume(&dev->dev);
if (!ret)
tpm_continue_selftest(chip);
@@ -702,6 +835,11 @@
static int tpm_tis_resume(struct platform_device *dev)
{
+ struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
+
+ if (chip->vendor.irq)
+ tpm_tis_reenable_interrupts(chip);
+
return tpm_pm_resume(&dev->dev);
}
static struct platform_driver tis_drv = {
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 2e5b204..d0183dd 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1,6 +1,6 @@
/* n2_core.c: Niagara2 Stream Processing Unit (SPU) crypto support.
*
- * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2010, 2011 David S. Miller <davem@davemloft.net>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -31,8 +31,8 @@
#include "n2_core.h"
#define DRV_MODULE_NAME "n2_crypto"
-#define DRV_MODULE_VERSION "0.1"
-#define DRV_MODULE_RELDATE "April 29, 2010"
+#define DRV_MODULE_VERSION "0.2"
+#define DRV_MODULE_RELDATE "July 28, 2011"
static char version[] __devinitdata =
DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -1823,22 +1823,17 @@
static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node,
struct spu_mdesc_info *ip)
{
- const u64 *intr, *ino;
- int intr_len, ino_len;
+ const u64 *ino;
+ int ino_len;
int i;
- intr = mdesc_get_property(mdesc, node, "intr", &intr_len);
- if (!intr)
- return -ENODEV;
-
ino = mdesc_get_property(mdesc, node, "ino", &ino_len);
- if (!ino)
+ if (!ino) {
+ printk("NO 'ino'\n");
return -ENODEV;
+ }
- if (intr_len != ino_len)
- return -EINVAL;
-
- ip->num_intrs = intr_len / sizeof(u64);
+ ip->num_intrs = ino_len / sizeof(u64);
ip->ino_table = kzalloc((sizeof(struct ino_blob) *
ip->num_intrs),
GFP_KERNEL);
@@ -1847,7 +1842,7 @@
for (i = 0; i < ip->num_intrs; i++) {
struct ino_blob *b = &ip->ino_table[i];
- b->intr = intr[i];
+ b->intr = i + 1;
b->ino = ino[i];
}
@@ -2204,6 +2199,10 @@
.name = "n2cp",
.compatible = "SUNW,vf-cwq",
},
+ {
+ .name = "n2cp",
+ .compatible = "SUNW,kt-cwq",
+ },
{},
};
@@ -2228,6 +2227,10 @@
.name = "ncp",
.compatible = "SUNW,vf-mau",
},
+ {
+ .name = "ncp",
+ .compatible = "SUNW,kt-mau",
+ },
{},
};
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 95a08a8..5745b7f 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -271,7 +271,7 @@
unsigned long edtl;
int err;
struct iser_data_buf *data_buf;
- struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr;
+ struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
struct scsi_cmnd *sc = task->sc;
struct iser_tx_desc *tx_desc = &iser_task->desc;
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 56abf3d..d728875 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -154,10 +154,13 @@
{ 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
{ 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX },
{ 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
+ { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
+ { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
{ 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 },
{ 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
{ 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 },
{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
+ { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
{ 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
{ 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
@@ -236,9 +239,10 @@
XPAD_XBOX360_VENDOR(0x046d), /* Logitech X-Box 360 style controllers */
XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz X-Box 360 controllers */
XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f X-Box 360 controllers */
+ XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */
XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */
XPAD_XBOX360_VENDOR(0x146b), /* BigBen Interactive Controllers */
- XPAD_XBOX360_VENDOR(0x1bad), /* Rock Band Drums */
+ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band Guitar and Drums */
XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */
{ }
};
@@ -545,7 +549,7 @@
struct usb_endpoint_descriptor *ep_irq_out;
int error;
- if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX)
+ if (xpad->xtype == XTYPE_UNKNOWN)
return 0;
xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN,
@@ -579,13 +583,13 @@
static void xpad_stop_output(struct usb_xpad *xpad)
{
- if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX)
+ if (xpad->xtype != XTYPE_UNKNOWN)
usb_kill_urb(xpad->irq_out);
}
static void xpad_deinit_output(struct usb_xpad *xpad)
{
- if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX) {
+ if (xpad->xtype != XTYPE_UNKNOWN) {
usb_free_urb(xpad->irq_out);
usb_free_coherent(xpad->udev, XPAD_PKT_LEN,
xpad->odata, xpad->odata_dma);
@@ -632,6 +636,23 @@
return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+ case XTYPE_XBOX360W:
+ xpad->odata[0] = 0x00;
+ xpad->odata[1] = 0x01;
+ xpad->odata[2] = 0x0F;
+ xpad->odata[3] = 0xC0;
+ xpad->odata[4] = 0x00;
+ xpad->odata[5] = strong / 256;
+ xpad->odata[6] = weak / 256;
+ xpad->odata[7] = 0x00;
+ xpad->odata[8] = 0x00;
+ xpad->odata[9] = 0x00;
+ xpad->odata[10] = 0x00;
+ xpad->odata[11] = 0x00;
+ xpad->irq_out->transfer_buffer_length = 12;
+
+ return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+
default:
dbg("%s - rumble command sent to unsupported xpad type: %d",
__func__, xpad->xtype);
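
For the new wireless-pad case above: ff_rumble magnitudes are 16-bit (0-65535), so dividing strong and weak by 256 scales each into the single payload byte at offsets 5 and 6 of the fixed 12-byte wireless rumble report; the remaining bytes appear to be constant header and padding.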
@@ -644,7 +665,7 @@
static int xpad_init_ff(struct usb_xpad *xpad)
{
- if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX)
+ if (xpad->xtype == XTYPE_UNKNOWN)
return 0;
input_set_capability(xpad->dev, EV_FF, FF_RUMBLE);
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c
index af45d27..7b404e5 100644
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -9,7 +9,6 @@
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c
index 6315986..c770826 100644
--- a/drivers/input/keyboard/adp5589-keys.c
+++ b/drivers/input/keyboard/adp5589-keys.c
@@ -8,7 +8,6 @@
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 11478eb..19cfc0c 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -1578,14 +1578,14 @@
atkbd_platform_fixup = atkbd_apply_forced_release_keylist;
atkbd_platform_fixup_data = id->driver_data;
- return 0;
+ return 1;
}
static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id)
{
atkbd_platform_scancode_fixup = id->driver_data;
- return 0;
+ return 1;
}
static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = {
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 6e6145b..ce281d1 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -2,6 +2,7 @@
* Driver for keys on GPIO lines capable of generating interrupts.
*
* Copyright 2005 Phil Blundell
+ * Copyright 2010, 2011 David Jander <david@protonic.nl>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -25,6 +26,8 @@
#include <linux/gpio_keys.h>
#include <linux/workqueue.h>
#include <linux/gpio.h>
+#include <linux/of_platform.h>
+#include <linux/of_gpio.h>
struct gpio_button_data {
struct gpio_keys_button *button;
@@ -415,7 +418,7 @@
if (!button->can_disable)
irqflags |= IRQF_SHARED;
- error = request_any_context_irq(irq, gpio_keys_isr, irqflags, desc, bdata);
+ error = request_threaded_irq(irq, NULL, gpio_keys_isr, irqflags, desc, bdata);
if (error < 0) {
dev_err(dev, "Unable to claim irq %d; error %d\n",
irq, error);
@@ -445,15 +448,120 @@
ddata->disable(input->dev.parent);
}
+/*
+ * Handlers for alternative sources of platform_data
+ */
+#ifdef CONFIG_OF
+/*
+ * Translate OpenFirmware node properties into platform_data
+ */
+static int gpio_keys_get_devtree_pdata(struct device *dev,
+ struct gpio_keys_platform_data *pdata)
+{
+ struct device_node *node, *pp;
+ int i;
+ struct gpio_keys_button *buttons;
+ const u32 *reg;
+ int len;
+
+ node = dev->of_node;
+ if (node == NULL)
+ return -ENODEV;
+
+ memset(pdata, 0, sizeof *pdata);
+
+ pdata->rep = !!of_get_property(node, "autorepeat", &len);
+
+ /* First count the subnodes */
+ pdata->nbuttons = 0;
+ pp = NULL;
+ while ((pp = of_get_next_child(node, pp)))
+ pdata->nbuttons++;
+
+ if (pdata->nbuttons == 0)
+ return -ENODEV;
+
+ buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL);
+ if (!buttons)
+ return -ENOMEM;
+
+ pp = NULL;
+ i = 0;
+ while ((pp = of_get_next_child(node, pp))) {
+ enum of_gpio_flags flags;
+
+ if (!of_find_property(pp, "gpios", NULL)) {
+ pdata->nbuttons--;
+ dev_warn(dev, "Found button without gpios\n");
+ continue;
+ }
+ buttons[i].gpio = of_get_gpio_flags(pp, 0, &flags);
+ buttons[i].active_low = flags & OF_GPIO_ACTIVE_LOW;
+
+ reg = of_get_property(pp, "linux,code", &len);
+ if (!reg) {
+ dev_err(dev, "Button without keycode: 0x%x\n", buttons[i].gpio);
+ goto out_fail;
+ }
+ buttons[i].code = be32_to_cpup(reg);
+
+ buttons[i].desc = of_get_property(pp, "label", &len);
+
+ reg = of_get_property(pp, "linux,input-type", &len);
+ buttons[i].type = reg ? be32_to_cpup(reg) : EV_KEY;
+
+ buttons[i].wakeup = !!of_get_property(pp, "gpio-key,wakeup", NULL);
+
+ reg = of_get_property(pp, "debounce-interval", &len);
+ buttons[i].debounce_interval = reg ? be32_to_cpup(reg) : 5;
+
+ i++;
+ }
+
+ pdata->buttons = buttons;
+
+ return 0;
+
+out_fail:
+ kfree(buttons);
+ return -ENODEV;
+}
+
+static struct of_device_id gpio_keys_of_match[] = {
+ { .compatible = "gpio-keys", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, gpio_keys_of_match);
+
+#else
+
+static int gpio_keys_get_devtree_pdata(struct device *dev,
+ struct gpio_keys_platform_data *altp)
+{
+ return -ENODEV;
+}
+
+#define gpio_keys_of_match NULL
+
+#endif
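
For contrast with the new device-tree path, a board built without CONFIG_OF keeps supplying the same information as platform_data. A minimal hypothetical sketch, mirroring the properties parsed above field for field:

	static struct gpio_keys_button board_buttons[] = {
		{
			.code = KEY_UP,			/* "linux,code" */
			.gpio = 42,			/* hypothetical GPIO */
			.active_low = 1,		/* OF_GPIO_ACTIVE_LOW */
			.desc = "up key",		/* "label" */
			.type = EV_KEY,			/* "linux,input-type" */
			.wakeup = 0,			/* "gpio-key,wakeup" */
			.debounce_interval = 5,		/* ms */
		},
	};

	static struct gpio_keys_platform_data board_pdata = {
		.buttons = board_buttons,
		.nbuttons = ARRAY_SIZE(board_buttons),
		.rep = 1,			/* "autorepeat" */
	};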
+
static int __devinit gpio_keys_probe(struct platform_device *pdev)
{
struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
struct gpio_keys_drvdata *ddata;
struct device *dev = &pdev->dev;
+ struct gpio_keys_platform_data alt_pdata;
struct input_dev *input;
int i, error;
int wakeup = 0;
+ if (!pdata) {
+ error = gpio_keys_get_devtree_pdata(dev, &alt_pdata);
+ if (error)
+ return error;
+ pdata = &alt_pdata;
+ }
+
ddata = kzalloc(sizeof(struct gpio_keys_drvdata) +
pdata->nbuttons * sizeof(struct gpio_button_data),
GFP_KERNEL);
@@ -544,13 +652,15 @@
fail1:
input_free_device(input);
kfree(ddata);
+ /* If we have no platform_data, we allocated buttons dynamically. */
+ if (!pdev->dev.platform_data)
+ kfree(pdata->buttons);
return error;
}
static int __devexit gpio_keys_remove(struct platform_device *pdev)
{
- struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);
struct input_dev *input = ddata->input;
int i;
@@ -559,31 +669,39 @@
device_init_wakeup(&pdev->dev, 0);
- for (i = 0; i < pdata->nbuttons; i++) {
- int irq = gpio_to_irq(pdata->buttons[i].gpio);
+ for (i = 0; i < ddata->n_buttons; i++) {
+ int irq = gpio_to_irq(ddata->data[i].button->gpio);
free_irq(irq, &ddata->data[i]);
if (ddata->data[i].timer_debounce)
del_timer_sync(&ddata->data[i].timer);
cancel_work_sync(&ddata->data[i].work);
- gpio_free(pdata->buttons[i].gpio);
+ gpio_free(ddata->data[i].button->gpio);
}
input_unregister_device(input);
+ /*
+ * If we had no platform_data, we allocated buttons dynamically, and
+ * must free them here. ddata->data[0].button is the pointer to the
+ * beginning of the allocated array.
+ */
+ if (!pdev->dev.platform_data)
+ kfree(ddata->data[0].button);
+
+ kfree(ddata);
+
return 0;
}
-
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static int gpio_keys_suspend(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+ struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
int i;
- if (device_may_wakeup(&pdev->dev)) {
- for (i = 0; i < pdata->nbuttons; i++) {
- struct gpio_keys_button *button = &pdata->buttons[i];
+ if (device_may_wakeup(dev)) {
+ for (i = 0; i < ddata->n_buttons; i++) {
+ struct gpio_keys_button *button = ddata->data[i].button;
if (button->wakeup) {
int irq = gpio_to_irq(button->gpio);
enable_irq_wake(irq);
@@ -596,15 +714,13 @@
static int gpio_keys_resume(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);
- struct gpio_keys_platform_data *pdata = pdev->dev.platform_data;
+ struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev);
int i;
- for (i = 0; i < pdata->nbuttons; i++) {
+ for (i = 0; i < ddata->n_buttons; i++) {
- struct gpio_keys_button *button = &pdata->buttons[i];
- if (button->wakeup && device_may_wakeup(&pdev->dev)) {
+ struct gpio_keys_button *button = ddata->data[i].button;
+ if (button->wakeup && device_may_wakeup(dev)) {
int irq = gpio_to_irq(button->gpio);
disable_irq_wake(irq);
}
@@ -615,22 +731,18 @@
return 0;
}
-
-static const struct dev_pm_ops gpio_keys_pm_ops = {
- .suspend = gpio_keys_suspend,
- .resume = gpio_keys_resume,
-};
#endif
+static SIMPLE_DEV_PM_OPS(gpio_keys_pm_ops, gpio_keys_suspend, gpio_keys_resume);
+
static struct platform_driver gpio_keys_device_driver = {
.probe = gpio_keys_probe,
.remove = __devexit_p(gpio_keys_remove),
.driver = {
.name = "gpio-keys",
.owner = THIS_MODULE,
-#ifdef CONFIG_PM
.pm = &gpio_keys_pm_ops,
-#endif
+ .of_match_table = gpio_keys_of_match,
}
};
@@ -644,10 +756,10 @@
platform_driver_unregister(&gpio_keys_device_driver);
}
-module_init(gpio_keys_init);
+late_initcall(gpio_keys_init);
module_exit(gpio_keys_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Phil Blundell <pb@handhelds.org>");
-MODULE_DESCRIPTION("Keyboard driver for CPU GPIOs");
+MODULE_DESCRIPTION("Keyboard driver for GPIOs");
MODULE_ALIAS("platform:gpio-keys");
diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
index 71f744a8..ab0acaf 100644
--- a/drivers/input/keyboard/lm8323.c
+++ b/drivers/input/keyboard/lm8323.c
@@ -146,7 +146,6 @@
/* device lock */
struct mutex lock;
struct i2c_client *client;
- struct work_struct work;
struct input_dev *idev;
bool kp_enabled;
bool pm_suspend;
@@ -162,7 +161,6 @@
#define client_to_lm8323(c) container_of(c, struct lm8323_chip, client)
#define dev_to_lm8323(d) container_of(d, struct lm8323_chip, client->dev)
-#define work_to_lm8323(w) container_of(w, struct lm8323_chip, work)
#define cdev_to_pwm(c) container_of(c, struct lm8323_pwm, cdev)
#define work_to_pwm(w) container_of(w, struct lm8323_pwm, work)
@@ -375,9 +373,9 @@
* Bottom half: handle the interrupt by posting key events, or dealing with
* errors appropriately.
*/
-static void lm8323_work(struct work_struct *work)
+static irqreturn_t lm8323_irq(int irq, void *_lm)
{
- struct lm8323_chip *lm = work_to_lm8323(work);
+ struct lm8323_chip *lm = _lm;
u8 ints;
int i;
@@ -409,16 +407,6 @@
}
mutex_unlock(&lm->lock);
-}
-
-/*
- * We cannot use I2C in interrupt context, so we just schedule work.
- */
-static irqreturn_t lm8323_irq(int irq, void *data)
-{
- struct lm8323_chip *lm = data;
-
- schedule_work(&lm->work);
return IRQ_HANDLED;
}
@@ -675,7 +663,6 @@
lm->client = client;
lm->idev = idev;
mutex_init(&lm->lock);
- INIT_WORK(&lm->work, lm8323_work);
lm->size_x = pdata->size_x;
lm->size_y = pdata->size_y;
@@ -746,9 +733,8 @@
goto fail3;
}
- err = request_irq(client->irq, lm8323_irq,
- IRQF_TRIGGER_FALLING | IRQF_DISABLED,
- "lm8323", lm);
+ err = request_threaded_irq(client->irq, NULL, lm8323_irq,
+ IRQF_TRIGGER_LOW|IRQF_ONESHOT, "lm8323", lm);
if (err) {
dev_err(&client->dev, "could not get IRQ %d\n", client->irq);
goto fail4;
@@ -783,7 +769,6 @@
disable_irq_wake(client->irq);
free_irq(client->irq, lm);
- cancel_work_sync(&lm->work);
input_unregister_device(lm->idev);
diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c
index 0a9e811..1c1615d 100644
--- a/drivers/input/keyboard/mpr121_touchkey.c
+++ b/drivers/input/keyboard/mpr121_touchkey.c
@@ -43,14 +43,15 @@
* enabled capacitance sensing inputs and its run/suspend mode.
*/
#define ELECTRODE_CONF_ADDR 0x5e
+#define ELECTRODE_CONF_QUICK_CHARGE 0x80
#define AUTO_CONFIG_CTRL_ADDR 0x7b
#define AUTO_CONFIG_USL_ADDR 0x7d
#define AUTO_CONFIG_LSL_ADDR 0x7e
#define AUTO_CONFIG_TL_ADDR 0x7f
/* Threshold of touch/release trigger */
-#define TOUCH_THRESHOLD 0x0f
-#define RELEASE_THRESHOLD 0x0a
+#define TOUCH_THRESHOLD 0x08
+#define RELEASE_THRESHOLD 0x05
/* Masks for touch and release triggers */
#define TOUCH_STATUS_MASK 0xfff
/* MPR121 has 12 keys */
@@ -127,7 +128,7 @@
struct i2c_client *client)
{
const struct mpr121_init_register *reg;
- unsigned char usl, lsl, tl;
+ unsigned char usl, lsl, tl, eleconf;
int i, t, vdd, ret;
/* Set up touch/release threshold for ele0-ele11 */
@@ -163,8 +164,15 @@
ret = i2c_smbus_write_byte_data(client, AUTO_CONFIG_USL_ADDR, usl);
ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_LSL_ADDR, lsl);
ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_TL_ADDR, tl);
+
+ /*
+ * Set the quick-charge bit so the capacitive sensing inputs charge
+ * to a ready state quickly; without it the buttons may not function
+ * after system boot.
+ */
+ eleconf = mpr121->keycount | ELECTRODE_CONF_QUICK_CHARGE;
ret |= i2c_smbus_write_byte_data(client, ELECTRODE_CONF_ADDR,
- mpr121->keycount);
+ eleconf);
if (ret != 0)
goto err_i2c_write;
diff --git a/drivers/input/keyboard/pmic8xxx-keypad.c b/drivers/input/keyboard/pmic8xxx-keypad.c
index 6229c3e..e7cc51d 100644
--- a/drivers/input/keyboard/pmic8xxx-keypad.c
+++ b/drivers/input/keyboard/pmic8xxx-keypad.c
@@ -700,9 +700,9 @@
return 0;
err_pmic_reg_read:
- free_irq(kp->key_stuck_irq, NULL);
+ free_irq(kp->key_stuck_irq, kp);
err_req_stuck_irq:
- free_irq(kp->key_sense_irq, NULL);
+ free_irq(kp->key_sense_irq, kp);
err_gpio_config:
err_get_irq:
input_free_device(kp->input);
@@ -717,8 +717,8 @@
struct pmic8xxx_kp *kp = platform_get_drvdata(pdev);
device_init_wakeup(&pdev->dev, 0);
- free_irq(kp->key_stuck_irq, NULL);
- free_irq(kp->key_sense_irq, NULL);
+ free_irq(kp->key_stuck_irq, kp);
+ free_irq(kp->key_sense_irq, kp);
input_unregister_device(kp->input);
kfree(kp);
diff --git a/drivers/input/keyboard/qt1070.c b/drivers/input/keyboard/qt1070.c
index ca7b891..b21bf5b 100644
--- a/drivers/input/keyboard/qt1070.c
+++ b/drivers/input/keyboard/qt1070.c
@@ -239,8 +239,6 @@
input_unregister_device(data->input);
kfree(data);
- i2c_set_clientdata(client, NULL);
-
return 0;
}
diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c
index 6876700..934aeb583 100644
--- a/drivers/input/keyboard/sh_keysc.c
+++ b/drivers/input/keyboard/sh_keysc.c
@@ -291,7 +291,7 @@
return 0;
}
-#if CONFIG_PM_SLEEP
+#ifdef CONFIG_PM_SLEEP
static int sh_keysc_suspend(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index 2b3b73e..da3828f 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -657,7 +657,7 @@
input_set_drvdata(input_dev, kbc);
- input_dev->evbit[0] = BIT_MASK(EV_KEY);
+ input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
input_set_capability(input_dev, EV_MSC, MSC_SCAN);
input_dev->keycode = kbc->keycode;
diff --git a/drivers/input/keyboard/tnetv107x-keypad.c b/drivers/input/keyboard/tnetv107x-keypad.c
index c8f097a..1c58681 100644
--- a/drivers/input/keyboard/tnetv107x-keypad.c
+++ b/drivers/input/keyboard/tnetv107x-keypad.c
@@ -337,5 +337,5 @@
MODULE_AUTHOR("Cyril Chemparathy");
MODULE_DESCRIPTION("TNETV107X Keypad Driver");
-MODULE_ALIAS("platform: tnetv107x-keypad");
+MODULE_ALIAS("platform:tnetv107x-keypad");
MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index d1bf872..c9104bb 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -100,6 +100,27 @@
To compile this driver as a module, choose M here: the module
will be called max8925_onkey.
+config INPUT_MMA8450
+ tristate "MMA8450 - Freescale's 3-Axis, 8/12-bit Digital Accelerometer"
+ depends on I2C
+ select INPUT_POLLDEV
+ help
+ Say Y here if you want to support Freescale's MMA8450 accelerometer
+ connected through the I2C interface.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mma8450.
+
+config INPUT_MPU3050
+ tristate "MPU3050 Triaxial gyroscope sensor"
+ depends on I2C
+ help
+ Say Y here if you want to support InvenSense MPU3050
+ connected via an I2C bus.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mpu3050.
+
config INPUT_APANEL
tristate "Fujitsu Lifebook Application Panel buttons"
depends on X86 && I2C && LEDS_CLASS
@@ -209,6 +230,23 @@
To compile this driver as a module, choose M here: the module will
be called keyspan_remote.
+config INPUT_KXTJ9
+ tristate "Kionix KXTJ9 tri-axis digital accelerometer"
+ depends on I2C
+ help
+ Say Y here to enable support for the Kionix KXTJ9 digital tri-axis
+ accelerometer.
+
+ To compile this driver as a module, choose M here: the module will
+ be called kxtj9.
+
+config INPUT_KXTJ9_POLLED_MODE
+ bool "Enable polling mode support"
+ depends on INPUT_KXTJ9
+ select INPUT_POLLDEV
+ help
+ Say Y here if you need the accelerometer to work in polling mode.
+
config INPUT_POWERMATE
tristate "Griffin PowerMate and Contour Jog support"
depends on USB_ARCH_HAS_HCD
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 4da7c3a..299ad5e 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -25,8 +25,11 @@
obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o
obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o
obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o
+obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o
obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o
obj-$(CONFIG_INPUT_MAX8925_ONKEY) += max8925_onkey.o
+obj-$(CONFIG_INPUT_MMA8450) += mma8450.o
+obj-$(CONFIG_INPUT_MPU3050) += mpu3050.o
obj-$(CONFIG_INPUT_PCAP) += pcap_keys.o
obj-$(CONFIG_INPUT_PCF50633_PMU) += pcf50633-input.o
obj-$(CONFIG_INPUT_PCF8574) += pcf8574_keypad.o
@@ -46,4 +49,3 @@
obj-$(CONFIG_INPUT_WM831X_ON) += wm831x-on.o
obj-$(CONFIG_INPUT_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o
obj-$(CONFIG_INPUT_YEALINK) += yealink.o
-
diff --git a/drivers/input/misc/bfin_rotary.c b/drivers/input/misc/bfin_rotary.c
index 4f72bdd..d00edc9 100644
--- a/drivers/input/misc/bfin_rotary.c
+++ b/drivers/input/misc/bfin_rotary.c
@@ -6,7 +6,6 @@
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
new file mode 100644
index 0000000..c456f63
--- /dev/null
+++ b/drivers/input/misc/kxtj9.c
@@ -0,0 +1,671 @@
+/*
+ * Copyright (C) 2011 Kionix, Inc.
+ * Written by Chris Hudson <chudson@kionix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/input/kxtj9.h>
+#include <linux/input-polldev.h>
+
+#define NAME "kxtj9"
+#define G_MAX 8000
+/* OUTPUT REGISTERS */
+#define XOUT_L 0x06
+#define WHO_AM_I 0x0F
+/* CONTROL REGISTERS */
+#define INT_REL 0x1A
+#define CTRL_REG1 0x1B
+#define INT_CTRL1 0x1E
+#define DATA_CTRL 0x21
+/* CONTROL REGISTER 1 BITS */
+#define PC1_OFF 0x7F
+#define PC1_ON (1 << 7)
+/* Data ready function enable bit: set during probe if using irq mode */
+#define DRDYE (1 << 5)
+/* INTERRUPT CONTROL REGISTER 1 BITS */
+/* Set these during probe if using irq mode */
+#define KXTJ9_IEL (1 << 3)
+#define KXTJ9_IEA (1 << 4)
+#define KXTJ9_IEN (1 << 5)
+/* INPUT_ABS CONSTANTS */
+#define FUZZ 3
+#define FLAT 3
+/* RESUME STATE INDICES */
+#define RES_DATA_CTRL 0
+#define RES_CTRL_REG1 1
+#define RES_INT_CTRL1 2
+#define RESUME_ENTRIES 3
+
+/*
+ * The following table lists the maximum appropriate poll interval for each
+ * available output data rate.
+ */
+static const struct {
+ unsigned int cutoff;
+ u8 mask;
+} kxtj9_odr_table[] = {
+ { 3, ODR800F },
+ { 5, ODR400F },
+ { 10, ODR200F },
+ { 20, ODR100F },
+ { 40, ODR50F },
+ { 80, ODR25F },
+ { 0, ODR12_5F},
+};
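
kxtj9_update_odr() below walks this table in order and keeps the last mask written before poll_interval < cutoff first holds. A requested interval of 15 ms, for example, fails the 3, 5 and 10 ms cutoffs but passes 15 < 20, selecting ODR100F (100 Hz, a 10 ms output period, the slowest rate that still satisfies the request); the terminating { 0, ODR12_5F } entry catches any interval of 80 ms or more.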
+
+struct kxtj9_data {
+ struct i2c_client *client;
+ struct kxtj9_platform_data pdata;
+ struct input_dev *input_dev;
+#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE
+ struct input_polled_dev *poll_dev;
+#endif
+ unsigned int last_poll_interval;
+ u8 shift;
+ u8 ctrl_reg1;
+ u8 data_ctrl;
+ u8 int_ctrl;
+};
+
+static int kxtj9_i2c_read(struct kxtj9_data *tj9, u8 addr, u8 *data, int len)
+{
+ struct i2c_msg msgs[] = {
+ {
+ .addr = tj9->client->addr,
+ .flags = tj9->client->flags,
+ .len = 1,
+ .buf = &addr,
+ },
+ {
+ .addr = tj9->client->addr,
+ .flags = tj9->client->flags | I2C_M_RD,
+ .len = len,
+ .buf = data,
+ },
+ };
+
+ return i2c_transfer(tj9->client->adapter, msgs, 2);
+}
+
+static void kxtj9_report_acceleration_data(struct kxtj9_data *tj9)
+{
+ s16 acc_data[3]; /* Data bytes from hardware xL, xH, yL, yH, zL, zH */
+ s16 x, y, z;
+ int err;
+
+ err = kxtj9_i2c_read(tj9, XOUT_L, (u8 *)acc_data, 6);
+ if (err < 0)
+ dev_err(&tj9->client->dev, "accelerometer data read failed\n");
+
+ x = le16_to_cpu(acc_data[tj9->pdata.axis_map_x]) >> tj9->shift;
+ y = le16_to_cpu(acc_data[tj9->pdata.axis_map_y]) >> tj9->shift;
+ z = le16_to_cpu(acc_data[tj9->pdata.axis_map_z]) >> tj9->shift;
+
+ input_report_abs(tj9->input_dev, ABS_X, tj9->pdata.negate_x ? -x : x);
+ input_report_abs(tj9->input_dev, ABS_Y, tj9->pdata.negate_y ? -y : y);
+ input_report_abs(tj9->input_dev, ABS_Z, tj9->pdata.negate_z ? -z : z);
+ input_sync(tj9->input_dev);
+}
+
+static irqreturn_t kxtj9_isr(int irq, void *dev)
+{
+ struct kxtj9_data *tj9 = dev;
+ int err;
+
+ /* data ready is the only possible interrupt type */
+ kxtj9_report_acceleration_data(tj9);
+
+ err = i2c_smbus_read_byte_data(tj9->client, INT_REL);
+ if (err < 0)
+ dev_err(&tj9->client->dev,
+ "error clearing interrupt status: %d\n", err);
+
+ return IRQ_HANDLED;
+}
+
+static int kxtj9_update_g_range(struct kxtj9_data *tj9, u8 new_g_range)
+{
+ switch (new_g_range) {
+ case KXTJ9_G_2G:
+ tj9->shift = 4;
+ break;
+ case KXTJ9_G_4G:
+ tj9->shift = 3;
+ break;
+ case KXTJ9_G_8G:
+ tj9->shift = 2;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ tj9->ctrl_reg1 &= 0xe7;
+ tj9->ctrl_reg1 |= new_g_range;
+
+ return 0;
+}
+
+static int kxtj9_update_odr(struct kxtj9_data *tj9, unsigned int poll_interval)
+{
+ int err;
+ int i;
+
+ /* Use the lowest ODR that can support the requested poll interval */
+ for (i = 0; i < ARRAY_SIZE(kxtj9_odr_table); i++) {
+ tj9->data_ctrl = kxtj9_odr_table[i].mask;
+ if (poll_interval < kxtj9_odr_table[i].cutoff)
+ break;
+ }
+
+ err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0);
+ if (err < 0)
+ return err;
+
+ err = i2c_smbus_write_byte_data(tj9->client, DATA_CTRL, tj9->data_ctrl);
+ if (err < 0)
+ return err;
+
+ err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int kxtj9_device_power_on(struct kxtj9_data *tj9)
+{
+ if (tj9->pdata.power_on)
+ return tj9->pdata.power_on();
+
+ return 0;
+}
+
+static void kxtj9_device_power_off(struct kxtj9_data *tj9)
+{
+ int err;
+
+ tj9->ctrl_reg1 &= PC1_OFF;
+ err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+ if (err < 0)
+ dev_err(&tj9->client->dev, "soft power off failed\n");
+
+ if (tj9->pdata.power_off)
+ tj9->pdata.power_off();
+}
+
+static int kxtj9_enable(struct kxtj9_data *tj9)
+{
+ int err;
+
+ err = kxtj9_device_power_on(tj9);
+ if (err < 0)
+ return err;
+
+ /* ensure that PC1 is cleared before updating control registers */
+ err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0);
+ if (err < 0)
+ return err;
+
+ /* only write INT_CTRL_REG1 if in irq mode */
+ if (tj9->client->irq) {
+ err = i2c_smbus_write_byte_data(tj9->client,
+ INT_CTRL1, tj9->int_ctrl);
+ if (err < 0)
+ return err;
+ }
+
+ err = kxtj9_update_g_range(tj9, tj9->pdata.g_range);
+ if (err < 0)
+ return err;
+
+ /* turn on outputs */
+ tj9->ctrl_reg1 |= PC1_ON;
+ err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1);
+ if (err < 0)
+ return err;
+
+ err = kxtj9_update_odr(tj9, tj9->last_poll_interval);
+ if (err < 0)
+ return err;
+
+ /* clear initial interrupt if in irq mode */
+ if (tj9->client->irq) {
+ err = i2c_smbus_read_byte_data(tj9->client, INT_REL);
+ if (err < 0) {
+ dev_err(&tj9->client->dev,
+ "error clearing interrupt: %d\n", err);
+ goto fail;
+ }
+ }
+
+ return 0;
+
+fail:
+ kxtj9_device_power_off(tj9);
+ return err;
+}
+
+static void kxtj9_disable(struct kxtj9_data *tj9)
+{
+ kxtj9_device_power_off(tj9);
+}
+
+static int kxtj9_input_open(struct input_dev *input)
+{
+ struct kxtj9_data *tj9 = input_get_drvdata(input);
+
+ return kxtj9_enable(tj9);
+}
+
+static void kxtj9_input_close(struct input_dev *dev)
+{
+ struct kxtj9_data *tj9 = input_get_drvdata(dev);
+
+ kxtj9_disable(tj9);
+}
+
+static void __devinit kxtj9_init_input_device(struct kxtj9_data *tj9,
+ struct input_dev *input_dev)
+{
+ __set_bit(EV_ABS, input_dev->evbit);
+ input_set_abs_params(input_dev, ABS_X, -G_MAX, G_MAX, FUZZ, FLAT);
+ input_set_abs_params(input_dev, ABS_Y, -G_MAX, G_MAX, FUZZ, FLAT);
+ input_set_abs_params(input_dev, ABS_Z, -G_MAX, G_MAX, FUZZ, FLAT);
+
+ input_dev->name = "kxtj9_accel";
+ input_dev->id.bustype = BUS_I2C;
+ input_dev->dev.parent = &tj9->client->dev;
+}
+
+static int __devinit kxtj9_setup_input_device(struct kxtj9_data *tj9)
+{
+ struct input_dev *input_dev;
+ int err;
+
+ input_dev = input_allocate_device();
+ if (!input_dev) {
+ dev_err(&tj9->client->dev, "input device allocate failed\n");
+ return -ENOMEM;
+ }
+
+ tj9->input_dev = input_dev;
+
+ input_dev->open = kxtj9_input_open;
+ input_dev->close = kxtj9_input_close;
+ input_set_drvdata(input_dev, tj9);
+
+ kxtj9_init_input_device(tj9, input_dev);
+
+ err = input_register_device(tj9->input_dev);
+ if (err) {
+ dev_err(&tj9->client->dev,
+ "unable to register input polled device %s: %d\n",
+ tj9->input_dev->name, err);
+ input_free_device(tj9->input_dev);
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * When IRQ mode is selected, we need to provide an interface to allow the user
+ * to change the output data rate of the part. For consistency, we are using
+ * the set_poll method, which accepts a poll interval in milliseconds, and then
+ * calls update_odr() with this value as an argument. In IRQ mode the data
+ * outputs are not read at the requested poll interval; instead, the device
+ * runs at the lowest ODR that can support the requested interval, and the
+ * client application is responsible for retrieving data from the input
+ * node at the desired rate.
+ */
+
+/* Returns currently selected poll interval (in ms) */
+static ssize_t kxtj9_get_poll(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+
+ return sprintf(buf, "%d\n", tj9->last_poll_interval);
+}
+
+/* Allow users to select a new poll interval (in ms) */
+static ssize_t kxtj9_set_poll(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+ struct input_dev *input_dev = tj9->input_dev;
+ unsigned int interval;
+ int error;
+
+ error = kstrtouint(buf, 10, &interval);
+ if (error < 0)
+ return error;
+
+ /* Lock the device to prevent races with open/close (and itself) */
+ mutex_lock(&input_dev->mutex);
+
+ disable_irq(client->irq);
+
+ /*
+ * Set current interval to the greater of the minimum interval or
+ * the requested interval
+ */
+ tj9->last_poll_interval = max(interval, tj9->pdata.min_interval);
+
+ kxtj9_update_odr(tj9, tj9->last_poll_interval);
+
+ enable_irq(client->irq);
+ mutex_unlock(&input_dev->mutex);
+
+ return count;
+}
+
+static DEVICE_ATTR(poll, S_IRUGO|S_IWUSR, kxtj9_get_poll, kxtj9_set_poll);
+
+static struct attribute *kxtj9_attributes[] = {
+ &dev_attr_poll.attr,
+ NULL
+};
+
+static struct attribute_group kxtj9_attribute_group = {
+ .attrs = kxtj9_attributes
+};
+
+#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE
+static void kxtj9_poll(struct input_polled_dev *dev)
+{
+ struct kxtj9_data *tj9 = dev->private;
+ unsigned int poll_interval = dev->poll_interval;
+
+ kxtj9_report_acceleration_data(tj9);
+
+ if (poll_interval != tj9->last_poll_interval) {
+ kxtj9_update_odr(tj9, poll_interval);
+ tj9->last_poll_interval = poll_interval;
+ }
+}
+
+static void kxtj9_polled_input_open(struct input_polled_dev *dev)
+{
+ struct kxtj9_data *tj9 = dev->private;
+
+ kxtj9_enable(tj9);
+}
+
+static void kxtj9_polled_input_close(struct input_polled_dev *dev)
+{
+ struct kxtj9_data *tj9 = dev->private;
+
+ kxtj9_disable(tj9);
+}
+
+static int __devinit kxtj9_setup_polled_device(struct kxtj9_data *tj9)
+{
+ int err;
+ struct input_polled_dev *poll_dev;
+
+ poll_dev = input_allocate_polled_device();
+ if (!poll_dev) {
+ dev_err(&tj9->client->dev,
+ "Failed to allocate polled device\n");
+ return -ENOMEM;
+ }
+
+ tj9->poll_dev = poll_dev;
+ tj9->input_dev = poll_dev->input;
+
+ poll_dev->private = tj9;
+ poll_dev->poll = kxtj9_poll;
+ poll_dev->open = kxtj9_polled_input_open;
+ poll_dev->close = kxtj9_polled_input_close;
+
+ kxtj9_init_input_device(tj9, poll_dev->input);
+
+ err = input_register_polled_device(poll_dev);
+ if (err) {
+ dev_err(&tj9->client->dev,
+ "Unable to register polled device, err=%d\n", err);
+ input_free_polled_device(poll_dev);
+ return err;
+ }
+
+ return 0;
+}
+
+static void __devexit kxtj9_teardown_polled_device(struct kxtj9_data *tj9)
+{
+ input_unregister_polled_device(tj9->poll_dev);
+ input_free_polled_device(tj9->poll_dev);
+}
+
+#else
+
+static inline int kxtj9_setup_polled_device(struct kxtj9_data *tj9)
+{
+ return -ENOSYS;
+}
+
+static inline void kxtj9_teardown_polled_device(struct kxtj9_data *tj9)
+{
+}
+
+#endif
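
With these stubs, a chip wired without an interrupt line on a kernel lacking CONFIG_INPUT_KXTJ9_POLLED_MODE fails probe cleanly with -ENOSYS rather than registering an input device that could never report data.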
+
+static int __devinit kxtj9_verify(struct kxtj9_data *tj9)
+{
+ int retval;
+
+ retval = kxtj9_device_power_on(tj9);
+ if (retval < 0)
+ return retval;
+
+ retval = i2c_smbus_read_byte_data(tj9->client, WHO_AM_I);
+ if (retval < 0) {
+ dev_err(&tj9->client->dev, "read err int source\n");
+ goto out;
+ }
+
+ retval = retval != 0x06 ? -EIO : 0;
+
+out:
+ kxtj9_device_power_off(tj9);
+ return retval;
+}
+
+static int __devinit kxtj9_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ const struct kxtj9_platform_data *pdata = client->dev.platform_data;
+ struct kxtj9_data *tj9;
+ int err;
+
+ if (!i2c_check_functionality(client->adapter,
+ I2C_FUNC_I2C | I2C_FUNC_SMBUS_BYTE_DATA)) {
+ dev_err(&client->dev, "client is not i2c capable\n");
+ return -ENXIO;
+ }
+
+ if (!pdata) {
+ dev_err(&client->dev, "platform data is NULL; exiting\n");
+ return -EINVAL;
+ }
+
+ tj9 = kzalloc(sizeof(*tj9), GFP_KERNEL);
+ if (!tj9) {
+ dev_err(&client->dev,
+ "failed to allocate memory for module data\n");
+ return -ENOMEM;
+ }
+
+ tj9->client = client;
+ tj9->pdata = *pdata;
+
+ if (pdata->init) {
+ err = pdata->init();
+ if (err < 0)
+ goto err_free_mem;
+ }
+
+ err = kxtj9_verify(tj9);
+ if (err < 0) {
+ dev_err(&client->dev, "device not recognized\n");
+ goto err_pdata_exit;
+ }
+
+ i2c_set_clientdata(client, tj9);
+
+ tj9->ctrl_reg1 = tj9->pdata.res_12bit | tj9->pdata.g_range;
+ tj9->data_ctrl = tj9->pdata.data_odr_init;
+
+ if (client->irq) {
+ /* If in irq mode, populate INT_CTRL_REG1 and enable DRDY. */
+ tj9->int_ctrl |= KXTJ9_IEN | KXTJ9_IEA | KXTJ9_IEL;
+ tj9->ctrl_reg1 |= DRDYE;
+
+ err = kxtj9_setup_input_device(tj9);
+ if (err)
+ goto err_pdata_exit;
+
+ err = request_threaded_irq(client->irq, NULL, kxtj9_isr,
+ IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+ "kxtj9-irq", tj9);
+ if (err) {
+ dev_err(&client->dev, "request irq failed: %d\n", err);
+ goto err_destroy_input;
+ }
+
+ err = sysfs_create_group(&client->dev.kobj, &kxtj9_attribute_group);
+ if (err) {
+ dev_err(&client->dev, "sysfs create failed: %d\n", err);
+ goto err_free_irq;
+ }
+
+ } else {
+ err = kxtj9_setup_polled_device(tj9);
+ if (err)
+ goto err_pdata_exit;
+ }
+
+ return 0;
+
+err_free_irq:
+ free_irq(client->irq, tj9);
+err_destroy_input:
+ input_unregister_device(tj9->input_dev);
+err_pdata_exit:
+ if (tj9->pdata.exit)
+ tj9->pdata.exit();
+err_free_mem:
+ kfree(tj9);
+ return err;
+}
+
+static int __devexit kxtj9_remove(struct i2c_client *client)
+{
+ struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+
+ if (client->irq) {
+ sysfs_remove_group(&client->dev.kobj, &kxtj9_attribute_group);
+ free_irq(client->irq, tj9);
+ input_unregister_device(tj9->input_dev);
+ } else {
+ kxtj9_teardown_polled_device(tj9);
+ }
+
+ if (tj9->pdata.exit)
+ tj9->pdata.exit();
+
+ kfree(tj9);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int kxtj9_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+ struct input_dev *input_dev = tj9->input_dev;
+
+ mutex_lock(&input_dev->mutex);
+
+ if (input_dev->users)
+ kxtj9_disable(tj9);
+
+ mutex_unlock(&input_dev->mutex);
+ return 0;
+}
+
+static int kxtj9_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct kxtj9_data *tj9 = i2c_get_clientdata(client);
+ struct input_dev *input_dev = tj9->input_dev;
+ int retval = 0;
+
+ mutex_lock(&input_dev->mutex);
+
+ if (input_dev->users)
+ kxtj9_enable(tj9);
+
+ mutex_unlock(&input_dev->mutex);
+ return retval;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume);
+
+static const struct i2c_device_id kxtj9_id[] = {
+ { NAME, 0 },
+ { },
+};
+
+MODULE_DEVICE_TABLE(i2c, kxtj9_id);
+
+static struct i2c_driver kxtj9_driver = {
+ .driver = {
+ .name = NAME,
+ .owner = THIS_MODULE,
+ .pm = &kxtj9_pm_ops,
+ },
+ .probe = kxtj9_probe,
+ .remove = __devexit_p(kxtj9_remove),
+ .id_table = kxtj9_id,
+};
+
+static int __init kxtj9_init(void)
+{
+ return i2c_add_driver(&kxtj9_driver);
+}
+module_init(kxtj9_init);
+
+static void __exit kxtj9_exit(void)
+{
+ i2c_del_driver(&kxtj9_driver);
+}
+module_exit(kxtj9_exit);
+
+MODULE_DESCRIPTION("KXTJ9 accelerometer driver");
+MODULE_AUTHOR("Chris Hudson <chudson@kionix.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c
new file mode 100644
index 0000000..20f8f92
--- /dev/null
+++ b/drivers/input/misc/mma8450.c
@@ -0,0 +1,256 @@
+/*
+ * Driver for Freescale's 3-Axis Accelerometer MMA8450
+ *
+ * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>	/* sign_extend32() */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/input-polldev.h>
+
+#define MMA8450_DRV_NAME "mma8450"
+
+#define MODE_CHANGE_DELAY_MS 100
+#define POLL_INTERVAL 100
+#define POLL_INTERVAL_MAX 500
+
+/* register definitions */
+#define MMA8450_STATUS 0x00
+#define MMA8450_STATUS_ZXYDR 0x08
+
+#define MMA8450_OUT_X8 0x01
+#define MMA8450_OUT_Y8 0x02
+#define MMA8450_OUT_Z8 0x03
+
+#define MMA8450_OUT_X_LSB 0x05
+#define MMA8450_OUT_X_MSB 0x06
+#define MMA8450_OUT_Y_LSB 0x07
+#define MMA8450_OUT_Y_MSB 0x08
+#define MMA8450_OUT_Z_LSB 0x09
+#define MMA8450_OUT_Z_MSB 0x0a
+
+#define MMA8450_XYZ_DATA_CFG 0x16
+
+#define MMA8450_CTRL_REG1 0x38
+#define MMA8450_CTRL_REG2 0x39
+
+/* mma8450 status */
+struct mma8450 {
+ struct i2c_client *client;
+ struct input_polled_dev *idev;
+};
+
+static int mma8450_read(struct mma8450 *m, unsigned off)
+{
+ struct i2c_client *c = m->client;
+ int ret;
+
+ ret = i2c_smbus_read_byte_data(c, off);
+ if (ret < 0)
+ dev_err(&c->dev,
+ "failed to read register 0x%02x, error %d\n",
+ off, ret);
+
+ return ret;
+}
+
+static int mma8450_write(struct mma8450 *m, unsigned off, u8 v)
+{
+ struct i2c_client *c = m->client;
+ int error;
+
+ error = i2c_smbus_write_byte_data(c, off, v);
+ if (error < 0) {
+ dev_err(&c->dev,
+ "failed to write to register 0x%02x, error %d\n",
+ off, error);
+ return error;
+ }
+
+ return 0;
+}
+
+static int mma8450_read_xyz(struct mma8450 *m, int *x, int *y, int *z)
+{
+ struct i2c_client *c = m->client;
+ u8 buff[6];
+ int err;
+
+ err = i2c_smbus_read_i2c_block_data(c, MMA8450_OUT_X_LSB, 6, buff);
+ if (err < 0) {
+ dev_err(&c->dev,
+ "failed to read block data at 0x%02x, error %d\n",
+ MMA8450_OUT_X_LSB, err);
+ return err;
+ }
+
+ /* 12-bit two's complement samples (e.g. 0xff8 == -8): sign-extend
+ * from bit 11 to match the advertised -2048..2047 range */
+ *x = sign_extend32(((buff[1] << 4) & 0xff0) | (buff[0] & 0xf), 11);
+ *y = sign_extend32(((buff[3] << 4) & 0xff0) | (buff[2] & 0xf), 11);
+ *z = sign_extend32(((buff[5] << 4) & 0xff0) | (buff[4] & 0xf), 11);
+
+ return 0;
+}
+
+static void mma8450_poll(struct input_polled_dev *dev)
+{
+ struct mma8450 *m = dev->private;
+ int x, y, z;
+ int ret;
+ int err;
+
+ ret = mma8450_read(m, MMA8450_STATUS);
+ if (ret < 0)
+ return;
+
+ if (!(ret & MMA8450_STATUS_ZXYDR))
+ return;
+
+ err = mma8450_read_xyz(m, &x, &y, &z);
+ if (err)
+ return;
+
+ input_report_abs(dev->input, ABS_X, x);
+ input_report_abs(dev->input, ABS_Y, y);
+ input_report_abs(dev->input, ABS_Z, z);
+ input_sync(dev->input);
+}
+
+/* Initialize the MMA8450 chip */
+static void mma8450_open(struct input_polled_dev *dev)
+{
+ struct mma8450 *m = dev->private;
+ int err;
+
+ /* enable all events from X/Y/Z, no FIFO */
+ err = mma8450_write(m, MMA8450_XYZ_DATA_CFG, 0x07);
+ if (err)
+ return;
+
+ /*
+ * Sleep mode poll rate - 50Hz
+ * System output data rate - 400Hz
+ * Full scale selection - Active, +/- 2G
+ */
+ err = mma8450_write(m, MMA8450_CTRL_REG1, 0x01);
+ if (err < 0)
+ return;
+
+ msleep(MODE_CHANGE_DELAY_MS);
+}
+
+static void mma8450_close(struct input_polled_dev *dev)
+{
+ struct mma8450 *m = dev->private;
+
+ mma8450_write(m, MMA8450_CTRL_REG1, 0x00);
+ mma8450_write(m, MMA8450_CTRL_REG2, 0x01);
+}
+
+/*
+ * I2C init/probing/exit functions
+ */
+static int __devinit mma8450_probe(struct i2c_client *c,
+ const struct i2c_device_id *id)
+{
+ struct input_polled_dev *idev;
+ struct mma8450 *m;
+ int err;
+
+ m = kzalloc(sizeof(struct mma8450), GFP_KERNEL);
+ idev = input_allocate_polled_device();
+ if (!m || !idev) {
+ err = -ENOMEM;
+ goto err_free_mem;
+ }
+
+ m->client = c;
+ m->idev = idev;
+ i2c_set_clientdata(c, m);	/* mma8450_remove() relies on this */
+
+ idev->private = m;
+ idev->input->name = MMA8450_DRV_NAME;
+ idev->input->id.bustype = BUS_I2C;
+ idev->poll = mma8450_poll;
+ idev->poll_interval = POLL_INTERVAL;
+ idev->poll_interval_max = POLL_INTERVAL_MAX;
+ idev->open = mma8450_open;
+ idev->close = mma8450_close;
+
+ __set_bit(EV_ABS, idev->input->evbit);
+ input_set_abs_params(idev->input, ABS_X, -2048, 2047, 32, 32);
+ input_set_abs_params(idev->input, ABS_Y, -2048, 2047, 32, 32);
+ input_set_abs_params(idev->input, ABS_Z, -2048, 2047, 32, 32);
+
+ err = input_register_polled_device(idev);
+ if (err) {
+ dev_err(&c->dev, "failed to register polled input device\n");
+ goto err_free_mem;
+ }
+
+ return 0;
+
+err_free_mem:
+ input_free_polled_device(idev);
+ kfree(m);
+ return err;
+}
+
+static int __devexit mma8450_remove(struct i2c_client *c)
+{
+ struct mma8450 *m = i2c_get_clientdata(c);
+ struct input_polled_dev *idev = m->idev;
+
+ input_unregister_polled_device(idev);
+ input_free_polled_device(idev);
+ kfree(m);
+
+ return 0;
+}
+
+static const struct i2c_device_id mma8450_id[] = {
+ { MMA8450_DRV_NAME, 0 },
+ { },
+};
+MODULE_DEVICE_TABLE(i2c, mma8450_id);
+
+static struct i2c_driver mma8450_driver = {
+ .driver = {
+ .name = MMA8450_DRV_NAME,
+ .owner = THIS_MODULE,
+ },
+ .probe = mma8450_probe,
+ .remove = __devexit_p(mma8450_remove),
+ .id_table = mma8450_id,
+};
+
+static int __init mma8450_init(void)
+{
+ return i2c_add_driver(&mma8450_driver);
+}
+module_init(mma8450_init);
+
+static void __exit mma8450_exit(void)
+{
+ i2c_del_driver(&mma8450_driver);
+}
+module_exit(mma8450_exit);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("MMA8450 3-Axis Accelerometer Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c
new file mode 100644
index 0000000..b95fac1
--- /dev/null
+++ b/drivers/input/misc/mpu3050.c
@@ -0,0 +1,376 @@
+/*
+ * MPU3050 Tri-axis gyroscope driver
+ *
+ * Copyright (C) 2011 Wistron Co., Ltd.
+ * Joseph Lai <joseph_lai@wistron.com>
+ *
+ * Trimmed down by Alan Cox <alan@linux.intel.com> to produce this version
+ *
+ * This is a 'lite' version of the driver while we consider the right way
+ * to present the other features to user space. In particular it requires
+ * that the device have an IRQ, and it only provides an input interface,
+ * so it is not much use for device orientation. A fuller version is
+ * available from the MeeGo tree.
+ *
+ * This program is based on bma023.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#define MPU3050_CHIP_ID_REG 0x00
+#define MPU3050_CHIP_ID 0x69
+#define MPU3050_XOUT_H 0x1D
+#define MPU3050_PWR_MGM 0x3E
+#define MPU3050_PWR_MGM_POS 6
+#define MPU3050_PWR_MGM_MASK 0x40
+
+#define MPU3050_AUTO_DELAY 1000
+
+#define MPU3050_MIN_VALUE -32768
+#define MPU3050_MAX_VALUE 32767
+
+struct axis_data {
+ s16 x;
+ s16 y;
+ s16 z;
+};
+
+struct mpu3050_sensor {
+ struct i2c_client *client;
+ struct device *dev;
+ struct input_dev *idev;
+};
+
+/**
+ * mpu3050_xyz_read_reg - read the axis values
+ * @client: i2c client of the sensor
+ * @buffer: buffer that receives the register data
+ * @length: number of bytes to read
+ *
+ * Reads @length bytes of axis data starting at MPU3050_XOUT_H in one
+ * transaction, or returns a negative error code on failure.
+ */
+static int mpu3050_xyz_read_reg(struct i2c_client *client,
+ u8 *buffer, int length)
+{
+	/*
+	 * Annoyingly, we can't make this const because the i2c layer
+	 * doesn't declare input buffers const.
+	 */
+ char cmd = MPU3050_XOUT_H;
+ struct i2c_msg msg[] = {
+ {
+ .addr = client->addr,
+ .flags = 0,
+ .len = 1,
+ .buf = &cmd,
+ },
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = length,
+ .buf = buffer,
+ },
+ };
+
+ return i2c_transfer(client->adapter, msg, 2);
+}
+
+/**
+ * mpu3050_read_xyz - get co-ordinates from device
+ * @client: i2c client of the sensor
+ * @coords: co-ordinates to update
+ *
+ * Reads the sensor and stores the converted X, Y and Z co-ordinates in
+ * @coords.
+ */
+static void mpu3050_read_xyz(struct i2c_client *client,
+ struct axis_data *coords)
+{
+ u16 buffer[3];
+
+ mpu3050_xyz_read_reg(client, (u8 *)buffer, 6);
+ coords->x = be16_to_cpu(buffer[0]);
+ coords->y = be16_to_cpu(buffer[1]);
+ coords->z = be16_to_cpu(buffer[2]);
+ dev_dbg(&client->dev, "%s: x %d, y %d, z %d\n", __func__,
+ coords->x, coords->y, coords->z);
+}
+
+/**
+ * mpu3050_set_power_mode - set the power mode
+ * @client: i2c client for the sensor
+ * @val: power mode to select: 1 for normal power, 0 for low power
+ *
+ * Put device to normal-power mode or low-power mode.
+ */
+static void mpu3050_set_power_mode(struct i2c_client *client, u8 val)
+{
+ u8 value;
+
+ value = i2c_smbus_read_byte_data(client, MPU3050_PWR_MGM);
+ value = (value & ~MPU3050_PWR_MGM_MASK) |
+ (((val << MPU3050_PWR_MGM_POS) & MPU3050_PWR_MGM_MASK) ^
+ MPU3050_PWR_MGM_MASK);
+ i2c_smbus_write_byte_data(client, MPU3050_PWR_MGM, value);
+}
+
+/**
+ * mpu3050_input_open - called on input event open
+ * @input: input dev of opened device
+ *
+ * The input layer calls this function when the input device is opened.
+ * It takes a runtime-PM reference to resume the device, after which the
+ * device is ready to provide data.
+ */
+static int mpu3050_input_open(struct input_dev *input)
+{
+ struct mpu3050_sensor *sensor = input_get_drvdata(input);
+
+ pm_runtime_get(sensor->dev);
+
+ return 0;
+}
+
+/**
+ * mpu3050_input_close - called on input event close
+ * @input: input dev of closed device
+ *
+ * The input layer calls this function when the input device is closed.
+ * It drops the runtime-PM reference so that the device may be suspended.
+ */
+static void mpu3050_input_close(struct input_dev *input)
+{
+ struct mpu3050_sensor *sensor = input_get_drvdata(input);
+
+ pm_runtime_put(sensor->dev);
+}
+
+/**
+ * mpu3050_interrupt_thread - handle an IRQ
+ * @irq: interrupt number
+ * @data: the sensor
+ *
+ * Called by the kernel in a threaded interrupt context after an
+ * interrupt occurs. Reads the sensor data and generates an input
+ * event for it.
+ */
+static irqreturn_t mpu3050_interrupt_thread(int irq, void *data)
+{
+ struct mpu3050_sensor *sensor = data;
+ struct axis_data axis;
+
+ mpu3050_read_xyz(sensor->client, &axis);
+
+ input_report_abs(sensor->idev, ABS_X, axis.x);
+ input_report_abs(sensor->idev, ABS_Y, axis.y);
+ input_report_abs(sensor->idev, ABS_Z, axis.z);
+ input_sync(sensor->idev);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * mpu3050_probe - device detection callback
+ * @client: i2c client of found device
+ * @id: id match information
+ *
+ * The I2C layer calls us when it believes a sensor is present at this
+ * address. Probe to see if this is correct and to validate the device.
+ *
+ * If present, install the relevant sysfs interfaces and input device.
+ */
+static int __devinit mpu3050_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct mpu3050_sensor *sensor;
+ struct input_dev *idev;
+ int ret;
+ int error;
+
+ sensor = kzalloc(sizeof(struct mpu3050_sensor), GFP_KERNEL);
+ idev = input_allocate_device();
+ if (!sensor || !idev) {
+ dev_err(&client->dev, "failed to allocate driver data\n");
+ error = -ENOMEM;
+ goto err_free_mem;
+ }
+
+	sensor->client = client;
+	sensor->dev = &client->dev;
+	sensor->idev = idev;
+	i2c_set_clientdata(client, sensor);	/* mpu3050_remove() reads this back */
+
+ mpu3050_set_power_mode(client, 1);
+ msleep(10);
+
+ ret = i2c_smbus_read_byte_data(client, MPU3050_CHIP_ID_REG);
+ if (ret < 0) {
+ dev_err(&client->dev, "failed to detect device\n");
+ error = -ENXIO;
+ goto err_free_mem;
+ }
+
+ if (ret != MPU3050_CHIP_ID) {
+ dev_err(&client->dev, "unsupported chip id\n");
+ error = -ENXIO;
+ goto err_free_mem;
+ }
+
+ idev->name = "MPU3050";
+ idev->id.bustype = BUS_I2C;
+ idev->dev.parent = &client->dev;
+
+ idev->open = mpu3050_input_open;
+ idev->close = mpu3050_input_close;
+
+ __set_bit(EV_ABS, idev->evbit);
+ input_set_abs_params(idev, ABS_X,
+ MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+ input_set_abs_params(idev, ABS_Y,
+ MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+ input_set_abs_params(idev, ABS_Z,
+ MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+
+ input_set_drvdata(idev, sensor);
+
+ pm_runtime_set_active(&client->dev);
+
+ error = request_threaded_irq(client->irq,
+ NULL, mpu3050_interrupt_thread,
+ IRQF_TRIGGER_RISING,
+ "mpu_int", sensor);
+ if (error) {
+ dev_err(&client->dev,
+ "can't get IRQ %d, error %d\n", client->irq, error);
+ goto err_pm_set_suspended;
+ }
+
+ error = input_register_device(idev);
+ if (error) {
+ dev_err(&client->dev, "failed to register input device\n");
+ goto err_free_irq;
+ }
+
+ pm_runtime_enable(&client->dev);
+ pm_runtime_set_autosuspend_delay(&client->dev, MPU3050_AUTO_DELAY);
+
+ return 0;
+
+err_free_irq:
+ free_irq(client->irq, sensor);
+err_pm_set_suspended:
+ pm_runtime_set_suspended(&client->dev);
+err_free_mem:
+	input_free_device(idev);	/* never registered on this path */
+ kfree(sensor);
+ return error;
+}
+
+/**
+ * mpu3050_remove - remove a sensor
+ * @client: i2c client of sensor being removed
+ *
+ * Our sensor is going away, clean up the resources.
+ */
+static int __devexit mpu3050_remove(struct i2c_client *client)
+{
+ struct mpu3050_sensor *sensor = i2c_get_clientdata(client);
+
+ pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
+
+ free_irq(client->irq, sensor);
+ input_unregister_device(sensor->idev);
+ kfree(sensor);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+/**
+ * mpu3050_suspend - called on device suspend
+ * @dev: device being suspended
+ *
+ * Put the device into sleep mode before we suspend the machine.
+ */
+static int mpu3050_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ mpu3050_set_power_mode(client, 0);
+
+ return 0;
+}
+
+/**
+ * mpu3050_resume - called on device resume
+ * @dev: device being resumed
+ *
+ * Put the device into powered mode on resume.
+ */
+static int mpu3050_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+
+ mpu3050_set_power_mode(client, 1);
+ msleep(100); /* wait for gyro chip resume */
+
+ return 0;
+}
+#endif
+
+static UNIVERSAL_DEV_PM_OPS(mpu3050_pm, mpu3050_suspend, mpu3050_resume, NULL);
+
+static const struct i2c_device_id mpu3050_ids[] = {
+ { "mpu3050", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, mpu3050_ids);
+
+static struct i2c_driver mpu3050_i2c_driver = {
+ .driver = {
+ .name = "mpu3050",
+ .owner = THIS_MODULE,
+ .pm = &mpu3050_pm,
+ },
+ .probe = mpu3050_probe,
+ .remove = __devexit_p(mpu3050_remove),
+ .id_table = mpu3050_ids,
+};
+
+static int __init mpu3050_init(void)
+{
+ return i2c_add_driver(&mpu3050_i2c_driver);
+}
+module_init(mpu3050_init);
+
+static void __exit mpu3050_exit(void)
+{
+ i2c_del_driver(&mpu3050_i2c_driver);
+}
+module_exit(mpu3050_exit);
+
+MODULE_AUTHOR("Wistron Corp.");
+MODULE_DESCRIPTION("MPU3050 Tri-axis gyroscope driver");
+MODULE_LICENSE("GPL");
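Note on mpu3050_set_power_mode(): bit 6 of MPU3050_PWR_MGM is the chip's
SLEEP bit, so the sense of @val is inverted on its way into the register;
the XOR with MPU3050_PWR_MGM_MASK turns val == 1 (normal power) into a
cleared sleep bit and val == 0 (low power) into a set sleep bit. The
computation in isolation, as a sketch (the helper name is invented):

	static u8 mpu3050_pwr_mgm_bits(u8 old, u8 val)
	{
		return (old & ~MPU3050_PWR_MGM_MASK) |
		       (((val << MPU3050_PWR_MGM_POS) & MPU3050_PWR_MGM_MASK) ^
			MPU3050_PWR_MGM_MASK);
	}

So mpu3050_pwr_mgm_bits(0x40, 1) == 0x00 (wake the chip) and
mpu3050_pwr_mgm_bits(0x00, 0) == 0x40 (put it to sleep).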
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 62bae99..ad2e51c 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -373,7 +373,7 @@
static int __init xenkbd_init(void)
{
- if (!xen_pv_domain())
+ if (!xen_domain())
return -ENODEV;
/* Nothing to do if running in dom0. */
diff --git a/drivers/input/mouse/gpio_mouse.c b/drivers/input/mouse/gpio_mouse.c
index 7b6ce17..58902fb 100644
--- a/drivers/input/mouse/gpio_mouse.c
+++ b/drivers/input/mouse/gpio_mouse.c
@@ -191,7 +191,7 @@
}
module_exit(gpio_mouse_exit);
-MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
MODULE_DESCRIPTION("GPIO mouse driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:gpio_mouse"); /* work with hotplug and coldplug */
diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c
index c31ad11..83bcaba 100644
--- a/drivers/input/mouse/lifebook.c
+++ b/drivers/input/mouse/lifebook.c
@@ -33,7 +33,7 @@
static int lifebook_limit_serio3(const struct dmi_system_id *d)
{
desired_serio_phys = "isa0060/serio3";
- return 0;
+ return 1;
}
static bool lifebook_use_6byte_proto;
@@ -41,7 +41,7 @@
static int lifebook_set_6byte_proto(const struct dmi_system_id *d)
{
lifebook_use_6byte_proto = true;
- return 0;
+ return 1;
}
static const struct dmi_system_id __initconst lifebook_dmi_table[] = {
diff --git a/drivers/input/mouse/pxa930_trkball.c b/drivers/input/mouse/pxa930_trkball.c
index 943cfec..6c5d84f 100644
--- a/drivers/input/mouse/pxa930_trkball.c
+++ b/drivers/input/mouse/pxa930_trkball.c
@@ -12,7 +12,6 @@
#include <linux/init.h>
#include <linux/input.h>
-#include <linux/version.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/platform_device.h>
diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c
index 1242775..2fc887a 100644
--- a/drivers/input/mouse/sentelic.c
+++ b/drivers/input/mouse/sentelic.c
@@ -20,7 +20,6 @@
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/input.h>
#include <linux/ctype.h>
#include <linux/libps2.h>
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index e06e045..5538fc6 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -207,27 +207,37 @@
static int synaptics_resolution(struct psmouse *psmouse)
{
struct synaptics_data *priv = psmouse->private;
- unsigned char res[3];
- unsigned char max[3];
+ unsigned char resp[3];
if (SYN_ID_MAJOR(priv->identity) < 4)
return 0;
- if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, res) == 0) {
- if (res[0] != 0 && (res[1] & 0x80) && res[2] != 0) {
- priv->x_res = res[0]; /* x resolution in units/mm */
- priv->y_res = res[2]; /* y resolution in units/mm */
+ if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, resp) == 0) {
+ if (resp[0] != 0 && (resp[1] & 0x80) && resp[2] != 0) {
+ priv->x_res = resp[0]; /* x resolution in units/mm */
+ priv->y_res = resp[2]; /* y resolution in units/mm */
}
}
if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 &&
SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) {
- if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_DIMENSIONS, max)) {
- printk(KERN_ERR "Synaptics claims to have dimensions query,"
- " but I'm not able to read it.\n");
+ if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) {
+ printk(KERN_ERR "Synaptics claims to have max coordinates"
+ " query, but I'm not able to read it.\n");
} else {
- priv->x_max = (max[0] << 5) | ((max[1] & 0x0f) << 1);
- priv->y_max = (max[2] << 5) | ((max[1] & 0xf0) >> 3);
+ priv->x_max = (resp[0] << 5) | ((resp[1] & 0x0f) << 1);
+ priv->y_max = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3);
+ }
+ }
+
+ if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 &&
+ SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c)) {
+ if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MIN_COORDS, resp)) {
+ printk(KERN_ERR "Synaptics claims to have min coordinates"
+ " query, but I'm not able to read it.\n");
+ } else {
+ priv->x_min = (resp[0] << 5) | ((resp[1] & 0x0f) << 1);
+ priv->y_min = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3);
}
}
@@ -406,26 +416,10 @@
memset(hw, 0, sizeof(struct synaptics_hw_state));
if (SYN_MODEL_NEWABS(priv->model_id)) {
- hw->x = (((buf[3] & 0x10) << 8) |
- ((buf[1] & 0x0f) << 8) |
- buf[4]);
- hw->y = (((buf[3] & 0x20) << 7) |
- ((buf[1] & 0xf0) << 4) |
- buf[5]);
-
- hw->z = buf[2];
hw->w = (((buf[0] & 0x30) >> 2) |
((buf[0] & 0x04) >> 1) |
((buf[3] & 0x04) >> 2));
- if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) {
- /* Gesture packet: (x, y, z) at half resolution */
- priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1;
- priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1;
- priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1;
- return 1;
- }
-
hw->left = (buf[0] & 0x01) ? 1 : 0;
hw->right = (buf[0] & 0x02) ? 1 : 0;
@@ -448,6 +442,22 @@
hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0;
}
+ if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) {
+ /* Gesture packet: (x, y, z) at half resolution */
+ priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1;
+ priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1;
+ priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1;
+ return 1;
+ }
+
+ hw->x = (((buf[3] & 0x10) << 8) |
+ ((buf[1] & 0x0f) << 8) |
+ buf[4]);
+ hw->y = (((buf[3] & 0x20) << 7) |
+ ((buf[1] & 0xf0) << 4) |
+ buf[5]);
+ hw->z = buf[2];
+
if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) &&
((buf[0] ^ buf[3]) & 0x02)) {
switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) {
@@ -485,7 +495,8 @@
return 0;
}
-static void set_slot(struct input_dev *dev, int slot, bool active, int x, int y)
+static void synaptics_report_semi_mt_slot(struct input_dev *dev, int slot,
+ bool active, int x, int y)
{
input_mt_slot(dev, slot);
input_mt_report_slot_state(dev, MT_TOOL_FINGER, active);
@@ -502,14 +513,16 @@
int num_fingers)
{
if (num_fingers >= 2) {
- set_slot(dev, 0, true, min(a->x, b->x), min(a->y, b->y));
- set_slot(dev, 1, true, max(a->x, b->x), max(a->y, b->y));
+ synaptics_report_semi_mt_slot(dev, 0, true, min(a->x, b->x),
+ min(a->y, b->y));
+ synaptics_report_semi_mt_slot(dev, 1, true, max(a->x, b->x),
+ max(a->y, b->y));
} else if (num_fingers == 1) {
- set_slot(dev, 0, true, a->x, a->y);
- set_slot(dev, 1, false, 0, 0);
+ synaptics_report_semi_mt_slot(dev, 0, true, a->x, a->y);
+ synaptics_report_semi_mt_slot(dev, 1, false, 0, 0);
} else {
- set_slot(dev, 0, false, 0, 0);
- set_slot(dev, 1, false, 0, 0);
+ synaptics_report_semi_mt_slot(dev, 0, false, 0, 0);
+ synaptics_report_semi_mt_slot(dev, 1, false, 0, 0);
}
}
@@ -684,23 +697,36 @@
static void set_input_params(struct input_dev *dev, struct synaptics_data *priv)
{
int i;
+ int fuzz = SYN_CAP_REDUCED_FILTERING(priv->ext_cap_0c) ?
+ SYN_REDUCED_FILTER_FUZZ : 0;
__set_bit(INPUT_PROP_POINTER, dev->propbit);
__set_bit(EV_ABS, dev->evbit);
input_set_abs_params(dev, ABS_X,
- XMIN_NOMINAL, priv->x_max ?: XMAX_NOMINAL, 0, 0);
+ priv->x_min ?: XMIN_NOMINAL,
+ priv->x_max ?: XMAX_NOMINAL,
+ fuzz, 0);
input_set_abs_params(dev, ABS_Y,
- YMIN_NOMINAL, priv->y_max ?: YMAX_NOMINAL, 0, 0);
+ priv->y_min ?: YMIN_NOMINAL,
+ priv->y_max ?: YMAX_NOMINAL,
+ fuzz, 0);
input_set_abs_params(dev, ABS_PRESSURE, 0, 255, 0, 0);
if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c)) {
__set_bit(INPUT_PROP_SEMI_MT, dev->propbit);
input_mt_init_slots(dev, 2);
- input_set_abs_params(dev, ABS_MT_POSITION_X, XMIN_NOMINAL,
- priv->x_max ?: XMAX_NOMINAL, 0, 0);
- input_set_abs_params(dev, ABS_MT_POSITION_Y, YMIN_NOMINAL,
- priv->y_max ?: YMAX_NOMINAL, 0, 0);
+ input_set_abs_params(dev, ABS_MT_POSITION_X,
+ priv->x_min ?: XMIN_NOMINAL,
+ priv->x_max ?: XMAX_NOMINAL,
+ fuzz, 0);
+ input_set_abs_params(dev, ABS_MT_POSITION_Y,
+ priv->y_min ?: YMIN_NOMINAL,
+ priv->y_max ?: YMAX_NOMINAL,
+ fuzz, 0);
+
+ input_abs_set_res(dev, ABS_MT_POSITION_X, priv->x_res);
+ input_abs_set_res(dev, ABS_MT_POSITION_Y, priv->y_res);
}
if (SYN_CAP_PALMDETECT(priv->capabilities))
@@ -971,4 +997,3 @@
}
#endif /* CONFIG_MOUSE_PS2_SYNAPTICS */
-
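Note: set_input_params() relies on GCC's "a ?: b" extension, which
evaluates to a when a is non-zero and to b otherwise, so the
firmware-reported minima/maxima override the nominal defaults only when
the corresponding query actually returned them. The equivalent in plain
C, as a sketch:

	int xmin = priv->x_min ? priv->x_min : XMIN_NOMINAL;
	int xmax = priv->x_max ? priv->x_max : XMAX_NOMINAL;

	input_set_abs_params(dev, ABS_X, xmin, xmax, fuzz, 0);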
diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h
index 7453938..ca040aa 100644
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -19,7 +19,8 @@
#define SYN_QUE_RESOLUTION 0x08
#define SYN_QUE_EXT_CAPAB 0x09
#define SYN_QUE_EXT_CAPAB_0C 0x0c
-#define SYN_QUE_EXT_DIMENSIONS 0x0d
+#define SYN_QUE_EXT_MAX_COORDS 0x0d
+#define SYN_QUE_EXT_MIN_COORDS 0x0f
/* synaptics modes */
#define SYN_BIT_ABSOLUTE_MODE (1 << 7)
@@ -66,18 +67,21 @@
* 1 0x60 multifinger mode identifies firmware finger counting
* (not reporting!) algorithm.
* Not particularly meaningful
- * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered
- * 2 0x01 clickpad bit 1 2-button ClickPad
- * 2 0x02 deluxe LED controls touchpad support LED commands
+ * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered
+ * 2 0x01 clickpad bit 1 2-button ClickPad
+ * 2 0x02 deluxe LED controls touchpad support LED commands
* ala multimedia control bar
* 2 0x04 reduced filtering firmware does less filtering on
* position data, driver should watch
* for noise.
+ * 2 0x20 report min query 0x0f gives min coord reported
*/
#define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */
#define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */
#define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & 0x020000)
+#define SYN_CAP_MIN_DIMENSIONS(ex0c) ((ex0c) & 0x002000)
#define SYN_CAP_ADV_GESTURE(ex0c) ((ex0c) & 0x080000)
+#define SYN_CAP_REDUCED_FILTERING(ex0c) ((ex0c) & 0x000400)
/* synaptics modes query bits */
#define SYN_MODE_ABSOLUTE(m) ((m) & (1 << 7))
@@ -104,6 +108,9 @@
#define SYN_NEWABS_RELAXED 2
#define SYN_OLDABS 3
+/* amount to fuzz position data when touchpad reports reduced filtering */
+#define SYN_REDUCED_FILTER_FUZZ 8
+
/*
* A structure to describe the state of the touchpad hardware (buttons and pad)
*/
@@ -130,7 +137,8 @@
unsigned long int ext_cap_0c; /* Ext Caps from 0x0c query */
unsigned long int identity; /* Identification */
unsigned int x_res, y_res; /* X/Y resolution in units/mm */
- unsigned int x_max, y_max; /* Max dimensions (from FW) */
+ unsigned int x_max, y_max; /* Max coordinates (from FW) */
+ unsigned int x_min, y_min; /* Min coordinates (from FW) */
unsigned char pkt_type; /* packet type - old, new, etc */
unsigned char mode; /* current mode byte */
diff --git a/drivers/input/serio/at32psif.c b/drivers/input/serio/at32psif.c
index 6ee8f0d..95280f9 100644
--- a/drivers/input/serio/at32psif.c
+++ b/drivers/input/serio/at32psif.c
@@ -372,6 +372,6 @@
module_init(psif_init);
module_exit(psif_exit);
-MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
MODULE_DESCRIPTION("Atmel AVR32 PSIF PS/2 driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index 4220620..979c443 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -795,7 +795,7 @@
/************************* Keepalive timer task *********************/
-void hp_sdc_kicker (unsigned long data)
+static void hp_sdc_kicker(unsigned long data)
{
tasklet_schedule(&hp_sdc.task);
/* Re-insert the periodic task. */
diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
index 0a619c5..6d89fd1 100644
--- a/drivers/input/tablet/aiptek.c
+++ b/drivers/input/tablet/aiptek.c
@@ -225,7 +225,6 @@
/* toolMode codes
*/
#define AIPTEK_TOOL_BUTTON_PEN_MODE BTN_TOOL_PEN
-#define AIPTEK_TOOL_BUTTON_PEN_MODE BTN_TOOL_PEN
#define AIPTEK_TOOL_BUTTON_PENCIL_MODE BTN_TOOL_PENCIL
#define AIPTEK_TOOL_BUTTON_BRUSH_MODE BTN_TOOL_BRUSH
#define AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE BTN_TOOL_AIRBRUSH
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index 08ba5ad..03ebcc8 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -15,6 +15,7 @@
#include "wacom_wac.h"
#include "wacom.h"
#include <linux/input/mt.h>
+#include <linux/hid.h>
/* resolution for penabled devices */
#define WACOM_PL_RES 20
@@ -264,6 +265,7 @@
wacom->id[0] = 0;
input_report_abs(input, ABS_MISC, wacom->id[0]); /* report tool id */
input_report_key(input, wacom->tool[0], prox);
+ input_event(input, EV_MSC, MSC_SERIAL, 1);
input_sync(input); /* sync last event */
}
@@ -273,11 +275,10 @@
prox = data[7] & 0xf8;
if (prox || wacom->id[1]) {
wacom->id[1] = PAD_DEVICE_ID;
- input_report_key(input, BTN_0, (data[7] & 0x40));
- input_report_key(input, BTN_4, (data[7] & 0x80));
+ input_report_key(input, BTN_BACK, (data[7] & 0x40));
+ input_report_key(input, BTN_FORWARD, (data[7] & 0x80));
rw = ((data[7] & 0x18) >> 3) - ((data[7] & 0x20) >> 3);
input_report_rel(input, REL_WHEEL, rw);
- input_report_key(input, BTN_TOOL_FINGER, 0xf0);
if (!prox)
wacom->id[1] = 0;
input_report_abs(input, ABS_MISC, wacom->id[1]);
@@ -290,18 +291,17 @@
prox = (data[7] & 0xf8) || data[8];
if (prox || wacom->id[1]) {
wacom->id[1] = PAD_DEVICE_ID;
- input_report_key(input, BTN_0, (data[7] & 0x08));
- input_report_key(input, BTN_1, (data[7] & 0x20));
- input_report_key(input, BTN_4, (data[7] & 0x10));
- input_report_key(input, BTN_5, (data[7] & 0x40));
+ input_report_key(input, BTN_BACK, (data[7] & 0x08));
+ input_report_key(input, BTN_LEFT, (data[7] & 0x20));
+ input_report_key(input, BTN_FORWARD, (data[7] & 0x10));
+ input_report_key(input, BTN_RIGHT, (data[7] & 0x40));
input_report_abs(input, ABS_WHEEL, (data[8] & 0x7f));
- input_report_key(input, BTN_TOOL_FINGER, 0xf0);
if (!prox)
wacom->id[1] = 0;
input_report_abs(input, ABS_MISC, wacom->id[1]);
input_event(input, EV_MSC, MSC_SERIAL, 0xf0);
+ retval = 1;
}
- retval = 1;
break;
}
exit:
@@ -494,10 +494,6 @@
/* pad packets. Works as a second tool and is always in prox */
if (data[0] == WACOM_REPORT_INTUOSPAD) {
- /* initiate the pad as a device */
- if (wacom->tool[1] != BTN_TOOL_FINGER)
- wacom->tool[1] = BTN_TOOL_FINGER;
-
if (features->type >= INTUOS4S && features->type <= INTUOS4L) {
input_report_key(input, BTN_0, (data[2] & 0x01));
input_report_key(input, BTN_1, (data[3] & 0x01));
@@ -1080,18 +1076,14 @@
switch (wacom_wac->features.type) {
case WACOM_MO:
- __set_bit(BTN_1, input_dev->keybit);
- __set_bit(BTN_5, input_dev->keybit);
-
input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0);
/* fall through */
case WACOM_G4:
input_set_capability(input_dev, EV_MSC, MSC_SERIAL);
- __set_bit(BTN_TOOL_FINGER, input_dev->keybit);
- __set_bit(BTN_0, input_dev->keybit);
- __set_bit(BTN_4, input_dev->keybit);
+ __set_bit(BTN_BACK, input_dev->keybit);
+ __set_bit(BTN_FORWARD, input_dev->keybit);
/* fall through */
case GRAPHIRE:
@@ -1127,10 +1119,12 @@
case CINTIQ:
for (i = 0; i < 8; i++)
__set_bit(BTN_0 + i, input_dev->keybit);
- __set_bit(BTN_TOOL_FINGER, input_dev->keybit);
- input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
- input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
+ if (wacom_wac->features.type != WACOM_21UX2) {
+ input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
+ input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
+ }
+
input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
wacom_setup_cintiq(wacom_wac);
break;
@@ -1151,8 +1145,6 @@
__set_bit(BTN_2, input_dev->keybit);
__set_bit(BTN_3, input_dev->keybit);
- __set_bit(BTN_TOOL_FINGER, input_dev->keybit);
-
input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
/* fall through */
@@ -1170,7 +1162,6 @@
case INTUOS4S:
for (i = 0; i < 7; i++)
__set_bit(BTN_0 + i, input_dev->keybit);
- __set_bit(BTN_TOOL_FINGER, input_dev->keybit);
input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
wacom_setup_intuos(wacom_wac);
@@ -1295,6 +1286,12 @@
static const struct wacom_features wacom_features_0x69 =
{ "Wacom Bamboo1", WACOM_PKGLEN_GRAPHIRE, 5104, 3712, 511,
63, GRAPHIRE, WACOM_PENPRTN_RES, WACOM_PENPRTN_RES };
+static const struct wacom_features wacom_features_0x6A =
+ { "Wacom Bamboo1 4x6", WACOM_PKGLEN_GRAPHIRE, 14760, 9225, 1023,
+ 63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x6B =
+ { "Wacom Bamboo1 5x8", WACOM_PKGLEN_GRAPHIRE, 21648, 13530, 1023,
+ 63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
static const struct wacom_features wacom_features_0x20 =
{ "Wacom Intuos 4x5", WACOM_PKGLEN_INTUOS, 12700, 10600, 1023,
31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1427,6 +1424,9 @@
static const struct wacom_features wacom_features_0x93 =
{ "Wacom ISDv4 93", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 255,
0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0x97 =
+ { "Wacom ISDv4 97", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 511,
+ 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
static const struct wacom_features wacom_features_0x9A =
{ "Wacom ISDv4 9A", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 255,
0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1458,7 +1458,7 @@
{ "Wacom Bamboo 2FG 6x8", WACOM_PKGLEN_BBFUN, 21648, 13530, 1023,
63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
static const struct wacom_features wacom_features_0xD4 =
- { "Wacom Bamboo Pen", WACOM_PKGLEN_BBFUN, 14720, 9200, 255,
+ { "Wacom Bamboo Pen", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023,
63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
static const struct wacom_features wacom_features_0xD6 =
{ "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023,
@@ -1483,6 +1483,11 @@
USB_DEVICE(USB_VENDOR_ID_WACOM, prod), \
.driver_info = (kernel_ulong_t)&wacom_features_##prod
+#define USB_DEVICE_DETAILED(prod, class, sub, proto) \
+ USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_WACOM, prod, class, \
+ sub, proto), \
+ .driver_info = (kernel_ulong_t)&wacom_features_##prod
+
#define USB_DEVICE_LENOVO(prod) \
USB_DEVICE(USB_VENDOR_ID_LENOVO, prod), \
.driver_info = (kernel_ulong_t)&wacom_features_##prod
@@ -1506,6 +1511,8 @@
{ USB_DEVICE_WACOM(0x64) },
{ USB_DEVICE_WACOM(0x65) },
{ USB_DEVICE_WACOM(0x69) },
+ { USB_DEVICE_WACOM(0x6A) },
+ { USB_DEVICE_WACOM(0x6B) },
{ USB_DEVICE_WACOM(0x20) },
{ USB_DEVICE_WACOM(0x21) },
{ USB_DEVICE_WACOM(0x22) },
@@ -1545,7 +1552,13 @@
{ USB_DEVICE_WACOM(0xC5) },
{ USB_DEVICE_WACOM(0xC6) },
{ USB_DEVICE_WACOM(0xC7) },
- { USB_DEVICE_WACOM(0xCE) },
+	/*
+	 * The DTU-2231 has two interfaces on the same configuration;
+	 * only one of them is used.
+	 */
+ { USB_DEVICE_DETAILED(0xCE, USB_CLASS_HID,
+ USB_INTERFACE_SUBCLASS_BOOT,
+ USB_INTERFACE_PROTOCOL_MOUSE) },
{ USB_DEVICE_WACOM(0xD0) },
{ USB_DEVICE_WACOM(0xD1) },
{ USB_DEVICE_WACOM(0xD2) },
@@ -1560,6 +1573,7 @@
{ USB_DEVICE_WACOM(0xCC) },
{ USB_DEVICE_WACOM(0x90) },
{ USB_DEVICE_WACOM(0x93) },
+ { USB_DEVICE_WACOM(0x97) },
{ USB_DEVICE_WACOM(0x9A) },
{ USB_DEVICE_WACOM(0x9F) },
{ USB_DEVICE_WACOM(0xE2) },
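Note: USB_DEVICE_DETAILED() builds on USB_DEVICE_AND_INTERFACE_INFO(), so
a table entry using it matches on interface class/subclass/protocol as
well as on VID/PID. For the DTU-2231 (0xCE) that means only the HID
boot-mouse interface binds and the second interface on the same
configuration is left alone. The entry above expands to roughly:

	{ USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_WACOM, 0xCE,
					USB_CLASS_HID,
					USB_INTERFACE_SUBCLASS_BOOT,
					USB_INTERFACE_PROTOCOL_MOUSE),
	  .driver_info = (kernel_ulong_t)&wacom_features_0xCE },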
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 5196861..d507b9b 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -967,17 +967,12 @@
ts->get_pendown_state = pdata->get_pendown_state;
} else if (gpio_is_valid(pdata->gpio_pendown)) {
- err = gpio_request(pdata->gpio_pendown, "ads7846_pendown");
+ err = gpio_request_one(pdata->gpio_pendown, GPIOF_IN,
+ "ads7846_pendown");
if (err) {
- dev_err(&spi->dev, "failed to request pendown GPIO%d\n",
- pdata->gpio_pendown);
- return err;
- }
- err = gpio_direction_input(pdata->gpio_pendown);
- if (err) {
- dev_err(&spi->dev, "failed to setup pendown GPIO%d\n",
- pdata->gpio_pendown);
- gpio_free(pdata->gpio_pendown);
+ dev_err(&spi->dev,
+ "failed to request/setup pendown GPIO%d: %d\n",
+ pdata->gpio_pendown, err);
return err;
}
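Note: gpio_request_one() folds the request and the direction setup into
one call; GPIOF_IN requests the line and configures it as an input, and
if the configuration step fails the GPIO is released again before the
error is returned. That is why the explicit
gpio_direction_input()/gpio_free() unwind code can be dropped. The
resulting pattern:

	err = gpio_request_one(pdata->gpio_pendown, GPIOF_IN,
			       "ads7846_pendown");
	if (err)
		return err;	/* nothing left to unwind */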
diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c
index fa8e56b..8034cbb 100644
--- a/drivers/input/touchscreen/atmel-wm97xx.c
+++ b/drivers/input/touchscreen/atmel-wm97xx.c
@@ -164,7 +164,7 @@
data = ac97c_readl(atmel_wm97xx, CBRHR);
value = data & 0x0fff;
- source = data & WM97XX_ADCSRC_MASK;
+ source = data & WM97XX_ADCSEL_MASK;
pen_down = (data & WM97XX_PEN_DOWN) >> 8;
if (source == WM97XX_ADCSEL_X)
@@ -442,6 +442,6 @@
}
module_exit(atmel_wm97xx_exit);
-MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>");
+MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
MODULE_DESCRIPTION("wm97xx continuous touch driver for Atmel AT91 and AVR32");
MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 1e61387..ae00604 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -48,41 +48,47 @@
#define MXT_OBJECT_SIZE 6
/* Object types */
-#define MXT_DEBUG_DIAGNOSTIC 37
-#define MXT_GEN_MESSAGE 5
-#define MXT_GEN_COMMAND 6
-#define MXT_GEN_POWER 7
-#define MXT_GEN_ACQUIRE 8
-#define MXT_TOUCH_MULTI 9
-#define MXT_TOUCH_KEYARRAY 15
-#define MXT_TOUCH_PROXIMITY 23
-#define MXT_PROCI_GRIPFACE 20
-#define MXT_PROCG_NOISE 22
-#define MXT_PROCI_ONETOUCH 24
-#define MXT_PROCI_TWOTOUCH 27
-#define MXT_PROCI_GRIP 40
-#define MXT_PROCI_PALM 41
-#define MXT_SPT_COMMSCONFIG 18
-#define MXT_SPT_GPIOPWM 19
-#define MXT_SPT_SELFTEST 25
-#define MXT_SPT_CTECONFIG 28
-#define MXT_SPT_USERDATA 38
-#define MXT_SPT_DIGITIZER 43
-#define MXT_SPT_MESSAGECOUNT 44
+#define MXT_DEBUG_DIAGNOSTIC_T37 37
+#define MXT_GEN_MESSAGE_T5 5
+#define MXT_GEN_COMMAND_T6 6
+#define MXT_GEN_POWER_T7 7
+#define MXT_GEN_ACQUIRE_T8 8
+#define MXT_GEN_DATASOURCE_T53 53
+#define MXT_TOUCH_MULTI_T9 9
+#define MXT_TOUCH_KEYARRAY_T15 15
+#define MXT_TOUCH_PROXIMITY_T23 23
+#define MXT_TOUCH_PROXKEY_T52 52
+#define MXT_PROCI_GRIPFACE_T20 20
+#define MXT_PROCG_NOISE_T22 22
+#define MXT_PROCI_ONETOUCH_T24 24
+#define MXT_PROCI_TWOTOUCH_T27 27
+#define MXT_PROCI_GRIP_T40 40
+#define MXT_PROCI_PALM_T41 41
+#define MXT_PROCI_TOUCHSUPPRESSION_T42 42
+#define MXT_PROCI_STYLUS_T47 47
+#define MXT_PROCG_NOISESUPPRESSION_T48 48
+#define MXT_SPT_COMMSCONFIG_T18 18
+#define MXT_SPT_GPIOPWM_T19 19
+#define MXT_SPT_SELFTEST_T25 25
+#define MXT_SPT_CTECONFIG_T28 28
+#define MXT_SPT_USERDATA_T38 38
+#define MXT_SPT_DIGITIZER_T43 43
+#define MXT_SPT_MESSAGECOUNT_T44 44
+#define MXT_SPT_CTECONFIG_T46 46
-/* MXT_GEN_COMMAND field */
+/* MXT_GEN_COMMAND_T6 field */
#define MXT_COMMAND_RESET 0
#define MXT_COMMAND_BACKUPNV 1
#define MXT_COMMAND_CALIBRATE 2
#define MXT_COMMAND_REPORTALL 3
#define MXT_COMMAND_DIAGNOSTIC 5
-/* MXT_GEN_POWER field */
+/* MXT_GEN_POWER_T7 field */
#define MXT_POWER_IDLEACQINT 0
#define MXT_POWER_ACTVACQINT 1
#define MXT_POWER_ACTV2IDLETO 2
-/* MXT_GEN_ACQUIRE field */
+/* MXT_GEN_ACQUIRE_T8 field */
#define MXT_ACQUIRE_CHRGTIME 0
#define MXT_ACQUIRE_TCHDRIFT 2
#define MXT_ACQUIRE_DRIFTST 3
@@ -91,7 +97,7 @@
#define MXT_ACQUIRE_ATCHCALST 6
#define MXT_ACQUIRE_ATCHCALSTHR 7
-/* MXT_TOUCH_MULTI field */
+/* MXT_TOUCH_MULTI_T9 field */
#define MXT_TOUCH_CTRL 0
#define MXT_TOUCH_XORIGIN 1
#define MXT_TOUCH_YORIGIN 2
@@ -121,7 +127,7 @@
#define MXT_TOUCH_YEDGEDIST 29
#define MXT_TOUCH_JUMPLIMIT 30
-/* MXT_PROCI_GRIPFACE field */
+/* MXT_PROCI_GRIPFACE_T20 field */
#define MXT_GRIPFACE_CTRL 0
#define MXT_GRIPFACE_XLOGRIP 1
#define MXT_GRIPFACE_XHIGRIP 2
@@ -151,11 +157,11 @@
#define MXT_NOISE_FREQ4 15
#define MXT_NOISE_IDLEGCAFVALID 16
-/* MXT_SPT_COMMSCONFIG */
+/* MXT_SPT_COMMSCONFIG_T18 */
#define MXT_COMMS_CTRL 0
#define MXT_COMMS_CMD 1
-/* MXT_SPT_CTECONFIG field */
+/* MXT_SPT_CTECONFIG_T28 field */
#define MXT_CTE_CTRL 0
#define MXT_CTE_CMD 1
#define MXT_CTE_MODE 2
@@ -166,7 +172,7 @@
#define MXT_VOLTAGE_DEFAULT 2700000
#define MXT_VOLTAGE_STEP 10000
-/* Define for MXT_GEN_COMMAND */
+/* Define for MXT_GEN_COMMAND_T6 */
#define MXT_BOOT_VALUE 0xa5
#define MXT_BACKUP_VALUE 0x55
#define MXT_BACKUP_TIME 25 /* msec */
@@ -256,24 +262,31 @@
static bool mxt_object_readable(unsigned int type)
{
switch (type) {
- case MXT_GEN_MESSAGE:
- case MXT_GEN_COMMAND:
- case MXT_GEN_POWER:
- case MXT_GEN_ACQUIRE:
- case MXT_TOUCH_MULTI:
- case MXT_TOUCH_KEYARRAY:
- case MXT_TOUCH_PROXIMITY:
- case MXT_PROCI_GRIPFACE:
- case MXT_PROCG_NOISE:
- case MXT_PROCI_ONETOUCH:
- case MXT_PROCI_TWOTOUCH:
- case MXT_PROCI_GRIP:
- case MXT_PROCI_PALM:
- case MXT_SPT_COMMSCONFIG:
- case MXT_SPT_GPIOPWM:
- case MXT_SPT_SELFTEST:
- case MXT_SPT_CTECONFIG:
- case MXT_SPT_USERDATA:
+ case MXT_GEN_MESSAGE_T5:
+ case MXT_GEN_COMMAND_T6:
+ case MXT_GEN_POWER_T7:
+ case MXT_GEN_ACQUIRE_T8:
+ case MXT_GEN_DATASOURCE_T53:
+ case MXT_TOUCH_MULTI_T9:
+ case MXT_TOUCH_KEYARRAY_T15:
+ case MXT_TOUCH_PROXIMITY_T23:
+ case MXT_TOUCH_PROXKEY_T52:
+ case MXT_PROCI_GRIPFACE_T20:
+ case MXT_PROCG_NOISE_T22:
+ case MXT_PROCI_ONETOUCH_T24:
+ case MXT_PROCI_TWOTOUCH_T27:
+ case MXT_PROCI_GRIP_T40:
+ case MXT_PROCI_PALM_T41:
+ case MXT_PROCI_TOUCHSUPPRESSION_T42:
+ case MXT_PROCI_STYLUS_T47:
+ case MXT_PROCG_NOISESUPPRESSION_T48:
+ case MXT_SPT_COMMSCONFIG_T18:
+ case MXT_SPT_GPIOPWM_T19:
+ case MXT_SPT_SELFTEST_T25:
+ case MXT_SPT_CTECONFIG_T28:
+ case MXT_SPT_USERDATA_T38:
+ case MXT_SPT_DIGITIZER_T43:
+ case MXT_SPT_CTECONFIG_T46:
return true;
default:
return false;
@@ -283,21 +296,28 @@
static bool mxt_object_writable(unsigned int type)
{
switch (type) {
- case MXT_GEN_COMMAND:
- case MXT_GEN_POWER:
- case MXT_GEN_ACQUIRE:
- case MXT_TOUCH_MULTI:
- case MXT_TOUCH_KEYARRAY:
- case MXT_TOUCH_PROXIMITY:
- case MXT_PROCI_GRIPFACE:
- case MXT_PROCG_NOISE:
- case MXT_PROCI_ONETOUCH:
- case MXT_PROCI_TWOTOUCH:
- case MXT_PROCI_GRIP:
- case MXT_PROCI_PALM:
- case MXT_SPT_GPIOPWM:
- case MXT_SPT_SELFTEST:
- case MXT_SPT_CTECONFIG:
+ case MXT_GEN_COMMAND_T6:
+ case MXT_GEN_POWER_T7:
+ case MXT_GEN_ACQUIRE_T8:
+ case MXT_TOUCH_MULTI_T9:
+ case MXT_TOUCH_KEYARRAY_T15:
+ case MXT_TOUCH_PROXIMITY_T23:
+ case MXT_TOUCH_PROXKEY_T52:
+ case MXT_PROCI_GRIPFACE_T20:
+ case MXT_PROCG_NOISE_T22:
+ case MXT_PROCI_ONETOUCH_T24:
+ case MXT_PROCI_TWOTOUCH_T27:
+ case MXT_PROCI_GRIP_T40:
+ case MXT_PROCI_PALM_T41:
+ case MXT_PROCI_TOUCHSUPPRESSION_T42:
+ case MXT_PROCI_STYLUS_T47:
+ case MXT_PROCG_NOISESUPPRESSION_T48:
+ case MXT_SPT_COMMSCONFIG_T18:
+ case MXT_SPT_GPIOPWM_T19:
+ case MXT_SPT_SELFTEST_T25:
+ case MXT_SPT_CTECONFIG_T28:
+ case MXT_SPT_DIGITIZER_T43:
+ case MXT_SPT_CTECONFIG_T46:
return true;
default:
return false;
@@ -455,7 +475,7 @@
struct mxt_object *object;
u16 reg;
- object = mxt_get_object(data, MXT_GEN_MESSAGE);
+ object = mxt_get_object(data, MXT_GEN_MESSAGE_T5);
if (!object)
return -EINVAL;
@@ -597,8 +617,8 @@
reportid = message.reportid;
- /* whether reportid is thing of MXT_TOUCH_MULTI */
- object = mxt_get_object(data, MXT_TOUCH_MULTI);
+ /* whether reportid is thing of MXT_TOUCH_MULTI_T9 */
+ object = mxt_get_object(data, MXT_TOUCH_MULTI_T9);
if (!object)
goto end;
@@ -635,7 +655,9 @@
if (!mxt_object_writable(object->type))
continue;
- for (j = 0; j < object->size + 1; j++) {
+ for (j = 0;
+ j < (object->size + 1) * (object->instances + 1);
+ j++) {
config_offset = index + j;
if (config_offset > pdata->config_length) {
dev_err(dev, "Not enough config data!\n");
@@ -644,7 +666,7 @@
mxt_write_object(data, object->type, j,
pdata->config[config_offset]);
}
- index += object->size + 1;
+ index += (object->size + 1) * (object->instances + 1);
}
return 0;
@@ -678,31 +700,31 @@
u8 voltage;
/* Set touchscreen lines */
- mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_XSIZE,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE,
pdata->x_line);
- mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_YSIZE,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE,
pdata->y_line);
/* Set touchscreen orient */
- mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_ORIENT,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT,
pdata->orient);
/* Set touchscreen burst length */
- mxt_write_object(data, MXT_TOUCH_MULTI,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9,
MXT_TOUCH_BLEN, pdata->blen);
/* Set touchscreen threshold */
- mxt_write_object(data, MXT_TOUCH_MULTI,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9,
MXT_TOUCH_TCHTHR, pdata->threshold);
/* Set touchscreen resolution */
- mxt_write_object(data, MXT_TOUCH_MULTI,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9,
MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff);
- mxt_write_object(data, MXT_TOUCH_MULTI,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9,
MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8);
- mxt_write_object(data, MXT_TOUCH_MULTI,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9,
MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff);
- mxt_write_object(data, MXT_TOUCH_MULTI,
+ mxt_write_object(data, MXT_TOUCH_MULTI_T9,
MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8);
/* Set touchscreen voltage */
@@ -715,7 +737,7 @@
voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) /
MXT_VOLTAGE_STEP;
- mxt_write_object(data, MXT_SPT_CTECONFIG,
+ mxt_write_object(data, MXT_SPT_CTECONFIG_T28,
MXT_CTE_VOLTAGE, voltage);
}
}
@@ -819,13 +841,13 @@
mxt_handle_pdata(data);
/* Backup to memory */
- mxt_write_object(data, MXT_GEN_COMMAND,
+ mxt_write_object(data, MXT_GEN_COMMAND_T6,
MXT_COMMAND_BACKUPNV,
MXT_BACKUP_VALUE);
msleep(MXT_BACKUP_TIME);
/* Soft reset */
- mxt_write_object(data, MXT_GEN_COMMAND,
+ mxt_write_object(data, MXT_GEN_COMMAND_T6,
MXT_COMMAND_RESET, 1);
msleep(MXT_RESET_TIME);
@@ -921,7 +943,7 @@
}
/* Change to the bootloader mode */
- mxt_write_object(data, MXT_GEN_COMMAND,
+ mxt_write_object(data, MXT_GEN_COMMAND_T6,
MXT_COMMAND_RESET, MXT_BOOT_VALUE);
msleep(MXT_RESET_TIME);
@@ -1027,14 +1049,14 @@
{
/* Touch enable */
mxt_write_object(data,
- MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0x83);
+ MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0x83);
}
static void mxt_stop(struct mxt_data *data)
{
/* Touch disable */
mxt_write_object(data,
- MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0);
+ MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0);
}
static int mxt_input_open(struct input_dev *dev)
@@ -1182,7 +1204,7 @@
struct input_dev *input_dev = data->input_dev;
/* Soft reset */
- mxt_write_object(data, MXT_GEN_COMMAND,
+ mxt_write_object(data, MXT_GEN_COMMAND_T6,
MXT_COMMAND_RESET, 1);
msleep(MXT_RESET_TIME);
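Note: a maXTouch object may have several instances, and the per-object
configuration blocks sit back to back in the config array, so both the
write loop and the running index now advance by
(object->size + 1) * (object->instances + 1) bytes per object. The old
code walked only one instance and then read the remaining config data at
the wrong offsets. For example, an object with size 30 and instances 1
(i.e. two instances) consumes (30 + 1) * (1 + 1) = 62 bytes of
configuration data.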
diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c
index a93c5c2..d8815c5 100644
--- a/drivers/input/touchscreen/cy8ctmg110_ts.c
+++ b/drivers/input/touchscreen/cy8ctmg110_ts.c
@@ -84,9 +84,9 @@
memcpy(i2c_data + 1, value, len);
ret = i2c_master_send(client, i2c_data, len + 1);
- if (ret != 1) {
+ if (ret != len + 1) {
dev_err(&client->dev, "i2c write data cmd failed\n");
- return ret ? ret : -EIO;
+ return ret < 0 ? ret : -EIO;
}
return 0;
@@ -193,6 +193,8 @@
ts->client = client;
ts->input = input_dev;
+ ts->reset_pin = pdata->reset_pin;
+ ts->irq_pin = pdata->irq_pin;
snprintf(ts->phys, sizeof(ts->phys),
"%s/input0", dev_name(&client->dev));
@@ -328,7 +330,7 @@
return 0;
}
-static struct i2c_device_id cy8ctmg110_idtable[] = {
+static const struct i2c_device_id cy8ctmg110_idtable[] = {
{ CY8CTMG110_DRIVER_NAME, 1 },
{ }
};
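Note: i2c_master_send() returns the number of bytes transferred on
success (here len + 1: the register address byte plus the payload) or a
negative errno, so the old "ret != 1" test misreported every successful
multi-byte write as a failure. The corrected idiom, as a sketch:

	ret = i2c_master_send(client, i2c_data, len + 1);
	if (ret != len + 1)			/* short or failed transfer */
		return ret < 0 ? ret : -EIO;	/* keep errno, else -EIO */
	return 0;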
diff --git a/drivers/input/touchscreen/intel-mid-touch.c b/drivers/input/touchscreen/intel-mid-touch.c
index 66c96bf..3276952 100644
--- a/drivers/input/touchscreen/intel-mid-touch.c
+++ b/drivers/input/touchscreen/intel-mid-touch.c
@@ -448,15 +448,11 @@
*/
static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev)
{
- int err, i, found;
+ int found = 0;
+ int err, i;
u8 r8;
- found = -1;
-
for (i = 0; i < MRSTOUCH_MAX_CHANNELS; i++) {
- if (found >= 0)
- break;
-
err = intel_scu_ipc_ioread8(PMICADDR0 + i, &r8);
if (err)
return err;
@@ -466,16 +462,15 @@
break;
}
}
- if (found < 0)
- return 0;
if (tsdev->vendor == PMIC_VENDOR_FS) {
- if (found && found > (MRSTOUCH_MAX_CHANNELS - 18))
+ if (found > MRSTOUCH_MAX_CHANNELS - 18)
return -ENOSPC;
} else {
- if (found && found > (MRSTOUCH_MAX_CHANNELS - 4))
+ if (found > MRSTOUCH_MAX_CHANNELS - 4)
return -ENOSPC;
}
+
return found;
}
diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c
index 3242e70..e966c29 100644
--- a/drivers/input/touchscreen/mainstone-wm97xx.c
+++ b/drivers/input/touchscreen/mainstone-wm97xx.c
@@ -157,9 +157,9 @@
x, y, p);
/* are samples valid */
- if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X ||
- (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y ||
- (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES)
+ if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X ||
+ (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y ||
+ (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES)
goto up;
/* coordinate is good */
diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c
index 22a3411..089b0a0 100644
--- a/drivers/input/touchscreen/tnetv107x-ts.c
+++ b/drivers/input/touchscreen/tnetv107x-ts.c
@@ -393,5 +393,5 @@
MODULE_AUTHOR("Cyril Chemparathy");
MODULE_DESCRIPTION("TNETV107X Touchscreen Driver");
-MODULE_ALIAS("platform: tnetv107x-ts");
+MODULE_ALIAS("platform:tnetv107x-ts");
MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/wm9705.c b/drivers/input/touchscreen/wm9705.c
index 98e6117..adc13a5 100644
--- a/drivers/input/touchscreen/wm9705.c
+++ b/drivers/input/touchscreen/wm9705.c
@@ -215,8 +215,9 @@
static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
{
int timeout = 5 * delay;
+ bool wants_pen = adcsel & WM97XX_PEN_DOWN;
- if (!wm->pen_probably_down) {
+ if (wants_pen && !wm->pen_probably_down) {
u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
if (!(data & WM97XX_PEN_DOWN))
return RC_PENUP;
@@ -224,13 +225,10 @@
}
/* set up digitiser */
- if (adcsel & 0x8000)
- adcsel = ((adcsel & 0x7fff) + 3) << 12;
-
if (wm->mach_ops && wm->mach_ops->pre_sample)
wm->mach_ops->pre_sample(adcsel);
- wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1,
- adcsel | WM97XX_POLL | WM97XX_DELAY(delay));
+ wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK)
+ | WM97XX_POLL | WM97XX_DELAY(delay));
/* wait 3 AC97 time slots + delay for conversion */
poll_delay(delay);
@@ -256,13 +254,14 @@
wm->mach_ops->post_sample(adcsel);
/* check we have correct sample */
- if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) {
- dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
- *sample & WM97XX_ADCSEL_MASK);
+ if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+ dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+ adcsel & WM97XX_ADCSEL_MASK,
+ *sample & WM97XX_ADCSEL_MASK);
return RC_PENUP;
}
- if (!(*sample & WM97XX_PEN_DOWN)) {
+ if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
wm->pen_probably_down = 0;
return RC_PENUP;
}
@@ -277,14 +276,14 @@
{
int rc;
- rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X, &data->x);
+ rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x);
if (rc != RC_VALID)
return rc;
- rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y);
+ rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y);
if (rc != RC_VALID)
return rc;
if (pil) {
- rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES, &data->p);
+ rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN, &data->p);
if (rc != RC_VALID)
return rc;
} else
diff --git a/drivers/input/touchscreen/wm9712.c b/drivers/input/touchscreen/wm9712.c
index 2bc2fb8..6e743e3 100644
--- a/drivers/input/touchscreen/wm9712.c
+++ b/drivers/input/touchscreen/wm9712.c
@@ -255,8 +255,9 @@
static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample)
{
int timeout = 5 * delay;
+ bool wants_pen = adcsel & WM97XX_PEN_DOWN;
- if (!wm->pen_probably_down) {
+ if (wants_pen && !wm->pen_probably_down) {
u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
if (!(data & WM97XX_PEN_DOWN))
return RC_PENUP;
@@ -264,13 +265,10 @@
}
/* set up digitiser */
- if (adcsel & 0x8000)
- adcsel = ((adcsel & 0x7fff) + 3) << 12;
-
if (wm->mach_ops && wm->mach_ops->pre_sample)
wm->mach_ops->pre_sample(adcsel);
- wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1,
- adcsel | WM97XX_POLL | WM97XX_DELAY(delay));
+ wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK)
+ | WM97XX_POLL | WM97XX_DELAY(delay));
/* wait 3 AC97 time slots + delay for conversion */
poll_delay(delay);
@@ -296,13 +294,14 @@
wm->mach_ops->post_sample(adcsel);
/* check we have correct sample */
- if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) {
- dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
- *sample & WM97XX_ADCSEL_MASK);
+ if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+ dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+ adcsel & WM97XX_ADCSEL_MASK,
+ *sample & WM97XX_ADCSEL_MASK);
return RC_PENUP;
}
- if (!(*sample & WM97XX_PEN_DOWN)) {
+ if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
wm->pen_probably_down = 0;
return RC_PENUP;
}
@@ -387,16 +386,18 @@
if (rc != RC_VALID)
return rc;
} else {
- rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X, &data->x);
+ rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN,
+ &data->x);
if (rc != RC_VALID)
return rc;
- rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y);
+ rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN,
+ &data->y);
if (rc != RC_VALID)
return rc;
if (pil && !five_wire) {
- rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES,
+ rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN,
&data->p);
if (rc != RC_VALID)
return rc;
diff --git a/drivers/input/touchscreen/wm9713.c b/drivers/input/touchscreen/wm9713.c
index 73ec995..7405353 100644
--- a/drivers/input/touchscreen/wm9713.c
+++ b/drivers/input/touchscreen/wm9713.c
@@ -261,8 +261,9 @@
{
u16 dig1;
int timeout = 5 * delay;
+ bool wants_pen = adcsel & WM97XX_PEN_DOWN;
- if (!wm->pen_probably_down) {
+ if (wants_pen && !wm->pen_probably_down) {
u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD);
if (!(data & WM97XX_PEN_DOWN))
return RC_PENUP;
@@ -270,15 +271,14 @@
}
/* set up digitiser */
- if (adcsel & 0x8000)
- adcsel = 1 << ((adcsel & 0x7fff) + 3);
-
dig1 = wm97xx_reg_read(wm, AC97_WM9713_DIG1);
dig1 &= ~WM9713_ADCSEL_MASK;
+ /* WM97XX_ADCSEL_* channels need to be converted to WM9713 format */
+ dig1 |= 1 << ((adcsel & WM97XX_ADCSEL_MASK) >> 12);
if (wm->mach_ops && wm->mach_ops->pre_sample)
wm->mach_ops->pre_sample(adcsel);
- wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | adcsel | WM9713_POLL);
+ wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | WM9713_POLL);
/* wait 3 AC97 time slots + delay for conversion */
poll_delay(delay);
@@ -304,13 +304,14 @@
wm->mach_ops->post_sample(adcsel);
/* check we have correct sample */
- if ((*sample & WM97XX_ADCSRC_MASK) != ffs(adcsel >> 1) << 12) {
- dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel,
- *sample & WM97XX_ADCSRC_MASK);
+ if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) {
+ dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x",
+ adcsel & WM97XX_ADCSEL_MASK,
+ *sample & WM97XX_ADCSEL_MASK);
return RC_PENUP;
}
- if (!(*sample & WM97XX_PEN_DOWN)) {
+ if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) {
wm->pen_probably_down = 0;
return RC_PENUP;
}
@@ -400,14 +401,14 @@
if (rc != RC_VALID)
return rc;
} else {
- rc = wm9713_poll_sample(wm, WM9713_ADCSEL_X, &data->x);
+ rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x);
if (rc != RC_VALID)
return rc;
- rc = wm9713_poll_sample(wm, WM9713_ADCSEL_Y, &data->y);
+ rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y);
if (rc != RC_VALID)
return rc;
if (pil) {
- rc = wm9713_poll_sample(wm, WM9713_ADCSEL_PRES,
+ rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN,
&data->p);
if (rc != RC_VALID)
return rc;
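Note: across wm9705/wm9712/wm9713 the pen-down requirement is now passed
in-band by OR-ing WM97XX_PEN_DOWN into adcsel, so auxiliary channels
(battery, AUX) can be sampled without the pen on the pad. The sample
check also changes shape: "(*sample ^ adcsel) & WM97XX_ADCSEL_MASK" is
non-zero exactly when the two values differ within the ADC-source field,
while bits outside the mask (such as the PEN_DOWN flag itself) are
ignored. In isolation, as a sketch (this helper does not exist in the
driver):

	/* compare only the ADCSEL field of two digitiser words */
	static bool wm97xx_same_source(u16 sample, u16 adcsel)
	{
		return ((sample ^ adcsel) & WM97XX_ADCSEL_MASK) == 0;
	}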
diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c
index 5b0f15e..f6328c0 100644
--- a/drivers/input/touchscreen/zylonite-wm97xx.c
+++ b/drivers/input/touchscreen/zylonite-wm97xx.c
@@ -122,9 +122,9 @@
x, y, p);
/* are samples valid */
- if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X ||
- (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y ||
- (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES)
+ if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X ||
+ (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y ||
+ (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES)
goto up;
/* coordinate is good */
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 48e9cc0..1f73d7f 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -2532,6 +2532,9 @@
/* Setup the generic properties */
dev->flags = IFF_NOARP|IFF_POINTOPOINT;
+
+ /* isdn prepends a header in the tx path, can't share skbs */
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->header_ops = NULL;
dev->netdev_ops = &isdn_netdev_ops;
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 574b09a..0dc6546 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -29,7 +29,6 @@
#include "md.h"
#include "bitmap.h"
-#include <linux/dm-dirty-log.h>
/* debug macros */
#define DEBUG 0
@@ -775,10 +774,8 @@
* 0 or page 1
*/
static inline struct page *filemap_get_page(struct bitmap *bitmap,
- unsigned long chunk)
+ unsigned long chunk)
{
- if (bitmap->filemap == NULL)
- return NULL;
if (file_page_index(bitmap, chunk) >= bitmap->file_pages)
return NULL;
return bitmap->filemap[file_page_index(bitmap, chunk)
@@ -878,28 +875,19 @@
static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
enum bitmap_page_attr attr)
{
- if (page)
- __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
- else
- __set_bit(attr, &bitmap->logattrs);
+ __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
}
static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
enum bitmap_page_attr attr)
{
- if (page)
- __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
- else
- __clear_bit(attr, &bitmap->logattrs);
+ __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
}
static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
enum bitmap_page_attr attr)
{
- if (page)
- return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
- else
- return test_bit(attr, &bitmap->logattrs);
+ return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
}
/*
@@ -912,30 +900,26 @@
static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
{
unsigned long bit;
- struct page *page = NULL;
+ struct page *page;
void *kaddr;
unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
- if (!bitmap->filemap) {
- struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
- if (log)
- log->type->mark_region(log, chunk);
- } else {
+ if (!bitmap->filemap)
+ return;
- page = filemap_get_page(bitmap, chunk);
- if (!page)
- return;
- bit = file_page_offset(bitmap, chunk);
+ page = filemap_get_page(bitmap, chunk);
+ if (!page)
+ return;
+ bit = file_page_offset(bitmap, chunk);
- /* set the bit */
- kaddr = kmap_atomic(page, KM_USER0);
- if (bitmap->flags & BITMAP_HOSTENDIAN)
- set_bit(bit, kaddr);
- else
- __test_and_set_bit_le(bit, kaddr);
- kunmap_atomic(kaddr, KM_USER0);
- PRINTK("set file bit %lu page %lu\n", bit, page->index);
- }
+ /* set the bit */
+ kaddr = kmap_atomic(page, KM_USER0);
+ if (bitmap->flags & BITMAP_HOSTENDIAN)
+ set_bit(bit, kaddr);
+ else
+ __set_bit_le(bit, kaddr);
+ kunmap_atomic(kaddr, KM_USER0);
+ PRINTK("set file bit %lu page %lu\n", bit, page->index);
/* record page number so it gets flushed to disk when unplug occurs */
set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
}
@@ -952,16 +936,6 @@
if (!bitmap)
return;
- if (!bitmap->filemap) {
- /* Must be using a dirty_log */
- struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
- dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs);
- need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs);
- if (dirty || need_write)
- if (log->type->flush(log))
- bitmap->flags |= BITMAP_WRITE_ERROR;
- goto out;
- }
/* look at each page to see if there are any set bits that need to be
* flushed out to disk */
@@ -990,7 +964,6 @@
else
md_super_wait(bitmap->mddev);
}
-out:
if (bitmap->flags & BITMAP_WRITE_ERROR)
bitmap_file_kick(bitmap);
}
@@ -1199,7 +1172,6 @@
struct page *page = NULL, *lastpage = NULL;
sector_t blocks;
void *paddr;
- struct dm_dirty_log *log = mddev->bitmap_info.log;
/* Use a mutex to guard daemon_work against
* bitmap_destroy.
@@ -1224,12 +1196,11 @@
spin_lock_irqsave(&bitmap->lock, flags);
for (j = 0; j < bitmap->chunks; j++) {
bitmap_counter_t *bmc;
- if (!bitmap->filemap) {
- if (!log)
- /* error or shutdown */
- break;
- } else
- page = filemap_get_page(bitmap, j);
+ if (!bitmap->filemap)
+ /* error or shutdown */
+ break;
+
+ page = filemap_get_page(bitmap, j);
if (page != lastpage) {
/* skip this page unless it's marked as needing cleaning */
@@ -1298,17 +1269,16 @@
-1);
/* clear the bit */
- if (page) {
- paddr = kmap_atomic(page, KM_USER0);
- if (bitmap->flags & BITMAP_HOSTENDIAN)
- clear_bit(file_page_offset(bitmap, j),
- paddr);
- else
- __test_and_clear_bit_le(file_page_offset(bitmap, j),
- paddr);
- kunmap_atomic(paddr, KM_USER0);
- } else
- log->type->clear_region(log, j);
+ paddr = kmap_atomic(page, KM_USER0);
+ if (bitmap->flags & BITMAP_HOSTENDIAN)
+ clear_bit(file_page_offset(bitmap, j),
+ paddr);
+ else
+				__clear_bit_le(file_page_offset(bitmap, j),
+					       paddr);
+ kunmap_atomic(paddr, KM_USER0);
}
} else
j |= PAGE_COUNTER_MASK;
@@ -1316,16 +1286,12 @@
spin_unlock_irqrestore(&bitmap->lock, flags);
/* now sync the final page */
- if (lastpage != NULL || log != NULL) {
+ if (lastpage != NULL) {
spin_lock_irqsave(&bitmap->lock, flags);
if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
- if (lastpage)
- write_page(bitmap, lastpage, 0);
- else
- if (log->type->flush(log))
- bitmap->flags |= BITMAP_WRITE_ERROR;
+ write_page(bitmap, lastpage, 0);
} else {
set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -1767,12 +1733,10 @@
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
if (!file
- && !mddev->bitmap_info.offset
- && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */
+ && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
return 0;
BUG_ON(file && mddev->bitmap_info.offset);
- BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log);
bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
if (!bitmap)
@@ -1863,6 +1827,7 @@
int bitmap_load(mddev_t *mddev)
{
int err = 0;
+ sector_t start = 0;
sector_t sector = 0;
struct bitmap *bitmap = mddev->bitmap;
@@ -1881,24 +1846,14 @@
}
bitmap_close_sync(bitmap);
- if (mddev->bitmap_info.log) {
- unsigned long i;
- struct dm_dirty_log *log = mddev->bitmap_info.log;
- for (i = 0; i < bitmap->chunks; i++)
- if (!log->type->in_sync(log, i, 1))
- bitmap_set_memory_bits(bitmap,
- (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
- 1);
- } else {
- sector_t start = 0;
- if (mddev->degraded == 0
- || bitmap->events_cleared == mddev->events)
- /* no need to keep dirty bits to optimise a
- * re-add of a missing device */
- start = mddev->recovery_cp;
+ if (mddev->degraded == 0
+ || bitmap->events_cleared == mddev->events)
+ /* no need to keep dirty bits to optimise a
+ * re-add of a missing device */
+ start = mddev->recovery_cp;
- err = bitmap_init_from_disk(bitmap, start);
- }
+ err = bitmap_init_from_disk(bitmap, start);
+
if (err)
goto out;
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index b2a127e..a28f2e5 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -212,10 +212,6 @@
unsigned long file_pages; /* number of pages in the file */
int last_page_size; /* bytes in the last page */
- unsigned long logattrs; /* used when filemap_attr doesn't exist
- * because we are working with a dirty_log
- */
-
unsigned long flags;
int allclean;
@@ -237,7 +233,6 @@
wait_queue_head_t behind_wait;
struct sysfs_dirent *sysfs_can_clear;
-
};
/* the bitmap API */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index dfc9425..8e221a2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -215,6 +215,55 @@
}
EXPORT_SYMBOL_GPL(bio_clone_mddev);
+void md_trim_bio(struct bio *bio, int offset, int size)
+{
+ /* 'bio' is a cloned bio which we need to trim to match
+ * the given offset and size.
+ * This requires adjusting bi_sector, bi_size, and bi_io_vec
+ */
+ int i;
+ struct bio_vec *bvec;
+ int sofar = 0;
+
+ size <<= 9;
+ if (offset == 0 && size == bio->bi_size)
+ return;
+
+ bio->bi_sector += offset;
+ bio->bi_size = size;
+ offset <<= 9;
+ clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+ while (bio->bi_idx < bio->bi_vcnt &&
+ bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+ /* remove this whole bio_vec */
+ offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+ bio->bi_idx++;
+ }
+ if (bio->bi_idx < bio->bi_vcnt) {
+ bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+ bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+ }
+ /* avoid any complications with bi_idx being non-zero */
+ if (bio->bi_idx) {
+ memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+ (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+ bio->bi_vcnt -= bio->bi_idx;
+ bio->bi_idx = 0;
+ }
+ /* Make sure vcnt and last bv are not too big */
+ bio_for_each_segment(bvec, bio, i) {
+ if (sofar + bvec->bv_len > size)
+ bvec->bv_len = size - sofar;
+ if (bvec->bv_len == 0) {
+ bio->bi_vcnt = i;
+ break;
+ }
+ sofar += bvec->bv_len;
+ }
+}
+EXPORT_SYMBOL_GPL(md_trim_bio);
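To see the trimming arithmetic in isolation, here is a minimal userspace sketch, assuming a simplified vector type in place of struct bio_vec and working in bytes rather than sectors (the names and values are invented for illustration):

#include <stdio.h>

struct vec { unsigned int off, len; };	/* stand-in for bio_vec */

static void trim(struct vec *v, int *vcnt, unsigned int offset,
		 unsigned int size)
{
	int i, idx = 0;
	unsigned int sofar = 0;

	/* skip whole vectors that fall before 'offset' */
	while (idx < *vcnt && v[idx].len <= offset) {
		offset -= v[idx].len;
		idx++;
	}
	/* partially trim the first remaining vector */
	if (idx < *vcnt) {
		v[idx].off += offset;
		v[idx].len -= offset;
	}
	/* slide the rest to the front, like the memmove() above */
	for (i = 0; idx + i < *vcnt; i++)
		v[i] = v[idx + i];
	*vcnt -= idx;
	/* clip the tail so the total does not exceed 'size' */
	for (i = 0; i < *vcnt; i++) {
		if (sofar + v[i].len > size)
			v[i].len = size - sofar;
		if (v[i].len == 0) {
			*vcnt = i;
			break;
		}
		sofar += v[i].len;
	}
}

int main(void)
{
	struct vec v[3] = { {0, 4096}, {0, 4096}, {0, 4096} };
	int n = 3;

	trim(v, &n, 512, 8192);	/* drop 512 bytes from the head, keep 8KiB */
	printf("%d vectors, first off=%u len=%u\n", n, v[0].off, v[0].len);
	return 0;
}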
+
/*
* We have a system wide 'event count' that is incremented
* on any 'interesting' event, and readers of /proc/mdstat
@@ -757,6 +806,10 @@
rdev->sb_start = 0;
rdev->sectors = 0;
}
+ if (rdev->bb_page) {
+ put_page(rdev->bb_page);
+ rdev->bb_page = NULL;
+ }
}
@@ -1025,7 +1078,7 @@
ret = -EINVAL;
bdevname(rdev->bdev, b);
- sb = (mdp_super_t*)page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
if (sb->md_magic != MD_SB_MAGIC) {
printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
@@ -1054,6 +1107,7 @@
rdev->preferred_minor = sb->md_minor;
rdev->data_offset = 0;
rdev->sb_size = MD_SB_BYTES;
+ rdev->badblocks.shift = -1;
if (sb->level == LEVEL_MULTIPATH)
rdev->desc_nr = -1;
@@ -1064,7 +1118,7 @@
ret = 1;
} else {
__u64 ev1, ev2;
- mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
+ mdp_super_t *refsb = page_address(refdev->sb_page);
if (!uuid_equal(refsb, sb)) {
printk(KERN_WARNING "md: %s has different UUID to %s\n",
b, bdevname(refdev->bdev,b2));
@@ -1099,7 +1153,7 @@
static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
{
mdp_disk_t *desc;
- mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+ mdp_super_t *sb = page_address(rdev->sb_page);
__u64 ev1 = md_event(sb);
rdev->raid_disk = -1;
@@ -1230,7 +1284,7 @@
rdev->sb_size = MD_SB_BYTES;
- sb = (mdp_super_t*)page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
memset(sb, 0, sizeof(*sb));
@@ -1395,6 +1449,8 @@
return cpu_to_le32(csum);
}
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+ int acknowledged);
static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
{
struct mdp_superblock_1 *sb;
@@ -1435,7 +1491,7 @@
if (ret) return ret;
- sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
sb->major_version != cpu_to_le32(1) ||
@@ -1473,12 +1529,52 @@
else
rdev->desc_nr = le32_to_cpu(sb->dev_number);
+ if (!rdev->bb_page) {
+ rdev->bb_page = alloc_page(GFP_KERNEL);
+ if (!rdev->bb_page)
+ return -ENOMEM;
+ }
+ if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
+ rdev->badblocks.count == 0) {
+ /* need to load the bad block list.
+ * Currently we limit it to one page.
+ */
+ s32 offset;
+ sector_t bb_sector;
+ u64 *bbp;
+ int i;
+ int sectors = le16_to_cpu(sb->bblog_size);
+ if (sectors > (PAGE_SIZE / 512))
+ return -EINVAL;
+ offset = le32_to_cpu(sb->bblog_offset);
+ if (offset == 0)
+ return -EINVAL;
+ bb_sector = (long long)offset;
+ if (!sync_page_io(rdev, bb_sector, sectors << 9,
+ rdev->bb_page, READ, true))
+ return -EIO;
+ bbp = (u64 *)page_address(rdev->bb_page);
+ rdev->badblocks.shift = sb->bblog_shift;
+ for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
+ u64 bb = le64_to_cpu(*bbp);
+ int count = bb & (0x3ff);
+ u64 sector = bb >> 10;
+ sector <<= sb->bblog_shift;
+ count <<= sb->bblog_shift;
+ if (bb + 1 == 0)
+ break;
+ if (md_set_badblocks(&rdev->badblocks,
+ sector, count, 1) == 0)
+ return -EINVAL;
+ }
+ } else if (sb->bblog_offset == 0)
+ rdev->badblocks.shift = -1;
+
if (!refdev) {
ret = 1;
} else {
__u64 ev1, ev2;
- struct mdp_superblock_1 *refsb =
- (struct mdp_superblock_1*)page_address(refdev->sb_page);
+ struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
sb->level != refsb->level ||
@@ -1513,7 +1609,7 @@
static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
{
- struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+ struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
__u64 ev1 = le64_to_cpu(sb->events);
rdev->raid_disk = -1;
@@ -1619,13 +1715,12 @@
int max_dev, i;
/* make rdev->sb match mddev and rdev data. */
- sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
sb->feature_map = 0;
sb->pad0 = 0;
sb->recovery_offset = cpu_to_le64(0);
memset(sb->pad1, 0, sizeof(sb->pad1));
- memset(sb->pad2, 0, sizeof(sb->pad2));
memset(sb->pad3, 0, sizeof(sb->pad3));
sb->utime = cpu_to_le64((__u64)mddev->utime);
@@ -1665,6 +1760,40 @@
sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
}
+ if (rdev->badblocks.count == 0)
+ /* Nothing to do for bad blocks */ ;
+ else if (sb->bblog_offset == 0)
+ /* Cannot record bad blocks on this device */
+ md_error(mddev, rdev);
+ else {
+ struct badblocks *bb = &rdev->badblocks;
+ u64 *bbp = (u64 *)page_address(rdev->bb_page);
+ u64 *p = bb->page;
+ sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
+ if (bb->changed) {
+ unsigned seq;
+
+retry:
+ seq = read_seqbegin(&bb->lock);
+
+ memset(bbp, 0xff, PAGE_SIZE);
+
+ for (i = 0 ; i < bb->count ; i++) {
+ u64 internal_bb = *p++;
+ u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
+ | BB_LEN(internal_bb));
+ *bbp++ = cpu_to_le64(store_bb);
+ }
+ if (read_seqretry(&bb->lock, seq))
+ goto retry;
+
+ bb->sector = (rdev->sb_start +
+ (int)le32_to_cpu(sb->bblog_offset));
+ bb->size = le16_to_cpu(sb->bblog_size);
+ bb->changed = 0;
+ }
+ }
+
max_dev = 0;
list_for_each_entry(rdev2, &mddev->disks, same_set)
if (rdev2->desc_nr+1 > max_dev)
@@ -1724,7 +1853,7 @@
num_sectors = max_sectors;
rdev->sb_start = sb_start;
}
- sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
sb->super_offset = rdev->sb_start;
sb->sb_csum = calc_sb_1_csum(sb);
@@ -1922,7 +2051,7 @@
bd_link_disk_holder(rdev->bdev, mddev->gendisk);
/* May as well allow recovery to be retried once */
- mddev->recovery_disabled = 0;
+ mddev->recovery_disabled++;
return 0;
@@ -1953,6 +2082,9 @@
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
rdev->sysfs_state = NULL;
+ kfree(rdev->badblocks.page);
+ rdev->badblocks.count = 0;
+ rdev->badblocks.page = NULL;
/* We need to delay this, otherwise we can deadlock when
* writing to 'remove' to "dev/state". We also need
* to delay it due to rcu usage.
@@ -2127,10 +2259,10 @@
printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
switch (major_version) {
case 0:
- print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
+ print_sb_90(page_address(rdev->sb_page));
break;
case 1:
- print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
+ print_sb_1(page_address(rdev->sb_page));
break;
}
} else
@@ -2194,6 +2326,7 @@
mdk_rdev_t *rdev;
int sync_req;
int nospares = 0;
+ int any_badblocks_changed = 0;
repeat:
/* First make sure individual recovery_offsets are correct */
@@ -2208,8 +2341,18 @@
if (!mddev->persistent) {
clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
clear_bit(MD_CHANGE_DEVS, &mddev->flags);
- if (!mddev->external)
+ if (!mddev->external) {
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
+ if (rdev->badblocks.changed) {
+ md_ack_all_badblocks(&rdev->badblocks);
+ md_error(mddev, rdev);
+ }
+ clear_bit(Blocked, &rdev->flags);
+ clear_bit(BlockedBadBlocks, &rdev->flags);
+ wake_up(&rdev->blocked_wait);
+ }
+ }
wake_up(&mddev->sb_wait);
return;
}
@@ -2265,6 +2408,14 @@
MD_BUG();
mddev->events --;
}
+
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
+ if (rdev->badblocks.changed)
+ any_badblocks_changed++;
+ if (test_bit(Faulty, &rdev->flags))
+ set_bit(FaultRecorded, &rdev->flags);
+ }
+
sync_sbs(mddev, nospares);
spin_unlock_irq(&mddev->write_lock);
@@ -2290,6 +2441,13 @@
bdevname(rdev->bdev,b),
(unsigned long long)rdev->sb_start);
rdev->sb_events = mddev->events;
+ if (rdev->badblocks.size) {
+ md_super_write(mddev, rdev,
+ rdev->badblocks.sector,
+ rdev->badblocks.size << 9,
+ rdev->bb_page);
+ rdev->badblocks.size = 0;
+ }
} else
dprintk(")\n");
@@ -2313,6 +2471,15 @@
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
+ if (test_and_clear_bit(FaultRecorded, &rdev->flags))
+ clear_bit(Blocked, &rdev->flags);
+
+ if (any_badblocks_changed)
+ md_ack_all_badblocks(&rdev->badblocks);
+ clear_bit(BlockedBadBlocks, &rdev->flags);
+ wake_up(&rdev->blocked_wait);
+ }
}
/* words written to sysfs files may, or may not, be \n terminated.
@@ -2347,7 +2514,8 @@
char *sep = "";
size_t len = 0;
- if (test_bit(Faulty, &rdev->flags)) {
+ if (test_bit(Faulty, &rdev->flags) ||
+ rdev->badblocks.unacked_exist) {
len+= sprintf(page+len, "%sfaulty",sep);
sep = ",";
}
@@ -2359,7 +2527,8 @@
len += sprintf(page+len, "%swrite_mostly",sep);
sep = ",";
}
- if (test_bit(Blocked, &rdev->flags)) {
+ if (test_bit(Blocked, &rdev->flags) ||
+ rdev->badblocks.unacked_exist) {
len += sprintf(page+len, "%sblocked", sep);
sep = ",";
}
@@ -2368,6 +2537,10 @@
len += sprintf(page+len, "%sspare", sep);
sep = ",";
}
+ if (test_bit(WriteErrorSeen, &rdev->flags)) {
+ len += sprintf(page+len, "%swrite_error", sep);
+ sep = ",";
+ }
return len+sprintf(page+len, "\n");
}
@@ -2375,13 +2548,15 @@
state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
/* can write
- * faulty - simulates and error
+ * faulty - simulates an error
* remove - disconnects the device
* writemostly - sets write_mostly
* -writemostly - clears write_mostly
- * blocked - sets the Blocked flag
- * -blocked - clears the Blocked flag
+ * blocked - sets the Blocked flags
+ * -blocked - clears the Blocked and possibly simulates an error
* insync - sets Insync providing device isn't active
+ * write_error - sets WriteErrorSeen
+ * -write_error - clears WriteErrorSeen
*/
int err = -EINVAL;
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -2408,7 +2583,15 @@
set_bit(Blocked, &rdev->flags);
err = 0;
} else if (cmd_match(buf, "-blocked")) {
+ if (!test_bit(Faulty, &rdev->flags) &&
+ test_bit(BlockedBadBlocks, &rdev->flags)) {
+ /* metadata handler doesn't understand badblocks,
+ * so we need to fail the device
+ */
+ md_error(rdev->mddev, rdev);
+ }
clear_bit(Blocked, &rdev->flags);
+ clear_bit(BlockedBadBlocks, &rdev->flags);
wake_up(&rdev->blocked_wait);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
md_wakeup_thread(rdev->mddev->thread);
@@ -2417,6 +2600,12 @@
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
set_bit(In_sync, &rdev->flags);
err = 0;
+ } else if (cmd_match(buf, "write_error")) {
+ set_bit(WriteErrorSeen, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "-write_error")) {
+ clear_bit(WriteErrorSeen, &rdev->flags);
+ err = 0;
}
if (!err)
sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -2459,7 +2648,6 @@
{
char *e;
int err;
- char nm[20];
int slot = simple_strtoul(buf, &e, 10);
if (strncmp(buf, "none", 4)==0)
slot = -1;
@@ -2482,8 +2670,7 @@
hot_remove_disk(rdev->mddev, rdev->raid_disk);
if (err)
return err;
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&rdev->mddev->kobj, nm);
+ sysfs_unlink_rdev(rdev->mddev, rdev);
rdev->raid_disk = -1;
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
md_wakeup_thread(rdev->mddev->thread);
@@ -2522,8 +2709,7 @@
return err;
} else
sysfs_notify_dirent_safe(rdev->sysfs_state);
- sprintf(nm, "rd%d", rdev->raid_disk);
- if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
+ if (sysfs_link_rdev(rdev->mddev, rdev))
/* failure here is OK */;
/* don't wakeup anyone, leave that to userspace. */
} else {
@@ -2712,6 +2898,39 @@
static struct rdev_sysfs_entry rdev_recovery_start =
__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack);
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
+
+static ssize_t bb_show(mdk_rdev_t *rdev, char *page)
+{
+ return badblocks_show(&rdev->badblocks, page, 0);
+}
+static ssize_t bb_store(mdk_rdev_t *rdev, const char *page, size_t len)
+{
+ int rv = badblocks_store(&rdev->badblocks, page, len, 0);
+ /* Maybe that ack was all we needed */
+ if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
+ wake_up(&rdev->blocked_wait);
+ return rv;
+}
+static struct rdev_sysfs_entry rdev_bad_blocks =
+__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
+
+
+static ssize_t ubb_show(mdk_rdev_t *rdev, char *page)
+{
+ return badblocks_show(&rdev->badblocks, page, 1);
+}
+static ssize_t ubb_store(mdk_rdev_t *rdev, const char *page, size_t len)
+{
+ return badblocks_store(&rdev->badblocks, page, len, 1);
+}
+static struct rdev_sysfs_entry rdev_unack_bad_blocks =
+__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
+
static struct attribute *rdev_default_attrs[] = {
&rdev_state.attr,
&rdev_errors.attr,
@@ -2719,6 +2938,8 @@
&rdev_offset.attr,
&rdev_size.attr,
&rdev_recovery_start.attr,
+ &rdev_bad_blocks.attr,
+ &rdev_unack_bad_blocks.attr,
NULL,
};
static ssize_t
@@ -2782,7 +3003,7 @@
.default_attrs = rdev_default_attrs,
};
-void md_rdev_init(mdk_rdev_t *rdev)
+int md_rdev_init(mdk_rdev_t *rdev)
{
rdev->desc_nr = -1;
rdev->saved_raid_disk = -1;
@@ -2792,12 +3013,27 @@
rdev->sb_events = 0;
rdev->last_read_error.tv_sec = 0;
rdev->last_read_error.tv_nsec = 0;
+ rdev->sb_loaded = 0;
+ rdev->bb_page = NULL;
atomic_set(&rdev->nr_pending, 0);
atomic_set(&rdev->read_errors, 0);
atomic_set(&rdev->corrected_errors, 0);
INIT_LIST_HEAD(&rdev->same_set);
init_waitqueue_head(&rdev->blocked_wait);
+
+ /* Add space to store bad block list.
+ * This reserves the space even on arrays where it cannot
+ * be used - I wonder if that matters
+ */
+ rdev->badblocks.count = 0;
+ rdev->badblocks.shift = 0;
+ rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ seqlock_init(&rdev->badblocks.lock);
+ if (rdev->badblocks.page == NULL)
+ return -ENOMEM;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(md_rdev_init);
/*
@@ -2823,8 +3059,11 @@
return ERR_PTR(-ENOMEM);
}
- md_rdev_init(rdev);
- if ((err = alloc_disk_sb(rdev)))
+ err = md_rdev_init(rdev);
+ if (err)
+ goto abort_free;
+ err = alloc_disk_sb(rdev);
+ if (err)
goto abort_free;
err = lock_rdev(rdev, newdev, super_format == -2);
@@ -2860,15 +3099,17 @@
goto abort_free;
}
}
+ if (super_format == -1)
+ /* hot-add for 0.90, or non-persistent: so no badblocks */
+ rdev->badblocks.shift = -1;
return rdev;
abort_free:
- if (rdev->sb_page) {
- if (rdev->bdev)
- unlock_rdev(rdev);
- free_disk_sb(rdev);
- }
+ if (rdev->bdev)
+ unlock_rdev(rdev);
+ free_disk_sb(rdev);
+ kfree(rdev->badblocks.page);
kfree(rdev);
return ERR_PTR(err);
}
@@ -3149,15 +3390,13 @@
}
list_for_each_entry(rdev, &mddev->disks, same_set) {
- char nm[20];
if (rdev->raid_disk < 0)
continue;
if (rdev->new_raid_disk >= mddev->raid_disks)
rdev->new_raid_disk = -1;
if (rdev->new_raid_disk == rdev->raid_disk)
continue;
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
+ sysfs_unlink_rdev(mddev, rdev);
}
list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk < 0)
@@ -3168,11 +3407,10 @@
if (rdev->raid_disk < 0)
clear_bit(In_sync, &rdev->flags);
else {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- if(sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
- printk("md: cannot register %s for %s after level change\n",
- nm, mdname(mddev));
+ if (sysfs_link_rdev(mddev, rdev))
+ printk(KERN_WARNING "md: cannot register rd%d"
+ " for %s after level change\n",
+ rdev->raid_disk, mdname(mddev));
}
}
@@ -4504,7 +4742,8 @@
}
if (mddev->bio_set == NULL)
- mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
+ mddev->bio_set = bioset_create(BIO_POOL_SIZE,
+ sizeof(mddev_t *));
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
@@ -4621,12 +4860,9 @@
smp_wmb();
mddev->ready = 1;
list_for_each_entry(rdev, &mddev->disks, same_set)
- if (rdev->raid_disk >= 0) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
+ if (rdev->raid_disk >= 0)
+ if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
- }
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -4854,11 +5090,8 @@
sysfs_notify_dirent_safe(mddev->sysfs_state);
list_for_each_entry(rdev, &mddev->disks, same_set)
- if (rdev->raid_disk >= 0) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
- }
+ if (rdev->raid_disk >= 0)
+ sysfs_unlink_rdev(mddev, rdev);
set_capacity(disk, 0);
mutex_unlock(&mddev->open_mutex);
@@ -6198,18 +6431,7 @@
if (!rdev || test_bit(Faulty, &rdev->flags))
return;
- if (mddev->external)
- set_bit(Blocked, &rdev->flags);
-/*
- dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
- mdname(mddev),
- MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
- __builtin_return_address(0),__builtin_return_address(1),
- __builtin_return_address(2),__builtin_return_address(3));
-*/
- if (!mddev->pers)
- return;
- if (!mddev->pers->error_handler)
+ if (!mddev->pers || !mddev->pers->error_handler)
return;
mddev->pers->error_handler(mddev,rdev);
if (mddev->degraded)
@@ -6933,11 +7155,14 @@
atomic_add(sectors, &mddev->recovery_active);
}
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+ break;
+
j += sectors;
if (j>1) mddev->curr_resync = j;
mddev->curr_mark_cnt = io_sectors;
if (last_check == 0)
- /* this is the earliers that rebuilt will be
+ /* this is the earliest that rebuild will be
* visible in /proc/mdstat
*/
md_new_event(mddev);
@@ -6946,10 +7171,6 @@
continue;
last_check = io_sectors;
-
- if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
- break;
-
repeat:
if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
/* step marks */
@@ -7067,29 +7288,23 @@
atomic_read(&rdev->nr_pending)==0) {
if (mddev->pers->hot_remove_disk(
mddev, rdev->raid_disk)==0) {
- char nm[20];
- sprintf(nm,"rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
+ sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
}
}
- if (mddev->degraded && !mddev->recovery_disabled) {
+ if (mddev->degraded) {
list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(Blocked, &rdev->flags))
+ !test_bit(Faulty, &rdev->flags))
spares++;
if (rdev->raid_disk < 0
&& !test_bit(Faulty, &rdev->flags)) {
rdev->recovery_offset = 0;
if (mddev->pers->
hot_add_disk(mddev, rdev) == 0) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- if (sysfs_create_link(&mddev->kobj,
- &rdev->kobj, nm))
+ if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
spares++;
md_new_event(mddev);
@@ -7138,6 +7353,8 @@
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event(mddev);
+ if (mddev->event_work.func)
+ queue_work(md_misc_wq, &mddev->event_work);
}
/*
@@ -7170,9 +7387,6 @@
if (mddev->bitmap)
bitmap_daemon_work(mddev);
- if (mddev->ro)
- return;
-
if (signal_pending(current)) {
if (mddev->pers->sync_request && !mddev->external) {
printk(KERN_INFO "md: %s in immediate safe mode\n",
@@ -7209,9 +7423,7 @@
atomic_read(&rdev->nr_pending)==0) {
if (mddev->pers->hot_remove_disk(
mddev, rdev->raid_disk)==0) {
- char nm[20];
- sprintf(nm,"rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
+ sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
}
}
@@ -7331,12 +7543,499 @@
{
sysfs_notify_dirent_safe(rdev->sysfs_state);
wait_event_timeout(rdev->blocked_wait,
- !test_bit(Blocked, &rdev->flags),
+ !test_bit(Blocked, &rdev->flags) &&
+ !test_bit(BlockedBadBlocks, &rdev->flags),
msecs_to_jiffies(5000));
rdev_dec_pending(rdev, mddev);
}
EXPORT_SYMBOL(md_wait_for_blocked_rdev);
+
+/* Bad block management.
+ * We can record which blocks on each device are 'bad' and so just
+ * fail those blocks, or that stripe, rather than the whole device.
+ * Entries in the bad-block table are 64 bits wide. Each comprises:
+ * Length of bad-range, in sectors: 0-511 for lengths 1-512
+ * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
+ * A 'shift' can be set so that larger blocks are tracked and
+ * consequently larger devices can be covered.
+ * 'Acknowledged' flag - 1 bit. - the most significant bit.
+ *
+ * Locking of the bad-block table uses a seqlock so md_is_badblock
+ * might need to retry if it is very unlucky.
+ * We will sometimes want to check for bad blocks in a bi_end_io function,
+ * so we use the write_seqlock_irq variant.
+ *
+ * When looking for a bad block we specify a range and want to
+ * know if any block in the range is bad. So we binary-search
+ * to the last range that starts at-or-before the given endpoint,
+ * (or "before the sector after the target range")
+ * then see if it ends after the given start.
+ * We return
+ * 0 if there are no known bad blocks in the range
+ * 1 if there are known bad block which are all acknowledged
+ * -1 if there are bad blocks which have not yet been acknowledged in metadata.
+ * plus the start/length of the first bad section we overlap.
+ */
+int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
+ sector_t *first_bad, int *bad_sectors)
+{
+ int hi;
+ int lo = 0;
+ u64 *p = bb->page;
+ int rv = 0;
+ sector_t target = s + sectors;
+ unsigned seq;
+
+ if (bb->shift > 0) {
+ /* round the start down, and the end up */
+ s >>= bb->shift;
+ target += (1<<bb->shift) - 1;
+ target >>= bb->shift;
+ sectors = target - s;
+ }
+ /* 'target' is now the first block after the bad range */
+
+retry:
+ seq = read_seqbegin(&bb->lock);
+
+ hi = bb->count;
+
+ /* Binary search between lo and hi for 'target'
+ * i.e. for the last range that starts before 'target'
+ */
+ /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
+ * are known not to be the last range before target.
+ * VARIANT: hi-lo is the number of possible
+ * ranges, and decreases until it reaches 1
+ */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+ if (a < target)
+ /* This could still be the one, earlier ranges
+ * could not. */
+ lo = mid;
+ else
+ /* This and later ranges are definitely out. */
+ hi = mid;
+ }
+ /* 'lo' might be the last that started before target, but 'hi' isn't */
+ if (hi > lo) {
+ /* need to check all ranges that end after 's' to see if
+ * any are unacknowledged.
+ */
+ while (lo >= 0 &&
+ BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+ if (BB_OFFSET(p[lo]) < target) {
+ /* starts before the end, and finishes after
+ * the start, so they must overlap
+ */
+ if (rv != -1 && BB_ACK(p[lo]))
+ rv = 1;
+ else
+ rv = -1;
+ *first_bad = BB_OFFSET(p[lo]);
+ *bad_sectors = BB_LEN(p[lo]);
+ }
+ lo--;
+ }
+ }
+
+ if (read_seqretry(&bb->lock, seq))
+ goto retry;
+
+ return rv;
+}
+EXPORT_SYMBOL_GPL(md_is_badblock);
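The entry format described in the comment above can be exercised standalone. This sketch copies the BB_* macros verbatim from the md.h hunk later in this patch and round-trips one entry; the sector numbers are invented:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* copied from the md.h hunk below */
#define BB_LEN_MASK	(0x00000000000001FFULL)
#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
#define BB_ACK_MASK	(0x8000000000000000ULL)
#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))

int main(void)
{
	/* a 16-sector bad range starting at sector 123456, acknowledged */
	u64 e = BB_MAKE(123456ULL, 16, 1);

	printf("offset=%llu len=%llu ack=%d\n",
	       (unsigned long long)BB_OFFSET(e),
	       (unsigned long long)BB_LEN(e), BB_ACK(e));
	return 0;
}

md_is_badblock() binary-searches a sorted page of such entries under the seqlock, returning 0, 1 or -1 exactly as the comment above describes.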
+
+/*
+ * Add a range of bad blocks to the table.
+ * This might extend the table, or might contract it
+ * if two adjacent ranges can be merged.
+ * We binary-search to find the 'insertion' point, then
+ * decide how best to handle it.
+ */
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+ int acknowledged)
+{
+ u64 *p;
+ int lo, hi;
+ int rv = 1;
+
+ if (bb->shift < 0)
+ /* badblocks are disabled */
+ return 0;
+
+ if (bb->shift) {
+ /* round the start down, and the end up */
+ sector_t next = s + sectors;
+ s >>= bb->shift;
+ next += (1<<bb->shift) - 1;
+ next >>= bb->shift;
+ sectors = next - s;
+ }
+
+ write_seqlock_irq(&bb->lock);
+
+ p = bb->page;
+ lo = 0;
+ hi = bb->count;
+ /* Find the last range that starts at-or-before 's' */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+ if (a <= s)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ if (hi > lo && BB_OFFSET(p[lo]) > s)
+ hi = lo;
+
+ if (hi > lo) {
+ /* we found a range that might merge with the start
+ * of our new range
+ */
+ sector_t a = BB_OFFSET(p[lo]);
+ sector_t e = a + BB_LEN(p[lo]);
+ int ack = BB_ACK(p[lo]);
+ if (e >= s) {
+ /* Yes, we can merge with a previous range */
+ if (s == a && s + sectors >= e)
+ /* new range covers old */
+ ack = acknowledged;
+ else
+ ack = ack && acknowledged;
+
+ if (e < s + sectors)
+ e = s + sectors;
+ if (e - a <= BB_MAX_LEN) {
+ p[lo] = BB_MAKE(a, e-a, ack);
+ s = e;
+ } else {
+ /* does not all fit in one range,
+ * make p[lo] maximal
+ */
+ if (BB_LEN(p[lo]) != BB_MAX_LEN)
+ p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
+ s = a + BB_MAX_LEN;
+ }
+ sectors = e - s;
+ }
+ }
+ if (sectors && hi < bb->count) {
+ /* 'hi' points to the first range that starts after 's'.
+ * Maybe we can merge with the start of that range */
+ sector_t a = BB_OFFSET(p[hi]);
+ sector_t e = a + BB_LEN(p[hi]);
+ int ack = BB_ACK(p[hi]);
+ if (a <= s + sectors) {
+ /* merging is possible */
+ if (e <= s + sectors) {
+ /* full overlap */
+ e = s + sectors;
+ ack = acknowledged;
+ } else
+ ack = ack && acknowledged;
+
+ a = s;
+ if (e - a <= BB_MAX_LEN) {
+ p[hi] = BB_MAKE(a, e-a, ack);
+ s = e;
+ } else {
+ p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
+ s = a + BB_MAX_LEN;
+ }
+ sectors = e - s;
+ lo = hi;
+ hi++;
+ }
+ }
+ if (sectors == 0 && hi < bb->count) {
+ /* we might be able to combine lo and hi */
+ /* Note: 's' is at the end of 'lo' */
+ sector_t a = BB_OFFSET(p[hi]);
+ int lolen = BB_LEN(p[lo]);
+ int hilen = BB_LEN(p[hi]);
+ int newlen = lolen + hilen - (s - a);
+ if (s >= a && newlen < BB_MAX_LEN) {
+ /* yes, we can combine them */
+ int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
+ p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
+ memmove(p + hi, p + hi + 1,
+ (bb->count - hi - 1) * 8);
+ bb->count--;
+ }
+ }
+ while (sectors) {
+ /* didn't merge it all.
+ * Need to add a range just before 'hi' */
+ if (bb->count >= MD_MAX_BADBLOCKS) {
+ /* No room for more */
+ rv = 0;
+ break;
+ } else {
+ int this_sectors = sectors;
+ memmove(p + hi + 1, p + hi,
+ (bb->count - hi) * 8);
+ bb->count++;
+
+ if (this_sectors > BB_MAX_LEN)
+ this_sectors = BB_MAX_LEN;
+ p[hi] = BB_MAKE(s, this_sectors, acknowledged);
+ sectors -= this_sectors;
+ s += this_sectors;
+ }
+ }
+
+ bb->changed = 1;
+ if (!acknowledged)
+ bb->unacked_exist = 1;
+ write_sequnlock_irq(&bb->lock);
+
+ return rv;
+}
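A worked example of the merge-and-cap behaviour, with invented numbers: if an entry already covers sectors [1000, 1100) and a new bad range of 600 sectors starts at 1050, the merged extent [1000, 1650) exceeds BB_MAX_LEN, so p[lo] is capped at 512 sectors and the remainder becomes a new entry:

#include <stdio.h>

#define BB_MAX_LEN 512

int main(void)
{
	unsigned long long a = 1000, e = 1100;		/* existing entry */
	unsigned long long s = 1050, sectors = 600;	/* new bad range */

	if (a <= s && s <= e) {			/* ranges touch: merge */
		if (s + sectors > e)
			e = s + sectors;	/* e = 1650 */
		if (e - a > BB_MAX_LEN) {	/* 650 > 512: cap p[lo] */
			printf("entry 1: [%llu, %llu)\n", a, a + BB_MAX_LEN);
			printf("entry 2: [%llu, %llu)\n", a + BB_MAX_LEN, e);
		} else {
			printf("entry: [%llu, %llu)\n", a, e);
		}
	}
	return 0;
}

This mirrors the "does not all fit in one range, make p[lo] maximal" branch above.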
+
+int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors,
+ int acknowledged)
+{
+ int rv = md_set_badblocks(&rdev->badblocks,
+ s + rdev->data_offset, sectors, acknowledged);
+ if (rv) {
+ /* Make sure they get written out promptly */
+ set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
+ md_wakeup_thread(rdev->mddev->thread);
+ }
+ return rv;
+}
+EXPORT_SYMBOL_GPL(rdev_set_badblocks);
+
+/*
+ * Remove a range of bad blocks from the table.
+ * This may involve extending the table if we split a region,
+ * but it must not fail. So if the table becomes full, we just
+ * drop the remove request.
+ */
+static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
+{
+ u64 *p;
+ int lo, hi;
+ sector_t target = s + sectors;
+ int rv = 0;
+
+ if (bb->shift > 0) {
+ /* When clearing we round the start up and the end down.
+ * This should not matter as the shift should align with
+ * the block size and no rounding should ever be needed.
+ * However it is better to think a block is bad when it
+ * isn't than to think a block is not bad when it is.
+ */
+ s += (1<<bb->shift) - 1;
+ s >>= bb->shift;
+ target >>= bb->shift;
+ sectors = target - s;
+ }
+
+ write_seqlock_irq(&bb->lock);
+
+ p = bb->page;
+ lo = 0;
+ hi = bb->count;
+ /* Find the last range that starts before 'target' */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+ if (a < target)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ if (hi > lo) {
+ /* p[lo] is the last range that could overlap the
+ * current range. Earlier ranges could also overlap,
+ * but only this one can overlap the end of the range.
+ */
+ if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
+ /* Partial overlap, leave the tail of this range */
+ int ack = BB_ACK(p[lo]);
+ sector_t a = BB_OFFSET(p[lo]);
+ sector_t end = a + BB_LEN(p[lo]);
+
+ if (a < s) {
+ /* we need to split this range */
+ if (bb->count >= MD_MAX_BADBLOCKS) {
+ rv = 0;
+ goto out;
+ }
+ memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
+ bb->count++;
+ p[lo] = BB_MAKE(a, s-a, ack);
+ lo++;
+ }
+ p[lo] = BB_MAKE(target, end - target, ack);
+ /* there is no longer an overlap */
+ hi = lo;
+ lo--;
+ }
+ while (lo >= 0 &&
+ BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+ /* This range does overlap */
+ if (BB_OFFSET(p[lo]) < s) {
+ /* Keep the early parts of this range. */
+ int ack = BB_ACK(p[lo]);
+ sector_t start = BB_OFFSET(p[lo]);
+ p[lo] = BB_MAKE(start, s - start, ack);
+ /* now p[lo] doesn't overlap, so we can stop */
+ break;
+ }
+ lo--;
+ }
+ /* 'lo' is strictly before, 'hi' is strictly after,
+ * anything between needs to be discarded
+ */
+ if (hi - lo > 1) {
+ memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
+ bb->count -= (hi - lo - 1);
+ }
+ }
+
+ bb->changed = 1;
+out:
+ write_sequnlock_irq(&bb->lock);
+ return rv;
+}
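Clearing can also grow the table, since removing the middle of an entry splits it in two. A before/after sketch with invented values:

#include <stdio.h>

int main(void)
{
	/* existing entry covers [100, 200); we clear [120, 150) */
	unsigned long long a = 100, end = 200, s = 120, target = 150;

	printf("before: [%llu, %llu)\n", a, end);
	/* p[lo] is rewritten to keep the head; the copied slot keeps
	 * the tail, as the memmove()+BB_MAKE() pair above does */
	printf("after:  [%llu, %llu) and [%llu, %llu)\n", a, s, target, end);
	return 0;
}

This is why md_clear_badblocks() can hit MD_MAX_BADBLOCKS and must drop the remove request rather than fail.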
+
+int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors)
+{
+ return md_clear_badblocks(&rdev->badblocks,
+ s + rdev->data_offset,
+ sectors);
+}
+EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
+
+/*
+ * Acknowledge all bad blocks in a list.
+ * This only succeeds if ->changed is clear. It is used by
+ * in-kernel metadata updates.
+ */
+void md_ack_all_badblocks(struct badblocks *bb)
+{
+ if (bb->page == NULL || bb->changed)
+ /* no point even trying */
+ return;
+ write_seqlock_irq(&bb->lock);
+
+ if (bb->changed == 0) {
+ u64 *p = bb->page;
+ int i;
+ for (i = 0; i < bb->count ; i++) {
+ if (!BB_ACK(p[i])) {
+ sector_t start = BB_OFFSET(p[i]);
+ int len = BB_LEN(p[i]);
+ p[i] = BB_MAKE(start, len, 1);
+ }
+ }
+ bb->unacked_exist = 0;
+ }
+ write_sequnlock_irq(&bb->lock);
+}
+EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
+
+/* sysfs access to bad-blocks list.
+ * We present two files.
+ * 'bad_blocks' lists sector numbers and lengths of ranges that
+ * are recorded as bad. The list is truncated to fit within
+ * the one-page limit of sysfs.
+ * Writing "sector length" to this file adds an acknowledged
+ * bad-block range.
+ * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
+ * been acknowledged. Writing to this file adds bad blocks
+ * without acknowledging them. This is largely for testing.
+ */
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack)
+{
+ size_t len;
+ int i;
+ u64 *p = bb->page;
+ unsigned seq;
+
+ if (bb->shift < 0)
+ return 0;
+
+retry:
+ seq = read_seqbegin(&bb->lock);
+
+ len = 0;
+ i = 0;
+
+ while (len < PAGE_SIZE && i < bb->count) {
+ sector_t s = BB_OFFSET(p[i]);
+ unsigned int length = BB_LEN(p[i]);
+ int ack = BB_ACK(p[i]);
+ i++;
+
+ if (unack && ack)
+ continue;
+
+ len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
+ (unsigned long long)s << bb->shift,
+ length << bb->shift);
+ }
+ if (unack && len == 0)
+ bb->unacked_exist = 0;
+
+ if (read_seqretry(&bb->lock, seq))
+ goto retry;
+
+ return len;
+}
+
+#define DO_DEBUG 1
+
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
+{
+ unsigned long long sector;
+ int length;
+ char newline;
+#ifdef DO_DEBUG
+ /* Allow clearing via sysfs *only* for testing/debugging.
+ * Normally only a successful write may clear a badblock
+ */
+ int clear = 0;
+ if (page[0] == '-') {
+ clear = 1;
+ page++;
+ }
+#endif /* DO_DEBUG */
+
+ switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
+ case 3:
+ if (newline != '\n')
+ return -EINVAL;
+ /* fall through */
+ case 2:
+ if (length <= 0)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+#ifdef DO_DEBUG
+ if (clear) {
+ md_clear_badblocks(bb, sector, length);
+ return len;
+ }
+#endif /* DO_DEBUG */
+ if (md_set_badblocks(bb, sector, length, !unack))
+ return len;
+ else
+ return -ENOSPC;
+}
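The accepted input is therefore "sector length" with an optional trailing newline. A hedged userspace replica of the parse (the helper name is invented):

#include <stdio.h>

static int parse_bb(const char *page, unsigned long long *sector, int *length)
{
	char newline;

	switch (sscanf(page, "%llu %d%c", sector, length, &newline)) {
	case 3:
		if (newline != '\n')
			return -1;
		/* fall through */
	case 2:
		if (*length <= 0)
			return -1;
		return 0;
	default:
		return -1;
	}
}

int main(void)
{
	unsigned long long s;
	int l;

	if (parse_bb("2048 16\n", &s, &l) == 0)
		printf("sector=%llu length=%d\n", s, l);
	return 0;
}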
+
static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
{
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1c26c7a..1e586bb 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -29,6 +29,13 @@
typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;
+/* Bad block numbers are stored sorted in a single page.
+ * 64 bits are used for each block or extent.
+ * 54 bits are sector number, 9 bits are extent size,
+ * 1 bit is an 'acknowledged' flag.
+ */
+#define MD_MAX_BADBLOCKS (PAGE_SIZE/8)
+
/*
* MD's 'extended' device
*/
@@ -48,7 +55,7 @@
struct block_device *meta_bdev;
struct block_device *bdev; /* block device handle */
- struct page *sb_page;
+ struct page *sb_page, *bb_page;
int sb_loaded;
__u64 sb_events;
sector_t data_offset; /* start of data in array */
@@ -74,9 +81,29 @@
#define In_sync 2 /* device is in_sync with rest of array */
#define WriteMostly 4 /* Avoid reading if at all possible */
#define AutoDetected 7 /* added by auto-detect */
-#define Blocked 8 /* An error occurred on an externally
- * managed array, don't allow writes
+#define Blocked 8 /* An error occurred but has not yet
+ * been acknowledged by the metadata
+ * handler, so don't allow writes
* until it is cleared */
+#define WriteErrorSeen 9 /* A write error has been seen on this
+ * device
+ */
+#define FaultRecorded 10 /* Intermediate state for clearing
+ * Blocked. The Fault is/will-be
+ * recorded in the metadata, but that
+ * metadata hasn't been stored safely
+ * on disk yet.
+ */
+#define BlockedBadBlocks 11 /* A writer is blocked because they
+ * found an unacknowledged bad-block.
+ * This can safely be cleared at any
+ * time, and the writer will re-check.
+ * It may be set at any time, and at
+ * worst the writer will timeout and
+ * re-check. So setting it as
+ * accurately as possible is good, but
+ * not absolutely critical.
+ */
wait_queue_head_t blocked_wait;
int desc_nr; /* descriptor index in the superblock */
@@ -111,8 +138,54 @@
struct sysfs_dirent *sysfs_state; /* handle for 'state'
* sysfs entry */
+
+ struct badblocks {
+ int count; /* count of bad blocks */
+ int unacked_exist; /* there probably are unacknowledged
+ * bad blocks. This is only cleared
+ * when a read discovers none
+ */
+ int shift; /* shift from sectors to block size;
+ * a negative shift means badblocks are
+ * disabled. */
+ u64 *page; /* badblock list */
+ int changed;
+ seqlock_t lock;
+
+ sector_t sector;
+ sector_t size; /* in sectors */
+ } badblocks;
};
+#define BB_LEN_MASK (0x00000000000001FFULL)
+#define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL)
+#define BB_ACK_MASK (0x8000000000000000ULL)
+#define BB_MAX_LEN 512
+#define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9)
+#define BB_LEN(x) (((x) & BB_LEN_MASK) + 1)
+#define BB_ACK(x) (!!((x) & BB_ACK_MASK))
+#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
+
+extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
+ sector_t *first_bad, int *bad_sectors);
+static inline int is_badblock(mdk_rdev_t *rdev, sector_t s, int sectors,
+ sector_t *first_bad, int *bad_sectors)
+{
+ if (unlikely(rdev->badblocks.count)) {
+ int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s,
+ sectors,
+ first_bad, bad_sectors);
+ if (rv)
+ *first_bad -= rdev->data_offset;
+ return rv;
+ }
+ return 0;
+}
+extern int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors,
+ int acknowledged);
+extern int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors);
+extern void md_ack_all_badblocks(struct badblocks *bb);
+
struct mddev_s
{
void *private;
@@ -239,9 +312,12 @@
#define MD_RECOVERY_FROZEN 9
unsigned long recovery;
- int recovery_disabled; /* if we detect that recovery
- * will always fail, set this
- * so we don't loop trying */
+ /* If a RAID personality determines that recovery (of a particular
+ * device) will fail due to a read error on the source device, it
+ * takes a copy of this number and does not attempt recovery again
+ * until this number changes.
+ */
+ int recovery_disabled;
int in_sync; /* know to not need resync */
/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
@@ -304,11 +380,6 @@
* hot-adding a bitmap. It should
* eventually be settable by sysfs.
*/
- /* When md is serving under dm, it might use a
- * dirty_log to store the bits.
- */
- struct dm_dirty_log *log;
-
struct mutex mutex;
unsigned long chunksize;
unsigned long daemon_sleep; /* how many jiffies between updates? */
@@ -413,6 +484,20 @@
return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}
+static inline int sysfs_link_rdev(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+}
+
+static inline void sysfs_unlink_rdev(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+ char nm[20];
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_remove_link(&mddev->kobj, nm);
+}
+
/*
* iterates through some rdev ringlist. It's safe to remove the
* current 'rdev'. Don't touch 'tmp' though.
@@ -505,7 +590,7 @@
extern int md_run(mddev_t *mddev);
extern void md_stop(mddev_t *mddev);
extern void md_stop_writes(mddev_t *mddev);
-extern void md_rdev_init(mdk_rdev_t *rdev);
+extern int md_rdev_init(mdk_rdev_t *rdev);
extern void mddev_suspend(mddev_t *mddev);
extern void mddev_resume(mddev_t *mddev);
@@ -514,4 +599,5 @@
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
mddev_t *mddev);
extern int mddev_check_plugged(mddev_t *mddev);
+extern void md_trim_bio(struct bio *bio, int offset, int size);
#endif /* _MD_MD_H */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f7431b6..32323f0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -35,16 +35,13 @@
#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
+#include <linux/ratelimit.h>
#include "md.h"
#include "raid1.h"
#include "bitmap.h"
#define DEBUG 0
-#if DEBUG
-#define PRINTK(x...) printk(x)
-#else
-#define PRINTK(x...)
-#endif
+#define PRINTK(x...) do { if (DEBUG) printk(x); } while (0)
/*
* Number of guaranteed r1bios in case of extreme VM load:
@@ -166,7 +163,7 @@
for (i = 0; i < conf->raid_disks; i++) {
struct bio **bio = r1_bio->bios + i;
- if (*bio && *bio != IO_BLOCKED)
+ if (!BIO_SPECIAL(*bio))
bio_put(*bio);
*bio = NULL;
}
@@ -176,12 +173,6 @@
{
conf_t *conf = r1_bio->mddev->private;
- /*
- * Wake up any possible resync thread that waits for the device
- * to go idle.
- */
- allow_barrier(conf);
-
put_all_bios(conf, r1_bio);
mempool_free(r1_bio, conf->r1bio_pool);
}
@@ -222,6 +213,33 @@
* operation and are ready to return a success/failure code to the buffer
* cache layer.
*/
+static void call_bio_endio(r1bio_t *r1_bio)
+{
+ struct bio *bio = r1_bio->master_bio;
+ int done;
+ conf_t *conf = r1_bio->mddev->private;
+
+ if (bio->bi_phys_segments) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
+ bio->bi_phys_segments--;
+ done = (bio->bi_phys_segments == 0);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ } else
+ done = 1;
+
+ if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (done) {
+ bio_endio(bio, 0);
+ /*
+ * Wake up any possible resync thread that waits for the device
+ * to go idle.
+ */
+ allow_barrier(conf);
+ }
+}
+
static void raid_end_bio_io(r1bio_t *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
@@ -234,8 +252,7 @@
(unsigned long long) bio->bi_sector +
(bio->bi_size >> 9) - 1);
- bio_endio(bio,
- test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO);
+ call_bio_endio(r1_bio);
}
free_r1bio(r1_bio);
}
@@ -287,36 +304,52 @@
* oops, read error:
*/
char b[BDEVNAME_SIZE];
- if (printk_ratelimit())
- printk(KERN_ERR "md/raid1:%s: %s: rescheduling sector %llu\n",
- mdname(conf->mddev),
- bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
+ printk_ratelimited(
+ KERN_ERR "md/raid1:%s: %s: "
+ "rescheduling sector %llu\n",
+ mdname(conf->mddev),
+ bdevname(conf->mirrors[mirror].rdev->bdev,
+ b),
+ (unsigned long long)r1_bio->sector);
+ set_bit(R1BIO_ReadError, &r1_bio->state);
reschedule_retry(r1_bio);
}
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
}
+static void close_write(r1bio_t *r1_bio)
+{
+ /* it really is the end of this request */
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+ /* free extra copy of the data pages */
+ int i = r1_bio->behind_page_count;
+ while (i--)
+ safe_put_page(r1_bio->behind_bvecs[i].bv_page);
+ kfree(r1_bio->behind_bvecs);
+ r1_bio->behind_bvecs = NULL;
+ }
+ /* clear the bitmap if all writes complete successfully */
+ bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+ r1_bio->sectors,
+ !test_bit(R1BIO_Degraded, &r1_bio->state),
+ test_bit(R1BIO_BehindIO, &r1_bio->state));
+ md_write_end(r1_bio->mddev);
+}
+
static void r1_bio_write_done(r1bio_t *r1_bio)
{
- if (atomic_dec_and_test(&r1_bio->remaining))
- {
- /* it really is the end of this request */
- if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
- /* free extra copy of the data pages */
- int i = r1_bio->behind_page_count;
- while (i--)
- safe_put_page(r1_bio->behind_pages[i]);
- kfree(r1_bio->behind_pages);
- r1_bio->behind_pages = NULL;
- }
- /* clear the bitmap if all writes complete successfully */
- bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
- r1_bio->sectors,
- !test_bit(R1BIO_Degraded, &r1_bio->state),
- test_bit(R1BIO_BehindIO, &r1_bio->state));
- md_write_end(r1_bio->mddev);
- raid_end_bio_io(r1_bio);
+ if (!atomic_dec_and_test(&r1_bio->remaining))
+ return;
+
+ if (test_bit(R1BIO_WriteError, &r1_bio->state))
+ reschedule_retry(r1_bio);
+ else {
+ close_write(r1_bio);
+ if (test_bit(R1BIO_MadeGood, &r1_bio->state))
+ reschedule_retry(r1_bio);
+ else
+ raid_end_bio_io(r1_bio);
}
}
@@ -336,13 +369,11 @@
/*
* 'one mirror IO has finished' event handler:
*/
- r1_bio->bios[mirror] = NULL;
- to_put = bio;
if (!uptodate) {
- md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
- /* an I/O failed, we can't clear the bitmap */
- set_bit(R1BIO_Degraded, &r1_bio->state);
- } else
+ set_bit(WriteErrorSeen,
+ &conf->mirrors[mirror].rdev->flags);
+ set_bit(R1BIO_WriteError, &r1_bio->state);
+ } else {
/*
* Set R1BIO_Uptodate in our master bio, so that we
* will return a good error code for to the higher
@@ -353,8 +384,22 @@
* to user-side. So if something waits for IO, then it
* will wait for the 'master' bio.
*/
+ sector_t first_bad;
+ int bad_sectors;
+
+ r1_bio->bios[mirror] = NULL;
+ to_put = bio;
set_bit(R1BIO_Uptodate, &r1_bio->state);
+ /* Maybe we can clear some bad blocks. */
+ if (is_badblock(conf->mirrors[mirror].rdev,
+ r1_bio->sector, r1_bio->sectors,
+ &first_bad, &bad_sectors)) {
+ r1_bio->bios[mirror] = IO_MADE_GOOD;
+ set_bit(R1BIO_MadeGood, &r1_bio->state);
+ }
+ }
+
update_head_pos(mirror, r1_bio);
if (behind) {
@@ -377,11 +422,13 @@
(unsigned long long) mbio->bi_sector,
(unsigned long long) mbio->bi_sector +
(mbio->bi_size >> 9) - 1);
- bio_endio(mbio, 0);
+ call_bio_endio(r1_bio);
}
}
}
- rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+ if (r1_bio->bios[mirror] == NULL)
+ rdev_dec_pending(conf->mirrors[mirror].rdev,
+ conf->mddev);
/*
* Let's see if all mirrored write operations have finished
@@ -408,10 +455,11 @@
*
* The rdev for the device selected will have nr_pending incremented.
*/
-static int read_balance(conf_t *conf, r1bio_t *r1_bio)
+static int read_balance(conf_t *conf, r1bio_t *r1_bio, int *max_sectors)
{
const sector_t this_sector = r1_bio->sector;
- const int sectors = r1_bio->sectors;
+ int sectors;
+ int best_good_sectors;
int start_disk;
int best_disk;
int i;
@@ -426,8 +474,11 @@
* We take the first readable disk when above the resync window.
*/
retry:
+ sectors = r1_bio->sectors;
best_disk = -1;
best_dist = MaxSector;
+ best_good_sectors = 0;
+
if (conf->mddev->recovery_cp < MaxSector &&
(this_sector + sectors >= conf->next_resync)) {
choose_first = 1;
@@ -439,6 +490,9 @@
for (i = 0 ; i < conf->raid_disks ; i++) {
sector_t dist;
+ sector_t first_bad;
+ int bad_sectors;
+
int disk = start_disk + i;
if (disk >= conf->raid_disks)
disk -= conf->raid_disks;
@@ -461,6 +515,35 @@
/* This is a reasonable device to use. It might
* even be best.
*/
+ if (is_badblock(rdev, this_sector, sectors,
+ &first_bad, &bad_sectors)) {
+ if (best_dist < MaxSector)
+ /* already have a better device */
+ continue;
+ if (first_bad <= this_sector) {
+ /* cannot read here. If this is the 'primary'
+ * device, then we must not read beyond
+ * bad_sectors from another device.
+ */
+ bad_sectors -= (this_sector - first_bad);
+ if (choose_first && sectors > bad_sectors)
+ sectors = bad_sectors;
+ if (best_good_sectors > sectors)
+ best_good_sectors = sectors;
+
+ } else {
+ sector_t good_sectors = first_bad - this_sector;
+ if (good_sectors > best_good_sectors) {
+ best_good_sectors = good_sectors;
+ best_disk = disk;
+ }
+ if (choose_first)
+ break;
+ }
+ continue;
+ } else
+ best_good_sectors = sectors;
+
dist = abs(this_sector - conf->mirrors[disk].head_position);
if (choose_first
/* Don't change to another disk for sequential reads */
@@ -489,10 +572,12 @@
rdev_dec_pending(rdev, conf->mddev);
goto retry;
}
+ sectors = best_good_sectors;
conf->next_seq_sect = this_sector + sectors;
conf->last_used = best_disk;
}
rcu_read_unlock();
+ *max_sectors = sectors;
return best_disk;
}
@@ -672,30 +757,31 @@
{
int i;
struct bio_vec *bvec;
- struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
+ struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
GFP_NOIO);
- if (unlikely(!pages))
+ if (unlikely(!bvecs))
return;
bio_for_each_segment(bvec, bio, i) {
- pages[i] = alloc_page(GFP_NOIO);
- if (unlikely(!pages[i]))
+ bvecs[i] = *bvec;
+ bvecs[i].bv_page = alloc_page(GFP_NOIO);
+ if (unlikely(!bvecs[i].bv_page))
goto do_sync_io;
- memcpy(kmap(pages[i]) + bvec->bv_offset,
- kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
- kunmap(pages[i]);
+ memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
+ kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
+ kunmap(bvecs[i].bv_page);
kunmap(bvec->bv_page);
}
- r1_bio->behind_pages = pages;
+ r1_bio->behind_bvecs = bvecs;
r1_bio->behind_page_count = bio->bi_vcnt;
set_bit(R1BIO_BehindIO, &r1_bio->state);
return;
do_sync_io:
for (i = 0; i < bio->bi_vcnt; i++)
- if (pages[i])
- put_page(pages[i]);
- kfree(pages);
+ if (bvecs[i].bv_page)
+ put_page(bvecs[i].bv_page);
+ kfree(bvecs);
PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
}
@@ -705,7 +791,7 @@
mirror_info_t *mirror;
r1bio_t *r1_bio;
struct bio *read_bio;
- int i, targets = 0, disks;
+ int i, disks;
struct bitmap *bitmap;
unsigned long flags;
const int rw = bio_data_dir(bio);
@@ -713,6 +799,9 @@
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
mdk_rdev_t *blocked_rdev;
int plugged;
+ int first_clone;
+ int sectors_handled;
+ int max_sectors;
/*
* Register the new request and wait if the reconstruction
@@ -759,11 +848,24 @@
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector;
+ /* We might need to issue multiple reads to different
+ * devices if there are bad blocks around, so we keep
+ * track of the number of reads in bio->bi_phys_segments.
+ * If this is 0, there is only one r1_bio and no locking
+ * will be needed when requests complete. If it is
+ * non-zero, then it is the number of not-completed requests.
+ */
+ bio->bi_phys_segments = 0;
+ clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
if (rw == READ) {
/*
* read balancing logic:
*/
- int rdisk = read_balance(conf, r1_bio);
+ int rdisk;
+
+read_again:
+ rdisk = read_balance(conf, r1_bio, &max_sectors);
if (rdisk < 0) {
/* couldn't find anywhere to read from */
@@ -784,6 +886,8 @@
r1_bio->read_disk = rdisk;
read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector,
+ max_sectors);
r1_bio->bios[rdisk] = read_bio;
@@ -793,16 +897,52 @@
read_bio->bi_rw = READ | do_sync;
read_bio->bi_private = r1_bio;
- generic_make_request(read_bio);
+ if (max_sectors < r1_bio->sectors) {
+ /* could not read all from this device, so we will
+ * need another r1_bio.
+ */
+
+ sectors_handled = (r1_bio->sector + max_sectors
+ - bio->bi_sector);
+ r1_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (bio->bi_phys_segments == 0)
+ bio->bi_phys_segments = 2;
+ else
+ bio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ /* Cannot call generic_make_request directly
+ * as that will be queued in __make_request
+ * and subsequent mempool_alloc might block waiting
+ * for it. So hand bio over to raid1d.
+ */
+ reschedule_retry(r1_bio);
+
+ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+ r1_bio->master_bio = bio;
+ r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r1_bio->state = 0;
+ r1_bio->mddev = mddev;
+ r1_bio->sector = bio->bi_sector + sectors_handled;
+ goto read_again;
+ } else
+ generic_make_request(read_bio);
return 0;
}
/*
* WRITE:
*/
- /* first select target devices under spinlock and
+ /* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting
* bios[x] to bio
+ * If there are known/acknowledged bad blocks on any device on
+ * which we have seen a write error, we want to avoid writing those
+ * blocks.
+ * This potentially requires several writes to write around
+ * the bad blocks. Each set of writes gets its own r1bio
+ * with a set of bios attached.
*/
plugged = mddev_check_plugged(mddev);
@@ -810,6 +950,7 @@
retry_write:
blocked_rdev = NULL;
rcu_read_lock();
+ max_sectors = r1_bio->sectors;
for (i = 0; i < disks; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
@@ -817,17 +958,56 @@
blocked_rdev = rdev;
break;
}
- if (rdev && !test_bit(Faulty, &rdev->flags)) {
- atomic_inc(&rdev->nr_pending);
- if (test_bit(Faulty, &rdev->flags)) {
- rdev_dec_pending(rdev, mddev);
- r1_bio->bios[i] = NULL;
- } else {
- r1_bio->bios[i] = bio;
- targets++;
+ r1_bio->bios[i] = NULL;
+ if (!rdev || test_bit(Faulty, &rdev->flags)) {
+ set_bit(R1BIO_Degraded, &r1_bio->state);
+ continue;
+ }
+
+ atomic_inc(&rdev->nr_pending);
+ if (test_bit(WriteErrorSeen, &rdev->flags)) {
+ sector_t first_bad;
+ int bad_sectors;
+ int is_bad;
+
+ is_bad = is_badblock(rdev, r1_bio->sector,
+ max_sectors,
+ &first_bad, &bad_sectors);
+ if (is_bad < 0) {
+ /* mustn't write here until the bad block is
+ * acknowledged */
+ set_bit(BlockedBadBlocks, &rdev->flags);
+ blocked_rdev = rdev;
+ break;
}
- } else
- r1_bio->bios[i] = NULL;
+ if (is_bad && first_bad <= r1_bio->sector) {
+ /* Cannot write here at all */
+ bad_sectors -= (r1_bio->sector - first_bad);
+ if (bad_sectors < max_sectors)
+ /* mustn't write more than bad_sectors
+ * to other devices yet
+ */
+ max_sectors = bad_sectors;
+ rdev_dec_pending(rdev, mddev);
+ /* We don't set R1BIO_Degraded as that
+ * only applies if the disk is
+ * missing, so it might be re-added,
+ * and we want to know to recover this
+ * chunk.
+ * In this case the device is here,
+ * and the fact that this chunk is not
+ * in-sync is recorded in the bad
+ * block log
+ */
+ continue;
+ }
+ if (is_bad) {
+ int good_sectors = first_bad - r1_bio->sector;
+ if (good_sectors < max_sectors)
+ max_sectors = good_sectors;
+ }
+ }
+ r1_bio->bios[i] = bio;
}
rcu_read_unlock();
@@ -838,51 +1018,57 @@
for (j = 0; j < i; j++)
if (r1_bio->bios[j])
rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-
+ r1_bio->state = 0;
allow_barrier(conf);
md_wait_for_blocked_rdev(blocked_rdev, mddev);
wait_barrier(conf);
goto retry_write;
}
- BUG_ON(targets == 0); /* we never fail the last device */
-
- if (targets < conf->raid_disks) {
- /* array is degraded, we will not clear the bitmap
- * on I/O completion (see raid1_end_write_request) */
- set_bit(R1BIO_Degraded, &r1_bio->state);
+ if (max_sectors < r1_bio->sectors) {
+ /* We are splitting this write into multiple parts, so
+ * we need to prepare for allocating another r1_bio.
+ */
+ r1_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (bio->bi_phys_segments == 0)
+ bio->bi_phys_segments = 2;
+ else
+ bio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
}
-
- /* do behind I/O ?
- * Not if there are too many, or cannot allocate memory,
- * or a reader on WriteMostly is waiting for behind writes
- * to flush */
- if (bitmap &&
- (atomic_read(&bitmap->behind_writes)
- < mddev->bitmap_info.max_write_behind) &&
- !waitqueue_active(&bitmap->behind_wait))
- alloc_behind_pages(bio, r1_bio);
+ sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector;
atomic_set(&r1_bio->remaining, 1);
atomic_set(&r1_bio->behind_remaining, 0);
- bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors,
- test_bit(R1BIO_BehindIO, &r1_bio->state));
+ first_clone = 1;
for (i = 0; i < disks; i++) {
struct bio *mbio;
if (!r1_bio->bios[i])
continue;
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
- r1_bio->bios[i] = mbio;
+ md_trim_bio(mbio, r1_bio->sector - bio->bi_sector, max_sectors);
- mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
- mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
- mbio->bi_end_io = raid1_end_write_request;
- mbio->bi_rw = WRITE | do_flush_fua | do_sync;
- mbio->bi_private = r1_bio;
+ if (first_clone) {
+ /* do behind I/O ?
+ * Not if there are too many, or cannot
+ * allocate memory, or a reader on WriteMostly
+ * is waiting for behind writes to flush */
+ if (bitmap &&
+ (atomic_read(&bitmap->behind_writes)
+ < mddev->bitmap_info.max_write_behind) &&
+ !waitqueue_active(&bitmap->behind_wait))
+ alloc_behind_pages(mbio, r1_bio);
- if (r1_bio->behind_pages) {
+ bitmap_startwrite(bitmap, r1_bio->sector,
+ r1_bio->sectors,
+ test_bit(R1BIO_BehindIO,
+ &r1_bio->state));
+ first_clone = 0;
+ }
+ if (r1_bio->behind_bvecs) {
struct bio_vec *bvec;
int j;
@@ -894,11 +1080,20 @@
* them all
*/
__bio_for_each_segment(bvec, mbio, j, 0)
- bvec->bv_page = r1_bio->behind_pages[j];
+ bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
}
+ r1_bio->bios[i] = mbio;
+
+ mbio->bi_sector = (r1_bio->sector +
+ conf->mirrors[i].rdev->data_offset);
+ mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ mbio->bi_end_io = raid1_end_write_request;
+ mbio->bi_rw = WRITE | do_flush_fua | do_sync;
+ mbio->bi_private = r1_bio;
+
atomic_inc(&r1_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
@@ -909,6 +1104,19 @@
/* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
+ if (sectors_handled < (bio->bi_size >> 9)) {
+ /* We need another r1_bio. It has already been counted
+ * in bio->bi_phys_segments
+ */
+ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+ r1_bio->master_bio = bio;
+ r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r1_bio->state = 0;
+ r1_bio->mddev = mddev;
+ r1_bio->sector = bio->bi_sector + sectors_handled;
+ goto retry_write;
+ }
+
if (do_sync || !bitmap || !plugged)
md_wakeup_thread(mddev->thread);
@@ -952,9 +1160,10 @@
* However don't try a recovery from this drive as
* it is very likely to fail.
*/
- mddev->recovery_disabled = 1;
+ conf->recovery_disabled = mddev->recovery_disabled;
return;
}
+ set_bit(Blocked, &rdev->flags);
if (test_and_clear_bit(In_sync, &rdev->flags)) {
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
@@ -1027,7 +1236,7 @@
&& !test_bit(Faulty, &rdev->flags)
&& !test_and_set_bit(In_sync, &rdev->flags)) {
count++;
- sysfs_notify_dirent(rdev->sysfs_state);
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
}
}
spin_lock_irqsave(&conf->device_lock, flags);
@@ -1048,6 +1257,9 @@
int first = 0;
int last = mddev->raid_disks - 1;
+ if (mddev->recovery_disabled == conf->recovery_disabled)
+ return -EBUSY;
+
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
@@ -1103,7 +1315,7 @@
* is not possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
- !mddev->recovery_disabled &&
+ mddev->recovery_disabled != conf->recovery_disabled &&
mddev->degraded < conf->raid_disks) {
err = -EBUSY;
goto abort;
@@ -1155,6 +1367,8 @@
conf_t *conf = mddev->private;
int i;
int mirror=0;
+ sector_t first_bad;
+ int bad_sectors;
for (i = 0; i < conf->raid_disks; i++)
if (r1_bio->bios[i] == bio) {
@@ -1172,18 +1386,48 @@
s += sync_blocks;
sectors_to_go -= sync_blocks;
} while (sectors_to_go > 0);
- md_error(mddev, conf->mirrors[mirror].rdev);
- }
+ set_bit(WriteErrorSeen,
+ &conf->mirrors[mirror].rdev->flags);
+ set_bit(R1BIO_WriteError, &r1_bio->state);
+ } else if (is_badblock(conf->mirrors[mirror].rdev,
+ r1_bio->sector,
+ r1_bio->sectors,
+ &first_bad, &bad_sectors) &&
+ !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
+ r1_bio->sector,
+ r1_bio->sectors,
+ &first_bad, &bad_sectors)
+ )
+ set_bit(R1BIO_MadeGood, &r1_bio->state);
update_head_pos(mirror, r1_bio);
if (atomic_dec_and_test(&r1_bio->remaining)) {
- sector_t s = r1_bio->sectors;
- put_buf(r1_bio);
- md_done_sync(mddev, s, uptodate);
+ int s = r1_bio->sectors;
+ if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+ test_bit(R1BIO_WriteError, &r1_bio->state))
+ reschedule_retry(r1_bio);
+ else {
+ put_buf(r1_bio);
+ md_done_sync(mddev, s, uptodate);
+ }
}
}
+static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+ int sectors, struct page *page, int rw)
+{
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ /* success */
+ return 1;
+ if (rw == WRITE)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ /* need to record an error - either for the block or the device */
+ if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+ md_error(rdev->mddev, rdev);
+ return 0;
+}
+
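The new r1_sync_page_io() helper centralises a policy change: a failed transfer no longer fails the device outright; we first try to record a bad block, and only call md_error() when the bad-block table cannot take the entry. A standalone sketch of that decision, with set_badblocks() and fail_device() as stand-ins for rdev_set_badblocks() and md_error():

#include <stdbool.h>
#include <stdio.h>

static bool set_badblocks(long sector, int sectors)
{
	(void)sector; (void)sectors;
	return false;			/* pretend the table is full */
}

static void fail_device(void) { printf("device failed\n"); }

/* returns 1 on success, 0 on failure (after recording the error) */
static int sync_io_result(bool io_ok, long sector, int sectors)
{
	if (io_ok)
		return 1;
	if (!set_badblocks(sector, sectors))
		fail_device();		/* cannot record: last resort */
	return 0;
}

int main(void)
{
	printf("result: %d\n", sync_io_result(false, 1024, 8));
	return 0;
}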
static int fix_sync_read_error(r1bio_t *r1_bio)
{
/* Try some synchronous reads of other devices to get
@@ -1193,6 +1437,9 @@
* We don't need to freeze the array, because being in an
* active sync request, there is no normal IO, and
* no overlapping syncs.
+ * We don't need to check is_badblock() again as we
+ * made sure that anything with a bad block in range
+ * will have bi_end_io clear.
*/
mddev_t *mddev = r1_bio->mddev;
conf_t *conf = mddev->private;
@@ -1217,9 +1464,7 @@
* active, and resync is currently active
*/
rdev = conf->mirrors[d].rdev;
- if (sync_page_io(rdev,
- sect,
- s<<9,
+ if (sync_page_io(rdev, sect, s<<9,
bio->bi_io_vec[idx].bv_page,
READ, false)) {
success = 1;
@@ -1233,16 +1478,36 @@
if (!success) {
char b[BDEVNAME_SIZE];
- /* Cannot read from anywhere, array is toast */
- md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+ int abort = 0;
+ /* Cannot read from anywhere, this block is lost.
+ * Record a bad block on each device. If that doesn't
+ * work just disable and interrupt the recovery.
+ * Don't fail devices as that won't really help.
+ */
printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
" for block %llu\n",
mdname(mddev),
bdevname(bio->bi_bdev, b),
(unsigned long long)r1_bio->sector);
- md_done_sync(mddev, r1_bio->sectors, 0);
- put_buf(r1_bio);
- return 0;
+ for (d = 0; d < conf->raid_disks; d++) {
+ rdev = conf->mirrors[d].rdev;
+ if (!rdev || test_bit(Faulty, &rdev->flags))
+ continue;
+ if (!rdev_set_badblocks(rdev, sect, s, 0))
+ abort = 1;
+ }
+ if (abort) {
+ mddev->recovery_disabled = 1;
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ md_done_sync(mddev, r1_bio->sectors, 0);
+ put_buf(r1_bio);
+ return 0;
+ }
+ /* Try next page */
+ sectors -= s;
+ sect += s;
+ idx++;
+ continue;
}
start = d;
@@ -1254,16 +1519,12 @@
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
rdev = conf->mirrors[d].rdev;
- if (sync_page_io(rdev,
- sect,
- s<<9,
- bio->bi_io_vec[idx].bv_page,
- WRITE, false) == 0) {
+ if (r1_sync_page_io(rdev, sect, s,
+ bio->bi_io_vec[idx].bv_page,
+ WRITE) == 0) {
r1_bio->bios[d]->bi_end_io = NULL;
rdev_dec_pending(rdev, mddev);
- md_error(mddev, rdev);
- } else
- atomic_add(s, &rdev->corrected_errors);
+ }
}
d = start;
while (d != r1_bio->read_disk) {
@@ -1273,12 +1534,10 @@
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
rdev = conf->mirrors[d].rdev;
- if (sync_page_io(rdev,
- sect,
- s<<9,
- bio->bi_io_vec[idx].bv_page,
- READ, false) == 0)
- md_error(mddev, rdev);
+ if (r1_sync_page_io(rdev, sect, s,
+ bio->bi_io_vec[idx].bv_page,
+ READ) != 0)
+ atomic_add(s, &rdev->corrected_errors);
}
sectors -= s;
sect += s;
@@ -1420,7 +1679,7 @@
*
* 1. Retries failed read operations on working mirrors.
* 2. Updates the raid superblock when problems encounter.
- * 3. Performs writes following reads for array syncronising.
+ * 3. Performs writes following reads for array synchronising.
*/
static void fix_read_error(conf_t *conf, int read_disk,
@@ -1443,9 +1702,14 @@
* which is the thread that might remove
* a device. If raid1d ever becomes multi-threaded....
*/
+ sector_t first_bad;
+ int bad_sectors;
+
rdev = conf->mirrors[d].rdev;
if (rdev &&
test_bit(In_sync, &rdev->flags) &&
+ is_badblock(rdev, sect, s,
+ &first_bad, &bad_sectors) == 0 &&
sync_page_io(rdev, sect, s<<9,
conf->tmppage, READ, false))
success = 1;
@@ -1457,8 +1721,10 @@
} while (!success && d != read_disk);
if (!success) {
- /* Cannot read from anywhere -- bye bye array */
- md_error(mddev, conf->mirrors[read_disk].rdev);
+ /* Cannot read from anywhere - mark it bad */
+ mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev;
+ if (!rdev_set_badblocks(rdev, sect, s, 0))
+ md_error(mddev, rdev);
break;
}
/* write it back and re-read */
@@ -1469,13 +1735,9 @@
d--;
rdev = conf->mirrors[d].rdev;
if (rdev &&
- test_bit(In_sync, &rdev->flags)) {
- if (sync_page_io(rdev, sect, s<<9,
- conf->tmppage, WRITE, false)
- == 0)
- /* Well, this device is dead */
- md_error(mddev, rdev);
- }
+ test_bit(In_sync, &rdev->flags))
+ r1_sync_page_io(rdev, sect, s,
+ conf->tmppage, WRITE);
}
d = start;
while (d != read_disk) {
@@ -1486,12 +1748,8 @@
rdev = conf->mirrors[d].rdev;
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
- if (sync_page_io(rdev, sect, s<<9,
- conf->tmppage, READ, false)
- == 0)
- /* Well, this device is dead */
- md_error(mddev, rdev);
- else {
+ if (r1_sync_page_io(rdev, sect, s,
+ conf->tmppage, READ)) {
atomic_add(s, &rdev->corrected_errors);
printk(KERN_INFO
"md/raid1:%s: read error corrected "
@@ -1508,21 +1766,255 @@
}
}
+static void bi_complete(struct bio *bio, int error)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
+static int submit_bio_wait(int rw, struct bio *bio)
+{
+ struct completion event;
+ rw |= REQ_SYNC;
+
+ init_completion(&event);
+ bio->bi_private = &event;
+ bio->bi_end_io = bi_complete;
+ submit_bio(rw, bio);
+ wait_for_completion(&event);
+
+ return test_bit(BIO_UPTODATE, &bio->bi_flags);
+}
+
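This open-coded submit_bio_wait() is the usual trick for making an asynchronous interface synchronous: park the caller on a completion that the end_io callback signals. A userspace analogue built on a POSIX semaphore, where async_submit() stands in for submit_bio() (build with -lpthread):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static void end_io(void *private)
{
	sem_post((sem_t *)private);	/* bi_complete() analogue */
}

static void *worker(void *private)
{
	end_io(private);		/* the I/O "finishes" asynchronously */
	return NULL;
}

static void async_submit(void *private)
{
	pthread_t t;
	pthread_create(&t, NULL, worker, private);
	pthread_detach(&t);
}

int main(void)
{
	sem_t event;
	sem_init(&event, 0, 0);		/* init_completion() analogue */
	async_submit(&event);		/* submit_bio() analogue */
	sem_wait(&event);		/* wait_for_completion() analogue */
	printf("I/O complete\n");
	return 0;
}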
+static int narrow_write_error(r1bio_t *r1_bio, int i)
+{
+ mddev_t *mddev = r1_bio->mddev;
+ conf_t *conf = mddev->private;
+ mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+ int vcnt, idx;
+ struct bio_vec *vec;
+
+ /* bio has the data to be written to device 'i' where
+ * we just recently had a write error.
+ * We repeatedly clone the bio and trim down to one block,
+ * then try the write. Where the write fails we record
+ * a bad block.
+ * It is conceivable that the bio doesn't exactly align with
+ * blocks. We must handle this somehow.
+ *
+ * We currently own a reference on the rdev.
+ */
+
+ int block_sectors;
+ sector_t sector;
+ int sectors;
+ int sect_to_write = r1_bio->sectors;
+ int ok = 1;
+
+ if (rdev->badblocks.shift < 0)
+ return 0;
+
+ block_sectors = 1 << rdev->badblocks.shift;
+ sector = r1_bio->sector;
+ sectors = ((sector + block_sectors)
+ & ~(sector_t)(block_sectors - 1))
+ - sector;
+
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+ vcnt = r1_bio->behind_page_count;
+ vec = r1_bio->behind_bvecs;
+ idx = 0;
+ while (vec[idx].bv_page == NULL)
+ idx++;
+ } else {
+ vcnt = r1_bio->master_bio->bi_vcnt;
+ vec = r1_bio->master_bio->bi_io_vec;
+ idx = r1_bio->master_bio->bi_idx;
+ }
+ while (sect_to_write) {
+ struct bio *wbio;
+ if (sectors > sect_to_write)
+ sectors = sect_to_write;
+ /* Write at 'sector' for 'sectors'*/
+
+ wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
+ memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
+ wbio->bi_sector = r1_bio->sector;
+ wbio->bi_rw = WRITE;
+ wbio->bi_vcnt = vcnt;
+ wbio->bi_size = r1_bio->sectors << 9;
+ wbio->bi_idx = idx;
+
+ md_trim_bio(wbio, sector - r1_bio->sector, sectors);
+ wbio->bi_sector += rdev->data_offset;
+ wbio->bi_bdev = rdev->bdev;
+ if (submit_bio_wait(WRITE, wbio) == 0)
+ /* failure! */
+ ok = rdev_set_badblocks(rdev, sector,
+ sectors, 0)
+ && ok;
+
+ bio_put(wbio);
+ sect_to_write -= sectors;
+ sector += sectors;
+ sectors = block_sectors;
+ }
+ return ok;
+}
+
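The sector arithmetic in narrow_write_error() rewards a worked example: the first sub-write is trimmed so it ends on a block_sectors boundary, after which every sub-write covers exactly one bad-block unit. Assuming block_sectors is a power of two, as 1 << badblocks.shift guarantees:

#include <stdio.h>

int main(void)
{
	unsigned long long sector = 1003;	/* start of failed write */
	int block_sectors = 8;			/* bad-block granularity */
	int first = (int)(((sector + block_sectors)
			   & ~(unsigned long long)(block_sectors - 1))
			  - sector);

	printf("first sub-write: %d sectors (ends at sector %llu)\n",
	       first, sector + first);		/* 5 sectors, ends at 1008 */
	return 0;
}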
+static void handle_sync_write_finished(conf_t *conf, r1bio_t *r1_bio)
+{
+ int m;
+ int s = r1_bio->sectors;
+ for (m = 0; m < conf->raid_disks ; m++) {
+ mdk_rdev_t *rdev = conf->mirrors[m].rdev;
+ struct bio *bio = r1_bio->bios[m];
+ if (bio->bi_end_io == NULL)
+ continue;
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+ test_bit(R1BIO_MadeGood, &r1_bio->state)) {
+ rdev_clear_badblocks(rdev, r1_bio->sector, s);
+ }
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+ test_bit(R1BIO_WriteError, &r1_bio->state)) {
+ if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
+ md_error(conf->mddev, rdev);
+ }
+ }
+ put_buf(r1_bio);
+ md_done_sync(conf->mddev, s, 1);
+}
+
+static void handle_write_finished(conf_t *conf, r1bio_t *r1_bio)
+{
+ int m;
+ for (m = 0; m < conf->raid_disks ; m++)
+ if (r1_bio->bios[m] == IO_MADE_GOOD) {
+ mdk_rdev_t *rdev = conf->mirrors[m].rdev;
+ rdev_clear_badblocks(rdev,
+ r1_bio->sector,
+ r1_bio->sectors);
+ rdev_dec_pending(rdev, conf->mddev);
+ } else if (r1_bio->bios[m] != NULL) {
+ /* This drive got a write error. We need to
+ * narrow down and record precise write
+ * errors.
+ */
+ if (!narrow_write_error(r1_bio, m)) {
+ md_error(conf->mddev,
+ conf->mirrors[m].rdev);
+ /* an I/O failed, we can't clear the bitmap */
+ set_bit(R1BIO_Degraded, &r1_bio->state);
+ }
+ rdev_dec_pending(conf->mirrors[m].rdev,
+ conf->mddev);
+ }
+ if (test_bit(R1BIO_WriteError, &r1_bio->state))
+ close_write(r1_bio);
+ raid_end_bio_io(r1_bio);
+}
+
+static void handle_read_error(conf_t *conf, r1bio_t *r1_bio)
+{
+ int disk;
+ int max_sectors;
+ mddev_t *mddev = conf->mddev;
+ struct bio *bio;
+ char b[BDEVNAME_SIZE];
+ mdk_rdev_t *rdev;
+
+ clear_bit(R1BIO_ReadError, &r1_bio->state);
+ /* we got a read error. Maybe the drive is bad. Maybe just
+ * the block and we can fix it.
+ * We freeze all other IO, and try reading the block from
+ * other devices. When we find one, we re-write
+ * and check that this fixes the read error.
+ * This is all done synchronously while the array is
+ * frozen
+ */
+ if (mddev->ro == 0) {
+ freeze_array(conf);
+ fix_read_error(conf, r1_bio->read_disk,
+ r1_bio->sector, r1_bio->sectors);
+ unfreeze_array(conf);
+ } else
+ md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+
+ bio = r1_bio->bios[r1_bio->read_disk];
+ bdevname(bio->bi_bdev, b);
+read_more:
+ disk = read_balance(conf, r1_bio, &max_sectors);
+ if (disk == -1) {
+ printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
+ " read error for block %llu\n",
+ mdname(mddev), b, (unsigned long long)r1_bio->sector);
+ raid_end_bio_io(r1_bio);
+ } else {
+ const unsigned long do_sync
+ = r1_bio->master_bio->bi_rw & REQ_SYNC;
+ if (bio) {
+ r1_bio->bios[r1_bio->read_disk] =
+ mddev->ro ? IO_BLOCKED : NULL;
+ bio_put(bio);
+ }
+ r1_bio->read_disk = disk;
+ bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+ md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors);
+ r1_bio->bios[r1_bio->read_disk] = bio;
+ rdev = conf->mirrors[disk].rdev;
+ printk_ratelimited(KERN_ERR
+ "md/raid1:%s: redirecting sector %llu"
+ " to other mirror: %s\n",
+ mdname(mddev),
+ (unsigned long long)r1_bio->sector,
+ bdevname(rdev->bdev, b));
+ bio->bi_sector = r1_bio->sector + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
+ bio->bi_end_io = raid1_end_read_request;
+ bio->bi_rw = READ | do_sync;
+ bio->bi_private = r1_bio;
+ if (max_sectors < r1_bio->sectors) {
+ /* Drat - have to split this up more */
+ struct bio *mbio = r1_bio->master_bio;
+ int sectors_handled = (r1_bio->sector + max_sectors
+ - mbio->bi_sector);
+ r1_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (mbio->bi_phys_segments == 0)
+ mbio->bi_phys_segments = 2;
+ else
+ mbio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ generic_make_request(bio);
+ bio = NULL;
+
+ r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+
+ r1_bio->master_bio = mbio;
+ r1_bio->sectors = (mbio->bi_size >> 9)
+ - sectors_handled;
+ r1_bio->state = 0;
+ set_bit(R1BIO_ReadError, &r1_bio->state);
+ r1_bio->mddev = mddev;
+ r1_bio->sector = mbio->bi_sector + sectors_handled;
+
+ goto read_more;
+ } else
+ generic_make_request(bio);
+ }
+}
+
static void raid1d(mddev_t *mddev)
{
r1bio_t *r1_bio;
- struct bio *bio;
unsigned long flags;
conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
- mdk_rdev_t *rdev;
struct blk_plug plug;
md_check_recovery(mddev);
blk_start_plug(&plug);
for (;;) {
- char b[BDEVNAME_SIZE];
if (atomic_read(&mddev->plug_cnt) == 0)
flush_pending_writes(conf);
@@ -1539,62 +2031,26 @@
mddev = r1_bio->mddev;
conf = mddev->private;
- if (test_bit(R1BIO_IsSync, &r1_bio->state))
- sync_request_write(mddev, r1_bio);
- else {
- int disk;
-
- /* we got a read error. Maybe the drive is bad. Maybe just
- * the block and we can fix it.
- * We freeze all other IO, and try reading the block from
- * other devices. When we find one, we re-write
- * and check it that fixes the read error.
- * This is all done synchronously while the array is
- * frozen
+ if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
+ if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+ test_bit(R1BIO_WriteError, &r1_bio->state))
+ handle_sync_write_finished(conf, r1_bio);
+ else
+ sync_request_write(mddev, r1_bio);
+ } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+ test_bit(R1BIO_WriteError, &r1_bio->state))
+ handle_write_finished(conf, r1_bio);
+ else if (test_bit(R1BIO_ReadError, &r1_bio->state))
+ handle_read_error(conf, r1_bio);
+ else
+ /* just a partial read to be scheduled from separate
+ * context
*/
- if (mddev->ro == 0) {
- freeze_array(conf);
- fix_read_error(conf, r1_bio->read_disk,
- r1_bio->sector,
- r1_bio->sectors);
- unfreeze_array(conf);
- } else
- md_error(mddev,
- conf->mirrors[r1_bio->read_disk].rdev);
+ generic_make_request(r1_bio->bios[r1_bio->read_disk]);
- bio = r1_bio->bios[r1_bio->read_disk];
- if ((disk=read_balance(conf, r1_bio)) == -1) {
- printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
- " read error for block %llu\n",
- mdname(mddev),
- bdevname(bio->bi_bdev,b),
- (unsigned long long)r1_bio->sector);
- raid_end_bio_io(r1_bio);
- } else {
- const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
- r1_bio->bios[r1_bio->read_disk] =
- mddev->ro ? IO_BLOCKED : NULL;
- r1_bio->read_disk = disk;
- bio_put(bio);
- bio = bio_clone_mddev(r1_bio->master_bio,
- GFP_NOIO, mddev);
- r1_bio->bios[r1_bio->read_disk] = bio;
- rdev = conf->mirrors[disk].rdev;
- if (printk_ratelimit())
- printk(KERN_ERR "md/raid1:%s: redirecting sector %llu to"
- " other mirror: %s\n",
- mdname(mddev),
- (unsigned long long)r1_bio->sector,
- bdevname(rdev->bdev,b));
- bio->bi_sector = r1_bio->sector + rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
- bio->bi_end_io = raid1_end_read_request;
- bio->bi_rw = READ | do_sync;
- bio->bi_private = r1_bio;
- generic_make_request(bio);
- }
- }
cond_resched();
+ if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+ md_check_recovery(mddev);
}
blk_finish_plug(&plug);
}
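With the new handlers in place, raid1d's retry loop reduces to a dispatch on the r1_bio state bits, as rewritten above. A compilable model of that dispatch; the bit positions mirror the R1BIO_* definitions in raid1.h and the handler names are the kernel's, but the bodies here are stubs:

#include <stdio.h>

enum { IsSync = 1 << 1, ReadError = 1 << 4, MadeGood = 1 << 7,
       WriteError = 1 << 8 };

static const char *dispatch(unsigned state)
{
	if (state & IsSync)
		return (state & (MadeGood | WriteError))
			? "handle_sync_write_finished"
			: "sync_request_write";
	if (state & (MadeGood | WriteError))
		return "handle_write_finished";
	if (state & ReadError)
		return "handle_read_error";
	return "resubmit partial read";
}

int main(void)
{
	printf("%s\n", dispatch(IsSync | MadeGood));
	printf("%s\n", dispatch(ReadError));
	printf("%s\n", dispatch(0));
	return 0;
}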
@@ -1636,6 +2092,8 @@
int write_targets = 0, read_targets = 0;
sector_t sync_blocks;
int still_degraded = 0;
+ int good_sectors = RESYNC_SECTORS;
+ int min_bad = 0; /* number of sectors that are bad in all devices */
if (!conf->r1buf_pool)
if (init_resync(conf))
@@ -1723,36 +2181,89 @@
rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev == NULL ||
- test_bit(Faulty, &rdev->flags)) {
+ test_bit(Faulty, &rdev->flags)) {
still_degraded = 1;
- continue;
} else if (!test_bit(In_sync, &rdev->flags)) {
bio->bi_rw = WRITE;
bio->bi_end_io = end_sync_write;
write_targets ++;
} else {
/* may need to read from here */
- bio->bi_rw = READ;
- bio->bi_end_io = end_sync_read;
- if (test_bit(WriteMostly, &rdev->flags)) {
- if (wonly < 0)
- wonly = i;
- } else {
- if (disk < 0)
- disk = i;
+ sector_t first_bad = MaxSector;
+ int bad_sectors;
+
+ if (is_badblock(rdev, sector_nr, good_sectors,
+ &first_bad, &bad_sectors)) {
+ if (first_bad > sector_nr)
+ good_sectors = first_bad - sector_nr;
+ else {
+ bad_sectors -= (sector_nr - first_bad);
+ if (min_bad == 0 ||
+ min_bad > bad_sectors)
+ min_bad = bad_sectors;
+ }
}
- read_targets++;
+ if (sector_nr < first_bad) {
+ if (test_bit(WriteMostly, &rdev->flags)) {
+ if (wonly < 0)
+ wonly = i;
+ } else {
+ if (disk < 0)
+ disk = i;
+ }
+ bio->bi_rw = READ;
+ bio->bi_end_io = end_sync_read;
+ read_targets++;
+ }
}
- atomic_inc(&rdev->nr_pending);
- bio->bi_sector = sector_nr + rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
- bio->bi_private = r1_bio;
+ if (bio->bi_end_io) {
+ atomic_inc(&rdev->nr_pending);
+ bio->bi_sector = sector_nr + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
+ bio->bi_private = r1_bio;
+ }
}
rcu_read_unlock();
if (disk < 0)
disk = wonly;
r1_bio->read_disk = disk;
+ if (read_targets == 0 && min_bad > 0) {
+ /* These sectors are bad on all InSync devices, so we
+ * need to mark them bad on all write targets
+ */
+ int ok = 1;
+ for (i = 0 ; i < conf->raid_disks ; i++)
+ if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
+ mdk_rdev_t *rdev =
+ rcu_dereference(conf->mirrors[i].rdev);
+ ok = rdev_set_badblocks(rdev, sector_nr,
+ min_bad, 0
+ ) && ok;
+ }
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ *skipped = 1;
+ put_buf(r1_bio);
+
+ if (!ok) {
+ /* Cannot record the badblocks, so need to
+ * abort the resync.
+ * If there are multiple read targets, could just
+ * fail the really bad ones ???
+ */
+ conf->recovery_disabled = mddev->recovery_disabled;
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ return 0;
+ } else
+ return min_bad;
+
+ }
+ if (min_bad > 0 && min_bad < good_sectors) {
+ /* only resync enough to reach the next bad->good
+ * transition */
+ good_sectors = min_bad;
+ }
+
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
/* extra read targets are also write targets */
write_targets += read_targets-1;
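The interplay of good_sectors and min_bad above is subtle: good_sectors shrinks to the nearest bad-range start among readable devices, while min_bad tracks the smallest remaining overlap when a bad range already covers sector_nr. A worked example with two hypothetical devices:

#include <stdio.h>

int main(void)
{
	long long sector_nr = 1000;
	int good_sectors = 128;		/* RESYNC_SECTORS stand-in */
	int min_bad = 0;

	/* device A: bad range starts 40 sectors into the window */
	long long first_bad = 1040; int bad_sectors = 8;
	if (first_bad > sector_nr)
		good_sectors = (int)(first_bad - sector_nr);	/* 40 */

	/* device B: bad range began at 984 and covers 64 sectors */
	first_bad = 984; bad_sectors = 64;
	if (first_bad <= sector_nr) {
		bad_sectors -= (int)(sector_nr - first_bad);	/* 48 */
		if (min_bad == 0 || min_bad > bad_sectors)
			min_bad = bad_sectors;
	}

	if (min_bad > 0 && min_bad < good_sectors)
		good_sectors = min_bad;	/* stop at the bad->good edge */
	printf("resync %d sectors this pass\n", good_sectors);	/* 40 */
	return 0;
}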
@@ -1769,6 +2280,8 @@
if (max_sector > mddev->resync_max)
max_sector = mddev->resync_max; /* Don't do IO beyond here */
+ if (max_sector > sector_nr + good_sectors)
+ max_sector = sector_nr + good_sectors;
nr_sectors = 0;
sync_blocks = 0;
do {
@@ -2154,18 +2667,13 @@
for (d = d2 = 0; d < conf->raid_disks; d++) {
mdk_rdev_t *rdev = conf->mirrors[d].rdev;
if (rdev && rdev->raid_disk != d2) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
+ sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = d2;
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
- if (sysfs_create_link(&mddev->kobj,
- &rdev->kobj, nm))
+ sysfs_unlink_rdev(mddev, rdev);
+ if (sysfs_link_rdev(mddev, rdev))
printk(KERN_WARNING
- "md/raid1:%s: cannot register "
- "%s\n",
- mdname(mddev), nm);
+ "md/raid1:%s: cannot register rd%d\n",
+ mdname(mddev), rdev->raid_disk);
}
if (rdev)
newmirrors[d2++].rdev = rdev;
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index e743a64..e0d676b 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -48,6 +48,12 @@
* (fresh device added).
* Cleared when a sync completes.
*/
+ int recovery_disabled; /* when the same as
+ * mddev->recovery_disabled
+ * we don't allow recovery
+ * to be attempted as we
+ * expect a read error
+ */
wait_queue_head_t wait_barrier;
@@ -95,7 +101,7 @@
struct list_head retry_list;
/* Next two are only valid when R1BIO_BehindIO is set */
- struct page **behind_pages;
+ struct bio_vec *behind_bvecs;
int behind_page_count;
/*
* if the IO is in WRITE direction, then multiple bios are used.
@@ -110,13 +116,24 @@
* correct the read error. To keep track of bad blocks on a per-bio
* level, we store IO_BLOCKED in the appropriate 'bios' pointer
*/
-#define IO_BLOCKED ((struct bio*)1)
+#define IO_BLOCKED ((struct bio *)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting bios[n] to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
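The sentinel values work because 1 and 2 can never be real bio addresses, so a bios[] slot can carry per-device status with no extra storage, distinguishable from both NULL and genuine pointers. A small demo of the encoding, with struct bio reduced to a stub:

#include <stdio.h>

struct bio { int dummy; };

#define IO_BLOCKED	((struct bio *)1)
#define IO_MADE_GOOD	((struct bio *)2)
#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

int main(void)
{
	struct bio real;
	struct bio *slots[] = { NULL, IO_BLOCKED, IO_MADE_GOOD, &real };

	for (int i = 0; i < 4; i++)
		printf("slot %d: %s\n", i,
		       BIO_SPECIAL(slots[i]) ? "special" : "real bio");
	return 0;
}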
/* bits for r1bio.state */
#define R1BIO_Uptodate 0
#define R1BIO_IsSync 1
#define R1BIO_Degraded 2
#define R1BIO_BehindIO 3
+/* Set ReadError on bios that experience a readerror so that
+ * raid1d knows what to do with them.
+ */
+#define R1BIO_ReadError 4
/* For write-behind requests, we call bi_end_io when
* the last non-write-behind device completes, providing
* any write was successful. Otherwise we call when
@@ -125,6 +142,11 @@
* Record that bi_end_io was called with this flag...
*/
#define R1BIO_Returned 6
+/* If a write for this request means we can clear some
+ * known-bad-block records, we set this flag
+ */
+#define R1BIO_MadeGood 7
+#define R1BIO_WriteError 8
extern int md_raid1_congested(mddev_t *mddev, int bits);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6e84668..8b29cd4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -22,6 +22,7 @@
#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
+#include <linux/ratelimit.h>
#include "md.h"
#include "raid10.h"
#include "raid0.h"
@@ -123,7 +124,14 @@
for (j = 0 ; j < nalloc; j++) {
bio = r10_bio->devs[j].bio;
for (i = 0; i < RESYNC_PAGES; i++) {
- page = alloc_page(gfp_flags);
+ if (j == 1 && !test_bit(MD_RECOVERY_SYNC,
+ &conf->mddev->recovery)) {
+ /* we can share bv_page's during recovery */
+ struct bio *rbio = r10_bio->devs[0].bio;
+ page = rbio->bi_io_vec[i].bv_page;
+ get_page(page);
+ } else
+ page = alloc_page(gfp_flags);
if (unlikely(!page))
goto out_free_pages;
@@ -173,7 +181,7 @@
for (i = 0; i < conf->copies; i++) {
struct bio **bio = & r10_bio->devs[i].bio;
- if (*bio && *bio != IO_BLOCKED)
+ if (!BIO_SPECIAL(*bio))
bio_put(*bio);
*bio = NULL;
}
@@ -183,12 +191,6 @@
{
conf_t *conf = r10_bio->mddev->private;
- /*
- * Wake up any possible resync thread that waits for the device
- * to go idle.
- */
- allow_barrier(conf);
-
put_all_bios(conf, r10_bio);
mempool_free(r10_bio, conf->r10bio_pool);
}
@@ -227,9 +229,27 @@
static void raid_end_bio_io(r10bio_t *r10_bio)
{
struct bio *bio = r10_bio->master_bio;
+ int done;
+ conf_t *conf = r10_bio->mddev->private;
- bio_endio(bio,
- test_bit(R10BIO_Uptodate, &r10_bio->state) ? 0 : -EIO);
+ if (bio->bi_phys_segments) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
+ bio->bi_phys_segments--;
+ done = (bio->bi_phys_segments == 0);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ } else
+ done = 1;
+ if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (done) {
+ bio_endio(bio, 0);
+ /*
+ * Wake up any possible resync thread that waits for the device
+ * to go idle.
+ */
+ allow_barrier(conf);
+ }
free_r10bio(r10_bio);
}
@@ -244,6 +264,26 @@
r10_bio->devs[slot].addr + (r10_bio->sectors);
}
+/*
+ * Find the disk number which triggered given bio
+ */
+static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio,
+ struct bio *bio, int *slotp)
+{
+ int slot;
+
+ for (slot = 0; slot < conf->copies; slot++)
+ if (r10_bio->devs[slot].bio == bio)
+ break;
+
+ BUG_ON(slot == conf->copies);
+ update_head_pos(slot, r10_bio);
+
+ if (slotp)
+ *slotp = slot;
+ return r10_bio->devs[slot].devnum;
+}
+
static void raid10_end_read_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -277,34 +317,45 @@
* oops, read error - keep the refcount on the rdev
*/
char b[BDEVNAME_SIZE];
- if (printk_ratelimit())
- printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n",
- mdname(conf->mddev),
- bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
+ printk_ratelimited(KERN_ERR
+ "md/raid10:%s: %s: rescheduling sector %llu\n",
+ mdname(conf->mddev),
+ bdevname(conf->mirrors[dev].rdev->bdev, b),
+ (unsigned long long)r10_bio->sector);
+ set_bit(R10BIO_ReadError, &r10_bio->state);
reschedule_retry(r10_bio);
}
}
+static void close_write(r10bio_t *r10_bio)
+{
+ /* clear the bitmap if all writes complete successfully */
+ bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+ r10_bio->sectors,
+ !test_bit(R10BIO_Degraded, &r10_bio->state),
+ 0);
+ md_write_end(r10_bio->mddev);
+}
+
static void raid10_end_write_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t *r10_bio = bio->bi_private;
- int slot, dev;
+ int dev;
+ int dec_rdev = 1;
conf_t *conf = r10_bio->mddev->private;
+ int slot;
- for (slot = 0; slot < conf->copies; slot++)
- if (r10_bio->devs[slot].bio == bio)
- break;
- dev = r10_bio->devs[slot].devnum;
+ dev = find_bio_disk(conf, r10_bio, bio, &slot);
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
if (!uptodate) {
- md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
- /* an I/O failed, we can't clear the bitmap */
- set_bit(R10BIO_Degraded, &r10_bio->state);
- } else
+ set_bit(WriteErrorSeen, &conf->mirrors[dev].rdev->flags);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ dec_rdev = 0;
+ } else {
/*
* Set R10BIO_Uptodate in our master bio, so that
* we will return a good error code for to the higher
@@ -314,9 +365,22 @@
* user-side. So if something waits for IO, then it will
* wait for the 'master' bio.
*/
+ sector_t first_bad;
+ int bad_sectors;
+
set_bit(R10BIO_Uptodate, &r10_bio->state);
- update_head_pos(slot, r10_bio);
+ /* Maybe we can clear some bad blocks. */
+ if (is_badblock(conf->mirrors[dev].rdev,
+ r10_bio->devs[slot].addr,
+ r10_bio->sectors,
+ &first_bad, &bad_sectors)) {
+ bio_put(bio);
+ r10_bio->devs[slot].bio = IO_MADE_GOOD;
+ dec_rdev = 0;
+ set_bit(R10BIO_MadeGood, &r10_bio->state);
+ }
+ }
/*
*
@@ -324,16 +388,18 @@
* already.
*/
if (atomic_dec_and_test(&r10_bio->remaining)) {
- /* clear the bitmap if all writes complete successfully */
- bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
- r10_bio->sectors,
- !test_bit(R10BIO_Degraded, &r10_bio->state),
- 0);
- md_write_end(r10_bio->mddev);
- raid_end_bio_io(r10_bio);
+ if (test_bit(R10BIO_WriteError, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else {
+ close_write(r10_bio);
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else
+ raid_end_bio_io(r10_bio);
+ }
}
-
- rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+ if (dec_rdev)
+ rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
}
@@ -484,11 +550,12 @@
* FIXME: possibly should rethink readbalancing and do it differently
* depending on near_copies / far_copies geometry.
*/
-static int read_balance(conf_t *conf, r10bio_t *r10_bio)
+static int read_balance(conf_t *conf, r10bio_t *r10_bio, int *max_sectors)
{
const sector_t this_sector = r10_bio->sector;
int disk, slot;
- const int sectors = r10_bio->sectors;
+ int sectors = r10_bio->sectors;
+ int best_good_sectors;
sector_t new_distance, best_dist;
mdk_rdev_t *rdev;
int do_balance;
@@ -497,8 +564,10 @@
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
retry:
+ sectors = r10_bio->sectors;
best_slot = -1;
best_dist = MaxSector;
+ best_good_sectors = 0;
do_balance = 1;
/*
* Check if we can balance. We can balance on the whole
@@ -511,6 +580,10 @@
do_balance = 0;
for (slot = 0; slot < conf->copies ; slot++) {
+ sector_t first_bad;
+ int bad_sectors;
+ sector_t dev_sector;
+
if (r10_bio->devs[slot].bio == IO_BLOCKED)
continue;
disk = r10_bio->devs[slot].devnum;
@@ -520,6 +593,37 @@
if (!test_bit(In_sync, &rdev->flags))
continue;
+ dev_sector = r10_bio->devs[slot].addr;
+ if (is_badblock(rdev, dev_sector, sectors,
+ &first_bad, &bad_sectors)) {
+ if (best_dist < MaxSector)
+ /* Already have a better slot */
+ continue;
+ if (first_bad <= dev_sector) {
+ /* Cannot read here. If this is the
+ * 'primary' device, then we must not read
+ * beyond 'bad_sectors' from another device.
+ */
+ bad_sectors -= (dev_sector - first_bad);
+ if (!do_balance && sectors > bad_sectors)
+ sectors = bad_sectors;
+ if (best_good_sectors > sectors)
+ best_good_sectors = sectors;
+ } else {
+ sector_t good_sectors =
+ first_bad - dev_sector;
+ if (good_sectors > best_good_sectors) {
+ best_good_sectors = good_sectors;
+ best_slot = slot;
+ }
+ if (!do_balance)
+ /* Must read from here */
+ break;
+ }
+ continue;
+ } else
+ best_good_sectors = sectors;
+
if (!do_balance)
break;
@@ -561,6 +665,7 @@
} else
disk = -1;
rcu_read_unlock();
+ *max_sectors = best_good_sectors;
return disk;
}
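To make the new bad-block cases in read_balance() concrete: a bad range that starts inside the request window still leaves a readable prefix, the balancer prefers the slot with the longest prefix, and the usable length comes back through *max_sectors. A worked example over two hypothetical slots:

#include <stdio.h>

int main(void)
{
	long long dev_sector = 500;
	int sectors = 64;			/* requested window */

	/* slot 0: bad range starts at 520 -> 20 good sectors */
	long long good0 = 520 - dev_sector;
	/* slot 1: bad range starts at 548 -> 48 good sectors */
	long long good1 = 548 - dev_sector;

	long long best_good_sectors = good0 > good1 ? good0 : good1;
	int best_slot = good0 > good1 ? 0 : 1;

	if (best_good_sectors > sectors)
		best_good_sectors = sectors;
	printf("read %lld sectors from slot %d\n",
	       best_good_sectors, best_slot);	/* 48 from slot 1 */
	return 0;
}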
@@ -734,6 +839,8 @@
unsigned long flags;
mdk_rdev_t *blocked_rdev;
int plugged;
+ int sectors_handled;
+ int max_sectors;
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bio);
@@ -808,12 +915,26 @@
r10_bio->sector = bio->bi_sector;
r10_bio->state = 0;
+ /* We might need to issue multiple reads to different
+ * devices if there are bad blocks around, so we keep
+ * track of the number of reads in bio->bi_phys_segments.
+ * If this is 0, there is only one r10_bio and no locking
+ * will be needed when the request completes. If it is
+ * non-zero, then it is the number of not-completed requests.
+ */
+ bio->bi_phys_segments = 0;
+ clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
if (rw == READ) {
/*
* read balancing logic:
*/
- int disk = read_balance(conf, r10_bio);
- int slot = r10_bio->read_slot;
+ int disk;
+ int slot;
+
+read_again:
+ disk = read_balance(conf, r10_bio, &max_sectors);
+ slot = r10_bio->read_slot;
if (disk < 0) {
raid_end_bio_io(r10_bio);
return 0;
@@ -821,6 +942,8 @@
mirror = conf->mirrors + disk;
read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector,
+ max_sectors);
r10_bio->devs[slot].bio = read_bio;
@@ -831,7 +954,37 @@
read_bio->bi_rw = READ | do_sync;
read_bio->bi_private = r10_bio;
- generic_make_request(read_bio);
+ if (max_sectors < r10_bio->sectors) {
+ /* Could not read all from this device, so we will
+ * need another r10_bio.
+ */
+ sectors_handled = (r10_bio->sector + max_sectors
+ - bio->bi_sector);
+ r10_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (bio->bi_phys_segments == 0)
+ bio->bi_phys_segments = 2;
+ else
+ bio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ /* Cannot call generic_make_request directly
+ * as that will be queued in __generic_make_request
+ * and subsequent mempool_alloc might block
+ * waiting for it. so hand bio over to raid10d.
+ */
+ reschedule_retry(r10_bio);
+
+ r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+ r10_bio->master_bio = bio;
+ r10_bio->sectors = ((bio->bi_size >> 9)
+ - sectors_handled);
+ r10_bio->state = 0;
+ r10_bio->mddev = mddev;
+ r10_bio->sector = bio->bi_sector + sectors_handled;
+ goto read_again;
+ } else
+ generic_make_request(read_bio);
return 0;
}
@@ -841,13 +994,22 @@
/* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting
* bios[x] to bio
+ * If there are known/acknowledged bad blocks on any device
+ * on which we have seen a write error, we want to avoid
+ * writing to those blocks. This potentially requires several
+ * writes to write around the bad blocks. Each set of writes
+ * gets its own r10_bio with a set of bios attached. The number
+ * of r10_bios is recorded in bio->bi_phys_segments just as with
+ * the read case.
*/
plugged = mddev_check_plugged(mddev);
raid10_find_phys(conf, r10_bio);
- retry_write:
+retry_write:
blocked_rdev = NULL;
rcu_read_lock();
+ max_sectors = r10_bio->sectors;
+
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
@@ -856,13 +1018,55 @@
blocked_rdev = rdev;
break;
}
- if (rdev && !test_bit(Faulty, &rdev->flags)) {
- atomic_inc(&rdev->nr_pending);
- r10_bio->devs[i].bio = bio;
- } else {
- r10_bio->devs[i].bio = NULL;
+ r10_bio->devs[i].bio = NULL;
+ if (!rdev || test_bit(Faulty, &rdev->flags)) {
set_bit(R10BIO_Degraded, &r10_bio->state);
+ continue;
}
+ if (test_bit(WriteErrorSeen, &rdev->flags)) {
+ sector_t first_bad;
+ sector_t dev_sector = r10_bio->devs[i].addr;
+ int bad_sectors;
+ int is_bad;
+
+ is_bad = is_badblock(rdev, dev_sector,
+ max_sectors,
+ &first_bad, &bad_sectors);
+ if (is_bad < 0) {
+ /* Mustn't write here until the bad block
+ * is acknowledged
+ */
+ atomic_inc(&rdev->nr_pending);
+ set_bit(BlockedBadBlocks, &rdev->flags);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (is_bad && first_bad <= dev_sector) {
+ /* Cannot write here at all */
+ bad_sectors -= (dev_sector - first_bad);
+ if (bad_sectors < max_sectors)
+ /* Mustn't write more than bad_sectors
+ * to other devices yet
+ */
+ max_sectors = bad_sectors;
+ /* We don't set R10BIO_Degraded as that
+ * only applies if the disk is missing,
+ * so it might be re-added, and we want to
+ * know to recover this chunk.
+ * In this case the device is here, and the
+ * fact that this chunk is not in-sync is
+ * recorded in the bad block log.
+ */
+ continue;
+ }
+ if (is_bad) {
+ int good_sectors = first_bad - dev_sector;
+ if (good_sectors < max_sectors)
+ max_sectors = good_sectors;
+ }
+ }
+ r10_bio->devs[i].bio = bio;
+ atomic_inc(&rdev->nr_pending);
}
rcu_read_unlock();
@@ -882,8 +1086,22 @@
goto retry_write;
}
+ if (max_sectors < r10_bio->sectors) {
+ /* We are splitting this into multiple parts, so
+ * we need to prepare for allocating another r10_bio.
+ */
+ r10_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (bio->bi_phys_segments == 0)
+ bio->bi_phys_segments = 2;
+ else
+ bio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ }
+ sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
+
atomic_set(&r10_bio->remaining, 1);
- bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
+ bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
for (i = 0; i < conf->copies; i++) {
struct bio *mbio;
@@ -892,10 +1110,12 @@
continue;
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+ max_sectors);
r10_bio->devs[i].bio = mbio;
- mbio->bi_sector = r10_bio->devs[i].addr+
- conf->mirrors[d].rdev->data_offset;
+ mbio->bi_sector = (r10_bio->devs[i].addr+
+ conf->mirrors[d].rdev->data_offset);
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
mbio->bi_rw = WRITE | do_sync | do_fua;
@@ -920,6 +1140,21 @@
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
+ if (sectors_handled < (bio->bi_size >> 9)) {
+ /* We need another r10_bio. It has already been counted
+ * in bio->bi_phys_segments.
+ */
+ r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+ r10_bio->master_bio = bio;
+ r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+
+ r10_bio->mddev = mddev;
+ r10_bio->sector = bio->bi_sector + sectors_handled;
+ r10_bio->state = 0;
+ goto retry_write;
+ }
+
if (do_sync || !mddev->bitmap || !plugged)
md_wakeup_thread(mddev->thread);
return 0;
@@ -949,6 +1184,30 @@
seq_printf(seq, "]");
}
+/* check if there are enough drives for
+ * every block to appear on at least one.
+ * Don't consider the device numbered 'ignore'
+ * as we might be about to remove it.
+ */
+static int enough(conf_t *conf, int ignore)
+{
+ int first = 0;
+
+ do {
+ int n = conf->copies;
+ int cnt = 0;
+ while (n--) {
+ if (conf->mirrors[first].rdev &&
+ first != ignore)
+ cnt++;
+ first = (first+1) % conf->raid_disks;
+ }
+ if (cnt == 0)
+ return 0;
+ } while (first != 0);
+ return 1;
+}
+
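enough() is easy to sanity-check outside the kernel. A standalone model assuming the simple 'near' layout, where each consecutive group of conf->copies disks holds copies of the same blocks; the present[] array and the helper's signature are ours:

#include <stdio.h>

static int enough(const int *present, int raid_disks, int copies,
		  int ignore)
{
	int first = 0;
	do {
		int n = copies, cnt = 0;
		while (n--) {
			if (present[first] && first != ignore)
				cnt++;
			first = (first + 1) % raid_disks;
		}
		if (cnt == 0)
			return 0;	/* a block with no surviving copy */
	} while (first != 0);
	return 1;
}

int main(void)
{
	int present[4] = { 1, 0, 1, 1 };	/* disk 1 already failed */

	/* 4 disks, 2 copies: may we also lose disk 0? */
	printf("%s\n", enough(present, 4, 2, 0) ? "yes" : "no"); /* no */
	/* ... or disk 3 instead? */
	printf("%s\n", enough(present, 4, 2, 3) ? "yes" : "no"); /* yes */
	return 0;
}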
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{
char b[BDEVNAME_SIZE];
@@ -961,13 +1220,9 @@
* else mark the drive as failed
*/
if (test_bit(In_sync, &rdev->flags)
- && conf->raid_disks-mddev->degraded == 1)
+ && !enough(conf, rdev->raid_disk))
/*
* Don't fail the drive, just return an IO error.
- * The test should really be more sophisticated than
- * "working_disks == 1", but it isn't critical, and
- * can wait until we do more sophisticated "is the drive
- * really dead" tests...
*/
return;
if (test_and_clear_bit(In_sync, &rdev->flags)) {
@@ -980,6 +1235,7 @@
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
+ set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT
@@ -1022,27 +1278,6 @@
conf->r10buf_pool = NULL;
}
-/* check if there are enough drives for
- * every block to appear on atleast one
- */
-static int enough(conf_t *conf)
-{
- int first = 0;
-
- do {
- int n = conf->copies;
- int cnt = 0;
- while (n--) {
- if (conf->mirrors[first].rdev)
- cnt++;
- first = (first+1) % conf->raid_disks;
- }
- if (cnt == 0)
- return 0;
- } while (first != 0);
- return 1;
-}
-
static int raid10_spare_active(mddev_t *mddev)
{
int i;
@@ -1078,7 +1313,6 @@
conf_t *conf = mddev->private;
int err = -EEXIST;
int mirror;
- mirror_info_t *p;
int first = 0;
int last = conf->raid_disks - 1;
@@ -1087,44 +1321,47 @@
* very different from resync
*/
return -EBUSY;
- if (!enough(conf))
+ if (!enough(conf, -1))
return -EINVAL;
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
- if (rdev->saved_raid_disk >= 0 &&
- rdev->saved_raid_disk >= first &&
+ if (rdev->saved_raid_disk >= first &&
conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
mirror = rdev->saved_raid_disk;
else
mirror = first;
- for ( ; mirror <= last ; mirror++)
- if ( !(p=conf->mirrors+mirror)->rdev) {
+ for ( ; mirror <= last ; mirror++) {
+ mirror_info_t *p = &conf->mirrors[mirror];
+ if (p->recovery_disabled == mddev->recovery_disabled)
+ continue;
+ if (p->rdev)
+ continue;
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
- /* as we don't honour merge_bvec_fn, we must
- * never risk violating it, so limit
- * ->max_segments to one lying with a single
- * page, as a one page request is never in
- * violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
- blk_queue_max_segments(mddev->queue, 1);
- blk_queue_segment_boundary(mddev->queue,
- PAGE_CACHE_SIZE - 1);
- }
-
- p->head_position = 0;
- rdev->raid_disk = mirror;
- err = 0;
- if (rdev->saved_raid_disk != mirror)
- conf->fullsync = 1;
- rcu_assign_pointer(p->rdev, rdev);
- break;
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
+ /* as we don't honour merge_bvec_fn, we must
+ * never risk violating it, so limit
+ * ->max_segments to one lying with a single
+ * page, as a one page request is never in
+ * violation.
+ */
+ if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
+ blk_queue_max_segments(mddev->queue, 1);
+ blk_queue_segment_boundary(mddev->queue,
+ PAGE_CACHE_SIZE - 1);
}
+ p->head_position = 0;
+ rdev->raid_disk = mirror;
+ err = 0;
+ if (rdev->saved_raid_disk != mirror)
+ conf->fullsync = 1;
+ rcu_assign_pointer(p->rdev, rdev);
+ break;
+ }
+
md_integrity_add_rdev(rdev, mddev);
print_conf(conf);
return err;
@@ -1149,7 +1386,8 @@
* is not possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
- enough(conf)) {
+ mddev->recovery_disabled != p->recovery_disabled &&
+ enough(conf, -1)) {
err = -EBUSY;
goto abort;
}
@@ -1174,24 +1412,18 @@
{
r10bio_t *r10_bio = bio->bi_private;
conf_t *conf = r10_bio->mddev->private;
- int i,d;
+ int d;
- for (i=0; i<conf->copies; i++)
- if (r10_bio->devs[i].bio == bio)
- break;
- BUG_ON(i == conf->copies);
- update_head_pos(i, r10_bio);
- d = r10_bio->devs[i].devnum;
+ d = find_bio_disk(conf, r10_bio, bio, NULL);
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R10BIO_Uptodate, &r10_bio->state);
- else {
+ else
+ /* The write handler will notice the lack of
+ * R10BIO_Uptodate and record any errors etc
+ */
atomic_add(r10_bio->sectors,
&conf->mirrors[d].rdev->corrected_errors);
- if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
- md_error(r10_bio->mddev,
- conf->mirrors[d].rdev);
- }
/* for reconstruct, we always reschedule after a read.
* for resync, only after all reads
@@ -1206,38 +1438,58 @@
}
}
+static void end_sync_request(r10bio_t *r10_bio)
+{
+ mddev_t *mddev = r10_bio->mddev;
+
+ while (atomic_dec_and_test(&r10_bio->remaining)) {
+ if (r10_bio->master_bio == NULL) {
+ /* the primary of several recovery bios */
+ sector_t s = r10_bio->sectors;
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else
+ put_buf(r10_bio);
+ md_done_sync(mddev, s, 1);
+ break;
+ } else {
+ r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else
+ put_buf(r10_bio);
+ r10_bio = r10_bio2;
+ }
+ }
+}
+
static void end_sync_write(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t *r10_bio = bio->bi_private;
mddev_t *mddev = r10_bio->mddev;
conf_t *conf = mddev->private;
- int i,d;
+ int d;
+ sector_t first_bad;
+ int bad_sectors;
+ int slot;
- for (i = 0; i < conf->copies; i++)
- if (r10_bio->devs[i].bio == bio)
- break;
- d = r10_bio->devs[i].devnum;
+ d = find_bio_disk(conf, r10_bio, bio, &slot);
- if (!uptodate)
- md_error(mddev, conf->mirrors[d].rdev);
-
- update_head_pos(i, r10_bio);
+ if (!uptodate) {
+ set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ } else if (is_badblock(conf->mirrors[d].rdev,
+ r10_bio->devs[slot].addr,
+ r10_bio->sectors,
+ &first_bad, &bad_sectors))
+ set_bit(R10BIO_MadeGood, &r10_bio->state);
rdev_dec_pending(conf->mirrors[d].rdev, mddev);
- while (atomic_dec_and_test(&r10_bio->remaining)) {
- if (r10_bio->master_bio == NULL) {
- /* the primary of several recovery bios */
- sector_t s = r10_bio->sectors;
- put_buf(r10_bio);
- md_done_sync(mddev, s, 1);
- break;
- } else {
- r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
- put_buf(r10_bio);
- r10_bio = r10_bio2;
- }
- }
+
+ end_sync_request(r10_bio);
}
/*
@@ -1299,11 +1551,12 @@
if (j == vcnt)
continue;
mddev->resync_mismatches += r10_bio->sectors;
+ if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+ /* Don't fix anything. */
+ continue;
}
- if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
- /* Don't fix anything. */
- continue;
- /* Ok, we need to write this bio
+ /* Ok, we need to write this bio, either to correct an
+ * inconsistency or to correct an unreadable block.
* First we need to fixup bv_offset, bv_len and
* bi_vecs, as the read request might have corrupted these
*/
@@ -1355,32 +1608,107 @@
* The second for writing.
*
*/
+static void fix_recovery_read_error(r10bio_t *r10_bio)
+{
+ /* We got a read error during recovery.
+ * We repeat the read in smaller page-sized sections.
+ * If a read succeeds, write it to the new device or record
+ * a bad block if we cannot.
+ * If a read fails, record a bad block on both old and
+ * new devices.
+ */
+ mddev_t *mddev = r10_bio->mddev;
+ conf_t *conf = mddev->private;
+ struct bio *bio = r10_bio->devs[0].bio;
+ sector_t sect = 0;
+ int sectors = r10_bio->sectors;
+ int idx = 0;
+ int dr = r10_bio->devs[0].devnum;
+ int dw = r10_bio->devs[1].devnum;
+
+ while (sectors) {
+ int s = sectors;
+ mdk_rdev_t *rdev;
+ sector_t addr;
+ int ok;
+
+ if (s > (PAGE_SIZE>>9))
+ s = PAGE_SIZE >> 9;
+
+ rdev = conf->mirrors[dr].rdev;
+ addr = r10_bio->devs[0].addr + sect;
+ ok = sync_page_io(rdev,
+ addr,
+ s << 9,
+ bio->bi_io_vec[idx].bv_page,
+ READ, false);
+ if (ok) {
+ rdev = conf->mirrors[dw].rdev;
+ addr = r10_bio->devs[1].addr + sect;
+ ok = sync_page_io(rdev,
+ addr,
+ s << 9,
+ bio->bi_io_vec[idx].bv_page,
+ WRITE, false);
+ if (!ok)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ }
+ if (!ok) {
+ /* We don't worry if we cannot set a bad block -
+ * it really is bad so there is no loss in not
+ * recording it yet
+ */
+ rdev_set_badblocks(rdev, addr, s, 0);
+
+ if (rdev != conf->mirrors[dw].rdev) {
+ /* need bad block on destination too */
+ mdk_rdev_t *rdev2 = conf->mirrors[dw].rdev;
+ addr = r10_bio->devs[1].addr + sect;
+ ok = rdev_set_badblocks(rdev2, addr, s, 0);
+ if (!ok) {
+ /* just abort the recovery */
+ printk(KERN_NOTICE
+ "md/raid10:%s: recovery aborted"
+ " due to read error\n",
+ mdname(mddev));
+
+ conf->mirrors[dw].recovery_disabled
+ = mddev->recovery_disabled;
+ set_bit(MD_RECOVERY_INTR,
+ &mddev->recovery);
+ break;
+ }
+ }
+ }
+
+ sectors -= s;
+ sect += s;
+ idx++;
+ }
+}
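The loop structure here mirrors fix_sync_read_error() on the raid1 side: retry the failed region in page-sized pieces so one unreadable page does not condemn the whole range. Its arithmetic, reduced to a standalone sketch:

#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	int sectors = 24;		/* region to repair, 512B sectors */
	long long sect = 0;
	int idx = 0;

	while (sectors) {
		int s = sectors;
		if (s > (PAGE_SIZE >> 9))
			s = PAGE_SIZE >> 9;	/* at most 8 sectors */
		printf("piece %d: sectors %lld..%lld\n",
		       idx, sect, sect + s - 1);
		sectors -= s;
		sect += s;
		idx++;
	}
	return 0;
}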
static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
{
conf_t *conf = mddev->private;
- int i, d;
- struct bio *bio, *wbio;
+ int d;
+ struct bio *wbio;
+ if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
+ fix_recovery_read_error(r10_bio);
+ end_sync_request(r10_bio);
+ return;
+ }
- /* move the pages across to the second bio
+ /*
+ * share the pages with the first bio
* and submit the write request
*/
- bio = r10_bio->devs[0].bio;
wbio = r10_bio->devs[1].bio;
- for (i=0; i < wbio->bi_vcnt; i++) {
- struct page *p = bio->bi_io_vec[i].bv_page;
- bio->bi_io_vec[i].bv_page = wbio->bi_io_vec[i].bv_page;
- wbio->bi_io_vec[i].bv_page = p;
- }
d = r10_bio->devs[1].devnum;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
- if (test_bit(R10BIO_Uptodate, &r10_bio->state))
- generic_make_request(wbio);
- else
- bio_endio(wbio, -EIO);
+ generic_make_request(wbio);
}
@@ -1421,6 +1749,26 @@
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
}
+static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+ int sectors, struct page *page, int rw)
+{
+ sector_t first_bad;
+ int bad_sectors;
+
+ if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
+ && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
+ return -1;
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ /* success */
+ return 1;
+ if (rw == WRITE)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ /* need to record an error - either for the block or the device */
+ if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+ md_error(rdev->mddev, rdev);
+ return 0;
+}
+
/*
* This is a kernel thread which:
*
@@ -1476,10 +1824,15 @@
rcu_read_lock();
do {
+ sector_t first_bad;
+ int bad_sectors;
+
d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
- test_bit(In_sync, &rdev->flags)) {
+ test_bit(In_sync, &rdev->flags) &&
+ is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
+ &first_bad, &bad_sectors) == 0) {
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
success = sync_page_io(rdev,
@@ -1499,9 +1852,19 @@
rcu_read_unlock();
if (!success) {
- /* Cannot read from anywhere -- bye bye array */
+ /* Cannot read from anywhere, just mark the block
+ * as bad on the first device to discourage future
+ * reads.
+ */
int dn = r10_bio->devs[r10_bio->read_slot].devnum;
- md_error(mddev, conf->mirrors[dn].rdev);
+ rdev = conf->mirrors[dn].rdev;
+
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[r10_bio->read_slot].addr
+ + sect,
+ s, 0))
+ md_error(mddev, rdev);
break;
}
@@ -1516,80 +1879,82 @@
sl--;
d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev);
- if (rdev &&
- test_bit(In_sync, &rdev->flags)) {
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
- atomic_add(s, &rdev->corrected_errors);
- if (sync_page_io(rdev,
- r10_bio->devs[sl].addr +
- sect,
- s<<9, conf->tmppage, WRITE, false)
- == 0) {
- /* Well, this device is dead */
- printk(KERN_NOTICE
- "md/raid10:%s: read correction "
- "write failed"
- " (%d sectors at %llu on %s)\n",
- mdname(mddev), s,
- (unsigned long long)(
- sect + rdev->data_offset),
- bdevname(rdev->bdev, b));
- printk(KERN_NOTICE "md/raid10:%s: %s: failing "
- "drive\n",
- mdname(mddev),
- bdevname(rdev->bdev, b));
- md_error(mddev, rdev);
- }
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
+ if (!rdev ||
+ !test_bit(In_sync, &rdev->flags))
+ continue;
+
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
+ if (r10_sync_page_io(rdev,
+ r10_bio->devs[sl].addr +
+ sect,
+ s<<9, conf->tmppage, WRITE)
+ == 0) {
+ /* Well, this device is dead */
+ printk(KERN_NOTICE
+ "md/raid10:%s: read correction "
+ "write failed"
+ " (%d sectors at %llu on %s)\n",
+ mdname(mddev), s,
+ (unsigned long long)(
+ sect + rdev->data_offset),
+ bdevname(rdev->bdev, b));
+ printk(KERN_NOTICE "md/raid10:%s: %s: failing "
+ "drive\n",
+ mdname(mddev),
+ bdevname(rdev->bdev, b));
}
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
}
sl = start;
while (sl != r10_bio->read_slot) {
+ char b[BDEVNAME_SIZE];
if (sl==0)
sl = conf->copies;
sl--;
d = r10_bio->devs[sl].devnum;
rdev = rcu_dereference(conf->mirrors[d].rdev);
- if (rdev &&
- test_bit(In_sync, &rdev->flags)) {
- char b[BDEVNAME_SIZE];
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
- if (sync_page_io(rdev,
- r10_bio->devs[sl].addr +
- sect,
- s<<9, conf->tmppage,
- READ, false) == 0) {
- /* Well, this device is dead */
- printk(KERN_NOTICE
- "md/raid10:%s: unable to read back "
- "corrected sectors"
- " (%d sectors at %llu on %s)\n",
- mdname(mddev), s,
- (unsigned long long)(
- sect + rdev->data_offset),
- bdevname(rdev->bdev, b));
- printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
- mdname(mddev),
- bdevname(rdev->bdev, b));
+ if (!rdev ||
+ !test_bit(In_sync, &rdev->flags))
+ continue;
- md_error(mddev, rdev);
- } else {
- printk(KERN_INFO
- "md/raid10:%s: read error corrected"
- " (%d sectors at %llu on %s)\n",
- mdname(mddev), s,
- (unsigned long long)(
- sect + rdev->data_offset),
- bdevname(rdev->bdev, b));
- }
-
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
+ switch (r10_sync_page_io(rdev,
+ r10_bio->devs[sl].addr +
+ sect,
+ s<<9, conf->tmppage,
+ READ)) {
+ case 0:
+ /* Well, this device is dead */
+ printk(KERN_NOTICE
+ "md/raid10:%s: unable to read back "
+ "corrected sectors"
+ " (%d sectors at %llu on %s)\n",
+ mdname(mddev), s,
+ (unsigned long long)(
+ sect + rdev->data_offset),
+ bdevname(rdev->bdev, b));
+ printk(KERN_NOTICE "md/raid10:%s: %s: failing "
+ "drive\n",
+ mdname(mddev),
+ bdevname(rdev->bdev, b));
+ break;
+ case 1:
+ printk(KERN_INFO
+ "md/raid10:%s: read error corrected"
+ " (%d sectors at %llu on %s)\n",
+ mdname(mddev), s,
+ (unsigned long long)(
+ sect + rdev->data_offset),
+ bdevname(rdev->bdev, b));
+ atomic_add(s, &rdev->corrected_errors);
}
+
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
}
rcu_read_unlock();
@@ -1598,21 +1963,254 @@
}
}
+static void bi_complete(struct bio *bio, int error)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
+static int submit_bio_wait(int rw, struct bio *bio)
+{
+ struct completion event;
+ rw |= REQ_SYNC;
+
+ init_completion(&event);
+ bio->bi_private = &event;
+ bio->bi_end_io = bi_complete;
+ submit_bio(rw, bio);
+ wait_for_completion(&event);
+
+ return test_bit(BIO_UPTODATE, &bio->bi_flags);
+}
+
+static int narrow_write_error(r10bio_t *r10_bio, int i)
+{
+ struct bio *bio = r10_bio->master_bio;
+ mddev_t *mddev = r10_bio->mddev;
+ conf_t *conf = mddev->private;
+ mdk_rdev_t *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
+ /* bio has the data to be written to slot 'i' where
+ * we just recently had a write error.
+ * We repeatedly clone the bio and trim down to one block,
+ * then try the write. Where the write fails we record
+ * a bad block.
+ * It is conceivable that the bio doesn't exactly align with
+ * blocks. We must handle this.
+ *
+ * We currently own a reference to the rdev.
+ */
+
+ int block_sectors;
+ sector_t sector;
+ int sectors;
+ int sect_to_write = r10_bio->sectors;
+ int ok = 1;
+
+ if (rdev->badblocks.shift < 0)
+ return 0;
+
+ block_sectors = 1 << rdev->badblocks.shift;
+ sector = r10_bio->sector;
+ sectors = ((r10_bio->sector + block_sectors)
+ & ~(sector_t)(block_sectors - 1))
+ - sector;
+
+ while (sect_to_write) {
+ struct bio *wbio;
+ if (sectors > sect_to_write)
+ sectors = sect_to_write;
+ /* Write at 'sector' for 'sectors' */
+ wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+ md_trim_bio(wbio, sector - bio->bi_sector, sectors);
+ wbio->bi_sector = (r10_bio->devs[i].addr+
+ rdev->data_offset+
+ (sector - r10_bio->sector));
+ wbio->bi_bdev = rdev->bdev;
+ if (submit_bio_wait(WRITE, wbio) == 0)
+ /* Failure! */
+ ok = rdev_set_badblocks(rdev, sector,
+ sectors, 0)
+ && ok;
+
+ bio_put(wbio);
+ sect_to_write -= sectors;
+ sector += sectors;
+ sectors = block_sectors;
+ }
+ return ok;
+}
+
+static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio)
+{
+ int slot = r10_bio->read_slot;
+ int mirror = r10_bio->devs[slot].devnum;
+ struct bio *bio;
+ conf_t *conf = mddev->private;
+ mdk_rdev_t *rdev;
+ char b[BDEVNAME_SIZE];
+ unsigned long do_sync;
+ int max_sectors;
+
+ /* we got a read error. Maybe the drive is bad. Maybe just
+ * the block and we can fix it.
+ * We freeze all other IO, and try reading the block from
+ * other devices. When we find one, we re-write
+ * and check that this fixes the read error.
+ * This is all done synchronously while the array is
+ * frozen.
+ */
+ if (mddev->ro == 0) {
+ freeze_array(conf);
+ fix_read_error(conf, mddev, r10_bio);
+ unfreeze_array(conf);
+ }
+ rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
+
+ bio = r10_bio->devs[slot].bio;
+ bdevname(bio->bi_bdev, b);
+ r10_bio->devs[slot].bio =
+ mddev->ro ? IO_BLOCKED : NULL;
+read_more:
+ mirror = read_balance(conf, r10_bio, &max_sectors);
+ if (mirror == -1) {
+ printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
+ " read error for block %llu\n",
+ mdname(mddev), b,
+ (unsigned long long)r10_bio->sector);
+ raid_end_bio_io(r10_bio);
+ bio_put(bio);
+ return;
+ }
+
+ do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
+ if (bio)
+ bio_put(bio);
+ slot = r10_bio->read_slot;
+ rdev = conf->mirrors[mirror].rdev;
+ printk_ratelimited(
+ KERN_ERR
+ "md/raid10:%s: %s: redirecting"
+ "sector %llu to another mirror\n",
+ mdname(mddev),
+ bdevname(rdev->bdev, b),
+ (unsigned long long)r10_bio->sector);
+ bio = bio_clone_mddev(r10_bio->master_bio,
+ GFP_NOIO, mddev);
+ md_trim_bio(bio,
+ r10_bio->sector - bio->bi_sector,
+ max_sectors);
+ r10_bio->devs[slot].bio = bio;
+ bio->bi_sector = r10_bio->devs[slot].addr
+ + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
+ bio->bi_rw = READ | do_sync;
+ bio->bi_private = r10_bio;
+ bio->bi_end_io = raid10_end_read_request;
+ if (max_sectors < r10_bio->sectors) {
+ /* Drat - have to split this up more */
+ struct bio *mbio = r10_bio->master_bio;
+ int sectors_handled =
+ r10_bio->sector + max_sectors
+ - mbio->bi_sector;
+ r10_bio->sectors = max_sectors;
+ spin_lock_irq(&conf->device_lock);
+ if (mbio->bi_phys_segments == 0)
+ mbio->bi_phys_segments = 2;
+ else
+ mbio->bi_phys_segments++;
+ spin_unlock_irq(&conf->device_lock);
+ generic_make_request(bio);
+ bio = NULL;
+
+ r10_bio = mempool_alloc(conf->r10bio_pool,
+ GFP_NOIO);
+ r10_bio->master_bio = mbio;
+ r10_bio->sectors = (mbio->bi_size >> 9)
+ - sectors_handled;
+ r10_bio->state = 0;
+ set_bit(R10BIO_ReadError,
+ &r10_bio->state);
+ r10_bio->mddev = mddev;
+ r10_bio->sector = mbio->bi_sector
+ + sectors_handled;
+
+ goto read_more;
+ } else
+ generic_make_request(bio);
+}
+
+static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio)
+{
+ /* Some sort of write request has finished and it
+ * succeeded in writing where we thought there was a
+ * bad block. So forget the bad block.
+ * Or possibly it failed and we need to record
+ * a bad block.
+ */
+ int m;
+ mdk_rdev_t *rdev;
+
+ if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
+ test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+ for (m = 0; m < conf->copies; m++) {
+ int dev = r10_bio->devs[m].devnum;
+ rdev = conf->mirrors[dev].rdev;
+ if (r10_bio->devs[m].bio == NULL)
+ continue;
+ if (test_bit(BIO_UPTODATE,
+ &r10_bio->devs[m].bio->bi_flags)) {
+ rdev_clear_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors);
+ } else {
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors, 0))
+ md_error(conf->mddev, rdev);
+ }
+ }
+ put_buf(r10_bio);
+ } else {
+ for (m = 0; m < conf->copies; m++) {
+ int dev = r10_bio->devs[m].devnum;
+ struct bio *bio = r10_bio->devs[m].bio;
+ rdev = conf->mirrors[dev].rdev;
+ if (bio == IO_MADE_GOOD) {
+ rdev_clear_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors);
+ rdev_dec_pending(rdev, conf->mddev);
+ } else if (bio != NULL &&
+ !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ if (!narrow_write_error(r10_bio, m)) {
+ md_error(conf->mddev, rdev);
+ set_bit(R10BIO_Degraded,
+ &r10_bio->state);
+ }
+ rdev_dec_pending(rdev, conf->mddev);
+ }
+ }
+ if (test_bit(R10BIO_WriteError,
+ &r10_bio->state))
+ close_write(r10_bio);
+ raid_end_bio_io(r10_bio);
+ }
+}
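
The function above reduces to a small decision table per copy. The sketch below restates it in userspace (IO_MADE_GOOD and BIO_UPTODATE are modelled as plain flags, not the kernel types):

	#include <stdio.h>

	enum action { CLEAR_BAD, RECORD_BAD_OR_FAIL, NOTHING };

	/* made_good: devs[m].bio was the IO_MADE_GOOD sentinel
	 * have_bio/uptodate: a real bio and its BIO_UPTODATE result
	 */
	static enum action write_outcome(int made_good, int have_bio, int uptodate)
	{
		if (made_good)
			return CLEAR_BAD;		/* write over a known bad block worked */
		if (have_bio && !uptodate)
			return RECORD_BAD_OR_FAIL;	/* narrow_write_error(), else md_error() */
		return NOTHING;
	}

	int main(void)
	{
		printf("%d %d %d\n", write_outcome(1, 0, 0),
		       write_outcome(0, 1, 0), write_outcome(0, 1, 1));
		return 0;
	}
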
+
static void raid10d(mddev_t *mddev)
{
r10bio_t *r10_bio;
- struct bio *bio;
unsigned long flags;
conf_t *conf = mddev->private;
struct list_head *head = &conf->retry_list;
- mdk_rdev_t *rdev;
struct blk_plug plug;
md_check_recovery(mddev);
blk_start_plug(&plug);
for (;;) {
- char b[BDEVNAME_SIZE];
flush_pending_writes(conf);
@@ -1628,64 +2226,26 @@
mddev = r10_bio->mddev;
conf = mddev->private;
- if (test_bit(R10BIO_IsSync, &r10_bio->state))
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
+ handle_write_completed(conf, r10_bio);
+ else if (test_bit(R10BIO_IsSync, &r10_bio->state))
sync_request_write(mddev, r10_bio);
else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
recovery_request_write(mddev, r10_bio);
+ else if (test_bit(R10BIO_ReadError, &r10_bio->state))
+ handle_read_error(mddev, r10_bio);
else {
- int slot = r10_bio->read_slot;
- int mirror = r10_bio->devs[slot].devnum;
- /* we got a read error. Maybe the drive is bad. Maybe just
- * the block and we can fix it.
- * We freeze all other IO, and try reading the block from
- * other devices. When we find one, we re-write
- * and check it that fixes the read error.
- * This is all done synchronously while the array is
- * frozen.
+ /* just a partial read to be scheduled from a
+ * separate context
*/
- if (mddev->ro == 0) {
- freeze_array(conf);
- fix_read_error(conf, mddev, r10_bio);
- unfreeze_array(conf);
- }
- rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
-
- bio = r10_bio->devs[slot].bio;
- r10_bio->devs[slot].bio =
- mddev->ro ? IO_BLOCKED : NULL;
- mirror = read_balance(conf, r10_bio);
- if (mirror == -1) {
- printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
- " read error for block %llu\n",
- mdname(mddev),
- bdevname(bio->bi_bdev,b),
- (unsigned long long)r10_bio->sector);
- raid_end_bio_io(r10_bio);
- bio_put(bio);
- } else {
- const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
- bio_put(bio);
- slot = r10_bio->read_slot;
- rdev = conf->mirrors[mirror].rdev;
- if (printk_ratelimit())
- printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
- " another mirror\n",
- mdname(mddev),
- bdevname(rdev->bdev,b),
- (unsigned long long)r10_bio->sector);
- bio = bio_clone_mddev(r10_bio->master_bio,
- GFP_NOIO, mddev);
- r10_bio->devs[slot].bio = bio;
- bio->bi_sector = r10_bio->devs[slot].addr
- + rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
- bio->bi_rw = READ | do_sync;
- bio->bi_private = r10_bio;
- bio->bi_end_io = raid10_end_read_request;
- generic_make_request(bio);
- }
+ int slot = r10_bio->read_slot;
+ generic_make_request(r10_bio->devs[slot].bio);
}
+
cond_resched();
+ if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+ md_check_recovery(mddev);
}
blk_finish_plug(&plug);
}
@@ -1746,7 +2306,6 @@
int i;
int max_sync;
sector_t sync_blocks;
-
sector_t sectors_skipped = 0;
int chunks_skipped = 0;
@@ -1828,7 +2387,7 @@
max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
/* recovery... the complicated one */
- int j, k;
+ int j;
r10_bio = NULL;
for (i=0 ; i<conf->raid_disks; i++) {
@@ -1836,6 +2395,7 @@
r10bio_t *rb2;
sector_t sect;
int must_sync;
+ int any_working;
if (conf->mirrors[i].rdev == NULL ||
test_bit(In_sync, &conf->mirrors[i].rdev->flags))
@@ -1887,19 +2447,42 @@
must_sync = bitmap_start_sync(mddev->bitmap, sect,
&sync_blocks, still_degraded);
+ any_working = 0;
for (j=0; j<conf->copies;j++) {
+ int k;
int d = r10_bio->devs[j].devnum;
+ sector_t from_addr, to_addr;
+ mdk_rdev_t *rdev;
+ sector_t sector, first_bad;
+ int bad_sectors;
if (!conf->mirrors[d].rdev ||
!test_bit(In_sync, &conf->mirrors[d].rdev->flags))
continue;
/* This is where we read from */
+ any_working = 1;
+ rdev = conf->mirrors[d].rdev;
+ sector = r10_bio->devs[j].addr;
+
+ if (is_badblock(rdev, sector, max_sync,
+ &first_bad, &bad_sectors)) {
+ if (first_bad > sector)
+ max_sync = first_bad - sector;
+ else {
+ bad_sectors -= (sector
+ - first_bad);
+ if (max_sync > bad_sectors)
+ max_sync = bad_sectors;
+ continue;
+ }
+ }
bio = r10_bio->devs[0].bio;
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
bio->bi_rw = READ;
- bio->bi_sector = r10_bio->devs[j].addr +
+ from_addr = r10_bio->devs[j].addr;
+ bio->bi_sector = from_addr +
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
@@ -1916,26 +2499,48 @@
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
bio->bi_rw = WRITE;
- bio->bi_sector = r10_bio->devs[k].addr +
+ to_addr = r10_bio->devs[k].addr;
+ bio->bi_sector = to_addr +
conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
r10_bio->devs[0].devnum = d;
+ r10_bio->devs[0].addr = from_addr;
r10_bio->devs[1].devnum = i;
+ r10_bio->devs[1].addr = to_addr;
break;
}
if (j == conf->copies) {
- /* Cannot recover, so abort the recovery */
+ /* Cannot recover, so abort the recovery or
+ * record a bad block */
put_buf(r10_bio);
if (rb2)
atomic_dec(&rb2->remaining);
r10_bio = rb2;
- if (!test_and_set_bit(MD_RECOVERY_INTR,
- &mddev->recovery))
- printk(KERN_INFO "md/raid10:%s: insufficient "
- "working devices for recovery.\n",
- mdname(mddev));
+ if (any_working) {
+ /* problem is that there are bad blocks
+ * on other device(s)
+ */
+ int k;
+ for (k = 0; k < conf->copies; k++)
+ if (r10_bio->devs[k].devnum == i)
+ break;
+ if (!rdev_set_badblocks(
+ conf->mirrors[i].rdev,
+ r10_bio->devs[k].addr,
+ max_sync, 0))
+ any_working = 0;
+ }
+ if (!any_working) {
+ if (!test_and_set_bit(MD_RECOVERY_INTR,
+ &mddev->recovery))
+ printk(KERN_INFO "md/raid10:%s: insufficient "
+ "working devices for recovery.\n",
+ mdname(mddev));
+ conf->mirrors[i].recovery_disabled
+ = mddev->recovery_disabled;
+ }
break;
}
}
@@ -1979,12 +2584,28 @@
for (i=0; i<conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
+ sector_t first_bad, sector;
+ int bad_sectors;
+
bio = r10_bio->devs[i].bio;
bio->bi_end_io = NULL;
clear_bit(BIO_UPTODATE, &bio->bi_flags);
if (conf->mirrors[d].rdev == NULL ||
test_bit(Faulty, &conf->mirrors[d].rdev->flags))
continue;
+ sector = r10_bio->devs[i].addr;
+ if (is_badblock(conf->mirrors[d].rdev,
+ sector, max_sync,
+ &first_bad, &bad_sectors)) {
+ if (first_bad > sector)
+ max_sync = first_bad - sector;
+ else {
+ bad_sectors -= (sector - first_bad);
+ if (max_sync > bad_sectors)
+ max_sync = bad_sectors;
+ continue;
+ }
+ }
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
bio->bi_next = biolist;
@@ -1992,7 +2613,7 @@
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
bio->bi_rw = READ;
- bio->bi_sector = r10_bio->devs[i].addr +
+ bio->bi_sector = sector +
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
count++;
@@ -2079,7 +2700,8 @@
return sectors_skipped + nr_sectors;
giveup:
/* There is nowhere to write, so all non-sync
- * drives must be failed, so try the next chunk...
+ * drives must be failed or in resync, or all drives
+ * have a bad block, so try the next chunk...
*/
if (sector_nr + max_sync < max_sector)
max_sector = sector_nr + max_sync;
@@ -2249,6 +2871,7 @@
(conf->raid_disks / conf->near_copies));
list_for_each_entry(rdev, &mddev->disks, same_set) {
+
disk_idx = rdev->raid_disk;
if (disk_idx >= conf->raid_disks
|| disk_idx < 0)
@@ -2271,7 +2894,7 @@
disk->head_position = 0;
}
/* need to check that every block has at least one working mirror */
- if (!enough(conf)) {
+ if (!enough(conf, -1)) {
printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
mdname(mddev));
goto out_free_conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 944b110..79cb52a 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -6,6 +6,11 @@
struct mirror_info {
mdk_rdev_t *rdev;
sector_t head_position;
+ int recovery_disabled; /* matches
+ * mddev->recovery_disabled
+ * when we shouldn't try
+ * recovering this device.
+ */
};
typedef struct r10bio_s r10bio_t;
@@ -113,10 +118,26 @@
* level, we store IO_BLOCKED in the appropriate 'bios' pointer
*/
#define IO_BLOCKED ((struct bio*)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
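
Both sentinels (and NULL) fall below any real kernel pointer, so the guard is a plain integer compare; completion paths must test it before bio_put() or any dereference. A userspace model, where the last pointer is only a stand-in for a real bio:

	#include <stdio.h>

	struct bio;
	#define IO_BLOCKED   ((struct bio *)1)
	#define IO_MADE_GOOD ((struct bio *)2)
	#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

	int main(void)
	{
		/* NULL, both sentinels, and a stand-in for a real pointer;
		 * note NULL also tests as "special", so NULL checks still
		 * come first in the kernel paths.
		 */
		struct bio *samples[] = { (struct bio *)0, IO_BLOCKED,
					  IO_MADE_GOOD, (struct bio *)0x1000 };
		for (int i = 0; i < 4; i++)
			printf("%p: special=%d\n", (void *)samples[i],
			       (int)BIO_SPECIAL(samples[i]));
		return 0;
	}
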
/* bits for r10bio.state */
#define R10BIO_Uptodate 0
#define R10BIO_IsSync 1
#define R10BIO_IsRecover 2
#define R10BIO_Degraded 3
+/* Set ReadError on bios that experience a read error
+ * so that raid10d knows what to do with them.
+ */
+#define R10BIO_ReadError 4
+/* If a write for this request means we can clear some
+ * known-bad-block records, we set this flag.
+ */
+#define R10BIO_MadeGood 5
+#define R10BIO_WriteError 6
#endif
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b72edf3..dbae459 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -51,6 +51,7 @@
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/slab.h>
+#include <linux/ratelimit.h>
#include "md.h"
#include "raid5.h"
#include "raid0.h"
@@ -96,8 +97,6 @@
#define __inline__
#endif
-#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
-
/*
* We maintain a biased count of active stripes in the bottom 16 bits of
* bi_phys_segments, and a count of processed stripes in the upper 16 bits
@@ -341,7 +340,7 @@
(unsigned long long)sh->sector, i, dev->toread,
dev->read, dev->towrite, dev->written,
test_bit(R5_LOCKED, &dev->flags));
- BUG();
+ WARN_ON(1);
}
dev->flags = 0;
raid5_build_block(sh, i, previous);
@@ -527,6 +526,36 @@
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
+ /* We have already checked bad blocks for reads. Now
+ * need to check for writes.
+ */
+ while ((rw & WRITE) && rdev &&
+ test_bit(WriteErrorSeen, &rdev->flags)) {
+ sector_t first_bad;
+ int bad_sectors;
+ int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors);
+ if (!bad)
+ break;
+
+ if (bad < 0) {
+ set_bit(BlockedBadBlocks, &rdev->flags);
+ if (!conf->mddev->external &&
+ conf->mddev->flags) {
+ /* It is very unlikely, but we might
+ * still need to write out the
+ * bad block log - better give it
+ * a chance */
+ md_check_recovery(conf->mddev);
+ }
+ md_wait_for_blocked_rdev(rdev, conf->mddev);
+ } else {
+ /* Acknowledged bad block - skip the write */
+ rdev_dec_pending(rdev, conf->mddev);
+ rdev = NULL;
+ }
+ }
+
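
The three-way dispatch above follows is_badblock()'s return convention in md: 0 when the range is clear, a positive value when it overlaps only acknowledged bad blocks, and a negative value when some overlap is still unacknowledged (not yet safe in the on-disk log). Restated as a userspace table:

	#include <stdio.h>

	static const char *write_action(int bad)
	{
		if (bad == 0)
			return "issue the write";
		if (bad < 0)
			return "wait until the bad-block log is committed";
		return "skip the write - the block is already known bad";
	}

	int main(void)
	{
		for (int bad = -1; bad <= 1; bad++)
			printf("is_badblock() -> %2d: %s\n", bad, write_action(bad));
		return 0;
	}
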
if (rdev) {
if (s->syncing || s->expanding || s->expanded)
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
@@ -548,10 +577,6 @@
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
- if ((rw & WRITE) &&
- test_bit(R5_ReWrite, &sh->dev[i].flags))
- atomic_add(STRIPE_SECTORS,
- &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw & WRITE)
@@ -1020,12 +1045,12 @@
if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
struct bio *wbi;
- spin_lock(&sh->lock);
+ spin_lock_irq(&sh->raid_conf->device_lock);
chosen = dev->towrite;
dev->towrite = NULL;
BUG_ON(dev->written);
wbi = dev->written = chosen;
- spin_unlock(&sh->lock);
+ spin_unlock_irq(&sh->raid_conf->device_lock);
while (wbi && wbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
@@ -1315,12 +1340,11 @@
static int grow_one_stripe(raid5_conf_t *conf)
{
struct stripe_head *sh;
- sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
+ sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
if (!sh)
return 0;
- memset(sh, 0, sizeof(*sh) + (conf->pool_size-1)*sizeof(struct r5dev));
+
sh->raid_conf = conf;
- spin_lock_init(&sh->lock);
#ifdef CONFIG_MULTICORE_RAID456
init_waitqueue_head(&sh->ops.wait_for_ops);
#endif
@@ -1435,14 +1459,11 @@
return -ENOMEM;
for (i = conf->max_nr_stripes; i; i--) {
- nsh = kmem_cache_alloc(sc, GFP_KERNEL);
+ nsh = kmem_cache_zalloc(sc, GFP_KERNEL);
if (!nsh)
break;
- memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
-
nsh->raid_conf = conf;
- spin_lock_init(&nsh->lock);
#ifdef CONFIG_MULTICORE_RAID456
init_waitqueue_head(&nsh->ops.wait_for_ops);
#endif
@@ -1587,12 +1608,15 @@
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
rdev = conf->disks[i].rdev;
- printk_rl(KERN_INFO "md/raid:%s: read error corrected"
- " (%lu sectors at %llu on %s)\n",
- mdname(conf->mddev), STRIPE_SECTORS,
- (unsigned long long)(sh->sector
- + rdev->data_offset),
- bdevname(rdev->bdev, b));
+ printk_ratelimited(
+ KERN_INFO
+ "md/raid:%s: read error corrected"
+ " (%lu sectors at %llu on %s)\n",
+ mdname(conf->mddev), STRIPE_SECTORS,
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdevname(rdev->bdev, b));
+ atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
}
@@ -1606,22 +1630,24 @@
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
if (conf->mddev->degraded >= conf->max_degraded)
- printk_rl(KERN_WARNING
- "md/raid:%s: read error not correctable "
- "(sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector
- + rdev->data_offset),
- bdn);
+ printk_ratelimited(
+ KERN_WARNING
+ "md/raid:%s: read error not correctable "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
/* Oh, no!!! */
- printk_rl(KERN_WARNING
- "md/raid:%s: read error NOT corrected!! "
- "(sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector
- + rdev->data_offset),
- bdn);
+ printk_ratelimited(
+ KERN_WARNING
+ "md/raid:%s: read error NOT corrected!! "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
printk(KERN_WARNING
@@ -1649,6 +1675,8 @@
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+ sector_t first_bad;
+ int bad_sectors;
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
@@ -1662,8 +1690,12 @@
return;
}
- if (!uptodate)
- md_error(conf->mddev, conf->disks[i].rdev);
+ if (!uptodate) {
+ set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
+ set_bit(R5_WriteError, &sh->dev[i].flags);
+ } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors))
+ set_bit(R5_MadeGood, &sh->dev[i].flags);
rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
@@ -1710,6 +1742,7 @@
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
+ set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT
@@ -1760,7 +1793,7 @@
/*
* Select the parity disk based on the user selected algorithm.
*/
- pd_idx = qd_idx = ~0;
+ pd_idx = qd_idx = -1;
switch(conf->level) {
case 4:
pd_idx = data_disks;
@@ -2143,12 +2176,11 @@
raid5_conf_t *conf = sh->raid_conf;
int firstwrite=0;
- pr_debug("adding bh b#%llu to stripe s#%llu\n",
+ pr_debug("adding bi b#%llu to stripe s#%llu\n",
(unsigned long long)bi->bi_sector,
(unsigned long long)sh->sector);
- spin_lock(&sh->lock);
spin_lock_irq(&conf->device_lock);
if (forwrite) {
bip = &sh->dev[dd_idx].towrite;
@@ -2169,19 +2201,6 @@
bi->bi_next = *bip;
*bip = bi;
bi->bi_phys_segments++;
- spin_unlock_irq(&conf->device_lock);
- spin_unlock(&sh->lock);
-
- pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
- (unsigned long long)bi->bi_sector,
- (unsigned long long)sh->sector, dd_idx);
-
- if (conf->mddev->bitmap && firstwrite) {
- bitmap_startwrite(conf->mddev->bitmap, sh->sector,
- STRIPE_SECTORS, 0);
- sh->bm_seq = conf->seq_flush+1;
- set_bit(STRIPE_BIT_DELAY, &sh->state);
- }
if (forwrite) {
/* check if page is covered */
@@ -2196,12 +2215,23 @@
if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
}
+ spin_unlock_irq(&conf->device_lock);
+
+ pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
+ (unsigned long long)(*bip)->bi_sector,
+ (unsigned long long)sh->sector, dd_idx);
+
+ if (conf->mddev->bitmap && firstwrite) {
+ bitmap_startwrite(conf->mddev->bitmap, sh->sector,
+ STRIPE_SECTORS, 0);
+ sh->bm_seq = conf->seq_flush+1;
+ set_bit(STRIPE_BIT_DELAY, &sh->state);
+ }
return 1;
overlap:
set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
spin_unlock_irq(&conf->device_lock);
- spin_unlock(&sh->lock);
return 0;
}
@@ -2238,9 +2268,18 @@
rcu_read_lock();
rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && test_bit(In_sync, &rdev->flags))
- /* multiple read failures in one stripe */
- md_error(conf->mddev, rdev);
+ atomic_inc(&rdev->nr_pending);
+ else
+ rdev = NULL;
rcu_read_unlock();
+ if (rdev) {
+ if (!rdev_set_badblocks(
+ rdev,
+ sh->sector,
+ STRIPE_SECTORS, 0))
+ md_error(conf->mddev, rdev);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
}
spin_lock_irq(&conf->device_lock);
/* fail all writes first */
@@ -2308,6 +2347,10 @@
if (bitmap_end)
bitmap_endwrite(conf->mddev->bitmap, sh->sector,
STRIPE_SECTORS, 0, 0);
+ /* If we were in the middle of a write the parity block might
+ * still be locked - so just clear all R5_LOCKED flags
+ */
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
}
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
@@ -2315,17 +2358,53 @@
md_wakeup_thread(conf->mddev->thread);
}
-/* fetch_block5 - checks the given member device to see if its data needs
+static void
+handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh,
+ struct stripe_head_state *s)
+{
+ int abort = 0;
+ int i;
+
+ md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
+ clear_bit(STRIPE_SYNCING, &sh->state);
+ s->syncing = 0;
+ /* There is nothing more to do for sync/check/repair.
+ * For recover we need to record a bad block on all
+ * non-sync devices, or abort the recovery
+ */
+ if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
+ return;
+ /* During recovery devices cannot be removed, so locking and
+ * refcounting of rdevs is not needed
+ */
+ for (i = 0; i < conf->raid_disks; i++) {
+ mdk_rdev_t *rdev = conf->disks[i].rdev;
+ if (!rdev
+ || test_bit(Faulty, &rdev->flags)
+ || test_bit(In_sync, &rdev->flags))
+ continue;
+ if (!rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ }
+ if (abort) {
+ conf->recovery_disabled = conf->mddev->recovery_disabled;
+ set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+ }
+}
+
+/* fetch_block - checks the given member device to see if its data needs
* to be read or computed to satisfy a request.
*
* Returns 1 when no more member devices need to be checked, otherwise returns
- * 0 to tell the loop in handle_stripe_fill5 to continue
+ * 0 to tell the loop in handle_stripe_fill to continue
*/
-static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
- int disk_idx, int disks)
+static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
+ int disk_idx, int disks)
{
struct r5dev *dev = &sh->dev[disk_idx];
- struct r5dev *failed_dev = &sh->dev[s->failed_num];
+ struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
+ &sh->dev[s->failed_num[1]] };
/* is the data in this block needed, and can we get it? */
if (!test_bit(R5_LOCKED, &dev->flags) &&
@@ -2333,91 +2412,19 @@
(dev->toread ||
(dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
s->syncing || s->expanding ||
- (s->failed &&
- (failed_dev->toread ||
- (failed_dev->towrite &&
- !test_bit(R5_OVERWRITE, &failed_dev->flags)))))) {
- /* We would like to get this block, possibly by computing it,
- * otherwise read it if the backing disk is insync
- */
- if ((s->uptodate == disks - 1) &&
- (s->failed && disk_idx == s->failed_num)) {
- set_bit(STRIPE_COMPUTE_RUN, &sh->state);
- set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
- set_bit(R5_Wantcompute, &dev->flags);
- sh->ops.target = disk_idx;
- sh->ops.target2 = -1;
- s->req_compute = 1;
- /* Careful: from this point on 'uptodate' is in the eye
- * of raid_run_ops which services 'compute' operations
- * before writes. R5_Wantcompute flags a block that will
- * be R5_UPTODATE by the time it is needed for a
- * subsequent operation.
- */
- s->uptodate++;
- return 1; /* uptodate + compute == disks */
- } else if (test_bit(R5_Insync, &dev->flags)) {
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantread, &dev->flags);
- s->locked++;
- pr_debug("Reading block %d (sync=%d)\n", disk_idx,
- s->syncing);
- }
- }
-
- return 0;
-}
-
-/**
- * handle_stripe_fill5 - read or compute data to satisfy pending requests.
- */
-static void handle_stripe_fill5(struct stripe_head *sh,
- struct stripe_head_state *s, int disks)
-{
- int i;
-
- /* look for blocks to read/compute, skip this if a compute
- * is already in flight, or if the stripe contents are in the
- * midst of changing due to a write
- */
- if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
- !sh->reconstruct_state)
- for (i = disks; i--; )
- if (fetch_block5(sh, s, i, disks))
- break;
- set_bit(STRIPE_HANDLE, &sh->state);
-}
-
-/* fetch_block6 - checks the given member device to see if its data needs
- * to be read or computed to satisfy a request.
- *
- * Returns 1 when no more member devices need to be checked, otherwise returns
- * 0 to tell the loop in handle_stripe_fill6 to continue
- */
-static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
- struct r6_state *r6s, int disk_idx, int disks)
-{
- struct r5dev *dev = &sh->dev[disk_idx];
- struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
- &sh->dev[r6s->failed_num[1]] };
-
- if (!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) &&
- (dev->toread ||
- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
- s->syncing || s->expanding ||
- (s->failed >= 1 &&
- (fdev[0]->toread || s->to_write)) ||
- (s->failed >= 2 &&
- (fdev[1]->toread || s->to_write)))) {
+ (s->failed >= 1 && fdev[0]->toread) ||
+ (s->failed >= 2 && fdev[1]->toread) ||
+ (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+ !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
+ (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
/* we would like to get this block, possibly by computing it,
* otherwise read it if the backing disk is insync
*/
BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
BUG_ON(test_bit(R5_Wantread, &dev->flags));
if ((s->uptodate == disks - 1) &&
- (s->failed && (disk_idx == r6s->failed_num[0] ||
- disk_idx == r6s->failed_num[1]))) {
+ (s->failed && (disk_idx == s->failed_num[0] ||
+ disk_idx == s->failed_num[1]))) {
/* have disk failed, and we're requested to fetch it;
* do compute it
*/
@@ -2429,6 +2436,12 @@
sh->ops.target = disk_idx;
sh->ops.target2 = -1; /* no 2nd target */
s->req_compute = 1;
+ /* Careful: from this point on 'uptodate' is in the eye
+ * of raid_run_ops which services 'compute' operations
+ * before writes. R5_Wantcompute flags a block that will
+ * be R5_UPTODATE by the time it is needed for a
+ * subsequent operation.
+ */
s->uptodate++;
return 1;
} else if (s->uptodate == disks-2 && s->failed >= 2) {
@@ -2469,11 +2482,11 @@
}
/**
- * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ * handle_stripe_fill - read or compute data to satisfy pending requests.
*/
-static void handle_stripe_fill6(struct stripe_head *sh,
- struct stripe_head_state *s, struct r6_state *r6s,
- int disks)
+static void handle_stripe_fill(struct stripe_head *sh,
+ struct stripe_head_state *s,
+ int disks)
{
int i;
@@ -2484,7 +2497,7 @@
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
!sh->reconstruct_state)
for (i = disks; i--; )
- if (fetch_block6(sh, s, r6s, i, disks))
+ if (fetch_block(sh, s, i, disks))
break;
set_bit(STRIPE_HANDLE, &sh->state);
}
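
The predicate in fetch_block() packs several cases into one condition. Condensed, it has the following shape; the flag parameters are invented names that summarise the clauses, with "failed_dev_pending" standing for the per-level fdev[] tests and "raid6_degraded_write" for the (level == 6 && failed && to_write) case.

	static int need_this_block(int locked, int uptodate,
				   int toread, int partial_write,
				   int syncing, int expanding,
				   int failed_dev_pending, int raid6_degraded_write)
	{
		if (locked || uptodate)
			return 0;	/* in flight, or we already have it */
		return toread || partial_write || syncing || expanding ||
		       failed_dev_pending ||	/* reconstruction needs every survivor */
		       raid6_degraded_write;	/* RAID6 reconstruct-writes when degraded */
	}
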
@@ -2540,11 +2553,19 @@
md_wakeup_thread(conf->mddev->thread);
}
-static void handle_stripe_dirtying5(raid5_conf_t *conf,
- struct stripe_head *sh, struct stripe_head_state *s, int disks)
+static void handle_stripe_dirtying(raid5_conf_t *conf,
+ struct stripe_head *sh,
+ struct stripe_head_state *s,
+ int disks)
{
int rmw = 0, rcw = 0, i;
- for (i = disks; i--; ) {
+ if (conf->max_degraded == 2) {
+ /* RAID6 requires 'rcw' in current implementation
+ * Calculate the real rcw later - for now make it
+ * look like rcw is cheaper
+ */
+ rcw = 1; rmw = 2;
+ } else for (i = disks; i--; ) {
/* would I have to read this buffer for read_modify_write */
struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || i == sh->pd_idx) &&
@@ -2591,16 +2612,19 @@
}
}
}
- if (rcw <= rmw && rcw > 0)
+ if (rcw <= rmw && rcw > 0) {
/* want reconstruct write, but need to get some data */
+ rcw = 0;
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
if (!test_bit(R5_OVERWRITE, &dev->flags) &&
- i != sh->pd_idx &&
+ i != sh->pd_idx && i != sh->qd_idx &&
!test_bit(R5_LOCKED, &dev->flags) &&
!(test_bit(R5_UPTODATE, &dev->flags) ||
- test_bit(R5_Wantcompute, &dev->flags)) &&
- test_bit(R5_Insync, &dev->flags)) {
+ test_bit(R5_Wantcompute, &dev->flags))) {
+ rcw++;
+ if (!test_bit(R5_Insync, &dev->flags))
+ continue; /* it's a failed drive */
if (
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
pr_debug("Read_old block "
@@ -2614,6 +2638,7 @@
}
}
}
+ }
/* now if nothing is locked, and if we have enough data,
* we can start a write request
*/
@@ -2630,53 +2655,6 @@
schedule_reconstruction(sh, s, rcw == 0, 0);
}
-static void handle_stripe_dirtying6(raid5_conf_t *conf,
- struct stripe_head *sh, struct stripe_head_state *s,
- struct r6_state *r6s, int disks)
-{
- int rcw = 0, pd_idx = sh->pd_idx, i;
- int qd_idx = sh->qd_idx;
-
- set_bit(STRIPE_HANDLE, &sh->state);
- for (i = disks; i--; ) {
- struct r5dev *dev = &sh->dev[i];
- /* check if we haven't enough data */
- if (!test_bit(R5_OVERWRITE, &dev->flags) &&
- i != pd_idx && i != qd_idx &&
- !test_bit(R5_LOCKED, &dev->flags) &&
- !(test_bit(R5_UPTODATE, &dev->flags) ||
- test_bit(R5_Wantcompute, &dev->flags))) {
- rcw++;
- if (!test_bit(R5_Insync, &dev->flags))
- continue; /* it's a failed drive */
-
- if (
- test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- pr_debug("Read_old stripe %llu "
- "block %d for Reconstruct\n",
- (unsigned long long)sh->sector, i);
- set_bit(R5_LOCKED, &dev->flags);
- set_bit(R5_Wantread, &dev->flags);
- s->locked++;
- } else {
- pr_debug("Request delayed stripe %llu "
- "block %d for Reconstruct\n",
- (unsigned long long)sh->sector, i);
- set_bit(STRIPE_DELAYED, &sh->state);
- set_bit(STRIPE_HANDLE, &sh->state);
- }
- }
- }
- /* now if nothing is locked, and if we have enough data, we can start a
- * write request
- */
- if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
- s->locked == 0 && rcw == 0 &&
- !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
- schedule_reconstruction(sh, s, 1, 0);
- }
-}
-
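
For the common full-block-write case, the rmw/rcw trade-off the unified function computes is simple arithmetic: rmw reads the old copies of the blocks being written plus the old parity, while rcw reads every data block that is not being overwritten. The kernel counts per-device flags instead; this userspace sketch does the same arithmetic for a hypothetical 8-device RAID5.

	#include <stdio.h>

	int main(void)
	{
		int disks = 8;	/* hypothetical: 7 data + 1 parity */

		for (int w = 1; w < disks; w++) {
			int rmw = w + 1;		/* old data blocks + old parity */
			int rcw = disks - 1 - w;	/* untouched data blocks */
			printf("write %d blocks: rmw=%d rcw=%d -> %s\n",
			       w, rmw, rcw,
			       (rcw <= rmw && rcw > 0) ? "rcw" :
			       (rmw < rcw && rmw > 0) ? "rmw" : "no reads needed");
		}
		return 0;
	}
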
static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
struct stripe_head_state *s, int disks)
{
@@ -2695,7 +2673,7 @@
s->uptodate--;
break;
}
- dev = &sh->dev[s->failed_num];
+ dev = &sh->dev[s->failed_num[0]];
/* fall through */
case check_state_compute_result:
sh->check_state = check_state_idle;
@@ -2767,7 +2745,7 @@
static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
struct stripe_head_state *s,
- struct r6_state *r6s, int disks)
+ int disks)
{
int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx;
@@ -2786,14 +2764,14 @@
switch (sh->check_state) {
case check_state_idle:
/* start a new check operation if there are < 2 failures */
- if (s->failed == r6s->q_failed) {
+ if (s->failed == s->q_failed) {
/* The only possible failed device holds Q, so it
* makes sense to check P (If anything else were failed,
* we would have used P to recreate it).
*/
sh->check_state = check_state_run;
}
- if (!r6s->q_failed && s->failed < 2) {
+ if (!s->q_failed && s->failed < 2) {
/* Q is not failed, and we didn't use it to generate
* anything, so it makes sense to check it
*/
@@ -2835,13 +2813,13 @@
*/
BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
if (s->failed == 2) {
- dev = &sh->dev[r6s->failed_num[1]];
+ dev = &sh->dev[s->failed_num[1]];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
if (s->failed >= 1) {
- dev = &sh->dev[r6s->failed_num[0]];
+ dev = &sh->dev[s->failed_num[0]];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
@@ -2928,8 +2906,7 @@
}
}
-static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
- struct r6_state *r6s)
+static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh)
{
int i;
@@ -2971,7 +2948,7 @@
set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
for (j = 0; j < conf->raid_disks; j++)
if (j != sh2->pd_idx &&
- (!r6s || j != sh2->qd_idx) &&
+ j != sh2->qd_idx &&
!test_bit(R5_Expanded, &sh2->dev[j].flags))
break;
if (j == conf->raid_disks) {
@@ -3006,43 +2983,35 @@
*
*/
-static void handle_stripe5(struct stripe_head *sh)
+static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
{
raid5_conf_t *conf = sh->raid_conf;
- int disks = sh->disks, i;
- struct bio *return_bi = NULL;
- struct stripe_head_state s;
+ int disks = sh->disks;
struct r5dev *dev;
- mdk_rdev_t *blocked_rdev = NULL;
- int prexor;
- int dec_preread_active = 0;
+ int i;
- memset(&s, 0, sizeof(s));
- pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
- "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state,
- atomic_read(&sh->count), sh->pd_idx, sh->check_state,
- sh->reconstruct_state);
+ memset(s, 0, sizeof(*s));
- spin_lock(&sh->lock);
- clear_bit(STRIPE_HANDLE, &sh->state);
- clear_bit(STRIPE_DELAYED, &sh->state);
-
- s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
- s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
- s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+ s->syncing = test_bit(STRIPE_SYNCING, &sh->state);
+ s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+ s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+ s->failed_num[0] = -1;
+ s->failed_num[1] = -1;
/* Now to look around and see what can be done */
rcu_read_lock();
+ spin_lock_irq(&conf->device_lock);
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
+ sector_t first_bad;
+ int bad_sectors;
+ int is_bad = 0;
dev = &sh->dev[i];
- pr_debug("check %d: state 0x%lx toread %p read %p write %p "
- "written %p\n", i, dev->flags, dev->toread, dev->read,
- dev->towrite, dev->written);
-
- /* maybe we can request a biofill operation
+ pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
+ i, dev->flags, dev->toread, dev->towrite, dev->written);
+ /* maybe we can reply to a read
*
* new wantfill requests are only permitted while
* ops_complete_biofill is guaranteed to be inactive
@@ -3052,37 +3021,74 @@
set_bit(R5_Wantfill, &dev->flags);
/* now count some things */
- if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
- if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
- if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
+ if (test_bit(R5_LOCKED, &dev->flags))
+ s->locked++;
+ if (test_bit(R5_UPTODATE, &dev->flags))
+ s->uptodate++;
+ if (test_bit(R5_Wantcompute, &dev->flags)) {
+ s->compute++;
+ BUG_ON(s->compute > 2);
+ }
if (test_bit(R5_Wantfill, &dev->flags))
- s.to_fill++;
+ s->to_fill++;
else if (dev->toread)
- s.to_read++;
+ s->to_read++;
if (dev->towrite) {
- s.to_write++;
+ s->to_write++;
if (!test_bit(R5_OVERWRITE, &dev->flags))
- s.non_overwrite++;
+ s->non_overwrite++;
}
if (dev->written)
- s.written++;
+ s->written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (blocked_rdev == NULL &&
- rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- blocked_rdev = rdev;
- atomic_inc(&rdev->nr_pending);
+ if (rdev) {
+ is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors);
+ if (s->blocked_rdev == NULL
+ && (test_bit(Blocked, &rdev->flags)
+ || is_bad < 0)) {
+ if (is_bad < 0)
+ set_bit(BlockedBadBlocks,
+ &rdev->flags);
+ s->blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
+ }
}
clear_bit(R5_Insync, &dev->flags);
if (!rdev)
/* Not in-sync */;
- else if (test_bit(In_sync, &rdev->flags))
+ else if (is_bad) {
+ /* also not in-sync */
+ if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+ /* treat as in-sync, but with a read error
+ * which we can now try to correct
+ */
+ set_bit(R5_Insync, &dev->flags);
+ set_bit(R5_ReadError, &dev->flags);
+ }
+ } else if (test_bit(In_sync, &rdev->flags))
set_bit(R5_Insync, &dev->flags);
else {
- /* could be in-sync depending on recovery/reshape status */
+ /* in sync if before recovery_offset */
if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
set_bit(R5_Insync, &dev->flags);
}
+ if (test_bit(R5_WriteError, &dev->flags)) {
+ clear_bit(R5_Insync, &dev->flags);
+ if (!test_bit(Faulty, &rdev->flags)) {
+ s->handle_bad_blocks = 1;
+ atomic_inc(&rdev->nr_pending);
+ } else
+ clear_bit(R5_WriteError, &dev->flags);
+ }
+ if (test_bit(R5_MadeGood, &dev->flags)) {
+ if (!test_bit(Faulty, &rdev->flags)) {
+ s->handle_bad_blocks = 1;
+ atomic_inc(&rdev->nr_pending);
+ } else
+ clear_bit(R5_MadeGood, &dev->flags);
+ }
if (!test_bit(R5_Insync, &dev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
@@ -3091,21 +3097,60 @@
if (test_bit(R5_ReadError, &dev->flags))
clear_bit(R5_Insync, &dev->flags);
if (!test_bit(R5_Insync, &dev->flags)) {
- s.failed++;
- s.failed_num = i;
+ if (s->failed < 2)
+ s->failed_num[s->failed] = i;
+ s->failed++;
}
}
+ spin_unlock_irq(&conf->device_lock);
rcu_read_unlock();
+}
- if (unlikely(blocked_rdev)) {
+static void handle_stripe(struct stripe_head *sh)
+{
+ struct stripe_head_state s;
+ raid5_conf_t *conf = sh->raid_conf;
+ int i;
+ int prexor;
+ int disks = sh->disks;
+ struct r5dev *pdev, *qdev;
+
+ clear_bit(STRIPE_HANDLE, &sh->state);
+ if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
+ /* already being handled, ensure it gets handled
+ * again when current action finishes */
+ set_bit(STRIPE_HANDLE, &sh->state);
+ return;
+ }
+
+ if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+ set_bit(STRIPE_SYNCING, &sh->state);
+ clear_bit(STRIPE_INSYNC, &sh->state);
+ }
+ clear_bit(STRIPE_DELAYED, &sh->state);
+
+ pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
+ "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
+ (unsigned long long)sh->sector, sh->state,
+ atomic_read(&sh->count), sh->pd_idx, sh->qd_idx,
+ sh->check_state, sh->reconstruct_state);
+
+ analyse_stripe(sh, &s);
+
+ if (s.handle_bad_blocks) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto finish;
+ }
+
+ if (unlikely(s.blocked_rdev)) {
if (s.syncing || s.expanding || s.expanded ||
s.to_write || s.written) {
set_bit(STRIPE_HANDLE, &sh->state);
- goto unlock;
+ goto finish;
}
/* There is nothing for the blocked_rdev to block */
- rdev_dec_pending(blocked_rdev, conf->mddev);
- blocked_rdev = NULL;
+ rdev_dec_pending(s.blocked_rdev, conf->mddev);
+ s.blocked_rdev = NULL;
}
if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
@@ -3114,38 +3159,46 @@
}
pr_debug("locked=%d uptodate=%d to_read=%d"
- " to_write=%d failed=%d failed_num=%d\n",
- s.locked, s.uptodate, s.to_read, s.to_write,
- s.failed, s.failed_num);
- /* check if the array has lost two devices and, if so, some requests might
- * need to be failed
+ " to_write=%d failed=%d failed_num=%d,%d\n",
+ s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
+ s.failed_num[0], s.failed_num[1]);
+ /* check if the array has lost more than max_degraded devices and,
+ * if so, some requests might need to be failed.
*/
- if (s.failed > 1 && s.to_read+s.to_write+s.written)
- handle_failed_stripe(conf, sh, &s, disks, &return_bi);
- if (s.failed > 1 && s.syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
- clear_bit(STRIPE_SYNCING, &sh->state);
- s.syncing = 0;
- }
+ if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
+ handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
+ if (s.failed > conf->max_degraded && s.syncing)
+ handle_failed_sync(conf, sh, &s);
- /* might be able to return some write requests if the parity block
- * is safe, or on a failed drive
+ /*
+ * might be able to return some write requests if the parity blocks
+ * are safe, or on a failed drive
*/
- dev = &sh->dev[sh->pd_idx];
- if ( s.written &&
- ((test_bit(R5_Insync, &dev->flags) &&
- !test_bit(R5_LOCKED, &dev->flags) &&
- test_bit(R5_UPTODATE, &dev->flags)) ||
- (s.failed == 1 && s.failed_num == sh->pd_idx)))
- handle_stripe_clean_event(conf, sh, disks, &return_bi);
+ pdev = &sh->dev[sh->pd_idx];
+ s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+ || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+ qdev = &sh->dev[sh->qd_idx];
+ s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+ || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+ || conf->level < 6;
+
+ if (s.written &&
+ (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+ && !test_bit(R5_LOCKED, &pdev->flags)
+ && test_bit(R5_UPTODATE, &pdev->flags)))) &&
+ (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+ && !test_bit(R5_LOCKED, &qdev->flags)
+ && test_bit(R5_UPTODATE, &qdev->flags)))))
+ handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
/* Now we might consider reading some blocks, either to check/generate
* parity, or to satisfy requests
* or to load a block that is being partially written.
*/
- if (s.to_read || s.non_overwrite ||
- (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
- handle_stripe_fill5(sh, &s, disks);
+ if (s.to_read || s.non_overwrite
+ || (conf->level == 6 && s.to_write && s.failed)
+ || (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
+ handle_stripe_fill(sh, &s, disks);
/* Now we check to see if any write operations have recently
* completed
@@ -3161,21 +3214,25 @@
* be written back to disk
*/
BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+ BUG_ON(sh->qd_idx >= 0 &&
+ !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags));
for (i = disks; i--; ) {
- dev = &sh->dev[i];
+ struct r5dev *dev = &sh->dev[i];
if (test_bit(R5_LOCKED, &dev->flags) &&
- (i == sh->pd_idx || dev->written)) {
+ (i == sh->pd_idx || i == sh->qd_idx ||
+ dev->written)) {
pr_debug("Writing block %d\n", i);
set_bit(R5_Wantwrite, &dev->flags);
if (prexor)
continue;
if (!test_bit(R5_Insync, &dev->flags) ||
- (i == sh->pd_idx && s.failed == 0))
+ ((i == sh->pd_idx || i == sh->qd_idx) &&
+ s.failed == 0))
set_bit(STRIPE_INSYNC, &sh->state);
}
}
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
- dec_preread_active = 1;
+ s.dec_preread_active = 1;
}
/* Now to consider new write requests and what else, if anything
@@ -3185,7 +3242,7 @@
* block.
*/
if (s.to_write && !sh->reconstruct_state && !sh->check_state)
- handle_stripe_dirtying5(conf, sh, &s, disks);
+ handle_stripe_dirtying(conf, sh, &s, disks);
/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
@@ -3195,54 +3252,61 @@
if (sh->check_state ||
(s.syncing && s.locked == 0 &&
!test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
- !test_bit(STRIPE_INSYNC, &sh->state)))
- handle_parity_checks5(conf, sh, &s, disks);
+ !test_bit(STRIPE_INSYNC, &sh->state))) {
+ if (conf->level == 6)
+ handle_parity_checks6(conf, sh, &s, disks);
+ else
+ handle_parity_checks5(conf, sh, &s, disks);
+ }
if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+ md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
clear_bit(STRIPE_SYNCING, &sh->state);
}
- /* If the failed drive is just a ReadError, then we might need to progress
- * the repair/check process
+ /* If the failed drives are just a ReadError, then we might need
+ * to progress the repair/check process
*/
- if (s.failed == 1 && !conf->mddev->ro &&
- test_bit(R5_ReadError, &sh->dev[s.failed_num].flags)
- && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags)
- && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags)
- ) {
- dev = &sh->dev[s.failed_num];
- if (!test_bit(R5_ReWrite, &dev->flags)) {
- set_bit(R5_Wantwrite, &dev->flags);
- set_bit(R5_ReWrite, &dev->flags);
- set_bit(R5_LOCKED, &dev->flags);
- s.locked++;
- } else {
- /* let's read it back */
- set_bit(R5_Wantread, &dev->flags);
- set_bit(R5_LOCKED, &dev->flags);
- s.locked++;
+ if (s.failed <= conf->max_degraded && !conf->mddev->ro)
+ for (i = 0; i < s.failed; i++) {
+ struct r5dev *dev = &sh->dev[s.failed_num[i]];
+ if (test_bit(R5_ReadError, &dev->flags)
+ && !test_bit(R5_LOCKED, &dev->flags)
+ && test_bit(R5_UPTODATE, &dev->flags)
+ ) {
+ if (!test_bit(R5_ReWrite, &dev->flags)) {
+ set_bit(R5_Wantwrite, &dev->flags);
+ set_bit(R5_ReWrite, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
+ } else {
+ /* let's read it back */
+ set_bit(R5_Wantread, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
+ }
+ }
}
- }
+
/* Finish reconstruct operations initiated by the expansion process */
if (sh->reconstruct_state == reconstruct_state_result) {
- struct stripe_head *sh2
+ struct stripe_head *sh_src
= get_active_stripe(conf, sh->sector, 1, 1, 1);
- if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
- /* sh cannot be written until sh2 has been read.
+ if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) {
+ /* sh cannot be written until sh_src has been read.
* so arrange for sh to be delayed a little
*/
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
- &sh2->state))
+ &sh_src->state))
atomic_inc(&conf->preread_active_stripes);
- release_stripe(sh2);
- goto unlock;
+ release_stripe(sh_src);
+ goto finish;
}
- if (sh2)
- release_stripe(sh2);
+ if (sh_src)
+ release_stripe(sh_src);
sh->reconstruct_state = reconstruct_state_idle;
clear_bit(STRIPE_EXPANDING, &sh->state);
@@ -3268,323 +3332,39 @@
if (s.expanding && s.locked == 0 &&
!test_bit(STRIPE_COMPUTE_RUN, &sh->state))
- handle_stripe_expansion(conf, sh, NULL);
+ handle_stripe_expansion(conf, sh);
- unlock:
- spin_unlock(&sh->lock);
-
+finish:
/* wait for this device to become unblocked */
- if (unlikely(blocked_rdev))
- md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+ if (unlikely(s.blocked_rdev))
+ md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
- if (s.ops_request)
- raid_run_ops(sh, s.ops_request);
-
- ops_run_io(sh, &s);
-
- if (dec_preread_active) {
- /* We delay this until after ops_run_io so that if make_request
- * is waiting on a flush, it won't continue until the writes
- * have actually been submitted.
- */
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) <
- IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
- return_io(return_bi);
-}
-
-static void handle_stripe6(struct stripe_head *sh)
-{
- raid5_conf_t *conf = sh->raid_conf;
- int disks = sh->disks;
- struct bio *return_bi = NULL;
- int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx;
- struct stripe_head_state s;
- struct r6_state r6s;
- struct r5dev *dev, *pdev, *qdev;
- mdk_rdev_t *blocked_rdev = NULL;
- int dec_preread_active = 0;
-
- pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
- "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
- (unsigned long long)sh->sector, sh->state,
- atomic_read(&sh->count), pd_idx, qd_idx,
- sh->check_state, sh->reconstruct_state);
- memset(&s, 0, sizeof(s));
-
- spin_lock(&sh->lock);
- clear_bit(STRIPE_HANDLE, &sh->state);
- clear_bit(STRIPE_DELAYED, &sh->state);
-
- s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
- s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
- s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
- /* Now to look around and see what can be done */
-
- rcu_read_lock();
- for (i=disks; i--; ) {
- mdk_rdev_t *rdev;
- dev = &sh->dev[i];
-
- pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
- i, dev->flags, dev->toread, dev->towrite, dev->written);
- /* maybe we can reply to a read
- *
- * new wantfill requests are only permitted while
- * ops_complete_biofill is guaranteed to be inactive
- */
- if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
- !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
- set_bit(R5_Wantfill, &dev->flags);
-
- /* now count some things */
- if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
- if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
- if (test_bit(R5_Wantcompute, &dev->flags)) {
- s.compute++;
- BUG_ON(s.compute > 2);
- }
-
- if (test_bit(R5_Wantfill, &dev->flags)) {
- s.to_fill++;
- } else if (dev->toread)
- s.to_read++;
- if (dev->towrite) {
- s.to_write++;
- if (!test_bit(R5_OVERWRITE, &dev->flags))
- s.non_overwrite++;
- }
- if (dev->written)
- s.written++;
- rdev = rcu_dereference(conf->disks[i].rdev);
- if (blocked_rdev == NULL &&
- rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- blocked_rdev = rdev;
- atomic_inc(&rdev->nr_pending);
- }
- clear_bit(R5_Insync, &dev->flags);
- if (!rdev)
- /* Not in-sync */;
- else if (test_bit(In_sync, &rdev->flags))
- set_bit(R5_Insync, &dev->flags);
- else {
- /* in sync if before recovery_offset */
- if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
- set_bit(R5_Insync, &dev->flags);
- }
- if (!test_bit(R5_Insync, &dev->flags)) {
- /* The ReadError flag will just be confusing now */
- clear_bit(R5_ReadError, &dev->flags);
- clear_bit(R5_ReWrite, &dev->flags);
- }
- if (test_bit(R5_ReadError, &dev->flags))
- clear_bit(R5_Insync, &dev->flags);
- if (!test_bit(R5_Insync, &dev->flags)) {
- if (s.failed < 2)
- r6s.failed_num[s.failed] = i;
- s.failed++;
- }
- }
- rcu_read_unlock();
-
- if (unlikely(blocked_rdev)) {
- if (s.syncing || s.expanding || s.expanded ||
- s.to_write || s.written) {
- set_bit(STRIPE_HANDLE, &sh->state);
- goto unlock;
- }
- /* There is nothing for the blocked_rdev to block */
- rdev_dec_pending(blocked_rdev, conf->mddev);
- blocked_rdev = NULL;
- }
-
- if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
- set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
- set_bit(STRIPE_BIOFILL_RUN, &sh->state);
- }
-
- pr_debug("locked=%d uptodate=%d to_read=%d"
- " to_write=%d failed=%d failed_num=%d,%d\n",
- s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
- r6s.failed_num[0], r6s.failed_num[1]);
- /* check if the array has lost >2 devices and, if so, some requests
- * might need to be failed
- */
- if (s.failed > 2 && s.to_read+s.to_write+s.written)
- handle_failed_stripe(conf, sh, &s, disks, &return_bi);
- if (s.failed > 2 && s.syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
- clear_bit(STRIPE_SYNCING, &sh->state);
- s.syncing = 0;
- }
-
- /*
- * might be able to return some write requests if the parity blocks
- * are safe, or on a failed drive
- */
- pdev = &sh->dev[pd_idx];
- r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
- || (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
- qdev = &sh->dev[qd_idx];
- r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx)
- || (s.failed >= 2 && r6s.failed_num[1] == qd_idx);
-
- if ( s.written &&
- ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
- && !test_bit(R5_LOCKED, &pdev->flags)
- && test_bit(R5_UPTODATE, &pdev->flags)))) &&
- ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
- && !test_bit(R5_LOCKED, &qdev->flags)
- && test_bit(R5_UPTODATE, &qdev->flags)))))
- handle_stripe_clean_event(conf, sh, disks, &return_bi);
-
- /* Now we might consider reading some blocks, either to check/generate
- * parity, or to satisfy requests
- * or to load a block that is being partially written.
- */
- if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
- (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
- handle_stripe_fill6(sh, &s, &r6s, disks);
-
- /* Now we check to see if any write operations have recently
- * completed
- */
- if (sh->reconstruct_state == reconstruct_state_drain_result) {
-
- sh->reconstruct_state = reconstruct_state_idle;
- /* All the 'written' buffers and the parity blocks are ready to
- * be written back to disk
- */
- BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
- BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+ if (s.handle_bad_blocks)
for (i = disks; i--; ) {
- dev = &sh->dev[i];
- if (test_bit(R5_LOCKED, &dev->flags) &&
- (i == sh->pd_idx || i == qd_idx ||
- dev->written)) {
- pr_debug("Writing block %d\n", i);
- BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
- set_bit(R5_Wantwrite, &dev->flags);
- if (!test_bit(R5_Insync, &dev->flags) ||
- ((i == sh->pd_idx || i == qd_idx) &&
- s.failed == 0))
- set_bit(STRIPE_INSYNC, &sh->state);
+ mdk_rdev_t *rdev;
+ struct r5dev *dev = &sh->dev[i];
+ if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
+ /* We own a safe reference to the rdev */
+ rdev = conf->disks[i].rdev;
+ if (!rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ md_error(conf->mddev, rdev);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
+ if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
+ rdev = conf->disks[i].rdev;
+ rdev_clear_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS);
+ rdev_dec_pending(rdev, conf->mddev);
}
}
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
- dec_preread_active = 1;
- }
-
- /* Now to consider new write requests and what else, if anything
- * should be read. We do not handle new writes when:
- * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
- * 2/ A 'check' operation is in flight, as it may clobber the parity
- * block.
- */
- if (s.to_write && !sh->reconstruct_state && !sh->check_state)
- handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
-
- /* maybe we need to check and possibly fix the parity for this stripe
- * Any reads will already have been scheduled, so we just see if enough
- * data is available. The parity check is held off while parity
- * dependent operations are in flight.
- */
- if (sh->check_state ||
- (s.syncing && s.locked == 0 &&
- !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
- !test_bit(STRIPE_INSYNC, &sh->state)))
- handle_parity_checks6(conf, sh, &s, &r6s, disks);
-
- if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,1);
- clear_bit(STRIPE_SYNCING, &sh->state);
- }
-
- /* If the failed drives are just a ReadError, then we might need
- * to progress the repair/check process
- */
- if (s.failed <= 2 && !conf->mddev->ro)
- for (i = 0; i < s.failed; i++) {
- dev = &sh->dev[r6s.failed_num[i]];
- if (test_bit(R5_ReadError, &dev->flags)
- && !test_bit(R5_LOCKED, &dev->flags)
- && test_bit(R5_UPTODATE, &dev->flags)
- ) {
- if (!test_bit(R5_ReWrite, &dev->flags)) {
- set_bit(R5_Wantwrite, &dev->flags);
- set_bit(R5_ReWrite, &dev->flags);
- set_bit(R5_LOCKED, &dev->flags);
- s.locked++;
- } else {
- /* let's read it back */
- set_bit(R5_Wantread, &dev->flags);
- set_bit(R5_LOCKED, &dev->flags);
- s.locked++;
- }
- }
- }
-
- /* Finish reconstruct operations initiated by the expansion process */
- if (sh->reconstruct_state == reconstruct_state_result) {
- sh->reconstruct_state = reconstruct_state_idle;
- clear_bit(STRIPE_EXPANDING, &sh->state);
- for (i = conf->raid_disks; i--; ) {
- set_bit(R5_Wantwrite, &sh->dev[i].flags);
- set_bit(R5_LOCKED, &sh->dev[i].flags);
- s.locked++;
- }
- }
-
- if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
- !sh->reconstruct_state) {
- struct stripe_head *sh2
- = get_active_stripe(conf, sh->sector, 1, 1, 1);
- if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
- /* sh cannot be written until sh2 has been read.
- * so arrange for sh to be delayed a little
- */
- set_bit(STRIPE_DELAYED, &sh->state);
- set_bit(STRIPE_HANDLE, &sh->state);
- if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
- &sh2->state))
- atomic_inc(&conf->preread_active_stripes);
- release_stripe(sh2);
- goto unlock;
- }
- if (sh2)
- release_stripe(sh2);
-
- /* Need to write out all blocks after computing P&Q */
- sh->disks = conf->raid_disks;
- stripe_set_idx(sh->sector, conf, 0, sh);
- schedule_reconstruction(sh, &s, 1, 1);
- } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
- clear_bit(STRIPE_EXPAND_READY, &sh->state);
- atomic_dec(&conf->reshape_stripes);
- wake_up(&conf->wait_for_overlap);
- md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
- }
-
- if (s.expanding && s.locked == 0 &&
- !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
- handle_stripe_expansion(conf, sh, &r6s);
-
- unlock:
- spin_unlock(&sh->lock);
-
- /* wait for this device to become unblocked */
- if (unlikely(blocked_rdev))
- md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
if (s.ops_request)
raid_run_ops(sh, s.ops_request);
ops_run_io(sh, &s);
-
- if (dec_preread_active) {
+ if (s.dec_preread_active) {
/* We delay this until after ops_run_io so that if make_request
* is waiting on a flush, it won't continue until the writes
* have actually been submitted.
@@ -3595,15 +3375,9 @@
md_wakeup_thread(conf->mddev->thread);
}
- return_io(return_bi);
-}
+ return_io(s.return_bi);
-static void handle_stripe(struct stripe_head *sh)
-{
- if (sh->raid_conf->level == 6)
- handle_stripe6(sh);
- else
- handle_stripe5(sh);
+ clear_bit(STRIPE_ACTIVE, &sh->state);
}
static void raid5_activate_delayed(raid5_conf_t *conf)
@@ -3833,6 +3607,9 @@
rcu_read_lock();
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
if (rdev && test_bit(In_sync, &rdev->flags)) {
+ sector_t first_bad;
+ int bad_sectors;
+
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
raid_bio->bi_next = (void*)rdev;
@@ -3840,8 +3617,10 @@
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
align_bi->bi_sector += rdev->data_offset;
- if (!bio_fits_rdev(align_bi)) {
- /* too big in some way */
+ if (!bio_fits_rdev(align_bi) ||
+ is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
+ &first_bad, &bad_sectors)) {
+ /* too big in some way, or has a known bad block */
bio_put(align_bi);
rdev_dec_pending(rdev, mddev);
return 0;
@@ -4016,7 +3795,7 @@
}
}
- if (bio_data_dir(bi) == WRITE &&
+ if (rw == WRITE &&
logical_sector >= mddev->suspend_lo &&
logical_sector < mddev->suspend_hi) {
release_stripe(sh);
@@ -4034,7 +3813,7 @@
}
if (test_bit(STRIPE_EXPANDING, &sh->state) ||
- !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
+ !add_stripe_bio(sh, bi, dd_idx, rw)) {
/* Stripe is busy expanding or
* add failed due to overlap. Flush everything
* and wait a while
@@ -4375,10 +4154,7 @@
bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
- spin_lock(&sh->lock);
- set_bit(STRIPE_SYNCING, &sh->state);
- clear_bit(STRIPE_INSYNC, &sh->state);
- spin_unlock(&sh->lock);
+ set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
handle_stripe(sh);
release_stripe(sh);
@@ -4509,6 +4285,9 @@
release_stripe(sh);
cond_resched();
+ if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+ md_check_recovery(mddev);
+
spin_lock_irq(&conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);
@@ -5313,6 +5092,7 @@
* isn't possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) &&
number < conf->raid_disks) {
err = -EBUSY;
@@ -5341,6 +5121,9 @@
int first = 0;
int last = conf->raid_disks - 1;
+ if (mddev->recovery_disabled == conf->recovery_disabled)
+ return -EBUSY;
+
if (has_failed(conf))
/* no point adding a device */
return -EINVAL;
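
The recovery_disabled handshake works like a generation number: handle_failed_sync() latches mddev->recovery_disabled into the conf, and while the two values match, re-adding a spare is refused with -EBUSY while removing the unrecoverable device is permitted. A toy model with hypothetical generation values:

	#include <stdio.h>

	static int may_add_spare(unsigned int mddev_gen, unsigned int conf_gen)
	{
		return mddev_gen != conf_gen;	/* equal means recovery is disabled */
	}

	int main(void)
	{
		unsigned int mddev_gen = 5, conf_gen = 0;	/* hypothetical values */

		printf("normal operation: add spare? %d\n",
		       may_add_spare(mddev_gen, conf_gen));
		conf_gen = mddev_gen;	/* handle_failed_sync() latches the value */
		printf("after failed recovery: add spare? %d\n",
		       may_add_spare(mddev_gen, conf_gen));
		mddev_gen++;	/* later bumped by md when a retry makes sense again */
		printf("after that: add spare? %d\n",
		       may_add_spare(mddev_gen, conf_gen));
		return 0;
	}
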
@@ -5519,16 +5302,14 @@
if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags)) {
if (raid5_add_disk(mddev, rdev) == 0) {
- char nm[20];
if (rdev->raid_disk
>= conf->previous_raid_disks) {
set_bit(In_sync, &rdev->flags);
added_devices++;
} else
rdev->recovery_offset = 0;
- sprintf(nm, "rd%d", rdev->raid_disk);
- if (sysfs_create_link(&mddev->kobj,
- &rdev->kobj, nm))
+
+ if (sysfs_link_rdev(mddev, rdev))
/* Failure here is OK */;
}
} else if (rdev->raid_disk >= conf->previous_raid_disks
@@ -5624,9 +5405,7 @@
d++) {
mdk_rdev_t *rdev = conf->disks[d].rdev;
if (rdev && raid5_remove_disk(mddev, d) == 0) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
+ sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
}
}
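
sysfs_link_rdev()/sysfs_unlink_rdev() replace the open-coded sprintf("rd%d") pairs here and elsewhere. Their plausible shape is sketched below; the real helpers live in md.h as part of this series, so treat these bodies as an assumption rather than verified source.

	static inline int sysfs_link_rdev(mddev_t *mddev, mdk_rdev_t *rdev)
	{
		char nm[20];
		sprintf(nm, "rd%d", rdev->raid_disk);
		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
	}

	static inline void sysfs_unlink_rdev(mddev_t *mddev, mdk_rdev_t *rdev)
	{
		char nm[20];
		sprintf(nm, "rd%d", rdev->raid_disk);
		sysfs_remove_link(&mddev->kobj, nm);
	}
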
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 3ca77a2..11b9566 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -6,11 +6,11 @@
/*
*
- * Each stripe contains one buffer per disc. Each buffer can be in
+ * Each stripe contains one buffer per device. Each buffer can be in
* one of a number of states stored in "flags". Changes between
- * these states happen *almost* exclusively under a per-stripe
- * spinlock. Some very specific changes can happen in bi_end_io, and
- * these are not protected by the spin lock.
+ * these states happen *almost* exclusively under the protection of the
+ * STRIPE_ACTIVE flag. Some very specific changes can happen in bi_end_io, and
+ * these are not protected by STRIPE_ACTIVE.
*
* The flag bits that are used to represent these states are:
* R5_UPTODATE and R5_LOCKED
@@ -76,12 +76,10 @@
* block and the cached buffer are successfully written, any buffer on
* a written list can be returned with b_end_io.
*
- * The write list and read list both act as fifos. The read list is
- * protected by the device_lock. The write and written lists are
- * protected by the stripe lock. The device_lock, which can be
- * claimed while the stipe lock is held, is only for list
- * manipulations and will only be held for a very short time. It can
- * be claimed from interrupts.
+ * The write list and read list both act as fifos. The read list,
+ * write list and written list are protected by the device_lock.
+ * The device_lock is only for list manipulations and will only be
+ * held for a very short time. It can be claimed from interrupts.
*
*
* Stripes in the stripe cache can be on one of two lists (or on
@@ -96,7 +94,6 @@
*
* The inactive_list, handle_list and hash bucket lists are all protected by the
* device_lock.
- * - stripes on the inactive_list never have their stripe_lock held.
* - stripes have a reference counter. If count==0, they are on a list.
* - If a stripe might need handling, STRIPE_HANDLE is set.
* - When refcount reaches zero, then if STRIPE_HANDLE it is put on
@@ -116,10 +113,10 @@
* attach a request to an active stripe (add_stripe_bh())
* lockdev attach-buffer unlockdev
* handle a stripe (handle_stripe())
- * lockstripe clrSTRIPE_HANDLE ...
+ * setSTRIPE_ACTIVE, clrSTRIPE_HANDLE ...
* (lockdev check-buffers unlockdev) ..
* change-state ..
- * record io/ops needed unlockstripe schedule io/ops
+ * record io/ops needed clearSTRIPE_ACTIVE schedule io/ops
* release an active stripe (release_stripe())
* lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
*
@@ -128,8 +125,7 @@
* on a cached buffer, and plus one if the stripe is undergoing stripe
* operations.
*
- * Stripe operations are performed outside the stripe lock,
- * the stripe operations are:
+ * The stripe operations are:
* -copying data between the stripe cache and user application buffers
* -computing blocks to save a disk access, or to recover a missing block
* -updating the parity on a write operation (reconstruct write and
@@ -159,7 +155,8 @@
*/
/*
- * Operations state - intermediate states that are visible outside of sh->lock
+ * Operations state - intermediate states that are visible outside of
+ * STRIPE_ACTIVE.
* In general _idle indicates nothing is running, _run indicates a data
* processing operation is active, and _result means the data processing result
* is stable and can be acted upon. For simple operations like biofill and
@@ -209,7 +206,6 @@
short ddf_layout;/* use DDF ordering to calculate Q */
unsigned long state; /* state flags */
atomic_t count; /* nr of active thread/requests */
- spinlock_t lock;
int bm_seq; /* sequence number for bitmap flushes */
int disks; /* disks in stripe */
enum check_states check_state;
@@ -240,19 +236,20 @@
};
/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
- * for handle_stripe. It is only valid under spin_lock(sh->lock);
+ * for handle_stripe.
*/
struct stripe_head_state {
int syncing, expanding, expanded;
int locked, uptodate, to_read, to_write, failed, written;
int to_fill, compute, req_compute, non_overwrite;
- int failed_num;
+ int failed_num[2];
+ int p_failed, q_failed;
+ int dec_preread_active;
unsigned long ops_request;
-};
-/* r6_state - extra state data only relevant to r6 */
-struct r6_state {
- int p_failed, q_failed, failed_num[2];
+ struct bio *return_bi;
+ mdk_rdev_t *blocked_rdev;
+ int handle_bad_blocks;
};
/* Flags */
@@ -268,14 +265,16 @@
#define R5_ReWrite 9 /* have tried to over-write the readerror */
#define R5_Expanded 10 /* This block now has post-expand data */
-#define R5_Wantcompute 11 /* compute_block in progress treat as
- * uptodate
- */
-#define R5_Wantfill 12 /* dev->toread contains a bio that needs
- * filling
- */
-#define R5_Wantdrain 13 /* dev->towrite needs to be drained */
-#define R5_WantFUA 14 /* Write should be FUA */
+#define R5_Wantcompute 11 /* compute_block in progress treat as
+ * uptodate
+ */
+#define R5_Wantfill 12 /* dev->toread contains a bio that needs
+ * filling
+ */
+#define R5_Wantdrain 13 /* dev->towrite needs to be drained */
+#define R5_WantFUA 14 /* Write should be FUA */
+#define R5_WriteError 15 /* got a write error - need to record it */
+#define R5_MadeGood 16 /* A bad block has been fixed by writing to it*/
/*
* Write method
*/
@@ -289,21 +288,25 @@
/*
* Stripe state
*/
-#define STRIPE_HANDLE 2
-#define STRIPE_SYNCING 3
-#define STRIPE_INSYNC 4
-#define STRIPE_PREREAD_ACTIVE 5
-#define STRIPE_DELAYED 6
-#define STRIPE_DEGRADED 7
-#define STRIPE_BIT_DELAY 8
-#define STRIPE_EXPANDING 9
-#define STRIPE_EXPAND_SOURCE 10
-#define STRIPE_EXPAND_READY 11
-#define STRIPE_IO_STARTED 12 /* do not count towards 'bypass_count' */
-#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */
-#define STRIPE_BIOFILL_RUN 14
-#define STRIPE_COMPUTE_RUN 15
-#define STRIPE_OPS_REQ_PENDING 16
+enum {
+ STRIPE_ACTIVE,
+ STRIPE_HANDLE,
+ STRIPE_SYNC_REQUESTED,
+ STRIPE_SYNCING,
+ STRIPE_INSYNC,
+ STRIPE_PREREAD_ACTIVE,
+ STRIPE_DELAYED,
+ STRIPE_DEGRADED,
+ STRIPE_BIT_DELAY,
+ STRIPE_EXPANDING,
+ STRIPE_EXPAND_SOURCE,
+ STRIPE_EXPAND_READY,
+ STRIPE_IO_STARTED, /* do not count towards 'bypass_count' */
+ STRIPE_FULL_WRITE, /* all blocks are set to be overwritten */
+ STRIPE_BIOFILL_RUN,
+ STRIPE_COMPUTE_RUN,
+ STRIPE_OPS_REQ_PENDING,
+};
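
Converting the STRIPE_* #defines to an enum lets STRIPE_ACTIVE and STRIPE_SYNC_REQUESTED slot in at the front without renumbering every remaining flag by hand; callers keep passing the constants to set_bit()/test_bit() unchanged. A small self-contained illustration of the idiom:

    #include <stdio.h>

    /* bit numbers are assigned automatically, so inserting a new state
     * early in the enum renumbers the rest without manual edits */
    enum { STRIPE_ACTIVE, STRIPE_HANDLE, STRIPE_SYNC_REQUESTED };

    int main(void)
    {
            unsigned long state = 0;

            state |= 1UL << STRIPE_HANDLE;  /* what set_bit() does */
            printf("handle set: %d\n", !!(state & (1UL << STRIPE_HANDLE)));
            return 0;
    }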
/*
* Operation request flags
@@ -336,7 +339,7 @@
* PREREAD_ACTIVE.
* In stripe_handle, if we find pre-reading is necessary, we do it if
* PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
- * HANDLE gets cleared if stripe_handle leave nothing locked.
+ * HANDLE gets cleared if stripe_handle leaves nothing locked.
*/
@@ -399,7 +402,7 @@
* (fresh device added).
* Cleared when a sync completes.
*/
-
+ int recovery_disabled;
/* per cpu variables */
struct raid5_percpu {
struct page *spare_page; /* Used when checking P/Q in raid6 */
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index b7622c3..e1eca2a 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -282,6 +282,7 @@
obj-$(CONFIG_USB_USBNET) += usb/
obj-$(CONFIG_USB_ZD1201) += usb/
obj-$(CONFIG_USB_IPHETH) += usb/
+obj-$(CONFIG_USB_CDC_PHONET) += usb/
obj-$(CONFIG_WLAN) += wireless/
obj-$(CONFIG_NET_TULIP) += tulip/
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 536038b..31798f5 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -1502,13 +1502,13 @@
* firmware to wipe the ring without re-initializing it.
*/
if (!test_and_set_bit(0, &ap->std_refill_busy))
- ace_load_std_rx_ring(ap, RX_RING_SIZE);
+ ace_load_std_rx_ring(dev, RX_RING_SIZE);
else
printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n",
ap->name);
if (ap->version >= 2) {
if (!test_and_set_bit(0, &ap->mini_refill_busy))
- ace_load_mini_rx_ring(ap, RX_MINI_SIZE);
+ ace_load_mini_rx_ring(dev, RX_MINI_SIZE);
else
printk(KERN_ERR "%s: Someone is busy refilling "
"the RX mini ring\n", ap->name);
@@ -1584,9 +1584,10 @@
}
-static void ace_tasklet(unsigned long dev)
+static void ace_tasklet(unsigned long arg)
{
- struct ace_private *ap = netdev_priv((struct net_device *)dev);
+ struct net_device *dev = (struct net_device *) arg;
+ struct ace_private *ap = netdev_priv(dev);
int cur_size;
cur_size = atomic_read(&ap->cur_rx_bufs);
@@ -1595,7 +1596,7 @@
#ifdef DEBUG
printk("refilling buffers (current %i)\n", cur_size);
#endif
- ace_load_std_rx_ring(ap, RX_RING_SIZE - cur_size);
+ ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size);
}
if (ap->version >= 2) {
@@ -1606,7 +1607,7 @@
printk("refilling mini buffers (current %i)\n",
cur_size);
#endif
- ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size);
+ ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size);
}
}
@@ -1616,7 +1617,7 @@
#ifdef DEBUG
printk("refilling jumbo buffers (current %i)\n", cur_size);
#endif
- ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size);
+ ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size);
}
ap->tasklet_pending = 0;
}
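
The ace_tasklet() rewrite above (and the matching prototype changes further down) threads struct net_device through instead of struct ace_private, because the refill paths now want the device for skb allocation: netdev_alloc_skb_ip_align(dev, size) replaces the old dev_alloc_skb(size + NET_IP_ALIGN) plus skb_reserve(skb, NET_IP_ALIGN) pair. A sketch of the recovered-pointer pattern, not literal driver code:

    /* the tasklet argument is opaque; netdev_priv() recovers the
     * private area allocated together with the net_device */
    static void example_tasklet(unsigned long arg)
    {
            struct net_device *dev = (struct net_device *)arg;
            struct ace_private *ap = netdev_priv(dev);

            /* refill helpers receive dev and allocate IP-aligned
             * receive buffers directly from it */
            (void)ap;
    }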
@@ -1642,8 +1643,9 @@
* done only before the device is enabled, thus no interrupts are
* generated and by the interrupt handler/tasklet handler.
*/
-static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs)
{
+ struct ace_private *ap = netdev_priv(dev);
struct ace_regs __iomem *regs = ap->regs;
short i, idx;
@@ -1657,11 +1659,10 @@
struct rx_desc *rd;
dma_addr_t mapping;
- skb = dev_alloc_skb(ACE_STD_BUFSIZE + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE);
if (!skb)
break;
- skb_reserve(skb, NET_IP_ALIGN);
mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
offset_in_page(skb->data),
ACE_STD_BUFSIZE,
@@ -1705,8 +1706,9 @@
}
-static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs)
{
+ struct ace_private *ap = netdev_priv(dev);
struct ace_regs __iomem *regs = ap->regs;
short i, idx;
@@ -1718,11 +1720,10 @@
struct rx_desc *rd;
dma_addr_t mapping;
- skb = dev_alloc_skb(ACE_MINI_BUFSIZE + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE);
if (!skb)
break;
- skb_reserve(skb, NET_IP_ALIGN);
mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
offset_in_page(skb->data),
ACE_MINI_BUFSIZE,
@@ -1762,8 +1763,9 @@
* Load the jumbo rx ring, this may happen at any time if the MTU
* is changed to a value > 1500.
*/
-static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs)
+static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs)
{
+ struct ace_private *ap = netdev_priv(dev);
struct ace_regs __iomem *regs = ap->regs;
short i, idx;
@@ -1774,11 +1776,10 @@
struct rx_desc *rd;
dma_addr_t mapping;
- skb = dev_alloc_skb(ACE_JUMBO_BUFSIZE + NET_IP_ALIGN);
+ skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE);
if (!skb)
break;
- skb_reserve(skb, NET_IP_ALIGN);
mapping = pci_map_page(ap->pdev, virt_to_page(skb->data),
offset_in_page(skb->data),
ACE_JUMBO_BUFSIZE,
@@ -2196,7 +2197,7 @@
#ifdef DEBUG
printk("low on std buffers %i\n", cur_size);
#endif
- ace_load_std_rx_ring(ap,
+ ace_load_std_rx_ring(dev,
RX_RING_SIZE - cur_size);
} else
run_tasklet = 1;
@@ -2212,7 +2213,8 @@
printk("low on mini buffers %i\n",
cur_size);
#endif
- ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size);
+ ace_load_mini_rx_ring(dev,
+ RX_MINI_SIZE - cur_size);
} else
run_tasklet = 1;
}
@@ -2228,7 +2230,8 @@
printk("low on jumbo buffers %i\n",
cur_size);
#endif
- ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size);
+ ace_load_jumbo_rx_ring(dev,
+ RX_JUMBO_SIZE - cur_size);
} else
run_tasklet = 1;
}
@@ -2267,7 +2270,7 @@
if (ap->jumbo &&
!test_and_set_bit(0, &ap->jumbo_refill_busy))
- ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE);
+ ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
if (dev->flags & IFF_PROMISC) {
cmd.evt = C_SET_PROMISC_MODE;
@@ -2575,7 +2578,7 @@
"support\n", dev->name);
ap->jumbo = 1;
if (!test_and_set_bit(0, &ap->jumbo_refill_busy))
- ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE);
+ ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE);
ace_set_rxtx_parms(dev, 1);
}
} else {
diff --git a/drivers/net/acenic.h b/drivers/net/acenic.h
index f67dc9b..51c486c 100644
--- a/drivers/net/acenic.h
+++ b/drivers/net/acenic.h
@@ -766,9 +766,9 @@
* Prototypes
*/
static int ace_init(struct net_device *dev);
-static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs);
-static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs);
-static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs);
+static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs);
+static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs);
+static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs);
static irqreturn_t ace_interrupt(int irq, void *dev_id);
static int ace_load_firmware(struct net_device *dev);
static int ace_open(struct net_device *dev);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 02842d0..38a83ac 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1557,8 +1557,10 @@
if (slave_dev->type != ARPHRD_ETHER)
bond_setup_by_slave(bond_dev, slave_dev);
- else
+ else {
ether_setup(bond_dev);
+ bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ }
netdev_bonding_change(bond_dev,
NETDEV_POST_TYPE_CHANGE);
@@ -4330,7 +4332,7 @@
bond_dev->tx_queue_len = 0;
bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;
bond_dev->priv_flags |= IFF_BONDING;
- bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
/* At first, we block adding VLANs. That's the only way to
* prevent problems that occur when adding VLANs over an
@@ -4691,7 +4693,7 @@
/* miimon and arp_interval not set, we need one so things
* work as expected, see bonding.txt for details
*/
- pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
+ pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
}
if (primary && !USES_PRIMARY(bond_mode)) {
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index b60835f..2dfb4bf 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1025,6 +1025,7 @@
int i;
struct slave *slave;
struct bonding *bond = to_bond(d);
+ char ifname[IFNAMSIZ];
if (!rtnl_trylock())
return restart_syscall();
@@ -1035,32 +1036,33 @@
if (!USES_PRIMARY(bond->params.mode)) {
pr_info("%s: Unable to set primary slave; %s is in mode %d\n",
bond->dev->name, bond->dev->name, bond->params.mode);
- } else {
- bond_for_each_slave(bond, slave, i) {
- if (strnicmp
- (slave->dev->name, buf,
- strlen(slave->dev->name)) == 0) {
- pr_info("%s: Setting %s as primary slave.\n",
- bond->dev->name, slave->dev->name);
- bond->primary_slave = slave;
- strcpy(bond->params.primary, slave->dev->name);
- bond_select_active_slave(bond);
- goto out;
- }
- }
+ goto out;
+ }
- /* if we got here, then we didn't match the name of any slave */
sscanf(buf, "%15s", ifname); /* IFNAMSIZ - 1 */
- if (strlen(buf) == 0 || buf[0] == '\n') {
- pr_info("%s: Setting primary slave to None.\n",
- bond->dev->name);
- bond->primary_slave = NULL;
- bond_select_active_slave(bond);
- } else {
- pr_info("%s: Unable to set %.*s as primary slave as it is not a slave.\n",
- bond->dev->name, (int)strlen(buf) - 1, buf);
+ /* check to see if we are clearing primary */
+ if (!strlen(ifname) || buf[0] == '\n') {
+ pr_info("%s: Setting primary slave to None.\n",
+ bond->dev->name);
+ bond->primary_slave = NULL;
+ bond_select_active_slave(bond);
+ goto out;
+ }
+
+ bond_for_each_slave(bond, slave, i) {
+ if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
+ pr_info("%s: Setting %s as primary slave.\n",
+ bond->dev->name, slave->dev->name);
+ bond->primary_slave = slave;
+ strcpy(bond->params.primary, slave->dev->name);
+ bond_select_active_slave(bond);
+ goto out;
}
}
+
+ pr_info("%s: Unable to set %.*s as primary slave.\n",
+ bond->dev->name, (int)strlen(buf) - 1, buf);
out:
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
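
store_primary() now copies the sysfs buffer into a fixed-size name with sscanf() and matches slaves with strncmp() over IFNAMSIZ, where the old code did a prefix-length strnicmp() that could match "eth1" against input naming "eth10". The scanf width matters: it must be IFNAMSIZ - 1 (hence %15s) so the terminating NUL still fits in the buffer. A minimal userspace sketch of the parse step:

    #include <stdio.h>

    #define IFNAMSIZ 16

    static void parse_ifname(const char *buf, char ifname[IFNAMSIZ])
    {
            ifname[0] = '\0';
            sscanf(buf, "%15s", ifname);  /* IFNAMSIZ - 1: room for NUL */
    }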
@@ -1195,6 +1197,7 @@
struct slave *old_active = NULL;
struct slave *new_active = NULL;
struct bonding *bond = to_bond(d);
+ char ifname[IFNAMSIZ];
if (!rtnl_trylock())
return restart_syscall();
@@ -1203,56 +1206,62 @@
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
- if (!USES_PRIMARY(bond->params.mode))
+ if (!USES_PRIMARY(bond->params.mode)) {
pr_info("%s: Unable to change active slave; %s is in mode %d\n",
bond->dev->name, bond->dev->name, bond->params.mode);
- else {
- bond_for_each_slave(bond, slave, i) {
- if (strnicmp
- (slave->dev->name, buf,
- strlen(slave->dev->name)) == 0) {
- old_active = bond->curr_active_slave;
- new_active = slave;
- if (new_active == old_active) {
- /* do nothing */
- pr_info("%s: %s is already the current active slave.\n",
+ goto out;
+ }
+
+ sscanf(buf, "%15s", ifname); /* IFNAMSIZ - 1 */
+
+ /* check to see if we are clearing active */
+ if (!strlen(ifname) || buf[0] == '\n') {
+ pr_info("%s: Clearing current active slave.\n",
+ bond->dev->name);
+ bond->curr_active_slave = NULL;
+ bond_select_active_slave(bond);
+ goto out;
+ }
+
+ bond_for_each_slave(bond, slave, i) {
+ if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
+ old_active = bond->curr_active_slave;
+ new_active = slave;
+ if (new_active == old_active) {
+ /* do nothing */
+ pr_info("%s: %s is already the current"
+ " active slave.\n",
+ bond->dev->name,
+ slave->dev->name);
+ goto out;
+ }
+ else {
+ if ((new_active) &&
+ (old_active) &&
+ (new_active->link == BOND_LINK_UP) &&
+ IS_UP(new_active->dev)) {
+ pr_info("%s: Setting %s as active"
+ " slave.\n",
bond->dev->name,
slave->dev->name);
- goto out;
+ bond_change_active_slave(bond,
+ new_active);
}
else {
- if ((new_active) &&
- (old_active) &&
- (new_active->link == BOND_LINK_UP) &&
- IS_UP(new_active->dev)) {
- pr_info("%s: Setting %s as active slave.\n",
- bond->dev->name,
- slave->dev->name);
- bond_change_active_slave(bond, new_active);
- }
- else {
- pr_info("%s: Could not set %s as active slave; either %s is down or the link is down.\n",
- bond->dev->name,
- slave->dev->name,
- slave->dev->name);
- }
- goto out;
+ pr_info("%s: Could not set %s as"
+ " active slave; either %s is"
+ " down or the link is down.\n",
+ bond->dev->name,
+ slave->dev->name,
+ slave->dev->name);
}
+ goto out;
}
}
-
- /* if we got here, then we didn't match the name of any slave */
-
- if (strlen(buf) == 0 || buf[0] == '\n') {
- pr_info("%s: Setting active slave to None.\n",
- bond->dev->name);
- bond->primary_slave = NULL;
- bond_select_active_slave(bond);
- } else {
- pr_info("%s: Unable to set %.*s as active slave as it is not a slave.\n",
- bond->dev->name, (int)strlen(buf) - 1, buf);
- }
}
+
+ pr_info("%s: Unable to set %.*s as active slave.\n",
+ bond->dev->name, (int)strlen(buf) - 1, buf);
out:
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index e64cd9c..e55df30 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2764,7 +2764,14 @@
prefetch(skb->data);
vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
- if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+
+ /*
+ * We need to check for NETIF_F_HW_VLAN_RX here.
+ * Even if VLAN RX acceleration is disabled,
+ * NV_RX3_VLAN_TAG_PRESENT is pseudo-randomly set.
+ */
+ if (dev->features & NETIF_F_HW_VLAN_RX &&
+ vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK;
__vlan_hwaccel_put_tag(skb, vid);
@@ -5331,15 +5338,16 @@
np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_RXCSUM;
- dev->features |= dev->hw_features;
}
np->vlanctl_bits = 0;
if (id->driver_data & DEV_HAS_VLAN) {
np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE;
- dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
+ dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
}
+ dev->features |= dev->hw_features;
+
np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG;
if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) ||
(id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) ||
@@ -5607,6 +5615,8 @@
goto out_error;
}
+ nv_vlan_mode(dev, dev->features);
+
netif_carrier_off(dev);
dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
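
The forcedeth hunks above split capability bookkeeping the way the hw_features API intends: dev->hw_features lists what ethtool may toggle, dev->features is what is currently enabled, so the driver now accumulates checksum and VLAN bits in hw_features, seeds features from it once, then calls nv_vlan_mode() at probe to push the default VLAN state to the hardware. Illustrative ordering only, not the driver's literal code:

    /* accumulate everything the hardware can do first ... */
    dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG;             /* csum */
    dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;  /* VLAN */

    /* ... then enable the whole set by default in one place */
    dev->features |= dev->hw_features;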
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 835cd25..2659daa 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -388,12 +388,8 @@
if (priv->hwts_rx_en)
rctrl |= RCTRL_PRSDEP_INIT | RCTRL_TS_ENABLE;
- /* keep vlan related bits if it's enabled */
- if (ndev->features & NETIF_F_HW_VLAN_TX)
- rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
-
if (ndev->features & NETIF_F_HW_VLAN_RX)
- tctrl |= TCTRL_VLINS;
+ rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT;
/* Init rctrl based on our settings */
gfar_write(&regs->rctrl, rctrl);
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 6e82dd3..46b5f5f 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -183,7 +183,7 @@
dev->flags |= IFF_NOARP;
dev->flags &= ~IFF_MULTICAST;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
random_ether_addr(dev->dev_addr);
}
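
This IFF_TX_SKB_SHARING hunk recurs across the series (bonding, ifb, macvlan, tun, veth, hdlc_fr, airo, hostap, ath6kl): ether_setup() now advertises that the device tolerates transmitting shared skbs, as pktgen sends them, so every virtual device that writes to the skbs it transmits must opt back out. Each fix has the same two-line shape:

    ether_setup(dev);                        /* sets IFF_TX_SKB_SHARING */
    dev->priv_flags &= ~IFF_TX_SKB_SHARING;  /* this driver modifies skbs */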
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index ba631fc..05172c3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -572,7 +572,7 @@
{
ether_setup(dev);
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
dev->netdev_ops = &macvlan_netdev_ops;
dev->destructor = free_netdev;
dev->header_ops = &macvlan_hard_header_ops,
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 8035765..dc3fbf6 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -190,6 +190,7 @@
/* minimum number of free TX descriptors required to wake up TX process */
#define TG3_TX_WAKEUP_THRESH(tnapi) ((tnapi)->tx_pending / 4)
+#define TG3_TX_BD_DMA_MAX 4096
#define TG3_RAW_IP_ALIGN 2
@@ -4824,7 +4825,7 @@
txq = netdev_get_tx_queue(tp->dev, index);
while (sw_idx != hw_idx) {
- struct ring_info *ri = &tnapi->tx_buffers[sw_idx];
+ struct tg3_tx_ring_info *ri = &tnapi->tx_buffers[sw_idx];
struct sk_buff *skb = ri->skb;
int i, tx_bug = 0;
@@ -4840,6 +4841,12 @@
ri->skb = NULL;
+ while (ri->fragmented) {
+ ri->fragmented = false;
+ sw_idx = NEXT_TX(sw_idx);
+ ri = &tnapi->tx_buffers[sw_idx];
+ }
+
sw_idx = NEXT_TX(sw_idx);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -4851,6 +4858,13 @@
dma_unmap_addr(ri, mapping),
skb_shinfo(skb)->frags[i].size,
PCI_DMA_TODEVICE);
+
+ while (ri->fragmented) {
+ ri->fragmented = false;
+ sw_idx = NEXT_TX(sw_idx);
+ ri = &tnapi->tx_buffers[sw_idx];
+ }
+
sw_idx = NEXT_TX(sw_idx);
}
@@ -5901,40 +5915,100 @@
#endif
}
-static void tg3_set_txd(struct tg3_napi *tnapi, int entry,
- dma_addr_t mapping, int len, u32 flags,
- u32 mss_and_is_end)
+static inline void tg3_tx_set_bd(struct tg3_tx_buffer_desc *txbd,
+ dma_addr_t mapping, u32 len, u32 flags,
+ u32 mss, u32 vlan)
{
- struct tg3_tx_buffer_desc *txd = &tnapi->tx_ring[entry];
- int is_end = (mss_and_is_end & 0x1);
- u32 mss = (mss_and_is_end >> 1);
- u32 vlan_tag = 0;
-
- if (is_end)
- flags |= TXD_FLAG_END;
- if (flags & TXD_FLAG_VLAN) {
- vlan_tag = flags >> 16;
- flags &= 0xffff;
- }
- vlan_tag |= (mss << TXD_MSS_SHIFT);
-
- txd->addr_hi = ((u64) mapping >> 32);
- txd->addr_lo = ((u64) mapping & 0xffffffff);
- txd->len_flags = (len << TXD_LEN_SHIFT) | flags;
- txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
+ txbd->addr_hi = ((u64) mapping >> 32);
+ txbd->addr_lo = ((u64) mapping & 0xffffffff);
+ txbd->len_flags = (len << TXD_LEN_SHIFT) | (flags & 0x0000ffff);
+ txbd->vlan_tag = (mss << TXD_MSS_SHIFT) | (vlan << TXD_VLAN_TAG_SHIFT);
}
-static void tg3_skb_error_unmap(struct tg3_napi *tnapi,
- struct sk_buff *skb, int last)
+static bool tg3_tx_frag_set(struct tg3_napi *tnapi, u32 *entry, u32 *budget,
+ dma_addr_t map, u32 len, u32 flags,
+ u32 mss, u32 vlan)
+{
+ struct tg3 *tp = tnapi->tp;
+ bool hwbug = false;
+
+ if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8)
+ hwbug = 1;
+
+ if (tg3_4g_overflow_test(map, len))
+ hwbug = 1;
+
+ if (tg3_40bit_overflow_test(tp, map, len))
+ hwbug = 1;
+
+ if (tg3_flag(tp, 4K_FIFO_LIMIT)) {
+ u32 tmp_flag = flags & ~TXD_FLAG_END;
+ while (len > TG3_TX_BD_DMA_MAX) {
+ u32 frag_len = TG3_TX_BD_DMA_MAX;
+ len -= TG3_TX_BD_DMA_MAX;
+
+ if (len) {
+ tnapi->tx_buffers[*entry].fragmented = true;
+ /* Avoid the 8-byte DMA problem */
+ if (len <= 8) {
+ len += TG3_TX_BD_DMA_MAX / 2;
+ frag_len = TG3_TX_BD_DMA_MAX / 2;
+ }
+ } else
+ tmp_flag = flags;
+
+ if (*budget) {
+ tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+ frag_len, tmp_flag, mss, vlan);
+ (*budget)--;
+ *entry = NEXT_TX(*entry);
+ } else {
+ hwbug = 1;
+ break;
+ }
+
+ map += frag_len;
+ }
+
+ if (len) {
+ if (*budget) {
+ tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+ len, flags, mss, vlan);
+ (*budget)--;
+ *entry = NEXT_TX(*entry);
+ } else {
+ hwbug = 1;
+ }
+ }
+ } else {
+ tg3_tx_set_bd(&tnapi->tx_ring[*entry], map,
+ len, flags, mss, vlan);
+ *entry = NEXT_TX(*entry);
+ }
+
+ return hwbug;
+}
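
tg3_tx_frag_set() above implements the 4K FIFO workaround enabled later for the 5719: a single BD may map at most TG3_TX_BD_DMA_MAX (4096) bytes, and a BD of 8 bytes or less trips a separate short-DMA bug, so whenever the remainder after a full chunk would be that short the splitter hands back half of the current chunk to fatten the tail. A self-contained sketch of just the chunking arithmetic:

    #include <stdio.h>

    #define BD_MAX 4096             /* TG3_TX_BD_DMA_MAX */

    static void split(unsigned int len)
    {
            while (len > BD_MAX) {
                    unsigned int frag = BD_MAX;

                    len -= BD_MAX;
                    if (len && len <= 8) {       /* avoid a <= 8 byte BD */
                            len += BD_MAX / 2;   /* give back half */
                            frag = BD_MAX / 2;
                    }
                    printf("bd of %u bytes\n", frag);
            }
            if (len)
                    printf("bd of %u bytes\n", len);
    }

    int main(void)
    {
            split(8200);            /* emits 4096, 2048, 2056 */
            return 0;
    }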
+
+static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last)
{
int i;
- u32 entry = tnapi->tx_prod;
- struct ring_info *txb = &tnapi->tx_buffers[entry];
+ struct sk_buff *skb;
+ struct tg3_tx_ring_info *txb = &tnapi->tx_buffers[entry];
+
+ skb = txb->skb;
+ txb->skb = NULL;
pci_unmap_single(tnapi->tp->pdev,
dma_unmap_addr(txb, mapping),
skb_headlen(skb),
PCI_DMA_TODEVICE);
+
+ while (txb->fragmented) {
+ txb->fragmented = false;
+ entry = NEXT_TX(entry);
+ txb = &tnapi->tx_buffers[entry];
+ }
+
for (i = 0; i < last; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -5944,18 +6018,24 @@
pci_unmap_page(tnapi->tp->pdev,
dma_unmap_addr(txb, mapping),
frag->size, PCI_DMA_TODEVICE);
+
+ while (txb->fragmented) {
+ txb->fragmented = false;
+ entry = NEXT_TX(entry);
+ txb = &tnapi->tx_buffers[entry];
+ }
}
}
/* Workaround 4GB and 40-bit hardware DMA bugs. */
static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
struct sk_buff *skb,
- u32 base_flags, u32 mss)
+ u32 *entry, u32 *budget,
+ u32 base_flags, u32 mss, u32 vlan)
{
struct tg3 *tp = tnapi->tp;
struct sk_buff *new_skb;
dma_addr_t new_addr = 0;
- u32 entry = tnapi->tx_prod;
int ret = 0;
if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701)
@@ -5976,24 +6056,22 @@
PCI_DMA_TODEVICE);
/* Make sure the mapping succeeded */
if (pci_dma_mapping_error(tp->pdev, new_addr)) {
- ret = -1;
dev_kfree_skb(new_skb);
-
- /* Make sure new skb does not cross any 4G boundaries.
- * Drop the packet if it does.
- */
- } else if (tg3_4g_overflow_test(new_addr, new_skb->len)) {
- pci_unmap_single(tp->pdev, new_addr, new_skb->len,
- PCI_DMA_TODEVICE);
ret = -1;
- dev_kfree_skb(new_skb);
} else {
- tnapi->tx_buffers[entry].skb = new_skb;
- dma_unmap_addr_set(&tnapi->tx_buffers[entry],
+ base_flags |= TXD_FLAG_END;
+
+ tnapi->tx_buffers[*entry].skb = new_skb;
+ dma_unmap_addr_set(&tnapi->tx_buffers[*entry],
mapping, new_addr);
- tg3_set_txd(tnapi, entry, new_addr, new_skb->len,
- base_flags, 1 | (mss << 1));
+ if (tg3_tx_frag_set(tnapi, entry, budget, new_addr,
+ new_skb->len, base_flags,
+ mss, vlan)) {
+ tg3_tx_skb_unmap(tnapi, *entry, 0);
+ dev_kfree_skb(new_skb);
+ ret = -1;
+ }
}
}
@@ -6051,7 +6129,8 @@
static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct tg3 *tp = netdev_priv(dev);
- u32 len, entry, base_flags, mss;
+ u32 len, entry, base_flags, mss, vlan = 0;
+ u32 budget;
int i = -1, would_hit_hwbug;
dma_addr_t mapping;
struct tg3_napi *tnapi;
@@ -6063,12 +6142,14 @@
if (tg3_flag(tp, ENABLE_TSS))
tnapi++;
+ budget = tg3_tx_avail(tnapi);
+
/* We are running in BH disabled context with netif_tx_lock
* and TX reclaim runs via tp->napi.poll inside of a software
* interrupt. Furthermore, IRQ processing runs lockless so we have
* no IRQ context deadlocks to worry about either. Rejoice!
*/
- if (unlikely(tg3_tx_avail(tnapi) <= (skb_shinfo(skb)->nr_frags + 1))) {
+ if (unlikely(budget <= (skb_shinfo(skb)->nr_frags + 1))) {
if (!netif_tx_queue_stopped(txq)) {
netif_tx_stop_queue(txq);
@@ -6153,9 +6234,12 @@
}
}
- if (vlan_tx_tag_present(skb))
- base_flags |= (TXD_FLAG_VLAN |
- (vlan_tx_tag_get(skb) << 16));
+#ifdef BCM_KERNEL_SUPPORTS_8021Q
+ if (vlan_tx_tag_present(skb)) {
+ base_flags |= TXD_FLAG_VLAN;
+ vlan = vlan_tx_tag_get(skb);
+ }
+#endif
if (tg3_flag(tp, USE_JUMBO_BDFLAG) &&
!mss && skb->len > VLAN_ETH_FRAME_LEN)
@@ -6174,25 +6258,23 @@
would_hit_hwbug = 0;
- if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8)
- would_hit_hwbug = 1;
-
- if (tg3_4g_overflow_test(mapping, len))
- would_hit_hwbug = 1;
-
- if (tg3_40bit_overflow_test(tp, mapping, len))
- would_hit_hwbug = 1;
-
if (tg3_flag(tp, 5701_DMA_BUG))
would_hit_hwbug = 1;
- tg3_set_txd(tnapi, entry, mapping, len, base_flags,
- (skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
-
- entry = NEXT_TX(entry);
+ if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping, len, base_flags |
+ ((skb_shinfo(skb)->nr_frags == 0) ? TXD_FLAG_END : 0),
+ mss, vlan))
+ would_hit_hwbug = 1;
/* Now loop through additional data fragments, and queue them. */
if (skb_shinfo(skb)->nr_frags > 0) {
+ u32 tmp_mss = mss;
+
+ if (!tg3_flag(tp, HW_TSO_1) &&
+ !tg3_flag(tp, HW_TSO_2) &&
+ !tg3_flag(tp, HW_TSO_3))
+ tmp_mss = 0;
+
last = skb_shinfo(skb)->nr_frags - 1;
for (i = 0; i <= last; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -6209,39 +6291,25 @@
if (pci_dma_mapping_error(tp->pdev, mapping))
goto dma_error;
- if (tg3_flag(tp, SHORT_DMA_BUG) &&
- len <= 8)
+ if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping,
+ len, base_flags |
+ ((i == last) ? TXD_FLAG_END : 0),
+ tmp_mss, vlan))
would_hit_hwbug = 1;
-
- if (tg3_4g_overflow_test(mapping, len))
- would_hit_hwbug = 1;
-
- if (tg3_40bit_overflow_test(tp, mapping, len))
- would_hit_hwbug = 1;
-
- if (tg3_flag(tp, HW_TSO_1) ||
- tg3_flag(tp, HW_TSO_2) ||
- tg3_flag(tp, HW_TSO_3))
- tg3_set_txd(tnapi, entry, mapping, len,
- base_flags, (i == last)|(mss << 1));
- else
- tg3_set_txd(tnapi, entry, mapping, len,
- base_flags, (i == last));
-
- entry = NEXT_TX(entry);
}
}
if (would_hit_hwbug) {
- tg3_skb_error_unmap(tnapi, skb, i);
+ tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
/* If the workaround fails due to memory/mapping
* failure, silently drop this packet.
*/
- if (tigon3_dma_hwbug_workaround(tnapi, skb, base_flags, mss))
+ entry = tnapi->tx_prod;
+ budget = tg3_tx_avail(tnapi);
+ if (tigon3_dma_hwbug_workaround(tnapi, skb, &entry, &budget,
+ base_flags, mss, vlan))
goto out_unlock;
-
- entry = NEXT_TX(tnapi->tx_prod);
}
skb_tx_timestamp(skb);
@@ -6269,7 +6337,7 @@
return NETDEV_TX_OK;
dma_error:
- tg3_skb_error_unmap(tnapi, skb, i);
+ tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i);
dev_kfree_skb(skb);
tnapi->tx_buffers[tnapi->tx_prod].skb = NULL;
return NETDEV_TX_OK;
@@ -6602,35 +6670,13 @@
if (!tnapi->tx_buffers)
continue;
- for (i = 0; i < TG3_TX_RING_SIZE; ) {
- struct ring_info *txp;
- struct sk_buff *skb;
- unsigned int k;
+ for (i = 0; i < TG3_TX_RING_SIZE; i++) {
+ struct sk_buff *skb = tnapi->tx_buffers[i].skb;
- txp = &tnapi->tx_buffers[i];
- skb = txp->skb;
-
- if (skb == NULL) {
- i++;
+ if (!skb)
continue;
- }
- pci_unmap_single(tp->pdev,
- dma_unmap_addr(txp, mapping),
- skb_headlen(skb),
- PCI_DMA_TODEVICE);
- txp->skb = NULL;
-
- i++;
-
- for (k = 0; k < skb_shinfo(skb)->nr_frags; k++) {
- txp = &tnapi->tx_buffers[i & (TG3_TX_RING_SIZE - 1)];
- pci_unmap_page(tp->pdev,
- dma_unmap_addr(txp, mapping),
- skb_shinfo(skb)->frags[k].size,
- PCI_DMA_TODEVICE);
- i++;
- }
+ tg3_tx_skb_unmap(tnapi, i, skb_shinfo(skb)->nr_frags);
dev_kfree_skb_any(skb);
}
@@ -6762,9 +6808,9 @@
*/
if ((!i && !tg3_flag(tp, ENABLE_TSS)) ||
(i && tg3_flag(tp, ENABLE_TSS))) {
- tnapi->tx_buffers = kzalloc(sizeof(struct ring_info) *
- TG3_TX_RING_SIZE,
- GFP_KERNEL);
+ tnapi->tx_buffers = kzalloc(
+ sizeof(struct tg3_tx_ring_info) *
+ TG3_TX_RING_SIZE, GFP_KERNEL);
if (!tnapi->tx_buffers)
goto err_out;
@@ -8360,7 +8406,7 @@
/* Program the jumbo buffer descriptor ring control
* blocks on those devices that have them.
*/
- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0 ||
(tg3_flag(tp, JUMBO_CAPABLE) && !tg3_flag(tp, 5780_CLASS))) {
if (tg3_flag(tp, JUMBO_RING_ENABLE)) {
@@ -11204,6 +11250,7 @@
{
u32 mac_mode, rx_start_idx, rx_idx, tx_idx, opaque_key;
u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val;
+ u32 budget;
struct sk_buff *skb, *rx_skb;
u8 *tx_data;
dma_addr_t map;
@@ -11363,6 +11410,10 @@
return -EIO;
}
+ val = tnapi->tx_prod;
+ tnapi->tx_buffers[val].skb = skb;
+ dma_unmap_addr_set(&tnapi->tx_buffers[val], mapping, map);
+
tw32_f(HOSTCC_MODE, tp->coalesce_mode | HOSTCC_MODE_ENABLE |
rnapi->coal_now);
@@ -11370,8 +11421,13 @@
rx_start_idx = rnapi->hw_status->idx[0].rx_producer;
- tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len,
- base_flags, (mss << 1) | 1);
+ budget = tg3_tx_avail(tnapi);
+ if (tg3_tx_frag_set(tnapi, &val, &budget, map, tx_len,
+ base_flags | TXD_FLAG_END, mss, 0)) {
+ tnapi->tx_buffers[val].skb = NULL;
+ dev_kfree_skb(skb);
+ return -EIO;
+ }
tnapi->tx_prod++;
@@ -11394,7 +11450,7 @@
break;
}
- pci_unmap_single(tp->pdev, map, tx_len, PCI_DMA_TODEVICE);
+ tg3_tx_skb_unmap(tnapi, tnapi->tx_prod - 1, 0);
dev_kfree_skb(skb);
if (tx_idx != tnapi->tx_prod)
@@ -13817,7 +13873,7 @@
tg3_flag_set(tp, 5705_PLUS);
/* Determine TSO capabilities */
- if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+ if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0)
; /* Do nothing. HW bug. */
else if (tg3_flag(tp, 57765_PLUS))
tg3_flag_set(tp, HW_TSO_3);
@@ -13880,11 +13936,14 @@
if (tg3_flag(tp, 5755_PLUS))
tg3_flag_set(tp, SHORT_DMA_BUG);
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+ tg3_flag_set(tp, 4K_FIFO_LIMIT);
+
if (tg3_flag(tp, 5717_PLUS))
tg3_flag_set(tp, LRG_PROD_RING_CAP);
if (tg3_flag(tp, 57765_PLUS) &&
- GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5719)
+ tp->pci_chip_rev_id != CHIPREV_ID_5719_A0)
tg3_flag_set(tp, USE_JUMBO_BDFLAG);
if (!tg3_flag(tp, 5705_PLUS) ||
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 691539b..2ea456d 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2652,6 +2652,12 @@
DEFINE_DMA_UNMAP_ADDR(mapping);
};
+struct tg3_tx_ring_info {
+ struct sk_buff *skb;
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+ bool fragmented;
+};
+
struct tg3_link_config {
/* Describes what we're trying to get. */
u32 advertising;
@@ -2816,7 +2822,7 @@
u32 last_tx_cons;
u32 prodmbox;
struct tg3_tx_buffer_desc *tx_ring;
- struct ring_info *tx_buffers;
+ struct tg3_tx_ring_info *tx_buffers;
dma_addr_t status_mapping;
dma_addr_t rx_rcb_mapping;
@@ -2899,6 +2905,7 @@
TG3_FLAG_57765_PLUS,
TG3_FLAG_APE_HAS_NCSI,
TG3_FLAG_5717_PLUS,
+ TG3_FLAG_4K_FIFO_LIMIT,
/* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */
TG3_FLAG_NUMBER_OF_FLAGS, /* Last entry in enum TG3_FLAGS */
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9a6b382..71f3d1a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -528,6 +528,7 @@
dev->netdev_ops = &tap_netdev_ops;
/* Ethernet TAP Device */
ether_setup(dev);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
random_ether_addr(dev->dev_addr);
diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index 5250288..c5c4b4d 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -314,12 +314,11 @@
skb_pull(skb, 4);
while (skb->len > 0) {
- if ((short)(header & 0x0000ffff) !=
- ~((short)((header & 0xffff0000) >> 16))) {
+ if ((header & 0x07ff) != ((~header >> 16) & 0x07ff))
netdev_err(dev->net, "asix_rx_fixup() Bad Header Length\n");
- }
+
/* get the packet length */
- size = (u16) (header & 0x0000ffff);
+ size = (u16) (header & 0x000007ff);
if ((skb->len) - ((size + 1) & 0xfffe) == 0) {
u8 alignment = (unsigned long)skb->data & 0x3;
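
The asix RX header packs the packet length in bits 0-10 and its ones' complement in bits 16-26, so the fixup now masks both halves with 0x07ff before comparing, where the old code compared full 16-bit halves and could misfire when the unused upper bits carry other information. A self-contained sketch of the corrected check:

    #include <stdbool.h>
    #include <stdint.h>

    static bool rx_header_ok(uint32_t header, uint16_t *size)
    {
            *size = header & 0x07ff;                     /* 11-bit length */
            return ((~header >> 16) & 0x07ff) == *size;  /* inverse copy */
    }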
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 7f78db7..5b23767 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -263,6 +263,8 @@
{
ether_setup(dev);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+
dev->netdev_ops = &veth_netdev_ops;
dev->ethtool_ops = &veth_ethtool_ops;
dev->features |= NETIF_F_LLTX;
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index b25c922..eb20281 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1074,9 +1074,10 @@
used = pvc_is_used(pvc);
- if (type == ARPHRD_ETHER)
+ if (type == ARPHRD_ETHER) {
dev = alloc_netdev(0, "pvceth%d", ether_setup);
- else
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ } else
dev = alloc_netdev(0, "pvc%d", pvc_setup);
if (!dev) {
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index 55cf71f..e1b3e3c 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -2823,6 +2823,7 @@
dev->wireless_data = &ai->wireless_data;
dev->irq = irq;
dev->base_addr = port;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
SET_NETDEV_DEV(dev, dmdev);
diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig
index d2293dc..3cab843 100644
--- a/drivers/net/wireless/b43/Kconfig
+++ b/drivers/net/wireless/b43/Kconfig
@@ -28,7 +28,7 @@
config B43_BCMA
bool "Support for BCMA bus"
- depends on B43 && BCMA && BROKEN
+ depends on B43 && BCMA
default y
config B43_SSB
diff --git a/drivers/net/wireless/b43/bus.c b/drivers/net/wireless/b43/bus.c
index 64c3f65..05f6c7b 100644
--- a/drivers/net/wireless/b43/bus.c
+++ b/drivers/net/wireless/b43/bus.c
@@ -244,10 +244,12 @@
#ifdef CONFIG_B43_BCMA
case B43_BUS_BCMA:
bcma_set_drvdata(dev->bdev, wldev);
+ break;
#endif
#ifdef CONFIG_B43_SSB
case B43_BUS_SSB:
ssb_set_drvdata(dev->sdev, wldev);
+ break;
#endif
}
}
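
The two added break statements matter when both B43_BUS_BCMA and B43_BUS_SSB are compiled in: previously the BCMA case fell straight through and also stamped the SSB drvdata. The fixed shape, annotated:

    switch (dev->bus_type) {
    #ifdef CONFIG_B43_BCMA
    case B43_BUS_BCMA:
            bcma_set_drvdata(dev->bdev, wldev);
            break;  /* previously fell through into the SSB case */
    #endif
    #ifdef CONFIG_B43_SSB
    case B43_BUS_SSB:
            ssb_set_drvdata(dev->sdev, wldev);
            break;
    #endif
    }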
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 032d466..26f1ab8 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -5350,6 +5350,7 @@
{
struct b43_wl *wl = ssb_get_devtypedata(sdev);
struct b43_wldev *wldev = ssb_get_drvdata(sdev);
+ struct b43_bus_dev *dev = wldev->dev;
/* We must cancel any work here before unregistering from ieee80211,
* as the ieee80211 unreg will destroy the workqueue. */
@@ -5365,14 +5366,14 @@
ieee80211_unregister_hw(wl->hw);
}
- b43_one_core_detach(wldev->dev);
+ b43_one_core_detach(dev);
if (list_empty(&wl->devlist)) {
b43_leds_unregister(wl);
/* Last core on the chip unregistered.
* We can destroy common struct b43_wl.
*/
- b43_wireless_exit(wldev->dev, wl);
+ b43_wireless_exit(dev, wl);
}
}
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index d508482..89a116f 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -855,6 +855,7 @@
iface = netdev_priv(dev);
ether_setup(dev);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
/* kernel callbacks */
if (iface) {
diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c
index 0372315..c77e054 100644
--- a/drivers/nfc/pn533.c
+++ b/drivers/nfc/pn533.c
@@ -1596,7 +1596,7 @@
usb_free_urb(dev->out_urb);
kfree(dev);
- nfc_dev_info(&dev->interface->dev, "NXP PN533 NFC device disconnected");
+ nfc_dev_info(&interface->dev, "NXP PN533 NFC device disconnected");
}
static struct usb_driver pn533_driver = {
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index 77cb2a1..81525ae 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -55,7 +55,7 @@
SMBIOS_ATTR_INSTANCE_SHOW,
};
-static mode_t
+static size_t
find_smbios_instance_string(struct pci_dev *pdev, char *buf,
enum smbios_attr_enum attribute)
{
diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h
index 081c171..5ce5170 100644
--- a/drivers/scsi/be2iscsi/be_main.h
+++ b/drivers/scsi/be2iscsi/be_main.h
@@ -397,7 +397,7 @@
};
struct be_cmd_bhs {
- struct iscsi_cmd iscsi_hdr;
+ struct iscsi_scsi_req iscsi_hdr;
unsigned char pad1[16];
struct pdu_data_out iscsi_data_pdu;
unsigned char pad2[BE_SENSE_INFO_SIZE -
@@ -428,7 +428,7 @@
};
struct be_status_bhs {
- struct iscsi_cmd iscsi_hdr;
+ struct iscsi_scsi_req iscsi_hdr;
unsigned char pad1[16];
/**
* The plus 2 below is to hold the sense info length that gets
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index 030a96c..9ae80cd 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -332,11 +332,11 @@
{
struct bnx2i_cmd *bnx2i_cmd;
struct bnx2i_login_request *login_wqe;
- struct iscsi_login *login_hdr;
+ struct iscsi_login_req *login_hdr;
u32 dword;
bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data;
- login_hdr = (struct iscsi_login *)task->hdr;
+ login_hdr = (struct iscsi_login_req *)task->hdr;
login_wqe = (struct bnx2i_login_request *)
bnx2i_conn->ep->qp.sq_prod_qe;
@@ -1349,7 +1349,7 @@
struct bnx2i_cmd_response *resp_cqe;
struct bnx2i_cmd *bnx2i_cmd;
struct iscsi_task *task;
- struct iscsi_cmd_rsp *hdr;
+ struct iscsi_scsi_rsp *hdr;
u32 datalen = 0;
resp_cqe = (struct bnx2i_cmd_response *)cqe;
@@ -1376,7 +1376,7 @@
}
bnx2i_iscsi_unmap_sg_list(bnx2i_cmd);
- hdr = (struct iscsi_cmd_rsp *)task->hdr;
+ hdr = (struct iscsi_scsi_rsp *)task->hdr;
resp_cqe = (struct bnx2i_cmd_response *)cqe;
hdr->opcode = resp_cqe->op_code;
hdr->max_cmdsn = cpu_to_be32(resp_cqe->max_cmd_sn);
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
index 5c55a75..cffd4d7 100644
--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -1213,7 +1213,7 @@
struct bnx2i_conn *bnx2i_conn = conn->dd_data;
struct scsi_cmnd *sc = task->sc;
struct bnx2i_cmd *cmd = task->dd_data;
- struct iscsi_cmd *hdr = (struct iscsi_cmd *) task->hdr;
+ struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
if (atomic_read(&bnx2i_conn->ep->num_active_cmds) + 1 >
hba->max_sqes)
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index d7a4120..256a999 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -84,22 +84,6 @@
__func__, ##arg); \
} while (0);
-/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
-#define SNA32_CHECK 2147483648UL
-
-static int iscsi_sna_lt(u32 n1, u32 n2)
-{
- return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
- (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
-}
-
-/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
-static int iscsi_sna_lte(u32 n1, u32 n2)
-{
- return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
- (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
-}
-
inline void iscsi_conn_queue_work(struct iscsi_conn *conn)
{
struct Scsi_Host *shost = conn->session->host;
@@ -360,7 +344,7 @@
struct iscsi_conn *conn = task->conn;
struct iscsi_session *session = conn->session;
struct scsi_cmnd *sc = task->sc;
- struct iscsi_cmd *hdr;
+ struct iscsi_scsi_req *hdr;
unsigned hdrlength, cmd_len;
itt_t itt;
int rc;
@@ -374,7 +358,7 @@
if (rc)
return rc;
}
- hdr = (struct iscsi_cmd *) task->hdr;
+ hdr = (struct iscsi_scsi_req *)task->hdr;
itt = hdr->itt;
memset(hdr, 0, sizeof(*hdr));
@@ -830,7 +814,7 @@
struct iscsi_task *task, char *data,
int datalen)
{
- struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr;
+ struct iscsi_scsi_rsp *rhdr = (struct iscsi_scsi_rsp *)hdr;
struct iscsi_session *session = conn->session;
struct scsi_cmnd *sc = task->sc;
diff --git a/drivers/staging/ath6kl/os/linux/ar6000_drv.c b/drivers/staging/ath6kl/os/linux/ar6000_drv.c
index 499b7a9..32ee39a 100644
--- a/drivers/staging/ath6kl/os/linux/ar6000_drv.c
+++ b/drivers/staging/ath6kl/os/linux/ar6000_drv.c
@@ -6205,6 +6205,7 @@
ether_setup(dev);
init_netdev(dev, ap_ifname);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
if (register_netdev(dev)) {
AR_DEBUG_PRINTF(ATH_DEBUG_ERR,("ar6000_create_ap_interface: register_netdev failed\n"));
diff --git a/drivers/staging/brcm80211/brcmsmac/mac80211_if.h b/drivers/staging/brcm80211/brcmsmac/mac80211_if.h
index 5711e7c..40e3d37 100644
--- a/drivers/staging/brcm80211/brcmsmac/mac80211_if.h
+++ b/drivers/staging/brcm80211/brcmsmac/mac80211_if.h
@@ -24,8 +24,6 @@
#define BRCMS_SET_SHORTSLOT_OVERRIDE 146
-#include <linux/interrupt.h>
-
/* BMAC Note: High-only driver is no longer working in softirq context as it needs to block and
* sleep so perimeter lock has to be a semaphore instead of spinlock. This requires timers to be
* submitted to workqueue instead of being on kernel timer
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index 5cb0f0e..b28794b 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -31,5 +31,6 @@
source "drivers/target/loopback/Kconfig"
source "drivers/target/tcm_fc/Kconfig"
+source "drivers/target/iscsi/Kconfig"
endif
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 21df808..1060c7b 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -24,5 +24,5 @@
# Fabric modules
obj-$(CONFIG_LOOPBACK_TARGET) += loopback/
-
obj-$(CONFIG_TCM_FC) += tcm_fc/
+obj-$(CONFIG_ISCSI_TARGET) += iscsi/
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
new file mode 100644
index 0000000..564ff4e
--- /dev/null
+++ b/drivers/target/iscsi/Kconfig
@@ -0,0 +1,8 @@
+config ISCSI_TARGET
+ tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
+ select CRYPTO
+ select CRYPTO_CRC32C
+ select CRYPTO_CRC32C_INTEL if X86
+ help
+ Say M here to enable the ConfigFS-enabled Linux-iSCSI.org iSCSI
+ Target Mode Stack.
diff --git a/drivers/target/iscsi/Makefile b/drivers/target/iscsi/Makefile
new file mode 100644
index 0000000..5b9a2cf
--- /dev/null
+++ b/drivers/target/iscsi/Makefile
@@ -0,0 +1,20 @@
+iscsi_target_mod-y += iscsi_target_parameters.o \
+ iscsi_target_seq_pdu_list.o \
+ iscsi_target_tq.o \
+ iscsi_target_auth.o \
+ iscsi_target_datain_values.o \
+ iscsi_target_device.o \
+ iscsi_target_erl0.o \
+ iscsi_target_erl1.o \
+ iscsi_target_erl2.o \
+ iscsi_target_login.o \
+ iscsi_target_nego.o \
+ iscsi_target_nodeattrib.o \
+ iscsi_target_tmr.o \
+ iscsi_target_tpg.o \
+ iscsi_target_util.o \
+ iscsi_target.o \
+ iscsi_target_configfs.o \
+ iscsi_target_stat.o
+
+obj-$(CONFIG_ISCSI_TARGET) += iscsi_target_mod.o
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
new file mode 100644
index 0000000..14c81c4
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -0,0 +1,4559 @@
+/*******************************************************************************
+ * This file contains main functions related to the iSCSI Target Core Driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/kthread.h>
+#include <linux/crypto.h>
+#include <linux/completion.h>
+#include <asm/unaligned.h>
+#include <scsi/scsi_device.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_tmr.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_configfs.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_tmr.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_stat.h"
+
+static LIST_HEAD(g_tiqn_list);
+static LIST_HEAD(g_np_list);
+static DEFINE_SPINLOCK(tiqn_lock);
+static DEFINE_SPINLOCK(np_lock);
+
+static struct idr tiqn_idr;
+struct idr sess_idr;
+struct mutex auth_id_lock;
+spinlock_t sess_idr_lock;
+
+struct iscsit_global *iscsit_global;
+
+struct kmem_cache *lio_cmd_cache;
+struct kmem_cache *lio_qr_cache;
+struct kmem_cache *lio_dr_cache;
+struct kmem_cache *lio_ooo_cache;
+struct kmem_cache *lio_r2t_cache;
+
+static int iscsit_handle_immediate_data(struct iscsi_cmd *,
+ unsigned char *buf, u32);
+static int iscsit_logout_post_handler(struct iscsi_cmd *, struct iscsi_conn *);
+
+struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *buf)
+{
+ struct iscsi_tiqn *tiqn = NULL;
+
+ spin_lock(&tiqn_lock);
+ list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) {
+ if (!strcmp(tiqn->tiqn, buf)) {
+
+ spin_lock(&tiqn->tiqn_state_lock);
+ if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) {
+ tiqn->tiqn_access_count++;
+ spin_unlock(&tiqn->tiqn_state_lock);
+ spin_unlock(&tiqn_lock);
+ return tiqn;
+ }
+ spin_unlock(&tiqn->tiqn_state_lock);
+ }
+ }
+ spin_unlock(&tiqn_lock);
+
+ return NULL;
+}
+
+static int iscsit_set_tiqn_shutdown(struct iscsi_tiqn *tiqn)
+{
+ spin_lock(&tiqn->tiqn_state_lock);
+ if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) {
+ tiqn->tiqn_state = TIQN_STATE_SHUTDOWN;
+ spin_unlock(&tiqn->tiqn_state_lock);
+ return 0;
+ }
+ spin_unlock(&tiqn->tiqn_state_lock);
+
+ return -1;
+}
+
+void iscsit_put_tiqn_for_login(struct iscsi_tiqn *tiqn)
+{
+ spin_lock(&tiqn->tiqn_state_lock);
+ tiqn->tiqn_access_count--;
+ spin_unlock(&tiqn->tiqn_state_lock);
+}
+
+/*
+ * Note that IQN formatting is expected to be done in userspace, and
+ * no explicit IQN format checks are done here.
+ */
+struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
+{
+ struct iscsi_tiqn *tiqn = NULL;
+ int ret;
+
+ if (strlen(buf) > ISCSI_IQN_LEN) {
+ pr_err("Target IQN exceeds %d bytes\n",
+ ISCSI_IQN_LEN);
+ return ERR_PTR(-EINVAL);
+ }
+
+ tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL);
+ if (!tiqn) {
+ pr_err("Unable to allocate struct iscsi_tiqn\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sprintf(tiqn->tiqn, "%s", buf);
+ INIT_LIST_HEAD(&tiqn->tiqn_list);
+ INIT_LIST_HEAD(&tiqn->tiqn_tpg_list);
+ spin_lock_init(&tiqn->tiqn_state_lock);
+ spin_lock_init(&tiqn->tiqn_tpg_lock);
+ spin_lock_init(&tiqn->sess_err_stats.lock);
+ spin_lock_init(&tiqn->login_stats.lock);
+ spin_lock_init(&tiqn->logout_stats.lock);
+
+ if (!idr_pre_get(&tiqn_idr, GFP_KERNEL)) {
+ pr_err("idr_pre_get() for tiqn_idr failed\n");
+ kfree(tiqn);
+ return ERR_PTR(-ENOMEM);
+ }
+ tiqn->tiqn_state = TIQN_STATE_ACTIVE;
+
+ spin_lock(&tiqn_lock);
+ ret = idr_get_new(&tiqn_idr, NULL, &tiqn->tiqn_index);
+ if (ret < 0) {
+ pr_err("idr_get_new() failed for tiqn->tiqn_index\n");
+ spin_unlock(&tiqn_lock);
+ kfree(tiqn);
+ return ERR_PTR(ret);
+ }
+ list_add_tail(&tiqn->tiqn_list, &g_tiqn_list);
+ spin_unlock(&tiqn_lock);
+
+ pr_debug("CORE[0] - Added iSCSI Target IQN: %s\n", tiqn->tiqn);
+
+ return tiqn;
+
+}
+
+static void iscsit_wait_for_tiqn(struct iscsi_tiqn *tiqn)
+{
+ /*
+ * Wait for accesses to said struct iscsi_tiqn to end.
+ */
+ spin_lock(&tiqn->tiqn_state_lock);
+ while (tiqn->tiqn_access_count != 0) {
+ spin_unlock(&tiqn->tiqn_state_lock);
+ msleep(10);
+ spin_lock(&tiqn->tiqn_state_lock);
+ }
+ spin_unlock(&tiqn->tiqn_state_lock);
+}
+
+void iscsit_del_tiqn(struct iscsi_tiqn *tiqn)
+{
+ /*
+ * iscsit_set_tiqn_shutdown sets tiqn->tiqn_state = TIQN_STATE_SHUTDOWN
+ * while holding tiqn->tiqn_state_lock. This means that all subsequent
+ * attempts to access this struct iscsi_tiqn will fail from both transport
+ * fabric and control code paths.
+ */
+ if (iscsit_set_tiqn_shutdown(tiqn) < 0) {
+ pr_err("iscsit_set_tiqn_shutdown() failed\n");
+ return;
+ }
+
+ iscsit_wait_for_tiqn(tiqn);
+
+ spin_lock(&tiqn_lock);
+ list_del(&tiqn->tiqn_list);
+ idr_remove(&tiqn_idr, tiqn->tiqn_index);
+ spin_unlock(&tiqn_lock);
+
+ pr_debug("CORE[0] - Deleted iSCSI Target IQN: %s\n",
+ tiqn->tiqn);
+ kfree(tiqn);
+}
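
The tiqn lifetime above is a state flag plus an access count, both under tiqn_state_lock: gets succeed only while the tiqn is ACTIVE, shutdown flips the state so no new get can succeed, and the delete path waits for outstanding references to drain before freeing. The intended caller ordering, restated as a sketch:

    /* login path:
     *     tiqn = iscsit_get_tiqn_for_login(buf);  (NULL unless ACTIVE)
     *     ... use tiqn ...
     *     iscsit_put_tiqn_for_login(tiqn);
     *
     * teardown path:
     *     iscsit_set_tiqn_shutdown(tiqn);         (no new gets succeed)
     *     iscsit_wait_for_tiqn(tiqn);             (drain existing users)
     *     kfree(tiqn);                            (now safe to free)
     */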
+
+int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
+{
+ int ret;
+ /*
+ * Determine if the network portal is accepting storage traffic.
+ */
+ spin_lock_bh(&np->np_thread_lock);
+ if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+ spin_unlock_bh(&np->np_thread_lock);
+ return -1;
+ }
+ if (np->np_login_tpg) {
+ pr_err("np->np_login_tpg() is not NULL!\n");
+ spin_unlock_bh(&np->np_thread_lock);
+ return -1;
+ }
+ spin_unlock_bh(&np->np_thread_lock);
+ /*
+ * Determine if the portal group is accepting storage traffic.
+ */
+ spin_lock_bh(&tpg->tpg_state_lock);
+ if (tpg->tpg_state != TPG_STATE_ACTIVE) {
+ spin_unlock_bh(&tpg->tpg_state_lock);
+ return -1;
+ }
+ spin_unlock_bh(&tpg->tpg_state_lock);
+
+ /*
+ * Here we serialize access across the TIQN+TPG Tuple.
+ */
+ ret = mutex_lock_interruptible(&tpg->np_login_lock);
+ if ((ret != 0) || signal_pending(current))
+ return -1;
+
+ spin_lock_bh(&np->np_thread_lock);
+ np->np_login_tpg = tpg;
+ spin_unlock_bh(&np->np_thread_lock);
+
+ return 0;
+}
+
+int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
+{
+ struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+ spin_lock_bh(&np->np_thread_lock);
+ np->np_login_tpg = NULL;
+ spin_unlock_bh(&np->np_thread_lock);
+
+ mutex_unlock(&tpg->np_login_lock);
+
+ if (tiqn)
+ iscsit_put_tiqn_for_login(tiqn);
+
+ return 0;
+}
+
+static struct iscsi_np *iscsit_get_np(
+ struct __kernel_sockaddr_storage *sockaddr,
+ int network_transport)
+{
+ struct sockaddr_in *sock_in, *sock_in_e;
+ struct sockaddr_in6 *sock_in6, *sock_in6_e;
+ struct iscsi_np *np;
+ int ip_match = 0;
+ u16 port;
+
+ spin_lock_bh(&np_lock);
+ list_for_each_entry(np, &g_np_list, np_list) {
+ spin_lock(&np->np_thread_lock);
+ if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+ spin_unlock(&np->np_thread_lock);
+ continue;
+ }
+
+ if (sockaddr->ss_family == AF_INET6) {
+ sock_in6 = (struct sockaddr_in6 *)sockaddr;
+ sock_in6_e = (struct sockaddr_in6 *)&np->np_sockaddr;
+
+ if (!memcmp((void *)&sock_in6->sin6_addr.in6_u,
+ (void *)&sock_in6_e->sin6_addr.in6_u,
+ sizeof(struct in6_addr)))
+ ip_match = 1;
+
+ port = ntohs(sock_in6->sin6_port);
+ } else {
+ sock_in = (struct sockaddr_in *)sockaddr;
+ sock_in_e = (struct sockaddr_in *)&np->np_sockaddr;
+
+ if (sock_in->sin_addr.s_addr ==
+ sock_in_e->sin_addr.s_addr)
+ ip_match = 1;
+
+ port = ntohs(sock_in->sin_port);
+ }
+
+ if ((ip_match == 1) && (np->np_port == port) &&
+ (np->np_network_transport == network_transport)) {
+ /*
+ * Increment the np_exports reference count now to
+ * prevent iscsit_del_np() below from being called
+ * while iscsi_tpg_add_network_portal() is called.
+ */
+ np->np_exports++;
+ spin_unlock(&np->np_thread_lock);
+ spin_unlock_bh(&np_lock);
+ return np;
+ }
+ spin_unlock(&np->np_thread_lock);
+ }
+ spin_unlock_bh(&np_lock);
+
+ return NULL;
+}
+
+struct iscsi_np *iscsit_add_np(
+ struct __kernel_sockaddr_storage *sockaddr,
+ char *ip_str,
+ int network_transport)
+{
+ struct sockaddr_in *sock_in;
+ struct sockaddr_in6 *sock_in6;
+ struct iscsi_np *np;
+ int ret;
+ /*
+ * Locate the existing struct iscsi_np if already active.
+ */
+ np = iscsit_get_np(sockaddr, network_transport);
+ if (np)
+ return np;
+
+ np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL);
+ if (!np) {
+ pr_err("Unable to allocate memory for struct iscsi_np\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ np->np_flags |= NPF_IP_NETWORK;
+ if (sockaddr->ss_family == AF_INET6) {
+ sock_in6 = (struct sockaddr_in6 *)sockaddr;
+ snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str);
+ np->np_port = ntohs(sock_in6->sin6_port);
+ } else {
+ sock_in = (struct sockaddr_in *)sockaddr;
+ sprintf(np->np_ip, "%s", ip_str);
+ np->np_port = ntohs(sock_in->sin_port);
+ }
+
+ np->np_network_transport = network_transport;
+ spin_lock_init(&np->np_thread_lock);
+ init_completion(&np->np_restart_comp);
+ INIT_LIST_HEAD(&np->np_list);
+
+ ret = iscsi_target_setup_login_socket(np, sockaddr);
+ if (ret != 0) {
+ kfree(np);
+ return ERR_PTR(ret);
+ }
+
+ np->np_thread = kthread_run(iscsi_target_login_thread, np, "iscsi_np");
+ if (IS_ERR(np->np_thread)) {
+ pr_err("Unable to create kthread: iscsi_np\n");
+ ret = PTR_ERR(np->np_thread);
+ kfree(np);
+ return ERR_PTR(ret);
+ }
+ /*
+ * Increment the np_exports reference count now to prevent
+ * iscsit_del_np() below from being run while a new call to
+ * iscsi_tpg_add_network_portal() for a matching iscsi_np is
+ * active. We don't need to hold np->np_thread_lock at this
+ * point because iscsi_np has not been added to g_np_list yet.
+ */
+ np->np_exports = 1;
+
+ spin_lock_bh(&np_lock);
+ list_add_tail(&np->np_list, &g_np_list);
+ spin_unlock_bh(&np_lock);
+
+ pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n",
+ np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ?
+ "TCP" : "SCTP");
+
+ return np;
+}
+
+int iscsit_reset_np_thread(
+ struct iscsi_np *np,
+ struct iscsi_tpg_np *tpg_np,
+ struct iscsi_portal_group *tpg)
+{
+ spin_lock_bh(&np->np_thread_lock);
+ if (tpg && tpg_np) {
+ /*
+ * The reset operation need only be performed when the
+ * passed struct iscsi_portal_group has a login in progress
+ * to one of the network portals.
+ */
+ if (tpg_np->tpg_np->np_login_tpg != tpg) {
+ spin_unlock_bh(&np->np_thread_lock);
+ return 0;
+ }
+ }
+ if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) {
+ spin_unlock_bh(&np->np_thread_lock);
+ return 0;
+ }
+ np->np_thread_state = ISCSI_NP_THREAD_RESET;
+
+ if (np->np_thread) {
+ spin_unlock_bh(&np->np_thread_lock);
+ send_sig(SIGINT, np->np_thread, 1);
+ wait_for_completion(&np->np_restart_comp);
+ spin_lock_bh(&np->np_thread_lock);
+ }
+ spin_unlock_bh(&np->np_thread_lock);
+
+ return 0;
+}
+
+int iscsit_del_np_comm(struct iscsi_np *np)
+{
+ if (!np->np_socket)
+ return 0;
+
+ /*
+ * Some network transports allocate their own struct sock->file,
+ * see if we need to free any additional allocated resources.
+ */
+ if (np->np_flags & NPF_SCTP_STRUCT_FILE) {
+ kfree(np->np_socket->file);
+ np->np_socket->file = NULL;
+ }
+
+ sock_release(np->np_socket);
+ return 0;
+}
+
+int iscsit_del_np(struct iscsi_np *np)
+{
+ spin_lock_bh(&np->np_thread_lock);
+ np->np_exports--;
+ if (np->np_exports) {
+ spin_unlock_bh(&np->np_thread_lock);
+ return 0;
+ }
+ np->np_thread_state = ISCSI_NP_THREAD_SHUTDOWN;
+ spin_unlock_bh(&np->np_thread_lock);
+
+ if (np->np_thread) {
+ /*
+ * We need to send the signal to wakeup Linux/Net
+ * which may be sleeping in sock_accept()..
+ */
+ send_sig(SIGINT, np->np_thread, 1);
+ kthread_stop(np->np_thread);
+ }
+ iscsit_del_np_comm(np);
+
+ spin_lock_bh(&np_lock);
+ list_del(&np->np_list);
+ spin_unlock_bh(&np_lock);
+
+ pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n",
+ np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ?
+ "TCP" : "SCTP");
+
+ kfree(np);
+ return 0;
+}
+
+static int __init iscsi_target_init_module(void)
+{
+ int ret = 0;
+
+ pr_debug("iSCSI-Target "ISCSIT_VERSION"\n");
+
+ iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL);
+ if (!iscsit_global) {
+ pr_err("Unable to allocate memory for iscsit_global\n");
+		return -ENOMEM;
+ }
+ mutex_init(&auth_id_lock);
+ spin_lock_init(&sess_idr_lock);
+ idr_init(&tiqn_idr);
+ idr_init(&sess_idr);
+
+ ret = iscsi_target_register_configfs();
+ if (ret < 0)
+ goto out;
+
+ ret = iscsi_thread_set_init();
+ if (ret < 0)
+ goto configfs_out;
+
+ if (iscsi_allocate_thread_sets(TARGET_THREAD_SET_COUNT) !=
+ TARGET_THREAD_SET_COUNT) {
+ pr_err("iscsi_allocate_thread_sets() returned"
+ " unexpected value!\n");
+ goto ts_out1;
+ }
+
+ lio_cmd_cache = kmem_cache_create("lio_cmd_cache",
+ sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd),
+ 0, NULL);
+ if (!lio_cmd_cache) {
+ pr_err("Unable to kmem_cache_create() for"
+ " lio_cmd_cache\n");
+ goto ts_out2;
+ }
+
+ lio_qr_cache = kmem_cache_create("lio_qr_cache",
+ sizeof(struct iscsi_queue_req),
+ __alignof__(struct iscsi_queue_req), 0, NULL);
+ if (!lio_qr_cache) {
+ pr_err("nable to kmem_cache_create() for"
+ " lio_qr_cache\n");
+ goto cmd_out;
+ }
+
+ lio_dr_cache = kmem_cache_create("lio_dr_cache",
+ sizeof(struct iscsi_datain_req),
+ __alignof__(struct iscsi_datain_req), 0, NULL);
+ if (!lio_dr_cache) {
+ pr_err("Unable to kmem_cache_create() for"
+ " lio_dr_cache\n");
+ goto qr_out;
+ }
+
+ lio_ooo_cache = kmem_cache_create("lio_ooo_cache",
+ sizeof(struct iscsi_ooo_cmdsn),
+ __alignof__(struct iscsi_ooo_cmdsn), 0, NULL);
+ if (!lio_ooo_cache) {
+ pr_err("Unable to kmem_cache_create() for"
+ " lio_ooo_cache\n");
+ goto dr_out;
+ }
+
+ lio_r2t_cache = kmem_cache_create("lio_r2t_cache",
+ sizeof(struct iscsi_r2t), __alignof__(struct iscsi_r2t),
+ 0, NULL);
+ if (!lio_r2t_cache) {
+ pr_err("Unable to kmem_cache_create() for"
+ " lio_r2t_cache\n");
+ goto ooo_out;
+ }
+
+ if (iscsit_load_discovery_tpg() < 0)
+ goto r2t_out;
+
+ return ret;
+r2t_out:
+ kmem_cache_destroy(lio_r2t_cache);
+ooo_out:
+ kmem_cache_destroy(lio_ooo_cache);
+dr_out:
+ kmem_cache_destroy(lio_dr_cache);
+qr_out:
+ kmem_cache_destroy(lio_qr_cache);
+cmd_out:
+ kmem_cache_destroy(lio_cmd_cache);
+ts_out2:
+ iscsi_deallocate_thread_sets();
+ts_out1:
+ iscsi_thread_set_free();
+configfs_out:
+ iscsi_target_deregister_configfs();
+out:
+ kfree(iscsit_global);
+ return -ENOMEM;
+}
+
+static void __exit iscsi_target_cleanup_module(void)
+{
+ iscsi_deallocate_thread_sets();
+ iscsi_thread_set_free();
+ iscsit_release_discovery_tpg();
+ kmem_cache_destroy(lio_cmd_cache);
+ kmem_cache_destroy(lio_qr_cache);
+ kmem_cache_destroy(lio_dr_cache);
+ kmem_cache_destroy(lio_ooo_cache);
+ kmem_cache_destroy(lio_r2t_cache);
+
+ iscsi_target_deregister_configfs();
+
+ kfree(iscsit_global);
+}
+
+int iscsit_add_reject(
+ u8 reason,
+ int fail_conn,
+ unsigned char *buf,
+ struct iscsi_conn *conn)
+{
+ struct iscsi_cmd *cmd;
+ struct iscsi_reject *hdr;
+ int ret;
+
+ cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ if (!cmd)
+ return -1;
+
+ cmd->iscsi_opcode = ISCSI_OP_REJECT;
+ if (fail_conn)
+ cmd->cmd_flags |= ICF_REJECT_FAIL_CONN;
+
+ hdr = (struct iscsi_reject *) cmd->pdu;
+ hdr->reason = reason;
+
+ cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+ if (!cmd->buf_ptr) {
+ pr_err("Unable to allocate memory for cmd->buf_ptr\n");
+ iscsit_release_cmd(cmd);
+ return -1;
+ }
+ memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ cmd->i_state = ISTATE_SEND_REJECT;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+ ret = wait_for_completion_interruptible(&cmd->reject_comp);
+ if (ret != 0)
+ return -1;
+
+ return (!fail_conn) ? 0 : -1;
+}
+
+int iscsit_add_reject_from_cmd(
+ u8 reason,
+ int fail_conn,
+ int add_to_conn,
+ unsigned char *buf,
+ struct iscsi_cmd *cmd)
+{
+ struct iscsi_conn *conn;
+ struct iscsi_reject *hdr;
+ int ret;
+
+ if (!cmd->conn) {
+ pr_err("cmd->conn is NULL for ITT: 0x%08x\n",
+ cmd->init_task_tag);
+ return -1;
+ }
+ conn = cmd->conn;
+
+ cmd->iscsi_opcode = ISCSI_OP_REJECT;
+ if (fail_conn)
+ cmd->cmd_flags |= ICF_REJECT_FAIL_CONN;
+
+ hdr = (struct iscsi_reject *) cmd->pdu;
+ hdr->reason = reason;
+
+ cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+ if (!cmd->buf_ptr) {
+ pr_err("Unable to allocate memory for cmd->buf_ptr\n");
+ iscsit_release_cmd(cmd);
+ return -1;
+ }
+ memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN);
+
+ if (add_to_conn) {
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+ }
+
+ cmd->i_state = ISTATE_SEND_REJECT;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+ ret = wait_for_completion_interruptible(&cmd->reject_comp);
+ if (ret != 0)
+ return -1;
+
+ return (!fail_conn) ? 0 : -1;
+}
+
+/*
+ * Map some portion of the allocated scatterlist to an iovec, suitable for
+ * kernel sockets to copy data in/out. This handles both pages and slab-allocated
+ * buffers, since we have been tricky and mapped t_mem_sg to the buffer in
+ * either case (see iscsit_alloc_buffs)
+ */
+static int iscsit_map_iovec(
+ struct iscsi_cmd *cmd,
+ struct kvec *iov,
+ u32 data_offset,
+ u32 data_length)
+{
+ u32 i = 0;
+ struct scatterlist *sg;
+ unsigned int page_off;
+
+ /*
+ * We have a private mapping of the allocated pages in t_mem_sg.
+ * At this point, we also know each contains a page.
+ */
+ sg = &cmd->t_mem_sg[data_offset / PAGE_SIZE];
+ page_off = (data_offset % PAGE_SIZE);
+
+ cmd->first_data_sg = sg;
+ cmd->first_data_sg_off = page_off;
+
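+	/*
+	 * Walk the scatterlist, kmap()ing each backing page into one kvec
+	 * entry. Only the first entry may start at a non-zero page_off;
+	 * every later entry begins at offset zero within its page.
+	 */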
+ while (data_length) {
+ u32 cur_len = min_t(u32, data_length, sg->length - page_off);
+
+ iov[i].iov_base = kmap(sg_page(sg)) + sg->offset + page_off;
+ iov[i].iov_len = cur_len;
+
+ data_length -= cur_len;
+ page_off = 0;
+ sg = sg_next(sg);
+ i++;
+ }
+
+ cmd->kmapped_nents = i;
+
+ return i;
+}
+
+static void iscsit_unmap_iovec(struct iscsi_cmd *cmd)
+{
+ u32 i;
+ struct scatterlist *sg;
+
+ sg = cmd->first_data_sg;
+
+ for (i = 0; i < cmd->kmapped_nents; i++)
+ kunmap(sg_page(&sg[i]));
+}
+
+static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn)
+{
+ struct iscsi_cmd *cmd;
+
+ conn->exp_statsn = exp_statsn;
+
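+	/*
+	 * Every command sitting in ISTATE_SENT_STATUS with a StatSN below
+	 * the initiator's ExpStatSN has had its status acknowledged, so
+	 * queue it for removal.
+	 */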
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+ spin_lock(&cmd->istate_lock);
+ if ((cmd->i_state == ISTATE_SENT_STATUS) &&
+ (cmd->stat_sn < exp_statsn)) {
+ cmd->i_state = ISTATE_REMOVE;
+ spin_unlock(&cmd->istate_lock);
+ iscsit_add_cmd_to_immediate_queue(cmd, conn,
+ cmd->i_state);
+ continue;
+ }
+ spin_unlock(&cmd->istate_lock);
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+}
+
+static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd)
+{
+ u32 iov_count = (cmd->se_cmd.t_data_nents == 0) ? 1 :
+ cmd->se_cmd.t_data_nents;
+
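+	/*
+	 * Reserve extra kvec slots beyond the data scatterlist entries so
+	 * that the padding and DataDigest kvecs appended by the RX/TX
+	 * paths always fit.
+	 */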
+ iov_count += TRANSPORT_IOV_DATA_BUFFER;
+
+ cmd->iov_data = kzalloc(iov_count * sizeof(struct kvec), GFP_KERNEL);
+ if (!cmd->iov_data) {
+ pr_err("Unable to allocate cmd->iov_data\n");
+ return -ENOMEM;
+ }
+
+ cmd->orig_iov_data_count = iov_count;
+ return 0;
+}
+
+static int iscsit_alloc_buffs(struct iscsi_cmd *cmd)
+{
+ struct scatterlist *sgl;
+ u32 length = cmd->se_cmd.data_length;
+ int nents = DIV_ROUND_UP(length, PAGE_SIZE);
+ int i = 0, ret;
+ /*
+ * If no SCSI payload is present, allocate the default iovecs used for
+ * iSCSI PDU Header
+ */
+ if (!length)
+ return iscsit_allocate_iovecs(cmd);
+
+ sgl = kzalloc(sizeof(*sgl) * nents, GFP_KERNEL);
+ if (!sgl)
+ return -ENOMEM;
+
+ sg_init_table(sgl, nents);
+
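+	/*
+	 * Back the payload one zeroed page at a time; the final entry may
+	 * cover a partial page when the length is not PAGE_SIZE aligned.
+	 */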
+ while (length) {
+ int buf_size = min_t(int, length, PAGE_SIZE);
+ struct page *page;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ goto page_alloc_failed;
+
+ sg_set_page(&sgl[i], page, buf_size, 0);
+
+ length -= buf_size;
+ i++;
+ }
+
+ cmd->t_mem_sg = sgl;
+ cmd->t_mem_sg_nents = nents;
+
+ /* BIDI ops not supported */
+
+ /* Tell the core about our preallocated memory */
+ transport_generic_map_mem_to_cmd(&cmd->se_cmd, sgl, nents, NULL, 0);
+ /*
+ * Allocate iovecs for SCSI payload after transport_generic_map_mem_to_cmd
+ * so that cmd->se_cmd.t_tasks_se_num has been set.
+ */
+ ret = iscsit_allocate_iovecs(cmd);
+ if (ret < 0)
+ goto page_alloc_failed;
+
+ return 0;
+
+page_alloc_failed:
+	/*
+	 * Entry i itself was never assigned a page (or, on the iovec
+	 * failure path, is one past the last entry), so only release
+	 * pages below the current index before dropping the table.
+	 */
+	while (--i >= 0)
+		__free_page(sg_page(&sgl[i]));
+	kfree(sgl);
+	cmd->t_mem_sg = NULL;
+	return -ENOMEM;
+}
+
+static int iscsit_handle_scsi_cmd(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ int data_direction, cmdsn_ret = 0, immed_ret, ret, transport_ret;
+ int dump_immediate_data = 0, send_check_condition = 0, payload_length;
+ struct iscsi_cmd *cmd = NULL;
+ struct iscsi_scsi_req *hdr;
+
+ spin_lock_bh(&conn->sess->session_stats_lock);
+ conn->sess->cmd_pdus++;
+ if (conn->sess->se_sess->se_node_acl) {
+ spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+ conn->sess->se_sess->se_node_acl->num_cmds++;
+ spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+ }
+ spin_unlock_bh(&conn->sess->session_stats_lock);
+
+ hdr = (struct iscsi_scsi_req *) buf;
+ payload_length = ntoh24(hdr->dlength);
+ hdr->itt = be32_to_cpu(hdr->itt);
+ hdr->data_length = be32_to_cpu(hdr->data_length);
+ hdr->cmdsn = be32_to_cpu(hdr->cmdsn);
+ hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+
+	/* FIXME: Add checks for AdditionalHeaderSegment */
+
+ if (!(hdr->flags & ISCSI_FLAG_CMD_WRITE) &&
+ !(hdr->flags & ISCSI_FLAG_CMD_FINAL)) {
+ pr_err("ISCSI_FLAG_CMD_WRITE & ISCSI_FLAG_CMD_FINAL"
+ " not set. Bad iSCSI Initiator.\n");
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+ buf, conn);
+ }
+
+ if (((hdr->flags & ISCSI_FLAG_CMD_READ) ||
+ (hdr->flags & ISCSI_FLAG_CMD_WRITE)) && !hdr->data_length) {
+		/*
+		 * VMware ESX v3.0 uses a modified Cisco Initiator (v3.4.2)
+		 * that adds support for RESERVE/RELEASE. There is a bug in
+		 * this new functionality that sets the R/W bits when the
+		 * CDB carries no READ or WRITE data payloads.
+		 */
+ if ((hdr->cdb[0] == 0x16) || (hdr->cdb[0] == 0x17)) {
+ hdr->flags &= ~ISCSI_FLAG_CMD_READ;
+ hdr->flags &= ~ISCSI_FLAG_CMD_WRITE;
+ goto done;
+ }
+
+ pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE"
+ " set when Expected Data Transfer Length is 0 for"
+ " CDB: 0x%02x. Bad iSCSI Initiator.\n", hdr->cdb[0]);
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+ buf, conn);
+ }
+done:
+
+ if (!(hdr->flags & ISCSI_FLAG_CMD_READ) &&
+ !(hdr->flags & ISCSI_FLAG_CMD_WRITE) && (hdr->data_length != 0)) {
+ pr_err("ISCSI_FLAG_CMD_READ and/or ISCSI_FLAG_CMD_WRITE"
+ " MUST be set if Expected Data Transfer Length is not 0."
+ " Bad iSCSI Initiator\n");
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+ buf, conn);
+ }
+
+ if ((hdr->flags & ISCSI_FLAG_CMD_READ) &&
+ (hdr->flags & ISCSI_FLAG_CMD_WRITE)) {
+ pr_err("Bidirectional operations not supported!\n");
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+ buf, conn);
+ }
+
+ if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+ pr_err("Illegally set Immediate Bit in iSCSI Initiator"
+ " Scsi Command PDU.\n");
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+ buf, conn);
+ }
+
+ if (payload_length && !conn->sess->sess_ops->ImmediateData) {
+ pr_err("ImmediateData=No but DataSegmentLength=%u,"
+ " protocol error.\n", payload_length);
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ if ((hdr->data_length == payload_length) &&
+ (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))) {
+ pr_err("Expected Data Transfer Length and Length of"
+ " Immediate Data are the same, but ISCSI_FLAG_CMD_FINAL"
+ " bit is not set protocol error\n");
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ if (payload_length > hdr->data_length) {
+ pr_err("DataSegmentLength: %u is greater than"
+ " EDTL: %u, protocol error.\n", payload_length,
+ hdr->data_length);
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+ pr_err("DataSegmentLength: %u is greater than"
+ " MaxRecvDataSegmentLength: %u, protocol error.\n",
+ payload_length, conn->conn_ops->MaxRecvDataSegmentLength);
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ if (payload_length > conn->sess->sess_ops->FirstBurstLength) {
+ pr_err("DataSegmentLength: %u is greater than"
+ " FirstBurstLength: %u, protocol error.\n",
+ payload_length, conn->sess->sess_ops->FirstBurstLength);
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1,
+ buf, conn);
+ }
+
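+	/*
+	 * Derive the DMA direction from the R/W flags; the checks above
+	 * guarantee the two bits are never set together.
+	 */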
+ data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE :
+ (hdr->flags & ISCSI_FLAG_CMD_READ) ? DMA_FROM_DEVICE :
+ DMA_NONE;
+
+ cmd = iscsit_allocate_se_cmd(conn, hdr->data_length, data_direction,
+ (hdr->flags & ISCSI_FLAG_CMD_ATTR_MASK));
+ if (!cmd)
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1,
+ buf, conn);
+
+ pr_debug("Got SCSI Command, ITT: 0x%08x, CmdSN: 0x%08x,"
+ " ExpXferLen: %u, Length: %u, CID: %hu\n", hdr->itt,
+ hdr->cmdsn, hdr->data_length, payload_length, conn->cid);
+
+ cmd->iscsi_opcode = ISCSI_OP_SCSI_CMD;
+ cmd->i_state = ISTATE_NEW_CMD;
+ cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+ cmd->immediate_data = (payload_length) ? 1 : 0;
+ cmd->unsolicited_data = ((!(hdr->flags & ISCSI_FLAG_CMD_FINAL) &&
+ (hdr->flags & ISCSI_FLAG_CMD_WRITE)) ? 1 : 0);
+ if (cmd->unsolicited_data)
+ cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA;
+
+ conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
+ if (hdr->flags & ISCSI_FLAG_CMD_READ) {
+ spin_lock_bh(&conn->sess->ttt_lock);
+ cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+ if (cmd->targ_xfer_tag == 0xFFFFFFFF)
+ cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+ spin_unlock_bh(&conn->sess->ttt_lock);
+ } else if (hdr->flags & ISCSI_FLAG_CMD_WRITE)
+ cmd->targ_xfer_tag = 0xFFFFFFFF;
+ cmd->cmd_sn = hdr->cmdsn;
+ cmd->exp_stat_sn = hdr->exp_statsn;
+ cmd->first_burst_len = payload_length;
+
+ if (cmd->data_direction == DMA_FROM_DEVICE) {
+ struct iscsi_datain_req *dr;
+
+ dr = iscsit_allocate_datain_req();
+ if (!dr)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, 1, buf, cmd);
+
+ iscsit_attach_datain_req(cmd, dr);
+ }
+
+ /*
+ * The CDB is going to an se_device_t.
+ */
+ ret = iscsit_get_lun_for_cmd(cmd, hdr->cdb,
+ get_unaligned_le64(&hdr->lun));
+ if (ret < 0) {
+ if (cmd->se_cmd.scsi_sense_reason == TCM_NON_EXISTENT_LUN) {
+ pr_debug("Responding to non-acl'ed,"
+ " non-existent or non-exported iSCSI LUN:"
+ " 0x%016Lx\n", get_unaligned_le64(&hdr->lun));
+ }
+ if (ret == PYX_TRANSPORT_OUT_OF_MEMORY_RESOURCES)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, 1, buf, cmd);
+
+ send_check_condition = 1;
+ goto attach_cmd;
+ }
+ /*
+ * The Initiator Node has access to the LUN (the addressing method
+ * is handled inside of iscsit_get_lun_for_cmd()). Now it's time to
+	 * allocate 1->N transport tasks (depending on sector count and
+	 * maximum request size the physical HBA(s) can handle).
+ */
+ transport_ret = transport_generic_allocate_tasks(&cmd->se_cmd, hdr->cdb);
+ if (transport_ret == -ENOMEM) {
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, 1, buf, cmd);
+ } else if (transport_ret == -EINVAL) {
+ /*
+ * Unsupported SAM Opcode. CHECK_CONDITION will be sent
+ * in iscsit_execute_cmd() during the CmdSN OOO Execution
+		 * Mechanism.
+ */
+ send_check_condition = 1;
+ } else {
+ if (iscsit_decide_list_to_build(cmd, payload_length) < 0)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, 1, buf, cmd);
+ }
+
+attach_cmd:
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+ /*
+ * Check if we need to delay processing because of ALUA
+ * Active/NonOptimized primary access state..
+ */
+ core_alua_check_nonop_delay(&cmd->se_cmd);
+ /*
+ * Allocate and setup SGL used with transport_generic_map_mem_to_cmd().
+ * also call iscsit_allocate_iovecs()
+ */
+ ret = iscsit_alloc_buffs(cmd);
+ if (ret < 0)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, 1, buf, cmd);
+ /*
+ * Check the CmdSN against ExpCmdSN/MaxCmdSN here if
+ * the Immediate Bit is not set, and no Immediate
+ * Data is attached.
+ *
+ * A PDU/CmdSN carrying Immediate Data can only
+ * be processed after the DataCRC has passed.
+ * If the DataCRC fails, the CmdSN MUST NOT
+ * be acknowledged. (See below)
+ */
+ if (!cmd->immediate_data) {
+ cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+ if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+ }
+
+ iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+ /*
+ * If no Immediate Data is attached, it's OK to return now.
+ */
+ if (!cmd->immediate_data) {
+ if (send_check_condition)
+ return 0;
+
+ if (cmd->unsolicited_data) {
+ iscsit_set_dataout_sequence_values(cmd);
+
+ spin_lock_bh(&cmd->dataout_timeout_lock);
+ iscsit_start_dataout_timer(cmd, cmd->conn);
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ }
+
+ return 0;
+ }
+
+ /*
+ * Early CHECK_CONDITIONs never make it to the transport processing
+ * thread. They are processed in CmdSN order by
+ * iscsit_check_received_cmdsn() below.
+ */
+ if (send_check_condition) {
+ immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION;
+ dump_immediate_data = 1;
+ goto after_immediate_data;
+ }
+ /*
+ * Call directly into transport_generic_new_cmd() to perform
+ * the backend memory allocation.
+ */
+ ret = transport_generic_new_cmd(&cmd->se_cmd);
+ if ((ret < 0) || (cmd->se_cmd.se_cmd_flags & SCF_SE_CMD_FAILED)) {
+ immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION;
+ dump_immediate_data = 1;
+ goto after_immediate_data;
+ }
+
+ immed_ret = iscsit_handle_immediate_data(cmd, buf, payload_length);
+after_immediate_data:
+ if (immed_ret == IMMEDIATE_DATA_NORMAL_OPERATION) {
+ /*
+ * A PDU/CmdSN carrying Immediate Data passed
+ * DataCRC, check against ExpCmdSN/MaxCmdSN if
+ * Immediate Bit is not set.
+ */
+ cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+ /*
+ * Special case for Unsupported SAM WRITE Opcodes
+ * and ImmediateData=Yes.
+ */
+ if (dump_immediate_data) {
+ if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+ return -1;
+ } else if (cmd->unsolicited_data) {
+ iscsit_set_dataout_sequence_values(cmd);
+
+ spin_lock_bh(&cmd->dataout_timeout_lock);
+ iscsit_start_dataout_timer(cmd, cmd->conn);
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ }
+
+ if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+
+ } else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) {
+ /*
+ * Immediate Data failed DataCRC and ERL>=1,
+ * silently drop this PDU and let the initiator
+ * plug the CmdSN gap.
+ *
+ * FIXME: Send Unsolicited NOPIN with reserved
+ * TTT here to help the initiator figure out
+ * the missing CmdSN, although they should be
+ * intelligent enough to determine the missing
+ * CmdSN and issue a retry to plug the sequence.
+ */
+ cmd->i_state = ISTATE_REMOVE;
+ iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+ } else /* immed_ret == IMMEDIATE_DATA_CANNOT_RECOVER */
+ return -1;
+
+ return 0;
+}
+
+static u32 iscsit_do_crypto_hash_sg(
+ struct hash_desc *hash,
+ struct iscsi_cmd *cmd,
+ u32 data_offset,
+ u32 data_length,
+ u32 padding,
+ u8 *pad_bytes)
+{
+ u32 data_crc;
+ u32 i;
+ struct scatterlist *sg;
+ unsigned int page_off;
+
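+	/*
+	 * Feed the payload's scatterlist segments into the CRC32C
+	 * transform, then fold in any pad bytes before finalizing.
+	 */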
+ crypto_hash_init(hash);
+
+ sg = cmd->first_data_sg;
+ page_off = cmd->first_data_sg_off;
+
+ i = 0;
+ while (data_length) {
+ u32 cur_len = min_t(u32, data_length, (sg[i].length - page_off));
+
+ crypto_hash_update(hash, &sg[i], cur_len);
+
+ data_length -= cur_len;
+ page_off = 0;
+ i++;
+ }
+
+ if (padding) {
+ struct scatterlist pad_sg;
+
+ sg_init_one(&pad_sg, pad_bytes, padding);
+ crypto_hash_update(hash, &pad_sg, padding);
+ }
+ crypto_hash_final(hash, (u8 *) &data_crc);
+
+ return data_crc;
+}
+
+static void iscsit_do_crypto_hash_buf(
+ struct hash_desc *hash,
+ unsigned char *buf,
+ u32 payload_length,
+ u32 padding,
+ u8 *pad_bytes,
+ u8 *data_crc)
+{
+ struct scatterlist sg;
+
+ crypto_hash_init(hash);
+
+ sg_init_one(&sg, (u8 *)buf, payload_length);
+ crypto_hash_update(hash, &sg, payload_length);
+
+ if (padding) {
+ sg_init_one(&sg, pad_bytes, padding);
+ crypto_hash_update(hash, &sg, padding);
+ }
+ crypto_hash_final(hash, data_crc);
+}
+
+static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf)
+{
+ int iov_ret, ooo_cmdsn = 0, ret;
+ u8 data_crc_failed = 0;
+ u32 checksum, iov_count = 0, padding = 0, rx_got = 0;
+ u32 rx_size = 0, payload_length;
+ struct iscsi_cmd *cmd = NULL;
+ struct se_cmd *se_cmd;
+ struct iscsi_data *hdr;
+ struct kvec *iov;
+ unsigned long flags;
+
+ hdr = (struct iscsi_data *) buf;
+ payload_length = ntoh24(hdr->dlength);
+ hdr->itt = be32_to_cpu(hdr->itt);
+ hdr->ttt = be32_to_cpu(hdr->ttt);
+ hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+ hdr->datasn = be32_to_cpu(hdr->datasn);
+ hdr->offset = be32_to_cpu(hdr->offset);
+
+ if (!payload_length) {
+ pr_err("DataOUT payload is ZERO, protocol error.\n");
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ /* iSCSI write */
+ spin_lock_bh(&conn->sess->session_stats_lock);
+ conn->sess->rx_data_octets += payload_length;
+ if (conn->sess->se_sess->se_node_acl) {
+ spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+ conn->sess->se_sess->se_node_acl->write_bytes += payload_length;
+ spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+ }
+ spin_unlock_bh(&conn->sess->session_stats_lock);
+
+ if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+ pr_err("DataSegmentLength: %u is greater than"
+ " MaxRecvDataSegmentLength: %u\n", payload_length,
+ conn->conn_ops->MaxRecvDataSegmentLength);
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt,
+ payload_length);
+ if (!cmd)
+ return 0;
+
+ pr_debug("Got DataOut ITT: 0x%08x, TTT: 0x%08x,"
+ " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n",
+ hdr->itt, hdr->ttt, hdr->datasn, hdr->offset,
+ payload_length, conn->cid);
+
+ if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+ pr_err("Command ITT: 0x%08x received DataOUT after"
+ " last DataOUT received, dumping payload\n",
+ cmd->init_task_tag);
+ return iscsit_dump_data_payload(conn, payload_length, 1);
+ }
+
+ if (cmd->data_direction != DMA_TO_DEVICE) {
+ pr_err("Command ITT: 0x%08x received DataOUT for a"
+ " NON-WRITE command.\n", cmd->init_task_tag);
+ return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+ }
+ se_cmd = &cmd->se_cmd;
+ iscsit_mod_dataout_timer(cmd);
+
+ if ((hdr->offset + payload_length) > cmd->data_length) {
+ pr_err("DataOut Offset: %u, Length %u greater than"
+ " iSCSI Command EDTL %u, protocol error.\n",
+ hdr->offset, payload_length, cmd->data_length);
+ return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID,
+ 1, 0, buf, cmd);
+ }
+
+ if (cmd->unsolicited_data) {
+ int dump_unsolicited_data = 0;
+
+ if (conn->sess->sess_ops->InitialR2T) {
+ pr_err("Received unexpected unsolicited data"
+ " while InitialR2T=Yes, protocol error.\n");
+ transport_send_check_condition_and_sense(&cmd->se_cmd,
+ TCM_UNEXPECTED_UNSOLICITED_DATA, 0);
+ return -1;
+ }
+		/*
+		 * Special case for dealing with Unsolicited DataOUT
+		 * and Unsupported SAM WRITE Opcodes and SE resource
+		 * allocation failures.
+		 */
+
+		/* Something's amiss if we're not in WRITE_PENDING state... */
+		spin_lock_irqsave(&se_cmd->t_state_lock, flags);
+		WARN_ON(se_cmd->t_state != TRANSPORT_WRITE_PENDING);
+		if (!(se_cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) ||
+		    (se_cmd->se_cmd_flags & SCF_SE_CMD_FAILED))
+			dump_unsolicited_data = 1;
+		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
+
+ if (dump_unsolicited_data) {
+ /*
+ * Check if a delayed TASK_ABORTED status needs to
+ * be sent now if the ISCSI_FLAG_CMD_FINAL has been
+			 * received with the unsolicited data out.
+ */
+ if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
+ iscsit_stop_dataout_timer(cmd);
+
+ transport_check_aborted_status(se_cmd,
+ (hdr->flags & ISCSI_FLAG_CMD_FINAL));
+ return iscsit_dump_data_payload(conn, payload_length, 1);
+ }
+ } else {
+ /*
+ * For the normal solicited data path:
+ *
+ * Check for a delayed TASK_ABORTED status and dump any
+ * incoming data out payload if one exists. Also, when the
+ * ISCSI_FLAG_CMD_FINAL is set to denote the end of the current
+ * data out sequence, we decrement outstanding_r2ts. Once
+ * outstanding_r2ts reaches zero, go ahead and send the delayed
+ * TASK_ABORTED status.
+ */
+ if (atomic_read(&se_cmd->t_transport_aborted) != 0) {
+ if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
+ if (--cmd->outstanding_r2ts < 1) {
+ iscsit_stop_dataout_timer(cmd);
+ transport_check_aborted_status(
+ se_cmd, 1);
+ }
+
+ return iscsit_dump_data_payload(conn, payload_length, 1);
+ }
+ }
+ /*
+	 * Perform DataSN, DataSequenceInOrder, DataPDUInOrder, and
+ * within-command recovery checks before receiving the payload.
+ */
+ ret = iscsit_check_pre_dataout(cmd, buf);
+ if (ret == DATAOUT_WITHIN_COMMAND_RECOVERY)
+ return 0;
+ else if (ret == DATAOUT_CANNOT_RECOVER)
+ return -1;
+
+ rx_size += payload_length;
+ iov = &cmd->iov_data[0];
+
+ iov_ret = iscsit_map_iovec(cmd, iov, hdr->offset, payload_length);
+ if (iov_ret < 0)
+ return -1;
+
+ iov_count += iov_ret;
+
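+	/*
+	 * iSCSI data segments are padded out to a 4-byte boundary;
+	 * (-len) & 3 yields the 0..3 pad bytes needed to get there.
+	 */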
+ padding = ((-payload_length) & 3);
+ if (padding != 0) {
+ iov[iov_count].iov_base = cmd->pad_bytes;
+ iov[iov_count++].iov_len = padding;
+ rx_size += padding;
+ pr_debug("Receiving %u padding bytes.\n", padding);
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ iov[iov_count].iov_base = &checksum;
+ iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+ rx_size += ISCSI_CRC_LEN;
+ }
+
+ rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size);
+
+ iscsit_unmap_iovec(cmd);
+
+ if (rx_got != rx_size)
+ return -1;
+
+ if (conn->conn_ops->DataDigest) {
+ u32 data_crc;
+
+ data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd,
+ hdr->offset, payload_length, padding,
+ cmd->pad_bytes);
+
+ if (checksum != data_crc) {
+ pr_err("ITT: 0x%08x, Offset: %u, Length: %u,"
+ " DataSN: 0x%08x, CRC32C DataDigest 0x%08x"
+ " does not match computed 0x%08x\n",
+ hdr->itt, hdr->offset, payload_length,
+ hdr->datasn, checksum, data_crc);
+ data_crc_failed = 1;
+ } else {
+ pr_debug("Got CRC32C DataDigest 0x%08x for"
+ " %u bytes of Data Out\n", checksum,
+ payload_length);
+ }
+ }
+ /*
+ * Increment post receive data and CRC values or perform
+ * within-command recovery.
+ */
+ ret = iscsit_check_post_dataout(cmd, buf, data_crc_failed);
+ if ((ret == DATAOUT_NORMAL) || (ret == DATAOUT_WITHIN_COMMAND_RECOVERY))
+ return 0;
+ else if (ret == DATAOUT_SEND_R2T) {
+ iscsit_set_dataout_sequence_values(cmd);
+ iscsit_build_r2ts_for_cmd(cmd, conn, 0);
+ } else if (ret == DATAOUT_SEND_TO_TRANSPORT) {
+ /*
+ * Handle extra special case for out of order
+ * Unsolicited Data Out.
+ */
+ spin_lock_bh(&cmd->istate_lock);
+ ooo_cmdsn = (cmd->cmd_flags & ICF_OOO_CMDSN);
+ cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
+ cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
+ spin_unlock_bh(&cmd->istate_lock);
+
+ iscsit_stop_dataout_timer(cmd);
+ return (!ooo_cmdsn) ? transport_generic_handle_data(
+ &cmd->se_cmd) : 0;
+ } else /* DATAOUT_CANNOT_RECOVER */
+ return -1;
+
+ return 0;
+}
+
+static int iscsit_handle_nop_out(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ unsigned char *ping_data = NULL;
+ int cmdsn_ret, niov = 0, ret = 0, rx_got, rx_size;
+ u32 checksum, data_crc, padding = 0, payload_length;
+ u64 lun;
+ struct iscsi_cmd *cmd = NULL;
+ struct kvec *iov = NULL;
+ struct iscsi_nopout *hdr;
+
+ hdr = (struct iscsi_nopout *) buf;
+ payload_length = ntoh24(hdr->dlength);
+ lun = get_unaligned_le64(&hdr->lun);
+ hdr->itt = be32_to_cpu(hdr->itt);
+ hdr->ttt = be32_to_cpu(hdr->ttt);
+ hdr->cmdsn = be32_to_cpu(hdr->cmdsn);
+ hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+
+ if ((hdr->itt == 0xFFFFFFFF) && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+ pr_err("NOPOUT ITT is reserved, but Immediate Bit is"
+ " not set, protocol error.\n");
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+ pr_err("NOPOUT Ping Data DataSegmentLength: %u is"
+ " greater than MaxRecvDataSegmentLength: %u, protocol"
+ " error.\n", payload_length,
+ conn->conn_ops->MaxRecvDataSegmentLength);
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ pr_debug("Got NOPOUT Ping %s ITT: 0x%08x, TTT: 0x%09x,"
+ " CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n",
+ (hdr->itt == 0xFFFFFFFF) ? "Response" : "Request",
+ hdr->itt, hdr->ttt, hdr->cmdsn, hdr->exp_statsn,
+ payload_length);
+ /*
+	 * This is not a response to an Unsolicited NopIN, which means
+	 * it can either be a NOPOUT ping request (with a valid ITT),
+	 * or a NOPOUT not requesting a NOPIN (with a reserved ITT).
+	 * Either way, make sure we allocate a struct iscsi_cmd, as both
+ * can contain ping data.
+ */
+ if (hdr->ttt == 0xFFFFFFFF) {
+ cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ if (!cmd)
+ return iscsit_add_reject(
+ ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, buf, conn);
+
+ cmd->iscsi_opcode = ISCSI_OP_NOOP_OUT;
+ cmd->i_state = ISTATE_SEND_NOPIN;
+ cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ?
+ 1 : 0);
+ conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
+ cmd->targ_xfer_tag = 0xFFFFFFFF;
+ cmd->cmd_sn = hdr->cmdsn;
+ cmd->exp_stat_sn = hdr->exp_statsn;
+ cmd->data_direction = DMA_NONE;
+ }
+
+ if (payload_length && (hdr->ttt == 0xFFFFFFFF)) {
+ rx_size = payload_length;
+ ping_data = kzalloc(payload_length + 1, GFP_KERNEL);
+ if (!ping_data) {
+ pr_err("Unable to allocate memory for"
+ " NOPOUT ping data.\n");
+ ret = -1;
+ goto out;
+ }
+
+ iov = &cmd->iov_misc[0];
+ iov[niov].iov_base = ping_data;
+ iov[niov++].iov_len = payload_length;
+
+ padding = ((-payload_length) & 3);
+ if (padding != 0) {
+ pr_debug("Receiving %u additional bytes"
+ " for padding.\n", padding);
+ iov[niov].iov_base = &cmd->pad_bytes;
+ iov[niov++].iov_len = padding;
+ rx_size += padding;
+ }
+ if (conn->conn_ops->DataDigest) {
+ iov[niov].iov_base = &checksum;
+ iov[niov++].iov_len = ISCSI_CRC_LEN;
+ rx_size += ISCSI_CRC_LEN;
+ }
+
+ rx_got = rx_data(conn, &cmd->iov_misc[0], niov, rx_size);
+ if (rx_got != rx_size) {
+ ret = -1;
+ goto out;
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+ ping_data, payload_length,
+ padding, cmd->pad_bytes,
+ (u8 *)&data_crc);
+
+ if (checksum != data_crc) {
+ pr_err("Ping data CRC32C DataDigest"
+ " 0x%08x does not match computed 0x%08x\n",
+ checksum, data_crc);
+ if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+ pr_err("Unable to recover from"
+ " NOPOUT Ping DataCRC failure while in"
+ " ERL=0.\n");
+ ret = -1;
+ goto out;
+ } else {
+ /*
+ * Silently drop this PDU and let the
+ * initiator plug the CmdSN gap.
+ */
+ pr_debug("Dropping NOPOUT"
+ " Command CmdSN: 0x%08x due to"
+ " DataCRC error.\n", hdr->cmdsn);
+ ret = 0;
+ goto out;
+ }
+ } else {
+ pr_debug("Got CRC32C DataDigest"
+ " 0x%08x for %u bytes of ping data.\n",
+ checksum, payload_length);
+ }
+ }
+
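+		/*
+		 * The +1 in the kzalloc() above leaves room for this NUL
+		 * terminator, so the ping data can be logged as a string.
+		 */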
+ ping_data[payload_length] = '\0';
+ /*
+ * Attach ping data to struct iscsi_cmd->buf_ptr.
+ */
+ cmd->buf_ptr = (void *)ping_data;
+ cmd->buf_ptr_size = payload_length;
+
+ pr_debug("Got %u bytes of NOPOUT ping"
+ " data.\n", payload_length);
+ pr_debug("Ping Data: \"%s\"\n", ping_data);
+ }
+
+ if (hdr->itt != 0xFFFFFFFF) {
+ if (!cmd) {
+ pr_err("Checking CmdSN for NOPOUT,"
+ " but cmd is NULL!\n");
+ return -1;
+ }
+ /*
+		 * Initiator is expecting a NopIN ping reply.
+ */
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+ if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+ iscsit_add_cmd_to_response_queue(cmd, conn,
+ cmd->i_state);
+ return 0;
+ }
+
+ cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+ if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+ ret = 0;
+ goto ping_out;
+ }
+ if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+
+ return 0;
+ }
+
+ if (hdr->ttt != 0xFFFFFFFF) {
+ /*
+		 * This was a response to an unsolicited NOPIN ping.
+ */
+ cmd = iscsit_find_cmd_from_ttt(conn, hdr->ttt);
+ if (!cmd)
+ return -1;
+
+ iscsit_stop_nopin_response_timer(conn);
+
+ cmd->i_state = ISTATE_REMOVE;
+ iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+ iscsit_start_nopin_timer(conn);
+ } else {
+ /*
+		 * Initiator is not expecting a NOPIN in response.
+ * Just ignore for now.
+ *
+ * iSCSI v19-91 10.18
+ * "A NOP-OUT may also be used to confirm a changed
+ * ExpStatSN if another PDU will not be available
+ * for a long time."
+ */
+ ret = 0;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (cmd)
+ iscsit_release_cmd(cmd);
+ping_out:
+ kfree(ping_data);
+ return ret;
+}
+
+static int iscsit_handle_task_mgt_cmd(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ struct iscsi_cmd *cmd;
+ struct se_tmr_req *se_tmr;
+ struct iscsi_tmr_req *tmr_req;
+ struct iscsi_tm *hdr;
+ u32 payload_length;
+ int out_of_order_cmdsn = 0;
+ int ret;
+ u8 function;
+
+ hdr = (struct iscsi_tm *) buf;
+ payload_length = ntoh24(hdr->dlength);
+ hdr->itt = be32_to_cpu(hdr->itt);
+ hdr->rtt = be32_to_cpu(hdr->rtt);
+ hdr->cmdsn = be32_to_cpu(hdr->cmdsn);
+ hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+ hdr->refcmdsn = be32_to_cpu(hdr->refcmdsn);
+ hdr->exp_datasn = be32_to_cpu(hdr->exp_datasn);
+ hdr->flags &= ~ISCSI_FLAG_CMD_FINAL;
+ function = hdr->flags;
+
+ pr_debug("Got Task Management Request ITT: 0x%08x, CmdSN:"
+ " 0x%08x, Function: 0x%02x, RefTaskTag: 0x%08x, RefCmdSN:"
+ " 0x%08x, CID: %hu\n", hdr->itt, hdr->cmdsn, function,
+ hdr->rtt, hdr->refcmdsn, conn->cid);
+
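+	/*
+	 * RFC 3720 only gives RefTaskTag meaning for ABORT_TASK and
+	 * TASK_REASSIGN; quietly normalize it to the reserved tag for
+	 * every other function.
+	 */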
+ if ((function != ISCSI_TM_FUNC_ABORT_TASK) &&
+ ((function != ISCSI_TM_FUNC_TASK_REASSIGN) &&
+ (hdr->rtt != ISCSI_RESERVED_TAG))) {
+ pr_err("RefTaskTag should be set to 0xFFFFFFFF.\n");
+ hdr->rtt = ISCSI_RESERVED_TAG;
+ }
+
+ if ((function == ISCSI_TM_FUNC_TASK_REASSIGN) &&
+ !(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+ pr_err("Task Management Request TASK_REASSIGN not"
+ " issued as immediate command, bad iSCSI Initiator"
+ "implementation\n");
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+ if ((function != ISCSI_TM_FUNC_ABORT_TASK) &&
+ (hdr->refcmdsn != ISCSI_RESERVED_TAG))
+ hdr->refcmdsn = ISCSI_RESERVED_TAG;
+
+ cmd = iscsit_allocate_se_cmd_for_tmr(conn, function);
+ if (!cmd)
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, buf, conn);
+
+ cmd->iscsi_opcode = ISCSI_OP_SCSI_TMFUNC;
+ cmd->i_state = ISTATE_SEND_TASKMGTRSP;
+ cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+ cmd->init_task_tag = hdr->itt;
+ cmd->targ_xfer_tag = 0xFFFFFFFF;
+ cmd->cmd_sn = hdr->cmdsn;
+ cmd->exp_stat_sn = hdr->exp_statsn;
+ se_tmr = cmd->se_cmd.se_tmr_req;
+ tmr_req = cmd->tmr_req;
+ /*
+ * Locate the struct se_lun for all TMRs not related to ERL=2 TASK_REASSIGN
+ */
+ if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
+ ret = iscsit_get_lun_for_tmr(cmd,
+ get_unaligned_le64(&hdr->lun));
+ if (ret < 0) {
+ cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+ se_tmr->response = ISCSI_TMF_RSP_NO_LUN;
+ goto attach;
+ }
+ }
+
+ switch (function) {
+ case ISCSI_TM_FUNC_ABORT_TASK:
+ se_tmr->response = iscsit_tmr_abort_task(cmd, buf);
+ if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) {
+ cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+ goto attach;
+ }
+ break;
+ case ISCSI_TM_FUNC_ABORT_TASK_SET:
+ case ISCSI_TM_FUNC_CLEAR_ACA:
+ case ISCSI_TM_FUNC_CLEAR_TASK_SET:
+ case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+ break;
+ case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+ if (iscsit_tmr_task_warm_reset(conn, tmr_req, buf) < 0) {
+ cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+ se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED;
+ goto attach;
+ }
+ break;
+ case ISCSI_TM_FUNC_TARGET_COLD_RESET:
+ if (iscsit_tmr_task_cold_reset(conn, tmr_req, buf) < 0) {
+ cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+ se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED;
+ goto attach;
+ }
+ break;
+ case ISCSI_TM_FUNC_TASK_REASSIGN:
+ se_tmr->response = iscsit_tmr_task_reassign(cmd, buf);
+ /*
+ * Perform sanity checks on the ExpDataSN only if the
+ * TASK_REASSIGN was successful.
+ */
+ if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE)
+ break;
+
+ if (iscsit_check_task_reassign_expdatasn(tmr_req, conn) < 0)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_BOOKMARK_INVALID, 1, 1,
+ buf, cmd);
+ break;
+ default:
+ pr_err("Unknown TMR function: 0x%02x, protocol"
+ " error.\n", function);
+ cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
+ se_tmr->response = ISCSI_TMF_RSP_NOT_SUPPORTED;
+ goto attach;
+ }
+
+ if ((function != ISCSI_TM_FUNC_TASK_REASSIGN) &&
+ (se_tmr->response == ISCSI_TMF_RSP_COMPLETE))
+ se_tmr->call_transport = 1;
+attach:
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+ int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+ if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP)
+ out_of_order_cmdsn = 1;
+ else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+ return 0;
+ } else { /* (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) */
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+ }
+ }
+ iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+ if (out_of_order_cmdsn)
+ return 0;
+ /*
+ * Found the referenced task, send to transport for processing.
+ */
+ if (se_tmr->call_transport)
+ return transport_generic_handle_tmr(&cmd->se_cmd);
+
+ /*
+ * Could not find the referenced LUN, task, or Task Management
+ * command not authorized or supported. Change state and
+ * let the tx_thread send the response.
+ *
+ * For connection recovery, this is also the default action for
+ * TMR TASK_REASSIGN.
+ */
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+}
+
+/* #warning FIXME: Support Text Command parameters besides SendTargets */
+static int iscsit_handle_text_cmd(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ char *text_ptr, *text_in;
+ int cmdsn_ret, niov = 0, rx_got, rx_size;
+ u32 checksum = 0, data_crc = 0, payload_length;
+ u32 padding = 0, text_length = 0;
+	struct iscsi_cmd *cmd;
+	u8 pad_bytes[4];
+ struct kvec iov[3];
+ struct iscsi_text *hdr;
+
+ hdr = (struct iscsi_text *) buf;
+ payload_length = ntoh24(hdr->dlength);
+ hdr->itt = be32_to_cpu(hdr->itt);
+ hdr->ttt = be32_to_cpu(hdr->ttt);
+ hdr->cmdsn = be32_to_cpu(hdr->cmdsn);
+ hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+
+ if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+ pr_err("Unable to accept text parameter length: %u"
+ "greater than MaxRecvDataSegmentLength %u.\n",
+ payload_length, conn->conn_ops->MaxRecvDataSegmentLength);
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x,"
+ " ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn,
+ hdr->exp_statsn, payload_length);
+
+ rx_size = text_length = payload_length;
+ if (text_length) {
+ text_in = kzalloc(text_length, GFP_KERNEL);
+ if (!text_in) {
+ pr_err("Unable to allocate memory for"
+ " incoming text parameters\n");
+ return -1;
+ }
+
+ memset(iov, 0, 3 * sizeof(struct kvec));
+ iov[niov].iov_base = text_in;
+ iov[niov++].iov_len = text_length;
+
+ padding = ((-payload_length) & 3);
+ if (padding != 0) {
+			/*
+			 * cmd has not been allocated yet; receive the pad
+			 * into a local buffer instead of cmd->pad_bytes.
+			 */
+			iov[niov].iov_base = pad_bytes;
+ iov[niov++].iov_len = padding;
+ rx_size += padding;
+ pr_debug("Receiving %u additional bytes"
+ " for padding.\n", padding);
+ }
+ if (conn->conn_ops->DataDigest) {
+ iov[niov].iov_base = &checksum;
+ iov[niov++].iov_len = ISCSI_CRC_LEN;
+ rx_size += ISCSI_CRC_LEN;
+ }
+
+ rx_got = rx_data(conn, &iov[0], niov, rx_size);
+ if (rx_got != rx_size) {
+ kfree(text_in);
+ return -1;
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+ text_in, text_length,
+						padding, pad_bytes,
+ (u8 *)&data_crc);
+
+ if (checksum != data_crc) {
+ pr_err("Text data CRC32C DataDigest"
+ " 0x%08x does not match computed"
+ " 0x%08x\n", checksum, data_crc);
+ if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+ pr_err("Unable to recover from"
+ " Text Data digest failure while in"
+ " ERL=0.\n");
+ kfree(text_in);
+ return -1;
+ } else {
+ /*
+ * Silently drop this PDU and let the
+ * initiator plug the CmdSN gap.
+ */
+ pr_debug("Dropping Text"
+ " Command CmdSN: 0x%08x due to"
+ " DataCRC error.\n", hdr->cmdsn);
+ kfree(text_in);
+ return 0;
+ }
+ } else {
+ pr_debug("Got CRC32C DataDigest"
+ " 0x%08x for %u bytes of text data.\n",
+ checksum, text_length);
+ }
+ }
+ text_in[text_length - 1] = '\0';
+ pr_debug("Successfully read %d bytes of text"
+ " data.\n", text_length);
+
+ if (strncmp("SendTargets", text_in, 11) != 0) {
+ pr_err("Received Text Data that is not"
+ " SendTargets, cannot continue.\n");
+ kfree(text_in);
+ return -1;
+ }
+ text_ptr = strchr(text_in, '=');
+ if (!text_ptr) {
+ pr_err("No \"=\" separator found in Text Data,"
+ " cannot continue.\n");
+ kfree(text_in);
+ return -1;
+ }
+ if (strncmp("=All", text_ptr, 4) != 0) {
+ pr_err("Unable to locate All value for"
+ " SendTargets key, cannot continue.\n");
+ kfree(text_in);
+ return -1;
+ }
+/*#warning Support SendTargets=(iSCSI Target Name/Nothing) values. */
+ kfree(text_in);
+ }
+
+ cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ if (!cmd)
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, buf, conn);
+
+ cmd->iscsi_opcode = ISCSI_OP_TEXT;
+ cmd->i_state = ISTATE_SEND_TEXTRSP;
+ cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+ conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
+ cmd->targ_xfer_tag = 0xFFFFFFFF;
+ cmd->cmd_sn = hdr->cmdsn;
+ cmd->exp_stat_sn = hdr->exp_statsn;
+ cmd->data_direction = DMA_NONE;
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+ if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
+ cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+ if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+
+ return 0;
+ }
+
+ return iscsit_execute_cmd(cmd, 0);
+}
+
+int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+ struct iscsi_conn *conn_p;
+ struct iscsi_session *sess = conn->sess;
+
+ pr_debug("Received logout request CLOSESESSION on CID: %hu"
+ " for SID: %u.\n", conn->cid, conn->sess->sid);
+
+ atomic_set(&sess->session_logout, 1);
+ atomic_set(&conn->conn_logout_remove, 1);
+ conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_SESSION;
+
+ iscsit_inc_conn_usage_count(conn);
+ iscsit_inc_session_usage_count(sess);
+
+ spin_lock_bh(&sess->conn_lock);
+ list_for_each_entry(conn_p, &sess->sess_conn_list, conn_list) {
+ if (conn_p->conn_state != TARG_CONN_STATE_LOGGED_IN)
+ continue;
+
+ pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+ conn_p->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+ }
+ spin_unlock_bh(&sess->conn_lock);
+
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+ return 0;
+}
+
+int iscsit_logout_closeconnection(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+ struct iscsi_conn *l_conn;
+ struct iscsi_session *sess = conn->sess;
+
+ pr_debug("Received logout request CLOSECONNECTION for CID:"
+ " %hu on CID: %hu.\n", cmd->logout_cid, conn->cid);
+
+ /*
+ * A Logout Request with a CLOSECONNECTION reason code for a CID
+ * can arrive on a connection with a differing CID.
+ */
+ if (conn->cid == cmd->logout_cid) {
+ spin_lock_bh(&conn->state_lock);
+ pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+ conn->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+
+ atomic_set(&conn->conn_logout_remove, 1);
+ conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_CONNECTION;
+ iscsit_inc_conn_usage_count(conn);
+
+ spin_unlock_bh(&conn->state_lock);
+ } else {
+ /*
+ * Handle all different cid CLOSECONNECTION requests in
+		 * iscsit_logout_post_handler_diffcid() so as to give enough
+		 * time for any non-immediate command's CmdSN to be
+ * acknowledged on the connection in question.
+ *
+ * Here we simply make sure the CID is still around.
+ */
+ l_conn = iscsit_get_conn_from_cid(sess,
+ cmd->logout_cid);
+ if (!l_conn) {
+ cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND;
+ iscsit_add_cmd_to_response_queue(cmd, conn,
+ cmd->i_state);
+ return 0;
+ }
+
+ iscsit_dec_conn_usage_count(l_conn);
+ }
+
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+ return 0;
+}
+
+int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+
+ pr_debug("Received explicit REMOVECONNFORRECOVERY logout for"
+ " CID: %hu on CID: %hu.\n", cmd->logout_cid, conn->cid);
+
+ if (sess->sess_ops->ErrorRecoveryLevel != 2) {
+ pr_err("Received Logout Request REMOVECONNFORRECOVERY"
+ " while ERL!=2.\n");
+ cmd->logout_response = ISCSI_LOGOUT_RECOVERY_UNSUPPORTED;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+ }
+
+ if (conn->cid == cmd->logout_cid) {
+ pr_err("Received Logout Request REMOVECONNFORRECOVERY"
+ " with CID: %hu on CID: %hu, implementation error.\n",
+ cmd->logout_cid, conn->cid);
+ cmd->logout_response = ISCSI_LOGOUT_CLEANUP_FAILED;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+ }
+
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+ return 0;
+}
+
+static int iscsit_handle_logout_cmd(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ int cmdsn_ret, logout_remove = 0;
+ u8 reason_code = 0;
+ struct iscsi_cmd *cmd;
+ struct iscsi_logout *hdr;
+ struct iscsi_tiqn *tiqn = iscsit_snmp_get_tiqn(conn);
+
+ hdr = (struct iscsi_logout *) buf;
+ reason_code = (hdr->flags & 0x7f);
+ hdr->itt = be32_to_cpu(hdr->itt);
+ hdr->cid = be16_to_cpu(hdr->cid);
+ hdr->cmdsn = be32_to_cpu(hdr->cmdsn);
+ hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+
+ if (tiqn) {
+ spin_lock(&tiqn->logout_stats.lock);
+ if (reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION)
+ tiqn->logout_stats.normal_logouts++;
+ else
+ tiqn->logout_stats.abnormal_logouts++;
+ spin_unlock(&tiqn->logout_stats.lock);
+ }
+
+ pr_debug("Got Logout Request ITT: 0x%08x CmdSN: 0x%08x"
+ " ExpStatSN: 0x%08x Reason: 0x%02x CID: %hu on CID: %hu\n",
+ hdr->itt, hdr->cmdsn, hdr->exp_statsn, reason_code,
+ hdr->cid, conn->cid);
+
+ if (conn->conn_state != TARG_CONN_STATE_LOGGED_IN) {
+ pr_err("Received logout request on connection that"
+ " is not in logged in state, ignoring request.\n");
+ return 0;
+ }
+
+ cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ if (!cmd)
+ return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1,
+ buf, conn);
+
+ cmd->iscsi_opcode = ISCSI_OP_LOGOUT;
+ cmd->i_state = ISTATE_SEND_LOGOUTRSP;
+ cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0);
+ conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
+ cmd->targ_xfer_tag = 0xFFFFFFFF;
+ cmd->cmd_sn = hdr->cmdsn;
+ cmd->exp_stat_sn = hdr->exp_statsn;
+ cmd->logout_cid = hdr->cid;
+ cmd->logout_reason = reason_code;
+ cmd->data_direction = DMA_NONE;
+
+ /*
+ * We need to sleep in these cases (by returning 1) until the Logout
+ * Response gets sent in the tx thread.
+ */
+ if ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) ||
+ ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) &&
+ (hdr->cid == conn->cid)))
+ logout_remove = 1;
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (reason_code != ISCSI_LOGOUT_REASON_RECOVERY)
+ iscsit_ack_from_expstatsn(conn, hdr->exp_statsn);
+
+ /*
+ * Immediate commands are executed, well, immediately.
+ * Non-Immediate Logout Commands are executed in CmdSN order.
+ */
+ if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+ int ret = iscsit_execute_cmd(cmd, 0);
+
+ if (ret < 0)
+ return ret;
+ } else {
+ cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+ if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+ logout_remove = 0;
+ } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) {
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+ }
+ }
+
+ return logout_remove;
+}
+
+static int iscsit_handle_snack(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ u32 unpacked_lun;
+ u64 lun;
+ struct iscsi_snack *hdr;
+
+ hdr = (struct iscsi_snack *) buf;
+ hdr->flags &= ~ISCSI_FLAG_CMD_FINAL;
+ lun = get_unaligned_le64(&hdr->lun);
+ unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
+ hdr->itt = be32_to_cpu(hdr->itt);
+ hdr->ttt = be32_to_cpu(hdr->ttt);
+ hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+ hdr->begrun = be32_to_cpu(hdr->begrun);
+ hdr->runlength = be32_to_cpu(hdr->runlength);
+
+ pr_debug("Got ISCSI_INIT_SNACK, ITT: 0x%08x, ExpStatSN:"
+ " 0x%08x, Type: 0x%02x, BegRun: 0x%08x, RunLength: 0x%08x,"
+ " CID: %hu\n", hdr->itt, hdr->exp_statsn, hdr->flags,
+ hdr->begrun, hdr->runlength, conn->cid);
+
+ if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+ pr_err("Initiator sent SNACK request while in"
+ " ErrorRecoveryLevel=0.\n");
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+ /*
+ * SNACK_DATA and SNACK_R2T are both 0, so check which function to
+ * call from inside iscsi_send_recovery_datain_or_r2t().
+ */
+ switch (hdr->flags & ISCSI_FLAG_SNACK_TYPE_MASK) {
+ case 0:
+ return iscsit_handle_recovery_datain_or_r2t(conn, buf,
+ hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength);
+ case ISCSI_FLAG_SNACK_TYPE_STATUS:
+ return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt,
+ hdr->begrun, hdr->runlength);
+ case ISCSI_FLAG_SNACK_TYPE_DATA_ACK:
+ return iscsit_handle_data_ack(conn, hdr->ttt, hdr->begrun,
+ hdr->runlength);
+ case ISCSI_FLAG_SNACK_TYPE_RDATA:
+ /* FIXME: Support R-Data SNACK */
+ pr_err("R-Data SNACK Not Supported.\n");
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ default:
+ pr_err("Unknown SNACK type 0x%02x, protocol"
+ " error.\n", hdr->flags & 0x0f);
+ return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buf, conn);
+ }
+
+ return 0;
+}
+
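+/*
+ * If the peer has half-closed the TCP connection, give the socket a
+ * bounded window to finish shutting down before the RX thread bails.
+ */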
+static void iscsit_rx_thread_wait_for_tcp(struct iscsi_conn *conn)
+{
+ if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) ||
+ (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) {
+ wait_for_completion_interruptible_timeout(
+ &conn->rx_half_close_comp,
+ ISCSI_RX_THREAD_TCP_TIMEOUT * HZ);
+ }
+}
+
+static int iscsit_handle_immediate_data(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf,
+ u32 length)
+{
+ int iov_ret, rx_got = 0, rx_size = 0;
+ u32 checksum, iov_count = 0, padding = 0;
+ struct iscsi_conn *conn = cmd->conn;
+ struct kvec *iov;
+
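+	/*
+	 * Immediate Data lands at the front of the WRITE buffer; map the
+	 * iovecs starting at write_data_done, which tracks how much of
+	 * the payload has already been received.
+	 */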
+ iov_ret = iscsit_map_iovec(cmd, cmd->iov_data, cmd->write_data_done, length);
+ if (iov_ret < 0)
+ return IMMEDIATE_DATA_CANNOT_RECOVER;
+
+ rx_size = length;
+ iov_count = iov_ret;
+ iov = &cmd->iov_data[0];
+
+ padding = ((-length) & 3);
+ if (padding != 0) {
+ iov[iov_count].iov_base = cmd->pad_bytes;
+ iov[iov_count++].iov_len = padding;
+ rx_size += padding;
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ iov[iov_count].iov_base = &checksum;
+ iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+ rx_size += ISCSI_CRC_LEN;
+ }
+
+ rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size);
+
+ iscsit_unmap_iovec(cmd);
+
+ if (rx_got != rx_size) {
+ iscsit_rx_thread_wait_for_tcp(conn);
+ return IMMEDIATE_DATA_CANNOT_RECOVER;
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ u32 data_crc;
+
+ data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd,
+ cmd->write_data_done, length, padding,
+ cmd->pad_bytes);
+
+ if (checksum != data_crc) {
+ pr_err("ImmediateData CRC32C DataDigest 0x%08x"
+ " does not match computed 0x%08x\n", checksum,
+ data_crc);
+
+ if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+ pr_err("Unable to recover from"
+ " Immediate Data digest failure while"
+ " in ERL=0.\n");
+ iscsit_add_reject_from_cmd(
+ ISCSI_REASON_DATA_DIGEST_ERROR,
+ 1, 0, buf, cmd);
+ return IMMEDIATE_DATA_CANNOT_RECOVER;
+ } else {
+ iscsit_add_reject_from_cmd(
+ ISCSI_REASON_DATA_DIGEST_ERROR,
+ 0, 0, buf, cmd);
+ return IMMEDIATE_DATA_ERL1_CRC_FAILURE;
+ }
+ } else {
+ pr_debug("Got CRC32C DataDigest 0x%08x for"
+ " %u bytes of Immediate Data\n", checksum,
+ length);
+ }
+ }
+
+ cmd->write_data_done += length;
+
+ if (cmd->write_data_done == cmd->data_length) {
+ spin_lock_bh(&cmd->istate_lock);
+ cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
+ cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
+ spin_unlock_bh(&cmd->istate_lock);
+ }
+
+ return IMMEDIATE_DATA_NORMAL_OPERATION;
+}
+
+/*
+ * Called with sess->conn_lock held.
+ */
+/* #warning iscsi_build_conn_drop_async_message() only sends out on connections
+ with active network interface */
+static void iscsit_build_conn_drop_async_message(struct iscsi_conn *conn)
+{
+	struct iscsi_cmd *cmd;
+	struct iscsi_conn *conn_p;
+	bool found = false;
+
+	/*
+	 * Only send an Asynchronous Message on connections whose network
+	 * interface is still functional.
+	 */
+	list_for_each_entry(conn_p, &conn->sess->sess_conn_list, conn_list) {
+		if (conn_p->conn_state == TARG_CONN_STATE_LOGGED_IN) {
+			iscsit_inc_conn_usage_count(conn_p);
+			found = true;
+			break;
+		}
+	}
+
+	/*
+	 * list_for_each_entry() never leaves conn_p NULL, so track the
+	 * match explicitly instead of testing the iterator cursor.
+	 */
+	if (!found)
+		return;
+
+ cmd = iscsit_allocate_cmd(conn_p, GFP_KERNEL);
+ if (!cmd) {
+ iscsit_dec_conn_usage_count(conn_p);
+ return;
+ }
+
+ cmd->logout_cid = conn->cid;
+ cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT;
+ cmd->i_state = ISTATE_SEND_ASYNCMSG;
+
+ spin_lock_bh(&conn_p->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn_p->conn_cmd_list);
+ spin_unlock_bh(&conn_p->cmd_lock);
+
+ iscsit_add_cmd_to_response_queue(cmd, conn_p, cmd->i_state);
+ iscsit_dec_conn_usage_count(conn_p);
+}
+
+static int iscsit_send_conn_drop_async_message(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ struct iscsi_async *hdr;
+
+ cmd->tx_size = ISCSI_HDR_LEN;
+ cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT;
+
+ hdr = (struct iscsi_async *) cmd->pdu;
+ hdr->opcode = ISCSI_OP_ASYNC_EVENT;
+ hdr->flags = ISCSI_FLAG_CMD_FINAL;
+ cmd->init_task_tag = 0xFFFFFFFF;
+ cmd->targ_xfer_tag = 0xFFFFFFFF;
+ put_unaligned_be64(0xFFFFFFFFFFFFFFFFULL, &hdr->rsvd4[0]);
+ cmd->stat_sn = conn->stat_sn++;
+ hdr->statsn = cpu_to_be32(cmd->stat_sn);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->async_event = ISCSI_ASYNC_MSG_DROPPING_CONNECTION;
+ hdr->param1 = cpu_to_be16(cmd->logout_cid);
+ hdr->param2 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait);
+ hdr->param3 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Retain);
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ cmd->tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32C HeaderDigest to"
+ " Async Message 0x%08x\n", *header_digest);
+ }
+
+ cmd->iov_misc[0].iov_base = cmd->pdu;
+ cmd->iov_misc[0].iov_len = cmd->tx_size;
+ cmd->iov_misc_count = 1;
+
+ pr_debug("Sending Connection Dropped Async Message StatSN:"
+ " 0x%08x, for CID: %hu on CID: %hu\n", cmd->stat_sn,
+ cmd->logout_cid, conn->cid);
+ return 0;
+}
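
/*
 * The HeaderDigest/DataDigest values attached above are CRC32C (Castagnoli)
 * checksums computed through the kernel crypto API.  For reference, a
 * bitwise user-space sketch of the same algorithm (hypothetical demo code,
 * not the routine the driver actually calls):
 */

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t crc = 0xFFFFFFFF;
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)	/* reflected polynomial 0x82F63B78 */
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc ^ 0xFFFFFFFF;
}

int main(void)
{
	const char *check = "123456789";

	/* well-known CRC32C check value: expect 0xe3069283 */
	printf("0x%08x\n", crc32c(check, strlen(check)));
	return 0;
}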
+
+static int iscsit_send_data_in(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn,
+ int *eodr)
+{
+ int iov_ret = 0, set_statsn = 0;
+ u32 iov_count = 0, tx_size = 0;
+ struct iscsi_datain datain;
+ struct iscsi_datain_req *dr;
+ struct iscsi_data_rsp *hdr;
+ struct kvec *iov;
+
+ memset(&datain, 0, sizeof(struct iscsi_datain));
+ dr = iscsit_get_datain_values(cmd, &datain);
+ if (!dr) {
+ pr_err("iscsit_get_datain_values failed for ITT: 0x%08x\n",
+ cmd->init_task_tag);
+ return -1;
+ }
+
+ /*
+ * Be paranoid and double check the logic for now.
+ */
+ if ((datain.offset + datain.length) > cmd->data_length) {
+ pr_err("Command ITT: 0x%08x, datain.offset: %u and"
+ " datain.length: %u exceeds cmd->data_length: %u\n",
+ cmd->init_task_tag, datain.offset, datain.length,
+ cmd->data_length);
+ return -1;
+ }
+
+ spin_lock_bh(&conn->sess->session_stats_lock);
+ conn->sess->tx_data_octets += datain.length;
+ if (conn->sess->se_sess->se_node_acl) {
+ spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+ conn->sess->se_sess->se_node_acl->read_bytes += datain.length;
+ spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+ }
+ spin_unlock_bh(&conn->sess->session_stats_lock);
+	/*
+	 * Special case for successful execution w/ both DATAIN
+	 * and Sense Data.
+	 */
+ if ((datain.flags & ISCSI_FLAG_DATA_STATUS) &&
+ (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE))
+ datain.flags &= ~ISCSI_FLAG_DATA_STATUS;
+ else {
+ if ((dr->dr_complete == DATAIN_COMPLETE_NORMAL) ||
+ (dr->dr_complete == DATAIN_COMPLETE_CONNECTION_RECOVERY)) {
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ cmd->stat_sn = conn->stat_sn++;
+ set_statsn = 1;
+ } else if (dr->dr_complete ==
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY)
+ set_statsn = 1;
+ }
+
+ hdr = (struct iscsi_data_rsp *) cmd->pdu;
+ memset(hdr, 0, ISCSI_HDR_LEN);
+ hdr->opcode = ISCSI_OP_SCSI_DATA_IN;
+ hdr->flags = datain.flags;
+ if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
+ if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) {
+ hdr->flags |= ISCSI_FLAG_DATA_OVERFLOW;
+ hdr->residual_count = cpu_to_be32(cmd->residual_count);
+ } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) {
+ hdr->flags |= ISCSI_FLAG_DATA_UNDERFLOW;
+ hdr->residual_count = cpu_to_be32(cmd->residual_count);
+ }
+ }
+ hton24(hdr->dlength, datain.length);
+ if (hdr->flags & ISCSI_FLAG_DATA_ACK)
+ int_to_scsilun(cmd->se_cmd.orig_fe_lun,
+ (struct scsi_lun *)&hdr->lun);
+ else
+ put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun);
+
+ hdr->itt = cpu_to_be32(cmd->init_task_tag);
+ hdr->ttt = (hdr->flags & ISCSI_FLAG_DATA_ACK) ?
+ cpu_to_be32(cmd->targ_xfer_tag) :
+ 0xFFFFFFFF;
+ hdr->statsn = (set_statsn) ? cpu_to_be32(cmd->stat_sn) :
+ 0xFFFFFFFF;
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->datasn = cpu_to_be32(datain.data_sn);
+ hdr->offset = cpu_to_be32(datain.offset);
+
+ iov = &cmd->iov_data[0];
+ iov[iov_count].iov_base = cmd->pdu;
+ iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+ tx_size += ISCSI_HDR_LEN;
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ iov[0].iov_len += ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+
+ pr_debug("Attaching CRC32 HeaderDigest"
+ " for DataIN PDU 0x%08x\n", *header_digest);
+ }
+
+ iov_ret = iscsit_map_iovec(cmd, &cmd->iov_data[1], datain.offset, datain.length);
+ if (iov_ret < 0)
+ return -1;
+
+ iov_count += iov_ret;
+ tx_size += datain.length;
+
+ cmd->padding = ((-datain.length) & 3);
+ if (cmd->padding) {
+ iov[iov_count].iov_base = cmd->pad_bytes;
+ iov[iov_count++].iov_len = cmd->padding;
+ tx_size += cmd->padding;
+
+ pr_debug("Attaching %u padding bytes\n",
+ cmd->padding);
+ }
+ if (conn->conn_ops->DataDigest) {
+ cmd->data_crc = iscsit_do_crypto_hash_sg(&conn->conn_tx_hash, cmd,
+ datain.offset, datain.length, cmd->padding, cmd->pad_bytes);
+
+ iov[iov_count].iov_base = &cmd->data_crc;
+ iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+
+ pr_debug("Attached CRC32C DataDigest %d bytes, crc"
+ " 0x%08x\n", datain.length+cmd->padding, cmd->data_crc);
+ }
+
+ cmd->iov_data_count = iov_count;
+ cmd->tx_size = tx_size;
+
+ pr_debug("Built DataIN ITT: 0x%08x, StatSN: 0x%08x,"
+ " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n",
+ cmd->init_task_tag, ntohl(hdr->statsn), ntohl(hdr->datasn),
+ ntohl(hdr->offset), datain.length, conn->cid);
+
+ if (dr->dr_complete) {
+ *eodr = (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ?
+ 2 : 1;
+ iscsit_free_datain_req(cmd, dr);
+ }
+
+ return 0;
+}
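
/*
 * The OVERFLOW/UNDERFLOW flags set above report residuals: when the
 * initiator's Expected Data Transfer Length differs from the actual SCSI
 * data length, residual_count carries the difference.  The rule in
 * isolation (hypothetical demo code with made-up lengths):
 */

#include <stdio.h>

int main(void)
{
	unsigned int expected = 4096;	/* initiator's expected transfer length */
	unsigned int actual = 512;	/* bytes the SCSI layer actually moved */

	if (actual < expected)
		printf("UNDERFLOW, residual_count %u\n", expected - actual);
	else if (actual > expected)
		printf("OVERFLOW, residual_count %u\n", actual - expected);
	else
		printf("no residual\n");
	return 0;
}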
+
+static int iscsit_send_logout_response(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ int niov = 0, tx_size;
+ struct iscsi_conn *logout_conn = NULL;
+ struct iscsi_conn_recovery *cr = NULL;
+ struct iscsi_session *sess = conn->sess;
+ struct kvec *iov;
+ struct iscsi_logout_rsp *hdr;
+	/*
+	 * The actual shutting down of Sessions and/or Connections
+	 * for CLOSESESSION and CLOSECONNECTION Logout Requests
+	 * is done in iscsit_logout_post_handler().
+	 */
+ switch (cmd->logout_reason) {
+ case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+ pr_debug("iSCSI session logout successful, setting"
+ " logout response to ISCSI_LOGOUT_SUCCESS.\n");
+ cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+ break;
+ case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+ if (cmd->logout_response == ISCSI_LOGOUT_CID_NOT_FOUND)
+ break;
+ /*
+ * For CLOSECONNECTION logout requests carrying
+ * a matching logout CID -> local CID, the reference
+ * for the local CID will have been incremented in
+ * iscsi_logout_closeconnection().
+ *
+ * For CLOSECONNECTION logout requests carrying
+ * a different CID than the connection it arrived
+ * on, the connection responding to cmd->logout_cid
+ * is stopped in iscsit_logout_post_handler_diffcid().
+ */
+
+ pr_debug("iSCSI CID: %hu logout on CID: %hu"
+ " successful.\n", cmd->logout_cid, conn->cid);
+ cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+ break;
+ case ISCSI_LOGOUT_REASON_RECOVERY:
+ if ((cmd->logout_response == ISCSI_LOGOUT_RECOVERY_UNSUPPORTED) ||
+ (cmd->logout_response == ISCSI_LOGOUT_CLEANUP_FAILED))
+ break;
+		/*
+		 * If the connection is still active from our point of view,
+		 * force connection recovery to occur.
+		 */
+ logout_conn = iscsit_get_conn_from_cid_rcfr(sess,
+ cmd->logout_cid);
+		if (logout_conn) {
+ iscsit_connection_reinstatement_rcfr(logout_conn);
+ iscsit_dec_conn_usage_count(logout_conn);
+ }
+
+ cr = iscsit_get_inactive_connection_recovery_entry(
+ conn->sess, cmd->logout_cid);
+ if (!cr) {
+ pr_err("Unable to locate CID: %hu for"
+ " REMOVECONNFORRECOVERY Logout Request.\n",
+ cmd->logout_cid);
+ cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND;
+ break;
+ }
+
+ iscsit_discard_cr_cmds_by_expstatsn(cr, cmd->exp_stat_sn);
+
+ pr_debug("iSCSI REMOVECONNFORRECOVERY logout"
+ " for recovery for CID: %hu on CID: %hu successful.\n",
+ cmd->logout_cid, conn->cid);
+ cmd->logout_response = ISCSI_LOGOUT_SUCCESS;
+ break;
+ default:
+ pr_err("Unknown cmd->logout_reason: 0x%02x\n",
+ cmd->logout_reason);
+ return -1;
+ }
+
+ tx_size = ISCSI_HDR_LEN;
+ hdr = (struct iscsi_logout_rsp *)cmd->pdu;
+ memset(hdr, 0, ISCSI_HDR_LEN);
+ hdr->opcode = ISCSI_OP_LOGOUT_RSP;
+ hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ hdr->response = cmd->logout_response;
+ hdr->itt = cpu_to_be32(cmd->init_task_tag);
+ cmd->stat_sn = conn->stat_sn++;
+ hdr->statsn = cpu_to_be32(cmd->stat_sn);
+
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+
+ iov = &cmd->iov_misc[0];
+ iov[niov].iov_base = cmd->pdu;
+ iov[niov++].iov_len = ISCSI_HDR_LEN;
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ iov[0].iov_len += ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32C HeaderDigest to"
+ " Logout Response 0x%08x\n", *header_digest);
+ }
+ cmd->iov_misc_count = niov;
+ cmd->tx_size = tx_size;
+
+ pr_debug("Sending Logout Response ITT: 0x%08x StatSN:"
+ " 0x%08x Response: 0x%02x CID: %hu on CID: %hu\n",
+ cmd->init_task_tag, cmd->stat_sn, hdr->response,
+ cmd->logout_cid, conn->cid);
+
+ return 0;
+}
+
+/*
+ * Unsolicited NOPIN, either requesting a response or not.
+ */
+static int iscsit_send_unsolicited_nopin(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn,
+ int want_response)
+{
+ int tx_size = ISCSI_HDR_LEN;
+ struct iscsi_nopin *hdr;
+
+ hdr = (struct iscsi_nopin *) cmd->pdu;
+ memset(hdr, 0, ISCSI_HDR_LEN);
+ hdr->opcode = ISCSI_OP_NOOP_IN;
+ hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ hdr->itt = cpu_to_be32(cmd->init_task_tag);
+ hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag);
+ cmd->stat_sn = conn->stat_sn;
+ hdr->statsn = cpu_to_be32(cmd->stat_sn);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32C HeaderDigest to"
+ " NopIN 0x%08x\n", *header_digest);
+ }
+
+ cmd->iov_misc[0].iov_base = cmd->pdu;
+ cmd->iov_misc[0].iov_len = tx_size;
+ cmd->iov_misc_count = 1;
+ cmd->tx_size = tx_size;
+
+ pr_debug("Sending Unsolicited NOPIN TTT: 0x%08x StatSN:"
+ " 0x%08x CID: %hu\n", hdr->ttt, cmd->stat_sn, conn->cid);
+
+ return 0;
+}
+
+static int iscsit_send_nopin_response(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ int niov = 0, tx_size;
+ u32 padding = 0;
+ struct kvec *iov;
+ struct iscsi_nopin *hdr;
+
+ tx_size = ISCSI_HDR_LEN;
+ hdr = (struct iscsi_nopin *) cmd->pdu;
+ memset(hdr, 0, ISCSI_HDR_LEN);
+ hdr->opcode = ISCSI_OP_NOOP_IN;
+ hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ hton24(hdr->dlength, cmd->buf_ptr_size);
+ put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun);
+ hdr->itt = cpu_to_be32(cmd->init_task_tag);
+ hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag);
+ cmd->stat_sn = conn->stat_sn++;
+ hdr->statsn = cpu_to_be32(cmd->stat_sn);
+
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+
+ iov = &cmd->iov_misc[0];
+ iov[niov].iov_base = cmd->pdu;
+ iov[niov++].iov_len = ISCSI_HDR_LEN;
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ iov[0].iov_len += ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32C HeaderDigest"
+ " to NopIn 0x%08x\n", *header_digest);
+ }
+
+ /*
+ * NOPOUT Ping Data is attached to struct iscsi_cmd->buf_ptr.
+ * NOPOUT DataSegmentLength is at struct iscsi_cmd->buf_ptr_size.
+ */
+ if (cmd->buf_ptr_size) {
+ iov[niov].iov_base = cmd->buf_ptr;
+ iov[niov++].iov_len = cmd->buf_ptr_size;
+ tx_size += cmd->buf_ptr_size;
+
+ pr_debug("Echoing back %u bytes of ping"
+ " data.\n", cmd->buf_ptr_size);
+
+ padding = ((-cmd->buf_ptr_size) & 3);
+ if (padding != 0) {
+ iov[niov].iov_base = &cmd->pad_bytes;
+ iov[niov++].iov_len = padding;
+ tx_size += padding;
+ pr_debug("Attaching %u additional"
+ " padding bytes.\n", padding);
+ }
+ if (conn->conn_ops->DataDigest) {
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ cmd->buf_ptr, cmd->buf_ptr_size,
+ padding, (u8 *)&cmd->pad_bytes,
+ (u8 *)&cmd->data_crc);
+
+ iov[niov].iov_base = &cmd->data_crc;
+ iov[niov++].iov_len = ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attached DataDigest for %u"
+ " bytes of ping data, CRC 0x%08x\n",
+ cmd->buf_ptr_size, cmd->data_crc);
+ }
+ }
+
+ cmd->iov_misc_count = niov;
+ cmd->tx_size = tx_size;
+
+ pr_debug("Sending NOPIN Response ITT: 0x%08x, TTT:"
+ " 0x%08x, StatSN: 0x%08x, Length %u\n", cmd->init_task_tag,
+ cmd->targ_xfer_tag, cmd->stat_sn, cmd->buf_ptr_size);
+
+ return 0;
+}
+
+int iscsit_send_r2t(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ int tx_size = 0;
+ struct iscsi_r2t *r2t;
+ struct iscsi_r2t_rsp *hdr;
+
+ r2t = iscsit_get_r2t_from_list(cmd);
+ if (!r2t)
+ return -1;
+
+ hdr = (struct iscsi_r2t_rsp *) cmd->pdu;
+ memset(hdr, 0, ISCSI_HDR_LEN);
+ hdr->opcode = ISCSI_OP_R2T;
+ hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ int_to_scsilun(cmd->se_cmd.orig_fe_lun,
+ (struct scsi_lun *)&hdr->lun);
+ hdr->itt = cpu_to_be32(cmd->init_task_tag);
+ spin_lock_bh(&conn->sess->ttt_lock);
+ r2t->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+ if (r2t->targ_xfer_tag == 0xFFFFFFFF)
+ r2t->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+ spin_unlock_bh(&conn->sess->ttt_lock);
+ hdr->ttt = cpu_to_be32(r2t->targ_xfer_tag);
+ hdr->statsn = cpu_to_be32(conn->stat_sn);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+ hdr->r2tsn = cpu_to_be32(r2t->r2t_sn);
+ hdr->data_offset = cpu_to_be32(r2t->offset);
+ hdr->data_length = cpu_to_be32(r2t->xfer_len);
+
+ cmd->iov_misc[0].iov_base = cmd->pdu;
+ cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN;
+ tx_size += ISCSI_HDR_LEN;
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32 HeaderDigest for R2T"
+ " PDU 0x%08x\n", *header_digest);
+ }
+
+ pr_debug("Built %sR2T, ITT: 0x%08x, TTT: 0x%08x, StatSN:"
+ " 0x%08x, R2TSN: 0x%08x, Offset: %u, DDTL: %u, CID: %hu\n",
+ (!r2t->recovery_r2t) ? "" : "Recovery ", cmd->init_task_tag,
+ r2t->targ_xfer_tag, ntohl(hdr->statsn), r2t->r2t_sn,
+ r2t->offset, r2t->xfer_len, conn->cid);
+
+ cmd->iov_misc_count = 1;
+ cmd->tx_size = tx_size;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ r2t->sent_r2t = 1;
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ return 0;
+}
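
/*
 * The TTT allocation above skips 0xFFFFFFFF because that value is the
 * reserved "no Target Transfer Tag" marker on the wire.  The same
 * skip-the-reserved-value pattern in isolation (hypothetical demo code;
 * the driver protects the counter with ttt_lock instead):
 */

#include <stdio.h>

static unsigned int next_ttt(unsigned int *counter)
{
	unsigned int ttt = (*counter)++;

	if (ttt == 0xFFFFFFFF)	/* reserved marker, never hand it out */
		ttt = (*counter)++;
	return ttt;
}

int main(void)
{
	unsigned int counter = 0xFFFFFFFE;
	int i;

	/* wraps past the reserved value: FFFFFFFE, 00000000, 00000001 */
	for (i = 0; i < 3; i++)
		printf("TTT 0x%08x\n", next_ttt(&counter));
	return 0;
}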
+
+/*
+ * type 0: Normal Operation.
+ * type 1: Called from Storage Transport.
+ * type 2: Called from iscsi_task_reassign_complete_write() for
+ * connection recovery.
+ */
+int iscsit_build_r2ts_for_cmd(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn,
+ int type)
+{
+ int first_r2t = 1;
+ u32 offset = 0, xfer_len = 0;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ if (cmd->cmd_flags & ICF_SENT_LAST_R2T) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return 0;
+ }
+
+ if (conn->sess->sess_ops->DataSequenceInOrder && (type != 2))
+ if (cmd->r2t_offset < cmd->write_data_done)
+ cmd->r2t_offset = cmd->write_data_done;
+
+ while (cmd->outstanding_r2ts < conn->sess->sess_ops->MaxOutstandingR2T) {
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ offset = cmd->r2t_offset;
+
+ if (first_r2t && (type == 2)) {
+ xfer_len = ((offset +
+ (conn->sess->sess_ops->MaxBurstLength -
+ cmd->next_burst_len) >
+ cmd->data_length) ?
+ (cmd->data_length - offset) :
+ (conn->sess->sess_ops->MaxBurstLength -
+ cmd->next_burst_len));
+ } else {
+ xfer_len = ((offset +
+ conn->sess->sess_ops->MaxBurstLength) >
+ cmd->data_length) ?
+ (cmd->data_length - offset) :
+ conn->sess->sess_ops->MaxBurstLength;
+ }
+ cmd->r2t_offset += xfer_len;
+
+ if (cmd->r2t_offset == cmd->data_length)
+ cmd->cmd_flags |= ICF_SENT_LAST_R2T;
+ } else {
+ struct iscsi_seq *seq;
+
+ seq = iscsit_get_seq_holder_for_r2t(cmd);
+ if (!seq) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
+ offset = seq->offset;
+ xfer_len = seq->xfer_len;
+
+ if (cmd->seq_send_order == cmd->seq_count)
+ cmd->cmd_flags |= ICF_SENT_LAST_R2T;
+ }
+ cmd->outstanding_r2ts++;
+ first_r2t = 0;
+
+ if (iscsit_add_r2t_to_list(cmd, offset, xfer_len, 0, 0) < 0) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
+ if (cmd->cmd_flags & ICF_SENT_LAST_R2T)
+ break;
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ return 0;
+}
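
/*
 * With DataSequenceInOrder=Yes, the loop above slices the remaining
 * transfer into R2Ts of at most MaxBurstLength bytes each, advancing
 * r2t_offset until the full transfer length is covered.  A stand-alone
 * sketch of the slicing (hypothetical demo code with made-up sizes):
 */

#include <stdio.h>

int main(void)
{
	unsigned int data_length = 70000, max_burst = 32768;
	unsigned int offset = 0, xfer_len;

	while (offset < data_length) {
		/* min(MaxBurstLength, bytes remaining) */
		xfer_len = (offset + max_burst > data_length) ?
			   (data_length - offset) : max_burst;
		printf("R2T offset %u, length %u\n", offset, xfer_len);
		offset += xfer_len;
	}
	return 0;
}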
+
+static int iscsit_send_status(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ u8 iov_count = 0, recovery;
+ u32 padding = 0, tx_size = 0;
+ struct iscsi_scsi_rsp *hdr;
+ struct kvec *iov;
+
+ recovery = (cmd->i_state != ISTATE_SEND_STATUS);
+ if (!recovery)
+ cmd->stat_sn = conn->stat_sn++;
+
+ spin_lock_bh(&conn->sess->session_stats_lock);
+ conn->sess->rsp_pdus++;
+ spin_unlock_bh(&conn->sess->session_stats_lock);
+
+ hdr = (struct iscsi_scsi_rsp *) cmd->pdu;
+ memset(hdr, 0, ISCSI_HDR_LEN);
+ hdr->opcode = ISCSI_OP_SCSI_CMD_RSP;
+ hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) {
+ hdr->flags |= ISCSI_FLAG_CMD_OVERFLOW;
+ hdr->residual_count = cpu_to_be32(cmd->residual_count);
+ } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) {
+ hdr->flags |= ISCSI_FLAG_CMD_UNDERFLOW;
+ hdr->residual_count = cpu_to_be32(cmd->residual_count);
+ }
+ hdr->response = cmd->iscsi_response;
+ hdr->cmd_status = cmd->se_cmd.scsi_status;
+ hdr->itt = cpu_to_be32(cmd->init_task_tag);
+ hdr->statsn = cpu_to_be32(cmd->stat_sn);
+
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+
+ iov = &cmd->iov_misc[0];
+ iov[iov_count].iov_base = cmd->pdu;
+ iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+ tx_size += ISCSI_HDR_LEN;
+
+ /*
+ * Attach SENSE DATA payload to iSCSI Response PDU
+ */
+ if (cmd->se_cmd.sense_buffer &&
+ ((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
+ (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
+ padding = -(cmd->se_cmd.scsi_sense_length) & 3;
+ hton24(hdr->dlength, cmd->se_cmd.scsi_sense_length);
+ iov[iov_count].iov_base = cmd->se_cmd.sense_buffer;
+ iov[iov_count++].iov_len =
+ (cmd->se_cmd.scsi_sense_length + padding);
+ tx_size += cmd->se_cmd.scsi_sense_length;
+
+ if (padding) {
+ memset(cmd->se_cmd.sense_buffer +
+ cmd->se_cmd.scsi_sense_length, 0, padding);
+ tx_size += padding;
+ pr_debug("Adding %u bytes of padding to"
+ " SENSE.\n", padding);
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ cmd->se_cmd.sense_buffer,
+ (cmd->se_cmd.scsi_sense_length + padding),
+ 0, NULL, (u8 *)&cmd->data_crc);
+
+ iov[iov_count].iov_base = &cmd->data_crc;
+ iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+
+ pr_debug("Attaching CRC32 DataDigest for"
+ " SENSE, %u bytes CRC 0x%08x\n",
+ (cmd->se_cmd.scsi_sense_length + padding),
+ cmd->data_crc);
+ }
+
+ pr_debug("Attaching SENSE DATA: %u bytes to iSCSI"
+ " Response PDU\n",
+ cmd->se_cmd.scsi_sense_length);
+ }
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ iov[0].iov_len += ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32 HeaderDigest for Response"
+ " PDU 0x%08x\n", *header_digest);
+ }
+
+ cmd->iov_misc_count = iov_count;
+ cmd->tx_size = tx_size;
+
+ pr_debug("Built %sSCSI Response, ITT: 0x%08x, StatSN: 0x%08x,"
+ " Response: 0x%02x, SAM Status: 0x%02x, CID: %hu\n",
+ (!recovery) ? "" : "Recovery ", cmd->init_task_tag,
+ cmd->stat_sn, 0x00, cmd->se_cmd.scsi_status, conn->cid);
+
+ return 0;
+}
+
+static u8 iscsit_convert_tcm_tmr_rsp(struct se_tmr_req *se_tmr)
+{
+ switch (se_tmr->response) {
+ case TMR_FUNCTION_COMPLETE:
+ return ISCSI_TMF_RSP_COMPLETE;
+ case TMR_TASK_DOES_NOT_EXIST:
+ return ISCSI_TMF_RSP_NO_TASK;
+ case TMR_LUN_DOES_NOT_EXIST:
+ return ISCSI_TMF_RSP_NO_LUN;
+ case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED:
+ return ISCSI_TMF_RSP_NOT_SUPPORTED;
+ case TMR_FUNCTION_AUTHORIZATION_FAILED:
+ return ISCSI_TMF_RSP_AUTH_FAILED;
+ case TMR_FUNCTION_REJECTED:
+ default:
+ return ISCSI_TMF_RSP_REJECTED;
+ }
+}
+
+static int iscsit_send_task_mgt_rsp(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+ struct iscsi_tm_rsp *hdr;
+ u32 tx_size = 0;
+
+ hdr = (struct iscsi_tm_rsp *) cmd->pdu;
+ memset(hdr, 0, ISCSI_HDR_LEN);
+ hdr->opcode = ISCSI_OP_SCSI_TMFUNC_RSP;
+ hdr->response = iscsit_convert_tcm_tmr_rsp(se_tmr);
+ hdr->itt = cpu_to_be32(cmd->init_task_tag);
+ cmd->stat_sn = conn->stat_sn++;
+ hdr->statsn = cpu_to_be32(cmd->stat_sn);
+
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+
+ cmd->iov_misc[0].iov_base = cmd->pdu;
+ cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN;
+ tx_size += ISCSI_HDR_LEN;
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32 HeaderDigest for Task"
+ " Mgmt Response PDU 0x%08x\n", *header_digest);
+ }
+
+ cmd->iov_misc_count = 1;
+ cmd->tx_size = tx_size;
+
+ pr_debug("Built Task Management Response ITT: 0x%08x,"
+ " StatSN: 0x%08x, Response: 0x%02x, CID: %hu\n",
+ cmd->init_task_tag, cmd->stat_sn, hdr->response, conn->cid);
+
+ return 0;
+}
+
+static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd)
+{
+ char *payload = NULL;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_portal_group *tpg;
+ struct iscsi_tiqn *tiqn;
+ struct iscsi_tpg_np *tpg_np;
+ int buffer_len, end_of_buf = 0, len = 0, payload_len = 0;
+ unsigned char buf[256];
+
+ buffer_len = (conn->conn_ops->MaxRecvDataSegmentLength > 32768) ?
+ 32768 : conn->conn_ops->MaxRecvDataSegmentLength;
+
+ memset(buf, 0, 256);
+
+ payload = kzalloc(buffer_len, GFP_KERNEL);
+ if (!payload) {
+ pr_err("Unable to allocate memory for sendtargets"
+ " response.\n");
+ return -ENOMEM;
+ }
+
+ spin_lock(&tiqn_lock);
+ list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) {
+ len = sprintf(buf, "TargetName=%s", tiqn->tiqn);
+ len += 1;
+
+ if ((len + payload_len) > buffer_len) {
+ end_of_buf = 1;
+ goto eob;
+ }
+ memcpy((void *)payload + payload_len, buf, len);
+ payload_len += len;
+
+ spin_lock(&tiqn->tiqn_tpg_lock);
+ list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) {
+
+ spin_lock(&tpg->tpg_state_lock);
+ if ((tpg->tpg_state == TPG_STATE_FREE) ||
+ (tpg->tpg_state == TPG_STATE_INACTIVE)) {
+ spin_unlock(&tpg->tpg_state_lock);
+ continue;
+ }
+ spin_unlock(&tpg->tpg_state_lock);
+
+ spin_lock(&tpg->tpg_np_lock);
+ list_for_each_entry(tpg_np, &tpg->tpg_gnp_list,
+ tpg_np_list) {
+ len = sprintf(buf, "TargetAddress="
+ "%s%s%s:%hu,%hu",
+ (tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ?
+ "[" : "", tpg_np->tpg_np->np_ip,
+ (tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ?
+ "]" : "", tpg_np->tpg_np->np_port,
+ tpg->tpgt);
+ len += 1;
+
+ if ((len + payload_len) > buffer_len) {
+ spin_unlock(&tpg->tpg_np_lock);
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+ end_of_buf = 1;
+ goto eob;
+ }
+ memcpy((void *)payload + payload_len, buf, len);
+ payload_len += len;
+ }
+ spin_unlock(&tpg->tpg_np_lock);
+ }
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+eob:
+ if (end_of_buf)
+ break;
+ }
+ spin_unlock(&tiqn_lock);
+
+ cmd->buf_ptr = payload;
+
+ return payload_len;
+}
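
/*
 * The TargetAddress entries built above bracket IPv6 literals but not
 * IPv4, e.g. "TargetAddress=[fe80::1]:3260,1" versus
 * "TargetAddress=10.0.0.1:3260,1".  The formatting in isolation
 * (hypothetical demo code):
 */

#include <stdio.h>

static void print_target_address(const char *ip, int is_ipv6,
				 unsigned short port, unsigned short tpgt)
{
	printf("TargetAddress=%s%s%s:%hu,%hu\n",
	       is_ipv6 ? "[" : "", ip, is_ipv6 ? "]" : "", port, tpgt);
}

int main(void)
{
	print_target_address("10.0.0.1", 0, 3260, 1);
	print_target_address("fe80::1", 1, 3260, 1);
	return 0;
}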
+
+/*
+ * FIXME: Add support for F_BIT and C_BIT when the length is longer than
+ * MaxRecvDataSegmentLength.
+ */
+static int iscsit_send_text_rsp(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ struct iscsi_text_rsp *hdr;
+ struct kvec *iov;
+ u32 padding = 0, tx_size = 0;
+ int text_length, iov_count = 0;
+
+ text_length = iscsit_build_sendtargets_response(cmd);
+ if (text_length < 0)
+ return text_length;
+
+ padding = ((-text_length) & 3);
+ if (padding != 0) {
+ memset(cmd->buf_ptr + text_length, 0, padding);
+ pr_debug("Attaching %u additional bytes for"
+ " padding.\n", padding);
+ }
+
+ hdr = (struct iscsi_text_rsp *) cmd->pdu;
+ memset(hdr, 0, ISCSI_HDR_LEN);
+ hdr->opcode = ISCSI_OP_TEXT_RSP;
+ hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ hton24(hdr->dlength, text_length);
+ hdr->itt = cpu_to_be32(cmd->init_task_tag);
+ hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag);
+ cmd->stat_sn = conn->stat_sn++;
+ hdr->statsn = cpu_to_be32(cmd->stat_sn);
+
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+
+ iov = &cmd->iov_misc[0];
+
+ iov[iov_count].iov_base = cmd->pdu;
+ iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+ iov[iov_count].iov_base = cmd->buf_ptr;
+ iov[iov_count++].iov_len = text_length + padding;
+
+ tx_size += (ISCSI_HDR_LEN + text_length + padding);
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ iov[0].iov_len += ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32 HeaderDigest for"
+ " Text Response PDU 0x%08x\n", *header_digest);
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ cmd->buf_ptr, (text_length + padding),
+ 0, NULL, (u8 *)&cmd->data_crc);
+
+ iov[iov_count].iov_base = &cmd->data_crc;
+ iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+
+ pr_debug("Attaching DataDigest for %u bytes of text"
+ " data, CRC 0x%08x\n", (text_length + padding),
+ cmd->data_crc);
+ }
+
+ cmd->iov_misc_count = iov_count;
+ cmd->tx_size = tx_size;
+
+ pr_debug("Built Text Response: ITT: 0x%08x, StatSN: 0x%08x,"
+ " Length: %u, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn,
+ text_length, conn->cid);
+ return 0;
+}
+
+static int iscsit_send_reject(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ u32 iov_count = 0, tx_size = 0;
+ struct iscsi_reject *hdr;
+ struct kvec *iov;
+
+ hdr = (struct iscsi_reject *) cmd->pdu;
+ hdr->opcode = ISCSI_OP_REJECT;
+ hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ hton24(hdr->dlength, ISCSI_HDR_LEN);
+ cmd->stat_sn = conn->stat_sn++;
+ hdr->statsn = cpu_to_be32(cmd->stat_sn);
+ hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+
+ iov = &cmd->iov_misc[0];
+
+ iov[iov_count].iov_base = cmd->pdu;
+ iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+ iov[iov_count].iov_base = cmd->buf_ptr;
+ iov[iov_count++].iov_len = ISCSI_HDR_LEN;
+
+ tx_size = (ISCSI_HDR_LEN + ISCSI_HDR_LEN);
+
+ if (conn->conn_ops->HeaderDigest) {
+ u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)hdr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)header_digest);
+
+ iov[0].iov_len += ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32 HeaderDigest for"
+ " REJECT PDU 0x%08x\n", *header_digest);
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+ (unsigned char *)cmd->buf_ptr, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)&cmd->data_crc);
+
+ iov[iov_count].iov_base = &cmd->data_crc;
+ iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+ tx_size += ISCSI_CRC_LEN;
+ pr_debug("Attaching CRC32 DataDigest for REJECT"
+ " PDU 0x%08x\n", cmd->data_crc);
+ }
+
+ cmd->iov_misc_count = iov_count;
+ cmd->tx_size = tx_size;
+
+ pr_debug("Built Reject PDU StatSN: 0x%08x, Reason: 0x%02x,"
+ " CID: %hu\n", ntohl(hdr->statsn), hdr->reason, conn->cid);
+
+ return 0;
+}
+
+static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *conn)
+{
+ if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) ||
+ (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) {
+ wait_for_completion_interruptible_timeout(
+ &conn->tx_half_close_comp,
+ ISCSI_TX_THREAD_TCP_TIMEOUT * HZ);
+ }
+}
+
+#ifdef CONFIG_SMP
+
+void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
+{
+ struct iscsi_thread_set *ts = conn->thread_set;
+ int ord, cpu;
+	/*
+	 * thread_id is assigned from iscsit_global->ts_bitmap from
+	 * within iscsi_thread_set.c:iscsi_allocate_thread_sets()
+	 *
+	 * Here we use thread_id to determine which CPU this iSCSI
+	 * connection's iscsi_thread_set will be scheduled to execute on.
+	 */
+ ord = ts->thread_id % cpumask_weight(cpu_online_mask);
+#if 0
+ pr_debug(">>>>>>>>>>>>>>>>>>>> Generated ord: %d from"
+ " thread_id: %d\n", ord, ts->thread_id);
+#endif
+ for_each_online_cpu(cpu) {
+ if (ord-- == 0) {
+ cpumask_set_cpu(cpu, conn->conn_cpumask);
+ return;
+ }
+ }
+	/*
+	 * This should never be reached.
+	 */
+ dump_stack();
+ cpumask_setall(conn->conn_cpumask);
+}
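
/*
 * The loop above walks the online CPU mask ord steps to pin a thread set
 * to the Nth online CPU, with the modulo providing wraparound.  A
 * user-space sketch of the mapping (hypothetical demo code, assuming a
 * dense array of online CPU ids):
 */

#include <stdio.h>

static int pick_cpu(int thread_id, const int *online, int n_online)
{
	return online[thread_id % n_online];	/* Nth online CPU, wrapping */
}

int main(void)
{
	int online[] = { 0, 1, 2, 3 };
	int id;

	for (id = 0; id < 6; id++)
		printf("thread_id %d -> cpu %d\n", id, pick_cpu(id, online, 4));
	return 0;
}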
+
+static inline void iscsit_thread_check_cpumask(
+ struct iscsi_conn *conn,
+ struct task_struct *p,
+ int mode)
+{
+ char buf[128];
+ /*
+ * mode == 1 signals iscsi_target_tx_thread() usage.
+ * mode == 0 signals iscsi_target_rx_thread() usage.
+ */
+ if (mode == 1) {
+ if (!conn->conn_tx_reset_cpumask)
+ return;
+ conn->conn_tx_reset_cpumask = 0;
+ } else {
+ if (!conn->conn_rx_reset_cpumask)
+ return;
+ conn->conn_rx_reset_cpumask = 0;
+ }
+ /*
+ * Update the CPU mask for this single kthread so that
+ * both TX and RX kthreads are scheduled to run on the
+ * same CPU.
+ */
+ memset(buf, 0, 128);
+ cpumask_scnprintf(buf, 128, conn->conn_cpumask);
+#if 0
+ pr_debug(">>>>>>>>>>>>>> Calling set_cpus_allowed_ptr():"
+ " %s for %s\n", buf, p->comm);
+#endif
+ set_cpus_allowed_ptr(p, conn->conn_cpumask);
+}
+
+#else
+#define iscsit_thread_get_cpumask(X) ({})
+#define iscsit_thread_check_cpumask(X, Y, Z) ({})
+#endif /* CONFIG_SMP */
+
+int iscsi_target_tx_thread(void *arg)
+{
+ u8 state;
+ int eodr = 0;
+ int ret = 0;
+ int sent_status = 0;
+ int use_misc = 0;
+ int map_sg = 0;
+ struct iscsi_cmd *cmd = NULL;
+ struct iscsi_conn *conn;
+ struct iscsi_queue_req *qr = NULL;
+ struct se_cmd *se_cmd;
+ struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg;
+ /*
+ * Allow ourselves to be interrupted by SIGINT so that a
+ * connection recovery / failure event can be triggered externally.
+ */
+ allow_signal(SIGINT);
+
+restart:
+ conn = iscsi_tx_thread_pre_handler(ts);
+ if (!conn)
+ goto out;
+
+ eodr = map_sg = ret = sent_status = use_misc = 0;
+
+ while (!kthread_should_stop()) {
+ /*
+ * Ensure that both TX and RX per connection kthreads
+ * are scheduled to run on the same CPU.
+ */
+ iscsit_thread_check_cpumask(conn, current, 1);
+
+ schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT);
+
+ if ((ts->status == ISCSI_THREAD_SET_RESET) ||
+ signal_pending(current))
+ goto transport_err;
+
+get_immediate:
+ qr = iscsit_get_cmd_from_immediate_queue(conn);
+ if (qr) {
+ atomic_set(&conn->check_immediate_queue, 0);
+ cmd = qr->cmd;
+ state = qr->state;
+ kmem_cache_free(lio_qr_cache, qr);
+
+ spin_lock_bh(&cmd->istate_lock);
+ switch (state) {
+ case ISTATE_SEND_R2T:
+ spin_unlock_bh(&cmd->istate_lock);
+ ret = iscsit_send_r2t(cmd, conn);
+ break;
+ case ISTATE_REMOVE:
+ spin_unlock_bh(&cmd->istate_lock);
+
+ if (cmd->data_direction == DMA_TO_DEVICE)
+ iscsit_stop_dataout_timer(cmd);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_del(&cmd->i_list);
+ spin_unlock_bh(&conn->cmd_lock);
+				/*
+				 * Determine if a struct se_cmd is associated with
+				 * this struct iscsi_cmd.
+				 */
+ if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) &&
+ !(cmd->tmr_req))
+ iscsit_release_cmd(cmd);
+ else
+ transport_generic_free_cmd(&cmd->se_cmd,
+ 1, 0);
+ goto get_immediate;
+ case ISTATE_SEND_NOPIN_WANT_RESPONSE:
+ spin_unlock_bh(&cmd->istate_lock);
+ iscsit_mod_nopin_response_timer(conn);
+ ret = iscsit_send_unsolicited_nopin(cmd,
+ conn, 1);
+ break;
+ case ISTATE_SEND_NOPIN_NO_RESPONSE:
+ spin_unlock_bh(&cmd->istate_lock);
+ ret = iscsit_send_unsolicited_nopin(cmd,
+ conn, 0);
+ break;
+ default:
+ pr_err("Unknown Opcode: 0x%02x ITT:"
+ " 0x%08x, i_state: %d on CID: %hu\n",
+ cmd->iscsi_opcode, cmd->init_task_tag, state,
+ conn->cid);
+ spin_unlock_bh(&cmd->istate_lock);
+ goto transport_err;
+ }
+ if (ret < 0) {
+ conn->tx_immediate_queue = 0;
+ goto transport_err;
+ }
+
+ if (iscsit_send_tx_data(cmd, conn, 1) < 0) {
+ conn->tx_immediate_queue = 0;
+ iscsit_tx_thread_wait_for_tcp(conn);
+ goto transport_err;
+ }
+
+ spin_lock_bh(&cmd->istate_lock);
+ switch (state) {
+ case ISTATE_SEND_R2T:
+ spin_unlock_bh(&cmd->istate_lock);
+ spin_lock_bh(&cmd->dataout_timeout_lock);
+ iscsit_start_dataout_timer(cmd, conn);
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ break;
+ case ISTATE_SEND_NOPIN_WANT_RESPONSE:
+ cmd->i_state = ISTATE_SENT_NOPIN_WANT_RESPONSE;
+ spin_unlock_bh(&cmd->istate_lock);
+ break;
+ case ISTATE_SEND_NOPIN_NO_RESPONSE:
+ cmd->i_state = ISTATE_SENT_STATUS;
+ spin_unlock_bh(&cmd->istate_lock);
+ break;
+ default:
+ pr_err("Unknown Opcode: 0x%02x ITT:"
+ " 0x%08x, i_state: %d on CID: %hu\n",
+ cmd->iscsi_opcode, cmd->init_task_tag,
+ state, conn->cid);
+ spin_unlock_bh(&cmd->istate_lock);
+ goto transport_err;
+ }
+ goto get_immediate;
+ } else
+ conn->tx_immediate_queue = 0;
+
+get_response:
+ qr = iscsit_get_cmd_from_response_queue(conn);
+ if (qr) {
+ cmd = qr->cmd;
+ state = qr->state;
+ kmem_cache_free(lio_qr_cache, qr);
+
+ spin_lock_bh(&cmd->istate_lock);
+check_rsp_state:
+ switch (state) {
+ case ISTATE_SEND_DATAIN:
+ spin_unlock_bh(&cmd->istate_lock);
+ ret = iscsit_send_data_in(cmd, conn,
+ &eodr);
+ map_sg = 1;
+ break;
+ case ISTATE_SEND_STATUS:
+ case ISTATE_SEND_STATUS_RECOVERY:
+ spin_unlock_bh(&cmd->istate_lock);
+ use_misc = 1;
+ ret = iscsit_send_status(cmd, conn);
+ break;
+ case ISTATE_SEND_LOGOUTRSP:
+ spin_unlock_bh(&cmd->istate_lock);
+ use_misc = 1;
+ ret = iscsit_send_logout_response(cmd, conn);
+ break;
+ case ISTATE_SEND_ASYNCMSG:
+ spin_unlock_bh(&cmd->istate_lock);
+ use_misc = 1;
+ ret = iscsit_send_conn_drop_async_message(
+ cmd, conn);
+ break;
+ case ISTATE_SEND_NOPIN:
+ spin_unlock_bh(&cmd->istate_lock);
+ use_misc = 1;
+ ret = iscsit_send_nopin_response(cmd, conn);
+ break;
+ case ISTATE_SEND_REJECT:
+ spin_unlock_bh(&cmd->istate_lock);
+ use_misc = 1;
+ ret = iscsit_send_reject(cmd, conn);
+ break;
+ case ISTATE_SEND_TASKMGTRSP:
+ spin_unlock_bh(&cmd->istate_lock);
+ use_misc = 1;
+ ret = iscsit_send_task_mgt_rsp(cmd, conn);
+ if (ret != 0)
+ break;
+ ret = iscsit_tmr_post_handler(cmd, conn);
+ if (ret != 0)
+ iscsit_fall_back_to_erl0(conn->sess);
+ break;
+ case ISTATE_SEND_TEXTRSP:
+ spin_unlock_bh(&cmd->istate_lock);
+ use_misc = 1;
+ ret = iscsit_send_text_rsp(cmd, conn);
+ break;
+ default:
+ pr_err("Unknown Opcode: 0x%02x ITT:"
+ " 0x%08x, i_state: %d on CID: %hu\n",
+ cmd->iscsi_opcode, cmd->init_task_tag,
+ state, conn->cid);
+ spin_unlock_bh(&cmd->istate_lock);
+ goto transport_err;
+ }
+ if (ret < 0) {
+ conn->tx_response_queue = 0;
+ goto transport_err;
+ }
+
+ se_cmd = &cmd->se_cmd;
+
+ if (map_sg && !conn->conn_ops->IFMarker) {
+ if (iscsit_fe_sendpage_sg(cmd, conn) < 0) {
+ conn->tx_response_queue = 0;
+ iscsit_tx_thread_wait_for_tcp(conn);
+ iscsit_unmap_iovec(cmd);
+ goto transport_err;
+ }
+ } else {
+ if (iscsit_send_tx_data(cmd, conn, use_misc) < 0) {
+ conn->tx_response_queue = 0;
+ iscsit_tx_thread_wait_for_tcp(conn);
+ iscsit_unmap_iovec(cmd);
+ goto transport_err;
+ }
+ }
+ map_sg = 0;
+ iscsit_unmap_iovec(cmd);
+
+ spin_lock_bh(&cmd->istate_lock);
+ switch (state) {
+ case ISTATE_SEND_DATAIN:
+ if (!eodr)
+ goto check_rsp_state;
+
+ if (eodr == 1) {
+ cmd->i_state = ISTATE_SENT_LAST_DATAIN;
+ sent_status = 1;
+ eodr = use_misc = 0;
+ } else if (eodr == 2) {
+ cmd->i_state = state =
+ ISTATE_SEND_STATUS;
+ sent_status = 0;
+ eodr = use_misc = 0;
+ goto check_rsp_state;
+ }
+ break;
+ case ISTATE_SEND_STATUS:
+ use_misc = 0;
+ sent_status = 1;
+ break;
+ case ISTATE_SEND_ASYNCMSG:
+ case ISTATE_SEND_NOPIN:
+ case ISTATE_SEND_STATUS_RECOVERY:
+ case ISTATE_SEND_TEXTRSP:
+ use_misc = 0;
+ sent_status = 1;
+ break;
+ case ISTATE_SEND_REJECT:
+ use_misc = 0;
+ if (cmd->cmd_flags & ICF_REJECT_FAIL_CONN) {
+ cmd->cmd_flags &= ~ICF_REJECT_FAIL_CONN;
+ spin_unlock_bh(&cmd->istate_lock);
+ complete(&cmd->reject_comp);
+ goto transport_err;
+ }
+ complete(&cmd->reject_comp);
+ break;
+ case ISTATE_SEND_TASKMGTRSP:
+ use_misc = 0;
+ sent_status = 1;
+ break;
+ case ISTATE_SEND_LOGOUTRSP:
+ spin_unlock_bh(&cmd->istate_lock);
+ if (!iscsit_logout_post_handler(cmd, conn))
+ goto restart;
+ spin_lock_bh(&cmd->istate_lock);
+ use_misc = 0;
+ sent_status = 1;
+ break;
+ default:
+ pr_err("Unknown Opcode: 0x%02x ITT:"
+ " 0x%08x, i_state: %d on CID: %hu\n",
+ cmd->iscsi_opcode, cmd->init_task_tag,
+ cmd->i_state, conn->cid);
+ spin_unlock_bh(&cmd->istate_lock);
+ goto transport_err;
+ }
+
+ if (sent_status) {
+ cmd->i_state = ISTATE_SENT_STATUS;
+ sent_status = 0;
+ }
+ spin_unlock_bh(&cmd->istate_lock);
+
+ if (atomic_read(&conn->check_immediate_queue))
+ goto get_immediate;
+
+ goto get_response;
+ } else
+ conn->tx_response_queue = 0;
+ }
+
+transport_err:
+ iscsit_take_action_for_connection_exit(conn);
+ goto restart;
+out:
+ return 0;
+}
+
+int iscsi_target_rx_thread(void *arg)
+{
+ int ret;
+ u8 buffer[ISCSI_HDR_LEN], opcode;
+ u32 checksum = 0, digest = 0;
+ struct iscsi_conn *conn = NULL;
+ struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg;
+ struct kvec iov;
+ /*
+ * Allow ourselves to be interrupted by SIGINT so that a
+ * connection recovery / failure event can be triggered externally.
+ */
+ allow_signal(SIGINT);
+
+restart:
+ conn = iscsi_rx_thread_pre_handler(ts);
+ if (!conn)
+ goto out;
+
+ while (!kthread_should_stop()) {
+ /*
+ * Ensure that both TX and RX per connection kthreads
+ * are scheduled to run on the same CPU.
+ */
+ iscsit_thread_check_cpumask(conn, current, 0);
+
+ memset(buffer, 0, ISCSI_HDR_LEN);
+ memset(&iov, 0, sizeof(struct kvec));
+
+ iov.iov_base = buffer;
+ iov.iov_len = ISCSI_HDR_LEN;
+
+ ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN);
+ if (ret != ISCSI_HDR_LEN) {
+ iscsit_rx_thread_wait_for_tcp(conn);
+ goto transport_err;
+ }
+
+ /*
+ * Set conn->bad_hdr for use with REJECT PDUs.
+ */
+ memcpy(&conn->bad_hdr, &buffer, ISCSI_HDR_LEN);
+
+ if (conn->conn_ops->HeaderDigest) {
+ iov.iov_base = &digest;
+ iov.iov_len = ISCSI_CRC_LEN;
+
+ ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN);
+ if (ret != ISCSI_CRC_LEN) {
+ iscsit_rx_thread_wait_for_tcp(conn);
+ goto transport_err;
+ }
+
+ iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+ buffer, ISCSI_HDR_LEN,
+ 0, NULL, (u8 *)&checksum);
+
+ if (digest != checksum) {
+ pr_err("HeaderDigest CRC32C failed,"
+ " received 0x%08x, computed 0x%08x\n",
+ digest, checksum);
+ /*
+ * Set the PDU to 0xff so it will intentionally
+ * hit default in the switch below.
+ */
+ memset(buffer, 0xff, ISCSI_HDR_LEN);
+ spin_lock_bh(&conn->sess->session_stats_lock);
+ conn->sess->conn_digest_errors++;
+ spin_unlock_bh(&conn->sess->session_stats_lock);
+ } else {
+ pr_debug("Got HeaderDigest CRC32C"
+ " 0x%08x\n", checksum);
+ }
+ }
+
+ if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT)
+ goto transport_err;
+
+ opcode = buffer[0] & ISCSI_OPCODE_MASK;
+
+		if (conn->sess->sess_ops->SessionType &&
+		    (opcode != ISCSI_OP_TEXT) &&
+		    (opcode != ISCSI_OP_LOGOUT)) {
+ pr_err("Received illegal iSCSI Opcode: 0x%02x"
+ " while in Discovery Session, rejecting.\n", opcode);
+ iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+ buffer, conn);
+ goto transport_err;
+ }
+
+ switch (opcode) {
+ case ISCSI_OP_SCSI_CMD:
+ if (iscsit_handle_scsi_cmd(conn, buffer) < 0)
+ goto transport_err;
+ break;
+ case ISCSI_OP_SCSI_DATA_OUT:
+ if (iscsit_handle_data_out(conn, buffer) < 0)
+ goto transport_err;
+ break;
+ case ISCSI_OP_NOOP_OUT:
+ if (iscsit_handle_nop_out(conn, buffer) < 0)
+ goto transport_err;
+ break;
+ case ISCSI_OP_SCSI_TMFUNC:
+ if (iscsit_handle_task_mgt_cmd(conn, buffer) < 0)
+ goto transport_err;
+ break;
+ case ISCSI_OP_TEXT:
+ if (iscsit_handle_text_cmd(conn, buffer) < 0)
+ goto transport_err;
+ break;
+ case ISCSI_OP_LOGOUT:
+ ret = iscsit_handle_logout_cmd(conn, buffer);
+ if (ret > 0) {
+ wait_for_completion_timeout(&conn->conn_logout_comp,
+ SECONDS_FOR_LOGOUT_COMP * HZ);
+ goto transport_err;
+ } else if (ret < 0)
+ goto transport_err;
+ break;
+ case ISCSI_OP_SNACK:
+ if (iscsit_handle_snack(conn, buffer) < 0)
+ goto transport_err;
+ break;
+ default:
+ pr_err("Got unknown iSCSI OpCode: 0x%02x\n",
+ opcode);
+ if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+ pr_err("Cannot recover from unknown"
+ " opcode while ERL=0, closing iSCSI connection"
+ ".\n");
+ goto transport_err;
+ }
+ if (!conn->conn_ops->OFMarker) {
+ pr_err("Unable to recover from unknown"
+ " opcode while OFMarker=No, closing iSCSI"
+ " connection.\n");
+ goto transport_err;
+ }
+ if (iscsit_recover_from_unknown_opcode(conn) < 0) {
+ pr_err("Unable to recover from unknown"
+ " opcode, closing iSCSI connection.\n");
+ goto transport_err;
+ }
+ break;
+ }
+ }
+
+transport_err:
+ if (!signal_pending(current))
+ atomic_set(&conn->transport_failed, 1);
+ iscsit_take_action_for_connection_exit(conn);
+ goto restart;
+out:
+ return 0;
+}
+
+static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
+{
+ struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL;
+ struct iscsi_session *sess = conn->sess;
+ struct se_cmd *se_cmd;
+	/*
+	 * We expect this function to only ever be called from either RX or TX
+	 * thread context via iscsit_close_connection() once the other context
+	 * has been reset and has returned to its sleeping pre-handler state.
+	 */
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+ if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD)) {
+
+ list_del(&cmd->i_list);
+ spin_unlock_bh(&conn->cmd_lock);
+ iscsit_increment_maxcmdsn(cmd, sess);
+ se_cmd = &cmd->se_cmd;
+ /*
+ * Special cases for active iSCSI TMR, and
+ * transport_lookup_cmd_lun() failing from
+ * iscsit_get_lun_for_cmd() in iscsit_handle_scsi_cmd().
+ */
+ if (cmd->tmr_req && se_cmd->transport_wait_for_tasks)
+ se_cmd->transport_wait_for_tasks(se_cmd, 1, 1);
+ else if (cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD)
+ transport_release_cmd(se_cmd);
+ else
+ iscsit_release_cmd(cmd);
+
+ spin_lock_bh(&conn->cmd_lock);
+ continue;
+ }
+ list_del(&cmd->i_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ iscsit_increment_maxcmdsn(cmd, sess);
+ se_cmd = &cmd->se_cmd;
+
+ if (se_cmd->transport_wait_for_tasks)
+ se_cmd->transport_wait_for_tasks(se_cmd, 1, 1);
+
+ spin_lock_bh(&conn->cmd_lock);
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+}
+
+static void iscsit_stop_timers_for_cmds(
+ struct iscsi_conn *conn)
+{
+ struct iscsi_cmd *cmd;
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+ if (cmd->data_direction == DMA_TO_DEVICE)
+ iscsit_stop_dataout_timer(cmd);
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+}
+
+int iscsit_close_connection(
+ struct iscsi_conn *conn)
+{
+ int conn_logout = (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT);
+ struct iscsi_session *sess = conn->sess;
+
+ pr_debug("Closing iSCSI connection CID %hu on SID:"
+ " %u\n", conn->cid, sess->sid);
+	/*
+	 * Always complete conn_logout_comp just in case the RX Thread is
+	 * sleeping and the logout response never got sent because the
+	 * connection failed.
+	 */
+ complete(&conn->conn_logout_comp);
+
+ iscsi_release_thread_set(conn);
+
+ iscsit_stop_timers_for_cmds(conn);
+ iscsit_stop_nopin_response_timer(conn);
+ iscsit_stop_nopin_timer(conn);
+ iscsit_free_queue_reqs_for_conn(conn);
+
+	/*
+	 * During connection recovery, drop unacknowledged out-of-order
+	 * commands for this connection, and prepare the other commands
+	 * for reallegiance.
+	 *
+	 * During normal operation, clear the out-of-order commands (but
+	 * do not free the struct iscsi_ooo_cmdsn's) and release all
+	 * struct iscsi_cmds.
+	 */
+ if (atomic_read(&conn->connection_recovery)) {
+ iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(conn);
+ iscsit_prepare_cmds_for_realligance(conn);
+ } else {
+ iscsit_clear_ooo_cmdsns_for_conn(conn);
+ iscsit_release_commands_from_conn(conn);
+ }
+
+ /*
+ * Handle decrementing session or connection usage count if
+ * a logout response was not able to be sent because the
+ * connection failed. Fall back to Session Recovery here.
+ */
+ if (atomic_read(&conn->conn_logout_remove)) {
+ if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_SESSION) {
+ iscsit_dec_conn_usage_count(conn);
+ iscsit_dec_session_usage_count(sess);
+ }
+ if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION)
+ iscsit_dec_conn_usage_count(conn);
+
+ atomic_set(&conn->conn_logout_remove, 0);
+ atomic_set(&sess->session_reinstatement, 0);
+ atomic_set(&sess->session_fall_back_to_erl0, 1);
+ }
+
+ spin_lock_bh(&sess->conn_lock);
+ list_del(&conn->conn_list);
+
+	/*
+	 * Attempt to let the Initiator know this connection failed by
+	 * sending a Connection Dropped Async Message on another
+	 * active connection.
+	 */
+ if (atomic_read(&conn->connection_recovery))
+ iscsit_build_conn_drop_async_message(conn);
+
+ spin_unlock_bh(&sess->conn_lock);
+
+	/*
+	 * If connection reinstatement is being performed on this connection,
+	 * complete the connection reinstatement completion that is being
+	 * blocked on in iscsit_cause_connection_reinstatement().
+	 */
+ spin_lock_bh(&conn->state_lock);
+ if (atomic_read(&conn->sleep_on_conn_wait_comp)) {
+ spin_unlock_bh(&conn->state_lock);
+ complete(&conn->conn_wait_comp);
+ wait_for_completion(&conn->conn_post_wait_comp);
+ spin_lock_bh(&conn->state_lock);
+ }
+
+	/*
+	 * If connection reinstatement is being performed on this connection
+	 * by receiving a REMOVECONNFORRECOVERY logout request, complete the
+	 * connection wait rcfr completion that is being blocked on
+	 * in iscsit_connection_reinstatement_rcfr().
+	 */
+ if (atomic_read(&conn->connection_wait_rcfr)) {
+ spin_unlock_bh(&conn->state_lock);
+ complete(&conn->conn_wait_rcfr_comp);
+ wait_for_completion(&conn->conn_post_wait_comp);
+ spin_lock_bh(&conn->state_lock);
+ }
+ atomic_set(&conn->connection_reinstatement, 1);
+ spin_unlock_bh(&conn->state_lock);
+
+ /*
+ * If any other processes are accessing this connection pointer we
+ * must wait until they have completed.
+ */
+ iscsit_check_conn_usage_count(conn);
+
+ if (conn->conn_rx_hash.tfm)
+ crypto_free_hash(conn->conn_rx_hash.tfm);
+ if (conn->conn_tx_hash.tfm)
+ crypto_free_hash(conn->conn_tx_hash.tfm);
+
+ if (conn->conn_cpumask)
+ free_cpumask_var(conn->conn_cpumask);
+
+ kfree(conn->conn_ops);
+ conn->conn_ops = NULL;
+
+ if (conn->sock) {
+ if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) {
+ kfree(conn->sock->file);
+ conn->sock->file = NULL;
+ }
+ sock_release(conn->sock);
+ }
+ conn->thread_set = NULL;
+
+ pr_debug("Moving to TARG_CONN_STATE_FREE.\n");
+ conn->conn_state = TARG_CONN_STATE_FREE;
+ kfree(conn);
+
+ spin_lock_bh(&sess->conn_lock);
+ atomic_dec(&sess->nconn);
+ pr_debug("Decremented iSCSI connection count to %hu from node:"
+ " %s\n", atomic_read(&sess->nconn),
+ sess->sess_ops->InitiatorName);
+	/*
+	 * Make sure that if one connection fails in a non-ERL=2 iSCSI
+	 * Session, they all fail.
+	 */
+ if ((sess->sess_ops->ErrorRecoveryLevel != 2) && !conn_logout &&
+ !atomic_read(&sess->session_logout))
+ atomic_set(&sess->session_fall_back_to_erl0, 1);
+
+	/*
+	 * If this was not the last connection in the session, and we are
+	 * performing session reinstatement or falling back to ERL=0, call
+	 * iscsit_stop_session() without sleeping to shut down the other
+	 * active connections.
+	 */
+ if (atomic_read(&sess->nconn)) {
+ if (!atomic_read(&sess->session_reinstatement) &&
+ !atomic_read(&sess->session_fall_back_to_erl0)) {
+ spin_unlock_bh(&sess->conn_lock);
+ return 0;
+ }
+ if (!atomic_read(&sess->session_stop_active)) {
+ atomic_set(&sess->session_stop_active, 1);
+ spin_unlock_bh(&sess->conn_lock);
+ iscsit_stop_session(sess, 0, 0);
+ return 0;
+ }
+ spin_unlock_bh(&sess->conn_lock);
+ return 0;
+ }
+
+	/*
+	 * If this was the last connection in the session and one of the
+	 * following is occurring:
+	 *
+	 * Session Reinstatement is not being performed and we are falling
+	 * back to ERL=0: call iscsit_close_session().
+	 *
+	 * Session Logout was requested: iscsit_close_session() will be
+	 * called elsewhere.
+	 *
+	 * Session Continuation is not being performed: start the
+	 * Time2Retain handler and check if sleep_on_sess_wait_comp is
+	 * active.
+	 */
+ if (!atomic_read(&sess->session_reinstatement) &&
+ atomic_read(&sess->session_fall_back_to_erl0)) {
+ spin_unlock_bh(&sess->conn_lock);
+ iscsit_close_session(sess);
+
+ return 0;
+ } else if (atomic_read(&sess->session_logout)) {
+ pr_debug("Moving to TARG_SESS_STATE_FREE.\n");
+ sess->session_state = TARG_SESS_STATE_FREE;
+ spin_unlock_bh(&sess->conn_lock);
+
+ if (atomic_read(&sess->sleep_on_sess_wait_comp))
+ complete(&sess->session_wait_comp);
+
+ return 0;
+ } else {
+ pr_debug("Moving to TARG_SESS_STATE_FAILED.\n");
+ sess->session_state = TARG_SESS_STATE_FAILED;
+
+ if (!atomic_read(&sess->session_continuation)) {
+ spin_unlock_bh(&sess->conn_lock);
+ iscsit_start_time2retain_handler(sess);
+ } else
+ spin_unlock_bh(&sess->conn_lock);
+
+ if (atomic_read(&sess->sleep_on_sess_wait_comp))
+ complete(&sess->session_wait_comp);
+
+ return 0;
+ }
+ spin_unlock_bh(&sess->conn_lock);
+
+ return 0;
+}
+
+int iscsit_close_session(struct iscsi_session *sess)
+{
+ struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+ struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+ if (atomic_read(&sess->nconn)) {
+ pr_err("%d connection(s) still exist for iSCSI session"
+ " to %s\n", atomic_read(&sess->nconn),
+ sess->sess_ops->InitiatorName);
+ BUG();
+ }
+
+ spin_lock_bh(&se_tpg->session_lock);
+ atomic_set(&sess->session_logout, 1);
+ atomic_set(&sess->session_reinstatement, 1);
+ iscsit_stop_time2retain_timer(sess);
+ spin_unlock_bh(&se_tpg->session_lock);
+
+	/*
+	 * transport_deregister_session_configfs() will clear the
+	 * struct se_node_acl->nacl_sess pointer now, as an iscsi_np process
+	 * context can set it again with __transport_register_session() in
+	 * iscsi_post_login_handler() after iscsit_stop_session() completes
+	 * in iscsi_np context.
+	 */
+ transport_deregister_session_configfs(sess->se_sess);
+
+	/*
+	 * If any other processes are accessing this session pointer we must
+	 * wait until they have completed.  If we are in interrupt context
+	 * (the time2retain handler) and hold an active session usage count,
+	 * we restart the timer and exit.
+	 */
+ if (!in_interrupt()) {
+ if (iscsit_check_session_usage_count(sess) == 1)
+ iscsit_stop_session(sess, 1, 1);
+ } else {
+ if (iscsit_check_session_usage_count(sess) == 2) {
+ atomic_set(&sess->session_logout, 0);
+ iscsit_start_time2retain_handler(sess);
+ return 0;
+ }
+ }
+
+ transport_deregister_session(sess->se_sess);
+
+ if (sess->sess_ops->ErrorRecoveryLevel == 2)
+ iscsit_free_connection_recovery_entires(sess);
+
+ iscsit_free_all_ooo_cmdsns(sess);
+
+ spin_lock_bh(&se_tpg->session_lock);
+ pr_debug("Moving to TARG_SESS_STATE_FREE.\n");
+ sess->session_state = TARG_SESS_STATE_FREE;
+ pr_debug("Released iSCSI session from node: %s\n",
+ sess->sess_ops->InitiatorName);
+ tpg->nsessions--;
+ if (tpg->tpg_tiqn)
+ tpg->tpg_tiqn->tiqn_nsessions--;
+
+ pr_debug("Decremented number of active iSCSI Sessions on"
+ " iSCSI TPG: %hu to %u\n", tpg->tpgt, tpg->nsessions);
+
+ spin_lock(&sess_idr_lock);
+ idr_remove(&sess_idr, sess->session_index);
+ spin_unlock(&sess_idr_lock);
+
+ kfree(sess->sess_ops);
+ sess->sess_ops = NULL;
+ spin_unlock_bh(&se_tpg->session_lock);
+
+ kfree(sess);
+ return 0;
+}
+
+static void iscsit_logout_post_handler_closesession(
+ struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+
+ iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
+ iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+
+ atomic_set(&conn->conn_logout_remove, 0);
+ complete(&conn->conn_logout_comp);
+
+ iscsit_dec_conn_usage_count(conn);
+ iscsit_stop_session(sess, 1, 1);
+ iscsit_dec_session_usage_count(sess);
+ iscsit_close_session(sess);
+}
+
+static void iscsit_logout_post_handler_samecid(
+ struct iscsi_conn *conn)
+{
+ iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
+ iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+
+ atomic_set(&conn->conn_logout_remove, 0);
+ complete(&conn->conn_logout_comp);
+
+ iscsit_cause_connection_reinstatement(conn, 1);
+ iscsit_dec_conn_usage_count(conn);
+}
+
+static void iscsit_logout_post_handler_diffcid(
+ struct iscsi_conn *conn,
+ u16 cid)
+{
+	struct iscsi_conn *l_conn;
+	struct iscsi_session *sess = conn->sess;
+	bool found = false;
+
+	if (!sess)
+		return;
+
+	spin_lock_bh(&sess->conn_lock);
+	list_for_each_entry(l_conn, &sess->sess_conn_list, conn_list) {
+		if (l_conn->cid == cid) {
+			iscsit_inc_conn_usage_count(l_conn);
+			found = true;
+			break;
+		}
+	}
+	spin_unlock_bh(&sess->conn_lock);
+
+	if (!found)
+		return;
+
+ if (l_conn->sock)
+ l_conn->sock->ops->shutdown(l_conn->sock, RCV_SHUTDOWN);
+
+ spin_lock_bh(&l_conn->state_lock);
+ pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n");
+ l_conn->conn_state = TARG_CONN_STATE_IN_LOGOUT;
+ spin_unlock_bh(&l_conn->state_lock);
+
+ iscsit_cause_connection_reinstatement(l_conn, 1);
+ iscsit_dec_conn_usage_count(l_conn);
+}
+
+/*
+ * Return of 0 causes the TX thread to restart.
+ */
+static int iscsit_logout_post_handler(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ int ret = 0;
+
+ switch (cmd->logout_reason) {
+ case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+ switch (cmd->logout_response) {
+ case ISCSI_LOGOUT_SUCCESS:
+ case ISCSI_LOGOUT_CLEANUP_FAILED:
+ default:
+ iscsit_logout_post_handler_closesession(conn);
+ break;
+ }
+ ret = 0;
+ break;
+ case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+ if (conn->cid == cmd->logout_cid) {
+ switch (cmd->logout_response) {
+ case ISCSI_LOGOUT_SUCCESS:
+ case ISCSI_LOGOUT_CLEANUP_FAILED:
+ default:
+ iscsit_logout_post_handler_samecid(conn);
+ break;
+ }
+ ret = 0;
+ } else {
+ switch (cmd->logout_response) {
+ case ISCSI_LOGOUT_SUCCESS:
+ iscsit_logout_post_handler_diffcid(conn,
+ cmd->logout_cid);
+ break;
+ case ISCSI_LOGOUT_CID_NOT_FOUND:
+ case ISCSI_LOGOUT_CLEANUP_FAILED:
+ default:
+ break;
+ }
+ ret = 1;
+ }
+ break;
+ case ISCSI_LOGOUT_REASON_RECOVERY:
+ switch (cmd->logout_response) {
+ case ISCSI_LOGOUT_SUCCESS:
+ case ISCSI_LOGOUT_CID_NOT_FOUND:
+ case ISCSI_LOGOUT_RECOVERY_UNSUPPORTED:
+ case ISCSI_LOGOUT_CLEANUP_FAILED:
+ default:
+ break;
+ }
+ ret = 1;
+ break;
+ default:
+ break;
+
+ }
+ return ret;
+}
+
+void iscsit_fail_session(struct iscsi_session *sess)
+{
+ struct iscsi_conn *conn;
+
+ spin_lock_bh(&sess->conn_lock);
+ list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+ pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n");
+ conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT;
+ }
+ spin_unlock_bh(&sess->conn_lock);
+
+ pr_debug("Moving to TARG_SESS_STATE_FAILED.\n");
+ sess->session_state = TARG_SESS_STATE_FAILED;
+}
+
+int iscsit_free_session(struct iscsi_session *sess)
+{
+ u16 conn_count = atomic_read(&sess->nconn);
+ struct iscsi_conn *conn, *conn_tmp = NULL;
+ int is_last;
+
+ spin_lock_bh(&sess->conn_lock);
+ atomic_set(&sess->sleep_on_sess_wait_comp, 1);
+
+ list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list,
+ conn_list) {
+ if (conn_count == 0)
+ break;
+
+ if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) {
+ is_last = 1;
+ } else {
+ iscsit_inc_conn_usage_count(conn_tmp);
+ is_last = 0;
+ }
+ iscsit_inc_conn_usage_count(conn);
+
+ spin_unlock_bh(&sess->conn_lock);
+ iscsit_cause_connection_reinstatement(conn, 1);
+ spin_lock_bh(&sess->conn_lock);
+
+ iscsit_dec_conn_usage_count(conn);
+ if (is_last == 0)
+ iscsit_dec_conn_usage_count(conn_tmp);
+
+ conn_count--;
+ }
+
+ if (atomic_read(&sess->nconn)) {
+ spin_unlock_bh(&sess->conn_lock);
+ wait_for_completion(&sess->session_wait_comp);
+ } else
+ spin_unlock_bh(&sess->conn_lock);
+
+ iscsit_close_session(sess);
+ return 0;
+}
+
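+/*
+ * Like iscsit_free_session(), but callers choose whether to block until
+ * the session drains (session_sleep) and whether each connection
+ * reinstatement is performed synchronously (connection_sleep).
+ */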
+void iscsit_stop_session(
+ struct iscsi_session *sess,
+ int session_sleep,
+ int connection_sleep)
+{
+ u16 conn_count = atomic_read(&sess->nconn);
+ struct iscsi_conn *conn, *conn_tmp = NULL;
+ int is_last;
+
+ spin_lock_bh(&sess->conn_lock);
+ if (session_sleep)
+ atomic_set(&sess->sleep_on_sess_wait_comp, 1);
+
+ if (connection_sleep) {
+ list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list,
+ conn_list) {
+ if (conn_count == 0)
+ break;
+
+ if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) {
+ is_last = 1;
+ } else {
+ iscsit_inc_conn_usage_count(conn_tmp);
+ is_last = 0;
+ }
+ iscsit_inc_conn_usage_count(conn);
+
+ spin_unlock_bh(&sess->conn_lock);
+ iscsit_cause_connection_reinstatement(conn, 1);
+ spin_lock_bh(&sess->conn_lock);
+
+ iscsit_dec_conn_usage_count(conn);
+ if (is_last == 0)
+ iscsit_dec_conn_usage_count(conn_tmp);
+ conn_count--;
+ }
+ } else {
+ list_for_each_entry(conn, &sess->sess_conn_list, conn_list)
+ iscsit_cause_connection_reinstatement(conn, 0);
+ }
+
+ if (session_sleep && atomic_read(&sess->nconn)) {
+ spin_unlock_bh(&sess->conn_lock);
+ wait_for_completion(&sess->session_wait_comp);
+ } else
+ spin_unlock_bh(&sess->conn_lock);
+}
+
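+/*
+ * Release every session on the TPG.  Unless force is set, bail out with
+ * -1 while sessions are still present; sessions already logging out,
+ * falling back to ERL=0, or with an expired Time2Retain timer are
+ * skipped.
+ */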
+int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force)
+{
+ struct iscsi_session *sess;
+ struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+ struct se_session *se_sess, *se_sess_tmp;
+ int session_count = 0;
+
+ spin_lock_bh(&se_tpg->session_lock);
+ if (tpg->nsessions && !force) {
+ spin_unlock_bh(&se_tpg->session_lock);
+ return -1;
+ }
+
+ list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
+ sess_list) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+
+ spin_lock(&sess->conn_lock);
+ if (atomic_read(&sess->session_fall_back_to_erl0) ||
+ atomic_read(&sess->session_logout) ||
+ (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+ spin_unlock(&sess->conn_lock);
+ continue;
+ }
+ atomic_set(&sess->session_reinstatement, 1);
+ spin_unlock(&sess->conn_lock);
+ spin_unlock_bh(&se_tpg->session_lock);
+
+ iscsit_free_session(sess);
+ spin_lock_bh(&se_tpg->session_lock);
+
+ session_count++;
+ }
+ spin_unlock_bh(&se_tpg->session_lock);
+
+ pr_debug("Released %d iSCSI Session(s) from Target Portal"
+ " Group: %hu\n", session_count, tpg->tpgt);
+ return 0;
+}
+
+MODULE_DESCRIPTION("iSCSI-Target Driver for mainline target infrastructure");
+MODULE_VERSION("4.1.x");
+MODULE_AUTHOR("nab@Linux-iSCSI.org");
+MODULE_LICENSE("GPL");
+
+module_init(iscsi_target_init_module);
+module_exit(iscsi_target_cleanup_module);
diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h
new file mode 100644
index 0000000..5db2dde
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target.h
@@ -0,0 +1,42 @@
+#ifndef ISCSI_TARGET_H
+#define ISCSI_TARGET_H
+
+extern struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *);
+extern struct iscsi_tiqn *iscsit_get_tiqn(unsigned char *, int);
+extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *);
+extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *);
+extern void iscsit_del_tiqn(struct iscsi_tiqn *);
+extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *);
+extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *);
+extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *,
+ char *, int);
+extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *,
+ struct iscsi_portal_group *);
+extern int iscsit_del_np(struct iscsi_np *);
+extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, struct iscsi_cmd *);
+extern int iscsit_logout_closesession(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_logout_closeconnection(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_send_async_msg(struct iscsi_conn *, u16, u8, u8);
+extern int iscsit_send_r2t(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_build_r2ts_for_cmd(struct iscsi_cmd *, struct iscsi_conn *, int);
+extern void iscsit_thread_get_cpumask(struct iscsi_conn *);
+extern int iscsi_target_tx_thread(void *);
+extern int iscsi_target_rx_thread(void *);
+extern int iscsit_close_connection(struct iscsi_conn *);
+extern int iscsit_close_session(struct iscsi_session *);
+extern void iscsit_fail_session(struct iscsi_session *);
+extern int iscsit_free_session(struct iscsi_session *);
+extern void iscsit_stop_session(struct iscsi_session *, int, int);
+extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int);
+
+extern struct iscsit_global *iscsit_global;
+extern struct target_fabric_configfs *lio_target_fabric_configfs;
+
+extern struct kmem_cache *lio_dr_cache;
+extern struct kmem_cache *lio_ooo_cache;
+extern struct kmem_cache *lio_cmd_cache;
+extern struct kmem_cache *lio_qr_cache;
+extern struct kmem_cache *lio_r2t_cache;
+
+#endif /*** ISCSI_TARGET_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
new file mode 100644
index 0000000..11fd743
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_auth.c
@@ -0,0 +1,490 @@
+/*******************************************************************************
+ * This file houses the main functions for the iSCSI CHAP support
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/scatterlist.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_auth.h"
+
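+/*
+ * Convert two ASCII hex characters (e.g. "3f") into the byte value they
+ * encode.
+ */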
+static unsigned char chap_asciihex_to_binaryhex(unsigned char val[2])
+{
+ unsigned char result = 0;
+ /*
+ * MSB
+ */
+ if ((val[0] >= 'a') && (val[0] <= 'f'))
+ result = ((val[0] - 'a' + 10) & 0xf) << 4;
+ else
+ if ((val[0] >= 'A') && (val[0] <= 'F'))
+ result = ((val[0] - 'A' + 10) & 0xf) << 4;
+ else /* digit */
+ result = ((val[0] - '0') & 0xf) << 4;
+ /*
+ * LSB
+ */
+ if ((val[1] >= 'a') && (val[1] <= 'f'))
+ result |= ((val[1] - 'a' + 10) & 0xf);
+ else
+ if ((val[1] >= 'A') && (val[1] <= 'F'))
+ result |= ((val[1] - 'A' + 10) & 0xf);
+ else /* digit */
+ result |= ((val[1] - '0') & 0xf);
+
+ return result;
+}
+
+static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len)
+{
+ int i, j = 0;
+
+ for (i = 0; i < len; i += 2) {
+ dst[j++] = (unsigned char) chap_asciihex_to_binaryhex(&src[i]);
+ }
+
+ dst[j] = '\0';
+ return j;
+}
+
+static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len)
+{
+ int i;
+
+ for (i = 0; i < src_len; i++) {
+ sprintf(&dst[i*2], "%02x", (int) src[i] & 0xff);
+ }
+}
+
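+/*
+ * Fill a buffer with pseudo-random bytes; each output byte is assembled
+ * from three get_random_bytes() samples folded down to 3+3+2 bits.
+ */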
+static void chap_set_random(char *data, int length)
+{
+ long r;
+ unsigned n;
+
+ while (length > 0) {
+ get_random_bytes(&r, sizeof(long));
+ r = r ^ (r >> 8);
+ r = r ^ (r >> 4);
+ n = r & 0x7;
+
+ get_random_bytes(&r, sizeof(long));
+ r = r ^ (r >> 8);
+ r = r ^ (r >> 5);
+ n = (n << 3) | (r & 0x7);
+
+ get_random_bytes(&r, sizeof(long));
+ r = r ^ (r >> 8);
+ r = r ^ (r >> 5);
+ n = (n << 2) | (r & 0x3);
+
+ *data++ = n;
+ length--;
+ }
+}
+
+static void chap_gen_challenge(
+ struct iscsi_conn *conn,
+ int caller,
+ char *c_str,
+ unsigned int *c_len)
+{
+ unsigned char challenge_asciihex[CHAP_CHALLENGE_LENGTH * 2 + 1];
+ struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+
+ memset(challenge_asciihex, 0, CHAP_CHALLENGE_LENGTH * 2 + 1);
+
+ chap_set_random(chap->challenge, CHAP_CHALLENGE_LENGTH);
+ chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge,
+ CHAP_CHALLENGE_LENGTH);
+ /*
+ * Set CHAP_C, and copy the generated challenge into c_str.
+ */
+ *c_len += sprintf(c_str + *c_len, "CHAP_C=0x%s", challenge_asciihex);
+ *c_len += 1;
+
+ pr_debug("[%s] Sending CHAP_C=0x%s\n\n", (caller) ? "server" : "client",
+ challenge_asciihex);
+}
+
+
+static struct iscsi_chap *chap_server_open(
+ struct iscsi_conn *conn,
+ struct iscsi_node_auth *auth,
+ const char *a_str,
+ char *aic_str,
+ unsigned int *aic_len)
+{
+ struct iscsi_chap *chap;
+
+ if (!(auth->naf_flags & NAF_USERID_SET) ||
+ !(auth->naf_flags & NAF_PASSWORD_SET)) {
+ pr_err("CHAP user or password not set for"
+ " Initiator ACL\n");
+ return NULL;
+ }
+
+ conn->auth_protocol = kzalloc(sizeof(struct iscsi_chap), GFP_KERNEL);
+ if (!conn->auth_protocol)
+ return NULL;
+
+ chap = (struct iscsi_chap *) conn->auth_protocol;
+ /*
+	 * We only support the MD5 digest algorithm presently.
+ */
+	if (strncmp(a_str, "CHAP_A=5", 8)) {
+		pr_err("CHAP_A is not MD5.\n");
+		kfree(conn->auth_protocol);
+		conn->auth_protocol = NULL;
+		return NULL;
+	}
+ pr_debug("[server] Got CHAP_A=5\n");
+ /*
+ * Send back CHAP_A set to MD5.
+ */
+ *aic_len = sprintf(aic_str, "CHAP_A=5");
+ *aic_len += 1;
+ chap->digest_type = CHAP_DIGEST_MD5;
+ pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type);
+ /*
+ * Set Identifier.
+ */
+ chap->id = ISCSI_TPG_C(conn)->tpg_chap_id++;
+ *aic_len += sprintf(aic_str + *aic_len, "CHAP_I=%d", chap->id);
+ *aic_len += 1;
+ pr_debug("[server] Sending CHAP_I=%d\n", chap->id);
+ /*
+ * Generate Challenge.
+ */
+ chap_gen_challenge(conn, 1, aic_str, aic_len);
+
+ return chap;
+}
+
+static void chap_close(struct iscsi_conn *conn)
+{
+ kfree(conn->auth_protocol);
+ conn->auth_protocol = NULL;
+}
+
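+/*
+ * Verify the initiator's CHAP_R against the RFC 1994 digest
+ * MD5(id || secret || challenge), then, when mutual authentication is
+ * enabled, compute the target's own CHAP_N/CHAP_R reply from the
+ * initiator-supplied CHAP_I and CHAP_C.
+ */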
+static int chap_server_compute_md5(
+ struct iscsi_conn *conn,
+ struct iscsi_node_auth *auth,
+ char *nr_in_ptr,
+ char *nr_out_ptr,
+ unsigned int *nr_out_len)
+{
+ char *endptr;
+ unsigned char id, digest[MD5_SIGNATURE_SIZE];
+ unsigned char type, response[MD5_SIGNATURE_SIZE * 2 + 2];
+ unsigned char identifier[10], *challenge = NULL;
+ unsigned char *challenge_binhex = NULL;
+ unsigned char client_digest[MD5_SIGNATURE_SIZE];
+ unsigned char server_digest[MD5_SIGNATURE_SIZE];
+ unsigned char chap_n[MAX_CHAP_N_SIZE], chap_r[MAX_RESPONSE_LENGTH];
+ struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+ struct crypto_hash *tfm;
+ struct hash_desc desc;
+ struct scatterlist sg;
+ int auth_ret = -1, ret, challenge_len;
+
+ memset(identifier, 0, 10);
+ memset(chap_n, 0, MAX_CHAP_N_SIZE);
+ memset(chap_r, 0, MAX_RESPONSE_LENGTH);
+ memset(digest, 0, MD5_SIGNATURE_SIZE);
+ memset(response, 0, MD5_SIGNATURE_SIZE * 2 + 2);
+ memset(client_digest, 0, MD5_SIGNATURE_SIZE);
+ memset(server_digest, 0, MD5_SIGNATURE_SIZE);
+
+ challenge = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL);
+ if (!challenge) {
+ pr_err("Unable to allocate challenge buffer\n");
+ goto out;
+ }
+
+ challenge_binhex = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL);
+ if (!challenge_binhex) {
+ pr_err("Unable to allocate challenge_binhex buffer\n");
+ goto out;
+ }
+ /*
+ * Extract CHAP_N.
+ */
+ if (extract_param(nr_in_ptr, "CHAP_N", MAX_CHAP_N_SIZE, chap_n,
+ &type) < 0) {
+ pr_err("Could not find CHAP_N.\n");
+ goto out;
+ }
+ if (type == HEX) {
+		pr_err("CHAP_N value is hex encoded, expected a text string.\n");
+ goto out;
+ }
+
+ if (memcmp(chap_n, auth->userid, strlen(auth->userid)) != 0) {
+ pr_err("CHAP_N values do not match!\n");
+ goto out;
+ }
+ pr_debug("[server] Got CHAP_N=%s\n", chap_n);
+ /*
+ * Extract CHAP_R.
+ */
+ if (extract_param(nr_in_ptr, "CHAP_R", MAX_RESPONSE_LENGTH, chap_r,
+ &type) < 0) {
+ pr_err("Could not find CHAP_R.\n");
+ goto out;
+ }
+ if (type != HEX) {
+		pr_err("CHAP_R value is not hex encoded.\n");
+ goto out;
+ }
+
+ pr_debug("[server] Got CHAP_R=%s\n", chap_r);
+ chap_string_to_hex(client_digest, chap_r, strlen(chap_r));
+
+ tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm)) {
+ pr_err("Unable to allocate struct crypto_hash\n");
+ goto out;
+ }
+ desc.tfm = tfm;
+ desc.flags = 0;
+
+ ret = crypto_hash_init(&desc);
+ if (ret < 0) {
+ pr_err("crypto_hash_init() failed\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+
+ sg_init_one(&sg, (void *)&chap->id, 1);
+ ret = crypto_hash_update(&desc, &sg, 1);
+ if (ret < 0) {
+ pr_err("crypto_hash_update() failed for id\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+
+ sg_init_one(&sg, (void *)&auth->password, strlen(auth->password));
+ ret = crypto_hash_update(&desc, &sg, strlen(auth->password));
+ if (ret < 0) {
+ pr_err("crypto_hash_update() failed for password\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+
+ sg_init_one(&sg, (void *)chap->challenge, CHAP_CHALLENGE_LENGTH);
+ ret = crypto_hash_update(&desc, &sg, CHAP_CHALLENGE_LENGTH);
+ if (ret < 0) {
+ pr_err("crypto_hash_update() failed for challenge\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+
+ ret = crypto_hash_final(&desc, server_digest);
+ if (ret < 0) {
+ pr_err("crypto_hash_final() failed for server digest\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+ crypto_free_hash(tfm);
+
+ chap_binaryhex_to_asciihex(response, server_digest, MD5_SIGNATURE_SIZE);
+ pr_debug("[server] MD5 Server Digest: %s\n", response);
+
+ if (memcmp(server_digest, client_digest, MD5_SIGNATURE_SIZE) != 0) {
+ pr_debug("[server] MD5 Digests do not match!\n\n");
+ goto out;
+ } else
+		pr_debug("[server] MD5 Digests match, CHAP authentication"
+				" successful.\n\n");
+ /*
+ * One way authentication has succeeded, return now if mutual
+ * authentication is not enabled.
+ */
+ if (!auth->authenticate_target) {
+ kfree(challenge);
+ kfree(challenge_binhex);
+ return 0;
+ }
+ /*
+ * Get CHAP_I.
+ */
+ if (extract_param(nr_in_ptr, "CHAP_I", 10, identifier, &type) < 0) {
+ pr_err("Could not find CHAP_I.\n");
+ goto out;
+ }
+
+ if (type == HEX)
+ id = (unsigned char)simple_strtoul((char *)&identifier[2],
+ &endptr, 0);
+ else
+ id = (unsigned char)simple_strtoul(identifier, &endptr, 0);
+ /*
+	 * RFC 1994 says the Identifier is one octet (8 bits).
+ */
+ pr_debug("[server] Got CHAP_I=%d\n", id);
+ /*
+ * Get CHAP_C.
+ */
+ if (extract_param(nr_in_ptr, "CHAP_C", CHAP_CHALLENGE_STR_LEN,
+ challenge, &type) < 0) {
+ pr_err("Could not find CHAP_C.\n");
+ goto out;
+ }
+
+ if (type != HEX) {
+		pr_err("CHAP_C value is not hex encoded.\n");
+ goto out;
+ }
+ pr_debug("[server] Got CHAP_C=%s\n", challenge);
+ challenge_len = chap_string_to_hex(challenge_binhex, challenge,
+ strlen(challenge));
+ if (!challenge_len) {
+ pr_err("Unable to convert incoming challenge\n");
+ goto out;
+ }
+ /*
+ * Generate CHAP_N and CHAP_R for mutual authentication.
+ */
+ tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm)) {
+ pr_err("Unable to allocate struct crypto_hash\n");
+ goto out;
+ }
+ desc.tfm = tfm;
+ desc.flags = 0;
+
+ ret = crypto_hash_init(&desc);
+ if (ret < 0) {
+ pr_err("crypto_hash_init() failed\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+
+ sg_init_one(&sg, (void *)&id, 1);
+ ret = crypto_hash_update(&desc, &sg, 1);
+ if (ret < 0) {
+ pr_err("crypto_hash_update() failed for id\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+
+ sg_init_one(&sg, (void *)auth->password_mutual,
+ strlen(auth->password_mutual));
+ ret = crypto_hash_update(&desc, &sg, strlen(auth->password_mutual));
+ if (ret < 0) {
+ pr_err("crypto_hash_update() failed for"
+ " password_mutual\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+ /*
+	 * Hash the received challenge (already converted to binary above).
+ */
+ sg_init_one(&sg, (void *)challenge_binhex, challenge_len);
+ ret = crypto_hash_update(&desc, &sg, challenge_len);
+ if (ret < 0) {
+ pr_err("crypto_hash_update() failed for ma challenge\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+
+ ret = crypto_hash_final(&desc, digest);
+ if (ret < 0) {
+ pr_err("crypto_hash_final() failed for ma digest\n");
+ crypto_free_hash(tfm);
+ goto out;
+ }
+ crypto_free_hash(tfm);
+ /*
+ * Generate CHAP_N and CHAP_R.
+ */
+ *nr_out_len = sprintf(nr_out_ptr, "CHAP_N=%s", auth->userid_mutual);
+ *nr_out_len += 1;
+ pr_debug("[server] Sending CHAP_N=%s\n", auth->userid_mutual);
+ /*
+	 * Convert the response digest from binary to ASCII hex.
+ */
+ chap_binaryhex_to_asciihex(response, digest, MD5_SIGNATURE_SIZE);
+ *nr_out_len += sprintf(nr_out_ptr + *nr_out_len, "CHAP_R=0x%s",
+ response);
+ *nr_out_len += 1;
+ pr_debug("[server] Sending CHAP_R=0x%s\n", response);
+ auth_ret = 0;
+out:
+ kfree(challenge);
+ kfree(challenge_binhex);
+ return auth_ret;
+}
+
+static int chap_got_response(
+ struct iscsi_conn *conn,
+ struct iscsi_node_auth *auth,
+ char *nr_in_ptr,
+ char *nr_out_ptr,
+ unsigned int *nr_out_len)
+{
+ struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+
+ switch (chap->digest_type) {
+ case CHAP_DIGEST_MD5:
+ if (chap_server_compute_md5(conn, auth, nr_in_ptr,
+ nr_out_ptr, nr_out_len) < 0)
+ return -1;
+ return 0;
+ default:
+ pr_err("Unknown CHAP digest type %d!\n",
+ chap->digest_type);
+ return -1;
+ }
+}
+
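+/*
+ * Entry point from login negotiation.  Returns 0 while more CHAP
+ * exchanges are expected, 1 once authentication has succeeded, and 2 on
+ * failure.
+ */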
+u32 chap_main_loop(
+ struct iscsi_conn *conn,
+ struct iscsi_node_auth *auth,
+ char *in_text,
+ char *out_text,
+ int *in_len,
+ int *out_len)
+{
+ struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol;
+
+ if (!chap) {
+ chap = chap_server_open(conn, auth, in_text, out_text, out_len);
+ if (!chap)
+ return 2;
+ chap->chap_state = CHAP_STAGE_SERVER_AIC;
+ return 0;
+ } else if (chap->chap_state == CHAP_STAGE_SERVER_AIC) {
+ convert_null_to_semi(in_text, *in_len);
+ if (chap_got_response(conn, auth, in_text, out_text,
+ out_len) < 0) {
+ chap_close(conn);
+ return 2;
+ }
+ if (auth->authenticate_target)
+ chap->chap_state = CHAP_STAGE_SERVER_NR;
+ else
+ *out_len = 0;
+ chap_close(conn);
+ return 1;
+ }
+
+ return 2;
+}
diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h
new file mode 100644
index 0000000..2f463c0
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_auth.h
@@ -0,0 +1,31 @@
+#ifndef _ISCSI_CHAP_H_
+#define _ISCSI_CHAP_H_
+
+#define CHAP_DIGEST_MD5 5
+#define CHAP_DIGEST_SHA 6
+
+#define CHAP_CHALLENGE_LENGTH 16
+#define CHAP_CHALLENGE_STR_LEN 4096
+#define MAX_RESPONSE_LENGTH 64 /* sufficient for MD5 */
+#define MAX_CHAP_N_SIZE 512
+
+#define MD5_SIGNATURE_SIZE 16 /* 16 bytes in a MD5 message digest */
+
+#define CHAP_STAGE_CLIENT_A 1
+#define CHAP_STAGE_SERVER_AIC 2
+#define CHAP_STAGE_CLIENT_NR 3
+#define CHAP_STAGE_CLIENT_NRIC 4
+#define CHAP_STAGE_SERVER_NR 5
+
+extern u32 chap_main_loop(struct iscsi_conn *, struct iscsi_node_auth *, char *, char *,
+ int *, int *);
+
+struct iscsi_chap {
+ unsigned char digest_type;
+ unsigned char id;
+ unsigned char challenge[CHAP_CHALLENGE_LENGTH];
+ unsigned int authenticate_target;
+ unsigned int chap_state;
+} ____cacheline_aligned;
+
+#endif /*** _ISCSI_CHAP_H_ ***/
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
new file mode 100644
index 0000000..32bb92c
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -0,0 +1,1882 @@
+/*******************************************************************************
+ * This file contains the configfs implementation for iSCSI Target mode
+ * from the LIO-Target Project.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ****************************************************************************/
+
+#include <linux/configfs.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_fabric_configfs.h>
+#include <target/target_core_fabric_lib.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_configfs.h>
+#include <target/configfs_macros.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_nodeattrib.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_stat.h"
+#include "iscsi_target_configfs.h"
+
+struct target_fabric_configfs *lio_target_fabric_configfs;
+
+struct lio_target_configfs_attribute {
+ struct configfs_attribute attr;
+ ssize_t (*show)(void *, char *);
+ ssize_t (*store)(void *, const char *, size_t);
+};
+
+struct iscsi_portal_group *lio_get_tpg_from_tpg_item(
+ struct config_item *item,
+ struct iscsi_tiqn **tiqn_out)
+{
+ struct se_portal_group *se_tpg = container_of(to_config_group(item),
+ struct se_portal_group, tpg_group);
+ struct iscsi_portal_group *tpg =
+ (struct iscsi_portal_group *)se_tpg->se_tpg_fabric_ptr;
+ int ret;
+
+ if (!tpg) {
+ pr_err("Unable to locate struct iscsi_portal_group "
+ "pointer\n");
+ return NULL;
+ }
+ ret = iscsit_get_tpg(tpg);
+ if (ret < 0)
+ return NULL;
+
+ *tiqn_out = tpg->tpg_tiqn;
+ return tpg;
+}
+
+/* Start items for lio_target_portal_cit */
+
+static ssize_t lio_target_np_show_sctp(
+ struct se_tpg_np *se_tpg_np,
+ char *page)
+{
+ struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np,
+ struct iscsi_tpg_np, se_tpg_np);
+ struct iscsi_tpg_np *tpg_np_sctp;
+ ssize_t rb;
+
+ tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP);
+ if (tpg_np_sctp)
+ rb = sprintf(page, "1\n");
+ else
+ rb = sprintf(page, "0\n");
+
+ return rb;
+}
+
+static ssize_t lio_target_np_store_sctp(
+ struct se_tpg_np *se_tpg_np,
+ const char *page,
+ size_t count)
+{
+ struct iscsi_np *np;
+ struct iscsi_portal_group *tpg;
+ struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np,
+ struct iscsi_tpg_np, se_tpg_np);
+ struct iscsi_tpg_np *tpg_np_sctp = NULL;
+ char *endptr;
+ u32 op;
+ int ret;
+
+ op = simple_strtoul(page, &endptr, 0);
+ if ((op != 1) && (op != 0)) {
+ pr_err("Illegal value for tpg_enable: %u\n", op);
+ return -EINVAL;
+ }
+ np = tpg_np->tpg_np;
+ if (!np) {
+ pr_err("Unable to locate struct iscsi_np from"
+ " struct iscsi_tpg_np\n");
+ return -EINVAL;
+ }
+
+ tpg = tpg_np->tpg;
+ if (iscsit_get_tpg(tpg) < 0)
+ return -EINVAL;
+
+ if (op) {
+ /*
+ * Use existing np->np_sockaddr for SCTP network portal reference
+ */
+ tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr,
+ np->np_ip, tpg_np, ISCSI_SCTP_TCP);
+ if (!tpg_np_sctp || IS_ERR(tpg_np_sctp))
+ goto out;
+ } else {
+ tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP);
+ if (!tpg_np_sctp)
+ goto out;
+
+ ret = iscsit_tpg_del_network_portal(tpg, tpg_np_sctp);
+ if (ret < 0)
+ goto out;
+ }
+
+ iscsit_put_tpg(tpg);
+ return count;
+out:
+ iscsit_put_tpg(tpg);
+ return -EINVAL;
+}
+
+TF_NP_BASE_ATTR(lio_target, sctp, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_portal_attrs[] = {
+ &lio_target_np_sctp.attr,
+ NULL,
+};
+
+/* End items for lio_target_portal_cit */
+
+/* Start items for lio_target_np_cit */
+
+#define MAX_PORTAL_LEN 256
+
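+/*
+ * Parse a network portal string of the form "<ipv4>:<port>" or
+ * "[<ipv6>]:<port>" and register it as an ISCSI_TCP network portal.
+ */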
+struct se_tpg_np *lio_target_call_addnptotpg(
+ struct se_portal_group *se_tpg,
+ struct config_group *group,
+ const char *name)
+{
+ struct iscsi_portal_group *tpg;
+ struct iscsi_tpg_np *tpg_np;
+ char *str, *str2, *ip_str, *port_str;
+ struct __kernel_sockaddr_storage sockaddr;
+ struct sockaddr_in *sock_in;
+ struct sockaddr_in6 *sock_in6;
+ unsigned long port;
+ int ret;
+ char buf[MAX_PORTAL_LEN + 1];
+
+ if (strlen(name) > MAX_PORTAL_LEN) {
+ pr_err("strlen(name): %d exceeds MAX_PORTAL_LEN: %d\n",
+ (int)strlen(name), MAX_PORTAL_LEN);
+ return ERR_PTR(-EOVERFLOW);
+ }
+ memset(buf, 0, MAX_PORTAL_LEN + 1);
+ snprintf(buf, MAX_PORTAL_LEN, "%s", name);
+
+ memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage));
+
+ str = strstr(buf, "[");
+ if (str) {
+ const char *end;
+
+ str2 = strstr(str, "]");
+ if (!str2) {
+ pr_err("Unable to locate trailing \"]\""
+ " in IPv6 iSCSI network portal address\n");
+ return ERR_PTR(-EINVAL);
+ }
+ str++; /* Skip over leading "[" */
+ *str2 = '\0'; /* Terminate the IPv6 address */
+ str2++; /* Skip over the "]" */
+ port_str = strstr(str2, ":");
+ if (!port_str) {
+ pr_err("Unable to locate \":port\""
+ " in IPv6 iSCSI network portal address\n");
+ return ERR_PTR(-EINVAL);
+ }
+ *port_str = '\0'; /* Terminate string for IP */
+ port_str++; /* Skip over ":" */
+
+ ret = strict_strtoul(port_str, 0, &port);
+ if (ret < 0) {
+ pr_err("strict_strtoul() failed for port_str: %d\n", ret);
+ return ERR_PTR(ret);
+ }
+ sock_in6 = (struct sockaddr_in6 *)&sockaddr;
+ sock_in6->sin6_family = AF_INET6;
+ sock_in6->sin6_port = htons((unsigned short)port);
+ ret = in6_pton(str, IPV6_ADDRESS_SPACE,
+ (void *)&sock_in6->sin6_addr.in6_u, -1, &end);
+ if (ret <= 0) {
+ pr_err("in6_pton returned: %d\n", ret);
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ str = ip_str = &buf[0];
+ port_str = strstr(ip_str, ":");
+ if (!port_str) {
+ pr_err("Unable to locate \":port\""
+ " in IPv4 iSCSI network portal address\n");
+ return ERR_PTR(-EINVAL);
+ }
+ *port_str = '\0'; /* Terminate string for IP */
+ port_str++; /* Skip over ":" */
+
+ ret = strict_strtoul(port_str, 0, &port);
+ if (ret < 0) {
+ pr_err("strict_strtoul() failed for port_str: %d\n", ret);
+ return ERR_PTR(ret);
+ }
+ sock_in = (struct sockaddr_in *)&sockaddr;
+ sock_in->sin_family = AF_INET;
+ sock_in->sin_port = htons((unsigned short)port);
+ sock_in->sin_addr.s_addr = in_aton(ip_str);
+ }
+ tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
+ ret = iscsit_get_tpg(tpg);
+ if (ret < 0)
+ return ERR_PTR(-EINVAL);
+
+ pr_debug("LIO_Target_ConfigFS: REGISTER -> %s TPGT: %hu"
+ " PORTAL: %s\n",
+ config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item),
+ tpg->tpgt, name);
+ /*
+ * Assume ISCSI_TCP by default. Other network portals for other
+ * iSCSI fabrics:
+ *
+ * Traditional iSCSI over SCTP (initial support)
+ * iSER/TCP (TODO, hardware available)
+ * iSER/SCTP (TODO, software emulation with osc-iwarp)
+ * iSER/IB (TODO, hardware available)
+ *
+	 * can be enabled with attributes under
+	 * /sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/
+ *
+ */
+ tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL,
+ ISCSI_TCP);
+ if (IS_ERR(tpg_np)) {
+ iscsit_put_tpg(tpg);
+ return ERR_PTR(PTR_ERR(tpg_np));
+ }
+ pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n");
+
+ iscsit_put_tpg(tpg);
+ return &tpg_np->se_tpg_np;
+}
+
+static void lio_target_call_delnpfromtpg(
+ struct se_tpg_np *se_tpg_np)
+{
+ struct iscsi_portal_group *tpg;
+ struct iscsi_tpg_np *tpg_np;
+ struct se_portal_group *se_tpg;
+ int ret;
+
+ tpg_np = container_of(se_tpg_np, struct iscsi_tpg_np, se_tpg_np);
+ tpg = tpg_np->tpg;
+ ret = iscsit_get_tpg(tpg);
+ if (ret < 0)
+ return;
+
+ se_tpg = &tpg->tpg_se_tpg;
+	stats_cg->default_groups = kzalloc(sizeof(struct config_group *) * 6,
+ " PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item),
+ tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port);
+
+ ret = iscsit_tpg_del_network_portal(tpg, tpg_np);
+ if (ret < 0)
+ goto out;
+
+ pr_debug("LIO_Target_ConfigFS: delnpfromtpg done!\n");
+out:
+ iscsit_put_tpg(tpg);
+}
+
+/* End items for lio_target_np_cit */
+
+/* Start items for lio_target_nacl_attrib_cit */
+
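+/*
+ * DEF_NACL_ATTRIB() generates the configfs show/store handlers for one
+ * per-initiator node attribute; the store side funnels the parsed u32
+ * into the matching iscsit_na_<name>() setter.
+ */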
+#define DEF_NACL_ATTRIB(name) \
+static ssize_t iscsi_nacl_attrib_show_##name( \
+ struct se_node_acl *se_nacl, \
+ char *page) \
+{ \
+ struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \
+ se_node_acl); \
+ \
+ return sprintf(page, "%u\n", ISCSI_NODE_ATTRIB(nacl)->name); \
+} \
+ \
+static ssize_t iscsi_nacl_attrib_store_##name( \
+ struct se_node_acl *se_nacl, \
+ const char *page, \
+ size_t count) \
+{ \
+ struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \
+ se_node_acl); \
+ char *endptr; \
+ u32 val; \
+ int ret; \
+ \
+ val = simple_strtoul(page, &endptr, 0); \
+ ret = iscsit_na_##name(nacl, val); \
+ if (ret < 0) \
+ return ret; \
+ \
+ return count; \
+}
+
+#define NACL_ATTR(_name, _mode) TF_NACL_ATTRIB_ATTR(iscsi, _name, _mode);
+/*
+ * Define iscsi_node_attrib_s_dataout_timeout
+ */
+DEF_NACL_ATTRIB(dataout_timeout);
+NACL_ATTR(dataout_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_dataout_timeout_retries
+ */
+DEF_NACL_ATTRIB(dataout_timeout_retries);
+NACL_ATTR(dataout_timeout_retries, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_default_erl
+ */
+DEF_NACL_ATTRIB(default_erl);
+NACL_ATTR(default_erl, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_nopin_timeout
+ */
+DEF_NACL_ATTRIB(nopin_timeout);
+NACL_ATTR(nopin_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_nopin_response_timeout
+ */
+DEF_NACL_ATTRIB(nopin_response_timeout);
+NACL_ATTR(nopin_response_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_datain_pdu_offsets
+ */
+DEF_NACL_ATTRIB(random_datain_pdu_offsets);
+NACL_ATTR(random_datain_pdu_offsets, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_datain_seq_offsets
+ */
+DEF_NACL_ATTRIB(random_datain_seq_offsets);
+NACL_ATTR(random_datain_seq_offsets, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_node_attrib_s_random_r2t_offsets
+ */
+DEF_NACL_ATTRIB(random_r2t_offsets);
+NACL_ATTR(random_r2t_offsets, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_nacl_attrib_attrs[] = {
+ &iscsi_nacl_attrib_dataout_timeout.attr,
+ &iscsi_nacl_attrib_dataout_timeout_retries.attr,
+ &iscsi_nacl_attrib_default_erl.attr,
+ &iscsi_nacl_attrib_nopin_timeout.attr,
+ &iscsi_nacl_attrib_nopin_response_timeout.attr,
+ &iscsi_nacl_attrib_random_datain_pdu_offsets.attr,
+ &iscsi_nacl_attrib_random_datain_seq_offsets.attr,
+ &iscsi_nacl_attrib_random_r2t_offsets.attr,
+ NULL,
+};
+
+/* End items for lio_target_nacl_attrib_cit */
+
+/* Start items for lio_target_nacl_auth_cit */
+
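+/*
+ * The auth attribute macros below guard every access with
+ * capable(CAP_SYS_ADMIN); writing the literal string "NULL" clears a
+ * previously set credential.
+ */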
+#define __DEF_NACL_AUTH_STR(prefix, name, flags) \
+static ssize_t __iscsi_##prefix##_show_##name( \
+ struct iscsi_node_acl *nacl, \
+ char *page) \
+{ \
+ struct iscsi_node_auth *auth = &nacl->node_auth; \
+ \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ return snprintf(page, PAGE_SIZE, "%s\n", auth->name); \
+} \
+ \
+static ssize_t __iscsi_##prefix##_store_##name( \
+ struct iscsi_node_acl *nacl, \
+ const char *page, \
+ size_t count) \
+{ \
+ struct iscsi_node_auth *auth = &nacl->node_auth; \
+ \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ \
+	snprintf(auth->name, sizeof(auth->name), "%s", page);		\
+ if (!strncmp("NULL", auth->name, 4)) \
+ auth->naf_flags &= ~flags; \
+ else \
+ auth->naf_flags |= flags; \
+ \
+ if ((auth->naf_flags & NAF_USERID_IN_SET) && \
+ (auth->naf_flags & NAF_PASSWORD_IN_SET)) \
+ auth->authenticate_target = 1; \
+ else \
+ auth->authenticate_target = 0; \
+ \
+ return count; \
+}
+
+#define __DEF_NACL_AUTH_INT(prefix, name) \
+static ssize_t __iscsi_##prefix##_show_##name( \
+ struct iscsi_node_acl *nacl, \
+ char *page) \
+{ \
+ struct iscsi_node_auth *auth = &nacl->node_auth; \
+ \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ \
+ return snprintf(page, PAGE_SIZE, "%d\n", auth->name); \
+}
+
+#define DEF_NACL_AUTH_STR(name, flags) \
+ __DEF_NACL_AUTH_STR(nacl_auth, name, flags) \
+static ssize_t iscsi_nacl_auth_show_##name( \
+ struct se_node_acl *nacl, \
+ char *page) \
+{ \
+ return __iscsi_nacl_auth_show_##name(container_of(nacl, \
+ struct iscsi_node_acl, se_node_acl), page); \
+} \
+static ssize_t iscsi_nacl_auth_store_##name( \
+ struct se_node_acl *nacl, \
+ const char *page, \
+ size_t count) \
+{ \
+ return __iscsi_nacl_auth_store_##name(container_of(nacl, \
+ struct iscsi_node_acl, se_node_acl), page, count); \
+}
+
+#define DEF_NACL_AUTH_INT(name) \
+ __DEF_NACL_AUTH_INT(nacl_auth, name) \
+static ssize_t iscsi_nacl_auth_show_##name( \
+ struct se_node_acl *nacl, \
+ char *page) \
+{ \
+ return __iscsi_nacl_auth_show_##name(container_of(nacl, \
+ struct iscsi_node_acl, se_node_acl), page); \
+}
+
+#define AUTH_ATTR(_name, _mode) TF_NACL_AUTH_ATTR(iscsi, _name, _mode);
+#define AUTH_ATTR_RO(_name) TF_NACL_AUTH_ATTR_RO(iscsi, _name);
+
+/*
+ * One-way authentication userid
+ */
+DEF_NACL_AUTH_STR(userid, NAF_USERID_SET);
+AUTH_ATTR(userid, S_IRUGO | S_IWUSR);
+/*
+ * One-way authentication password
+ */
+DEF_NACL_AUTH_STR(password, NAF_PASSWORD_SET);
+AUTH_ATTR(password, S_IRUGO | S_IWUSR);
+/*
+ * Enforce mutual authentication
+ */
+DEF_NACL_AUTH_INT(authenticate_target);
+AUTH_ATTR_RO(authenticate_target);
+/*
+ * Mutual authentication userid
+ */
+DEF_NACL_AUTH_STR(userid_mutual, NAF_USERID_IN_SET);
+AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR);
+/*
+ * Mutual authentication password
+ */
+DEF_NACL_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET);
+AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_nacl_auth_attrs[] = {
+ &iscsi_nacl_auth_userid.attr,
+ &iscsi_nacl_auth_password.attr,
+ &iscsi_nacl_auth_authenticate_target.attr,
+ &iscsi_nacl_auth_userid_mutual.attr,
+ &iscsi_nacl_auth_password_mutual.attr,
+ NULL,
+};
+
+/* End items for lio_target_nacl_auth_cit */
+
+/* Start items for lio_target_nacl_param_cit */
+
+#define DEF_NACL_PARAM(name) \
+static ssize_t iscsi_nacl_param_show_##name( \
+ struct se_node_acl *se_nacl, \
+ char *page) \
+{ \
+ struct iscsi_session *sess; \
+ struct se_session *se_sess; \
+ ssize_t rb; \
+ \
+ spin_lock_bh(&se_nacl->nacl_sess_lock); \
+ se_sess = se_nacl->nacl_sess; \
+ if (!se_sess) { \
+ rb = snprintf(page, PAGE_SIZE, \
+ "No Active iSCSI Session\n"); \
+ } else { \
+ sess = se_sess->fabric_sess_ptr; \
+ rb = snprintf(page, PAGE_SIZE, "%u\n", \
+ (u32)sess->sess_ops->name); \
+ } \
+ spin_unlock_bh(&se_nacl->nacl_sess_lock); \
+ \
+ return rb; \
+}
+
+#define NACL_PARAM_ATTR(_name) TF_NACL_PARAM_ATTR_RO(iscsi, _name);
+
+DEF_NACL_PARAM(MaxConnections);
+NACL_PARAM_ATTR(MaxConnections);
+
+DEF_NACL_PARAM(InitialR2T);
+NACL_PARAM_ATTR(InitialR2T);
+
+DEF_NACL_PARAM(ImmediateData);
+NACL_PARAM_ATTR(ImmediateData);
+
+DEF_NACL_PARAM(MaxBurstLength);
+NACL_PARAM_ATTR(MaxBurstLength);
+
+DEF_NACL_PARAM(FirstBurstLength);
+NACL_PARAM_ATTR(FirstBurstLength);
+
+DEF_NACL_PARAM(DefaultTime2Wait);
+NACL_PARAM_ATTR(DefaultTime2Wait);
+
+DEF_NACL_PARAM(DefaultTime2Retain);
+NACL_PARAM_ATTR(DefaultTime2Retain);
+
+DEF_NACL_PARAM(MaxOutstandingR2T);
+NACL_PARAM_ATTR(MaxOutstandingR2T);
+
+DEF_NACL_PARAM(DataPDUInOrder);
+NACL_PARAM_ATTR(DataPDUInOrder);
+
+DEF_NACL_PARAM(DataSequenceInOrder);
+NACL_PARAM_ATTR(DataSequenceInOrder);
+
+DEF_NACL_PARAM(ErrorRecoveryLevel);
+NACL_PARAM_ATTR(ErrorRecoveryLevel);
+
+static struct configfs_attribute *lio_target_nacl_param_attrs[] = {
+ &iscsi_nacl_param_MaxConnections.attr,
+ &iscsi_nacl_param_InitialR2T.attr,
+ &iscsi_nacl_param_ImmediateData.attr,
+ &iscsi_nacl_param_MaxBurstLength.attr,
+ &iscsi_nacl_param_FirstBurstLength.attr,
+ &iscsi_nacl_param_DefaultTime2Wait.attr,
+ &iscsi_nacl_param_DefaultTime2Retain.attr,
+ &iscsi_nacl_param_MaxOutstandingR2T.attr,
+ &iscsi_nacl_param_DataPDUInOrder.attr,
+ &iscsi_nacl_param_DataSequenceInOrder.attr,
+ &iscsi_nacl_param_ErrorRecoveryLevel.attr,
+ NULL,
+};
+
+/* End items for lio_target_nacl_param_cit */
+
+/* Start items for lio_target_acl_cit */
+
+static ssize_t lio_target_nacl_show_info(
+ struct se_node_acl *se_nacl,
+ char *page)
+{
+ struct iscsi_session *sess;
+ struct iscsi_conn *conn;
+ struct se_session *se_sess;
+ ssize_t rb = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (!se_sess) {
+ rb += sprintf(page+rb, "No active iSCSI Session for Initiator"
+ " Endpoint: %s\n", se_nacl->initiatorname);
+ } else {
+ sess = se_sess->fabric_sess_ptr;
+
+ if (sess->sess_ops->InitiatorName)
+ rb += sprintf(page+rb, "InitiatorName: %s\n",
+ sess->sess_ops->InitiatorName);
+ if (sess->sess_ops->InitiatorAlias)
+ rb += sprintf(page+rb, "InitiatorAlias: %s\n",
+ sess->sess_ops->InitiatorAlias);
+
+ rb += sprintf(page+rb, "LIO Session ID: %u "
+ "ISID: 0x%02x %02x %02x %02x %02x %02x "
+ "TSIH: %hu ", sess->sid,
+ sess->isid[0], sess->isid[1], sess->isid[2],
+ sess->isid[3], sess->isid[4], sess->isid[5],
+ sess->tsih);
+ rb += sprintf(page+rb, "SessionType: %s\n",
+ (sess->sess_ops->SessionType) ?
+ "Discovery" : "Normal");
+ rb += sprintf(page+rb, "Session State: ");
+ switch (sess->session_state) {
+ case TARG_SESS_STATE_FREE:
+ rb += sprintf(page+rb, "TARG_SESS_FREE\n");
+ break;
+ case TARG_SESS_STATE_ACTIVE:
+ rb += sprintf(page+rb, "TARG_SESS_STATE_ACTIVE\n");
+ break;
+ case TARG_SESS_STATE_LOGGED_IN:
+ rb += sprintf(page+rb, "TARG_SESS_STATE_LOGGED_IN\n");
+ break;
+ case TARG_SESS_STATE_FAILED:
+ rb += sprintf(page+rb, "TARG_SESS_STATE_FAILED\n");
+ break;
+ case TARG_SESS_STATE_IN_CONTINUE:
+ rb += sprintf(page+rb, "TARG_SESS_STATE_IN_CONTINUE\n");
+ break;
+ default:
+ rb += sprintf(page+rb, "ERROR: Unknown Session"
+ " State!\n");
+ break;
+ }
+
+ rb += sprintf(page+rb, "---------------------[iSCSI Session"
+ " Values]-----------------------\n");
+ rb += sprintf(page+rb, " CmdSN/WR : CmdSN/WC : ExpCmdSN"
+ " : MaxCmdSN : ITT : TTT\n");
+ rb += sprintf(page+rb, " 0x%08x 0x%08x 0x%08x 0x%08x"
+ " 0x%08x 0x%08x\n",
+ sess->cmdsn_window,
+ (sess->max_cmd_sn - sess->exp_cmd_sn) + 1,
+ sess->exp_cmd_sn, sess->max_cmd_sn,
+ sess->init_task_tag, sess->targ_xfer_tag);
+ rb += sprintf(page+rb, "----------------------[iSCSI"
+ " Connections]-------------------------\n");
+
+ spin_lock(&sess->conn_lock);
+ list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+ rb += sprintf(page+rb, "CID: %hu Connection"
+ " State: ", conn->cid);
+ switch (conn->conn_state) {
+ case TARG_CONN_STATE_FREE:
+ rb += sprintf(page+rb,
+ "TARG_CONN_STATE_FREE\n");
+ break;
+ case TARG_CONN_STATE_XPT_UP:
+ rb += sprintf(page+rb,
+ "TARG_CONN_STATE_XPT_UP\n");
+ break;
+ case TARG_CONN_STATE_IN_LOGIN:
+ rb += sprintf(page+rb,
+ "TARG_CONN_STATE_IN_LOGIN\n");
+ break;
+ case TARG_CONN_STATE_LOGGED_IN:
+ rb += sprintf(page+rb,
+ "TARG_CONN_STATE_LOGGED_IN\n");
+ break;
+ case TARG_CONN_STATE_IN_LOGOUT:
+ rb += sprintf(page+rb,
+ "TARG_CONN_STATE_IN_LOGOUT\n");
+ break;
+ case TARG_CONN_STATE_LOGOUT_REQUESTED:
+ rb += sprintf(page+rb,
+ "TARG_CONN_STATE_LOGOUT_REQUESTED\n");
+ break;
+ case TARG_CONN_STATE_CLEANUP_WAIT:
+ rb += sprintf(page+rb,
+ "TARG_CONN_STATE_CLEANUP_WAIT\n");
+ break;
+ default:
+ rb += sprintf(page+rb,
+ "ERROR: Unknown Connection State!\n");
+ break;
+ }
+
+ rb += sprintf(page+rb, " Address %s %s", conn->login_ip,
+ (conn->network_transport == ISCSI_TCP) ?
+ "TCP" : "SCTP");
+ rb += sprintf(page+rb, " StatSN: 0x%08x\n",
+ conn->stat_sn);
+ }
+ spin_unlock(&sess->conn_lock);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return rb;
+}
+
+TF_NACL_BASE_ATTR_RO(lio_target, info);
+
+static ssize_t lio_target_nacl_show_cmdsn_depth(
+ struct se_node_acl *se_nacl,
+ char *page)
+{
+ return sprintf(page, "%u\n", se_nacl->queue_depth);
+}
+
+static ssize_t lio_target_nacl_store_cmdsn_depth(
+ struct se_node_acl *se_nacl,
+ const char *page,
+ size_t count)
+{
+ struct se_portal_group *se_tpg = se_nacl->se_tpg;
+ struct iscsi_portal_group *tpg = container_of(se_tpg,
+ struct iscsi_portal_group, tpg_se_tpg);
+ struct config_item *acl_ci, *tpg_ci, *wwn_ci;
+ char *endptr;
+ u32 cmdsn_depth = 0;
+ int ret;
+
+ cmdsn_depth = simple_strtoul(page, &endptr, 0);
+ if (cmdsn_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) {
+ pr_err("Passed cmdsn_depth: %u exceeds"
+ " TA_DEFAULT_CMDSN_DEPTH_MAX: %u\n", cmdsn_depth,
+ TA_DEFAULT_CMDSN_DEPTH_MAX);
+ return -EINVAL;
+ }
+ acl_ci = &se_nacl->acl_group.cg_item;
+ if (!acl_ci) {
+		pr_err("Unable to locate acl_ci\n");
+ return -EINVAL;
+ }
+ tpg_ci = &acl_ci->ci_parent->ci_group->cg_item;
+ if (!tpg_ci) {
+ pr_err("Unable to locate tpg_ci\n");
+ return -EINVAL;
+ }
+ wwn_ci = &tpg_ci->ci_group->cg_item;
+ if (!wwn_ci) {
+ pr_err("Unable to locate config_item wwn_ci\n");
+ return -EINVAL;
+ }
+
+ if (iscsit_get_tpg(tpg) < 0)
+ return -EINVAL;
+ /*
+ * iscsit_tpg_set_initiator_node_queue_depth() assumes force=1
+ */
+ ret = iscsit_tpg_set_initiator_node_queue_depth(tpg,
+ config_item_name(acl_ci), cmdsn_depth, 1);
+
+ pr_debug("LIO_Target_ConfigFS: %s/%s Set CmdSN Window: %u for"
+		" InitiatorName: %s\n", config_item_name(wwn_ci),
+ config_item_name(tpg_ci), cmdsn_depth,
+ config_item_name(acl_ci));
+
+ iscsit_put_tpg(tpg);
+ return (!ret) ? count : (ssize_t)ret;
+}
+
+TF_NACL_BASE_ATTR(lio_target, cmdsn_depth, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_initiator_attrs[] = {
+ &lio_target_nacl_info.attr,
+ &lio_target_nacl_cmdsn_depth.attr,
+ NULL,
+};
+
+static struct se_node_acl *lio_tpg_alloc_fabric_acl(
+ struct se_portal_group *se_tpg)
+{
+ struct iscsi_node_acl *acl;
+
+ acl = kzalloc(sizeof(struct iscsi_node_acl), GFP_KERNEL);
+ if (!acl) {
+ pr_err("Unable to allocate memory for struct iscsi_node_acl\n");
+ return NULL;
+ }
+
+ return &acl->se_node_acl;
+}
+
+static struct se_node_acl *lio_target_make_nodeacl(
+ struct se_portal_group *se_tpg,
+ struct config_group *group,
+ const char *name)
+{
+ struct config_group *stats_cg;
+ struct iscsi_node_acl *acl;
+ struct se_node_acl *se_nacl_new, *se_nacl;
+ struct iscsi_portal_group *tpg = container_of(se_tpg,
+ struct iscsi_portal_group, tpg_se_tpg);
+ u32 cmdsn_depth;
+
+ se_nacl_new = lio_tpg_alloc_fabric_acl(se_tpg);
+ if (!se_nacl_new)
+ return ERR_PTR(-ENOMEM);
+
+ acl = container_of(se_nacl_new, struct iscsi_node_acl,
+ se_node_acl);
+
+ cmdsn_depth = ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth;
+ /*
+ * se_nacl_new may be released by core_tpg_add_initiator_node_acl()
+	 * when converting a NodeACL from demo mode -> explicit
+ */
+ se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new,
+ name, cmdsn_depth);
+ if (IS_ERR(se_nacl))
+ return se_nacl;
+
+ stats_cg = &acl->se_node_acl.acl_fabric_stat_group;
+
+	stats_cg->default_groups = kzalloc(sizeof(struct config_group *) * 2,
+ GFP_KERNEL);
+ if (!stats_cg->default_groups) {
+ pr_err("Unable to allocate memory for"
+ " stats_cg->default_groups\n");
+ core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1);
+ kfree(acl);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ stats_cg->default_groups[0] = &NODE_STAT_GRPS(acl)->iscsi_sess_stats_group;
+ stats_cg->default_groups[1] = NULL;
+ config_group_init_type_name(&NODE_STAT_GRPS(acl)->iscsi_sess_stats_group,
+ "iscsi_sess_stats", &iscsi_stat_sess_cit);
+
+ return se_nacl;
+}
+
+static void lio_target_drop_nodeacl(
+ struct se_node_acl *se_nacl)
+{
+ struct se_portal_group *se_tpg = se_nacl->se_tpg;
+ struct iscsi_node_acl *acl = container_of(se_nacl,
+ struct iscsi_node_acl, se_node_acl);
+ struct config_item *df_item;
+ struct config_group *stats_cg;
+ int i;
+
+ stats_cg = &acl->se_node_acl.acl_fabric_stat_group;
+ for (i = 0; stats_cg->default_groups[i]; i++) {
+ df_item = &stats_cg->default_groups[i]->cg_item;
+ stats_cg->default_groups[i] = NULL;
+ config_item_put(df_item);
+ }
+ kfree(stats_cg->default_groups);
+
+ core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1);
+ kfree(acl);
+}
+
+/* End items for lio_target_acl_cit */
+
+/* Start items for lio_target_tpg_attrib_cit */
+
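+/*
+ * DEF_TPG_ATTRIB() generates configfs show/store handlers for one TPG
+ * attribute, pinning the TPG via iscsit_get_tpg()/iscsit_put_tpg()
+ * around each access.
+ */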
+#define DEF_TPG_ATTRIB(name) \
+ \
+static ssize_t iscsi_tpg_attrib_show_##name( \
+ struct se_portal_group *se_tpg, \
+ char *page) \
+{ \
+ struct iscsi_portal_group *tpg = container_of(se_tpg, \
+ struct iscsi_portal_group, tpg_se_tpg); \
+ ssize_t rb; \
+ \
+ if (iscsit_get_tpg(tpg) < 0) \
+ return -EINVAL; \
+ \
+ rb = sprintf(page, "%u\n", ISCSI_TPG_ATTRIB(tpg)->name); \
+ iscsit_put_tpg(tpg); \
+ return rb; \
+} \
+ \
+static ssize_t iscsi_tpg_attrib_store_##name( \
+ struct se_portal_group *se_tpg, \
+ const char *page, \
+ size_t count) \
+{ \
+ struct iscsi_portal_group *tpg = container_of(se_tpg, \
+ struct iscsi_portal_group, tpg_se_tpg); \
+ char *endptr; \
+ u32 val; \
+ int ret; \
+ \
+ if (iscsit_get_tpg(tpg) < 0) \
+ return -EINVAL; \
+ \
+ val = simple_strtoul(page, &endptr, 0); \
+ ret = iscsit_ta_##name(tpg, val); \
+ if (ret < 0) \
+ goto out; \
+ \
+ iscsit_put_tpg(tpg); \
+ return count; \
+out: \
+ iscsit_put_tpg(tpg); \
+ return ret; \
+}
+
+#define TPG_ATTR(_name, _mode) TF_TPG_ATTRIB_ATTR(iscsi, _name, _mode);
+
+/*
+ * Define iscsi_tpg_attrib_s_authentication
+ */
+DEF_TPG_ATTRIB(authentication);
+TPG_ATTR(authentication, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_login_timeout
+ */
+DEF_TPG_ATTRIB(login_timeout);
+TPG_ATTR(login_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_netif_timeout
+ */
+DEF_TPG_ATTRIB(netif_timeout);
+TPG_ATTR(netif_timeout, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_generate_node_acls
+ */
+DEF_TPG_ATTRIB(generate_node_acls);
+TPG_ATTR(generate_node_acls, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_default_cmdsn_depth
+ */
+DEF_TPG_ATTRIB(default_cmdsn_depth);
+TPG_ATTR(default_cmdsn_depth, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_cache_dynamic_acls
+ */
+DEF_TPG_ATTRIB(cache_dynamic_acls);
+TPG_ATTR(cache_dynamic_acls, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_demo_mode_write_protect
+ */
+DEF_TPG_ATTRIB(demo_mode_write_protect);
+TPG_ATTR(demo_mode_write_protect, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_prod_mode_write_protect
+ */
+DEF_TPG_ATTRIB(prod_mode_write_protect);
+TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
+ &iscsi_tpg_attrib_authentication.attr,
+ &iscsi_tpg_attrib_login_timeout.attr,
+ &iscsi_tpg_attrib_netif_timeout.attr,
+ &iscsi_tpg_attrib_generate_node_acls.attr,
+ &iscsi_tpg_attrib_default_cmdsn_depth.attr,
+ &iscsi_tpg_attrib_cache_dynamic_acls.attr,
+ &iscsi_tpg_attrib_demo_mode_write_protect.attr,
+ &iscsi_tpg_attrib_prod_mode_write_protect.attr,
+ NULL,
+};
+
+/* End items for lio_target_tpg_attrib_cit */
+
+/* Start items for lio_target_tpg_param_cit */
+
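+/*
+ * DEF_TPG_PARAM() generates show/store handlers for one iSCSI login
+ * parameter: show prints the current value from tpg->param_list, and
+ * store rewrites it through iscsi_change_param_value() as a
+ * "<key>=<value>" string.
+ */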
+#define DEF_TPG_PARAM(name) \
+static ssize_t iscsi_tpg_param_show_##name( \
+ struct se_portal_group *se_tpg, \
+ char *page) \
+{ \
+ struct iscsi_portal_group *tpg = container_of(se_tpg, \
+ struct iscsi_portal_group, tpg_se_tpg); \
+ struct iscsi_param *param; \
+ ssize_t rb; \
+ \
+ if (iscsit_get_tpg(tpg) < 0) \
+ return -EINVAL; \
+ \
+ param = iscsi_find_param_from_key(__stringify(name), \
+ tpg->param_list); \
+ if (!param) { \
+ iscsit_put_tpg(tpg); \
+ return -EINVAL; \
+ } \
+ rb = snprintf(page, PAGE_SIZE, "%s\n", param->value); \
+ \
+ iscsit_put_tpg(tpg); \
+ return rb; \
+} \
+static ssize_t iscsi_tpg_param_store_##name( \
+ struct se_portal_group *se_tpg, \
+ const char *page, \
+ size_t count) \
+{ \
+ struct iscsi_portal_group *tpg = container_of(se_tpg, \
+ struct iscsi_portal_group, tpg_se_tpg); \
+ char *buf; \
+ int ret; \
+ \
+ buf = kzalloc(PAGE_SIZE, GFP_KERNEL); \
+ if (!buf) \
+ return -ENOMEM; \
+ snprintf(buf, PAGE_SIZE, "%s=%s", __stringify(name), page); \
+ buf[strlen(buf)-1] = '\0'; /* Kill newline */ \
+ \
+ if (iscsit_get_tpg(tpg) < 0) { \
+ kfree(buf); \
+ return -EINVAL; \
+ } \
+ \
+ ret = iscsi_change_param_value(buf, tpg->param_list, 1); \
+ if (ret < 0) \
+ goto out; \
+ \
+ kfree(buf); \
+ iscsit_put_tpg(tpg); \
+ return count; \
+out: \
+ kfree(buf); \
+ iscsit_put_tpg(tpg); \
+ return -EINVAL; \
+}
+
+#define TPG_PARAM_ATTR(_name, _mode) TF_TPG_PARAM_ATTR(iscsi, _name, _mode);
+
+DEF_TPG_PARAM(AuthMethod);
+TPG_PARAM_ATTR(AuthMethod, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(HeaderDigest);
+TPG_PARAM_ATTR(HeaderDigest, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataDigest);
+TPG_PARAM_ATTR(DataDigest, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxConnections);
+TPG_PARAM_ATTR(MaxConnections, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(TargetAlias);
+TPG_PARAM_ATTR(TargetAlias, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(InitialR2T);
+TPG_PARAM_ATTR(InitialR2T, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(ImmediateData);
+TPG_PARAM_ATTR(ImmediateData, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxRecvDataSegmentLength);
+TPG_PARAM_ATTR(MaxRecvDataSegmentLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxBurstLength);
+TPG_PARAM_ATTR(MaxBurstLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(FirstBurstLength);
+TPG_PARAM_ATTR(FirstBurstLength, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DefaultTime2Wait);
+TPG_PARAM_ATTR(DefaultTime2Wait, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DefaultTime2Retain);
+TPG_PARAM_ATTR(DefaultTime2Retain, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(MaxOutstandingR2T);
+TPG_PARAM_ATTR(MaxOutstandingR2T, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataPDUInOrder);
+TPG_PARAM_ATTR(DataPDUInOrder, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(DataSequenceInOrder);
+TPG_PARAM_ATTR(DataSequenceInOrder, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(ErrorRecoveryLevel);
+TPG_PARAM_ATTR(ErrorRecoveryLevel, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(IFMarker);
+TPG_PARAM_ATTR(IFMarker, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(OFMarker);
+TPG_PARAM_ATTR(OFMarker, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(IFMarkInt);
+TPG_PARAM_ATTR(IFMarkInt, S_IRUGO | S_IWUSR);
+
+DEF_TPG_PARAM(OFMarkInt);
+TPG_PARAM_ATTR(OFMarkInt, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_tpg_param_attrs[] = {
+ &iscsi_tpg_param_AuthMethod.attr,
+ &iscsi_tpg_param_HeaderDigest.attr,
+ &iscsi_tpg_param_DataDigest.attr,
+ &iscsi_tpg_param_MaxConnections.attr,
+ &iscsi_tpg_param_TargetAlias.attr,
+ &iscsi_tpg_param_InitialR2T.attr,
+ &iscsi_tpg_param_ImmediateData.attr,
+ &iscsi_tpg_param_MaxRecvDataSegmentLength.attr,
+ &iscsi_tpg_param_MaxBurstLength.attr,
+ &iscsi_tpg_param_FirstBurstLength.attr,
+ &iscsi_tpg_param_DefaultTime2Wait.attr,
+ &iscsi_tpg_param_DefaultTime2Retain.attr,
+ &iscsi_tpg_param_MaxOutstandingR2T.attr,
+ &iscsi_tpg_param_DataPDUInOrder.attr,
+ &iscsi_tpg_param_DataSequenceInOrder.attr,
+ &iscsi_tpg_param_ErrorRecoveryLevel.attr,
+ &iscsi_tpg_param_IFMarker.attr,
+ &iscsi_tpg_param_OFMarker.attr,
+ &iscsi_tpg_param_IFMarkInt.attr,
+ &iscsi_tpg_param_OFMarkInt.attr,
+ NULL,
+};
+
+/* End items for lio_target_tpg_param_cit */
+
+/* Start items for lio_target_tpg_cit */
+
+static ssize_t lio_target_tpg_show_enable(
+ struct se_portal_group *se_tpg,
+ char *page)
+{
+ struct iscsi_portal_group *tpg = container_of(se_tpg,
+ struct iscsi_portal_group, tpg_se_tpg);
+ ssize_t len;
+
+ spin_lock(&tpg->tpg_state_lock);
+ len = sprintf(page, "%d\n",
+ (tpg->tpg_state == TPG_STATE_ACTIVE) ? 1 : 0);
+ spin_unlock(&tpg->tpg_state_lock);
+
+ return len;
+}
+
+static ssize_t lio_target_tpg_store_enable(
+ struct se_portal_group *se_tpg,
+ const char *page,
+ size_t count)
+{
+ struct iscsi_portal_group *tpg = container_of(se_tpg,
+ struct iscsi_portal_group, tpg_se_tpg);
+ char *endptr;
+ u32 op;
+ int ret = 0;
+
+ op = simple_strtoul(page, &endptr, 0);
+ if ((op != 1) && (op != 0)) {
+ pr_err("Illegal value for tpg_enable: %u\n", op);
+ return -EINVAL;
+ }
+
+ ret = iscsit_get_tpg(tpg);
+ if (ret < 0)
+ return -EINVAL;
+
+ if (op) {
+ ret = iscsit_tpg_enable_portal_group(tpg);
+ if (ret < 0)
+ goto out;
+ } else {
+ /*
+ * iscsit_tpg_disable_portal_group() assumes force=1
+ */
+ ret = iscsit_tpg_disable_portal_group(tpg, 1);
+ if (ret < 0)
+ goto out;
+ }
+
+ iscsit_put_tpg(tpg);
+ return count;
+out:
+ iscsit_put_tpg(tpg);
+ return -EINVAL;
+}
+
+TF_TPG_BASE_ATTR(lio_target, enable, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_tpg_attrs[] = {
+ &lio_target_tpg_enable.attr,
+ NULL,
+};
+
+/* End items for lio_target_tpg_cit */
+
+/* Start items for lio_target_tiqn_cit */
+
+struct se_portal_group *lio_target_tiqn_addtpg(
+ struct se_wwn *wwn,
+ struct config_group *group,
+ const char *name)
+{
+ struct iscsi_portal_group *tpg;
+ struct iscsi_tiqn *tiqn;
+ char *tpgt_str, *end_ptr;
+ int ret = 0;
+ unsigned short int tpgt;
+
+ tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+ /*
+ * Only tpgt_# directory groups can be created below
+ * target/iscsi/iqn.superturodiskarry/
+ */
+ tpgt_str = strstr(name, "tpgt_");
+ if (!tpgt_str) {
+ pr_err("Unable to locate \"tpgt_#\" directory"
+ " group\n");
+ return NULL;
+ }
+ tpgt_str += 5; /* Skip ahead of "tpgt_" */
+ tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0);
+
+ tpg = iscsit_alloc_portal_group(tiqn, tpgt);
+ if (!tpg)
+ return NULL;
+
+ ret = core_tpg_register(
+ &lio_target_fabric_configfs->tf_ops,
+ wwn, &tpg->tpg_se_tpg, (void *)tpg,
+ TRANSPORT_TPG_TYPE_NORMAL);
+	if (ret < 0) {
+		kfree(tpg);
+		return NULL;
+	}
+
+ ret = iscsit_tpg_add_portal_group(tiqn, tpg);
+ if (ret != 0)
+ goto out;
+
+ pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+ pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated TPG: %s\n",
+ name);
+ return &tpg->tpg_se_tpg;
+out:
+ core_tpg_deregister(&tpg->tpg_se_tpg);
+ kfree(tpg);
+ return NULL;
+}
+
+void lio_target_tiqn_deltpg(struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg;
+ struct iscsi_tiqn *tiqn;
+
+ tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
+ tiqn = tpg->tpg_tiqn;
+ /*
+ * iscsit_tpg_del_portal_group() assumes force=1
+ */
+ pr_debug("LIO_Target_ConfigFS: DEREGISTER -> Releasing TPG\n");
+ iscsit_tpg_del_portal_group(tiqn, tpg, 1);
+}
+
+/* End items for lio_target_tiqn_cit */
+
+/* Start LIO-Target TIQN struct config_item lio_target_cit */
+
+static ssize_t lio_target_wwn_show_attr_lio_version(
+ struct target_fabric_configfs *tf,
+ char *page)
+{
+ return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n");
+}
+
+TF_WWN_ATTR_RO(lio_target, lio_version);
+
+static struct configfs_attribute *lio_target_wwn_attrs[] = {
+ &lio_target_wwn_lio_version.attr,
+ NULL,
+};
+
+struct se_wwn *lio_target_call_coreaddtiqn(
+ struct target_fabric_configfs *tf,
+ struct config_group *group,
+ const char *name)
+{
+ struct config_group *stats_cg;
+ struct iscsi_tiqn *tiqn;
+
+ tiqn = iscsit_add_tiqn((unsigned char *)name);
+ if (IS_ERR(tiqn))
+ return ERR_PTR(PTR_ERR(tiqn));
+ /*
+ * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group.
+ */
+ stats_cg = &tiqn->tiqn_wwn.fabric_stat_group;
+
+ stats_cg->default_groups = kzalloc(sizeof(struct config_group) * 6,
+ GFP_KERNEL);
+ if (!stats_cg->default_groups) {
+ pr_err("Unable to allocate memory for"
+ " stats_cg->default_groups\n");
+ iscsit_del_tiqn(tiqn);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ stats_cg->default_groups[0] = &WWN_STAT_GRPS(tiqn)->iscsi_instance_group;
+ stats_cg->default_groups[1] = &WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group;
+ stats_cg->default_groups[2] = &WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group;
+ stats_cg->default_groups[3] = &WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group;
+ stats_cg->default_groups[4] = &WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group;
+ stats_cg->default_groups[5] = NULL;
+ config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_instance_group,
+ "iscsi_instance", &iscsi_stat_instance_cit);
+ config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group,
+ "iscsi_sess_err", &iscsi_stat_sess_err_cit);
+ config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group,
+ "iscsi_tgt_attr", &iscsi_stat_tgt_attr_cit);
+ config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group,
+ "iscsi_login_stats", &iscsi_stat_login_cit);
+ config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group,
+ "iscsi_logout_stats", &iscsi_stat_logout_cit);
+
+ pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+ pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated Node:"
+ " %s\n", name);
+ return &tiqn->tiqn_wwn;
+}
+
+void lio_target_call_coredeltiqn(
+ struct se_wwn *wwn)
+{
+ struct iscsi_tiqn *tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+ struct config_item *df_item;
+ struct config_group *stats_cg;
+ int i;
+
+ stats_cg = &tiqn->tiqn_wwn.fabric_stat_group;
+ for (i = 0; stats_cg->default_groups[i]; i++) {
+ df_item = &stats_cg->default_groups[i]->cg_item;
+ stats_cg->default_groups[i] = NULL;
+ config_item_put(df_item);
+ }
+ kfree(stats_cg->default_groups);
+
+ pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s\n",
+ tiqn->tiqn);
+ iscsit_del_tiqn(tiqn);
+}
+
+/* End LIO-Target TIQN struct config_item lio_target_cit */
+
+/* Start lio_target_discovery_auth_cit */
+
+#define DEF_DISC_AUTH_STR(name, flags) \
+ __DEF_NACL_AUTH_STR(disc, name, flags) \
+static ssize_t iscsi_disc_show_##name( \
+ struct target_fabric_configfs *tf, \
+ char *page) \
+{ \
+ return __iscsi_disc_show_##name(&iscsit_global->discovery_acl, \
+ page); \
+} \
+static ssize_t iscsi_disc_store_##name( \
+ struct target_fabric_configfs *tf, \
+ const char *page, \
+ size_t count) \
+{ \
+ return __iscsi_disc_store_##name(&iscsit_global->discovery_acl, \
+ page, count); \
+}
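+/*
+ * Illustrative expansion (not part of the original patch): for example,
+ * DEF_DISC_AUTH_STR(userid, NAF_USERID_SET) emits the __DEF_NACL_AUTH_STR()
+ * helpers plus two thin wrappers that operate on the global discovery ACL:
+ *
+ *	static ssize_t iscsi_disc_show_userid(
+ *		struct target_fabric_configfs *tf, char *page)
+ *	{
+ *		return __iscsi_disc_show_userid(
+ *				&iscsit_global->discovery_acl, page);
+ *	}
+ *
+ *	static ssize_t iscsi_disc_store_userid(
+ *		struct target_fabric_configfs *tf, const char *page,
+ *		size_t count)
+ *	{
+ *		return __iscsi_disc_store_userid(
+ *				&iscsit_global->discovery_acl, page, count);
+ *	}
+ */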
+
+#define DEF_DISC_AUTH_INT(name) \
+ __DEF_NACL_AUTH_INT(disc, name) \
+static ssize_t iscsi_disc_show_##name( \
+ struct target_fabric_configfs *tf, \
+ char *page) \
+{ \
+ return __iscsi_disc_show_##name(&iscsit_global->discovery_acl, \
+ page); \
+}
+
+#define DISC_AUTH_ATTR(_name, _mode) TF_DISC_ATTR(iscsi, _name, _mode)
+#define DISC_AUTH_ATTR_RO(_name) TF_DISC_ATTR_RO(iscsi, _name)
+
+/*
+ * One-way authentication userid
+ */
+DEF_DISC_AUTH_STR(userid, NAF_USERID_SET);
+DISC_AUTH_ATTR(userid, S_IRUGO | S_IWUSR);
+/*
+ * One-way authentication password
+ */
+DEF_DISC_AUTH_STR(password, NAF_PASSWORD_SET);
+DISC_AUTH_ATTR(password, S_IRUGO | S_IWUSR);
+/*
+ * Enforce mutual authentication
+ */
+DEF_DISC_AUTH_INT(authenticate_target);
+DISC_AUTH_ATTR_RO(authenticate_target);
+/*
+ * Mutual authentication userid
+ */
+DEF_DISC_AUTH_STR(userid_mutual, NAF_USERID_IN_SET);
+DISC_AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR);
+/*
+ * Mutual authentication password
+ */
+DEF_DISC_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET);
+DISC_AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR);
+
+/*
+ * enforce_discovery_auth
+ */
+static ssize_t iscsi_disc_show_enforce_discovery_auth(
+ struct target_fabric_configfs *tf,
+ char *page)
+{
+ struct iscsi_node_auth *discovery_auth = &iscsit_global->discovery_acl.node_auth;
+
+ return sprintf(page, "%d\n", discovery_auth->enforce_discovery_auth);
+}
+
+static ssize_t iscsi_disc_store_enforce_discovery_auth(
+ struct target_fabric_configfs *tf,
+ const char *page,
+ size_t count)
+{
+ struct iscsi_param *param;
+ struct iscsi_portal_group *discovery_tpg = iscsit_global->discovery_tpg;
+ char *endptr;
+ u32 op;
+
+ op = simple_strtoul(page, &endptr, 0);
+ if ((op != 1) && (op != 0)) {
+ pr_err("Illegal value for enforce_discovery_auth:"
+ " %u\n", op);
+ return -EINVAL;
+ }
+
+ if (!discovery_tpg) {
+ pr_err("iscsit_global->discovery_tpg is NULL\n");
+ return -EINVAL;
+ }
+
+ param = iscsi_find_param_from_key(AUTHMETHOD,
+ discovery_tpg->param_list);
+ if (!param)
+ return -EINVAL;
+
+ if (op) {
+ /*
+ * Reset the AuthMethod key to CHAP.
+ */
+ if (iscsi_update_param_value(param, CHAP) < 0)
+ return -EINVAL;
+
+ discovery_tpg->tpg_attrib.authentication = 1;
+ iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 1;
+ pr_debug("LIO-CORE[0] Successfully enabled"
+ " authentication enforcement for iSCSI"
+ " Discovery TPG\n");
+ } else {
+ /*
+ * Reset the AuthMethod key to CHAP,None
+ */
+ if (iscsi_update_param_value(param, "CHAP,None") < 0)
+ return -EINVAL;
+
+ discovery_tpg->tpg_attrib.authentication = 0;
+ iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 0;
+ pr_debug("LIO-CORE[0] Successfully disabled"
+ " authentication enforcement for iSCSI"
+ " Discovery TPG\n");
+ }
+
+ return count;
+}
+
+DISC_AUTH_ATTR(enforce_discovery_auth, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_discovery_auth_attrs[] = {
+ &iscsi_disc_userid.attr,
+ &iscsi_disc_password.attr,
+ &iscsi_disc_authenticate_target.attr,
+ &iscsi_disc_userid_mutual.attr,
+ &iscsi_disc_password_mutual.attr,
+ &iscsi_disc_enforce_discovery_auth.attr,
+ NULL,
+};
+
+/* End lio_target_discovery_auth_cit */
+
+/* Start functions for target_core_fabric_ops */
+
+static char *iscsi_get_fabric_name(void)
+{
+ return "iSCSI";
+}
+
+static u32 iscsi_get_task_tag(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ return cmd->init_task_tag;
+}
+
+static int iscsi_get_cmd_state(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ return cmd->i_state;
+}
+
+static int iscsi_is_state_remove(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ return (cmd->i_state == ISTATE_REMOVE);
+}
+
+static int lio_sess_logged_in(struct se_session *se_sess)
+{
+ struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+ int ret;
+ /*
+ * Called with spin_lock_bh(&tpg_lock); and
+ * spin_lock(&se_tpg->session_lock); held.
+ */
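+	/*
+	 * Note: despite the name, the value computed below is non-zero
+	 * when the session is *not* yet in TARG_SESS_STATE_LOGGED_IN.
+	 */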
+ spin_lock(&sess->conn_lock);
+ ret = (sess->session_state != TARG_SESS_STATE_LOGGED_IN);
+ spin_unlock(&sess->conn_lock);
+
+ return ret;
+}
+
+static u32 lio_sess_get_index(struct se_session *se_sess)
+{
+ struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+ return sess->session_index;
+}
+
+static u32 lio_sess_get_initiator_sid(
+ struct se_session *se_sess,
+ unsigned char *buf,
+ u32 size)
+{
+ struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+ /*
+ * iSCSI Initiator Session Identifier from RFC-3720.
+ */
+ return snprintf(buf, size, "%02x%02x%02x%02x%02x%02x",
+ sess->isid[0], sess->isid[1], sess->isid[2],
+ sess->isid[3], sess->isid[4], sess->isid[5]);
+}
+
+static int lio_queue_data_in(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ cmd->i_state = ISTATE_SEND_DATAIN;
+ iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+ return 0;
+}
+
+static int lio_write_pending(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ if (!cmd->immediate_data && !cmd->unsolicited_data)
+ return iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 1);
+
+ return 0;
+}
+
+static int lio_write_pending_status(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+ int ret;
+
+ spin_lock_bh(&cmd->istate_lock);
+ ret = !(cmd->cmd_flags & ICF_GOT_LAST_DATAOUT);
+ spin_unlock_bh(&cmd->istate_lock);
+
+ return ret;
+}
+
+static int lio_queue_status(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ cmd->i_state = ISTATE_SEND_STATUS;
+ iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+ return 0;
+}
+
+static u16 lio_set_fabric_sense_len(struct se_cmd *se_cmd, u32 sense_length)
+{
+ unsigned char *buffer = se_cmd->sense_buffer;
+ /*
+ * From RFC-3720 10.4.7. Data Segment - Sense and Response Data Segment
+ * 16-bit SenseLength.
+ */
+ buffer[0] = ((sense_length >> 8) & 0xff);
+ buffer[1] = (sense_length & 0xff);
+ /*
+ * Return two byte offset into allocated sense_buffer.
+ */
+ return 2;
+}
+
+static u16 lio_get_fabric_sense_len(void)
+{
+ /*
+ * Return two byte offset into allocated sense_buffer.
+ */
+ return 2;
+}
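+/*
+ * Illustrative layout (not in the original patch): with the two byte
+ * offset returned above, the Data Segment carrying sense data ends up as
+ *
+ *	bytes 0-1: SenseLength, big-endian u16, written by
+ *		   lio_set_fabric_sense_len()
+ *	bytes 2- : the SCSI sense data itself
+ *
+ * matching the Sense and Response Data Segment format of RFC-3720
+ * Section 10.4.7.
+ */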
+
+static int lio_queue_tm_rsp(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ cmd->i_state = ISTATE_SEND_TASKMGTRSP;
+ iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+ return 0;
+}
+
+static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+ return &tpg->tpg_tiqn->tiqn[0];
+}
+
+static u16 lio_tpg_get_tag(struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+ return tpg->tpgt;
+}
+
+static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+ return ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth;
+}
+
+static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+ return ISCSI_TPG_ATTRIB(tpg)->generate_node_acls;
+}
+
+static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+ return ISCSI_TPG_ATTRIB(tpg)->cache_dynamic_acls;
+}
+
+static int lio_tpg_check_demo_mode_write_protect(
+ struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+ return ISCSI_TPG_ATTRIB(tpg)->demo_mode_write_protect;
+}
+
+static int lio_tpg_check_prod_mode_write_protect(
+ struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+ return ISCSI_TPG_ATTRIB(tpg)->prod_mode_write_protect;
+}
+
+static void lio_tpg_release_fabric_acl(
+ struct se_portal_group *se_tpg,
+ struct se_node_acl *se_acl)
+{
+ struct iscsi_node_acl *acl = container_of(se_acl,
+ struct iscsi_node_acl, se_node_acl);
+ kfree(acl);
+}
+
+/*
+ * Called with spin_lock_bh(struct se_portal_group->session_lock) held.
+ *
+ * Also, this function calls iscsit_inc_session_usage_count() on the
+ * struct iscsi_session in question.
+ */
+static int lio_tpg_shutdown_session(struct se_session *se_sess)
+{
+ struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+ spin_lock(&sess->conn_lock);
+ if (atomic_read(&sess->session_fall_back_to_erl0) ||
+ atomic_read(&sess->session_logout) ||
+ (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+ spin_unlock(&sess->conn_lock);
+ return 0;
+ }
+ atomic_set(&sess->session_reinstatement, 1);
+ spin_unlock(&sess->conn_lock);
+
+ iscsit_inc_session_usage_count(sess);
+ iscsit_stop_time2retain_timer(sess);
+
+ return 1;
+}
+
+/*
+ * Calls iscsit_dec_session_usage_count() as inverse of
+ * lio_tpg_shutdown_session()
+ */
+static void lio_tpg_close_session(struct se_session *se_sess)
+{
+ struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+ /*
+ * If the iSCSI Session for the iSCSI Initiator Node exists,
+	 * forcefully shut down the iSCSI NEXUS.
+ */
+ iscsit_stop_session(sess, 1, 1);
+ iscsit_dec_session_usage_count(sess);
+ iscsit_close_session(sess);
+}
+
+static void lio_tpg_stop_session(
+ struct se_session *se_sess,
+ int sess_sleep,
+ int conn_sleep)
+{
+ struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+ iscsit_stop_session(sess, sess_sleep, conn_sleep);
+}
+
+static void lio_tpg_fall_back_to_erl0(struct se_session *se_sess)
+{
+ struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+
+ iscsit_fall_back_to_erl0(sess);
+}
+
+static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg)
+{
+ struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+
+ return tpg->tpg_tiqn->tiqn_index;
+}
+
+static void lio_set_default_node_attributes(struct se_node_acl *se_acl)
+{
+ struct iscsi_node_acl *acl = container_of(se_acl, struct iscsi_node_acl,
+ se_node_acl);
+
+ ISCSI_NODE_ATTRIB(acl)->nacl = acl;
+ iscsit_set_default_node_attribues(acl);
+}
+
+static void lio_release_cmd(struct se_cmd *se_cmd)
+{
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ iscsit_release_cmd(cmd);
+}
+
+/* End functions for target_core_fabric_ops */
+
+int iscsi_target_register_configfs(void)
+{
+ struct target_fabric_configfs *fabric;
+ int ret;
+
+ lio_target_fabric_configfs = NULL;
+ fabric = target_fabric_configfs_init(THIS_MODULE, "iscsi");
+ if (IS_ERR(fabric)) {
+ pr_err("target_fabric_configfs_init() for"
+ " LIO-Target failed!\n");
+ return PTR_ERR(fabric);
+ }
+ /*
+	 * Set up the fabric API of function pointers used by target_core_mod.
+ */
+ fabric->tf_ops.get_fabric_name = &iscsi_get_fabric_name;
+ fabric->tf_ops.get_fabric_proto_ident = &iscsi_get_fabric_proto_ident;
+ fabric->tf_ops.tpg_get_wwn = &lio_tpg_get_endpoint_wwn;
+ fabric->tf_ops.tpg_get_tag = &lio_tpg_get_tag;
+ fabric->tf_ops.tpg_get_default_depth = &lio_tpg_get_default_depth;
+ fabric->tf_ops.tpg_get_pr_transport_id = &iscsi_get_pr_transport_id;
+ fabric->tf_ops.tpg_get_pr_transport_id_len =
+ &iscsi_get_pr_transport_id_len;
+ fabric->tf_ops.tpg_parse_pr_out_transport_id =
+ &iscsi_parse_pr_out_transport_id;
+ fabric->tf_ops.tpg_check_demo_mode = &lio_tpg_check_demo_mode;
+ fabric->tf_ops.tpg_check_demo_mode_cache =
+ &lio_tpg_check_demo_mode_cache;
+ fabric->tf_ops.tpg_check_demo_mode_write_protect =
+ &lio_tpg_check_demo_mode_write_protect;
+ fabric->tf_ops.tpg_check_prod_mode_write_protect =
+ &lio_tpg_check_prod_mode_write_protect;
+ fabric->tf_ops.tpg_alloc_fabric_acl = &lio_tpg_alloc_fabric_acl;
+ fabric->tf_ops.tpg_release_fabric_acl = &lio_tpg_release_fabric_acl;
+ fabric->tf_ops.tpg_get_inst_index = &lio_tpg_get_inst_index;
+ fabric->tf_ops.release_cmd = &lio_release_cmd;
+ fabric->tf_ops.shutdown_session = &lio_tpg_shutdown_session;
+ fabric->tf_ops.close_session = &lio_tpg_close_session;
+ fabric->tf_ops.stop_session = &lio_tpg_stop_session;
+ fabric->tf_ops.fall_back_to_erl0 = &lio_tpg_fall_back_to_erl0;
+ fabric->tf_ops.sess_logged_in = &lio_sess_logged_in;
+ fabric->tf_ops.sess_get_index = &lio_sess_get_index;
+ fabric->tf_ops.sess_get_initiator_sid = &lio_sess_get_initiator_sid;
+ fabric->tf_ops.write_pending = &lio_write_pending;
+ fabric->tf_ops.write_pending_status = &lio_write_pending_status;
+ fabric->tf_ops.set_default_node_attributes =
+ &lio_set_default_node_attributes;
+ fabric->tf_ops.get_task_tag = &iscsi_get_task_tag;
+ fabric->tf_ops.get_cmd_state = &iscsi_get_cmd_state;
+ fabric->tf_ops.queue_data_in = &lio_queue_data_in;
+ fabric->tf_ops.queue_status = &lio_queue_status;
+ fabric->tf_ops.queue_tm_rsp = &lio_queue_tm_rsp;
+ fabric->tf_ops.set_fabric_sense_len = &lio_set_fabric_sense_len;
+ fabric->tf_ops.get_fabric_sense_len = &lio_get_fabric_sense_len;
+ fabric->tf_ops.is_state_remove = &iscsi_is_state_remove;
+ /*
+	 * Set up function pointers for generic logic in target_core_fabric_configfs.c
+ */
+ fabric->tf_ops.fabric_make_wwn = &lio_target_call_coreaddtiqn;
+ fabric->tf_ops.fabric_drop_wwn = &lio_target_call_coredeltiqn;
+ fabric->tf_ops.fabric_make_tpg = &lio_target_tiqn_addtpg;
+ fabric->tf_ops.fabric_drop_tpg = &lio_target_tiqn_deltpg;
+ fabric->tf_ops.fabric_post_link = NULL;
+ fabric->tf_ops.fabric_pre_unlink = NULL;
+ fabric->tf_ops.fabric_make_np = &lio_target_call_addnptotpg;
+ fabric->tf_ops.fabric_drop_np = &lio_target_call_delnpfromtpg;
+ fabric->tf_ops.fabric_make_nodeacl = &lio_target_make_nodeacl;
+ fabric->tf_ops.fabric_drop_nodeacl = &lio_target_drop_nodeacl;
+ /*
+	 * Set up default attribute lists for the various fabric->tf_cit_tmpl
+	 * struct config_item_types
+ */
+ TF_CIT_TMPL(fabric)->tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs;
+ TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs;
+ TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs;
+ TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs;
+ TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs;
+ TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs;
+ TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs;
+ TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs;
+ TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs;
+ TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs;
+
+ ret = target_fabric_configfs_register(fabric);
+ if (ret < 0) {
+ pr_err("target_fabric_configfs_register() for"
+ " LIO-Target failed!\n");
+ target_fabric_configfs_free(fabric);
+ return ret;
+ }
+
+ lio_target_fabric_configfs = fabric;
+ pr_debug("LIO_TARGET[0] - Set fabric ->"
+ " lio_target_fabric_configfs\n");
+ return 0;
+}
+
+void iscsi_target_deregister_configfs(void)
+{
+ if (!lio_target_fabric_configfs)
+ return;
+ /*
+ * Shutdown discovery sessions and disable discovery TPG
+ */
+ if (iscsit_global->discovery_tpg)
+ iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
+
+ target_fabric_configfs_deregister(lio_target_fabric_configfs);
+ lio_target_fabric_configfs = NULL;
+ pr_debug("LIO_TARGET[0] - Cleared"
+ " lio_target_fabric_configfs\n");
+}
diff --git a/drivers/target/iscsi/iscsi_target_configfs.h b/drivers/target/iscsi/iscsi_target_configfs.h
new file mode 100644
index 0000000..8cd5a63
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_configfs.h
@@ -0,0 +1,7 @@
+#ifndef ISCSI_TARGET_CONFIGFS_H
+#define ISCSI_TARGET_CONFIGFS_H
+
+extern int iscsi_target_register_configfs(void);
+extern void iscsi_target_deregister_configfs(void);
+
+#endif /* ISCSI_TARGET_CONFIGFS_H */
diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h
new file mode 100644
index 0000000..470ed55
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_core.h
@@ -0,0 +1,859 @@
+#ifndef ISCSI_TARGET_CORE_H
+#define ISCSI_TARGET_CORE_H
+
+#include <linux/in.h>
+#include <linux/configfs.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+
+#define ISCSIT_VERSION "v4.1.0-rc1"
+#define ISCSI_MAX_DATASN_MISSING_COUNT 16
+#define ISCSI_TX_THREAD_TCP_TIMEOUT 2
+#define ISCSI_RX_THREAD_TCP_TIMEOUT 2
+#define SECONDS_FOR_ASYNC_LOGOUT 10
+#define SECONDS_FOR_ASYNC_TEXT 10
+#define SECONDS_FOR_LOGOUT_COMP 15
+#define WHITE_SPACE " \t\v\f\n\r"
+
+/* struct iscsi_node_attrib sanity values */
+#define NA_DATAOUT_TIMEOUT 3
+#define NA_DATAOUT_TIMEOUT_MAX 60
+#define NA_DATAOUT_TIMEOUT_MIX 2
+#define NA_DATAOUT_TIMEOUT_RETRIES 5
+#define NA_DATAOUT_TIMEOUT_RETRIES_MAX 15
+#define NA_DATAOUT_TIMEOUT_RETRIES_MIN 1
+#define NA_NOPIN_TIMEOUT 5
+#define NA_NOPIN_TIMEOUT_MAX 60
+#define NA_NOPIN_TIMEOUT_MIN 3
+#define NA_NOPIN_RESPONSE_TIMEOUT 5
+#define NA_NOPIN_RESPONSE_TIMEOUT_MAX 60
+#define NA_NOPIN_RESPONSE_TIMEOUT_MIN 3
+#define NA_RANDOM_DATAIN_PDU_OFFSETS 0
+#define NA_RANDOM_DATAIN_SEQ_OFFSETS 0
+#define NA_RANDOM_R2T_OFFSETS 0
+#define NA_DEFAULT_ERL 0
+#define NA_DEFAULT_ERL_MAX 2
+#define NA_DEFAULT_ERL_MIN 0
+
+/* struct iscsi_tpg_attrib sanity values */
+#define TA_AUTHENTICATION 1
+#define TA_LOGIN_TIMEOUT 15
+#define TA_LOGIN_TIMEOUT_MAX 30
+#define TA_LOGIN_TIMEOUT_MIN 5
+#define TA_NETIF_TIMEOUT 2
+#define TA_NETIF_TIMEOUT_MAX 15
+#define TA_NETIF_TIMEOUT_MIN 2
+#define TA_GENERATE_NODE_ACLS 0
+#define TA_DEFAULT_CMDSN_DEPTH 16
+#define TA_DEFAULT_CMDSN_DEPTH_MAX 512
+#define TA_DEFAULT_CMDSN_DEPTH_MIN 1
+#define TA_CACHE_DYNAMIC_ACLS 0
+/* Enabled by default in demo mode (generate_node_acls=1) */
+#define TA_DEMO_MODE_WRITE_PROTECT 1
+/* Disabled by default in production mode w/ explicit ACLs */
+#define TA_PROD_MODE_WRITE_PROTECT 0
+#define TA_CACHE_CORE_NPS 0
+
+enum tpg_np_network_transport_table {
+ ISCSI_TCP = 0,
+ ISCSI_SCTP_TCP = 1,
+ ISCSI_SCTP_UDP = 2,
+ ISCSI_IWARP_TCP = 3,
+ ISCSI_IWARP_SCTP = 4,
+ ISCSI_INFINIBAND = 5,
+};
+
+/* RFC-3720 7.1.4 Standard Connection State Diagram for a Target */
+enum target_conn_state_table {
+ TARG_CONN_STATE_FREE = 0x1,
+ TARG_CONN_STATE_XPT_UP = 0x3,
+ TARG_CONN_STATE_IN_LOGIN = 0x4,
+ TARG_CONN_STATE_LOGGED_IN = 0x5,
+ TARG_CONN_STATE_IN_LOGOUT = 0x6,
+ TARG_CONN_STATE_LOGOUT_REQUESTED = 0x7,
+ TARG_CONN_STATE_CLEANUP_WAIT = 0x8,
+};
+
+/* RFC-3720 7.3.2 Session State Diagram for a Target */
+enum target_sess_state_table {
+ TARG_SESS_STATE_FREE = 0x1,
+ TARG_SESS_STATE_ACTIVE = 0x2,
+ TARG_SESS_STATE_LOGGED_IN = 0x3,
+ TARG_SESS_STATE_FAILED = 0x4,
+ TARG_SESS_STATE_IN_CONTINUE = 0x5,
+};
+
+/* struct iscsi_data_count->type */
+enum data_count_type {
+ ISCSI_RX_DATA = 1,
+ ISCSI_TX_DATA = 2,
+};
+
+/* struct iscsi_datain_req->dr_complete */
+enum datain_req_comp_table {
+ DATAIN_COMPLETE_NORMAL = 1,
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY = 2,
+ DATAIN_COMPLETE_CONNECTION_RECOVERY = 3,
+};
+
+/* struct iscsi_datain_req->recovery */
+enum datain_req_rec_table {
+ DATAIN_WITHIN_COMMAND_RECOVERY = 1,
+ DATAIN_CONNECTION_RECOVERY = 2,
+};
+
+/* struct iscsi_portal_group->state */
+enum tpg_state_table {
+ TPG_STATE_FREE = 0,
+ TPG_STATE_ACTIVE = 1,
+ TPG_STATE_INACTIVE = 2,
+ TPG_STATE_COLD_RESET = 3,
+};
+
+/* struct iscsi_tiqn->tiqn_state */
+enum tiqn_state_table {
+ TIQN_STATE_ACTIVE = 1,
+ TIQN_STATE_SHUTDOWN = 2,
+};
+
+/* struct iscsi_cmd->cmd_flags */
+enum cmd_flags_table {
+ ICF_GOT_LAST_DATAOUT = 0x00000001,
+ ICF_GOT_DATACK_SNACK = 0x00000002,
+ ICF_NON_IMMEDIATE_UNSOLICITED_DATA = 0x00000004,
+ ICF_SENT_LAST_R2T = 0x00000008,
+ ICF_WITHIN_COMMAND_RECOVERY = 0x00000010,
+ ICF_CONTIG_MEMORY = 0x00000020,
+ ICF_ATTACHED_TO_RQUEUE = 0x00000040,
+ ICF_OOO_CMDSN = 0x00000080,
+ ICF_REJECT_FAIL_CONN = 0x00000100,
+};
+
+/* struct iscsi_cmd->i_state */
+enum cmd_i_state_table {
+ ISTATE_NO_STATE = 0,
+ ISTATE_NEW_CMD = 1,
+ ISTATE_DEFERRED_CMD = 2,
+ ISTATE_UNSOLICITED_DATA = 3,
+ ISTATE_RECEIVE_DATAOUT = 4,
+ ISTATE_RECEIVE_DATAOUT_RECOVERY = 5,
+ ISTATE_RECEIVED_LAST_DATAOUT = 6,
+ ISTATE_WITHIN_DATAOUT_RECOVERY = 7,
+ ISTATE_IN_CONNECTION_RECOVERY = 8,
+ ISTATE_RECEIVED_TASKMGT = 9,
+ ISTATE_SEND_ASYNCMSG = 10,
+ ISTATE_SENT_ASYNCMSG = 11,
+ ISTATE_SEND_DATAIN = 12,
+ ISTATE_SEND_LAST_DATAIN = 13,
+ ISTATE_SENT_LAST_DATAIN = 14,
+ ISTATE_SEND_LOGOUTRSP = 15,
+ ISTATE_SENT_LOGOUTRSP = 16,
+ ISTATE_SEND_NOPIN = 17,
+ ISTATE_SENT_NOPIN = 18,
+ ISTATE_SEND_REJECT = 19,
+ ISTATE_SENT_REJECT = 20,
+ ISTATE_SEND_R2T = 21,
+ ISTATE_SENT_R2T = 22,
+ ISTATE_SEND_R2T_RECOVERY = 23,
+ ISTATE_SENT_R2T_RECOVERY = 24,
+ ISTATE_SEND_LAST_R2T = 25,
+ ISTATE_SENT_LAST_R2T = 26,
+ ISTATE_SEND_LAST_R2T_RECOVERY = 27,
+ ISTATE_SENT_LAST_R2T_RECOVERY = 28,
+ ISTATE_SEND_STATUS = 29,
+ ISTATE_SEND_STATUS_BROKEN_PC = 30,
+ ISTATE_SENT_STATUS = 31,
+ ISTATE_SEND_STATUS_RECOVERY = 32,
+ ISTATE_SENT_STATUS_RECOVERY = 33,
+ ISTATE_SEND_TASKMGTRSP = 34,
+ ISTATE_SENT_TASKMGTRSP = 35,
+ ISTATE_SEND_TEXTRSP = 36,
+ ISTATE_SENT_TEXTRSP = 37,
+ ISTATE_SEND_NOPIN_WANT_RESPONSE = 38,
+ ISTATE_SENT_NOPIN_WANT_RESPONSE = 39,
+ ISTATE_SEND_NOPIN_NO_RESPONSE = 40,
+ ISTATE_REMOVE = 41,
+ ISTATE_FREE = 42,
+};
+
+/* Used for iscsi_recover_cmdsn() return values */
+enum recover_cmdsn_ret_table {
+ CMDSN_ERROR_CANNOT_RECOVER = -1,
+ CMDSN_NORMAL_OPERATION = 0,
+ CMDSN_LOWER_THAN_EXP = 1,
+ CMDSN_HIGHER_THAN_EXP = 2,
+};
+
+/* Used for iscsi_handle_immediate_data() return values */
+enum immedate_data_ret_table {
+ IMMEDIATE_DATA_CANNOT_RECOVER = -1,
+ IMMEDIATE_DATA_NORMAL_OPERATION = 0,
+ IMMEDIATE_DATA_ERL1_CRC_FAILURE = 1,
+};
+
+/* Used for iscsi_decide_dataout_action() return values */
+enum dataout_action_ret_table {
+ DATAOUT_CANNOT_RECOVER = -1,
+ DATAOUT_NORMAL = 0,
+ DATAOUT_SEND_R2T = 1,
+ DATAOUT_SEND_TO_TRANSPORT = 2,
+ DATAOUT_WITHIN_COMMAND_RECOVERY = 3,
+};
+
+/* Used for struct iscsi_node_auth->naf_flags */
+enum naf_flags_table {
+ NAF_USERID_SET = 0x01,
+ NAF_PASSWORD_SET = 0x02,
+ NAF_USERID_IN_SET = 0x04,
+ NAF_PASSWORD_IN_SET = 0x08,
+};
+
+/* Used by various struct timer_list to manage iSCSI specific state */
+enum iscsi_timer_flags_table {
+ ISCSI_TF_RUNNING = 0x01,
+ ISCSI_TF_STOP = 0x02,
+ ISCSI_TF_EXPIRED = 0x04,
+};
+
+/* Used for struct iscsi_np->np_flags */
+enum np_flags_table {
+ NPF_IP_NETWORK = 0x00,
+ NPF_SCTP_STRUCT_FILE = 0x01 /* Bugfix */
+};
+
+/* Used for struct iscsi_np->np_thread_state */
+enum np_thread_state_table {
+ ISCSI_NP_THREAD_ACTIVE = 1,
+ ISCSI_NP_THREAD_INACTIVE = 2,
+ ISCSI_NP_THREAD_RESET = 3,
+ ISCSI_NP_THREAD_SHUTDOWN = 4,
+ ISCSI_NP_THREAD_EXIT = 5,
+};
+
+struct iscsi_conn_ops {
+ u8 HeaderDigest; /* [0,1] == [None,CRC32C] */
+ u8 DataDigest; /* [0,1] == [None,CRC32C] */
+ u32 MaxRecvDataSegmentLength; /* [512..2**24-1] */
+ u8 OFMarker; /* [0,1] == [No,Yes] */
+ u8 IFMarker; /* [0,1] == [No,Yes] */
+ u32 OFMarkInt; /* [1..65535] */
+ u32 IFMarkInt; /* [1..65535] */
+};
+
+struct iscsi_sess_ops {
+ char InitiatorName[224];
+ char InitiatorAlias[256];
+ char TargetName[224];
+ char TargetAlias[256];
+ char TargetAddress[256];
+ u16 TargetPortalGroupTag; /* [0..65535] */
+ u16 MaxConnections; /* [1..65535] */
+ u8 InitialR2T; /* [0,1] == [No,Yes] */
+ u8 ImmediateData; /* [0,1] == [No,Yes] */
+ u32 MaxBurstLength; /* [512..2**24-1] */
+ u32 FirstBurstLength; /* [512..2**24-1] */
+ u16 DefaultTime2Wait; /* [0..3600] */
+ u16 DefaultTime2Retain; /* [0..3600] */
+ u16 MaxOutstandingR2T; /* [1..65535] */
+ u8 DataPDUInOrder; /* [0,1] == [No,Yes] */
+ u8 DataSequenceInOrder; /* [0,1] == [No,Yes] */
+ u8 ErrorRecoveryLevel; /* [0..2] */
+ u8 SessionType; /* [0,1] == [Normal,Discovery]*/
+};
+
+struct iscsi_queue_req {
+ int state;
+ struct iscsi_cmd *cmd;
+ struct list_head qr_list;
+};
+
+struct iscsi_data_count {
+ int data_length;
+ int sync_and_steering;
+ enum data_count_type type;
+ u32 iov_count;
+ u32 ss_iov_count;
+ u32 ss_marker_count;
+ struct kvec *iov;
+};
+
+struct iscsi_param_list {
+ struct list_head param_list;
+ struct list_head extra_response_list;
+};
+
+struct iscsi_datain_req {
+ enum datain_req_comp_table dr_complete;
+ int generate_recovery_values;
+ enum datain_req_rec_table recovery;
+ u32 begrun;
+ u32 runlength;
+ u32 data_length;
+ u32 data_offset;
+ u32 data_offset_end;
+ u32 data_sn;
+ u32 next_burst_len;
+ u32 read_data_done;
+ u32 seq_send_order;
+ struct list_head dr_list;
+} ____cacheline_aligned;
+
+struct iscsi_ooo_cmdsn {
+ u16 cid;
+ u32 batch_count;
+ u32 cmdsn;
+ u32 exp_cmdsn;
+ struct iscsi_cmd *cmd;
+ struct list_head ooo_list;
+} ____cacheline_aligned;
+
+struct iscsi_datain {
+ u8 flags;
+ u32 data_sn;
+ u32 length;
+ u32 offset;
+} ____cacheline_aligned;
+
+struct iscsi_r2t {
+ int seq_complete;
+ int recovery_r2t;
+ int sent_r2t;
+ u32 r2t_sn;
+ u32 offset;
+ u32 targ_xfer_tag;
+ u32 xfer_len;
+ struct list_head r2t_list;
+} ____cacheline_aligned;
+
+struct iscsi_cmd {
+ enum iscsi_timer_flags_table dataout_timer_flags;
+ /* DataOUT timeout retries */
+ u8 dataout_timeout_retries;
+ /* Within command recovery count */
+ u8 error_recovery_count;
+	/* iSCSI dependent state for out-of-order CmdSNs */
+ enum cmd_i_state_table deferred_i_state;
+ /* iSCSI dependent state */
+ enum cmd_i_state_table i_state;
+ /* Command is an immediate command (ISCSI_OP_IMMEDIATE set) */
+ u8 immediate_cmd;
+ /* Immediate data present */
+ u8 immediate_data;
+ /* iSCSI Opcode */
+ u8 iscsi_opcode;
+ /* iSCSI Response Code */
+ u8 iscsi_response;
+ /* Logout reason when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */
+ u8 logout_reason;
+ /* Logout response code when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */
+ u8 logout_response;
+ /* MaxCmdSN has been incremented */
+ u8 maxcmdsn_inc;
+ /* Immediate Unsolicited Dataout */
+ u8 unsolicited_data;
+ /* CID contained in logout PDU when opcode == ISCSI_INIT_LOGOUT_CMND */
+ u16 logout_cid;
+ /* Command flags */
+ enum cmd_flags_table cmd_flags;
+ /* Initiator Task Tag assigned from Initiator */
+ u32 init_task_tag;
+ /* Target Transfer Tag assigned from Target */
+ u32 targ_xfer_tag;
+ /* CmdSN assigned from Initiator */
+ u32 cmd_sn;
+ /* ExpStatSN assigned from Initiator */
+ u32 exp_stat_sn;
+ /* StatSN assigned to this ITT */
+ u32 stat_sn;
+ /* DataSN Counter */
+ u32 data_sn;
+ /* R2TSN Counter */
+ u32 r2t_sn;
+ /* Last DataSN acknowledged via DataAck SNACK */
+ u32 acked_data_sn;
+ /* Used for echoing NOPOUT ping data */
+ u32 buf_ptr_size;
+ /* Used to store DataDigest */
+ u32 data_crc;
+ /* Total size in bytes associated with command */
+ u32 data_length;
+ /* Counter for MaxOutstandingR2T */
+ u32 outstanding_r2ts;
+ /* Next R2T Offset when DataSequenceInOrder=Yes */
+ u32 r2t_offset;
+ /* Iovec current and orig count for iscsi_cmd->iov_data */
+ u32 iov_data_count;
+ u32 orig_iov_data_count;
+ /* Number of miscellaneous iovecs used for IP stack calls */
+ u32 iov_misc_count;
+ /* Number of struct iscsi_pdu in struct iscsi_cmd->pdu_list */
+ u32 pdu_count;
+ /* Next struct iscsi_pdu to send in struct iscsi_cmd->pdu_list */
+ u32 pdu_send_order;
+ /* Current struct iscsi_pdu in struct iscsi_cmd->pdu_list */
+ u32 pdu_start;
+ u32 residual_count;
+ /* Next struct iscsi_seq to send in struct iscsi_cmd->seq_list */
+ u32 seq_send_order;
+ /* Number of struct iscsi_seq in struct iscsi_cmd->seq_list */
+ u32 seq_count;
+ /* Current struct iscsi_seq in struct iscsi_cmd->seq_list */
+ u32 seq_no;
+ /* Lowest offset in current DataOUT sequence */
+ u32 seq_start_offset;
+ /* Highest offset in current DataOUT sequence */
+ u32 seq_end_offset;
+ /* Total size in bytes received so far of READ data */
+ u32 read_data_done;
+ /* Total size in bytes received so far of WRITE data */
+ u32 write_data_done;
+ /* Counter for FirstBurstLength key */
+ u32 first_burst_len;
+ /* Counter for MaxBurstLength key */
+ u32 next_burst_len;
+ /* Transfer size used for IP stack calls */
+ u32 tx_size;
+ /* Buffer used for various purposes */
+ void *buf_ptr;
+ /* See include/linux/dma-mapping.h */
+ enum dma_data_direction data_direction;
+ /* iSCSI PDU Header + CRC */
+ unsigned char pdu[ISCSI_HDR_LEN + ISCSI_CRC_LEN];
+ /* Number of times struct iscsi_cmd is present in immediate queue */
+ atomic_t immed_queue_count;
+ atomic_t response_queue_count;
+ atomic_t transport_sent;
+ spinlock_t datain_lock;
+ spinlock_t dataout_timeout_lock;
+ /* spinlock for protecting struct iscsi_cmd->i_state */
+ spinlock_t istate_lock;
+ /* spinlock for adding within command recovery entries */
+ spinlock_t error_lock;
+ /* spinlock for adding R2Ts */
+ spinlock_t r2t_lock;
+ /* DataIN List */
+ struct list_head datain_list;
+ /* R2T List */
+ struct list_head cmd_r2t_list;
+ struct completion reject_comp;
+ /* Timer for DataOUT */
+ struct timer_list dataout_timer;
+ /* Iovecs for SCSI data payload RX/TX w/ kernel level sockets */
+ struct kvec *iov_data;
+ /* Iovecs for miscellaneous purposes */
+#define ISCSI_MISC_IOVECS 5
+ struct kvec iov_misc[ISCSI_MISC_IOVECS];
+ /* Array of struct iscsi_pdu used for DataPDUInOrder=No */
+ struct iscsi_pdu *pdu_list;
+ /* Current struct iscsi_pdu used for DataPDUInOrder=No */
+ struct iscsi_pdu *pdu_ptr;
+ /* Array of struct iscsi_seq used for DataSequenceInOrder=No */
+ struct iscsi_seq *seq_list;
+ /* Current struct iscsi_seq used for DataSequenceInOrder=No */
+ struct iscsi_seq *seq_ptr;
+ /* TMR Request when iscsi_opcode == ISCSI_OP_SCSI_TMFUNC */
+ struct iscsi_tmr_req *tmr_req;
+	/* Connection this command is allegiant to */
+ struct iscsi_conn *conn;
+ /* Pointer to connection recovery entry */
+ struct iscsi_conn_recovery *cr;
+ /* Session the command is part of, used for connection recovery */
+ struct iscsi_session *sess;
+ /* list_head for connection list */
+ struct list_head i_list;
+ /* The TCM I/O descriptor that is accessed via container_of() */
+ struct se_cmd se_cmd;
+ /* Sense buffer that will be mapped into outgoing status */
+#define ISCSI_SENSE_BUFFER_LEN (TRANSPORT_SENSE_BUFFER + 2)
+ unsigned char sense_buffer[ISCSI_SENSE_BUFFER_LEN];
+
+ struct scatterlist *t_mem_sg;
+ u32 t_mem_sg_nents;
+
+ u32 padding;
+ u8 pad_bytes[4];
+
+ struct scatterlist *first_data_sg;
+ u32 first_data_sg_off;
+ u32 kmapped_nents;
+
+} ____cacheline_aligned;
+
+struct iscsi_tmr_req {
+ bool task_reassign:1;
+ u32 ref_cmd_sn;
+ u32 exp_data_sn;
+ struct iscsi_conn_recovery *conn_recovery;
+ struct se_tmr_req *se_tmr_req;
+};
+
+struct iscsi_conn {
+ /* Authentication Successful for this connection */
+ u8 auth_complete;
+ /* State connection is currently in */
+ u8 conn_state;
+ u8 conn_logout_reason;
+ u8 network_transport;
+ enum iscsi_timer_flags_table nopin_timer_flags;
+ enum iscsi_timer_flags_table nopin_response_timer_flags;
+ u8 tx_immediate_queue;
+ u8 tx_response_queue;
+ /* Used to know what thread encountered a transport failure */
+ u8 which_thread;
+ /* connection id assigned by the Initiator */
+ u16 cid;
+ /* Remote TCP Port */
+ u16 login_port;
+ int net_size;
+ u32 auth_id;
+#define CONNFLAG_SCTP_STRUCT_FILE 0x01
+ u32 conn_flags;
+ /* Used for iscsi_tx_login_rsp() */
+ u32 login_itt;
+ u32 exp_statsn;
+ /* Per connection status sequence number */
+ u32 stat_sn;
+ /* IFMarkInt's Current Value */
+ u32 if_marker;
+ /* OFMarkInt's Current Value */
+ u32 of_marker;
+ /* Used for calculating OFMarker offset to next PDU */
+ u32 of_marker_offset;
+ /* Complete Bad PDU for sending reject */
+ unsigned char bad_hdr[ISCSI_HDR_LEN];
+#define IPV6_ADDRESS_SPACE 48
+ unsigned char login_ip[IPV6_ADDRESS_SPACE];
+ int conn_usage_count;
+ int conn_waiting_on_uc;
+ atomic_t check_immediate_queue;
+ atomic_t conn_logout_remove;
+ atomic_t connection_exit;
+ atomic_t connection_recovery;
+ atomic_t connection_reinstatement;
+ atomic_t connection_wait;
+ atomic_t connection_wait_rcfr;
+ atomic_t sleep_on_conn_wait_comp;
+ atomic_t transport_failed;
+ struct completion conn_post_wait_comp;
+ struct completion conn_wait_comp;
+ struct completion conn_wait_rcfr_comp;
+ struct completion conn_waiting_on_uc_comp;
+ struct completion conn_logout_comp;
+ struct completion tx_half_close_comp;
+ struct completion rx_half_close_comp;
+ /* socket used by this connection */
+ struct socket *sock;
+ struct timer_list nopin_timer;
+ struct timer_list nopin_response_timer;
+ struct timer_list transport_timer;
+	/* Spinlock used for adding/deleting cmds from conn_cmd_list */
+ spinlock_t cmd_lock;
+ spinlock_t conn_usage_lock;
+ spinlock_t immed_queue_lock;
+ spinlock_t nopin_timer_lock;
+ spinlock_t response_queue_lock;
+ spinlock_t state_lock;
+ /* libcrypto RX and TX contexts for crc32c */
+ struct hash_desc conn_rx_hash;
+ struct hash_desc conn_tx_hash;
+ /* Used for scheduling TX and RX connection kthreads */
+ cpumask_var_t conn_cpumask;
+ int conn_rx_reset_cpumask:1;
+ int conn_tx_reset_cpumask:1;
+ /* list_head of struct iscsi_cmd for this connection */
+ struct list_head conn_cmd_list;
+ struct list_head immed_queue_list;
+ struct list_head response_queue_list;
+ struct iscsi_conn_ops *conn_ops;
+ struct iscsi_param_list *param_list;
+ /* Used for per connection auth state machine */
+ void *auth_protocol;
+ struct iscsi_login_thread_s *login_thread;
+ struct iscsi_portal_group *tpg;
+ /* Pointer to parent session */
+ struct iscsi_session *sess;
+ /* Pointer to thread_set in use for this conn's threads */
+ struct iscsi_thread_set *thread_set;
+ /* list_head for session connection list */
+ struct list_head conn_list;
+} ____cacheline_aligned;
+
+struct iscsi_conn_recovery {
+ u16 cid;
+ u32 cmd_count;
+ u32 maxrecvdatasegmentlength;
+ int ready_for_reallegiance;
+ struct list_head conn_recovery_cmd_list;
+ spinlock_t conn_recovery_cmd_lock;
+ struct timer_list time2retain_timer;
+ struct iscsi_session *sess;
+ struct list_head cr_list;
+} ____cacheline_aligned;
+
+struct iscsi_session {
+ u8 initiator_vendor;
+ u8 isid[6];
+ enum iscsi_timer_flags_table time2retain_timer_flags;
+ u8 version_active;
+ u16 cid_called;
+ u16 conn_recovery_count;
+ u16 tsih;
+ /* state session is currently in */
+ u32 session_state;
+ /* session wide counter: initiator assigned task tag */
+ u32 init_task_tag;
+ /* session wide counter: target assigned task tag */
+ u32 targ_xfer_tag;
+ u32 cmdsn_window;
+
+ /* protects cmdsn values */
+ struct mutex cmdsn_mutex;
+ /* session wide counter: expected command sequence number */
+ u32 exp_cmd_sn;
+ /* session wide counter: maximum allowed command sequence number */
+ u32 max_cmd_sn;
+ struct list_head sess_ooo_cmdsn_list;
+
+ /* LIO specific session ID */
+ u32 sid;
+ char auth_type[8];
+ /* unique within the target */
+ int session_index;
+ /* Used for session reference counting */
+ int session_usage_count;
+ int session_waiting_on_uc;
+ u32 cmd_pdus;
+ u32 rsp_pdus;
+ u64 tx_data_octets;
+ u64 rx_data_octets;
+ u32 conn_digest_errors;
+ u32 conn_timeout_errors;
+ u64 creation_time;
+ spinlock_t session_stats_lock;
+ /* Number of active connections */
+ atomic_t nconn;
+ atomic_t session_continuation;
+ atomic_t session_fall_back_to_erl0;
+ atomic_t session_logout;
+ atomic_t session_reinstatement;
+ atomic_t session_stop_active;
+ atomic_t sleep_on_sess_wait_comp;
+ atomic_t transport_wait_cmds;
+ /* connection list */
+ struct list_head sess_conn_list;
+ struct list_head cr_active_list;
+ struct list_head cr_inactive_list;
+ spinlock_t conn_lock;
+ spinlock_t cr_a_lock;
+ spinlock_t cr_i_lock;
+ spinlock_t session_usage_lock;
+ spinlock_t ttt_lock;
+ struct completion async_msg_comp;
+ struct completion reinstatement_comp;
+ struct completion session_wait_comp;
+ struct completion session_waiting_on_uc_comp;
+ struct timer_list time2retain_timer;
+ struct iscsi_sess_ops *sess_ops;
+ struct se_session *se_sess;
+ struct iscsi_portal_group *tpg;
+} ____cacheline_aligned;
+
+struct iscsi_login {
+ u8 auth_complete;
+ u8 checked_for_existing;
+ u8 current_stage;
+ u8 leading_connection;
+ u8 first_request;
+ u8 version_min;
+ u8 version_max;
+ char isid[6];
+ u32 cmd_sn;
+ u32 init_task_tag;
+ u32 initial_exp_statsn;
+ u32 rsp_length;
+ u16 cid;
+ u16 tsih;
+ char *req;
+ char *rsp;
+ char *req_buf;
+ char *rsp_buf;
+} ____cacheline_aligned;
+
+struct iscsi_node_attrib {
+ u32 dataout_timeout;
+ u32 dataout_timeout_retries;
+ u32 default_erl;
+ u32 nopin_timeout;
+ u32 nopin_response_timeout;
+ u32 random_datain_pdu_offsets;
+ u32 random_datain_seq_offsets;
+ u32 random_r2t_offsets;
+ u32 tmr_cold_reset;
+ u32 tmr_warm_reset;
+ struct iscsi_node_acl *nacl;
+};
+
+struct se_dev_entry_s;
+
+struct iscsi_node_auth {
+ enum naf_flags_table naf_flags;
+ int authenticate_target;
+ /* Used for iscsit_global->discovery_auth,
+ * set to zero (auth disabled) by default */
+ int enforce_discovery_auth;
+#define MAX_USER_LEN 256
+#define MAX_PASS_LEN 256
+ char userid[MAX_USER_LEN];
+ char password[MAX_PASS_LEN];
+ char userid_mutual[MAX_USER_LEN];
+ char password_mutual[MAX_PASS_LEN];
+};
+
+#include "iscsi_target_stat.h"
+
+struct iscsi_node_stat_grps {
+ struct config_group iscsi_sess_stats_group;
+ struct config_group iscsi_conn_stats_group;
+};
+
+struct iscsi_node_acl {
+ struct iscsi_node_attrib node_attrib;
+ struct iscsi_node_auth node_auth;
+ struct iscsi_node_stat_grps node_stat_grps;
+ struct se_node_acl se_node_acl;
+};
+
+#define NODE_STAT_GRPS(nacl) (&(nacl)->node_stat_grps)
+
+#define ISCSI_NODE_ATTRIB(t) (&(t)->node_attrib)
+#define ISCSI_NODE_AUTH(t) (&(t)->node_auth)
+
+struct iscsi_tpg_attrib {
+ u32 authentication;
+ u32 login_timeout;
+ u32 netif_timeout;
+ u32 generate_node_acls;
+ u32 cache_dynamic_acls;
+ u32 default_cmdsn_depth;
+ u32 demo_mode_write_protect;
+ u32 prod_mode_write_protect;
+ struct iscsi_portal_group *tpg;
+};
+
+struct iscsi_np {
+ int np_network_transport;
+ int np_ip_proto;
+ int np_sock_type;
+ enum np_thread_state_table np_thread_state;
+ enum iscsi_timer_flags_table np_login_timer_flags;
+ u32 np_exports;
+ enum np_flags_table np_flags;
+ unsigned char np_ip[IPV6_ADDRESS_SPACE];
+ u16 np_port;
+ spinlock_t np_thread_lock;
+ struct completion np_restart_comp;
+ struct socket *np_socket;
+ struct __kernel_sockaddr_storage np_sockaddr;
+ struct task_struct *np_thread;
+ struct timer_list np_login_timer;
+ struct iscsi_portal_group *np_login_tpg;
+ struct list_head np_list;
+} ____cacheline_aligned;
+
+struct iscsi_tpg_np {
+ struct iscsi_np *tpg_np;
+ struct iscsi_portal_group *tpg;
+ struct iscsi_tpg_np *tpg_np_parent;
+ struct list_head tpg_np_list;
+ struct list_head tpg_np_child_list;
+ struct list_head tpg_np_parent_list;
+ struct se_tpg_np se_tpg_np;
+ spinlock_t tpg_np_parent_lock;
+};
+
+struct iscsi_portal_group {
+ unsigned char tpg_chap_id;
+ /* TPG State */
+ enum tpg_state_table tpg_state;
+ /* Target Portal Group Tag */
+ u16 tpgt;
+ /* Id assigned to target sessions */
+ u16 ntsih;
+ /* Number of active sessions */
+ u32 nsessions;
+ /* Number of Network Portals available for this TPG */
+ u32 num_tpg_nps;
+ /* Per TPG LIO specific session ID. */
+ u32 sid;
+ /* Spinlock for adding/removing Network Portals */
+ spinlock_t tpg_np_lock;
+ spinlock_t tpg_state_lock;
+ struct se_portal_group tpg_se_tpg;
+ struct mutex tpg_access_lock;
+ struct mutex np_login_lock;
+ struct iscsi_tpg_attrib tpg_attrib;
+ /* Pointer to default list of iSCSI parameters for TPG */
+ struct iscsi_param_list *param_list;
+ struct iscsi_tiqn *tpg_tiqn;
+ struct list_head tpg_gnp_list;
+ struct list_head tpg_list;
+} ____cacheline_aligned;
+
+#define ISCSI_TPG_C(c) ((struct iscsi_portal_group *)(c)->tpg)
+#define ISCSI_TPG_LUN(c, l) ((iscsi_tpg_list_t *)(c)->tpg->tpg_lun_list_t[l])
+#define ISCSI_TPG_S(s) ((struct iscsi_portal_group *)(s)->tpg)
+#define ISCSI_TPG_ATTRIB(t) (&(t)->tpg_attrib)
+#define SE_TPG(tpg) (&(tpg)->tpg_se_tpg)
+
+struct iscsi_wwn_stat_grps {
+ struct config_group iscsi_stat_group;
+ struct config_group iscsi_instance_group;
+ struct config_group iscsi_sess_err_group;
+ struct config_group iscsi_tgt_attr_group;
+ struct config_group iscsi_login_stats_group;
+ struct config_group iscsi_logout_stats_group;
+};
+
+struct iscsi_tiqn {
+#define ISCSI_IQN_LEN 224
+ unsigned char tiqn[ISCSI_IQN_LEN];
+ enum tiqn_state_table tiqn_state;
+ int tiqn_access_count;
+ u32 tiqn_active_tpgs;
+ u32 tiqn_ntpgs;
+ u32 tiqn_num_tpg_nps;
+ u32 tiqn_nsessions;
+ struct list_head tiqn_list;
+ struct list_head tiqn_tpg_list;
+ spinlock_t tiqn_state_lock;
+ spinlock_t tiqn_tpg_lock;
+ struct se_wwn tiqn_wwn;
+ struct iscsi_wwn_stat_grps tiqn_stat_grps;
+ int tiqn_index;
+ struct iscsi_sess_err_stats sess_err_stats;
+ struct iscsi_login_stats login_stats;
+ struct iscsi_logout_stats logout_stats;
+} ____cacheline_aligned;
+
+#define WWN_STAT_GRPS(tiqn) (&(tiqn)->tiqn_stat_grps)
+
+struct iscsit_global {
+ /* In core shutdown */
+ u32 in_shutdown;
+ u32 active_ts;
+ /* Unique identifier used for the authentication daemon */
+ u32 auth_id;
+ u32 inactive_ts;
+ /* Thread Set bitmap count */
+ int ts_bitmap_count;
+ /* Thread Set bitmap pointer */
+ unsigned long *ts_bitmap;
+ /* Used for iSCSI discovery session authentication */
+ struct iscsi_node_acl discovery_acl;
+ struct iscsi_portal_group *discovery_tpg;
+};
+
+#endif /* ISCSI_TARGET_CORE_H */
diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c
new file mode 100644
index 0000000..8c04951
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_datain_values.c
@@ -0,0 +1,531 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target DataIN value generation functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_datain_values.h"
+
+struct iscsi_datain_req *iscsit_allocate_datain_req(void)
+{
+ struct iscsi_datain_req *dr;
+
+ dr = kmem_cache_zalloc(lio_dr_cache, GFP_ATOMIC);
+ if (!dr) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_datain_req\n");
+ return NULL;
+ }
+ INIT_LIST_HEAD(&dr->dr_list);
+
+ return dr;
+}
+
+void iscsit_attach_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr)
+{
+ spin_lock(&cmd->datain_lock);
+ list_add_tail(&dr->dr_list, &cmd->datain_list);
+ spin_unlock(&cmd->datain_lock);
+}
+
+void iscsit_free_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr)
+{
+ spin_lock(&cmd->datain_lock);
+ list_del(&dr->dr_list);
+ spin_unlock(&cmd->datain_lock);
+
+ kmem_cache_free(lio_dr_cache, dr);
+}
+
+void iscsit_free_all_datain_reqs(struct iscsi_cmd *cmd)
+{
+ struct iscsi_datain_req *dr, *dr_tmp;
+
+ spin_lock(&cmd->datain_lock);
+ list_for_each_entry_safe(dr, dr_tmp, &cmd->datain_list, dr_list) {
+ list_del(&dr->dr_list);
+ kmem_cache_free(lio_dr_cache, dr);
+ }
+ spin_unlock(&cmd->datain_lock);
+}
+
+struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *cmd)
+{
+ struct iscsi_datain_req *dr;
+
+ if (list_empty(&cmd->datain_list)) {
+ pr_err("cmd->datain_list is empty for ITT:"
+ " 0x%08x\n", cmd->init_task_tag);
+ return NULL;
+ }
+	dr = list_first_entry(&cmd->datain_list, struct iscsi_datain_req,
+				dr_list);
+
+ return dr;
+}
+
+/*
+ * For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=Yes.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_yes(
+ struct iscsi_cmd *cmd,
+ struct iscsi_datain *datain)
+{
+ u32 next_burst_len, read_data_done, read_data_left;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_datain_req *dr;
+
+ dr = iscsit_get_datain_req(cmd);
+ if (!dr)
+ return NULL;
+
+ if (dr->recovery && dr->generate_recovery_values) {
+ if (iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+ cmd, dr) < 0)
+ return NULL;
+
+ dr->generate_recovery_values = 0;
+ }
+
+ next_burst_len = (!dr->recovery) ?
+ cmd->next_burst_len : dr->next_burst_len;
+ read_data_done = (!dr->recovery) ?
+ cmd->read_data_done : dr->read_data_done;
+
+ read_data_left = (cmd->data_length - read_data_done);
+ if (!read_data_left) {
+ pr_err("ITT: 0x%08x read_data_left is zero!\n",
+ cmd->init_task_tag);
+ return NULL;
+ }
+
+ if ((read_data_left <= conn->conn_ops->MaxRecvDataSegmentLength) &&
+ (read_data_left <= (conn->sess->sess_ops->MaxBurstLength -
+ next_burst_len))) {
+ datain->length = read_data_left;
+
+ datain->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS);
+ if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+ datain->flags |= ISCSI_FLAG_DATA_ACK;
+ } else {
+ if ((next_burst_len +
+ conn->conn_ops->MaxRecvDataSegmentLength) <
+ conn->sess->sess_ops->MaxBurstLength) {
+ datain->length =
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ next_burst_len += datain->length;
+ } else {
+ datain->length = (conn->sess->sess_ops->MaxBurstLength -
+ next_burst_len);
+ next_burst_len = 0;
+
+ datain->flags |= ISCSI_FLAG_CMD_FINAL;
+ if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+ datain->flags |= ISCSI_FLAG_DATA_ACK;
+ }
+ }
+
+ datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+ datain->offset = read_data_done;
+
+ if (!dr->recovery) {
+ cmd->next_burst_len = next_burst_len;
+ cmd->read_data_done += datain->length;
+ } else {
+ dr->next_burst_len = next_burst_len;
+ dr->read_data_done += datain->length;
+ }
+
+ if (!dr->recovery) {
+ if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+ dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+ return dr;
+ }
+
+ if (!dr->runlength) {
+ if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+ dr->dr_complete =
+ (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+ DATAIN_COMPLETE_CONNECTION_RECOVERY;
+ }
+ } else {
+ if ((dr->begrun + dr->runlength) == dr->data_sn) {
+ dr->dr_complete =
+ (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+ DATAIN_COMPLETE_CONNECTION_RECOVERY;
+ }
+ }
+
+ return dr;
+}
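+/*
+ * Worked example (illustrative, not part of the original patch): with
+ * cmd->data_length = 40960, MaxBurstLength = 16384 and
+ * MaxRecvDataSegmentLength = 8192, successive calls above produce:
+ *
+ *	DataSN 0: offset     0, length 8192
+ *	DataSN 1: offset  8192, length 8192, F set (burst complete)
+ *	DataSN 2: offset 16384, length 8192
+ *	DataSN 3: offset 24576, length 8192, F set (burst complete)
+ *	DataSN 4: offset 32768, length 8192, F and S set (final PDU)
+ *
+ * with the A bit additionally set on F-bit PDUs when ErrorRecoveryLevel > 0.
+ */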
+
+/*
+ * For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=Yes.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_no_and_yes(
+ struct iscsi_cmd *cmd,
+ struct iscsi_datain *datain)
+{
+ u32 offset, read_data_done, read_data_left, seq_send_order;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_datain_req *dr;
+ struct iscsi_seq *seq;
+
+ dr = iscsit_get_datain_req(cmd);
+ if (!dr)
+ return NULL;
+
+ if (dr->recovery && dr->generate_recovery_values) {
+ if (iscsit_create_recovery_datain_values_datasequenceinorder_no(
+ cmd, dr) < 0)
+ return NULL;
+
+ dr->generate_recovery_values = 0;
+ }
+
+ read_data_done = (!dr->recovery) ?
+ cmd->read_data_done : dr->read_data_done;
+ seq_send_order = (!dr->recovery) ?
+ cmd->seq_send_order : dr->seq_send_order;
+
+ read_data_left = (cmd->data_length - read_data_done);
+ if (!read_data_left) {
+ pr_err("ITT: 0x%08x read_data_left is zero!\n",
+ cmd->init_task_tag);
+ return NULL;
+ }
+
+ seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order);
+ if (!seq)
+ return NULL;
+
+ seq->sent = 1;
+
+ if (!dr->recovery && !seq->next_burst_len)
+ seq->first_datasn = cmd->data_sn;
+
+ offset = (seq->offset + seq->next_burst_len);
+
+ if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+ cmd->data_length) {
+ datain->length = (cmd->data_length - offset);
+ datain->offset = offset;
+
+ datain->flags |= ISCSI_FLAG_CMD_FINAL;
+ if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+ datain->flags |= ISCSI_FLAG_DATA_ACK;
+
+ seq->next_burst_len = 0;
+ seq_send_order++;
+ } else {
+ if ((seq->next_burst_len +
+ conn->conn_ops->MaxRecvDataSegmentLength) <
+ conn->sess->sess_ops->MaxBurstLength) {
+ datain->length =
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ datain->offset = (seq->offset + seq->next_burst_len);
+
+ seq->next_burst_len += datain->length;
+ } else {
+ datain->length = (conn->sess->sess_ops->MaxBurstLength -
+ seq->next_burst_len);
+ datain->offset = (seq->offset + seq->next_burst_len);
+
+ datain->flags |= ISCSI_FLAG_CMD_FINAL;
+ if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+ datain->flags |= ISCSI_FLAG_DATA_ACK;
+
+ seq->next_burst_len = 0;
+ seq_send_order++;
+ }
+ }
+
+ if ((read_data_done + datain->length) == cmd->data_length)
+ datain->flags |= ISCSI_FLAG_DATA_STATUS;
+
+ datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+ if (!dr->recovery) {
+ cmd->seq_send_order = seq_send_order;
+ cmd->read_data_done += datain->length;
+ } else {
+ dr->seq_send_order = seq_send_order;
+ dr->read_data_done += datain->length;
+ }
+
+ if (!dr->recovery) {
+ if (datain->flags & ISCSI_FLAG_CMD_FINAL)
+ seq->last_datasn = datain->data_sn;
+ if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+ dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+ return dr;
+ }
+
+ if (!dr->runlength) {
+ if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+ dr->dr_complete =
+ (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+ DATAIN_COMPLETE_CONNECTION_RECOVERY;
+ }
+ } else {
+ if ((dr->begrun + dr->runlength) == dr->data_sn) {
+ dr->dr_complete =
+ (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+ DATAIN_COMPLETE_CONNECTION_RECOVERY;
+ }
+ }
+
+ return dr;
+}
+
+/*
+ * For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=No.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_no(
+ struct iscsi_cmd *cmd,
+ struct iscsi_datain *datain)
+{
+ u32 next_burst_len, read_data_done, read_data_left;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_datain_req *dr;
+ struct iscsi_pdu *pdu;
+
+ dr = iscsit_get_datain_req(cmd);
+ if (!dr)
+ return NULL;
+
+ if (dr->recovery && dr->generate_recovery_values) {
+ if (iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+ cmd, dr) < 0)
+ return NULL;
+
+ dr->generate_recovery_values = 0;
+ }
+
+ next_burst_len = (!dr->recovery) ?
+ cmd->next_burst_len : dr->next_burst_len;
+ read_data_done = (!dr->recovery) ?
+ cmd->read_data_done : dr->read_data_done;
+
+ read_data_left = (cmd->data_length - read_data_done);
+ if (!read_data_left) {
+ pr_err("ITT: 0x%08x read_data_left is zero!\n",
+ cmd->init_task_tag);
+ return dr;
+ }
+
+ pdu = iscsit_get_pdu_holder_for_seq(cmd, NULL);
+ if (!pdu)
+ return dr;
+
+ if ((read_data_done + pdu->length) == cmd->data_length) {
+ pdu->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS);
+ if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+ pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+ next_burst_len = 0;
+ } else {
+ if ((next_burst_len + conn->conn_ops->MaxRecvDataSegmentLength) <
+ conn->sess->sess_ops->MaxBurstLength)
+ next_burst_len += pdu->length;
+ else {
+ pdu->flags |= ISCSI_FLAG_CMD_FINAL;
+ if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+ pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+ next_burst_len = 0;
+ }
+ }
+
+ pdu->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+ if (!dr->recovery) {
+ cmd->next_burst_len = next_burst_len;
+ cmd->read_data_done += pdu->length;
+ } else {
+ dr->next_burst_len = next_burst_len;
+ dr->read_data_done += pdu->length;
+ }
+
+ datain->flags = pdu->flags;
+ datain->length = pdu->length;
+ datain->offset = pdu->offset;
+ datain->data_sn = pdu->data_sn;
+
+ if (!dr->recovery) {
+ if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+ dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+ return dr;
+ }
+
+ if (!dr->runlength) {
+ if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+ dr->dr_complete =
+ (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+ DATAIN_COMPLETE_CONNECTION_RECOVERY;
+ }
+ } else {
+ if ((dr->begrun + dr->runlength) == dr->data_sn) {
+ dr->dr_complete =
+ (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+ DATAIN_COMPLETE_CONNECTION_RECOVERY;
+ }
+ }
+
+ return dr;
+}
+
+/*
+ * For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=No.
+ */
+static struct iscsi_datain_req *iscsit_set_datain_values_no_and_no(
+ struct iscsi_cmd *cmd,
+ struct iscsi_datain *datain)
+{
+ u32 read_data_done, read_data_left, seq_send_order;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_datain_req *dr;
+ struct iscsi_pdu *pdu;
+ struct iscsi_seq *seq = NULL;
+
+ dr = iscsit_get_datain_req(cmd);
+ if (!dr)
+ return NULL;
+
+ if (dr->recovery && dr->generate_recovery_values) {
+ if (iscsit_create_recovery_datain_values_datasequenceinorder_no(
+ cmd, dr) < 0)
+ return NULL;
+
+ dr->generate_recovery_values = 0;
+ }
+
+ read_data_done = (!dr->recovery) ?
+ cmd->read_data_done : dr->read_data_done;
+ seq_send_order = (!dr->recovery) ?
+ cmd->seq_send_order : dr->seq_send_order;
+
+ read_data_left = (cmd->data_length - read_data_done);
+ if (!read_data_left) {
+ pr_err("ITT: 0x%08x read_data_left is zero!\n",
+ cmd->init_task_tag);
+ return NULL;
+ }
+
+ seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order);
+ if (!seq)
+ return NULL;
+
+ seq->sent = 1;
+
+ if (!dr->recovery && !seq->next_burst_len)
+ seq->first_datasn = cmd->data_sn;
+
+ pdu = iscsit_get_pdu_holder_for_seq(cmd, seq);
+ if (!pdu)
+ return NULL;
+
+ if (seq->pdu_send_order == seq->pdu_count) {
+ pdu->flags |= ISCSI_FLAG_CMD_FINAL;
+ if (conn->sess->sess_ops->ErrorRecoveryLevel > 0)
+ pdu->flags |= ISCSI_FLAG_DATA_ACK;
+
+ seq->next_burst_len = 0;
+ seq_send_order++;
+ } else
+ seq->next_burst_len += pdu->length;
+
+ if ((read_data_done + pdu->length) == cmd->data_length)
+ pdu->flags |= ISCSI_FLAG_DATA_STATUS;
+
+ pdu->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++;
+ if (!dr->recovery) {
+ cmd->seq_send_order = seq_send_order;
+ cmd->read_data_done += pdu->length;
+ } else {
+ dr->seq_send_order = seq_send_order;
+ dr->read_data_done += pdu->length;
+ }
+
+ datain->flags = pdu->flags;
+ datain->length = pdu->length;
+ datain->offset = pdu->offset;
+ datain->data_sn = pdu->data_sn;
+
+ if (!dr->recovery) {
+ if (datain->flags & ISCSI_FLAG_CMD_FINAL)
+ seq->last_datasn = datain->data_sn;
+ if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+ dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+ return dr;
+ }
+
+ if (!dr->runlength) {
+ if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+ dr->dr_complete =
+ (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+ DATAIN_COMPLETE_CONNECTION_RECOVERY;
+ }
+ } else {
+ if ((dr->begrun + dr->runlength) == dr->data_sn) {
+ dr->dr_complete =
+ (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+ DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+ DATAIN_COMPLETE_CONNECTION_RECOVERY;
+ }
+ }
+
+ return dr;
+}
+
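+/*
+ * Dispatch on the negotiated (DataSequenceInOrder, DataPDUInOrder) pair:
+ *
+ *	Yes/Yes -> iscsit_set_datain_values_yes_and_yes()
+ *	No/Yes  -> iscsit_set_datain_values_no_and_yes()
+ *	Yes/No  -> iscsit_set_datain_values_yes_and_no()
+ *	No/No   -> iscsit_set_datain_values_no_and_no()
+ */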
+struct iscsi_datain_req *iscsit_get_datain_values(
+ struct iscsi_cmd *cmd,
+ struct iscsi_datain *datain)
+{
+ struct iscsi_conn *conn = cmd->conn;
+
+ if (conn->sess->sess_ops->DataSequenceInOrder &&
+ conn->sess->sess_ops->DataPDUInOrder)
+ return iscsit_set_datain_values_yes_and_yes(cmd, datain);
+ else if (!conn->sess->sess_ops->DataSequenceInOrder &&
+ conn->sess->sess_ops->DataPDUInOrder)
+ return iscsit_set_datain_values_no_and_yes(cmd, datain);
+ else if (conn->sess->sess_ops->DataSequenceInOrder &&
+ !conn->sess->sess_ops->DataPDUInOrder)
+ return iscsit_set_datain_values_yes_and_no(cmd, datain);
+ else if (!conn->sess->sess_ops->DataSequenceInOrder &&
+ !conn->sess->sess_ops->DataPDUInOrder)
+ return iscsit_set_datain_values_no_and_no(cmd, datain);
+
+ return NULL;
+}
diff --git a/drivers/target/iscsi/iscsi_target_datain_values.h b/drivers/target/iscsi/iscsi_target_datain_values.h
new file mode 100644
index 0000000..646429a
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_datain_values.h
@@ -0,0 +1,12 @@
+#ifndef ISCSI_TARGET_DATAIN_VALUES_H
+#define ISCSI_TARGET_DATAIN_VALUES_H
+
+extern struct iscsi_datain_req *iscsit_allocate_datain_req(void);
+extern void iscsit_attach_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *);
+extern void iscsit_free_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *);
+extern void iscsit_free_all_datain_reqs(struct iscsi_cmd *);
+extern struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *);
+extern struct iscsi_datain_req *iscsit_get_datain_values(struct iscsi_cmd *,
+ struct iscsi_datain *);
+
+#endif /*** ISCSI_TARGET_DATAIN_VALUES_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c
new file mode 100644
index 0000000..a19fa5e
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_device.c
@@ -0,0 +1,87 @@
+/*******************************************************************************
+ * This file contains the iSCSI Virtual Device and Disk Transport
+ * agnostic related functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/scsi_device.h>
+#include <target/target_core_base.h>
+#include <target/target_core_device.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+
+int iscsit_get_lun_for_tmr(
+ struct iscsi_cmd *cmd,
+ u64 lun)
+{
+ u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
+
+ return transport_lookup_tmr_lun(&cmd->se_cmd, unpacked_lun);
+}
+
+int iscsit_get_lun_for_cmd(
+ struct iscsi_cmd *cmd,
+ unsigned char *cdb,
+ u64 lun)
+{
+ u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
+
+ return transport_lookup_cmd_lun(&cmd->se_cmd, unpacked_lun);
+}
+
+void iscsit_determine_maxcmdsn(struct iscsi_session *sess)
+{
+ struct se_node_acl *se_nacl;
+
+ /*
+ * This is a discovery session, the single queue slot was already
+ * assigned in iscsi_login_zero_tsih(). Since only Logout and
+ * Text Opcodes are allowed during discovery we do not have to worry
+ * about the HBA's queue depth here.
+ */
+ if (sess->sess_ops->SessionType)
+ return;
+
+ se_nacl = sess->se_sess->se_node_acl;
+
+ /*
+ * This is a normal session, set the Session's CmdSN window to the
+ * struct se_node_acl->queue_depth. The value in struct se_node_acl->queue_depth
+ * has already been validated as a legal value in
+ * core_set_queue_depth_for_node().
+ */
+ sess->cmdsn_window = se_nacl->queue_depth;
+ sess->max_cmd_sn = (sess->max_cmd_sn + se_nacl->queue_depth) - 1;
+}
+
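+/*
+ * Advance MaxCmdSN by one under sess->cmdsn_mutex, using
+ * cmd->maxcmdsn_inc to guard against incrementing twice for the same
+ * command. Immediate commands do not consume a CmdSN window slot.
+ */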
+void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess)
+{
+ if (cmd->immediate_cmd || cmd->maxcmdsn_inc)
+ return;
+
+ cmd->maxcmdsn_inc = 1;
+
+ mutex_lock(&sess->cmdsn_mutex);
+ sess->max_cmd_sn += 1;
+ pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn);
+ mutex_unlock(&sess->cmdsn_mutex);
+}
diff --git a/drivers/target/iscsi/iscsi_target_device.h b/drivers/target/iscsi/iscsi_target_device.h
new file mode 100644
index 0000000..bef1cad
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_device.h
@@ -0,0 +1,9 @@
+#ifndef ISCSI_TARGET_DEVICE_H
+#define ISCSI_TARGET_DEVICE_H
+
+extern int iscsit_get_lun_for_tmr(struct iscsi_cmd *, u64);
+extern int iscsit_get_lun_for_cmd(struct iscsi_cmd *, unsigned char *, u64);
+extern void iscsit_determine_maxcmdsn(struct iscsi_session *);
+extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *);
+
+#endif /* ISCSI_TARGET_DEVICE_H */
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c
new file mode 100644
index 0000000..b7ffc3c
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl0.c
@@ -0,0 +1,1004 @@
+/******************************************************************************
+ * This file contains error recovery level zero functions used by
+ * the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+/*
+ * Used to set values in struct iscsi_cmd that iscsit_dataout_check_sequence()
+ * checks against to determine whether a PDU's Offset+Length falls within
+ * the current DataOUT Sequence. Used for DataSequenceInOrder=Yes only.
+ */
+void iscsit_set_dataout_sequence_values(
+ struct iscsi_cmd *cmd)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ /*
+ * Still set seq_start_offset and seq_end_offset for Unsolicited
+ * DataOUT, even if DataSequenceInOrder=No.
+ */
+ if (cmd->unsolicited_data) {
+ cmd->seq_start_offset = cmd->write_data_done;
+ cmd->seq_end_offset = (cmd->data_length >
+ conn->sess->sess_ops->FirstBurstLength) ?
+ (cmd->write_data_done +
+ conn->sess->sess_ops->FirstBurstLength) :
+ cmd->data_length;
+ return;
+ }
+
+ if (!conn->sess->sess_ops->DataSequenceInOrder)
+ return;
+
+ if (!cmd->seq_start_offset && !cmd->seq_end_offset) {
+ cmd->seq_start_offset = cmd->write_data_done;
+ cmd->seq_end_offset = (cmd->data_length >
+ conn->sess->sess_ops->MaxBurstLength) ?
+ (cmd->write_data_done +
+ conn->sess->sess_ops->MaxBurstLength) : cmd->data_length;
+ } else {
+ cmd->seq_start_offset = cmd->seq_end_offset;
+ cmd->seq_end_offset = ((cmd->seq_end_offset +
+ conn->sess->sess_ops->MaxBurstLength) >=
+ cmd->data_length) ? cmd->data_length :
+ (cmd->seq_end_offset +
+ conn->sess->sess_ops->MaxBurstLength);
+ }
+}
+
+static int iscsit_dataout_within_command_recovery_check(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+ /*
+ * We do the within-command recovery checks here as it is
+ * the first function called in iscsit_check_pre_dataout().
+ * Basically, if we are in within-command recovery and
+ * the PDU does not contain the offset the sequence needs,
+ * dump the payload.
+ *
+ * This only applies to DataPDUInOrder=Yes, for
+ * DataPDUInOrder=No we only re-request the failed PDU
+ * and check that all PDUs in a sequence are received
+ * upon end of sequence.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ if ((cmd->cmd_flags & ICF_WITHIN_COMMAND_RECOVERY) &&
+ (cmd->write_data_done != hdr->offset))
+ goto dump;
+
+ cmd->cmd_flags &= ~ICF_WITHIN_COMMAND_RECOVERY;
+ } else {
+ struct iscsi_seq *seq;
+
+ seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
+ if (!seq)
+ return DATAOUT_CANNOT_RECOVER;
+ /*
+ * Set the struct iscsi_seq pointer to reuse later.
+ */
+ cmd->seq_ptr = seq;
+
+ if (conn->sess->sess_ops->DataPDUInOrder) {
+ if ((seq->status ==
+ DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) &&
+ ((seq->offset != hdr->offset) ||
+ (seq->data_sn != hdr->datasn)))
+ goto dump;
+ } else {
+ if ((seq->status ==
+ DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) &&
+ (seq->data_sn != hdr->datasn))
+ goto dump;
+ }
+
+ if (seq->status == DATAOUT_SEQUENCE_COMPLETE)
+ goto dump;
+
+ if (seq->status != DATAOUT_SEQUENCE_COMPLETE)
+ seq->status = 0;
+ }
+
+ return DATAOUT_NORMAL;
+
+dump:
+ pr_err("Dumping DataOUT PDU Offset: %u Length: %d DataSN:"
+ " 0x%08x\n", hdr->offset, payload_length, hdr->datasn);
+ return iscsit_dump_data_payload(conn, payload_length, 1);
+}
+
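+/*
+ * Sanity check an unsolicited DataOUT PDU against the FirstBurstLength
+ * window set up by iscsit_set_dataout_sequence_values(), and verify the
+ * ISCSI_FLAG_CMD_FINAL semantics for the unsolicited burst.
+ */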
+static int iscsit_dataout_check_unsolicited_sequence(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ u32 first_burst_len;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+
+ if ((hdr->offset < cmd->seq_start_offset) ||
+ ((hdr->offset + payload_length) > cmd->seq_end_offset)) {
+ pr_err("Command ITT: 0x%08x with Offset: %u,"
+ " Length: %u outside of Unsolicited Sequence %u:%u while"
+ " DataSequenceInOrder=Yes.\n", cmd->init_task_tag,
+ hdr->offset, payload_length, cmd->seq_start_offset,
+ cmd->seq_end_offset);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+
+ first_burst_len = (cmd->first_burst_len + payload_length);
+
+ if (first_burst_len > conn->sess->sess_ops->FirstBurstLength) {
+ pr_err("Total %u bytes exceeds FirstBurstLength: %u"
+ " for this Unsolicited DataOut Burst.\n",
+ first_burst_len, conn->sess->sess_ops->FirstBurstLength);
+ transport_send_check_condition_and_sense(&cmd->se_cmd,
+ TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+
+ /*
+ * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity
+ * checks for the current Unsolicited DataOUT Sequence.
+ */
+ if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
+ /*
+ * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No; end of
+ * sequence checks are handled in
+ * iscsit_dataout_datapduinorder_no_fbit().
+ */
+ if (!conn->sess->sess_ops->DataPDUInOrder)
+ goto out;
+
+ if ((first_burst_len != cmd->data_length) &&
+ (first_burst_len != conn->sess->sess_ops->FirstBurstLength)) {
+ pr_err("Unsolicited non-immediate data"
+ " received %u does not equal FirstBurstLength: %u, and"
+ " does not equal ExpXferLen %u.\n", first_burst_len,
+ conn->sess->sess_ops->FirstBurstLength,
+ cmd->data_length);
+ transport_send_check_condition_and_sense(&cmd->se_cmd,
+ TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+ } else {
+ if (first_burst_len == conn->sess->sess_ops->FirstBurstLength) {
+ pr_err("Command ITT: 0x%08x reached"
+ " FirstBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol"
+ " error.\n", cmd->init_task_tag,
+ conn->sess->sess_ops->FirstBurstLength);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+ if (first_burst_len == cmd->data_length) {
+ pr_err("Command ITT: 0x%08x reached"
+ " ExpXferLen: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol"
+ " error.\n", cmd->init_task_tag, cmd->data_length);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+ }
+
+out:
+ return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_check_sequence(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ u32 next_burst_len;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_seq *seq = NULL;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+ /*
+ * For DataSequenceInOrder=Yes: Check that the offset and offset+length
+ * are within the range defined by iscsit_set_dataout_sequence_values().
+ *
+ * For DataSequenceInOrder=No: Check that a struct iscsi_seq exists for
+ * the offset+length tuple.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ /*
+ * Due to the possibility of recovery DataOUT sent by the initiator
+ * fulfilling a Recovery R2T, it is best to just dump the
+ * payload here instead of erroring out.
+ */
+ if ((hdr->offset < cmd->seq_start_offset) ||
+ ((hdr->offset + payload_length) > cmd->seq_end_offset)) {
+ pr_err("Command ITT: 0x%08x with Offset: %u,"
+ " Length: %u outside of Sequence %u:%u while"
+ " DataSequenceInOrder=Yes.\n", cmd->init_task_tag,
+ hdr->offset, payload_length, cmd->seq_start_offset,
+ cmd->seq_end_offset);
+
+ if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+ return DATAOUT_WITHIN_COMMAND_RECOVERY;
+ }
+
+ next_burst_len = (cmd->next_burst_len + payload_length);
+ } else {
+ seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
+ if (!seq)
+ return DATAOUT_CANNOT_RECOVER;
+ /*
+ * Set the struct iscsi_seq pointer to reuse later.
+ */
+ cmd->seq_ptr = seq;
+
+ if (seq->status == DATAOUT_SEQUENCE_COMPLETE) {
+ if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+ return DATAOUT_WITHIN_COMMAND_RECOVERY;
+ }
+
+ next_burst_len = (seq->next_burst_len + payload_length);
+ }
+
+ if (next_burst_len > conn->sess->sess_ops->MaxBurstLength) {
+ pr_err("Command ITT: 0x%08x, NextBurstLength: %u and"
+ " Length: %u exceeds MaxBurstLength: %u. protocol"
+ " error.\n", cmd->init_task_tag,
+ (next_burst_len - payload_length),
+ payload_length, conn->sess->sess_ops->MaxBurstLength);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+
+ /*
+ * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity
+ * checks for the current DataOUT Sequence.
+ */
+ if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
+ /*
+ * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No; end of
+ * sequence checks are handled in
+ * iscsit_dataout_datapduinorder_no_fbit().
+ */
+ if (!conn->sess->sess_ops->DataPDUInOrder)
+ goto out;
+
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ if ((next_burst_len <
+ conn->sess->sess_ops->MaxBurstLength) &&
+ ((cmd->write_data_done + payload_length) <
+ cmd->data_length)) {
+ pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
+ " before end of DataOUT sequence, protocol"
+ " error.\n", cmd->init_task_tag);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+ } else {
+ if (next_burst_len < seq->xfer_len) {
+ pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
+ " before end of DataOUT sequence, protocol"
+ " error.\n", cmd->init_task_tag);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+ }
+ } else {
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ if (next_burst_len ==
+ conn->sess->sess_ops->MaxBurstLength) {
+ pr_err("Command ITT: 0x%08x reached"
+ " MaxBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is"
+ " not set, protocol error.", cmd->init_task_tag,
+ conn->sess->sess_ops->MaxBurstLength);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+ if ((cmd->write_data_done + payload_length) ==
+ cmd->data_length) {
+ pr_err("Command ITT: 0x%08x reached"
+ " last DataOUT PDU in sequence but ISCSI_FLAG_"
+ "CMD_FINAL is not set, protocol error.\n",
+ cmd->init_task_tag);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+ } else {
+ if (next_burst_len == seq->xfer_len) {
+ pr_err("Command ITT: 0x%08x reached"
+ " last DataOUT PDU in sequence but ISCSI_FLAG_"
+ "CMD_FINAL is not set, protocol error.\n",
+ cmd->init_task_tag);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+ }
+ }
+
+out:
+ return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_check_datasn(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ int dump = 0, recovery = 0;
+ u32 data_sn = 0;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+ /*
+ * Considering the target has no method of re-requesting DataOUT
+ * by DataSN, if we receive a greater DataSN than expected we
+ * assume the functions for DataPDUInOrder=[Yes,No] below will
+ * handle it.
+ *
+ * If the DataSN is less than expected, dump the payload.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder)
+ data_sn = cmd->data_sn;
+ else {
+ struct iscsi_seq *seq = cmd->seq_ptr;
+ data_sn = seq->data_sn;
+ }
+
+ if (hdr->datasn > data_sn) {
+ pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
+ " higher than expected 0x%08x.\n", cmd->init_task_tag,
+ hdr->datasn, data_sn);
+ recovery = 1;
+ goto recover;
+ } else if (hdr->datasn < data_sn) {
+ pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
+ " lower than expected 0x%08x, discarding payload.\n",
+ cmd->init_task_tag, hdr->datasn, data_sn);
+ dump = 1;
+ goto dump;
+ }
+
+ return DATAOUT_NORMAL;
+
+recover:
+ if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+ pr_err("Unable to perform within-command recovery"
+ " while ERL=0.\n");
+ return DATAOUT_CANNOT_RECOVER;
+ }
+dump:
+ if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+
+ return (recovery || dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY :
+ DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_pre_datapduinorder_yes(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ int dump = 0, recovery = 0;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+ /*
+ * For DataSequenceInOrder=Yes: If the offset is greater than the global
+ * DataPDUInOrder=Yes offset counter in struct iscsi_cmd, a protocol
+ * error has occurred; fail the connection.
+ *
+ * For DataSequenceInOrder=No: If the offset is greater than the per
+ * sequence DataPDUInOrder=Yes offset counter in struct iscsi_seq, a
+ * protocol error has occurred; fail the connection.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ if (hdr->offset != cmd->write_data_done) {
+ pr_err("Command ITT: 0x%08x, received offset"
+ " %u different than expected %u.\n", cmd->init_task_tag,
+ hdr->offset, cmd->write_data_done);
+ recovery = 1;
+ goto recover;
+ }
+ } else {
+ struct iscsi_seq *seq = cmd->seq_ptr;
+
+ if (hdr->offset > seq->offset) {
+ pr_err("Command ITT: 0x%08x, received offset"
+ " %u greater than expected %u.\n", cmd->init_task_tag,
+ hdr->offset, seq->offset);
+ recovery = 1;
+ goto recover;
+ } else if (hdr->offset < seq->offset) {
+ pr_err("Command ITT: 0x%08x, received offset"
+ " %u less than expected %u, discarding payload.\n",
+ cmd->init_task_tag, hdr->offset, seq->offset);
+ dump = 1;
+ goto dump;
+ }
+ }
+
+ return DATAOUT_NORMAL;
+
+recover:
+ if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+ pr_err("Unable to perform within-command recovery"
+ " while ERL=0.\n");
+ return DATAOUT_CANNOT_RECOVER;
+ }
+dump:
+ if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+
+ return (recovery) ? iscsit_recover_dataout_sequence(cmd,
+ hdr->offset, payload_length) :
+ (dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY : DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_pre_datapduinorder_no(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ struct iscsi_pdu *pdu;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+ pdu = iscsit_get_pdu_holder(cmd, hdr->offset, payload_length);
+ if (!pdu)
+ return DATAOUT_CANNOT_RECOVER;
+
+ cmd->pdu_ptr = pdu;
+
+ switch (pdu->status) {
+ case ISCSI_PDU_NOT_RECEIVED:
+ case ISCSI_PDU_CRC_FAILED:
+ case ISCSI_PDU_TIMED_OUT:
+ break;
+ case ISCSI_PDU_RECEIVED_OK:
+ pr_err("Command ITT: 0x%08x received already gotten"
+ " Offset: %u, Length: %u\n", cmd->init_task_tag,
+ hdr->offset, payload_length);
+ return iscsit_dump_data_payload(cmd->conn, payload_length, 1);
+ default:
+ return DATAOUT_CANNOT_RECOVER;
+ }
+
+ return DATAOUT_NORMAL;
+}
+
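+/*
+ * Mark the outstanding R2T that this end-of-sequence DataOUT answers
+ * as complete, so a follow-up R2T may be built for the command.
+ */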
+static int iscsit_dataout_update_r2t(struct iscsi_cmd *cmd, u32 offset, u32 length)
+{
+ struct iscsi_r2t *r2t;
+
+ if (cmd->unsolicited_data)
+ return 0;
+
+ r2t = iscsit_get_r2t_for_eos(cmd, offset, length);
+ if (!r2t)
+ return -1;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ r2t->seq_complete = 1;
+ cmd->outstanding_r2ts--;
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ return 0;
+}
+
+static int iscsit_dataout_update_datapduinorder_no(
+ struct iscsi_cmd *cmd,
+ u32 data_sn,
+ int f_bit)
+{
+ int ret = 0;
+ struct iscsi_pdu *pdu = cmd->pdu_ptr;
+
+ pdu->data_sn = data_sn;
+
+ switch (pdu->status) {
+ case ISCSI_PDU_NOT_RECEIVED:
+ case ISCSI_PDU_CRC_FAILED:
+ case ISCSI_PDU_TIMED_OUT:
+ pdu->status = ISCSI_PDU_RECEIVED_OK;
+ break;
+ default:
+ return DATAOUT_CANNOT_RECOVER;
+ }
+
+ if (f_bit) {
+ ret = iscsit_dataout_datapduinorder_no_fbit(cmd, pdu);
+ if (ret == DATAOUT_CANNOT_RECOVER)
+ return ret;
+ }
+
+ return DATAOUT_NORMAL;
+}
+
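+/*
+ * Update per-command and per-sequence DataOUT state for a payload that
+ * arrived with a good CRC, and decide whether to hand the completed
+ * WRITE to the transport, send the next R2T, or keep receiving.
+ */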
+static int iscsit_dataout_post_crc_passed(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ int ret, send_r2t = 0;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_seq *seq = NULL;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+ if (cmd->unsolicited_data) {
+ if ((cmd->first_burst_len + payload_length) ==
+ conn->sess->sess_ops->FirstBurstLength) {
+ if (iscsit_dataout_update_r2t(cmd, hdr->offset,
+ payload_length) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+ send_r2t = 1;
+ }
+
+ if (!conn->sess->sess_ops->DataPDUInOrder) {
+ ret = iscsit_dataout_update_datapduinorder_no(cmd,
+ hdr->datasn, (hdr->flags & ISCSI_FLAG_CMD_FINAL));
+ if (ret == DATAOUT_CANNOT_RECOVER)
+ return ret;
+ }
+
+ cmd->first_burst_len += payload_length;
+
+ if (conn->sess->sess_ops->DataSequenceInOrder)
+ cmd->data_sn++;
+ else {
+ seq = cmd->seq_ptr;
+ seq->data_sn++;
+ seq->offset += payload_length;
+ }
+
+ if (send_r2t) {
+ if (seq)
+ seq->status = DATAOUT_SEQUENCE_COMPLETE;
+ cmd->first_burst_len = 0;
+ cmd->unsolicited_data = 0;
+ }
+ } else {
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ if ((cmd->next_burst_len + payload_length) ==
+ conn->sess->sess_ops->MaxBurstLength) {
+ if (iscsit_dataout_update_r2t(cmd, hdr->offset,
+ payload_length) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+ send_r2t = 1;
+ }
+
+ if (!conn->sess->sess_ops->DataPDUInOrder) {
+ ret = iscsit_dataout_update_datapduinorder_no(
+ cmd, hdr->datasn,
+ (hdr->flags & ISCSI_FLAG_CMD_FINAL));
+ if (ret == DATAOUT_CANNOT_RECOVER)
+ return ret;
+ }
+
+ cmd->next_burst_len += payload_length;
+ cmd->data_sn++;
+
+ if (send_r2t)
+ cmd->next_burst_len = 0;
+ } else {
+ seq = cmd->seq_ptr;
+
+ if ((seq->next_burst_len + payload_length) ==
+ seq->xfer_len) {
+ if (iscsit_dataout_update_r2t(cmd, hdr->offset,
+ payload_length) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+ send_r2t = 1;
+ }
+
+ if (!conn->sess->sess_ops->DataPDUInOrder) {
+ ret = iscsit_dataout_update_datapduinorder_no(
+ cmd, hdr->datasn,
+ (hdr->flags & ISCSI_FLAG_CMD_FINAL));
+ if (ret == DATAOUT_CANNOT_RECOVER)
+ return ret;
+ }
+
+ seq->data_sn++;
+ seq->offset += payload_length;
+ seq->next_burst_len += payload_length;
+
+ if (send_r2t) {
+ seq->next_burst_len = 0;
+ seq->status = DATAOUT_SEQUENCE_COMPLETE;
+ }
+ }
+ }
+
+ if (send_r2t && conn->sess->sess_ops->DataSequenceInOrder)
+ cmd->data_sn = 0;
+
+ cmd->write_data_done += payload_length;
+
+ return (cmd->write_data_done == cmd->data_length) ?
+ DATAOUT_SEND_TO_TRANSPORT : (send_r2t) ?
+ DATAOUT_SEND_R2T : DATAOUT_NORMAL;
+}
+
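+/*
+ * A DataOUT payload failed its CRC check. Record the failure in the
+ * per-PDU state for DataPDUInOrder=No, then enter within-command
+ * recovery for the affected sequence.
+ */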
+static int iscsit_dataout_post_crc_failed(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_pdu *pdu;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+ if (conn->sess->sess_ops->DataPDUInOrder)
+ goto recover;
+ /*
+ * The rest of this function is only called when DataPDUInOrder=No.
+ */
+ pdu = cmd->pdu_ptr;
+
+ switch (pdu->status) {
+ case ISCSI_PDU_NOT_RECEIVED:
+ case ISCSI_PDU_TIMED_OUT:
+ pdu->status = ISCSI_PDU_CRC_FAILED;
+ break;
+ case ISCSI_PDU_CRC_FAILED:
+ break;
+ default:
+ return DATAOUT_CANNOT_RECOVER;
+ }
+
+recover:
+ return iscsit_recover_dataout_sequence(cmd, hdr->offset, payload_length);
+}
+
+/*
+ * Called from iscsit_handle_data_out() before DataOUT Payload is received
+ * and CRC computed.
+ */
+int iscsit_check_pre_dataout(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ int ret;
+ struct iscsi_conn *conn = cmd->conn;
+
+ ret = iscsit_dataout_within_command_recovery_check(cmd, buf);
+ if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+ (ret == DATAOUT_CANNOT_RECOVER))
+ return ret;
+
+ ret = iscsit_dataout_check_datasn(cmd, buf);
+ if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+ (ret == DATAOUT_CANNOT_RECOVER))
+ return ret;
+
+ if (cmd->unsolicited_data) {
+ ret = iscsit_dataout_check_unsolicited_sequence(cmd, buf);
+ if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+ (ret == DATAOUT_CANNOT_RECOVER))
+ return ret;
+ } else {
+ ret = iscsit_dataout_check_sequence(cmd, buf);
+ if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) ||
+ (ret == DATAOUT_CANNOT_RECOVER))
+ return ret;
+ }
+
+ return (conn->sess->sess_ops->DataPDUInOrder) ?
+ iscsit_dataout_pre_datapduinorder_yes(cmd, buf) :
+ iscsit_dataout_pre_datapduinorder_no(cmd, buf);
+}
+
+/*
+ * Called from iscsit_handle_data_out() after DataOUT Payload is received
+ * and CRC computed.
+ */
+int iscsit_check_post_dataout(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf,
+ u8 data_crc_failed)
+{
+ struct iscsi_conn *conn = cmd->conn;
+
+ cmd->dataout_timeout_retries = 0;
+
+ if (!data_crc_failed)
+ return iscsit_dataout_post_crc_passed(cmd, buf);
+ else {
+ if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+ pr_err("Unable to recover from DataOUT CRC"
+ " failure while ERL=0, closing session.\n");
+ iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR,
+ 1, 0, buf, cmd);
+ return DATAOUT_CANNOT_RECOVER;
+ }
+
+ iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR,
+ 0, 0, buf, cmd);
+ return iscsit_dataout_post_crc_failed(cmd, buf);
+ }
+}
+
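+/*
+ * Time2Retain expiry handler: no session reinstatement occurred within
+ * the negotiated DefaultTime2Retain window, so record the failure in
+ * the TIQN error statistics and clean up the iSCSI session.
+ */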
+static void iscsit_handle_time2retain_timeout(unsigned long data)
+{
+ struct iscsi_session *sess = (struct iscsi_session *) data;
+ struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+ struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+ spin_lock_bh(&se_tpg->session_lock);
+ if (sess->time2retain_timer_flags & ISCSI_TF_STOP) {
+ spin_unlock_bh(&se_tpg->session_lock);
+ return;
+ }
+ if (atomic_read(&sess->session_reinstatement)) {
+ pr_err("Exiting Time2Retain handler because"
+ " session_reinstatement=1\n");
+ spin_unlock_bh(&se_tpg->session_lock);
+ return;
+ }
+ sess->time2retain_timer_flags |= ISCSI_TF_EXPIRED;
+
+ pr_err("Time2Retain timer expired for SID: %u, cleaning up"
+ " iSCSI session.\n", sess->sid);
+ {
+ struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+ if (tiqn) {
+ spin_lock(&tiqn->sess_err_stats.lock);
+ strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name,
+ (void *)sess->sess_ops->InitiatorName);
+ tiqn->sess_err_stats.last_sess_failure_type =
+ ISCSI_SESS_ERR_CXN_TIMEOUT;
+ tiqn->sess_err_stats.cxn_timeout_errors++;
+ sess->conn_timeout_errors++;
+ spin_unlock(&tiqn->sess_err_stats.lock);
+ }
+ }
+
+ spin_unlock_bh(&se_tpg->session_lock);
+ iscsit_close_session(sess);
+}
+
+void iscsit_start_time2retain_handler(struct iscsi_session *sess)
+{
+ int tpg_active;
+ /*
+ * Only start the Time2Retain timer when the associated TPG is still in
+ * an ACTIVE (e.g. not disabled or shutdown) state.
+ */
+ spin_lock(&ISCSI_TPG_S(sess)->tpg_state_lock);
+ tpg_active = (ISCSI_TPG_S(sess)->tpg_state == TPG_STATE_ACTIVE);
+ spin_unlock(&ISCSI_TPG_S(sess)->tpg_state_lock);
+
+ if (!tpg_active)
+ return;
+
+ if (sess->time2retain_timer_flags & ISCSI_TF_RUNNING)
+ return;
+
+ pr_debug("Starting Time2Retain timer for %u seconds on"
+ " SID: %u\n", sess->sess_ops->DefaultTime2Retain, sess->sid);
+
+ init_timer(&sess->time2retain_timer);
+ sess->time2retain_timer.expires =
+ (get_jiffies_64() + sess->sess_ops->DefaultTime2Retain * HZ);
+ sess->time2retain_timer.data = (unsigned long)sess;
+ sess->time2retain_timer.function = iscsit_handle_time2retain_timeout;
+ sess->time2retain_timer_flags &= ~ISCSI_TF_STOP;
+ sess->time2retain_timer_flags |= ISCSI_TF_RUNNING;
+ add_timer(&sess->time2retain_timer);
+}
+
+/*
+ * Called with spin_lock_bh(&struct se_portal_group->session_lock) held
+ */
+int iscsit_stop_time2retain_timer(struct iscsi_session *sess)
+{
+ struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+ struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+ if (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)
+ return -1;
+
+ if (!(sess->time2retain_timer_flags & ISCSI_TF_RUNNING))
+ return 0;
+
+ sess->time2retain_timer_flags |= ISCSI_TF_STOP;
+ spin_unlock_bh(&se_tpg->session_lock);
+
+ del_timer_sync(&sess->time2retain_timer);
+
+ spin_lock_bh(&se_tpg->session_lock);
+ sess->time2retain_timer_flags &= ~ISCSI_TF_RUNNING;
+ pr_debug("Stopped Time2Retain Timer for SID: %u\n",
+ sess->sid);
+ return 0;
+}
+
+void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *conn)
+{
+ spin_lock_bh(&conn->state_lock);
+ if (atomic_read(&conn->connection_exit)) {
+ spin_unlock_bh(&conn->state_lock);
+ goto sleep;
+ }
+
+ if (atomic_read(&conn->transport_failed)) {
+ spin_unlock_bh(&conn->state_lock);
+ goto sleep;
+ }
+ spin_unlock_bh(&conn->state_lock);
+
+ iscsi_thread_set_force_reinstatement(conn);
+
+sleep:
+ wait_for_completion(&conn->conn_wait_rcfr_comp);
+ complete(&conn->conn_post_wait_comp);
+}
+
+void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep)
+{
+ spin_lock_bh(&conn->state_lock);
+ if (atomic_read(&conn->connection_exit)) {
+ spin_unlock_bh(&conn->state_lock);
+ return;
+ }
+
+ if (atomic_read(&conn->transport_failed)) {
+ spin_unlock_bh(&conn->state_lock);
+ return;
+ }
+
+ if (atomic_read(&conn->connection_reinstatement)) {
+ spin_unlock_bh(&conn->state_lock);
+ return;
+ }
+
+ if (iscsi_thread_set_force_reinstatement(conn) < 0) {
+ spin_unlock_bh(&conn->state_lock);
+ return;
+ }
+
+ atomic_set(&conn->connection_reinstatement, 1);
+ if (!sleep) {
+ spin_unlock_bh(&conn->state_lock);
+ return;
+ }
+
+ atomic_set(&conn->sleep_on_conn_wait_comp, 1);
+ spin_unlock_bh(&conn->state_lock);
+
+ wait_for_completion(&conn->conn_wait_comp);
+ complete(&conn->conn_post_wait_comp);
+}
+
+void iscsit_fall_back_to_erl0(struct iscsi_session *sess)
+{
+ pr_debug("Falling back to ErrorRecoveryLevel=0 for SID:"
+ " %u\n", sess->sid);
+
+ atomic_set(&sess->session_fall_back_to_erl0, 1);
+}
+
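+/*
+ * With ERL=2, a failed connection's state is kept for connection
+ * recovery via a transport reset; otherwise the failed connection is
+ * simply closed.
+ */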
+static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+
+ if ((sess->sess_ops->ErrorRecoveryLevel == 2) &&
+ !atomic_read(&sess->session_reinstatement) &&
+ !atomic_read(&sess->session_fall_back_to_erl0))
+ iscsit_connection_recovery_transport_reset(conn);
+ else {
+ pr_debug("Performing cleanup for failed iSCSI"
+ " Connection ID: %hu from %s\n", conn->cid,
+ sess->sess_ops->InitiatorName);
+ iscsit_close_connection(conn);
+ }
+}
+
+void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
+{
+ spin_lock_bh(&conn->state_lock);
+ if (atomic_read(&conn->connection_exit)) {
+ spin_unlock_bh(&conn->state_lock);
+ return;
+ }
+ atomic_set(&conn->connection_exit, 1);
+
+ if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) {
+ spin_unlock_bh(&conn->state_lock);
+ iscsit_close_connection(conn);
+ return;
+ }
+
+ if (conn->conn_state == TARG_CONN_STATE_CLEANUP_WAIT) {
+ spin_unlock_bh(&conn->state_lock);
+ return;
+ }
+
+ pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n");
+ conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT;
+ spin_unlock_bh(&conn->state_lock);
+
+ iscsit_handle_connection_cleanup(conn);
+}
+
+/*
+ * This is the simple function that makes the magic of
+ * sync and steering happen in the following paradoxical order:
+ *
+ * 0) Receive conn->of_marker (bytes left until next OFMarker)
+ * bytes into an offload buffer. When we pass the exact number
+ * of bytes in conn->of_marker, iscsit_dump_data_payload() and hence
+ * rx_data() will automatically receive the identical u32 marker
+ * values and store it in conn->of_marker_offset;
+ * 1) Now conn->of_marker_offset will contain the offset to the start
+ * of the next iSCSI PDU. Dump these remaining bytes into another
+ * offload buffer.
+ * 2) We are done!
+ * Next byte in the TCP stream will contain the next iSCSI PDU!
+ * Cool Huh?!
+ */
+int iscsit_recover_from_unknown_opcode(struct iscsi_conn *conn)
+{
+ /*
+ * Make sure the number of bytes remaining until the next marker is sane;
+ * OFMarkInt is negotiated in units of 4-byte words, hence the * 4 below.
+ */
+ if (conn->of_marker > (conn->conn_ops->OFMarkInt * 4)) {
+ pr_err("Remaining bytes to OFMarker: %u exceeds"
+ " OFMarkInt bytes: %u.\n", conn->of_marker,
+ conn->conn_ops->OFMarkInt * 4);
+ return -1;
+ }
+
+ pr_debug("Advancing %u bytes in TCP stream to get to the"
+ " next OFMarker.\n", conn->of_marker);
+
+ if (iscsit_dump_data_payload(conn, conn->of_marker, 0) < 0)
+ return -1;
+
+ /*
+ * Make sure the offset marker we retrieved is a valid value.
+ */
+ if (conn->of_marker_offset > (ISCSI_HDR_LEN + (ISCSI_CRC_LEN * 2) +
+ conn->conn_ops->MaxRecvDataSegmentLength)) {
+ pr_err("OfMarker offset value: %u exceeds limit.\n",
+ conn->of_marker_offset);
+ return -1;
+ }
+
+ pr_debug("Discarding %u bytes of TCP stream to get to the"
+ " next iSCSI Opcode.\n", conn->of_marker_offset);
+
+ if (iscsit_dump_data_payload(conn, conn->of_marker_offset, 0) < 0)
+ return -1;
+
+ return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h
new file mode 100644
index 0000000..21acc9a
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl0.h
@@ -0,0 +1,15 @@
+#ifndef ISCSI_TARGET_ERL0_H
+#define ISCSI_TARGET_ERL0_H
+
+extern void iscsit_set_dataout_sequence_values(struct iscsi_cmd *);
+extern int iscsit_check_pre_dataout(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_check_post_dataout(struct iscsi_cmd *, unsigned char *, u8);
+extern void iscsit_start_time2retain_handler(struct iscsi_session *);
+extern int iscsit_stop_time2retain_timer(struct iscsi_session *);
+extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *);
+extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int);
+extern void iscsit_fall_back_to_erl0(struct iscsi_session *);
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *);
+extern int iscsit_recover_from_unknown_opcode(struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_ERL0_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
new file mode 100644
index 0000000..9806507
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -0,0 +1,1299 @@
+/*******************************************************************************
+ * This file contains error recovery level one used by the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target.h"
+
+#define OFFLOAD_BUF_SIZE 32768
+
+/*
+ * Used to dump excess datain payload for certain error recovery
+ * situations. Receives at most OFFLOAD_BUF_SIZE bytes per rx_data() call.
+ *
+ * dump_padding_digest denotes if padding and data digests need
+ * to be dumped.
+ */
+int iscsit_dump_data_payload(
+ struct iscsi_conn *conn,
+ u32 buf_len,
+ int dump_padding_digest)
+{
+ char *buf, pad_bytes[4];
+ int ret = DATAOUT_WITHIN_COMMAND_RECOVERY, rx_got;
+ u32 length, padding, offset = 0, size;
+ struct kvec iov;
+
+ length = (buf_len > OFFLOAD_BUF_SIZE) ? OFFLOAD_BUF_SIZE : buf_len;
+
+ buf = kzalloc(length, GFP_ATOMIC);
+ if (!buf) {
+ pr_err("Unable to allocate %u bytes for offload"
+ " buffer.\n", length);
+ return -1;
+ }
+ memset(&iov, 0, sizeof(struct kvec));
+
+ while (offset < buf_len) {
+ size = ((offset + length) > buf_len) ?
+ (buf_len - offset) : length;
+
+ iov.iov_len = size;
+ iov.iov_base = buf;
+
+ rx_got = rx_data(conn, &iov, 1, size);
+ if (rx_got != size) {
+ ret = DATAOUT_CANNOT_RECOVER;
+ goto out;
+ }
+
+ offset += size;
+ }
+
+ if (!dump_padding_digest)
+ goto out;
+
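+ /*
+ * DataOUT payloads are padded to a 4-byte boundary, so ((-buf_len) & 3)
+ * yields the 0-3 pad bytes that still have to be drained from the
+ * stream before the optional data digest.
+ */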
+ padding = ((-buf_len) & 3);
+ if (padding != 0) {
+ iov.iov_len = padding;
+ iov.iov_base = pad_bytes;
+
+ rx_got = rx_data(conn, &iov, 1, padding);
+ if (rx_got != padding) {
+ ret = DATAOUT_CANNOT_RECOVER;
+ goto out;
+ }
+ }
+
+ if (conn->conn_ops->DataDigest) {
+ u32 data_crc;
+
+ iov.iov_len = ISCSI_CRC_LEN;
+ iov.iov_base = &data_crc;
+
+ rx_got = rx_data(conn, &iov, 1, ISCSI_CRC_LEN);
+ if (rx_got != ISCSI_CRC_LEN) {
+ ret = DATAOUT_CANNOT_RECOVER;
+ goto out;
+ }
+ }
+
+out:
+ kfree(buf);
+ return ret;
+}
+
+/*
+ * Used for retransmitting R2Ts from a R2T SNACK request.
+ */
+static int iscsit_send_recovery_r2t_for_snack(
+ struct iscsi_cmd *cmd,
+ struct iscsi_r2t *r2t)
+{
+ /*
+ * If the struct iscsi_r2t has not been sent yet, we can safely
+ * ignore retransmission of the R2TSN in question.
+ */
+ spin_lock_bh(&cmd->r2t_lock);
+ if (!r2t->sent_r2t) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return 0;
+ }
+ r2t->sent_r2t = 0;
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T);
+
+ return 0;
+}
+
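+/*
+ * Service an R2T SNACK: validate BegRun/RunLength against the command's
+ * current R2TSN, then re-queue every R2T in the requested run for
+ * retransmission.
+ */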
+static int iscsit_handle_r2t_snack(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf,
+ u32 begrun,
+ u32 runlength)
+{
+ u32 last_r2tsn;
+ struct iscsi_r2t *r2t;
+
+ /*
+ * Make sure the initiator is not requesting retransmission
+ * of R2TSNs already acknowledged by a TMR TASK_REASSIGN.
+ */
+ if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+ (begrun <= cmd->acked_data_sn)) {
+ pr_err("ITT: 0x%08x, R2T SNACK requesting"
+ " retransmission of R2TSN: 0x%08x to 0x%08x but already"
+ " acked to R2TSN: 0x%08x by TMR TASK_REASSIGN,"
+ " protocol error.\n", cmd->init_task_tag, begrun,
+ (begrun + runlength), cmd->acked_data_sn);
+
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+ }
+
+ if (runlength) {
+ if ((begrun + runlength) > cmd->r2t_sn) {
+ pr_err("Command ITT: 0x%08x received R2T SNACK"
+ " with BegRun: 0x%08x, RunLength: 0x%08x, exceeds"
+ " current R2TSN: 0x%08x, protocol error.\n",
+ cmd->init_task_tag, begrun, runlength, cmd->r2t_sn);
+ return iscsit_add_reject_from_cmd(
+ ISCSI_REASON_BOOKMARK_INVALID, 1, 0, buf, cmd);
+ }
+ last_r2tsn = (begrun + runlength);
+ } else
+ last_r2tsn = cmd->r2t_sn;
+
+ while (begrun < last_r2tsn) {
+ r2t = iscsit_get_holder_for_r2tsn(cmd, begrun);
+ if (!r2t)
+ return -1;
+ if (iscsit_send_recovery_r2t_for_snack(cmd, r2t) < 0)
+ return -1;
+
+ begrun++;
+ }
+
+ return 0;
+}
+
+/*
+ * Generates Offsets and NextBurstLength based on Begrun and Runlength
+ * carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN.
+ *
+ * For DataSequenceInOrder=Yes and DataPDUInOrder=[Yes,No] only.
+ *
+ * FIXME: How is this handled for an RData SNACK?
+ */
+int iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+ struct iscsi_cmd *cmd,
+ struct iscsi_datain_req *dr)
+{
+ u32 data_sn = 0, data_sn_count = 0;
+ u32 pdu_start = 0, seq_no = 0;
+ u32 begrun = dr->begrun;
+ struct iscsi_conn *conn = cmd->conn;
+
+ while (begrun > data_sn++) {
+ data_sn_count++;
+ if ((dr->next_burst_len +
+ conn->conn_ops->MaxRecvDataSegmentLength) <
+ conn->sess->sess_ops->MaxBurstLength) {
+ dr->read_data_done +=
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ dr->next_burst_len +=
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ } else {
+ dr->read_data_done +=
+ (conn->sess->sess_ops->MaxBurstLength -
+ dr->next_burst_len);
+ dr->next_burst_len = 0;
+ pdu_start += data_sn_count;
+ data_sn_count = 0;
+ seq_no++;
+ }
+ }
+
+ if (!conn->sess->sess_ops->DataPDUInOrder) {
+ cmd->seq_no = seq_no;
+ cmd->pdu_start = pdu_start;
+ cmd->pdu_send_order = data_sn_count;
+ }
+
+ return 0;
+}
+
+/*
+ * Generates Offsets and NextBurstLength based on Begrun and Runlength
+ * carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN.
+ *
+ * For DataSequenceInOrder=No and DataPDUInOrder=[Yes,No] only.
+ *
+ * FIXME: How is this handled for an RData SNACK?
+ */
+int iscsit_create_recovery_datain_values_datasequenceinorder_no(
+ struct iscsi_cmd *cmd,
+ struct iscsi_datain_req *dr)
+{
+ int found_seq = 0, i;
+ u32 data_sn, read_data_done = 0, seq_send_order = 0;
+ u32 begrun = dr->begrun;
+ u32 runlength = dr->runlength;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_seq *first_seq = NULL, *seq = NULL;
+
+ if (!cmd->seq_list) {
+ pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+ return -1;
+ }
+
+ /*
+ * Calculate read_data_done for all sequences containing a
+ * first_datasn and last_datasn less than the BegRun.
+ *
+ * Locate the struct iscsi_seq the BegRun lies within and calculate
+ * NextBurstLength up to the DataSN based on MaxRecvDataSegmentLength.
+ *
+ * Also use struct iscsi_seq->seq_send_order to determine where to start.
+ */
+ for (i = 0; i < cmd->seq_count; i++) {
+ seq = &cmd->seq_list[i];
+
+ if (!seq->seq_send_order)
+ first_seq = seq;
+
+ /*
+ * No data has been transferred for this DataIN sequence, so the
+ * seq->first_datasn and seq->last_datasn have not been set.
+ */
+ if (!seq->sent) {
+#if 0
+ pr_err("Ignoring non-sent sequence 0x%08x ->"
+ " 0x%08x\n\n", seq->first_datasn,
+ seq->last_datasn);
+#endif
+ continue;
+ }
+
+ /*
+ * This DataIN sequence precedes the received BegRun; add the
+ * total xfer_len of the sequence to read_data_done and reset
+ * seq->pdu_send_order.
+ */
+ if ((seq->first_datasn < begrun) &&
+ (seq->last_datasn < begrun)) {
+#if 0
+ pr_err("Pre BegRun sequence 0x%08x ->"
+ " 0x%08x\n", seq->first_datasn,
+ seq->last_datasn);
+#endif
+ read_data_done += cmd->seq_list[i].xfer_len;
+ seq->next_burst_len = seq->pdu_send_order = 0;
+ continue;
+ }
+
+ /*
+ * The BegRun lies within this DataIN sequence.
+ */
+ if ((seq->first_datasn <= begrun) &&
+ (seq->last_datasn >= begrun)) {
+#if 0
+ pr_err("Found sequence begrun: 0x%08x in"
+ " 0x%08x -> 0x%08x\n", begrun,
+ seq->first_datasn, seq->last_datasn);
+#endif
+ seq_send_order = seq->seq_send_order;
+ data_sn = seq->first_datasn;
+ seq->next_burst_len = seq->pdu_send_order = 0;
+ found_seq = 1;
+
+ /*
+ * For DataPDUInOrder=Yes, while the first DataSN of
+ * the sequence is less than the received BegRun, add
+ * the MaxRecvDataSegmentLength to read_data_done and
+ * to the sequence's next_burst_len;
+ *
+ * For DataPDUInOrder=No, while the first DataSN of the
+ * sequence is less than the received BegRun, find the
+ * struct iscsi_pdu of the DataSN in question and add the
+ * MaxRecvDataSegmentLength to read_data_done and to the
+ * sequence's next_burst_len;
+ */
+ if (conn->sess->sess_ops->DataPDUInOrder) {
+ while (data_sn < begrun) {
+ seq->pdu_send_order++;
+ read_data_done +=
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ seq->next_burst_len +=
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ data_sn++;
+ }
+ } else {
+ int j;
+ struct iscsi_pdu *pdu;
+
+ while (data_sn < begrun) {
+ seq->pdu_send_order++;
+
+ for (j = 0; j < seq->pdu_count; j++) {
+ pdu = &cmd->pdu_list[
+ seq->pdu_start + j];
+ if (pdu->data_sn == data_sn) {
+ read_data_done +=
+ pdu->length;
+ seq->next_burst_len +=
+ pdu->length;
+ }
+ }
+ data_sn++;
+ }
+ }
+ continue;
+ }
+
+ /*
+ * This DataIN sequence lies beyond the received BegRun;
+ * reset seq->pdu_send_order and continue.
+ */
+ if ((seq->first_datasn > begrun) ||
+ (seq->last_datasn > begrun)) {
+#if 0
+ pr_err("Post BegRun sequence 0x%08x -> 0x%08x\n",
+ seq->first_datasn, seq->last_datasn);
+#endif
+ seq->next_burst_len = seq->pdu_send_order = 0;
+ continue;
+ }
+ }
+
+ if (!found_seq) {
+ if (!begrun) {
+ if (!first_seq) {
+ pr_err("ITT: 0x%08x, Begrun: 0x%08x"
+ " but first_seq is NULL\n",
+ cmd->init_task_tag, begrun);
+ return -1;
+ }
+ seq_send_order = first_seq->seq_send_order;
+ first_seq->next_burst_len = first_seq->pdu_send_order = 0;
+ goto done;
+ }
+
+ pr_err("Unable to locate struct iscsi_seq for ITT: 0x%08x,"
+ " BegRun: 0x%08x, RunLength: 0x%08x while"
+ " DataSequenceInOrder=No and DataPDUInOrder=%s.\n",
+ cmd->init_task_tag, begrun, runlength,
+ (conn->sess->sess_ops->DataPDUInOrder) ? "Yes" : "No");
+ return -1;
+ }
+
+done:
+ dr->read_data_done = read_data_done;
+ dr->seq_send_order = seq_send_order;
+
+ return 0;
+}
+
+static int iscsit_handle_recovery_datain(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf,
+ u32 begrun,
+ u32 runlength)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_datain_req *dr;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+
+ if (!atomic_read(&se_cmd->t_transport_complete)) {
+ pr_err("Ignoring ITT: 0x%08x Data SNACK\n",
+ cmd->init_task_tag);
+ return 0;
+ }
+
+ /*
+ * Make sure the initiator is not requesting retransmission
+ * of DataSNs already acknowledged by a Data ACK SNACK.
+ */
+ if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+ (begrun <= cmd->acked_data_sn)) {
+ pr_err("ITT: 0x%08x, Data SNACK requesting"
+ " retransmission of DataSN: 0x%08x to 0x%08x but"
+ " already acked to DataSN: 0x%08x by Data ACK SNACK,"
+ " protocol error.\n", cmd->init_task_tag, begrun,
+ (begrun + runlength), cmd->acked_data_sn);
+
+ return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR,
+ 1, 0, buf, cmd);
+ }
+
+ /*
+ * Make sure BegRun and RunLength in the Data SNACK are sane.
+ * Note: (cmd->data_sn - 1) will carry the maximum DataSN sent.
+ */
+ if ((begrun + runlength) > (cmd->data_sn - 1)) {
+ pr_err("Initiator requesting BegRun: 0x%08x, RunLength"
+ ": 0x%08x greater than maximum DataSN: 0x%08x.\n",
+ begrun, runlength, (cmd->data_sn - 1));
+ return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID,
+ 1, 0, buf, cmd);
+ }
+
+ dr = iscsit_allocate_datain_req();
+ if (!dr)
+ return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+ 1, 0, buf, cmd);
+
+ dr->data_sn = dr->begrun = begrun;
+ dr->runlength = runlength;
+ dr->generate_recovery_values = 1;
+ dr->recovery = DATAIN_WITHIN_COMMAND_RECOVERY;
+
+ iscsit_attach_datain_req(cmd, dr);
+
+ cmd->i_state = ISTATE_SEND_DATAIN;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+ return 0;
+}
+
+int iscsit_handle_recovery_datain_or_r2t(
+ struct iscsi_conn *conn,
+ unsigned char *buf,
+ u32 init_task_tag,
+ u32 targ_xfer_tag,
+ u32 begrun,
+ u32 runlength)
+{
+ struct iscsi_cmd *cmd;
+
+ cmd = iscsit_find_cmd_from_itt(conn, init_task_tag);
+ if (!cmd)
+ return 0;
+
+ /*
+ * FIXME: This will not work for bidi commands.
+ */
+ switch (cmd->data_direction) {
+ case DMA_TO_DEVICE:
+ return iscsit_handle_r2t_snack(cmd, buf, begrun, runlength);
+ case DMA_FROM_DEVICE:
+ return iscsit_handle_recovery_datain(cmd, buf, begrun,
+ runlength);
+ default:
+ pr_err("Unknown cmd->data_direction: 0x%02x\n",
+ cmd->data_direction);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* #warning FIXME: Status SNACK needs to be dependent on OPCODE!!! */
+int iscsit_handle_status_snack(
+ struct iscsi_conn *conn,
+ u32 init_task_tag,
+ u32 targ_xfer_tag,
+ u32 begrun,
+ u32 runlength)
+{
+ struct iscsi_cmd *cmd = NULL;
+ u32 last_statsn;
+ int found_cmd;
+
+ if (conn->exp_statsn > begrun) {
+ pr_err("Got Status SNACK Begrun: 0x%08x, RunLength:"
+ " 0x%08x but already got ExpStatSN: 0x%08x on CID:"
+ " %hu.\n", begrun, runlength, conn->exp_statsn,
+ conn->cid);
+ return 0;
+ }
+
+ last_statsn = (!runlength) ? conn->stat_sn : (begrun + runlength);
+
+ while (begrun < last_statsn) {
+ found_cmd = 0;
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+ if (cmd->stat_sn == begrun) {
+ found_cmd = 1;
+ break;
+ }
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (!found_cmd) {
+ pr_err("Unable to find StatSN: 0x%08x for"
+ " a Status SNACK, assuming this was a"
+ " protactic SNACK for an untransmitted"
+ " StatSN, ignoring.\n", begrun);
+ begrun++;
+ continue;
+ }
+
+ spin_lock_bh(&cmd->istate_lock);
+ if (cmd->i_state == ISTATE_SEND_DATAIN) {
+ spin_unlock_bh(&cmd->istate_lock);
+ pr_err("Ignoring Status SNACK for BegRun:"
+ " 0x%08x, RunLength: 0x%08x, assuming this was"
+ " a protactic SNACK for an untransmitted"
+ " StatSN\n", begrun, runlength);
+ begrun++;
+ continue;
+ }
+ spin_unlock_bh(&cmd->istate_lock);
+
+ cmd->i_state = ISTATE_SEND_STATUS_RECOVERY;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ begrun++;
+ }
+
+ return 0;
+}
+
+int iscsit_handle_data_ack(
+ struct iscsi_conn *conn,
+ u32 targ_xfer_tag,
+ u32 begrun,
+ u32 runlength)
+{
+ struct iscsi_cmd *cmd = NULL;
+
+ cmd = iscsit_find_cmd_from_ttt(conn, targ_xfer_tag);
+ if (!cmd) {
+ pr_err("Data ACK SNACK for TTT: 0x%08x is"
+ " invalid.\n", targ_xfer_tag);
+ return -1;
+ }
+
+ if (begrun <= cmd->acked_data_sn) {
+ pr_err("ITT: 0x%08x Data ACK SNACK BegRUN: 0x%08x is"
+ " less than the already acked DataSN: 0x%08x.\n",
+ cmd->init_task_tag, begrun, cmd->acked_data_sn);
+ return -1;
+ }
+
+ /*
+ * For Data ACK SNACK, BegRun is the next expected DataSN.
+ * (see iSCSI v19: 10.16.6)
+ */
+ cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = (begrun - 1);
+
+ pr_debug("Received Data ACK SNACK for ITT: 0x%08x,"
+ " updated acked DataSN to 0x%08x.\n",
+ cmd->init_task_tag, cmd->acked_data_sn);
+
+ return 0;
+}
+
+static int iscsit_send_recovery_r2t(
+ struct iscsi_cmd *cmd,
+ u32 offset,
+ u32 xfer_len)
+{
+ int ret;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ ret = iscsit_add_r2t_to_list(cmd, offset, xfer_len, 1, 0);
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ return ret;
+}
+
+int iscsit_dataout_datapduinorder_no_fbit(
+ struct iscsi_cmd *cmd,
+ struct iscsi_pdu *pdu)
+{
+ int i, send_recovery_r2t = 0, recovery = 0;
+ u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_pdu *first_pdu = NULL;
+
+ /*
+ * Get a struct iscsi_pdu pointer to the first PDU, and the total PDU count
+ * of the DataOUT sequence.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ for (i = 0; i < cmd->pdu_count; i++) {
+ if (cmd->pdu_list[i].seq_no == pdu->seq_no) {
+ if (!first_pdu)
+ first_pdu = &cmd->pdu_list[i];
+ xfer_len += cmd->pdu_list[i].length;
+ pdu_count++;
+ } else if (pdu_count)
+ break;
+ }
+ } else {
+ struct iscsi_seq *seq = cmd->seq_ptr;
+
+ first_pdu = &cmd->pdu_list[seq->pdu_start];
+ pdu_count = seq->pdu_count;
+ }
+
+ if (!first_pdu || !pdu_count)
+ return DATAOUT_CANNOT_RECOVER;
+
+ /*
+ * Loop through the ending DataOUT Sequence checking each struct iscsi_pdu.
+ * The following ugly logic batches runs of PDUs that were not received.
+ */
+ for (i = 0; i < pdu_count; i++) {
+ if (first_pdu[i].status == ISCSI_PDU_RECEIVED_OK) {
+ if (!send_recovery_r2t)
+ continue;
+
+ if (iscsit_send_recovery_r2t(cmd, offset, length) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+
+ send_recovery_r2t = length = offset = 0;
+ continue;
+ }
+ /*
+ * Set recovery = 1 for any missing, CRC failed, or timed
+ * out PDUs to let the DataOUT logic know that this sequence
+ * has not been completed yet.
+ *
+ * Also, only send a Recovery R2T for ISCSI_PDU_NOT_RECEIVED.
+ * We assume if the PDU either failed CRC or timed out
+ * that a Recovery R2T has already been sent.
+ */
+ recovery = 1;
+
+ if (first_pdu[i].status != ISCSI_PDU_NOT_RECEIVED)
+ continue;
+
+ if (!offset)
+ offset = first_pdu[i].offset;
+ length += first_pdu[i].length;
+
+ send_recovery_r2t = 1;
+ }
+
+ if (send_recovery_r2t)
+ if (iscsit_send_recovery_r2t(cmd, offset, length) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+
+ return (!recovery) ? DATAOUT_NORMAL : DATAOUT_WITHIN_COMMAND_RECOVERY;
+}
+
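+/*
+ * Rewind the command's DataOUT accounting to the start of the failed
+ * sequence so that a single recovery R2T can re-request it, marking
+ * any PDUs already received inside that window as not received again.
+ */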
+static int iscsit_recalculate_dataout_values(
+ struct iscsi_cmd *cmd,
+ u32 pdu_offset,
+ u32 pdu_length,
+ u32 *r2t_offset,
+ u32 *r2t_length)
+{
+ int i;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_pdu *pdu = NULL;
+
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ cmd->data_sn = 0;
+
+ if (conn->sess->sess_ops->DataPDUInOrder) {
+ *r2t_offset = cmd->write_data_done;
+ *r2t_length = (cmd->seq_end_offset -
+ cmd->write_data_done);
+ return 0;
+ }
+
+ *r2t_offset = cmd->seq_start_offset;
+ *r2t_length = (cmd->seq_end_offset - cmd->seq_start_offset);
+
+ for (i = 0; i < cmd->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ if ((pdu->offset >= cmd->seq_start_offset) &&
+ ((pdu->offset + pdu->length) <=
+ cmd->seq_end_offset)) {
+ if (!cmd->unsolicited_data)
+ cmd->next_burst_len -= pdu->length;
+ else
+ cmd->first_burst_len -= pdu->length;
+
+ cmd->write_data_done -= pdu->length;
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+ } else {
+ struct iscsi_seq *seq = NULL;
+
+ seq = iscsit_get_seq_holder(cmd, pdu_offset, pdu_length);
+ if (!seq)
+ return -1;
+
+ *r2t_offset = seq->orig_offset;
+ *r2t_length = seq->xfer_len;
+
+ cmd->write_data_done -= (seq->offset - seq->orig_offset);
+ if (cmd->immediate_data)
+ cmd->first_burst_len = cmd->write_data_done;
+
+ seq->data_sn = 0;
+ seq->offset = seq->orig_offset;
+ seq->next_burst_len = 0;
+ seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+ if (conn->sess->sess_ops->DataPDUInOrder)
+ return 0;
+
+ for (i = 0; i < seq->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i+seq->pdu_start];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+
+ return 0;
+}
+
+int iscsit_recover_dataout_sequence(
+ struct iscsi_cmd *cmd,
+ u32 pdu_offset,
+ u32 pdu_length)
+{
+ u32 r2t_length = 0, r2t_offset = 0;
+
+ spin_lock_bh(&cmd->istate_lock);
+ cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY;
+ spin_unlock_bh(&cmd->istate_lock);
+
+ if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length,
+ &r2t_offset, &r2t_length) < 0)
+ return DATAOUT_CANNOT_RECOVER;
+
+ iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length);
+
+ return DATAOUT_WITHIN_COMMAND_RECOVERY;
+}
+
+static struct iscsi_ooo_cmdsn *iscsit_allocate_ooo_cmdsn(void)
+{
+ struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL;
+
+ ooo_cmdsn = kmem_cache_zalloc(lio_ooo_cache, GFP_ATOMIC);
+ if (!ooo_cmdsn) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_ooo_cmdsn.\n");
+ return NULL;
+ }
+ INIT_LIST_HEAD(&ooo_cmdsn->ooo_list);
+
+ return ooo_cmdsn;
+}
+
+/*
+ * Called with sess->cmdsn_mutex held.
+ */
+static int iscsit_attach_ooo_cmdsn(
+ struct iscsi_session *sess,
+ struct iscsi_ooo_cmdsn *ooo_cmdsn)
+{
+ struct iscsi_ooo_cmdsn *ooo_tail, *ooo_tmp;
+ /*
+ * We attach the struct iscsi_ooo_cmdsn entry to the out of order
+ * list in increasing CmdSN order.
+ * This allows iscsit_execute_ooo_cmdsns() to detect any
+ * additional CmdSN holes while performing delayed execution.
+ */
+ if (list_empty(&sess->sess_ooo_cmdsn_list))
+ list_add_tail(&ooo_cmdsn->ooo_list,
+ &sess->sess_ooo_cmdsn_list);
+ else {
+ ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev,
+ typeof(*ooo_tail), ooo_list);
+ /*
+ * CmdSN is greater than the tail of the list.
+ */
+ if (ooo_tail->cmdsn < ooo_cmdsn->cmdsn)
+ list_add_tail(&ooo_cmdsn->ooo_list,
+ &sess->sess_ooo_cmdsn_list);
+ else {
+ /*
+ * CmdSN is either lower than the head, or somewhere
+ * in the middle.
+ */
+ list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list,
+ ooo_list) {
+ if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
+ continue;
+
+ /*
+ * Insert before the first entry with a larger
+ * CmdSN to keep the list sorted in increasing
+ * order.
+ */
+ list_add(&ooo_cmdsn->ooo_list,
+ ooo_tmp->ooo_list.prev);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Removes a struct iscsi_ooo_cmdsn from a session's list,
+ * called with struct iscsi_session->cmdsn_mutex held.
+ */
+void iscsit_remove_ooo_cmdsn(
+ struct iscsi_session *sess,
+ struct iscsi_ooo_cmdsn *ooo_cmdsn)
+{
+ list_del(&ooo_cmdsn->ooo_list);
+ kmem_cache_free(lio_ooo_cache, ooo_cmdsn);
+}
+
+void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *conn)
+{
+ struct iscsi_ooo_cmdsn *ooo_cmdsn;
+ struct iscsi_session *sess = conn->sess;
+
+ mutex_lock(&sess->cmdsn_mutex);
+ list_for_each_entry(ooo_cmdsn, &sess->sess_ooo_cmdsn_list, ooo_list) {
+ if (ooo_cmdsn->cid != conn->cid)
+ continue;
+
+ ooo_cmdsn->cmd = NULL;
+ }
+ mutex_unlock(&sess->cmdsn_mutex);
+}
+
+/*
+ * Called with sess->cmdsn_mutex held.
+ */
+int iscsit_execute_ooo_cmdsns(struct iscsi_session *sess)
+{
+ int ooo_count = 0;
+ struct iscsi_cmd *cmd = NULL;
+ struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+
+ list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+ &sess->sess_ooo_cmdsn_list, ooo_list) {
+ if (ooo_cmdsn->cmdsn != sess->exp_cmd_sn)
+ continue;
+
+ if (!ooo_cmdsn->cmd) {
+ sess->exp_cmd_sn++;
+ iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+ continue;
+ }
+
+ cmd = ooo_cmdsn->cmd;
+ cmd->i_state = cmd->deferred_i_state;
+ ooo_count++;
+ sess->exp_cmd_sn++;
+ pr_debug("Executing out of order CmdSN: 0x%08x,"
+ " incremented ExpCmdSN to 0x%08x.\n",
+ cmd->cmd_sn, sess->exp_cmd_sn);
+
+ iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+
+ if (iscsit_execute_cmd(cmd, 1) < 0)
+ return -1;
+ }
+
+ return ooo_count;
+}
+
+/*
+ * Called either:
+ *
+ * 1. With sess->cmdsn_mutex held from iscsi_execute_ooo_cmdsns()
+ * or iscsi_check_received_cmdsn().
+ * 2. With no locks held directly from iscsi_handle_XXX_pdu() functions
+ * for immediate commands.
+ */
+int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo)
+{
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ int lr = 0;
+
+ spin_lock_bh(&cmd->istate_lock);
+ if (ooo)
+ cmd->cmd_flags &= ~ICF_OOO_CMDSN;
+
+ switch (cmd->iscsi_opcode) {
+ case ISCSI_OP_SCSI_CMD:
+ /*
+ * Go ahead and send the CHECK_CONDITION status for
+ * any SCSI CDB exceptions that may have occurred, also
+ * handle the SCF_SCSI_RESERVATION_CONFLICT case here as well.
+ */
+ if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) {
+ if (se_cmd->se_cmd_flags &
+ SCF_SCSI_RESERVATION_CONFLICT) {
+ cmd->i_state = ISTATE_SEND_STATUS;
+ spin_unlock_bh(&cmd->istate_lock);
+ iscsit_add_cmd_to_response_queue(cmd, cmd->conn,
+ cmd->i_state);
+ return 0;
+ }
+ spin_unlock_bh(&cmd->istate_lock);
+ /*
+ * Determine if delayed TASK_ABORTED status for WRITEs
+ * should be sent now if no unsolicited data out
+ * payloads are expected, or if the delayed status
+ * should be sent after unsolicited data out with
+ * ISCSI_FLAG_CMD_FINAL set in iscsi_handle_data_out()
+ */
+ if (transport_check_aborted_status(se_cmd,
+ (cmd->unsolicited_data == 0)) != 0)
+ return 0;
+ /*
+ * Otherwise send CHECK_CONDITION and sense for
+ * exception
+ */
+ return transport_send_check_condition_and_sense(se_cmd,
+ se_cmd->scsi_sense_reason, 0);
+ }
+ /*
+ * Special case for delayed CmdSN with Immediate
+ * Data and/or Unsolicited Data Out attached.
+ */
+ if (cmd->immediate_data) {
+ if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+ spin_unlock_bh(&cmd->istate_lock);
+ return transport_generic_handle_data(
+ &cmd->se_cmd);
+ }
+ spin_unlock_bh(&cmd->istate_lock);
+
+ if (!(cmd->cmd_flags &
+ ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) {
+ /*
+ * Send the delayed TASK_ABORTED status for
+				 * WRITEs if no more unsolicited data is
+ * expected.
+ */
+ if (transport_check_aborted_status(se_cmd, 1)
+ != 0)
+ return 0;
+
+ iscsit_set_dataout_sequence_values(cmd);
+ iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 0);
+ }
+ return 0;
+ }
+ /*
+ * The default handler.
+ */
+ spin_unlock_bh(&cmd->istate_lock);
+
+ if ((cmd->data_direction == DMA_TO_DEVICE) &&
+ !(cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) {
+ /*
+ * Send the delayed TASK_ABORTED status for WRITEs if
+			 * no more unsolicited data is expected.
+ */
+ if (transport_check_aborted_status(se_cmd, 1) != 0)
+ return 0;
+
+ iscsit_set_dataout_sequence_values(cmd);
+ spin_lock_bh(&cmd->dataout_timeout_lock);
+ iscsit_start_dataout_timer(cmd, cmd->conn);
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ }
+ return transport_handle_cdb_direct(&cmd->se_cmd);
+
+ case ISCSI_OP_NOOP_OUT:
+ case ISCSI_OP_TEXT:
+ spin_unlock_bh(&cmd->istate_lock);
+ iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
+ break;
+ case ISCSI_OP_SCSI_TMFUNC:
+ if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) {
+ spin_unlock_bh(&cmd->istate_lock);
+ iscsit_add_cmd_to_response_queue(cmd, cmd->conn,
+ cmd->i_state);
+ return 0;
+ }
+ spin_unlock_bh(&cmd->istate_lock);
+
+ return transport_generic_handle_tmr(&cmd->se_cmd);
+ case ISCSI_OP_LOGOUT:
+ spin_unlock_bh(&cmd->istate_lock);
+ switch (cmd->logout_reason) {
+ case ISCSI_LOGOUT_REASON_CLOSE_SESSION:
+ lr = iscsit_logout_closesession(cmd, cmd->conn);
+ break;
+ case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION:
+ lr = iscsit_logout_closeconnection(cmd, cmd->conn);
+ break;
+ case ISCSI_LOGOUT_REASON_RECOVERY:
+ lr = iscsit_logout_removeconnforrecovery(cmd, cmd->conn);
+ break;
+ default:
+ pr_err("Unknown iSCSI Logout Request Code:"
+ " 0x%02x\n", cmd->logout_reason);
+ return -1;
+ }
+
+ return lr;
+ default:
+ spin_unlock_bh(&cmd->istate_lock);
+ pr_err("Cannot perform out of order execution for"
+ " unknown iSCSI Opcode: 0x%02x\n", cmd->iscsi_opcode);
+ return -1;
+ }
+
+ return 0;
+}
+
+void iscsit_free_all_ooo_cmdsns(struct iscsi_session *sess)
+{
+ struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+
+ mutex_lock(&sess->cmdsn_mutex);
+ list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+ &sess->sess_ooo_cmdsn_list, ooo_list) {
+
+ list_del(&ooo_cmdsn->ooo_list);
+ kmem_cache_free(lio_ooo_cache, ooo_cmdsn);
+ }
+ mutex_unlock(&sess->cmdsn_mutex);
+}
+
+int iscsit_handle_ooo_cmdsn(
+ struct iscsi_session *sess,
+ struct iscsi_cmd *cmd,
+ u32 cmdsn)
+{
+ int batch = 0;
+ struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL, *ooo_tail = NULL;
+
+ cmd->deferred_i_state = cmd->i_state;
+ cmd->i_state = ISTATE_DEFERRED_CMD;
+ cmd->cmd_flags |= ICF_OOO_CMDSN;
+
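+	/*
+	 * batch == 1 marks the start of a new CmdSN hole: either no out
+	 * of order entries exist yet, or this CmdSN is not contiguous
+	 * with the list tail. batch_count then records how many CmdSNs
+	 * are outstanding between ExpCmdSN and this PDU (hypothetical
+	 * example: ExpCmdSN 10 and an arriving CmdSN 13 gives
+	 * batch_count = 3).
+	 */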
+ if (list_empty(&sess->sess_ooo_cmdsn_list))
+ batch = 1;
+ else {
+ ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev,
+ typeof(*ooo_tail), ooo_list);
+ if (ooo_tail->cmdsn != (cmdsn - 1))
+ batch = 1;
+ }
+
+ ooo_cmdsn = iscsit_allocate_ooo_cmdsn();
+ if (!ooo_cmdsn)
+ return CMDSN_ERROR_CANNOT_RECOVER;
+
+ ooo_cmdsn->cmd = cmd;
+ ooo_cmdsn->batch_count = (batch) ?
+ (cmdsn - sess->exp_cmd_sn) : 1;
+ ooo_cmdsn->cid = cmd->conn->cid;
+ ooo_cmdsn->exp_cmdsn = sess->exp_cmd_sn;
+ ooo_cmdsn->cmdsn = cmdsn;
+
+ if (iscsit_attach_ooo_cmdsn(sess, ooo_cmdsn) < 0) {
+ kmem_cache_free(lio_ooo_cache, ooo_cmdsn);
+ return CMDSN_ERROR_CANNOT_RECOVER;
+ }
+
+ return CMDSN_HIGHER_THAN_EXP;
+}
+
+static int iscsit_set_dataout_timeout_values(
+ struct iscsi_cmd *cmd,
+ u32 *offset,
+ u32 *length)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_r2t *r2t;
+
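+	/*
+	 * For unsolicited data the retransmission window is the smaller
+	 * of FirstBurstLength and the total transfer length.
+	 */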
+ if (cmd->unsolicited_data) {
+ *offset = 0;
+ *length = (conn->sess->sess_ops->FirstBurstLength >
+ cmd->data_length) ?
+ cmd->data_length :
+ conn->sess->sess_ops->FirstBurstLength;
+ return 0;
+ }
+
+ spin_lock_bh(&cmd->r2t_lock);
+ if (list_empty(&cmd->cmd_r2t_list)) {
+ pr_err("cmd->cmd_r2t_list is empty!\n");
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
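+	/*
+	 * Otherwise locate the first non-recovery R2T that was sent but
+	 * whose sequence has not completed, and time out on that window.
+	 */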
+ list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+ if (r2t->sent_r2t && !r2t->recovery_r2t && !r2t->seq_complete) {
+ *offset = r2t->offset;
+ *length = r2t->xfer_len;
+ spin_unlock_bh(&cmd->r2t_lock);
+ return 0;
+ }
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ pr_err("Unable to locate any incomplete DataOUT"
+ " sequences for ITT: 0x%08x.\n", cmd->init_task_tag);
+
+ return -1;
+}
+
+/*
+ * NOTE: Called from interrupt (timer) context.
+ */
+static void iscsit_handle_dataout_timeout(unsigned long data)
+{
+ u32 pdu_length = 0, pdu_offset = 0;
+ u32 r2t_length = 0, r2t_offset = 0;
+ struct iscsi_cmd *cmd = (struct iscsi_cmd *) data;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_session *sess = NULL;
+ struct iscsi_node_attrib *na;
+
+ iscsit_inc_conn_usage_count(conn);
+
+ spin_lock_bh(&cmd->dataout_timeout_lock);
+ if (cmd->dataout_timer_flags & ISCSI_TF_STOP) {
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ iscsit_dec_conn_usage_count(conn);
+ return;
+ }
+ cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING;
+ sess = conn->sess;
+ na = iscsit_tpg_get_node_attrib(sess);
+
+ if (!sess->sess_ops->ErrorRecoveryLevel) {
+ pr_debug("Unable to recover from DataOut timeout while"
+ " in ERL=0.\n");
+ goto failure;
+ }
+
+ if (++cmd->dataout_timeout_retries == na->dataout_timeout_retries) {
+ pr_debug("Command ITT: 0x%08x exceeded max retries"
+ " for DataOUT timeout %u, closing iSCSI connection.\n",
+ cmd->init_task_tag, na->dataout_timeout_retries);
+ goto failure;
+ }
+
+ cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY;
+
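+	/*
+	 * Pick the span to retransmit: with in-order sequences and PDUs
+	 * this is the rest of the current burst starting at
+	 * write_data_done, clamped to the end of the transfer; with
+	 * out-of-order PDUs the failed sequence is retried as a whole.
+	 */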
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ if (conn->sess->sess_ops->DataPDUInOrder) {
+ pdu_offset = cmd->write_data_done;
+ if ((pdu_offset + (conn->sess->sess_ops->MaxBurstLength -
+ cmd->next_burst_len)) > cmd->data_length)
+ pdu_length = (cmd->data_length -
+ cmd->write_data_done);
+ else
+ pdu_length = (conn->sess->sess_ops->MaxBurstLength -
+ cmd->next_burst_len);
+ } else {
+ pdu_offset = cmd->seq_start_offset;
+ pdu_length = (cmd->seq_end_offset -
+ cmd->seq_start_offset);
+ }
+ } else {
+ if (iscsit_set_dataout_timeout_values(cmd, &pdu_offset,
+ &pdu_length) < 0)
+ goto failure;
+ }
+
+ if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length,
+ &r2t_offset, &r2t_length) < 0)
+ goto failure;
+
+ pr_debug("Command ITT: 0x%08x timed out waiting for"
+ " completion of %sDataOUT Sequence Offset: %u, Length: %u\n",
+ cmd->init_task_tag, (cmd->unsolicited_data) ? "Unsolicited " :
+ "", r2t_offset, r2t_length);
+
+ if (iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length) < 0)
+ goto failure;
+
+ iscsit_start_dataout_timer(cmd, conn);
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ iscsit_dec_conn_usage_count(conn);
+
+ return;
+
+failure:
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ iscsit_cause_connection_reinstatement(conn, 0);
+ iscsit_dec_conn_usage_count(conn);
+}
+
+void iscsit_mod_dataout_timer(struct iscsi_cmd *cmd)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+ spin_lock_bh(&cmd->dataout_timeout_lock);
+ if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) {
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ return;
+ }
+
+ mod_timer(&cmd->dataout_timer,
+ (get_jiffies_64() + na->dataout_timeout * HZ));
+	pr_debug("Updated DataOUT timer for ITT: 0x%08x\n",
+ cmd->init_task_tag);
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+}
+
+/*
+ * Called with cmd->dataout_timeout_lock held.
+ */
+void iscsit_start_dataout_timer(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+	struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+ if (cmd->dataout_timer_flags & ISCSI_TF_RUNNING)
+ return;
+
+ pr_debug("Starting DataOUT timer for ITT: 0x%08x on"
+ " CID: %hu.\n", cmd->init_task_tag, conn->cid);
+
+ init_timer(&cmd->dataout_timer);
+ cmd->dataout_timer.expires = (get_jiffies_64() + na->dataout_timeout * HZ);
+ cmd->dataout_timer.data = (unsigned long)cmd;
+ cmd->dataout_timer.function = iscsit_handle_dataout_timeout;
+ cmd->dataout_timer_flags &= ~ISCSI_TF_STOP;
+ cmd->dataout_timer_flags |= ISCSI_TF_RUNNING;
+ add_timer(&cmd->dataout_timer);
+}
+
+void iscsit_stop_dataout_timer(struct iscsi_cmd *cmd)
+{
+ spin_lock_bh(&cmd->dataout_timeout_lock);
+ if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) {
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+ return;
+ }
+ cmd->dataout_timer_flags |= ISCSI_TF_STOP;
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+
+ del_timer_sync(&cmd->dataout_timer);
+
+ spin_lock_bh(&cmd->dataout_timeout_lock);
+ cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING;
+ pr_debug("Stopped DataOUT Timer for ITT: 0x%08x\n",
+ cmd->init_task_tag);
+ spin_unlock_bh(&cmd->dataout_timeout_lock);
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl1.h b/drivers/target/iscsi/iscsi_target_erl1.h
new file mode 100644
index 0000000..85e67e2
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl1.h
@@ -0,0 +1,26 @@
+#ifndef ISCSI_TARGET_ERL1_H
+#define ISCSI_TARGET_ERL1_H
+
+extern int iscsit_dump_data_payload(struct iscsi_conn *, u32, int);
+extern int iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+ struct iscsi_cmd *, struct iscsi_datain_req *);
+extern int iscsit_create_recovery_datain_values_datasequenceinorder_no(
+ struct iscsi_cmd *, struct iscsi_datain_req *);
+extern int iscsit_handle_recovery_datain_or_r2t(struct iscsi_conn *, unsigned char *,
+ u32, u32, u32, u32);
+extern int iscsit_handle_status_snack(struct iscsi_conn *, u32, u32,
+ u32, u32);
+extern int iscsit_handle_data_ack(struct iscsi_conn *, u32, u32, u32);
+extern int iscsit_dataout_datapduinorder_no_fbit(struct iscsi_cmd *, struct iscsi_pdu *);
+extern int iscsit_recover_dataout_sequence(struct iscsi_cmd *, u32, u32);
+extern void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *);
+extern void iscsit_free_all_ooo_cmdsns(struct iscsi_session *);
+extern int iscsit_execute_ooo_cmdsns(struct iscsi_session *);
+extern int iscsit_execute_cmd(struct iscsi_cmd *, int);
+extern int iscsit_handle_ooo_cmdsn(struct iscsi_session *, struct iscsi_cmd *, u32);
+extern void iscsit_remove_ooo_cmdsn(struct iscsi_session *, struct iscsi_ooo_cmdsn *);
+extern void iscsit_mod_dataout_timer(struct iscsi_cmd *);
+extern void iscsit_start_dataout_timer(struct iscsi_cmd *, struct iscsi_conn *);
+extern void iscsit_stop_dataout_timer(struct iscsi_cmd *);
+
+#endif /* ISCSI_TARGET_ERL1_H */
diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c
new file mode 100644
index 0000000..91a4d17
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl2.c
@@ -0,0 +1,474 @@
+/*******************************************************************************
+ * This file contains error recovery level two functions used by
+ * the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target.h"
+
+/*
+ * FIXME: Does RData SNACK apply here as well?
+ */
+void iscsit_create_conn_recovery_datain_values(
+ struct iscsi_cmd *cmd,
+ u32 exp_data_sn)
+{
+ u32 data_sn = 0;
+ struct iscsi_conn *conn = cmd->conn;
+
+ cmd->next_burst_len = 0;
+ cmd->read_data_done = 0;
+
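+	/*
+	 * Replay the DataIN stream up to the initiator-acknowledged
+	 * exp_data_sn: each DataSN carries up to
+	 * MaxRecvDataSegmentLength bytes, and next_burst_len wraps as
+	 * each MaxBurstLength sized burst completes.
+	 */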
+ while (exp_data_sn > data_sn) {
+ if ((cmd->next_burst_len +
+ conn->conn_ops->MaxRecvDataSegmentLength) <
+ conn->sess->sess_ops->MaxBurstLength) {
+ cmd->read_data_done +=
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ cmd->next_burst_len +=
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ } else {
+ cmd->read_data_done +=
+ (conn->sess->sess_ops->MaxBurstLength -
+ cmd->next_burst_len);
+ cmd->next_burst_len = 0;
+ }
+ data_sn++;
+ }
+}
+
+void iscsit_create_conn_recovery_dataout_values(
+ struct iscsi_cmd *cmd)
+{
+ u32 write_data_done = 0;
+ struct iscsi_conn *conn = cmd->conn;
+
+ cmd->data_sn = 0;
+ cmd->next_burst_len = 0;
+
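+	/*
+	 * Round write_data_done down to a MaxBurstLength boundary; an
+	 * interrupted burst is restarted from its beginning after
+	 * connection recovery.
+	 */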
+ while (cmd->write_data_done > write_data_done) {
+ if ((write_data_done + conn->sess->sess_ops->MaxBurstLength) <=
+ cmd->write_data_done)
+ write_data_done += conn->sess->sess_ops->MaxBurstLength;
+ else
+ break;
+ }
+
+ cmd->write_data_done = write_data_done;
+}
+
+static int iscsit_attach_active_connection_recovery_entry(
+ struct iscsi_session *sess,
+ struct iscsi_conn_recovery *cr)
+{
+ spin_lock(&sess->cr_a_lock);
+ list_add_tail(&cr->cr_list, &sess->cr_active_list);
+ spin_unlock(&sess->cr_a_lock);
+
+ return 0;
+}
+
+static int iscsit_attach_inactive_connection_recovery_entry(
+ struct iscsi_session *sess,
+ struct iscsi_conn_recovery *cr)
+{
+ spin_lock(&sess->cr_i_lock);
+ list_add_tail(&cr->cr_list, &sess->cr_inactive_list);
+
+ sess->conn_recovery_count++;
+ pr_debug("Incremented connection recovery count to %u for"
+ " SID: %u\n", sess->conn_recovery_count, sess->sid);
+ spin_unlock(&sess->cr_i_lock);
+
+ return 0;
+}
+
+struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry(
+ struct iscsi_session *sess,
+ u16 cid)
+{
+ struct iscsi_conn_recovery *cr;
+
+ spin_lock(&sess->cr_i_lock);
+ list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) {
+ if (cr->cid == cid) {
+ spin_unlock(&sess->cr_i_lock);
+ return cr;
+ }
+ }
+ spin_unlock(&sess->cr_i_lock);
+
+ return NULL;
+}
+
+void iscsit_free_connection_recovery_entires(struct iscsi_session *sess)
+{
+ struct iscsi_cmd *cmd, *cmd_tmp;
+ struct iscsi_conn_recovery *cr, *cr_tmp;
+
+ spin_lock(&sess->cr_a_lock);
+ list_for_each_entry_safe(cr, cr_tmp, &sess->cr_active_list, cr_list) {
+ list_del(&cr->cr_list);
+ spin_unlock(&sess->cr_a_lock);
+
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ list_for_each_entry_safe(cmd, cmd_tmp,
+ &cr->conn_recovery_cmd_list, i_list) {
+
+ list_del(&cmd->i_list);
+ cmd->conn = NULL;
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+ !(cmd->se_cmd.transport_wait_for_tasks))
+ iscsit_release_cmd(cmd);
+ else
+ cmd->se_cmd.transport_wait_for_tasks(
+ &cmd->se_cmd, 1, 1);
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ }
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ spin_lock(&sess->cr_a_lock);
+
+ kfree(cr);
+ }
+ spin_unlock(&sess->cr_a_lock);
+
+ spin_lock(&sess->cr_i_lock);
+ list_for_each_entry_safe(cr, cr_tmp, &sess->cr_inactive_list, cr_list) {
+ list_del(&cr->cr_list);
+ spin_unlock(&sess->cr_i_lock);
+
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ list_for_each_entry_safe(cmd, cmd_tmp,
+ &cr->conn_recovery_cmd_list, i_list) {
+
+ list_del(&cmd->i_list);
+ cmd->conn = NULL;
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+ !(cmd->se_cmd.transport_wait_for_tasks))
+ iscsit_release_cmd(cmd);
+ else
+ cmd->se_cmd.transport_wait_for_tasks(
+ &cmd->se_cmd, 1, 1);
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ }
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ spin_lock(&sess->cr_i_lock);
+
+ kfree(cr);
+ }
+ spin_unlock(&sess->cr_i_lock);
+}
+
+int iscsit_remove_active_connection_recovery_entry(
+ struct iscsi_conn_recovery *cr,
+ struct iscsi_session *sess)
+{
+ spin_lock(&sess->cr_a_lock);
+ list_del(&cr->cr_list);
+
+ sess->conn_recovery_count--;
+ pr_debug("Decremented connection recovery count to %u for"
+ " SID: %u\n", sess->conn_recovery_count, sess->sid);
+ spin_unlock(&sess->cr_a_lock);
+
+ kfree(cr);
+
+ return 0;
+}
+
+int iscsit_remove_inactive_connection_recovery_entry(
+ struct iscsi_conn_recovery *cr,
+ struct iscsi_session *sess)
+{
+ spin_lock(&sess->cr_i_lock);
+ list_del(&cr->cr_list);
+ spin_unlock(&sess->cr_i_lock);
+
+ return 0;
+}
+
+/*
+ * Called with cr->conn_recovery_cmd_lock held.
+ */
+int iscsit_remove_cmd_from_connection_recovery(
+ struct iscsi_cmd *cmd,
+ struct iscsi_session *sess)
+{
+ struct iscsi_conn_recovery *cr;
+
+ if (!cmd->cr) {
+ pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+ " is NULL!\n", cmd->init_task_tag);
+ BUG();
+ }
+ cr = cmd->cr;
+
+ list_del(&cmd->i_list);
+ return --cr->cmd_count;
+}
+
+void iscsit_discard_cr_cmds_by_expstatsn(
+ struct iscsi_conn_recovery *cr,
+ u32 exp_statsn)
+{
+ u32 dropped_count = 0;
+ struct iscsi_cmd *cmd, *cmd_tmp;
+ struct iscsi_session *sess = cr->sess;
+
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ list_for_each_entry_safe(cmd, cmd_tmp,
+ &cr->conn_recovery_cmd_list, i_list) {
+
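+		/*
+		 * Keep any command that has not yet completed status, or
+		 * whose StatSN the initiator has not acknowledged via the
+		 * new ExpStatSN.
+		 */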
+ if (((cmd->deferred_i_state != ISTATE_SENT_STATUS) &&
+ (cmd->deferred_i_state != ISTATE_REMOVE)) ||
+ (cmd->stat_sn >= exp_statsn)) {
+ continue;
+ }
+
+ dropped_count++;
+ pr_debug("Dropping Acknowledged ITT: 0x%08x, StatSN:"
+ " 0x%08x, CID: %hu.\n", cmd->init_task_tag,
+ cmd->stat_sn, cr->cid);
+
+ iscsit_remove_cmd_from_connection_recovery(cmd, sess);
+
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+ !(cmd->se_cmd.transport_wait_for_tasks))
+ iscsit_release_cmd(cmd);
+ else
+ cmd->se_cmd.transport_wait_for_tasks(
+ &cmd->se_cmd, 1, 0);
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ }
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+
+ pr_debug("Dropped %u total acknowledged commands on"
+ " CID: %hu less than old ExpStatSN: 0x%08x\n",
+ dropped_count, cr->cid, exp_statsn);
+
+ if (!cr->cmd_count) {
+ pr_debug("No commands to be reassigned for failed"
+ " connection CID: %hu on SID: %u\n",
+ cr->cid, sess->sid);
+ iscsit_remove_inactive_connection_recovery_entry(cr, sess);
+ iscsit_attach_active_connection_recovery_entry(sess, cr);
+ pr_debug("iSCSI connection recovery successful for CID:"
+ " %hu on SID: %u\n", cr->cid, sess->sid);
+ iscsit_remove_active_connection_recovery_entry(cr, sess);
+ } else {
+ iscsit_remove_inactive_connection_recovery_entry(cr, sess);
+ iscsit_attach_active_connection_recovery_entry(sess, cr);
+ }
+}
+
+int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn)
+{
+ u32 dropped_count = 0;
+ struct iscsi_cmd *cmd, *cmd_tmp;
+ struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp;
+ struct iscsi_session *sess = conn->sess;
+
+ mutex_lock(&sess->cmdsn_mutex);
+ list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp,
+ &sess->sess_ooo_cmdsn_list, ooo_list) {
+
+ if (ooo_cmdsn->cid != conn->cid)
+ continue;
+
+ dropped_count++;
+ pr_debug("Dropping unacknowledged CmdSN:"
+ " 0x%08x during connection recovery on CID: %hu\n",
+ ooo_cmdsn->cmdsn, conn->cid);
+ iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn);
+ }
+ mutex_unlock(&sess->cmdsn_mutex);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+ if (!(cmd->cmd_flags & ICF_OOO_CMDSN))
+ continue;
+
+ list_del(&cmd->i_list);
+
+ spin_unlock_bh(&conn->cmd_lock);
+ if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+ !(cmd->se_cmd.transport_wait_for_tasks))
+ iscsit_release_cmd(cmd);
+ else
+ cmd->se_cmd.transport_wait_for_tasks(
+ &cmd->se_cmd, 1, 1);
+ spin_lock_bh(&conn->cmd_lock);
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ pr_debug("Dropped %u total unacknowledged commands on CID:"
+ " %hu for ExpCmdSN: 0x%08x.\n", dropped_count, conn->cid,
+ sess->exp_cmd_sn);
+ return 0;
+}
+
+int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn)
+{
+ u32 cmd_count = 0;
+ struct iscsi_cmd *cmd, *cmd_tmp;
+ struct iscsi_conn_recovery *cr;
+
+ /*
+	 * Allocate a struct iscsi_conn_recovery for this connection.
+	 * Each struct iscsi_cmd contains a struct iscsi_conn_recovery pointer
+ * (struct iscsi_cmd->cr) so we need to allocate this before preparing the
+ * connection's command list for connection recovery.
+ */
+ cr = kzalloc(sizeof(struct iscsi_conn_recovery), GFP_KERNEL);
+ if (!cr) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_conn_recovery.\n");
+ return -1;
+ }
+ INIT_LIST_HEAD(&cr->cr_list);
+ INIT_LIST_HEAD(&cr->conn_recovery_cmd_list);
+ spin_lock_init(&cr->conn_recovery_cmd_lock);
+ /*
+ * Only perform connection recovery on ISCSI_OP_SCSI_CMD or
+ * ISCSI_OP_NOOP_OUT opcodes. For all other opcodes call
+ * list_del(&cmd->i_list); to release the command to the
+ * session pool and remove it from the connection's list.
+ *
+ * Also stop the DataOUT timer, which will be restarted after
+ * sending the TMR response.
+ */
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) {
+
+ if ((cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD) &&
+ (cmd->iscsi_opcode != ISCSI_OP_NOOP_OUT)) {
+ pr_debug("Not performing realligence on"
+ " Opcode: 0x%02x, ITT: 0x%08x, CmdSN: 0x%08x,"
+ " CID: %hu\n", cmd->iscsi_opcode,
+ cmd->init_task_tag, cmd->cmd_sn, conn->cid);
+
+ list_del(&cmd->i_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+ !(cmd->se_cmd.transport_wait_for_tasks))
+ iscsit_release_cmd(cmd);
+ else
+ cmd->se_cmd.transport_wait_for_tasks(
+ &cmd->se_cmd, 1, 0);
+ spin_lock_bh(&conn->cmd_lock);
+ continue;
+ }
+
+ /*
+ * Special case where commands greater than or equal to
+ * the session's ExpCmdSN are attached to the connection
+ * list but not to the out of order CmdSN list. The one
+ * obvious case is when a command with immediate data
+ * attached must only check the CmdSN against ExpCmdSN
+ * after the data is received. The special case below
+ * is when the connection fails before data is received,
+ * but also may apply to other PDUs, so it has been
+ * made generic here.
+ */
+ if (!(cmd->cmd_flags & ICF_OOO_CMDSN) && !cmd->immediate_cmd &&
+ (cmd->cmd_sn >= conn->sess->exp_cmd_sn)) {
+ list_del(&cmd->i_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) ||
+ !(cmd->se_cmd.transport_wait_for_tasks))
+ iscsit_release_cmd(cmd);
+ else
+ cmd->se_cmd.transport_wait_for_tasks(
+ &cmd->se_cmd, 1, 1);
+ spin_lock_bh(&conn->cmd_lock);
+ continue;
+ }
+
+ cmd_count++;
+ pr_debug("Preparing Opcode: 0x%02x, ITT: 0x%08x,"
+ " CmdSN: 0x%08x, StatSN: 0x%08x, CID: %hu for"
+ " realligence.\n", cmd->iscsi_opcode,
+ cmd->init_task_tag, cmd->cmd_sn, cmd->stat_sn,
+ conn->cid);
+
+ cmd->deferred_i_state = cmd->i_state;
+ cmd->i_state = ISTATE_IN_CONNECTION_RECOVERY;
+
+ if (cmd->data_direction == DMA_TO_DEVICE)
+ iscsit_stop_dataout_timer(cmd);
+
+ cmd->sess = conn->sess;
+
+ list_del(&cmd->i_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ iscsit_free_all_datain_reqs(cmd);
+
+ if ((cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) &&
+ cmd->se_cmd.transport_wait_for_tasks)
+ cmd->se_cmd.transport_wait_for_tasks(&cmd->se_cmd,
+ 0, 0);
+ /*
+ * Add the struct iscsi_cmd to the connection recovery cmd list
+ */
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ list_add_tail(&cmd->i_list, &cr->conn_recovery_cmd_list);
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+
+ spin_lock_bh(&conn->cmd_lock);
+ cmd->cr = cr;
+ cmd->conn = NULL;
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+ /*
+ * Fill in the various values in the preallocated struct iscsi_conn_recovery.
+ */
+ cr->cid = conn->cid;
+ cr->cmd_count = cmd_count;
+ cr->maxrecvdatasegmentlength = conn->conn_ops->MaxRecvDataSegmentLength;
+ cr->sess = conn->sess;
+
+ iscsit_attach_inactive_connection_recovery_entry(conn->sess, cr);
+
+ return 0;
+}
+
+int iscsit_connection_recovery_transport_reset(struct iscsi_conn *conn)
+{
+ atomic_set(&conn->connection_recovery, 1);
+
+ if (iscsit_close_connection(conn) < 0)
+ return -1;
+
+ return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl2.h b/drivers/target/iscsi/iscsi_target_erl2.h
new file mode 100644
index 0000000..22f8d24
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl2.h
@@ -0,0 +1,18 @@
+#ifndef ISCSI_TARGET_ERL2_H
+#define ISCSI_TARGET_ERL2_H
+
+extern void iscsit_create_conn_recovery_datain_values(struct iscsi_cmd *, u32);
+extern void iscsit_create_conn_recovery_dataout_values(struct iscsi_cmd *);
+extern struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry(
+ struct iscsi_session *, u16);
+extern void iscsit_free_connection_recovery_entires(struct iscsi_session *);
+extern int iscsit_remove_active_connection_recovery_entry(
+ struct iscsi_conn_recovery *, struct iscsi_session *);
+extern int iscsit_remove_cmd_from_connection_recovery(struct iscsi_cmd *,
+ struct iscsi_session *);
+extern void iscsit_discard_cr_cmds_by_expstatsn(struct iscsi_conn_recovery *, u32);
+extern int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *);
+extern int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *);
+extern int iscsit_connection_recovery_transport_reset(struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_ERL2_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
new file mode 100644
index 0000000..bcaf82f
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -0,0 +1,1232 @@
+/*******************************************************************************
+ * This file contains the login functions used by the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/string.h>
+#include <linux/kthread.h>
+#include <linux/crypto.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_stat.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_parameters.h"
+
+extern struct idr sess_idr;
+extern struct mutex auth_id_lock;
+extern spinlock_t sess_idr_lock;
+
+static int iscsi_login_init_conn(struct iscsi_conn *conn)
+{
+ INIT_LIST_HEAD(&conn->conn_list);
+ INIT_LIST_HEAD(&conn->conn_cmd_list);
+ INIT_LIST_HEAD(&conn->immed_queue_list);
+ INIT_LIST_HEAD(&conn->response_queue_list);
+ init_completion(&conn->conn_post_wait_comp);
+ init_completion(&conn->conn_wait_comp);
+ init_completion(&conn->conn_wait_rcfr_comp);
+ init_completion(&conn->conn_waiting_on_uc_comp);
+ init_completion(&conn->conn_logout_comp);
+ init_completion(&conn->rx_half_close_comp);
+ init_completion(&conn->tx_half_close_comp);
+ spin_lock_init(&conn->cmd_lock);
+ spin_lock_init(&conn->conn_usage_lock);
+ spin_lock_init(&conn->immed_queue_lock);
+ spin_lock_init(&conn->nopin_timer_lock);
+ spin_lock_init(&conn->response_queue_lock);
+ spin_lock_init(&conn->state_lock);
+
+ if (!zalloc_cpumask_var(&conn->conn_cpumask, GFP_KERNEL)) {
+ pr_err("Unable to allocate conn->conn_cpumask\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Used by iscsi_target_nego.c:iscsi_target_locate_portal() to set up
+ * per struct iscsi_conn libcrypto contexts for crc32c and crc32-intel
+ */
+int iscsi_login_setup_crypto(struct iscsi_conn *conn)
+{
+ /*
+	 * Set up slicing by the CRC32C algorithm for the RX and TX libcrypto
+	 * contexts, which will default to crc32c_intel.ko for cpu_has_xmm4_2,
+	 * or fall back to software 1x8 byte slicing from crc32c.ko.
+ */
+ conn->conn_rx_hash.flags = 0;
+ conn->conn_rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(conn->conn_rx_hash.tfm)) {
+ pr_err("crypto_alloc_hash() failed for conn_rx_tfm\n");
+ return -ENOMEM;
+ }
+
+ conn->conn_tx_hash.flags = 0;
+ conn->conn_tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(conn->conn_tx_hash.tfm)) {
+ pr_err("crypto_alloc_hash() failed for conn_tx_tfm\n");
+ crypto_free_hash(conn->conn_rx_hash.tfm);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int iscsi_login_check_initiator_version(
+ struct iscsi_conn *conn,
+ u8 version_max,
+ u8 version_min)
+{
+ if ((version_max != 0x00) || (version_min != 0x00)) {
+ pr_err("Unsupported iSCSI IETF Pre-RFC Revision,"
+ " version Min/Max 0x%02x/0x%02x, rejecting login.\n",
+ version_min, version_max);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_NO_VERSION);
+ return -1;
+ }
+
+ return 0;
+}
+
+int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn)
+{
+ int sessiontype;
+ struct iscsi_param *initiatorname_param = NULL, *sessiontype_param = NULL;
+ struct iscsi_portal_group *tpg = conn->tpg;
+ struct iscsi_session *sess = NULL, *sess_p = NULL;
+ struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+ struct se_session *se_sess, *se_sess_tmp;
+
+ initiatorname_param = iscsi_find_param_from_key(
+ INITIATORNAME, conn->param_list);
+ if (!initiatorname_param)
+ return -1;
+
+ sessiontype_param = iscsi_find_param_from_key(
+ SESSIONTYPE, conn->param_list);
+ if (!sessiontype_param)
+ return -1;
+
+ sessiontype = (strncmp(sessiontype_param->value, NORMAL, 6)) ? 1 : 0;
+
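+	/*
+	 * See RFC 3720 session reinstatement: a TSIH=0 login carrying
+	 * the same ISID, InitiatorName and SessionType as an active
+	 * session implicitly terminates that older session.
+	 */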
+ spin_lock_bh(&se_tpg->session_lock);
+ list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
+ sess_list) {
+
+ sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ spin_lock(&sess_p->conn_lock);
+ if (atomic_read(&sess_p->session_fall_back_to_erl0) ||
+ atomic_read(&sess_p->session_logout) ||
+ (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
+ spin_unlock(&sess_p->conn_lock);
+ continue;
+ }
+ if (!memcmp((void *)sess_p->isid, (void *)conn->sess->isid, 6) &&
+ (!strcmp((void *)sess_p->sess_ops->InitiatorName,
+ (void *)initiatorname_param->value) &&
+ (sess_p->sess_ops->SessionType == sessiontype))) {
+ atomic_set(&sess_p->session_reinstatement, 1);
+ spin_unlock(&sess_p->conn_lock);
+ iscsit_inc_session_usage_count(sess_p);
+ iscsit_stop_time2retain_timer(sess_p);
+ sess = sess_p;
+ break;
+ }
+ spin_unlock(&sess_p->conn_lock);
+ }
+ spin_unlock_bh(&se_tpg->session_lock);
+ /*
+ * If the Time2Retain handler has expired, the session is already gone.
+ */
+ if (!sess)
+ return 0;
+
+ pr_debug("%s iSCSI Session SID %u is still active for %s,"
+		" performing session reinstatement.\n", (sessiontype) ?
+ "Discovery" : "Normal", sess->sid,
+ sess->sess_ops->InitiatorName);
+
+ spin_lock_bh(&sess->conn_lock);
+ if (sess->session_state == TARG_SESS_STATE_FAILED) {
+ spin_unlock_bh(&sess->conn_lock);
+ iscsit_dec_session_usage_count(sess);
+ return iscsit_close_session(sess);
+ }
+ spin_unlock_bh(&sess->conn_lock);
+
+ iscsit_stop_session(sess, 1, 1);
+ iscsit_dec_session_usage_count(sess);
+
+ return iscsit_close_session(sess);
+}
+
+static void iscsi_login_set_conn_values(
+ struct iscsi_session *sess,
+ struct iscsi_conn *conn,
+ u16 cid)
+{
+ conn->sess = sess;
+ conn->cid = cid;
+ /*
+ * Generate a random Status sequence number (statsn) for the new
+ * iSCSI connection.
+ */
+ get_random_bytes(&conn->stat_sn, sizeof(u32));
+
+ mutex_lock(&auth_id_lock);
+ conn->auth_id = iscsit_global->auth_id++;
+ mutex_unlock(&auth_id_lock);
+}
+
+/*
+ * This is the leading connection of a new session,
+ * or session reinstatement.
+ */
+static int iscsi_login_zero_tsih_s1(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ struct iscsi_session *sess = NULL;
+ struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+ sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL);
+ if (!sess) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ pr_err("Could not allocate memory for session\n");
+ return -1;
+ }
+
+ iscsi_login_set_conn_values(sess, conn, pdu->cid);
+ sess->init_task_tag = pdu->itt;
+ memcpy((void *)&sess->isid, (void *)pdu->isid, 6);
+ sess->exp_cmd_sn = pdu->cmdsn;
+ INIT_LIST_HEAD(&sess->sess_conn_list);
+ INIT_LIST_HEAD(&sess->sess_ooo_cmdsn_list);
+ INIT_LIST_HEAD(&sess->cr_active_list);
+ INIT_LIST_HEAD(&sess->cr_inactive_list);
+ init_completion(&sess->async_msg_comp);
+ init_completion(&sess->reinstatement_comp);
+ init_completion(&sess->session_wait_comp);
+ init_completion(&sess->session_waiting_on_uc_comp);
+ mutex_init(&sess->cmdsn_mutex);
+ spin_lock_init(&sess->conn_lock);
+ spin_lock_init(&sess->cr_a_lock);
+ spin_lock_init(&sess->cr_i_lock);
+ spin_lock_init(&sess->session_usage_lock);
+ spin_lock_init(&sess->ttt_lock);
+
+ if (!idr_pre_get(&sess_idr, GFP_KERNEL)) {
+ pr_err("idr_pre_get() for sess_idr failed\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+ spin_lock(&sess_idr_lock);
+ idr_get_new(&sess_idr, NULL, &sess->session_index);
+ spin_unlock(&sess_idr_lock);
+
+ sess->creation_time = get_jiffies_64();
+ spin_lock_init(&sess->session_stats_lock);
+ /*
+ * The FFP CmdSN window values will be allocated from the TPG's
+ * Initiator Node's ACL once the login has been successfully completed.
+ */
+ sess->max_cmd_sn = pdu->cmdsn;
+
+ sess->sess_ops = kzalloc(sizeof(struct iscsi_sess_ops), GFP_KERNEL);
+ if (!sess->sess_ops) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_sess_ops.\n");
+ return -1;
+ }
+
+ sess->se_sess = transport_init_session();
+ if (!sess->se_sess) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int iscsi_login_zero_tsih_s2(
+ struct iscsi_conn *conn)
+{
+ struct iscsi_node_attrib *na;
+ struct iscsi_session *sess = conn->sess;
+ unsigned char buf[32];
+
+ sess->tpg = conn->tpg;
+
+ /*
+ * Assign a new TPG Session Handle. Note this is protected with
+ * struct iscsi_portal_group->np_login_sem from iscsit_access_np().
+ */
+ sess->tsih = ++ISCSI_TPG_S(sess)->ntsih;
+ if (!sess->tsih)
+ sess->tsih = ++ISCSI_TPG_S(sess)->ntsih;
+
+ /*
+	 * Create the default params from user-defined values.
+ */
+ if (iscsi_copy_param_list(&conn->param_list,
+ ISCSI_TPG_C(conn)->param_list, 1) < 0) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+
+ iscsi_set_keys_to_negotiate(0, conn->param_list);
+
+ if (sess->sess_ops->SessionType)
+ return iscsi_set_keys_irrelevant_for_discovery(
+ conn->param_list);
+
+ na = iscsit_tpg_get_node_attrib(sess);
+
+ /*
+ * Need to send TargetPortalGroupTag back in first login response
+ * on any iSCSI connection where the Initiator provides TargetName.
+ * See 5.3.1. Login Phase Start
+ *
+ * In our case, we have already located the struct iscsi_tiqn at this point.
+ */
+ memset(buf, 0, 32);
+ sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt);
+ if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+
+ /*
+ * Workaround for Initiators that have broken connection recovery logic.
+ *
+ * "We would really like to get rid of this." Linux-iSCSI.org team
+ */
+ memset(buf, 0, 32);
+ sprintf(buf, "ErrorRecoveryLevel=%d", na->default_erl);
+ if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+
+ if (iscsi_login_disable_FIM_keys(conn->param_list, conn) < 0)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Remove PSTATE_NEGOTIATE for the four FIM related keys.
+ * The Initiator node will be able to enable FIM by proposing them itself.
+ */
+int iscsi_login_disable_FIM_keys(
+ struct iscsi_param_list *param_list,
+ struct iscsi_conn *conn)
+{
+ struct iscsi_param *param;
+
+ param = iscsi_find_param_from_key("OFMarker", param_list);
+ if (!param) {
+ pr_err("iscsi_find_param_from_key() for"
+ " OFMarker failed\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+ param->state &= ~PSTATE_NEGOTIATE;
+
+ param = iscsi_find_param_from_key("OFMarkInt", param_list);
+ if (!param) {
+ pr_err("iscsi_find_param_from_key() for"
+			" OFMarkInt failed\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+ param->state &= ~PSTATE_NEGOTIATE;
+
+ param = iscsi_find_param_from_key("IFMarker", param_list);
+ if (!param) {
+ pr_err("iscsi_find_param_from_key() for"
+ " IFMarker failed\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+ param->state &= ~PSTATE_NEGOTIATE;
+
+ param = iscsi_find_param_from_key("IFMarkInt", param_list);
+ if (!param) {
+ pr_err("iscsi_find_param_from_key() for"
+			" IFMarkInt failed\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+ param->state &= ~PSTATE_NEGOTIATE;
+
+ return 0;
+}
+
+static int iscsi_login_non_zero_tsih_s1(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+ iscsi_login_set_conn_values(NULL, conn, pdu->cid);
+ return 0;
+}
+
+/*
+ * Add a new connection to an existing session.
+ */
+static int iscsi_login_non_zero_tsih_s2(
+ struct iscsi_conn *conn,
+ unsigned char *buf)
+{
+ struct iscsi_portal_group *tpg = conn->tpg;
+ struct iscsi_session *sess = NULL, *sess_p = NULL;
+ struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+ struct se_session *se_sess, *se_sess_tmp;
+ struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
+
+ spin_lock_bh(&se_tpg->session_lock);
+ list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list,
+ sess_list) {
+
+ sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (atomic_read(&sess_p->session_fall_back_to_erl0) ||
+ atomic_read(&sess_p->session_logout) ||
+ (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED))
+ continue;
+ if (!memcmp((const void *)sess_p->isid,
+ (const void *)pdu->isid, 6) &&
+ (sess_p->tsih == pdu->tsih)) {
+ iscsit_inc_session_usage_count(sess_p);
+ iscsit_stop_time2retain_timer(sess_p);
+ sess = sess_p;
+ break;
+ }
+ }
+ spin_unlock_bh(&se_tpg->session_lock);
+
+ /*
+ * If the Time2Retain handler has expired, the session is already gone.
+ */
+ if (!sess) {
+ pr_err("Initiator attempting to add a connection to"
+ " a non-existent session, rejecting iSCSI Login.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_NO_SESSION);
+ return -1;
+ }
+
+ /*
+	 * Stop the Time2Retain timer if this is a failed session; we restart
+	 * the timer if the login is not successful.
+ */
+ spin_lock_bh(&sess->conn_lock);
+ if (sess->session_state == TARG_SESS_STATE_FAILED)
+ atomic_set(&sess->session_continuation, 1);
+ spin_unlock_bh(&sess->conn_lock);
+
+ iscsi_login_set_conn_values(sess, conn, pdu->cid);
+
+ if (iscsi_copy_param_list(&conn->param_list,
+ ISCSI_TPG_C(conn)->param_list, 0) < 0) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+
+ iscsi_set_keys_to_negotiate(0, conn->param_list);
+ /*
+ * Need to send TargetPortalGroupTag back in first login response
+ * on any iSCSI connection where the Initiator provides TargetName.
+ * See 5.3.1. Login Phase Start
+ *
+ * In our case, we have already located the struct iscsi_tiqn at this point.
+ */
+ memset(buf, 0, 32);
+ sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt);
+ if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return -1;
+ }
+
+ return iscsi_login_disable_FIM_keys(conn->param_list, conn);
+}
+
+int iscsi_login_post_auth_non_zero_tsih(
+ struct iscsi_conn *conn,
+ u16 cid,
+ u32 exp_statsn)
+{
+ struct iscsi_conn *conn_ptr = NULL;
+ struct iscsi_conn_recovery *cr = NULL;
+ struct iscsi_session *sess = conn->sess;
+
+ /*
+ * By following item 5 in the login table, if we have found
+ * an existing ISID and a valid/existing TSIH and an existing
+	 * CID we do connection reinstatement. Currently we do not
+	 * support it, so we send back a non-zero status class to the
+ * initiator and release the new connection.
+ */
+ conn_ptr = iscsit_get_conn_from_cid_rcfr(sess, cid);
+	if (conn_ptr) {
+ pr_err("Connection exists with CID %hu for %s,"
+ " performing connection reinstatement.\n",
+ conn_ptr->cid, sess->sess_ops->InitiatorName);
+
+ iscsit_connection_reinstatement_rcfr(conn_ptr);
+ iscsit_dec_conn_usage_count(conn_ptr);
+ }
+
+ /*
+	 * Check for any connection recovery entries containing CID.
+ * We use the original ExpStatSN sent in the first login request
+ * to acknowledge commands for the failed connection.
+ *
+	 * Also note that an explicit logout may have already been sent,
+ * but the response may not be sent due to additional connection
+ * loss.
+ */
+ if (sess->sess_ops->ErrorRecoveryLevel == 2) {
+ cr = iscsit_get_inactive_connection_recovery_entry(
+ sess, cid);
+		if (cr) {
+ pr_debug("Performing implicit logout"
+ " for connection recovery on CID: %hu\n",
+ conn->cid);
+ iscsit_discard_cr_cmds_by_expstatsn(cr, exp_statsn);
+ }
+ }
+
+ /*
+ * Else we follow item 4 from the login table in that we have
+ * found an existing ISID and a valid/existing TSIH and a new
+ * CID we go ahead and continue to add a new connection to the
+ * session.
+ */
+ pr_debug("Adding CID %hu to existing session for %s.\n",
+ cid, sess->sess_ops->InitiatorName);
+
+ if ((atomic_read(&sess->nconn) + 1) > sess->sess_ops->MaxConnections) {
+ pr_err("Adding additional connection to this session"
+ " would exceed MaxConnections %d, login failed.\n",
+ sess->sess_ops->MaxConnections);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_ISID_ERROR);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void iscsi_post_login_start_timers(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+
+ if (!sess->sess_ops->SessionType)
+ iscsit_start_nopin_timer(conn);
+}
+
+static int iscsi_post_login_handler(
+ struct iscsi_np *np,
+ struct iscsi_conn *conn,
+ u8 zero_tsih)
+{
+ int stop_timer = 0;
+ struct iscsi_session *sess = conn->sess;
+ struct se_session *se_sess = sess->se_sess;
+ struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+ struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+ struct iscsi_thread_set *ts;
+
+ iscsit_inc_conn_usage_count(conn);
+
+ iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_SUCCESS,
+ ISCSI_LOGIN_STATUS_ACCEPT);
+
+ pr_debug("Moving to TARG_CONN_STATE_LOGGED_IN.\n");
+ conn->conn_state = TARG_CONN_STATE_LOGGED_IN;
+
+ iscsi_set_connection_parameters(conn->conn_ops, conn->param_list);
+ iscsit_set_sync_and_steering_values(conn);
+ /*
+ * SCSI Initiator -> SCSI Target Port Mapping
+ */
+ ts = iscsi_get_thread_set();
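+	/*
+	 * !zero_tsih: this login adds a connection to an existing
+	 * session, so only the new connection is wired up here; the
+	 * session itself was registered by the leading login.
+	 */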
+ if (!zero_tsih) {
+ iscsi_set_session_parameters(sess->sess_ops,
+ conn->param_list, 0);
+ iscsi_release_param_list(conn->param_list);
+ conn->param_list = NULL;
+
+ spin_lock_bh(&sess->conn_lock);
+ atomic_set(&sess->session_continuation, 0);
+ if (sess->session_state == TARG_SESS_STATE_FAILED) {
+ pr_debug("Moving to"
+ " TARG_SESS_STATE_LOGGED_IN.\n");
+ sess->session_state = TARG_SESS_STATE_LOGGED_IN;
+ stop_timer = 1;
+ }
+
+ pr_debug("iSCSI Login successful on CID: %hu from %s to"
+ " %s:%hu,%hu\n", conn->cid, conn->login_ip, np->np_ip,
+ np->np_port, tpg->tpgt);
+
+ list_add_tail(&conn->conn_list, &sess->sess_conn_list);
+ atomic_inc(&sess->nconn);
+ pr_debug("Incremented iSCSI Connection count to %hu"
+ " from node: %s\n", atomic_read(&sess->nconn),
+ sess->sess_ops->InitiatorName);
+ spin_unlock_bh(&sess->conn_lock);
+
+ iscsi_post_login_start_timers(conn);
+ iscsi_activate_thread_set(conn, ts);
+ /*
+ * Determine CPU mask to ensure connection's RX and TX kthreads
+ * are scheduled on the same CPU.
+ */
+ iscsit_thread_get_cpumask(conn);
+ conn->conn_rx_reset_cpumask = 1;
+ conn->conn_tx_reset_cpumask = 1;
+
+ iscsit_dec_conn_usage_count(conn);
+ if (stop_timer) {
+ spin_lock_bh(&se_tpg->session_lock);
+ iscsit_stop_time2retain_timer(sess);
+ spin_unlock_bh(&se_tpg->session_lock);
+ }
+ iscsit_dec_session_usage_count(sess);
+ return 0;
+ }
+
+ iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1);
+ iscsi_release_param_list(conn->param_list);
+ conn->param_list = NULL;
+
+ iscsit_determine_maxcmdsn(sess);
+
+ spin_lock_bh(&se_tpg->session_lock);
+ __transport_register_session(&sess->tpg->tpg_se_tpg,
+ se_sess->se_node_acl, se_sess, (void *)sess);
+ pr_debug("Moving to TARG_SESS_STATE_LOGGED_IN.\n");
+ sess->session_state = TARG_SESS_STATE_LOGGED_IN;
+
+ pr_debug("iSCSI Login successful on CID: %hu from %s to %s:%hu,%hu\n",
+ conn->cid, conn->login_ip, np->np_ip, np->np_port, tpg->tpgt);
+
+ spin_lock_bh(&sess->conn_lock);
+ list_add_tail(&conn->conn_list, &sess->sess_conn_list);
+ atomic_inc(&sess->nconn);
+ pr_debug("Incremented iSCSI Connection count to %hu from node:"
+ " %s\n", atomic_read(&sess->nconn),
+ sess->sess_ops->InitiatorName);
+ spin_unlock_bh(&sess->conn_lock);
+
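+	/*
+	 * A SID of 0 is treated as unassigned, so skip it if the
+	 * counter wraps.
+	 */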
+ sess->sid = tpg->sid++;
+ if (!sess->sid)
+ sess->sid = tpg->sid++;
+ pr_debug("Established iSCSI session from node: %s\n",
+ sess->sess_ops->InitiatorName);
+
+ tpg->nsessions++;
+ if (tpg->tpg_tiqn)
+ tpg->tpg_tiqn->tiqn_nsessions++;
+
+ pr_debug("Incremented number of active iSCSI sessions to %u on"
+ " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt);
+ spin_unlock_bh(&se_tpg->session_lock);
+
+ iscsi_post_login_start_timers(conn);
+ iscsi_activate_thread_set(conn, ts);
+ /*
+ * Determine CPU mask to ensure connection's RX and TX kthreads
+ * are scheduled on the same CPU.
+ */
+ iscsit_thread_get_cpumask(conn);
+ conn->conn_rx_reset_cpumask = 1;
+ conn->conn_tx_reset_cpumask = 1;
+
+ iscsit_dec_conn_usage_count(conn);
+
+ return 0;
+}
+
+static void iscsi_handle_login_thread_timeout(unsigned long data)
+{
+ struct iscsi_np *np = (struct iscsi_np *) data;
+
+ spin_lock_bh(&np->np_thread_lock);
+ pr_err("iSCSI Login timeout on Network Portal %s:%hu\n",
+ np->np_ip, np->np_port);
+
+ if (np->np_login_timer_flags & ISCSI_TF_STOP) {
+ spin_unlock_bh(&np->np_thread_lock);
+ return;
+ }
+
+ if (np->np_thread)
+ send_sig(SIGINT, np->np_thread, 1);
+
+ np->np_login_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&np->np_thread_lock);
+}
+
+static void iscsi_start_login_thread_timer(struct iscsi_np *np)
+{
+ /*
+	 * This uses the TA_LOGIN_TIMEOUT constant because at this
+	 * point we do not have access to ISCSI_TPG_ATTRIB(tpg)->login_timeout.
+ */
+ spin_lock_bh(&np->np_thread_lock);
+ init_timer(&np->np_login_timer);
+ np->np_login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ);
+ np->np_login_timer.data = (unsigned long)np;
+ np->np_login_timer.function = iscsi_handle_login_thread_timeout;
+ np->np_login_timer_flags &= ~ISCSI_TF_STOP;
+ np->np_login_timer_flags |= ISCSI_TF_RUNNING;
+ add_timer(&np->np_login_timer);
+
+ pr_debug("Added timeout timer to iSCSI login request for"
+ " %u seconds.\n", TA_LOGIN_TIMEOUT);
+ spin_unlock_bh(&np->np_thread_lock);
+}
+
+static void iscsi_stop_login_thread_timer(struct iscsi_np *np)
+{
+ spin_lock_bh(&np->np_thread_lock);
+ if (!(np->np_login_timer_flags & ISCSI_TF_RUNNING)) {
+ spin_unlock_bh(&np->np_thread_lock);
+ return;
+ }
+ np->np_login_timer_flags |= ISCSI_TF_STOP;
+ spin_unlock_bh(&np->np_thread_lock);
+
+ del_timer_sync(&np->np_login_timer);
+
+ spin_lock_bh(&np->np_thread_lock);
+ np->np_login_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&np->np_thread_lock);
+}
+
+int iscsi_target_setup_login_socket(
+ struct iscsi_np *np,
+ struct __kernel_sockaddr_storage *sockaddr)
+{
+ struct socket *sock;
+ int backlog = 5, ret, opt = 0, len;
+
+ switch (np->np_network_transport) {
+ case ISCSI_TCP:
+ np->np_ip_proto = IPPROTO_TCP;
+ np->np_sock_type = SOCK_STREAM;
+ break;
+ case ISCSI_SCTP_TCP:
+ np->np_ip_proto = IPPROTO_SCTP;
+ np->np_sock_type = SOCK_STREAM;
+ break;
+ case ISCSI_SCTP_UDP:
+ np->np_ip_proto = IPPROTO_SCTP;
+ np->np_sock_type = SOCK_SEQPACKET;
+ break;
+ case ISCSI_IWARP_TCP:
+ case ISCSI_IWARP_SCTP:
+ case ISCSI_INFINIBAND:
+ default:
+ pr_err("Unsupported network_transport: %d\n",
+ np->np_network_transport);
+ return -EINVAL;
+ }
+
+ ret = sock_create(sockaddr->ss_family, np->np_sock_type,
+ np->np_ip_proto, &sock);
+ if (ret < 0) {
+ pr_err("sock_create() failed.\n");
+ return ret;
+ }
+ np->np_socket = sock;
+ /*
+ * The SCTP stack needs struct socket->file.
+ */
+ if ((np->np_network_transport == ISCSI_SCTP_TCP) ||
+ (np->np_network_transport == ISCSI_SCTP_UDP)) {
+ if (!sock->file) {
+ sock->file = kzalloc(sizeof(struct file), GFP_KERNEL);
+ if (!sock->file) {
+ pr_err("Unable to allocate struct"
+ " file for SCTP\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+ np->np_flags |= NPF_SCTP_STRUCT_FILE;
+ }
+ }
+ /*
+	 * Set up np->np_sockaddr from the sockaddr configured by the
+	 * iscsi_target_configfs.c code.
+ */
+ memcpy((void *)&np->np_sockaddr, (void *)sockaddr,
+ sizeof(struct __kernel_sockaddr_storage));
+
+ if (sockaddr->ss_family == AF_INET6)
+ len = sizeof(struct sockaddr_in6);
+ else
+ len = sizeof(struct sockaddr_in);
+ /*
+	 * Set SO_REUSEADDR, and disable Nagle's algorithm with TCP_NODELAY.
+ */
+ opt = 1;
+ if (np->np_network_transport == ISCSI_TCP) {
+ ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
+ (char *)&opt, sizeof(opt));
+ if (ret < 0) {
+ pr_err("kernel_setsockopt() for TCP_NODELAY"
+ " failed: %d\n", ret);
+ goto fail;
+ }
+ }
+
+ ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&opt, sizeof(opt));
+ if (ret < 0) {
+ pr_err("kernel_setsockopt() for SO_REUSEADDR"
+ " failed\n");
+ goto fail;
+ }
+
+ ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len);
+ if (ret < 0) {
+ pr_err("kernel_bind() failed: %d\n", ret);
+ goto fail;
+ }
+
+ ret = kernel_listen(sock, backlog);
+ if (ret != 0) {
+ pr_err("kernel_listen() failed: %d\n", ret);
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ np->np_socket = NULL;
+ if (sock) {
+ if (np->np_flags & NPF_SCTP_STRUCT_FILE) {
+ kfree(sock->file);
+ sock->file = NULL;
+ }
+
+ sock_release(sock);
+ }
+ return ret;
+}
+
+static int __iscsi_target_login_thread(struct iscsi_np *np)
+{
+ u8 buffer[ISCSI_HDR_LEN], iscsi_opcode, zero_tsih = 0;
+ int err, ret = 0, ip_proto, sock_type, set_sctp_conn_flag, stop;
+ struct iscsi_conn *conn = NULL;
+ struct iscsi_login *login;
+ struct iscsi_portal_group *tpg = NULL;
+ struct socket *new_sock, *sock;
+ struct kvec iov;
+ struct iscsi_login_req *pdu;
+ struct sockaddr_in sock_in;
+ struct sockaddr_in6 sock_in6;
+
+ flush_signals(current);
+ set_sctp_conn_flag = 0;
+ sock = np->np_socket;
+ ip_proto = np->np_ip_proto;
+ sock_type = np->np_sock_type;
+
+ spin_lock_bh(&np->np_thread_lock);
+ if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+ np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
+ complete(&np->np_restart_comp);
+ } else {
+ np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
+ }
+ spin_unlock_bh(&np->np_thread_lock);
+
+ if (kernel_accept(sock, &new_sock, 0) < 0) {
+ spin_lock_bh(&np->np_thread_lock);
+ if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+ spin_unlock_bh(&np->np_thread_lock);
+ complete(&np->np_restart_comp);
+ /* Get another socket */
+ return 1;
+ }
+ spin_unlock_bh(&np->np_thread_lock);
+ goto out;
+ }
+ /*
+ * The SCTP stack needs struct socket->file.
+ */
+ if ((np->np_network_transport == ISCSI_SCTP_TCP) ||
+ (np->np_network_transport == ISCSI_SCTP_UDP)) {
+ if (!new_sock->file) {
+ new_sock->file = kzalloc(
+ sizeof(struct file), GFP_KERNEL);
+ if (!new_sock->file) {
+ pr_err("Unable to allocate struct"
+ " file for SCTP\n");
+ sock_release(new_sock);
+ /* Get another socket */
+ return 1;
+ }
+ set_sctp_conn_flag = 1;
+ }
+ }
+
+ iscsi_start_login_thread_timer(np);
+
+ conn = kzalloc(sizeof(struct iscsi_conn), GFP_KERNEL);
+ if (!conn) {
+ pr_err("Could not allocate memory for"
+ " new connection\n");
+ if (set_sctp_conn_flag) {
+ kfree(new_sock->file);
+ new_sock->file = NULL;
+ }
+ sock_release(new_sock);
+ /* Get another socket */
+ return 1;
+ }
+
+ pr_debug("Moving to TARG_CONN_STATE_FREE.\n");
+ conn->conn_state = TARG_CONN_STATE_FREE;
+ conn->sock = new_sock;
+
+ if (set_sctp_conn_flag)
+ conn->conn_flags |= CONNFLAG_SCTP_STRUCT_FILE;
+
+ pr_debug("Moving to TARG_CONN_STATE_XPT_UP.\n");
+ conn->conn_state = TARG_CONN_STATE_XPT_UP;
+
+ /*
+ * Allocate conn->conn_ops early as a failure calling
+ * iscsit_tx_login_rsp() below will call tx_data().
+ */
+ conn->conn_ops = kzalloc(sizeof(struct iscsi_conn_ops), GFP_KERNEL);
+ if (!conn->conn_ops) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_conn_ops.\n");
+ goto new_sess_out;
+ }
+ /*
+	 * Perform the remaining iSCSI connection initialization items.
+ */
+ if (iscsi_login_init_conn(conn) < 0)
+ goto new_sess_out;
+
+ memset(buffer, 0, ISCSI_HDR_LEN);
+ memset(&iov, 0, sizeof(struct kvec));
+ iov.iov_base = buffer;
+ iov.iov_len = ISCSI_HDR_LEN;
+
+ if (rx_data(conn, &iov, 1, ISCSI_HDR_LEN) <= 0) {
+ pr_err("rx_data() returned an error.\n");
+ goto new_sess_out;
+ }
+
+ iscsi_opcode = (buffer[0] & ISCSI_OPCODE_MASK);
+ if (!(iscsi_opcode & ISCSI_OP_LOGIN)) {
+ pr_err("First opcode is not login request,"
+ " failing login request.\n");
+ goto new_sess_out;
+ }
+
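+ /*
+ * Convert the login PDU header fields to host byte order in place.
+ */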
+ pdu = (struct iscsi_login_req *) buffer;
+ pdu->cid = be16_to_cpu(pdu->cid);
+ pdu->tsih = be16_to_cpu(pdu->tsih);
+ pdu->itt = be32_to_cpu(pdu->itt);
+ pdu->cmdsn = be32_to_cpu(pdu->cmdsn);
+ pdu->exp_statsn = be32_to_cpu(pdu->exp_statsn);
+ /*
+ * Used by iscsit_tx_login_rsp() for Login Response PDUs
+ * when Status-Class != 0.
+ */
+ conn->login_itt = pdu->itt;
+
+ spin_lock_bh(&np->np_thread_lock);
+ if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) {
+ spin_unlock_bh(&np->np_thread_lock);
+ pr_err("iSCSI Network Portal on %s:%hu currently not"
+ " active.\n", np->np_ip, np->np_port);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+ goto new_sess_out;
+ }
+ spin_unlock_bh(&np->np_thread_lock);
+
+ if (np->np_sockaddr.ss_family == AF_INET6) {
+ memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
+
+ if (conn->sock->ops->getname(conn->sock,
+ (struct sockaddr *)&sock_in6, &err, 1) < 0) {
+ pr_err("sock_ops->getname() failed.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_TARGET_ERROR);
+ goto new_sess_out;
+ }
+#if 0
+ if (!iscsi_ntop6((const unsigned char *)
+ &sock_in6.sin6_addr.in6_u,
+ (char *)&conn->ipv6_login_ip[0],
+ IPV6_ADDRESS_SPACE)) {
+ pr_err("iscsi_ntop6() failed\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_TARGET_ERROR);
+ goto new_sess_out;
+ }
+#else
+ pr_debug("Skipping iscsi_ntop6()\n");
+#endif
+ } else {
+ memset(&sock_in, 0, sizeof(struct sockaddr_in));
+
+ if (conn->sock->ops->getname(conn->sock,
+ (struct sockaddr *)&sock_in, &err, 1) < 0) {
+ pr_err("sock_ops->getname() failed.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_TARGET_ERROR);
+ goto new_sess_out;
+ }
+ sprintf(conn->login_ip, "%pI4", &sock_in.sin_addr.s_addr);
+ conn->login_port = ntohs(sock_in.sin_port);
+ }
+
+ conn->network_transport = np->np_network_transport;
+
+ pr_debug("Received iSCSI login request from %s on %s Network"
+ " Portal %s:%hu\n", conn->login_ip,
+ (conn->network_transport == ISCSI_TCP) ? "TCP" : "SCTP",
+ np->np_ip, np->np_port);
+
+ pr_debug("Moving to TARG_CONN_STATE_IN_LOGIN.\n");
+ conn->conn_state = TARG_CONN_STATE_IN_LOGIN;
+
+ if (iscsi_login_check_initiator_version(conn, pdu->max_version,
+ pdu->min_version) < 0)
+ goto new_sess_out;
+
+ zero_tsih = (pdu->tsih == 0x0000);
+ if (zero_tsih) {
+ /*
+ * This is the leading connection of a new session.
+ * We wait until after authentication to check for
+ * session reinstatement.
+ */
+ if (iscsi_login_zero_tsih_s1(conn, buffer) < 0)
+ goto new_sess_out;
+ } else {
+ /*
+ * Add a new connection to an existing session.
+ * We check for a non-existent session in
+ * iscsi_login_non_zero_tsih_s2() below based
+ * on ISID/TSIH, but wait until after authentication
+ * to check for connection reinstatement, etc.
+ */
+ if (iscsi_login_non_zero_tsih_s1(conn, buffer) < 0)
+ goto new_sess_out;
+ }
+
+ /*
+ * This will process the first login request, and call
+ * iscsi_target_locate_portal(), and return a valid struct iscsi_login.
+ */
+ login = iscsi_target_init_negotiation(np, conn, buffer);
+ if (!login) {
+ tpg = conn->tpg;
+ goto new_sess_out;
+ }
+
+ tpg = conn->tpg;
+ if (!tpg) {
+ pr_err("Unable to locate struct iscsi_conn->tpg\n");
+ goto new_sess_out;
+ }
+
+ if (zero_tsih) {
+ if (iscsi_login_zero_tsih_s2(conn) < 0) {
+ iscsi_target_nego_release(login, conn);
+ goto new_sess_out;
+ }
+ } else {
+ if (iscsi_login_non_zero_tsih_s2(conn, buffer) < 0) {
+ iscsi_target_nego_release(login, conn);
+ goto old_sess_out;
+ }
+ }
+
+ if (iscsi_target_start_negotiation(login, conn) < 0)
+ goto new_sess_out;
+
+ if (!conn->sess) {
+ pr_err("struct iscsi_conn session pointer is NULL!\n");
+ goto new_sess_out;
+ }
+
+ iscsi_stop_login_thread_timer(np);
+
+ if (signal_pending(current))
+ goto new_sess_out;
+
+ ret = iscsi_post_login_handler(np, conn, zero_tsih);
+
+ if (ret < 0)
+ goto new_sess_out;
+
+ iscsit_deaccess_np(np, tpg);
+ tpg = NULL;
+ /* Get another socket */
+ return 1;
+
+new_sess_out:
+ pr_err("iSCSI Login negotiation failed.\n");
+ iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ if (!zero_tsih || !conn->sess)
+ goto old_sess_out;
+ if (conn->sess->se_sess)
+ transport_free_session(conn->sess->se_sess);
+ if (conn->sess->session_index != 0) {
+ spin_lock_bh(&sess_idr_lock);
+ idr_remove(&sess_idr, conn->sess->session_index);
+ spin_unlock_bh(&sess_idr_lock);
+ }
+ if (conn->sess->sess_ops)
+ kfree(conn->sess->sess_ops);
+ if (conn->sess)
+ kfree(conn->sess);
+old_sess_out:
+ iscsi_stop_login_thread_timer(np);
+ /*
+ * If login negotiation fails check if the Time2Retain timer
+ * needs to be restarted.
+ */
+ if (!zero_tsih && conn->sess) {
+ spin_lock_bh(&conn->sess->conn_lock);
+ if (conn->sess->session_state == TARG_SESS_STATE_FAILED) {
+ struct se_portal_group *se_tpg =
+ &ISCSI_TPG_C(conn)->tpg_se_tpg;
+
+ atomic_set(&conn->sess->session_continuation, 0);
+ spin_unlock_bh(&conn->sess->conn_lock);
+ spin_lock_bh(&se_tpg->session_lock);
+ iscsit_start_time2retain_handler(conn->sess);
+ spin_unlock_bh(&se_tpg->session_lock);
+ } else
+ spin_unlock_bh(&conn->sess->conn_lock);
+ iscsit_dec_session_usage_count(conn->sess);
+ }
+
+ if (!IS_ERR(conn->conn_rx_hash.tfm))
+ crypto_free_hash(conn->conn_rx_hash.tfm);
+ if (!IS_ERR(conn->conn_tx_hash.tfm))
+ crypto_free_hash(conn->conn_tx_hash.tfm);
+
+ if (conn->conn_cpumask)
+ free_cpumask_var(conn->conn_cpumask);
+
+ kfree(conn->conn_ops);
+
+ if (conn->param_list) {
+ iscsi_release_param_list(conn->param_list);
+ conn->param_list = NULL;
+ }
+ if (conn->sock) {
+ if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) {
+ kfree(conn->sock->file);
+ conn->sock->file = NULL;
+ }
+ sock_release(conn->sock);
+ }
+ kfree(conn);
+
+ if (tpg) {
+ iscsit_deaccess_np(np, tpg);
+ tpg = NULL;
+ }
+
+out:
+ stop = kthread_should_stop();
+ if (!stop && signal_pending(current)) {
+ spin_lock_bh(&np->np_thread_lock);
+ stop = (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN);
+ spin_unlock_bh(&np->np_thread_lock);
+ }
+ /* Wait for another socket.. */
+ if (!stop)
+ return 1;
+
+ iscsi_stop_login_thread_timer(np);
+ spin_lock_bh(&np->np_thread_lock);
+ np->np_thread_state = ISCSI_NP_THREAD_EXIT;
+ spin_unlock_bh(&np->np_thread_lock);
+ return 0;
+}
+
+int iscsi_target_login_thread(void *arg)
+{
+ struct iscsi_np *np = (struct iscsi_np *)arg;
+ int ret;
+
+ allow_signal(SIGINT);
+
+ while (!kthread_should_stop()) {
+ ret = __iscsi_target_login_thread(np);
+ /*
+ * We break and exit here unless another sock_accept() call
+ * is expected.
+ */
+ if (ret != 1)
+ break;
+ }
+
+ return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h
new file mode 100644
index 0000000..091dcae
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_login.h
@@ -0,0 +1,12 @@
+#ifndef ISCSI_TARGET_LOGIN_H
+#define ISCSI_TARGET_LOGIN_H
+
+extern int iscsi_login_setup_crypto(struct iscsi_conn *);
+extern int iscsi_check_for_session_reinstatement(struct iscsi_conn *);
+extern int iscsi_login_post_auth_non_zero_tsih(struct iscsi_conn *, u16, u32);
+extern int iscsi_target_setup_login_socket(struct iscsi_np *,
+ struct __kernel_sockaddr_storage *);
+extern int iscsi_target_login_thread(void *);
+extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_LOGIN_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
new file mode 100644
index 0000000..713a4d2
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -0,0 +1,1067 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/ctype.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_tpg.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_nego.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_auth.h"
+
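+/* Upper bound on login request/response exchanges before the login fails. */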
+#define MAX_LOGIN_PDUS 7
+#define TEXT_LEN 4096
+
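+/*
+ * Login key=value pairs arrive NUL-separated on the wire; convert the
+ * NULs to ';' so the payload can be parsed as a single C string.
+ */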
+void convert_null_to_semi(char *buf, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ if (buf[i] == '\0')
+ buf[i] = ';';
+}
+
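+/*
+ * Return the length of the text up to the next ';' separator, or -1 if
+ * no separator is found before the terminating NUL.
+ */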
+int strlen_semi(char *buf)
+{
+ int i = 0;
+
+ while (buf[i] != '\0') {
+ if (buf[i] == ';')
+ return i;
+ i++;
+ }
+
+ return -1;
+}
+
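+/*
+ * Copy the value following "<pattern>=" from in_buf into out_buf, and
+ * report via *type whether it was encoded as hex (0x/0X prefix) or
+ * decimal.
+ */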
+int extract_param(
+ const char *in_buf,
+ const char *pattern,
+ unsigned int max_length,
+ char *out_buf,
+ unsigned char *type)
+{
+ char *ptr;
+ int len;
+
+ if (!in_buf || !pattern || !out_buf || !type)
+ return -1;
+
+ ptr = strstr(in_buf, pattern);
+ if (!ptr)
+ return -1;
+
+ ptr = strstr(ptr, "=");
+ if (!ptr)
+ return -1;
+
+ ptr += 1;
+ if (*ptr == '0' && (*(ptr+1) == 'x' || *(ptr+1) == 'X')) {
+ ptr += 2; /* skip 0x */
+ *type = HEX;
+ } else
+ *type = DECIMAL;
+
+ len = strlen_semi(ptr);
+ if (len < 0)
+ return -1;
+
+ if (len > max_length) {
+ pr_err("Length of input: %d exeeds max_length:"
+ " %d\n", len, max_length);
+ return -1;
+ }
+ memcpy(out_buf, ptr, len);
+ out_buf[len] = '\0';
+
+ return 0;
+}
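+/*
+ * Hypothetical usage, e.g. pulling a CHAP challenge out of a key list
+ * such as "CHAP_C=0x5a3b...;" (buffer names below are illustrative):
+ *
+ *	unsigned char type;
+ *	char challenge[64];
+ *
+ *	if (extract_param(nego_buf, "CHAP_C", sizeof(challenge) - 1,
+ *			challenge, &type) < 0)
+ *		return -1;
+ */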
+
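+/*
+ * Returns 0 to continue the security exchange, 1 once authentication has
+ * completed, 2 for an unsupported or failed AuthMethod, and -1 on
+ * target-side errors.
+ */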
+static int iscsi_handle_authentication(
+ struct iscsi_conn *conn,
+ char *in_buf,
+ char *out_buf,
+ int in_length,
+ int *out_length,
+ char *authtype)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_auth *auth;
+ struct iscsi_node_acl *iscsi_nacl;
+ struct se_node_acl *se_nacl;
+
+ if (!sess->sess_ops->SessionType) {
+ /*
+ * For SessionType=Normal
+ */
+ se_nacl = conn->sess->se_sess->se_node_acl;
+ if (!se_nacl) {
+ pr_err("Unable to locate struct se_node_acl for"
+ " CHAP auth\n");
+ return -1;
+ }
+ iscsi_nacl = container_of(se_nacl, struct iscsi_node_acl,
+ se_node_acl);
+ if (!iscsi_nacl) {
+ pr_err("Unable to locate struct iscsi_node_acl for"
+ " CHAP auth\n");
+ return -1;
+ }
+
+ auth = ISCSI_NODE_AUTH(iscsi_nacl);
+ } else {
+ /*
+ * For SessionType=Discovery
+ */
+ auth = &iscsit_global->discovery_acl.node_auth;
+ }
+
+ if (strstr("CHAP", authtype))
+ strcpy(conn->sess->auth_type, "CHAP");
+ else
+ strcpy(conn->sess->auth_type, NONE);
+
+ if (strstr("None", authtype))
+ return 1;
+#ifdef CANSRP
+ else if (strstr("SRP", authtype))
+ return srp_main_loop(conn, auth, in_buf, out_buf,
+ &in_length, out_length);
+#endif
+ else if (strstr("CHAP", authtype))
+ return chap_main_loop(conn, auth, in_buf, out_buf,
+ &in_length, out_length);
+ else if (strstr("SPKM1", authtype))
+ return 2;
+ else if (strstr("SPKM2", authtype))
+ return 2;
+ else if (strstr("KRB5", authtype))
+ return 2;
+ else
+ return 2;
+}
+
+static void iscsi_remove_failed_auth_entry(struct iscsi_conn *conn)
+{
+ kfree(conn->auth_protocol);
+}
+
+static int iscsi_target_check_login_request(
+ struct iscsi_conn *conn,
+ struct iscsi_login *login)
+{
+ int req_csg, req_nsg, rsp_csg, rsp_nsg;
+ u32 payload_length;
+ struct iscsi_login_req *login_req;
+ struct iscsi_login_rsp *login_rsp;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ login_rsp = (struct iscsi_login_rsp *) login->rsp;
+ payload_length = ntoh24(login_req->dlength);
+
+ switch (login_req->opcode & ISCSI_OPCODE_MASK) {
+ case ISCSI_OP_LOGIN:
+ break;
+ default:
+ pr_err("Received unknown opcode 0x%02x.\n",
+ login_req->opcode & ISCSI_OPCODE_MASK);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ return -1;
+ }
+
+ if ((login_req->flags & ISCSI_FLAG_LOGIN_CONTINUE) &&
+ (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+ pr_err("Login request has both ISCSI_FLAG_LOGIN_CONTINUE"
+ " and ISCSI_FLAG_LOGIN_TRANSIT set, protocol error.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ return -1;
+ }
+
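+ /*
+ * CSG occupies bits 2-3 and NSG bits 0-1 of the login request and
+ * response flag bytes.
+ */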
+ req_csg = (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+ rsp_csg = (login_rsp->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+ req_nsg = (login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
+ rsp_nsg = (login_rsp->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
+
+ if (req_csg != login->current_stage) {
+ pr_err("Initiator unexpectedly changed login stage"
+ " from %d to %d, login failed.\n", login->current_stage,
+ req_csg);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ return -1;
+ }
+
+ if ((req_nsg == 2) || (req_csg >= 2) ||
+ ((login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT) &&
+ (req_nsg <= req_csg))) {
+ pr_err("Illegal login_req->flags Combination, CSG: %d,"
+ " NSG: %d, ISCSI_FLAG_LOGIN_TRANSIT: %d.\n", req_csg,
+ req_nsg, (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT));
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ return -1;
+ }
+
+ if ((login_req->max_version != login->version_max) ||
+ (login_req->min_version != login->version_min)) {
+ pr_err("Login request changed Version Max/Nin"
+ " unexpectedly to 0x%02x/0x%02x, protocol error\n",
+ login_req->max_version, login_req->min_version);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ return -1;
+ }
+
+ if (memcmp(login_req->isid, login->isid, 6) != 0) {
+ pr_err("Login request changed ISID unexpectedly,"
+ " protocol error.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ return -1;
+ }
+
+ if (login_req->itt != login->init_task_tag) {
+ pr_err("Login request changed ITT unexpectedly to"
+ " 0x%08x, protocol error.\n", login_req->itt);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ return -1;
+ }
+
+ if (payload_length > MAX_KEY_VALUE_PAIRS) {
+ pr_err("Login request payload exceeds default"
+ " MaxRecvDataSegmentLength: %u, protocol error.\n",
+ MAX_KEY_VALUE_PAIRS);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int iscsi_target_check_first_request(
+ struct iscsi_conn *conn,
+ struct iscsi_login *login)
+{
+ struct iscsi_param *param = NULL;
+ struct se_node_acl *se_nacl;
+
+ login->first_request = 0;
+
+ list_for_each_entry(param, &conn->param_list->param_list, p_list) {
+ if (!strncmp(param->name, SESSIONTYPE, 11)) {
+ if (!IS_PSTATE_ACCEPTOR(param)) {
+ pr_err("SessionType key not received"
+ " in first login request.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+ return -1;
+ }
+ if (!strncmp(param->value, DISCOVERY, 9))
+ return 0;
+ }
+
+ if (!strncmp(param->name, INITIATORNAME, 13)) {
+ if (!IS_PSTATE_ACCEPTOR(param)) {
+ if (!login->leading_connection)
+ continue;
+
+ pr_err("InitiatorName key not received"
+ " in first login request.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+ return -1;
+ }
+
+ /*
+ * For non-leading connections, double check that the
+ * received InitiatorName matches the existing session's
+ * struct iscsi_node_acl.
+ */
+ if (!login->leading_connection) {
+ se_nacl = conn->sess->se_sess->se_node_acl;
+ if (!se_nacl) {
+ pr_err("Unable to locate"
+ " struct se_node_acl\n");
+ iscsit_tx_login_rsp(conn,
+ ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_TGT_NOT_FOUND);
+ return -1;
+ }
+
+ if (strcmp(param->value,
+ se_nacl->initiatorname)) {
+ pr_err("Incorrect"
+ " InitiatorName: %s for this"
+ " iSCSI Initiator Node.\n",
+ param->value);
+ iscsit_tx_login_rsp(conn,
+ ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_TGT_NOT_FOUND);
+ return -1;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+ u32 padding = 0;
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_login_rsp *login_rsp;
+
+ login_rsp = (struct iscsi_login_rsp *) login->rsp;
+
+ login_rsp->opcode = ISCSI_OP_LOGIN_RSP;
+ hton24(login_rsp->dlength, login->rsp_length);
+ memcpy(login_rsp->isid, login->isid, 6);
+ login_rsp->tsih = cpu_to_be16(login->tsih);
+ login_rsp->itt = cpu_to_be32(login->init_task_tag);
+ login_rsp->statsn = cpu_to_be32(conn->stat_sn++);
+ login_rsp->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+ login_rsp->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+
+ pr_debug("Sending Login Response, Flags: 0x%02x, ITT: 0x%08x,"
+ " ExpCmdSN; 0x%08x, MaxCmdSN: 0x%08x, StatSN: 0x%08x, Length:"
+ " %u\n", login_rsp->flags, ntohl(login_rsp->itt),
+ ntohl(login_rsp->exp_cmdsn), ntohl(login_rsp->max_cmdsn),
+ ntohl(login_rsp->statsn), login->rsp_length);
+
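+ /*
+ * Pad the response to a 4-byte boundary: (-len) & 3 is the number of
+ * bytes needed to round len up to the next multiple of four, e.g.
+ * len = 5 gives padding = 3.
+ */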
+ padding = ((-login->rsp_length) & 3);
+
+ if (iscsi_login_tx_data(
+ conn,
+ login->rsp,
+ login->rsp_buf,
+ login->rsp_length + padding) < 0)
+ return -1;
+
+ login->rsp_length = 0;
+ login_rsp->tsih = be16_to_cpu(login_rsp->tsih);
+ login_rsp->itt = be32_to_cpu(login_rsp->itt);
+ login_rsp->statsn = be32_to_cpu(login_rsp->statsn);
+ mutex_lock(&sess->cmdsn_mutex);
+ login_rsp->exp_cmdsn = be32_to_cpu(sess->exp_cmd_sn);
+ login_rsp->max_cmdsn = be32_to_cpu(sess->max_cmd_sn);
+ mutex_unlock(&sess->cmdsn_mutex);
+
+ return 0;
+}
+
+static int iscsi_target_do_rx_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+ u32 padding = 0, payload_length;
+ struct iscsi_login_req *login_req;
+
+ if (iscsi_login_rx_data(conn, login->req, ISCSI_HDR_LEN) < 0)
+ return -1;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ payload_length = ntoh24(login_req->dlength);
+ login_req->tsih = be16_to_cpu(login_req->tsih);
+ login_req->itt = be32_to_cpu(login_req->itt);
+ login_req->cid = be16_to_cpu(login_req->cid);
+ login_req->cmdsn = be32_to_cpu(login_req->cmdsn);
+ login_req->exp_statsn = be32_to_cpu(login_req->exp_statsn);
+
+ pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x,"
+ " CmdSN: 0x%08x, ExpStatSN: 0x%08x, CID: %hu, Length: %u\n",
+ login_req->flags, login_req->itt, login_req->cmdsn,
+ login_req->exp_statsn, login_req->cid, payload_length);
+
+ if (iscsi_target_check_login_request(conn, login) < 0)
+ return -1;
+
+ padding = ((-payload_length) & 3);
+ memset(login->req_buf, 0, MAX_KEY_VALUE_PAIRS);
+
+ if (iscsi_login_rx_data(
+ conn,
+ login->req_buf,
+ payload_length + padding) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+ if (iscsi_target_do_tx_login_io(conn, login) < 0)
+ return -1;
+
+ if (iscsi_target_do_rx_login_io(conn, login) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int iscsi_target_get_initial_payload(
+ struct iscsi_conn *conn,
+ struct iscsi_login *login)
+{
+ u32 padding = 0, payload_length;
+ struct iscsi_login_req *login_req;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ payload_length = ntoh24(login_req->dlength);
+
+ pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x,"
+ " CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n",
+ login_req->flags, login_req->itt, login_req->cmdsn,
+ login_req->exp_statsn, payload_length);
+
+ if (iscsi_target_check_login_request(conn, login) < 0)
+ return -1;
+
+ padding = ((-payload_length) & 3);
+
+ if (iscsi_login_rx_data(
+ conn,
+ login->req_buf,
+ payload_length + padding) < 0)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * NOTE: We check for existing sessions or connections AFTER the initiator
+ * has been successfully authenticated in order to protect against faked
+ * ISID/TSIH combinations.
+ */
+static int iscsi_target_check_for_existing_instances(
+ struct iscsi_conn *conn,
+ struct iscsi_login *login)
+{
+ if (login->checked_for_existing)
+ return 0;
+
+ login->checked_for_existing = 1;
+
+ if (!login->tsih)
+ return iscsi_check_for_session_reinstatement(conn);
+ else
+ return iscsi_login_post_auth_non_zero_tsih(conn, login->cid,
+ login->initial_exp_statsn);
+}
+
+static int iscsi_target_do_authentication(
+ struct iscsi_conn *conn,
+ struct iscsi_login *login)
+{
+ int authret;
+ u32 payload_length;
+ struct iscsi_param *param;
+ struct iscsi_login_req *login_req;
+ struct iscsi_login_rsp *login_rsp;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ login_rsp = (struct iscsi_login_rsp *) login->rsp;
+ payload_length = ntoh24(login_req->dlength);
+
+ param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list);
+ if (!param)
+ return -1;
+
+ authret = iscsi_handle_authentication(
+ conn,
+ login->req_buf,
+ login->rsp_buf,
+ payload_length,
+ &login->rsp_length,
+ param->value);
+ switch (authret) {
+ case 0:
+ pr_debug("Received OK response"
+ " from LIO Authentication, continuing.\n");
+ break;
+ case 1:
+ pr_debug("iSCSI security negotiation"
+ " completed sucessfully.\n");
+ login->auth_complete = 1;
+ if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) &&
+ (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+ login_rsp->flags |= (ISCSI_FLAG_LOGIN_NEXT_STAGE1 |
+ ISCSI_FLAG_LOGIN_TRANSIT);
+ login->current_stage = 1;
+ }
+ return iscsi_target_check_for_existing_instances(
+ conn, login);
+ case 2:
+ pr_err("Security negotiation"
+ " failed.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_AUTH_FAILED);
+ return -1;
+ default:
+ pr_err("Received unknown error %d from LIO"
+ " Authentication\n", authret);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_TARGET_ERROR);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int iscsi_target_handle_csg_zero(
+ struct iscsi_conn *conn,
+ struct iscsi_login *login)
+{
+ int ret;
+ u32 payload_length;
+ struct iscsi_param *param;
+ struct iscsi_login_req *login_req;
+ struct iscsi_login_rsp *login_rsp;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ login_rsp = (struct iscsi_login_rsp *) login->rsp;
+ payload_length = ntoh24(login_req->dlength);
+
+ param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list);
+ if (!param)
+ return -1;
+
+ ret = iscsi_decode_text_input(
+ PHASE_SECURITY|PHASE_DECLARATIVE,
+ SENDER_INITIATOR|SENDER_RECEIVER,
+ login->req_buf,
+ payload_length,
+ conn->param_list);
+ if (ret < 0)
+ return -1;
+
+ if (ret > 0) {
+ if (login->auth_complete) {
+ pr_err("Initiator has already been"
+ " successfully authenticated, but is still"
+ " sending %s keys.\n", param->value);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_INIT_ERR);
+ return -1;
+ }
+
+ goto do_auth;
+ }
+
+ if (login->first_request)
+ if (iscsi_target_check_first_request(conn, login) < 0)
+ return -1;
+
+ ret = iscsi_encode_text_output(
+ PHASE_SECURITY|PHASE_DECLARATIVE,
+ SENDER_TARGET,
+ login->rsp_buf,
+ &login->rsp_length,
+ conn->param_list);
+ if (ret < 0)
+ return -1;
+
+ if (!iscsi_check_negotiated_keys(conn->param_list)) {
+ if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication &&
+ !strncmp(param->value, NONE, 4)) {
+ pr_err("Initiator sent AuthMethod=None but"
+ " Target is enforcing iSCSI Authentication,"
+ " login failed.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_AUTH_FAILED);
+ return -1;
+ }
+
+ if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication &&
+ !login->auth_complete)
+ return 0;
+
+ if (strncmp(param->value, NONE, 4) && !login->auth_complete)
+ return 0;
+
+ if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) &&
+ (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) {
+ login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE1 |
+ ISCSI_FLAG_LOGIN_TRANSIT;
+ login->current_stage = 1;
+ }
+ }
+
+ return 0;
+do_auth:
+ return iscsi_target_do_authentication(conn, login);
+}
+
+static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+ int ret;
+ u32 payload_length;
+ struct iscsi_login_req *login_req;
+ struct iscsi_login_rsp *login_rsp;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ login_rsp = (struct iscsi_login_rsp *) login->rsp;
+ payload_length = ntoh24(login_req->dlength);
+
+ ret = iscsi_decode_text_input(
+ PHASE_OPERATIONAL|PHASE_DECLARATIVE,
+ SENDER_INITIATOR|SENDER_RECEIVER,
+ login->req_buf,
+ payload_length,
+ conn->param_list);
+ if (ret < 0)
+ return -1;
+
+ if (login->first_request)
+ if (iscsi_target_check_first_request(conn, login) < 0)
+ return -1;
+
+ if (iscsi_target_check_for_existing_instances(conn, login) < 0)
+ return -1;
+
+ ret = iscsi_encode_text_output(
+ PHASE_OPERATIONAL|PHASE_DECLARATIVE,
+ SENDER_TARGET,
+ login->rsp_buf,
+ &login->rsp_length,
+ conn->param_list);
+ if (ret < 0)
+ return -1;
+
+ if (!login->auth_complete &&
+ ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication) {
+ pr_err("Initiator is requesting CSG: 1, has not been"
+ " successfully authenticated, and the Target is"
+ " enforcing iSCSI Authentication, login failed.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_AUTH_FAILED);
+ return -1;
+ }
+
+ if (!iscsi_check_negotiated_keys(conn->param_list))
+ if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE3) &&
+ (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT))
+ login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE3 |
+ ISCSI_FLAG_LOGIN_TRANSIT;
+
+ return 0;
+}
+
+static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+ int pdu_count = 0;
+ struct iscsi_login_req *login_req;
+ struct iscsi_login_rsp *login_rsp;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ login_rsp = (struct iscsi_login_rsp *) login->rsp;
+
+ while (1) {
+ if (++pdu_count > MAX_LOGIN_PDUS) {
+ pr_err("MAX_LOGIN_PDUS count reached.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_TARGET_ERROR);
+ return -1;
+ }
+
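+ /*
+ * CSG 0 is the SecurityNegotiation stage and CSG 1 is the
+ * LoginOperationalNegotiation stage; a set Transit bit with NSG 3
+ * moves the connection into Full Feature Phase.
+ */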
+ switch ((login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2) {
+ case 0:
+ login_rsp->flags |= (0 & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK);
+ if (iscsi_target_handle_csg_zero(conn, login) < 0)
+ return -1;
+ break;
+ case 1:
+ login_rsp->flags |= ISCSI_FLAG_LOGIN_CURRENT_STAGE1;
+ if (iscsi_target_handle_csg_one(conn, login) < 0)
+ return -1;
+ if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+ login->tsih = conn->sess->tsih;
+ if (iscsi_target_do_tx_login_io(conn,
+ login) < 0)
+ return -1;
+ return 0;
+ }
+ break;
+ default:
+ pr_err("Illegal CSG: %d received from"
+ " Initiator, protocol error.\n",
+ (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK)
+ >> 2);
+ break;
+ }
+
+ if (iscsi_target_do_login_io(conn, login) < 0)
+ return -1;
+
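+ /*
+ * Clear the Transit and NSG bits echoed from the previous response
+ * so they are only set again once a stage is actually ready to
+ * transition.
+ */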
+ if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+ login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT;
+ login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK;
+ }
+ }
+
+ return 0;
+}
+
+static void iscsi_initiatorname_tolower(
+ char *param_buf)
+{
+ char *c;
+ u32 iqn_size = strlen(param_buf), i;
+
+ for (i = 0; i < iqn_size; i++) {
+ c = (char *)&param_buf[i];
+ if (!isupper(*c))
+ continue;
+
+ *c = tolower(*c);
+ }
+}
+
+/*
+ * Processes the first Login Request.
+ */
+static int iscsi_target_locate_portal(
+ struct iscsi_np *np,
+ struct iscsi_conn *conn,
+ struct iscsi_login *login)
+{
+ char *i_buf = NULL, *s_buf = NULL, *t_buf = NULL;
+ char *tmpbuf, *start = NULL, *end = NULL, *key, *value;
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_tiqn *tiqn;
+ struct iscsi_login_req *login_req;
+ u32 payload_length;
+ int sessiontype = 0, ret = 0;
+
+ login_req = (struct iscsi_login_req *) login->req;
+ payload_length = ntoh24(login_req->dlength);
+
+ login->first_request = 1;
+ login->leading_connection = (!login_req->tsih) ? 1 : 0;
+ login->current_stage =
+ (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+ login->version_min = login_req->min_version;
+ login->version_max = login_req->max_version;
+ memcpy(login->isid, login_req->isid, 6);
+ login->cmd_sn = login_req->cmdsn;
+ login->init_task_tag = login_req->itt;
+ login->initial_exp_statsn = login_req->exp_statsn;
+ login->cid = login_req->cid;
+ login->tsih = login_req->tsih;
+
+ if (iscsi_target_get_initial_payload(conn, login) < 0)
+ return -1;
+
+ tmpbuf = kzalloc(payload_length + 1, GFP_KERNEL);
+ if (!tmpbuf) {
+ pr_err("Unable to allocate memory for tmpbuf.\n");
+ return -1;
+ }
+
+ memcpy(tmpbuf, login->req_buf, payload_length);
+ tmpbuf[payload_length] = '\0';
+ start = tmpbuf;
+ end = (start + payload_length);
+
+ /*
+ * Locate the initial keys expected from the Initiator node in
+ * the first login request in order to progress with the login phase.
+ */
+ while (start < end) {
+ if (iscsi_extract_key_value(start, &key, &value) < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!strncmp(key, "InitiatorName", 13))
+ i_buf = value;
+ else if (!strncmp(key, "SessionType", 11))
+ s_buf = value;
+ else if (!strncmp(key, "TargetName", 10))
+ t_buf = value;
+
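+ /* Advance past "key=value", counting the '=' and the NUL separator. */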
+ start += strlen(key) + strlen(value) + 2;
+ }
+
+ /*
+ * See 5.3. Login Phase.
+ */
+ if (!i_buf) {
+ pr_err("InitiatorName key not received"
+ " in first login request.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+ ret = -1;
+ goto out;
+ }
+ /*
+ * Convert the incoming InitiatorName to lowercase following
+ * RFC-3720 3.2.6.1. section c) that says that iSCSI IQNs
+ * are NOT case sensitive.
+ */
+ iscsi_initiatorname_tolower(i_buf);
+
+ if (!s_buf) {
+ if (!login->leading_connection)
+ goto get_target;
+
+ pr_err("SessionType key not received"
+ " in first login request.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * Use default portal group for discovery sessions.
+ */
+ sessiontype = strncmp(s_buf, DISCOVERY, 9);
+ if (!sessiontype) {
+ conn->tpg = iscsit_global->discovery_tpg;
+ if (!login->leading_connection)
+ goto get_target;
+
+ sess->sess_ops->SessionType = 1;
+ /*
+ * Setup crc32c modules from libcrypto
+ */
+ if (iscsi_login_setup_crypto(conn) < 0) {
+ pr_err("iscsi_login_setup_crypto() failed\n");
+ ret = -1;
+ goto out;
+ }
+ /*
+ * Serialize access across the discovery struct iscsi_portal_group to
+ * process login attempt.
+ */
+ if (iscsit_access_np(np, conn->tpg) < 0) {
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
+
+get_target:
+ if (!t_buf) {
+ pr_err("TargetName key not received"
+ " in first login request while"
+ " SessionType=Normal.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * Locate Target IQN from Storage Node.
+ */
+ tiqn = iscsit_get_tiqn_for_login(t_buf);
+ if (!tiqn) {
+ pr_err("Unable to locate Target IQN: %s in"
+ " Storage Node\n", t_buf);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+ ret = -1;
+ goto out;
+ }
+ pr_debug("Located Storage Object: %s\n", tiqn->tiqn);
+
+ /*
+ * Locate Target Portal Group from Storage Node.
+ */
+ conn->tpg = iscsit_get_tpg_from_np(tiqn, np);
+ if (!conn->tpg) {
+ pr_err("Unable to locate Target Portal Group"
+ " on %s\n", tiqn->tiqn);
+ iscsit_put_tiqn_for_login(tiqn);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+ ret = -1;
+ goto out;
+ }
+ pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt);
+ /*
+ * Setup crc32c modules from libcrypto
+ */
+ if (iscsi_login_setup_crypto(conn) < 0) {
+ pr_err("iscsi_login_setup_crypto() failed\n");
+ ret = -1;
+ goto out;
+ }
+ /*
+ * Serialize access across the struct iscsi_portal_group to
+ * process login attempt.
+ */
+ if (iscsit_access_np(np, conn->tpg) < 0) {
+ iscsit_put_tiqn_for_login(tiqn);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+ ret = -1;
+ conn->tpg = NULL;
+ goto out;
+ }
+
+ /*
+ * conn->sess->node_acl will be set when the referenced
+ * struct iscsi_session is located from received ISID+TSIH in
+ * iscsi_login_non_zero_tsih_s2().
+ */
+ if (!login->leading_connection) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * This value is required in iscsi_login_zero_tsih_s2()
+ */
+ sess->sess_ops->SessionType = 0;
+
+ /*
+ * Locate incoming Initiator IQN reference from Storage Node.
+ */
+ sess->se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
+ &conn->tpg->tpg_se_tpg, i_buf);
+ if (!sess->se_sess->se_node_acl) {
+ pr_err("iSCSI Initiator Node: %s is not authorized to"
+ " access iSCSI target portal group: %hu.\n",
+ i_buf, conn->tpg->tpgt);
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+ ISCSI_LOGIN_STATUS_TGT_FORBIDDEN);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ kfree(tmpbuf);
+ return ret;
+}
+
+struct iscsi_login *iscsi_target_init_negotiation(
+ struct iscsi_np *np,
+ struct iscsi_conn *conn,
+ char *login_pdu)
+{
+ struct iscsi_login *login;
+
+ login = kzalloc(sizeof(struct iscsi_login), GFP_KERNEL);
+ if (!login) {
+ pr_err("Unable to allocate memory for struct iscsi_login.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ return NULL;
+ }
+
+ login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+ if (!login->req) {
+ pr_err("Unable to allocate memory for Login Request.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ goto out;
+ }
+ memcpy(login->req, login_pdu, ISCSI_HDR_LEN);
+
+ login->req_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL);
+ if (!login->req_buf) {
+ pr_err("Unable to allocate memory for response buffer.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ goto out;
+ }
+ /*
+ * SessionType: Discovery
+ *
+ * Locates Default Portal
+ *
+ * SessionType: Normal
+ *
+ * Locates Target Portal from NP -> Target IQN
+ */
+ if (iscsi_target_locate_portal(np, conn, login) < 0) {
+ pr_err("iSCSI Login negotiation failed.\n");
+ goto out;
+ }
+
+ return login;
+out:
+ kfree(login->req);
+ kfree(login->req_buf);
+ kfree(login);
+
+ return NULL;
+}
+
+int iscsi_target_start_negotiation(
+ struct iscsi_login *login,
+ struct iscsi_conn *conn)
+{
+ int ret = -1;
+
+ login->rsp = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
+ if (!login->rsp) {
+ pr_err("Unable to allocate memory for"
+ " Login Response.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ ret = -1;
+ goto out;
+ }
+
+ login->rsp_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL);
+ if (!login->rsp_buf) {
+ pr_err("Unable to allocate memory for"
+ " request buffer.\n");
+ iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+ ISCSI_LOGIN_STATUS_NO_RESOURCES);
+ ret = -1;
+ goto out;
+ }
+
+ ret = iscsi_target_do_login(conn, login);
+out:
+ if (ret != 0)
+ iscsi_remove_failed_auth_entry(conn);
+
+ iscsi_target_nego_release(login, conn);
+ return ret;
+}
+
+void iscsi_target_nego_release(
+ struct iscsi_login *login,
+ struct iscsi_conn *conn)
+{
+ kfree(login->req);
+ kfree(login->rsp);
+ kfree(login->req_buf);
+ kfree(login->rsp_buf);
+ kfree(login);
+}
diff --git a/drivers/target/iscsi/iscsi_target_nego.h b/drivers/target/iscsi/iscsi_target_nego.h
new file mode 100644
index 0000000..92e133a
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nego.h
@@ -0,0 +1,17 @@
+#ifndef ISCSI_TARGET_NEGO_H
+#define ISCSI_TARGET_NEGO_H
+
+#define DECIMAL 0
+#define HEX 1
+
+extern void convert_null_to_semi(char *, int);
+extern int extract_param(const char *, const char *, unsigned int, char *,
+ unsigned char *);
+extern struct iscsi_login *iscsi_target_init_negotiation(
+ struct iscsi_np *, struct iscsi_conn *, char *);
+extern int iscsi_target_start_negotiation(
+ struct iscsi_login *, struct iscsi_conn *);
+extern void iscsi_target_nego_release(
+ struct iscsi_login *, struct iscsi_conn *);
+
+#endif /* ISCSI_TARGET_NEGO_H */
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c
new file mode 100644
index 0000000..aeafbe0
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c
@@ -0,0 +1,263 @@
+/*******************************************************************************
+ * This file contains the main functions related to Initiator Node Attributes.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_nodeattrib.h"
+
+static inline char *iscsit_na_get_initiatorname(
+ struct iscsi_node_acl *nacl)
+{
+ struct se_node_acl *se_nacl = &nacl->se_node_acl;
+
+ return &se_nacl->initiatorname[0];
+}
+
+void iscsit_set_default_node_attribues(
+ struct iscsi_node_acl *acl)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+
+ a->dataout_timeout = NA_DATAOUT_TIMEOUT;
+ a->dataout_timeout_retries = NA_DATAOUT_TIMEOUT_RETRIES;
+ a->nopin_timeout = NA_NOPIN_TIMEOUT;
+ a->nopin_response_timeout = NA_NOPIN_RESPONSE_TIMEOUT;
+ a->random_datain_pdu_offsets = NA_RANDOM_DATAIN_PDU_OFFSETS;
+ a->random_datain_seq_offsets = NA_RANDOM_DATAIN_SEQ_OFFSETS;
+ a->random_r2t_offsets = NA_RANDOM_R2T_OFFSETS;
+ a->default_erl = NA_DEFAULT_ERL;
+}
+
+extern int iscsit_na_dataout_timeout(
+ struct iscsi_node_acl *acl,
+ u32 dataout_timeout)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+
+ if (dataout_timeout > NA_DATAOUT_TIMEOUT_MAX) {
+ pr_err("Requested DataOut Timeout %u larger than"
+ " maximum %u\n", dataout_timeout,
+ NA_DATAOUT_TIMEOUT_MAX);
+ return -EINVAL;
+ } else if (dataout_timeout < NA_DATAOUT_TIMEOUT_MIX) {
+ pr_err("Requested DataOut Timeout %u smaller than"
+ " minimum %u\n", dataout_timeout,
+ NA_DATAOUT_TIMEOUT_MIX);
+ return -EINVAL;
+ }
+
+ a->dataout_timeout = dataout_timeout;
+ pr_debug("Set DataOut Timeout to %u for Initiator Node"
+ " %s\n", a->dataout_timeout, iscsit_na_get_initiatorname(acl));
+
+ return 0;
+}
+
+extern int iscsit_na_dataout_timeout_retries(
+ struct iscsi_node_acl *acl,
+ u32 dataout_timeout_retries)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+
+ if (dataout_timeout_retries > NA_DATAOUT_TIMEOUT_RETRIES_MAX) {
+ pr_err("Requested DataOut Timeout Retries %u larger"
+ " than maximum %u", dataout_timeout_retries,
+ NA_DATAOUT_TIMEOUT_RETRIES_MAX);
+ return -EINVAL;
+ } else if (dataout_timeout_retries < NA_DATAOUT_TIMEOUT_RETRIES_MIN) {
+ pr_err("Requested DataOut Timeout Retries %u smaller"
+ " than minimum %u", dataout_timeout_retries,
+ NA_DATAOUT_TIMEOUT_RETRIES_MIN);
+ return -EINVAL;
+ }
+
+ a->dataout_timeout_retries = dataout_timeout_retries;
+ pr_debug("Set DataOut Timeout Retries to %u for"
+ " Initiator Node %s\n", a->dataout_timeout_retries,
+ iscsit_na_get_initiatorname(acl));
+
+ return 0;
+}
+
+extern int iscsit_na_nopin_timeout(
+ struct iscsi_node_acl *acl,
+ u32 nopin_timeout)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+ struct iscsi_session *sess;
+ struct iscsi_conn *conn;
+ struct se_node_acl *se_nacl = &a->nacl->se_node_acl;
+ struct se_session *se_sess;
+ u32 orig_nopin_timeout = a->nopin_timeout;
+
+ if (nopin_timeout > NA_NOPIN_TIMEOUT_MAX) {
+ pr_err("Requested NopIn Timeout %u larger than maximum"
+ " %u\n", nopin_timeout, NA_NOPIN_TIMEOUT_MAX);
+ return -EINVAL;
+ } else if ((nopin_timeout < NA_NOPIN_TIMEOUT_MIN) &&
+ (nopin_timeout != 0)) {
+ pr_err("Requested NopIn Timeout %u smaller than"
+ " minimum %u and not 0\n", nopin_timeout,
+ NA_NOPIN_TIMEOUT_MIN);
+ return -EINVAL;
+ }
+
+ a->nopin_timeout = nopin_timeout;
+ pr_debug("Set NopIn Timeout to %u for Initiator"
+ " Node %s\n", a->nopin_timeout,
+ iscsit_na_get_initiatorname(acl));
+ /*
+ * Reenable disabled nopin_timeout timer for all iSCSI connections.
+ */
+ if (!orig_nopin_timeout) {
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+
+ spin_lock(&sess->conn_lock);
+ list_for_each_entry(conn, &sess->sess_conn_list,
+ conn_list) {
+ if (conn->conn_state !=
+ TARG_CONN_STATE_LOGGED_IN)
+ continue;
+
+ spin_lock(&conn->nopin_timer_lock);
+ __iscsit_start_nopin_timer(conn);
+ spin_unlock(&conn->nopin_timer_lock);
+ }
+ spin_unlock(&sess->conn_lock);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+ }
+
+ return 0;
+}
+
+extern int iscsit_na_nopin_response_timeout(
+ struct iscsi_node_acl *acl,
+ u32 nopin_response_timeout)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+
+ if (nopin_response_timeout > NA_NOPIN_RESPONSE_TIMEOUT_MAX) {
+ pr_err("Requested NopIn Response Timeout %u larger"
+ " than maximum %u\n", nopin_response_timeout,
+ NA_NOPIN_RESPONSE_TIMEOUT_MAX);
+ return -EINVAL;
+ } else if (nopin_response_timeout < NA_NOPIN_RESPONSE_TIMEOUT_MIN) {
+ pr_err("Requested NopIn Response Timeout %u smaller"
+ " than minimum %u\n", nopin_response_timeout,
+ NA_NOPIN_RESPONSE_TIMEOUT_MIN);
+ return -EINVAL;
+ }
+
+ a->nopin_response_timeout = nopin_response_timeout;
+ pr_debug("Set NopIn Response Timeout to %u for"
+ " Initiator Node %s\n", a->nopin_timeout,
+ iscsit_na_get_initiatorname(acl));
+
+ return 0;
+}
+
+extern int iscsit_na_random_datain_pdu_offsets(
+ struct iscsi_node_acl *acl,
+ u32 random_datain_pdu_offsets)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+
+ if (random_datain_pdu_offsets != 0 && random_datain_pdu_offsets != 1) {
+ pr_err("Requested Random DataIN PDU Offsets: %u not"
+ " 0 or 1\n", random_datain_pdu_offsets);
+ return -EINVAL;
+ }
+
+ a->random_datain_pdu_offsets = random_datain_pdu_offsets;
+ pr_debug("Set Random DataIN PDU Offsets to %u for"
+ " Initiator Node %s\n", a->random_datain_pdu_offsets,
+ iscsit_na_get_initiatorname(acl));
+
+ return 0;
+}
+
+extern int iscsit_na_random_datain_seq_offsets(
+ struct iscsi_node_acl *acl,
+ u32 random_datain_seq_offsets)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+
+ if (random_datain_seq_offsets != 0 && random_datain_seq_offsets != 1) {
+ pr_err("Requested Random DataIN Sequence Offsets: %u"
+ " not 0 or 1\n", random_datain_seq_offsets);
+ return -EINVAL;
+ }
+
+ a->random_datain_seq_offsets = random_datain_seq_offsets;
+ pr_debug("Set Random DataIN Sequence Offsets to %u for"
+ " Initiator Node %s\n", a->random_datain_seq_offsets,
+ iscsit_na_get_initiatorname(acl));
+
+ return 0;
+}
+
+extern int iscsit_na_random_r2t_offsets(
+ struct iscsi_node_acl *acl,
+ u32 random_r2t_offsets)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+
+ if (random_r2t_offsets != 0 && random_r2t_offsets != 1) {
+ pr_err("Requested Random R2T Offsets: %u not"
+ " 0 or 1\n", random_r2t_offsets);
+ return -EINVAL;
+ }
+
+ a->random_r2t_offsets = random_r2t_offsets;
+ pr_debug("Set Random R2T Offsets to %u for"
+ " Initiator Node %s\n", a->random_r2t_offsets,
+ iscsit_na_get_initiatorname(acl));
+
+ return 0;
+}
+
+extern int iscsit_na_default_erl(
+ struct iscsi_node_acl *acl,
+ u32 default_erl)
+{
+ struct iscsi_node_attrib *a = &acl->node_attrib;
+
+ if (default_erl != 0 && default_erl != 1 && default_erl != 2) {
+ pr_err("Requested default ERL: %u not 0, 1, or 2\n",
+ default_erl);
+ return -EINVAL;
+ }
+
+ a->default_erl = default_erl;
+ pr_debug("Set use ERL0 flag to %u for Initiator"
+ " Node %s\n", a->default_erl,
+ iscsit_na_get_initiatorname(acl));
+
+ return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.h b/drivers/target/iscsi/iscsi_target_nodeattrib.h
new file mode 100644
index 0000000..c970b326
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.h
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_NODEATTRIB_H
+#define ISCSI_TARGET_NODEATTRIB_H
+
+extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *);
+extern int iscsit_na_dataout_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_dataout_timeout_retries(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_response_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_pdu_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_seq_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_r2t_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_default_erl(struct iscsi_node_acl *, u32);
+
+#endif /* ISCSI_TARGET_NODEATTRIB_H */
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
new file mode 100644
index 0000000..252e246
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -0,0 +1,1905 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/slab.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_parameters.h"
+
+int iscsi_login_rx_data(
+ struct iscsi_conn *conn,
+ char *buf,
+ int length)
+{
+ int rx_got;
+ struct kvec iov;
+
+ memset(&iov, 0, sizeof(struct kvec));
+ iov.iov_len = length;
+ iov.iov_base = buf;
+
+ /*
+ * Initial Marker-less Interval.
+ * Add the values regardless of IFMarker/OFMarker, considering
+ * it may not be negotiated yet.
+ */
+ conn->of_marker += length;
+
+ rx_got = rx_data(conn, &iov, 1, length);
+ if (rx_got != length) {
+ pr_err("rx_data returned %d, expecting %d.\n",
+ rx_got, length);
+ return -1;
+ }
+
+ return 0;
+}
+
+int iscsi_login_tx_data(
+ struct iscsi_conn *conn,
+ char *pdu_buf,
+ char *text_buf,
+ int text_length)
+{
+ int length, tx_sent;
+ struct kvec iov[2];
+
+ length = (ISCSI_HDR_LEN + text_length);
+
+ memset(&iov[0], 0, 2 * sizeof(struct kvec));
+ iov[0].iov_len = ISCSI_HDR_LEN;
+ iov[0].iov_base = pdu_buf;
+ iov[1].iov_len = text_length;
+ iov[1].iov_base = text_buf;
+
+ /*
+ * Initial Marker-less Interval.
+ * Add the values regardless of IFMarker/OFMarker, considering
+ * it may not be negotiated yet.
+ */
+ conn->if_marker += length;
+
+ tx_sent = tx_data(conn, &iov[0], 2, length);
+ if (tx_sent != length) {
+ pr_err("tx_data returned %d, expecting %d.\n",
+ tx_sent, length);
+ return -1;
+ }
+
+ return 0;
+}
+
+void iscsi_dump_conn_ops(struct iscsi_conn_ops *conn_ops)
+{
+ pr_debug("HeaderDigest: %s\n", (conn_ops->HeaderDigest) ?
+ "CRC32C" : "None");
+ pr_debug("DataDigest: %s\n", (conn_ops->DataDigest) ?
+ "CRC32C" : "None");
+ pr_debug("MaxRecvDataSegmentLength: %u\n",
+ conn_ops->MaxRecvDataSegmentLength);
+ pr_debug("OFMarker: %s\n", (conn_ops->OFMarker) ? "Yes" : "No");
+ pr_debug("IFMarker: %s\n", (conn_ops->IFMarker) ? "Yes" : "No");
+ if (conn_ops->OFMarker)
+ pr_debug("OFMarkInt: %u\n", conn_ops->OFMarkInt);
+ if (conn_ops->IFMarker)
+ pr_debug("IFMarkInt: %u\n", conn_ops->IFMarkInt);
+}
+
+void iscsi_dump_sess_ops(struct iscsi_sess_ops *sess_ops)
+{
+ pr_debug("InitiatorName: %s\n", sess_ops->InitiatorName);
+ pr_debug("InitiatorAlias: %s\n", sess_ops->InitiatorAlias);
+ pr_debug("TargetName: %s\n", sess_ops->TargetName);
+ pr_debug("TargetAlias: %s\n", sess_ops->TargetAlias);
+ pr_debug("TargetPortalGroupTag: %hu\n",
+ sess_ops->TargetPortalGroupTag);
+ pr_debug("MaxConnections: %hu\n", sess_ops->MaxConnections);
+ pr_debug("InitialR2T: %s\n",
+ (sess_ops->InitialR2T) ? "Yes" : "No");
+ pr_debug("ImmediateData: %s\n", (sess_ops->ImmediateData) ?
+ "Yes" : "No");
+ pr_debug("MaxBurstLength: %u\n", sess_ops->MaxBurstLength);
+ pr_debug("FirstBurstLength: %u\n", sess_ops->FirstBurstLength);
+ pr_debug("DefaultTime2Wait: %hu\n", sess_ops->DefaultTime2Wait);
+ pr_debug("DefaultTime2Retain: %hu\n",
+ sess_ops->DefaultTime2Retain);
+ pr_debug("MaxOutstandingR2T: %hu\n",
+ sess_ops->MaxOutstandingR2T);
+ pr_debug("DataPDUInOrder: %s\n",
+ (sess_ops->DataPDUInOrder) ? "Yes" : "No");
+ pr_debug("DataSequenceInOrder: %s\n",
+ (sess_ops->DataSequenceInOrder) ? "Yes" : "No");
+ pr_debug("ErrorRecoveryLevel: %hu\n",
+ sess_ops->ErrorRecoveryLevel);
+ pr_debug("SessionType: %s\n", (sess_ops->SessionType) ?
+ "Discovery" : "Normal");
+}
+
+void iscsi_print_params(struct iscsi_param_list *param_list)
+{
+ struct iscsi_param *param;
+
+ list_for_each_entry(param, &param_list->param_list, p_list)
+ pr_debug("%s: %s\n", param->name, param->value);
+}
+
+static struct iscsi_param *iscsi_set_default_param(struct iscsi_param_list *param_list,
+ char *name, char *value, u8 phase, u8 scope, u8 sender,
+ u16 type_range, u8 use)
+{
+ struct iscsi_param *param = NULL;
+
+ param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
+ if (!param) {
+ pr_err("Unable to allocate memory for parameter.\n");
+ goto out;
+ }
+ INIT_LIST_HEAD(&param->p_list);
+
+ param->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!param->name) {
+ pr_err("Unable to allocate memory for parameter name.\n");
+ goto out;
+ }
+
+ param->value = kzalloc(strlen(value) + 1, GFP_KERNEL);
+ if (!param->value) {
+ pr_err("Unable to allocate memory for parameter value.\n");
+ goto out;
+ }
+
+ memcpy(param->name, name, strlen(name));
+ param->name[strlen(name)] = '\0';
+ memcpy(param->value, value, strlen(value));
+ param->value[strlen(value)] = '\0';
+ param->phase = phase;
+ param->scope = scope;
+ param->sender = sender;
+ param->use = use;
+ param->type_range = type_range;
+
+ switch (param->type_range) {
+ case TYPERANGE_BOOL_AND:
+ param->type = TYPE_BOOL_AND;
+ break;
+ case TYPERANGE_BOOL_OR:
+ param->type = TYPE_BOOL_OR;
+ break;
+ case TYPERANGE_0_TO_2:
+ case TYPERANGE_0_TO_3600:
+ case TYPERANGE_0_TO_32767:
+ case TYPERANGE_0_TO_65535:
+ case TYPERANGE_1_TO_65535:
+ case TYPERANGE_2_TO_3600:
+ case TYPERANGE_512_TO_16777215:
+ param->type = TYPE_NUMBER;
+ break;
+ case TYPERANGE_AUTH:
+ case TYPERANGE_DIGEST:
+ param->type = TYPE_VALUE_LIST | TYPE_STRING;
+ break;
+ case TYPERANGE_MARKINT:
+ param->type = TYPE_NUMBER_RANGE;
+ param->type_range |= TYPERANGE_1_TO_65535;
+ break;
+ case TYPERANGE_ISCSINAME:
+ case TYPERANGE_SESSIONTYPE:
+ case TYPERANGE_TARGETADDRESS:
+ case TYPERANGE_UTF8:
+ param->type = TYPE_STRING;
+ break;
+ default:
+ pr_err("Unknown type_range 0x%02x\n",
+ param->type_range);
+ goto out;
+ }
+ list_add_tail(&param->p_list, &param_list->param_list);
+
+ return param;
+out:
+ if (param) {
+ kfree(param->value);
+ kfree(param->name);
+ kfree(param);
+ }
+
+ return NULL;
+}
+
+/* #warning Add extension keys */
+int iscsi_create_default_params(struct iscsi_param_list **param_list_ptr)
+{
+ struct iscsi_param *param = NULL;
+ struct iscsi_param_list *pl;
+
+ pl = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
+ if (!pl) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_param_list.\n");
+ return -1;
+ }
+ INIT_LIST_HEAD(&pl->param_list);
+ INIT_LIST_HEAD(&pl->extra_response_list);
+
+ /*
+ * The format for setting the initial parameter definitions is:
+ *
+ * Parameter name:
+ * Initial value:
+ * Allowable phase:
+ * Scope:
+ * Allowable senders:
+ * Typerange:
+ * Use:
+ */
+ param = iscsi_set_default_param(pl, AUTHMETHOD, INITIAL_AUTHMETHOD,
+ PHASE_SECURITY, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+ TYPERANGE_AUTH, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, HEADERDIGEST, INITIAL_HEADERDIGEST,
+ PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+ TYPERANGE_DIGEST, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, DATADIGEST, INITIAL_DATADIGEST,
+ PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+ TYPERANGE_DIGEST, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, MAXCONNECTIONS,
+ INITIAL_MAXCONNECTIONS, PHASE_OPERATIONAL,
+ SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_1_TO_65535, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, SENDTARGETS, INITIAL_SENDTARGETS,
+ PHASE_FFP0, SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+ TYPERANGE_UTF8, 0);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, TARGETNAME, INITIAL_TARGETNAME,
+ PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_ISCSINAME, USE_ALL);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, INITIATORNAME,
+ INITIAL_INITIATORNAME, PHASE_DECLARATIVE,
+ SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+ TYPERANGE_ISCSINAME, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, TARGETALIAS, INITIAL_TARGETALIAS,
+ PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET,
+ TYPERANGE_UTF8, USE_ALL);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, INITIATORALIAS,
+ INITIAL_INITIATORALIAS, PHASE_DECLARATIVE,
+ SCOPE_SESSION_WIDE, SENDER_INITIATOR, TYPERANGE_UTF8,
+ USE_ALL);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, TARGETADDRESS,
+ INITIAL_TARGETADDRESS, PHASE_DECLARATIVE,
+ SCOPE_SESSION_WIDE, SENDER_TARGET,
+ TYPERANGE_TARGETADDRESS, USE_ALL);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, TARGETPORTALGROUPTAG,
+ INITIAL_TARGETPORTALGROUPTAG,
+ PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET,
+ TYPERANGE_0_TO_65535, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, INITIALR2T, INITIAL_INITIALR2T,
+ PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_BOOL_OR, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, IMMEDIATEDATA,
+ INITIAL_IMMEDIATEDATA, PHASE_OPERATIONAL,
+ SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_AND,
+ USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, MAXRECVDATASEGMENTLENGTH,
+ INITIAL_MAXRECVDATASEGMENTLENGTH,
+ PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+ TYPERANGE_512_TO_16777215, USE_ALL);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, MAXBURSTLENGTH,
+ INITIAL_MAXBURSTLENGTH, PHASE_OPERATIONAL,
+ SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_512_TO_16777215, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, FIRSTBURSTLENGTH,
+ INITIAL_FIRSTBURSTLENGTH,
+ PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_512_TO_16777215, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, DEFAULTTIME2WAIT,
+ INITIAL_DEFAULTTIME2WAIT,
+ PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_0_TO_3600, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, DEFAULTTIME2RETAIN,
+ INITIAL_DEFAULTTIME2RETAIN,
+ PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_0_TO_3600, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, MAXOUTSTANDINGR2T,
+ INITIAL_MAXOUTSTANDINGR2T,
+ PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_1_TO_65535, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, DATAPDUINORDER,
+ INITIAL_DATAPDUINORDER, PHASE_OPERATIONAL,
+ SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_OR,
+ USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, DATASEQUENCEINORDER,
+ INITIAL_DATASEQUENCEINORDER,
+ PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_BOOL_OR, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, ERRORRECOVERYLEVEL,
+ INITIAL_ERRORRECOVERYLEVEL,
+ PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH,
+ TYPERANGE_0_TO_2, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, SESSIONTYPE, INITIAL_SESSIONTYPE,
+ PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_INITIATOR,
+ TYPERANGE_SESSIONTYPE, USE_LEADING_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, IFMARKER, INITIAL_IFMARKER,
+ PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+ TYPERANGE_BOOL_AND, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, OFMARKER, INITIAL_OFMARKER,
+ PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+ TYPERANGE_BOOL_AND, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, IFMARKINT, INITIAL_IFMARKINT,
+ PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+ TYPERANGE_MARKINT, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ param = iscsi_set_default_param(pl, OFMARKINT, INITIAL_OFMARKINT,
+ PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH,
+ TYPERANGE_MARKINT, USE_INITIAL_ONLY);
+ if (!param)
+ goto out;
+
+ *param_list_ptr = pl;
+ return 0;
+out:
+ iscsi_release_param_list(pl);
+ return -1;
+}
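+
+/*
+ * Illustrative usage sketch (assumptions, not taken from this patch): a
+ * caller would typically build the default list once, mark keys for
+ * negotiation, and release the list when done or on failure:
+ *
+ *	struct iscsi_param_list *pl;
+ *
+ *	if (iscsi_create_default_params(&pl) < 0)
+ *		return -1;
+ *	iscsi_set_keys_to_negotiate(0, pl);
+ *	...
+ *	iscsi_release_param_list(pl);
+ */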
+
+int iscsi_set_keys_to_negotiate(
+ int sessiontype,
+ struct iscsi_param_list *param_list)
+{
+ struct iscsi_param *param;
+
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ param->state = 0;
+ if (!strcmp(param->name, AUTHMETHOD)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, HEADERDIGEST)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, DATADIGEST)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, MAXCONNECTIONS)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, TARGETNAME)) {
+ continue;
+ } else if (!strcmp(param->name, INITIATORNAME)) {
+ continue;
+ } else if (!strcmp(param->name, TARGETALIAS)) {
+ if (param->value)
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, INITIATORALIAS)) {
+ continue;
+ } else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, INITIALR2T)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, IMMEDIATEDATA)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, MAXBURSTLENGTH)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, DATAPDUINORDER)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, DATASEQUENCEINORDER)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, SESSIONTYPE)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, IFMARKER)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, OFMARKER)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, IFMARKINT)) {
+ SET_PSTATE_NEGOTIATE(param);
+ } else if (!strcmp(param->name, OFMARKINT)) {
+ SET_PSTATE_NEGOTIATE(param);
+ }
+ }
+
+ return 0;
+}
+
+int iscsi_set_keys_irrelevant_for_discovery(
+ struct iscsi_param_list *param_list)
+{
+ struct iscsi_param *param;
+
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ if (!strcmp(param->name, MAXCONNECTIONS))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, INITIALR2T))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, IMMEDIATEDATA))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, MAXBURSTLENGTH))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, FIRSTBURSTLENGTH))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, MAXOUTSTANDINGR2T))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, DATAPDUINORDER))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, DATASEQUENCEINORDER))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, ERRORRECOVERYLEVEL))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, DEFAULTTIME2WAIT))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, DEFAULTTIME2RETAIN))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, IFMARKER))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, OFMARKER))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, IFMARKINT))
+ param->state &= ~PSTATE_NEGOTIATE;
+ else if (!strcmp(param->name, OFMARKINT))
+ param->state &= ~PSTATE_NEGOTIATE;
+ }
+
+ return 0;
+}
+
+int iscsi_copy_param_list(
+ struct iscsi_param_list **dst_param_list,
+ struct iscsi_param_list *src_param_list,
+ int leading)
+{
+ struct iscsi_param *new_param = NULL, *param = NULL;
+ struct iscsi_param_list *param_list = NULL;
+
+ param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
+ if (!param_list) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_param_list.\n");
+ goto err_out;
+ }
+ INIT_LIST_HEAD(¶m_list->param_list);
+ INIT_LIST_HEAD(¶m_list->extra_response_list);
+
+ list_for_each_entry(param, &src_param_list->param_list, p_list) {
+ if (!leading && (param->scope & SCOPE_SESSION_WIDE)) {
+ if ((strcmp(param->name, "TargetName") != 0) &&
+ (strcmp(param->name, "InitiatorName") != 0) &&
+ (strcmp(param->name, "TargetPortalGroupTag") != 0))
+ continue;
+ }
+
+ new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
+ if (!new_param) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_param.\n");
+ goto err_out;
+ }
+
+ new_param->set_param = param->set_param;
+ new_param->phase = param->phase;
+ new_param->scope = param->scope;
+ new_param->sender = param->sender;
+ new_param->type = param->type;
+ new_param->use = param->use;
+ new_param->type_range = param->type_range;
+
+ new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL);
+ if (!new_param->name) {
+ pr_err("Unable to allocate memory for"
+ " parameter name.\n");
+ goto err_out;
+ }
+
+ new_param->value = kzalloc(strlen(param->value) + 1,
+ GFP_KERNEL);
+ if (!new_param->value) {
+ pr_err("Unable to allocate memory for"
+ " parameter value.\n");
+ goto err_out;
+ }
+
+ memcpy(new_param->name, param->name, strlen(param->name));
+ new_param->name[strlen(param->name)] = '\0';
+ memcpy(new_param->value, param->value, strlen(param->value));
+ new_param->value[strlen(param->value)] = '\0';
+
+ list_add_tail(&new_param->p_list, ¶m_list->param_list);
+ }
+
+ if (!list_empty(¶m_list->param_list))
+ *dst_param_list = param_list;
+ else {
+ pr_err("No parameters allocated.\n");
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ iscsi_release_param_list(param_list);
+ return -1;
+}
+
+static void iscsi_release_extra_responses(struct iscsi_param_list *param_list)
+{
+ struct iscsi_extra_response *er, *er_tmp;
+
+ list_for_each_entry_safe(er, er_tmp, ¶m_list->extra_response_list,
+ er_list) {
+ list_del(&er->er_list);
+ kfree(er);
+ }
+}
+
+void iscsi_release_param_list(struct iscsi_param_list *param_list)
+{
+ struct iscsi_param *param, *param_tmp;
+
+ list_for_each_entry_safe(param, param_tmp, ¶m_list->param_list,
+ p_list) {
+ list_del(¶m->p_list);
+
+ kfree(param->name);
+ param->name = NULL;
+ kfree(param->value);
+ param->value = NULL;
+ kfree(param);
+ param = NULL;
+ }
+
+ iscsi_release_extra_responses(param_list);
+
+ kfree(param_list);
+}
+
+struct iscsi_param *iscsi_find_param_from_key(
+ char *key,
+ struct iscsi_param_list *param_list)
+{
+ struct iscsi_param *param;
+
+ if (!key || !param_list) {
+ pr_err("Key or parameter list pointer is NULL.\n");
+ return NULL;
+ }
+
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ if (!strcmp(key, param->name))
+ return param;
+ }
+
+ pr_err("Unable to locate key \"%s\".\n", key);
+ return NULL;
+}
+
+int iscsi_extract_key_value(char *textbuf, char **key, char **value)
+{
+ *value = strchr(textbuf, '=');
+ if (!*value) {
+		pr_err("Unable to locate \"=\" separator for key,"
+			" ignoring request.\n");
+ return -1;
+ }
+
+ *key = textbuf;
+ **value = '\0';
+ *value = *value + 1;
+
+ return 0;
+}
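+
+/*
+ * Example: given textbuf "HeaderDigest=CRC32C,None", the '=' is
+ * overwritten with a NUL in place, leaving *key pointing at
+ * "HeaderDigest" and *value pointing at "CRC32C,None".
+ */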
+
+int iscsi_update_param_value(struct iscsi_param *param, char *value)
+{
+ kfree(param->value);
+
+ param->value = kzalloc(strlen(value) + 1, GFP_KERNEL);
+ if (!param->value) {
+ pr_err("Unable to allocate memory for value.\n");
+ return -1;
+ }
+
+ memcpy(param->value, value, strlen(value));
+ param->value[strlen(value)] = '\0';
+
+ pr_debug("iSCSI Parameter updated to %s=%s\n",
+ param->name, param->value);
+ return 0;
+}
+
+static int iscsi_add_notunderstood_response(
+ char *key,
+ char *value,
+ struct iscsi_param_list *param_list)
+{
+ struct iscsi_extra_response *extra_response;
+
+ if (strlen(value) > VALUE_MAXLEN) {
+ pr_err("Value for notunderstood key \"%s\" exceeds %d,"
+ " protocol error.\n", key, VALUE_MAXLEN);
+ return -1;
+ }
+
+ extra_response = kzalloc(sizeof(struct iscsi_extra_response), GFP_KERNEL);
+ if (!extra_response) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_extra_response.\n");
+ return -1;
+ }
+ INIT_LIST_HEAD(&extra_response->er_list);
+
+	/* Use strlcpy() so an overlength key cannot overflow key[64]. */
+	strlcpy(extra_response->key, key, sizeof(extra_response->key));
+	strlcpy(extra_response->value, NOTUNDERSTOOD,
+		sizeof(extra_response->value));
+
+ list_add_tail(&extra_response->er_list,
+ ¶m_list->extra_response_list);
+ return 0;
+}
+
+static int iscsi_check_for_auth_key(char *key)
+{
+ /*
+ * RFC 1994
+ */
+ if (!strcmp(key, "CHAP_A") || !strcmp(key, "CHAP_I") ||
+ !strcmp(key, "CHAP_C") || !strcmp(key, "CHAP_N") ||
+ !strcmp(key, "CHAP_R"))
+ return 1;
+
+ /*
+ * RFC 2945
+ */
+ if (!strcmp(key, "SRP_U") || !strcmp(key, "SRP_N") ||
+ !strcmp(key, "SRP_g") || !strcmp(key, "SRP_s") ||
+ !strcmp(key, "SRP_A") || !strcmp(key, "SRP_B") ||
+ !strcmp(key, "SRP_M") || !strcmp(key, "SRP_HM"))
+ return 1;
+
+ return 0;
+}
+
+static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
+{
+ if (IS_TYPE_BOOL_AND(param)) {
+ if (!strcmp(param->value, NO))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ } else if (IS_TYPE_BOOL_OR(param)) {
+ if (!strcmp(param->value, YES))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ /*
+ * Required for gPXE iSCSI boot client
+ */
+ if (!strcmp(param->name, IMMEDIATEDATA))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ } else if (IS_TYPE_NUMBER(param)) {
+ if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ /*
+ * The GlobalSAN iSCSI Initiator for MacOSX does
+ * not respond to MaxBurstLength, FirstBurstLength,
+ * DefaultTime2Wait or DefaultTime2Retain parameter keys.
+	 * So, we set them to 'reply optional' here, and assume
+	 * the defaults from iscsi_parameters.h if the initiator
+ * is not RFC compliant and the keys are not negotiated.
+ */
+ if (!strcmp(param->name, MAXBURSTLENGTH))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ if (!strcmp(param->name, FIRSTBURSTLENGTH))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ if (!strcmp(param->name, DEFAULTTIME2WAIT))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ if (!strcmp(param->name, DEFAULTTIME2RETAIN))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ /*
+ * Required for gPXE iSCSI boot client
+ */
+ if (!strcmp(param->name, MAXCONNECTIONS))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ } else if (IS_PHASE_DECLARATIVE(param))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+}
+
+static int iscsi_check_boolean_value(struct iscsi_param *param, char *value)
+{
+ if (strcmp(value, YES) && strcmp(value, NO)) {
+ pr_err("Illegal value for \"%s\", must be either"
+ " \"%s\" or \"%s\".\n", param->name, YES, NO);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int iscsi_check_numerical_value(struct iscsi_param *param, char *value_ptr)
+{
+ char *tmpptr;
+ int value = 0;
+
+ value = simple_strtoul(value_ptr, &tmpptr, 0);
+
+/* #warning FIXME: Fix this */
+#if 0
+	if (strspn(tmpptr, WHITE_SPACE) != strlen(tmpptr)) {
+		pr_err("Illegal value \"%s\" for \"%s\".\n",
+			value_ptr, param->name);
+ return -1;
+ }
+#endif
+ if (IS_TYPERANGE_0_TO_2(param)) {
+ if ((value < 0) || (value > 2)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " between 0 and 2.\n", param->name);
+ return -1;
+ }
+ return 0;
+ }
+ if (IS_TYPERANGE_0_TO_3600(param)) {
+ if ((value < 0) || (value > 3600)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " between 0 and 3600.\n", param->name);
+ return -1;
+ }
+ return 0;
+ }
+ if (IS_TYPERANGE_0_TO_32767(param)) {
+ if ((value < 0) || (value > 32767)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " between 0 and 32767.\n", param->name);
+ return -1;
+ }
+ return 0;
+ }
+ if (IS_TYPERANGE_0_TO_65535(param)) {
+ if ((value < 0) || (value > 65535)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " between 0 and 65535.\n", param->name);
+ return -1;
+ }
+ return 0;
+ }
+ if (IS_TYPERANGE_1_TO_65535(param)) {
+ if ((value < 1) || (value > 65535)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " between 1 and 65535.\n", param->name);
+ return -1;
+ }
+ return 0;
+ }
+ if (IS_TYPERANGE_2_TO_3600(param)) {
+ if ((value < 2) || (value > 3600)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " between 2 and 3600.\n", param->name);
+ return -1;
+ }
+ return 0;
+ }
+ if (IS_TYPERANGE_512_TO_16777215(param)) {
+ if ((value < 512) || (value > 16777215)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " between 512 and 16777215.\n", param->name);
+ return -1;
+ }
+ return 0;
+ }
+
+ return 0;
+}
+
+static int iscsi_check_numerical_range_value(struct iscsi_param *param, char *value)
+{
+ char *left_val_ptr = NULL, *right_val_ptr = NULL;
+ char *tilde_ptr = NULL, *tmp_ptr = NULL;
+ u32 left_val, right_val, local_left_val, local_right_val;
+
+ if (strcmp(param->name, IFMARKINT) &&
+ strcmp(param->name, OFMARKINT)) {
+ pr_err("Only parameters \"%s\" or \"%s\" may contain a"
+ " numerical range value.\n", IFMARKINT, OFMARKINT);
+ return -1;
+ }
+
+ if (IS_PSTATE_PROPOSER(param))
+ return 0;
+
+ tilde_ptr = strchr(value, '~');
+ if (!tilde_ptr) {
+ pr_err("Unable to locate numerical range indicator"
+ " \"~\" for \"%s\".\n", param->name);
+ return -1;
+ }
+ *tilde_ptr = '\0';
+
+ left_val_ptr = value;
+ right_val_ptr = value + strlen(left_val_ptr) + 1;
+
+ if (iscsi_check_numerical_value(param, left_val_ptr) < 0)
+ return -1;
+ if (iscsi_check_numerical_value(param, right_val_ptr) < 0)
+ return -1;
+
+ left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+ right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+ *tilde_ptr = '~';
+
+ if (right_val < left_val) {
+ pr_err("Numerical range for parameter \"%s\" contains"
+ " a right value which is less than the left.\n",
+ param->name);
+ return -1;
+ }
+
+ /*
+ * For now, enforce reasonable defaults for [I,O]FMarkInt.
+ */
+ tilde_ptr = strchr(param->value, '~');
+ if (!tilde_ptr) {
+ pr_err("Unable to locate numerical range indicator"
+ " \"~\" for \"%s\".\n", param->name);
+ return -1;
+ }
+ *tilde_ptr = '\0';
+
+ left_val_ptr = param->value;
+ right_val_ptr = param->value + strlen(left_val_ptr) + 1;
+
+ local_left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+ local_right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+ *tilde_ptr = '~';
+
+ if (param->set_param) {
+ if ((left_val < local_left_val) ||
+ (right_val < local_left_val)) {
+ pr_err("Passed value range \"%u~%u\" is below"
+ " minimum left value \"%u\" for key \"%s\","
+ " rejecting.\n", left_val, right_val,
+ local_left_val, param->name);
+ return -1;
+ }
+ } else {
+ if ((left_val < local_left_val) &&
+ (right_val < local_left_val)) {
+ pr_err("Received value range \"%u~%u\" is"
+ " below minimum left value \"%u\" for key"
+ " \"%s\", rejecting.\n", left_val, right_val,
+ local_left_val, param->name);
+ SET_PSTATE_REJECT(param);
+ if (iscsi_update_param_value(param, REJECT) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
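+
+/*
+ * Worked example: against the default IFMarkInt value "2048~65535", a
+ * set_param request of "1024~8192" fails the minimum left value check
+ * (1024 < 2048), while "4096~8192" passes.
+ */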
+
+static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *value)
+{
+ if (IS_PSTATE_PROPOSER(param))
+ return 0;
+
+ if (IS_TYPERANGE_AUTH_PARAM(param)) {
+ if (strcmp(value, KRB5) && strcmp(value, SPKM1) &&
+ strcmp(value, SPKM2) && strcmp(value, SRP) &&
+ strcmp(value, CHAP) && strcmp(value, NONE)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " \"%s\", \"%s\", \"%s\", \"%s\", \"%s\""
+ " or \"%s\".\n", param->name, KRB5,
+ SPKM1, SPKM2, SRP, CHAP, NONE);
+ return -1;
+ }
+ }
+ if (IS_TYPERANGE_DIGEST_PARAM(param)) {
+ if (strcmp(value, CRC32C) && strcmp(value, NONE)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " \"%s\" or \"%s\".\n", param->name,
+ CRC32C, NONE);
+ return -1;
+ }
+ }
+ if (IS_TYPERANGE_SESSIONTYPE(param)) {
+ if (strcmp(value, DISCOVERY) && strcmp(value, NORMAL)) {
+ pr_err("Illegal value for \"%s\", must be"
+ " \"%s\" or \"%s\".\n", param->name,
+ DISCOVERY, NORMAL);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * This function picks a value from a numerical range; currently it just
+ * returns the lesser of the two right (maximum) values.
+ */
+static char *iscsi_get_value_from_number_range(
+ struct iscsi_param *param,
+ char *value)
+{
+ char *end_ptr, *tilde_ptr1 = NULL, *tilde_ptr2 = NULL;
+ u32 acceptor_right_value, proposer_right_value;
+
+ tilde_ptr1 = strchr(value, '~');
+ if (!tilde_ptr1)
+ return NULL;
+ *tilde_ptr1++ = '\0';
+ proposer_right_value = simple_strtoul(tilde_ptr1, &end_ptr, 0);
+
+ tilde_ptr2 = strchr(param->value, '~');
+ if (!tilde_ptr2)
+ return NULL;
+ *tilde_ptr2++ = '\0';
+ acceptor_right_value = simple_strtoul(tilde_ptr2, &end_ptr, 0);
+
+ return (acceptor_right_value >= proposer_right_value) ?
+ tilde_ptr1 : tilde_ptr2;
+}
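+
+/*
+ * Example: with an acceptor value of "2048~65535" and a proposed value
+ * of "2048~8192", the proposer's right value (8192) is the lesser of the
+ * two right values, so the string "8192" is returned.
+ */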
+
+static char *iscsi_check_valuelist_for_support(
+ struct iscsi_param *param,
+ char *value)
+{
+ char *tmp1 = NULL, *tmp2 = NULL;
+ char *acceptor_values = NULL, *proposer_values = NULL;
+
+ acceptor_values = param->value;
+ proposer_values = value;
+
+ do {
+ if (!proposer_values)
+ return NULL;
+ tmp1 = strchr(proposer_values, ',');
+ if (tmp1)
+ *tmp1 = '\0';
+ acceptor_values = param->value;
+ do {
+ if (!acceptor_values) {
+ if (tmp1)
+ *tmp1 = ',';
+ return NULL;
+ }
+ tmp2 = strchr(acceptor_values, ',');
+ if (tmp2)
+ *tmp2 = '\0';
+ if (!acceptor_values || !proposer_values) {
+ if (tmp1)
+ *tmp1 = ',';
+ if (tmp2)
+ *tmp2 = ',';
+ return NULL;
+ }
+ if (!strcmp(acceptor_values, proposer_values)) {
+ if (tmp2)
+ *tmp2 = ',';
+ goto out;
+ }
+ if (tmp2)
+ *tmp2++ = ',';
+
+ acceptor_values = tmp2;
+ if (!acceptor_values)
+ break;
+ } while (acceptor_values);
+ if (tmp1)
+ *tmp1++ = ',';
+ proposer_values = tmp1;
+ } while (proposer_values);
+
+out:
+ return proposer_values;
+}
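+
+/*
+ * Example: with an acceptor list of "CRC32C,None" and a proposer list of
+ * "None,CRC32C", the proposer's values are walked in order and "None" is
+ * matched in the acceptor list first, so "None" is returned as the
+ * negotiated value.
+ */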
+
+static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value)
+{
+ u8 acceptor_boolean_value = 0, proposer_boolean_value = 0;
+	char *negotiated_value = NULL;
+
+ if (IS_PSTATE_ACCEPTOR(param)) {
+ pr_err("Received key \"%s\" twice, protocol error.\n",
+ param->name);
+ return -1;
+ }
+
+ if (IS_PSTATE_REJECT(param))
+ return 0;
+
+ if (IS_TYPE_BOOL_AND(param)) {
+ if (!strcmp(value, YES))
+ proposer_boolean_value = 1;
+ if (!strcmp(param->value, YES))
+ acceptor_boolean_value = 1;
+		if (!acceptor_boolean_value || !proposer_boolean_value) {
+			if (iscsi_update_param_value(param, NO) < 0)
+				return -1;
+			if (!proposer_boolean_value)
+				SET_PSTATE_REPLY_OPTIONAL(param);
+		}
+ } else if (IS_TYPE_BOOL_OR(param)) {
+ if (!strcmp(value, YES))
+ proposer_boolean_value = 1;
+ if (!strcmp(param->value, YES))
+ acceptor_boolean_value = 1;
+ if (acceptor_boolean_value || proposer_boolean_value) {
+ if (iscsi_update_param_value(param, YES) < 0)
+ return -1;
+ if (proposer_boolean_value)
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ }
+ } else if (IS_TYPE_NUMBER(param)) {
+		char *tmpptr, buf[11];
+ u32 acceptor_value = simple_strtoul(param->value, &tmpptr, 0);
+ u32 proposer_value = simple_strtoul(value, &tmpptr, 0);
+
+		memset(buf, 0, sizeof(buf));
+
+ if (!strcmp(param->name, MAXCONNECTIONS) ||
+ !strcmp(param->name, MAXBURSTLENGTH) ||
+ !strcmp(param->name, FIRSTBURSTLENGTH) ||
+ !strcmp(param->name, MAXOUTSTANDINGR2T) ||
+ !strcmp(param->name, DEFAULTTIME2RETAIN) ||
+ !strcmp(param->name, ERRORRECOVERYLEVEL)) {
+ if (proposer_value > acceptor_value) {
+ sprintf(buf, "%u", acceptor_value);
+ if (iscsi_update_param_value(param,
+ &buf[0]) < 0)
+ return -1;
+ } else {
+ if (iscsi_update_param_value(param, value) < 0)
+ return -1;
+ }
+ } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+ if (acceptor_value > proposer_value) {
+ sprintf(buf, "%u", acceptor_value);
+ if (iscsi_update_param_value(param,
+ &buf[0]) < 0)
+ return -1;
+ } else {
+ if (iscsi_update_param_value(param, value) < 0)
+ return -1;
+ }
+ } else {
+ if (iscsi_update_param_value(param, value) < 0)
+ return -1;
+ }
+
+ if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ } else if (IS_TYPE_NUMBER_RANGE(param)) {
+		negotiated_value = iscsi_get_value_from_number_range(
+					param, value);
+		if (!negotiated_value)
+			return -1;
+		if (iscsi_update_param_value(param, negotiated_value) < 0)
+ return -1;
+ } else if (IS_TYPE_VALUE_LIST(param)) {
+		negotiated_value = iscsi_check_valuelist_for_support(
+					param, value);
+		if (!negotiated_value) {
+			pr_err("Proposer's value list \"%s\" contains"
+				" no valid values from Acceptor's value list"
+				" \"%s\".\n", value, param->value);
+			return -1;
+		}
+		if (iscsi_update_param_value(param, negotiated_value) < 0)
+ return -1;
+ } else if (IS_PHASE_DECLARATIVE(param)) {
+ if (iscsi_update_param_value(param, value) < 0)
+ return -1;
+ SET_PSTATE_REPLY_OPTIONAL(param);
+ }
+
+ return 0;
+}
+
+static int iscsi_check_proposer_state(struct iscsi_param *param, char *value)
+{
+ if (IS_PSTATE_RESPONSE_GOT(param)) {
+ pr_err("Received key \"%s\" twice, protocol error.\n",
+ param->name);
+ return -1;
+ }
+
+ if (IS_TYPE_NUMBER_RANGE(param)) {
+		u32 left_val = 0, right_val = 0, received_value = 0;
+ char *left_val_ptr = NULL, *right_val_ptr = NULL;
+ char *tilde_ptr = NULL, *tmp_ptr = NULL;
+
+ if (!strcmp(value, IRRELEVANT) || !strcmp(value, REJECT)) {
+ if (iscsi_update_param_value(param, value) < 0)
+ return -1;
+ return 0;
+ }
+
+ tilde_ptr = strchr(value, '~');
+ if (tilde_ptr) {
+ pr_err("Illegal \"~\" in response for \"%s\".\n",
+ param->name);
+ return -1;
+ }
+ tilde_ptr = strchr(param->value, '~');
+ if (!tilde_ptr) {
+ pr_err("Unable to locate numerical range"
+ " indicator \"~\" for \"%s\".\n", param->name);
+ return -1;
+ }
+ *tilde_ptr = '\0';
+
+ left_val_ptr = param->value;
+ right_val_ptr = param->value + strlen(left_val_ptr) + 1;
+ left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
+ right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+		received_value = simple_strtoul(value, &tmp_ptr, 0);
+
+ *tilde_ptr = '~';
+
+		if ((received_value < left_val) ||
+		    (received_value > right_val)) {
+			pr_err("Illegal response \"%s=%u\", value must"
+				" be between %u and %u.\n", param->name,
+				received_value, left_val, right_val);
+ return -1;
+ }
+ } else if (IS_TYPE_VALUE_LIST(param)) {
+ char *comma_ptr = NULL, *tmp_ptr = NULL;
+
+ comma_ptr = strchr(value, ',');
+ if (comma_ptr) {
+ pr_err("Illegal \",\" in response for \"%s\".\n",
+ param->name);
+ return -1;
+ }
+
+ tmp_ptr = iscsi_check_valuelist_for_support(param, value);
+ if (!tmp_ptr)
+ return -1;
+ }
+
+ if (iscsi_update_param_value(param, value) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int iscsi_check_value(struct iscsi_param *param, char *value)
+{
+ char *comma_ptr = NULL;
+
+ if (!strcmp(value, REJECT)) {
+ if (!strcmp(param->name, IFMARKINT) ||
+ !strcmp(param->name, OFMARKINT)) {
+ /*
+ * Reject is not fatal for [I,O]FMarkInt, and causes
+ * [I,O]FMarker to be reset to No. (See iSCSI v20 A.3.2)
+ */
+ SET_PSTATE_REJECT(param);
+ return 0;
+ }
+ pr_err("Received %s=%s\n", param->name, value);
+ return -1;
+ }
+ if (!strcmp(value, IRRELEVANT)) {
+ pr_debug("Received %s=%s\n", param->name, value);
+ SET_PSTATE_IRRELEVANT(param);
+ return 0;
+ }
+ if (!strcmp(value, NOTUNDERSTOOD)) {
+ if (!IS_PSTATE_PROPOSER(param)) {
+ pr_err("Received illegal offer %s=%s\n",
+ param->name, value);
+ return -1;
+ }
+
+/* #warning FIXME: Add check for X-ExtensionKey here */
+ pr_err("Standard iSCSI key \"%s\" cannot be answered"
+ " with \"%s\", protocol error.\n", param->name, value);
+ return -1;
+ }
+
+ do {
+ comma_ptr = NULL;
+ comma_ptr = strchr(value, ',');
+
+ if (comma_ptr && !IS_TYPE_VALUE_LIST(param)) {
+			pr_err("Detected value separator \",\", but"
+ " key \"%s\" does not allow a value list,"
+ " protocol error.\n", param->name);
+ return -1;
+ }
+ if (comma_ptr)
+ *comma_ptr = '\0';
+
+ if (strlen(value) > VALUE_MAXLEN) {
+ pr_err("Value for key \"%s\" exceeds %d,"
+ " protocol error.\n", param->name,
+ VALUE_MAXLEN);
+ return -1;
+ }
+
+ if (IS_TYPE_BOOL_AND(param) || IS_TYPE_BOOL_OR(param)) {
+ if (iscsi_check_boolean_value(param, value) < 0)
+ return -1;
+ } else if (IS_TYPE_NUMBER(param)) {
+ if (iscsi_check_numerical_value(param, value) < 0)
+ return -1;
+ } else if (IS_TYPE_NUMBER_RANGE(param)) {
+ if (iscsi_check_numerical_range_value(param, value) < 0)
+ return -1;
+ } else if (IS_TYPE_STRING(param) || IS_TYPE_VALUE_LIST(param)) {
+ if (iscsi_check_string_or_list_value(param, value) < 0)
+ return -1;
+ } else {
+ pr_err("Huh? 0x%02x\n", param->type);
+ return -1;
+ }
+
+ if (comma_ptr)
+ *comma_ptr++ = ',';
+
+ value = comma_ptr;
+ } while (value);
+
+ return 0;
+}
+
+static struct iscsi_param *__iscsi_check_key(
+ char *key,
+ int sender,
+ struct iscsi_param_list *param_list)
+{
+ struct iscsi_param *param;
+
+ if (strlen(key) > KEY_MAXLEN) {
+ pr_err("Length of key name \"%s\" exceeds %d.\n",
+ key, KEY_MAXLEN);
+ return NULL;
+ }
+
+ param = iscsi_find_param_from_key(key, param_list);
+ if (!param)
+ return NULL;
+
+ if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) {
+ pr_err("Key \"%s\" may not be sent to %s,"
+ " protocol error.\n", param->name,
+ (sender & SENDER_RECEIVER) ? "target" : "initiator");
+ return NULL;
+ }
+
+ if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) {
+ pr_err("Key \"%s\" may not be sent to %s,"
+ " protocol error.\n", param->name,
+ (sender & SENDER_RECEIVER) ? "initiator" : "target");
+ return NULL;
+ }
+
+ return param;
+}
+
+static struct iscsi_param *iscsi_check_key(
+ char *key,
+ int phase,
+ int sender,
+ struct iscsi_param_list *param_list)
+{
+ struct iscsi_param *param;
+ /*
+ * Key name length must not exceed 63 bytes. (See iSCSI v20 5.1)
+ */
+ if (strlen(key) > KEY_MAXLEN) {
+ pr_err("Length of key name \"%s\" exceeds %d.\n",
+ key, KEY_MAXLEN);
+ return NULL;
+ }
+
+ param = iscsi_find_param_from_key(key, param_list);
+ if (!param)
+ return NULL;
+
+ if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) {
+ pr_err("Key \"%s\" may not be sent to %s,"
+ " protocol error.\n", param->name,
+ (sender & SENDER_RECEIVER) ? "target" : "initiator");
+ return NULL;
+ }
+ if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) {
+ pr_err("Key \"%s\" may not be sent to %s,"
+ " protocol error.\n", param->name,
+ (sender & SENDER_RECEIVER) ? "initiator" : "target");
+ return NULL;
+ }
+
+ if (IS_PSTATE_ACCEPTOR(param)) {
+ pr_err("Key \"%s\" received twice, protocol error.\n",
+ key);
+ return NULL;
+ }
+
+ if (!phase)
+ return param;
+
+ if (!(param->phase & phase)) {
+ pr_err("Key \"%s\" may not be negotiated during ",
+ param->name);
+ switch (phase) {
+ case PHASE_SECURITY:
+ pr_debug("Security phase.\n");
+ break;
+ case PHASE_OPERATIONAL:
+			pr_debug("Operational phase.\n");
+			break;
+		default:
+ pr_debug("Unknown phase.\n");
+ }
+ return NULL;
+ }
+
+ return param;
+}
+
+static int iscsi_enforce_integrity_rules(
+ u8 phase,
+ struct iscsi_param_list *param_list)
+{
+ char *tmpptr;
+ u8 DataSequenceInOrder = 0;
+ u8 ErrorRecoveryLevel = 0, SessionType = 0;
+ u8 IFMarker = 0, OFMarker = 0;
+ u8 IFMarkInt_Reject = 0, OFMarkInt_Reject = 0;
+ u32 FirstBurstLength = 0, MaxBurstLength = 0;
+ struct iscsi_param *param = NULL;
+
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ if (!(param->phase & phase))
+ continue;
+ if (!strcmp(param->name, SESSIONTYPE))
+ if (!strcmp(param->value, NORMAL))
+ SessionType = 1;
+ if (!strcmp(param->name, ERRORRECOVERYLEVEL))
+ ErrorRecoveryLevel = simple_strtoul(param->value,
+ &tmpptr, 0);
+ if (!strcmp(param->name, DATASEQUENCEINORDER))
+ if (!strcmp(param->value, YES))
+ DataSequenceInOrder = 1;
+ if (!strcmp(param->name, MAXBURSTLENGTH))
+ MaxBurstLength = simple_strtoul(param->value,
+ &tmpptr, 0);
+ if (!strcmp(param->name, IFMARKER))
+ if (!strcmp(param->value, YES))
+ IFMarker = 1;
+ if (!strcmp(param->name, OFMARKER))
+ if (!strcmp(param->value, YES))
+ OFMarker = 1;
+ if (!strcmp(param->name, IFMARKINT))
+ if (!strcmp(param->value, REJECT))
+ IFMarkInt_Reject = 1;
+ if (!strcmp(param->name, OFMARKINT))
+ if (!strcmp(param->value, REJECT))
+ OFMarkInt_Reject = 1;
+ }
+
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ if (!(param->phase & phase))
+ continue;
+ if (!SessionType && (!IS_PSTATE_ACCEPTOR(param) &&
+ (strcmp(param->name, IFMARKER) &&
+ strcmp(param->name, OFMARKER) &&
+ strcmp(param->name, IFMARKINT) &&
+ strcmp(param->name, OFMARKINT))))
+ continue;
+ if (!strcmp(param->name, MAXOUTSTANDINGR2T) &&
+ DataSequenceInOrder && (ErrorRecoveryLevel > 0)) {
+ if (strcmp(param->value, "1")) {
+ if (iscsi_update_param_value(param, "1") < 0)
+ return -1;
+ pr_debug("Reset \"%s\" to \"%s\".\n",
+ param->name, param->value);
+ }
+ }
+ if (!strcmp(param->name, MAXCONNECTIONS) && !SessionType) {
+ if (strcmp(param->value, "1")) {
+ if (iscsi_update_param_value(param, "1") < 0)
+ return -1;
+ pr_debug("Reset \"%s\" to \"%s\".\n",
+ param->name, param->value);
+ }
+ }
+ if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+ FirstBurstLength = simple_strtoul(param->value,
+ &tmpptr, 0);
+ if (FirstBurstLength > MaxBurstLength) {
+ char tmpbuf[10];
+ memset(tmpbuf, 0, 10);
+ sprintf(tmpbuf, "%u", MaxBurstLength);
+ if (iscsi_update_param_value(param, tmpbuf))
+ return -1;
+ pr_debug("Reset \"%s\" to \"%s\".\n",
+ param->name, param->value);
+ }
+ }
+ if (!strcmp(param->name, IFMARKER) && IFMarkInt_Reject) {
+ if (iscsi_update_param_value(param, NO) < 0)
+ return -1;
+ IFMarker = 0;
+ pr_debug("Reset \"%s\" to \"%s\".\n",
+ param->name, param->value);
+ }
+ if (!strcmp(param->name, OFMARKER) && OFMarkInt_Reject) {
+ if (iscsi_update_param_value(param, NO) < 0)
+ return -1;
+ OFMarker = 0;
+ pr_debug("Reset \"%s\" to \"%s\".\n",
+ param->name, param->value);
+ }
+ if (!strcmp(param->name, IFMARKINT) && !IFMarker) {
+ if (!strcmp(param->value, REJECT))
+ continue;
+ param->state &= ~PSTATE_NEGOTIATE;
+ if (iscsi_update_param_value(param, IRRELEVANT) < 0)
+ return -1;
+ pr_debug("Reset \"%s\" to \"%s\".\n",
+ param->name, param->value);
+ }
+ if (!strcmp(param->name, OFMARKINT) && !OFMarker) {
+ if (!strcmp(param->value, REJECT))
+ continue;
+ param->state &= ~PSTATE_NEGOTIATE;
+ if (iscsi_update_param_value(param, IRRELEVANT) < 0)
+ return -1;
+ pr_debug("Reset \"%s\" to \"%s\".\n",
+ param->name, param->value);
+ }
+ }
+
+ return 0;
+}
+
+int iscsi_decode_text_input(
+ u8 phase,
+ u8 sender,
+ char *textbuf,
+ u32 length,
+ struct iscsi_param_list *param_list)
+{
+ char *tmpbuf, *start = NULL, *end = NULL;
+
+ tmpbuf = kzalloc(length + 1, GFP_KERNEL);
+ if (!tmpbuf) {
+ pr_err("Unable to allocate memory for tmpbuf.\n");
+ return -1;
+ }
+
+ memcpy(tmpbuf, textbuf, length);
+ tmpbuf[length] = '\0';
+ start = tmpbuf;
+ end = (start + length);
+
+ while (start < end) {
+ char *key, *value;
+ struct iscsi_param *param;
+
+ if (iscsi_extract_key_value(start, &key, &value) < 0) {
+ kfree(tmpbuf);
+ return -1;
+ }
+
+ pr_debug("Got key: %s=%s\n", key, value);
+
+ if (phase & PHASE_SECURITY) {
+ if (iscsi_check_for_auth_key(key) > 0) {
+ char *tmpptr = key + strlen(key);
+ *tmpptr = '=';
+ kfree(tmpbuf);
+ return 1;
+ }
+ }
+
+ param = iscsi_check_key(key, phase, sender, param_list);
+ if (!param) {
+ if (iscsi_add_notunderstood_response(key,
+ value, param_list) < 0) {
+ kfree(tmpbuf);
+ return -1;
+ }
+ start += strlen(key) + strlen(value) + 2;
+ continue;
+ }
+ if (iscsi_check_value(param, value) < 0) {
+ kfree(tmpbuf);
+ return -1;
+ }
+
+ start += strlen(key) + strlen(value) + 2;
+
+ if (IS_PSTATE_PROPOSER(param)) {
+ if (iscsi_check_proposer_state(param, value) < 0) {
+ kfree(tmpbuf);
+ return -1;
+ }
+ SET_PSTATE_RESPONSE_GOT(param);
+ } else {
+ if (iscsi_check_acceptor_state(param, value) < 0) {
+ kfree(tmpbuf);
+ return -1;
+ }
+ SET_PSTATE_ACCEPTOR(param);
+ }
+ }
+
+ kfree(tmpbuf);
+ return 0;
+}
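+
+/*
+ * Note: the login text payload is a series of NUL-terminated "key=value"
+ * pairs, e.g.
+ *
+ *	InitialR2T=No\0ImmediateData=Yes\0
+ *
+ * which is why the cursor above advances by strlen(key) + strlen(value)
+ * + 2, accounting for the overwritten '=' and the trailing NUL.
+ */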
+
+int iscsi_encode_text_output(
+ u8 phase,
+ u8 sender,
+ char *textbuf,
+ u32 *length,
+ struct iscsi_param_list *param_list)
+{
+ char *output_buf = NULL;
+ struct iscsi_extra_response *er;
+ struct iscsi_param *param;
+
+ output_buf = textbuf + *length;
+
+ if (iscsi_enforce_integrity_rules(phase, param_list) < 0)
+ return -1;
+
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ if (!(param->sender & sender))
+ continue;
+ if (IS_PSTATE_ACCEPTOR(param) &&
+ !IS_PSTATE_RESPONSE_SENT(param) &&
+ !IS_PSTATE_REPLY_OPTIONAL(param) &&
+ (param->phase & phase)) {
+ *length += sprintf(output_buf, "%s=%s",
+ param->name, param->value);
+ *length += 1;
+ output_buf = textbuf + *length;
+ SET_PSTATE_RESPONSE_SENT(param);
+ pr_debug("Sending key: %s=%s\n",
+ param->name, param->value);
+ continue;
+ }
+ if (IS_PSTATE_NEGOTIATE(param) &&
+ !IS_PSTATE_ACCEPTOR(param) &&
+ !IS_PSTATE_PROPOSER(param) &&
+ (param->phase & phase)) {
+ *length += sprintf(output_buf, "%s=%s",
+ param->name, param->value);
+ *length += 1;
+ output_buf = textbuf + *length;
+ SET_PSTATE_PROPOSER(param);
+ iscsi_check_proposer_for_optional_reply(param);
+ pr_debug("Sending key: %s=%s\n",
+ param->name, param->value);
+ }
+ }
+
+ list_for_each_entry(er, ¶m_list->extra_response_list, er_list) {
+ *length += sprintf(output_buf, "%s=%s", er->key, er->value);
+ *length += 1;
+ output_buf = textbuf + *length;
+ pr_debug("Sending key: %s=%s\n", er->key, er->value);
+ }
+ iscsi_release_extra_responses(param_list);
+
+ return 0;
+}
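+
+/*
+ * Note: each sprintf() above NUL-terminates its "key=value" pair, and the
+ * extra *length += 1 keeps that NUL in the output as the pair separator
+ * expected by iscsi_decode_text_input().
+ */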
+
+int iscsi_check_negotiated_keys(struct iscsi_param_list *param_list)
+{
+ int ret = 0;
+ struct iscsi_param *param;
+
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ if (IS_PSTATE_NEGOTIATE(param) &&
+ IS_PSTATE_PROPOSER(param) &&
+ !IS_PSTATE_RESPONSE_GOT(param) &&
+ !IS_PSTATE_REPLY_OPTIONAL(param) &&
+ !IS_PHASE_DECLARATIVE(param)) {
+ pr_err("No response for proposed key \"%s\".\n",
+ param->name);
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
+
+int iscsi_change_param_value(
+ char *keyvalue,
+ struct iscsi_param_list *param_list,
+ int check_key)
+{
+ char *key = NULL, *value = NULL;
+ struct iscsi_param *param;
+ int sender = 0;
+
+ if (iscsi_extract_key_value(keyvalue, &key, &value) < 0)
+ return -1;
+
+ if (!check_key) {
+ param = __iscsi_check_key(keyvalue, sender, param_list);
+ if (!param)
+ return -1;
+ } else {
+ param = iscsi_check_key(keyvalue, 0, sender, param_list);
+ if (!param)
+ return -1;
+
+ param->set_param = 1;
+ if (iscsi_check_value(param, value) < 0) {
+ param->set_param = 0;
+ return -1;
+ }
+ param->set_param = 0;
+ }
+
+ if (iscsi_update_param_value(param, value) < 0)
+ return -1;
+
+ return 0;
+}
+
+void iscsi_set_connection_parameters(
+ struct iscsi_conn_ops *ops,
+ struct iscsi_param_list *param_list)
+{
+ char *tmpptr;
+ struct iscsi_param *param;
+
+ pr_debug("---------------------------------------------------"
+ "---------------\n");
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param))
+ continue;
+ if (!strcmp(param->name, AUTHMETHOD)) {
+ pr_debug("AuthMethod: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, HEADERDIGEST)) {
+ ops->HeaderDigest = !strcmp(param->value, CRC32C);
+ pr_debug("HeaderDigest: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, DATADIGEST)) {
+ ops->DataDigest = !strcmp(param->value, CRC32C);
+ pr_debug("DataDigest: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) {
+ ops->MaxRecvDataSegmentLength =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("MaxRecvDataSegmentLength: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, OFMARKER)) {
+ ops->OFMarker = !strcmp(param->value, YES);
+ pr_debug("OFMarker: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, IFMARKER)) {
+ ops->IFMarker = !strcmp(param->value, YES);
+ pr_debug("IFMarker: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, OFMARKINT)) {
+ ops->OFMarkInt =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("OFMarkInt: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, IFMARKINT)) {
+ ops->IFMarkInt =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("IFMarkInt: %s\n",
+ param->value);
+ }
+ }
+ pr_debug("----------------------------------------------------"
+ "--------------\n");
+}
+
+void iscsi_set_session_parameters(
+ struct iscsi_sess_ops *ops,
+ struct iscsi_param_list *param_list,
+ int leading)
+{
+ char *tmpptr;
+ struct iscsi_param *param;
+
+ pr_debug("----------------------------------------------------"
+ "--------------\n");
+ list_for_each_entry(param, ¶m_list->param_list, p_list) {
+ if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param))
+ continue;
+ if (!strcmp(param->name, INITIATORNAME)) {
+ if (!param->value)
+ continue;
+ if (leading)
+ snprintf(ops->InitiatorName,
+ sizeof(ops->InitiatorName),
+ "%s", param->value);
+ pr_debug("InitiatorName: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, INITIATORALIAS)) {
+ if (!param->value)
+ continue;
+ snprintf(ops->InitiatorAlias,
+ sizeof(ops->InitiatorAlias),
+ "%s", param->value);
+ pr_debug("InitiatorAlias: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, TARGETNAME)) {
+ if (!param->value)
+ continue;
+ if (leading)
+ snprintf(ops->TargetName,
+ sizeof(ops->TargetName),
+ "%s", param->value);
+ pr_debug("TargetName: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, TARGETALIAS)) {
+ if (!param->value)
+ continue;
+ snprintf(ops->TargetAlias, sizeof(ops->TargetAlias),
+ "%s", param->value);
+ pr_debug("TargetAlias: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) {
+ ops->TargetPortalGroupTag =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("TargetPortalGroupTag: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, MAXCONNECTIONS)) {
+ ops->MaxConnections =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("MaxConnections: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, INITIALR2T)) {
+ ops->InitialR2T = !strcmp(param->value, YES);
+ pr_debug("InitialR2T: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, IMMEDIATEDATA)) {
+ ops->ImmediateData = !strcmp(param->value, YES);
+ pr_debug("ImmediateData: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, MAXBURSTLENGTH)) {
+ ops->MaxBurstLength =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("MaxBurstLength: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, FIRSTBURSTLENGTH)) {
+ ops->FirstBurstLength =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("FirstBurstLength: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) {
+ ops->DefaultTime2Wait =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("DefaultTime2Wait: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) {
+ ops->DefaultTime2Retain =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("DefaultTime2Retain: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) {
+ ops->MaxOutstandingR2T =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("MaxOutstandingR2T: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, DATAPDUINORDER)) {
+ ops->DataPDUInOrder = !strcmp(param->value, YES);
+ pr_debug("DataPDUInOrder: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, DATASEQUENCEINORDER)) {
+ ops->DataSequenceInOrder = !strcmp(param->value, YES);
+ pr_debug("DataSequenceInOrder: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) {
+ ops->ErrorRecoveryLevel =
+ simple_strtoul(param->value, &tmpptr, 0);
+ pr_debug("ErrorRecoveryLevel: %s\n",
+ param->value);
+ } else if (!strcmp(param->name, SESSIONTYPE)) {
+ ops->SessionType = !strcmp(param->value, DISCOVERY);
+ pr_debug("SessionType: %s\n",
+ param->value);
+ }
+ }
+ pr_debug("----------------------------------------------------"
+ "--------------\n");
+
+}
diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h
new file mode 100644
index 0000000..6a37fd6
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_parameters.h
@@ -0,0 +1,269 @@
+#ifndef ISCSI_PARAMETERS_H
+#define ISCSI_PARAMETERS_H
+
+struct iscsi_extra_response {
+ char key[64];
+ char value[32];
+ struct list_head er_list;
+} ____cacheline_aligned;
+
+struct iscsi_param {
+ char *name;
+ char *value;
+ u8 set_param;
+ u8 phase;
+ u8 scope;
+ u8 sender;
+ u8 type;
+ u8 use;
+ u16 type_range;
+ u32 state;
+ struct list_head p_list;
+} ____cacheline_aligned;
+
+extern int iscsi_login_rx_data(struct iscsi_conn *, char *, int);
+extern int iscsi_login_tx_data(struct iscsi_conn *, char *, char *, int);
+extern void iscsi_dump_conn_ops(struct iscsi_conn_ops *);
+extern void iscsi_dump_sess_ops(struct iscsi_sess_ops *);
+extern void iscsi_print_params(struct iscsi_param_list *);
+extern int iscsi_create_default_params(struct iscsi_param_list **);
+extern int iscsi_set_keys_to_negotiate(int, struct iscsi_param_list *);
+extern int iscsi_set_keys_irrelevant_for_discovery(struct iscsi_param_list *);
+extern int iscsi_copy_param_list(struct iscsi_param_list **,
+ struct iscsi_param_list *, int);
+extern int iscsi_change_param_value(char *, struct iscsi_param_list *, int);
+extern void iscsi_release_param_list(struct iscsi_param_list *);
+extern struct iscsi_param *iscsi_find_param_from_key(char *, struct iscsi_param_list *);
+extern int iscsi_extract_key_value(char *, char **, char **);
+extern int iscsi_update_param_value(struct iscsi_param *, char *);
+extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_param_list *);
+extern int iscsi_encode_text_output(u8, u8, char *, u32 *,
+ struct iscsi_param_list *);
+extern int iscsi_check_negotiated_keys(struct iscsi_param_list *);
+extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *,
+ struct iscsi_param_list *);
+extern void iscsi_set_session_parameters(struct iscsi_sess_ops *,
+ struct iscsi_param_list *, int);
+
+#define YES "Yes"
+#define NO "No"
+#define ALL "All"
+#define IRRELEVANT "Irrelevant"
+#define NONE "None"
+#define NOTUNDERSTOOD "NotUnderstood"
+#define REJECT "Reject"
+
+/*
+ * The Parameter Names.
+ */
+#define AUTHMETHOD "AuthMethod"
+#define HEADERDIGEST "HeaderDigest"
+#define DATADIGEST "DataDigest"
+#define MAXCONNECTIONS "MaxConnections"
+#define SENDTARGETS "SendTargets"
+#define TARGETNAME "TargetName"
+#define INITIATORNAME "InitiatorName"
+#define TARGETALIAS "TargetAlias"
+#define INITIATORALIAS "InitiatorAlias"
+#define TARGETADDRESS "TargetAddress"
+#define TARGETPORTALGROUPTAG "TargetPortalGroupTag"
+#define INITIALR2T "InitialR2T"
+#define IMMEDIATEDATA "ImmediateData"
+#define MAXRECVDATASEGMENTLENGTH "MaxRecvDataSegmentLength"
+#define MAXBURSTLENGTH "MaxBurstLength"
+#define FIRSTBURSTLENGTH "FirstBurstLength"
+#define DEFAULTTIME2WAIT "DefaultTime2Wait"
+#define DEFAULTTIME2RETAIN "DefaultTime2Retain"
+#define MAXOUTSTANDINGR2T "MaxOutstandingR2T"
+#define DATAPDUINORDER "DataPDUInOrder"
+#define DATASEQUENCEINORDER "DataSequenceInOrder"
+#define ERRORRECOVERYLEVEL "ErrorRecoveryLevel"
+#define SESSIONTYPE "SessionType"
+#define IFMARKER "IFMarker"
+#define OFMARKER "OFMarker"
+#define IFMARKINT "IFMarkInt"
+#define OFMARKINT "OFMarkInt"
+#define X_EXTENSIONKEY "X-com.sbei.version"
+#define X_EXTENSIONKEY_CISCO_NEW "X-com.cisco.protocol"
+#define X_EXTENSIONKEY_CISCO_OLD "X-com.cisco.iscsi.draft"
+
+/*
+ * For AuthMethod.
+ */
+#define KRB5 "KRB5"
+#define SPKM1 "SPKM1"
+#define SPKM2 "SPKM2"
+#define SRP "SRP"
+#define CHAP "CHAP"
+
+/*
+ * Initial values for Parameter Negotiation.
+ */
+#define INITIAL_AUTHMETHOD CHAP
+#define INITIAL_HEADERDIGEST "CRC32C,None"
+#define INITIAL_DATADIGEST "CRC32C,None"
+#define INITIAL_MAXCONNECTIONS "1"
+#define INITIAL_SENDTARGETS ALL
+#define INITIAL_TARGETNAME "LIO.Target"
+#define INITIAL_INITIATORNAME "LIO.Initiator"
+#define INITIAL_TARGETALIAS "LIO Target"
+#define INITIAL_INITIATORALIAS "LIO Initiator"
+#define INITIAL_TARGETADDRESS "0.0.0.0:0000,0"
+#define INITIAL_TARGETPORTALGROUPTAG "1"
+#define INITIAL_INITIALR2T YES
+#define INITIAL_IMMEDIATEDATA YES
+#define INITIAL_MAXRECVDATASEGMENTLENGTH "8192"
+#define INITIAL_MAXBURSTLENGTH "262144"
+#define INITIAL_FIRSTBURSTLENGTH "65536"
+#define INITIAL_DEFAULTTIME2WAIT "2"
+#define INITIAL_DEFAULTTIME2RETAIN "20"
+#define INITIAL_MAXOUTSTANDINGR2T "1"
+#define INITIAL_DATAPDUINORDER YES
+#define INITIAL_DATASEQUENCEINORDER YES
+#define INITIAL_ERRORRECOVERYLEVEL "0"
+#define INITIAL_SESSIONTYPE NORMAL
+#define INITIAL_IFMARKER NO
+#define INITIAL_OFMARKER NO
+#define INITIAL_IFMARKINT "2048~65535"
+#define INITIAL_OFMARKINT "2048~65535"
+
+/*
+ * For [Header,Data]Digests.
+ */
+#define CRC32C "CRC32C"
+
+/*
+ * For SessionType.
+ */
+#define DISCOVERY "Discovery"
+#define NORMAL "Normal"
+
+/*
+ * struct iscsi_param->use
+ */
+#define USE_LEADING_ONLY 0x01
+#define USE_INITIAL_ONLY 0x02
+#define USE_ALL 0x04
+
+#define IS_USE_LEADING_ONLY(p) ((p)->use & USE_LEADING_ONLY)
+#define IS_USE_INITIAL_ONLY(p) ((p)->use & USE_INITIAL_ONLY)
+#define IS_USE_ALL(p) ((p)->use & USE_ALL)
+
+#define SET_USE_INITIAL_ONLY(p) ((p)->use |= USE_INITIAL_ONLY)
+
+/*
+ * struct iscsi_param->sender
+ */
+#define SENDER_INITIATOR 0x01
+#define SENDER_TARGET 0x02
+#define SENDER_BOTH 0x03
+/* Used in iscsi_check_key() */
+#define SENDER_RECEIVER 0x04
+
+#define IS_SENDER_INITIATOR(p) ((p)->sender & SENDER_INITIATOR)
+#define IS_SENDER_TARGET(p) ((p)->sender & SENDER_TARGET)
+#define IS_SENDER_BOTH(p) ((p)->sender & SENDER_BOTH)
+
+/*
+ * struct iscsi_param->scope
+ */
+#define SCOPE_CONNECTION_ONLY 0x01
+#define SCOPE_SESSION_WIDE 0x02
+
+#define IS_SCOPE_CONNECTION_ONLY(p) ((p)->scope & SCOPE_CONNECTION_ONLY)
+#define IS_SCOPE_SESSION_WIDE(p) ((p)->scope & SCOPE_SESSION_WIDE)
+
+/*
+ * struct iscsi_param->phase
+ */
+#define PHASE_SECURITY 0x01
+#define PHASE_OPERATIONAL 0x02
+#define PHASE_DECLARATIVE 0x04
+#define PHASE_FFP0 0x08
+
+#define IS_PHASE_SECURITY(p) ((p)->phase & PHASE_SECURITY)
+#define IS_PHASE_OPERATIONAL(p) ((p)->phase & PHASE_OPERATIONAL)
+#define IS_PHASE_DECLARATIVE(p) ((p)->phase & PHASE_DECLARATIVE)
+#define IS_PHASE_FFP0(p) ((p)->phase & PHASE_FFP0)
+
+/*
+ * struct iscsi_param->type
+ */
+#define TYPE_BOOL_AND 0x01
+#define TYPE_BOOL_OR 0x02
+#define TYPE_NUMBER 0x04
+#define TYPE_NUMBER_RANGE 0x08
+#define TYPE_STRING 0x10
+#define TYPE_VALUE_LIST 0x20
+
+#define IS_TYPE_BOOL_AND(p) ((p)->type & TYPE_BOOL_AND)
+#define IS_TYPE_BOOL_OR(p) ((p)->type & TYPE_BOOL_OR)
+#define IS_TYPE_NUMBER(p) ((p)->type & TYPE_NUMBER)
+#define IS_TYPE_NUMBER_RANGE(p) ((p)->type & TYPE_NUMBER_RANGE)
+#define IS_TYPE_STRING(p) ((p)->type & TYPE_STRING)
+#define IS_TYPE_VALUE_LIST(p) ((p)->type & TYPE_VALUE_LIST)
+
+/*
+ * struct iscsi_param->type_range
+ */
+#define TYPERANGE_BOOL_AND 0x0001
+#define TYPERANGE_BOOL_OR 0x0002
+#define TYPERANGE_0_TO_2 0x0004
+#define TYPERANGE_0_TO_3600 0x0008
+#define TYPERANGE_0_TO_32767 0x0010
+#define TYPERANGE_0_TO_65535 0x0020
+#define TYPERANGE_1_TO_65535 0x0040
+#define TYPERANGE_2_TO_3600 0x0080
+#define TYPERANGE_512_TO_16777215 0x0100
+#define TYPERANGE_AUTH 0x0200
+#define TYPERANGE_DIGEST 0x0400
+#define TYPERANGE_ISCSINAME 0x0800
+#define TYPERANGE_MARKINT 0x1000
+#define TYPERANGE_SESSIONTYPE 0x2000
+#define TYPERANGE_TARGETADDRESS 0x4000
+#define TYPERANGE_UTF8 0x8000
+
+#define IS_TYPERANGE_0_TO_2(p) ((p)->type_range & TYPERANGE_0_TO_2)
+#define IS_TYPERANGE_0_TO_3600(p) ((p)->type_range & TYPERANGE_0_TO_3600)
+#define IS_TYPERANGE_0_TO_32767(p) ((p)->type_range & TYPERANGE_0_TO_32767)
+#define IS_TYPERANGE_0_TO_65535(p) ((p)->type_range & TYPERANGE_0_TO_65535)
+#define IS_TYPERANGE_1_TO_65535(p) ((p)->type_range & TYPERANGE_1_TO_65535)
+#define IS_TYPERANGE_2_TO_3600(p) ((p)->type_range & TYPERANGE_2_TO_3600)
+#define IS_TYPERANGE_512_TO_16777215(p) ((p)->type_range & \
+ TYPERANGE_512_TO_16777215)
+#define IS_TYPERANGE_AUTH_PARAM(p) ((p)->type_range & TYPERANGE_AUTH)
+#define IS_TYPERANGE_DIGEST_PARAM(p) ((p)->type_range & TYPERANGE_DIGEST)
+#define IS_TYPERANGE_SESSIONTYPE(p) ((p)->type_range & \
+ TYPERANGE_SESSIONTYPE)
+
+/*
+ * struct iscsi_param->state
+ */
+#define PSTATE_ACCEPTOR 0x01
+#define PSTATE_NEGOTIATE 0x02
+#define PSTATE_PROPOSER 0x04
+#define PSTATE_IRRELEVANT 0x08
+#define PSTATE_REJECT 0x10
+#define PSTATE_REPLY_OPTIONAL 0x20
+#define PSTATE_RESPONSE_GOT 0x40
+#define PSTATE_RESPONSE_SENT 0x80
+
+#define IS_PSTATE_ACCEPTOR(p) ((p)->state & PSTATE_ACCEPTOR)
+#define IS_PSTATE_NEGOTIATE(p) ((p)->state & PSTATE_NEGOTIATE)
+#define IS_PSTATE_PROPOSER(p) ((p)->state & PSTATE_PROPOSER)
+#define IS_PSTATE_IRRELEVANT(p) ((p)->state & PSTATE_IRRELEVANT)
+#define IS_PSTATE_REJECT(p) ((p)->state & PSTATE_REJECT)
+#define IS_PSTATE_REPLY_OPTIONAL(p) ((p)->state & PSTATE_REPLY_OPTIONAL)
+#define IS_PSTATE_RESPONSE_GOT(p) ((p)->state & PSTATE_RESPONSE_GOT)
+#define IS_PSTATE_RESPONSE_SENT(p) ((p)->state & PSTATE_RESPONSE_SENT)
+
+#define SET_PSTATE_ACCEPTOR(p) ((p)->state |= PSTATE_ACCEPTOR)
+#define SET_PSTATE_NEGOTIATE(p) ((p)->state |= PSTATE_NEGOTIATE)
+#define SET_PSTATE_PROPOSER(p) ((p)->state |= PSTATE_PROPOSER)
+#define SET_PSTATE_IRRELEVANT(p) ((p)->state |= PSTATE_IRRELEVANT)
+#define SET_PSTATE_REJECT(p) ((p)->state |= PSTATE_REJECT)
+#define SET_PSTATE_REPLY_OPTIONAL(p) ((p)->state |= PSTATE_REPLY_OPTIONAL)
+#define SET_PSTATE_RESPONSE_GOT(p) ((p)->state |= PSTATE_RESPONSE_GOT)
+#define SET_PSTATE_RESPONSE_SENT(p) ((p)->state |= PSTATE_RESPONSE_SENT)
+
+#endif /* ISCSI_PARAMETERS_H */
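+
+/*
+ * Illustrative use of the state helpers above: a key marked for
+ * negotiation that has been neither offered nor answered yet would be
+ * proposed as follows:
+ *
+ *	if (IS_PSTATE_NEGOTIATE(param) && !IS_PSTATE_PROPOSER(param) &&
+ *	    !IS_PSTATE_ACCEPTOR(param))
+ *		SET_PSTATE_PROPOSER(param);
+ */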
diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c
new file mode 100644
index 0000000..fc69408
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c
@@ -0,0 +1,664 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI DataSequenceInOrder=No
+ * and DataPDUInOrder=No.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/slab.h>
+#include <linux/random.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_seq_pdu_list.h"
+
+#define OFFLOAD_BUF_SIZE 32768
+
+void iscsit_dump_seq_list(struct iscsi_cmd *cmd)
+{
+ int i;
+ struct iscsi_seq *seq;
+
+ pr_debug("Dumping Sequence List for ITT: 0x%08x:\n",
+ cmd->init_task_tag);
+
+ for (i = 0; i < cmd->seq_count; i++) {
+ seq = &cmd->seq_list[i];
+ pr_debug("i: %d, pdu_start: %d, pdu_count: %d,"
+ " offset: %d, xfer_len: %d, seq_send_order: %d,"
+ " seq_no: %d\n", i, seq->pdu_start, seq->pdu_count,
+ seq->offset, seq->xfer_len, seq->seq_send_order,
+ seq->seq_no);
+ }
+}
+
+void iscsit_dump_pdu_list(struct iscsi_cmd *cmd)
+{
+ int i;
+ struct iscsi_pdu *pdu;
+
+ pr_debug("Dumping PDU List for ITT: 0x%08x:\n",
+ cmd->init_task_tag);
+
+ for (i = 0; i < cmd->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i];
+ pr_debug("i: %d, offset: %d, length: %d,"
+ " pdu_send_order: %d, seq_no: %d\n", i, pdu->offset,
+ pdu->length, pdu->pdu_send_order, pdu->seq_no);
+ }
+}
+
+static void iscsit_ordered_seq_lists(
+ struct iscsi_cmd *cmd,
+ u8 type)
+{
+ u32 i, seq_count = 0;
+
+ for (i = 0; i < cmd->seq_count; i++) {
+ if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+ continue;
+ cmd->seq_list[i].seq_send_order = seq_count++;
+ }
+}
+
+static void iscsit_ordered_pdu_lists(
+ struct iscsi_cmd *cmd,
+ u8 type)
+{
+ u32 i, pdu_send_order = 0, seq_no = 0;
+
+ for (i = 0; i < cmd->pdu_count; i++) {
+redo:
+ if (cmd->pdu_list[i].seq_no == seq_no) {
+ cmd->pdu_list[i].pdu_send_order = pdu_send_order++;
+ continue;
+ }
+ seq_no++;
+ pdu_send_order = 0;
+ goto redo;
+ }
+}
+
+/*
+ * Generate count random values into array.
+ * Use 0x80000000 to mark generated values in array[].
+ */
+static void iscsit_create_random_array(u32 *array, u32 count)
+{
+ int i, j, k;
+
+ if (count == 1) {
+ array[0] = 0;
+ return;
+ }
+
+ for (i = 0; i < count; i++) {
+redo:
+ get_random_bytes(&j, sizeof(u32));
+ j = (1 + (int) (9999 + 1) - j) % count;
+ for (k = 0; k < i + 1; k++) {
+ j |= 0x80000000;
+ if ((array[k] & 0x80000000) && (array[k] == j))
+ goto redo;
+ }
+ array[i] = j;
+ }
+
+ for (i = 0; i < count; i++)
+ array[i] &= ~0x80000000;
+}
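+
+/*
+ * Note: for count == 4 the loop above is intended to yield a random
+ * permutation of {0, 1, 2, 3}; the 0x80000000 bit only marks slots
+ * already assigned during generation and is cleared before returning.
+ */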
+
+static int iscsit_randomize_pdu_lists(
+ struct iscsi_cmd *cmd,
+ u8 type)
+{
+ int i = 0;
+ u32 *array, pdu_count, seq_count = 0, seq_no = 0, seq_offset = 0;
+
+ for (pdu_count = 0; pdu_count < cmd->pdu_count; pdu_count++) {
+redo:
+ if (cmd->pdu_list[pdu_count].seq_no == seq_no) {
+ seq_count++;
+ continue;
+ }
+ array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+ if (!array) {
+ pr_err("Unable to allocate memory"
+ " for random array.\n");
+ return -1;
+ }
+ iscsit_create_random_array(array, seq_count);
+
+ for (i = 0; i < seq_count; i++)
+ cmd->pdu_list[seq_offset+i].pdu_send_order = array[i];
+
+ kfree(array);
+
+ seq_offset += seq_count;
+ seq_count = 0;
+ seq_no++;
+ goto redo;
+ }
+
+ if (seq_count) {
+ array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+ if (!array) {
+ pr_err("Unable to allocate memory for"
+ " random array.\n");
+ return -1;
+ }
+ iscsit_create_random_array(array, seq_count);
+
+ for (i = 0; i < seq_count; i++)
+ cmd->pdu_list[seq_offset+i].pdu_send_order = array[i];
+
+ kfree(array);
+ }
+
+ return 0;
+}
+
+static int iscsit_randomize_seq_lists(
+ struct iscsi_cmd *cmd,
+ u8 type)
+{
+ int i, j = 0;
+ u32 *array, seq_count = cmd->seq_count;
+
+ if ((type == PDULIST_IMMEDIATE) || (type == PDULIST_UNSOLICITED))
+ seq_count--;
+ else if (type == PDULIST_IMMEDIATE_AND_UNSOLICITED)
+ seq_count -= 2;
+
+ if (!seq_count)
+ return 0;
+
+ array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL);
+ if (!array) {
+ pr_err("Unable to allocate memory for random array.\n");
+ return -1;
+ }
+ iscsit_create_random_array(array, seq_count);
+
+ for (i = 0; i < cmd->seq_count; i++) {
+ if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+ continue;
+ cmd->seq_list[i].seq_send_order = array[j++];
+ }
+
+ kfree(array);
+ return 0;
+}
+
+static void iscsit_determine_counts_for_list(
+ struct iscsi_cmd *cmd,
+ struct iscsi_build_list *bl,
+ u32 *seq_count,
+ u32 *pdu_count)
+{
+ int check_immediate = 0;
+ u32 burstlength = 0, offset = 0;
+ u32 unsolicited_data_length = 0;
+ struct iscsi_conn *conn = cmd->conn;
+
+ if ((bl->type == PDULIST_IMMEDIATE) ||
+ (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+ check_immediate = 1;
+
+ if ((bl->type == PDULIST_UNSOLICITED) ||
+ (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+ unsolicited_data_length = (cmd->data_length >
+ conn->sess->sess_ops->FirstBurstLength) ?
+ conn->sess->sess_ops->FirstBurstLength : cmd->data_length;
+
+ while (offset < cmd->data_length) {
+ *pdu_count += 1;
+
+ if (check_immediate) {
+ check_immediate = 0;
+ offset += bl->immediate_data_length;
+ *seq_count += 1;
+ if (unsolicited_data_length)
+ unsolicited_data_length -=
+ bl->immediate_data_length;
+ continue;
+ }
+ if (unsolicited_data_length > 0) {
+ if ((offset + conn->conn_ops->MaxRecvDataSegmentLength)
+ >= cmd->data_length) {
+ unsolicited_data_length -=
+ (cmd->data_length - offset);
+ offset += (cmd->data_length - offset);
+ continue;
+ }
+ if ((offset + conn->conn_ops->MaxRecvDataSegmentLength)
+ >= conn->sess->sess_ops->FirstBurstLength) {
+ unsolicited_data_length -=
+ (conn->sess->sess_ops->FirstBurstLength -
+ offset);
+ offset += (conn->sess->sess_ops->FirstBurstLength -
+ offset);
+ burstlength = 0;
+ *seq_count += 1;
+ continue;
+ }
+
+ offset += conn->conn_ops->MaxRecvDataSegmentLength;
+ unsolicited_data_length -=
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ continue;
+ }
+ if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+ cmd->data_length) {
+ offset += (cmd->data_length - offset);
+ continue;
+ }
+ if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >=
+ conn->sess->sess_ops->MaxBurstLength) {
+ offset += (conn->sess->sess_ops->MaxBurstLength -
+ burstlength);
+ burstlength = 0;
+ *seq_count += 1;
+ continue;
+ }
+
+ burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+ offset += conn->conn_ops->MaxRecvDataSegmentLength;
+ }
+}
+
+/*
+ * Builds PDU and/or Sequence list, called while DataSequenceInOrder=No
+ * or DataPDUInOrder=No.
+ */
+static int iscsit_build_pdu_and_seq_list(
+ struct iscsi_cmd *cmd,
+ struct iscsi_build_list *bl)
+{
+ int check_immediate = 0, datapduinorder, datasequenceinorder;
+ u32 burstlength = 0, offset = 0, i = 0;
+ u32 pdu_count = 0, seq_no = 0, unsolicited_data_length = 0;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_pdu *pdu = cmd->pdu_list;
+ struct iscsi_seq *seq = cmd->seq_list;
+
+ datapduinorder = conn->sess->sess_ops->DataPDUInOrder;
+ datasequenceinorder = conn->sess->sess_ops->DataSequenceInOrder;
+
+ if ((bl->type == PDULIST_IMMEDIATE) ||
+ (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+ check_immediate = 1;
+
+ if ((bl->type == PDULIST_UNSOLICITED) ||
+ (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED))
+ unsolicited_data_length = (cmd->data_length >
+ conn->sess->sess_ops->FirstBurstLength) ?
+ conn->sess->sess_ops->FirstBurstLength : cmd->data_length;
+
+ while (offset < cmd->data_length) {
+ pdu_count++;
+ if (!datapduinorder) {
+ pdu[i].offset = offset;
+ pdu[i].seq_no = seq_no;
+ }
+ if (!datasequenceinorder && (pdu_count == 1)) {
+ seq[seq_no].pdu_start = i;
+ seq[seq_no].seq_no = seq_no;
+ seq[seq_no].offset = offset;
+ seq[seq_no].orig_offset = offset;
+ }
+
+ if (check_immediate) {
+ check_immediate = 0;
+ if (!datapduinorder) {
+ pdu[i].type = PDUTYPE_IMMEDIATE;
+ pdu[i++].length = bl->immediate_data_length;
+ }
+ if (!datasequenceinorder) {
+ seq[seq_no].type = SEQTYPE_IMMEDIATE;
+ seq[seq_no].pdu_count = 1;
+ seq[seq_no].xfer_len =
+ bl->immediate_data_length;
+ }
+ offset += bl->immediate_data_length;
+ pdu_count = 0;
+ seq_no++;
+ if (unsolicited_data_length)
+ unsolicited_data_length -=
+ bl->immediate_data_length;
+ continue;
+ }
+ if (unsolicited_data_length > 0) {
+ if ((offset +
+ conn->conn_ops->MaxRecvDataSegmentLength) >=
+ cmd->data_length) {
+ if (!datapduinorder) {
+ pdu[i].type = PDUTYPE_UNSOLICITED;
+ pdu[i].length =
+ (cmd->data_length - offset);
+ }
+ if (!datasequenceinorder) {
+ seq[seq_no].type = SEQTYPE_UNSOLICITED;
+ seq[seq_no].pdu_count = pdu_count;
+ seq[seq_no].xfer_len = (burstlength +
+ (cmd->data_length - offset));
+ }
+ unsolicited_data_length -=
+ (cmd->data_length - offset);
+ offset += (cmd->data_length - offset);
+ continue;
+ }
+ if ((offset +
+ conn->conn_ops->MaxRecvDataSegmentLength) >=
+ conn->sess->sess_ops->FirstBurstLength) {
+ if (!datapduinorder) {
+ pdu[i].type = PDUTYPE_UNSOLICITED;
+ pdu[i++].length =
+ (conn->sess->sess_ops->FirstBurstLength -
+ offset);
+ }
+ if (!datasequenceinorder) {
+ seq[seq_no].type = SEQTYPE_UNSOLICITED;
+ seq[seq_no].pdu_count = pdu_count;
+ seq[seq_no].xfer_len = (burstlength +
+ (conn->sess->sess_ops->FirstBurstLength -
+ offset));
+ }
+ unsolicited_data_length -=
+ (conn->sess->sess_ops->FirstBurstLength -
+ offset);
+ offset += (conn->sess->sess_ops->FirstBurstLength -
+ offset);
+ burstlength = 0;
+ pdu_count = 0;
+ seq_no++;
+ continue;
+ }
+
+ if (!datapduinorder) {
+ pdu[i].type = PDUTYPE_UNSOLICITED;
+ pdu[i++].length =
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ }
+ burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+ offset += conn->conn_ops->MaxRecvDataSegmentLength;
+ unsolicited_data_length -=
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ continue;
+ }
+ if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >=
+ cmd->data_length) {
+ if (!datapduinorder) {
+ pdu[i].type = PDUTYPE_NORMAL;
+ pdu[i].length = (cmd->data_length - offset);
+ }
+ if (!datasequenceinorder) {
+ seq[seq_no].type = SEQTYPE_NORMAL;
+ seq[seq_no].pdu_count = pdu_count;
+ seq[seq_no].xfer_len = (burstlength +
+ (cmd->data_length - offset));
+ }
+ offset += (cmd->data_length - offset);
+ continue;
+ }
+ if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >=
+ conn->sess->sess_ops->MaxBurstLength) {
+ if (!datapduinorder) {
+ pdu[i].type = PDUTYPE_NORMAL;
+ pdu[i++].length =
+ (conn->sess->sess_ops->MaxBurstLength -
+ burstlength);
+ }
+ if (!datasequenceinorder) {
+ seq[seq_no].type = SEQTYPE_NORMAL;
+ seq[seq_no].pdu_count = pdu_count;
+ seq[seq_no].xfer_len = (burstlength +
+ (conn->sess->sess_ops->MaxBurstLength -
+ burstlength));
+ }
+ offset += (conn->sess->sess_ops->MaxBurstLength -
+ burstlength);
+ burstlength = 0;
+ pdu_count = 0;
+ seq_no++;
+ continue;
+ }
+
+ if (!datapduinorder) {
+ pdu[i].type = PDUTYPE_NORMAL;
+ pdu[i++].length =
+ conn->conn_ops->MaxRecvDataSegmentLength;
+ }
+ burstlength += conn->conn_ops->MaxRecvDataSegmentLength;
+ offset += conn->conn_ops->MaxRecvDataSegmentLength;
+ }
+
+ if (!datasequenceinorder) {
+ if (bl->data_direction & ISCSI_PDU_WRITE) {
+ if (bl->randomize & RANDOM_R2T_OFFSETS) {
+ if (iscsit_randomize_seq_lists(cmd, bl->type)
+ < 0)
+ return -1;
+ } else
+ iscsit_ordered_seq_lists(cmd, bl->type);
+ } else if (bl->data_direction & ISCSI_PDU_READ) {
+ if (bl->randomize & RANDOM_DATAIN_SEQ_OFFSETS) {
+ if (iscsit_randomize_seq_lists(cmd, bl->type)
+ < 0)
+ return -1;
+ } else
+ iscsit_ordered_seq_lists(cmd, bl->type);
+ }
+#if 0
+ iscsit_dump_seq_list(cmd);
+#endif
+ }
+ if (!datapduinorder) {
+ if (bl->data_direction & ISCSI_PDU_WRITE) {
+ if (bl->randomize & RANDOM_DATAOUT_PDU_OFFSETS) {
+ if (iscsit_randomize_pdu_lists(cmd, bl->type)
+ < 0)
+ return -1;
+ } else
+ iscsit_ordered_pdu_lists(cmd, bl->type);
+ } else if (bl->data_direction & ISCSI_PDU_READ) {
+ if (bl->randomize & RANDOM_DATAIN_PDU_OFFSETS) {
+ if (iscsit_randomize_pdu_lists(cmd, bl->type)
+ < 0)
+ return -1;
+ } else
+ iscsit_ordered_pdu_lists(cmd, bl->type);
+ }
+#if 0
+ iscsit_dump_pdu_list(cmd);
+#endif
+ }
+
+ return 0;
+}
+
+/*
+ * Only called while DataSequenceInOrder=No or DataPDUInOrder=No.
+ */
+int iscsit_do_build_list(
+ struct iscsi_cmd *cmd,
+ struct iscsi_build_list *bl)
+{
+ u32 pdu_count = 0, seq_count = 1;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_pdu *pdu = NULL;
+ struct iscsi_seq *seq = NULL;
+
+ iscsit_determine_counts_for_list(cmd, bl, &seq_count, &pdu_count);
+
+ if (!conn->sess->sess_ops->DataSequenceInOrder) {
+ seq = kzalloc(seq_count * sizeof(struct iscsi_seq), GFP_ATOMIC);
+ if (!seq) {
+ pr_err("Unable to allocate struct iscsi_seq list\n");
+ return -1;
+ }
+ cmd->seq_list = seq;
+ cmd->seq_count = seq_count;
+ }
+
+ if (!conn->sess->sess_ops->DataPDUInOrder) {
+ pdu = kzalloc(pdu_count * sizeof(struct iscsi_pdu), GFP_ATOMIC);
+ if (!pdu) {
+ pr_err("Unable to allocate struct iscsi_pdu list.\n");
+ kfree(seq);
+ return -1;
+ }
+ cmd->pdu_list = pdu;
+ cmd->pdu_count = pdu_count;
+ }
+
+ return iscsit_build_pdu_and_seq_list(cmd, bl);
+}
+
+struct iscsi_pdu *iscsit_get_pdu_holder(
+ struct iscsi_cmd *cmd,
+ u32 offset,
+ u32 length)
+{
+ u32 i;
+ struct iscsi_pdu *pdu = NULL;
+
+ if (!cmd->pdu_list) {
+ pr_err("struct iscsi_cmd->pdu_list is NULL!\n");
+ return NULL;
+ }
+
+ pdu = &cmd->pdu_list[0];
+
+ for (i = 0; i < cmd->pdu_count; i++)
+ if ((pdu[i].offset == offset) && (pdu[i].length == length))
+ return &pdu[i];
+
+ pr_err("Unable to locate PDU holder for ITT: 0x%08x, Offset:"
+ " %u, Length: %u\n", cmd->init_task_tag, offset, length);
+ return NULL;
+}
+
+struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(
+ struct iscsi_cmd *cmd,
+ struct iscsi_seq *seq)
+{
+ u32 i;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_pdu *pdu = NULL;
+
+ if (!cmd->pdu_list) {
+ pr_err("struct iscsi_cmd->pdu_list is NULL!\n");
+ return NULL;
+ }
+
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+redo:
+ pdu = &cmd->pdu_list[cmd->pdu_start];
+
+ for (i = 0; pdu[i].seq_no != cmd->seq_no; i++) {
+#if 0
+ pr_debug("pdu[i].seq_no: %d, pdu[i].pdu"
+ "_send_order: %d, pdu[i].offset: %d,"
+ " pdu[i].length: %d\n", pdu[i].seq_no,
+ pdu[i].pdu_send_order, pdu[i].offset,
+ pdu[i].length);
+#endif
+ if (pdu[i].pdu_send_order == cmd->pdu_send_order) {
+ cmd->pdu_send_order++;
+ return &pdu[i];
+ }
+ }
+
+ cmd->pdu_start += cmd->pdu_send_order;
+ cmd->pdu_send_order = 0;
+ cmd->seq_no++;
+
+ if (cmd->pdu_start < cmd->pdu_count)
+ goto redo;
+
+ pr_err("Command ITT: 0x%08x unable to locate"
+ " struct iscsi_pdu for cmd->pdu_send_order: %u.\n",
+ cmd->init_task_tag, cmd->pdu_send_order);
+ return NULL;
+ } else {
+ if (!seq) {
+ pr_err("struct iscsi_seq is NULL!\n");
+ return NULL;
+ }
+#if 0
+ pr_debug("seq->pdu_start: %d, seq->pdu_count: %d,"
+ " seq->seq_no: %d\n", seq->pdu_start, seq->pdu_count,
+ seq->seq_no);
+#endif
+ pdu = &cmd->pdu_list[seq->pdu_start];
+
+ if (seq->pdu_send_order == seq->pdu_count) {
+ pr_err("Command ITT: 0x%08x seq->pdu_send"
+ "_order: %u equals seq->pdu_count: %u\n",
+ cmd->init_task_tag, seq->pdu_send_order,
+ seq->pdu_count);
+ return NULL;
+ }
+
+ for (i = 0; i < seq->pdu_count; i++) {
+ if (pdu[i].pdu_send_order == seq->pdu_send_order) {
+ seq->pdu_send_order++;
+ return &pdu[i];
+ }
+ }
+
+		pr_err("Command ITT: 0x%08x unable to locate struct"
+			" iscsi_pdu for seq->pdu_send_order: %u.\n",
+ cmd->init_task_tag, seq->pdu_send_order);
+ return NULL;
+ }
+
+ return NULL;
+}
+
+struct iscsi_seq *iscsit_get_seq_holder(
+ struct iscsi_cmd *cmd,
+ u32 offset,
+ u32 length)
+{
+ u32 i;
+
+ if (!cmd->seq_list) {
+ pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+ return NULL;
+ }
+
+ for (i = 0; i < cmd->seq_count; i++) {
+#if 0
+ pr_debug("seq_list[i].orig_offset: %d, seq_list[i]."
+ "xfer_len: %d, seq_list[i].seq_no %u\n",
+ cmd->seq_list[i].orig_offset, cmd->seq_list[i].xfer_len,
+ cmd->seq_list[i].seq_no);
+#endif
+ if ((cmd->seq_list[i].orig_offset +
+ cmd->seq_list[i].xfer_len) >=
+ (offset + length))
+ return &cmd->seq_list[i];
+ }
+
+ pr_err("Unable to locate Sequence holder for ITT: 0x%08x,"
+ " Offset: %u, Length: %u\n", cmd->init_task_tag, offset,
+ length);
+ return NULL;
+}
diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.h b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h
new file mode 100644
index 0000000..0d52a10
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h
@@ -0,0 +1,86 @@
+#ifndef ISCSI_SEQ_AND_PDU_LIST_H
+#define ISCSI_SEQ_AND_PDU_LIST_H
+
+/* struct iscsi_pdu->status */
+#define DATAOUT_PDU_SENT 1
+
+/* struct iscsi_seq->type */
+#define SEQTYPE_IMMEDIATE 1
+#define SEQTYPE_UNSOLICITED 2
+#define SEQTYPE_NORMAL 3
+
+/* struct iscsi_seq->status */
+#define DATAOUT_SEQUENCE_GOT_R2T 1
+#define DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY 2
+#define DATAOUT_SEQUENCE_COMPLETE 3
+
+/* iscsi_determine_counts_for_list() type */
+#define PDULIST_NORMAL 1
+#define PDULIST_IMMEDIATE 2
+#define PDULIST_UNSOLICITED 3
+#define PDULIST_IMMEDIATE_AND_UNSOLICITED 4
+
+/* struct iscsi_pdu->type */
+#define PDUTYPE_IMMEDIATE 1
+#define PDUTYPE_UNSOLICITED 2
+#define PDUTYPE_NORMAL 3
+
+/* struct iscsi_pdu->status */
+#define ISCSI_PDU_NOT_RECEIVED 0
+#define ISCSI_PDU_RECEIVED_OK 1
+#define ISCSI_PDU_CRC_FAILED 2
+#define ISCSI_PDU_TIMED_OUT 3
+
+/* struct iscsi_build_list->randomize */
+#define RANDOM_DATAIN_PDU_OFFSETS 0x01
+#define RANDOM_DATAIN_SEQ_OFFSETS 0x02
+#define RANDOM_DATAOUT_PDU_OFFSETS 0x04
+#define RANDOM_R2T_OFFSETS 0x08
+
+/* struct iscsi_build_list->data_direction */
+#define ISCSI_PDU_READ 0x01
+#define ISCSI_PDU_WRITE 0x02
+
+struct iscsi_build_list {
+ int data_direction;
+ int randomize;
+ int type;
+ int immediate_data_length;
+};
+
+struct iscsi_pdu {
+ int status;
+ int type;
+ u8 flags;
+ u32 data_sn;
+ u32 length;
+ u32 offset;
+ u32 pdu_send_order;
+ u32 seq_no;
+} ____cacheline_aligned;
+
+struct iscsi_seq {
+ int sent;
+ int status;
+ int type;
+ u32 data_sn;
+ u32 first_datasn;
+ u32 last_datasn;
+ u32 next_burst_len;
+ u32 pdu_start;
+ u32 pdu_count;
+ u32 offset;
+ u32 orig_offset;
+ u32 pdu_send_order;
+ u32 r2t_sn;
+ u32 seq_send_order;
+ u32 seq_no;
+ u32 xfer_len;
+} ____cacheline_aligned;
+
+extern int iscsit_do_build_list(struct iscsi_cmd *, struct iscsi_build_list *);
+extern struct iscsi_pdu *iscsit_get_pdu_holder(struct iscsi_cmd *, u32, u32);
+extern struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(struct iscsi_cmd *, struct iscsi_seq *);
+extern struct iscsi_seq *iscsit_get_seq_holder(struct iscsi_cmd *, u32, u32);
+
+#endif /* ISCSI_SEQ_AND_PDU_LIST_H */
diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c
new file mode 100644
index 0000000..bbdbe93
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_stat.c
@@ -0,0 +1,950 @@
+/*******************************************************************************
+ * Modern ConfigFS group context specific iSCSI statistics based on original
+ * iscsi_target_mib.c code
+ *
+ * Copyright (c) 2011 Rising Tide Systems
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/configfs.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/configfs_macros.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_stat.h"
+
+#ifndef INITIAL_JIFFIES
+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
+#endif
+
+/* Instance Attributes Table */
+#define ISCSI_INST_NUM_NODES 1
+#define ISCSI_INST_DESCR "Storage Engine Target"
+#define ISCSI_INST_LAST_FAILURE_TYPE 0
+#define ISCSI_DISCONTINUITY_TIME 0
+
+#define ISCSI_NODE_INDEX 1
+
+#define ISPRINT(a) ((a >= ' ') && (a <= '~'))
+
+/****************************************************************************
+ * iSCSI MIB Tables
+ ****************************************************************************/
+/*
+ * Instance Attributes Table
+ */
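+/*
+ * The CONFIGFS_EATTR_* macros from target/configfs_macros.h generate
+ * the extended-attribute glue for each table: a per-table attribute
+ * struct plus show()/store() dispatchers that are wired up through
+ * the configfs_item_operations defined at the end of each table.
+ */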
+CONFIGFS_EATTR_STRUCT(iscsi_stat_instance, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_INSTANCE_ATTR(_name, _mode) \
+static struct iscsi_stat_instance_attribute \
+ iscsi_stat_instance_##_name = \
+ __CONFIGFS_EATTR(_name, _mode, \
+ iscsi_stat_instance_show_attr_##_name, \
+ iscsi_stat_instance_store_attr_##_name);
+
+#define ISCSI_STAT_INSTANCE_ATTR_RO(_name) \
+static struct iscsi_stat_instance_attribute \
+ iscsi_stat_instance_##_name = \
+ __CONFIGFS_EATTR_RO(_name, \
+ iscsi_stat_instance_show_attr_##_name);
+
+static ssize_t iscsi_stat_instance_show_attr_inst(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_instance_show_attr_min_ver(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(min_ver);
+
+static ssize_t iscsi_stat_instance_show_attr_max_ver(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(max_ver);
+
+static ssize_t iscsi_stat_instance_show_attr_portals(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_num_tpg_nps);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(portals);
+
+static ssize_t iscsi_stat_instance_show_attr_nodes(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_INST_NUM_NODES);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(nodes);
+
+static ssize_t iscsi_stat_instance_show_attr_sessions(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_nsessions);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(sessions);
+
+static ssize_t iscsi_stat_instance_show_attr_fail_sess(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+ u32 sess_err_count;
+
+ spin_lock_bh(&sess_err->lock);
+ sess_err_count = (sess_err->digest_errors +
+ sess_err->cxn_timeout_errors +
+ sess_err->pdu_format_errors);
+ spin_unlock_bh(&sess_err->lock);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", sess_err_count);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_sess);
+
+static ssize_t iscsi_stat_instance_show_attr_fail_type(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+ return snprintf(page, PAGE_SIZE, "%u\n",
+ sess_err->last_sess_failure_type);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_type);
+
+static ssize_t iscsi_stat_instance_show_attr_fail_rem_name(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+ return snprintf(page, PAGE_SIZE, "%s\n",
+ sess_err->last_sess_fail_rem_name[0] ?
+ sess_err->last_sess_fail_rem_name : NONE);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(fail_rem_name);
+
+static ssize_t iscsi_stat_instance_show_attr_disc_time(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DISCONTINUITY_TIME);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(disc_time);
+
+static ssize_t iscsi_stat_instance_show_attr_description(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%s\n", ISCSI_INST_DESCR);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(description);
+
+static ssize_t iscsi_stat_instance_show_attr_vendor(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n");
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(vendor);
+
+static ssize_t iscsi_stat_instance_show_attr_version(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%s\n", ISCSIT_VERSION);
+}
+ISCSI_STAT_INSTANCE_ATTR_RO(version);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_instance, iscsi_wwn_stat_grps,
+ iscsi_instance_group);
+
+static struct configfs_attribute *iscsi_stat_instance_attrs[] = {
+ &iscsi_stat_instance_inst.attr,
+ &iscsi_stat_instance_min_ver.attr,
+ &iscsi_stat_instance_max_ver.attr,
+ &iscsi_stat_instance_portals.attr,
+ &iscsi_stat_instance_nodes.attr,
+ &iscsi_stat_instance_sessions.attr,
+ &iscsi_stat_instance_fail_sess.attr,
+ &iscsi_stat_instance_fail_type.attr,
+ &iscsi_stat_instance_fail_rem_name.attr,
+ &iscsi_stat_instance_disc_time.attr,
+ &iscsi_stat_instance_description.attr,
+ &iscsi_stat_instance_vendor.attr,
+ &iscsi_stat_instance_version.attr,
+ NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_instance_item_ops = {
+ .show_attribute = iscsi_stat_instance_attr_show,
+ .store_attribute = iscsi_stat_instance_attr_store,
+};
+
+struct config_item_type iscsi_stat_instance_cit = {
+ .ct_item_ops = &iscsi_stat_instance_item_ops,
+ .ct_attrs = iscsi_stat_instance_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+/*
+ * Instance Session Failure Stats Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_sess_err, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_SESS_ERR_ATTR(_name, _mode) \
+static struct iscsi_stat_sess_err_attribute \
+ iscsi_stat_sess_err_##_name = \
+ __CONFIGFS_EATTR(_name, _mode, \
+ iscsi_stat_sess_err_show_attr_##_name, \
+ iscsi_stat_sess_err_store_attr_##_name);
+
+#define ISCSI_STAT_SESS_ERR_ATTR_RO(_name) \
+static struct iscsi_stat_sess_err_attribute \
+ iscsi_stat_sess_err_##_name = \
+ __CONFIGFS_EATTR_RO(_name, \
+ iscsi_stat_sess_err_show_attr_##_name);
+
+static ssize_t iscsi_stat_sess_err_show_attr_inst(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_sess_err_show_attr_digest_errors(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+ return snprintf(page, PAGE_SIZE, "%u\n", sess_err->digest_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(digest_errors);
+
+static ssize_t iscsi_stat_sess_err_show_attr_cxn_errors(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+ return snprintf(page, PAGE_SIZE, "%u\n", sess_err->cxn_timeout_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(cxn_errors);
+
+static ssize_t iscsi_stat_sess_err_show_attr_format_errors(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+ return snprintf(page, PAGE_SIZE, "%u\n", sess_err->pdu_format_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(format_errors);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_sess_err, iscsi_wwn_stat_grps,
+ iscsi_sess_err_group);
+
+static struct configfs_attribute *iscsi_stat_sess_err_attrs[] = {
+ &iscsi_stat_sess_err_inst.attr,
+ &iscsi_stat_sess_err_digest_errors.attr,
+ &iscsi_stat_sess_err_cxn_errors.attr,
+ &iscsi_stat_sess_err_format_errors.attr,
+ NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_sess_err_item_ops = {
+ .show_attribute = iscsi_stat_sess_err_attr_show,
+ .store_attribute = iscsi_stat_sess_err_attr_store,
+};
+
+struct config_item_type iscsi_stat_sess_err_cit = {
+ .ct_item_ops = &iscsi_stat_sess_err_item_ops,
+ .ct_attrs = iscsi_stat_sess_err_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+/*
+ * Target Attributes Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_TGT_ATTR(_name, _mode) \
+static struct iscsi_stat_tgt_attr_attribute \
+ iscsi_stat_tgt_attr_##_name = \
+ __CONFIGFS_EATTR(_name, _mode, \
+			iscsi_stat_tgt_attr_show_attr_##_name, \
+ iscsi_stat_tgt_attr_store_attr_##_name);
+
+#define ISCSI_STAT_TGT_ATTR_RO(_name) \
+static struct iscsi_stat_tgt_attr_attribute \
+ iscsi_stat_tgt_attr_##_name = \
+ __CONFIGFS_EATTR_RO(_name, \
+ iscsi_stat_tgt_attr_show_attr_##_name);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_inst(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_TGT_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_indx(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_TGT_ATTR_RO(indx);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_login_fails(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ u32 fail_count;
+
+ spin_lock(&lstat->lock);
+ fail_count = (lstat->redirects + lstat->authorize_fails +
+ lstat->authenticate_fails + lstat->negotiate_fails +
+ lstat->other_fails);
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", fail_count);
+}
+ISCSI_STAT_TGT_ATTR_RO(login_fails);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_time(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ u32 last_fail_time;
+
+ spin_lock(&lstat->lock);
+ last_fail_time = lstat->last_fail_time ?
+ (u32)(((u32)lstat->last_fail_time -
+ INITIAL_JIFFIES) * 100 / HZ) : 0;
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", last_fail_time);
+}
+ISCSI_STAT_TGT_ATTR_RO(last_fail_time);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_type(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ u32 last_fail_type;
+
+ spin_lock(&lstat->lock);
+ last_fail_type = lstat->last_fail_type;
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", last_fail_type);
+}
+ISCSI_STAT_TGT_ATTR_RO(last_fail_type);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_name(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ unsigned char buf[224];
+
+ spin_lock(&lstat->lock);
+ snprintf(buf, 224, "%s", lstat->last_intr_fail_name[0] ?
+ lstat->last_intr_fail_name : NONE);
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_name);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr_type(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ unsigned char buf[8];
+
+ spin_lock(&lstat->lock);
+ snprintf(buf, 8, "%s", (lstat->last_intr_fail_ip_addr != NULL) ?
+ "ipv6" : "ipv4");
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr_type);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ unsigned char buf[32];
+
+ spin_lock(&lstat->lock);
+ if (lstat->last_intr_fail_ip_family == AF_INET6)
+ snprintf(buf, 32, "[%s]", lstat->last_intr_fail_ip_addr);
+ else
+ snprintf(buf, 32, "%s", lstat->last_intr_fail_ip_addr);
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps,
+ iscsi_tgt_attr_group);
+
+static struct configfs_attribute *iscsi_stat_tgt_attr_attrs[] = {
+ &iscsi_stat_tgt_attr_inst.attr,
+ &iscsi_stat_tgt_attr_indx.attr,
+ &iscsi_stat_tgt_attr_login_fails.attr,
+ &iscsi_stat_tgt_attr_last_fail_time.attr,
+ &iscsi_stat_tgt_attr_last_fail_type.attr,
+ &iscsi_stat_tgt_attr_fail_intr_name.attr,
+ &iscsi_stat_tgt_attr_fail_intr_addr_type.attr,
+ &iscsi_stat_tgt_attr_fail_intr_addr.attr,
+ NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_tgt_attr_item_ops = {
+ .show_attribute = iscsi_stat_tgt_attr_attr_show,
+ .store_attribute = iscsi_stat_tgt_attr_attr_store,
+};
+
+struct config_item_type iscsi_stat_tgt_attr_cit = {
+ .ct_item_ops = &iscsi_stat_tgt_attr_item_ops,
+ .ct_attrs = iscsi_stat_tgt_attr_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+/*
+ * Target Login Stats Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_login, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_LOGIN(_name, _mode) \
+static struct iscsi_stat_login_attribute \
+ iscsi_stat_login_##_name = \
+ __CONFIGFS_EATTR(_name, _mode, \
+ iscsi_stat_login_show_attr_##_name, \
+ iscsi_stat_login_store_attr_##_name);
+
+#define ISCSI_STAT_LOGIN_RO(_name) \
+static struct iscsi_stat_login_attribute \
+ iscsi_stat_login_##_name = \
+ __CONFIGFS_EATTR_RO(_name, \
+ iscsi_stat_login_show_attr_##_name);
+
+static ssize_t iscsi_stat_login_show_attr_inst(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_LOGIN_RO(inst);
+
+static ssize_t iscsi_stat_login_show_attr_indx(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_LOGIN_RO(indx);
+
+static ssize_t iscsi_stat_login_show_attr_accepts(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ ssize_t ret;
+
+ spin_lock(&lstat->lock);
+ ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->accepts);
+ spin_unlock(&lstat->lock);
+
+ return ret;
+}
+ISCSI_STAT_LOGIN_RO(accepts);
+
+static ssize_t iscsi_stat_login_show_attr_other_fails(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ ssize_t ret;
+
+ spin_lock(&lstat->lock);
+ ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->other_fails);
+ spin_unlock(&lstat->lock);
+
+ return ret;
+}
+ISCSI_STAT_LOGIN_RO(other_fails);
+
+static ssize_t iscsi_stat_login_show_attr_redirects(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ ssize_t ret;
+
+ spin_lock(&lstat->lock);
+ ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->redirects);
+ spin_unlock(&lstat->lock);
+
+ return ret;
+}
+ISCSI_STAT_LOGIN_RO(redirects);
+
+static ssize_t iscsi_stat_login_show_attr_authorize_fails(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ ssize_t ret;
+
+ spin_lock(&lstat->lock);
+ ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authorize_fails);
+ spin_unlock(&lstat->lock);
+
+ return ret;
+}
+ISCSI_STAT_LOGIN_RO(authorize_fails);
+
+static ssize_t iscsi_stat_login_show_attr_authenticate_fails(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ ssize_t ret;
+
+ spin_lock(&lstat->lock);
+ ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authenticate_fails);
+ spin_unlock(&lstat->lock);
+
+ return ret;
+}
+ISCSI_STAT_LOGIN_RO(authenticate_fails);
+
+static ssize_t iscsi_stat_login_show_attr_negotiate_fails(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ ssize_t ret;
+
+ spin_lock(&lstat->lock);
+ ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->negotiate_fails);
+ spin_unlock(&lstat->lock);
+
+ return ret;
+}
+ISCSI_STAT_LOGIN_RO(negotiate_fails);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_login, iscsi_wwn_stat_grps,
+ iscsi_login_stats_group);
+
+static struct configfs_attribute *iscsi_stat_login_stats_attrs[] = {
+ &iscsi_stat_login_inst.attr,
+ &iscsi_stat_login_indx.attr,
+ &iscsi_stat_login_accepts.attr,
+ &iscsi_stat_login_other_fails.attr,
+ &iscsi_stat_login_redirects.attr,
+ &iscsi_stat_login_authorize_fails.attr,
+ &iscsi_stat_login_authenticate_fails.attr,
+ &iscsi_stat_login_negotiate_fails.attr,
+ NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_login_stats_item_ops = {
+ .show_attribute = iscsi_stat_login_attr_show,
+ .store_attribute = iscsi_stat_login_attr_store,
+};
+
+struct config_item_type iscsi_stat_login_cit = {
+ .ct_item_ops = &iscsi_stat_login_stats_item_ops,
+ .ct_attrs = iscsi_stat_login_stats_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+/*
+ * Target Logout Stats Table
+ */
+
+CONFIGFS_EATTR_STRUCT(iscsi_stat_logout, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_LOGOUT(_name, _mode) \
+static struct iscsi_stat_logout_attribute \
+ iscsi_stat_logout_##_name = \
+ __CONFIGFS_EATTR(_name, _mode, \
+ iscsi_stat_logout_show_attr_##_name, \
+ iscsi_stat_logout_store_attr_##_name);
+
+#define ISCSI_STAT_LOGOUT_RO(_name) \
+static struct iscsi_stat_logout_attribute \
+ iscsi_stat_logout_##_name = \
+ __CONFIGFS_EATTR_RO(_name, \
+ iscsi_stat_logout_show_attr_##_name);
+
+static ssize_t iscsi_stat_logout_show_attr_inst(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_LOGOUT_RO(inst);
+
+static ssize_t iscsi_stat_logout_show_attr_indx(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_LOGOUT_RO(indx);
+
+static ssize_t iscsi_stat_logout_show_attr_normal_logouts(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_logout_stats *lstats = &tiqn->logout_stats;
+
+ return snprintf(page, PAGE_SIZE, "%u\n", lstats->normal_logouts);
+}
+ISCSI_STAT_LOGOUT_RO(normal_logouts);
+
+static ssize_t iscsi_stat_logout_show_attr_abnormal_logouts(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_logout_stats *lstats = &tiqn->logout_stats;
+
+ return snprintf(page, PAGE_SIZE, "%u\n", lstats->abnormal_logouts);
+}
+ISCSI_STAT_LOGOUT_RO(abnormal_logouts);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_logout, iscsi_wwn_stat_grps,
+ iscsi_logout_stats_group);
+
+static struct configfs_attribute *iscsi_stat_logout_stats_attrs[] = {
+ &iscsi_stat_logout_inst.attr,
+ &iscsi_stat_logout_indx.attr,
+ &iscsi_stat_logout_normal_logouts.attr,
+ &iscsi_stat_logout_abnormal_logouts.attr,
+ NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_logout_stats_item_ops = {
+ .show_attribute = iscsi_stat_logout_attr_show,
+ .store_attribute = iscsi_stat_logout_attr_store,
+};
+
+struct config_item_type iscsi_stat_logout_cit = {
+ .ct_item_ops = &iscsi_stat_logout_stats_item_ops,
+ .ct_attrs = iscsi_stat_logout_stats_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+/*
+ * Session Stats Table
+ */
+
+CONFIGFS_EATTR_STRUCT(iscsi_stat_sess, iscsi_node_stat_grps);
+#define ISCSI_STAT_SESS(_name, _mode) \
+static struct iscsi_stat_sess_attribute \
+ iscsi_stat_sess_##_name = \
+ __CONFIGFS_EATTR(_name, _mode, \
+ iscsi_stat_sess_show_attr_##_name, \
+ iscsi_stat_sess_store_attr_##_name);
+
+#define ISCSI_STAT_SESS_RO(_name) \
+static struct iscsi_stat_sess_attribute \
+ iscsi_stat_sess_##_name = \
+ __CONFIGFS_EATTR_RO(_name, \
+ iscsi_stat_sess_show_attr_##_name);
+
+static ssize_t iscsi_stat_sess_show_attr_inst(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_wwn *wwn = acl->se_node_acl.se_tpg->se_tpg_wwn;
+ struct iscsi_tiqn *tiqn = container_of(wwn,
+ struct iscsi_tiqn, tiqn_wwn);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_SESS_RO(inst);
+
+static ssize_t iscsi_stat_sess_show_attr_node(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_node_acl *se_nacl = &acl->se_node_acl;
+ struct iscsi_session *sess;
+ struct se_session *se_sess;
+ ssize_t ret = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (sess)
+ ret = snprintf(page, PAGE_SIZE, "%u\n",
+ sess->sess_ops->SessionType ? 0 : ISCSI_NODE_INDEX);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return ret;
+}
+ISCSI_STAT_SESS_RO(node);
+
+static ssize_t iscsi_stat_sess_show_attr_indx(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_node_acl *se_nacl = &acl->se_node_acl;
+ struct iscsi_session *sess;
+ struct se_session *se_sess;
+ ssize_t ret = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (sess)
+ ret = snprintf(page, PAGE_SIZE, "%u\n",
+ sess->session_index);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return ret;
+}
+ISCSI_STAT_SESS_RO(indx);
+
+static ssize_t iscsi_stat_sess_show_attr_cmd_pdus(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_node_acl *se_nacl = &acl->se_node_acl;
+ struct iscsi_session *sess;
+ struct se_session *se_sess;
+ ssize_t ret = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (sess)
+ ret = snprintf(page, PAGE_SIZE, "%u\n", sess->cmd_pdus);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return ret;
+}
+ISCSI_STAT_SESS_RO(cmd_pdus);
+
+static ssize_t iscsi_stat_sess_show_attr_rsp_pdus(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_node_acl *se_nacl = &acl->se_node_acl;
+ struct iscsi_session *sess;
+ struct se_session *se_sess;
+ ssize_t ret = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (sess)
+ ret = snprintf(page, PAGE_SIZE, "%u\n", sess->rsp_pdus);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return ret;
+}
+ISCSI_STAT_SESS_RO(rsp_pdus);
+
+static ssize_t iscsi_stat_sess_show_attr_txdata_octs(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_node_acl *se_nacl = &acl->se_node_acl;
+ struct iscsi_session *sess;
+ struct se_session *se_sess;
+ ssize_t ret = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (sess)
+ ret = snprintf(page, PAGE_SIZE, "%llu\n",
+ (unsigned long long)sess->tx_data_octets);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return ret;
+}
+ISCSI_STAT_SESS_RO(txdata_octs);
+
+static ssize_t iscsi_stat_sess_show_attr_rxdata_octs(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_node_acl *se_nacl = &acl->se_node_acl;
+ struct iscsi_session *sess;
+ struct se_session *se_sess;
+ ssize_t ret = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (sess)
+ ret = snprintf(page, PAGE_SIZE, "%llu\n",
+ (unsigned long long)sess->rx_data_octets);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return ret;
+}
+ISCSI_STAT_SESS_RO(rxdata_octs);
+
+static ssize_t iscsi_stat_sess_show_attr_conn_digest_errors(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_node_acl *se_nacl = &acl->se_node_acl;
+ struct iscsi_session *sess;
+ struct se_session *se_sess;
+ ssize_t ret = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (sess)
+ ret = snprintf(page, PAGE_SIZE, "%u\n",
+ sess->conn_digest_errors);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return ret;
+}
+ISCSI_STAT_SESS_RO(conn_digest_errors);
+
+static ssize_t iscsi_stat_sess_show_attr_conn_timeout_errors(
+ struct iscsi_node_stat_grps *igrps, char *page)
+{
+ struct iscsi_node_acl *acl = container_of(igrps,
+ struct iscsi_node_acl, node_stat_grps);
+ struct se_node_acl *se_nacl = &acl->se_node_acl;
+ struct iscsi_session *sess;
+ struct se_session *se_sess;
+ ssize_t ret = 0;
+
+ spin_lock_bh(&se_nacl->nacl_sess_lock);
+ se_sess = se_nacl->nacl_sess;
+ if (se_sess) {
+ sess = (struct iscsi_session *)se_sess->fabric_sess_ptr;
+ if (sess)
+ ret = snprintf(page, PAGE_SIZE, "%u\n",
+ sess->conn_timeout_errors);
+ }
+ spin_unlock_bh(&se_nacl->nacl_sess_lock);
+
+ return ret;
+}
+ISCSI_STAT_SESS_RO(conn_timeout_errors);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_sess, iscsi_node_stat_grps,
+ iscsi_sess_stats_group);
+
+static struct configfs_attribute *iscsi_stat_sess_stats_attrs[] = {
+ &iscsi_stat_sess_inst.attr,
+ &iscsi_stat_sess_node.attr,
+ &iscsi_stat_sess_indx.attr,
+ &iscsi_stat_sess_cmd_pdus.attr,
+ &iscsi_stat_sess_rsp_pdus.attr,
+ &iscsi_stat_sess_txdata_octs.attr,
+ &iscsi_stat_sess_rxdata_octs.attr,
+ &iscsi_stat_sess_conn_digest_errors.attr,
+ &iscsi_stat_sess_conn_timeout_errors.attr,
+ NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_sess_stats_item_ops = {
+ .show_attribute = iscsi_stat_sess_attr_show,
+ .store_attribute = iscsi_stat_sess_attr_store,
+};
+
+struct config_item_type iscsi_stat_sess_cit = {
+ .ct_item_ops = &iscsi_stat_sess_stats_item_ops,
+ .ct_attrs = iscsi_stat_sess_stats_attrs,
+ .ct_owner = THIS_MODULE,
+};
diff --git a/drivers/target/iscsi/iscsi_target_stat.h b/drivers/target/iscsi/iscsi_target_stat.h
new file mode 100644
index 0000000..3ff76b4
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_stat.h
@@ -0,0 +1,64 @@
+#ifndef ISCSI_TARGET_STAT_H
+#define ISCSI_TARGET_STAT_H
+
+/*
+ * For struct iscsi_tiqn->tiqn_wwn default groups
+ */
+extern struct config_item_type iscsi_stat_instance_cit;
+extern struct config_item_type iscsi_stat_sess_err_cit;
+extern struct config_item_type iscsi_stat_tgt_attr_cit;
+extern struct config_item_type iscsi_stat_login_cit;
+extern struct config_item_type iscsi_stat_logout_cit;
+
+/*
+ * For struct iscsi_session->se_sess default groups
+ */
+extern struct config_item_type iscsi_stat_sess_cit;
+
+/* iSCSI session error types */
+#define ISCSI_SESS_ERR_UNKNOWN 0
+#define ISCSI_SESS_ERR_DIGEST 1
+#define ISCSI_SESS_ERR_CXN_TIMEOUT 2
+#define ISCSI_SESS_ERR_PDU_FORMAT 3
+
+/* iSCSI session error stats */
+struct iscsi_sess_err_stats {
+ spinlock_t lock;
+ u32 digest_errors;
+ u32 cxn_timeout_errors;
+ u32 pdu_format_errors;
+ u32 last_sess_failure_type;
+ char last_sess_fail_rem_name[224];
+} ____cacheline_aligned;
+
+/* iSCSI login failure types (sub oids) */
+#define ISCSI_LOGIN_FAIL_OTHER 2
+#define ISCSI_LOGIN_FAIL_REDIRECT 3
+#define ISCSI_LOGIN_FAIL_AUTHORIZE 4
+#define ISCSI_LOGIN_FAIL_AUTHENTICATE 5
+#define ISCSI_LOGIN_FAIL_NEGOTIATE 6
+
+/* iSCSI login stats */
+struct iscsi_login_stats {
+ spinlock_t lock;
+ u32 accepts;
+ u32 other_fails;
+ u32 redirects;
+ u32 authorize_fails;
+ u32 authenticate_fails;
+ u32 negotiate_fails; /* used for notifications */
+ u64 last_fail_time; /* time stamp (jiffies) */
+ u32 last_fail_type;
+ int last_intr_fail_ip_family;
+ unsigned char last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE];
+ char last_intr_fail_name[224];
+} ____cacheline_aligned;
+
+/* iSCSI logout stats */
+struct iscsi_logout_stats {
+ spinlock_t lock;
+ u32 normal_logouts;
+ u32 abnormal_logouts;
+} ____cacheline_aligned;
+
+#endif /*** ISCSI_TARGET_STAT_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c
new file mode 100644
index 0000000..db1fe1e
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tmr.c
@@ -0,0 +1,849 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target specific Task Management functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <asm/unaligned.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_tmr.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+u8 iscsit_tmr_abort_task(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ struct iscsi_cmd *ref_cmd;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+ struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+ struct iscsi_tm *hdr = (struct iscsi_tm *) buf;
+
+ ref_cmd = iscsit_find_cmd_from_itt(conn, hdr->rtt);
+ if (!ref_cmd) {
+ pr_err("Unable to locate RefTaskTag: 0x%08x on CID:"
+ " %hu.\n", hdr->rtt, conn->cid);
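+		/*
+		 * Per RFC 3720, if the referenced task no longer exists
+		 * but RefCmdSN falls within the valid CmdSN window,
+		 * respond with Function Complete; otherwise respond
+		 * with Task Does Not Exist.
+		 */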
+ return ((hdr->refcmdsn >= conn->sess->exp_cmd_sn) &&
+ (hdr->refcmdsn <= conn->sess->max_cmd_sn)) ?
+ ISCSI_TMF_RSP_COMPLETE : ISCSI_TMF_RSP_NO_TASK;
+ }
+ if (ref_cmd->cmd_sn != hdr->refcmdsn) {
+ pr_err("RefCmdSN 0x%08x does not equal"
+ " task's CmdSN 0x%08x. Rejecting ABORT_TASK.\n",
+ hdr->refcmdsn, ref_cmd->cmd_sn);
+ return ISCSI_TMF_RSP_REJECTED;
+ }
+
+ se_tmr->ref_task_tag = hdr->rtt;
+ se_tmr->ref_cmd = &ref_cmd->se_cmd;
+ tmr_req->ref_cmd_sn = hdr->refcmdsn;
+ tmr_req->exp_data_sn = hdr->exp_datasn;
+
+ return ISCSI_TMF_RSP_COMPLETE;
+}
+
+/*
+ * Called from iscsit_handle_task_mgt_cmd().
+ */
+int iscsit_tmr_task_warm_reset(
+ struct iscsi_conn *conn,
+ struct iscsi_tmr_req *tmr_req,
+ unsigned char *buf)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+#if 0
+ struct iscsi_init_task_mgt_cmnd *hdr =
+ (struct iscsi_init_task_mgt_cmnd *) buf;
+#endif
+ if (!na->tmr_warm_reset) {
+ pr_err("TMR Opcode TARGET_WARM_RESET authorization"
+ " failed for Initiator Node: %s\n",
+ sess->se_sess->se_node_acl->initiatorname);
+ return -1;
+ }
+ /*
+ * Do the real work in transport_generic_do_tmr().
+ */
+ return 0;
+}
+
+int iscsit_tmr_task_cold_reset(
+ struct iscsi_conn *conn,
+ struct iscsi_tmr_req *tmr_req,
+ unsigned char *buf)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+ if (!na->tmr_cold_reset) {
+ pr_err("TMR Opcode TARGET_COLD_RESET authorization"
+ " failed for Initiator Node: %s\n",
+ sess->se_sess->se_node_acl->initiatorname);
+ return -1;
+ }
+ /*
+ * Do the real work in transport_generic_do_tmr().
+ */
+ return 0;
+}
+
+u8 iscsit_tmr_task_reassign(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ struct iscsi_cmd *ref_cmd = NULL;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_conn_recovery *cr = NULL;
+ struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+ struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+ struct iscsi_tm *hdr = (struct iscsi_tm *) buf;
+ int ret;
+
+ pr_debug("Got TASK_REASSIGN TMR ITT: 0x%08x,"
+ " RefTaskTag: 0x%08x, ExpDataSN: 0x%08x, CID: %hu\n",
+ hdr->itt, hdr->rtt, hdr->exp_datasn, conn->cid);
+
+ if (conn->sess->sess_ops->ErrorRecoveryLevel != 2) {
+ pr_err("TMR TASK_REASSIGN not supported in ERL<2,"
+ " ignoring request.\n");
+ return ISCSI_TMF_RSP_NOT_SUPPORTED;
+ }
+
+ ret = iscsit_find_cmd_for_recovery(conn->sess, &ref_cmd, &cr, hdr->rtt);
+ if (ret == -2) {
+		pr_err("Command ITT: 0x%08x is still allegiant to CID:"
+ " %hu\n", ref_cmd->init_task_tag, cr->cid);
+ return ISCSI_TMF_RSP_TASK_ALLEGIANT;
+ } else if (ret == -1) {
+ pr_err("Unable to locate RefTaskTag: 0x%08x in"
+ " connection recovery command list.\n", hdr->rtt);
+ return ISCSI_TMF_RSP_NO_TASK;
+ }
+ /*
+ * Temporary check to prevent connection recovery for
+ * connections with a differing MaxRecvDataSegmentLength.
+ */
+ if (cr->maxrecvdatasegmentlength !=
+ conn->conn_ops->MaxRecvDataSegmentLength) {
+ pr_err("Unable to perform connection recovery for"
+ " differing MaxRecvDataSegmentLength, rejecting"
+ " TMR TASK_REASSIGN.\n");
+ return ISCSI_TMF_RSP_REJECTED;
+ }
+
+ se_tmr->ref_task_tag = hdr->rtt;
+ se_tmr->ref_cmd = &ref_cmd->se_cmd;
+ se_tmr->ref_task_lun = get_unaligned_le64(&hdr->lun);
+ tmr_req->ref_cmd_sn = hdr->refcmdsn;
+ tmr_req->exp_data_sn = hdr->exp_datasn;
+ tmr_req->conn_recovery = cr;
+ tmr_req->task_reassign = 1;
+ /*
+ * Command can now be reassigned to a new connection.
+ * The task management response must be sent before the
+ * reassignment actually happens. See iscsi_tmr_post_handler().
+ */
+ return ISCSI_TMF_RSP_COMPLETE;
+}
+
+static void iscsit_task_reassign_remove_cmd(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn_recovery *cr,
+ struct iscsi_session *sess)
+{
+ int ret;
+
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ ret = iscsit_remove_cmd_from_connection_recovery(cmd, sess);
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ if (!ret) {
+ pr_debug("iSCSI connection recovery successful for CID:"
+ " %hu on SID: %u\n", cr->cid, sess->sid);
+ iscsit_remove_active_connection_recovery_entry(cr, sess);
+ }
+}
+
+static int iscsit_task_reassign_complete_nop_out(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd = se_tmr->ref_cmd;
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+ struct iscsi_conn_recovery *cr;
+
+ if (!cmd->cr) {
+ pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+ " is NULL!\n", cmd->init_task_tag);
+ return -1;
+ }
+ cr = cmd->cr;
+
+ /*
+	 * Reset the StatSN so a new one for this command's new connection
+ * will be assigned.
+ * Reset the ExpStatSN as well so we may receive Status SNACKs.
+ */
+ cmd->stat_sn = cmd->exp_stat_sn = 0;
+
+ iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ cmd->i_state = ISTATE_SEND_NOPIN;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+}
+
+static int iscsit_task_reassign_complete_write(
+ struct iscsi_cmd *cmd,
+ struct iscsi_tmr_req *tmr_req)
+{
+ int no_build_r2ts = 0;
+ u32 length = 0, offset = 0;
+ struct iscsi_conn *conn = cmd->conn;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ /*
+	 * The Initiator must not send an R2T SNACK with a BegRun less than
+ * the TMR TASK_REASSIGN's ExpDataSN.
+ */
+ if (!tmr_req->exp_data_sn) {
+ cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = 0;
+ } else {
+ cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
+ }
+
+ /*
+ * The TMR TASK_REASSIGN's ExpDataSN contains the next R2TSN the
+ * Initiator is expecting. The Target controls all WRITE operations
+	 * so if we have received all DataOUT we can safely ignore the
+	 * Initiator.
+ */
+ if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+ if (!atomic_read(&cmd->transport_sent)) {
+ pr_debug("WRITE ITT: 0x%08x: t_state: %d"
+ " never sent to transport\n",
+ cmd->init_task_tag, cmd->se_cmd.t_state);
+ return transport_generic_handle_data(se_cmd);
+ }
+
+ cmd->i_state = ISTATE_SEND_STATUS;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+ }
+
+ /*
+	 * Special case to deal with DataSequenceInOrder=No and Non-Immediate
+ * Unsolicited DataOut.
+ */
+ if (cmd->unsolicited_data) {
+ cmd->unsolicited_data = 0;
+
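+		/*
+		 * Restart the unsolicited first burst from the amount of
+		 * DataOUT already received, and queue a recovery R2T for
+		 * what remains of that burst.
+		 */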
+ offset = cmd->next_burst_len = cmd->write_data_done;
+
+ if ((conn->sess->sess_ops->FirstBurstLength - offset) >=
+ cmd->data_length) {
+ no_build_r2ts = 1;
+ length = (cmd->data_length - offset);
+ } else
+ length = (conn->sess->sess_ops->FirstBurstLength - offset);
+
+ spin_lock_bh(&cmd->r2t_lock);
+ if (iscsit_add_r2t_to_list(cmd, offset, length, 0, 0) < 0) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+ cmd->outstanding_r2ts++;
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ if (no_build_r2ts)
+ return 0;
+ }
+ /*
+ * iscsit_build_r2ts_for_cmd() can handle the rest from here.
+ */
+ return iscsit_build_r2ts_for_cmd(cmd, conn, 2);
+}
+
+static int iscsit_task_reassign_complete_read(
+ struct iscsi_cmd *cmd,
+ struct iscsi_tmr_req *tmr_req)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_datain_req *dr;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ /*
+ * The Initiator must not send a Data SNACK with a BegRun less than
+ * the TMR TASK_REASSIGN's ExpDataSN.
+ */
+ if (!tmr_req->exp_data_sn) {
+ cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = 0;
+ } else {
+ cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
+ }
+
+ if (!atomic_read(&cmd->transport_sent)) {
+ pr_debug("READ ITT: 0x%08x: t_state: %d never sent to"
+ " transport\n", cmd->init_task_tag,
+ cmd->se_cmd.t_state);
+ transport_generic_handle_cdb(se_cmd);
+ return 0;
+ }
+
+ if (!atomic_read(&se_cmd->t_transport_complete)) {
+ pr_err("READ ITT: 0x%08x: t_state: %d, never returned"
+ " from transport\n", cmd->init_task_tag,
+ cmd->se_cmd.t_state);
+ return -1;
+ }
+
+ dr = iscsit_allocate_datain_req();
+ if (!dr)
+ return -1;
+ /*
+ * The TMR TASK_REASSIGN's ExpDataSN contains the next DataSN the
+ * Initiator is expecting.
+ */
+ dr->data_sn = dr->begrun = tmr_req->exp_data_sn;
+ dr->runlength = 0;
+ dr->generate_recovery_values = 1;
+ dr->recovery = DATAIN_CONNECTION_RECOVERY;
+
+ iscsit_attach_datain_req(cmd, dr);
+
+ cmd->i_state = ISTATE_SEND_DATAIN;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+}
+
+static int iscsit_task_reassign_complete_none(
+ struct iscsi_cmd *cmd,
+ struct iscsi_tmr_req *tmr_req)
+{
+ struct iscsi_conn *conn = cmd->conn;
+
+ cmd->i_state = ISTATE_SEND_STATUS;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+}
+
+static int iscsit_task_reassign_complete_scsi_cmnd(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd = se_tmr->ref_cmd;
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+ struct iscsi_conn_recovery *cr;
+
+ if (!cmd->cr) {
+ pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+ " is NULL!\n", cmd->init_task_tag);
+ return -1;
+ }
+ cr = cmd->cr;
+
+ /*
+	 * Reset the StatSN so a new one for this command's new connection
+ * will be assigned.
+ * Reset the ExpStatSN as well so we may receive Status SNACKs.
+ */
+ cmd->stat_sn = cmd->exp_stat_sn = 0;
+
+ iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) {
+ cmd->i_state = ISTATE_SEND_STATUS;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+ }
+
+ switch (cmd->data_direction) {
+ case DMA_TO_DEVICE:
+ return iscsit_task_reassign_complete_write(cmd, tmr_req);
+ case DMA_FROM_DEVICE:
+ return iscsit_task_reassign_complete_read(cmd, tmr_req);
+ case DMA_NONE:
+ return iscsit_task_reassign_complete_none(cmd, tmr_req);
+ default:
+ pr_err("Unknown cmd->data_direction: 0x%02x\n",
+ cmd->data_direction);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int iscsit_task_reassign_complete(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd;
+ struct iscsi_cmd *cmd;
+ int ret = 0;
+
+ if (!se_tmr->ref_cmd) {
+ pr_err("TMR Request is missing a RefCmd struct iscsi_cmd.\n");
+ return -1;
+ }
+ se_cmd = se_tmr->ref_cmd;
+ cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ cmd->conn = conn;
+
+ switch (cmd->iscsi_opcode) {
+ case ISCSI_OP_NOOP_OUT:
+ ret = iscsit_task_reassign_complete_nop_out(tmr_req, conn);
+ break;
+ case ISCSI_OP_SCSI_CMD:
+ ret = iscsit_task_reassign_complete_scsi_cmnd(tmr_req, conn);
+ break;
+ default:
+		pr_err("Illegal iSCSI Opcode 0x%02x during"
+			" command reallegiance\n", cmd->iscsi_opcode);
+ return -1;
+ }
+
+ if (ret != 0)
+ return ret;
+
+	pr_debug("Completed connection reallegiance for Opcode: 0x%02x,"
+ " ITT: 0x%08x to CID: %hu.\n", cmd->iscsi_opcode,
+ cmd->init_task_tag, conn->cid);
+
+ return 0;
+}
+
+/*
+ * Handles special after-the-fact actions related to TMRs.
+ * Right now the only one it's really needed for is the
+ * connection recovery related TASK_REASSIGN.
+ */
+int iscsit_tmr_post_handler(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+ struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+ struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+
+ if (tmr_req->task_reassign &&
+ (se_tmr->response == ISCSI_TMF_RSP_COMPLETE))
+ return iscsit_task_reassign_complete(tmr_req, conn);
+
+ return 0;
+}
+
+/*
+ * Nothing to do here, but leave it for good measure. :-)
+ */
+int iscsit_task_reassign_prepare_read(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ return 0;
+}
+
+static void iscsit_task_reassign_prepare_unsolicited_dataout(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ int i, j;
+ struct iscsi_pdu *pdu = NULL;
+ struct iscsi_seq *seq = NULL;
+
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ cmd->data_sn = 0;
+
+ if (cmd->immediate_data)
+ cmd->r2t_offset += (cmd->first_burst_len -
+ cmd->seq_start_offset);
+
+ if (conn->sess->sess_ops->DataPDUInOrder) {
+ cmd->write_data_done -= (cmd->immediate_data) ?
+ (cmd->first_burst_len -
+ cmd->seq_start_offset) :
+ cmd->first_burst_len;
+ cmd->first_burst_len = 0;
+ return;
+ }
+
+ for (i = 0; i < cmd->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ if ((pdu->offset >= cmd->seq_start_offset) &&
+ ((pdu->offset + pdu->length) <=
+ cmd->seq_end_offset)) {
+ cmd->first_burst_len -= pdu->length;
+ cmd->write_data_done -= pdu->length;
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+ } else {
+ for (i = 0; i < cmd->seq_count; i++) {
+ seq = &cmd->seq_list[i];
+
+ if (seq->type != SEQTYPE_UNSOLICITED)
+ continue;
+
+ cmd->write_data_done -=
+ (seq->offset - seq->orig_offset);
+ cmd->first_burst_len = 0;
+ seq->data_sn = 0;
+ seq->offset = seq->orig_offset;
+ seq->next_burst_len = 0;
+ seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+ if (conn->sess->sess_ops->DataPDUInOrder)
+ continue;
+
+ for (j = 0; j < seq->pdu_count; j++) {
+ pdu = &cmd->pdu_list[j+seq->pdu_start];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+ }
+}
+
+int iscsit_task_reassign_prepare_write(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd = se_tmr->ref_cmd;
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+ struct iscsi_pdu *pdu = NULL;
+ struct iscsi_r2t *r2t = NULL, *r2t_tmp;
+ int first_incomplete_r2t = 1, i = 0;
+
+ /*
+ * The command was in the process of receiving Unsolicited DataOUT when
+ * the connection failed.
+ */
+ if (cmd->unsolicited_data)
+ iscsit_task_reassign_prepare_unsolicited_dataout(cmd, conn);
+
+ /*
+	 * The Initiator is requesting R2Ts starting from zero, so skip
+	 * the check for acknowledged R2Ts and go straight to dropping
+	 * the unacknowledged ones.
+ */
+ if (!tmr_req->exp_data_sn)
+ goto drop_unacknowledged_r2ts;
+
+ /*
+ * We now check that the PDUs in DataOUT sequences below
+ * the TMR TASK_REASSIGN ExpDataSN (R2TSN the Initiator is
+ * expecting next) have all the DataOUT they require to complete
+ * the DataOUT sequence. First scan from R2TSN 0 to TMR
+ * TASK_REASSIGN ExpDataSN-1.
+ *
+ * If we have not received all DataOUT in question, we must
+ * make sure to make the appropriate changes to values in
+ * struct iscsi_cmd (and elsewhere depending on session parameters)
+ * so iscsit_build_r2ts_for_cmd() in iscsit_task_reassign_complete_write()
+ * will resend a new R2T for the DataOUT sequences in question.
+ */
+ spin_lock_bh(&cmd->r2t_lock);
+ if (list_empty(&cmd->cmd_r2t_list)) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
+ list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+
+ if (r2t->r2t_sn >= tmr_req->exp_data_sn)
+ continue;
+ /*
+ * Safely ignore Recovery R2Ts and R2Ts that have completed
+ * DataOUT sequences.
+ */
+ if (r2t->seq_complete)
+ continue;
+
+ if (r2t->recovery_r2t)
+ continue;
+
+ /*
+ * DataSequenceInOrder=Yes:
+ *
+ * Taking into account the iSCSI implementation requirement of
+ * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and
+ * DataSequenceInOrder=Yes, we must take into consideration
+ * the following:
+ *
+ * DataSequenceInOrder=No:
+ *
+ * Taking into account that the Initiator controls the (possibly
+ * random) PDU Order in (possibly random) Sequence Order of
+ * DataOUT the target requests with R2Ts, we must take into
+ * consideration the following:
+ *
+ * DataPDUInOrder=Yes for DataSequenceInOrder=[Yes,No]:
+ *
+ * While processing non-complete R2T DataOUT sequence requests
+ * the Target will re-request only the total sequence length
+ * minus current received offset. This is because we must
+ * assume the initiator will continue sending DataOUT from the
+ * last PDU before the connection failed.
+ *
+ * DataPDUInOrder=No for DataSequenceInOrder=[Yes,No]:
+ *
+ * While processing non-complete R2T DataOUT sequence requests
+ * the Target will re-request the entire DataOUT sequence if
+ * any single PDU is missing from the sequence. This is because
+ * we have no logical method to determine the next PDU offset,
+ * and we must assume the Initiator will be sending any random
+ * PDU offset in the current sequence after TASK_REASSIGN
+ * has completed.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ if (!first_incomplete_r2t) {
+ cmd->r2t_offset -= r2t->xfer_len;
+ goto next;
+ }
+
+ if (conn->sess->sess_ops->DataPDUInOrder) {
+ cmd->data_sn = 0;
+ cmd->r2t_offset -= (r2t->xfer_len -
+ cmd->next_burst_len);
+ first_incomplete_r2t = 0;
+ goto next;
+ }
+
+ cmd->data_sn = 0;
+ cmd->r2t_offset -= r2t->xfer_len;
+
+ for (i = 0; i < cmd->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ if ((pdu->offset >= r2t->offset) &&
+ (pdu->offset < (r2t->offset +
+ r2t->xfer_len))) {
+ cmd->next_burst_len -= pdu->length;
+ cmd->write_data_done -= pdu->length;
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+
+ first_incomplete_r2t = 0;
+ } else {
+ struct iscsi_seq *seq;
+
+ seq = iscsit_get_seq_holder(cmd, r2t->offset,
+ r2t->xfer_len);
+ if (!seq) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
+ cmd->write_data_done -=
+ (seq->offset - seq->orig_offset);
+ seq->data_sn = 0;
+ seq->offset = seq->orig_offset;
+ seq->next_burst_len = 0;
+ seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+ cmd->seq_send_order--;
+
+ if (conn->sess->sess_ops->DataPDUInOrder)
+ goto next;
+
+ for (i = 0; i < seq->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i+seq->pdu_start];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+
+next:
+ cmd->outstanding_r2ts--;
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ /*
+	 * We now drop all unacknowledged R2Ts, i.e. from the TMR TASK_REASSIGN
+	 * ExpDataSN to the last R2T in the list. We are also careful
+ * to check that the Initiator is not requesting R2Ts for DataOUT
+ * sequences it has already completed.
+ *
+ * Free each R2T in question and adjust values in struct iscsi_cmd
+	 * accordingly so iscsit_build_r2ts_for_cmd() can do the rest of
+ * the work after the TMR TASK_REASSIGN Response is sent.
+ */
+drop_unacknowledged_r2ts:
+
+ cmd->cmd_flags &= ~ICF_SENT_LAST_R2T;
+ cmd->r2t_sn = tmr_req->exp_data_sn;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list) {
+ /*
+ * Skip up to the R2T Sequence number provided by the
+ * iSCSI TASK_REASSIGN TMR
+ */
+ if (r2t->r2t_sn < tmr_req->exp_data_sn)
+ continue;
+
+ if (r2t->seq_complete) {
+ pr_err("Initiator is requesting R2Ts from"
+ " R2TSN: 0x%08x, but R2TSN: 0x%08x, Offset: %u,"
+ " Length: %u is already complete."
+ " BAD INITIATOR ERL=2 IMPLEMENTATION!\n",
+ tmr_req->exp_data_sn, r2t->r2t_sn,
+ r2t->offset, r2t->xfer_len);
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
+ if (r2t->recovery_r2t) {
+ iscsit_free_r2t(r2t, cmd);
+ continue;
+ }
+
+ /* DataSequenceInOrder=Yes:
+ *
+ * Taking into account the iSCSI implementation requirement of
+ * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and
+		 * DataSequenceInOrder=Yes, it's safe to subtract the R2T's
+		 * entire transfer length from the command's R2T offset marker.
+ *
+ * DataSequenceInOrder=No:
+ *
+		 * We subtract the struct iscsi_seq difference between the
+		 * current offset and the original offset from cmd->write_data_done
+		 * to account for DataOUT PDUs already received. Then reset
+ * the current offset to the original and zero out the current
+ * burst length, to make sure we re-request the entire DataOUT
+ * sequence.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder)
+ cmd->r2t_offset -= r2t->xfer_len;
+ else
+ cmd->seq_send_order--;
+
+ cmd->outstanding_r2ts--;
+ iscsit_free_r2t(r2t, cmd);
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ return 0;
+}
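
For the DataSequenceInOrder=Yes case, the drop loop above amounts to walking the R2T list and rolling cmd->r2t_offset back by the transfer length of every R2T at or beyond the TMR's ExpDataSN. A standalone sketch of that bookkeeping, with parallel arrays standing in for the R2T list:

	#include <stdio.h>

	static unsigned int drop_unacked_r2ts(unsigned int r2t_offset,
					      const unsigned int *r2t_sn,
					      const unsigned int *xfer_len,
					      int nr, unsigned int exp_data_sn)
	{
		int i;

		for (i = 0; i < nr; i++) {
			if (r2t_sn[i] < exp_data_sn)
				continue;	/* Initiator already holds this R2T */
			r2t_offset -= xfer_len[i];	/* roll the offset back */
		}
		return r2t_offset;	/* next R2T is rebuilt from here */
	}

	int main(void)
	{
		unsigned int sn[] = { 0, 1, 2 };
		unsigned int len[] = { 8192, 8192, 8192 };

		/* ExpDataSN=1: R2TSNs 1 and 2 are dropped and re-requested. */
		printf("%u\n", drop_unacked_r2ts(24576, sn, len, 3, 1));
		return 0;
	}
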
+
+/*
+ * Performs sanity checks on the TMR TASK_REASSIGN's ExpDataSN for
+ * a given struct iscsi_cmd.
+ */
+int iscsit_check_task_reassign_expdatasn(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd = se_tmr->ref_cmd;
+ struct iscsi_cmd *ref_cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ if (ref_cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD)
+ return 0;
+
+ if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION)
+ return 0;
+
+ if (ref_cmd->data_direction == DMA_NONE)
+ return 0;
+
+ /*
+	 * For READs the TMR TASK_REASSIGN's ExpDataSN contains the next DataSN
+ * of DataIN the Initiator is expecting.
+ *
+ * Also check that the Initiator is not re-requesting DataIN that has
+ * already been acknowledged with a DataAck SNACK.
+ */
+ if (ref_cmd->data_direction == DMA_FROM_DEVICE) {
+ if (tmr_req->exp_data_sn > ref_cmd->data_sn) {
+ pr_err("Received ExpDataSN: 0x%08x for READ"
+ " in TMR TASK_REASSIGN greater than command's"
+ " DataSN: 0x%08x.\n", tmr_req->exp_data_sn,
+ ref_cmd->data_sn);
+ return -1;
+ }
+ if ((ref_cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+ (tmr_req->exp_data_sn <= ref_cmd->acked_data_sn)) {
+ pr_err("Received ExpDataSN: 0x%08x for READ"
+ " in TMR TASK_REASSIGN for previously"
+ " acknowledged DataIN: 0x%08x,"
+ " protocol error\n", tmr_req->exp_data_sn,
+ ref_cmd->acked_data_sn);
+ return -1;
+ }
+ return iscsit_task_reassign_prepare_read(tmr_req, conn);
+ }
+
+ /*
+	 * For WRITEs the TMR TASK_REASSIGN's ExpDataSN contains the next R2TSN
+ * for R2Ts the Initiator is expecting.
+ *
+ * Do the magic in iscsit_task_reassign_prepare_write().
+ */
+ if (ref_cmd->data_direction == DMA_TO_DEVICE) {
+ if (tmr_req->exp_data_sn > ref_cmd->r2t_sn) {
+ pr_err("Received ExpDataSN: 0x%08x for WRITE"
+ " in TMR TASK_REASSIGN greater than command's"
+ " R2TSN: 0x%08x.\n", tmr_req->exp_data_sn,
+ ref_cmd->r2t_sn);
+ return -1;
+ }
+ return iscsit_task_reassign_prepare_write(tmr_req, conn);
+ }
+
+ pr_err("Unknown iSCSI data_direction: 0x%02x\n",
+ ref_cmd->data_direction);
+
+ return -1;
+}
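
The READ-side checks reduce to a pure window test on ExpDataSN. A hedged standalone model of just that test:

	#include <stdio.h>

	static int expdatasn_valid_for_read(unsigned int exp_data_sn,
					    unsigned int cmd_data_sn,
					    int got_datack_snack,
					    unsigned int acked_data_sn)
	{
		if (exp_data_sn > cmd_data_sn)
			return 0;	/* beyond any DataIN ever sent */
		if (got_datack_snack && exp_data_sn <= acked_data_sn)
			return 0;	/* reaches back into acknowledged DataIN */
		return 1;
	}

	int main(void)
	{
		printf("%d\n", expdatasn_valid_for_read(4, 10, 1, 5));	/* 0 */
		printf("%d\n", expdatasn_valid_for_read(6, 10, 1, 5));	/* 1 */
		return 0;
	}
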
diff --git a/drivers/target/iscsi/iscsi_target_tmr.h b/drivers/target/iscsi/iscsi_target_tmr.h
new file mode 100644
index 0000000..142e992
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tmr.h
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_TMR_H
+#define ISCSI_TARGET_TMR_H
+
+extern u8 iscsit_tmr_abort_task(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_tmr_task_warm_reset(struct iscsi_conn *, struct iscsi_tmr_req *,
+ unsigned char *);
+extern int iscsit_tmr_task_cold_reset(struct iscsi_conn *, struct iscsi_tmr_req *,
+ unsigned char *);
+extern u8 iscsit_tmr_task_reassign(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_check_task_reassign_expdatasn(struct iscsi_tmr_req *,
+ struct iscsi_conn *);
+
+#endif /* ISCSI_TARGET_TMR_H */
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
new file mode 100644
index 0000000..d4cf2cd
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -0,0 +1,759 @@
+/*******************************************************************************
+ * This file contains iSCSI Target Portal Group related functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_tpg.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_login.h"
+#include "iscsi_target_nodeattrib.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_parameters.h"
+
+struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u16 tpgt)
+{
+ struct iscsi_portal_group *tpg;
+
+ tpg = kzalloc(sizeof(struct iscsi_portal_group), GFP_KERNEL);
+ if (!tpg) {
+ pr_err("Unable to allocate struct iscsi_portal_group\n");
+ return NULL;
+ }
+
+ tpg->tpgt = tpgt;
+ tpg->tpg_state = TPG_STATE_FREE;
+ tpg->tpg_tiqn = tiqn;
+ INIT_LIST_HEAD(&tpg->tpg_gnp_list);
+ INIT_LIST_HEAD(&tpg->tpg_list);
+ mutex_init(&tpg->tpg_access_lock);
+ mutex_init(&tpg->np_login_lock);
+ spin_lock_init(&tpg->tpg_state_lock);
+ spin_lock_init(&tpg->tpg_np_lock);
+
+ return tpg;
+}
+
+static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *);
+
+int iscsit_load_discovery_tpg(void)
+{
+ struct iscsi_param *param;
+ struct iscsi_portal_group *tpg;
+ int ret;
+
+ tpg = iscsit_alloc_portal_group(NULL, 1);
+ if (!tpg) {
+ pr_err("Unable to allocate struct iscsi_portal_group\n");
+ return -1;
+ }
+
+ ret = core_tpg_register(
+ &lio_target_fabric_configfs->tf_ops,
+ NULL, &tpg->tpg_se_tpg, (void *)tpg,
+ TRANSPORT_TPG_TYPE_DISCOVERY);
+ if (ret < 0) {
+ kfree(tpg);
+ return -1;
+ }
+
+ tpg->sid = 1; /* First Assigned LIO Session ID */
+ iscsit_set_default_tpg_attribs(tpg);
+
+ if (iscsi_create_default_params(&tpg->param_list) < 0)
+ goto out;
+ /*
+ * By default we disable authentication for discovery sessions,
+ * this can be changed with:
+ *
+ * /sys/kernel/config/target/iscsi/discovery_auth/enforce_discovery_auth
+ */
+ param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+ if (!param)
+ goto out;
+
+ if (iscsi_update_param_value(param, "CHAP,None") < 0)
+ goto out;
+
+ tpg->tpg_attrib.authentication = 0;
+
+ spin_lock(&tpg->tpg_state_lock);
+ tpg->tpg_state = TPG_STATE_ACTIVE;
+ spin_unlock(&tpg->tpg_state_lock);
+
+ iscsit_global->discovery_tpg = tpg;
+ pr_debug("CORE[0] - Allocated Discovery TPG\n");
+
+ return 0;
+out:
+ if (tpg->sid == 1)
+ core_tpg_deregister(&tpg->tpg_se_tpg);
+ kfree(tpg);
+ return -1;
+}
+
+void iscsit_release_discovery_tpg(void)
+{
+ struct iscsi_portal_group *tpg = iscsit_global->discovery_tpg;
+
+ if (!tpg)
+ return;
+
+ core_tpg_deregister(&tpg->tpg_se_tpg);
+
+ kfree(tpg);
+ iscsit_global->discovery_tpg = NULL;
+}
+
+struct iscsi_portal_group *iscsit_get_tpg_from_np(
+ struct iscsi_tiqn *tiqn,
+ struct iscsi_np *np)
+{
+ struct iscsi_portal_group *tpg = NULL;
+ struct iscsi_tpg_np *tpg_np;
+
+ spin_lock(&tiqn->tiqn_tpg_lock);
+ list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) {
+
+ spin_lock(&tpg->tpg_state_lock);
+ if (tpg->tpg_state == TPG_STATE_FREE) {
+ spin_unlock(&tpg->tpg_state_lock);
+ continue;
+ }
+ spin_unlock(&tpg->tpg_state_lock);
+
+ spin_lock(&tpg->tpg_np_lock);
+ list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) {
+ if (tpg_np->tpg_np == np) {
+ spin_unlock(&tpg->tpg_np_lock);
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+ return tpg;
+ }
+ }
+ spin_unlock(&tpg->tpg_np_lock);
+ }
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+
+ return NULL;
+}
+
+int iscsit_get_tpg(
+ struct iscsi_portal_group *tpg)
+{
+ int ret;
+
+ ret = mutex_lock_interruptible(&tpg->tpg_access_lock);
+ return ((ret != 0) || signal_pending(current)) ? -1 : 0;
+}
+
+void iscsit_put_tpg(struct iscsi_portal_group *tpg)
+{
+ mutex_unlock(&tpg->tpg_access_lock);
+}
+
+static void iscsit_clear_tpg_np_login_thread(
+ struct iscsi_tpg_np *tpg_np,
+ struct iscsi_portal_group *tpg)
+{
+ if (!tpg_np->tpg_np) {
+ pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n");
+ return;
+ }
+
+ iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg);
+}
+
+void iscsit_clear_tpg_np_login_threads(
+ struct iscsi_portal_group *tpg)
+{
+ struct iscsi_tpg_np *tpg_np;
+
+ spin_lock(&tpg->tpg_np_lock);
+ list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) {
+ if (!tpg_np->tpg_np) {
+ pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n");
+ continue;
+ }
+ spin_unlock(&tpg->tpg_np_lock);
+ iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+ spin_lock(&tpg->tpg_np_lock);
+ }
+ spin_unlock(&tpg->tpg_np_lock);
+}
+
+void iscsit_tpg_dump_params(struct iscsi_portal_group *tpg)
+{
+ iscsi_print_params(tpg->param_list);
+}
+
+static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ a->authentication = TA_AUTHENTICATION;
+ a->login_timeout = TA_LOGIN_TIMEOUT;
+ a->netif_timeout = TA_NETIF_TIMEOUT;
+ a->default_cmdsn_depth = TA_DEFAULT_CMDSN_DEPTH;
+ a->generate_node_acls = TA_GENERATE_NODE_ACLS;
+ a->cache_dynamic_acls = TA_CACHE_DYNAMIC_ACLS;
+ a->demo_mode_write_protect = TA_DEMO_MODE_WRITE_PROTECT;
+ a->prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT;
+}
+
+int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg)
+{
+ if (tpg->tpg_state != TPG_STATE_FREE) {
+ pr_err("Unable to add iSCSI Target Portal Group: %d"
+ " while not in TPG_STATE_FREE state.\n", tpg->tpgt);
+ return -EEXIST;
+ }
+ iscsit_set_default_tpg_attribs(tpg);
+
+ if (iscsi_create_default_params(&tpg->param_list) < 0)
+ goto err_out;
+
+ ISCSI_TPG_ATTRIB(tpg)->tpg = tpg;
+
+ spin_lock(&tpg->tpg_state_lock);
+ tpg->tpg_state = TPG_STATE_INACTIVE;
+ spin_unlock(&tpg->tpg_state_lock);
+
+ spin_lock(&tiqn->tiqn_tpg_lock);
+ list_add_tail(&tpg->tpg_list, &tiqn->tiqn_tpg_list);
+ tiqn->tiqn_ntpgs++;
+ pr_debug("CORE[%s]_TPG[%hu] - Added iSCSI Target Portal Group\n",
+ tiqn->tiqn, tpg->tpgt);
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+
+ return 0;
+err_out:
+ if (tpg->param_list) {
+ iscsi_release_param_list(tpg->param_list);
+ tpg->param_list = NULL;
+ }
+ kfree(tpg);
+ return -ENOMEM;
+}
+
+int iscsit_tpg_del_portal_group(
+ struct iscsi_tiqn *tiqn,
+ struct iscsi_portal_group *tpg,
+ int force)
+{
+ u8 old_state = tpg->tpg_state;
+
+ spin_lock(&tpg->tpg_state_lock);
+ tpg->tpg_state = TPG_STATE_INACTIVE;
+ spin_unlock(&tpg->tpg_state_lock);
+
+ if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
+ pr_err("Unable to delete iSCSI Target Portal Group:"
+ " %hu while active sessions exist, and force=0\n",
+ tpg->tpgt);
+ tpg->tpg_state = old_state;
+ return -EPERM;
+ }
+
+ core_tpg_clear_object_luns(&tpg->tpg_se_tpg);
+
+ if (tpg->param_list) {
+ iscsi_release_param_list(tpg->param_list);
+ tpg->param_list = NULL;
+ }
+
+ core_tpg_deregister(&tpg->tpg_se_tpg);
+
+ spin_lock(&tpg->tpg_state_lock);
+ tpg->tpg_state = TPG_STATE_FREE;
+ spin_unlock(&tpg->tpg_state_lock);
+
+ spin_lock(&tiqn->tiqn_tpg_lock);
+ tiqn->tiqn_ntpgs--;
+ list_del(&tpg->tpg_list);
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+
+ pr_debug("CORE[%s]_TPG[%hu] - Deleted iSCSI Target Portal Group\n",
+ tiqn->tiqn, tpg->tpgt);
+
+ kfree(tpg);
+ return 0;
+}
+
+int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
+{
+ struct iscsi_param *param;
+ struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+ spin_lock(&tpg->tpg_state_lock);
+ if (tpg->tpg_state == TPG_STATE_ACTIVE) {
+ pr_err("iSCSI target portal group: %hu is already"
+ " active, ignoring request.\n", tpg->tpgt);
+ spin_unlock(&tpg->tpg_state_lock);
+ return -EINVAL;
+ }
+ /*
+ * Make sure that AuthMethod does not contain None as an option
+	 * unless explicitly disabled. Set the default to CHAP if authentication
+	 * is enforced (the default), and remove the NONE option.
+ */
+ param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+ if (!param) {
+ spin_unlock(&tpg->tpg_state_lock);
+ return -ENOMEM;
+ }
+
+ if (ISCSI_TPG_ATTRIB(tpg)->authentication) {
+ if (!strcmp(param->value, NONE))
+ if (iscsi_update_param_value(param, CHAP) < 0) {
+ spin_unlock(&tpg->tpg_state_lock);
+ return -ENOMEM;
+ }
+ if (iscsit_ta_authentication(tpg, 1) < 0) {
+ spin_unlock(&tpg->tpg_state_lock);
+ return -ENOMEM;
+ }
+ }
+
+ tpg->tpg_state = TPG_STATE_ACTIVE;
+ spin_unlock(&tpg->tpg_state_lock);
+
+ spin_lock(&tiqn->tiqn_tpg_lock);
+ tiqn->tiqn_active_tpgs++;
+ pr_debug("iSCSI_TPG[%hu] - Enabled iSCSI Target Portal Group\n",
+ tpg->tpgt);
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+
+ return 0;
+}
+
+int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force)
+{
+ struct iscsi_tiqn *tiqn;
+ u8 old_state = tpg->tpg_state;
+
+ spin_lock(&tpg->tpg_state_lock);
+ if (tpg->tpg_state == TPG_STATE_INACTIVE) {
+ pr_err("iSCSI Target Portal Group: %hu is already"
+ " inactive, ignoring request.\n", tpg->tpgt);
+ spin_unlock(&tpg->tpg_state_lock);
+ return -EINVAL;
+ }
+ tpg->tpg_state = TPG_STATE_INACTIVE;
+ spin_unlock(&tpg->tpg_state_lock);
+
+ iscsit_clear_tpg_np_login_threads(tpg);
+
+ if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
+ spin_lock(&tpg->tpg_state_lock);
+ tpg->tpg_state = old_state;
+ spin_unlock(&tpg->tpg_state_lock);
+ pr_err("Unable to disable iSCSI Target Portal Group:"
+ " %hu while active sessions exist, and force=0\n",
+ tpg->tpgt);
+ return -EPERM;
+ }
+
+ tiqn = tpg->tpg_tiqn;
+ if (!tiqn || (tpg == iscsit_global->discovery_tpg))
+ return 0;
+
+ spin_lock(&tiqn->tiqn_tpg_lock);
+ tiqn->tiqn_active_tpgs--;
+ pr_debug("iSCSI_TPG[%hu] - Disabled iSCSI Target Portal Group\n",
+ tpg->tpgt);
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+
+ return 0;
+}
+
+struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(
+ struct iscsi_session *sess)
+{
+ struct se_session *se_sess = sess->se_sess;
+ struct se_node_acl *se_nacl = se_sess->se_node_acl;
+ struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl,
+ se_node_acl);
+
+ return &acl->node_attrib;
+}
+
+struct iscsi_tpg_np *iscsit_tpg_locate_child_np(
+ struct iscsi_tpg_np *tpg_np,
+ int network_transport)
+{
+ struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp;
+
+ spin_lock(&tpg_np->tpg_np_parent_lock);
+ list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp,
+ &tpg_np->tpg_np_parent_list, tpg_np_child_list) {
+ if (tpg_np_child->tpg_np->np_network_transport ==
+ network_transport) {
+ spin_unlock(&tpg_np->tpg_np_parent_lock);
+ return tpg_np_child;
+ }
+ }
+ spin_unlock(&tpg_np->tpg_np_parent_lock);
+
+ return NULL;
+}
+
+struct iscsi_tpg_np *iscsit_tpg_add_network_portal(
+ struct iscsi_portal_group *tpg,
+ struct __kernel_sockaddr_storage *sockaddr,
+ char *ip_str,
+ struct iscsi_tpg_np *tpg_np_parent,
+ int network_transport)
+{
+ struct iscsi_np *np;
+ struct iscsi_tpg_np *tpg_np;
+
+ tpg_np = kzalloc(sizeof(struct iscsi_tpg_np), GFP_KERNEL);
+ if (!tpg_np) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_tpg_np.\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ np = iscsit_add_np(sockaddr, ip_str, network_transport);
+ if (IS_ERR(np)) {
+ kfree(tpg_np);
+ return ERR_CAST(np);
+ }
+
+ INIT_LIST_HEAD(&tpg_np->tpg_np_list);
+ INIT_LIST_HEAD(&tpg_np->tpg_np_child_list);
+ INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list);
+ spin_lock_init(&tpg_np->tpg_np_parent_lock);
+ tpg_np->tpg_np = np;
+ tpg_np->tpg = tpg;
+
+ spin_lock(&tpg->tpg_np_lock);
+ list_add_tail(&tpg_np->tpg_np_list, &tpg->tpg_gnp_list);
+ tpg->num_tpg_nps++;
+ if (tpg->tpg_tiqn)
+ tpg->tpg_tiqn->tiqn_num_tpg_nps++;
+ spin_unlock(&tpg->tpg_np_lock);
+
+ if (tpg_np_parent) {
+ tpg_np->tpg_np_parent = tpg_np_parent;
+ spin_lock(&tpg_np_parent->tpg_np_parent_lock);
+ list_add_tail(&tpg_np->tpg_np_child_list,
+ &tpg_np_parent->tpg_np_parent_list);
+ spin_unlock(&tpg_np_parent->tpg_np_parent_lock);
+ }
+
+ pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n",
+ tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+ (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP");
+
+ return tpg_np;
+}
+
+static int iscsit_tpg_release_np(
+ struct iscsi_tpg_np *tpg_np,
+ struct iscsi_portal_group *tpg,
+ struct iscsi_np *np)
+{
+ iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+
+ pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n",
+ tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+ (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP");
+
+ tpg_np->tpg_np = NULL;
+ tpg_np->tpg = NULL;
+ kfree(tpg_np);
+ /*
+	 * iscsit_del_np() will shut down the struct iscsi_np when the
+	 * last TPG reference is released.
+	 */
+ return iscsit_del_np(np);
+}
+
+int iscsit_tpg_del_network_portal(
+ struct iscsi_portal_group *tpg,
+ struct iscsi_tpg_np *tpg_np)
+{
+ struct iscsi_np *np;
+ struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp;
+ int ret = 0;
+
+ np = tpg_np->tpg_np;
+ if (!np) {
+ pr_err("Unable to locate struct iscsi_np from"
+ " struct iscsi_tpg_np\n");
+ return -EINVAL;
+ }
+
+ if (!tpg_np->tpg_np_parent) {
+ /*
+ * We are the parent tpg network portal. Release all of the
+		 * child tpg_np's (e.g. the non-ISCSI_TCP ones) on our parent
+ * list first.
+ */
+ list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp,
+ &tpg_np->tpg_np_parent_list,
+ tpg_np_child_list) {
+ ret = iscsit_tpg_del_network_portal(tpg, tpg_np_child);
+ if (ret < 0)
+ pr_err("iscsit_tpg_del_network_portal()"
+ " failed: %d\n", ret);
+ }
+ } else {
+ /*
+ * We are not the parent ISCSI_TCP tpg network portal. Release
+ * our own network portals from the child list.
+ */
+ spin_lock(&tpg_np->tpg_np_parent->tpg_np_parent_lock);
+ list_del(&tpg_np->tpg_np_child_list);
+ spin_unlock(&tpg_np->tpg_np_parent->tpg_np_parent_lock);
+ }
+
+ spin_lock(&tpg->tpg_np_lock);
+ list_del(&tpg_np->tpg_np_list);
+ tpg->num_tpg_nps--;
+ if (tpg->tpg_tiqn)
+ tpg->tpg_tiqn->tiqn_num_tpg_nps--;
+ spin_unlock(&tpg->tpg_np_lock);
+
+ return iscsit_tpg_release_np(tpg_np, tpg, np);
+}
+
+int iscsit_tpg_set_initiator_node_queue_depth(
+ struct iscsi_portal_group *tpg,
+ unsigned char *initiatorname,
+ u32 queue_depth,
+ int force)
+{
+ return core_tpg_set_initiator_node_queue_depth(&tpg->tpg_se_tpg,
+ initiatorname, queue_depth, force);
+}
+
+int iscsit_ta_authentication(struct iscsi_portal_group *tpg, u32 authentication)
+{
+ unsigned char buf1[256], buf2[256], *none = NULL;
+ int len;
+ struct iscsi_param *param;
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if ((authentication != 1) && (authentication != 0)) {
+ pr_err("Illegal value for authentication parameter:"
+ " %u, ignoring request.\n", authentication);
+ return -1;
+ }
+
+ memset(buf1, 0, sizeof(buf1));
+ memset(buf2, 0, sizeof(buf2));
+
+ param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+ if (!param)
+ return -EINVAL;
+
+ if (authentication) {
+ snprintf(buf1, sizeof(buf1), "%s", param->value);
+ none = strstr(buf1, NONE);
+ if (!none)
+ goto out;
+ if (!strncmp(none + 4, ",", 1)) {
+ if (!strcmp(buf1, none))
+ sprintf(buf2, "%s", none+5);
+ else {
+ none--;
+ *none = '\0';
+ len = sprintf(buf2, "%s", buf1);
+ none += 5;
+ sprintf(buf2 + len, "%s", none);
+ }
+ } else {
+ none--;
+ *none = '\0';
+ sprintf(buf2, "%s", buf1);
+ }
+ if (iscsi_update_param_value(param, buf2) < 0)
+ return -EINVAL;
+ } else {
+ snprintf(buf1, sizeof(buf1), "%s", param->value);
+ none = strstr(buf1, NONE);
+ if ((none))
+ goto out;
+ strncat(buf1, ",", strlen(","));
+ strncat(buf1, NONE, strlen(NONE));
+ if (iscsi_update_param_value(param, buf1) < 0)
+ return -EINVAL;
+ }
+
+out:
+ a->authentication = authentication;
+ pr_debug("%s iSCSI Authentication Methods for TPG: %hu.\n",
+ a->authentication ? "Enforcing" : "Disabling", tpg->tpgt);
+
+ return 0;
+}
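
The buffer juggling above edits "None" in or out of a comma-separated AuthMethod value list. A userspace model of the enforce path (stripping "None" from the list); the helper and buffer names are hypothetical, and like the kernel code it assumes "None" appears as a whole token:

	#include <stdio.h>
	#include <string.h>

	static void strip_none(char *list)
	{
		char *none = strstr(list, "None");

		if (!none)
			return;
		if (none[4] == ',')		/* "None,CHAP" -> "CHAP" */
			memmove(none, none + 5, strlen(none + 5) + 1);
		else if (none != list && none[-1] == ',')
			none[-1] = '\0';	/* "CHAP,None" -> "CHAP" */
		else
			*none = '\0';		/* value was just "None" */
	}

	int main(void)
	{
		char val[64] = "CHAP,None";

		strip_none(val);
		printf("%s\n", val);	/* prints "CHAP" */
		return 0;
	}
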
+
+int iscsit_ta_login_timeout(
+ struct iscsi_portal_group *tpg,
+ u32 login_timeout)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if (login_timeout > TA_LOGIN_TIMEOUT_MAX) {
+ pr_err("Requested Login Timeout %u larger than maximum"
+ " %u\n", login_timeout, TA_LOGIN_TIMEOUT_MAX);
+ return -EINVAL;
+ } else if (login_timeout < TA_LOGIN_TIMEOUT_MIN) {
+		pr_err("Requested Login Timeout %u smaller than"
+ " minimum %u\n", login_timeout, TA_LOGIN_TIMEOUT_MIN);
+ return -EINVAL;
+ }
+
+ a->login_timeout = login_timeout;
+	pr_debug("Set Login Timeout to %u for Target Portal Group"
+ " %hu\n", a->login_timeout, tpg->tpgt);
+
+ return 0;
+}
+
+int iscsit_ta_netif_timeout(
+ struct iscsi_portal_group *tpg,
+ u32 netif_timeout)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if (netif_timeout > TA_NETIF_TIMEOUT_MAX) {
+ pr_err("Requested Network Interface Timeout %u larger"
+ " than maximum %u\n", netif_timeout,
+ TA_NETIF_TIMEOUT_MAX);
+ return -EINVAL;
+ } else if (netif_timeout < TA_NETIF_TIMEOUT_MIN) {
+ pr_err("Requested Network Interface Timeout %u smaller"
+ " than minimum %u\n", netif_timeout,
+ TA_NETIF_TIMEOUT_MIN);
+ return -EINVAL;
+ }
+
+ a->netif_timeout = netif_timeout;
+ pr_debug("Set Network Interface Timeout to %u for"
+ " Target Portal Group %hu\n", a->netif_timeout, tpg->tpgt);
+
+ return 0;
+}
+
+int iscsit_ta_generate_node_acls(
+ struct iscsi_portal_group *tpg,
+ u32 flag)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if ((flag != 0) && (flag != 1)) {
+ pr_err("Illegal value %d\n", flag);
+ return -EINVAL;
+ }
+
+ a->generate_node_acls = flag;
+ pr_debug("iSCSI_TPG[%hu] - Generate Initiator Portal Group ACLs: %s\n",
+ tpg->tpgt, (a->generate_node_acls) ? "Enabled" : "Disabled");
+
+ return 0;
+}
+
+int iscsit_ta_default_cmdsn_depth(
+ struct iscsi_portal_group *tpg,
+ u32 tcq_depth)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if (tcq_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) {
+ pr_err("Requested Default Queue Depth: %u larger"
+ " than maximum %u\n", tcq_depth,
+ TA_DEFAULT_CMDSN_DEPTH_MAX);
+ return -EINVAL;
+ } else if (tcq_depth < TA_DEFAULT_CMDSN_DEPTH_MIN) {
+ pr_err("Requested Default Queue Depth: %u smaller"
+ " than minimum %u\n", tcq_depth,
+ TA_DEFAULT_CMDSN_DEPTH_MIN);
+ return -EINVAL;
+ }
+
+ a->default_cmdsn_depth = tcq_depth;
+ pr_debug("iSCSI_TPG[%hu] - Set Default CmdSN TCQ Depth to %u\n",
+ tpg->tpgt, a->default_cmdsn_depth);
+
+ return 0;
+}
+
+int iscsit_ta_cache_dynamic_acls(
+ struct iscsi_portal_group *tpg,
+ u32 flag)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if ((flag != 0) && (flag != 1)) {
+ pr_err("Illegal value %d\n", flag);
+ return -EINVAL;
+ }
+
+ a->cache_dynamic_acls = flag;
+ pr_debug("iSCSI_TPG[%hu] - Cache Dynamic Initiator Portal Group"
+ " ACLs %s\n", tpg->tpgt, (a->cache_dynamic_acls) ?
+ "Enabled" : "Disabled");
+
+ return 0;
+}
+
+int iscsit_ta_demo_mode_write_protect(
+ struct iscsi_portal_group *tpg,
+ u32 flag)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if ((flag != 0) && (flag != 1)) {
+ pr_err("Illegal value %d\n", flag);
+ return -EINVAL;
+ }
+
+ a->demo_mode_write_protect = flag;
+ pr_debug("iSCSI_TPG[%hu] - Demo Mode Write Protect bit: %s\n",
+ tpg->tpgt, (a->demo_mode_write_protect) ? "ON" : "OFF");
+
+ return 0;
+}
+
+int iscsit_ta_prod_mode_write_protect(
+ struct iscsi_portal_group *tpg,
+ u32 flag)
+{
+ struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+ if ((flag != 0) && (flag != 1)) {
+ pr_err("Illegal value %d\n", flag);
+ return -EINVAL;
+ }
+
+ a->prod_mode_write_protect = flag;
+ pr_debug("iSCSI_TPG[%hu] - Production Mode Write Protect bit:"
+ " %s\n", tpg->tpgt, (a->prod_mode_write_protect) ?
+ "ON" : "OFF");
+
+ return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h
new file mode 100644
index 0000000..dda48c1
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tpg.h
@@ -0,0 +1,41 @@
+#ifndef ISCSI_TARGET_TPG_H
+#define ISCSI_TARGET_TPG_H
+
+extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, u16);
+extern int iscsit_load_discovery_tpg(void);
+extern void iscsit_release_discovery_tpg(void);
+extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *,
+ struct iscsi_np *);
+extern int iscsit_get_tpg(struct iscsi_portal_group *);
+extern void iscsit_put_tpg(struct iscsi_portal_group *);
+extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *);
+extern void iscsit_tpg_dump_params(struct iscsi_portal_group *);
+extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *);
+extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *,
+ int);
+extern int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *);
+extern int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *, int);
+extern struct iscsi_node_acl *iscsit_tpg_add_initiator_node_acl(
+ struct iscsi_portal_group *, const char *, u32);
+extern void iscsit_tpg_del_initiator_node_acl(struct iscsi_portal_group *,
+ struct se_node_acl *);
+extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session *);
+extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *);
+extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int);
+extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *,
+ struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *,
+ int);
+extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *,
+ struct iscsi_tpg_np *);
+extern int iscsit_tpg_set_initiator_node_queue_depth(struct iscsi_portal_group *,
+ unsigned char *, u32, int);
+extern int iscsit_ta_authentication(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_login_timeout(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_netif_timeout(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_generate_node_acls(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_default_cmdsn_depth(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_cache_dynamic_acls(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32);
+
+#endif /* ISCSI_TARGET_TPG_H */
diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c
new file mode 100644
index 0000000..0baac5b
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tq.c
@@ -0,0 +1,551 @@
+/*******************************************************************************
+ * This file contains the iSCSI Login Thread and Thread Queue functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target.h"
+
+static LIST_HEAD(active_ts_list);
+static LIST_HEAD(inactive_ts_list);
+static DEFINE_SPINLOCK(active_ts_lock);
+static DEFINE_SPINLOCK(inactive_ts_lock);
+static DEFINE_SPINLOCK(ts_bitmap_lock);
+
+static void iscsi_add_ts_to_active_list(struct iscsi_thread_set *ts)
+{
+ spin_lock(&active_ts_lock);
+ list_add_tail(&ts->ts_list, &active_ts_list);
+ iscsit_global->active_ts++;
+ spin_unlock(&active_ts_lock);
+}
+
+void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *ts)
+{
+ spin_lock(&inactive_ts_lock);
+ list_add_tail(&ts->ts_list, &inactive_ts_list);
+ iscsit_global->inactive_ts++;
+ spin_unlock(&inactive_ts_lock);
+}
+
+static void iscsi_del_ts_from_active_list(struct iscsi_thread_set *ts)
+{
+ spin_lock(&active_ts_lock);
+ list_del(&ts->ts_list);
+ iscsit_global->active_ts--;
+ spin_unlock(&active_ts_lock);
+}
+
+static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void)
+{
+ struct iscsi_thread_set *ts;
+
+ spin_lock(&inactive_ts_lock);
+ if (list_empty(&inactive_ts_list)) {
+ spin_unlock(&inactive_ts_lock);
+ return NULL;
+ }
+
+ list_for_each_entry(ts, &inactive_ts_list, ts_list)
+ break;
+
+ list_del(&ts->ts_list);
+ iscsit_global->inactive_ts--;
+ spin_unlock(&inactive_ts_lock);
+
+ return ts;
+}
+
+int iscsi_allocate_thread_sets(u32 thread_pair_count)
+{
+ int allocated_thread_pair_count = 0, i, thread_id;
+ struct iscsi_thread_set *ts = NULL;
+
+ for (i = 0; i < thread_pair_count; i++) {
+ ts = kzalloc(sizeof(struct iscsi_thread_set), GFP_KERNEL);
+ if (!ts) {
+ pr_err("Unable to allocate memory for"
+ " thread set.\n");
+ return allocated_thread_pair_count;
+ }
+ /*
+		 * Locate the next available region in the thread_set_bitmap
+ */
+ spin_lock(&ts_bitmap_lock);
+ thread_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
+ iscsit_global->ts_bitmap_count, get_order(1));
+ spin_unlock(&ts_bitmap_lock);
+ if (thread_id < 0) {
+ pr_err("bitmap_find_free_region() failed for"
+ " thread_set_bitmap\n");
+ kfree(ts);
+ return allocated_thread_pair_count;
+ }
+
+ ts->thread_id = thread_id;
+ ts->status = ISCSI_THREAD_SET_FREE;
+ INIT_LIST_HEAD(&ts->ts_list);
+ spin_lock_init(&ts->ts_state_lock);
+ init_completion(&ts->rx_post_start_comp);
+ init_completion(&ts->tx_post_start_comp);
+ init_completion(&ts->rx_restart_comp);
+ init_completion(&ts->tx_restart_comp);
+ init_completion(&ts->rx_start_comp);
+ init_completion(&ts->tx_start_comp);
+
+ ts->create_threads = 1;
+ ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s",
+ ISCSI_TX_THREAD_NAME);
+ if (IS_ERR(ts->tx_thread)) {
+ dump_stack();
+ pr_err("Unable to start iscsi_target_tx_thread\n");
+ break;
+ }
+
+ ts->rx_thread = kthread_run(iscsi_target_rx_thread, ts, "%s",
+ ISCSI_RX_THREAD_NAME);
+ if (IS_ERR(ts->rx_thread)) {
+ kthread_stop(ts->tx_thread);
+ pr_err("Unable to start iscsi_target_rx_thread\n");
+ break;
+ }
+ ts->create_threads = 0;
+
+ iscsi_add_ts_to_inactive_list(ts);
+ allocated_thread_pair_count++;
+ }
+
+ pr_debug("Spawned %d thread set(s) (%d total threads).\n",
+ allocated_thread_pair_count, allocated_thread_pair_count * 2);
+ return allocated_thread_pair_count;
+}
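
Each thread set claims a unique id from a bitmap before its RX/TX kthread pair is spawned; the kernel uses bitmap_find_free_region() under ts_bitmap_lock. A minimal find-first-zero model of that id allocation, illustrative only:

	#include <stdio.h>

	#define LONG_BITS 64

	static int alloc_thread_id(unsigned long *map, int bits)
	{
		int id;

		for (id = 0; id < bits; id++) {
			unsigned long bit = 1UL << (id % LONG_BITS);
			unsigned long *word = &map[id / LONG_BITS];

			if (!(*word & bit)) {
				*word |= bit;	/* mark the id in use */
				return id;
			}
		}
		return -1;	/* all ids in use */
	}

	int main(void)
	{
		unsigned long map = 0;
		int a = alloc_thread_id(&map, LONG_BITS);
		int b = alloc_thread_id(&map, LONG_BITS);

		printf("%d %d\n", a, b);	/* prints "0 1" */
		return 0;
	}
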
+
+void iscsi_deallocate_thread_sets(void)
+{
+ u32 released_count = 0;
+ struct iscsi_thread_set *ts = NULL;
+
+ while ((ts = iscsi_get_ts_from_inactive_list())) {
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->status = ISCSI_THREAD_SET_DIE;
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ if (ts->rx_thread) {
+ send_sig(SIGINT, ts->rx_thread, 1);
+ kthread_stop(ts->rx_thread);
+ }
+ if (ts->tx_thread) {
+ send_sig(SIGINT, ts->tx_thread, 1);
+ kthread_stop(ts->tx_thread);
+ }
+ /*
+ * Release this thread_id in the thread_set_bitmap
+ */
+ spin_lock(&ts_bitmap_lock);
+ bitmap_release_region(iscsit_global->ts_bitmap,
+ ts->thread_id, get_order(1));
+ spin_unlock(&ts_bitmap_lock);
+
+ released_count++;
+ kfree(ts);
+ }
+
+ if (released_count)
+ pr_debug("Stopped %d thread set(s) (%d total threads)."
+ "\n", released_count, released_count * 2);
+}
+
+static void iscsi_deallocate_extra_thread_sets(void)
+{
+ u32 orig_count, released_count = 0;
+ struct iscsi_thread_set *ts = NULL;
+
+ orig_count = TARGET_THREAD_SET_COUNT;
+
+ while ((iscsit_global->inactive_ts + 1) > orig_count) {
+ ts = iscsi_get_ts_from_inactive_list();
+ if (!ts)
+ break;
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->status = ISCSI_THREAD_SET_DIE;
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ if (ts->rx_thread) {
+ send_sig(SIGINT, ts->rx_thread, 1);
+ kthread_stop(ts->rx_thread);
+ }
+ if (ts->tx_thread) {
+ send_sig(SIGINT, ts->tx_thread, 1);
+ kthread_stop(ts->tx_thread);
+ }
+ /*
+ * Release this thread_id in the thread_set_bitmap
+ */
+ spin_lock(&ts_bitmap_lock);
+ bitmap_release_region(iscsit_global->ts_bitmap,
+ ts->thread_id, get_order(1));
+ spin_unlock(&ts_bitmap_lock);
+
+ released_count++;
+ kfree(ts);
+ }
+
+ if (released_count) {
+ pr_debug("Stopped %d thread set(s) (%d total threads)."
+ "\n", released_count, released_count * 2);
+ }
+}
+
+void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts)
+{
+ iscsi_add_ts_to_active_list(ts);
+
+ spin_lock_bh(&ts->ts_state_lock);
+ conn->thread_set = ts;
+ ts->conn = conn;
+ spin_unlock_bh(&ts->ts_state_lock);
+ /*
+ * Start up the RX thread and wait on rx_post_start_comp. The RX
+ * Thread will then do the same for the TX Thread in
+ * iscsi_rx_thread_pre_handler().
+ */
+ complete(&ts->rx_start_comp);
+ wait_for_completion(&ts->rx_post_start_comp);
+}
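
The startup handshake described in the comment above can be modeled in userspace with POSIX semaphores standing in for struct completion. A sketch, not the kernel code (build with -pthread):

	#include <pthread.h>
	#include <semaphore.h>
	#include <stdio.h>

	static sem_t rx_start, tx_start, tx_post_start, rx_post_start;

	static void *rx_thread(void *arg)
	{
		(void)arg;
		sem_wait(&rx_start);		/* parked like iscsi_rx_thread_pre_handler() */
		sem_post(&tx_start);		/* RX starts up the TX thread... */
		sem_wait(&tx_post_start);	/* ...and sleeps until TX is up */
		printf("rx running\n");
		return NULL;
	}

	static void *tx_thread(void *arg)
	{
		(void)arg;
		sem_wait(&tx_start);		/* parked like iscsi_tx_thread_pre_handler() */
		sem_post(&tx_post_start);	/* release the RX thread */
		sem_post(&rx_post_start);	/* release the activator */
		printf("tx running\n");
		return NULL;
	}

	int main(void)
	{
		pthread_t rx, tx;

		sem_init(&rx_start, 0, 0);
		sem_init(&tx_start, 0, 0);
		sem_init(&tx_post_start, 0, 0);
		sem_init(&rx_post_start, 0, 0);
		pthread_create(&rx, NULL, rx_thread, NULL);
		pthread_create(&tx, NULL, tx_thread, NULL);

		sem_post(&rx_start);		/* iscsi_activate_thread_set() */
		sem_wait(&rx_post_start);	/* returns once both threads run */

		pthread_join(rx, NULL);
		pthread_join(tx, NULL);
		return 0;
	}
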
+
+struct iscsi_thread_set *iscsi_get_thread_set(void)
+{
+ int allocate_ts = 0;
+ struct completion comp;
+ struct iscsi_thread_set *ts = NULL;
+ /*
+ * If no inactive thread set is available on the first call to
+ * iscsi_get_ts_from_inactive_list(), sleep for a second and
+ * try again. If still none are available after two attempts,
+ * allocate a set ourselves.
+ */
+get_set:
+ ts = iscsi_get_ts_from_inactive_list();
+ if (!ts) {
+ if (allocate_ts == 2)
+ iscsi_allocate_thread_sets(1);
+
+ init_completion(&comp);
+ wait_for_completion_timeout(&comp, 1 * HZ);
+
+ allocate_ts++;
+ goto get_set;
+ }
+
+ ts->delay_inactive = 1;
+ ts->signal_sent = 0;
+ ts->thread_count = 2;
+ init_completion(&ts->rx_restart_comp);
+ init_completion(&ts->tx_restart_comp);
+
+ return ts;
+}
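
The retry loop above pulls from the inactive list, sleeps a second between misses, and grows the pool on the third miss so the following attempt succeeds. A sketch of the control flow with hypothetical pool helpers (declarations only):

	#include <unistd.h>

	struct thread_set;

	/* Hypothetical stand-ins for the inactive-list operations. */
	struct thread_set *pop_inactive_thread_set(void);
	void allocate_thread_sets(int count);

	struct thread_set *get_thread_set(void)
	{
		int attempts = 0;
		struct thread_set *ts;

		for (;;) {
			ts = pop_inactive_thread_set();
			if (ts)
				return ts;
			if (attempts == 2)	/* third miss: grow the pool */
				allocate_thread_sets(1);
			sleep(1);		/* wait_for_completion_timeout(1 * HZ) */
			attempts++;
		}
	}
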
+
+void iscsi_set_thread_clear(struct iscsi_conn *conn, u8 thread_clear)
+{
+ struct iscsi_thread_set *ts = NULL;
+
+ if (!conn->thread_set) {
+ pr_err("struct iscsi_conn->thread_set is NULL\n");
+ return;
+ }
+ ts = conn->thread_set;
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->thread_clear &= ~thread_clear;
+
+ if ((thread_clear & ISCSI_CLEAR_RX_THREAD) &&
+ (ts->blocked_threads & ISCSI_BLOCK_RX_THREAD))
+ complete(&ts->rx_restart_comp);
+ else if ((thread_clear & ISCSI_CLEAR_TX_THREAD) &&
+ (ts->blocked_threads & ISCSI_BLOCK_TX_THREAD))
+ complete(&ts->tx_restart_comp);
+ spin_unlock_bh(&ts->ts_state_lock);
+}
+
+void iscsi_set_thread_set_signal(struct iscsi_conn *conn, u8 signal_sent)
+{
+ struct iscsi_thread_set *ts = NULL;
+
+ if (!conn->thread_set) {
+ pr_err("struct iscsi_conn->thread_set is NULL\n");
+ return;
+ }
+ ts = conn->thread_set;
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->signal_sent |= signal_sent;
+ spin_unlock_bh(&ts->ts_state_lock);
+}
+
+int iscsi_release_thread_set(struct iscsi_conn *conn)
+{
+ int thread_called = 0;
+ struct iscsi_thread_set *ts = NULL;
+
+ if (!conn || !conn->thread_set) {
+ pr_err("connection or thread set pointer is NULL\n");
+ BUG();
+ }
+ ts = conn->thread_set;
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->status = ISCSI_THREAD_SET_RESET;
+
+ if (!strncmp(current->comm, ISCSI_RX_THREAD_NAME,
+ strlen(ISCSI_RX_THREAD_NAME)))
+ thread_called = ISCSI_RX_THREAD;
+ else if (!strncmp(current->comm, ISCSI_TX_THREAD_NAME,
+ strlen(ISCSI_TX_THREAD_NAME)))
+ thread_called = ISCSI_TX_THREAD;
+
+ if (ts->rx_thread && (thread_called == ISCSI_TX_THREAD) &&
+ (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) {
+
+ if (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD)) {
+ send_sig(SIGINT, ts->rx_thread, 1);
+ ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
+ }
+ ts->blocked_threads |= ISCSI_BLOCK_RX_THREAD;
+ spin_unlock_bh(&ts->ts_state_lock);
+ wait_for_completion(&ts->rx_restart_comp);
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->blocked_threads &= ~ISCSI_BLOCK_RX_THREAD;
+ }
+ if (ts->tx_thread && (thread_called == ISCSI_RX_THREAD) &&
+ (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) {
+
+ if (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD)) {
+ send_sig(SIGINT, ts->tx_thread, 1);
+ ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
+ }
+ ts->blocked_threads |= ISCSI_BLOCK_TX_THREAD;
+ spin_unlock_bh(&ts->ts_state_lock);
+ wait_for_completion(&ts->tx_restart_comp);
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->blocked_threads &= ~ISCSI_BLOCK_TX_THREAD;
+ }
+
+ ts->conn = NULL;
+ ts->status = ISCSI_THREAD_SET_FREE;
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ return 0;
+}
+
+int iscsi_thread_set_force_reinstatement(struct iscsi_conn *conn)
+{
+ struct iscsi_thread_set *ts;
+
+ if (!conn->thread_set)
+ return -1;
+ ts = conn->thread_set;
+
+ spin_lock_bh(&ts->ts_state_lock);
+ if (ts->status != ISCSI_THREAD_SET_ACTIVE) {
+ spin_unlock_bh(&ts->ts_state_lock);
+ return -1;
+ }
+
+ if (ts->tx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD))) {
+ send_sig(SIGINT, ts->tx_thread, 1);
+ ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
+ }
+ if (ts->rx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD))) {
+ send_sig(SIGINT, ts->rx_thread, 1);
+ ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
+ }
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ return 0;
+}
+
+static void iscsi_check_to_add_additional_sets(void)
+{
+ int thread_sets_add;
+
+ spin_lock(&inactive_ts_lock);
+ thread_sets_add = iscsit_global->inactive_ts;
+ spin_unlock(&inactive_ts_lock);
+ if (thread_sets_add == 1)
+ iscsi_allocate_thread_sets(1);
+}
+
+static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+ spin_lock_bh(&ts->ts_state_lock);
+ if ((ts->status == ISCSI_THREAD_SET_DIE) || signal_pending(current)) {
+ spin_unlock_bh(&ts->ts_state_lock);
+ return -1;
+ }
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ return 0;
+}
+
+struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+ int ret;
+
+ spin_lock_bh(&ts->ts_state_lock);
+ if (ts->create_threads) {
+ spin_unlock_bh(&ts->ts_state_lock);
+ goto sleep;
+ }
+
+ flush_signals(current);
+
+ if (ts->delay_inactive && (--ts->thread_count == 0)) {
+ spin_unlock_bh(&ts->ts_state_lock);
+ iscsi_del_ts_from_active_list(ts);
+
+ if (!iscsit_global->in_shutdown)
+ iscsi_deallocate_extra_thread_sets();
+
+ iscsi_add_ts_to_inactive_list(ts);
+ spin_lock_bh(&ts->ts_state_lock);
+ }
+
+ if ((ts->status == ISCSI_THREAD_SET_RESET) &&
+ (ts->thread_clear & ISCSI_CLEAR_RX_THREAD))
+ complete(&ts->rx_restart_comp);
+
+ ts->thread_clear &= ~ISCSI_CLEAR_RX_THREAD;
+ spin_unlock_bh(&ts->ts_state_lock);
+sleep:
+ ret = wait_for_completion_interruptible(&ts->rx_start_comp);
+ if (ret != 0)
+ return NULL;
+
+ if (iscsi_signal_thread_pre_handler(ts) < 0)
+ return NULL;
+
+ if (!ts->conn) {
+ pr_err("struct iscsi_thread_set->conn is NULL for"
+ " thread_id: %d, going back to sleep\n", ts->thread_id);
+ goto sleep;
+ }
+ iscsi_check_to_add_additional_sets();
+ /*
+ * The RX Thread starts up the TX Thread and sleeps.
+ */
+ ts->thread_clear |= ISCSI_CLEAR_RX_THREAD;
+ complete(&ts->tx_start_comp);
+ wait_for_completion(&ts->tx_post_start_comp);
+
+ return ts->conn;
+}
+
+struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts)
+{
+ int ret;
+
+ spin_lock_bh(&ts->ts_state_lock);
+ if (ts->create_threads) {
+ spin_unlock_bh(&ts->ts_state_lock);
+ goto sleep;
+ }
+
+ flush_signals(current);
+
+ if (ts->delay_inactive && (--ts->thread_count == 0)) {
+ spin_unlock_bh(&ts->ts_state_lock);
+ iscsi_del_ts_from_active_list(ts);
+
+ if (!iscsit_global->in_shutdown)
+ iscsi_deallocate_extra_thread_sets();
+
+ iscsi_add_ts_to_inactive_list(ts);
+ spin_lock_bh(&ts->ts_state_lock);
+ }
+ if ((ts->status == ISCSI_THREAD_SET_RESET) &&
+ (ts->thread_clear & ISCSI_CLEAR_TX_THREAD))
+ complete(&ts->tx_restart_comp);
+
+ ts->thread_clear &= ~ISCSI_CLEAR_TX_THREAD;
+ spin_unlock_bh(&ts->ts_state_lock);
+sleep:
+ ret = wait_for_completion_interruptible(&ts->tx_start_comp);
+ if (ret != 0)
+ return NULL;
+
+ if (iscsi_signal_thread_pre_handler(ts) < 0)
+ return NULL;
+
+ if (!ts->conn) {
+		pr_err("struct iscsi_thread_set->conn is NULL for"
+			" thread_id: %d, going back to sleep\n",
+ ts->thread_id);
+ goto sleep;
+ }
+
+ iscsi_check_to_add_additional_sets();
+ /*
+ * From the TX thread, up the tx_post_start_comp that the RX Thread is
+ * sleeping on in iscsi_rx_thread_pre_handler(), then up the
+ * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on.
+ */
+ ts->thread_clear |= ISCSI_CLEAR_TX_THREAD;
+ complete(&ts->tx_post_start_comp);
+ complete(&ts->rx_post_start_comp);
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->status = ISCSI_THREAD_SET_ACTIVE;
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ return ts->conn;
+}
+
+int iscsi_thread_set_init(void)
+{
+ int size;
+
+ iscsit_global->ts_bitmap_count = ISCSI_TS_BITMAP_BITS;
+
+ size = BITS_TO_LONGS(iscsit_global->ts_bitmap_count) * sizeof(long);
+ iscsit_global->ts_bitmap = kzalloc(size, GFP_KERNEL);
+ if (!iscsit_global->ts_bitmap) {
+ pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&active_ts_lock);
+ spin_lock_init(&inactive_ts_lock);
+ spin_lock_init(&ts_bitmap_lock);
+ INIT_LIST_HEAD(&active_ts_list);
+ INIT_LIST_HEAD(&inactive_ts_list);
+
+ return 0;
+}
+
+void iscsi_thread_set_free(void)
+{
+ kfree(iscsit_global->ts_bitmap);
+}
diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h
new file mode 100644
index 0000000..26e6a95
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tq.h
@@ -0,0 +1,88 @@
+#ifndef ISCSI_THREAD_QUEUE_H
+#define ISCSI_THREAD_QUEUE_H
+
+/*
+ * Defines for thread sets.
+ */
+extern int iscsi_thread_set_force_reinstatement(struct iscsi_conn *);
+extern void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *);
+extern int iscsi_allocate_thread_sets(u32);
+extern void iscsi_deallocate_thread_sets(void);
+extern void iscsi_activate_thread_set(struct iscsi_conn *, struct iscsi_thread_set *);
+extern struct iscsi_thread_set *iscsi_get_thread_set(void);
+extern void iscsi_set_thread_clear(struct iscsi_conn *, u8);
+extern void iscsi_set_thread_set_signal(struct iscsi_conn *, u8);
+extern int iscsi_release_thread_set(struct iscsi_conn *);
+extern struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *);
+extern struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *);
+extern int iscsi_thread_set_init(void);
+extern void iscsi_thread_set_free(void);
+
+extern int iscsi_target_tx_thread(void *);
+extern int iscsi_target_rx_thread(void *);
+
+#define TARGET_THREAD_SET_COUNT 4
+
+#define ISCSI_RX_THREAD 1
+#define ISCSI_TX_THREAD 2
+#define ISCSI_RX_THREAD_NAME "iscsi_trx"
+#define ISCSI_TX_THREAD_NAME "iscsi_ttx"
+#define ISCSI_BLOCK_RX_THREAD 0x1
+#define ISCSI_BLOCK_TX_THREAD 0x2
+#define ISCSI_CLEAR_RX_THREAD 0x1
+#define ISCSI_CLEAR_TX_THREAD 0x2
+#define ISCSI_SIGNAL_RX_THREAD 0x1
+#define ISCSI_SIGNAL_TX_THREAD 0x2
+
+/* struct iscsi_thread_set->status */
+#define ISCSI_THREAD_SET_FREE 1
+#define ISCSI_THREAD_SET_ACTIVE 2
+#define ISCSI_THREAD_SET_DIE 3
+#define ISCSI_THREAD_SET_RESET 4
+#define ISCSI_THREAD_SET_DEALLOCATE_THREADS 5
+
+/* By default allow a maximum of 32K iSCSI connections */
+#define ISCSI_TS_BITMAP_BITS 32768
+
+struct iscsi_thread_set {
+ /* flags used for blocking and restarting sets */
+ int blocked_threads;
+ /* flag for creating threads */
+ int create_threads;
+	/* flag for delaying re-adding to the inactive list */
+ int delay_inactive;
+ /* status for thread set */
+ int status;
+ /* which threads have had signals sent */
+ int signal_sent;
+ /* flag for which threads exited first */
+ int thread_clear;
+ /* Active threads in the thread set */
+ int thread_count;
+ /* Unique thread ID */
+ u32 thread_id;
+ /* pointer to connection if set is active */
+ struct iscsi_conn *conn;
+ /* used for controlling ts state accesses */
+ spinlock_t ts_state_lock;
+ /* Used for rx side post startup */
+ struct completion rx_post_start_comp;
+ /* Used for tx side post startup */
+ struct completion tx_post_start_comp;
+ /* used for restarting thread queue */
+ struct completion rx_restart_comp;
+ /* used for restarting thread queue */
+ struct completion tx_restart_comp;
+ /* used for normal unused blocking */
+ struct completion rx_start_comp;
+ /* used for normal unused blocking */
+ struct completion tx_start_comp;
+ /* OS descriptor for rx thread */
+ struct task_struct *rx_thread;
+ /* OS descriptor for tx thread */
+ struct task_struct *tx_thread;
+	/* struct iscsi_thread_set list head */
+ struct list_head ts_list;
+};
+
+#endif /*** ISCSI_THREAD_QUEUE_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
new file mode 100644
index 0000000..a1acb01
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -0,0 +1,1819 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target specific utility functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_tmr.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_configfs.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+#define PRINT_BUFF(buff, len) \
+{ \
+ int zzz; \
+ \
+ pr_debug("%d:\n", __LINE__); \
+ for (zzz = 0; zzz < len; zzz++) { \
+ if (zzz % 16 == 0) { \
+ if (zzz) \
+ pr_debug("\n"); \
+ pr_debug("%4i: ", zzz); \
+ } \
+ pr_debug("%02x ", (unsigned char) (buff)[zzz]); \
+ } \
+ if ((len + 1) % 16) \
+ pr_debug("\n"); \
+}
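+
+/*
+ * For illustration, a hypothetical caller dumping a 48-byte PDU header:
+ *
+ *	PRINT_BUFF(buf, 48);
+ *
+ * emits three rows of 16 hex bytes, each row prefixed with its decimal
+ * offset ("   0: ", "  16: ", "  32: ").
+ */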
+
+extern struct list_head g_tiqn_list;
+extern spinlock_t tiqn_lock;
+
+/*
+ * Called with cmd->r2t_lock held.
+ */
+int iscsit_add_r2t_to_list(
+ struct iscsi_cmd *cmd,
+ u32 offset,
+ u32 xfer_len,
+ int recovery,
+ u32 r2t_sn)
+{
+ struct iscsi_r2t *r2t;
+
+ r2t = kmem_cache_zalloc(lio_r2t_cache, GFP_ATOMIC);
+ if (!r2t) {
+ pr_err("Unable to allocate memory for struct iscsi_r2t.\n");
+ return -1;
+ }
+ INIT_LIST_HEAD(&r2t->r2t_list);
+
+ r2t->recovery_r2t = recovery;
+ r2t->r2t_sn = (!r2t_sn) ? cmd->r2t_sn++ : r2t_sn;
+ r2t->offset = offset;
+ r2t->xfer_len = xfer_len;
+ list_add_tail(&r2t->r2t_list, &cmd->cmd_r2t_list);
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T);
+
+ spin_lock_bh(&cmd->r2t_lock);
+ return 0;
+}
+
+struct iscsi_r2t *iscsit_get_r2t_for_eos(
+ struct iscsi_cmd *cmd,
+ u32 offset,
+ u32 length)
+{
+ struct iscsi_r2t *r2t;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+ if ((r2t->offset <= offset) &&
+ (r2t->offset + r2t->xfer_len) >= (offset + length)) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return r2t;
+ }
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ pr_err("Unable to locate R2T for Offset: %u, Length:"
+ " %u\n", offset, length);
+ return NULL;
+}
+
+struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *cmd)
+{
+ struct iscsi_r2t *r2t;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+ if (!r2t->sent_r2t) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return r2t;
+ }
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ pr_err("Unable to locate next R2T to send for ITT:"
+ " 0x%08x.\n", cmd->init_task_tag);
+ return NULL;
+}
+
+/*
+ * Called with cmd->r2t_lock held.
+ */
+void iscsit_free_r2t(struct iscsi_r2t *r2t, struct iscsi_cmd *cmd)
+{
+ list_del(&r2t->r2t_list);
+ kmem_cache_free(lio_r2t_cache, r2t);
+}
+
+void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd)
+{
+ struct iscsi_r2t *r2t, *r2t_tmp;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list)
+ iscsit_free_r2t(r2t, cmd);
+ spin_unlock_bh(&cmd->r2t_lock);
+}
+
+/*
+ * May be called from software interrupt (timer) context for allocating
+ * iSCSI NopINs.
+ */
+struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask)
+{
+ struct iscsi_cmd *cmd;
+
+ cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask);
+ if (!cmd) {
+ pr_err("Unable to allocate memory for struct iscsi_cmd.\n");
+ return NULL;
+ }
+
+ cmd->conn = conn;
+ INIT_LIST_HEAD(&cmd->i_list);
+ INIT_LIST_HEAD(&cmd->datain_list);
+ INIT_LIST_HEAD(&cmd->cmd_r2t_list);
+ init_completion(&cmd->reject_comp);
+ spin_lock_init(&cmd->datain_lock);
+ spin_lock_init(&cmd->dataout_timeout_lock);
+ spin_lock_init(&cmd->istate_lock);
+ spin_lock_init(&cmd->error_lock);
+ spin_lock_init(&cmd->r2t_lock);
+
+ return cmd;
+}
+
+/*
+ * Called from iscsi_handle_scsi_cmd()
+ */
+struct iscsi_cmd *iscsit_allocate_se_cmd(
+ struct iscsi_conn *conn,
+ u32 data_length,
+ int data_direction,
+ int iscsi_task_attr)
+{
+ struct iscsi_cmd *cmd;
+ struct se_cmd *se_cmd;
+ int sam_task_attr;
+
+ cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ if (!cmd)
+ return NULL;
+
+ cmd->data_direction = data_direction;
+ cmd->data_length = data_length;
+ /*
+ * Figure out the SAM Task Attribute for the incoming SCSI CDB
+ */
+ if ((iscsi_task_attr == ISCSI_ATTR_UNTAGGED) ||
+ (iscsi_task_attr == ISCSI_ATTR_SIMPLE))
+ sam_task_attr = MSG_SIMPLE_TAG;
+ else if (iscsi_task_attr == ISCSI_ATTR_ORDERED)
+ sam_task_attr = MSG_ORDERED_TAG;
+ else if (iscsi_task_attr == ISCSI_ATTR_HEAD_OF_QUEUE)
+ sam_task_attr = MSG_HEAD_TAG;
+ else if (iscsi_task_attr == ISCSI_ATTR_ACA)
+ sam_task_attr = MSG_ACA_TAG;
+ else {
+ pr_debug("Unknown iSCSI Task Attribute: 0x%02x, using"
+ " MSG_SIMPLE_TAG\n", iscsi_task_attr);
+ sam_task_attr = MSG_SIMPLE_TAG;
+ }
+
+ se_cmd = &cmd->se_cmd;
+ /*
+ * Initialize struct se_cmd descriptor from target_core_mod infrastructure
+ */
+ transport_init_se_cmd(se_cmd, &lio_target_fabric_configfs->tf_ops,
+ conn->sess->se_sess, data_length, data_direction,
+ sam_task_attr, &cmd->sense_buffer[0]);
+ return cmd;
+}
+
+struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
+ struct iscsi_conn *conn,
+ u8 function)
+{
+ struct iscsi_cmd *cmd;
+ struct se_cmd *se_cmd;
+ u8 tcm_function;
+
+ cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ if (!cmd)
+ return NULL;
+
+ cmd->data_direction = DMA_NONE;
+
+ cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL);
+ if (!cmd->tmr_req) {
+ pr_err("Unable to allocate memory for"
+ " Task Management command!\n");
+ return NULL;
+ }
+ /*
+ * TASK_REASSIGN for ERL=2 / connection stays inside of
+ * LIO-Target $FABRIC_MOD
+ */
+ if (function == ISCSI_TM_FUNC_TASK_REASSIGN)
+ return cmd;
+
+ se_cmd = &cmd->se_cmd;
+ /*
+ * Initialize struct se_cmd descriptor from target_core_mod infrastructure
+ */
+ transport_init_se_cmd(se_cmd, &lio_target_fabric_configfs->tf_ops,
+ conn->sess->se_sess, 0, DMA_NONE,
+ MSG_SIMPLE_TAG, &cmd->sense_buffer[0]);
+
+ switch (function) {
+ case ISCSI_TM_FUNC_ABORT_TASK:
+ tcm_function = TMR_ABORT_TASK;
+ break;
+ case ISCSI_TM_FUNC_ABORT_TASK_SET:
+ tcm_function = TMR_ABORT_TASK_SET;
+ break;
+ case ISCSI_TM_FUNC_CLEAR_ACA:
+ tcm_function = TMR_CLEAR_ACA;
+ break;
+ case ISCSI_TM_FUNC_CLEAR_TASK_SET:
+ tcm_function = TMR_CLEAR_TASK_SET;
+ break;
+ case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+ tcm_function = TMR_LUN_RESET;
+ break;
+ case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+ tcm_function = TMR_TARGET_WARM_RESET;
+ break;
+ case ISCSI_TM_FUNC_TARGET_COLD_RESET:
+ tcm_function = TMR_TARGET_COLD_RESET;
+ break;
+ default:
+ pr_err("Unknown iSCSI TMR Function:"
+ " 0x%02x\n", function);
+ goto out;
+ }
+
+ se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd,
+ (void *)cmd->tmr_req, tcm_function);
+ if (!se_cmd->se_tmr_req)
+ goto out;
+
+ cmd->tmr_req->se_tmr_req = se_cmd->se_tmr_req;
+
+ return cmd;
+out:
+ iscsit_release_cmd(cmd);
+ if (se_cmd)
+ transport_free_se_cmd(se_cmd);
+ return NULL;
+}
+
+int iscsit_decide_list_to_build(
+ struct iscsi_cmd *cmd,
+ u32 immediate_data_length)
+{
+ struct iscsi_build_list bl;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na;
+
+ if (sess->sess_ops->DataSequenceInOrder &&
+ sess->sess_ops->DataPDUInOrder)
+ return 0;
+
+ if (cmd->data_direction == DMA_NONE)
+ return 0;
+
+ na = iscsit_tpg_get_node_attrib(sess);
+ memset(&bl, 0, sizeof(struct iscsi_build_list));
+
+ if (cmd->data_direction == DMA_FROM_DEVICE) {
+ bl.data_direction = ISCSI_PDU_READ;
+ bl.type = PDULIST_NORMAL;
+ if (na->random_datain_pdu_offsets)
+ bl.randomize |= RANDOM_DATAIN_PDU_OFFSETS;
+ if (na->random_datain_seq_offsets)
+ bl.randomize |= RANDOM_DATAIN_SEQ_OFFSETS;
+ } else {
+ bl.data_direction = ISCSI_PDU_WRITE;
+ bl.immediate_data_length = immediate_data_length;
+ if (na->random_r2t_offsets)
+ bl.randomize |= RANDOM_R2T_OFFSETS;
+
+ if (!cmd->immediate_data && !cmd->unsolicited_data)
+ bl.type = PDULIST_NORMAL;
+ else if (cmd->immediate_data && !cmd->unsolicited_data)
+ bl.type = PDULIST_IMMEDIATE;
+ else if (!cmd->immediate_data && cmd->unsolicited_data)
+ bl.type = PDULIST_UNSOLICITED;
+ else if (cmd->immediate_data && cmd->unsolicited_data)
+ bl.type = PDULIST_IMMEDIATE_AND_UNSOLICITED;
+ }
+
+ return iscsit_do_build_list(cmd, &bl);
+}
+
+struct iscsi_seq *iscsit_get_seq_holder_for_datain(
+ struct iscsi_cmd *cmd,
+ u32 seq_send_order)
+{
+ u32 i;
+
+ for (i = 0; i < cmd->seq_count; i++)
+ if (cmd->seq_list[i].seq_send_order == seq_send_order)
+ return &cmd->seq_list[i];
+
+ return NULL;
+}
+
+struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *cmd)
+{
+ u32 i;
+
+ if (!cmd->seq_list) {
+ pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+ return NULL;
+ }
+
+ for (i = 0; i < cmd->seq_count; i++) {
+ if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+ continue;
+ if (cmd->seq_list[i].seq_send_order == cmd->seq_send_order) {
+ cmd->seq_send_order++;
+ return &cmd->seq_list[i];
+ }
+ }
+
+ return NULL;
+}
+
+struct iscsi_r2t *iscsit_get_holder_for_r2tsn(
+ struct iscsi_cmd *cmd,
+ u32 r2t_sn)
+{
+ struct iscsi_r2t *r2t;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+ if (r2t->r2t_sn == r2t_sn) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return r2t;
+ }
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ return NULL;
+}
+
+static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cmdsn)
+{
+ int ret;
+
+ /*
+ * This is the proper method of checking received CmdSN against
+ * ExpCmdSN and MaxCmdSN values, as well as accounting for out
+ * of order CmdSNs due to multiple connection sessions and/or
+ * CRC failures.
+ */
+ if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) {
+ pr_err("Received CmdSN: 0x%08x is greater than"
+ " MaxCmdSN: 0x%08x, protocol error.\n", cmdsn,
+ sess->max_cmd_sn);
+ ret = CMDSN_ERROR_CANNOT_RECOVER;
+
+ } else if (cmdsn == sess->exp_cmd_sn) {
+ sess->exp_cmd_sn++;
+ pr_debug("Received CmdSN matches ExpCmdSN,"
+ " incremented ExpCmdSN to: 0x%08x\n",
+ sess->exp_cmd_sn);
+ ret = CMDSN_NORMAL_OPERATION;
+
+ } else if (iscsi_sna_gt(cmdsn, sess->exp_cmd_sn)) {
+ pr_debug("Received CmdSN: 0x%08x is greater"
+ " than ExpCmdSN: 0x%08x, not acknowledging.\n",
+ cmdsn, sess->exp_cmd_sn);
+ ret = CMDSN_HIGHER_THAN_EXP;
+
+ } else {
+ pr_err("Received CmdSN: 0x%08x is less than"
+ " ExpCmdSN: 0x%08x, ignoring.\n", cmdsn,
+ sess->exp_cmd_sn);
+ ret = CMDSN_LOWER_THAN_EXP;
+ }
+
+ return ret;
+}
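+
+/*
+ * The comparisons above use 32-bit serial number arithmetic (RFC 1982
+ * style), so ordering survives CmdSN wrap-around: e.g. with ExpCmdSN at
+ * 0xFFFFFFFE, a received CmdSN of 0x00000001 still compares as greater
+ * than ExpCmdSN.
+ */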
+
+/*
+ * Commands may be received out of order if MC/S is in use.
+ * Ensure they are executed in CmdSN order.
+ */
+int iscsit_sequence_cmd(
+ struct iscsi_conn *conn,
+ struct iscsi_cmd *cmd,
+ u32 cmdsn)
+{
+ int ret;
+ int cmdsn_ret;
+
+ mutex_lock(&conn->sess->cmdsn_mutex);
+
+ cmdsn_ret = iscsit_check_received_cmdsn(conn->sess, cmdsn);
+ switch (cmdsn_ret) {
+ case CMDSN_NORMAL_OPERATION:
+ ret = iscsit_execute_cmd(cmd, 0);
+ if ((ret >= 0) && !list_empty(&conn->sess->sess_ooo_cmdsn_list))
+ iscsit_execute_ooo_cmdsns(conn->sess);
+ break;
+ case CMDSN_HIGHER_THAN_EXP:
+ ret = iscsit_handle_ooo_cmdsn(conn->sess, cmd, cmdsn);
+ break;
+ case CMDSN_LOWER_THAN_EXP:
+ cmd->i_state = ISTATE_REMOVE;
+ iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state);
+ ret = cmdsn_ret;
+ break;
+ default:
+ ret = cmdsn_ret;
+ break;
+ }
+ mutex_unlock(&conn->sess->cmdsn_mutex);
+
+ return ret;
+}
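+
+/*
+ * Example: on a two-connection (MC/S) session with ExpCmdSN=4, the PDU
+ * carrying CmdSN=5 may arrive first; iscsit_handle_ooo_cmdsn() parks it
+ * until CmdSN=4 arrives, at which point the out-of-order list is drained
+ * via iscsit_execute_ooo_cmdsns() above.
+ */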
+
+int iscsit_check_unsolicited_dataout(struct iscsi_cmd *cmd, unsigned char *buf)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ struct iscsi_data *hdr = (struct iscsi_data *) buf;
+ u32 payload_length = ntoh24(hdr->dlength);
+
+ if (conn->sess->sess_ops->InitialR2T) {
+ pr_err("Received unexpected unsolicited data"
+ " while InitialR2T=Yes, protocol error.\n");
+ transport_send_check_condition_and_sense(se_cmd,
+ TCM_UNEXPECTED_UNSOLICITED_DATA, 0);
+ return -1;
+ }
+
+ if ((cmd->first_burst_len + payload_length) >
+ conn->sess->sess_ops->FirstBurstLength) {
+ pr_err("Total %u bytes exceeds FirstBurstLength: %u"
+ " for this Unsolicited DataOut Burst.\n",
+ (cmd->first_burst_len + payload_length),
+ conn->sess->sess_ops->FirstBurstLength);
+ transport_send_check_condition_and_sense(se_cmd,
+ TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+ return -1;
+ }
+
+ if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))
+ return 0;
+
+ if (((cmd->first_burst_len + payload_length) != cmd->data_length) &&
+ ((cmd->first_burst_len + payload_length) !=
+ conn->sess->sess_ops->FirstBurstLength)) {
+ pr_err("Unsolicited non-immediate data received %u"
+ " does not equal FirstBurstLength: %u, and does"
+ " not equal ExpXferLen %u.\n",
+ (cmd->first_burst_len + payload_length),
+ conn->sess->sess_ops->FirstBurstLength, cmd->data_length);
+ transport_send_check_condition_and_sense(se_cmd,
+ TCM_INCORRECT_AMOUNT_OF_DATA, 0);
+ return -1;
+ }
+ return 0;
+}
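+
+/*
+ * For illustration (hypothetical numbers): with FirstBurstLength=65536
+ * and ExpXferLen=8192, a final unsolicited burst must bring the running
+ * total to 8192 (the whole transfer); with ExpXferLen=131072 it must
+ * instead total exactly 65536, and the remainder is solicited via R2Ts.
+ */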
+
+struct iscsi_cmd *iscsit_find_cmd_from_itt(
+ struct iscsi_conn *conn,
+ u32 init_task_tag)
+{
+ struct iscsi_cmd *cmd;
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+ if (cmd->init_task_tag == init_task_tag) {
+ spin_unlock_bh(&conn->cmd_lock);
+ return cmd;
+ }
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ pr_err("Unable to locate ITT: 0x%08x on CID: %hu",
+ init_task_tag, conn->cid);
+ return NULL;
+}
+
+struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(
+ struct iscsi_conn *conn,
+ u32 init_task_tag,
+ u32 length)
+{
+ struct iscsi_cmd *cmd;
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+ if (cmd->init_task_tag == init_task_tag) {
+ spin_unlock_bh(&conn->cmd_lock);
+ return cmd;
+ }
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ pr_err("Unable to locate ITT: 0x%08x on CID: %hu,"
+ " dumping payload\n", init_task_tag, conn->cid);
+ if (length)
+ iscsit_dump_data_payload(conn, length, 1);
+
+ return NULL;
+}
+
+struct iscsi_cmd *iscsit_find_cmd_from_ttt(
+ struct iscsi_conn *conn,
+ u32 targ_xfer_tag)
+{
+ struct iscsi_cmd *cmd = NULL;
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+ if (cmd->targ_xfer_tag == targ_xfer_tag) {
+ spin_unlock_bh(&conn->cmd_lock);
+ return cmd;
+ }
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ pr_err("Unable to locate TTT: 0x%08x on CID: %hu\n",
+ targ_xfer_tag, conn->cid);
+ return NULL;
+}
+
+int iscsit_find_cmd_for_recovery(
+ struct iscsi_session *sess,
+ struct iscsi_cmd **cmd_ptr,
+ struct iscsi_conn_recovery **cr_ptr,
+ u32 init_task_tag)
+{
+ struct iscsi_cmd *cmd = NULL;
+ struct iscsi_conn_recovery *cr;
+ /*
+ * Scan through the inactive connection recovery list's command list.
+ * If init_task_tag matches, the command is still awaiting connection recovery.
+ */
+ spin_lock(&sess->cr_i_lock);
+ list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) {
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) {
+ if (cmd->init_task_tag == init_task_tag) {
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ spin_unlock(&sess->cr_i_lock);
+
+ *cr_ptr = cr;
+ *cmd_ptr = cmd;
+ return -2;
+ }
+ }
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ }
+ spin_unlock(&sess->cr_i_lock);
+ /*
+ * Scan through the active connection recovery list's command list.
+ * If init_task_tag matches the command is ready to be reassigned.
+ */
+ spin_lock(&sess->cr_a_lock);
+ list_for_each_entry(cr, &sess->cr_active_list, cr_list) {
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) {
+ if (cmd->init_task_tag == init_task_tag) {
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ spin_unlock(&sess->cr_a_lock);
+
+ *cr_ptr = cr;
+ *cmd_ptr = cmd;
+ return 0;
+ }
+ }
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ }
+ spin_unlock(&sess->cr_a_lock);
+
+ return -1;
+}
+
+void iscsit_add_cmd_to_immediate_queue(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn,
+ u8 state)
+{
+ struct iscsi_queue_req *qr;
+
+ qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC);
+ if (!qr) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_queue_req\n");
+ return;
+ }
+ INIT_LIST_HEAD(&qr->qr_list);
+ qr->cmd = cmd;
+ qr->state = state;
+
+ spin_lock_bh(&conn->immed_queue_lock);
+ list_add_tail(&qr->qr_list, &conn->immed_queue_list);
+ atomic_inc(&cmd->immed_queue_count);
+ atomic_set(&conn->check_immediate_queue, 1);
+ spin_unlock_bh(&conn->immed_queue_lock);
+
+ wake_up_process(conn->thread_set->tx_thread);
+}
+
+struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *conn)
+{
+ struct iscsi_queue_req *qr;
+
+ spin_lock_bh(&conn->immed_queue_lock);
+ if (list_empty(&conn->immed_queue_list)) {
+ spin_unlock_bh(&conn->immed_queue_lock);
+ return NULL;
+ }
+ list_for_each_entry(qr, &conn->immed_queue_list, qr_list)
+ break;
+
+ list_del(&qr->qr_list);
+ if (qr->cmd)
+ atomic_dec(&qr->cmd->immed_queue_count);
+ spin_unlock_bh(&conn->immed_queue_lock);
+
+ return qr;
+}
+
+static void iscsit_remove_cmd_from_immediate_queue(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ struct iscsi_queue_req *qr, *qr_tmp;
+
+ spin_lock_bh(&conn->immed_queue_lock);
+ if (!atomic_read(&cmd->immed_queue_count)) {
+ spin_unlock_bh(&conn->immed_queue_lock);
+ return;
+ }
+
+ list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) {
+ if (qr->cmd != cmd)
+ continue;
+
+ atomic_dec(&qr->cmd->immed_queue_count);
+ list_del(&qr->qr_list);
+ kmem_cache_free(lio_qr_cache, qr);
+ }
+ spin_unlock_bh(&conn->immed_queue_lock);
+
+ if (atomic_read(&cmd->immed_queue_count)) {
+ pr_err("ITT: 0x%08x immed_queue_count: %d\n",
+ cmd->init_task_tag,
+ atomic_read(&cmd->immed_queue_count));
+ }
+}
+
+void iscsit_add_cmd_to_response_queue(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn,
+ u8 state)
+{
+ struct iscsi_queue_req *qr;
+
+ qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC);
+ if (!qr) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_queue_req\n");
+ return;
+ }
+ INIT_LIST_HEAD(&qr->qr_list);
+ qr->cmd = cmd;
+ qr->state = state;
+
+ spin_lock_bh(&conn->response_queue_lock);
+ list_add_tail(&qr->qr_list, &conn->response_queue_list);
+ atomic_inc(&cmd->response_queue_count);
+ spin_unlock_bh(&conn->response_queue_lock);
+
+ wake_up_process(conn->thread_set->tx_thread);
+}
+
+struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn)
+{
+ struct iscsi_queue_req *qr;
+
+ spin_lock_bh(&conn->response_queue_lock);
+ if (list_empty(&conn->response_queue_list)) {
+ spin_unlock_bh(&conn->response_queue_lock);
+ return NULL;
+ }
+
+ list_for_each_entry(qr, &conn->response_queue_list, qr_list)
+ break;
+
+ list_del(&qr->qr_list);
+ if (qr->cmd)
+ atomic_dec(&qr->cmd->response_queue_count);
+ spin_unlock_bh(&conn->response_queue_lock);
+
+ return qr;
+}
+
+static void iscsit_remove_cmd_from_response_queue(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ struct iscsi_queue_req *qr, *qr_tmp;
+
+ spin_lock_bh(&conn->response_queue_lock);
+ if (!atomic_read(&cmd->response_queue_count)) {
+ spin_unlock_bh(&conn->response_queue_lock);
+ return;
+ }
+
+ list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list,
+ qr_list) {
+ if (qr->cmd != cmd)
+ continue;
+
+ atomic_dec(&qr->cmd->response_queue_count);
+ list_del(&qr->qr_list);
+ kmem_cache_free(lio_qr_cache, qr);
+ }
+ spin_unlock_bh(&conn->response_queue_lock);
+
+ if (atomic_read(&cmd->response_queue_count)) {
+ pr_err("ITT: 0x%08x response_queue_count: %d\n",
+ cmd->init_task_tag,
+ atomic_read(&cmd->response_queue_count));
+ }
+}
+
+void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn)
+{
+ struct iscsi_queue_req *qr, *qr_tmp;
+
+ spin_lock_bh(&conn->immed_queue_lock);
+ list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) {
+ list_del(&qr->qr_list);
+ if (qr->cmd)
+ atomic_dec(&qr->cmd->immed_queue_count);
+
+ kmem_cache_free(lio_qr_cache, qr);
+ }
+ spin_unlock_bh(&conn->immed_queue_lock);
+
+ spin_lock_bh(&conn->response_queue_lock);
+ list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list,
+ qr_list) {
+ list_del(&qr->qr_list);
+ if (qr->cmd)
+ atomic_dec(&qr->cmd->response_queue_count);
+
+ kmem_cache_free(lio_qr_cache, qr);
+ }
+ spin_unlock_bh(&conn->response_queue_lock);
+}
+
+void iscsit_release_cmd(struct iscsi_cmd *cmd)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ int i;
+
+ iscsit_free_r2ts_from_list(cmd);
+ iscsit_free_all_datain_reqs(cmd);
+
+ kfree(cmd->buf_ptr);
+ kfree(cmd->pdu_list);
+ kfree(cmd->seq_list);
+ kfree(cmd->tmr_req);
+ kfree(cmd->iov_data);
+
+ for (i = 0; i < cmd->t_mem_sg_nents; i++)
+ __free_page(sg_page(&cmd->t_mem_sg[i]));
+
+ kfree(cmd->t_mem_sg);
+
+ if (conn) {
+ iscsit_remove_cmd_from_immediate_queue(cmd, conn);
+ iscsit_remove_cmd_from_response_queue(cmd, conn);
+ }
+
+ kmem_cache_free(lio_cmd_cache, cmd);
+}
+
+int iscsit_check_session_usage_count(struct iscsi_session *sess)
+{
+ spin_lock_bh(&sess->session_usage_lock);
+ if (sess->session_usage_count != 0) {
+ sess->session_waiting_on_uc = 1;
+ spin_unlock_bh(&sess->session_usage_lock);
+ if (in_interrupt())
+ return 2;
+
+ wait_for_completion(&sess->session_waiting_on_uc_comp);
+ return 1;
+ }
+ spin_unlock_bh(&sess->session_usage_lock);
+
+ return 0;
+}
+
+void iscsit_dec_session_usage_count(struct iscsi_session *sess)
+{
+ spin_lock_bh(&sess->session_usage_lock);
+ sess->session_usage_count--;
+
+ if (!sess->session_usage_count && sess->session_waiting_on_uc)
+ complete(&sess->session_waiting_on_uc_comp);
+
+ spin_unlock_bh(&sess->session_usage_lock);
+}
+
+void iscsit_inc_session_usage_count(struct iscsi_session *sess)
+{
+ spin_lock_bh(&sess->session_usage_lock);
+ sess->session_usage_count++;
+ spin_unlock_bh(&sess->session_usage_lock);
+}
+
+/*
+ * Used before iscsit_do_[rx,tx]_data() to determine iov and [rx,tx]_marker
+ * array counts needed for sync and steering.
+ */
+static int iscsit_determine_sync_and_steering_counts(
+ struct iscsi_conn *conn,
+ struct iscsi_data_count *count)
+{
+ u32 length = count->data_length;
+ u32 marker, markint;
+
+ count->sync_and_steering = 1;
+
+ marker = (count->type == ISCSI_RX_DATA) ?
+ conn->of_marker : conn->if_marker;
+ markint = (count->type == ISCSI_RX_DATA) ?
+ (conn->conn_ops->OFMarkInt * 4) :
+ (conn->conn_ops->IFMarkInt * 4);
+ count->ss_iov_count = count->iov_count;
+
+ while (length > 0) {
+ if (length >= marker) {
+ count->ss_iov_count += 3;
+ count->ss_marker_count += 2;
+
+ length -= marker;
+ marker = markint;
+ } else
+ length = 0;
+ }
+
+ return 0;
+}
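+
+/*
+ * Worked example (hypothetical values, next marker assumed a full
+ * interval away): with OFMarkInt = 512 words (2048 bytes) and a
+ * 5000-byte receive, markers land at the 2048- and 4096-byte points,
+ * so the loop above adds 6 to ss_iov_count (three kvecs per marker)
+ * and 4 to ss_marker_count (two u32 words per marker).
+ */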
+
+/*
+ * Setup conn->if_marker and conn->of_marker values based upon
+ * the initial marker-less interval. (see iSCSI v19 A.2)
+ */
+int iscsit_set_sync_and_steering_values(struct iscsi_conn *conn)
+{
+ int login_ifmarker_count = 0, login_ofmarker_count = 0, next_marker = 0;
+ /*
+ * IFMarkInt and OFMarkInt are negotiated as 32-bit words.
+ */
+ u32 IFMarkInt = (conn->conn_ops->IFMarkInt * 4);
+ u32 OFMarkInt = (conn->conn_ops->OFMarkInt * 4);
+
+ if (conn->conn_ops->OFMarker) {
+ /*
+		 * Account for the first Login Command, which was not
+		 * received via iscsi_recv_msg().
+ */
+ conn->of_marker += ISCSI_HDR_LEN;
+ if (conn->of_marker <= OFMarkInt) {
+ conn->of_marker = (OFMarkInt - conn->of_marker);
+ } else {
+ login_ofmarker_count = (conn->of_marker / OFMarkInt);
+ next_marker = (OFMarkInt * (login_ofmarker_count + 1)) +
+ (login_ofmarker_count * MARKER_SIZE);
+ conn->of_marker = (next_marker - conn->of_marker);
+ }
+ conn->of_marker_offset = 0;
+ pr_debug("Setting OFMarker value to %u based on Initial"
+ " Markerless Interval.\n", conn->of_marker);
+ }
+
+ if (conn->conn_ops->IFMarker) {
+ if (conn->if_marker <= IFMarkInt) {
+ conn->if_marker = (IFMarkInt - conn->if_marker);
+ } else {
+ login_ifmarker_count = (conn->if_marker / IFMarkInt);
+ next_marker = (IFMarkInt * (login_ifmarker_count + 1)) +
+ (login_ifmarker_count * MARKER_SIZE);
+ conn->if_marker = (next_marker - conn->if_marker);
+ }
+ pr_debug("Setting IFMarker value to %u based on Initial"
+ " Markerless Interval.\n", conn->if_marker);
+ }
+
+ return 0;
+}
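+
+/*
+ * A.2 accounting example (hypothetical numbers): with OFMarkInt
+ * covering 2048 bytes and 100 bytes counted during login, of_marker
+ * becomes 2048 - (100 + 48) = 1900, i.e. the first marker falls 1900
+ * bytes into full feature phase traffic.
+ */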
+
+struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *sess, u16 cid)
+{
+ struct iscsi_conn *conn;
+
+ spin_lock_bh(&sess->conn_lock);
+ list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+ if ((conn->cid == cid) &&
+ (conn->conn_state == TARG_CONN_STATE_LOGGED_IN)) {
+ iscsit_inc_conn_usage_count(conn);
+ spin_unlock_bh(&sess->conn_lock);
+ return conn;
+ }
+ }
+ spin_unlock_bh(&sess->conn_lock);
+
+ return NULL;
+}
+
+struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *sess, u16 cid)
+{
+ struct iscsi_conn *conn;
+
+ spin_lock_bh(&sess->conn_lock);
+ list_for_each_entry(conn, &sess->sess_conn_list, conn_list) {
+ if (conn->cid == cid) {
+ iscsit_inc_conn_usage_count(conn);
+ spin_lock(&conn->state_lock);
+ atomic_set(&conn->connection_wait_rcfr, 1);
+ spin_unlock(&conn->state_lock);
+ spin_unlock_bh(&sess->conn_lock);
+ return conn;
+ }
+ }
+ spin_unlock_bh(&sess->conn_lock);
+
+ return NULL;
+}
+
+void iscsit_check_conn_usage_count(struct iscsi_conn *conn)
+{
+ spin_lock_bh(&conn->conn_usage_lock);
+ if (conn->conn_usage_count != 0) {
+ conn->conn_waiting_on_uc = 1;
+ spin_unlock_bh(&conn->conn_usage_lock);
+
+ wait_for_completion(&conn->conn_waiting_on_uc_comp);
+ return;
+ }
+ spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+void iscsit_dec_conn_usage_count(struct iscsi_conn *conn)
+{
+ spin_lock_bh(&conn->conn_usage_lock);
+ conn->conn_usage_count--;
+
+ if (!conn->conn_usage_count && conn->conn_waiting_on_uc)
+ complete(&conn->conn_waiting_on_uc_comp);
+
+ spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+void iscsit_inc_conn_usage_count(struct iscsi_conn *conn)
+{
+ spin_lock_bh(&conn->conn_usage_lock);
+ conn->conn_usage_count++;
+ spin_unlock_bh(&conn->conn_usage_lock);
+}
+
+static int iscsit_add_nopin(struct iscsi_conn *conn, int want_response)
+{
+ u8 state;
+ struct iscsi_cmd *cmd;
+
+ cmd = iscsit_allocate_cmd(conn, GFP_ATOMIC);
+ if (!cmd)
+ return -1;
+
+ cmd->iscsi_opcode = ISCSI_OP_NOOP_IN;
+ state = (want_response) ? ISTATE_SEND_NOPIN_WANT_RESPONSE :
+ ISTATE_SEND_NOPIN_NO_RESPONSE;
+ cmd->init_task_tag = 0xFFFFFFFF;
+ spin_lock_bh(&conn->sess->ttt_lock);
+ cmd->targ_xfer_tag = (want_response) ? conn->sess->targ_xfer_tag++ :
+ 0xFFFFFFFF;
+ if (want_response && (cmd->targ_xfer_tag == 0xFFFFFFFF))
+ cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+ spin_unlock_bh(&conn->sess->ttt_lock);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (want_response)
+ iscsit_start_nopin_response_timer(conn);
+ iscsit_add_cmd_to_immediate_queue(cmd, conn, state);
+
+ return 0;
+}
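+
+/*
+ * Note: 0xFFFFFFFF is the reserved Target Transfer Tag, so the
+ * allocation above skips it whenever a NopOUT response is expected.
+ */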
+
+static void iscsit_handle_nopin_response_timeout(unsigned long data)
+{
+ struct iscsi_conn *conn = (struct iscsi_conn *) data;
+
+ iscsit_inc_conn_usage_count(conn);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (conn->nopin_response_timer_flags & ISCSI_TF_STOP) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ iscsit_dec_conn_usage_count(conn);
+ return;
+ }
+
+ pr_debug("Did not receive response to NOPIN on CID: %hu on"
+ " SID: %u, failing connection.\n", conn->cid,
+ conn->sess->sid);
+ conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+
+ {
+ struct iscsi_portal_group *tpg = conn->sess->tpg;
+ struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+ if (tiqn) {
+ spin_lock_bh(&tiqn->sess_err_stats.lock);
+ strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name,
+ (void *)conn->sess->sess_ops->InitiatorName);
+ tiqn->sess_err_stats.last_sess_failure_type =
+ ISCSI_SESS_ERR_CXN_TIMEOUT;
+ tiqn->sess_err_stats.cxn_timeout_errors++;
+ conn->sess->conn_timeout_errors++;
+ spin_unlock_bh(&tiqn->sess_err_stats.lock);
+ }
+ }
+
+ iscsit_cause_connection_reinstatement(conn, 0);
+ iscsit_dec_conn_usage_count(conn);
+}
+
+void iscsit_mod_nopin_response_timer(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ return;
+ }
+
+ mod_timer(&conn->nopin_response_timer,
+ (get_jiffies_64() + na->nopin_response_timeout * HZ));
+ spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+/*
+ * Called with conn->nopin_timer_lock held.
+ */
+void iscsit_start_nopin_response_timer(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (conn->nopin_response_timer_flags & ISCSI_TF_RUNNING) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ return;
+ }
+
+ init_timer(&conn->nopin_response_timer);
+ conn->nopin_response_timer.expires =
+ (get_jiffies_64() + na->nopin_response_timeout * HZ);
+ conn->nopin_response_timer.data = (unsigned long)conn;
+ conn->nopin_response_timer.function = iscsit_handle_nopin_response_timeout;
+ conn->nopin_response_timer_flags &= ~ISCSI_TF_STOP;
+ conn->nopin_response_timer_flags |= ISCSI_TF_RUNNING;
+ add_timer(&conn->nopin_response_timer);
+
+ pr_debug("Started NOPIN Response Timer on CID: %d to %u"
+ " seconds\n", conn->cid, na->nopin_response_timeout);
+ spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+void iscsit_stop_nopin_response_timer(struct iscsi_conn *conn)
+{
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ return;
+ }
+ conn->nopin_response_timer_flags |= ISCSI_TF_STOP;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+
+ del_timer_sync(&conn->nopin_response_timer);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+static void iscsit_handle_nopin_timeout(unsigned long data)
+{
+ struct iscsi_conn *conn = (struct iscsi_conn *) data;
+
+ iscsit_inc_conn_usage_count(conn);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (conn->nopin_timer_flags & ISCSI_TF_STOP) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ iscsit_dec_conn_usage_count(conn);
+ return;
+ }
+ conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+
+ iscsit_add_nopin(conn, 1);
+ iscsit_dec_conn_usage_count(conn);
+}
+
+/*
+ * Called with conn->nopin_timer_lock held.
+ */
+void __iscsit_start_nopin_timer(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+ /*
+ * NOPIN timeout is disabled.
+ */
+ if (!na->nopin_timeout)
+ return;
+
+ if (conn->nopin_timer_flags & ISCSI_TF_RUNNING)
+ return;
+
+ init_timer(&conn->nopin_timer);
+ conn->nopin_timer.expires = (get_jiffies_64() + na->nopin_timeout * HZ);
+ conn->nopin_timer.data = (unsigned long)conn;
+ conn->nopin_timer.function = iscsit_handle_nopin_timeout;
+ conn->nopin_timer_flags &= ~ISCSI_TF_STOP;
+ conn->nopin_timer_flags |= ISCSI_TF_RUNNING;
+ add_timer(&conn->nopin_timer);
+
+ pr_debug("Started NOPIN Timer on CID: %d at %u second"
+ " interval\n", conn->cid, na->nopin_timeout);
+}
+
+void iscsit_start_nopin_timer(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+ /*
+ * NOPIN timeout is disabled.
+ */
+ if (!na->nopin_timeout)
+ return;
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (conn->nopin_timer_flags & ISCSI_TF_RUNNING) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ return;
+ }
+
+ init_timer(&conn->nopin_timer);
+ conn->nopin_timer.expires = (get_jiffies_64() + na->nopin_timeout * HZ);
+ conn->nopin_timer.data = (unsigned long)conn;
+ conn->nopin_timer.function = iscsit_handle_nopin_timeout;
+ conn->nopin_timer_flags &= ~ISCSI_TF_STOP;
+ conn->nopin_timer_flags |= ISCSI_TF_RUNNING;
+ add_timer(&conn->nopin_timer);
+
+ pr_debug("Started NOPIN Timer on CID: %d at %u second"
+ " interval\n", conn->cid, na->nopin_timeout);
+ spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+void iscsit_stop_nopin_timer(struct iscsi_conn *conn)
+{
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (!(conn->nopin_timer_flags & ISCSI_TF_RUNNING)) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ return;
+ }
+ conn->nopin_timer_flags |= ISCSI_TF_STOP;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+
+ del_timer_sync(&conn->nopin_timer);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+int iscsit_send_tx_data(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn,
+ int use_misc)
+{
+ int tx_sent, tx_size;
+ u32 iov_count;
+ struct kvec *iov;
+
+send_data:
+ tx_size = cmd->tx_size;
+
+ if (!use_misc) {
+ iov = &cmd->iov_data[0];
+ iov_count = cmd->iov_data_count;
+ } else {
+ iov = &cmd->iov_misc[0];
+ iov_count = cmd->iov_misc_count;
+ }
+
+ tx_sent = tx_data(conn, &iov[0], iov_count, tx_size);
+ if (tx_size != tx_sent) {
+ if (tx_sent == -EAGAIN) {
+ pr_err("tx_data() returned -EAGAIN\n");
+ goto send_data;
+ } else
+ return -1;
+ }
+ cmd->tx_size = 0;
+
+ return 0;
+}
+
+int iscsit_fe_sendpage_sg(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ struct scatterlist *sg = cmd->first_data_sg;
+ struct kvec iov;
+ u32 tx_hdr_size, data_len;
+ u32 offset = cmd->first_data_sg_off;
+ int tx_sent;
+
+send_hdr:
+ tx_hdr_size = ISCSI_HDR_LEN;
+ if (conn->conn_ops->HeaderDigest)
+ tx_hdr_size += ISCSI_CRC_LEN;
+
+ iov.iov_base = cmd->pdu;
+ iov.iov_len = tx_hdr_size;
+
+ tx_sent = tx_data(conn, &iov, 1, tx_hdr_size);
+ if (tx_hdr_size != tx_sent) {
+ if (tx_sent == -EAGAIN) {
+ pr_err("tx_data() returned -EAGAIN\n");
+ goto send_hdr;
+ }
+ return -1;
+ }
+
+ data_len = cmd->tx_size - tx_hdr_size - cmd->padding;
+ if (conn->conn_ops->DataDigest)
+ data_len -= ISCSI_CRC_LEN;
+
+ /*
+ * Perform sendpage() for each page in the scatterlist
+ */
+ while (data_len) {
+ u32 space = (sg->length - offset);
+ u32 sub_len = min_t(u32, data_len, space);
+send_pg:
+ tx_sent = conn->sock->ops->sendpage(conn->sock,
+ sg_page(sg), sg->offset + offset, sub_len, 0);
+ if (tx_sent != sub_len) {
+ if (tx_sent == -EAGAIN) {
+ pr_err("tcp_sendpage() returned"
+ " -EAGAIN\n");
+ goto send_pg;
+ }
+
+ pr_err("tcp_sendpage() failure: %d\n",
+ tx_sent);
+ return -1;
+ }
+
+ data_len -= sub_len;
+ offset = 0;
+ sg = sg_next(sg);
+ }
+
+send_padding:
+ if (cmd->padding) {
+ struct kvec *iov_p =
+ &cmd->iov_data[cmd->iov_data_count-1];
+
+ tx_sent = tx_data(conn, iov_p, 1, cmd->padding);
+ if (cmd->padding != tx_sent) {
+ if (tx_sent == -EAGAIN) {
+ pr_err("tx_data() returned -EAGAIN\n");
+ goto send_padding;
+ }
+ return -1;
+ }
+ }
+
+send_datacrc:
+ if (conn->conn_ops->DataDigest) {
+ struct kvec *iov_d =
+ &cmd->iov_data[cmd->iov_data_count];
+
+ tx_sent = tx_data(conn, iov_d, 1, ISCSI_CRC_LEN);
+ if (ISCSI_CRC_LEN != tx_sent) {
+ if (tx_sent == -EAGAIN) {
+ pr_err("tx_data() returned -EAGAIN\n");
+ goto send_datacrc;
+ }
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * This function is mainly used for sending an ISCSI_TARG_LOGIN_RSP PDU
+ * back to the Initiator when an exception condition occurs with the
+ * errors set in status_class and status_detail.
+ *
+ * Parameters: iSCSI Connection, Status Class, Status Detail.
+ * Returns: 0 on success, -1 on error.
+ */
+int iscsit_tx_login_rsp(struct iscsi_conn *conn, u8 status_class, u8 status_detail)
+{
+ u8 iscsi_hdr[ISCSI_HDR_LEN];
+ int err;
+ struct kvec iov;
+ struct iscsi_login_rsp *hdr;
+
+ iscsit_collect_login_stats(conn, status_class, status_detail);
+
+ memset(&iov, 0, sizeof(struct kvec));
+ memset(&iscsi_hdr, 0x0, ISCSI_HDR_LEN);
+
+ hdr = (struct iscsi_login_rsp *)&iscsi_hdr;
+ hdr->opcode = ISCSI_OP_LOGIN_RSP;
+ hdr->status_class = status_class;
+ hdr->status_detail = status_detail;
+ hdr->itt = cpu_to_be32(conn->login_itt);
+
+ iov.iov_base = &iscsi_hdr;
+ iov.iov_len = ISCSI_HDR_LEN;
+
+ PRINT_BUFF(iscsi_hdr, ISCSI_HDR_LEN);
+
+ err = tx_data(conn, &iov, 1, ISCSI_HDR_LEN);
+ if (err != ISCSI_HDR_LEN) {
+ pr_err("tx_data returned less than expected\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+void iscsit_print_session_params(struct iscsi_session *sess)
+{
+ struct iscsi_conn *conn;
+
+ pr_debug("-----------------------------[Session Params for"
+ " SID: %u]-----------------------------\n", sess->sid);
+ spin_lock_bh(&sess->conn_lock);
+ list_for_each_entry(conn, &sess->sess_conn_list, conn_list)
+ iscsi_dump_conn_ops(conn->conn_ops);
+ spin_unlock_bh(&sess->conn_lock);
+
+ iscsi_dump_sess_ops(sess->sess_ops);
+}
+
+static int iscsit_do_rx_data(
+ struct iscsi_conn *conn,
+ struct iscsi_data_count *count)
+{
+ int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len;
+ u32 rx_marker_val[count->ss_marker_count], rx_marker_iov = 0;
+ struct kvec iov[count->ss_iov_count], *iov_p;
+ struct msghdr msg;
+
+ if (!conn || !conn->sock || !conn->conn_ops)
+ return -1;
+
+ memset(&msg, 0, sizeof(struct msghdr));
+
+ if (count->sync_and_steering) {
+ int size = 0;
+ u32 i, orig_iov_count = 0;
+ u32 orig_iov_len = 0, orig_iov_loc = 0;
+ u32 iov_count = 0, per_iov_bytes = 0;
+ u32 *rx_marker, old_rx_marker = 0;
+ struct kvec *iov_record;
+
+ memset(&rx_marker_val, 0,
+ count->ss_marker_count * sizeof(u32));
+ memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
+
+ iov_record = count->iov;
+ orig_iov_count = count->iov_count;
+ rx_marker = &conn->of_marker;
+
+ i = 0;
+ size = data;
+ orig_iov_len = iov_record[orig_iov_loc].iov_len;
+ while (size > 0) {
+ pr_debug("rx_data: #1 orig_iov_len %u,"
+ " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
+ pr_debug("rx_data: #2 rx_marker %u, size"
+ " %u\n", *rx_marker, size);
+
+ if (orig_iov_len >= *rx_marker) {
+ iov[iov_count].iov_len = *rx_marker;
+ iov[iov_count++].iov_base =
+ (iov_record[orig_iov_loc].iov_base +
+ per_iov_bytes);
+
+ iov[iov_count].iov_len = (MARKER_SIZE / 2);
+ iov[iov_count++].iov_base =
+ &rx_marker_val[rx_marker_iov++];
+ iov[iov_count].iov_len = (MARKER_SIZE / 2);
+ iov[iov_count++].iov_base =
+ &rx_marker_val[rx_marker_iov++];
+ old_rx_marker = *rx_marker;
+
+ /*
+ * OFMarkInt is in 32-bit words.
+ */
+ *rx_marker = (conn->conn_ops->OFMarkInt * 4);
+ size -= old_rx_marker;
+ orig_iov_len -= old_rx_marker;
+ per_iov_bytes += old_rx_marker;
+
+ pr_debug("rx_data: #3 new_rx_marker"
+ " %u, size %u\n", *rx_marker, size);
+ } else {
+ iov[iov_count].iov_len = orig_iov_len;
+ iov[iov_count++].iov_base =
+ (iov_record[orig_iov_loc].iov_base +
+ per_iov_bytes);
+
+ per_iov_bytes = 0;
+ *rx_marker -= orig_iov_len;
+ size -= orig_iov_len;
+
+ if (size)
+ orig_iov_len =
+ iov_record[++orig_iov_loc].iov_len;
+
+ pr_debug("rx_data: #4 new_rx_marker"
+ " %u, size %u\n", *rx_marker, size);
+ }
+ }
+ data += (rx_marker_iov * (MARKER_SIZE / 2));
+
+ iov_p = &iov[0];
+ iov_len = iov_count;
+
+ if (iov_count > count->ss_iov_count) {
+ pr_err("iov_count: %d, count->ss_iov_count:"
+ " %d\n", iov_count, count->ss_iov_count);
+ return -1;
+ }
+ if (rx_marker_iov > count->ss_marker_count) {
+ pr_err("rx_marker_iov: %d, count->ss_marker"
+ "_count: %d\n", rx_marker_iov,
+ count->ss_marker_count);
+ return -1;
+ }
+ } else {
+ iov_p = count->iov;
+ iov_len = count->iov_count;
+ }
+
+ while (total_rx < data) {
+ rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, iov_len,
+ (data - total_rx), MSG_WAITALL);
+ if (rx_loop <= 0) {
+ pr_debug("rx_loop: %d total_rx: %d\n",
+ rx_loop, total_rx);
+ return rx_loop;
+ }
+ total_rx += rx_loop;
+ pr_debug("rx_loop: %d, total_rx: %d, data: %d\n",
+ rx_loop, total_rx, data);
+ }
+
+ if (count->sync_and_steering) {
+ int j;
+ for (j = 0; j < rx_marker_iov; j++) {
+ pr_debug("rx_data: #5 j: %d, offset: %d\n",
+ j, rx_marker_val[j]);
+ conn->of_marker_offset = rx_marker_val[j];
+ }
+ total_rx -= (rx_marker_iov * (MARKER_SIZE / 2));
+ }
+
+ return total_rx;
+}
+
+static int iscsit_do_tx_data(
+ struct iscsi_conn *conn,
+ struct iscsi_data_count *count)
+{
+ int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len;
+ u32 tx_marker_val[count->ss_marker_count], tx_marker_iov = 0;
+ struct kvec iov[count->ss_iov_count], *iov_p;
+ struct msghdr msg;
+
+ if (!conn || !conn->sock || !conn->conn_ops)
+ return -1;
+
+ if (data <= 0) {
+ pr_err("Data length is: %d\n", data);
+ return -1;
+ }
+
+ memset(&msg, 0, sizeof(struct msghdr));
+
+ if (count->sync_and_steering) {
+ int size = 0;
+ u32 i, orig_iov_count = 0;
+ u32 orig_iov_len = 0, orig_iov_loc = 0;
+ u32 iov_count = 0, per_iov_bytes = 0;
+ u32 *tx_marker, old_tx_marker = 0;
+ struct kvec *iov_record;
+
+ memset(&tx_marker_val, 0,
+ count->ss_marker_count * sizeof(u32));
+ memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
+
+ iov_record = count->iov;
+ orig_iov_count = count->iov_count;
+ tx_marker = &conn->if_marker;
+
+ i = 0;
+ size = data;
+ orig_iov_len = iov_record[orig_iov_loc].iov_len;
+ while (size > 0) {
+ pr_debug("tx_data: #1 orig_iov_len %u,"
+ " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
+ pr_debug("tx_data: #2 tx_marker %u, size"
+ " %u\n", *tx_marker, size);
+
+ if (orig_iov_len >= *tx_marker) {
+ iov[iov_count].iov_len = *tx_marker;
+ iov[iov_count++].iov_base =
+ (iov_record[orig_iov_loc].iov_base +
+ per_iov_bytes);
+
+ tx_marker_val[tx_marker_iov] =
+ (size - *tx_marker);
+ iov[iov_count].iov_len = (MARKER_SIZE / 2);
+ iov[iov_count++].iov_base =
+ &tx_marker_val[tx_marker_iov++];
+ iov[iov_count].iov_len = (MARKER_SIZE / 2);
+ iov[iov_count++].iov_base =
+ &tx_marker_val[tx_marker_iov++];
+ old_tx_marker = *tx_marker;
+
+ /*
+ * IFMarkInt is in 32-bit words.
+ */
+ *tx_marker = (conn->conn_ops->IFMarkInt * 4);
+ size -= old_tx_marker;
+ orig_iov_len -= old_tx_marker;
+ per_iov_bytes += old_tx_marker;
+
+ pr_debug("tx_data: #3 new_tx_marker"
+ " %u, size %u\n", *tx_marker, size);
+ pr_debug("tx_data: #4 offset %u\n",
+ tx_marker_val[tx_marker_iov-1]);
+ } else {
+ iov[iov_count].iov_len = orig_iov_len;
+ iov[iov_count++].iov_base
+ = (iov_record[orig_iov_loc].iov_base +
+ per_iov_bytes);
+
+ per_iov_bytes = 0;
+ *tx_marker -= orig_iov_len;
+ size -= orig_iov_len;
+
+ if (size)
+ orig_iov_len =
+ iov_record[++orig_iov_loc].iov_len;
+
+ pr_debug("tx_data: #5 new_tx_marker"
+ " %u, size %u\n", *tx_marker, size);
+ }
+ }
+
+ data += (tx_marker_iov * (MARKER_SIZE / 2));
+
+ iov_p = &iov[0];
+ iov_len = iov_count;
+
+ if (iov_count > count->ss_iov_count) {
+ pr_err("iov_count: %d, count->ss_iov_count:"
+ " %d\n", iov_count, count->ss_iov_count);
+ return -1;
+ }
+ if (tx_marker_iov > count->ss_marker_count) {
+ pr_err("tx_marker_iov: %d, count->ss_marker"
+ "_count: %d\n", tx_marker_iov,
+ count->ss_marker_count);
+ return -1;
+ }
+ } else {
+ iov_p = count->iov;
+ iov_len = count->iov_count;
+ }
+
+ while (total_tx < data) {
+ tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
+ (data - total_tx));
+ if (tx_loop <= 0) {
+ pr_debug("tx_loop: %d total_tx %d\n",
+ tx_loop, total_tx);
+ return tx_loop;
+ }
+ total_tx += tx_loop;
+ pr_debug("tx_loop: %d, total_tx: %d, data: %d\n",
+ tx_loop, total_tx, data);
+ }
+
+ if (count->sync_and_steering)
+ total_tx -= (tx_marker_iov * (MARKER_SIZE / 2));
+
+ return total_tx;
+}
+
+int rx_data(
+ struct iscsi_conn *conn,
+ struct kvec *iov,
+ int iov_count,
+ int data)
+{
+ struct iscsi_data_count c;
+
+ if (!conn || !conn->sock || !conn->conn_ops)
+ return -1;
+
+ memset(&c, 0, sizeof(struct iscsi_data_count));
+ c.iov = iov;
+ c.iov_count = iov_count;
+ c.data_length = data;
+ c.type = ISCSI_RX_DATA;
+
+ if (conn->conn_ops->OFMarker &&
+ (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
+ if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
+ return -1;
+ }
+
+ return iscsit_do_rx_data(conn, &c);
+}
+
+int tx_data(
+ struct iscsi_conn *conn,
+ struct kvec *iov,
+ int iov_count,
+ int data)
+{
+ struct iscsi_data_count c;
+
+ if (!conn || !conn->sock || !conn->conn_ops)
+ return -1;
+
+ memset(&c, 0, sizeof(struct iscsi_data_count));
+ c.iov = iov;
+ c.iov_count = iov_count;
+ c.data_length = data;
+ c.type = ISCSI_TX_DATA;
+
+ if (conn->conn_ops->IFMarker &&
+ (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
+ if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
+ return -1;
+ }
+
+ return iscsit_do_tx_data(conn, &c);
+}
+
+void iscsit_collect_login_stats(
+ struct iscsi_conn *conn,
+ u8 status_class,
+ u8 status_detail)
+{
+ struct iscsi_param *intrname = NULL;
+ struct iscsi_tiqn *tiqn;
+ struct iscsi_login_stats *ls;
+
+ tiqn = iscsit_snmp_get_tiqn(conn);
+ if (!tiqn)
+ return;
+
+ ls = &tiqn->login_stats;
+
+ spin_lock(&ls->lock);
+ if (!strcmp(conn->login_ip, ls->last_intr_fail_ip_addr) &&
+ ((get_jiffies_64() - ls->last_fail_time) < 10)) {
+ /* We already have the failure info for this login */
+ spin_unlock(&ls->lock);
+ return;
+ }
+
+ if (status_class == ISCSI_STATUS_CLS_SUCCESS)
+ ls->accepts++;
+ else if (status_class == ISCSI_STATUS_CLS_REDIRECT) {
+ ls->redirects++;
+ ls->last_fail_type = ISCSI_LOGIN_FAIL_REDIRECT;
+ } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) &&
+ (status_detail == ISCSI_LOGIN_STATUS_AUTH_FAILED)) {
+ ls->authenticate_fails++;
+ ls->last_fail_type = ISCSI_LOGIN_FAIL_AUTHENTICATE;
+ } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) &&
+ (status_detail == ISCSI_LOGIN_STATUS_TGT_FORBIDDEN)) {
+ ls->authorize_fails++;
+ ls->last_fail_type = ISCSI_LOGIN_FAIL_AUTHORIZE;
+ } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) &&
+ (status_detail == ISCSI_LOGIN_STATUS_INIT_ERR)) {
+ ls->negotiate_fails++;
+ ls->last_fail_type = ISCSI_LOGIN_FAIL_NEGOTIATE;
+ } else {
+ ls->other_fails++;
+ ls->last_fail_type = ISCSI_LOGIN_FAIL_OTHER;
+ }
+
+ /* Save initiator name, ip address and time, if it is a failed login */
+ if (status_class != ISCSI_STATUS_CLS_SUCCESS) {
+ if (conn->param_list)
+ intrname = iscsi_find_param_from_key(INITIATORNAME,
+ conn->param_list);
+ strcpy(ls->last_intr_fail_name,
+ (intrname ? intrname->value : "Unknown"));
+
+ ls->last_intr_fail_ip_family = conn->sock->sk->sk_family;
+ snprintf(ls->last_intr_fail_ip_addr, IPV6_ADDRESS_SPACE,
+ "%s", conn->login_ip);
+ ls->last_fail_time = get_jiffies_64();
+ }
+
+ spin_unlock(&ls->lock);
+}
+
+struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *conn)
+{
+ struct iscsi_portal_group *tpg;
+
+ if (!conn || !conn->sess)
+ return NULL;
+
+ tpg = conn->sess->tpg;
+ if (!tpg)
+ return NULL;
+
+ if (!tpg->tpg_tiqn)
+ return NULL;
+
+ return tpg->tpg_tiqn;
+}
diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h
new file mode 100644
index 0000000..2cd49d6
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_util.h
@@ -0,0 +1,60 @@
+#ifndef ISCSI_TARGET_UTIL_H
+#define ISCSI_TARGET_UTIL_H
+
+#define MARKER_SIZE 8
+
+extern int iscsit_add_r2t_to_list(struct iscsi_cmd *, u32, u32, int, u32);
+extern struct iscsi_r2t *iscsit_get_r2t_for_eos(struct iscsi_cmd *, u32, u32);
+extern struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *);
+extern void iscsit_free_r2t(struct iscsi_r2t *, struct iscsi_cmd *);
+extern void iscsit_free_r2ts_from_list(struct iscsi_cmd *);
+extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t);
+extern struct iscsi_cmd *iscsit_allocate_se_cmd(struct iscsi_conn *, u32, int, int);
+extern struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(struct iscsi_conn *, u8);
+extern int iscsit_decide_list_to_build(struct iscsi_cmd *, u32);
+extern struct iscsi_seq *iscsit_get_seq_holder_for_datain(struct iscsi_cmd *, u32);
+extern struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *);
+extern struct iscsi_r2t *iscsit_get_holder_for_r2tsn(struct iscsi_cmd *, u32);
+int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, u32 cmdsn);
+extern int iscsit_check_unsolicited_dataout(struct iscsi_cmd *, unsigned char *);
+extern struct iscsi_cmd *iscsit_find_cmd_from_itt(struct iscsi_conn *, u32);
+extern struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *,
+ u32, u32);
+extern struct iscsi_cmd *iscsit_find_cmd_from_ttt(struct iscsi_conn *, u32);
+extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd **,
+ struct iscsi_conn_recovery **, u32);
+extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
+extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *);
+extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
+extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *);
+extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *);
+extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *);
+extern void iscsit_release_cmd(struct iscsi_cmd *);
+extern int iscsit_check_session_usage_count(struct iscsi_session *);
+extern void iscsit_dec_session_usage_count(struct iscsi_session *);
+extern void iscsit_inc_session_usage_count(struct iscsi_session *);
+extern int iscsit_set_sync_and_steering_values(struct iscsi_conn *);
+extern struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *, u16);
+extern struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *, u16);
+extern void iscsit_check_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_dec_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_inc_conn_usage_count(struct iscsi_conn *);
+extern void iscsit_mod_nopin_response_timer(struct iscsi_conn *);
+extern void iscsit_start_nopin_response_timer(struct iscsi_conn *);
+extern void iscsit_stop_nopin_response_timer(struct iscsi_conn *);
+extern void __iscsit_start_nopin_timer(struct iscsi_conn *);
+extern void iscsit_start_nopin_timer(struct iscsi_conn *);
+extern void iscsit_stop_nopin_timer(struct iscsi_conn *);
+extern int iscsit_send_tx_data(struct iscsi_cmd *, struct iscsi_conn *, int);
+extern int iscsit_fe_sendpage_sg(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_tx_login_rsp(struct iscsi_conn *, u8, u8);
+extern void iscsit_print_session_params(struct iscsi_session *);
+extern int iscsit_print_dev_to_proc(char *, char **, off_t, int);
+extern int iscsit_print_sessions_to_proc(char *, char **, off_t, int);
+extern int iscsit_print_tpg_to_proc(char *, char **, off_t, int);
+extern int rx_data(struct iscsi_conn *, struct kvec *, int, int);
+extern int tx_data(struct iscsi_conn *, struct kvec *, int, int);
+extern void iscsit_collect_login_stats(struct iscsi_conn *, u8, u8);
+extern struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_UTIL_H ***/
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 46352d6..c75a01a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -4052,17 +4052,16 @@
struct se_task *task;
struct se_device *dev = cmd->se_dev;
unsigned long flags;
- sector_t sectors;
int task_count, i, ret;
- sector_t dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors;
+ sector_t sectors, dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors;
u32 sector_size = dev->se_sub_dev->se_dev_attrib.block_size;
struct scatterlist *sg;
struct scatterlist *cmd_sg;
WARN_ON(cmd->data_length % sector_size);
sectors = DIV_ROUND_UP(cmd->data_length, sector_size);
- task_count = DIV_ROUND_UP(sectors, dev_max_sectors);
-
+ task_count = DIV_ROUND_UP_SECTOR_T(sectors, dev_max_sectors);
+
cmd_sg = sgl;
for (i = 0; i < task_count; i++) {
unsigned int task_size;
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 21d816e..f441726 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -28,6 +28,17 @@
if WATCHDOG
+config WATCHDOG_CORE
+ bool "WatchDog Timer Driver Core"
+ ---help---
+ Say Y here if you want to use the new watchdog timer driver core.
+ This driver provides a framework for all watchdog timer drivers
+ and gives them the /dev/watchdog interface (and later also the
+ sysfs interface).
+
+ To compile this driver as a module, choose M here: the module will
+ be called watchdog.
+
config WATCHDOG_NOWAYOUT
bool "Disable watchdog shutdown on close"
help
@@ -186,6 +197,15 @@
To compile this driver as a module, choose M here: the
module will be called sa1100_wdt.
+config DW_WATCHDOG
+ tristate "Synopsys DesignWare watchdog"
+ depends on ARM && HAVE_CLK
+ help
+	  Say Y here to include support for the Synopsys DesignWare
+ watchdog timer found in many ARM chips.
+ To compile this driver as a module, choose M here: the
+ module will be called dw_wdt.
+
config MPCORE_WATCHDOG
tristate "MPcore watchdog"
depends on HAVE_ARM_TWD
@@ -321,7 +341,7 @@
config IMX2_WDT
tristate "IMX2+ Watchdog"
- depends on ARCH_MX2 || ARCH_MX25 || ARCH_MX3 || ARCH_MX5
+ depends on IMX_HAVE_PLATFORM_IMX2_WDT
help
This is the driver for the hardware watchdog
on the Freescale IMX2 and later processors.
@@ -879,6 +899,20 @@
To compile this driver as a module, choose M here: the
module will be called m54xx_wdt.
+# MicroBlaze Architecture
+
+config XILINX_WATCHDOG
+ tristate "Xilinx Watchdog timer"
+ depends on MICROBLAZE
+ ---help---
+ Watchdog driver for the xps_timebase_wdt ip core.
+
+	  IMPORTANT: The xps_timebase_wdt parent must have the property
+	  "clock-frequency" in the device tree.
+
+ To compile this driver as a module, choose M here: the
+ module will be called of_xilinx_wdt.
+
# MIPS Architecture
config ATH79_WDT
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index ed26f70..55bd574 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -2,6 +2,10 @@
# Makefile for the WatchDog device drivers.
#
+# The WatchDog Timer Driver Core.
+watchdog-objs += watchdog_core.o watchdog_dev.o
+obj-$(CONFIG_WATCHDOG_CORE) += watchdog.o
+
# Only one watchdog can succeed. We probe the ISA/PCI/USB based
# watchdog-cards first, then the architecture specific watchdog
# drivers and then the architecture independent "softdog" driver.
@@ -37,6 +41,7 @@
obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o
obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o
obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o
+obj-$(CONFIG_DW_WATCHDOG) += dw_wdt.o
obj-$(CONFIG_MPCORE_WATCHDOG) += mpcore_wdt.o
obj-$(CONFIG_EP93XX_WATCHDOG) += ep93xx_wdt.o
obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o
@@ -109,6 +114,9 @@
# M68K Architecture
obj-$(CONFIG_M54xx_WATCHDOG) += m54xx_wdt.o
+# MicroBlaze Architecture
+obj-$(CONFIG_XILINX_WATCHDOG) += of_xilinx_wdt.o
+
# MIPS Architecture
obj-$(CONFIG_ATH79_WDT) += ath79_wdt.o
obj-$(CONFIG_BCM47XX_WDT) += bcm47xx_wdt.o
diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index eac2602..87445b2 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -31,7 +31,7 @@
#include <linux/bitops.h>
#include <linux/uaccess.h>
-#include <mach/at91_wdt.h>
+#include "at91sam9_wdt.h"
#define DRV_NAME "AT91SAM9 Watchdog"
@@ -284,27 +284,8 @@
return res;
}
-#ifdef CONFIG_PM
-
-static int at91wdt_suspend(struct platform_device *pdev, pm_message_t message)
-{
- return 0;
-}
-
-static int at91wdt_resume(struct platform_device *pdev)
-{
- return 0;
-}
-
-#else
-#define at91wdt_suspend NULL
-#define at91wdt_resume NULL
-#endif
-
static struct platform_driver at91wdt_driver = {
.remove = __exit_p(at91wdt_remove),
- .suspend = at91wdt_suspend,
- .resume = at91wdt_resume,
.driver = {
.name = "at91_wdt",
.owner = THIS_MODULE,
diff --git a/arch/arm/mach-at91/include/mach/at91_wdt.h b/drivers/watchdog/at91sam9_wdt.h
similarity index 96%
rename from arch/arm/mach-at91/include/mach/at91_wdt.h
rename to drivers/watchdog/at91sam9_wdt.h
index fecc2e9..757f9ca 100644
--- a/arch/arm/mach-at91/include/mach/at91_wdt.h
+++ b/drivers/watchdog/at91sam9_wdt.h
@@ -1,5 +1,5 @@
/*
- * arch/arm/mach-at91/include/mach/at91_wdt.h
+ * drivers/watchdog/at91sam9_wdt.h
*
* Copyright (C) 2007 Andrew Victor
* Copyright (C) 2007 Atmel Corporation.
diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
new file mode 100644
index 0000000..f10f8c0
--- /dev/null
+++ b/drivers/watchdog/dw_wdt.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2010-2011 Picochip Ltd., Jamie Iles
+ * http://www.picochip.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file implements a driver for the Synopsys DesignWare watchdog device
+ * found in many ARM subsystems. The watchdog has 16 different timeout periods
+ * and these are a function of the input clock frequency.
+ *
+ * The DesignWare watchdog cannot be stopped once it has been started so we
+ * use a software timer to implement a ping that will keep the watchdog alive.
+ * If we receive an expected close for the watchdog then we keep the timer
+ * running, otherwise the timer is stopped and the watchdog will expire.
+ */
+#define pr_fmt(fmt) "dw_wdt: " fmt
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pm.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/uaccess.h>
+#include <linux/watchdog.h>
+
+#define WDOG_CONTROL_REG_OFFSET 0x00
+#define WDOG_CONTROL_REG_WDT_EN_MASK 0x01
+#define WDOG_TIMEOUT_RANGE_REG_OFFSET 0x04
+#define WDOG_CURRENT_COUNT_REG_OFFSET 0x08
+#define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c
+#define WDOG_COUNTER_RESTART_KICK_VALUE 0x76
+
+/* The maximum TOP (timeout period) value that can be set in the watchdog. */
+#define DW_WDT_MAX_TOP 15
+
+static int nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, int, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
+ "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+#define WDT_TIMEOUT (HZ / 2)
+
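+/*
+ * All driver state lives in this one static instance, so the driver
+ * supports a single DesignWare watchdog per system.
+ */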
+static struct {
+ spinlock_t lock;
+ void __iomem *regs;
+ struct clk *clk;
+ unsigned long in_use;
+ unsigned long next_heartbeat;
+ struct timer_list timer;
+ int expect_close;
+} dw_wdt;
+
+static inline int dw_wdt_is_enabled(void)
+{
+ return readl(dw_wdt.regs + WDOG_CONTROL_REG_OFFSET) &
+ WDOG_CONTROL_REG_WDT_EN_MASK;
+}
+
+static inline int dw_wdt_top_in_seconds(unsigned top)
+{
+ /*
+ * There are 16 possible timeout values in 0..15 where the number of
+ * cycles is 2 ^ (16 + i) and the watchdog counts down.
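+	 * For example, with a 33 MHz input clock the selectable periods
+	 * range from 2^16 / 33e6 (under one second, truncated to 0 here)
+	 * up to 2^31 / 33e6 (about 65 seconds).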
+ */
+ return (1 << (16 + top)) / clk_get_rate(dw_wdt.clk);
+}
+
+static int dw_wdt_get_top(void)
+{
+ int top = readl(dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF;
+
+ return dw_wdt_top_in_seconds(top);
+}
+
+static inline void dw_wdt_set_next_heartbeat(void)
+{
+ dw_wdt.next_heartbeat = jiffies + dw_wdt_get_top() * HZ;
+}
+
+static int dw_wdt_set_top(unsigned top_s)
+{
+ int i, top_val = DW_WDT_MAX_TOP;
+
+ /*
+ * Iterate over the timeout values until we find the closest match. We
+ * always look for >=.
+ */
+ for (i = 0; i <= DW_WDT_MAX_TOP; ++i)
+ if (dw_wdt_top_in_seconds(i) >= top_s) {
+ top_val = i;
+ break;
+ }
+
+ /* Set the new value in the watchdog. */
+ writel(top_val, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+
+ dw_wdt_set_next_heartbeat();
+
+ return dw_wdt_top_in_seconds(top_val);
+}
+
+static void dw_wdt_keepalive(void)
+{
+ writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt.regs +
+ WDOG_COUNTER_RESTART_REG_OFFSET);
+}
+
+static void dw_wdt_ping(unsigned long data)
+{
+ if (time_before(jiffies, dw_wdt.next_heartbeat) ||
+ (!nowayout && !dw_wdt.in_use)) {
+ dw_wdt_keepalive();
+ mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+ } else
+ pr_crit("keepalive missed, machine will reset\n");
+}
+
+static int dw_wdt_open(struct inode *inode, struct file *filp)
+{
+ if (test_and_set_bit(0, &dw_wdt.in_use))
+ return -EBUSY;
+
+ /* Make sure we don't get unloaded. */
+ __module_get(THIS_MODULE);
+
+ spin_lock(&dw_wdt.lock);
+ if (!dw_wdt_is_enabled()) {
+ /*
+ * The watchdog is not currently enabled. Set the timeout to
+ * the maximum and then start it.
+ */
+ dw_wdt_set_top(DW_WDT_MAX_TOP);
+ writel(WDOG_CONTROL_REG_WDT_EN_MASK,
+ dw_wdt.regs + WDOG_CONTROL_REG_OFFSET);
+ }
+
+ dw_wdt_set_next_heartbeat();
+
+ spin_unlock(&dw_wdt.lock);
+
+ return nonseekable_open(inode, filp);
+}
+
+static ssize_t dw_wdt_write(struct file *filp, const char __user *buf,
+		size_t len,
+ loff_t *offset)
+{
+ if (!len)
+ return 0;
+
+ if (!nowayout) {
+ size_t i;
+
+ dw_wdt.expect_close = 0;
+
+ for (i = 0; i < len; ++i) {
+ char c;
+
+ if (get_user(c, buf + i))
+ return -EFAULT;
+
+ if (c == 'V') {
+ dw_wdt.expect_close = 1;
+ break;
+ }
+ }
+ }
+
+ dw_wdt_set_next_heartbeat();
+ mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+
+ return len;
+}
+
+static u32 dw_wdt_time_left(void)
+{
+ return readl(dw_wdt.regs + WDOG_CURRENT_COUNT_REG_OFFSET) /
+ clk_get_rate(dw_wdt.clk);
+}
+
+static const struct watchdog_info dw_wdt_ident = {
+ .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT |
+ WDIOF_MAGICCLOSE,
+ .identity = "Synopsys DesignWare Watchdog",
+};
+
+static long dw_wdt_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ unsigned long val;
+ int timeout;
+
+ switch (cmd) {
+ case WDIOC_GETSUPPORT:
+ return copy_to_user((struct watchdog_info *)arg, &dw_wdt_ident,
+ sizeof(dw_wdt_ident)) ? -EFAULT : 0;
+
+ case WDIOC_GETSTATUS:
+ case WDIOC_GETBOOTSTATUS:
+ return put_user(0, (int *)arg);
+
+ case WDIOC_KEEPALIVE:
+ dw_wdt_set_next_heartbeat();
+ return 0;
+
+ case WDIOC_SETTIMEOUT:
+ if (get_user(val, (int __user *)arg))
+ return -EFAULT;
+ timeout = dw_wdt_set_top(val);
+		return put_user(timeout, (int __user *)arg);
+
+ case WDIOC_GETTIMEOUT:
+ return put_user(dw_wdt_get_top(), (int __user *)arg);
+
+ case WDIOC_GETTIMELEFT:
+ /* Get the time left until expiry. */
+ if (get_user(val, (int __user *)arg))
+ return -EFAULT;
+ return put_user(dw_wdt_time_left(), (int __user *)arg);
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static int dw_wdt_release(struct inode *inode, struct file *filp)
+{
+ clear_bit(0, &dw_wdt.in_use);
+
+ if (!dw_wdt.expect_close) {
+ del_timer(&dw_wdt.timer);
+
+ if (!nowayout)
+ pr_crit("unexpected close, system will reboot soon\n");
+ else
+ pr_crit("watchdog cannot be disabled, system will reboot soon\n");
+ }
+
+ dw_wdt.expect_close = 0;
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int dw_wdt_suspend(struct device *dev)
+{
+ clk_disable(dw_wdt.clk);
+
+ return 0;
+}
+
+static int dw_wdt_resume(struct device *dev)
+{
+ int err = clk_enable(dw_wdt.clk);
+
+ if (err)
+ return err;
+
+ dw_wdt_keepalive();
+
+ return 0;
+}
+
+static const struct dev_pm_ops dw_wdt_pm_ops = {
+ .suspend = dw_wdt_suspend,
+ .resume = dw_wdt_resume,
+};
+#endif /* CONFIG_PM */
+
+static const struct file_operations wdt_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .open = dw_wdt_open,
+ .write = dw_wdt_write,
+ .unlocked_ioctl = dw_wdt_ioctl,
+ .release = dw_wdt_release
+};
+
+static struct miscdevice dw_wdt_miscdev = {
+ .fops = &wdt_fops,
+ .name = "watchdog",
+ .minor = WATCHDOG_MINOR,
+};
+
+static int __devinit dw_wdt_drv_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+ if (!mem)
+ return -EINVAL;
+
+ if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
+ "dw_wdt"))
+ return -ENOMEM;
+
+ dw_wdt.regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
+ if (!dw_wdt.regs)
+ return -ENOMEM;
+
+ dw_wdt.clk = clk_get(&pdev->dev, NULL);
+ if (IS_ERR(dw_wdt.clk))
+ return PTR_ERR(dw_wdt.clk);
+
+ ret = clk_enable(dw_wdt.clk);
+ if (ret)
+ goto out_put_clk;
+
+ spin_lock_init(&dw_wdt.lock);
+
+ ret = misc_register(&dw_wdt_miscdev);
+ if (ret)
+ goto out_disable_clk;
+
+ dw_wdt_set_next_heartbeat();
+ setup_timer(&dw_wdt.timer, dw_wdt_ping, 0);
+ mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
+
+ return 0;
+
+out_disable_clk:
+ clk_disable(dw_wdt.clk);
+out_put_clk:
+ clk_put(dw_wdt.clk);
+
+ return ret;
+}
+
+static int __devexit dw_wdt_drv_remove(struct platform_device *pdev)
+{
+ misc_deregister(&dw_wdt_miscdev);
+
+ clk_disable(dw_wdt.clk);
+ clk_put(dw_wdt.clk);
+
+ return 0;
+}
+
+static struct platform_driver dw_wdt_driver = {
+ .probe = dw_wdt_drv_probe,
+ .remove = __devexit_p(dw_wdt_drv_remove),
+ .driver = {
+ .name = "dw_wdt",
+ .owner = THIS_MODULE,
+#ifdef CONFIG_PM
+ .pm = &dw_wdt_pm_ops,
+#endif /* CONFIG_PM */
+ },
+};
+
+static int __init dw_wdt_watchdog_init(void)
+{
+ return platform_driver_register(&dw_wdt_driver);
+}
+module_init(dw_wdt_watchdog_init);
+
+static void __exit dw_wdt_watchdog_exit(void)
+{
+ platform_driver_unregister(&dw_wdt_driver);
+}
+module_exit(dw_wdt_watchdog_exit);
+
+MODULE_AUTHOR("Jamie Iles");
+MODULE_DESCRIPTION("Synopsys DesignWare Watchdog Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
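
For reference, a minimal userspace sketch (not part of the patch) showing how
a daemon might drive the resulting /dev/watchdog node; the 30 second value is
an illustrative choice that dw_wdt_set_top() rounds up to the nearest
supported TOP period:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/watchdog.h>

	int main(void)
	{
		int timeout = 30;	/* assumed: requested timeout in seconds */
		int fd = open("/dev/watchdog", O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* The driver writes back the effective (rounded) timeout. */
		if (ioctl(fd, WDIOC_SETTIMEOUT, &timeout) == 0)
			printf("effective timeout: %d s\n", timeout);

		/* Any write counts as a kick. */
		ioctl(fd, WDIOC_KEEPALIVE, 0);

		/* Magic close: with 'V' the driver keeps pinging after close. */
		write(fd, "V", 1);
		close(fd);
		return 0;
	}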
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 8cb2685..410fba4 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -36,7 +36,7 @@
#include <asm/cacheflush.h>
#endif /* CONFIG_HPWDT_NMI_DECODING */
-#define HPWDT_VERSION "1.2.0"
+#define HPWDT_VERSION "1.3.0"
#define SECS_TO_TICKS(secs) ((secs) * 1000 / 128)
#define TICKS_TO_SECS(ticks) ((ticks) * 128 / 1000)
#define HPWDT_MAX_TIMER TICKS_TO_SECS(65535)
@@ -87,6 +87,19 @@
};
#define SMBIOS_CRU64_INFORMATION 212
+/* type 219 */
+struct smbios_proliant_info {
+ u8 type;
+ u8 byte_length;
+ u16 handle;
+ u32 power_features;
+ u32 omega_features;
+ u32 reserved;
+ u32 misc_features;
+};
+#define SMBIOS_ICRU_INFORMATION 219
+
+
struct cmn_registers {
union {
struct {
@@ -132,6 +145,7 @@
static unsigned int hpwdt_nmi_decoding;
static unsigned int allow_kdump;
static unsigned int priority; /* hpwdt at end of die_notify list */
+static unsigned int is_icru;
static DEFINE_SPINLOCK(rom_lock);
static void *cru_rom_addr;
static struct cmn_registers cmn_regs;
@@ -476,19 +490,22 @@
goto out;
spin_lock_irqsave(&rom_lock, rom_pl);
- if (!die_nmi_called)
+ if (!die_nmi_called && !is_icru)
asminline_call(&cmn_regs, cru_rom_addr);
die_nmi_called = 1;
spin_unlock_irqrestore(&rom_lock, rom_pl);
- if (cmn_regs.u1.ral == 0) {
- printk(KERN_WARNING "hpwdt: An NMI occurred, "
- "but unable to determine source.\n");
- } else {
- if (allow_kdump)
- hpwdt_stop();
- panic("An NMI occurred, please see the Integrated "
- "Management Log for details.\n");
+ if (!is_icru) {
+ if (cmn_regs.u1.ral == 0) {
+ printk(KERN_WARNING "hpwdt: An NMI occurred, "
+ "but unable to determine source.\n");
+ }
}
+
+ if (allow_kdump)
+ hpwdt_stop();
+ panic("An NMI occurred, please see the Integrated "
+ "Management Log for details.\n");
+
out:
return NOTIFY_OK;
}
@@ -659,30 +676,63 @@
}
#endif /* CONFIG_X86_LOCAL_APIC */
+/*
+ * dmi_find_icru
+ *
+ * Routine Description:
+ * This function checks whether or not we are on an iCRU-based server.
+ * This check is independent of architecture and needs to be made for
+ * any ProLiant system.
+ */
+static void __devinit dmi_find_icru(const struct dmi_header *dm, void *dummy)
+{
+ struct smbios_proliant_info *smbios_proliant_ptr;
+
+ if (dm->type == SMBIOS_ICRU_INFORMATION) {
+ smbios_proliant_ptr = (struct smbios_proliant_info *) dm;
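+		/* Bit 0 of misc_features flags iCRU support on this system. */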
+ if (smbios_proliant_ptr->misc_features & 0x01)
+ is_icru = 1;
+ }
+}
+
static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
{
int retval;
/*
- * We need to map the ROM to get the CRU service.
- * For 32 bit Operating Systems we need to go through the 32 Bit
- * BIOS Service Directory
- * For 64 bit Operating Systems we get that service through SMBIOS.
+ * On typical CRU-based systems we need to map that service in
+ * the BIOS. For 32 bit Operating Systems we need to go through
+ * the 32 Bit BIOS Service Directory. For 64 bit Operating
+ * Systems we get that service through SMBIOS.
+ *
+ * On systems that support the new iCRU service all we need to
+ * do is call dmi_walk to get the supported flag value and skip
+ * the old cru detect code.
*/
- retval = detect_cru_service();
- if (retval < 0) {
- dev_warn(&dev->dev,
- "Unable to detect the %d Bit CRU Service.\n",
- HPWDT_ARCH);
- return retval;
- }
+ dmi_walk(dmi_find_icru, NULL);
+ if (!is_icru) {
- /*
- * We know this is the only CRU call we need to make so lets keep as
- * few instructions as possible once the NMI comes in.
- */
- cmn_regs.u1.rah = 0x0D;
- cmn_regs.u1.ral = 0x02;
+ /*
+ * We need to map the ROM to get the CRU service.
+ * For 32 bit Operating Systems we need to go through the 32 Bit
+ * BIOS Service Directory
+ * For 64 bit Operating Systems we get that service through SMBIOS.
+ */
+ retval = detect_cru_service();
+ if (retval < 0) {
+ dev_warn(&dev->dev,
+ "Unable to detect the %d Bit CRU Service.\n",
+ HPWDT_ARCH);
+ return retval;
+ }
+
+ /*
+	 * We know this is the only CRU call we need to make, so let's keep as
+ * few instructions as possible once the NMI comes in.
+ */
+ cmn_regs.u1.rah = 0x0D;
+ cmn_regs.u1.ral = 0x02;
+ }
/*
* If the priority is set to 1, then we will be put first on the
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 5fd020d..751a591 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -120,72 +120,12 @@
TCO_3420, /* 3420 */
TCO_3450, /* 3450 */
TCO_EP80579, /* EP80579 */
- TCO_CPT1, /* Cougar Point */
- TCO_CPT2, /* Cougar Point Desktop */
- TCO_CPT3, /* Cougar Point Mobile */
- TCO_CPT4, /* Cougar Point */
- TCO_CPT5, /* Cougar Point */
- TCO_CPT6, /* Cougar Point */
- TCO_CPT7, /* Cougar Point */
- TCO_CPT8, /* Cougar Point */
- TCO_CPT9, /* Cougar Point */
- TCO_CPT10, /* Cougar Point */
- TCO_CPT11, /* Cougar Point */
- TCO_CPT12, /* Cougar Point */
- TCO_CPT13, /* Cougar Point */
- TCO_CPT14, /* Cougar Point */
- TCO_CPT15, /* Cougar Point */
- TCO_CPT16, /* Cougar Point */
- TCO_CPT17, /* Cougar Point */
- TCO_CPT18, /* Cougar Point */
- TCO_CPT19, /* Cougar Point */
- TCO_CPT20, /* Cougar Point */
- TCO_CPT21, /* Cougar Point */
- TCO_CPT22, /* Cougar Point */
- TCO_CPT23, /* Cougar Point */
- TCO_CPT24, /* Cougar Point */
- TCO_CPT25, /* Cougar Point */
- TCO_CPT26, /* Cougar Point */
- TCO_CPT27, /* Cougar Point */
- TCO_CPT28, /* Cougar Point */
- TCO_CPT29, /* Cougar Point */
- TCO_CPT30, /* Cougar Point */
- TCO_CPT31, /* Cougar Point */
- TCO_PBG1, /* Patsburg */
- TCO_PBG2, /* Patsburg */
+ TCO_CPT, /* Cougar Point */
+ TCO_CPTD, /* Cougar Point Desktop */
+ TCO_CPTM, /* Cougar Point Mobile */
+ TCO_PBG, /* Patsburg */
TCO_DH89XXCC, /* DH89xxCC */
- TCO_PPT0, /* Panther Point */
- TCO_PPT1, /* Panther Point */
- TCO_PPT2, /* Panther Point */
- TCO_PPT3, /* Panther Point */
- TCO_PPT4, /* Panther Point */
- TCO_PPT5, /* Panther Point */
- TCO_PPT6, /* Panther Point */
- TCO_PPT7, /* Panther Point */
- TCO_PPT8, /* Panther Point */
- TCO_PPT9, /* Panther Point */
- TCO_PPT10, /* Panther Point */
- TCO_PPT11, /* Panther Point */
- TCO_PPT12, /* Panther Point */
- TCO_PPT13, /* Panther Point */
- TCO_PPT14, /* Panther Point */
- TCO_PPT15, /* Panther Point */
- TCO_PPT16, /* Panther Point */
- TCO_PPT17, /* Panther Point */
- TCO_PPT18, /* Panther Point */
- TCO_PPT19, /* Panther Point */
- TCO_PPT20, /* Panther Point */
- TCO_PPT21, /* Panther Point */
- TCO_PPT22, /* Panther Point */
- TCO_PPT23, /* Panther Point */
- TCO_PPT24, /* Panther Point */
- TCO_PPT25, /* Panther Point */
- TCO_PPT26, /* Panther Point */
- TCO_PPT27, /* Panther Point */
- TCO_PPT28, /* Panther Point */
- TCO_PPT29, /* Panther Point */
- TCO_PPT30, /* Panther Point */
- TCO_PPT31, /* Panther Point */
+ TCO_PPT, /* Panther Point */
};
static struct {
@@ -244,83 +184,14 @@
{"3450", 2},
{"EP80579", 2},
{"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Cougar Point", 2},
- {"Patsburg", 2},
+ {"Cougar Point Desktop", 2},
+ {"Cougar Point Mobile", 2},
{"Patsburg", 2},
{"DH89xxCC", 2},
{"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
- {"Panther Point", 2},
{NULL, 0}
};
-#define ITCO_PCI_DEVICE(dev, data) \
- .vendor = PCI_VENDOR_ID_INTEL, \
- .device = dev, \
- .subvendor = PCI_ANY_ID, \
- .subdevice = PCI_ANY_ID, \
- .class = 0, \
- .class_mask = 0, \
- .driver_data = data
-
/*
* This data only exists for exporting the supported PCI ids
* via MODULE_DEVICE_TABLE. We do not actually register a
@@ -328,138 +199,138 @@
* functions that probably will be registered by other drivers.
*/
static DEFINE_PCI_DEVICE_TABLE(iTCO_wdt_pci_tbl) = {
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AA_0, TCO_ICH)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AB_0, TCO_ICH0)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_0, TCO_ICH2)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_10, TCO_ICH2M)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_0, TCO_ICH3)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_12, TCO_ICH3M)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_0, TCO_ICH4)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_12, TCO_ICH4M)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801E_0, TCO_CICH)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801EB_0, TCO_ICH5)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB_1, TCO_6300ESB)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_0, TCO_ICH6)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_1, TCO_ICH6M)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_2, TCO_ICH6W)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB2_0, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2671, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2672, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2673, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2674, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2675, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2676, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2677, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2678, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x2679, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x267a, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x267b, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x267c, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x267d, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x267e, TCO_631XESB)},
- { ITCO_PCI_DEVICE(0x267f, TCO_631XESB)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_0, TCO_ICH7)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_30, TCO_ICH7DH)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_1, TCO_ICH7M)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_31, TCO_ICH7MDH)},
- { ITCO_PCI_DEVICE(0x27bc, TCO_NM10)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_0, TCO_ICH8)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_2, TCO_ICH8DH)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_3, TCO_ICH8DO)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_4, TCO_ICH8M)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_1, TCO_ICH8ME)},
- { ITCO_PCI_DEVICE(0x2918, TCO_ICH9)},
- { ITCO_PCI_DEVICE(0x2916, TCO_ICH9R)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_2, TCO_ICH9DH)},
- { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_4, TCO_ICH9DO)},
- { ITCO_PCI_DEVICE(0x2919, TCO_ICH9M)},
- { ITCO_PCI_DEVICE(0x2917, TCO_ICH9ME)},
- { ITCO_PCI_DEVICE(0x3a18, TCO_ICH10)},
- { ITCO_PCI_DEVICE(0x3a16, TCO_ICH10R)},
- { ITCO_PCI_DEVICE(0x3a1a, TCO_ICH10D)},
- { ITCO_PCI_DEVICE(0x3a14, TCO_ICH10DO)},
- { ITCO_PCI_DEVICE(0x3b00, TCO_PCH)},
- { ITCO_PCI_DEVICE(0x3b01, TCO_PCHM)},
- { ITCO_PCI_DEVICE(0x3b02, TCO_P55)},
- { ITCO_PCI_DEVICE(0x3b03, TCO_PM55)},
- { ITCO_PCI_DEVICE(0x3b06, TCO_H55)},
- { ITCO_PCI_DEVICE(0x3b07, TCO_QM57)},
- { ITCO_PCI_DEVICE(0x3b08, TCO_H57)},
- { ITCO_PCI_DEVICE(0x3b09, TCO_HM55)},
- { ITCO_PCI_DEVICE(0x3b0a, TCO_Q57)},
- { ITCO_PCI_DEVICE(0x3b0b, TCO_HM57)},
- { ITCO_PCI_DEVICE(0x3b0d, TCO_PCHMSFF)},
- { ITCO_PCI_DEVICE(0x3b0f, TCO_QS57)},
- { ITCO_PCI_DEVICE(0x3b12, TCO_3400)},
- { ITCO_PCI_DEVICE(0x3b14, TCO_3420)},
- { ITCO_PCI_DEVICE(0x3b16, TCO_3450)},
- { ITCO_PCI_DEVICE(0x5031, TCO_EP80579)},
- { ITCO_PCI_DEVICE(0x1c41, TCO_CPT1)},
- { ITCO_PCI_DEVICE(0x1c42, TCO_CPT2)},
- { ITCO_PCI_DEVICE(0x1c43, TCO_CPT3)},
- { ITCO_PCI_DEVICE(0x1c44, TCO_CPT4)},
- { ITCO_PCI_DEVICE(0x1c45, TCO_CPT5)},
- { ITCO_PCI_DEVICE(0x1c46, TCO_CPT6)},
- { ITCO_PCI_DEVICE(0x1c47, TCO_CPT7)},
- { ITCO_PCI_DEVICE(0x1c48, TCO_CPT8)},
- { ITCO_PCI_DEVICE(0x1c49, TCO_CPT9)},
- { ITCO_PCI_DEVICE(0x1c4a, TCO_CPT10)},
- { ITCO_PCI_DEVICE(0x1c4b, TCO_CPT11)},
- { ITCO_PCI_DEVICE(0x1c4c, TCO_CPT12)},
- { ITCO_PCI_DEVICE(0x1c4d, TCO_CPT13)},
- { ITCO_PCI_DEVICE(0x1c4e, TCO_CPT14)},
- { ITCO_PCI_DEVICE(0x1c4f, TCO_CPT15)},
- { ITCO_PCI_DEVICE(0x1c50, TCO_CPT16)},
- { ITCO_PCI_DEVICE(0x1c51, TCO_CPT17)},
- { ITCO_PCI_DEVICE(0x1c52, TCO_CPT18)},
- { ITCO_PCI_DEVICE(0x1c53, TCO_CPT19)},
- { ITCO_PCI_DEVICE(0x1c54, TCO_CPT20)},
- { ITCO_PCI_DEVICE(0x1c55, TCO_CPT21)},
- { ITCO_PCI_DEVICE(0x1c56, TCO_CPT22)},
- { ITCO_PCI_DEVICE(0x1c57, TCO_CPT23)},
- { ITCO_PCI_DEVICE(0x1c58, TCO_CPT24)},
- { ITCO_PCI_DEVICE(0x1c59, TCO_CPT25)},
- { ITCO_PCI_DEVICE(0x1c5a, TCO_CPT26)},
- { ITCO_PCI_DEVICE(0x1c5b, TCO_CPT27)},
- { ITCO_PCI_DEVICE(0x1c5c, TCO_CPT28)},
- { ITCO_PCI_DEVICE(0x1c5d, TCO_CPT29)},
- { ITCO_PCI_DEVICE(0x1c5e, TCO_CPT30)},
- { ITCO_PCI_DEVICE(0x1c5f, TCO_CPT31)},
- { ITCO_PCI_DEVICE(0x1d40, TCO_PBG1)},
- { ITCO_PCI_DEVICE(0x1d41, TCO_PBG2)},
- { ITCO_PCI_DEVICE(0x2310, TCO_DH89XXCC)},
- { ITCO_PCI_DEVICE(0x1e40, TCO_PPT0)},
- { ITCO_PCI_DEVICE(0x1e41, TCO_PPT1)},
- { ITCO_PCI_DEVICE(0x1e42, TCO_PPT2)},
- { ITCO_PCI_DEVICE(0x1e43, TCO_PPT3)},
- { ITCO_PCI_DEVICE(0x1e44, TCO_PPT4)},
- { ITCO_PCI_DEVICE(0x1e45, TCO_PPT5)},
- { ITCO_PCI_DEVICE(0x1e46, TCO_PPT6)},
- { ITCO_PCI_DEVICE(0x1e47, TCO_PPT7)},
- { ITCO_PCI_DEVICE(0x1e48, TCO_PPT8)},
- { ITCO_PCI_DEVICE(0x1e49, TCO_PPT9)},
- { ITCO_PCI_DEVICE(0x1e4a, TCO_PPT10)},
- { ITCO_PCI_DEVICE(0x1e4b, TCO_PPT11)},
- { ITCO_PCI_DEVICE(0x1e4c, TCO_PPT12)},
- { ITCO_PCI_DEVICE(0x1e4d, TCO_PPT13)},
- { ITCO_PCI_DEVICE(0x1e4e, TCO_PPT14)},
- { ITCO_PCI_DEVICE(0x1e4f, TCO_PPT15)},
- { ITCO_PCI_DEVICE(0x1e50, TCO_PPT16)},
- { ITCO_PCI_DEVICE(0x1e51, TCO_PPT17)},
- { ITCO_PCI_DEVICE(0x1e52, TCO_PPT18)},
- { ITCO_PCI_DEVICE(0x1e53, TCO_PPT19)},
- { ITCO_PCI_DEVICE(0x1e54, TCO_PPT20)},
- { ITCO_PCI_DEVICE(0x1e55, TCO_PPT21)},
- { ITCO_PCI_DEVICE(0x1e56, TCO_PPT22)},
- { ITCO_PCI_DEVICE(0x1e57, TCO_PPT23)},
- { ITCO_PCI_DEVICE(0x1e58, TCO_PPT24)},
- { ITCO_PCI_DEVICE(0x1e59, TCO_PPT25)},
- { ITCO_PCI_DEVICE(0x1e5a, TCO_PPT26)},
- { ITCO_PCI_DEVICE(0x1e5b, TCO_PPT27)},
- { ITCO_PCI_DEVICE(0x1e5c, TCO_PPT28)},
- { ITCO_PCI_DEVICE(0x1e5d, TCO_PPT29)},
- { ITCO_PCI_DEVICE(0x1e5e, TCO_PPT30)},
- { ITCO_PCI_DEVICE(0x1e5f, TCO_PPT31)},
+ { PCI_VDEVICE(INTEL, 0x2410), TCO_ICH},
+ { PCI_VDEVICE(INTEL, 0x2420), TCO_ICH0},
+ { PCI_VDEVICE(INTEL, 0x2440), TCO_ICH2},
+ { PCI_VDEVICE(INTEL, 0x244c), TCO_ICH2M},
+ { PCI_VDEVICE(INTEL, 0x2480), TCO_ICH3},
+ { PCI_VDEVICE(INTEL, 0x248c), TCO_ICH3M},
+ { PCI_VDEVICE(INTEL, 0x24c0), TCO_ICH4},
+ { PCI_VDEVICE(INTEL, 0x24cc), TCO_ICH4M},
+ { PCI_VDEVICE(INTEL, 0x2450), TCO_CICH},
+ { PCI_VDEVICE(INTEL, 0x24d0), TCO_ICH5},
+ { PCI_VDEVICE(INTEL, 0x25a1), TCO_6300ESB},
+ { PCI_VDEVICE(INTEL, 0x2640), TCO_ICH6},
+ { PCI_VDEVICE(INTEL, 0x2641), TCO_ICH6M},
+ { PCI_VDEVICE(INTEL, 0x2642), TCO_ICH6W},
+ { PCI_VDEVICE(INTEL, 0x2670), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2671), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2672), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2673), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2674), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2675), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2676), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2677), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2678), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x2679), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x267a), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x267b), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x267c), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x267d), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x267e), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x267f), TCO_631XESB},
+ { PCI_VDEVICE(INTEL, 0x27b8), TCO_ICH7},
+ { PCI_VDEVICE(INTEL, 0x27b0), TCO_ICH7DH},
+ { PCI_VDEVICE(INTEL, 0x27b9), TCO_ICH7M},
+ { PCI_VDEVICE(INTEL, 0x27bd), TCO_ICH7MDH},
+ { PCI_VDEVICE(INTEL, 0x27bc), TCO_NM10},
+ { PCI_VDEVICE(INTEL, 0x2810), TCO_ICH8},
+ { PCI_VDEVICE(INTEL, 0x2812), TCO_ICH8DH},
+ { PCI_VDEVICE(INTEL, 0x2814), TCO_ICH8DO},
+ { PCI_VDEVICE(INTEL, 0x2815), TCO_ICH8M},
+ { PCI_VDEVICE(INTEL, 0x2811), TCO_ICH8ME},
+ { PCI_VDEVICE(INTEL, 0x2918), TCO_ICH9},
+ { PCI_VDEVICE(INTEL, 0x2916), TCO_ICH9R},
+ { PCI_VDEVICE(INTEL, 0x2912), TCO_ICH9DH},
+ { PCI_VDEVICE(INTEL, 0x2914), TCO_ICH9DO},
+ { PCI_VDEVICE(INTEL, 0x2919), TCO_ICH9M},
+ { PCI_VDEVICE(INTEL, 0x2917), TCO_ICH9ME},
+ { PCI_VDEVICE(INTEL, 0x3a18), TCO_ICH10},
+ { PCI_VDEVICE(INTEL, 0x3a16), TCO_ICH10R},
+ { PCI_VDEVICE(INTEL, 0x3a1a), TCO_ICH10D},
+ { PCI_VDEVICE(INTEL, 0x3a14), TCO_ICH10DO},
+ { PCI_VDEVICE(INTEL, 0x3b00), TCO_PCH},
+ { PCI_VDEVICE(INTEL, 0x3b01), TCO_PCHM},
+ { PCI_VDEVICE(INTEL, 0x3b02), TCO_P55},
+ { PCI_VDEVICE(INTEL, 0x3b03), TCO_PM55},
+ { PCI_VDEVICE(INTEL, 0x3b06), TCO_H55},
+ { PCI_VDEVICE(INTEL, 0x3b07), TCO_QM57},
+ { PCI_VDEVICE(INTEL, 0x3b08), TCO_H57},
+ { PCI_VDEVICE(INTEL, 0x3b09), TCO_HM55},
+ { PCI_VDEVICE(INTEL, 0x3b0a), TCO_Q57},
+ { PCI_VDEVICE(INTEL, 0x3b0b), TCO_HM57},
+ { PCI_VDEVICE(INTEL, 0x3b0d), TCO_PCHMSFF},
+ { PCI_VDEVICE(INTEL, 0x3b0f), TCO_QS57},
+ { PCI_VDEVICE(INTEL, 0x3b12), TCO_3400},
+ { PCI_VDEVICE(INTEL, 0x3b14), TCO_3420},
+ { PCI_VDEVICE(INTEL, 0x3b16), TCO_3450},
+ { PCI_VDEVICE(INTEL, 0x5031), TCO_EP80579},
+ { PCI_VDEVICE(INTEL, 0x1c41), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c42), TCO_CPTD},
+ { PCI_VDEVICE(INTEL, 0x1c43), TCO_CPTM},
+ { PCI_VDEVICE(INTEL, 0x1c44), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c45), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c46), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c47), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c48), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c49), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c4a), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c4b), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c4c), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c4d), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c4e), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c4f), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c50), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c51), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c52), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c53), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c54), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c55), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c56), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c57), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c58), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c59), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c5a), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c5b), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c5c), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c5d), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c5e), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1c5f), TCO_CPT},
+ { PCI_VDEVICE(INTEL, 0x1d40), TCO_PBG},
+ { PCI_VDEVICE(INTEL, 0x1d41), TCO_PBG},
+ { PCI_VDEVICE(INTEL, 0x2310), TCO_DH89XXCC},
+ { PCI_VDEVICE(INTEL, 0x1e40), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e41), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e42), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e43), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e44), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e45), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e46), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e47), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e48), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e49), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e4a), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e4b), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e4c), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e4d), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e4e), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e4f), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e50), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e51), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e52), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e53), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e54), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e55), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e56), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e57), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e58), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e59), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e5a), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e5b), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e5c), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e5d), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e5e), TCO_PPT},
+ { PCI_VDEVICE(INTEL, 0x1e5f), TCO_PPT},
{ 0, }, /* End of list */
};
MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl);
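
The rewritten table drops the driver-private ITCO_PCI_DEVICE() wrapper in
favour of the generic PCI_VDEVICE() helper, which (roughly as defined in
<linux/pci.h> of this era) fills in the same vendor/device fields and
wildcards the rest, leaving driver_data as the trailing initializer:

	#define PCI_VDEVICE(vendor, device)		\
		PCI_VENDOR_ID_##vendor, (device),	\
		PCI_ANY_ID, PCI_ANY_ID, 0, 0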
@@ -1052,15 +923,10 @@
iTCO_wdt_stop();
}
-#define iTCO_wdt_suspend NULL
-#define iTCO_wdt_resume NULL
-
static struct platform_driver iTCO_wdt_driver = {
.probe = iTCO_wdt_probe,
.remove = __devexit_p(iTCO_wdt_remove),
.shutdown = iTCO_wdt_shutdown,
- .suspend = iTCO_wdt_suspend,
- .resume = iTCO_wdt_resume,
.driver = {
.owner = THIS_MODULE,
.name = DRV_NAME,
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 86f7cac..b8ef2c6 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -329,12 +329,18 @@
}
}
+static const struct of_device_id imx2_wdt_dt_ids[] = {
+ { .compatible = "fsl,imx21-wdt", },
+ { /* sentinel */ }
+};
+
static struct platform_driver imx2_wdt_driver = {
.remove = __exit_p(imx2_wdt_remove),
.shutdown = imx2_wdt_shutdown,
.driver = {
.name = DRIVER_NAME,
.owner = THIS_MODULE,
+ .of_match_table = imx2_wdt_dt_ids,
},
};
diff --git a/drivers/watchdog/it8712f_wdt.c b/drivers/watchdog/it8712f_wdt.c
index 6143f52..8d2d850 100644
--- a/drivers/watchdog/it8712f_wdt.c
+++ b/drivers/watchdog/it8712f_wdt.c
@@ -28,10 +28,10 @@
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/fs.h>
-#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/ioport.h>
#define NAME "it8712f_wdt"
@@ -51,7 +51,6 @@
static unsigned long wdt_open;
static unsigned expect_close;
-static spinlock_t io_lock;
static unsigned char revision;
/* Dog Food address - We use the game port address */
@@ -121,20 +120,26 @@
outb(ldn, VAL);
}
-static inline void superio_enter(void)
+static inline int superio_enter(void)
{
- spin_lock(&io_lock);
+ /*
+ * Try to reserve REG and REG + 1 for exclusive access.
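+	 * request_muxed_region() may sleep until a competing muxed user
+	 * releases the range, which is why the old spinlock is no longer
+	 * needed (or safe) here.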
+ */
+ if (!request_muxed_region(REG, 2, NAME))
+ return -EBUSY;
+
outb(0x87, REG);
outb(0x01, REG);
outb(0x55, REG);
outb(0x55, REG);
+ return 0;
}
static inline void superio_exit(void)
{
outb(0x02, REG);
outb(0x02, VAL);
- spin_unlock(&io_lock);
+ release_region(REG, 2);
}
static inline void it8712f_wdt_ping(void)
@@ -173,10 +178,13 @@
return 0;
}
-static void it8712f_wdt_enable(void)
+static int it8712f_wdt_enable(void)
{
+ int ret = superio_enter();
+ if (ret)
+ return ret;
+
printk(KERN_DEBUG NAME ": enabling watchdog timer\n");
- superio_enter();
superio_select(LDN_GPIO);
superio_outb(wdt_control_reg, WDT_CONTROL);
@@ -186,13 +194,17 @@
superio_exit();
it8712f_wdt_ping();
+
+ return 0;
}
-static void it8712f_wdt_disable(void)
+static int it8712f_wdt_disable(void)
{
- printk(KERN_DEBUG NAME ": disabling watchdog timer\n");
+ int ret = superio_enter();
+ if (ret)
+ return ret;
- superio_enter();
+ printk(KERN_DEBUG NAME ": disabling watchdog timer\n");
superio_select(LDN_GPIO);
superio_outb(0, WDT_CONFIG);
@@ -202,6 +214,7 @@
superio_outb(0, WDT_TIMEOUT);
superio_exit();
+ return 0;
}
static int it8712f_wdt_notify(struct notifier_block *this,
@@ -252,6 +265,7 @@
WDIOF_MAGICCLOSE,
};
int value;
+ int ret;
switch (cmd) {
case WDIOC_GETSUPPORT:
@@ -259,7 +273,9 @@
return -EFAULT;
return 0;
case WDIOC_GETSTATUS:
- superio_enter();
+ ret = superio_enter();
+ if (ret)
+ return ret;
superio_select(LDN_GPIO);
value = it8712f_wdt_get_status();
@@ -280,7 +296,9 @@
if (value > (max_units * 60))
return -EINVAL;
margin = value;
- superio_enter();
+ ret = superio_enter();
+ if (ret)
+ return ret;
superio_select(LDN_GPIO);
it8712f_wdt_update_margin();
@@ -299,10 +317,14 @@
static int it8712f_wdt_open(struct inode *inode, struct file *file)
{
+ int ret;
/* only allow one at a time */
if (test_and_set_bit(0, &wdt_open))
return -EBUSY;
- it8712f_wdt_enable();
+
+ ret = it8712f_wdt_enable();
+ if (ret)
+ return ret;
return nonseekable_open(inode, file);
}
@@ -313,7 +335,8 @@
": watchdog device closed unexpectedly, will not"
" disable the watchdog timer\n");
} else if (!nowayout) {
- it8712f_wdt_disable();
+ if (it8712f_wdt_disable())
+			printk(KERN_WARNING NAME ": watchdog disable failed\n");
}
expect_close = 0;
clear_bit(0, &wdt_open);
@@ -340,8 +363,10 @@
{
int err = -ENODEV;
int chip_type;
+ int ret = superio_enter();
+ if (ret)
+ return ret;
- superio_enter();
chip_type = superio_inw(DEVID);
if (chip_type != IT8712F_DEVID)
goto exit;
@@ -382,8 +407,6 @@
{
int err = 0;
- spin_lock_init(&io_lock);
-
if (it8712f_wdt_find(&address))
return -ENODEV;
@@ -392,7 +415,11 @@
return -EBUSY;
}
- it8712f_wdt_disable();
+ err = it8712f_wdt_disable();
+ if (err) {
+ printk(KERN_ERR NAME ": unable to disable watchdog timer.\n");
+ goto out;
+ }
err = register_reboot_notifier(&it8712f_wdt_notifier);
if (err) {
diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index b1bc72f..a2d9a12 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c
@@ -137,7 +137,6 @@
static unsigned int base, gpact, ciract, max_units, chip_type;
static unsigned long wdt_status;
-static DEFINE_SPINLOCK(spinlock);
static int nogameport = DEFAULT_NOGAMEPORT;
static int exclusive = DEFAULT_EXCLUSIVE;
@@ -163,18 +162,26 @@
/* Superio Chip */
-static inline void superio_enter(void)
+static inline int superio_enter(void)
{
+ /*
+ * Try to reserve REG and REG + 1 for exclusive access.
+ */
+ if (!request_muxed_region(REG, 2, WATCHDOG_NAME))
+ return -EBUSY;
+
outb(0x87, REG);
outb(0x01, REG);
outb(0x55, REG);
outb(0x55, REG);
+ return 0;
}
static inline void superio_exit(void)
{
outb(0x02, REG);
outb(0x02, VAL);
+ release_region(REG, 2);
}
static inline void superio_select(int ldn)
@@ -255,12 +262,11 @@
set_bit(WDTS_KEEPALIVE, &wdt_status);
}
-static void wdt_start(void)
+static int wdt_start(void)
{
- unsigned long flags;
-
- spin_lock_irqsave(&spinlock, flags);
- superio_enter();
+ int ret = superio_enter();
+ if (ret)
+ return ret;
superio_select(GPIO);
if (test_bit(WDTS_USE_GP, &wdt_status))
@@ -270,15 +276,15 @@
wdt_update_timeout();
superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
+
+ return 0;
}
-static void wdt_stop(void)
+static int wdt_stop(void)
{
- unsigned long flags;
-
- spin_lock_irqsave(&spinlock, flags);
- superio_enter();
+ int ret = superio_enter();
+ if (ret)
+ return ret;
superio_select(GPIO);
superio_outb(0x00, WDTCTRL);
@@ -288,7 +294,7 @@
superio_outb(0x00, WDTVALMSB);
superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
+ return 0;
}
/**
@@ -303,8 +309,6 @@
static int wdt_set_timeout(int t)
{
- unsigned long flags;
-
if (t < 1 || t > max_units * 60)
return -EINVAL;
@@ -313,14 +317,15 @@
else
timeout = t;
- spin_lock_irqsave(&spinlock, flags);
if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
- superio_enter();
+ int ret = superio_enter();
+ if (ret)
+ return ret;
+
superio_select(GPIO);
wdt_update_timeout();
superio_exit();
}
- spin_unlock_irqrestore(&spinlock, flags);
return 0;
}
@@ -339,12 +344,12 @@
static int wdt_get_status(int *status)
{
- unsigned long flags;
-
*status = 0;
if (testmode) {
- spin_lock_irqsave(&spinlock, flags);
- superio_enter();
+ int ret = superio_enter();
+ if (ret)
+ return ret;
+
superio_select(GPIO);
if (superio_inb(WDTCTRL) & WDT_ZERO) {
superio_outb(0x00, WDTCTRL);
@@ -353,7 +358,6 @@
}
superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
}
if (test_and_clear_bit(WDTS_KEEPALIVE, &wdt_status))
*status |= WDIOF_KEEPALIVEPING;
@@ -379,9 +383,17 @@
if (exclusive && test_and_set_bit(WDTS_DEV_OPEN, &wdt_status))
return -EBUSY;
if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
+ int ret;
if (nowayout && !test_and_set_bit(WDTS_LOCKED, &wdt_status))
__module_get(THIS_MODULE);
- wdt_start();
+
+ ret = wdt_start();
+ if (ret) {
+ clear_bit(WDTS_LOCKED, &wdt_status);
+ clear_bit(WDTS_TIMER_RUN, &wdt_status);
+ clear_bit(WDTS_DEV_OPEN, &wdt_status);
+ return ret;
+ }
}
return nonseekable_open(inode, file);
}
@@ -403,7 +415,16 @@
{
if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
if (test_and_clear_bit(WDTS_EXPECTED, &wdt_status)) {
- wdt_stop();
+ int ret = wdt_stop();
+ if (ret) {
+ /*
+ * Stop failed. Just keep the watchdog alive
+ * and hope nothing bad happens.
+ */
+ set_bit(WDTS_EXPECTED, &wdt_status);
+ wdt_keepalive();
+ return ret;
+ }
clear_bit(WDTS_TIMER_RUN, &wdt_status);
} else {
wdt_keepalive();
@@ -484,7 +505,9 @@
&ident, sizeof(ident)) ? -EFAULT : 0;
case WDIOC_GETSTATUS:
- wdt_get_status(&status);
+ rc = wdt_get_status(&status);
+ if (rc)
+ return rc;
return put_user(status, uarg.i);
case WDIOC_GETBOOTSTATUS:
@@ -500,14 +523,22 @@
switch (new_options) {
case WDIOS_DISABLECARD:
- if (test_bit(WDTS_TIMER_RUN, &wdt_status))
- wdt_stop();
+ if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
+ rc = wdt_stop();
+ if (rc)
+ return rc;
+ }
clear_bit(WDTS_TIMER_RUN, &wdt_status);
return 0;
case WDIOS_ENABLECARD:
- if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status))
- wdt_start();
+ if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
+ rc = wdt_start();
+ if (rc) {
+ clear_bit(WDTS_TIMER_RUN, &wdt_status);
+ return rc;
+ }
+ }
return 0;
default:
@@ -560,16 +591,17 @@
int rc = 0;
int try_gameport = !nogameport;
u8 chip_rev;
- unsigned long flags;
+ int gp_rreq_fail = 0;
wdt_status = 0;
- spin_lock_irqsave(&spinlock, flags);
- superio_enter();
+ rc = superio_enter();
+ if (rc)
+ return rc;
+
chip_type = superio_inw(CHIPID);
chip_rev = superio_inb(CHIPREV) & 0x0f;
superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
switch (chip_type) {
case IT8702_ID:
@@ -603,8 +635,9 @@
return -ENODEV;
}
- spin_lock_irqsave(&spinlock, flags);
- superio_enter();
+ rc = superio_enter();
+ if (rc)
+ return rc;
superio_select(GPIO);
superio_outb(WDT_TOV1, WDTCFG);
@@ -620,21 +653,16 @@
}
gpact = superio_inb(ACTREG);
superio_outb(0x01, ACTREG);
- superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
if (request_region(base, 1, WATCHDOG_NAME))
set_bit(WDTS_USE_GP, &wdt_status);
else
- rc = -EIO;
- } else {
- superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
+ gp_rreq_fail = 1;
}
/* If we haven't Gameport support, try to get CIR support */
if (!test_bit(WDTS_USE_GP, &wdt_status)) {
if (!request_region(CIR_BASE, 8, WATCHDOG_NAME)) {
- if (rc == -EIO)
+ if (gp_rreq_fail)
printk(KERN_ERR PFX
"I/O Address 0x%04x and 0x%04x"
" already in use\n", base, CIR_BASE);
@@ -646,21 +674,16 @@
goto err_out;
}
base = CIR_BASE;
- spin_lock_irqsave(&spinlock, flags);
- superio_enter();
superio_select(CIR);
superio_outw(base, BASEREG);
superio_outb(0x00, CIR_ILS);
ciract = superio_inb(ACTREG);
superio_outb(0x01, ACTREG);
- if (rc == -EIO) {
+ if (gp_rreq_fail) {
superio_select(GAMEPORT);
superio_outb(gpact, ACTREG);
}
-
- superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
}
if (timeout < 1 || timeout > max_units * 60) {
@@ -704,6 +727,7 @@
"nogameport=%d)\n", chip_type, chip_rev, timeout,
nowayout, testmode, exclusive, nogameport);
+ superio_exit();
return 0;
err_out_reboot:
@@ -711,49 +735,37 @@
err_out_region:
release_region(base, test_bit(WDTS_USE_GP, &wdt_status) ? 1 : 8);
if (!test_bit(WDTS_USE_GP, &wdt_status)) {
- spin_lock_irqsave(&spinlock, flags);
- superio_enter();
superio_select(CIR);
superio_outb(ciract, ACTREG);
- superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
}
err_out:
if (try_gameport) {
- spin_lock_irqsave(&spinlock, flags);
- superio_enter();
superio_select(GAMEPORT);
superio_outb(gpact, ACTREG);
- superio_exit();
- spin_unlock_irqrestore(&spinlock, flags);
}
+ superio_exit();
return rc;
}
static void __exit it87_wdt_exit(void)
{
- unsigned long flags;
- int nolock;
-
- nolock = !spin_trylock_irqsave(&spinlock, flags);
- superio_enter();
- superio_select(GPIO);
- superio_outb(0x00, WDTCTRL);
- superio_outb(0x00, WDTCFG);
- superio_outb(0x00, WDTVALLSB);
- if (max_units > 255)
- superio_outb(0x00, WDTVALMSB);
- if (test_bit(WDTS_USE_GP, &wdt_status)) {
- superio_select(GAMEPORT);
- superio_outb(gpact, ACTREG);
- } else {
- superio_select(CIR);
- superio_outb(ciract, ACTREG);
+ if (superio_enter() == 0) {
+ superio_select(GPIO);
+ superio_outb(0x00, WDTCTRL);
+ superio_outb(0x00, WDTCFG);
+ superio_outb(0x00, WDTVALLSB);
+ if (max_units > 255)
+ superio_outb(0x00, WDTVALMSB);
+ if (test_bit(WDTS_USE_GP, &wdt_status)) {
+ superio_select(GAMEPORT);
+ superio_outb(gpact, ACTREG);
+ } else {
+ superio_select(CIR);
+ superio_outb(ciract, ACTREG);
+ }
+ superio_exit();
}
- superio_exit();
- if (!nolock)
- spin_unlock_irqrestore(&spinlock, flags);
misc_deregister(&wdt_miscdev);
unregister_reboot_notifier(&wdt_notifier);
diff --git a/drivers/watchdog/mpcore_wdt.c b/drivers/watchdog/mpcore_wdt.c
index 2b4af22..4dc3102 100644
--- a/drivers/watchdog/mpcore_wdt.c
+++ b/drivers/watchdog/mpcore_wdt.c
@@ -407,12 +407,35 @@
return 0;
}
+#ifdef CONFIG_PM
+static int mpcore_wdt_suspend(struct platform_device *dev, pm_message_t msg)
+{
+ struct mpcore_wdt *wdt = platform_get_drvdata(dev);
+ mpcore_wdt_stop(wdt); /* Turn the WDT off */
+ return 0;
+}
+
+static int mpcore_wdt_resume(struct platform_device *dev)
+{
+ struct mpcore_wdt *wdt = platform_get_drvdata(dev);
+ /* re-activate timer */
+ if (test_bit(0, &wdt->timer_alive))
+ mpcore_wdt_start(wdt);
+ return 0;
+}
+#else
+#define mpcore_wdt_suspend NULL
+#define mpcore_wdt_resume NULL
+#endif
+
/* work with hotplug and coldplug */
MODULE_ALIAS("platform:mpcore_wdt");
static struct platform_driver mpcore_wdt_driver = {
.probe = mpcore_wdt_probe,
.remove = __devexit_p(mpcore_wdt_remove),
+ .suspend = mpcore_wdt_suspend,
+ .resume = mpcore_wdt_resume,
.shutdown = mpcore_wdt_shutdown,
.driver = {
.owner = THIS_MODULE,
diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c
index 0430e09..ac37bb8 100644
--- a/drivers/watchdog/mtx-1_wdt.c
+++ b/drivers/watchdog/mtx-1_wdt.c
@@ -225,11 +225,11 @@
ret = misc_register(&mtx1_wdt_misc);
if (ret < 0) {
- printk(KERN_ERR " mtx-1_wdt : failed to register\n");
+ dev_err(&pdev->dev, "failed to register\n");
return ret;
}
mtx1_wdt_start();
- printk(KERN_INFO "MTX-1 Watchdog driver\n");
+ dev_info(&pdev->dev, "MTX-1 Watchdog driver\n");
return 0;
}
diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
new file mode 100644
index 0000000..4ec741a
--- /dev/null
+++ b/drivers/watchdog/of_xilinx_wdt.c
@@ -0,0 +1,433 @@
+/*
+* of_xilinx_wdt.c 1.01 A Watchdog Device Driver for Xilinx xps_timebase_wdt
+*
+* (C) Copyright 2011 (Alejandro Cabrera <aldaya@gmail.com>)
+*
+* -----------------------
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version
+* 2 of the License, or (at your option) any later version.
+*
+* -----------------------
+* 30-May-2011 Alejandro Cabrera <aldaya@gmail.com>
+*	- If "xlnx,wdt-enable-once" isn't found in the device tree, the
+*	  module falls back to CONFIG_WATCHDOG_NOWAYOUT
+*	- If the device-tree parameters ("clock-frequency" and
+*	  "xlnx,wdt-interval") aren't found, the driver cannot know
+*	  the wdt reset interval
+*/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/watchdog.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+
+/* Register offsets for the Wdt device */
+#define XWT_TWCSR0_OFFSET 0x0 /* Control/Status Register0 */
+#define XWT_TWCSR1_OFFSET 0x4 /* Control/Status Register1 */
+#define XWT_TBR_OFFSET 0x8 /* Timebase Register Offset */
+
+/* Control/Status Register Masks */
+#define XWT_CSR0_WRS_MASK 0x00000008 /* Reset status */
+#define XWT_CSR0_WDS_MASK 0x00000004 /* Timer state */
+#define XWT_CSR0_EWDT1_MASK 0x00000002 /* Enable bit 1 */
+
+/* Control/Status Register 0/1 bits */
+#define XWT_CSRX_EWDT2_MASK 0x00000001 /* Enable bit 2 */
+
+/* SelfTest constants */
+#define XWT_MAX_SELFTEST_LOOP_COUNT 0x00010000
+#define XWT_TIMER_FAILED 0xFFFFFFFF
+
+#define WATCHDOG_NAME "Xilinx Watchdog"
+#define PFX WATCHDOG_NAME ": "
+
+struct xwdt_device {
+ struct resource res;
+ void __iomem *base;
+ u32 nowayout;
+ u32 wdt_interval;
+ u32 boot_status;
+};
+
+static struct xwdt_device xdev;
+
+static u32 timeout;
+static u32 control_status_reg;
+static u8 expect_close;
+static u8 no_timeout;
+static unsigned long driver_open;
+
+static DEFINE_SPINLOCK(spinlock);
+
+static void xwdt_start(void)
+{
+ spin_lock(&spinlock);
+
+ /* Clean previous status and enable the watchdog timer */
+ control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+ control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK);
+
+ iowrite32((control_status_reg | XWT_CSR0_EWDT1_MASK),
+ xdev.base + XWT_TWCSR0_OFFSET);
+
+ iowrite32(XWT_CSRX_EWDT2_MASK, xdev.base + XWT_TWCSR1_OFFSET);
+
+ spin_unlock(&spinlock);
+}
+
+static void xwdt_stop(void)
+{
+ spin_lock(&spinlock);
+
+ control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+
+ iowrite32((control_status_reg & ~XWT_CSR0_EWDT1_MASK),
+ xdev.base + XWT_TWCSR0_OFFSET);
+
+ iowrite32(0, xdev.base + XWT_TWCSR1_OFFSET);
+
+ spin_unlock(&spinlock);
+ printk(KERN_INFO PFX "Stopped!\n");
+}
+
+static void xwdt_keepalive(void)
+{
+ spin_lock(&spinlock);
+
+ control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+ control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK);
+ iowrite32(control_status_reg, xdev.base + XWT_TWCSR0_OFFSET);
+
+ spin_unlock(&spinlock);
+}
+
+static void xwdt_get_status(int *status)
+{
+ int new_status;
+
+ spin_lock(&spinlock);
+
+ control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+ new_status = ((control_status_reg &
+ (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK)) != 0);
+ spin_unlock(&spinlock);
+
+ *status = 0;
+ if (new_status & 1)
+ *status |= WDIOF_CARDRESET;
+}
+
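+/*
+ * The timebase register free-runs whenever the core is alive, so the
+ * selftest reads it repeatedly and reports failure only if the value
+ * never changes within XWT_MAX_SELFTEST_LOOP_COUNT iterations.
+ */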
+static u32 xwdt_selftest(void)
+{
+ int i;
+ u32 timer_value1;
+ u32 timer_value2;
+
+ spin_lock(&spinlock);
+
+ timer_value1 = ioread32(xdev.base + XWT_TBR_OFFSET);
+ timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET);
+
+ for (i = 0;
+ ((i <= XWT_MAX_SELFTEST_LOOP_COUNT) &&
+ (timer_value2 == timer_value1)); i++) {
+ timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET);
+ }
+
+ spin_unlock(&spinlock);
+
+ if (timer_value2 != timer_value1)
+ return ~XWT_TIMER_FAILED;
+ else
+ return XWT_TIMER_FAILED;
+}
+
+static int xwdt_open(struct inode *inode, struct file *file)
+{
+ /* Only one process can handle the wdt at a time */
+ if (test_and_set_bit(0, &driver_open))
+ return -EBUSY;
+
+	/* Make sure that the module is always loaded... */
+ if (xdev.nowayout)
+ __module_get(THIS_MODULE);
+
+ xwdt_start();
+ printk(KERN_INFO PFX "Started...\n");
+
+ return nonseekable_open(inode, file);
+}
+
+static int xwdt_release(struct inode *inode, struct file *file)
+{
+ if (expect_close == 42) {
+ xwdt_stop();
+ } else {
+ printk(KERN_CRIT PFX
+ "Unexpected close, not stopping watchdog!\n");
+ xwdt_keepalive();
+ }
+
+ clear_bit(0, &driver_open);
+ expect_close = 0;
+ return 0;
+}
+
+/*
+ * xwdt_write:
+ * @file: file handle to the watchdog
+ * @buf: buffer to write (unused, as the data does not matter here)
+ * @count: count of bytes
+ * @ppos: pointer to the position to write. No seeks allowed
+ *
+ * A write to a watchdog device is defined as a keepalive signal. Any
+ * write of data will do, as we don't define content meaning.
+ */
+static ssize_t xwdt_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ if (len) {
+ if (!xdev.nowayout) {
+ size_t i;
+
+ /* In case it was set long ago */
+ expect_close = 0;
+
+ for (i = 0; i != len; i++) {
+ char c;
+
+ if (get_user(c, buf + i))
+ return -EFAULT;
+ if (c == 'V')
+ expect_close = 42;
+ }
+ }
+ xwdt_keepalive();
+ }
+ return len;
+}
+
+static const struct watchdog_info ident = {
+ .options = WDIOF_MAGICCLOSE |
+ WDIOF_KEEPALIVEPING,
+ .firmware_version = 1,
+ .identity = WATCHDOG_NAME,
+};
+
+/*
+ * xwdt_ioctl:
+ * @file: file handle to the device
+ * @cmd: watchdog command
+ * @arg: argument pointer
+ *
+ * The watchdog API defines a common set of functions for all watchdogs
+ * according to their available features.
+ */
+static long xwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int status;
+
+ union {
+ struct watchdog_info __user *ident;
+ int __user *i;
+ } uarg;
+
+ uarg.i = (int __user *)arg;
+
+ switch (cmd) {
+ case WDIOC_GETSUPPORT:
+ return copy_to_user(uarg.ident, &ident,
+ sizeof(ident)) ? -EFAULT : 0;
+
+ case WDIOC_GETBOOTSTATUS:
+ return put_user(xdev.boot_status, uarg.i);
+
+ case WDIOC_GETSTATUS:
+ xwdt_get_status(&status);
+ return put_user(status, uarg.i);
+
+ case WDIOC_KEEPALIVE:
+ xwdt_keepalive();
+ return 0;
+
+ case WDIOC_GETTIMEOUT:
+ if (no_timeout)
+ return -ENOTTY;
+ else
+ return put_user(timeout, uarg.i);
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct file_operations xwdt_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .write = xwdt_write,
+ .open = xwdt_open,
+ .release = xwdt_release,
+ .unlocked_ioctl = xwdt_ioctl,
+};
+
+static struct miscdevice xwdt_miscdev = {
+ .minor = WATCHDOG_MINOR,
+ .name = "watchdog",
+ .fops = &xwdt_fops,
+};
+
+static int __devinit xwdt_probe(struct platform_device *pdev)
+{
+ int rc;
+ u32 *tmptr;
+ u32 *pfreq;
+
+ no_timeout = 0;
+
+ pfreq = (u32 *)of_get_property(pdev->dev.of_node->parent,
+ "clock-frequency", NULL);
+
+ if (pfreq == NULL) {
+ printk(KERN_WARNING PFX
+ "The watchdog clock frequency cannot be obtained!\n");
+ no_timeout = 1;
+ }
+
+ rc = of_address_to_resource(pdev->dev.of_node, 0, &xdev.res);
+ if (rc) {
+ printk(KERN_WARNING PFX "invalid address!\n");
+ return rc;
+ }
+
+ tmptr = (u32 *)of_get_property(pdev->dev.of_node,
+ "xlnx,wdt-interval", NULL);
+ if (tmptr == NULL) {
+ printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-interval\""
+ " not found in device tree!\n");
+ no_timeout = 1;
+ } else {
+ xdev.wdt_interval = *tmptr;
+ }
+
+ tmptr = (u32 *)of_get_property(pdev->dev.of_node,
+ "xlnx,wdt-enable-once", NULL);
+ if (tmptr == NULL) {
+ printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-enable-once\""
+ " not found in device tree!\n");
+ xdev.nowayout = WATCHDOG_NOWAYOUT;
+ }
+
+/*
+ * Twice 2^wdt_interval / freq, because the first wdt overflow only
+ * raises an interrupt and is ignored; a reset is generated only on
+ * the second overflow.
+ */
+ if (!no_timeout)
+ timeout = 2 * ((1<<xdev.wdt_interval) / *pfreq);
+
+	if (!request_mem_region(xdev.res.start,
+			resource_size(&xdev.res), WATCHDOG_NAME)) {
+ rc = -ENXIO;
+ printk(KERN_ERR PFX "memory request failure!\n");
+ goto err_out;
+ }
+
+	xdev.base = ioremap(xdev.res.start, resource_size(&xdev.res));
+ if (xdev.base == NULL) {
+ rc = -ENOMEM;
+ printk(KERN_ERR PFX "ioremap failure!\n");
+ goto release_mem;
+ }
+
+ rc = xwdt_selftest();
+ if (rc == XWT_TIMER_FAILED) {
+ printk(KERN_ERR PFX "SelfTest routine error!\n");
+ goto unmap_io;
+ }
+
+ xwdt_get_status(&xdev.boot_status);
+
+ rc = misc_register(&xwdt_miscdev);
+ if (rc) {
+ printk(KERN_ERR PFX
+ "cannot register miscdev on minor=%d (err=%d)\n",
+ xwdt_miscdev.minor, rc);
+ goto unmap_io;
+ }
+
+ if (no_timeout)
+ printk(KERN_INFO PFX
+ "driver loaded (timeout=? sec, nowayout=%d)\n",
+ xdev.nowayout);
+ else
+ printk(KERN_INFO PFX
+ "driver loaded (timeout=%d sec, nowayout=%d)\n",
+ timeout, xdev.nowayout);
+
+ expect_close = 0;
+ clear_bit(0, &driver_open);
+
+ return 0;
+
+unmap_io:
+ iounmap(xdev.base);
+release_mem:
+ release_mem_region(xdev.res.start, resource_size(&xdev.res));
+err_out:
+ return rc;
+}
+
+static int __devexit xwdt_remove(struct platform_device *dev)
+{
+ misc_deregister(&xwdt_miscdev);
+ iounmap(xdev.base);
+ release_mem_region(xdev.res.start, resource_size(&xdev.res));
+
+ return 0;
+}
+
+/* Match table for of_platform binding */
+static struct of_device_id __devinitdata xwdt_of_match[] = {
+ { .compatible = "xlnx,xps-timebase-wdt-1.01.a", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, xwdt_of_match);
+
+static struct platform_driver xwdt_driver = {
+ .probe = xwdt_probe,
+ .remove = __devexit_p(xwdt_remove),
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = WATCHDOG_NAME,
+ .of_match_table = xwdt_of_match,
+ },
+};
+
+static int __init xwdt_init(void)
+{
+ return platform_driver_register(&xwdt_driver);
+}
+
+static void __exit xwdt_exit(void)
+{
+ platform_driver_unregister(&xwdt_driver);
+}
+
+module_init(xwdt_init);
+module_exit(xwdt_exit);
+
+MODULE_AUTHOR("Alejandro Cabrera <aldaya@gmail.com>");
+MODULE_DESCRIPTION("Xilinx Watchdog driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
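
For reference, a device-tree node that would match this driver might look as
follows; the address, interval and enable-once values are illustrative, and
"clock-frequency" is read from the parent node rather than from the wdt node
itself:

	axi@0 {
		compatible = "simple-bus";
		clock-frequency = <66666667>;
		#address-cells = <1>;
		#size-cells = <1>;

		timebase-wdt@83a00000 {
			compatible = "xlnx,xps-timebase-wdt-1.01.a";
			reg = <0x83a00000 0x10000>;
			xlnx,wdt-interval = <0x1e>;
			xlnx,wdt-enable-once = <0x0>;
		};
	};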
diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c
index b7c1390..e78d899 100644
--- a/drivers/watchdog/pc87413_wdt.c
+++ b/drivers/watchdog/pc87413_wdt.c
@@ -56,6 +56,7 @@
#define IO_DEFAULT 0x2E /* Address used on Portwell Boards */
static int io = IO_DEFAULT;
+static int swc_base_addr = -1;
static int timeout = DEFAULT_TIMEOUT; /* timeout value */
static unsigned long timer_enabled; /* is the timer enabled? */
@@ -116,9 +117,8 @@
/* Read SWC I/O base address */
-static inline unsigned int pc87413_get_swc_base(void)
+static void pc87413_get_swc_base_addr(void)
{
- unsigned int swc_base_addr = 0;
unsigned char addr_l, addr_h = 0;
/* Step 3: Read SWC I/O Base Address */
@@ -136,12 +136,11 @@
"Read SWC I/O Base Address: low %d, high %d, res %d\n",
addr_l, addr_h, swc_base_addr);
#endif
- return swc_base_addr;
}
/* Select Bank 3 of SWC */
-static inline void pc87413_swc_bank3(unsigned int swc_base_addr)
+static inline void pc87413_swc_bank3(void)
{
/* Step 4: Select Bank3 of SWC */
outb_p(inb(swc_base_addr + 0x0f) | 0x03, swc_base_addr + 0x0f);
@@ -152,8 +151,7 @@
/* Set watchdog timeout to x minutes */
-static inline void pc87413_programm_wdto(unsigned int swc_base_addr,
- char pc87413_time)
+static inline void pc87413_programm_wdto(char pc87413_time)
{
/* Step 5: Programm WDTO, Twd. */
outb_p(pc87413_time, swc_base_addr + WDTO);
@@ -164,7 +162,7 @@
/* Enable WDEN */
-static inline void pc87413_enable_wden(unsigned int swc_base_addr)
+static inline void pc87413_enable_wden(void)
{
/* Step 6: Enable WDEN */
outb_p(inb(swc_base_addr + WDCTL) | 0x01, swc_base_addr + WDCTL);
@@ -174,7 +172,7 @@
}
/* Enable SW_WD_TREN */
-static inline void pc87413_enable_sw_wd_tren(unsigned int swc_base_addr)
+static inline void pc87413_enable_sw_wd_tren(void)
{
/* Enable SW_WD_TREN */
outb_p(inb(swc_base_addr + WDCFG) | 0x80, swc_base_addr + WDCFG);
@@ -185,7 +183,7 @@
/* Disable SW_WD_TREN */
-static inline void pc87413_disable_sw_wd_tren(unsigned int swc_base_addr)
+static inline void pc87413_disable_sw_wd_tren(void)
{
/* Disable SW_WD_TREN */
outb_p(inb(swc_base_addr + WDCFG) & 0x7f, swc_base_addr + WDCFG);
@@ -196,7 +194,7 @@
/* Enable SW_WD_TRG */
-static inline void pc87413_enable_sw_wd_trg(unsigned int swc_base_addr)
+static inline void pc87413_enable_sw_wd_trg(void)
{
/* Enable SW_WD_TRG */
outb_p(inb(swc_base_addr + WDCTL) | 0x80, swc_base_addr + WDCTL);
@@ -207,7 +205,7 @@
/* Disable SW_WD_TRG */
-static inline void pc87413_disable_sw_wd_trg(unsigned int swc_base_addr)
+static inline void pc87413_disable_sw_wd_trg(void)
{
/* Disable SW_WD_TRG */
outb_p(inb(swc_base_addr + WDCTL) & 0x7f, swc_base_addr + WDCTL);
@@ -222,18 +220,13 @@
static void pc87413_enable(void)
{
- unsigned int swc_base_addr;
-
spin_lock(&io_lock);
- pc87413_select_wdt_out();
- pc87413_enable_swc();
- swc_base_addr = pc87413_get_swc_base();
- pc87413_swc_bank3(swc_base_addr);
- pc87413_programm_wdto(swc_base_addr, timeout);
- pc87413_enable_wden(swc_base_addr);
- pc87413_enable_sw_wd_tren(swc_base_addr);
- pc87413_enable_sw_wd_trg(swc_base_addr);
+ pc87413_swc_bank3();
+ pc87413_programm_wdto(timeout);
+ pc87413_enable_wden();
+ pc87413_enable_sw_wd_tren();
+ pc87413_enable_sw_wd_trg();
spin_unlock(&io_lock);
}
@@ -242,17 +235,12 @@
static void pc87413_disable(void)
{
- unsigned int swc_base_addr;
-
spin_lock(&io_lock);
- pc87413_select_wdt_out();
- pc87413_enable_swc();
- swc_base_addr = pc87413_get_swc_base();
- pc87413_swc_bank3(swc_base_addr);
- pc87413_disable_sw_wd_tren(swc_base_addr);
- pc87413_disable_sw_wd_trg(swc_base_addr);
- pc87413_programm_wdto(swc_base_addr, 0);
+ pc87413_swc_bank3();
+ pc87413_disable_sw_wd_tren();
+ pc87413_disable_sw_wd_trg();
+ pc87413_programm_wdto(0);
spin_unlock(&io_lock);
}
@@ -261,20 +249,15 @@
static void pc87413_refresh(void)
{
- unsigned int swc_base_addr;
-
spin_lock(&io_lock);
- pc87413_select_wdt_out();
- pc87413_enable_swc();
- swc_base_addr = pc87413_get_swc_base();
- pc87413_swc_bank3(swc_base_addr);
- pc87413_disable_sw_wd_tren(swc_base_addr);
- pc87413_disable_sw_wd_trg(swc_base_addr);
- pc87413_programm_wdto(swc_base_addr, timeout);
- pc87413_enable_wden(swc_base_addr);
- pc87413_enable_sw_wd_tren(swc_base_addr);
- pc87413_enable_sw_wd_trg(swc_base_addr);
+ pc87413_swc_bank3();
+ pc87413_disable_sw_wd_tren();
+ pc87413_disable_sw_wd_trg();
+ pc87413_programm_wdto(timeout);
+ pc87413_enable_wden();
+ pc87413_enable_sw_wd_tren();
+ pc87413_enable_sw_wd_trg();
spin_unlock(&io_lock);
}
@@ -528,7 +511,8 @@
printk(KERN_INFO PFX "Version " VERSION " at io 0x%X\n",
WDT_INDEX_IO_PORT);
- /* request_region(io, 2, "pc87413"); */
+ if (!request_muxed_region(io, 2, MODNAME))
+ return -EBUSY;
ret = register_reboot_notifier(&pc87413_notifier);
if (ret != 0) {
@@ -541,12 +525,32 @@
printk(KERN_ERR PFX
"cannot register miscdev on minor=%d (err=%d)\n",
WATCHDOG_MINOR, ret);
- unregister_reboot_notifier(&pc87413_notifier);
- return ret;
+ goto reboot_unreg;
}
printk(KERN_INFO PFX "initialized. timeout=%d min \n", timeout);
+
+ pc87413_select_wdt_out();
+ pc87413_enable_swc();
+ pc87413_get_swc_base_addr();
+
+ if (!request_region(swc_base_addr, 0x20, MODNAME)) {
+ printk(KERN_ERR PFX
+ "cannot request SWC region at 0x%x\n", swc_base_addr);
+ ret = -EBUSY;
+ goto misc_unreg;
+ }
+
pc87413_enable();
+
+ release_region(io, 2);
return 0;
+
+misc_unreg:
+ misc_deregister(&pc87413_miscdev);
+reboot_unreg:
+ unregister_reboot_notifier(&pc87413_notifier);
+ release_region(io, 2);
+ return ret;
}
/**
@@ -569,7 +573,7 @@
misc_deregister(&pc87413_miscdev);
unregister_reboot_notifier(&pc87413_notifier);
- /* release_region(io, 2); */
+ release_region(swc_base_addr, 0x20);
printk(KERN_INFO MODNAME " watchdog component driver removed.\n");
}
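
The request_muxed_region() change above follows the usual pattern for shared
superio index/data ports: hold the muxed region only while touching the
configuration registers, then release it so other superio drivers can get in.
A minimal sketch of that pattern (the port address and register values here
are illustrative, not the real pc87413 ones):

	#include <linux/ioport.h>
	#include <linux/io.h>

	static int example_superio_poke(unsigned long base)
	{
		/* serialize against other users of the shared config ports */
		if (!request_muxed_region(base, 2, "example"))
			return -EBUSY;
		outb_p(0x05, base);			/* select a config register */
		outb_p(inb(base + 1) | 0x01, base + 1);	/* set an enable bit */
		release_region(base, 2);		/* let other drivers back in */
		return 0;
	}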
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index f7f5aa0..30da88f 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -589,6 +589,15 @@
#define s3c2410wdt_resume NULL
#endif /* CONFIG_PM */
+#ifdef CONFIG_OF
+static const struct of_device_id s3c2410_wdt_match[] = {
+ { .compatible = "samsung,s3c2410-wdt" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, s3c2410_wdt_match);
+#else
+#define s3c2410_wdt_match NULL
+#endif
static struct platform_driver s3c2410wdt_driver = {
.probe = s3c2410wdt_probe,
@@ -599,6 +608,7 @@
.driver = {
.owner = THIS_MODULE,
.name = "s3c2410-wdt",
+ .of_match_table = s3c2410_wdt_match,
},
};
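
With the of_match_table wired up, the platform core binds the driver to a
matching "samsung,s3c2410-wdt" node automatically. If the probe routine ever
needs to know whether it was instantiated from the device tree, something like
the following sketch would work (an assumption for illustration, not code from
this driver):

	#include <linux/of_device.h>
	#include <linux/platform_device.h>

	static int example_probe(struct platform_device *pdev)
	{
		/* non-NULL only if the device came from a matching DT node */
		const struct of_device_id *match =
			of_match_device(s3c2410_wdt_match, &pdev->dev);

		if (match)
			dev_info(&pdev->dev, "probed via DT (%s)\n",
				 match->compatible);
		return 0;
	}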
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index c7cf4b0..029467e 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -472,15 +472,10 @@
sch311x_wdt_stop();
}
-#define sch311x_wdt_suspend NULL
-#define sch311x_wdt_resume NULL
-
static struct platform_driver sch311x_wdt_driver = {
.probe = sch311x_wdt_probe,
.remove = __devexit_p(sch311x_wdt_remove),
.shutdown = sch311x_wdt_shutdown,
- .suspend = sch311x_wdt_suspend,
- .resume = sch311x_wdt_resume,
.driver = {
.owner = THIS_MODULE,
.name = DRV_NAME,
diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
index 0d80e08..cc2cfbe 100644
--- a/drivers/watchdog/sp805_wdt.c
+++ b/drivers/watchdog/sp805_wdt.c
@@ -134,6 +134,8 @@
writel(INT_ENABLE | RESET_ENABLE, wdt->base + WDTCONTROL);
writel(LOCK, wdt->base + WDTLOCK);
+ /* Flush posted writes. */
+ readl(wdt->base + WDTLOCK);
spin_unlock(&wdt->lock);
}
@@ -144,9 +146,10 @@
writel(UNLOCK, wdt->base + WDTLOCK);
writel(0, wdt->base + WDTCONTROL);
- writel(0, wdt->base + WDTLOAD);
writel(LOCK, wdt->base + WDTLOCK);
+ /* Flush posted writes. */
+ readl(wdt->base + WDTLOCK);
spin_unlock(&wdt->lock);
}
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
new file mode 100644
index 0000000..cfa1a15
--- /dev/null
+++ b/drivers/watchdog/watchdog_core.c
@@ -0,0 +1,111 @@
+/*
+ * watchdog_core.c
+ *
+ * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ * All Rights Reserved.
+ *
+ * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ * This source code is part of the generic code that can be used
+ * by all the watchdog timer drivers.
+ *
+ * Based on source code of the following authors:
+ * Matt Domsch <Matt_Domsch@dell.com>,
+ * Rob Radez <rob@osinvestor.com>,
+ * Rusty Lynch <rusty@linux.co.intel.com>
+ * Satyam Sharma <satyam@infradead.org>
+ * Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ * admit liability nor provide warranty for any of this software.
+ * This material is provided "AS-IS" and at no charge.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h> /* For EXPORT_SYMBOL/module stuff/... */
+#include <linux/types.h> /* For standard types */
+#include <linux/errno.h> /* For the -ENODEV/... values */
+#include <linux/kernel.h> /* For printk/panic/... */
+#include <linux/watchdog.h> /* For watchdog specific items */
+#include <linux/init.h> /* For __init/__exit/... */
+
+#include "watchdog_dev.h" /* For watchdog_dev_register/... */
+
+/**
+ * watchdog_register_device() - register a watchdog device
+ * @wdd: watchdog device
+ *
+ * Register a watchdog device with the kernel so that the
+ * watchdog timer can be accessed from userspace.
+ *
+ * A zero is returned on success and a negative errno code for
+ * failure.
+ */
+int watchdog_register_device(struct watchdog_device *wdd)
+{
+ int ret;
+
+ if (wdd == NULL || wdd->info == NULL || wdd->ops == NULL)
+ return -EINVAL;
+
+ /* Mandatory operations need to be supported */
+ if (wdd->ops->start == NULL || wdd->ops->stop == NULL)
+ return -EINVAL;
+
+ /*
+ * Check that we have valid min and max timeout values, if
+ * not reset them both to 0 (=not used or unknown)
+ */
+ if (wdd->min_timeout > wdd->max_timeout) {
+ pr_info("Invalid min and max timeout values, resetting to 0!\n");
+ wdd->min_timeout = 0;
+ wdd->max_timeout = 0;
+ }
+
+ /*
+ * Note: now that all watchdog_device data has been verified, we
+ * will not check this anymore in other functions. If data gets
+ * corrupted in a later stage then we expect a kernel panic!
+ */
+
+ /* We only support 1 watchdog device via the /dev/watchdog interface */
+ ret = watchdog_dev_register(wdd);
+ if (ret) {
+ pr_err("error registering /dev/watchdog (err=%d).\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(watchdog_register_device);
+
+/**
+ * watchdog_unregister_device() - unregister a watchdog device
+ * @wdd: watchdog device to unregister
+ *
+ * Unregister a watchdog device that was previously successfully
+ * registered with watchdog_register_device().
+ */
+void watchdog_unregister_device(struct watchdog_device *wdd)
+{
+ int ret;
+
+ if (wdd == NULL)
+ return;
+
+ ret = watchdog_dev_unregister(wdd);
+ if (ret)
+ pr_err("error unregistering /dev/watchdog (err=%d).\n", ret);
+}
+EXPORT_SYMBOL_GPL(watchdog_unregister_device);
+
+MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>");
+MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>");
+MODULE_DESCRIPTION("WatchDog Timer Driver Core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
new file mode 100644
index 0000000..d33520d
--- /dev/null
+++ b/drivers/watchdog/watchdog_dev.c
@@ -0,0 +1,395 @@
+/*
+ * watchdog_dev.c
+ *
+ * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ * All Rights Reserved.
+ *
+ * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ *
+ * This source code is part of the generic code that can be used
+ * by all the watchdog timer drivers.
+ *
+ * This part of the generic code takes care of the following
+ * misc device: /dev/watchdog.
+ *
+ * Based on source code of the following authors:
+ * Matt Domsch <Matt_Domsch@dell.com>,
+ * Rob Radez <rob@osinvestor.com>,
+ * Rusty Lynch <rusty@linux.co.intel.com>
+ * Satyam Sharma <satyam@infradead.org>
+ * Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ * admit liability nor provide warranty for any of this software.
+ * This material is provided "AS-IS" and at no charge.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h> /* For module stuff/... */
+#include <linux/types.h> /* For standard types (like size_t) */
+#include <linux/errno.h> /* For the -ENODEV/... values */
+#include <linux/kernel.h> /* For printk/panic/... */
+#include <linux/fs.h> /* For file operations */
+#include <linux/watchdog.h> /* For watchdog specific items */
+#include <linux/miscdevice.h> /* For handling misc devices */
+#include <linux/init.h> /* For __init/__exit/... */
+#include <linux/uaccess.h> /* For copy_to_user/put_user/... */
+
+/* make sure we only register one /dev/watchdog device */
+static unsigned long watchdog_dev_busy;
+/* the watchdog device behind /dev/watchdog */
+static struct watchdog_device *wdd;
+
+/*
+ * watchdog_ping: ping the watchdog.
+ * @wddev: the watchdog device to ping
+ *
+ * If the watchdog has no ping operation of its own then it needs to be
+ * restarted via the start operation. This wrapper function does
+ * exactly that.
+ * We only ping when the watchdog device is running.
+ */
+
+static int watchdog_ping(struct watchdog_device *wddev)
+{
+ if (test_bit(WDOG_ACTIVE, &wdd->status)) {
+ if (wddev->ops->ping)
+ return wddev->ops->ping(wddev); /* ping the watchdog */
+ else
+ return wddev->ops->start(wddev); /* restart watchdog */
+ }
+ return 0;
+}
+
+/*
+ * watchdog_start: wrapper to start the watchdog.
+ * @wddev: the watchdog device to start
+ *
+ * Start the watchdog if it is not active and mark it active.
+ * This function returns zero on success or a negative errno code for
+ * failure.
+ */
+
+static int watchdog_start(struct watchdog_device *wddev)
+{
+ int err;
+
+ if (!test_bit(WDOG_ACTIVE, &wdd->status)) {
+ err = wddev->ops->start(wddev);
+ if (err < 0)
+ return err;
+
+ set_bit(WDOG_ACTIVE, &wdd->status);
+ }
+ return 0;
+}
+
+/*
+ * watchdog_stop: wrapper to stop the watchdog.
+ * @wddev: the watchdog device to stop
+ *
+ * Stop the watchdog if it is still active and unmark it active.
+ * This function returns zero on success or a negative errno code for
+ * failure.
+ * If the 'nowayout' feature was set, the watchdog cannot be stopped.
+ */
+
+static int watchdog_stop(struct watchdog_device *wddev)
+{
+ int err = -EBUSY;
+
+ if (test_bit(WDOG_NO_WAY_OUT, &wdd->status)) {
+ pr_info("%s: nowayout prevents watchdog to be stopped!\n",
+ wdd->info->identity);
+ return err;
+ }
+
+ if (test_bit(WDOG_ACTIVE, &wdd->status)) {
+ err = wddev->ops->stop(wddev);
+ if (err < 0)
+ return err;
+
+ clear_bit(WDOG_ACTIVE, &wdd->status);
+ }
+ return 0;
+}
+
+/*
+ * watchdog_write: writes to the watchdog.
+ * @file: file from VFS
+ * @data: user address of data
+ * @len: length of data
+ * @ppos: pointer to the file offset
+ *
+ * A write to a watchdog device is defined as a keepalive ping.
+ * Writing the magic 'V' sequence allows the next close to turn
+ * off the watchdog (if 'nowayout' is not set).
+ */
+
+static ssize_t watchdog_write(struct file *file, const char __user *data,
+ size_t len, loff_t *ppos)
+{
+ size_t i;
+ char c;
+
+ if (len == 0)
+ return 0;
+
+ /*
+ * Note: just in case someone wrote the magic character
+ * five months ago...
+ */
+ clear_bit(WDOG_ALLOW_RELEASE, &wdd->status);
+
+ /* scan to see whether or not we got the magic character */
+ for (i = 0; i != len; i++) {
+ if (get_user(c, data + i))
+ return -EFAULT;
+ if (c == 'V')
+ set_bit(WDOG_ALLOW_RELEASE, &wdd->status);
+ }
+
+ /* someone wrote to us, so we send the watchdog a keepalive ping */
+ watchdog_ping(wdd);
+
+ return len;
+}
+
+/*
+ * watchdog_ioctl: handle the different ioctl's for the watchdog device.
+ * @file: file handle to the device
+ * @cmd: watchdog command
+ * @arg: argument pointer
+ *
+ * The watchdog API defines a common set of functions for all watchdogs
+ * according to their available features.
+ */
+
+static long watchdog_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ int __user *p = argp;
+ unsigned int val;
+ int err;
+
+ if (wdd->ops->ioctl) {
+ err = wdd->ops->ioctl(wdd, cmd, arg);
+ if (err != -ENOIOCTLCMD)
+ return err;
+ }
+
+ switch (cmd) {
+ case WDIOC_GETSUPPORT:
+ return copy_to_user(argp, wdd->info,
+ sizeof(struct watchdog_info)) ? -EFAULT : 0;
+ case WDIOC_GETSTATUS:
+ val = wdd->ops->status ? wdd->ops->status(wdd) : 0;
+ return put_user(val, p);
+ case WDIOC_GETBOOTSTATUS:
+ return put_user(wdd->bootstatus, p);
+ case WDIOC_SETOPTIONS:
+ if (get_user(val, p))
+ return -EFAULT;
+ if (val & WDIOS_DISABLECARD) {
+ err = watchdog_stop(wdd);
+ if (err < 0)
+ return err;
+ }
+ if (val & WDIOS_ENABLECARD) {
+ err = watchdog_start(wdd);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+ case WDIOC_KEEPALIVE:
+ if (!(wdd->info->options & WDIOF_KEEPALIVEPING))
+ return -EOPNOTSUPP;
+ watchdog_ping(wdd);
+ return 0;
+ case WDIOC_SETTIMEOUT:
+ if ((wdd->ops->set_timeout == NULL) ||
+ !(wdd->info->options & WDIOF_SETTIMEOUT))
+ return -EOPNOTSUPP;
+ if (get_user(val, p))
+ return -EFAULT;
+ if ((wdd->max_timeout != 0) &&
+ (val < wdd->min_timeout || val > wdd->max_timeout))
+ return -EINVAL;
+ err = wdd->ops->set_timeout(wdd, val);
+ if (err < 0)
+ return err;
+ wdd->timeout = val;
+ /* If the watchdog is active then we send a keepalive ping
+ * to make sure that the watchdog keeps running (and, if
+ * possible, that it picks up the new timeout) */
+ watchdog_ping(wdd);
+ /* Fall through to WDIOC_GETTIMEOUT */
+ case WDIOC_GETTIMEOUT:
+ /* timeout == 0 means that we don't know the timeout */
+ if (wdd->timeout == 0)
+ return -EOPNOTSUPP;
+ return put_user(wdd->timeout, p);
+ default:
+ return -ENOTTY;
+ }
+}
+
+/*
+ * watchdog_open: open the /dev/watchdog device.
+ * @inode: inode of device
+ * @file: file handle to device
+ *
+ * When the /dev/watchdog device gets opened, we start the watchdog.
+ * Watch out: the /dev/watchdog device is single open, so we make sure
+ * it can only be opened once.
+ */
+
+static int watchdog_open(struct inode *inode, struct file *file)
+{
+ int err = -EBUSY;
+
+ /* the watchdog is single open! */
+ if (test_and_set_bit(WDOG_DEV_OPEN, &wdd->status))
+ return -EBUSY;
+
+ /*
+ * If the /dev/watchdog device is open, we don't want the module
+ * to be unloaded.
+ */
+ if (!try_module_get(wdd->ops->owner))
+ goto out;
+
+ err = watchdog_start(wdd);
+ if (err < 0)
+ goto out_mod;
+
+ /* /dev/watchdog is a virtual (and thus non-seekable) device */
+ return nonseekable_open(inode, file);
+
+out_mod:
+ module_put(wdd->ops->owner);
+out:
+ clear_bit(WDOG_DEV_OPEN, &wdd->status);
+ return err;
+}
+
+/*
+ * watchdog_release: release the /dev/watchdog device.
+ * @inode: inode of device
+ * @file: file handle to device
+ *
+ * This is the code for when /dev/watchdog gets closed. We will only
+ * stop the watchdog when we have received the magic char (and nowayout
+ * was not set), else the watchdog will keep running.
+ */
+
+static int watchdog_release(struct inode *inode, struct file *file)
+{
+ int err = -EBUSY;
+
+ /*
+ * We only stop the watchdog if we received the magic character
+ * or if WDIOF_MAGICCLOSE is not set. If nowayout was set then
+ * watchdog_stop will fail.
+ */
+ if (test_and_clear_bit(WDOG_ALLOW_RELEASE, &wdd->status) ||
+ !(wdd->info->options & WDIOF_MAGICCLOSE))
+ err = watchdog_stop(wdd);
+
+ /* If the watchdog was not stopped, send a keepalive ping */
+ if (err < 0) {
+ pr_crit("%s: watchdog did not stop!\n", wdd->info->identity);
+ watchdog_ping(wdd);
+ }
+
+ /* Allow the owner module to be unloaded again */
+ module_put(wdd->ops->owner);
+
+ /* make sure that /dev/watchdog can be re-opened */
+ clear_bit(WDOG_DEV_OPEN, &wdd->status);
+
+ return 0;
+}
+
+static const struct file_operations watchdog_fops = {
+ .owner = THIS_MODULE,
+ .write = watchdog_write,
+ .unlocked_ioctl = watchdog_ioctl,
+ .open = watchdog_open,
+ .release = watchdog_release,
+};
+
+static struct miscdevice watchdog_miscdev = {
+ .minor = WATCHDOG_MINOR,
+ .name = "watchdog",
+ .fops = &watchdog_fops,
+};
+
+/*
+ * watchdog_dev_register:
+ * @watchdog: watchdog device
+ *
+ * Register a watchdog device as /dev/watchdog. /dev/watchdog
+ * is actually a miscdevice and thus we set it up like that.
+ */
+
+int watchdog_dev_register(struct watchdog_device *watchdog)
+{
+ int err;
+
+ /* Only one device can register for /dev/watchdog */
+ if (test_and_set_bit(0, &watchdog_dev_busy)) {
+ pr_err("only one watchdog can use /dev/watchdog.\n");
+ return -EBUSY;
+ }
+
+ wdd = watchdog;
+
+ err = misc_register(&watchdog_miscdev);
+ if (err != 0) {
+ pr_err("%s: cannot register miscdev on minor=%d (err=%d).\n",
+ watchdog->info->identity, WATCHDOG_MINOR, err);
+ goto out;
+ }
+
+ return 0;
+
+out:
+ wdd = NULL;
+ clear_bit(0, &watchdog_dev_busy);
+ return err;
+}
+
+/*
+ * watchdog_dev_unregister:
+ * @watchdog: watchdog device
+ *
+ * Deregister the /dev/watchdog device.
+ */
+
+int watchdog_dev_unregister(struct watchdog_device *watchdog)
+{
+ /* Check that a watchdog device was registered in the past */
+ if (!test_bit(0, &watchdog_dev_busy) || !wdd)
+ return -ENODEV;
+
+ /* We can only unregister the watchdog device that was registered */
+ if (watchdog != wdd) {
+ pr_err("%s: watchdog was not registered as /dev/watchdog.\n",
+ watchdog->info->identity);
+ return -ENODEV;
+ }
+
+ misc_deregister(&watchdog_miscdev);
+ wdd = NULL;
+ clear_bit(0, &watchdog_dev_busy);
+ return 0;
+}
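
From userspace the device implemented above behaves like any /dev/watchdog:
opening it starts the timer, writes are keepalive pings, and the magic 'V'
arms the close-to-stop path. A small usage sketch (error handling trimmed):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/watchdog.h>

	int main(void)
	{
		int timeout = 30;
		int fd = open("/dev/watchdog", O_WRONLY); /* starts the watchdog */

		if (fd < 0)
			return 1;
		ioctl(fd, WDIOC_SETTIMEOUT, &timeout); /* only if WDIOF_SETTIMEOUT */
		ioctl(fd, WDIOC_KEEPALIVE, 0);         /* keepalive ping */
		write(fd, "V", 1);  /* magic close: allow the release to stop it */
		close(fd);          /* stops the watchdog unless nowayout is set */
		return 0;
	}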
diff --git a/drivers/watchdog/watchdog_dev.h b/drivers/watchdog/watchdog_dev.h
new file mode 100644
index 0000000..bc7612b
--- /dev/null
+++ b/drivers/watchdog/watchdog_dev.h
@@ -0,0 +1,33 @@
+/*
+ * watchdog_dev.h
+ *
+ * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>,
+ * All Rights Reserved.
+ *
+ * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>.
+ *
+ * This source code is part of the generic code that can be used
+ * by all the watchdog timer drivers.
+ *
+ * Based on source code of the following authors:
+ * Matt Domsch <Matt_Domsch@dell.com>,
+ * Rob Radez <rob@osinvestor.com>,
+ * Rusty Lynch <rusty@linux.co.intel.com>
+ * Satyam Sharma <satyam@infradead.org>
+ * Randy Dunlap <randy.dunlap@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw.
+ * admit liability nor provide warranty for any of this software.
+ * This material is provided "AS-IS" and at no charge.
+ */
+
+/*
+ * Functions/procedures to be called by the core
+ */
+int watchdog_dev_register(struct watchdog_device *);
+int watchdog_dev_unregister(struct watchdog_device *);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 4d433d3..f11e43e 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -187,7 +187,7 @@
*/
static struct inode *anon_inode_mkinode(void)
{
- struct inode *inode = new_inode(anon_inode_mnt->mnt_sb);
+ struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca..502b9e9 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@
*/
struct btrfs_key location;
+ /* Lock for counters */
+ spinlock_t lock;
+
/* the extent_tree has caches of all the extent mappings to disk */
struct extent_map_tree extent_tree;
@@ -134,8 +137,8 @@
* items we think we'll end up using, and reserved_extents is the number
* of extent items we've reserved metadata for.
*/
- atomic_t outstanding_extents;
- atomic_t reserved_extents;
+ unsigned outstanding_extents;
+ unsigned reserved_extents;
/*
* ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@
BTRFS_I(inode)->disk_i_size = size;
}
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+ struct inode *inode)
+{
+ if (root == root->fs_info->tree_root ||
+ BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+ return true;
+ return false;
+}
+
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2e66786..011cab3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@
{
int i;
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
- if (p->nodes[i] && p->locks[i])
- btrfs_set_lock_blocking(p->nodes[i]);
+ if (!p->nodes[i] || !p->locks[i])
+ continue;
+ btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+ if (p->locks[i] == BTRFS_READ_LOCK)
+ p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+ else if (p->locks[i] == BTRFS_WRITE_LOCK)
+ p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
}
}
@@ -68,7 +73,7 @@
* for held
*/
noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
- struct extent_buffer *held)
+ struct extent_buffer *held, int held_rw)
{
int i;
@@ -79,19 +84,29 @@
* really sure by forcing the path to blocking before we clear
* the path blocking.
*/
- if (held)
- btrfs_set_lock_blocking(held);
+ if (held) {
+ btrfs_set_lock_blocking_rw(held, held_rw);
+ if (held_rw == BTRFS_WRITE_LOCK)
+ held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+ else if (held_rw == BTRFS_READ_LOCK)
+ held_rw = BTRFS_READ_LOCK_BLOCKING;
+ }
btrfs_set_path_blocking(p);
#endif
for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
- if (p->nodes[i] && p->locks[i])
- btrfs_clear_lock_blocking(p->nodes[i]);
+ if (p->nodes[i] && p->locks[i]) {
+ btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+ if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+ p->locks[i] = BTRFS_WRITE_LOCK;
+ else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+ p->locks[i] = BTRFS_READ_LOCK;
+ }
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
if (held)
- btrfs_clear_lock_blocking(held);
+ btrfs_clear_lock_blocking_rw(held, held_rw);
#endif
}
@@ -119,7 +134,7 @@
if (!p->nodes[i])
continue;
if (p->locks[i]) {
- btrfs_tree_unlock(p->nodes[i]);
+ btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
p->locks[i] = 0;
}
free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@
return eb;
}
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root. A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+ struct extent_buffer *eb;
+
+ while (1) {
+ eb = btrfs_root_node(root);
+ btrfs_tree_read_lock(eb);
+ if (eb == root->node)
+ break;
+ btrfs_tree_read_unlock(eb);
+ free_extent_buffer(eb);
+ }
+ return eb;
+}
+
/* cowonly root (everything not a reference counted cow subvolume), just get
* put onto a simple dirty list. transaction.c walks this to make sure they
* get properly updated on disk.
@@ -626,14 +660,6 @@
for (i = start_slot; i < end_slot; i++) {
int close = 1;
- if (!parent->map_token) {
- map_extent_buffer(parent,
- btrfs_node_key_ptr_offset(i),
- sizeof(struct btrfs_key_ptr),
- &parent->map_token, &parent->kaddr,
- &parent->map_start, &parent->map_len,
- KM_USER1);
- }
btrfs_node_key(parent, &disk_key, i);
if (!progress_passed && comp_keys(&disk_key, progress) < 0)
continue;
@@ -656,11 +682,6 @@
last_block = blocknr;
continue;
}
- if (parent->map_token) {
- unmap_extent_buffer(parent, parent->map_token,
- KM_USER1);
- parent->map_token = NULL;
- }
cur = btrfs_find_tree_block(root, blocknr, blocksize);
if (cur)
@@ -701,11 +722,6 @@
btrfs_tree_unlock(cur);
free_extent_buffer(cur);
}
- if (parent->map_token) {
- unmap_extent_buffer(parent, parent->map_token,
- KM_USER1);
- parent->map_token = NULL;
- }
return err;
}
@@ -746,7 +762,6 @@
struct btrfs_disk_key *tmp = NULL;
struct btrfs_disk_key unaligned;
unsigned long offset;
- char *map_token = NULL;
char *kaddr = NULL;
unsigned long map_start = 0;
unsigned long map_len = 0;
@@ -756,18 +771,13 @@
mid = (low + high) / 2;
offset = p + mid * item_size;
- if (!map_token || offset < map_start ||
+ if (!kaddr || offset < map_start ||
(offset + sizeof(struct btrfs_disk_key)) >
map_start + map_len) {
- if (map_token) {
- unmap_extent_buffer(eb, map_token, KM_USER0);
- map_token = NULL;
- }
err = map_private_extent_buffer(eb, offset,
sizeof(struct btrfs_disk_key),
- &map_token, &kaddr,
- &map_start, &map_len, KM_USER0);
+ &kaddr, &map_start, &map_len);
if (!err) {
tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@
high = mid;
else {
*slot = mid;
- if (map_token)
- unmap_extent_buffer(eb, map_token, KM_USER0);
return 0;
}
}
*slot = low;
- if (map_token)
- unmap_extent_buffer(eb, map_token, KM_USER0);
return 1;
}
@@ -890,7 +896,8 @@
mid = path->nodes[level];
- WARN_ON(!path->locks[level]);
+ WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+ path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
WARN_ON(btrfs_header_generation(mid) != trans->transid);
orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1228,7 +1235,6 @@
u32 nr;
u32 blocksize;
u32 nscan = 0;
- bool map = true;
if (level != 1)
return;
@@ -1250,19 +1256,8 @@
nritems = btrfs_header_nritems(node);
nr = slot;
- if (node->map_token || path->skip_locking)
- map = false;
while (1) {
- if (map && !node->map_token) {
- unsigned long offset = btrfs_node_key_ptr_offset(nr);
- map_private_extent_buffer(node, offset,
- sizeof(struct btrfs_key_ptr),
- &node->map_token,
- &node->kaddr,
- &node->map_start,
- &node->map_len, KM_USER1);
- }
if (direction < 0) {
if (nr == 0)
break;
@@ -1281,11 +1276,6 @@
if ((search <= target && target - search <= 65536) ||
(search > target && search - target <= 65536)) {
gen = btrfs_node_ptr_generation(node, nr);
- if (map && node->map_token) {
- unmap_extent_buffer(node, node->map_token,
- KM_USER1);
- node->map_token = NULL;
- }
readahead_tree_block(root, search, blocksize, gen);
nread += blocksize;
}
@@ -1293,10 +1283,6 @@
if ((nread > 65536 || nscan > 32))
break;
}
- if (map && node->map_token) {
- unmap_extent_buffer(node, node->map_token, KM_USER1);
- node->map_token = NULL;
- }
}
/*
@@ -1409,7 +1395,7 @@
t = path->nodes[i];
if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
- btrfs_tree_unlock(t);
+ btrfs_tree_unlock_rw(t, path->locks[i]);
path->locks[i] = 0;
}
}
@@ -1436,7 +1422,7 @@
continue;
if (!path->locks[i])
continue;
- btrfs_tree_unlock(path->nodes[i]);
+ btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
path->locks[i] = 0;
}
}
@@ -1485,6 +1471,8 @@
* we can trust our generation number
*/
free_extent_buffer(tmp);
+ btrfs_set_path_blocking(p);
+
tmp = read_tree_block(root, blocknr, blocksize, gen);
if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
*eb_ret = tmp;
@@ -1540,20 +1528,27 @@
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *p,
- struct extent_buffer *b, int level, int ins_len)
+ struct extent_buffer *b, int level, int ins_len,
+ int *write_lock_level)
{
int ret;
if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
int sret;
+ if (*write_lock_level < level + 1) {
+ *write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = split_node(trans, root, p, level);
- btrfs_clear_path_blocking(p, NULL);
+ btrfs_clear_path_blocking(p, NULL, 0);
BUG_ON(sret > 0);
if (sret) {
@@ -1565,13 +1560,19 @@
BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
int sret;
+ if (*write_lock_level < level + 1) {
+ *write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
sret = reada_for_balance(root, p, level);
if (sret)
goto again;
btrfs_set_path_blocking(p);
sret = balance_level(trans, root, p, level);
- btrfs_clear_path_blocking(p, NULL);
+ btrfs_clear_path_blocking(p, NULL, 0);
if (sret) {
ret = sret;
@@ -1615,27 +1616,78 @@
int err;
int level;
int lowest_unlock = 1;
+ int root_lock;
+ /* everything at write_lock_level or lower must be write locked */
+ int write_lock_level = 0;
u8 lowest_level = 0;
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
WARN_ON(p->nodes[0] != NULL);
- if (ins_len < 0)
+ if (ins_len < 0) {
lowest_unlock = 2;
+ /* when we are removing items, we might have to go up to level
+ * two as we update tree pointers. Make sure we keep write
+ * locks on those levels as well
+ */
+ write_lock_level = 2;
+ } else if (ins_len > 0) {
+ /*
+ * for inserting items, make sure we have a write lock on
+ * level 1 so we can update keys
+ */
+ write_lock_level = 1;
+ }
+
+ if (!cow)
+ write_lock_level = -1;
+
+ if (cow && (p->keep_locks || p->lowest_level))
+ write_lock_level = BTRFS_MAX_LEVEL;
+
again:
+ /*
+ * we try very hard to do read locks on the root
+ */
+ root_lock = BTRFS_READ_LOCK;
+ level = 0;
if (p->search_commit_root) {
+ /*
+ * the commit roots are read only
+ * so we always do read locks
+ */
b = root->commit_root;
extent_buffer_get(b);
+ level = btrfs_header_level(b);
if (!p->skip_locking)
- btrfs_tree_lock(b);
+ btrfs_tree_read_lock(b);
} else {
- if (p->skip_locking)
+ if (p->skip_locking) {
b = btrfs_root_node(root);
- else
- b = btrfs_lock_root_node(root);
+ level = btrfs_header_level(b);
+ } else {
+ /* we don't know the level of the root node
+ * until we actually have it read locked
+ */
+ b = btrfs_read_lock_root_node(root);
+ level = btrfs_header_level(b);
+ if (level <= write_lock_level) {
+ /* whoops, must trade for write lock */
+ btrfs_tree_read_unlock(b);
+ free_extent_buffer(b);
+ b = btrfs_lock_root_node(root);
+ root_lock = BTRFS_WRITE_LOCK;
+
+ /* the level might have changed, check again */
+ level = btrfs_header_level(b);
+ }
+ }
}
+ p->nodes[level] = b;
+ if (!p->skip_locking)
+ p->locks[level] = root_lock;
while (b) {
level = btrfs_header_level(b);
@@ -1644,10 +1696,6 @@
* setup the path here so we can release it under lock
* contention with the cow code
*/
- p->nodes[level] = b;
- if (!p->skip_locking)
- p->locks[level] = 1;
-
if (cow) {
/*
* if we don't really need to cow this block
@@ -1659,6 +1707,16 @@
btrfs_set_path_blocking(p);
+ /*
+ * must have write locks on this node and the
+ * parent
+ */
+ if (level + 1 > write_lock_level) {
+ write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
err = btrfs_cow_block(trans, root, b,
p->nodes[level + 1],
p->slots[level + 1], &b);
@@ -1671,10 +1729,7 @@
BUG_ON(!cow && ins_len);
p->nodes[level] = b;
- if (!p->skip_locking)
- p->locks[level] = 1;
-
- btrfs_clear_path_blocking(p, NULL);
+ btrfs_clear_path_blocking(p, NULL, 0);
/*
* we have a lock on b and as long as we aren't changing
@@ -1700,7 +1755,7 @@
}
p->slots[level] = slot;
err = setup_nodes_for_search(trans, root, p, b, level,
- ins_len);
+ ins_len, &write_lock_level);
if (err == -EAGAIN)
goto again;
if (err) {
@@ -1710,6 +1765,19 @@
b = p->nodes[level];
slot = p->slots[level];
+ /*
+ * slot 0 is special, if we change the key
+ * we have to update the parent pointer
+ * which means we must have a write lock
+ * on the parent
+ */
+ if (slot == 0 && cow &&
+ write_lock_level < level + 1) {
+ write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
unlock_up(p, level, lowest_unlock);
if (level == lowest_level) {
@@ -1728,23 +1796,42 @@
}
if (!p->skip_locking) {
- btrfs_clear_path_blocking(p, NULL);
- err = btrfs_try_spin_lock(b);
-
- if (!err) {
- btrfs_set_path_blocking(p);
- btrfs_tree_lock(b);
- btrfs_clear_path_blocking(p, b);
+ level = btrfs_header_level(b);
+ if (level <= write_lock_level) {
+ err = btrfs_try_tree_write_lock(b);
+ if (!err) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_lock(b);
+ btrfs_clear_path_blocking(p, b,
+ BTRFS_WRITE_LOCK);
+ }
+ p->locks[level] = BTRFS_WRITE_LOCK;
+ } else {
+ err = btrfs_try_tree_read_lock(b);
+ if (!err) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_read_lock(b);
+ btrfs_clear_path_blocking(p, b,
+ BTRFS_READ_LOCK);
+ }
+ p->locks[level] = BTRFS_READ_LOCK;
}
+ p->nodes[level] = b;
}
} else {
p->slots[level] = slot;
if (ins_len > 0 &&
btrfs_leaf_free_space(root, b) < ins_len) {
+ if (write_lock_level < 1) {
+ write_lock_level = 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
btrfs_set_path_blocking(p);
err = split_leaf(trans, root, key,
p, ins_len, ret == 0);
- btrfs_clear_path_blocking(p, NULL);
+ btrfs_clear_path_blocking(p, NULL, 0);
BUG_ON(err > 0);
if (err) {
@@ -2025,7 +2112,7 @@
add_root_to_dirty_list(root);
extent_buffer_get(c);
path->nodes[level] = c;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK;
path->slots[level] = 0;
return 0;
}
@@ -2253,14 +2340,6 @@
if (path->slots[0] == i)
push_space += data_size;
- if (!left->map_token) {
- map_extent_buffer(left, (unsigned long)item,
- sizeof(struct btrfs_item),
- &left->map_token, &left->kaddr,
- &left->map_start, &left->map_len,
- KM_USER1);
- }
-
this_item_size = btrfs_item_size(left, item);
if (this_item_size + sizeof(*item) + push_space > free_space)
break;
@@ -2271,10 +2350,6 @@
break;
i--;
}
- if (left->map_token) {
- unmap_extent_buffer(left, left->map_token, KM_USER1);
- left->map_token = NULL;
- }
if (push_items == 0)
goto out_unlock;
@@ -2316,21 +2391,10 @@
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(right, i);
- if (!right->map_token) {
- map_extent_buffer(right, (unsigned long)item,
- sizeof(struct btrfs_item),
- &right->map_token, &right->kaddr,
- &right->map_start, &right->map_len,
- KM_USER1);
- }
push_space -= btrfs_item_size(right, item);
btrfs_set_item_offset(right, item, push_space);
}
- if (right->map_token) {
- unmap_extent_buffer(right, right->map_token, KM_USER1);
- right->map_token = NULL;
- }
left_nritems -= push_items;
btrfs_set_header_nritems(left, left_nritems);
@@ -2467,13 +2531,6 @@
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
- if (!right->map_token) {
- map_extent_buffer(right, (unsigned long)item,
- sizeof(struct btrfs_item),
- &right->map_token, &right->kaddr,
- &right->map_start, &right->map_len,
- KM_USER1);
- }
if (!empty && push_items > 0) {
if (path->slots[0] < i)
@@ -2496,11 +2553,6 @@
push_space += this_item_size + sizeof(*item);
}
- if (right->map_token) {
- unmap_extent_buffer(right, right->map_token, KM_USER1);
- right->map_token = NULL;
- }
-
if (push_items == 0) {
ret = 1;
goto out;
@@ -2530,23 +2582,12 @@
u32 ioff;
item = btrfs_item_nr(left, i);
- if (!left->map_token) {
- map_extent_buffer(left, (unsigned long)item,
- sizeof(struct btrfs_item),
- &left->map_token, &left->kaddr,
- &left->map_start, &left->map_len,
- KM_USER1);
- }
ioff = btrfs_item_offset(left, item);
btrfs_set_item_offset(left, item,
ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
- if (left->map_token) {
- unmap_extent_buffer(left, left->map_token, KM_USER1);
- left->map_token = NULL;
- }
/* fixup right node */
if (push_items > right_nritems) {
@@ -2574,21 +2615,9 @@
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(right, i);
- if (!right->map_token) {
- map_extent_buffer(right, (unsigned long)item,
- sizeof(struct btrfs_item),
- &right->map_token, &right->kaddr,
- &right->map_start, &right->map_len,
- KM_USER1);
- }
-
push_space = push_space - btrfs_item_size(right, item);
btrfs_set_item_offset(right, item, push_space);
}
- if (right->map_token) {
- unmap_extent_buffer(right, right->map_token, KM_USER1);
- right->map_token = NULL;
- }
btrfs_mark_buffer_dirty(left);
if (right_nritems)
@@ -2729,23 +2758,10 @@
struct btrfs_item *item = btrfs_item_nr(right, i);
u32 ioff;
- if (!right->map_token) {
- map_extent_buffer(right, (unsigned long)item,
- sizeof(struct btrfs_item),
- &right->map_token, &right->kaddr,
- &right->map_start, &right->map_len,
- KM_USER1);
- }
-
ioff = btrfs_item_offset(right, item);
btrfs_set_item_offset(right, item, ioff + rt_data_off);
}
- if (right->map_token) {
- unmap_extent_buffer(right, right->map_token, KM_USER1);
- right->map_token = NULL;
- }
-
btrfs_set_header_nritems(l, mid);
ret = 0;
btrfs_item_key(right, &disk_key, 0);
@@ -3264,23 +3280,10 @@
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
-
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff + size_diff);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
/* shift the data */
if (from_end) {
memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3377,22 +3380,10 @@
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff - data_size);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
/* shift the data */
memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3494,27 +3485,13 @@
* item0..itemN ... dataN.offset..dataN.size .. data0.size
*/
/* first correct the data pointers */
- WARN_ON(leaf->map_token);
for (i = slot; i < nritems; i++) {
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
-
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff - total_data);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
btrfs_item_nr_offset(slot),
@@ -3608,27 +3585,13 @@
* item0..itemN ... dataN.offset..dataN.size .. data0.size
*/
/* first correct the data pointers */
- WARN_ON(leaf->map_token);
for (i = slot; i < nritems; i++) {
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
-
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff - total_data);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
btrfs_item_nr_offset(slot),
@@ -3840,22 +3803,10 @@
u32 ioff;
item = btrfs_item_nr(leaf, i);
- if (!leaf->map_token) {
- map_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
- }
ioff = btrfs_item_offset(leaf, item);
btrfs_set_item_offset(leaf, item, ioff + dsize);
}
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
btrfs_item_nr_offset(slot + nr),
sizeof(struct btrfs_item) *
@@ -4004,11 +3955,11 @@
WARN_ON(!path->keep_locks);
again:
- cur = btrfs_lock_root_node(root);
+ cur = btrfs_read_lock_root_node(root);
level = btrfs_header_level(cur);
WARN_ON(path->nodes[level]);
path->nodes[level] = cur;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_READ_LOCK;
if (btrfs_header_generation(cur) < min_trans) {
ret = 1;
@@ -4098,12 +4049,12 @@
cur = read_node_slot(root, cur, slot);
BUG_ON(!cur);
- btrfs_tree_lock(cur);
+ btrfs_tree_read_lock(cur);
- path->locks[level - 1] = 1;
+ path->locks[level - 1] = BTRFS_READ_LOCK;
path->nodes[level - 1] = cur;
unlock_up(path, level, 1);
- btrfs_clear_path_blocking(path, NULL);
+ btrfs_clear_path_blocking(path, NULL, 0);
}
out:
if (ret == 0)
@@ -4218,30 +4169,21 @@
u32 nritems;
int ret;
int old_spinning = path->leave_spinning;
- int force_blocking = 0;
+ int next_rw_lock = 0;
nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0)
return 1;
- /*
- * we take the blocks in an order that upsets lockdep. Using
- * blocking mode is the only way around it.
- */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- force_blocking = 1;
-#endif
-
btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
level = 1;
next = NULL;
+ next_rw_lock = 0;
btrfs_release_path(path);
path->keep_locks = 1;
-
- if (!force_blocking)
- path->leave_spinning = 1;
+ path->leave_spinning = 1;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
path->keep_locks = 0;
@@ -4281,11 +4223,12 @@
}
if (next) {
- btrfs_tree_unlock(next);
+ btrfs_tree_unlock_rw(next, next_rw_lock);
free_extent_buffer(next);
}
next = c;
+ next_rw_lock = path->locks[level];
ret = read_block_for_search(NULL, root, path, &next, level,
slot, &key);
if (ret == -EAGAIN)
@@ -4297,15 +4240,14 @@
}
if (!path->skip_locking) {
- ret = btrfs_try_spin_lock(next);
+ ret = btrfs_try_tree_read_lock(next);
if (!ret) {
btrfs_set_path_blocking(path);
- btrfs_tree_lock(next);
- if (!force_blocking)
- btrfs_clear_path_blocking(path, next);
+ btrfs_tree_read_lock(next);
+ btrfs_clear_path_blocking(path, next,
+ BTRFS_READ_LOCK);
}
- if (force_blocking)
- btrfs_set_lock_blocking(next);
+ next_rw_lock = BTRFS_READ_LOCK;
}
break;
}
@@ -4314,14 +4256,13 @@
level--;
c = path->nodes[level];
if (path->locks[level])
- btrfs_tree_unlock(c);
+ btrfs_tree_unlock_rw(c, path->locks[level]);
free_extent_buffer(c);
path->nodes[level] = next;
path->slots[level] = 0;
if (!path->skip_locking)
- path->locks[level] = 1;
-
+ path->locks[level] = next_rw_lock;
if (!level)
break;
@@ -4336,16 +4277,14 @@
}
if (!path->skip_locking) {
- btrfs_assert_tree_locked(path->nodes[level]);
- ret = btrfs_try_spin_lock(next);
+ ret = btrfs_try_tree_read_lock(next);
if (!ret) {
btrfs_set_path_blocking(path);
- btrfs_tree_lock(next);
- if (!force_blocking)
- btrfs_clear_path_blocking(path, next);
+ btrfs_tree_read_lock(next);
+ btrfs_clear_path_blocking(path, next,
+ BTRFS_READ_LOCK);
}
- if (force_blocking)
- btrfs_set_lock_blocking(next);
+ next_rw_lock = BTRFS_READ_LOCK;
}
}
ret = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fe9287b..365c4e1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -755,6 +755,8 @@
chunks for this space */
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
+ unsigned int flush:1; /* set if we are trying to make space */
+
unsigned int force_alloc; /* set if we need to force a chunk
alloc for this space */
@@ -764,7 +766,7 @@
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock;
struct rw_semaphore groups_sem;
- atomic_t caching_threads;
+ wait_queue_head_t wait;
};
struct btrfs_block_rsv {
@@ -824,6 +826,7 @@
struct list_head list;
struct mutex mutex;
wait_queue_head_t wait;
+ struct btrfs_work work;
struct btrfs_block_group_cache *block_group;
u64 progress;
atomic_t count;
@@ -1032,6 +1035,8 @@
struct btrfs_workers endio_write_workers;
struct btrfs_workers endio_freespace_worker;
struct btrfs_workers submit_workers;
+ struct btrfs_workers caching_workers;
+
/*
* fixup workers take dirty pages that didn't properly go through
* the cow mechanism and make them safe to write. It happens
@@ -2128,7 +2133,7 @@
/* extent-tree.c */
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
- int num_items)
+ unsigned num_items)
{
return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3 * num_items;
@@ -2222,9 +2227,6 @@
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- int num_items);
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2330,7 +2332,7 @@
void btrfs_free_path(struct btrfs_path *p);
void btrfs_set_path_blocking(struct btrfs_path *p);
void btrfs_clear_path_blocking(struct btrfs_path *p,
- struct extent_buffer *held);
+ struct extent_buffer *held, int held_rw);
void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 98c68e6..b52c672 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -735,7 +735,7 @@
}
/* reset all the locked nodes in the path to spinning locks. */
- btrfs_clear_path_blocking(path, NULL);
+ btrfs_clear_path_blocking(path, NULL, 0);
/* insert the keys of the items */
ret = setup_items_for_insert(trans, root, path, keys, data_size,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 685f259..c360a84 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -89,13 +89,8 @@
data_size = sizeof(*dir_item) + name_len + data_len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
name, name_len);
- /*
- * FIXME: at some point we should handle xattr's that are larger than
- * what we can fit in our leaf. We set location to NULL b/c we arent
- * pointing at anything else, that will change if we store the xattr
- * data in a separate inode.
- */
- BUG_ON(IS_ERR(dir_item));
+ if (IS_ERR(dir_item))
+ return PTR_ERR(dir_item);
memset(&location, 0, sizeof(location));
leaf = path->nodes[0];
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b231ae1..07b3ac6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -100,38 +100,83 @@
struct btrfs_work work;
};
-/* These are used to set the lockdep class on the extent buffer locks.
- * The class is set by the readpage_end_io_hook after the buffer has
- * passed csum validation but before the pages are unlocked.
+/*
+ * Lockdep class keys for extent_buffer->lock's in this root. For a given
+ * eb, the lockdep key is determined by the btrfs_root it belongs to and
+ * the level the eb occupies in the tree.
*
- * The lockdep class is also set by btrfs_init_new_buffer on freshly
- * allocated blocks.
+ * Different roots are used for different purposes and may nest inside each
+ * other, so they require separate keysets. As lockdep keys should be
+ * static, assign keysets according to the purpose of the root as indicated
+ * by btrfs_root->objectid. This ensures that all special purpose roots
+ * have separate keysets.
*
- * The class is based on the level in the tree block, which allows lockdep
- * to know that lower nodes nest inside the locks of higher nodes.
+ * Lock-nesting across peer nodes is always done with the immediate parent
+ * node locked, thus preventing deadlock. As lockdep doesn't know this, use
+ * subclass to avoid triggering lockdep warning in such cases.
*
- * We also add a check to make sure the highest level of the tree is
- * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
- * code needs update as well.
+ * The key is set by the readpage_end_io_hook after the buffer has passed
+ * csum validation but before the pages are unlocked. It is also set by
+ * btrfs_init_new_buffer on freshly allocated blocks.
+ *
+ * We also add a check to make sure the highest level of the tree is the
+ * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
+ * needs update as well.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
# error
# endif
-static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
-static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
- /* leaf */
- "btrfs-extent-00",
- "btrfs-extent-01",
- "btrfs-extent-02",
- "btrfs-extent-03",
- "btrfs-extent-04",
- "btrfs-extent-05",
- "btrfs-extent-06",
- "btrfs-extent-07",
- /* highest possible level */
- "btrfs-extent-08",
+
+static struct btrfs_lockdep_keyset {
+ u64 id; /* root objectid */
+ const char *name_stem; /* lock name stem */
+ char names[BTRFS_MAX_LEVEL + 1][20];
+ struct lock_class_key keys[BTRFS_MAX_LEVEL + 1];
+} btrfs_lockdep_keysets[] = {
+ { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" },
+ { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" },
+ { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" },
+ { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
+ { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
+ { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
+ { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
+ { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
+ { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
+ { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
+ { .id = 0, .name_stem = "tree" },
};
+
+void __init btrfs_init_lockdep(void)
+{
+ int i, j;
+
+ /* initialize lockdep class names */
+ for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
+ struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
+
+ for (j = 0; j < ARRAY_SIZE(ks->names); j++)
+ snprintf(ks->names[j], sizeof(ks->names[j]),
+ "btrfs-%s-%02d", ks->name_stem, j);
+ }
+}
+
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+ int level)
+{
+ struct btrfs_lockdep_keyset *ks;
+
+ BUG_ON(level >= ARRAY_SIZE(ks->keys));
+
+ /* find the matching keyset, id 0 is the default entry */
+ for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+ if (ks->id == objectid)
+ break;
+
+ lockdep_set_class_and_name(&eb->lock,
+ &ks->keys[level], ks->names[level]);
+}
+
#endif
/*
@@ -217,7 +262,6 @@
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
- char *map_token = NULL;
char *kaddr;
unsigned long map_start;
unsigned long map_len;
@@ -228,8 +272,7 @@
len = buf->len - offset;
while (len > 0) {
err = map_private_extent_buffer(buf, offset, 32,
- &map_token, &kaddr,
- &map_start, &map_len, KM_USER0);
+ &kaddr, &map_start, &map_len);
if (err)
return 1;
cur_len = min(len, map_len - (offset - map_start));
@@ -237,7 +280,6 @@
crc, cur_len);
len -= cur_len;
offset += cur_len;
- unmap_extent_buffer(buf, map_token, KM_USER0);
}
if (csum_size > sizeof(inline_result)) {
result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@ -494,15 +536,6 @@
return 0;
}
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
-{
- lockdep_set_class_and_name(&eb->lock,
- &btrfs_eb_class[level],
- btrfs_eb_name[level]);
-}
-#endif
-
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
@@ -553,7 +586,8 @@
}
found_level = btrfs_header_level(eb);
- btrfs_set_buffer_lockdep_class(eb, found_level);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
+ eb, found_level);
ret = csum_tree_block(root, eb, 1);
if (ret) {
@@ -1598,7 +1632,7 @@
goto fail_bdi;
}
- fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+ mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
@@ -1802,6 +1836,9 @@
fs_info->thread_pool_size),
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->caching_workers, "cache",
+ 2, &fs_info->generic_worker);
+
/* a higher idle thresh on the submit workers makes it much more
* likely that bios will be sent down in a sane order to the
* devices
@@ -1855,6 +1892,7 @@
btrfs_start_workers(&fs_info->endio_write_workers, 1);
btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
btrfs_start_workers(&fs_info->delayed_workers, 1);
+ btrfs_start_workers(&fs_info->caching_workers, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2112,6 +2150,7 @@
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
+ btrfs_stop_workers(&fs_info->caching_workers);
fail_alloc:
kfree(fs_info->delayed_root);
fail_iput:
@@ -2577,6 +2616,7 @@
btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_stop_workers(&fs_info->delayed_workers);
+ btrfs_stop_workers(&fs_info->caching_workers);
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index a0b610a..bec3ea4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -87,10 +87,14 @@
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
+void btrfs_init_lockdep(void);
+void btrfs_set_buffer_lockdep_class(u64 objectid,
+ struct extent_buffer *eb, int level);
#else
-static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
- int level)
+static inline void btrfs_init_lockdep(void)
+{ }
+static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
+ struct extent_buffer *eb, int level)
{
}
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 71cd456..4d08ed7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,12 +320,12 @@
return total_added;
}
-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
{
- struct btrfs_block_group_cache *block_group = data;
- struct btrfs_fs_info *fs_info = block_group->fs_info;
- struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
- struct btrfs_root *extent_root = fs_info->extent_root;
+ struct btrfs_block_group_cache *block_group;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_caching_control *caching_ctl;
+ struct btrfs_root *extent_root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -334,9 +334,14 @@
u32 nritems;
int ret = 0;
+ caching_ctl = container_of(work, struct btrfs_caching_control, work);
+ block_group = caching_ctl->block_group;
+ fs_info = block_group->fs_info;
+ extent_root = fs_info->extent_root;
+
path = btrfs_alloc_path();
if (!path)
- return -ENOMEM;
+ goto out;
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -433,13 +438,11 @@
free_excluded_extents(extent_root, block_group);
mutex_unlock(&caching_ctl->mutex);
+out:
wake_up(&caching_ctl->wait);
put_caching_control(caching_ctl);
- atomic_dec(&block_group->space_info->caching_threads);
btrfs_put_block_group(block_group);
-
- return 0;
}
static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
- struct task_struct *tsk;
int ret = 0;
smp_mb();
@@ -501,6 +503,7 @@
caching_ctl->progress = cache->key.objectid;
/* one for caching kthread, one for caching block group list */
atomic_set(&caching_ctl->count, 2);
+ caching_ctl->work.func = caching_thread;
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->extent_commit_sem);
- atomic_inc(&cache->space_info->caching_threads);
btrfs_get_block_group(cache);
- tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
- cache->key.objectid);
- if (IS_ERR(tsk)) {
- ret = PTR_ERR(tsk);
- printk(KERN_ERR "error running thread %d\n", ret);
- BUG();
- }
+ btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
return ret;
}
@@ -2932,9 +2928,10 @@
found->full = 0;
found->force_alloc = CHUNK_ALLOC_NO_FORCE;
found->chunk_alloc = 0;
+ found->flush = 0;
+ init_waitqueue_head(&found->wait);
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
- atomic_set(&found->caching_threads, 0);
return 0;
}
@@ -3314,6 +3311,14 @@
if (reserved == 0)
return 0;
+ smp_mb();
+ if (root->fs_info->delalloc_bytes == 0) {
+ if (trans)
+ return 0;
+ btrfs_wait_ordered_extents(root, 0, 0);
+ return 0;
+ }
+
max_reclaim = min(reserved, to_reclaim);
while (loops < 1024) {
@@ -3356,6 +3361,8 @@
}
}
+ if (reclaimed >= to_reclaim && !trans)
+ btrfs_wait_ordered_extents(root, 0, 0);
return reclaimed >= to_reclaim;
}
@@ -3380,15 +3387,36 @@
u64 num_bytes = orig_bytes;
int retries = 0;
int ret = 0;
- bool reserved = false;
bool committed = false;
+ bool flushing = false;
again:
- ret = -ENOSPC;
- if (reserved)
- num_bytes = 0;
-
+ ret = 0;
spin_lock(&space_info->lock);
+ /*
+ * We only want to wait if somebody other than us is flushing and we are
+ * actually allowed to flush.
+ */
+ while (flush && !flushing && space_info->flush) {
+ spin_unlock(&space_info->lock);
+ /*
+ * If we have a trans handle we can't wait because the flusher
+ * may have to commit the transaction, which would mean we would
+ * deadlock since we are waiting for the flusher to finish, but
+ * hold the current transaction open.
+ */
+ if (trans)
+ return -EAGAIN;
+ ret = wait_event_interruptible(space_info->wait,
+ !space_info->flush);
+ /* Must have been interrupted, return */
+ if (ret)
+ return -EINTR;
+
+ spin_lock(&space_info->lock);
+ }
+
+ ret = -ENOSPC;
unused = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
@@ -3403,8 +3431,7 @@
if (unused <= space_info->total_bytes) {
unused = space_info->total_bytes - unused;
if (unused >= num_bytes) {
- if (!reserved)
- space_info->bytes_reserved += orig_bytes;
+ space_info->bytes_reserved += orig_bytes;
ret = 0;
} else {
/*
@@ -3429,17 +3456,14 @@
* to reclaim space we can actually use it instead of somebody else
* stealing it from us.
*/
- if (ret && !reserved) {
- space_info->bytes_reserved += orig_bytes;
- reserved = true;
+ if (ret && flush) {
+ flushing = true;
+ space_info->flush = 1;
}
spin_unlock(&space_info->lock);
- if (!ret)
- return 0;
-
- if (!flush)
+ if (!ret || !flush)
goto out;
/*
@@ -3447,11 +3471,11 @@
* metadata until after the IO is completed.
*/
ret = shrink_delalloc(trans, root, num_bytes, 1);
- if (ret > 0)
- return 0;
- else if (ret < 0)
+ if (ret < 0)
goto out;
+ ret = 0;
+
/*
* So if we were overcommitted it's possible that somebody else flushed
* out enough space and we simply didn't have enough space to reclaim,
@@ -3462,11 +3486,11 @@
goto again;
}
- spin_lock(&space_info->lock);
/*
* Not enough space to be reclaimed, don't bother committing the
* transaction.
*/
+ spin_lock(&space_info->lock);
if (space_info->bytes_pinned < orig_bytes)
ret = -ENOSPC;
spin_unlock(&space_info->lock);
@@ -3474,10 +3498,13 @@
goto out;
ret = -EAGAIN;
- if (trans || committed)
+ if (trans)
goto out;
ret = -ENOSPC;
+ if (committed)
+ goto out;
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
goto out;
@@ -3489,12 +3516,12 @@
}
out:
- if (reserved) {
+ if (flushing) {
spin_lock(&space_info->lock);
- space_info->bytes_reserved -= orig_bytes;
+ space_info->flush = 0;
+ wake_up_all(&space_info->wait);
spin_unlock(&space_info->lock);
}
-
return ret;
}
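
The wait loop, the flush flag and the wake_up_all() above implement a single-flusher protocol per space_info: one task flushes at a time, and everyone else either waits on space_info->wait or returns -EAGAIN when holding a transaction, since the flusher may itself need to commit one. Condensed into one hypothetical helper, a sketch only:

	static int with_single_flusher(struct btrfs_space_info *si, bool may_wait,
				       int (*do_flush)(void *arg), void *arg)
	{
		int ret;

		spin_lock(&si->lock);
		while (si->flush) {		/* somebody else is flushing */
			spin_unlock(&si->lock);
			if (!may_wait)
				return -EAGAIN;	/* e.g. we hold a trans handle */
			if (wait_event_interruptible(si->wait, !si->flush))
				return -EINTR;
			spin_lock(&si->lock);
		}
		si->flush = 1;			/* we are the flusher now */
		spin_unlock(&si->lock);

		ret = do_flush(arg);

		spin_lock(&si->lock);
		si->flush = 0;
		wake_up_all(&si->wait);		/* waiters re-check free space */
		spin_unlock(&si->lock);
		return ret;
	}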
@@ -3704,7 +3731,6 @@
if (commit_trans) {
if (trans)
return -EAGAIN;
-
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
@@ -3874,26 +3900,6 @@
return 0;
}
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- int num_items)
-{
- u64 num_bytes;
- int ret;
-
- if (num_items == 0 || root->fs_info->chunk_root == root)
- return 0;
-
- num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
- ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
- num_bytes);
- if (!ret) {
- trans->bytes_reserved += num_bytes;
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- }
- return ret;
-}
-
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -3944,6 +3950,30 @@
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+ unsigned dropped_extents = 0;
+
+ spin_lock(&BTRFS_I(inode)->lock);
+ BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+ BTRFS_I(inode)->outstanding_extents--;
+
+ /*
+ * If we still have at least as many outstanding extents as reserved
+ * extents, leave the reserved extents count alone.
+ */
+ if (BTRFS_I(inode)->outstanding_extents >=
+ BTRFS_I(inode)->reserved_extents)
+ goto out;
+
+ dropped_extents = BTRFS_I(inode)->reserved_extents -
+ BTRFS_I(inode)->outstanding_extents;
+ BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+ spin_unlock(&BTRFS_I(inode)->lock);
+ return dropped_extents;
+}
+
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
{
return num_bytes >>= 3;
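
drop_outstanding_extent() is the release half of the extent accounting now done under BTRFS_I(inode)->lock: outstanding_extents counts delalloc extents that exist, reserved_extents counts extents that have metadata reserved, and both paths restore the invariant reserved_extents >= outstanding_extents. The reserve half, sketched from the code below:

	/* sketch: reserving for one new delalloc extent */
	unsigned nr = 0;

	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->outstanding_extents++;
	if (BTRFS_I(inode)->outstanding_extents >
	    BTRFS_I(inode)->reserved_extents) {
		nr = BTRFS_I(inode)->outstanding_extents -
		     BTRFS_I(inode)->reserved_extents;
		BTRFS_I(inode)->reserved_extents += nr;	/* reserve the delta */
	}
	spin_unlock(&BTRFS_I(inode)->lock);
	/* if the byte reservation then fails, drop_outstanding_extent()
	 * rolls both counters back and reports how much to give back */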
@@ -3953,9 +3983,8 @@
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
- u64 to_reserve;
- int nr_extents;
- int reserved_extents;
+ u64 to_reserve = 0;
+ unsigned nr_extents = 0;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
@@ -3963,66 +3992,49 @@
num_bytes = ALIGN(num_bytes, root->sectorsize);
- nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
- reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
- if (nr_extents > reserved_extents) {
- nr_extents -= reserved_extents;
+ if (BTRFS_I(inode)->outstanding_extents >
+ BTRFS_I(inode)->reserved_extents) {
+ nr_extents = BTRFS_I(inode)->outstanding_extents -
+ BTRFS_I(inode)->reserved_extents;
+ BTRFS_I(inode)->reserved_extents += nr_extents;
+
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
- } else {
- nr_extents = 0;
- to_reserve = 0;
}
+ spin_unlock(&BTRFS_I(inode)->lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
- if (ret)
+ if (ret) {
+ unsigned dropped;
+ /*
+ * We don't need the return value since our reservation failed,
+ * we just need to clean up our counter.
+ */
+ dropped = drop_outstanding_extent(inode);
+ WARN_ON(dropped > 1);
return ret;
-
- atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ }
block_rsv_add_bytes(block_rsv, to_reserve, 1);
- if (block_rsv->size > 512 * 1024 * 1024)
- shrink_delalloc(NULL, root, to_reserve, 0);
-
return 0;
}
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 to_free;
- int nr_extents;
- int reserved_extents;
+ u64 to_free = 0;
+ unsigned dropped;
num_bytes = ALIGN(num_bytes, root->sectorsize);
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
- WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
- reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
- do {
- int old, new;
-
- nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
- if (nr_extents >= reserved_extents) {
- nr_extents = 0;
- break;
- }
- old = reserved_extents;
- nr_extents = reserved_extents - nr_extents;
- new = reserved_extents - nr_extents;
- old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
- reserved_extents, new);
- if (likely(old == reserved_extents))
- break;
- reserved_extents = old;
- } while (1);
+ dropped = drop_outstanding_extent(inode);
to_free = calc_csum_metadata_size(inode, num_bytes);
- if (nr_extents > 0)
- to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+ if (dropped > 0)
+ to_free += btrfs_calc_trans_metadata_size(root, dropped);
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
@@ -4990,14 +5002,10 @@
}
/*
- * We only want to start kthread caching if we are at
- * the point where we will wait for caching to make
- * progress, or if our ideal search is over and we've
- * found somebody to start caching.
+ * The caching workers are limited to 2 threads, so we
+ * can queue as much work as we care to.
*/
- if (loop > LOOP_CACHING_NOWAIT ||
- (loop > LOOP_FIND_IDEAL &&
- atomic_read(&space_info->caching_threads) < 2)) {
+ if (loop > LOOP_FIND_IDEAL) {
ret = cache_block_group(block_group, trans,
orig_root, 0);
BUG_ON(ret);
@@ -5219,8 +5227,7 @@
if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
found_uncached_bg = false;
loop++;
- if (!ideal_cache_percent &&
- atomic_read(&space_info->caching_threads))
+ if (!ideal_cache_percent)
goto search;
/*
@@ -5623,7 +5630,7 @@
if (!buf)
return ERR_PTR(-ENOMEM);
btrfs_set_header_generation(buf, trans->transid);
- btrfs_set_buffer_lockdep_class(buf, level);
+ btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
clean_tree_block(trans, root, buf);
@@ -5910,7 +5917,7 @@
return 1;
if (path->locks[level] && !wc->keep_locks) {
- btrfs_tree_unlock(eb);
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
}
return 0;
@@ -5934,7 +5941,7 @@
* keep the tree lock
*/
if (path->locks[level] && level > 0) {
- btrfs_tree_unlock(eb);
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
}
return 0;
@@ -6047,7 +6054,7 @@
BUG_ON(level != btrfs_header_level(next));
path->nodes[level] = next;
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
wc->level = level;
if (wc->level == 1)
wc->reada_slot = 0;
@@ -6118,7 +6125,7 @@
BUG_ON(level == 0);
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, root,
eb->start, eb->len,
@@ -6127,8 +6134,7 @@
BUG_ON(ret);
BUG_ON(wc->refs[level] == 0);
if (wc->refs[level] == 1) {
- btrfs_tree_unlock(eb);
- path->locks[level] = 0;
+ btrfs_tree_unlock_rw(eb, path->locks[level]);
return 1;
}
}
@@ -6150,7 +6156,7 @@
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
clean_tree_block(trans, root, eb);
}
@@ -6229,7 +6235,8 @@
return 0;
if (path->locks[level]) {
- btrfs_tree_unlock(path->nodes[level]);
+ btrfs_tree_unlock_rw(path->nodes[level],
+ path->locks[level]);
path->locks[level] = 0;
}
free_extent_buffer(path->nodes[level]);
@@ -6281,7 +6288,7 @@
path->nodes[level] = btrfs_lock_root_node(root);
btrfs_set_lock_blocking(path->nodes[level]);
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
memset(&wc->update_progress, 0,
sizeof(wc->update_progress));
} else {
@@ -6449,7 +6456,7 @@
level = btrfs_header_level(node);
path->nodes[level] = node;
path->slots[level] = 0;
- path->locks[level] = 1;
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
wc->refs[parent_level] = 1;
wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6524,15 +6531,28 @@
return flags;
}
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
+ u64 min_allocable_bytes;
int ret = -ENOSPC;
if (cache->ro)
return 0;
+ /*
+ * We need some metadata and system metadata space headroom for
+ * allocating chunks in some corner cases, so require a minimum
+ * reserve unless we are forced to set the group read-only.
+ */
+ if ((sinfo->flags &
+ (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+ !force)
+ min_allocable_bytes = 1 * 1024 * 1024;
+ else
+ min_allocable_bytes = 0;
+
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
num_bytes = cache->key.offset - cache->reserved - cache->pinned -
@@ -6540,7 +6560,8 @@
if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
sinfo->bytes_may_use + sinfo->bytes_readonly +
- cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+ cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+ sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
sinfo->bytes_reserved += cache->reserved_pinned;
cache->reserved_pinned = 0;
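
With the force flag, metadata and system block groups keep 1MiB of allocatable headroom before going read-only, so chunk allocation in corner cases cannot be starved. The test reduces to this shape (a sketch; can_set_ro is a hypothetical helper):

	static bool can_set_ro(u64 accounted_bytes, u64 group_free_bytes,
			       u64 total_bytes, bool force)
	{
		/* headroom only matters for SYSTEM/METADATA groups */
		u64 min_allocable = force ? 0 : 1024 * 1024;

		return accounted_bytes + group_free_bytes + min_allocable <=
		       total_bytes;
	}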
@@ -6571,7 +6592,7 @@
do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
CHUNK_ALLOC_FORCE);
- ret = set_block_group_ro(cache);
+ ret = set_block_group_ro(cache, 0);
if (!ret)
goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6579,7 +6600,7 @@
CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
- ret = set_block_group_ro(cache);
+ ret = set_block_group_ro(cache, 0);
out:
btrfs_end_transaction(trans, root);
return ret;
@@ -7016,7 +7037,7 @@
set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid))
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
}
list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7030,9 +7051,9 @@
* mirrored block groups.
*/
list_for_each_entry(cache, &space_info->block_groups[3], list)
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
list_for_each_entry(cache, &space_info->block_groups[4], list)
- set_block_group_ro(cache);
+ set_block_group_ro(cache, 1);
}
init_global_block_rsv(info);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 561262d..067b174 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -281,11 +281,10 @@
if (other->start == state->end + 1 &&
other->state == state->state) {
merge_cb(tree, state, other);
- other->start = state->start;
- state->tree = NULL;
- rb_erase(&state->rb_node, &tree->state);
- free_extent_state(state);
- state = NULL;
+ state->end = other->end;
+ other->tree = NULL;
+ rb_erase(&other->rb_node, &tree->state);
+ free_extent_state(other);
}
}
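
The merge now grows the current state forward and frees its right-hand neighbour, rather than growing the neighbour and freeing the current state; a caller holding a cached pointer to state therefore keeps a valid, larger state. In effect:

	/* sketch: states [0,4095] and [4096,8191] with equal flags */
	state->end = other->end;		/* state becomes [0,8191]	*/
	other->tree = NULL;
	rb_erase(&other->rb_node, &tree->state);
	free_extent_state(other);		/* the neighbour dies; cached
						 * references to state survive */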
@@ -351,7 +350,6 @@
"%llu %llu\n", (unsigned long long)found->start,
(unsigned long long)found->end,
(unsigned long long)start, (unsigned long long)end);
- free_extent_state(state);
return -EEXIST;
}
state->tree = tree;
@@ -500,7 +498,8 @@
cached_state = NULL;
}
- if (cached && cached->tree && cached->start == start) {
+ if (cached && cached->tree && cached->start <= start &&
+ cached->end > start) {
if (clear)
atomic_dec(&cached->refs);
state = cached;
@@ -742,7 +741,8 @@
spin_lock(&tree->lock);
if (cached_state && *cached_state) {
state = *cached_state;
- if (state->start == start && state->tree) {
+ if (state->start <= start && state->end > start &&
+ state->tree) {
node = &state->rb_node;
goto hit_next;
}
@@ -783,13 +783,13 @@
if (err)
goto out;
- next_node = rb_next(node);
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
+ next_node = rb_next(&state->rb_node);
if (next_node && start < end && prealloc && !need_resched()) {
state = rb_entry(next_node, struct extent_state,
rb_node);
@@ -862,7 +862,6 @@
* Avoid to free 'prealloc' if it can be merged with
* the later extent.
*/
- atomic_inc(&prealloc->refs);
err = insert_state(tree, prealloc, start, this_end,
&bits);
BUG_ON(err == -EEXIST);
@@ -872,7 +871,6 @@
goto out;
}
cache_state(prealloc, cached_state);
- free_extent_state(prealloc);
prealloc = NULL;
start = this_end + 1;
goto search_again;
@@ -1564,7 +1562,8 @@
int bitset = 0;
spin_lock(&tree->lock);
- if (cached && cached->tree && cached->start == start)
+ if (cached && cached->tree && cached->start <= start &&
+ cached->end > start)
node = &cached->rb_node;
else
node = tree_search(tree, start);
@@ -2432,6 +2431,7 @@
pgoff_t index;
pgoff_t end; /* Inclusive */
int scanned = 0;
+ int tag;
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
@@ -2442,11 +2442,16 @@
end = wbc->range_end >> PAGE_CACHE_SHIFT;
scanned = 1;
}
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag = PAGECACHE_TAG_TOWRITE;
+ else
+ tag = PAGECACHE_TAG_DIRTY;
retry:
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag_pages_for_writeback(mapping, index, end);
while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY, min(end - index,
- (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
unsigned i;
scanned = 1;
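
Tagged writeback prevents livelock on data-integrity syncs: the range is tagged TOWRITE up front, so pages dirtied after the walk starts are not picked up by this pass. The generic loop, a sketch using the pagecache API of this kernel:

	struct pagevec pvec;
	unsigned nr;
	int tag = (wbc->sync_mode == WB_SYNC_ALL) ?
		  PAGECACHE_TAG_TOWRITE : PAGECACHE_TAG_DIRTY;

	pagevec_init(&pvec, 0);
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);	/* snapshot */

	while ((nr = pagevec_lookup_tag(&pvec, mapping, &index, tag,
					PAGEVEC_SIZE))) {
		/* ... write each page found under 'tag' ... */
		pagevec_release(&pvec);
	}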
@@ -3020,8 +3025,15 @@
return NULL;
eb->start = start;
eb->len = len;
- spin_lock_init(&eb->lock);
- init_waitqueue_head(&eb->lock_wq);
+ rwlock_init(&eb->lock);
+ atomic_set(&eb->write_locks, 0);
+ atomic_set(&eb->read_locks, 0);
+ atomic_set(&eb->blocking_readers, 0);
+ atomic_set(&eb->blocking_writers, 0);
+ atomic_set(&eb->spinning_readers, 0);
+ atomic_set(&eb->spinning_writers, 0);
+ init_waitqueue_head(&eb->write_lock_wq);
+ init_waitqueue_head(&eb->read_lock_wq);
#if LEAK_DEBUG
spin_lock_irqsave(&leak_lock, flags);
@@ -3117,7 +3129,7 @@
i = 0;
}
for (; i < num_pages; i++, index++) {
- p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
+ p = find_or_create_page(mapping, index, GFP_NOFS);
if (!p) {
WARN_ON(1);
goto free_eb;
@@ -3264,6 +3276,22 @@
return was_dirty;
}
+static int __eb_straddles_pages(u64 start, u64 len)
+{
+ if (len < PAGE_CACHE_SIZE)
+ return 1;
+ if (start & (PAGE_CACHE_SIZE - 1))
+ return 1;
+ if ((start + len) & (PAGE_CACHE_SIZE - 1))
+ return 1;
+ return 0;
+}
+
+static int eb_straddles_pages(struct extent_buffer *eb)
+{
+ return __eb_straddles_pages(eb->start, eb->len);
+}
+
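
A buffer "straddles" page content when it shares a page with other data; only then do the EXTENT_UPTODATE bits in the io tree need maintaining, since page-aligned buffers can rely on PageUptodate alone. For example, with 4KiB pages:

	__eb_straddles_pages(0x1000, 0x1000);	/* -> 0: exactly one page    */
	__eb_straddles_pages(0x1000, 0x2000);	/* -> 0: two whole pages     */
	__eb_straddles_pages(0x1800, 0x1000);	/* -> 1: unaligned start     */
	__eb_straddles_pages(0x2000,  0x800);	/* -> 1: sub-page length     */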
int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
struct extent_buffer *eb,
struct extent_state **cached_state)
@@ -3275,8 +3303,10 @@
num_pages = num_extent_pages(eb->start, eb->len);
clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- cached_state, GFP_NOFS);
+ if (eb_straddles_pages(eb)) {
+ clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+ cached_state, GFP_NOFS);
+ }
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if (page)
@@ -3294,8 +3324,10 @@
num_pages = num_extent_pages(eb->start, eb->len);
- set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- NULL, GFP_NOFS);
+ if (eb_straddles_pages(eb)) {
+ set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+ NULL, GFP_NOFS);
+ }
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3318,9 +3350,12 @@
int uptodate;
unsigned long index;
- ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
- if (ret)
- return 1;
+ if (__eb_straddles_pages(start, end - start + 1)) {
+ ret = test_range_bit(tree, start, end,
+ EXTENT_UPTODATE, 1, NULL);
+ if (ret)
+ return 1;
+ }
while (start <= end) {
index = start >> PAGE_CACHE_SHIFT;
page = find_get_page(tree->mapping, index);
@@ -3348,10 +3383,12 @@
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 1;
- ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1, cached_state);
- if (ret)
- return ret;
+ if (eb_straddles_pages(eb)) {
+ ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+ EXTENT_UPTODATE, 1, cached_state);
+ if (ret)
+ return ret;
+ }
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
@@ -3384,9 +3421,11 @@
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
- if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1, NULL)) {
- return 0;
+ if (eb_straddles_pages(eb)) {
+ if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+ EXTENT_UPTODATE, 1, NULL)) {
+ return 0;
+ }
}
if (start) {
@@ -3490,9 +3529,8 @@
page = extent_buffer_page(eb, i);
cur = min(len, (PAGE_CACHE_SIZE - offset));
- kaddr = kmap_atomic(page, KM_USER1);
+ kaddr = page_address(page);
memcpy(dst, kaddr + offset, cur);
- kunmap_atomic(kaddr, KM_USER1);
dst += cur;
len -= cur;
@@ -3502,9 +3540,9 @@
}
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len, char **token, char **map,
+ unsigned long min_len, char **map,
unsigned long *map_start,
- unsigned long *map_len, int km)
+ unsigned long *map_len)
{
size_t offset = start & (PAGE_CACHE_SIZE - 1);
char *kaddr;
@@ -3534,42 +3572,12 @@
}
p = extent_buffer_page(eb, i);
- kaddr = kmap_atomic(p, km);
- *token = kaddr;
+ kaddr = page_address(p);
*map = kaddr + offset;
*map_len = PAGE_CACHE_SIZE - offset;
return 0;
}
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len,
- char **token, char **map,
- unsigned long *map_start,
- unsigned long *map_len, int km)
-{
- int err;
- int save = 0;
- if (eb->map_token) {
- unmap_extent_buffer(eb, eb->map_token, km);
- eb->map_token = NULL;
- save = 1;
- }
- err = map_private_extent_buffer(eb, start, min_len, token, map,
- map_start, map_len, km);
- if (!err && save) {
- eb->map_token = *token;
- eb->kaddr = *map;
- eb->map_start = *map_start;
- eb->map_len = *map_len;
- }
- return err;
-}
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
- kunmap_atomic(token, km);
-}
-
int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
unsigned long start,
unsigned long len)
@@ -3593,9 +3601,8 @@
cur = min(len, (PAGE_CACHE_SIZE - offset));
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = page_address(page);
ret = memcmp(ptr, kaddr + offset, cur);
- kunmap_atomic(kaddr, KM_USER0);
if (ret)
break;
@@ -3628,9 +3635,8 @@
WARN_ON(!PageUptodate(page));
cur = min(len, PAGE_CACHE_SIZE - offset);
- kaddr = kmap_atomic(page, KM_USER1);
+ kaddr = page_address(page);
memcpy(kaddr + offset, src, cur);
- kunmap_atomic(kaddr, KM_USER1);
src += cur;
len -= cur;
@@ -3659,9 +3665,8 @@
WARN_ON(!PageUptodate(page));
cur = min(len, PAGE_CACHE_SIZE - offset);
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = page_address(page);
memset(kaddr + offset, c, cur);
- kunmap_atomic(kaddr, KM_USER0);
len -= cur;
offset = 0;
@@ -3692,9 +3697,8 @@
cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = page_address(page);
read_extent_buffer(src, kaddr + offset, src_offset, cur);
- kunmap_atomic(kaddr, KM_USER0);
src_offset += cur;
len -= cur;
@@ -3707,20 +3711,17 @@
unsigned long dst_off, unsigned long src_off,
unsigned long len)
{
- char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+ char *dst_kaddr = page_address(dst_page);
if (dst_page == src_page) {
memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
} else {
- char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+ char *src_kaddr = page_address(src_page);
char *p = dst_kaddr + dst_off + len;
char *s = src_kaddr + src_off + len;
while (len--)
*--p = *--s;
-
- kunmap_atomic(src_kaddr, KM_USER1);
}
- kunmap_atomic(dst_kaddr, KM_USER0);
}
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -3733,20 +3734,17 @@
unsigned long dst_off, unsigned long src_off,
unsigned long len)
{
- char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+ char *dst_kaddr = page_address(dst_page);
char *src_kaddr;
if (dst_page != src_page) {
- src_kaddr = kmap_atomic(src_page, KM_USER1);
+ src_kaddr = page_address(src_page);
} else {
src_kaddr = dst_kaddr;
BUG_ON(areas_overlap(src_off, dst_off, len));
}
memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
- kunmap_atomic(dst_kaddr, KM_USER0);
- if (dst_page != src_page)
- kunmap_atomic(src_kaddr, KM_USER1);
}
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a11a92e..21a7ca9 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -120,8 +120,6 @@
struct extent_buffer {
u64 start;
unsigned long len;
- char *map_token;
- char *kaddr;
unsigned long map_start;
unsigned long map_len;
struct page *first_page;
@@ -130,14 +128,26 @@
struct rcu_head rcu_head;
atomic_t refs;
- /* the spinlock is used to protect most operations */
- spinlock_t lock;
+ /* counts of extent buffer lock holders and waiters */
+ atomic_t write_locks;
+ atomic_t read_locks;
+ atomic_t blocking_writers;
+ atomic_t blocking_readers;
+ atomic_t spinning_readers;
+ atomic_t spinning_writers;
- /*
- * when we keep the lock held while blocking, waiters go onto
- * the wq
+ /* protects write locks */
+ rwlock_t lock;
+
+ /* readers use lock_wq while they wait for the write
+ * lock holders to unlock
*/
- wait_queue_head_t lock_wq;
+ wait_queue_head_t write_lock_wq;
+
+ /* writers use read_lock_wq while they wait for readers
+ * to unlock
+ */
+ wait_queue_head_t read_lock_wq;
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -279,15 +289,10 @@
int extent_buffer_uptodate(struct extent_io_tree *tree,
struct extent_buffer *eb,
struct extent_state *cached_state);
-int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **token, char **map,
- unsigned long *map_start,
- unsigned long *map_len, int km);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **token, char **map,
+ unsigned long min_len, char **map,
unsigned long *map_start,
- unsigned long *map_len, int km);
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
+ unsigned long *map_len);
int extent_range_uptodate(struct extent_io_tree *tree,
u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 90d4ee5..08bcfa9 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -177,6 +177,15 @@
WARN_ON(bio->bi_vcnt <= 0);
+ /*
+ * the free space stuff is only read when it hasn't been
+ * updated in the current transaction. So, we can safely
+ * read from the commit root and sidestep a nasty deadlock
+ * between reading the free space cache and updating the csum tree.
+ */
+ if (btrfs_is_free_space_inode(root, inode))
+ path->search_commit_root = 1;
+
disk_bytenr = (u64)bio->bi_sector << 9;
if (dio)
offset = logical_offset;
@@ -664,10 +673,6 @@
struct btrfs_sector_sum *sector_sum;
u32 nritems;
u32 ins_size;
- char *eb_map;
- char *eb_token;
- unsigned long map_len;
- unsigned long map_start;
u16 csum_size =
btrfs_super_csum_size(&root->fs_info->super_copy);
@@ -814,30 +819,9 @@
item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
- eb_token = NULL;
next_sector:
- if (!eb_token ||
- (unsigned long)item + csum_size >= map_start + map_len) {
- int err;
-
- if (eb_token)
- unmap_extent_buffer(leaf, eb_token, KM_USER1);
- eb_token = NULL;
- err = map_private_extent_buffer(leaf, (unsigned long)item,
- csum_size,
- &eb_token, &eb_map,
- &map_start, &map_len, KM_USER1);
- if (err)
- eb_token = NULL;
- }
- if (eb_token) {
- memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
- &sector_sum->sum, csum_size);
- } else {
- write_extent_buffer(leaf, &sector_sum->sum,
- (unsigned long)item, csum_size);
- }
+ write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
total_bytes += root->sectorsize;
sector_sum++;
@@ -850,10 +834,7 @@
goto next_sector;
}
}
- if (eb_token) {
- unmap_extent_buffer(leaf, eb_token, KM_USER1);
- eb_token = NULL;
- }
+
btrfs_mark_buffer_dirty(path->nodes[0]);
if (total_bytes < sums->len) {
btrfs_release_path(path);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 59cbdb1..a35e51c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1081,7 +1081,8 @@
again:
for (i = 0; i < num_pages; i++) {
- pages[i] = grab_cache_page(inode->i_mapping, index + i);
+ pages[i] = find_or_create_page(inode->i_mapping, index + i,
+ GFP_NOFS);
if (!pages[i]) {
faili = i - 1;
err = -ENOMEM;
@@ -1238,9 +1239,11 @@
* managed to copy.
*/
if (num_pages > dirty_pages) {
- if (copied > 0)
- atomic_inc(
- &BTRFS_I(inode)->outstanding_extents);
+ if (copied > 0) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index bf0d615..6377713 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,6 +98,12 @@
return inode;
spin_lock(&block_group->lock);
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
+ printk(KERN_INFO "Old style space inode found, converting.\n");
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
+ block_group->disk_cache_state = BTRFS_DC_CLEAR;
+ }
+
if (!btrfs_fs_closing(root->fs_info)) {
block_group->inode = igrab(inode);
block_group->iref = 1;
@@ -135,7 +141,7 @@
btrfs_set_inode_gid(leaf, inode_item, 0);
btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
- BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+ BTRFS_INODE_PREALLOC);
btrfs_set_inode_nlink(leaf, inode_item, 1);
btrfs_set_inode_transid(leaf, inode_item, trans->transid);
btrfs_set_inode_block_group(leaf, inode_item, offset);
@@ -239,17 +245,12 @@
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
struct page *page;
- u32 *checksums = NULL, *crc;
- char *disk_crcs = NULL;
struct btrfs_key key;
struct list_head bitmaps;
u64 num_entries;
u64 num_bitmaps;
u64 generation;
- u32 cur_crc = ~(u32)0;
pgoff_t index = 0;
- unsigned long first_page_offset;
- int num_checksums;
int ret = 0;
INIT_LIST_HEAD(&bitmaps);
@@ -292,16 +293,6 @@
if (!num_entries)
goto out;
- /* Setup everything for doing checksumming */
- num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
- checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
- if (!checksums)
- goto out;
- first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
- disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
- if (!disk_crcs)
- goto out;
-
ret = readahead_cache(inode);
if (ret)
goto out;
@@ -311,18 +302,12 @@
struct btrfs_free_space *e;
void *addr;
unsigned long offset = 0;
- unsigned long start_offset = 0;
int need_loop = 0;
if (!num_entries && !num_bitmaps)
break;
- if (index == 0) {
- start_offset = first_page_offset;
- offset = start_offset;
- }
-
- page = grab_cache_page(inode->i_mapping, index);
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page)
goto free_cache;
@@ -342,8 +327,15 @@
if (index == 0) {
u64 *gen;
- memcpy(disk_crcs, addr, first_page_offset);
- gen = addr + (sizeof(u32) * num_checksums);
+ /*
+ * We put a bogus crc in the front of the first page in
+ * case old kernels try to mount a fs with the new
+ * format to make sure they discard the cache.
+ */
+ addr += sizeof(u64);
+ offset += sizeof(u64);
+
+ gen = addr;
if (*gen != BTRFS_I(inode)->generation) {
printk(KERN_ERR "btrfs: space cache generation"
" (%llu) does not match inode (%llu)\n",
@@ -355,24 +347,10 @@
page_cache_release(page);
goto free_cache;
}
- crc = (u32 *)disk_crcs;
+ addr += sizeof(u64);
+ offset += sizeof(u64);
}
- entry = addr + start_offset;
-
- /* First lets check our crc before we do anything fun */
- cur_crc = ~(u32)0;
- cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
- PAGE_CACHE_SIZE - start_offset);
- btrfs_csum_final(cur_crc, (char *)&cur_crc);
- if (cur_crc != *crc) {
- printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
- index);
- kunmap(page);
- unlock_page(page);
- page_cache_release(page);
- goto free_cache;
- }
- crc++;
+ entry = addr;
while (1) {
if (!num_entries)
@@ -470,8 +448,6 @@
ret = 1;
out:
- kfree(checksums);
- kfree(disk_crcs);
return ret;
free_cache:
__btrfs_remove_free_space_cache(ctl);
@@ -569,8 +545,7 @@
struct btrfs_key key;
u64 start, end, len;
u64 bytes = 0;
- u32 *crc, *checksums;
- unsigned long first_page_offset;
+ u32 crc = ~(u32)0;
int index = 0, num_pages = 0;
int entries = 0;
int bitmaps = 0;
@@ -590,34 +565,13 @@
num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
- /* Since the first page has all of our checksums and our generation we
- * need to calculate the offset into the page that we can start writing
- * our entries.
- */
- first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
-
filemap_write_and_wait(inode->i_mapping);
btrfs_wait_ordered_range(inode, inode->i_size &
~(root->sectorsize - 1), (u64)-1);
- /* make sure we don't overflow that first page */
- if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
- /* this is really the same as running out of space, where we also return 0 */
- printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
- ret = 0;
- goto out_update;
- }
-
- /* We need a checksum per page. */
- crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
- if (!crc)
- return -1;
-
pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
- if (!pages) {
- kfree(crc);
+ if (!pages)
return -1;
- }
/* Get the cluster for this block_group if it exists */
if (block_group && !list_empty(&block_group->cluster_list))
@@ -640,7 +594,7 @@
* know and don't freak out.
*/
while (index < num_pages) {
- page = grab_cache_page(inode->i_mapping, index);
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page) {
int i;
@@ -648,7 +602,7 @@
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
- goto out_free;
+ goto out;
}
pages[index] = page;
index++;
@@ -668,17 +622,11 @@
/* Write out the extent entries */
do {
struct btrfs_free_space_entry *entry;
- void *addr;
+ void *addr, *orig;
unsigned long offset = 0;
- unsigned long start_offset = 0;
next_page = false;
- if (index == 0) {
- start_offset = first_page_offset;
- offset = start_offset;
- }
-
if (index >= num_pages) {
out_of_space = true;
break;
@@ -686,10 +634,26 @@
page = pages[index];
- addr = kmap(page);
- entry = addr + start_offset;
+ orig = addr = kmap(page);
+ if (index == 0) {
+ u64 *gen;
- memset(addr, 0, PAGE_CACHE_SIZE);
+ /*
+ * We're going to put in a bogus crc for this page to
+ * make sure that old kernels that aren't aware of this
+ * format discard the cache.
+ */
+ addr += sizeof(u64);
+ offset += sizeof(u64);
+
+ gen = addr;
+ *gen = trans->transid;
+ addr += sizeof(u64);
+ offset += sizeof(u64);
+ }
+ entry = addr;
+
+ memset(addr, 0, PAGE_CACHE_SIZE - offset);
while (node && !next_page) {
struct btrfs_free_space *e;
@@ -752,13 +716,19 @@
next_page = true;
entry++;
}
- *crc = ~(u32)0;
- *crc = btrfs_csum_data(root, addr + start_offset, *crc,
- PAGE_CACHE_SIZE - start_offset);
- kunmap(page);
- btrfs_csum_final(*crc, (char *)crc);
- crc++;
+ /* Generate bogus crc value */
+ if (index == 0) {
+ u32 *tmp;
+ crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
+ PAGE_CACHE_SIZE - sizeof(u64));
+ btrfs_csum_final(crc, (char *)&crc);
+ crc++;
+ tmp = orig;
+ *tmp = crc;
+ }
+
+ kunmap(page);
bytes += PAGE_CACHE_SIZE;
@@ -779,11 +749,7 @@
addr = kmap(page);
memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
- *crc = ~(u32)0;
- *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
kunmap(page);
- btrfs_csum_final(*crc, (char *)crc);
- crc++;
bytes += PAGE_CACHE_SIZE;
list_del_init(&entry->list);
@@ -796,7 +762,7 @@
i_size_read(inode) - 1, &cached_state,
GFP_NOFS);
ret = 0;
- goto out_free;
+ goto out;
}
/* Zero out the rest of the pages just to make sure */
@@ -811,20 +777,6 @@
index++;
}
- /* Write the checksums and trans id to the first page */
- {
- void *addr;
- u64 *gen;
-
- page = pages[0];
-
- addr = kmap(page);
- memcpy(addr, checksums, sizeof(u32) * num_pages);
- gen = addr + (sizeof(u32) * num_pages);
- *gen = trans->transid;
- kunmap(page);
- }
-
ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
bytes, &cached_state);
btrfs_drop_pages(pages, num_pages);
@@ -833,7 +785,7 @@
if (ret) {
ret = 0;
- goto out_free;
+ goto out;
}
BTRFS_I(inode)->generation = trans->transid;
@@ -850,7 +802,7 @@
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
- goto out_free;
+ goto out;
}
leaf = path->nodes[0];
if (ret > 0) {
@@ -866,7 +818,7 @@
EXTENT_DO_ACCOUNTING, 0, 0, NULL,
GFP_NOFS);
btrfs_release_path(path);
- goto out_free;
+ goto out;
}
}
header = btrfs_item_ptr(leaf, path->slots[0],
@@ -879,11 +831,8 @@
ret = 1;
-out_free:
- kfree(checksums);
+out:
kfree(pages);
-
-out_update:
if (ret != 1) {
invalidate_inode_pages2_range(inode->i_mapping, 0, index);
BTRFS_I(inode)->generation = 0;
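
After this change the first cache page begins with two u64 slots, a deliberately wrong crc followed by the generation, and the entries start right after them. Laid out as a struct (hypothetical name, for illustration only):

	struct cache_page0_hdr {
		u64 crc_slot;	/* low 32 bits hold the page crc plus one, so
				 * an old kernel's verification fails and it
				 * drops the cache instead of misparsing it */
		u64 generation;	/* checked against BTRFS_I(inode)->generation */
		/* free space entries follow from byte 16 */
	};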
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e91b097..13e6255 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -750,15 +750,6 @@
return alloc_hint;
}
-static inline bool is_free_space_inode(struct btrfs_root *root,
- struct inode *inode)
-{
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
- return true;
- return false;
-}
-
/*
* when extent_io.c finds a delayed allocation range in the file,
* the call backs end up in this code. The basic idea is to
@@ -791,7 +782,7 @@
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- BUG_ON(is_free_space_inode(root, inode));
+ BUG_ON(btrfs_is_free_space_inode(root, inode));
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1072,7 +1063,7 @@
path = btrfs_alloc_path();
BUG_ON(!path);
- nolock = is_free_space_inode(root, inode);
+ nolock = btrfs_is_free_space_inode(root, inode);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
@@ -1298,7 +1289,9 @@
if (!(orig->state & EXTENT_DELALLOC))
return 0;
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
return 0;
}
@@ -1316,7 +1309,9 @@
if (!(other->state & EXTENT_DELALLOC))
return 0;
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents--;
+ spin_unlock(&BTRFS_I(inode)->lock);
return 0;
}
@@ -1337,12 +1332,15 @@
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !is_free_space_inode(root, inode);
+ bool do_list = !btrfs_is_free_space_inode(root, inode);
- if (*bits & EXTENT_FIRST_DELALLOC)
+ if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
- else
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ } else {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
@@ -1370,12 +1368,15 @@
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !is_free_space_inode(root, inode);
+ bool do_list = !btrfs_is_free_space_inode(root, inode);
- if (*bits & EXTENT_FIRST_DELALLOC)
+ if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
- else if (!(*bits & EXTENT_DO_ACCOUNTING))
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+ } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents--;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
@@ -1477,7 +1478,7 @@
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- if (is_free_space_inode(root, inode))
+ if (btrfs_is_free_space_inode(root, inode))
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
else
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1726,7 +1727,7 @@
return 0;
BUG_ON(!ordered_extent);
- nolock = is_free_space_inode(root, inode);
+ nolock = btrfs_is_free_space_inode(root, inode);
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
@@ -2531,13 +2532,6 @@
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
- if (!leaf->map_token)
- map_private_extent_buffer(leaf, (unsigned long)inode_item,
- sizeof(struct btrfs_inode_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
-
inode->i_mode = btrfs_inode_mode(leaf, inode_item);
inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
inode->i_uid = btrfs_inode_uid(leaf, inode_item);
@@ -2575,11 +2569,6 @@
if (!maybe_acls)
cache_no_acl(inode);
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
btrfs_free_path(path);
switch (inode->i_mode & S_IFMT) {
@@ -2624,13 +2613,6 @@
struct btrfs_inode_item *item,
struct inode *inode)
{
- if (!leaf->map_token)
- map_private_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_inode_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
-
btrfs_set_inode_uid(leaf, item, inode->i_uid);
btrfs_set_inode_gid(leaf, item, inode->i_gid);
btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2659,11 +2641,6 @@
btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
btrfs_set_inode_block_group(leaf, item, 0);
-
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
}
/*
@@ -2684,7 +2661,7 @@
* The data relocation inode should also be directly updated
* without delay
*/
- if (!is_free_space_inode(root, inode)
+ if (!btrfs_is_free_space_inode(root, inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
@@ -3398,7 +3375,7 @@
ret = -ENOMEM;
again:
- page = grab_cache_page(mapping, index);
+ page = find_or_create_page(mapping, index, GFP_NOFS);
if (!page) {
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out;
@@ -3634,7 +3611,7 @@
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
- is_free_space_inode(root, inode)))
+ btrfs_is_free_space_inode(root, inode)))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -4271,7 +4248,7 @@
if (BTRFS_I(inode)->dummy_inode)
return 0;
- if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
+ if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -4467,7 +4444,7 @@
inode->i_generation = BTRFS_I(inode)->generation;
btrfs_set_inode_space_info(root, inode);
- if (mode & S_IFDIR)
+ if (S_ISDIR(mode))
owner = 0;
else
owner = 1;
@@ -4512,7 +4489,7 @@
btrfs_inherit_iflags(inode, dir);
- if ((mode & S_IFREG)) {
+ if (S_ISREG(mode)) {
if (btrfs_test_opt(root, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
if (btrfs_test_opt(root, NODATACOW) ||
@@ -6728,8 +6705,9 @@
ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0;
- atomic_set(&ei->outstanding_extents, 0);
- atomic_set(&ei->reserved_extents, 0);
+ spin_lock_init(&ei->lock);
+ ei->outstanding_extents = 0;
+ ei->reserved_extents = 0;
ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0;
@@ -6767,8 +6745,8 @@
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
- WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
- WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
+ WARN_ON(BTRFS_I(inode)->outstanding_extents);
+ WARN_ON(BTRFS_I(inode)->reserved_extents);
/*
* This can happen where we create an inode, but somebody else also
@@ -6823,7 +6801,7 @@
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 &&
- !is_free_space_inode(root, inode))
+ !btrfs_is_free_space_inode(root, inode))
return 1;
else
return generic_drop_inode(inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6225433..0b980af 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -859,8 +859,8 @@
/* step one, lock all the pages */
for (i = 0; i < num_pages; i++) {
struct page *page;
- page = grab_cache_page(inode->i_mapping,
- start_index + i);
+ page = find_or_create_page(inode->i_mapping,
+ start_index + i, GFP_NOFS);
if (!page)
break;
@@ -930,7 +930,9 @@
GFP_NOFS);
if (i_done != num_pages) {
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode,
(num_pages - i_done) << PAGE_CACHE_SHIFT);
}
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 66fa43d..d77b67c 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,185 +24,197 @@
#include "extent_io.h"
#include "locking.h"
-static inline void spin_nested(struct extent_buffer *eb)
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
+
+/*
+ * if we currently have a spinning reader or writer lock
+ * (indicated by the rw flag) this will bump the count
+ * of blocking holders and drop the spinlock.
+ */
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
- spin_lock(&eb->lock);
+ if (rw == BTRFS_WRITE_LOCK) {
+ if (atomic_read(&eb->blocking_writers) == 0) {
+ WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+ atomic_dec(&eb->spinning_writers);
+ btrfs_assert_tree_locked(eb);
+ atomic_inc(&eb->blocking_writers);
+ write_unlock(&eb->lock);
+ }
+ } else if (rw == BTRFS_READ_LOCK) {
+ btrfs_assert_tree_read_locked(eb);
+ atomic_inc(&eb->blocking_readers);
+ WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+ atomic_dec(&eb->spinning_readers);
+ read_unlock(&eb->lock);
+ }
+ return;
}
/*
- * Setting a lock to blocking will drop the spinlock and set the
- * flag that forces other procs who want the lock to wait. After
- * this you can safely schedule with the lock held.
+ * if we currently have a blocking lock, take the spinlock
+ * and drop our blocking count
*/
-void btrfs_set_lock_blocking(struct extent_buffer *eb)
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
- set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
- spin_unlock(&eb->lock);
+ if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
+ BUG_ON(atomic_read(&eb->blocking_writers) != 1);
+ write_lock(&eb->lock);
+ WARN_ON(atomic_read(&eb->spinning_writers));
+ atomic_inc(&eb->spinning_writers);
+ if (atomic_dec_and_test(&eb->blocking_writers))
+ wake_up(&eb->write_lock_wq);
+ } else if (rw == BTRFS_READ_LOCK_BLOCKING) {
+ BUG_ON(atomic_read(&eb->blocking_readers) == 0);
+ read_lock(&eb->lock);
+ atomic_inc(&eb->spinning_readers);
+ if (atomic_dec_and_test(&eb->blocking_readers))
+ wake_up(&eb->read_lock_wq);
}
- /* exit with the spin lock released and the bit set */
+ return;
}
/*
- * clearing the blocking flag will take the spinlock again.
- * After this you can't safely schedule
+ * take a spinning read lock. This will wait for any blocking
+ * writers
*/
-void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+void btrfs_tree_read_lock(struct extent_buffer *eb)
{
- if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
- spin_nested(eb);
- clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
- smp_mb__after_clear_bit();
+again:
+ wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+ read_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_writers)) {
+ read_unlock(&eb->lock);
+ wait_event(eb->write_lock_wq,
+ atomic_read(&eb->blocking_writers) == 0);
+ goto again;
}
- /* exit with the spin lock held */
+ atomic_inc(&eb->read_locks);
+ atomic_inc(&eb->spinning_readers);
}
/*
- * unfortunately, many of the places that currently set a lock to blocking
- * don't end up blocking for very long, and often they don't block
- * at all. For a dbench 50 run, if we don't spin on the blocking bit
- * at all, the context switch rate can jump up to 400,000/sec or more.
- *
- * So, we're still stuck with this crummy spin on the blocking bit,
- * at least until the most common causes of the short blocks
- * can be dealt with.
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers
*/
-static int btrfs_spin_on_block(struct extent_buffer *eb)
+int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
- int i;
+ if (atomic_read(&eb->blocking_writers))
+ return 0;
- for (i = 0; i < 512; i++) {
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- return 1;
- if (need_resched())
- break;
- cpu_relax();
+ read_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_writers)) {
+ read_unlock(&eb->lock);
+ return 0;
}
- return 0;
-}
-
-/*
- * This is somewhat different from trylock. It will take the
- * spinlock but if it finds the lock is set to blocking, it will
- * return without the lock held.
- *
- * returns 1 if it was able to take the lock and zero otherwise
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
- */
-int btrfs_try_spin_lock(struct extent_buffer *eb)
-{
- int i;
-
- if (btrfs_spin_on_block(eb)) {
- spin_nested(eb);
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- return 1;
- spin_unlock(&eb->lock);
- }
- /* spin for a bit on the BLOCKING flag */
- for (i = 0; i < 2; i++) {
- cpu_relax();
- if (!btrfs_spin_on_block(eb))
- break;
-
- spin_nested(eb);
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- return 1;
- spin_unlock(&eb->lock);
- }
- return 0;
-}
-
-/*
- * the autoremove wake function will return 0 if it tried to wake up
- * a process that was already awake, which means that process won't
- * count as an exclusive wakeup. The waitq code will continue waking
- * procs until it finds one that was actually sleeping.
- *
- * For btrfs, this isn't quite what we want. We want a single proc
- * to be notified that the lock is ready for taking. If that proc
- * already happen to be awake, great, it will loop around and try for
- * the lock.
- *
- * So, btrfs_wake_function always returns 1, even when the proc that we
- * tried to wake up was already awake.
- */
-static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
- int sync, void *key)
-{
- autoremove_wake_function(wait, mode, sync, key);
+ atomic_inc(&eb->read_locks);
+ atomic_inc(&eb->spinning_readers);
return 1;
}
/*
- * returns with the extent buffer spinlocked.
- *
- * This will spin and/or wait as required to take the lock, and then
- * return with the spinlock held.
- *
- * After this call, scheduling is not safe without first calling
- * btrfs_set_lock_blocking()
+ * returns 1 if we get the write lock and 0 if we don't
+ * this won't wait for blocking writers or readers
+ */
+int btrfs_try_tree_write_lock(struct extent_buffer *eb)
+{
+ if (atomic_read(&eb->blocking_writers) ||
+ atomic_read(&eb->blocking_readers))
+ return 0;
+ write_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_writers) ||
+ atomic_read(&eb->blocking_readers)) {
+ write_unlock(&eb->lock);
+ return 0;
+ }
+ atomic_inc(&eb->write_locks);
+ atomic_inc(&eb->spinning_writers);
+ return 1;
+}
+
+/*
+ * drop a spinning read lock
+ */
+void btrfs_tree_read_unlock(struct extent_buffer *eb)
+{
+ btrfs_assert_tree_read_locked(eb);
+ WARN_ON(atomic_read(&eb->spinning_readers) == 0);
+ atomic_dec(&eb->spinning_readers);
+ atomic_dec(&eb->read_locks);
+ read_unlock(&eb->lock);
+}
+
+/*
+ * drop a blocking read lock
+ */
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
+{
+ btrfs_assert_tree_read_locked(eb);
+ WARN_ON(atomic_read(&eb->blocking_readers) == 0);
+ if (atomic_dec_and_test(&eb->blocking_readers))
+ wake_up(&eb->read_lock_wq);
+ atomic_dec(&eb->read_locks);
+}
+
+/*
+ * take a spinning write lock. This will wait for both
+ * blocking readers and writers
*/
int btrfs_tree_lock(struct extent_buffer *eb)
{
- DEFINE_WAIT(wait);
- wait.func = btrfs_wake_function;
-
- if (!btrfs_spin_on_block(eb))
- goto sleep;
-
- while(1) {
- spin_nested(eb);
-
- /* nobody is blocking, exit with the spinlock held */
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- return 0;
-
- /*
- * we have the spinlock, but the real owner is blocking.
- * wait for them
- */
- spin_unlock(&eb->lock);
-
- /*
- * spin for a bit, and if the blocking flag goes away,
- * loop around
- */
- cpu_relax();
- if (btrfs_spin_on_block(eb))
- continue;
-sleep:
- prepare_to_wait_exclusive(&eb->lock_wq, &wait,
- TASK_UNINTERRUPTIBLE);
-
- if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- schedule();
-
- finish_wait(&eb->lock_wq, &wait);
+again:
+ wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
+ wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
+ write_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_readers)) {
+ write_unlock(&eb->lock);
+ wait_event(eb->read_lock_wq,
+ atomic_read(&eb->blocking_readers) == 0);
+ goto again;
}
+ if (atomic_read(&eb->blocking_writers)) {
+ write_unlock(&eb->lock);
+ wait_event(eb->write_lock_wq,
+ atomic_read(&eb->blocking_writers) == 0);
+ goto again;
+ }
+ WARN_ON(atomic_read(&eb->spinning_writers));
+ atomic_inc(&eb->spinning_writers);
+ atomic_inc(&eb->write_locks);
return 0;
}
+/*
+ * drop a spinning or a blocking write lock.
+ */
int btrfs_tree_unlock(struct extent_buffer *eb)
{
- /*
- * if we were a blocking owner, we don't have the spinlock held
- * just clear the bit and look for waiters
- */
- if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- smp_mb__after_clear_bit();
- else
- spin_unlock(&eb->lock);
+ int blockers = atomic_read(&eb->blocking_writers);
- if (waitqueue_active(&eb->lock_wq))
- wake_up(&eb->lock_wq);
+ BUG_ON(blockers > 1);
+
+ btrfs_assert_tree_locked(eb);
+ atomic_dec(&eb->write_locks);
+
+ if (blockers) {
+ WARN_ON(atomic_read(&eb->spinning_writers));
+ atomic_dec(&eb->blocking_writers);
+ smp_wmb();
+ wake_up(&eb->write_lock_wq);
+ } else {
+ WARN_ON(atomic_read(&eb->spinning_writers) != 1);
+ atomic_dec(&eb->spinning_writers);
+ write_unlock(&eb->lock);
+ }
return 0;
}
void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
- if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
- assert_spin_locked(&eb->lock);
+ BUG_ON(!atomic_read(&eb->write_locks));
+}
+
+void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+{
+ BUG_ON(!atomic_read(&eb->read_locks));
}
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 5c33a56..17247dd 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -19,11 +19,43 @@
#ifndef __BTRFS_LOCKING_
#define __BTRFS_LOCKING_
+#define BTRFS_WRITE_LOCK 1
+#define BTRFS_READ_LOCK 2
+#define BTRFS_WRITE_LOCK_BLOCKING 3
+#define BTRFS_READ_LOCK_BLOCKING 4
+
int btrfs_tree_lock(struct extent_buffer *eb);
int btrfs_tree_unlock(struct extent_buffer *eb);
int btrfs_try_spin_lock(struct extent_buffer *eb);
-void btrfs_set_lock_blocking(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_tree_read_lock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_assert_tree_locked(struct extent_buffer *eb);
+int btrfs_try_tree_read_lock(struct extent_buffer *eb);
+int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+
+static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
+{
+ if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
+ btrfs_tree_unlock(eb);
+ else if (rw == BTRFS_READ_LOCK_BLOCKING)
+ btrfs_tree_read_unlock_blocking(eb);
+ else if (rw == BTRFS_READ_LOCK)
+ btrfs_tree_read_unlock(eb);
+ else
+ BUG();
+}
+
+static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
+{
+ btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
+}
+
+static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+ btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
+}
#endif
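
path->locks[level] now records which of the four lock states is held, so generic code can release correctly through btrfs_tree_unlock_rw(). Typical use of the API above, as a sketch:

	btrfs_tree_read_lock(eb);			/* spinning read  */
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	/* ... safe to schedule here ... */
	btrfs_tree_read_unlock_blocking(eb);

	if (btrfs_try_tree_write_lock(eb)) {		/* spinning write */
		path->locks[level] = BTRFS_WRITE_LOCK;
		/* ... modify the buffer ... */
		btrfs_tree_unlock_rw(eb, path->locks[level]);
		path->locks[level] = 0;
	}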
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 5e0a3dc..59bb176 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2955,7 +2955,8 @@
page_cache_sync_readahead(inode->i_mapping,
ra, NULL, index,
last_index + 1 - index);
- page = grab_cache_page(inode->i_mapping, index);
+ page = find_or_create_page(inode->i_mapping, index,
+ GFP_NOFS);
if (!page) {
btrfs_delalloc_release_metadata(inode,
PAGE_CACHE_SIZE);
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index c0f7eca..bc1f6ad 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,36 +50,22 @@
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
- /* ugly, but we want the fast path here */ \
- if (eb->map_token && offset >= eb->map_start && \
- offset + sizeof(((type *)0)->member) <= eb->map_start + \
- eb->map_len) { \
- p = (type *)(eb->kaddr + part_offset - eb->map_start); \
- return le##bits##_to_cpu(p->member); \
- } \
- { \
- int err; \
- char *map_token; \
- char *kaddr; \
- int unmap_on_exit = (eb->map_token == NULL); \
- unsigned long map_start; \
- unsigned long map_len; \
- u##bits res; \
- err = map_extent_buffer(eb, offset, \
- sizeof(((type *)0)->member), \
- &map_token, &kaddr, \
- &map_start, &map_len, KM_USER1); \
- if (err) { \
- __le##bits leres; \
- read_eb_member(eb, s, type, member, &leres); \
- return le##bits##_to_cpu(leres); \
- } \
- p = (type *)(kaddr + part_offset - map_start); \
- res = le##bits##_to_cpu(p->member); \
- if (unmap_on_exit) \
- unmap_extent_buffer(eb, map_token, KM_USER1); \
- return res; \
- } \
+ int err; \
+ char *kaddr; \
+ unsigned long map_start; \
+ unsigned long map_len; \
+ u##bits res; \
+ err = map_private_extent_buffer(eb, offset, \
+ sizeof(((type *)0)->member), \
+ &kaddr, &map_start, &map_len); \
+ if (err) { \
+ __le##bits leres; \
+ read_eb_member(eb, s, type, member, &leres); \
+ return le##bits##_to_cpu(leres); \
+ } \
+ p = (type *)(kaddr + part_offset - map_start); \
+ res = le##bits##_to_cpu(p->member); \
+ return res; \
} \
void btrfs_set_##name(struct extent_buffer *eb, \
type *s, u##bits val) \
@@ -87,36 +73,21 @@
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
- /* ugly, but we want the fast path here */ \
- if (eb->map_token && offset >= eb->map_start && \
- offset + sizeof(((type *)0)->member) <= eb->map_start + \
- eb->map_len) { \
- p = (type *)(eb->kaddr + part_offset - eb->map_start); \
- p->member = cpu_to_le##bits(val); \
- return; \
- } \
- { \
- int err; \
- char *map_token; \
- char *kaddr; \
- int unmap_on_exit = (eb->map_token == NULL); \
- unsigned long map_start; \
- unsigned long map_len; \
- err = map_extent_buffer(eb, offset, \
- sizeof(((type *)0)->member), \
- &map_token, &kaddr, \
- &map_start, &map_len, KM_USER1); \
- if (err) { \
- __le##bits val2; \
- val2 = cpu_to_le##bits(val); \
- write_eb_member(eb, s, type, member, &val2); \
- return; \
- } \
- p = (type *)(kaddr + part_offset - map_start); \
- p->member = cpu_to_le##bits(val); \
- if (unmap_on_exit) \
- unmap_extent_buffer(eb, map_token, KM_USER1); \
- } \
+ int err; \
+ char *kaddr; \
+ unsigned long map_start; \
+ unsigned long map_len; \
+ err = map_private_extent_buffer(eb, offset, \
+ sizeof(((type *)0)->member), \
+ &kaddr, &map_start, &map_len); \
+ if (err) { \
+ __le##bits val2; \
+ val2 = cpu_to_le##bits(val); \
+ write_eb_member(eb, s, type, member, &val2); \
+ return; \
+ } \
+ p = (type *)(kaddr + part_offset - map_start); \
+ p->member = cpu_to_le##bits(val); \
}
#include "ctree.h"
@@ -125,15 +96,6 @@
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
- if (eb->map_token && ptr >= eb->map_start &&
- ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
- memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
- sizeof(*disk_key));
- return;
- } else if (eb->map_token) {
- unmap_extent_buffer(eb, eb->map_token, KM_USER1);
- eb->map_token = NULL;
- }
read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
struct btrfs_key_ptr, key, disk_key);
}
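
For reference, a sketch of roughly what the simplified getter macro above
expands to for a hypothetical 64-bit member (struct btrfs_foo and its
member bar are illustrative, not from the patch). The point is that
map_private_extent_buffer() replaces the map_token bookkeeping, so no
unmap call is needed on either path:

	static u64 btrfs_foo_bar(struct extent_buffer *eb, struct btrfs_foo *s)
	{
		unsigned long part_offset = (unsigned long)s;
		unsigned long offset = part_offset + offsetof(struct btrfs_foo, bar);
		struct btrfs_foo *p;
		unsigned long map_start;
		unsigned long map_len;
		char *kaddr;
		int err;

		err = map_private_extent_buffer(eb, offset,
						sizeof(((struct btrfs_foo *)0)->bar),
						&kaddr, &map_start, &map_len);
		if (err) {	/* member straddles a page boundary: slow path */
			__le64 leres;

			read_eb_member(eb, s, struct btrfs_foo, bar, &leres);
			return le64_to_cpu(leres);
		}
		p = (struct btrfs_foo *)(kaddr + part_offset - map_start);
		return le64_to_cpu(p->bar);
	}
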
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 51dcec8..eb55863 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -260,7 +260,7 @@
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
- int retries = 0;
+ u64 num_bytes = 0;
int ret;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -274,6 +274,19 @@
h->block_rsv = NULL;
goto got_it;
}
+
+ /*
+ * Do the reservation before we join the transaction so we can do all
+ * the appropriate flushing if need be.
+ */
+ if (num_items > 0 && root != root->fs_info->chunk_root) {
+ num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+ ret = btrfs_block_rsv_add(NULL, root,
+ &root->fs_info->trans_block_rsv,
+ num_bytes);
+ if (ret)
+ return ERR_PTR(ret);
+ }
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
@@ -310,24 +323,9 @@
goto again;
}
- if (num_items > 0) {
- ret = btrfs_trans_reserve_metadata(h, root, num_items);
- if (ret == -EAGAIN && !retries) {
- retries++;
- btrfs_commit_transaction(h, root);
- goto again;
- } else if (ret == -EAGAIN) {
- /*
- * We have already retried and got EAGAIN, so really we
- * don't have space, so set ret to -ENOSPC.
- */
- ret = -ENOSPC;
- }
-
- if (ret < 0) {
- btrfs_end_transaction(h, root);
- return ERR_PTR(ret);
- }
+ if (num_bytes) {
+ h->block_rsv = &root->fs_info->trans_block_rsv;
+ h->bytes_reserved = num_bytes;
}
got_it:
@@ -499,10 +497,17 @@
}
if (lock && cur_trans->blocked && !cur_trans->in_commit) {
- if (throttle)
+ if (throttle) {
+ /*
+ * We may race with somebody else here and end up having
+ * to call end_transaction on ourselves again, so inc
+ * our use_count.
+ */
+ trans->use_count++;
return btrfs_commit_transaction(trans, root);
- else
+ } else {
wake_up_process(info->transaction_kthread);
+ }
}
WARN_ON(cur_trans != info->running_transaction);
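
In short: the retry-on-EAGAIN dance is gone because the metadata
reservation now happens before the transaction is joined, so the
reservation code is free to flush and wait without deadlocking against
an open transaction. A simplified sketch of the new ordering (names from
the hunks above, error handling trimmed; the helper is hypothetical):

	static u64 example_reserve_before_join(struct btrfs_root *root,
					       int num_items)
	{
		u64 num_bytes = btrfs_calc_trans_metadata_size(root, num_items);

		if (btrfs_block_rsv_add(NULL, root,
					&root->fs_info->trans_block_rsv,
					num_bytes))
			return 0;	/* caller returns ERR_PTR instead */
		return num_bytes;	/* later stored in h->bytes_reserved */
	}
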
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4ce8a9f..ac278dd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1730,8 +1730,8 @@
btrfs_read_buffer(next, ptr_gen);
btrfs_tree_lock(next);
- clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
@@ -1796,8 +1796,8 @@
next = path->nodes[*level];
btrfs_tree_lock(next);
- clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
@@ -1864,8 +1864,8 @@
next = path->nodes[orig_level];
btrfs_tree_lock(next);
- clean_tree_block(trans, log, next);
btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, log, next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
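
All three tree-log.c hunks make the same fix: the extent buffer lock
must be switched to blocking before clean_tree_block() runs, since that
call can sleep. A sketch of the corrected sequence (names from the
hunks; example_drop_log_block is hypothetical):

	static void example_drop_log_block(struct btrfs_trans_handle *trans,
					   struct btrfs_root *log,
					   struct extent_buffer *next)
	{
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);	/* must precede the sleepable call */
		clean_tree_block(trans, log, next);
		btrfs_wait_tree_block_writeback(next);
		btrfs_tree_unlock(next);
	}
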
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 19450bc..b89e372 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3595,7 +3595,7 @@
if (!sb)
return -ENOMEM;
btrfs_set_buffer_uptodate(sb);
- btrfs_set_buffer_lockdep_class(sb, 0);
+ btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 5366fe4..d733b9c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -102,43 +102,57 @@
if (!path)
return -ENOMEM;
- /* first lets see if we already have this xattr */
- di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
- strlen(name), -1);
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- }
-
- /* ok we already have this xattr, lets remove it */
- if (di) {
- /* if we want create only exit */
- if (flags & XATTR_CREATE) {
- ret = -EEXIST;
+ if (flags & XATTR_REPLACE) {
+ di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
+ name_len, -1);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
goto out;
- }
-
- ret = btrfs_delete_one_dir_name(trans, root, path, di);
- BUG_ON(ret);
- btrfs_release_path(path);
-
- /* if we don't have a value then we are removing the xattr */
- if (!value)
- goto out;
- } else {
- btrfs_release_path(path);
-
- if (flags & XATTR_REPLACE) {
- /* we couldn't find the attr to replace */
+ } else if (!di) {
ret = -ENODATA;
goto out;
}
+ ret = btrfs_delete_one_dir_name(trans, root, path, di);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
}
- /* ok we have to create a completely new xattr */
+again:
ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
name, name_len, value, size);
- BUG_ON(ret);
+ if (ret == -EEXIST) {
+ if (flags & XATTR_CREATE)
+ goto out;
+ /*
+ * We can't use the path we already have since we won't have the
+ * proper locking for a delete, so release the path and
+ * re-lookup to delete the thing.
+ */
+ btrfs_release_path(path);
+ di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+ name, name_len, -1);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ } else if (!di) {
+ /* Shouldn't happen but just in case... */
+ btrfs_release_path(path);
+ goto again;
+ }
+
+ ret = btrfs_delete_one_dir_name(trans, root, path, di);
+ if (ret)
+ goto out;
+
+ /*
+ * We have a value to set, so go back and try to insert it now.
+ */
+ if (value) {
+ btrfs_release_path(path);
+ goto again;
+ }
+ }
out:
btrfs_free_path(path);
return ret;
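
The rewritten flow above implements the usual setxattr(2) flag
semantics without BUG_ON: XATTR_CREATE fails if the attribute already
exists, XATTR_REPLACE fails if it does not. A minimal userspace sketch
of those semantics (path and attribute names are illustrative):

	#include <sys/xattr.h>

	static int example(const char *path)
	{
		/* fails with EEXIST if "user.demo" is already set */
		if (setxattr(path, "user.demo", "v1", 2, XATTR_CREATE))
			return -1;
		/* fails with ENODATA if "user.demo" does not exist */
		return setxattr(path, "user.demo", "v2", 2, XATTR_REPLACE);
	}
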
diff --git a/fs/dcache.c b/fs/dcache.c
index be18598..b05aac3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2138,8 +2138,9 @@
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way. Caller hold
- * rename_lock.
+ * dcache entries should not be moved in this way. Caller must hold
+ * rename_lock, the i_mutex of the source and target directories,
+ * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
*/
static void __d_move(struct dentry * dentry, struct dentry * target)
{
@@ -2202,7 +2203,8 @@
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
+ * dcache entries should not be moved in this way. See the locking
+ * requirements for __d_move.
*/
void d_move(struct dentry *dentry, struct dentry *target)
{
@@ -2320,7 +2322,8 @@
* @inode: inode to bind to the dentry, to which aliases may be attached
*
 * Introduces a dentry into the tree, substituting an extant disconnected
- * root directory alias in its place if there is one
+ * root directory alias in its place if there is one. Caller must hold the
+ * i_mutex of the parent directory.
*/
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 43c7c43..b36c557 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -29,6 +29,7 @@
#define ECRYPTFS_KERNEL_H
#include <keys/user-type.h>
+#include <keys/encrypted-type.h>
#include <linux/fs.h>
#include <linux/fs_stack.h>
#include <linux/namei.h>
@@ -36,125 +37,18 @@
#include <linux/hash.h>
#include <linux/nsproxy.h>
#include <linux/backing-dev.h>
+#include <linux/ecryptfs.h>
-/* Version verification for shared data structures w/ userspace */
-#define ECRYPTFS_VERSION_MAJOR 0x00
-#define ECRYPTFS_VERSION_MINOR 0x04
-#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03
-/* These flags indicate which features are supported by the kernel
- * module; userspace tools such as the mount helper read
- * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
- * how to behave. */
-#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
-#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
-#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
-#define ECRYPTFS_VERSIONING_POLICY 0x00000008
-#define ECRYPTFS_VERSIONING_XATTR 0x00000010
-#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
-#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040
-#define ECRYPTFS_VERSIONING_HMAC 0x00000080
-#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100
-#define ECRYPTFS_VERSIONING_GCM 0x00000200
-#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
- | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
- | ECRYPTFS_VERSIONING_PUBKEY \
- | ECRYPTFS_VERSIONING_XATTR \
- | ECRYPTFS_VERSIONING_MULTKEY \
- | ECRYPTFS_VERSIONING_DEVMISC \
- | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
-#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
-#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
-#define ECRYPTFS_SALT_SIZE 8
-#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
-/* The original signature size is only for what is stored on disk; all
- * in-memory representations are expanded hex, so it better adapted to
- * be passed around or referenced on the command line */
-#define ECRYPTFS_SIG_SIZE 8
-#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
-#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
-#define ECRYPTFS_MAX_KEY_BYTES 64
-#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
#define ECRYPTFS_DEFAULT_IV_BYTES 16
-#define ECRYPTFS_FILE_VERSION 0x03
#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
-#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
#define ECRYPTFS_DEFAULT_NUM_USERS 4
#define ECRYPTFS_MAX_NUM_USERS 32768
#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
-#define RFC2440_CIPHER_DES3_EDE 0x02
-#define RFC2440_CIPHER_CAST_5 0x03
-#define RFC2440_CIPHER_BLOWFISH 0x04
-#define RFC2440_CIPHER_AES_128 0x07
-#define RFC2440_CIPHER_AES_192 0x08
-#define RFC2440_CIPHER_AES_256 0x09
-#define RFC2440_CIPHER_TWOFISH 0x0a
-#define RFC2440_CIPHER_CAST_6 0x0b
-
-#define RFC2440_CIPHER_RSA 0x01
-
-/**
- * For convenience, we may need to pass around the encrypted session
- * key between kernel and userspace because the authentication token
- * may not be extractable. For example, the TPM may not release the
- * private key, instead requiring the encrypted data and returning the
- * decrypted data.
- */
-struct ecryptfs_session_key {
-#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
-#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
-#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
-#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
- u32 flags;
- u32 encrypted_key_size;
- u32 decrypted_key_size;
- u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
- u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
-};
-
-struct ecryptfs_password {
- u32 password_bytes;
- s32 hash_algo;
- u32 hash_iterations;
- u32 session_key_encryption_key_bytes;
-#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
-#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
- u32 flags;
- /* Iterated-hash concatenation of salt and passphrase */
- u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
- u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
- /* Always in expanded hex */
- u8 salt[ECRYPTFS_SALT_SIZE];
-};
-
-enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
-
-struct ecryptfs_private_key {
- u32 key_size;
- u32 data_len;
- u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
- char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1];
- u8 data[];
-};
-
-/* May be a password or a private key */
-struct ecryptfs_auth_tok {
- u16 version; /* 8-bit major and 8-bit minor */
- u16 token_type;
-#define ECRYPTFS_ENCRYPT_ONLY 0x00000001
- u32 flags;
- struct ecryptfs_session_key session_key;
- u8 reserved[32];
- union {
- struct ecryptfs_password password;
- struct ecryptfs_private_key private_key;
- } token;
-} __attribute__ ((packed));
-
void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok);
extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size);
extern void ecryptfs_from_hex(char *dst, char *src, int dst_size);
@@ -185,11 +79,47 @@
} param;
};
+#if defined(CONFIG_ENCRYPTED_KEYS) || defined(CONFIG_ENCRYPTED_KEYS_MODULE)
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_encrypted_key_payload_data(struct key *key)
+{
+ if (key->type == &key_type_encrypted)
+ return (struct ecryptfs_auth_tok *)
+ (&((struct encrypted_key_payload *)key->payload.data)->payload_data);
+ else
+ return NULL;
+}
+
+static inline struct key *ecryptfs_get_encrypted_key(char *sig)
+{
+ return request_key(&key_type_encrypted, sig, NULL);
+}
+
+#else
+static inline struct ecryptfs_auth_tok *
+ecryptfs_get_encrypted_key_payload_data(struct key *key)
+{
+ return NULL;
+}
+
+static inline struct key *ecryptfs_get_encrypted_key(char *sig)
+{
+ return ERR_PTR(-ENOKEY);
+}
+
+#endif /* CONFIG_ENCRYPTED_KEYS */
+
static inline struct ecryptfs_auth_tok *
ecryptfs_get_key_payload_data(struct key *key)
{
- return (struct ecryptfs_auth_tok *)
- (((struct user_key_payload*)key->payload.data)->data);
+ struct ecryptfs_auth_tok *auth_tok;
+
+ auth_tok = ecryptfs_get_encrypted_key_payload_data(key);
+ if (!auth_tok)
+ return (struct ecryptfs_auth_tok *)
+ (((struct user_key_payload *)key->payload.data)->data);
+ else
+ return auth_tok;
}
#define ECRYPTFS_MAX_KEYSET_SIZE 1024
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index fa8049e..c472533 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1635,11 +1635,14 @@
(*auth_tok_key) = request_key(&key_type_user, sig, NULL);
if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
- printk(KERN_ERR "Could not find key with description: [%s]\n",
- sig);
- rc = process_request_key_err(PTR_ERR(*auth_tok_key));
- (*auth_tok_key) = NULL;
- goto out;
+ (*auth_tok_key) = ecryptfs_get_encrypted_key(sig);
+ if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
+ printk(KERN_ERR "Could not find key with description: [%s]\n",
+ sig);
+ rc = process_request_key_err(PTR_ERR(*auth_tok_key));
+ (*auth_tok_key) = NULL;
+ goto out;
+ }
}
down_write(&(*auth_tok_key)->sem);
rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok);
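
The keystore.c hunk above adds a second lookup step: if no "user" type
key matches the signature, eCryptfs now also tries an "encrypted" type
key. A sketch of the resulting order (example_find_auth_key is
hypothetical; the called helpers are from the hunks above):

	static struct key *example_find_auth_key(char *sig)
	{
		struct key *k = request_key(&key_type_user, sig, NULL);

		if (!k || IS_ERR(k))
			k = ecryptfs_get_encrypted_key(sig); /* encrypted-type fallback */
		return k;
	}
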
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 5c0a6a4..503bfb0 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -61,7 +61,6 @@
#else
#include <linux/sched.h>
#define ext2_get_acl NULL
-#define ext2_get_acl NULL
#define ext2_set_acl NULL
static inline int
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 516516e..3bc073a 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1018,13 +1018,13 @@
fsname++;
if (lm->lm_mount == NULL) {
fs_info(sdp, "Now mounting FS...\n");
- complete(&sdp->sd_locking_init);
+ complete_all(&sdp->sd_locking_init);
return 0;
}
ret = lm->lm_mount(sdp, fsname);
if (ret == 0)
fs_info(sdp, "Joined cluster. Now mounting FS...\n");
- complete(&sdp->sd_locking_init);
+ complete_all(&sdp->sd_locking_init);
return ret;
}
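
complete() releases only one waiter per call, while complete_all()
releases every current and future waiter; a one-shot "locking is
initialised" gate like sd_locking_init needs the latter, since several
mounters may be waiting. A generic sketch of the pattern (names are
illustrative, not from the patch):

	#include <linux/completion.h>

	static DECLARE_COMPLETION(example_ready);

	static void example_signal_ready(void)
	{
		complete_all(&example_ready); /* wake all present and future waiters */
	}

	static void example_wait_ready(void)
	{
		wait_for_completion(&example_ready);
	}
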
diff --git a/fs/inode.c b/fs/inode.c
index a48fa53..d0c72ff 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -361,9 +361,11 @@
static inline void inode_sb_list_del(struct inode *inode)
{
- spin_lock(&inode_sb_list_lock);
- list_del_init(&inode->i_sb_list);
- spin_unlock(&inode_sb_list_lock);
+ if (!list_empty(&inode->i_sb_list)) {
+ spin_lock(&inode_sb_list_lock);
+ list_del_init(&inode->i_sb_list);
+ spin_unlock(&inode_sb_list_lock);
+ }
}
static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -796,6 +798,29 @@
EXPORT_SYMBOL(get_next_ino);
/**
+ * new_inode_pseudo - obtain an inode
+ * @sb: superblock
+ *
+ * Allocates a new inode for the given superblock.
+ * The inode will not be chained onto the superblock's s_inodes list.
+ * This means:
+ * - the fs cannot be unmounted
+ * - quotas, fsnotify and writeback cannot work on it
+ */
+struct inode *new_inode_pseudo(struct super_block *sb)
+{
+ struct inode *inode = alloc_inode(sb);
+
+ if (inode) {
+ spin_lock(&inode->i_lock);
+ inode->i_state = 0;
+ spin_unlock(&inode->i_lock);
+ INIT_LIST_HEAD(&inode->i_sb_list);
+ }
+ return inode;
+}
+
+/**
* new_inode - obtain an inode
* @sb: superblock
*
@@ -813,13 +838,9 @@
spin_lock_prefetch(&inode_sb_list_lock);
- inode = alloc_inode(sb);
- if (inode) {
- spin_lock(&inode->i_lock);
- inode->i_state = 0;
- spin_unlock(&inode->i_lock);
+ inode = new_inode_pseudo(sb);
+ if (inode)
inode_sb_list_add(inode);
- }
return inode;
}
EXPORT_SYMBOL(new_inode);
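
A sketch of the intended use of new_inode_pseudo(): an internal inode
(a pipe- or socket-style object) that must stay off the superblock's
s_inodes list. example_internal_inode is hypothetical; get_next_ino()
is exported just above:

	static struct inode *example_internal_inode(struct super_block *sb)
	{
		struct inode *inode = new_inode_pseudo(sb);

		if (inode)
			inode->i_ino = get_next_ino();
		return inode;
	}
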
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index eeead33..b81b35d 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -80,7 +80,7 @@
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
if (ret) {
jffs2_free_raw_inode(ri);
- if (S_ISLNK(inode->i_mode & S_IFMT))
+ if (S_ISLNK(inode->i_mode))
kfree(mdata);
return ret;
}
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 4496872..9cbd11a 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -3161,7 +3161,7 @@
{
int rc;
int dbitno, word, rembits, nb, nwords, wbitno, agno;
- s8 oldroot, *leaf;
+ s8 oldroot;
struct dmaptree *tp = (struct dmaptree *) & dp->tree;
/* save the current value of the root (i.e. maximum free string)
@@ -3169,9 +3169,6 @@
*/
oldroot = tp->stree[ROOT];
- /* pick up a pointer to the leaves of the dmap tree */
- leaf = tp->stree + LEAFIND;
-
/* determine the bit number and word within the dmap of the
* starting block.
*/
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f6cc0c0..af96060 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1143,7 +1143,6 @@
struct jfs_log *log;
struct tblock *tblk;
struct lrd *lrd;
- int lsn;
struct inode *ip;
struct jfs_inode_info *jfs_ip;
int k, n;
@@ -1310,7 +1309,7 @@
*/
lrd->type = cpu_to_le16(LOG_COMMIT);
lrd->length = 0;
- lsn = lmLog(log, tblk, lrd, NULL);
+ lmLog(log, tblk, lrd, NULL);
lmGroupCommit(log, tblk);
@@ -2935,7 +2934,6 @@
{
struct inode *ip;
struct jfs_inode_info *jfs_ip;
- int rc;
tid_t tid;
do {
@@ -2961,7 +2959,7 @@
*/
TXN_UNLOCK();
tid = txBegin(ip->i_sb, COMMIT_INODE);
- rc = txCommit(tid, 1, &ip, 0);
+ txCommit(tid, 1, &ip, 0);
txEnd(tid);
mutex_unlock(&jfs_ip->commit_mutex);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 29b1f1a..e17545e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -893,7 +893,7 @@
unchar *i_fastsymlink;
s64 xlen = 0;
int bmask = 0, xsize;
- s64 extent = 0, xaddr;
+ s64 xaddr;
struct metapage *mp;
struct super_block *sb;
struct tblock *tblk;
@@ -993,7 +993,6 @@
txAbort(tid, 0);
goto out3;
}
- extent = xaddr;
ip->i_size = ssize - 1;
while (ssize) {
/* This is kind of silly since PATH_MAX == 4K */
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index e374050..8392cb8 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -302,7 +302,8 @@
/* We appear to be out of the grace period */
wake_up_all(&host->h_gracewait);
}
- dprintk("lockd: server returns status %d\n", resp->status);
+ dprintk("lockd: server returns status %d\n",
+ ntohl(resp->status));
return 0; /* Okay, call complete */
}
@@ -690,7 +691,8 @@
goto out;
if (resp->status != nlm_lck_denied_nolocks)
- printk("lockd: unexpected unlock status: %d\n", resp->status);
+ printk("lockd: unexpected unlock status: %d\n",
+ ntohl(resp->status));
/* What to do now? I'm out of my depth... */
status = -ENOLCK;
out:
@@ -843,6 +845,7 @@
return -ENOLCK;
#endif
}
- printk(KERN_NOTICE "lockd: unexpected server status %d\n", status);
+ printk(KERN_NOTICE "lockd: unexpected server status %d\n",
+ ntohl(status));
return -ENOLCK;
}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 8151554..2cde5d9 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,6 +77,7 @@
config NFS_V4_1
bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+ select SUNRPC_BACKCHANNEL
select PNFS_FILE_LAYOUT
help
This option enables support for minor version 1 of the NFSv4 protocol
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954..74780f9 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -111,6 +111,7 @@
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
bool found = false;
@@ -118,21 +119,28 @@
LIST_HEAD(free_me_list);
spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
- if (nfs_compare_fh(&args->cbl_fh,
- &NFS_I(lo->plh_inode)->fh))
- continue;
- ino = igrab(lo->plh_inode);
- if (!ino)
- continue;
- found = true;
- /* Without this, layout can be freed as soon
- * as we release cl_lock.
- */
- get_layout_hdr(lo);
- break;
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (nfs_compare_fh(&args->cbl_fh,
+ &NFS_I(lo->plh_inode)->fh))
+ continue;
+ ino = igrab(lo->plh_inode);
+ if (!ino)
+ continue;
+ found = true;
+ /* Without this, layout can be freed as soon
+ * as we release cl_lock.
+ */
+ get_layout_hdr(lo);
+ break;
+ }
+ if (found)
+ break;
}
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+
if (!found)
return NFS4ERR_NOMATCHING_LAYOUT;
@@ -154,6 +162,7 @@
static u32 initiate_bulk_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@
};
spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
if ((args->cbl_recall_type == RETURN_FSID) &&
- memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
- &args->cbl_fsid, sizeof(struct nfs_fsid)))
+ memcmp(&server->fsid, &args->cbl_fsid,
+ sizeof(struct nfs_fsid)))
continue;
- if (!igrab(lo->plh_inode))
- continue;
- get_layout_hdr(lo);
- BUG_ON(!list_empty(&lo->plh_bulk_recall));
- list_add(&lo->plh_bulk_recall, &recall_list);
+
+ list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (!igrab(lo->plh_inode))
+ continue;
+ get_layout_hdr(lo);
+ BUG_ON(!list_empty(&lo->plh_bulk_recall));
+ list_add(&lo->plh_bulk_recall, &recall_list);
+ }
}
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+
list_for_each_entry_safe(lo, tmp,
&recall_list, plh_bulk_recall) {
ino = lo->plh_inode;
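
Both callback hunks switch from one per-client layout list to the
per-server lists added in this series, walking the RCU-protected
cl_superblocks list first. The common pattern, as a sketch
(example_walk_layouts and fn are hypothetical):

	static void example_walk_layouts(struct nfs_client *clp,
					 void (*fn)(struct pnfs_layout_hdr *))
	{
		struct nfs_server *server;
		struct pnfs_layout_hdr *lo;

		spin_lock(&clp->cl_lock);
		rcu_read_lock();
		list_for_each_entry_rcu(server, &clp->cl_superblocks,
					client_link)
			list_for_each_entry(lo, &server->layouts, plh_layouts)
				fn(lo);
		rcu_read_unlock();
		spin_unlock(&clp->cl_lock);
	}
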
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b3dc2b8..19ea7d9 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -188,9 +188,6 @@
cred = rpc_lookup_machine_cred();
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
-#if defined(CONFIG_NFS_V4_1)
- INIT_LIST_HEAD(&clp->cl_layouts);
-#endif
nfs_fscache_get_client_cookie(clp);
return clp;
@@ -293,6 +290,7 @@
nfs4_deviceid_purge_client(clp);
kfree(clp->cl_hostname);
+ kfree(clp->server_scope);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
INIT_LIST_HEAD(&server->delegations);
+ INIT_LIST_HEAD(&server->layouts);
atomic_set(&server->active, 0);
@@ -1464,7 +1463,7 @@
dprintk("<-- %s %p\n", __func__, clp);
return clp;
}
-EXPORT_SYMBOL(nfs4_set_ds_client);
+EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
/*
* Session has been established, and the client marked ready.
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index dd25c2a..321a66b 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -398,12 +398,11 @@
return err;
}
-static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
-
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+ set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
/**
@@ -441,7 +440,7 @@
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
}
}
@@ -508,7 +507,7 @@
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
}
}
@@ -539,7 +538,8 @@
int nfs_async_inode_return_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
rcu_read_lock();
@@ -549,7 +549,7 @@
rcu_read_unlock();
return -ENOENT;
}
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2a55347..ab12913 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -277,6 +277,9 @@
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen);
extern struct vfsmount *nfs_d_automount(struct path *path);
+#ifdef CONFIG_NFS_V4
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
+#endif
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -288,12 +291,22 @@
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif
+struct nfs_pageio_descriptor;
/* read.c */
extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
+
+extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
+extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
+extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_write_data *p);
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1f063ba..8102391 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@
}
#ifdef CONFIG_NFS_V4
-static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
{
struct gss_api_mech *mech;
struct xdr_netobj oid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b788f2e..1909ee8 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -48,6 +48,7 @@
NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT,
NFS4CLNT_LEASE_CONFIRM,
+ NFS4CLNT_SERVER_SCOPE_MISMATCH,
};
enum nfs4_session_state {
@@ -66,6 +67,8 @@
int cache_reply);
int (*validate_stateid)(struct nfs_delegation *,
const nfs4_stateid *);
+ int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
+ struct nfs_fsinfo *);
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -349,6 +352,8 @@
extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+extern void nfs41_handle_server_scope(struct nfs_client *,
+ struct server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index f9d03ab..be93a62 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -334,6 +334,9 @@
__func__, data->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset);
+ if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+ return PNFS_NOT_ATTEMPTED;
+
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -344,8 +347,7 @@
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s USE DS:ip %x %hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
/* No multipath support. Use first DS */
data->ds_clp = ds->ds_clp;
@@ -374,6 +376,9 @@
struct nfs_fh *fh;
int status;
+ if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+ return PNFS_NOT_ATTEMPTED;
+
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -384,9 +389,9 @@
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
data->inode->i_ino, sync, (size_t) data->args.count, offset,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ ds->ds_remotestr);
data->write_done_cb = filelayout_write_done_cb;
data->ds_clp = ds->ds_clp;
@@ -428,6 +433,14 @@
dprintk("--> %s\n", __func__);
+ /* FIXME: remove this check when layout segment support is added */
+ if (lgr->range.offset != 0 ||
+ lgr->range.length != NFS4_MAX_UINT64) {
+ dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
+ __func__);
+ goto out;
+ }
+
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset);
@@ -449,6 +462,10 @@
goto out;
} else
dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+ /* The deviceid we found is being reaped */
+ if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
+ goto out_put;
+
fl->dsaddr = dsaddr;
if (fl->first_stripe_index < 0 ||
@@ -659,7 +676,7 @@
* return true : coalesce page
* return false : don't coalesce page
*/
-bool
+static bool
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
@@ -670,8 +687,6 @@
!nfs_generic_pg_test(pgio, prev, req))
return false;
- if (!pgio->pg_lseg)
- return 1;
p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@@ -682,6 +697,52 @@
return (p_stripe == r_stripe);
}
+void
+filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ 0,
+ NFS4_MAX_UINT64,
+ IOMODE_READ,
+ GFP_KERNEL);
+ /* If no lseg, fall back to read through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_reset_read_mds(pgio);
+}
+
+void
+filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ 0,
+ NFS4_MAX_UINT64,
+ IOMODE_RW,
+ GFP_NOFS);
+ /* If no lseg, fall back to write through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_reset_write_mds(pgio);
+}
+
+static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+ .pg_init = filelayout_pg_init_read,
+ .pg_test = filelayout_pg_test,
+ .pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+ .pg_init = filelayout_pg_init_write,
+ .pg_test = filelayout_pg_test,
+ .pg_doio = pnfs_generic_pg_writepages,
+};
+
static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
{
return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -879,7 +940,8 @@
.owner = THIS_MODULE,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
- .pg_test = filelayout_pg_test,
+ .pg_read_ops = &filelayout_pg_read_ops,
+ .pg_write_ops = &filelayout_pg_write_ops,
.mark_pnfs_commit = filelayout_mark_pnfs_commit,
.choose_commit_list = filelayout_choose_commit_list,
.commit_pagelist = filelayout_commit_pagelist,
@@ -902,5 +964,7 @@
pnfs_unregister_layoutdriver(&filelayout_type);
}
+MODULE_ALIAS("nfs-layouttype4-1");
+
module_init(nfs4filelayout_init);
module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index cebe01e..2e42284 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -47,10 +47,17 @@
};
/* Individual ip address */
+struct nfs4_pnfs_ds_addr {
+ struct sockaddr_storage da_addr;
+ size_t da_addrlen;
+ struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
+ char *da_remotestr; /* human readable addr+port */
+};
+
struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
- u32 ds_ip_addr;
- u32 ds_port;
+ char *ds_remotestr; /* comma sep list of addrs */
+ struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
};
@@ -89,6 +96,12 @@
generic_hdr);
}
+static inline struct nfs4_deviceid_node *
+FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
+{
+ return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
+}
+
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3b7bf13..ed388aa 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -56,54 +56,139 @@
printk("%s NULL device\n", __func__);
return;
}
- printk(" ip_addr %x port %hu\n"
+ printk(" ds %s\n"
" ref count %d\n"
" client %p\n"
" cl_exchange_flags %x\n",
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ ds->ds_remotestr,
atomic_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
-/* nfs4_ds_cache_lock is held */
+static bool
+same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
+{
+ struct sockaddr_in *a, *b;
+ struct sockaddr_in6 *a6, *b6;
+
+ if (addr1->sa_family != addr2->sa_family)
+ return false;
+
+ switch (addr1->sa_family) {
+ case AF_INET:
+ a = (struct sockaddr_in *)addr1;
+ b = (struct sockaddr_in *)addr2;
+
+ if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+ a->sin_port == b->sin_port)
+ return true;
+ break;
+
+ case AF_INET6:
+ a6 = (struct sockaddr_in6 *)addr1;
+ b6 = (struct sockaddr_in6 *)addr2;
+
+ /* LINKLOCAL addresses must have matching scope_id */
+ if (ipv6_addr_scope(&a6->sin6_addr) ==
+ IPV6_ADDR_SCOPE_LINKLOCAL &&
+ a6->sin6_scope_id != b6->sin6_scope_id)
+ return false;
+
+ if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+ a6->sin6_port == b6->sin6_port)
+ return true;
+ break;
+
+ default:
+ dprintk("%s: unhandled address family: %u\n",
+ __func__, addr1->sa_family);
+ return false;
+ }
+
+ return false;
+}
+
+/*
+ * Look up a DS by address. Returns the first cached DS whose address
+ * list contains a match. nfs4_ds_cache_lock must be held.
+ */
static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(u32 ip_addr, u32 port)
+_data_server_lookup_locked(struct list_head *dsaddrs)
{
struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds_addr *da1, *da2;
- dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
- ntohl(ip_addr), ntohs(port));
-
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
- if (ds->ds_ip_addr == ip_addr &&
- ds->ds_port == port) {
- return ds;
+ list_for_each_entry(da1, dsaddrs, da_node) {
+ list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
+ list_for_each_entry(da2, &ds->ds_addrs, da_node) {
+ if (same_sockaddr(
+ (struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr))
+ return ds;
+ }
}
}
return NULL;
}
/*
+ * Compare two lists of addresses.
+ */
+static bool
+_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
+ struct list_head *dsaddrs2)
+{
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+ size_t count1 = 0,
+ count2 = 0;
+
+ list_for_each_entry(da1, dsaddrs1, da_node)
+ count1++;
+
+ list_for_each_entry(da2, dsaddrs2, da_node) {
+ bool found = false;
+ count2++;
+ list_for_each_entry(da1, dsaddrs1, da_node) {
+ if (same_sockaddr((struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return false;
+ }
+
+ return (count1 == count2);
+}
+
+/*
* Create an rpc connection to the nfs4_pnfs_ds data server
- * Currently only support IPv4
+ * Currently only supports IPv4 and IPv6 addresses
*/
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
- struct nfs_client *clp;
- struct sockaddr_in sin;
+ struct nfs_client *clp = ERR_PTR(-EIO);
+ struct nfs4_pnfs_ds_addr *da;
int status = 0;
- dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = ds->ds_ip_addr;
- sin.sin_port = ds->ds_port;
+ BUG_ON(list_empty(&ds->ds_addrs));
- clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
- sizeof(sin), IPPROTO_TCP);
+ list_for_each_entry(da, &ds->ds_addrs, da_node) {
+ dprintk("%s: DS %s: trying address %s\n",
+ __func__, ds->ds_remotestr, da->da_remotestr);
+
+ clp = nfs4_set_ds_client(mds_srv->nfs_client,
+ (struct sockaddr *)&da->da_addr,
+ da->da_addrlen, IPPROTO_TCP);
+ if (!IS_ERR(clp))
+ break;
+ }
+
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
@@ -115,8 +200,8 @@
goto out_put;
}
ds->ds_clp = clp;
- dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s [existing] server=%s\n", __func__,
+ ds->ds_remotestr);
goto out;
}
@@ -135,8 +220,7 @@
goto out_put;
ds->ds_clp = clp;
- dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
- ntohs(ds->ds_port));
+ dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
out_put:
@@ -147,12 +231,25 @@
static void
destroy_ds(struct nfs4_pnfs_ds *ds)
{
+ struct nfs4_pnfs_ds_addr *da;
+
dprintk("--> %s\n", __func__);
ifdebug(FACILITY)
print_ds(ds);
if (ds->ds_clp)
nfs_put_client(ds->ds_clp);
+
+ while (!list_empty(&ds->ds_addrs)) {
+ da = list_first_entry(&ds->ds_addrs,
+ struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
+ }
+
+ kfree(ds->ds_remotestr);
kfree(ds);
}
@@ -179,31 +276,96 @@
kfree(dsaddr);
}
-static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
+/*
+ * Create a string with a human-readable address and port to avoid
+ * complicated setup around many dprintks.
+ */
+static char *
+nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
{
- struct nfs4_pnfs_ds *tmp_ds, *ds;
+ struct nfs4_pnfs_ds_addr *da;
+ char *remotestr;
+ size_t len;
+ char *p;
- ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
+ len = 3; /* '{', '}' and eol */
+ list_for_each_entry(da, dsaddrs, da_node) {
+ len += strlen(da->da_remotestr) + 1; /* string plus comma */
+ }
+
+ remotestr = kzalloc(len, gfp_flags);
+ if (!remotestr)
+ return NULL;
+
+ p = remotestr;
+ *(p++) = '{';
+ len--;
+ list_for_each_entry(da, dsaddrs, da_node) {
+ size_t ll = strlen(da->da_remotestr);
+
+ if (ll > len)
+ goto out_err;
+
+ memcpy(p, da->da_remotestr, ll);
+ p += ll;
+ len -= ll;
+
+ if (len < 1)
+ goto out_err;
+ (*p++) = ',';
+ len--;
+ }
+ if (len < 2)
+ goto out_err;
+ *(p++) = '}';
+ *p = '\0';
+ return remotestr;
+out_err:
+ kfree(remotestr);
+ return NULL;
+}
+
+static struct nfs4_pnfs_ds *
+nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
+{
+ struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
+ char *remotestr;
+
+ if (list_empty(dsaddrs)) {
+ dprintk("%s: no addresses defined\n", __func__);
+ goto out;
+ }
+
+ ds = kzalloc(sizeof(*ds), gfp_flags);
if (!ds)
goto out;
+ /* this is only used for debugging, so it's OK if it's NULL */
+ remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
+
spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(ip_addr, port);
+ tmp_ds = _data_server_lookup_locked(dsaddrs);
if (tmp_ds == NULL) {
- ds->ds_ip_addr = ip_addr;
- ds->ds_port = port;
+ INIT_LIST_HEAD(&ds->ds_addrs);
+ list_splice_init(dsaddrs, &ds->ds_addrs);
+ ds->ds_remotestr = remotestr;
atomic_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
- dprintk("%s add new data server ip 0x%x\n", __func__,
- ds->ds_ip_addr);
+ dprintk("%s add new data server %s\n", __func__,
+ ds->ds_remotestr);
} else {
+ if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
+ dsaddrs)) {
+ dprintk("%s: multipath address mismatch: %s != %s",
+ __func__, tmp_ds->ds_remotestr, remotestr);
+ }
+ kfree(remotestr);
kfree(ds);
atomic_inc(&tmp_ds->ds_count);
- dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
- __func__, tmp_ds->ds_ip_addr,
+ dprintk("%s data server %s found, inc'ed ds_count to %d\n",
+ __func__, tmp_ds->ds_remotestr,
atomic_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
@@ -213,18 +375,22 @@
}
/*
- * Currently only support ipv4, and one multi-path address.
+ * Currently only supports ipv4, ipv6 and one multi-path address.
*/
-static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
+static struct nfs4_pnfs_ds_addr *
+decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
{
- struct nfs4_pnfs_ds *ds = NULL;
- char *buf;
- const char *ipend, *pstr;
- u32 ip_addr, port;
- int nlen, rlen, i;
+ struct nfs4_pnfs_ds_addr *da = NULL;
+ char *buf, *portstr;
+ u32 port;
+ int nlen, rlen;
int tmp[2];
__be32 *p;
+ char *netid, *match_netid;
+ size_t len, match_netid_len;
+ char *startsep = "";
+ char *endsep = "";
+
/* r_netid */
p = xdr_inline_decode(streamp, 4);
@@ -236,64 +402,123 @@
if (unlikely(!p))
goto out_err;
- /* Check that netid is "tcp" */
- if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
- dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
+ netid = kmalloc(nlen+1, gfp_flags);
+ if (unlikely(!netid))
goto out_err;
- }
- /* r_addr */
+ netid[nlen] = '\0';
+ memcpy(netid, p, nlen);
+
+ /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
p = xdr_inline_decode(streamp, 4);
if (unlikely(!p))
- goto out_err;
+ goto out_free_netid;
rlen = be32_to_cpup(p);
p = xdr_inline_decode(streamp, rlen);
if (unlikely(!p))
- goto out_err;
+ goto out_free_netid;
- /* ipv6 length plus port is legal */
- if (rlen > INET6_ADDRSTRLEN + 8) {
+ /* port is ".ABC.DEF", 8 chars max */
+ if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
dprintk("%s: Invalid address, length %d\n", __func__,
rlen);
- goto out_err;
+ goto out_free_netid;
}
buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) {
dprintk("%s: Not enough memory\n", __func__);
- goto out_err;
+ goto out_free_netid;
}
buf[rlen] = '\0';
memcpy(buf, p, rlen);
- /* replace the port dots with dashes for the in4_pton() delimiter*/
- for (i = 0; i < 2; i++) {
- char *res = strrchr(buf, '.');
- if (!res) {
- dprintk("%s: Failed finding expected dots in port\n",
- __func__);
- goto out_free;
- }
- *res = '-';
+ /* replace port '.' with '-' */
+ portstr = strrchr(buf, '.');
+ if (!portstr) {
+ dprintk("%s: Failed finding expected dot in port\n",
+ __func__);
+ goto out_free_buf;
+ }
+ *portstr = '-';
+
+ /* find '.' between address and port */
+ portstr = strrchr(buf, '.');
+ if (!portstr) {
+ dprintk("%s: Failed finding expected dot between address and "
+ "port\n", __func__);
+ goto out_free_buf;
+ }
+ *portstr = '\0';
+
+ da = kzalloc(sizeof(*da), gfp_flags);
+ if (unlikely(!da))
+ goto out_free_buf;
+
+ INIT_LIST_HEAD(&da->da_node);
+
+ if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+ sizeof(da->da_addr))) {
+ dprintk("%s: error parsing address %s\n", __func__, buf);
+ goto out_free_da;
}
- /* Currently only support ipv4 address */
- if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
- dprintk("%s: Only ipv4 addresses supported\n", __func__);
- goto out_free;
- }
-
- /* port */
- pstr = ipend;
- sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
+ portstr++;
+ sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
port = htons((tmp[0] << 8) | (tmp[1]));
- ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
- dprintk("%s: Decoded address and port %s\n", __func__, buf);
-out_free:
+ switch (da->da_addr.ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
+ da->da_addrlen = sizeof(struct sockaddr_in);
+ match_netid = "tcp";
+ match_netid_len = 3;
+ break;
+
+ case AF_INET6:
+ ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
+ da->da_addrlen = sizeof(struct sockaddr_in6);
+ match_netid = "tcp6";
+ match_netid_len = 4;
+ startsep = "[";
+ endsep = "]";
+ break;
+
+ default:
+ dprintk("%s: unsupported address family: %u\n",
+ __func__, da->da_addr.ss_family);
+ goto out_free_da;
+ }
+
+ if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
+ dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
+ __func__, netid, match_netid);
+ goto out_free_da;
+ }
+
+ /* save human readable address */
+ len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
+ da->da_remotestr = kzalloc(len, gfp_flags);
+
+ /* NULL is ok, only used for dprintk */
+ if (da->da_remotestr)
+ snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
+ buf, endsep, ntohs(port));
+
+ dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
kfree(buf);
+ kfree(netid);
+ return da;
+
+out_free_da:
+ kfree(da);
+out_free_buf:
+ dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
+ kfree(buf);
+out_free_netid:
+ kfree(netid);
out_err:
- return ds;
+ return NULL;
}
/* Decode opaque device data and return the result */
@@ -310,6 +535,8 @@
struct xdr_stream stream;
struct xdr_buf buf;
struct page *scratch;
+ struct list_head dsaddrs;
+ struct nfs4_pnfs_ds_addr *da;
/* set up xdr stream */
scratch = alloc_page(gfp_flags);
@@ -386,6 +613,8 @@
NFS_SERVER(ino)->nfs_client,
&pdev->dev_id);
+ INIT_LIST_HEAD(&dsaddrs);
+
for (i = 0; i < dsaddr->ds_num; i++) {
int j;
u32 mp_count;
@@ -395,48 +624,43 @@
goto out_err_free_deviceid;
mp_count = be32_to_cpup(p); /* multipath count */
- if (mp_count > 1) {
- printk(KERN_WARNING
- "%s: Multipath count %d not supported, "
- "skipping all greater than 1\n", __func__,
- mp_count);
- }
for (j = 0; j < mp_count; j++) {
- if (j == 0) {
- dsaddr->ds_list[i] = decode_and_add_ds(&stream,
- ino, gfp_flags);
- if (dsaddr->ds_list[i] == NULL)
- goto out_err_free_deviceid;
- } else {
- u32 len;
- /* skip extra multipath */
+ da = decode_ds_addr(&stream, gfp_flags);
+ if (da)
+ list_add_tail(&da->da_node, &dsaddrs);
+ }
+ if (list_empty(&dsaddrs)) {
+ dprintk("%s: no suitable DS addresses found\n",
+ __func__);
+ goto out_err_free_deviceid;
+ }
- /* read len, skip */
- p = xdr_inline_decode(&stream, 4);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- len = be32_to_cpup(p);
+ dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+ if (!dsaddr->ds_list[i])
+ goto out_err_drain_dsaddrs;
- p = xdr_inline_decode(&stream, len);
- if (unlikely(!p))
- goto out_err_free_deviceid;
-
- /* read len, skip */
- p = xdr_inline_decode(&stream, 4);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- len = be32_to_cpup(p);
-
- p = xdr_inline_decode(&stream, len);
- if (unlikely(!p))
- goto out_err_free_deviceid;
- }
+ /* If DS was already in cache, free ds addrs */
+ while (!list_empty(&dsaddrs)) {
+ da = list_first_entry(&dsaddrs,
+ struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
}
}
__free_page(scratch);
return dsaddr;
+out_err_drain_dsaddrs:
+ while (!list_empty(&dsaddrs)) {
+ da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
+ }
out_err_free_deviceid:
nfs4_fl_free_deviceid(dsaddr);
/* stripe_indicies was part of dsaddr */
@@ -591,13 +815,13 @@
static void
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
- int err, u32 ds_addr)
+ int err, const char *ds_remotestr)
{
u32 *p = (u32 *)&dsaddr->id_node.deviceid;
- printk(KERN_ERR "NFS: data server %x connection error %d."
+ printk(KERN_ERR "NFS: data server %s connection error %d."
" Deviceid [%x%x%x%x] marked out of use.\n",
- ds_addr, err, p[0], p[1], p[2], p[3]);
+ ds_remotestr, err, p[0], p[1], p[2], p[3]);
spin_lock(&nfs4_ds_cache_lock);
dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@@ -628,7 +852,7 @@
err = nfs4_ds_connect(s, ds);
if (err) {
filelayout_mark_devid_negative(dsaddr, err,
- ntohl(ds->ds_ip_addr));
+ ds->ds_remotestr);
return NULL;
}
}
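
For reference: the r_addr string decoded above is an RFC 5665 universal
address, where the last two dot-separated decimal fields are the high
and low octets of the port. A worked example (standalone, not kernel
code): "10.1.2.3.8.1" is address 10.1.2.3 with port (8 << 8) | 1 = 2049.

	#include <stdio.h>

	/* port from the trailing two universal-address octets */
	static unsigned int uaddr_port(unsigned int hi, unsigned int lo)
	{
		return (hi << 8) | lo;
	}

	int main(void)
	{
		printf("%u\n", uaddr_port(8, 1));	/* prints 2049 */
		return 0;
	}
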
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 26bece8..079614d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -80,7 +80,10 @@
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
struct nfs4_state *state);
-
+#ifdef CONFIG_NFS_V4_1
+static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
+static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
+#endif
/* Prevent leaks of NFSv4 errors into userland */
static int nfs4_map_errors(int err)
{
@@ -1689,6 +1692,20 @@
return ret;
}
+#if defined(CONFIG_NFS_V4_1)
+static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ int status;
+ struct nfs_server *server = NFS_SERVER(state->inode);
+
+ status = nfs41_test_stateid(server, state);
+ if (status == NFS_OK)
+ return 0;
+ nfs41_free_stateid(server, state);
+ return nfs4_open_expired(sp, state);
+}
+#endif
+
/*
* on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
* fields corresponding to attributes that were used to store the verifier.
@@ -2252,13 +2269,14 @@
static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ int minor_version = server->nfs_client->cl_minorversion;
int status = nfs4_lookup_root(server, fhandle, info);
if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
/*
* A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
* by nfs4_map_errors() as this function exits.
*/
- status = nfs4_find_root_sec(server, fhandle, info);
+ status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info);
if (status == 0)
status = nfs4_server_capabilities(server, fhandle);
if (status == 0)
@@ -4441,6 +4459,20 @@
return err;
}
+#if defined(CONFIG_NFS_V4_1)
+static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
+{
+ int status;
+ struct nfs_server *server = NFS_SERVER(state->inode);
+
+ status = nfs41_test_stateid(server, state);
+ if (status == NFS_OK)
+ return 0;
+ nfs41_free_stateid(server, state);
+ return nfs4_lock_expired(state, request);
+}
+#endif
+
static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -4779,6 +4811,16 @@
return -NFS4ERR_INVAL;
}
+static bool
+nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+{
+ if (a->server_scope_sz == b->server_scope_sz &&
+ memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
+ return true;
+
+ return false;
+}
+
/*
* nfs4_proc_exchange_id()
*
@@ -4821,9 +4863,31 @@
init_utsname()->domainname,
clp->cl_rpcclient->cl_auth->au_flavor);
+ res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
+ if (unlikely(!res.server_scope))
+ return -ENOMEM;
+
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (!status)
status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+
+ if (!status) {
+ if (clp->server_scope &&
+ !nfs41_same_server_scope(clp->server_scope,
+ res.server_scope)) {
+ dprintk("%s: server_scope mismatch detected\n",
+ __func__);
+ set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
+ kfree(clp->server_scope);
+ clp->server_scope = NULL;
+ }
+
+ if (!clp->server_scope)
+ clp->server_scope = res.server_scope;
+ else
+ kfree(res.server_scope);
+ }
+
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
@@ -5704,7 +5768,7 @@
{
struct nfs4_layoutreturn *lrp = calldata;
struct nfs_server *server;
- struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+ struct pnfs_layout_hdr *lo = lrp->args.layout;
dprintk("--> %s\n", __func__);
@@ -5733,7 +5797,7 @@
struct nfs4_layoutreturn *lrp = calldata;
dprintk("--> %s\n", __func__);
- put_layout_hdr(NFS_I(lrp->args.inode)->layout);
+ put_layout_hdr(lrp->args.layout);
kfree(calldata);
dprintk("<-- %s\n", __func__);
}
@@ -5901,6 +5965,143 @@
rpc_put_task(task);
return status;
}
+
+static int
+_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+{
+ struct nfs41_secinfo_no_name_args args = {
+ .style = SECINFO_STYLE_CURRENT_FH,
+ };
+ struct nfs4_secinfo_res res = {
+ .flavors = flavors,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+ return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+}
+
+static int
+nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+{
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+ err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+ switch (err) {
+ case 0:
+ case -NFS4ERR_WRONGSEC:
+ case -NFS4ERR_NOTSUPP:
+ break;
+ default:
+ err = nfs4_handle_exception(server, err, &exception);
+ }
+ } while (exception.retry);
+ return err;
+}
+
+static int
+nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsinfo *info)
+{
+ int err;
+ struct page *page;
+ rpc_authflavor_t flavor;
+ struct nfs4_secinfo_flavors *flavors;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ flavors = page_address(page);
+ err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+
+ /*
+ * Fall back on "guess and check" method if
+ * the server doesn't support SECINFO_NO_NAME
+ */
+ if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
+ err = nfs4_find_root_sec(server, fhandle, info);
+ goto out_freepage;
+ }
+ if (err)
+ goto out_freepage;
+
+ flavor = nfs_find_best_sec(flavors);
+ if (err == 0)
+ err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
+
+out_freepage:
+ put_page(page);
+ if (err == -EACCES)
+ return -EPERM;
+out:
+ return err;
+}
+static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+ int status;
+ struct nfs41_test_stateid_args args = {
+ .stateid = &state->stateid,
+ };
+ struct nfs41_test_stateid_res res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+ args.seq_args.sa_session = res.seq_res.sr_session = NULL;
+ status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
+ return status;
+}
+
+static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+ err = nfs4_handle_exception(server,
+ _nfs41_test_stateid(server, state),
+ &exception);
+ } while (exception.retry);
+ return err;
+}
+
+static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+ int status;
+ struct nfs41_free_stateid_args args = {
+ .stateid = &state->stateid,
+ };
+ struct nfs41_free_stateid_res res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+
+ args.seq_args.sa_session = res.seq_res.sr_session = NULL;
+ status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
+ return status;
+}
+
+static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+ err = nfs4_handle_exception(server,
+ _nfs4_free_stateid(server, state),
+ &exception);
+ } while (exception.retry);
+ return err;
+}
#endif /* CONFIG_NFS_V4_1 */
struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5937,8 +6138,8 @@
struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
.state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
- .recover_open = nfs4_open_expired,
- .recover_lock = nfs4_lock_expired,
+ .recover_open = nfs41_open_expired,
+ .recover_lock = nfs41_lock_expired,
.establish_clid = nfs41_init_clientid,
.get_clid_cred = nfs4_get_exchange_id_cred,
};
@@ -5962,6 +6163,7 @@
.minor_version = 0,
.call_sync = _nfs4_call_sync,
.validate_stateid = nfs4_validate_delegation_stateid,
+ .find_root_sec = nfs4_find_root_sec,
.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
.state_renewal_ops = &nfs40_state_renewal_ops,
@@ -5972,6 +6174,7 @@
.minor_version = 1,
.call_sync = _nfs4_call_sync_session,
.validate_stateid = nfs41_validate_delegation_stateid,
+ .find_root_sec = nfs41_find_root_sec,
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
.state_renewal_ops = &nfs41_state_renewal_ops,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7acfe88..72ab97e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1643,7 +1643,14 @@
goto out_error;
}
clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
- set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+
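+	/*
+	 * A server scope mismatch means this is no longer the same server
+	 * instance, so reboot reclaim cannot succeed; run full nograce
+	 * recovery instead.
+	 */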
+ if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
+ &clp->cl_state))
+ nfs4_state_start_reclaim_nograce(clp);
+ else
+ set_bit(NFS4CLNT_RECLAIM_REBOOT,
+ &clp->cl_state);
+
pnfs_destroy_all_layouts(clp);
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6e8f3b..c191a9b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -343,6 +343,14 @@
1 /* FIXME: opaque lrf_body always empty at the moment */)
#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
1 + decode_stateid_maxsz)
+#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1)
+#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz
+#define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1)
+#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1)
#else /* CONFIG_NFS_V4_1 */
#define encode_sequence_maxsz 0
#define decode_sequence_maxsz 0
@@ -772,6 +780,26 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_layoutreturn_maxsz)
+#define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putrootfh_maxsz +\
+ encode_secinfo_no_name_maxsz)
+#define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putrootfh_maxsz + \
+ decode_secinfo_no_name_maxsz)
+#define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_test_stateid_maxsz)
+#define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_test_stateid_maxsz)
+#define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_free_stateid_maxsz)
+#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_free_stateid_maxsz)
const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
compound_encode_hdr_maxsz +
@@ -1938,6 +1966,46 @@
hdr->nops++;
hdr->replen += decode_layoutreturn_maxsz;
}
+
+static int
+encode_secinfo_no_name(struct xdr_stream *xdr,
+ const struct nfs41_secinfo_no_name_args *args,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+ p = reserve_space(xdr, 8);
+ *p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
+ *p++ = cpu_to_be32(args->style);
+ hdr->nops++;
+ hdr->replen += decode_secinfo_no_name_maxsz;
+ return 0;
+}
+
+static void encode_test_stateid(struct xdr_stream *xdr,
+ struct nfs41_test_stateid_args *args,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+
+ p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_TEST_STATEID);
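+	/* TEST_STATEID takes an array of stateids; we always send just one. */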
+ *p++ = cpu_to_be32(1);
+ xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+ hdr->nops++;
+ hdr->replen += decode_test_stateid_maxsz;
+}
+
+static void encode_free_stateid(struct xdr_stream *xdr,
+ struct nfs41_free_stateid_args *args,
+ struct compound_hdr *hdr)
+{
+ __be32 *p;
+ p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
+ *p++ = cpu_to_be32(OP_FREE_STATEID);
+ xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+ hdr->nops++;
+ hdr->replen += decode_free_stateid_maxsz;
+}
#endif /* CONFIG_NFS_V4_1 */
/*
@@ -2790,6 +2858,59 @@
encode_layoutreturn(xdr, args, &hdr);
encode_nops(&hdr);
}
+
+/*
+ * Encode SECINFO_NO_NAME request
+ */
+static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_secinfo_no_name_args *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putrootfh(xdr, &hdr);
+ encode_secinfo_no_name(xdr, args, &hdr);
+ encode_nops(&hdr);
+ return 0;
+}
+
+/*
+ * Encode TEST_STATEID request
+ */
+static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_test_stateid_args *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_test_stateid(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode FREE_STATEID request
+ */
+static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs41_free_stateid_args *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_free_stateid(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
#endif /* CONFIG_NFS_V4_1 */
static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4977,11 +5098,17 @@
if (unlikely(status))
return status;
- /* Throw away server_scope */
+ /* Save server_scope */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
return status;
+ if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+ return -EIO;
+
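+	/* Stash the scope so callers can compare it across EXCHANGE_ID calls. */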
+ memcpy(res->server_scope->server_scope, dummy_str, dummy);
+ res->server_scope->server_scope_sz = dummy;
+
/* Throw away Implementation id array */
status = decode_opaque_inline(xdr, &dummy, &dummy_str);
if (unlikely(status))
@@ -5322,6 +5449,55 @@
print_overflow_msg(__func__, xdr);
return -EIO;
}
+
+static int decode_test_stateid(struct xdr_stream *xdr,
+ struct nfs41_test_stateid_res *res)
+{
+ __be32 *p;
+ int status;
+ int num_res;
+
+ status = decode_op_hdr(xdr, OP_TEST_STATEID);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ num_res = be32_to_cpup(p++);
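+	/* We only passed one stateid, so expect exactly one result. */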
+ if (num_res != 1)
+ goto out;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->status = be32_to_cpup(p++);
+ return res->status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+out:
+ return -EIO;
+}
+
+static int decode_free_stateid(struct xdr_stream *xdr,
+ struct nfs41_free_stateid_res *res)
+{
+ __be32 *p;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_FREE_STATEID);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ goto out_overflow;
+ res->status = be32_to_cpup(p++);
+ return res->status;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
#endif /* CONFIG_NFS_V4_1 */
/*
@@ -6461,6 +6637,72 @@
out:
return status;
}
+
+/*
+ * Decode SECINFO_NO_NAME response
+ */
+static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs4_secinfo_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putrootfh(xdr);
+ if (status)
+ goto out;
+ status = decode_secinfo(xdr, res);
+out:
+ return status;
+}
+
+/*
+ * Decode TEST_STATEID response
+ */
+static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs41_test_stateid_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_test_stateid(xdr, res);
+out:
+ return status;
+}
+
+/*
+ * Decode FREE_STATEID response
+ */
+static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs41_free_stateid_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_free_stateid(xdr, res);
+out:
+ return status;
+}
#endif /* CONFIG_NFS_V4_1 */
/**
@@ -6663,6 +6905,9 @@
PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
+ PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
+ PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
+ PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
#endif /* CONFIG_NFS_V4_1 */
};
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8ff2ea3..9383ca7 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1000,13 +1000,22 @@
if (!pnfs_generic_pg_test(pgio, prev, req))
return false;
- if (pgio->pg_lseg == NULL)
- return true;
-
return pgio->pg_count + req->wb_bytes <=
OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
}
+static const struct nfs_pageio_ops objio_pg_read_ops = {
+ .pg_init = pnfs_generic_pg_init_read,
+ .pg_test = objio_pg_test,
+ .pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops objio_pg_write_ops = {
+ .pg_init = pnfs_generic_pg_init_write,
+ .pg_test = objio_pg_test,
+ .pg_doio = pnfs_generic_pg_writepages,
+};
+
static struct pnfs_layoutdriver_type objlayout_type = {
.id = LAYOUT_OSD2_OBJECTS,
.name = "LAYOUT_OSD2_OBJECTS",
@@ -1020,7 +1029,8 @@
.read_pagelist = objlayout_read_pagelist,
.write_pagelist = objlayout_write_pagelist,
- .pg_test = objio_pg_test,
+ .pg_read_ops = &objio_pg_read_ops,
+ .pg_write_ops = &objio_pg_write_ops,
.free_deviceid_node = objio_free_deviceid_node,
@@ -1055,5 +1065,7 @@
__func__);
}
+MODULE_ALIAS("nfs-layouttype4-2");
+
module_init(objlayout_init);
module_exit(objlayout_exit);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18449f4..b60970c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -230,7 +230,7 @@
*/
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct nfs_pageio_descriptor *),
+ const struct nfs_pageio_ops *pg_ops,
size_t bsize,
int io_flags)
{
@@ -240,13 +240,12 @@
desc->pg_bsize = bsize;
desc->pg_base = 0;
desc->pg_moreio = 0;
+ desc->pg_recoalesce = 0;
desc->pg_inode = inode;
- desc->pg_doio = doio;
+ desc->pg_ops = pg_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
- desc->pg_test = nfs_generic_pg_test;
- pnfs_pageio_init(desc, inode);
}
/**
@@ -276,7 +275,7 @@
return false;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return false;
- return pgio->pg_test(pgio, prev, req);
+ return pgio->pg_ops->pg_test(pgio, prev, req);
}
/**
@@ -297,6 +296,8 @@
if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
} else {
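+		/*
+		 * First request of a new series: let pg_init set up state
+		 * (e.g. acquire a layout segment) before coalescing.
+		 */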
+ if (desc->pg_ops->pg_init)
+ desc->pg_ops->pg_init(desc, req);
desc->pg_base = req->wb_pgbase;
}
nfs_list_remove_request(req);
@@ -311,7 +312,7 @@
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
if (!list_empty(&desc->pg_list)) {
- int error = desc->pg_doio(desc);
+ int error = desc->pg_ops->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
else
@@ -331,7 +332,7 @@
* Returns true if the request 'req' was successfully coalesced into the
* existing list of pages 'desc'.
*/
-int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
while (!nfs_pageio_do_add_request(desc, req)) {
@@ -340,17 +341,67 @@
if (desc->pg_error < 0)
return 0;
desc->pg_moreio = 0;
+ if (desc->pg_recoalesce)
+ return 0;
}
return 1;
}
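+
+/*
+ * Requeue everything on the descriptor through the (possibly reset)
+ * pg_ops, so that each request is coalesced again under the new rules;
+ * used when falling back from pNFS to I/O through the MDS.
+ */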
+static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+
+ do {
+ list_splice_init(&desc->pg_list, &head);
+ desc->pg_bytes_written -= desc->pg_count;
+ desc->pg_count = 0;
+ desc->pg_base = 0;
+ desc->pg_recoalesce = 0;
+
+ while (!list_empty(&head)) {
+ struct nfs_page *req;
+
+ req = list_first_entry(&head, struct nfs_page, wb_list);
+ nfs_list_remove_request(req);
+ if (__nfs_pageio_add_request(desc, req))
+ continue;
+ if (desc->pg_error < 0)
+ return 0;
+ break;
+ }
+ } while (desc->pg_recoalesce);
+ return 1;
+}
+
+int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *req)
+{
+ int ret;
+
+ do {
+ ret = __nfs_pageio_add_request(desc, req);
+ if (ret)
+ break;
+ if (desc->pg_error < 0)
+ break;
+ ret = nfs_do_recoalesce(desc);
+ } while (ret);
+ return ret;
+}
+
/**
* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
* @desc: pointer to io descriptor
*/
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
- nfs_pageio_doio(desc);
+ for (;;) {
+ nfs_pageio_doio(desc);
+ if (!desc->pg_recoalesce)
+ break;
+ if (!nfs_do_recoalesce(desc))
+ break;
+ }
}
/**
@@ -369,7 +420,7 @@
if (!list_empty(&desc->pg_list)) {
struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
if (index != prev->wb_index + 1)
- nfs_pageio_doio(desc);
+ nfs_pageio_complete(desc);
}
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 29c0ca7f..38e5508 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -28,6 +28,7 @@
*/
#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
@@ -448,11 +449,20 @@
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
LIST_HEAD(tmp_list);
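+	/* Stop deviceid use and drop the cached entries before freeing layouts. */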
+ nfs4_deviceid_mark_client_invalid(clp);
+ nfs4_deviceid_purge_client(clp);
+
spin_lock(&clp->cl_lock);
- list_splice_init(&clp->cl_layouts, &tmp_list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (!list_empty(&server->layouts))
+ list_splice_init(&server->layouts, &tmp_list);
+ }
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
while (!list_empty(&tmp_list)) {
@@ -661,6 +671,7 @@
lrp->args.stateid = stateid;
lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
lrp->args.inode = ino;
+ lrp->args.layout = lo;
lrp->clp = NFS_SERVER(ino)->nfs_client;
status = nfs4_proc_layoutreturn(lrp);
@@ -920,7 +931,8 @@
};
unsigned pg_offset;
struct nfs_inode *nfsi = NFS_I(ino);
- struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(ino);
+ struct nfs_client *clp = server->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
bool first = false;
@@ -964,7 +976,7 @@
*/
spin_lock(&clp->cl_lock);
BUG_ON(!list_empty(&lo->plh_layouts));
- list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+ list_add_tail(&lo->plh_layouts, &server->layouts);
spin_unlock(&clp->cl_lock);
}
@@ -973,7 +985,8 @@
arg.offset -= pg_offset;
arg.length += pg_offset;
}
- arg.length = PAGE_CACHE_ALIGN(arg.length);
+ if (arg.length != NFS4_MAX_UINT64)
+ arg.length = PAGE_CACHE_ALIGN(arg.length);
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
if (!lseg && first) {
@@ -991,6 +1004,7 @@
spin_unlock(&ino->i_lock);
goto out;
}
+EXPORT_SYMBOL_GPL(pnfs_update_layout);
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
@@ -1048,35 +1062,71 @@
goto out;
}
+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_READ,
+ GFP_KERNEL);
+ /* If no lseg, fall back to read through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_reset_read_mds(pgio);
+
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
+
+void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_RW,
+ GFP_NOFS);
+ /* If no lseg, fall back to write through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_reset_write_mds(pgio);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
+bool
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+ if (ld == NULL)
+ return false;
+ nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+ return true;
+}
+
+bool
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+ if (ld == NULL)
+ return false;
+ nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+ return true;
+}
+
bool
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- enum pnfs_iomode access_type;
- gfp_t gfp_flags;
-
- /* We assume that pg_ioflags == 0 iff we're reading a page */
- if (pgio->pg_ioflags == 0) {
- access_type = IOMODE_READ;
- gfp_flags = GFP_KERNEL;
- } else {
- access_type = IOMODE_RW;
- gfp_flags = GFP_NOFS;
- }
-
- if (pgio->pg_lseg == NULL) {
- if (pgio->pg_count != prev->wb_bytes)
- return true;
- /* This is first coelesce call for a series of nfs_pages */
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- prev->wb_context,
- req_offset(prev),
- pgio->pg_count,
- access_type,
- gfp_flags);
- if (pgio->pg_lseg == NULL)
- return true;
- }
+ if (pgio->pg_lseg == NULL)
+ return nfs_generic_pg_test(pgio, prev, req);
/*
* Test if a nfs_page is fully contained in the pnfs_layout_range.
@@ -1120,15 +1170,30 @@
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
-enum pnfs_try_status
+static void
+pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
+ struct nfs_write_data *data)
+{
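+	/*
+	 * The layout driver declined this RPC: hand the pages back to the
+	 * descriptor and let the MDS write path recoalesce them.
+	 */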
+ list_splice_tail_init(&data->pages, &desc->pg_list);
+ if (data->req && list_empty(&data->req->wb_list))
+ nfs_list_add_request(data->req, &desc->pg_list);
+ nfs_pageio_reset_write_mds(desc);
+ desc->pg_recoalesce = 1;
+ nfs_writedata_release(data);
+}
+
+static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
- const struct rpc_call_ops *call_ops, int how)
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg,
+ int how)
{
struct inode *inode = wdata->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
wdata->mds_ops = call_ops;
+ wdata->lseg = get_lseg(lseg);
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1144,6 +1209,44 @@
return trypnfs;
}
+static void
+pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+{
+ struct nfs_write_data *data;
+ const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+ desc->pg_lseg = NULL;
+ while (!list_empty(head)) {
+ enum pnfs_try_status trypnfs;
+
+ data = list_entry(head->next, struct nfs_write_data, list);
+ list_del_init(&data->list);
+
+ trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_write_through_mds(desc, data);
+ }
+ put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+ int ret;
+
+ ret = nfs_generic_flush(desc, &head);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
+ }
+ pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
+
/*
* Called by non rpc-based layout drivers
*/
@@ -1167,18 +1270,32 @@
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+ struct nfs_read_data *data)
+{
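+	/* As on the write side: requeue the pages and go through the MDS. */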
+ list_splice_tail_init(&data->pages, &desc->pg_list);
+ if (data->req && list_empty(&data->req->wb_list))
+ nfs_list_add_request(data->req, &desc->pg_list);
+ nfs_pageio_reset_read_mds(desc);
+ desc->pg_recoalesce = 1;
+ nfs_readdata_release(data);
+}
+
/*
* Call the appropriate parallel I/O subsystem read function.
*/
-enum pnfs_try_status
+static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
- const struct rpc_call_ops *call_ops)
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg)
{
struct inode *inode = rdata->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;
rdata->mds_ops = call_ops;
+ rdata->lseg = get_lseg(lseg);
dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1194,6 +1311,44 @@
return trypnfs;
}
+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ struct nfs_read_data *data;
+ const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+ desc->pg_lseg = NULL;
+ while (!list_empty(head)) {
+ enum pnfs_try_status trypnfs;
+
+ data = list_entry(head->next, struct nfs_read_data, list);
+ list_del_init(&data->list);
+
+ trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_read_through_mds(desc, data);
+ }
+ put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+ int ret;
+
+ ret = nfs_generic_pagein(desc, &head);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
+ }
+ pnfs_do_multiple_reads(desc, &head);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
/*
* Currently there is only one (whole file) write lseg.
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 96bf4e6..078670d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -87,7 +87,8 @@
void (*free_lseg) (struct pnfs_layout_segment *lseg);
/* test for nfs page cache coalescing */
- bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+ const struct nfs_pageio_ops *pg_read_ops;
+ const struct nfs_pageio_ops *pg_write_ops;
/* Returns true if layoutdriver wants to divert this request to
* driver's commit routine.
@@ -148,16 +149,16 @@
/* pnfs.c */
void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
-struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
- loff_t pos, u64 count, enum pnfs_iomode access_type,
- gfp_t gfp_flags);
+
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
-enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
- const struct rpc_call_ops *, int);
-enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
- const struct rpc_call_ops *);
+void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
+void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -182,6 +183,19 @@
int _pnfs_return_layout(struct inode *);
int pnfs_ld_write_done(struct nfs_write_data *);
int pnfs_ld_read_done(struct nfs_read_data *);
+struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
+ struct nfs_open_context *ctx,
+ loff_t pos,
+ u64 count,
+ enum pnfs_iomode iomode,
+ gfp_t gfp_flags);
+
+void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
+
+/* nfs4_deviceid_flags */
+enum {
+ NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
+};
/* pnfs_dev.c */
struct nfs4_deviceid_node {
@@ -189,13 +203,13 @@
struct hlist_node tmpnode;
const struct pnfs_layoutdriver_type *ld;
const struct nfs_client *nfs_client;
+ unsigned long flags;
struct nfs4_deviceid deviceid;
atomic_t ref;
};
void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
-struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
const struct pnfs_layoutdriver_type *,
@@ -293,15 +307,6 @@
return 0;
}
-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
-{
- struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
-
- if (ld)
- pgio->pg_test = ld->pg_test;
-}
-
#else /* CONFIG_NFS_V4_1 */
static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -322,28 +327,6 @@
{
}
-static inline struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
- loff_t pos, u64 count, enum pnfs_iomode access_type,
- gfp_t gfp_flags)
-{
- return NULL;
-}
-
-static inline enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops)
-{
- return PNFS_NOT_ATTEMPTED;
-}
-
-static inline enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops, int how)
-{
- return PNFS_NOT_ATTEMPTED;
-}
-
static inline int pnfs_return_layout(struct inode *ino)
{
return 0;
@@ -385,9 +368,14 @@
{
}
-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
+static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
+ return false;
+}
+
+static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+ return false;
}
static inline void
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index f0f8e1e..6fda522 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -100,8 +100,8 @@
rcu_read_lock();
d = _lookup_deviceid(ld, clp, id, hash);
- if (d && !atomic_inc_not_zero(&d->ref))
- d = NULL;
+ if (d != NULL)
+ atomic_inc(&d->ref);
rcu_read_unlock();
return d;
}
@@ -115,15 +115,15 @@
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
/*
- * Unhash and put deviceid
+ * Remove a deviceid from cache
*
* @clp nfs_client associated with deviceid
* @id the deviceid to unhash
*
- * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
*/
-struct nfs4_deviceid_node *
-nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
+void
+nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
const struct nfs_client *clp, const struct nfs4_deviceid *id)
{
struct nfs4_deviceid_node *d;
@@ -134,7 +134,7 @@
rcu_read_unlock();
if (!d) {
spin_unlock(&nfs4_deviceid_lock);
- return NULL;
+ return;
}
hlist_del_init_rcu(&d->node);
spin_unlock(&nfs4_deviceid_lock);
@@ -142,28 +142,7 @@
/* balance the initial ref set in pnfs_insert_deviceid */
if (atomic_dec_and_test(&d->ref))
- return d;
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
-
-/*
- * Delete a deviceid from cache
- *
- * @clp struct nfs_client qualifying the deviceid
- * @id deviceid to delete
- */
-void
-nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
- const struct nfs_client *clp, const struct nfs4_deviceid *id)
-{
- struct nfs4_deviceid_node *d;
-
- d = nfs4_unhash_put_deviceid(ld, clp, id);
- if (!d)
- return;
- d->ld->free_deviceid_node(d);
+ d->ld->free_deviceid_node(d);
}
EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
@@ -177,6 +156,7 @@
INIT_HLIST_NODE(&d->tmpnode);
d->ld = ld;
d->nfs_client = nfs_client;
+ d->flags = 0;
d->deviceid = *id;
atomic_set(&d->ref, 1);
}
@@ -221,16 +201,15 @@
*
* @d deviceid node to put
*
- * @ret true iff the node was deleted
+ * return true iff the node was deleted
+ * Note that the test for d->ref == 0 is sufficient to establish that
+ * the node is no longer hashed in the global device id cache.
*/
bool
nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
{
- if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock))
+ if (!atomic_dec_and_test(&d->ref))
return false;
- hlist_del_init_rcu(&d->node);
- spin_unlock(&nfs4_deviceid_lock);
- synchronize_rcu();
d->ld->free_deviceid_node(d);
return true;
}
@@ -275,3 +254,22 @@
for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
_deviceid_purge_client(clp, h);
}
+
+/*
+ * Stop use of all deviceids associated with an nfs_client
+ */
+void
+nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
+{
+ struct nfs4_deviceid_node *d;
+ struct hlist_node *n;
+ int i;
+
+ rcu_read_lock();
+	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) {
+ hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
+ if (d->nfs_client == clp)
+ set_bit(NFS_DEVICEID_INVALID, &d->flags);
+ }
+ rcu_read_unlock();
+}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a68679f..2171c04 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,8 +30,7 @@
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
+static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
@@ -68,7 +67,7 @@
mempool_free(p, nfs_rdata_mempool);
}
-static void nfs_readdata_release(struct nfs_read_data *rdata)
+void nfs_readdata_release(struct nfs_read_data *rdata)
{
put_lseg(rdata->lseg);
put_nfs_open_context(rdata->args.context);
@@ -113,6 +112,27 @@
}
}
+static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode)
+{
+ nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+ NFS_SERVER(inode)->rsize, 0);
+}
+
+void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
+{
+ pgio->pg_ops = &nfs_pageio_read_ops;
+ pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
+
+static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode)
+{
+ if (!pnfs_pageio_init_read(pgio, inode))
+ nfs_pageio_init_read_mds(pgio, inode);
+}
+
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
@@ -131,14 +151,9 @@
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
- nfs_pageio_init(&pgio, inode, NULL, 0, 0);
- nfs_list_add_request(new, &pgio.pg_list);
- pgio.pg_count = len;
-
- if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
- nfs_pagein_multi(&pgio);
- else
- nfs_pagein_one(&pgio);
+ nfs_pageio_init_read(&pgio, inode);
+ nfs_pageio_add_request(&pgio, new);
+ nfs_pageio_complete(&pgio);
return 0;
}
@@ -202,17 +217,14 @@
/*
* Set up the NFS read request struct
*/
-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops,
- unsigned int count, unsigned int offset,
- struct pnfs_layout_segment *lseg)
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ unsigned int count, unsigned int offset)
{
struct inode *inode = req->wb_context->dentry->d_inode;
data->req = req;
data->inode = inode;
data->cred = req->wb_context->cred;
- data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -226,14 +238,36 @@
data->res.count = count;
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
+}
- if (data->lseg &&
- (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
- return 0;
+static int nfs_do_read(struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops)
+{
+ struct inode *inode = data->args.context->dentry->d_inode;
return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}
+static int
+nfs_do_multiple_reads(struct list_head *head,
+ const struct rpc_call_ops *call_ops)
+{
+ struct nfs_read_data *data;
+ int ret = 0;
+
+ while (!list_empty(head)) {
+ int ret2;
+
+ data = list_entry(head->next, struct nfs_read_data, list);
+ list_del_init(&data->list);
+
+ ret2 = nfs_do_read(data, call_ops);
+ if (ret == 0)
+ ret = ret2;
+ }
+ return ret;
+}
+
static void
nfs_async_read_error(struct list_head *head)
{
@@ -260,20 +294,19 @@
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
- size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
+ size_t rsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg;
- LIST_HEAD(list);
nfs_list_remove_request(req);
+ offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);
@@ -281,45 +314,21 @@
data = nfs_readdata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->pages, &list);
+ data->pagevec[0] = page;
+ nfs_read_rpcsetup(req, data, len, offset);
+ list_add(&data->list, res);
requests++;
nbytes -= len;
+ offset += len;
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
-
- BUG_ON(desc->pg_lseg != NULL);
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_READ, GFP_KERNEL);
ClearPageError(page);
- offset = 0;
- nbytes = desc->pg_count;
- do {
- int ret2;
-
- data = list_entry(list.next, struct nfs_read_data, pages);
- list_del_init(&data->pages);
-
- data->pagevec[0] = page;
-
- if (nbytes < rsize)
- rsize = nbytes;
- ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
- rsize, offset, lseg);
- if (ret == 0)
- ret = ret2;
- offset += rsize;
- nbytes -= rsize;
- } while (nbytes != 0);
- put_lseg(lseg);
- desc->pg_lseg = NULL;
-
+ desc->pg_rpc_callops = &nfs_read_partial_ops;
return ret;
-
out_bad:
- while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_read_data, pages);
- list_del(&data->pages);
+ while (!list_empty(res)) {
+ data = list_entry(res->next, struct nfs_read_data, list);
+ list_del(&data->list);
nfs_readdata_free(data);
}
SetPageError(page);
@@ -327,19 +336,19 @@
return -ENOMEM;
}
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
struct list_head *head = &desc->pg_list;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
- int ret = -ENOMEM;
+ int ret = 0;
data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
desc->pg_count));
if (!data) {
nfs_async_read_error(head);
+ ret = -ENOMEM;
goto out;
}
@@ -352,19 +361,37 @@
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
- if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_READ, GFP_KERNEL);
- ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
- 0, lseg);
+ nfs_read_rpcsetup(req, data, desc->pg_count, 0);
+ list_add(&data->list, res);
+ desc->pg_rpc_callops = &nfs_read_full_ops;
out:
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return ret;
}
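+
+/*
+ * With an rsize below a page, a single page must be split across
+ * several READs; otherwise the whole list is sent as one RPC.
+ */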
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_pagein_multi(desc, head);
+ return nfs_pagein_one(desc, head);
+}
+
+static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+ int ret;
+
+ ret = nfs_generic_pagein(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+ return ret;
+}
+
+static const struct nfs_pageio_ops nfs_pageio_read_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_readpages,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
@@ -635,8 +662,6 @@
.pgio = &pgio,
};
struct inode *inode = mapping->host;
- struct nfs_server *server = NFS_SERVER(inode);
- size_t rsize = server->rsize;
unsigned long npages;
int ret = -ESTALE;
@@ -664,10 +689,7 @@
if (ret == 0)
goto read_complete; /* all pages were read */
- if (rsize < PAGE_CACHE_SIZE)
- nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
- else
- nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
+ nfs_pageio_init_read(&pgio, inode);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 8d6864c..b2fbbde 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -147,7 +147,7 @@
alias = d_lookup(parent, &data->args.name);
if (alias != NULL) {
- int ret = 0;
+ int ret;
void *devname_garbage = NULL;
/*
@@ -155,14 +155,16 @@
* the sillyrename information to the aliased dentry.
*/
nfs_free_dname(data);
+ ret = nfs_copy_dname(alias, data);
spin_lock(&alias->d_lock);
- if (alias->d_inode != NULL &&
+ if (ret == 0 && alias->d_inode != NULL &&
!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
devname_garbage = alias->d_fsdata;
alias->d_fsdata = data;
alias->d_flags |= DCACHE_NFSFS_RENAMED;
ret = 1;
- }
+ } else
+ ret = 0;
spin_unlock(&alias->d_lock);
nfs_dec_sillycount(dir);
dput(alias);
@@ -171,8 +173,7 @@
* point dentry is definitely not a root, so we won't need
* that anymore.
*/
- if (devname_garbage)
- kfree(devname_garbage);
+ kfree(devname_garbage);
return ret;
}
data->dir = igrab(dir);
@@ -204,8 +205,6 @@
if (parent == NULL)
goto out_free;
dir = parent->d_inode;
- if (nfs_copy_dname(dentry, data) != 0)
- goto out_dput;
/* Non-exclusive lock protects against concurrent lookup() calls */
spin_lock(&dir->i_lock);
if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
@@ -366,6 +365,8 @@
struct nfs_renamedata *data = calldata;
struct inode *old_dir = data->old_dir;
struct inode *new_dir = data->new_dir;
+ struct dentry *old_dentry = data->old_dentry;
+ struct dentry *new_dentry = data->new_dentry;
if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
@@ -373,12 +374,12 @@
}
if (task->tk_status != 0) {
- nfs_cancel_async_unlink(data->old_dentry);
+ nfs_cancel_async_unlink(old_dentry);
return;
}
- nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir));
- d_move(data->old_dentry, data->new_dentry);
+ d_drop(old_dentry);
+ d_drop(new_dentry);
}
/**
@@ -501,6 +502,14 @@
* and only performs the unlink once the last reference to it is put.
*
* The final cleanup is done during dentry_iput.
+ *
+ * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
+ * could take responsibility for keeping open files referenced. The server
+ * would also need to ensure that opened-but-deleted files were kept over
+ * reboots. However, we may not assume a server does so. (RFC 5661
+ * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can
+ * use to advertise that it does this; some day we may take advantage of
+ * it.))
*/
int
nfs_sillyrename(struct inode *dir, struct dentry *dentry)
@@ -560,6 +569,14 @@
if (error)
goto out_dput;
+ /* populate unlinkdata with the right dname */
+ error = nfs_copy_dname(sdentry,
+ (struct nfs_unlinkdata *)dentry->d_fsdata);
+ if (error) {
+ nfs_cancel_async_unlink(dentry);
+ goto out_dput;
+ }
+
/* run the rename task, undo unlink if it fails */
task = nfs_async_rename(dir, dir, dentry, sdentry);
if (IS_ERR(task)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 00e3750..b39b37f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,7 +97,7 @@
mempool_free(p, nfs_wdata_mempool);
}
-static void nfs_writedata_release(struct nfs_write_data *wdata)
+void nfs_writedata_release(struct nfs_write_data *wdata)
{
put_lseg(wdata->lseg);
put_nfs_open_context(wdata->args.context);
@@ -845,11 +845,9 @@
/*
* Set up the argument/result storage required for the RPC call.
*/
-static int nfs_write_rpcsetup(struct nfs_page *req,
+static void nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
- struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = req->wb_context->dentry->d_inode;
@@ -860,7 +858,6 @@
data->req = req;
data->inode = inode = req->wb_context->dentry->d_inode;
data->cred = req->wb_context->cred;
- data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -872,24 +869,51 @@
data->args.context = get_nfs_open_context(req->wb_context);
data->args.lock_context = req->wb_lock_context;
data->args.stable = NFS_UNSTABLE;
- if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
- data->args.stable = NFS_DATA_SYNC;
- if (!nfs_need_commit(NFS_I(inode)))
- data->args.stable = NFS_FILE_SYNC;
+ switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+ case 0:
+ break;
+ case FLUSH_COND_STABLE:
+ if (nfs_need_commit(NFS_I(inode)))
+ break;
+ default:
+ data->args.stable = NFS_FILE_SYNC;
}
data->res.fattr = &data->fattr;
data->res.count = count;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
+}
- if (data->lseg &&
- (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
- return 0;
+static int nfs_do_write(struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
+ int how)
+{
+ struct inode *inode = data->args.context->dentry->d_inode;
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}
+static int nfs_do_multiple_writes(struct list_head *head,
+ const struct rpc_call_ops *call_ops,
+ int how)
+{
+ struct nfs_write_data *data;
+ int ret = 0;
+
+ while (!list_empty(head)) {
+ int ret2;
+
+ data = list_entry(head->next, struct nfs_write_data, list);
+ list_del_init(&data->list);
+
+ ret2 = nfs_do_write(data, call_ops, how);
+ if (ret == 0)
+ ret = ret2;
+ }
+ return ret;
+}
+
/* If a nfs_flush_* function fails, it should remove reqs from @head and
* call this on each, which will prepare them to be retried on next
* writeback using standard nfs.
@@ -907,17 +931,15 @@
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_write_data *data;
- size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
+ size_t wsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg;
- LIST_HEAD(list);
nfs_list_remove_request(req);
@@ -927,6 +949,7 @@
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+ offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes, wsize);
@@ -934,45 +957,21 @@
data = nfs_writedata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->pages, &list);
+ data->pagevec[0] = page;
+		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
+ list_add(&data->list, res);
requests++;
nbytes -= len;
+ offset += len;
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
-
- BUG_ON(desc->pg_lseg);
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_RW, GFP_NOFS);
- ClearPageError(page);
- offset = 0;
- nbytes = desc->pg_count;
- do {
- int ret2;
-
- data = list_entry(list.next, struct nfs_write_data, pages);
- list_del_init(&data->pages);
-
- data->pagevec[0] = page;
-
- if (nbytes < wsize)
- wsize = nbytes;
- ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
- wsize, offset, lseg, desc->pg_ioflags);
- if (ret == 0)
- ret = ret2;
- offset += wsize;
- nbytes -= wsize;
- } while (nbytes != 0);
-
- put_lseg(lseg);
- desc->pg_lseg = NULL;
+ desc->pg_rpc_callops = &nfs_write_partial_ops;
return ret;
out_bad:
- while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_write_data, pages);
- list_del(&data->pages);
+ while (!list_empty(res)) {
+ data = list_entry(res->next, struct nfs_write_data, list);
+ list_del(&data->list);
nfs_writedata_free(data);
}
nfs_redirty_request(req);
@@ -987,14 +986,13 @@
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
- int ret;
+ int ret = 0;
data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
desc->pg_count));
@@ -1016,32 +1014,62 @@
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
- if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_RW, GFP_NOFS);
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
- ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
+ nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
+ list_add(&data->list, res);
+ desc->pg_rpc_callops = &nfs_write_full_ops;
out:
- put_lseg(lseg); /* Cleans any gotten in ->pg_test */
- desc->pg_lseg = NULL;
return ret;
}
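+
+/*
+ * Write-side twin of nfs_generic_pagein(): split into wsize-sized
+ * WRITEs when the block size is below a page, else flush as one RPC.
+ */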
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_flush_multi(desc, head);
+ return nfs_flush_one(desc, head);
+}
+
+static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+ int ret;
+
+ ret = nfs_generic_flush(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
+ desc->pg_ioflags);
+ return ret;
+}
+
+static const struct nfs_pageio_ops nfs_pageio_write_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_writepages,
+};
+
+static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags)
+{
+ nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+ NFS_SERVER(inode)->wsize, ioflags);
+}
+
+void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
+{
+ pgio->pg_ops = &nfs_pageio_write_ops;
+ pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
+
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
- size_t wsize = NFS_SERVER(inode)->wsize;
-
- if (wsize < PAGE_CACHE_SIZE)
- nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
- else
- nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
+ if (!pnfs_pageio_init_write(pgio, inode, ioflags))
+ nfs_pageio_init_write_mds(pgio, inode, ioflags);
}
/*
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 3b8d397..98e5442 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -93,7 +93,7 @@
memset(bh->b_data, 0, sizeof(struct omfs_inode));
- if (inode->i_mode & S_IFDIR) {
+ if (S_ISDIR(inode->i_mode)) {
memset(&bh->b_data[OMFS_DIR_START], 0xff,
sbi->s_sys_blocksize - OMFS_DIR_START);
} else
diff --git a/fs/open.c b/fs/open.c
index 739b751..f711921 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -446,74 +446,52 @@
return error;
}
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
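+/*
+ * Common implementation for fchmod() and fchmodat(): take the mount's
+ * write reference, run the security hook, then notify_change() under
+ * the inode mutex.
+ */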
+static int chmod_common(struct path *path, umode_t mode)
{
- struct inode * inode;
- struct dentry * dentry;
- struct file * file;
- int err = -EBADF;
+ struct inode *inode = path->dentry->d_inode;
struct iattr newattrs;
+ int error;
- file = fget(fd);
- if (!file)
- goto out;
-
- dentry = file->f_path.dentry;
- inode = dentry->d_inode;
-
- audit_inode(NULL, dentry);
-
- err = mnt_want_write_file(file);
- if (err)
- goto out_putf;
+ error = mnt_want_write(path->mnt);
+ if (error)
+ return error;
mutex_lock(&inode->i_mutex);
- err = security_path_chmod(dentry, file->f_vfsmnt, mode);
- if (err)
+ error = security_path_chmod(path->dentry, path->mnt, mode);
+ if (error)
goto out_unlock;
- if (mode == (mode_t) -1)
- mode = inode->i_mode;
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- err = notify_change(dentry, &newattrs);
+ error = notify_change(path->dentry, &newattrs);
out_unlock:
mutex_unlock(&inode->i_mutex);
- mnt_drop_write(file->f_path.mnt);
-out_putf:
- fput(file);
-out:
+ mnt_drop_write(path->mnt);
+ return error;
+}
+
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+{
+ struct file * file;
+ int err = -EBADF;
+
+ file = fget(fd);
+ if (file) {
+ audit_inode(NULL, file->f_path.dentry);
+ err = chmod_common(&file->f_path, mode);
+ fput(file);
+ }
return err;
}
SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
{
struct path path;
- struct inode *inode;
int error;
- struct iattr newattrs;
error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
- if (error)
- goto out;
- inode = path.dentry->d_inode;
-
- error = mnt_want_write(path.mnt);
- if (error)
- goto dput_and_out;
- mutex_lock(&inode->i_mutex);
- error = security_path_chmod(path.dentry, path.mnt, mode);
- if (error)
- goto out_unlock;
- if (mode == (mode_t) -1)
- mode = inode->i_mode;
- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- error = notify_change(path.dentry, &newattrs);
-out_unlock:
- mutex_unlock(&inode->i_mutex);
- mnt_drop_write(path.mnt);
-dput_and_out:
- path_put(&path);
-out:
+ if (!error) {
+ error = chmod_common(&path, mode);
+ path_put(&path);
+ }
return error;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 1b7f9af..0e0be1d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -948,7 +948,7 @@
static struct inode * get_pipe_inode(void)
{
- struct inode *inode = new_inode(pipe_mnt->mnt_sb);
+ struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
struct pipe_inode_info *pipe;
if (!inode)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1637f1..9d99131 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -620,8 +620,7 @@
if (!ent) goto out;
memset(ent, 0, sizeof(struct proc_dir_entry));
- memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
- ent->name = ((char *) ent) + sizeof(*ent);
+ memcpy(ent->name, fn, len + 1);
ent->namelen = len;
ent->mode = mode;
ent->nlink = nlink;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 9020ac1..f738024 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -197,15 +197,15 @@
int err;
err = -ENOMEM;
- netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+ netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL);
if (!netd)
goto out;
netd->data = net;
netd->nlink = 2;
- netd->name = "net";
netd->namelen = 3;
netd->parent = &proc_root;
+ memcpy(netd->name, "net", 4);
err = -EEXIST;
net_statd = proc_net_mkdir(net, "stat", netd);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index d6c3b41..9a8a2b7 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -186,13 +186,13 @@
struct proc_dir_entry proc_root = {
.low_ino = PROC_ROOT_INO,
.namelen = 5,
- .name = "/proc",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
.nlink = 2,
.count = ATOMIC_INIT(1),
.proc_iops = &proc_root_inode_operations,
.proc_fops = &proc_root_operations,
.parent = &proc_root,
+ .name = "/proc",
};
int pid_ns_prepare_proc(struct pid_namespace *ns)
diff --git a/fs/read_write.c b/fs/read_write.c
index 5907b49..179f1c3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -166,8 +166,10 @@
* long as offset isn't at the end of the file then the
* offset is data.
*/
- if (offset >= inode->i_size)
- return -ENXIO;
+ if (offset >= inode->i_size) {
+ retval = -ENXIO;
+ goto out;
+ }
break;
case SEEK_HOLE:
/*
@@ -175,8 +177,10 @@
* as long as offset isn't i_size or larger, return
* i_size.
*/
- if (offset >= inode->i_size)
- return -ENXIO;
+ if (offset >= inode->i_size) {
+ retval = -ENXIO;
+ goto out;
+ }
offset = inode->i_size;
break;
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b2b4119..d1fe745 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1224,6 +1224,9 @@
rw = READ;
}
+ /* we only use the buffer cache for meta-data */
+ rw |= REQ_META;
+
next_chunk:
atomic_inc(&bp->b_io_remaining);
nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 825390e..7f7b424 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -149,7 +149,9 @@
xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
xfs_ioend_wait(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
if (mp->m_flags & XFS_MOUNT_BARRIER) {
/*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index acca2c5..f7ce7de 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -265,7 +265,7 @@
return PTR_ERR(filp);
}
- if (inode->i_mode & S_IFREG) {
+ if (S_ISREG(inode->i_mode)) {
filp->f_flags |= O_NOATIME;
filp->f_mode |= FMODE_NOCMTIME;
}
@@ -850,14 +850,14 @@
di_flags |= XFS_DIFLAG_NODEFRAG;
if (xflags & XFS_XFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
- if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(ip->i_d.di_mode)) {
if (xflags & XFS_XFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
if (xflags & XFS_XFLAG_NOSYMLINKS)
di_flags |= XFS_DIFLAG_NOSYMLINKS;
if (xflags & XFS_XFLAG_EXTSZINHERIT)
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+ } else if (S_ISREG(ip->i_d.di_mode)) {
if (xflags & XFS_XFLAG_REALTIME)
di_flags |= XFS_DIFLAG_REALTIME;
if (xflags & XFS_XFLAG_EXTSIZE)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6544c32..b9c172b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -1194,9 +1194,14 @@
break;
}
- /* if there is no attribute fork no ACL can exist on this inode */
- if (!XFS_IFORK_Q(ip))
+ /*
+	 * If there is no attribute fork, no ACL can exist on this inode,
+ * and it can't have any file capabilities attached to it either.
+ */
+ if (!XFS_IFORK_Q(ip)) {
+ inode_has_no_xattr(inode);
cache_no_acl(inode);
+ }
xfs_iflags_clear(ip, XFS_INEW);
barrier();
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c51a3f9..ab3e5c6 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -414,7 +414,7 @@
if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
return 0;
- if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(ip->i_d.di_mode)) {
mp = ip->i_mount;
memset(&dargs, 0, sizeof(dargs));
dargs.dp = ip;
@@ -3344,8 +3344,7 @@
* We don't want to deal with the case of keeping inode data inline yet.
* So sending the data fork of a regular inode is invalid.
*/
- ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG &&
- whichfork == XFS_DATA_FORK));
+ ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
flags = 0;
@@ -4052,7 +4051,7 @@
#ifndef DEBUG
if (whichfork == XFS_DATA_FORK) {
- return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
+ return S_ISREG(ip->i_d.di_mode) ?
(ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 2925726..5bfcb87 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -692,6 +692,24 @@
return(error);
}
+#ifdef DEBUG
+static void
+xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
+{
+ __be16 magic = blkinfo->magic;
+
+ if (level == 1) {
+ ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+ magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ } else
+ ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+ ASSERT(!blkinfo->forw);
+ ASSERT(!blkinfo->back);
+}
+#else /* !DEBUG */
+#define xfs_da_blkinfo_onlychild_validate(blkinfo, level)
+#endif /* !DEBUG */
+
/*
* We have only one entry in the root. Copy the only remaining child of
* the old root to block 0 as the new root node.
@@ -700,8 +718,6 @@
xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
{
xfs_da_intnode_t *oldroot;
- /* REFERENCED */
- xfs_da_blkinfo_t *blkinfo;
xfs_da_args_t *args;
xfs_dablk_t child;
xfs_dabuf_t *bp;
@@ -732,15 +748,9 @@
if (error)
return(error);
ASSERT(bp != NULL);
- blkinfo = bp->data;
- if (be16_to_cpu(oldroot->hdr.level) == 1) {
- ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
- blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
- } else {
- ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
- }
- ASSERT(!blkinfo->forw);
- ASSERT(!blkinfo->back);
+ xfs_da_blkinfo_onlychild_validate(bp->data,
+ be16_to_cpu(oldroot->hdr.level));
+
memcpy(root_blk->bp->data, bp->data, state->blocksize);
xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
error = xfs_da_shrink_inode(args, child, bp);
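
The refactor above is the common DEBUG-only helper pattern: the magic-number assertions move into one function compiled under DEBUG, and an empty macro of the same name keeps non-debug builds free of both the code and the now-unreferenced `blkinfo` local. A standalone sketch of the pattern, with illustrative names only:

    #include <assert.h>
    #include <stdio.h>

    #define WIDGET_MAGIC 0xda7a            /* illustrative magic value */

    struct widget { int magic; };

    #ifdef DEBUG
    static void validate_widget(const struct widget *w, int level)
    {
            assert(w->magic == WIDGET_MAGIC);
            assert(level > 0);
    }
    #else
    /* Compiles away when DEBUG is unset, like the XFS helper above. */
    #define validate_widget(w, level) ((void)(w))
    #endif

    int main(void)
    {
            struct widget w = { .magic = WIDGET_MAGIC };
            validate_widget(&w, 1);
            printf("validated\n");
            return 0;
    }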
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 4580ce0..a2e2701 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -121,7 +121,7 @@
{
xfs_dir2_sf_hdr_t *sfp;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if (dp->i_d.di_size == 0) /* might happen during shutdown. */
return 1;
if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
@@ -179,7 +179,7 @@
memset((char *)&args, 0, sizeof(args));
args.dp = dp;
args.trans = tp;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
return error;
return xfs_dir2_sf_create(&args, pdp->i_ino);
@@ -202,7 +202,7 @@
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
return rval;
XFS_STATS_INC(xs_dir_create);
@@ -278,7 +278,7 @@
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_lookup);
memset(&args, 0, sizeof(xfs_da_args_t));
@@ -333,7 +333,7 @@
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_remove);
memset(&args, 0, sizeof(xfs_da_args_t));
@@ -382,7 +382,7 @@
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return XFS_ERROR(EIO);
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_getdents);
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -414,7 +414,7 @@
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
return rval;
@@ -464,7 +464,7 @@
if (resblks)
return 0;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9124425..3ff3d9e 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -344,9 +344,9 @@
* Either ip is a regular file and pip is a directory, or ip is a
* directory and pip is NULL.
*/
- ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
- (pip->i_d.di_mode & S_IFDIR)) ||
- ((ip->i_d.di_mode & S_IFDIR) && !pip)));
+ ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
+ S_ISDIR(pip->i_d.di_mode)) ||
+ (S_ISDIR(ip->i_d.di_mode) && !pip)));
mp = ip->i_mount;
cache = mp->m_filestream;
@@ -537,7 +537,7 @@
xfs_agnumber_t ag;
int ref;
- if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
+ if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
ASSERT(0);
return NULLAGNUMBER;
}
@@ -579,9 +579,9 @@
xfs_agnumber_t ag, rotorstep, startag;
int err = 0;
- ASSERT(pip->i_d.di_mode & S_IFDIR);
- ASSERT(ip->i_d.di_mode & S_IFREG);
- if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
+ ASSERT(S_ISDIR(pip->i_d.di_mode));
+ ASSERT(S_ISREG(ip->i_d.di_mode));
+ if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
return -EINVAL;
mp = pip->i_mount;
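
Most of the S_ISREG()/S_ISDIR() conversions in this series are mechanical, but the filestream ASSERTs above were genuine bugs: S_IFREG and friends are multi-bit values within S_IFMT, not independent flags, so `mode & S_IFREG` also matches symlinks and sockets, whose type codes contain the S_IFREG bit. A small standalone demonstration:

    #include <assert.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            /* S_IFLNK (0120000) shares the S_IFREG bit (0100000). */
            mode_t lnk = S_IFLNK | 0777;

            if (lnk & S_IFREG)              /* the pre-patch style of test */
                    printf("bitwise AND wrongly matches a symlink\n");

            /* The S_ISxxx() macros mask with S_IFMT first. */
            assert(!S_ISREG(lnk));
            assert(S_ISLNK(lnk));
            return 0;
    }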
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3cc21dd..2fcca4b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -368,7 +368,7 @@
/*
* no local regular files yet
*/
- if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
+ if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
xfs_warn(ip->i_mount,
"corrupt inode %Lu (local format for regular file).",
(unsigned long long) ip->i_ino);
@@ -1040,7 +1040,7 @@
if (pip && XFS_INHERIT_GID(pip)) {
ip->i_d.di_gid = pip->i_d.di_gid;
- if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
+ if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
ip->i_d.di_mode |= S_ISGID;
}
}
@@ -1097,14 +1097,14 @@
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
uint di_flags = 0;
- if ((mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
ip->i_d.di_extsize = pip->i_d.di_extsize;
}
- } else if ((mode & S_IFMT) == S_IFREG) {
+ } else if (S_ISREG(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_REALTIME;
if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
@@ -1188,7 +1188,7 @@
int nimaps;
xfs_bmbt_irec_t imaps[2];
- if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
+ if (!S_ISREG(ip->i_d.di_mode))
return;
if (XFS_IS_REALTIME_INODE(ip))
@@ -1828,7 +1828,7 @@
ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0);
ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
- ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
+ (!S_ISREG(ip->i_d.di_mode)));
ASSERT(ip->i_d.di_nblocks == 0);
/*
@@ -2671,7 +2671,7 @@
__func__, ip->i_ino, ip, ip->i_d.di_magic);
goto corrupt_out;
}
- if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+ if (S_ISREG(ip->i_d.di_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -2681,7 +2681,7 @@
__func__, ip->i_ino, ip);
goto corrupt_out;
}
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ } else if (S_ISDIR(ip->i_d.di_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a97644a..2380a4b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -263,7 +263,7 @@
struct inode i_vnode; /* embedded VFS inode */
} xfs_inode_t;
-#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
+#define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \
(ip)->i_size : (ip)->i_d.di_size;
/* Convert from vfs inode to xfs inode */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8fe4206..052a2c0 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2283,7 +2283,7 @@
/* Take the opportunity to reset the flush iteration count */
dicp->di_flushiter = 0;
- if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
+ if (unlikely(S_ISREG(dicp->di_mode))) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
@@ -2296,7 +2296,7 @@
error = EFSCORRUPTED;
goto error;
}
- } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
+ } else if (unlikely(S_ISDIR(dicp->di_mode))) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE) &&
(dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7f25245..092e16a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1331,7 +1331,7 @@
ASSERT(rip != NULL);
- if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
+ if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
xfs_warn(mp, "corrupted root inode %llu: not a directory",
(unsigned long long)rip->i_ino);
xfs_iunlock(rip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 77a5989..df78c29 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -116,7 +116,7 @@
trace_xfs_rename(src_dp, target_dp, src_name, target_name);
new_parent = (src_dp != target_dp);
- src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
+ src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
if (src_is_directory) {
/*
@@ -226,7 +226,7 @@
* target and source are directories and that target can be
* destroyed, or that neither is a directory.
*/
- if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(target_ip->i_d.di_mode)) {
/*
* Make sure target dir is empty.
*/
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 88d1214..9322e13 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -121,7 +121,7 @@
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
+ ASSERT(S_ISLNK(ip->i_d.di_mode));
ASSERT(ip->i_d.di_size <= MAXPATHLEN);
pathlen = ip->i_d.di_size;
@@ -529,7 +529,7 @@
if (ip->i_d.di_nlink == 0)
return 0;
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
@@ -610,7 +610,7 @@
truncate = ((ip->i_d.di_nlink == 0) &&
((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
(ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
- ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
+ S_ISREG(ip->i_d.di_mode));
mp = ip->i_mount;
@@ -621,7 +621,7 @@
goto out;
if (ip->i_d.di_nlink != 0) {
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS) &&
@@ -669,7 +669,7 @@
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
return VN_INACTIVE_CACHE;
}
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
+ } else if (S_ISLNK(ip->i_d.di_mode)) {
/*
* If we get an error while cleaning up a
diff --git a/include/keys/encrypted-type.h b/include/keys/encrypted-type.h
index 9585501..1d45413 100644
--- a/include/keys/encrypted-type.h
+++ b/include/keys/encrypted-type.h
@@ -1,6 +1,11 @@
/*
* Copyright (C) 2010 IBM Corporation
- * Author: Mimi Zohar <zohar@us.ibm.com>
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ * TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Mimi Zohar <zohar@us.ibm.com>
+ * Roberto Sassu <roberto.sassu@polito.it>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -15,13 +20,17 @@
struct encrypted_key_payload {
struct rcu_head rcu;
+ char *format; /* datablob: format */
char *master_desc; /* datablob: master key name */
char *datalen; /* datablob: decrypted key length */
u8 *iv; /* datablob: iv */
u8 *encrypted_data; /* datablob: encrypted data */
unsigned short datablob_len; /* length of datablob */
unsigned short decrypted_datalen; /* decrypted data length */
- u8 decrypted_data[0]; /* decrypted data + datablob + hmac */
+ unsigned short payload_datalen; /* payload data length */
+ unsigned short encrypted_key_format; /* encrypted key format */
+ u8 *decrypted_data; /* decrypted data */
+ u8 payload_data[0]; /* payload data + datablob + hmac */
};
extern struct key_type key_type_encrypted;
diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h
new file mode 100644
index 0000000..2224a8c
--- /dev/null
+++ b/include/linux/ecryptfs.h
@@ -0,0 +1,113 @@
+#ifndef _LINUX_ECRYPTFS_H
+#define _LINUX_ECRYPTFS_H
+
+/* Version verification for shared data structures w/ userspace */
+#define ECRYPTFS_VERSION_MAJOR 0x00
+#define ECRYPTFS_VERSION_MINOR 0x04
+#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03
+/* These flags indicate which features are supported by the kernel
+ * module; userspace tools such as the mount helper read
+ * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
+ * how to behave. */
+#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
+#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
+#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
+#define ECRYPTFS_VERSIONING_POLICY 0x00000008
+#define ECRYPTFS_VERSIONING_XATTR 0x00000010
+#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
+#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040
+#define ECRYPTFS_VERSIONING_HMAC 0x00000080
+#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100
+#define ECRYPTFS_VERSIONING_GCM 0x00000200
+#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
+ | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
+ | ECRYPTFS_VERSIONING_PUBKEY \
+ | ECRYPTFS_VERSIONING_XATTR \
+ | ECRYPTFS_VERSIONING_MULTKEY \
+ | ECRYPTFS_VERSIONING_DEVMISC \
+ | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
+#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
+#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
+#define ECRYPTFS_SALT_SIZE 8
+#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2)
+/* The original signature size is only for what is stored on disk; all
+ * in-memory representations are expanded hex, so it is better adapted to
+ * be passed around or referenced on the command line */
+#define ECRYPTFS_SIG_SIZE 8
+#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2)
+#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX
+#define ECRYPTFS_MAX_KEY_BYTES 64
+#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
+#define ECRYPTFS_FILE_VERSION 0x03
+#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
+
+#define RFC2440_CIPHER_DES3_EDE 0x02
+#define RFC2440_CIPHER_CAST_5 0x03
+#define RFC2440_CIPHER_BLOWFISH 0x04
+#define RFC2440_CIPHER_AES_128 0x07
+#define RFC2440_CIPHER_AES_192 0x08
+#define RFC2440_CIPHER_AES_256 0x09
+#define RFC2440_CIPHER_TWOFISH 0x0a
+#define RFC2440_CIPHER_CAST_6 0x0b
+
+#define RFC2440_CIPHER_RSA 0x01
+
+/**
+ * For convenience, we may need to pass around the encrypted session
+ * key between kernel and userspace because the authentication token
+ * may not be extractable. For example, the TPM may not release the
+ * private key, instead requiring the encrypted data and returning the
+ * decrypted data.
+ */
+struct ecryptfs_session_key {
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001
+#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002
+#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004
+#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008
+ u32 flags;
+ u32 encrypted_key_size;
+ u32 decrypted_key_size;
+ u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES];
+ u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES];
+};
+
+struct ecryptfs_password {
+ u32 password_bytes;
+ s32 hash_algo;
+ u32 hash_iterations;
+ u32 session_key_encryption_key_bytes;
+#define ECRYPTFS_PERSISTENT_PASSWORD 0x01
+#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02
+ u32 flags;
+ /* Iterated-hash concatenation of salt and passphrase */
+ u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
+ u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+ /* Always in expanded hex */
+ u8 salt[ECRYPTFS_SALT_SIZE];
+};
+
+enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
+
+struct ecryptfs_private_key {
+ u32 key_size;
+ u32 data_len;
+ u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+ char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1];
+ u8 data[];
+};
+
+/* May be a password or a private key */
+struct ecryptfs_auth_tok {
+ u16 version; /* 8-bit major and 8-bit minor */
+ u16 token_type;
+#define ECRYPTFS_ENCRYPT_ONLY 0x00000001
+ u32 flags;
+ struct ecryptfs_session_key session_key;
+ u8 reserved[32];
+ union {
+ struct ecryptfs_password password;
+ struct ecryptfs_private_key private_key;
+ } token;
+} __attribute__ ((packed));
+
+#endif /* _LINUX_ECRYPTFS_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5f523eb..f23bcb7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2310,7 +2310,8 @@
extern void iget_failed(struct inode *);
extern void end_writeback(struct inode *);
extern void __destroy_inode(struct inode *);
-extern struct inode *new_inode(struct super_block *);
+extern struct inode *new_inode_pseudo(struct super_block *sb);
+extern struct inode *new_inode(struct super_block *sb);
extern void free_inode_nonrcu(struct inode *inode);
extern int should_remove_suid(struct dentry *);
extern int file_remove_suid(struct file *);
diff --git a/include/linux/if.h b/include/linux/if.h
index 3bc63e6..03489ca 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -76,6 +76,8 @@
#define IFF_BRIDGE_PORT 0x4000 /* device used as bridge port */
#define IFF_OVS_DATAPATH 0x8000 /* device used as Open vSwitch
* datapath port */
+#define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing
+ * skbs on transmit */
#define IF_GET_IFACE 0x0001 /* for querying only */
#define IF_GET_PROTO 0x0002
diff --git a/include/linux/input.h b/include/linux/input.h
index 771d6d8..068784e 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -119,9 +119,9 @@
#define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */
#define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */
-#define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */
-#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */
-#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */
+#define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + (ev), len) /* get event bits */
+#define EVIOCGABS(abs) _IOR('E', 0x40 + (abs), struct input_absinfo) /* get abs value/limits */
+#define EVIOCSABS(abs) _IOW('E', 0xc0 + (abs), struct input_absinfo) /* set abs value/limits */
#define EVIOCSFF _IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect)) /* send a force effect to a force feedback device */
#define EVIOCRMFF _IOW('E', 0x81, int) /* Erase a force effect */
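
Wrapping the `ev` and `abs` arguments in parentheses matters because callers may pass full expressions. Without the parentheses, an argument containing an operator of lower precedence than `+` silently changes the ioctl number; a standalone illustration:

    #include <stdio.h>

    #define UNSAFE(ev)  (0x20 + ev)        /* pre-patch style */
    #define SAFE(ev)    (0x20 + (ev))      /* post-patch style */

    int main(void)
    {
            int use_abs = 1;

            /* Expands to (0x20 + use_abs) ? 3 : 4, which is 3. */
            printf("unsafe: 0x%x\n", UNSAFE(use_abs ? 3 : 4));
            /* Expands to 0x20 + 3, which is 0x23, as intended. */
            printf("safe:   0x%x\n", SAFE(use_abs ? 3 : 4));
            return 0;
    }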
diff --git a/include/linux/input/kxtj9.h b/include/linux/input/kxtj9.h
new file mode 100644
index 0000000..f6bac89
--- /dev/null
+++ b/include/linux/input/kxtj9.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2011 Kionix, Inc.
+ * Written by Chris Hudson <chudson@kionix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#ifndef __KXTJ9_H__
+#define __KXTJ9_H__
+
+#define KXTJ9_I2C_ADDR 0x0F
+
+struct kxtj9_platform_data {
+ unsigned int min_interval; /* minimum poll interval (in milliseconds) */
+
+ /*
+ * By default, x is axis 0, y is axis 1, z is axis 2; these can be
+ * changed to account for sensor orientation within the host device.
+ */
+ u8 axis_map_x;
+ u8 axis_map_y;
+ u8 axis_map_z;
+
+ /*
+ * Each axis can be negated to account for sensor orientation within
+ * the host device.
+ */
+ bool negate_x;
+ bool negate_y;
+ bool negate_z;
+
+ /* CTRL_REG1: set resolution, g-range, data ready enable */
+ /* Output resolution: 8-bit valid or 12-bit valid */
+ #define RES_8BIT 0
+ #define RES_12BIT (1 << 6)
+ u8 res_12bit;
+ /* Output g-range: +/-2g, 4g, or 8g */
+ #define KXTJ9_G_2G 0
+ #define KXTJ9_G_4G (1 << 3)
+ #define KXTJ9_G_8G (1 << 4)
+ u8 g_range;
+
+ /* DATA_CTRL_REG: controls the output data rate of the part */
+ #define ODR12_5F 0
+ #define ODR25F 1
+ #define ODR50F 2
+ #define ODR100F 3
+ #define ODR200F 4
+ #define ODR400F 5
+ #define ODR800F 6
+ u8 data_odr_init;
+
+ int (*init)(void);
+ void (*exit)(void);
+ int (*power_on)(void);
+ int (*power_off)(void);
+};
+#endif /* __KXTJ9_H__ */
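
A hedged sketch of how board code might consume this header; the I2C bus number, orientation settings, and rates are hypothetical and board-specific:

    #include <linux/i2c.h>
    #include <linux/init.h>
    #include <linux/input/kxtj9.h>

    static struct kxtj9_platform_data board_kxtj9_pdata = {
            .min_interval   = 5,
            .axis_map_x     = 0,
            .axis_map_y     = 1,
            .axis_map_z     = 2,
            .negate_z       = true,        /* sensor mounted upside down */
            .res_12bit      = RES_12BIT,
            .g_range        = KXTJ9_G_2G,
            .data_odr_init  = ODR100F,
    };

    static struct i2c_board_info board_i2c_devs[] __initdata = {
            {
                    I2C_BOARD_INFO("kxtj9", KXTJ9_I2C_ADDR),
                    .platform_data = &board_kxtj9_pdata,
            },
    };

    /* registered from board init, e.g.:
     * i2c_register_board_info(0, board_i2c_devs, ARRAY_SIZE(board_i2c_devs));
     */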
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 9a43ad7..46ac9a5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -56,6 +56,14 @@
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define DIV_ROUND_UP_ULL(ll,d) \
+ ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; })
+
+#if BITS_PER_LONG == 32
+# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d)
+#else
+# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
+#endif
/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
#define roundup(x, y) ( \
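
DIV_ROUND_UP_ULL exists because on 32-bit kernels a plain `/` on a 64-bit operand would make gcc emit a call to libgcc's __udivdi3, which the kernel does not link; do_div() performs the division in-kernel instead. A userspace model of the arithmetic both macros implement:

    #include <assert.h>

    /* Round n up to the next multiple of d, as in the kernel macro. */
    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    int main(void)
    {
            assert(DIV_ROUND_UP(10, 3) == 4);
            assert(DIV_ROUND_UP(12, 3) == 4);

            /* Works past 32 bits; the kernel's ULL variant routes this
             * division through do_div() on 32-bit builds. */
            assert(DIV_ROUND_UP(0x1000000000ULL, 8ULL) == 0x200000000ULL);
            return 0;
    }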
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2ed0b6c..ddee79b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1132,7 +1132,7 @@
spinlock_t addr_list_lock;
struct netdev_hw_addr_list uc; /* Unicast mac addresses */
struct netdev_hw_addr_list mc; /* Multicast mac addresses */
- int uc_promisc;
+ bool uc_promisc;
unsigned int promiscuity;
unsigned int allmulti;
@@ -1679,9 +1679,12 @@
static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
unsigned int offset)
{
+ if (!pskb_may_pull(skb, hlen))
+ return NULL;
+
NAPI_GRO_CB(skb)->frag0 = NULL;
NAPI_GRO_CB(skb)->frag0_len = 0;
- return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+ return skb->data + offset;
}
static inline void *skb_gro_mac_header(struct sk_buff *skb)
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 504b289..a3c4bc8 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -563,6 +563,9 @@
NFSPROC4_CLNT_GETDEVICEINFO,
NFSPROC4_CLNT_LAYOUTCOMMIT,
NFSPROC4_CLNT_LAYOUTRETURN,
+ NFSPROC4_CLNT_SECINFO_NO_NAME,
+ NFSPROC4_CLNT_TEST_STATEID,
+ NFSPROC4_CLNT_FREE_STATEID,
};
/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 08c444a..50a661f 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -16,6 +16,7 @@
struct nfs4_sequence_res;
struct nfs_server;
struct nfs4_minor_version_ops;
+struct server_scope;
/*
* The nfs_client identifies our client state to the server.
@@ -77,12 +78,13 @@
/* The flags used for obtaining the clientid during EXCHANGE_ID */
u32 cl_exchange_flags;
struct nfs4_session *cl_session; /* shared session */
- struct list_head cl_layouts;
#endif /* CONFIG_NFS_V4 */
#ifdef CONFIG_NFS_FSCACHE
struct fscache_cookie *fscache; /* client index cache cookie */
#endif
+
+ struct server_scope *server_scope; /* from exchange_id */
};
/*
@@ -149,6 +151,7 @@
struct rb_root openowner_id;
struct rb_root lockowner_id;
#endif
+ struct list_head layouts;
struct list_head delegations;
void (*destroy)(struct nfs_server *);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 25311b3..e2791a2 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -55,20 +55,28 @@
struct nfs_writeverf wb_verf; /* Commit cookie */
};
+struct nfs_pageio_descriptor;
+struct nfs_pageio_ops {
+ void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
+ bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+ int (*pg_doio)(struct nfs_pageio_descriptor *);
+};
+
struct nfs_pageio_descriptor {
struct list_head pg_list;
unsigned long pg_bytes_written;
size_t pg_count;
size_t pg_bsize;
unsigned int pg_base;
- char pg_moreio;
+ unsigned char pg_moreio : 1,
+ pg_recoalesce : 1;
struct inode *pg_inode;
- int (*pg_doio)(struct nfs_pageio_descriptor *);
+ const struct nfs_pageio_ops *pg_ops;
int pg_ioflags;
int pg_error;
+ const struct rpc_call_ops *pg_rpc_callops;
struct pnfs_layout_segment *pg_lseg;
- bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
};
#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -85,7 +93,7 @@
pgoff_t idx_start, unsigned int npages, int tag);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct nfs_pageio_descriptor *desc),
+ const struct nfs_pageio_ops *pg_ops,
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@@ -100,7 +108,6 @@
extern int nfs_set_page_tag_locked(struct nfs_page *req);
extern void nfs_clear_page_tag_locked(struct nfs_page *req);
-
/*
* Lock the page of an asynchronous request without getting a new reference
*/
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 00848d8..5b11595 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -269,9 +269,10 @@
};
struct nfs4_layoutreturn_args {
- __u32 layout_type;
+ struct pnfs_layout_hdr *layout;
struct inode *inode;
nfs4_stateid stateid;
+ __u32 layout_type;
struct nfs4_sequence_args seq_args;
};
@@ -1060,6 +1061,7 @@
struct nfs41_exchange_id_res {
struct nfs_client *client;
u32 flags;
+ struct server_scope *server_scope;
};
struct nfs41_create_session_args {
@@ -1083,6 +1085,34 @@
struct nfs41_reclaim_complete_res {
struct nfs4_sequence_res seq_res;
};
+
+#define SECINFO_STYLE_CURRENT_FH 0
+#define SECINFO_STYLE_PARENT 1
+struct nfs41_secinfo_no_name_args {
+ int style;
+ struct nfs4_sequence_args seq_args;
+};
+
+struct nfs41_test_stateid_args {
+ nfs4_stateid *stateid;
+ struct nfs4_sequence_args seq_args;
+};
+
+struct nfs41_test_stateid_res {
+ unsigned int status;
+ struct nfs4_sequence_res seq_res;
+};
+
+struct nfs41_free_stateid_args {
+ nfs4_stateid *stateid;
+ struct nfs4_sequence_args seq_args;
+};
+
+struct nfs41_free_stateid_res {
+ unsigned int status;
+ struct nfs4_sequence_res seq_res;
+};
+
#endif /* CONFIG_NFS_V4_1 */
struct nfs_page;
@@ -1096,6 +1126,7 @@
struct rpc_cred *cred;
struct nfs_fattr fattr; /* fattr storage */
struct list_head pages; /* Coalesced read requests */
+ struct list_head list; /* lists of struct nfs_read_data */
struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
@@ -1119,6 +1150,7 @@
struct nfs_fattr fattr;
struct nfs_writeverf verf;
struct list_head pages; /* Coalesced requests we wish to flush */
+ struct list_head list; /* lists of struct nfs_write_data */
struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h
index 76efbdd..435dd5f 100644
--- a/include/linux/pnfs_osd_xdr.h
+++ b/include/linux/pnfs_osd_xdr.h
@@ -41,9 +41,6 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
-#include <scsi/osd_protocol.h>
-
-#define PNFS_OSD_OSDNAME_MAXSIZE 256
/*
* draft-ietf-nfsv4-minorversion-22
@@ -99,12 +96,6 @@
#define _DEVID_HI(oid_device_id) \
(unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1)
-static inline int
-pnfs_osd_objid_xdr_sz(void)
-{
- return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2;
-}
-
enum pnfs_osd_version {
PNFS_OSD_MISSING = 0,
PNFS_OSD_VERSION_1 = 1,
@@ -189,8 +180,6 @@
struct nfs4_string oti_scsi_device_id;
};
-enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
-
/* struct netaddr4 {
* // see struct rpcb in RFC1833
* string r_netid<>; // network id
@@ -207,12 +196,6 @@
struct pnfs_osd_net_addr ota_netaddr;
};
-enum {
- NETWORK_ID_MAX = 16 / 4,
- UNIVERSAL_ADDRESS_MAX = 64 / 4,
- PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
-};
-
struct pnfs_osd_deviceaddr {
struct pnfs_osd_targetid oda_targetid;
struct pnfs_osd_targetaddr oda_targetaddr;
@@ -222,15 +205,6 @@
struct nfs4_string oda_osdname;
};
-enum {
- ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
- PNFS_OSD_DEVICEADDR_MAX =
- PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
- 2 /*oda_lun*/ +
- 1 + OSD_SYSTEMID_LEN +
- 1 + ODA_OSDNAME_MAX,
-};
-
/* LAYOUTCOMMIT: layoutupdate */
/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
@@ -279,7 +253,7 @@
u32 oer_errno;
};
-/* OSD XDR API */
+/* OSD XDR Client API */
/* Layout helpers */
/* Layout decoding is done in two parts:
* 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
@@ -337,8 +311,7 @@
pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
struct pnfs_osd_layoutupdate *lou);
-/* osd_ioerror encoding/decoding (layout_return) */
-/* Client */
+/* osd_ioerror encoding (layout_return) */
extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 650af6d..643b96c 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -50,8 +50,6 @@
struct proc_dir_entry {
unsigned int low_ino;
- unsigned int namelen;
- const char *name;
mode_t mode;
nlink_t nlink;
uid_t uid;
@@ -73,9 +71,11 @@
write_proc_t *write_proc;
atomic_t count; /* use count */
int pde_users; /* number of callers into module in progress */
- spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
struct completion *pde_unload_completion;
struct list_head pde_openers; /* who did ->open, but not ->release */
+ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+ u8 namelen;
+ char name[];
};
enum kcore_type {
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 75cbf4f..9e65d9e 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -245,10 +245,16 @@
__u8 device_uuid[16]; /* user-space settable, ignored by kernel */
__u8 devflags; /* per-device flags. Only one defined...*/
#define WriteMostly1 1 /* mask for writemostly flag in above */
- __u8 pad2[64-57]; /* set to 0 when writing */
+ /* Bad block log. If there are any bad blocks the feature flag is set.
+ * If offset and size are non-zero, that space is reserved and available
+ */
+ __u8 bblog_shift; /* shift from sectors to block size */
+ __le16 bblog_size; /* number of sectors reserved for list */
+ __le32 bblog_offset; /* sector offset from superblock to bblog,
+ * signed - not unsigned */
/* array state information - 64 bytes */
- __le64 utime; /* 40 bits second, 24 btes microseconds */
+ __le64 utime; /* 40 bits second, 24 bits microseconds */
__le64 events; /* incremented when superblock updated */
__le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
__le32 sb_csum; /* checksum up to devs[max_dev] */
@@ -270,8 +276,8 @@
* must be honoured
*/
#define MD_FEATURE_RESHAPE_ACTIVE 4
+#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */
-#define MD_FEATURE_ALL (1|2|4)
+#define MD_FEATURE_ALL (1|2|4|8)
#endif
-
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 0828842..f7f3ce34 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -31,7 +31,7 @@
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
-#ifdef CONFIG_NFS_V4_1
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
void xprt_free_bc_request(struct rpc_rqst *req);
int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
@@ -47,7 +47,7 @@
return 1;
return 0;
}
-#else /* CONFIG_NFS_V4_1 */
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
unsigned int min_reqs)
{
@@ -62,6 +62,6 @@
static inline void xprt_free_bc_request(struct rpc_rqst *req)
{
}
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
#endif /* _LINUX_SUNRPC_BC_XPRT_H */
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index fe2d8e6..e775689 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -227,6 +227,10 @@
void rpc_destroy_wait_queue(struct rpc_wait_queue *);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
rpc_action action);
+void rpc_sleep_on_priority(struct rpc_wait_queue *,
+ struct rpc_task *,
+ rpc_action action,
+ int priority);
void rpc_wake_up_queued_task(struct rpc_wait_queue *,
struct rpc_task *);
void rpc_wake_up(struct rpc_wait_queue *);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 2f1e518..223588a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -92,7 +92,7 @@
struct module * sv_module; /* optional module to count when
* adding threads */
svc_thread_fn sv_function; /* main function for threads */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct list_head sv_cb_list; /* queue for callback requests
* that arrive over the same
* connection */
@@ -100,7 +100,7 @@
wait_queue_head_t sv_cb_waitq; /* sleep here if there are no
* entries in the svc_cb_list */
struct svc_xprt *sv_bc_xprt; /* callback on fore channel */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
/*
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 81cce3b..15518a1 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -22,6 +22,7 @@
#define RPC_MIN_SLOT_TABLE (2U)
#define RPC_DEF_SLOT_TABLE (16U)
#define RPC_MAX_SLOT_TABLE (128U)
+#define RPC_MAX_SLOT_TABLE_LIMIT (65536U)
/*
* This describes a timeout strategy
@@ -100,18 +101,18 @@
ktime_t rq_xtime; /* transmit time stamp */
int rq_ntrans;
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct list_head rq_bc_list; /* Callback service list */
unsigned long rq_bc_pa_state; /* Backchannel prealloc state */
struct list_head rq_bc_pa_list; /* Backchannel prealloc list */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
#define rq_svec rq_snd_buf.head
#define rq_slen rq_snd_buf.len
struct rpc_xprt_ops {
void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
- int (*reserve_xprt)(struct rpc_task *task);
+ int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*rpcbind)(struct rpc_task *task);
void (*set_port)(struct rpc_xprt *xprt, unsigned short port);
@@ -164,12 +165,12 @@
struct rpc_wait_queue binding; /* requests waiting on rpcbind */
struct rpc_wait_queue sending; /* requests waiting to send */
- struct rpc_wait_queue resend; /* requests waiting to resend */
struct rpc_wait_queue pending; /* requests in flight */
struct rpc_wait_queue backlog; /* waiting for slot */
struct list_head free; /* free slots */
- struct rpc_rqst * slot; /* slot table storage */
- unsigned int max_reqs; /* total slots */
+ unsigned int max_reqs; /* max number of slots */
+ unsigned int min_reqs; /* min number of slots */
+ atomic_t num_reqs; /* total slots */
unsigned long state; /* transport state */
unsigned char shutdown : 1, /* being shut down */
resvport : 1; /* use a reserved port */
@@ -200,7 +201,7 @@
u32 xid; /* Next XID value to use */
struct rpc_task * snd_task; /* Task blocked in send */
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct svc_serv *bc_serv; /* The RPC service which will */
/* process the callback */
unsigned int bc_alloc_count; /* Total number of preallocs */
@@ -208,7 +209,7 @@
* items */
struct list_head bc_pa_list; /* List of preallocated
* backchannel rpc_rqst's */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
struct list_head recv;
struct {
@@ -228,15 +229,15 @@
const char *address_strings[RPC_DISPLAY_MAX];
};
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Backchannel flags
*/
#define RPC_BC_PA_IN_USE 0x0001 /* Preallocated backchannel */
/* buffer in use */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static inline int bc_prealloc(struct rpc_rqst *req)
{
return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
@@ -246,7 +247,7 @@
{
return 0;
}
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
struct xprt_create {
int ident; /* XPRT_TRANSPORT identifier */
@@ -271,8 +272,8 @@
struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
void xprt_connect(struct rpc_task *task);
void xprt_reserve(struct rpc_task *task);
-int xprt_reserve_xprt(struct rpc_task *task);
-int xprt_reserve_xprt_cong(struct rpc_task *task);
+int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
+int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
int xprt_prepare_transmit(struct rpc_task *task);
void xprt_transmit(struct rpc_task *task);
void xprt_end_transmit(struct rpc_task *task);
@@ -282,7 +283,9 @@
void xprt_release(struct rpc_task *task);
struct rpc_xprt * xprt_get(struct rpc_xprt *xprt);
void xprt_put(struct rpc_xprt *xprt);
-struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req);
+struct rpc_xprt * xprt_alloc(struct net *net, size_t size,
+ unsigned int num_prealloc,
+ unsigned int max_req);
void xprt_free(struct rpc_xprt *);
static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
@@ -321,7 +324,6 @@
#define XPRT_CLOSING (6)
#define XPRT_CONNECTION_ABORT (7)
#define XPRT_CONNECTION_CLOSE (8)
-#define XPRT_INITIALIZED (9)
static inline void xprt_set_connected(struct rpc_xprt *xprt)
{
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 011bcfe..111843f 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -59,6 +59,84 @@
#define WATCHDOG_NOWAYOUT 0
#endif
+struct watchdog_ops;
+struct watchdog_device;
+
+/** struct watchdog_ops - The watchdog device's operations
+ *
+ * @owner: The module owner.
+ * @start: The routine for starting the watchdog device.
+ * @stop: The routine for stopping the watchdog device.
+ * @ping: The routine that sends a keepalive ping to the watchdog device.
+ * @status: The routine that shows the status of the watchdog device.
+ * @set_timeout: The routine for setting the watchdog device's timeout value.
+ * @ioctl: The routine that handles extra ioctl calls.
+ *
+ * The watchdog_ops structure contains a list of low-level operations
+ * that control a watchdog device. It also contains the module that owns
+ * these operations. The start and stop functions are mandatory; all other
+ * functions are optional.
+ */
+struct watchdog_ops {
+ struct module *owner;
+ /* mandatory operations */
+ int (*start)(struct watchdog_device *);
+ int (*stop)(struct watchdog_device *);
+ /* optional operations */
+ int (*ping)(struct watchdog_device *);
+ unsigned int (*status)(struct watchdog_device *);
+ int (*set_timeout)(struct watchdog_device *, unsigned int);
+ long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+};
+
+/** struct watchdog_device - The structure that defines a watchdog device
+ *
+ * @info: Pointer to a watchdog_info structure.
+ * @ops: Pointer to the list of watchdog operations.
+ * @bootstatus: Status of the watchdog device at boot.
+ * @timeout: The watchdog device's timeout value.
+ * @min_timeout: The watchdog device's minimum timeout value.
+ * @max_timeout: The watchdog device's maximum timeout value.
+ * @driver_data: Pointer to the driver's private data.
+ * @status: Field that contains the device's internal status bits.
+ *
+ * The watchdog_device structure contains all information about a
+ * watchdog timer device.
+ *
+ * The driver_data field must not be accessed directly. It must be accessed
+ * via the watchdog_set_drvdata and watchdog_get_drvdata helpers.
+ */
+struct watchdog_device {
+ const struct watchdog_info *info;
+ const struct watchdog_ops *ops;
+ unsigned int bootstatus;
+ unsigned int timeout;
+ unsigned int min_timeout;
+ unsigned int max_timeout;
+ void *driver_data;
+ unsigned long status;
+/* Bit numbers for status flags */
+#define WDOG_ACTIVE 0 /* Is the watchdog running/active */
+#define WDOG_DEV_OPEN 1 /* Opened via /dev/watchdog ? */
+#define WDOG_ALLOW_RELEASE 2 /* Did we receive the magic char ? */
+#define WDOG_NO_WAY_OUT 3 /* Is 'nowayout' feature set ? */
+};
+
+/* Use the following functions to manipulate watchdog driver specific data */
+static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
+{
+ wdd->driver_data = data;
+}
+
+static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+{
+ return wdd->driver_data;
+}
+
+/* drivers/watchdog/core/watchdog_core.c */
+extern int watchdog_register_device(struct watchdog_device *);
+extern void watchdog_unregister_device(struct watchdog_device *);
+
#endif /* __KERNEL__ */
#endif /* ifndef _LINUX_WATCHDOG_H */
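
A minimal driver sketch against the new framework; watchdog_register_device() and the ops contract come from the header above, while the hardware accessors are hypothetical placeholders:

    #include <linux/module.h>
    #include <linux/watchdog.h>

    static int my_wdt_start(struct watchdog_device *wdd)
    {
            /* hw_enable(wdd->timeout);  -- hypothetical hardware poke */
            return 0;
    }

    static int my_wdt_stop(struct watchdog_device *wdd)
    {
            /* hw_disable();  -- hypothetical hardware poke */
            return 0;
    }

    static const struct watchdog_info my_wdt_info = {
            .options  = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
            .identity = "example wdt",
    };

    static const struct watchdog_ops my_wdt_ops = {
            .owner = THIS_MODULE,
            .start = my_wdt_start,          /* mandatory */
            .stop  = my_wdt_stop,           /* mandatory */
    };

    static struct watchdog_device my_wdt = {
            .info        = &my_wdt_info,
            .ops         = &my_wdt_ops,
            .timeout     = 30,
            .min_timeout = 1,
            .max_timeout = 255,
    };

    static int __init my_wdt_init(void)
    {
            return watchdog_register_device(&my_wdt);
    }

    static void __exit my_wdt_exit(void)
    {
            watchdog_unregister_device(&my_wdt);
    }

    module_init(my_wdt_init);
    module_exit(my_wdt_exit);
    MODULE_LICENSE("GPL");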
diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h
index 38e8c4d..fd98bb9 100644
--- a/include/linux/wm97xx.h
+++ b/include/linux/wm97xx.h
@@ -38,7 +38,11 @@
#define WM97XX_ADCSEL_X 0x1000 /* x coord measurement */
#define WM97XX_ADCSEL_Y 0x2000 /* y coord measurement */
#define WM97XX_ADCSEL_PRES 0x3000 /* pressure measurement */
-#define WM97XX_ADCSEL_MASK 0x7000
+#define WM97XX_AUX_ID1 0x4000
+#define WM97XX_AUX_ID2 0x5000
+#define WM97XX_AUX_ID3 0x6000
+#define WM97XX_AUX_ID4 0x7000
+#define WM97XX_ADCSEL_MASK 0x7000 /* ADC selection mask */
#define WM97XX_COO 0x0800 /* enable coordinate mode */
#define WM97XX_CTC 0x0400 /* enable continuous mode */
#define WM97XX_CM_RATE_93 0x0000 /* 93.75Hz continuous rate */
@@ -61,13 +65,6 @@
#define WM97XX_PRP_DET_DIG 0xc000 /* detect on, digitise on */
#define WM97XX_RPR 0x2000 /* wake up on pen down */
#define WM97XX_PEN_DOWN 0x8000 /* pen is down */
-#define WM97XX_ADCSRC_MASK 0x7000 /* ADC source mask */
-
-#define WM97XX_AUX_ID1 0x8001
-#define WM97XX_AUX_ID2 0x8002
-#define WM97XX_AUX_ID3 0x8003
-#define WM97XX_AUX_ID4 0x8004
-
/* WM9712 Bits */
#define WM9712_45W 0x1000 /* set for 5-wire touchscreen */
diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h
index ea68b3c..988ba06 100644
--- a/include/scsi/iscsi_proto.h
+++ b/include/scsi/iscsi_proto.h
@@ -29,10 +29,40 @@
/* default iSCSI listen port for incoming connections */
#define ISCSI_LISTEN_PORT 3260
+/* iSCSI header length */
+#define ISCSI_HDR_LEN 48
+
+/* iSCSI CRC32C length */
+#define ISCSI_CRC_LEN 4
+
/* Padding word length */
#define ISCSI_PAD_LEN 4
/*
+ * Serial Number Arithmetic, 32 bits, RFC1982
+ */
+
+static inline int iscsi_sna_lt(u32 n1, u32 n2)
+{
+ return (s32)(n1 - n2) < 0;
+}
+
+static inline int iscsi_sna_lte(u32 n1, u32 n2)
+{
+ return (s32)(n1 - n2) <= 0;
+}
+
+static inline int iscsi_sna_gt(u32 n1, u32 n2)
+{
+ return (s32)(n1 - n2) > 0;
+}
+
+static inline int iscsi_sna_gte(u32 n1, u32 n2)
+{
+ return (s32)(n1 - n2) >= 0;
+}
+
+/*
* useful common (control and data paths) macros
*/
#define ntoh24(p) (((p)[0] << 16) | ((p)[1] << 8) | ((p)[2]))
@@ -116,7 +146,7 @@
#define ISCSI_CDB_SIZE 16
/* iSCSI PDU Header */
-struct iscsi_cmd {
+struct iscsi_scsi_req {
uint8_t opcode;
uint8_t flags;
__be16 rsvd2;
@@ -161,7 +191,7 @@
};
/* SCSI Response Header */
-struct iscsi_cmd_rsp {
+struct iscsi_scsi_rsp {
uint8_t opcode;
uint8_t flags;
uint8_t response;
@@ -406,7 +436,7 @@
};
/* Login Header */
-struct iscsi_login {
+struct iscsi_login_req {
uint8_t opcode;
uint8_t flags;
uint8_t max_version; /* Max. version supported */
@@ -427,7 +457,13 @@
#define ISCSI_FLAG_LOGIN_TRANSIT 0x80
#define ISCSI_FLAG_LOGIN_CONTINUE 0x40
#define ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK 0x0C /* 2 bits */
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE1 0x04
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE2 0x08
+#define ISCSI_FLAG_LOGIN_CURRENT_STAGE3 0x0C
#define ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK 0x03 /* 2 bits */
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE1 0x01
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE2 0x02
+#define ISCSI_FLAG_LOGIN_NEXT_STAGE3 0x03
#define ISCSI_LOGIN_CURRENT_STAGE(flags) \
((flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2)
@@ -550,17 +586,25 @@
struct iscsi_snack {
uint8_t opcode;
uint8_t flags;
- uint8_t rsvd2[14];
+ uint8_t rsvd2[2];
+ uint8_t hlength;
+ uint8_t dlength[3];
+ uint8_t lun[8];
itt_t itt;
+ __be32 ttt;
+ uint8_t rsvd3[4];
+ __be32 exp_statsn;
+ uint8_t rsvd4[8];
__be32 begrun;
__be32 runlength;
- __be32 exp_statsn;
- __be32 rsvd3;
- __be32 exp_datasn;
- uint8_t rsvd6[8];
};
/* SNACK PDU flags */
+#define ISCSI_FLAG_SNACK_TYPE_DATA 0
+#define ISCSI_FLAG_SNACK_TYPE_R2T 0
+#define ISCSI_FLAG_SNACK_TYPE_STATUS 1
+#define ISCSI_FLAG_SNACK_TYPE_DATA_ACK 2
+#define ISCSI_FLAG_SNACK_TYPE_RDATA 3
#define ISCSI_FLAG_SNACK_TYPE_MASK 0x0F /* 4 bits */
/* Reject Message Header */
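
The iscsi_sna_*() helpers added above implement RFC 1982 serial number arithmetic: the subtraction is done modulo 2^32 and the difference is interpreted as signed, which gives the right ordering across wraparound as long as the two values are within 2^31 of each other. A userspace check of the idiom:

    #include <assert.h>
    #include <stdint.h>

    /* Userspace copy of the comparison idiom added above. */
    static int sna_lt(uint32_t n1, uint32_t n2)
    {
            return (int32_t)(n1 - n2) < 0;
    }

    int main(void)
    {
            /* A plain unsigned comparison gets wraparound wrong... */
            assert(!(0x00000005u > 0xfffffff0u));

            /* ...serial arithmetic sees 5 as "after" 0xfffffff0. */
            assert(sna_lt(0xfffffff0u, 0x00000005u));
            assert(!sna_lt(0x00000005u, 0xfffffff0u));
            return 0;
    }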
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index e1bad11..57e71fa 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -507,6 +507,18 @@
void snd_pcm_vma_notify_data(void *client, void *data);
int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area);
+
+#ifdef CONFIG_SND_DEBUG
+void snd_pcm_debug_name(struct snd_pcm_substream *substream,
+ char *name, size_t len);
+#else
+static inline void
+snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
+{
+ *buf = 0;
+}
+#endif
+
/*
* PCM library
*/
@@ -749,17 +761,18 @@
return &params->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL];
}
-#define params_access(p) ((__force snd_pcm_access_t)snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_ACCESS)))
-#define params_format(p) ((__force snd_pcm_format_t)snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_FORMAT)))
-#define params_subformat(p) snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_SUBFORMAT))
-#define params_channels(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_CHANNELS)->min
-#define params_rate(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_RATE)->min
-#define params_period_size(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min
-#define params_period_bytes(p) ((params_period_size(p)*snd_pcm_format_physical_width(params_format(p))*params_channels(p))/8)
-#define params_periods(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_PERIODS)->min
-#define params_buffer_size(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min
-#define params_buffer_bytes(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min
-
+#define params_channels(p) \
+ (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_CHANNELS)->min)
+#define params_rate(p) \
+ (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_RATE)->min)
+#define params_period_size(p) \
+ (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min)
+#define params_periods(p) \
+ (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_PERIODS)->min)
+#define params_buffer_size(p) \
+ (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min)
+#define params_buffer_bytes(p) \
+ (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min)
int snd_interval_refine(struct snd_interval *i, const struct snd_interval *v);
void snd_interval_mul(const struct snd_interval *a, const struct snd_interval *b, struct snd_interval *c);
diff --git a/include/sound/pcm_params.h b/include/sound/pcm_params.h
index 85cf1cf..f494f1e 100644
--- a/include/sound/pcm_params.h
+++ b/include/sound/pcm_params.h
@@ -337,5 +337,19 @@
return 0;
}
-#endif /* __SOUND_PCM_PARAMS_H */
+#define params_access(p) ((__force snd_pcm_access_t)\
+ snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_ACCESS)))
+#define params_format(p) ((__force snd_pcm_format_t)\
+ snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_FORMAT)))
+#define params_subformat(p) \
+ snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_SUBFORMAT))
+static inline unsigned int
+params_period_bytes(const struct snd_pcm_hw_params *p)
+{
+ return (params_period_size(p) *
+ snd_pcm_format_physical_width(params_format(p)) *
+ params_channels(p)) / 8;
+}
+
+#endif /* __SOUND_PCM_PARAMS_H */
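
Converting params_period_bytes() from a macro to an inline also makes the arithmetic readable: bytes per period = frames per period x physical sample width (in bits) x channels / 8. A worked instance of the formula:

    #include <assert.h>

    int main(void)
    {
            unsigned int period_size = 1024;   /* frames per period */
            unsigned int width = 16;           /* e.g. S16_LE physical bits */
            unsigned int channels = 2;

            /* 1024 * 16 * 2 / 8 = 4096 bytes per period */
            assert(period_size * width * channels / 8 == 4096);
            return 0;
    }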
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index e09505c..e0583b7 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -266,6 +266,12 @@
.get = snd_soc_dapm_get_enum_virt, \
.put = snd_soc_dapm_put_enum_virt, \
.private_value = (unsigned long)&xenum }
+#define SOC_DAPM_ENUM_EXT(xname, xenum, xget, xput) \
+{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
+ .info = snd_soc_info_enum_double, \
+ .get = xget, \
+ .put = xput, \
+ .private_value = (unsigned long)&xenum }
#define SOC_DAPM_VALUE_ENUM(xname, xenum) \
{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
.info = snd_soc_info_enum_double, \
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9844580..1d2b6ce 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -27,9 +27,11 @@
*/
#include <linux/cgroup.h>
+#include <linux/cred.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
@@ -1514,6 +1516,7 @@
struct cgroup *root_cgrp = &root->top_cgroup;
struct inode *inode;
struct cgroupfs_root *existing_root;
+ const struct cred *cred;
int i;
BUG_ON(sb->s_root != NULL);
@@ -1593,7 +1596,9 @@
BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
+ cred = override_creds(&init_cred);
cgroup_populate_dir(root_cgrp);
+ revert_creds(cred);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
} else {
diff --git a/kernel/compat.c b/kernel/compat.c
index 18197ae..616c781 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -992,11 +992,8 @@
sigset_from_compat(&newset, &newset32);
sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
- spin_lock_irq(&current->sighand->siglock);
current->saved_sigmask = current->blocked;
- current->blocked = newset;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ set_current_blocked(&newset);
current->state = TASK_INTERRUPTIBLE;
schedule();
diff --git a/kernel/signal.c b/kernel/signal.c
index d7f70ae..291c970 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3102,15 +3102,11 @@
SYSCALL_DEFINE1(ssetmask, int, newmask)
{
- int old;
+ int old = current->blocked.sig[0];
+ sigset_t newset;
- spin_lock_irq(&current->sighand->siglock);
- old = current->blocked.sig[0];
-
- siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)|
- sigmask(SIGSTOP)));
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ siginitset(&newset, newmask & ~(sigmask(SIGKILL) | sigmask(SIGSTOP)));
+ set_current_blocked(&newset);
return old;
}
@@ -3167,11 +3163,8 @@
return -EFAULT;
sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP));
- spin_lock_irq(&current->sighand->siglock);
current->saved_sigmask = current->blocked;
- current->blocked = newset;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ set_current_blocked(&newset);
current->state = TASK_INTERRUPTIBLE;
schedule();
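
Both the compat and the native paths now funnel through set_current_blocked() instead of open-coding the siglock dance. A simplified model of what the helper centralizes — the real implementation in kernel/signal.c also retargets shared pending signals before updating the mask:

    /* Kernel-context sketch, not standalone code. */
    static void set_current_blocked_sketch(const sigset_t *newset)
    {
            struct task_struct *tsk = current;

            spin_lock_irq(&tsk->sighand->siglock);
            tsk->blocked = *newset;
            recalc_sigpending();
            spin_unlock_irq(&tsk->sighand->siglock);
    }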
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 934e221..9d40a07 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -695,7 +695,7 @@
ether_setup(dev);
dev->priv_flags |= IFF_802_1Q_VLAN;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
dev->tx_queue_len = 0;
dev->netdev_ops = &vlan_netdev_ops;
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 8c100c9..d4f5dff 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -231,6 +231,7 @@
dev->addr_len = ETH_ALEN;
ether_setup(dev);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->netdev_ops = &bnep_netdev_ops;
dev->watchdog_timeo = HZ * 2;
diff --git a/net/core/dev.c b/net/core/dev.c
index 9444c5c..17d67b5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4497,10 +4497,10 @@
*/
if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
__dev_set_promiscuity(dev, 1);
- dev->uc_promisc = 1;
+ dev->uc_promisc = true;
} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
__dev_set_promiscuity(dev, -1);
- dev->uc_promisc = 0;
+ dev->uc_promisc = false;
}
if (ops->ndo_set_multicast_list)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f76079c..e35a6fb 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1070,7 +1070,9 @@
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
-
+ if ((value > 0) &&
+ (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+ return -ENOTSUPP;
i += len;
pkt_dev->clone_skb = value;
@@ -3555,7 +3557,6 @@
pkt_dev->min_pkt_size = ETH_ZLEN;
pkt_dev->max_pkt_size = ETH_ZLEN;
pkt_dev->nfrags = 0;
- pkt_dev->clone_skb = pg_clone_skb_d;
pkt_dev->delay = pg_delay_d;
pkt_dev->count = pg_count_d;
pkt_dev->sofar = 0;
@@ -3563,7 +3564,6 @@
pkt_dev->udp_src_max = 9;
pkt_dev->udp_dst_min = 9;
pkt_dev->udp_dst_max = 9;
-
pkt_dev->vlan_p = 0;
pkt_dev->vlan_cfi = 0;
pkt_dev->vlan_id = 0xffff;
@@ -3575,6 +3575,8 @@
err = pktgen_setup_dev(pkt_dev, ifname);
if (err)
goto out1;
+ if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)
+ pkt_dev->clone_skb = pg_clone_skb_d;
pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
&pktgen_if_fops, pkt_dev);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 5cffb63..27997d3 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -231,6 +231,7 @@
* eth_header_cache - fill cache entry from neighbour
* @neigh: source neighbour
* @hh: destination cache entry
+ * @type: Ethernet type field
* Create an Ethernet header template from the neighbour.
*/
int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type)
@@ -339,6 +340,7 @@
dev->addr_len = ETH_ALEN;
dev->tx_queue_len = 1000; /* Ethernet wants good queues */
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
+ dev->priv_flags = IFF_TX_SKB_SHARING;
memset(dev->broadcast, 0xFF, ETH_ALEN);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 37b3c18..bc19bd0 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1134,15 +1134,15 @@
struct in_device *in_dev)
{
- struct in_ifaddr *ifa = in_dev->ifa_list;
+ struct in_ifaddr *ifa;
- if (!ifa)
- return;
-
- arp_send(ARPOP_REQUEST, ETH_P_ARP,
- ifa->ifa_local, dev,
- ifa->ifa_local, NULL,
- dev->dev_addr, NULL);
+ for (ifa = in_dev->ifa_list; ifa;
+ ifa = ifa->ifa_next) {
+ arp_send(ARPOP_REQUEST, ETH_P_ARP,
+ ifa->ifa_local, dev,
+ ifa->ifa_local, NULL,
+ dev->dev_addr, NULL);
+ }
}
/* Called only under RTNL semaphore */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a06c53c..a55500c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1481,6 +1481,8 @@
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+ if (ifp->prefix_len == 127) /* RFC 6164 */
+ return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
if (ipv6_addr_any(&addr))
return;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index a8193f5..d2726a7 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -103,7 +103,7 @@
static void l2tp_eth_dev_setup(struct net_device *dev)
{
ether_setup(dev);
-
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->netdev_ops = &l2tp_eth_netdev_ops;
dev->destructor = free_netdev;
}
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index cd5fb40..556e7e6 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -698,6 +698,7 @@
static void ieee80211_if_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->netdev_ops = &ieee80211_dataif_ops;
dev->destructor = free_netdev;
}
diff --git a/net/socket.c b/net/socket.c
index 02dc82d..b1cbbcd 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -467,7 +467,7 @@
struct inode *inode;
struct socket *sock;
- inode = new_inode(sock_mnt->mnt_sb);
+ inode = new_inode_pseudo(sock_mnt->mnt_sb);
if (!inode)
return NULL;
@@ -580,7 +580,7 @@
}
EXPORT_SYMBOL(sock_sendmsg);
-int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
+static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
{
struct kiocb iocb;
struct sock_iocb siocb;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index b2198e6..ffd243d 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -4,6 +4,10 @@
config SUNRPC_GSS
tristate
+config SUNRPC_BACKCHANNEL
+ bool
+ depends on SUNRPC
+
config SUNRPC_XPRT_RDMA
tristate
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 9d2fca5..8209a04 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -13,6 +13,6 @@
addr.o rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
-sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index cf06af3..91eaa26 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -29,8 +29,6 @@
#define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-#if defined(CONFIG_NFS_V4_1)
-
/*
* Helper routines that track the number of preallocation elements
* on the transport.
@@ -174,7 +172,7 @@
dprintk("RPC: setup backchannel transport failed\n");
return -1;
}
-EXPORT_SYMBOL(xprt_setup_backchannel);
+EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
/*
* Destroys the backchannel preallocated structures.
@@ -204,7 +202,7 @@
dprintk("RPC: backchannel list empty= %s\n",
list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
-EXPORT_SYMBOL(xprt_destroy_backchannel);
+EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
/*
* One or more rpc_rqst structure have been preallocated during the
@@ -279,4 +277,3 @@
spin_unlock_bh(&xprt->bc_pa_lock);
}
-#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 1dd1a68..0b2eb38 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -27,8 +27,6 @@
* reply over an existing open connection previously established by the client.
*/
-#if defined(CONFIG_NFS_V4_1)
-
#include <linux/module.h>
#include <linux/sunrpc/xprt.h>
@@ -63,4 +61,3 @@
return ret;
}
-#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c50818f..c5347d2 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -64,9 +64,9 @@
static void call_bind(struct rpc_task *task);
static void call_bind_status(struct rpc_task *task);
static void call_transmit(struct rpc_task *task);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static void call_bc_transmit(struct rpc_task *task);
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
static void call_status(struct rpc_task *task);
static void call_transmit_status(struct rpc_task *task);
static void call_refresh(struct rpc_task *task);
@@ -715,7 +715,7 @@
}
EXPORT_SYMBOL_GPL(rpc_call_async);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/**
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
@@ -758,7 +758,7 @@
dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
return task;
}
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
void
rpc_call_start(struct rpc_task *task)
@@ -1361,7 +1361,7 @@
}
}
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* 5b. Send the backchannel RPC reply. On error, drop the reply. In
* addition, disconnect on connectivity errors.
@@ -1425,7 +1425,7 @@
}
rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
}
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/*
* 6. Sort out the RPC call status
@@ -1550,8 +1550,7 @@
kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
__be32 *p;
- dprintk("RPC: %5u call_decode (status %d)\n",
- task->tk_pid, task->tk_status);
+ dprint_status(task);
if (task->tk_flags & RPC_CALL_MAJORSEEN) {
if (clnt->cl_chatty)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 4814e24..d12ffa5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -97,14 +97,16 @@
/*
* Add new request to a priority queue.
*/
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ unsigned char queue_priority)
{
struct list_head *q;
struct rpc_task *t;
INIT_LIST_HEAD(&task->u.tk_wait.links);
- q = &queue->tasks[task->tk_priority];
- if (unlikely(task->tk_priority > queue->maxpriority))
+ q = &queue->tasks[queue_priority];
+ if (unlikely(queue_priority > queue->maxpriority))
q = &queue->tasks[queue->maxpriority];
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_owner == task->tk_owner) {
@@ -123,12 +125,14 @@
* improve overall performance.
* Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
-static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ unsigned char queue_priority)
{
BUG_ON (RPC_IS_QUEUED(task));
if (RPC_IS_PRIORITY(queue))
- __rpc_add_wait_queue_priority(queue, task);
+ __rpc_add_wait_queue_priority(queue, task, queue_priority);
else if (RPC_IS_SWAPPER(task))
list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
@@ -311,13 +315,15 @@
* NB: An RPC task will only receive interrupt-driven events as long
* as it's on a wait queue.
*/
-static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
- rpc_action action)
+static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
+ struct rpc_task *task,
+ rpc_action action,
+ unsigned char queue_priority)
{
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
task->tk_pid, rpc_qname(q), jiffies);
- __rpc_add_wait_queue(q, task);
+ __rpc_add_wait_queue(q, task, queue_priority);
BUG_ON(task->tk_callback != NULL);
task->tk_callback = action;
@@ -334,11 +340,25 @@
* Protect the queue operations.
*/
spin_lock_bh(&q->lock);
- __rpc_sleep_on(q, task, action);
+ __rpc_sleep_on_priority(q, task, action, task->tk_priority);
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
+void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
+ rpc_action action, int priority)
+{
+ /* We shouldn't ever put an inactive task to sleep */
+ BUG_ON(!RPC_IS_ACTIVATED(task));
+
+ /*
+ * Protect the queue operations.
+ */
+ spin_lock_bh(&q->lock);
+ __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
+ spin_unlock_bh(&q->lock);
+}
+
/**
* __rpc_do_wake_up_task - wake up a single rpc_task
* @queue: wait queue
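The new rpc_sleep_on_priority() lets a caller pick the wait-queue level
explicitly instead of inheriting task->tk_priority; the
"priority - RPC_PRIORITY_LOW" rebasing maps the RPC_PRIORITY_* constants
onto zero-based queue levels. A usage sketch mirroring the xprt.c hunks
further down, where retransmissions jump the queue (example_sleep is an
illustrative wrapper, not part of the patch):

static void example_sleep(struct rpc_xprt *xprt, struct rpc_task *task,
			  struct rpc_rqst *req)
{
	int priority;

	if (req == NULL)
		priority = RPC_PRIORITY_LOW;	/* no slot allocated yet */
	else if (!req->rq_ntrans)
		priority = RPC_PRIORITY_NORMAL;	/* first transmission */
	else
		priority = RPC_PRIORITY_HIGH;	/* retransmission goes first */
	rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
}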
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2b90292..6a69a11 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1252,7 +1252,7 @@
}
}
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Process a backchannel RPC request that arrived over an existing
* outbound connection
@@ -1300,8 +1300,8 @@
return 0;
}
}
-EXPORT_SYMBOL(bc_svc_process);
-#endif /* CONFIG_NFS_V4_1 */
+EXPORT_SYMBOL_GPL(bc_svc_process);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/*
* Return (transport-specific) limit on the rpc payload.
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f2cb5b8..767d494 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -68,12 +68,12 @@
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
static void svc_bc_sock_free(struct svc_xprt *xprt);
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
@@ -1243,7 +1243,7 @@
return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
@@ -1284,7 +1284,7 @@
{
svc_unreg_xprt_class(&svc_tcp_bc_class);
}
-#else /* CONFIG_NFS_V4_1 */
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
static void svc_init_bc_xprt_sock(void)
{
}
@@ -1292,7 +1292,7 @@
static void svc_cleanup_bc_xprt_sock(void)
{
}
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
static struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
@@ -1623,7 +1623,7 @@
kfree(svsk);
}
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Create a back channel svc_xprt which shares the fore channel socket.
*/
@@ -1662,4 +1662,4 @@
if (xprt)
kfree(container_of(xprt, struct svc_sock, sk_xprt));
}
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f008c14..277ebd4 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -126,7 +126,7 @@
kaddr[buf->page_base + len] = '\0';
kunmap_atomic(kaddr, KM_USER0);
}
-EXPORT_SYMBOL(xdr_terminate_string);
+EXPORT_SYMBOL_GPL(xdr_terminate_string);
void
xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ce5eb68..9b6a4d1 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -62,6 +62,7 @@
/*
* Local functions
*/
+static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
@@ -191,10 +192,10 @@
* transport connects from colliding with writes. No congestion control
* is provided.
*/
-int xprt_reserve_xprt(struct rpc_task *task)
+int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- struct rpc_xprt *xprt = req->rq_xprt;
+ int priority;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
@@ -202,8 +203,10 @@
goto out_sleep;
}
xprt->snd_task = task;
- req->rq_bytes_sent = 0;
- req->rq_ntrans++;
+ if (req != NULL) {
+ req->rq_bytes_sent = 0;
+ req->rq_ntrans++;
+ }
return 1;
@@ -212,10 +215,13 @@
task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
- if (req->rq_ntrans)
- rpc_sleep_on(&xprt->resend, task, NULL);
+ if (req == NULL)
+ priority = RPC_PRIORITY_LOW;
+ else if (!req->rq_ntrans)
+ priority = RPC_PRIORITY_NORMAL;
else
- rpc_sleep_on(&xprt->sending, task, NULL);
+ priority = RPC_PRIORITY_HIGH;
+ rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -239,22 +245,24 @@
* integrated into the decision of whether a request is allowed to be
* woken up and given access to the transport.
*/
-int xprt_reserve_xprt_cong(struct rpc_task *task)
+int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
- struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req = task->tk_rqstp;
+ int priority;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
return 1;
goto out_sleep;
}
+ if (req == NULL) {
+ xprt->snd_task = task;
+ return 1;
+ }
if (__xprt_get_cong(xprt, task)) {
xprt->snd_task = task;
- if (req) {
- req->rq_bytes_sent = 0;
- req->rq_ntrans++;
- }
+ req->rq_bytes_sent = 0;
+ req->rq_ntrans++;
return 1;
}
xprt_clear_locked(xprt);
@@ -262,10 +270,13 @@
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
- if (req && req->rq_ntrans)
- rpc_sleep_on(&xprt->resend, task, NULL);
+ if (req == NULL)
+ priority = RPC_PRIORITY_LOW;
+ else if (!req->rq_ntrans)
+ priority = RPC_PRIORITY_NORMAL;
else
- rpc_sleep_on(&xprt->sending, task, NULL);
+ priority = RPC_PRIORITY_HIGH;
+ rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -275,7 +286,7 @@
int retval;
spin_lock_bh(&xprt->transport_lock);
- retval = xprt->ops->reserve_xprt(task);
+ retval = xprt->ops->reserve_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
return retval;
}
@@ -288,12 +299,9 @@
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
- task = rpc_wake_up_next(&xprt->resend);
- if (!task) {
- task = rpc_wake_up_next(&xprt->sending);
- if (!task)
- goto out_unlock;
- }
+ task = rpc_wake_up_next(&xprt->sending);
+ if (task == NULL)
+ goto out_unlock;
req = task->tk_rqstp;
xprt->snd_task = task;
@@ -310,24 +318,25 @@
static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
{
struct rpc_task *task;
+ struct rpc_rqst *req;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
if (RPCXPRT_CONGESTED(xprt))
goto out_unlock;
- task = rpc_wake_up_next(&xprt->resend);
- if (!task) {
- task = rpc_wake_up_next(&xprt->sending);
- if (!task)
- goto out_unlock;
+ task = rpc_wake_up_next(&xprt->sending);
+ if (task == NULL)
+ goto out_unlock;
+
+ req = task->tk_rqstp;
+ if (req == NULL) {
+ xprt->snd_task = task;
+ return;
}
if (__xprt_get_cong(xprt, task)) {
- struct rpc_rqst *req = task->tk_rqstp;
xprt->snd_task = task;
- if (req) {
- req->rq_bytes_sent = 0;
- req->rq_ntrans++;
- }
+ req->rq_bytes_sent = 0;
+ req->rq_ntrans++;
return;
}
out_unlock:
@@ -852,7 +861,7 @@
err = req->rq_reply_bytes_recvd;
goto out_unlock;
}
- if (!xprt->ops->reserve_xprt(task))
+ if (!xprt->ops->reserve_xprt(xprt, task))
err = -EAGAIN;
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
@@ -928,28 +937,66 @@
spin_unlock_bh(&xprt->transport_lock);
}
+static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
+{
+ struct rpc_rqst *req = ERR_PTR(-EAGAIN);
+
+ if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
+ goto out;
+ req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
+ if (req != NULL)
+ goto out;
+ atomic_dec(&xprt->num_reqs);
+ req = ERR_PTR(-ENOMEM);
+out:
+ return req;
+}
+
+static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+ if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
+ kfree(req);
+ return true;
+ }
+ return false;
+}
+
static void xprt_alloc_slot(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_rqst *req;
- task->tk_status = 0;
- if (task->tk_rqstp)
- return;
if (!list_empty(&xprt->free)) {
- struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
- list_del_init(&req->rq_list);
- task->tk_rqstp = req;
- xprt_request_init(task, xprt);
- return;
+ req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
+ list_del(&req->rq_list);
+ goto out_init_req;
}
- dprintk("RPC: waiting for request slot\n");
+ req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT);
+ if (!IS_ERR(req))
+ goto out_init_req;
+ switch (PTR_ERR(req)) {
+ case -ENOMEM:
+ rpc_delay(task, HZ >> 2);
+ dprintk("RPC: dynamic allocation of request slot "
+ "failed! Retrying\n");
+ break;
+ case -EAGAIN:
+ rpc_sleep_on(&xprt->backlog, task, NULL);
+ dprintk("RPC: waiting for request slot\n");
+ }
task->tk_status = -EAGAIN;
- task->tk_timeout = 0;
- rpc_sleep_on(&xprt->backlog, task, NULL);
+ return;
+out_init_req:
+ task->tk_status = 0;
+ task->tk_rqstp = req;
+ xprt_request_init(task, xprt);
}
static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
+ if (xprt_dynamic_free_slot(xprt, req))
+ return;
+
memset(req, 0, sizeof(*req)); /* mark unused */
spin_lock(&xprt->reserve_lock);
@@ -958,25 +1005,49 @@
spin_unlock(&xprt->reserve_lock);
}
-struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
+static void xprt_free_all_slots(struct rpc_xprt *xprt)
+{
+ struct rpc_rqst *req;
+ while (!list_empty(&xprt->free)) {
+ req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list);
+ list_del(&req->rq_list);
+ kfree(req);
+ }
+}
+
+struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
+ unsigned int num_prealloc,
+ unsigned int max_alloc)
{
struct rpc_xprt *xprt;
+ struct rpc_rqst *req;
+ int i;
xprt = kzalloc(size, GFP_KERNEL);
if (xprt == NULL)
goto out;
- atomic_set(&xprt->count, 1);
- xprt->max_reqs = max_req;
- xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
- if (xprt->slot == NULL)
+ xprt_init(xprt, net);
+
+ for (i = 0; i < num_prealloc; i++) {
+ req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
+ if (!req)
+ break;
+ list_add(&req->rq_list, &xprt->free);
+ }
+ if (i < num_prealloc)
goto out_free;
+ if (max_alloc > num_prealloc)
+ xprt->max_reqs = max_alloc;
+ else
+ xprt->max_reqs = num_prealloc;
+ xprt->min_reqs = num_prealloc;
+ atomic_set(&xprt->num_reqs, num_prealloc);
- xprt->xprt_net = get_net(net);
return xprt;
out_free:
- kfree(xprt);
+ xprt_free(xprt);
out:
return NULL;
}
@@ -985,7 +1056,7 @@
void xprt_free(struct rpc_xprt *xprt)
{
put_net(xprt->xprt_net);
- kfree(xprt->slot);
+ xprt_free_all_slots(xprt);
kfree(xprt);
}
EXPORT_SYMBOL_GPL(xprt_free);
@@ -1001,10 +1072,24 @@
{
struct rpc_xprt *xprt = task->tk_xprt;
- task->tk_status = -EIO;
+ task->tk_status = 0;
+ if (task->tk_rqstp != NULL)
+ return;
+
+ /* Note: grabbing the xprt_lock_write() here is not strictly needed,
+ * but ensures that we throttle new slot allocation if the transport
+ * is congested (e.g. if reconnecting or if we're out of socket
+ * write buffer space).
+ */
+ task->tk_timeout = 0;
+ task->tk_status = -EAGAIN;
+ if (!xprt_lock_write(xprt, task))
+ return;
+
spin_lock(&xprt->reserve_lock);
xprt_alloc_slot(task);
spin_unlock(&xprt->reserve_lock);
+ xprt_release_write(xprt, task);
}
static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -1021,6 +1106,7 @@
{
struct rpc_rqst *req = task->tk_rqstp;
+ INIT_LIST_HEAD(&req->rq_list);
req->rq_timeout = task->tk_client->cl_timeout->to_initval;
req->rq_task = task;
req->rq_xprt = xprt;
@@ -1073,6 +1159,34 @@
xprt_free_bc_request(req);
}
+static void xprt_init(struct rpc_xprt *xprt, struct net *net)
+{
+ atomic_set(&xprt->count, 1);
+
+ spin_lock_init(&xprt->transport_lock);
+ spin_lock_init(&xprt->reserve_lock);
+
+ INIT_LIST_HEAD(&xprt->free);
+ INIT_LIST_HEAD(&xprt->recv);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ spin_lock_init(&xprt->bc_pa_lock);
+ INIT_LIST_HEAD(&xprt->bc_pa_list);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+ xprt->last_used = jiffies;
+ xprt->cwnd = RPC_INITCWND;
+ xprt->bind_index = 0;
+
+ rpc_init_wait_queue(&xprt->binding, "xprt_binding");
+ rpc_init_wait_queue(&xprt->pending, "xprt_pending");
+ rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
+ rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
+
+ xprt_init_xid(xprt);
+
+ xprt->xprt_net = get_net(net);
+}
+
/**
* xprt_create_transport - create an RPC transport
* @args: rpc transport creation arguments
@@ -1081,7 +1195,6 @@
struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
{
struct rpc_xprt *xprt;
- struct rpc_rqst *req;
struct xprt_class *t;
spin_lock(&xprt_list_lock);
@@ -1100,46 +1213,17 @@
if (IS_ERR(xprt)) {
dprintk("RPC: xprt_create_transport: failed, %ld\n",
-PTR_ERR(xprt));
- return xprt;
+ goto out;
}
- if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state))
- /* ->setup returned a pre-initialized xprt: */
- return xprt;
-
- spin_lock_init(&xprt->transport_lock);
- spin_lock_init(&xprt->reserve_lock);
-
- INIT_LIST_HEAD(&xprt->free);
- INIT_LIST_HEAD(&xprt->recv);
-#if defined(CONFIG_NFS_V4_1)
- spin_lock_init(&xprt->bc_pa_lock);
- INIT_LIST_HEAD(&xprt->bc_pa_list);
-#endif /* CONFIG_NFS_V4_1 */
-
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
if (xprt_has_timer(xprt))
setup_timer(&xprt->timer, xprt_init_autodisconnect,
(unsigned long)xprt);
else
init_timer(&xprt->timer);
- xprt->last_used = jiffies;
- xprt->cwnd = RPC_INITCWND;
- xprt->bind_index = 0;
-
- rpc_init_wait_queue(&xprt->binding, "xprt_binding");
- rpc_init_wait_queue(&xprt->pending, "xprt_pending");
- rpc_init_wait_queue(&xprt->sending, "xprt_sending");
- rpc_init_wait_queue(&xprt->resend, "xprt_resend");
- rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
-
- /* initialize free list */
- for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
- list_add(&req->rq_list, &xprt->free);
-
- xprt_init_xid(xprt);
-
dprintk("RPC: created transport %p with %u slots\n", xprt,
xprt->max_reqs);
+out:
return xprt;
}
@@ -1157,7 +1241,6 @@
rpc_destroy_wait_queue(&xprt->binding);
rpc_destroy_wait_queue(&xprt->pending);
rpc_destroy_wait_queue(&xprt->sending);
- rpc_destroy_wait_queue(&xprt->resend);
rpc_destroy_wait_queue(&xprt->backlog);
cancel_work_sync(&xprt->task_cleanup);
/*
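The xprt.c changes above replace the fixed kcalloc()'d slot array with a
free list that can grow on demand, bounded by atomic_add_unless(): grow
only while the counter is below the maximum, free only while it is above
the minimum. A self-contained sketch of that idiom under assumed kernel
context (<linux/slab.h>, <linux/atomic.h>); slot_pool and its helpers are
illustrative names, not the patch's types:

struct slot_pool {
	atomic_t num;
	unsigned int min, max;
};

static void *pool_grow(struct slot_pool *p, size_t sz, gfp_t gfp)
{
	void *obj;

	if (!atomic_add_unless(&p->num, 1, p->max))
		return NULL;		/* already at the ceiling */
	obj = kzalloc(sz, gfp);
	if (!obj)
		atomic_dec(&p->num);	/* allocation failed, roll back */
	return obj;
}

static bool pool_shrink(struct slot_pool *p, void *obj)
{
	if (atomic_add_unless(&p->num, -1, p->min)) {
		kfree(obj);		/* above the floor: release it */
		return true;
	}
	return false;			/* keep it on the free list instead */
}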
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 0867070..b446e10 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -283,6 +283,7 @@
}
xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
+ xprt_rdma_slot_table_entries,
xprt_rdma_slot_table_entries);
if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
@@ -452,9 +453,8 @@
}
static int
-xprt_rdma_reserve_xprt(struct rpc_task *task)
+xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
- struct rpc_xprt *xprt = task->tk_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
@@ -466,7 +466,7 @@
BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
}
xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
- return xprt_reserve_xprt_cong(task);
+ return xprt_reserve_xprt_cong(xprt, task);
}
/*
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ddf0528..08c5d5a 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -109,7 +109,7 @@
*/
/* temporary static scatter/gather max */
-#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */
+#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */
#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
#define MAX_RPCRDMAHDR (\
/* max supported RPC/RDMA header */ \
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 72abb73..d7f97ef 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -37,7 +37,7 @@
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprtsock.h>
#include <linux/file.h>
-#ifdef CONFIG_NFS_V4_1
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
#include <linux/sunrpc/bc_xprt.h>
#endif
@@ -54,7 +54,8 @@
* xprtsock tunables
*/
unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
-unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
+unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
@@ -75,6 +76,7 @@
static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
+static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT;
static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
@@ -104,6 +106,15 @@
.extra2 = &max_slot_table_size
},
{
+ .procname = "tcp_max_slot_table_entries",
+ .data = &xprt_max_tcp_slot_table_entries,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_slot_table_size,
+ .extra2 = &max_tcp_slot_table_limit
+ },
+ {
.procname = "min_resvport",
.data = &xprt_min_resvport,
.maxlen = sizeof(unsigned int),
@@ -755,6 +766,8 @@
if (task == NULL)
goto out_release;
req = task->tk_rqstp;
+ if (req == NULL)
+ goto out_release;
if (req->rq_bytes_sent == 0)
goto out_release;
if (req->rq_bytes_sent == req->rq_snd_buf.len)
@@ -1236,7 +1249,7 @@
return 0;
}
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Obtains an rpc_rqst previously allocated and invokes the common
* tcp read code to read the data. The result is placed in the callback
@@ -1299,7 +1312,7 @@
{
return xs_tcp_read_reply(xprt, desc);
}
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/*
* Read data off the transport. This can be either an RPC_CALL or an
@@ -2489,7 +2502,8 @@
}
static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
- unsigned int slot_table_size)
+ unsigned int slot_table_size,
+ unsigned int max_slot_table_size)
{
struct rpc_xprt *xprt;
struct sock_xprt *new;
@@ -2499,7 +2513,8 @@
return ERR_PTR(-EBADF);
}
- xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
+ xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size,
+ max_slot_table_size);
if (xprt == NULL) {
dprintk("RPC: xs_setup_xprt: couldn't allocate "
"rpc_xprt\n");
@@ -2541,7 +2556,8 @@
struct rpc_xprt *xprt;
struct rpc_xprt *ret;
- xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+ xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+ xprt_max_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2605,7 +2621,8 @@
struct sock_xprt *transport;
struct rpc_xprt *ret;
- xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
+ xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries,
+ xprt_udp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2681,7 +2698,8 @@
struct sock_xprt *transport;
struct rpc_xprt *ret;
- xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+ xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+ xprt_max_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2760,7 +2778,8 @@
*/
return args->bc_xprt->xpt_bc_xprt;
}
- xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+ xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+ xprt_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2947,8 +2966,26 @@
#define param_check_slot_table_size(name, p) \
__param_check(name, p, unsigned int);
+static int param_set_max_slot_table_size(const char *val,
+ const struct kernel_param *kp)
+{
+ return param_set_uint_minmax(val, kp,
+ RPC_MIN_SLOT_TABLE,
+ RPC_MAX_SLOT_TABLE_LIMIT);
+}
+
+static struct kernel_param_ops param_ops_max_slot_table_size = {
+ .set = param_set_max_slot_table_size,
+ .get = param_get_uint,
+};
+
+#define param_check_max_slot_table_size(name, p) \
+ __param_check(name, p, unsigned int);
+
module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
slot_table_size, 0644);
+module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries,
+ max_slot_table_size, 0644);
module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
slot_table_size, 0644);
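With the xprtsock changes, TCP transports start at RPC_MIN_SLOT_TABLE
preallocated slots and grow on demand up to the new
tcp_max_slot_table_entries tunable (sysctl or module parameter). The
module parameter is validated by a param_set_uint_minmax-style helper
that already lives in xprtsock.c but is not shown in this hunk; a hedged
sketch of what such a bounded setter looks like (set_uint_minmax is an
illustrative name):

static int set_uint_minmax(const char *val, const struct kernel_param *kp,
			   unsigned int min, unsigned int max)
{
	unsigned long num;
	int ret;

	if (!val)
		return -EINVAL;
	ret = strict_strtoul(val, 0, &num);	/* parse the string */
	if (ret || num < min || num > max)
		return -EINVAL;			/* reject out-of-range values */
	*((unsigned int *)kp->arg) = num;
	return 0;
}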
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1ad0f39..02751db 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -903,7 +903,7 @@
initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
!is_world_regdom(last_request->alpha2)) {
REG_DBG_PRINT("Ignoring regulatory request %s "
- "since the driver requires its own regulaotry "
+ "since the driver requires its own regulatory "
"domain to be set first",
reg_initiator_name(initiator));
return true;
@@ -1125,12 +1125,13 @@
enum ieee80211_band band;
if (ignore_reg_update(wiphy, initiator))
- goto out;
+ return;
+
for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
if (wiphy->bands[band])
handle_band(wiphy, band, initiator);
}
-out:
+
reg_process_beacons(wiphy);
reg_process_ht_flags(wiphy);
if (wiphy->reg_notifier)
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 7312bf9..c1e18ba 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -73,7 +73,6 @@
cred = get_task_cred(tracer);
tracerp = aa_cred_profile(cred);
}
- rcu_read_unlock();
/* not ptraced */
if (!tracer || unconfined(tracerp))
@@ -82,6 +81,7 @@
error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH);
out:
+ rcu_read_unlock();
if (cred)
put_cred(cred);
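The apparmor fix above moves rcu_read_unlock() past the last use of
tracerp: a pointer obtained inside an RCU read-side critical section must
not be dereferenced after the section ends. A minimal sketch of the rule,
with illustrative names (foo, gp, read_val):

struct foo { int val; };
static struct foo __rcu *gp;	/* assumed RCU-protected pointer */

static int read_val(void)
{
	struct foo *p;
	int v = -1;

	rcu_read_lock();
	p = rcu_dereference(gp);
	if (p)
		v = p->val;	/* last dereference inside the section */
	rcu_read_unlock();	/* only unlock after the last use of p */
	return v;
}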
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 3d2fd14..3783202 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -127,7 +127,7 @@
*inheritable = cred->cap_inheritable;
*permitted = cred->cap_permitted;
- if (!unconfined(profile)) {
+ if (!unconfined(profile) && !COMPLAIN_MODE(profile)) {
*effective = cap_intersect(*effective, profile->caps.allow);
*permitted = cap_intersect(*permitted, profile->caps.allow);
}
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 39d66dc..26b46ff 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -86,7 +86,7 @@
struct inode *inode,
struct file *file)
{
- mode_t mode = file->f_mode;
+ fmode_t mode = file->f_mode;
mutex_lock(&iint->mutex);
if (mode & FMODE_WRITE &&
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 1bf090a..b34cc6e 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -14,7 +14,7 @@
user_defined.o
obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
-obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted.o
+obj-$(CONFIG_ENCRYPTED_KEYS) += ecryptfs_format.o encrypted.o
obj-$(CONFIG_KEYS_COMPAT) += compat.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/security/keys/ecryptfs_format.c b/security/keys/ecryptfs_format.c
new file mode 100644
index 0000000..6daa3b6
--- /dev/null
+++ b/security/keys/ecryptfs_format.c
@@ -0,0 +1,81 @@
+/*
+ * ecryptfs_format.c: helper functions for the encrypted key type
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ * TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
+ * Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include "ecryptfs_format.h"
+
+u8 *ecryptfs_get_auth_tok_key(struct ecryptfs_auth_tok *auth_tok)
+{
+ return auth_tok->token.password.session_key_encryption_key;
+}
+EXPORT_SYMBOL(ecryptfs_get_auth_tok_key);
+
+/*
+ * ecryptfs_get_versions()
+ *
+ * Source code taken from the software 'ecryptfs-utils' version 83.
+ *
+ */
+void ecryptfs_get_versions(int *major, int *minor, int *file_version)
+{
+ *major = ECRYPTFS_VERSION_MAJOR;
+ *minor = ECRYPTFS_VERSION_MINOR;
+ if (file_version)
+ *file_version = ECRYPTFS_SUPPORTED_FILE_VERSION;
+}
+EXPORT_SYMBOL(ecryptfs_get_versions);
+
+/*
+ * ecryptfs_fill_auth_tok - fill the ecryptfs_auth_tok structure
+ *
+ * Fill the ecryptfs_auth_tok structure with required ecryptfs data.
+ * The source code is inspired by the original function generate_payload()
+ * shipped with the software 'ecryptfs-utils' version 83.
+ *
+ */
+int ecryptfs_fill_auth_tok(struct ecryptfs_auth_tok *auth_tok,
+ const char *key_desc)
+{
+ int major, minor;
+
+ ecryptfs_get_versions(&major, &minor, NULL);
+ auth_tok->version = (((uint16_t)(major << 8) & 0xFF00)
+ | ((uint16_t)minor & 0x00FF));
+ auth_tok->token_type = ECRYPTFS_PASSWORD;
+ strncpy((char *)auth_tok->token.password.signature, key_desc,
+ ECRYPTFS_PASSWORD_SIG_SIZE);
+ auth_tok->token.password.session_key_encryption_key_bytes =
+ ECRYPTFS_MAX_KEY_BYTES;
+ /*
+ * Removed auth_tok->token.password.salt and
+ * auth_tok->token.password.session_key_encryption_key
+ * initialization from the original code
+ */
+ /* TODO: Make the hash parameterizable via policy */
+ auth_tok->token.password.flags |=
+ ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET;
+ /* The kernel code will encrypt the session key. */
+ auth_tok->session_key.encrypted_key[0] = 0;
+ auth_tok->session_key.encrypted_key_size = 0;
+ /* Default; subject to change by kernel eCryptfs */
+ auth_tok->token.password.hash_algo = PGP_DIGEST_ALGO_SHA512;
+ auth_tok->token.password.flags &= ~(ECRYPTFS_PERSISTENT_PASSWORD);
+ return 0;
+}
+EXPORT_SYMBOL(ecryptfs_fill_auth_tok);
+
+MODULE_LICENSE("GPL");
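A hedged sketch of how a caller (the encrypted key type below) is
expected to combine these helpers: fill the auth_tok for a given key
description, then locate the embedded session-key buffer.
init_ecryptfs_payload is an illustrative wrapper, not part of the patch;
"payload" is assumed to be at least sizeof(struct ecryptfs_auth_tok)
bytes and "desc" a 16-hex-character description:

static u8 *init_ecryptfs_payload(void *payload, const char *desc)
{
	struct ecryptfs_auth_tok *tok = payload;

	ecryptfs_fill_auth_tok(tok, desc);	/* version, signature, flags */
	return ecryptfs_get_auth_tok_key(tok);	/* where the raw key is stored */
}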
diff --git a/security/keys/ecryptfs_format.h b/security/keys/ecryptfs_format.h
new file mode 100644
index 0000000..40294de
--- /dev/null
+++ b/security/keys/ecryptfs_format.h
@@ -0,0 +1,30 @@
+/*
+ * ecryptfs_format.h: helper functions for the encrypted key type
+ *
+ * Copyright (C) 2006 International Business Machines Corp.
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ * TORSEC group -- http://security.polito.it
+ *
+ * Authors:
+ * Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
+ * Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ */
+
+#ifndef __KEYS_ECRYPTFS_H
+#define __KEYS_ECRYPTFS_H
+
+#include <linux/ecryptfs.h>
+
+#define PGP_DIGEST_ALGO_SHA512 10
+
+u8 *ecryptfs_get_auth_tok_key(struct ecryptfs_auth_tok *auth_tok);
+void ecryptfs_get_versions(int *major, int *minor, int *file_version);
+int ecryptfs_fill_auth_tok(struct ecryptfs_auth_tok *auth_tok,
+ const char *key_desc);
+
+#endif /* __KEYS_ECRYPTFS_H */
diff --git a/security/keys/encrypted.c b/security/keys/encrypted.c
index b1cba5b..e7eca9e 100644
--- a/security/keys/encrypted.c
+++ b/security/keys/encrypted.c
@@ -1,8 +1,11 @@
/*
* Copyright (C) 2010 IBM Corporation
+ * Copyright (C) 2010 Politecnico di Torino, Italy
+ * TORSEC group -- http://security.polito.it
*
- * Author:
+ * Authors:
* Mimi Zohar <zohar@us.ibm.com>
+ * Roberto Sassu <roberto.sassu@polito.it>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -26,22 +29,27 @@
#include <linux/rcupdate.h>
#include <linux/scatterlist.h>
#include <linux/crypto.h>
+#include <linux/ctype.h>
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <crypto/aes.h>
#include "encrypted.h"
+#include "ecryptfs_format.h"
static const char KEY_TRUSTED_PREFIX[] = "trusted:";
static const char KEY_USER_PREFIX[] = "user:";
static const char hash_alg[] = "sha256";
static const char hmac_alg[] = "hmac(sha256)";
static const char blkcipher_alg[] = "cbc(aes)";
+static const char key_format_default[] = "default";
+static const char key_format_ecryptfs[] = "ecryptfs";
static unsigned int ivsize;
static int blksize;
#define KEY_TRUSTED_PREFIX_LEN (sizeof (KEY_TRUSTED_PREFIX) - 1)
#define KEY_USER_PREFIX_LEN (sizeof (KEY_USER_PREFIX) - 1)
+#define KEY_ECRYPTFS_DESC_LEN 16
#define HASH_SIZE SHA256_DIGEST_SIZE
#define MAX_DATA_SIZE 4096
#define MIN_DATA_SIZE 20
@@ -58,6 +66,16 @@
Opt_err = -1, Opt_new, Opt_load, Opt_update
};
+enum {
+ Opt_error = -1, Opt_default, Opt_ecryptfs
+};
+
+static const match_table_t key_format_tokens = {
+ {Opt_default, "default"},
+ {Opt_ecryptfs, "ecryptfs"},
+ {Opt_error, NULL}
+};
+
static const match_table_t key_tokens = {
{Opt_new, "new"},
{Opt_load, "load"},
@@ -82,9 +100,37 @@
}
/*
+ * valid_ecryptfs_desc - verify the description of a new/loaded encrypted key
+ *
+ * The description of an encrypted key with format 'ecryptfs' must contain
+ * exactly 16 hexadecimal characters.
+ *
+ */
+static int valid_ecryptfs_desc(const char *ecryptfs_desc)
+{
+ int i;
+
+ if (strlen(ecryptfs_desc) != KEY_ECRYPTFS_DESC_LEN) {
+ pr_err("encrypted_key: key description must be %d hexadecimal "
+ "characters long\n", KEY_ECRYPTFS_DESC_LEN);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < KEY_ECRYPTFS_DESC_LEN; i++) {
+ if (!isxdigit(ecryptfs_desc[i])) {
+ pr_err("encrypted_key: key description must contain "
+ "only hexadecimal characters\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/*
* valid_master_desc - verify the 'key-type:desc' of a new/updated master-key
*
- * key-type:= "trusted:" | "encrypted:"
+ * key-type:= "trusted:" | "user:"
* desc:= master-key description
*
* Verify that 'key-type' is valid and that 'desc' exists. On key update,
@@ -118,8 +164,9 @@
* datablob_parse - parse the keyctl data
*
* datablob format:
- * new <master-key name> <decrypted data length>
- * load <master-key name> <decrypted data length> <encrypted iv + data>
+ * new [<format>] <master-key name> <decrypted data length>
+ * load [<format>] <master-key name> <decrypted data length>
+ * <encrypted iv + data>
* update <new-master-key name>
*
* Tokenizes a copy of the keyctl data, returning a pointer to each token,
@@ -127,52 +174,95 @@
*
* On success returns 0, otherwise -EINVAL.
*/
-static int datablob_parse(char *datablob, char **master_desc,
- char **decrypted_datalen, char **hex_encoded_iv)
+static int datablob_parse(char *datablob, const char **format,
+ char **master_desc, char **decrypted_datalen,
+ char **hex_encoded_iv)
{
substring_t args[MAX_OPT_ARGS];
int ret = -EINVAL;
int key_cmd;
- char *p;
+ int key_format;
+ char *p, *keyword;
- p = strsep(&datablob, " \t");
- if (!p)
+ keyword = strsep(&datablob, " \t");
+ if (!keyword) {
+ pr_info("encrypted_key: insufficient parameters specified\n");
return ret;
- key_cmd = match_token(p, key_tokens, args);
+ }
+ key_cmd = match_token(keyword, key_tokens, args);
- *master_desc = strsep(&datablob, " \t");
- if (!*master_desc)
- goto out;
+ /* Get optional format: default | ecryptfs */
+ p = strsep(&datablob, " \t");
+ if (!p) {
+ pr_err("encrypted_key: insufficient parameters specified\n");
+ return ret;
+ }
- if (valid_master_desc(*master_desc, NULL) < 0)
+ key_format = match_token(p, key_format_tokens, args);
+ switch (key_format) {
+ case Opt_ecryptfs:
+ case Opt_default:
+ *format = p;
+ *master_desc = strsep(&datablob, " \t");
+ break;
+ case Opt_error:
+ *master_desc = p;
+ break;
+ }
+
+ if (!*master_desc) {
+ pr_info("encrypted_key: master key parameter is missing\n");
goto out;
+ }
+
+ if (valid_master_desc(*master_desc, NULL) < 0) {
+ pr_info("encrypted_key: master key parameter \'%s\' "
+ "is invalid\n", *master_desc);
+ goto out;
+ }
if (decrypted_datalen) {
*decrypted_datalen = strsep(&datablob, " \t");
- if (!*decrypted_datalen)
+ if (!*decrypted_datalen) {
+ pr_info("encrypted_key: keylen parameter is missing\n");
goto out;
+ }
}
switch (key_cmd) {
case Opt_new:
- if (!decrypted_datalen)
+ if (!decrypted_datalen) {
+ pr_info("encrypted_key: keyword \'%s\' not allowed "
+ "when called from .update method\n", keyword);
break;
+ }
ret = 0;
break;
case Opt_load:
- if (!decrypted_datalen)
+ if (!decrypted_datalen) {
+ pr_info("encrypted_key: keyword \'%s\' not allowed "
+ "when called from .update method\n", keyword);
break;
+ }
*hex_encoded_iv = strsep(&datablob, " \t");
- if (!*hex_encoded_iv)
+ if (!*hex_encoded_iv) {
+ pr_info("encrypted_key: hex blob is missing\n");
break;
+ }
ret = 0;
break;
case Opt_update:
- if (decrypted_datalen)
+ if (decrypted_datalen) {
+ pr_info("encrypted_key: keyword \'%s\' not allowed "
+ "when called from .instantiate method\n",
+ keyword);
break;
+ }
ret = 0;
break;
case Opt_err:
+ pr_info("encrypted_key: keyword \'%s\' not recognized\n",
+ keyword);
break;
}
out:
@@ -197,8 +287,8 @@
ascii_buf[asciiblob_len] = '\0';
/* copy datablob master_desc and datalen strings */
- len = sprintf(ascii_buf, "%s %s ", epayload->master_desc,
- epayload->datalen);
+ len = sprintf(ascii_buf, "%s %s %s ", epayload->format,
+ epayload->master_desc, epayload->datalen);
/* convert the hex encoded iv, encrypted-data and HMAC to ascii */
bufp = &ascii_buf[len];
@@ -378,11 +468,13 @@
} else
goto out;
- if (IS_ERR(mkey))
+ if (IS_ERR(mkey)) {
pr_info("encrypted_key: key %s not found",
epayload->master_desc);
- if (mkey)
- dump_master_key(*master_key, *master_keylen);
+ goto out;
+ }
+
+ dump_master_key(*master_key, *master_keylen);
out:
return mkey;
}
@@ -439,9 +531,9 @@
if (ret < 0)
goto out;
- digest = epayload->master_desc + epayload->datablob_len;
+ digest = epayload->format + epayload->datablob_len;
ret = calc_hmac(digest, derived_key, sizeof derived_key,
- epayload->master_desc, epayload->datablob_len);
+ epayload->format, epayload->datablob_len);
if (!ret)
dump_hmac(NULL, digest, HASH_SIZE);
out:
@@ -450,26 +542,35 @@
/* verify HMAC before decrypting encrypted key */
static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
- const u8 *master_key, size_t master_keylen)
+ const u8 *format, const u8 *master_key,
+ size_t master_keylen)
{
u8 derived_key[HASH_SIZE];
u8 digest[HASH_SIZE];
int ret;
+ char *p;
+ unsigned short len;
ret = get_derived_key(derived_key, AUTH_KEY, master_key, master_keylen);
if (ret < 0)
goto out;
- ret = calc_hmac(digest, derived_key, sizeof derived_key,
- epayload->master_desc, epayload->datablob_len);
+ len = epayload->datablob_len;
+ if (!format) {
+ p = epayload->master_desc;
+ len -= strlen(epayload->format) + 1;
+ } else
+ p = epayload->format;
+
+ ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len);
if (ret < 0)
goto out;
- ret = memcmp(digest, epayload->master_desc + epayload->datablob_len,
+ ret = memcmp(digest, epayload->format + epayload->datablob_len,
sizeof digest);
if (ret) {
ret = -EINVAL;
dump_hmac("datablob",
- epayload->master_desc + epayload->datablob_len,
+ epayload->format + epayload->datablob_len,
HASH_SIZE);
dump_hmac("calc", digest, HASH_SIZE);
}
@@ -514,13 +615,16 @@
/* Allocate memory for decrypted key and datablob. */
static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
+ const char *format,
const char *master_desc,
const char *datalen)
{
struct encrypted_key_payload *epayload = NULL;
unsigned short datablob_len;
unsigned short decrypted_datalen;
+ unsigned short payload_datalen;
unsigned int encrypted_datalen;
+ unsigned int format_len;
long dlen;
int ret;
@@ -528,29 +632,43 @@
if (ret < 0 || dlen < MIN_DATA_SIZE || dlen > MAX_DATA_SIZE)
return ERR_PTR(-EINVAL);
+ format_len = (!format) ? strlen(key_format_default) : strlen(format);
decrypted_datalen = dlen;
+ payload_datalen = decrypted_datalen;
+ if (format && !strcmp(format, key_format_ecryptfs)) {
+ if (dlen != ECRYPTFS_MAX_KEY_BYTES) {
+ pr_err("encrypted_key: keylen for the ecryptfs format "
+ "must be equal to %d bytes\n",
+ ECRYPTFS_MAX_KEY_BYTES);
+ return ERR_PTR(-EINVAL);
+ }
+ decrypted_datalen = ECRYPTFS_MAX_KEY_BYTES;
+ payload_datalen = sizeof(struct ecryptfs_auth_tok);
+ }
+
encrypted_datalen = roundup(decrypted_datalen, blksize);
- datablob_len = strlen(master_desc) + 1 + strlen(datalen) + 1
- + ivsize + 1 + encrypted_datalen;
+ datablob_len = format_len + 1 + strlen(master_desc) + 1
+ + strlen(datalen) + 1 + ivsize + 1 + encrypted_datalen;
- ret = key_payload_reserve(key, decrypted_datalen + datablob_len
+ ret = key_payload_reserve(key, payload_datalen + datablob_len
+ HASH_SIZE + 1);
if (ret < 0)
return ERR_PTR(ret);
- epayload = kzalloc(sizeof(*epayload) + decrypted_datalen +
+ epayload = kzalloc(sizeof(*epayload) + payload_datalen +
datablob_len + HASH_SIZE + 1, GFP_KERNEL);
if (!epayload)
return ERR_PTR(-ENOMEM);
+ epayload->payload_datalen = payload_datalen;
epayload->decrypted_datalen = decrypted_datalen;
epayload->datablob_len = datablob_len;
return epayload;
}
static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
- const char *hex_encoded_iv)
+ const char *format, const char *hex_encoded_iv)
{
struct key *mkey;
u8 derived_key[HASH_SIZE];
@@ -571,14 +689,14 @@
hex2bin(epayload->iv, hex_encoded_iv, ivsize);
hex2bin(epayload->encrypted_data, hex_encoded_data, encrypted_datalen);
- hmac = epayload->master_desc + epayload->datablob_len;
+ hmac = epayload->format + epayload->datablob_len;
hex2bin(hmac, hex_encoded_data + (encrypted_datalen * 2), HASH_SIZE);
mkey = request_master_key(epayload, &master_key, &master_keylen);
if (IS_ERR(mkey))
return PTR_ERR(mkey);
- ret = datablob_hmac_verify(epayload, master_key, master_keylen);
+ ret = datablob_hmac_verify(epayload, format, master_key, master_keylen);
if (ret < 0) {
pr_err("encrypted_key: bad hmac (%d)\n", ret);
goto out;
@@ -598,13 +716,28 @@
}
static void __ekey_init(struct encrypted_key_payload *epayload,
- const char *master_desc, const char *datalen)
+ const char *format, const char *master_desc,
+ const char *datalen)
{
- epayload->master_desc = epayload->decrypted_data
- + epayload->decrypted_datalen;
+ unsigned int format_len;
+
+ format_len = (!format) ? strlen(key_format_default) : strlen(format);
+ epayload->format = epayload->payload_data + epayload->payload_datalen;
+ epayload->master_desc = epayload->format + format_len + 1;
epayload->datalen = epayload->master_desc + strlen(master_desc) + 1;
epayload->iv = epayload->datalen + strlen(datalen) + 1;
epayload->encrypted_data = epayload->iv + ivsize + 1;
+ epayload->decrypted_data = epayload->payload_data;
+
+ if (!format)
+ memcpy(epayload->format, key_format_default, format_len);
+ else {
+ if (!strcmp(format, key_format_ecryptfs))
+ epayload->decrypted_data =
+ ecryptfs_get_auth_tok_key((struct ecryptfs_auth_tok *)epayload->payload_data);
+
+ memcpy(epayload->format, format, format_len);
+ }
memcpy(epayload->master_desc, master_desc, strlen(master_desc));
memcpy(epayload->datalen, datalen, strlen(datalen));
@@ -617,19 +750,29 @@
* itself. For an old key, decrypt the hex encoded data.
*/
static int encrypted_init(struct encrypted_key_payload *epayload,
+ const char *key_desc, const char *format,
const char *master_desc, const char *datalen,
const char *hex_encoded_iv)
{
int ret = 0;
- __ekey_init(epayload, master_desc, datalen);
+ if (format && !strcmp(format, key_format_ecryptfs)) {
+ ret = valid_ecryptfs_desc(key_desc);
+ if (ret < 0)
+ return ret;
+
+ ecryptfs_fill_auth_tok((struct ecryptfs_auth_tok *)epayload->payload_data,
+ key_desc);
+ }
+
+ __ekey_init(epayload, format, master_desc, datalen);
if (!hex_encoded_iv) {
get_random_bytes(epayload->iv, ivsize);
get_random_bytes(epayload->decrypted_data,
epayload->decrypted_datalen);
} else
- ret = encrypted_key_decrypt(epayload, hex_encoded_iv);
+ ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv);
return ret;
}
@@ -646,6 +789,7 @@
{
struct encrypted_key_payload *epayload = NULL;
char *datablob = NULL;
+ const char *format = NULL;
char *master_desc = NULL;
char *decrypted_datalen = NULL;
char *hex_encoded_iv = NULL;
@@ -659,18 +803,19 @@
return -ENOMEM;
datablob[datalen] = 0;
memcpy(datablob, data, datalen);
- ret = datablob_parse(datablob, &master_desc, &decrypted_datalen,
- &hex_encoded_iv);
+ ret = datablob_parse(datablob, &format, &master_desc,
+ &decrypted_datalen, &hex_encoded_iv);
if (ret < 0)
goto out;
- epayload = encrypted_key_alloc(key, master_desc, decrypted_datalen);
+ epayload = encrypted_key_alloc(key, format, master_desc,
+ decrypted_datalen);
if (IS_ERR(epayload)) {
ret = PTR_ERR(epayload);
goto out;
}
- ret = encrypted_init(epayload, master_desc, decrypted_datalen,
- hex_encoded_iv);
+ ret = encrypted_init(epayload, key->description, format, master_desc,
+ decrypted_datalen, hex_encoded_iv);
if (ret < 0) {
kfree(epayload);
goto out;
@@ -706,6 +851,7 @@
struct encrypted_key_payload *new_epayload;
char *buf;
char *new_master_desc = NULL;
+ const char *format = NULL;
int ret = 0;
if (datalen <= 0 || datalen > 32767 || !data)
@@ -717,7 +863,7 @@
buf[datalen] = 0;
memcpy(buf, data, datalen);
- ret = datablob_parse(buf, &new_master_desc, NULL, NULL);
+ ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL);
if (ret < 0)
goto out;
@@ -725,18 +871,19 @@
if (ret < 0)
goto out;
- new_epayload = encrypted_key_alloc(key, new_master_desc,
- epayload->datalen);
+ new_epayload = encrypted_key_alloc(key, epayload->format,
+ new_master_desc, epayload->datalen);
if (IS_ERR(new_epayload)) {
ret = PTR_ERR(new_epayload);
goto out;
}
- __ekey_init(new_epayload, new_master_desc, epayload->datalen);
+ __ekey_init(new_epayload, epayload->format, new_master_desc,
+ epayload->datalen);
memcpy(new_epayload->iv, epayload->iv, ivsize);
- memcpy(new_epayload->decrypted_data, epayload->decrypted_data,
- epayload->decrypted_datalen);
+ memcpy(new_epayload->payload_data, epayload->payload_data,
+ epayload->payload_datalen);
rcu_assign_pointer(key->payload.data, new_epayload);
call_rcu(&epayload->rcu, encrypted_rcu_free);
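With the optional format token, the keyctl payload grammar becomes
"new [default|ecryptfs] <master-key> <keylen>"; for the ecryptfs format
the key description must be exactly 16 hexadecimal characters and
<keylen> must equal ECRYPTFS_MAX_KEY_BYTES. A hedged userspace sketch
using add_key(2) from keyutils, assuming a trusted key named "kmk"
already exists and that ECRYPTFS_MAX_KEY_BYTES is 64 (the names are
illustrative):

#include <stdio.h>
#include <string.h>
#include <keyutils.h>

int main(void)
{
	const char payload[] = "new ecryptfs trusted:kmk 64";
	key_serial_t id;

	/* description: exactly 16 hex characters for the ecryptfs format */
	id = add_key("encrypted", "0123456789abcdef",
		     payload, strlen(payload), KEY_SPEC_USER_KEYRING);
	if (id < 0) {
		perror("add_key");
		return 1;
	}
	printf("encrypted key id: %d\n", id);
	return 0;
}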
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 6cff375..60d4e3f 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -251,6 +251,8 @@
if (IS_ERR(authkey_ref)) {
authkey = ERR_CAST(authkey_ref);
+ if (authkey == ERR_PTR(-EAGAIN))
+ authkey = ERR_PTR(-ENOKEY);
goto error;
}
diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig
index c8f3857..7c7f8c1 100644
--- a/security/tomoyo/Kconfig
+++ b/security/tomoyo/Kconfig
@@ -9,3 +9,64 @@
Required userspace tools and further information may be
found at <http://tomoyo.sourceforge.jp/>.
If you are unsure how to answer this question, answer N.
+
+config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY
+ int "Default maximal count for learning mode"
+ default 2048
+ range 0 2147483647
+ depends on SECURITY_TOMOYO
+ help
+ This is the default value for the maximal number of ACL entries
+ that are automatically appended to the policy in "learning mode".
+ Some programs access thousands of objects, so running
+ such programs in "learning mode" slows the system response
+ and consumes a lot of memory.
+ This is a safeguard for such programs.
+
+config SECURITY_TOMOYO_MAX_AUDIT_LOG
+ int "Default maximal count for audit log"
+ default 1024
+ range 0 2147483647
+ depends on SECURITY_TOMOYO
+ help
+ This is the default value for the maximal number of audit log
+ entries that the kernel can hold in memory.
+ You can read the log via /sys/kernel/security/tomoyo/audit.
+ If you don't need audit logs, you may set this value to 0.
+
+config SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+ bool "Activate without calling userspace policy loader."
+ default n
+ depends on SECURITY_TOMOYO
+ ---help---
+ Say Y here if you want to activate access control as soon as the
+ built-in policy has been loaded. This option is useful for systems
+ where operations that could hijack the boot sequence must run
+ before the policy is loaded. For example, you can activate access
+ control immediately after loading a fixed part of the policy that
+ allows only the operations needed to mount the partition holding
+ the variant part of the policy, verify it (e.g. with a GPG check),
+ and load it. Since enforcing mode is in effect from the very
+ beginning, the window for hijacking the boot sequence is reduced.
+
+config SECURITY_TOMOYO_POLICY_LOADER
+ string "Location of userspace policy loader"
+ default "/sbin/tomoyo-init"
+ depends on SECURITY_TOMOYO
+ depends on !SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+ ---help---
+ This is the default pathname of the policy loader, which is called
+ before activation. You can override this setting via the
+ TOMOYO_loader= kernel command line option.
+
+config SECURITY_TOMOYO_ACTIVATION_TRIGGER
+ string "Trigger for calling userspace policy loader"
+ default "/sbin/init"
+ depends on SECURITY_TOMOYO
+ depends on !SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+ ---help---
+ This is the default pathname of the activation trigger.
+ You can override this setting via the TOMOYO_trigger= kernel command
+ line option. For example, if you pass the init=/bin/systemd option,
+ you may also want to pass TOMOYO_trigger=/bin/systemd.
diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile
index 91640e9..95278b7 100644
--- a/security/tomoyo/Makefile
+++ b/security/tomoyo/Makefile
@@ -1 +1,48 @@
-obj-y = common.o domain.o file.o gc.o group.o load_policy.o memory.o mount.o realpath.o securityfs_if.o tomoyo.o util.o
+obj-y = audit.o common.o condition.o domain.o file.o gc.o group.o load_policy.o memory.o mount.o realpath.o securityfs_if.o tomoyo.o util.o
+
+$(obj)/policy/profile.conf:
+ @mkdir -p $(obj)/policy/
+ @echo Creating an empty policy/profile.conf
+ @touch $@
+
+$(obj)/policy/exception_policy.conf:
+ @mkdir -p $(obj)/policy/
+ @echo Creating a default policy/exception_policy.conf
+ @echo initialize_domain /sbin/modprobe from any >> $@
+ @echo initialize_domain /sbin/hotplug from any >> $@
+
+$(obj)/policy/domain_policy.conf:
+ @mkdir -p $(obj)/policy/
+ @echo Creating an empty policy/domain_policy.conf
+ @touch $@
+
+$(obj)/policy/manager.conf:
+ @mkdir -p $(obj)/policy/
+ @echo Creating an empty policy/manager.conf
+ @touch $@
+
+$(obj)/policy/stat.conf:
+ @mkdir -p $(obj)/policy/
+ @echo Creating an empty policy/stat.conf
+ @touch $@
+
+$(obj)/builtin-policy.h: $(obj)/policy/profile.conf $(obj)/policy/exception_policy.conf $(obj)/policy/domain_policy.conf $(obj)/policy/manager.conf $(obj)/policy/stat.conf
+ @echo Generating built-in policy for TOMOYO 2.4.x.
+ @echo "static char tomoyo_builtin_profile[] __initdata =" > $@.tmp
+ @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/profile.conf >> $@.tmp
+ @echo "\"\";" >> $@.tmp
+ @echo "static char tomoyo_builtin_exception_policy[] __initdata =" >> $@.tmp
+ @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/exception_policy.conf >> $@.tmp
+ @echo "\"\";" >> $@.tmp
+ @echo "static char tomoyo_builtin_domain_policy[] __initdata =" >> $@.tmp
+ @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/domain_policy.conf >> $@.tmp
+ @echo "\"\";" >> $@.tmp
+ @echo "static char tomoyo_builtin_manager[] __initdata =" >> $@.tmp
+ @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/manager.conf >> $@.tmp
+ @echo "\"\";" >> $@.tmp
+ @echo "static char tomoyo_builtin_stat[] __initdata =" >> $@.tmp
+ @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/stat.conf >> $@.tmp
+ @echo "\"\";" >> $@.tmp
+ @mv $@.tmp $@
+
+$(obj)/common.o: $(obj)/builtin-policy.h
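The new Makefile rules embed the five policy files into builtin-policy.h
as C string literals, one quoted line per input line, terminated by an
empty string. For illustration, a profile.conf containing the single line
0-COMMENT=test would produce roughly:

static char tomoyo_builtin_profile[] __initdata =
"0-COMMENT=test\n"
"";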
diff --git a/security/tomoyo/audit.c b/security/tomoyo/audit.c
new file mode 100644
index 0000000..5dbb1f7
--- /dev/null
+++ b/security/tomoyo/audit.c
@@ -0,0 +1,456 @@
+/*
+ * security/tomoyo/audit.c
+ *
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
+ */
+
+#include "common.h"
+#include <linux/slab.h>
+
+/**
+ * tomoyo_print_bprm - Print "struct linux_binprm" for auditing.
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ * @dump: Pointer to "struct tomoyo_page_dump".
+ *
+ * Returns the contents of @bprm on success, NULL otherwise.
+ *
+ * This function uses kzalloc(), so the caller must kfree() the result
+ * if this function did not return NULL.
+ */
+static char *tomoyo_print_bprm(struct linux_binprm *bprm,
+ struct tomoyo_page_dump *dump)
+{
+ static const int tomoyo_buffer_len = 4096 * 2;
+ char *buffer = kzalloc(tomoyo_buffer_len, GFP_NOFS);
+ char *cp;
+ char *last_start;
+ int len;
+ unsigned long pos = bprm->p;
+ int offset = pos % PAGE_SIZE;
+ int argv_count = bprm->argc;
+ int envp_count = bprm->envc;
+ bool truncated = false;
+ if (!buffer)
+ return NULL;
+ len = snprintf(buffer, tomoyo_buffer_len - 1, "argv[]={ ");
+ cp = buffer + len;
+ if (!argv_count) {
+ memmove(cp, "} envp[]={ ", 11);
+ cp += 11;
+ }
+ last_start = cp;
+ while (argv_count || envp_count) {
+ if (!tomoyo_dump_page(bprm, pos, dump))
+ goto out;
+ pos += PAGE_SIZE - offset;
+ /* Read. */
+ while (offset < PAGE_SIZE) {
+ const char *kaddr = dump->data;
+ const unsigned char c = kaddr[offset++];
+ if (cp == last_start)
+ *cp++ = '"';
+ if (cp >= buffer + tomoyo_buffer_len - 32) {
+ /* Reserve some room for "..." string. */
+ truncated = true;
+ } else if (c == '\\') {
+ *cp++ = '\\';
+ *cp++ = '\\';
+ } else if (c > ' ' && c < 127) {
+ *cp++ = c;
+ } else if (!c) {
+ *cp++ = '"';
+ *cp++ = ' ';
+ last_start = cp;
+ } else {
+ *cp++ = '\\';
+ *cp++ = (c >> 6) + '0';
+ *cp++ = ((c >> 3) & 7) + '0';
+ *cp++ = (c & 7) + '0';
+ }
+ if (c)
+ continue;
+ if (argv_count) {
+ if (--argv_count == 0) {
+ if (truncated) {
+ cp = last_start;
+ memmove(cp, "... ", 4);
+ cp += 4;
+ }
+ memmove(cp, "} envp[]={ ", 11);
+ cp += 11;
+ last_start = cp;
+ truncated = false;
+ }
+ } else if (envp_count) {
+ if (--envp_count == 0) {
+ if (truncated) {
+ cp = last_start;
+ memmove(cp, "... ", 4);
+ cp += 4;
+ }
+ }
+ }
+ if (!argv_count && !envp_count)
+ break;
+ }
+ offset = 0;
+ }
+ *cp++ = '}';
+ *cp = '\0';
+ return buffer;
+out:
+ snprintf(buffer, tomoyo_buffer_len - 1,
+ "argv[]={ ... } envp[]= { ... }");
+ return buffer;
+}
+
+/**
+ * tomoyo_filetype - Get string representation of file type.
+ *
+ * @mode: Mode value for stat().
+ *
+ * Returns file type string.
+ */
+static inline const char *tomoyo_filetype(const mode_t mode)
+{
+ switch (mode & S_IFMT) {
+ case S_IFREG:
+ case 0:
+ return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FILE];
+ case S_IFDIR:
+ return tomoyo_condition_keyword[TOMOYO_TYPE_IS_DIRECTORY];
+ case S_IFLNK:
+ return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SYMLINK];
+ case S_IFIFO:
+ return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FIFO];
+ case S_IFSOCK:
+ return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SOCKET];
+ case S_IFBLK:
+ return tomoyo_condition_keyword[TOMOYO_TYPE_IS_BLOCK_DEV];
+ case S_IFCHR:
+ return tomoyo_condition_keyword[TOMOYO_TYPE_IS_CHAR_DEV];
+ }
+ return "unknown"; /* This should not happen. */
+}
+
+/**
+ * tomoyo_print_header - Get header line of audit log.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ *
+ * Returns string representation.
+ *
+ * This function uses kmalloc(), so the caller must kfree() the result
+ * if this function did not return NULL.
+ */
+static char *tomoyo_print_header(struct tomoyo_request_info *r)
+{
+ struct tomoyo_time stamp;
+ const pid_t gpid = task_pid_nr(current);
+ struct tomoyo_obj_info *obj = r->obj;
+ static const int tomoyo_buffer_len = 4096;
+ char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS);
+ int pos;
+ u8 i;
+ if (!buffer)
+ return NULL;
+ {
+ struct timeval tv;
+ do_gettimeofday(&tv);
+ tomoyo_convert_time(tv.tv_sec, &stamp);
+ }
+ pos = snprintf(buffer, tomoyo_buffer_len - 1,
+ "#%04u/%02u/%02u %02u:%02u:%02u# profile=%u mode=%s "
+ "granted=%s (global-pid=%u) task={ pid=%u ppid=%u "
+ "uid=%u gid=%u euid=%u egid=%u suid=%u sgid=%u "
+ "fsuid=%u fsgid=%u }", stamp.year, stamp.month,
+ stamp.day, stamp.hour, stamp.min, stamp.sec, r->profile,
+ tomoyo_mode[r->mode], tomoyo_yesno(r->granted), gpid,
+ tomoyo_sys_getpid(), tomoyo_sys_getppid(),
+ current_uid(), current_gid(), current_euid(),
+ current_egid(), current_suid(), current_sgid(),
+ current_fsuid(), current_fsgid());
+ if (!obj)
+ goto no_obj_info;
+ if (!obj->validate_done) {
+ tomoyo_get_attributes(obj);
+ obj->validate_done = true;
+ }
+ for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) {
+ struct tomoyo_mini_stat *stat;
+ unsigned int dev;
+ mode_t mode;
+ if (!obj->stat_valid[i])
+ continue;
+ stat = &obj->stat[i];
+ dev = stat->dev;
+ mode = stat->mode;
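+ /* Odd entries describe the parent directory of the preceding path. */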
+ if (i & 1) {
+ pos += snprintf(buffer + pos,
+ tomoyo_buffer_len - 1 - pos,
+ " path%u.parent={ uid=%u gid=%u "
+ "ino=%lu perm=0%o }", (i >> 1) + 1,
+ stat->uid, stat->gid, (unsigned long)
+ stat->ino, stat->mode & S_IALLUGO);
+ continue;
+ }
+ pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos,
+ " path%u={ uid=%u gid=%u ino=%lu major=%u"
+ " minor=%u perm=0%o type=%s", (i >> 1) + 1,
+ stat->uid, stat->gid, (unsigned long)
+ stat->ino, MAJOR(dev), MINOR(dev),
+ mode & S_IALLUGO, tomoyo_filetype(mode));
+ if (S_ISCHR(mode) || S_ISBLK(mode)) {
+ dev = stat->rdev;
+ pos += snprintf(buffer + pos,
+ tomoyo_buffer_len - 1 - pos,
+ " dev_major=%u dev_minor=%u",
+ MAJOR(dev), MINOR(dev));
+ }
+ pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos,
+ " }");
+ }
+no_obj_info:
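+ /* If pos reached the buffer's end the header was truncated; drop it. */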
+ if (pos < tomoyo_buffer_len - 1)
+ return buffer;
+ kfree(buffer);
+ return NULL;
+}
+
+/**
+ * tomoyo_init_log - Allocate buffer for audit logs.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @len: Buffer size needed for @fmt and @args.
+ * @fmt: The printf()'s format string.
+ * @args: va_list structure for @fmt.
+ *
+ * Returns pointer to allocated memory.
+ *
+ * This function uses kzalloc(), so caller must kfree() if this function
+ * didn't return NULL.
+ */
+char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
+ va_list args)
+{
+ char *buf = NULL;
+ char *bprm_info = NULL;
+ const char *header = NULL;
+ char *realpath = NULL;
+ const char *symlink = NULL;
+ int pos;
+ const char *domainname = r->domain->domainname->name;
+ header = tomoyo_print_header(r);
+ if (!header)
+ return NULL;
+ /* +10 is for '\n' etc. and '\0'. */
+ len += strlen(domainname) + strlen(header) + 10;
+ if (r->ee) {
+ struct file *file = r->ee->bprm->file;
+ realpath = tomoyo_realpath_from_path(&file->f_path);
+ bprm_info = tomoyo_print_bprm(r->ee->bprm, &r->ee->dump);
+ if (!realpath || !bprm_info)
+ goto out;
+ /* +80 is for " exec={ realpath=\"%s\" argc=%d envc=%d %s }" */
+ len += strlen(realpath) + 80 + strlen(bprm_info);
+ } else if (r->obj && r->obj->symlink_target) {
+ symlink = r->obj->symlink_target->name;
+ /* +18 is for " symlink.target=\"%s\"" */
+ len += 18 + strlen(symlink);
+ }
+ len = tomoyo_round2(len);
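+ /* tomoyo_round2() rounds up to a power of 2 for quota accounting. */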
+ buf = kzalloc(len, GFP_NOFS);
+ if (!buf)
+ goto out;
+ len--;
+ pos = snprintf(buf, len, "%s", header);
+ if (realpath) {
+ struct linux_binprm *bprm = r->ee->bprm;
+ pos += snprintf(buf + pos, len - pos,
+ " exec={ realpath=\"%s\" argc=%d envc=%d %s }",
+ realpath, bprm->argc, bprm->envc, bprm_info);
+ } else if (symlink)
+ pos += snprintf(buf + pos, len - pos, " symlink.target=\"%s\"",
+ symlink);
+ pos += snprintf(buf + pos, len - pos, "\n%s\n", domainname);
+ vsnprintf(buf + pos, len - pos, fmt, args);
+out:
+ kfree(realpath);
+ kfree(bprm_info);
+ kfree(header);
+ return buf;
+}
+
+/* Wait queue for /sys/kernel/security/tomoyo/audit. */
+static DECLARE_WAIT_QUEUE_HEAD(tomoyo_log_wait);
+
+/* Structure for audit log. */
+struct tomoyo_log {
+ struct list_head list;
+ char *log;
+ int size;
+};
+
+/* The list for "struct tomoyo_log". */
+static LIST_HEAD(tomoyo_log);
+
+/* Lock for "struct list_head tomoyo_log". */
+static DEFINE_SPINLOCK(tomoyo_log_lock);
+
+/* Length of "stuct list_head tomoyo_log". */
+static unsigned int tomoyo_log_count;
+
+/**
+ * tomoyo_get_audit - Get audit mode.
+ *
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
+ * @profile: Profile number.
+ * @index: Index number of functionality.
+ * @is_granted: True if granted log, false otherwise.
+ *
+ * Returns true if this request should be audited, false otherwise.
+ */
+static bool tomoyo_get_audit(const struct tomoyo_policy_namespace *ns,
+ const u8 profile, const u8 index,
+ const bool is_granted)
+{
+ u8 mode;
+ const u8 category = tomoyo_index2category[index] +
+ TOMOYO_MAX_MAC_INDEX;
+ struct tomoyo_profile *p;
+ if (!tomoyo_policy_loaded)
+ return false;
+ p = tomoyo_profile(ns, profile);
+ if (tomoyo_log_count >= p->pref[TOMOYO_PREF_MAX_AUDIT_LOG])
+ return false;
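+ /*
+ * Resolve the effective mode: the per-functionality config wins,
+ * then the category-wide config, then the profile's default.
+ */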
+ mode = p->config[index];
+ if (mode == TOMOYO_CONFIG_USE_DEFAULT)
+ mode = p->config[category];
+ if (mode == TOMOYO_CONFIG_USE_DEFAULT)
+ mode = p->default_config;
+ if (is_granted)
+ return mode & TOMOYO_CONFIG_WANT_GRANT_LOG;
+ return mode & TOMOYO_CONFIG_WANT_REJECT_LOG;
+}
+
+/**
+ * tomoyo_write_log2 - Write an audit log.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @len: Buffer size needed for @fmt and @args.
+ * @fmt: The printf()'s format string.
+ * @args: va_list structure for @fmt.
+ *
+ * Returns nothing.
+ */
+void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt,
+ va_list args)
+{
+ char *buf;
+ struct tomoyo_log *entry;
+ bool quota_exceeded = false;
+ if (!tomoyo_get_audit(r->domain->ns, r->profile, r->type, r->granted))
+ goto out;
+ buf = tomoyo_init_log(r, len, fmt, args);
+ if (!buf)
+ goto out;
+ entry = kzalloc(sizeof(*entry), GFP_NOFS);
+ if (!entry) {
+ kfree(buf);
+ goto out;
+ }
+ entry->log = buf;
+ len = tomoyo_round2(strlen(buf) + 1);
+ /*
+ * The entry->size is used for memory quota checks.
+ * Don't go beyond strlen(entry->log).
+ */
+ entry->size = len + tomoyo_round2(sizeof(*entry));
+ spin_lock(&tomoyo_log_lock);
+ if (tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT] &&
+ tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] + entry->size >=
+ tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT]) {
+ quota_exceeded = true;
+ } else {
+ tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] += entry->size;
+ list_add_tail(&entry->list, &tomoyo_log);
+ tomoyo_log_count++;
+ }
+ spin_unlock(&tomoyo_log_lock);
+ if (quota_exceeded) {
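+ /* The entry was never queued, so it is safe to free it here. */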
+ kfree(buf);
+ kfree(entry);
+ goto out;
+ }
+ wake_up(&tomoyo_log_wait);
+out:
+ return;
+}
+
+/**
+ * tomoyo_write_log - Write an audit log.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @fmt: The printf()'s format string, followed by parameters.
+ *
+ * Returns nothing.
+ */
+void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+ va_start(args, fmt);
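+ /*
+ * Compute the needed buffer size. With a size of 1, vsnprintf()
+ * stores only a '\0' (so &len is safe as a scratch buffer) while
+ * returning the length the full output would require.
+ */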
+ len = vsnprintf((char *) &len, 1, fmt, args) + 1;
+ va_end(args);
+ va_start(args, fmt);
+ tomoyo_write_log2(r, len, fmt, args);
+ va_end(args);
+}
+
+/**
+ * tomoyo_read_log - Read an audit log.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+void tomoyo_read_log(struct tomoyo_io_buffer *head)
+{
+ struct tomoyo_log *ptr = NULL;
+ if (head->r.w_pos)
+ return;
+ kfree(head->read_buf);
+ head->read_buf = NULL;
+ spin_lock(&tomoyo_log_lock);
+ if (!list_empty(&tomoyo_log)) {
+ ptr = list_entry(tomoyo_log.next, typeof(*ptr), list);
+ list_del(&ptr->list);
+ tomoyo_log_count--;
+ tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] -= ptr->size;
+ }
+ spin_unlock(&tomoyo_log_lock);
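+ /* Hand the dequeued line to the reader; it is freed via read_buf. */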
+ if (ptr) {
+ head->read_buf = ptr->log;
+ head->r.w[head->r.w_pos++] = head->read_buf;
+ kfree(ptr);
+ }
+}
+
+/**
+ * tomoyo_poll_log - Wait for an audit log.
+ *
+ * @file: Pointer to "struct file".
+ * @wait: Pointer to "poll_table".
+ *
+ * Returns POLLIN | POLLRDNORM when ready to read an audit log.
+ */
+int tomoyo_poll_log(struct file *file, poll_table *wait)
+{
+ if (tomoyo_log_count)
+ return POLLIN | POLLRDNORM;
+ poll_wait(file, &tomoyo_log_wait, wait);
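+ /* Re-check after registering; a log may have arrived in between. */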
+ if (tomoyo_log_count)
+ return POLLIN | POLLRDNORM;
+ return 0;
+}
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index a0d09e5..c8439cf2 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -1,9 +1,7 @@
/*
* security/tomoyo/common.c
*
- * Common functions for TOMOYO.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include <linux/uaccess.h>
@@ -11,56 +9,133 @@
#include <linux/security.h>
#include "common.h"
-static struct tomoyo_profile tomoyo_default_profile = {
- .learning = &tomoyo_default_profile.preference,
- .permissive = &tomoyo_default_profile.preference,
- .enforcing = &tomoyo_default_profile.preference,
- .preference.enforcing_verbose = true,
- .preference.learning_max_entry = 2048,
- .preference.learning_verbose = false,
- .preference.permissive_verbose = true
-};
-
-/* Profile version. Currently only 20090903 is defined. */
-static unsigned int tomoyo_profile_version;
-
-/* Profile table. Memory is allocated as needed. */
-static struct tomoyo_profile *tomoyo_profile_ptr[TOMOYO_MAX_PROFILES];
-
-/* String table for functionality that takes 4 modes. */
-static const char *tomoyo_mode[4] = {
- "disabled", "learning", "permissive", "enforcing"
+/* String table for operation mode. */
+const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE] = {
+ [TOMOYO_CONFIG_DISABLED] = "disabled",
+ [TOMOYO_CONFIG_LEARNING] = "learning",
+ [TOMOYO_CONFIG_PERMISSIVE] = "permissive",
+ [TOMOYO_CONFIG_ENFORCING] = "enforcing"
};
/* String table for /sys/kernel/security/tomoyo/profile */
-static const char *tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
+const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
+ TOMOYO_MAX_MAC_CATEGORY_INDEX] = {
- [TOMOYO_MAC_FILE_EXECUTE] = "file::execute",
- [TOMOYO_MAC_FILE_OPEN] = "file::open",
- [TOMOYO_MAC_FILE_CREATE] = "file::create",
- [TOMOYO_MAC_FILE_UNLINK] = "file::unlink",
- [TOMOYO_MAC_FILE_MKDIR] = "file::mkdir",
- [TOMOYO_MAC_FILE_RMDIR] = "file::rmdir",
- [TOMOYO_MAC_FILE_MKFIFO] = "file::mkfifo",
- [TOMOYO_MAC_FILE_MKSOCK] = "file::mksock",
- [TOMOYO_MAC_FILE_TRUNCATE] = "file::truncate",
- [TOMOYO_MAC_FILE_SYMLINK] = "file::symlink",
- [TOMOYO_MAC_FILE_REWRITE] = "file::rewrite",
- [TOMOYO_MAC_FILE_MKBLOCK] = "file::mkblock",
- [TOMOYO_MAC_FILE_MKCHAR] = "file::mkchar",
- [TOMOYO_MAC_FILE_LINK] = "file::link",
- [TOMOYO_MAC_FILE_RENAME] = "file::rename",
- [TOMOYO_MAC_FILE_CHMOD] = "file::chmod",
- [TOMOYO_MAC_FILE_CHOWN] = "file::chown",
- [TOMOYO_MAC_FILE_CHGRP] = "file::chgrp",
- [TOMOYO_MAC_FILE_IOCTL] = "file::ioctl",
- [TOMOYO_MAC_FILE_CHROOT] = "file::chroot",
- [TOMOYO_MAC_FILE_MOUNT] = "file::mount",
- [TOMOYO_MAC_FILE_UMOUNT] = "file::umount",
- [TOMOYO_MAC_FILE_PIVOT_ROOT] = "file::pivot_root",
+ [TOMOYO_MAC_FILE_EXECUTE] = "execute",
+ [TOMOYO_MAC_FILE_OPEN] = "open",
+ [TOMOYO_MAC_FILE_CREATE] = "create",
+ [TOMOYO_MAC_FILE_UNLINK] = "unlink",
+ [TOMOYO_MAC_FILE_GETATTR] = "getattr",
+ [TOMOYO_MAC_FILE_MKDIR] = "mkdir",
+ [TOMOYO_MAC_FILE_RMDIR] = "rmdir",
+ [TOMOYO_MAC_FILE_MKFIFO] = "mkfifo",
+ [TOMOYO_MAC_FILE_MKSOCK] = "mksock",
+ [TOMOYO_MAC_FILE_TRUNCATE] = "truncate",
+ [TOMOYO_MAC_FILE_SYMLINK] = "symlink",
+ [TOMOYO_MAC_FILE_MKBLOCK] = "mkblock",
+ [TOMOYO_MAC_FILE_MKCHAR] = "mkchar",
+ [TOMOYO_MAC_FILE_LINK] = "link",
+ [TOMOYO_MAC_FILE_RENAME] = "rename",
+ [TOMOYO_MAC_FILE_CHMOD] = "chmod",
+ [TOMOYO_MAC_FILE_CHOWN] = "chown",
+ [TOMOYO_MAC_FILE_CHGRP] = "chgrp",
+ [TOMOYO_MAC_FILE_IOCTL] = "ioctl",
+ [TOMOYO_MAC_FILE_CHROOT] = "chroot",
+ [TOMOYO_MAC_FILE_MOUNT] = "mount",
+ [TOMOYO_MAC_FILE_UMOUNT] = "unmount",
+ [TOMOYO_MAC_FILE_PIVOT_ROOT] = "pivot_root",
[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_FILE] = "file",
};
+/* String table for conditions. */
+const char * const tomoyo_condition_keyword[TOMOYO_MAX_CONDITION_KEYWORD] = {
+ [TOMOYO_TASK_UID] = "task.uid",
+ [TOMOYO_TASK_EUID] = "task.euid",
+ [TOMOYO_TASK_SUID] = "task.suid",
+ [TOMOYO_TASK_FSUID] = "task.fsuid",
+ [TOMOYO_TASK_GID] = "task.gid",
+ [TOMOYO_TASK_EGID] = "task.egid",
+ [TOMOYO_TASK_SGID] = "task.sgid",
+ [TOMOYO_TASK_FSGID] = "task.fsgid",
+ [TOMOYO_TASK_PID] = "task.pid",
+ [TOMOYO_TASK_PPID] = "task.ppid",
+ [TOMOYO_EXEC_ARGC] = "exec.argc",
+ [TOMOYO_EXEC_ENVC] = "exec.envc",
+ [TOMOYO_TYPE_IS_SOCKET] = "socket",
+ [TOMOYO_TYPE_IS_SYMLINK] = "symlink",
+ [TOMOYO_TYPE_IS_FILE] = "file",
+ [TOMOYO_TYPE_IS_BLOCK_DEV] = "block",
+ [TOMOYO_TYPE_IS_DIRECTORY] = "directory",
+ [TOMOYO_TYPE_IS_CHAR_DEV] = "char",
+ [TOMOYO_TYPE_IS_FIFO] = "fifo",
+ [TOMOYO_MODE_SETUID] = "setuid",
+ [TOMOYO_MODE_SETGID] = "setgid",
+ [TOMOYO_MODE_STICKY] = "sticky",
+ [TOMOYO_MODE_OWNER_READ] = "owner_read",
+ [TOMOYO_MODE_OWNER_WRITE] = "owner_write",
+ [TOMOYO_MODE_OWNER_EXECUTE] = "owner_execute",
+ [TOMOYO_MODE_GROUP_READ] = "group_read",
+ [TOMOYO_MODE_GROUP_WRITE] = "group_write",
+ [TOMOYO_MODE_GROUP_EXECUTE] = "group_execute",
+ [TOMOYO_MODE_OTHERS_READ] = "others_read",
+ [TOMOYO_MODE_OTHERS_WRITE] = "others_write",
+ [TOMOYO_MODE_OTHERS_EXECUTE] = "others_execute",
+ [TOMOYO_EXEC_REALPATH] = "exec.realpath",
+ [TOMOYO_SYMLINK_TARGET] = "symlink.target",
+ [TOMOYO_PATH1_UID] = "path1.uid",
+ [TOMOYO_PATH1_GID] = "path1.gid",
+ [TOMOYO_PATH1_INO] = "path1.ino",
+ [TOMOYO_PATH1_MAJOR] = "path1.major",
+ [TOMOYO_PATH1_MINOR] = "path1.minor",
+ [TOMOYO_PATH1_PERM] = "path1.perm",
+ [TOMOYO_PATH1_TYPE] = "path1.type",
+ [TOMOYO_PATH1_DEV_MAJOR] = "path1.dev_major",
+ [TOMOYO_PATH1_DEV_MINOR] = "path1.dev_minor",
+ [TOMOYO_PATH2_UID] = "path2.uid",
+ [TOMOYO_PATH2_GID] = "path2.gid",
+ [TOMOYO_PATH2_INO] = "path2.ino",
+ [TOMOYO_PATH2_MAJOR] = "path2.major",
+ [TOMOYO_PATH2_MINOR] = "path2.minor",
+ [TOMOYO_PATH2_PERM] = "path2.perm",
+ [TOMOYO_PATH2_TYPE] = "path2.type",
+ [TOMOYO_PATH2_DEV_MAJOR] = "path2.dev_major",
+ [TOMOYO_PATH2_DEV_MINOR] = "path2.dev_minor",
+ [TOMOYO_PATH1_PARENT_UID] = "path1.parent.uid",
+ [TOMOYO_PATH1_PARENT_GID] = "path1.parent.gid",
+ [TOMOYO_PATH1_PARENT_INO] = "path1.parent.ino",
+ [TOMOYO_PATH1_PARENT_PERM] = "path1.parent.perm",
+ [TOMOYO_PATH2_PARENT_UID] = "path2.parent.uid",
+ [TOMOYO_PATH2_PARENT_GID] = "path2.parent.gid",
+ [TOMOYO_PATH2_PARENT_INO] = "path2.parent.ino",
+ [TOMOYO_PATH2_PARENT_PERM] = "path2.parent.perm",
+};
+
+/* String table for PREFERENCE keyword. */
+static const char * const tomoyo_pref_keywords[TOMOYO_MAX_PREF] = {
+ [TOMOYO_PREF_MAX_AUDIT_LOG] = "max_audit_log",
+ [TOMOYO_PREF_MAX_LEARNING_ENTRY] = "max_learning_entry",
+};
+
+/* String table for path operation. */
+const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = {
+ [TOMOYO_TYPE_EXECUTE] = "execute",
+ [TOMOYO_TYPE_READ] = "read",
+ [TOMOYO_TYPE_WRITE] = "write",
+ [TOMOYO_TYPE_APPEND] = "append",
+ [TOMOYO_TYPE_UNLINK] = "unlink",
+ [TOMOYO_TYPE_GETATTR] = "getattr",
+ [TOMOYO_TYPE_RMDIR] = "rmdir",
+ [TOMOYO_TYPE_TRUNCATE] = "truncate",
+ [TOMOYO_TYPE_SYMLINK] = "symlink",
+ [TOMOYO_TYPE_CHROOT] = "chroot",
+ [TOMOYO_TYPE_UMOUNT] = "unmount",
+};
+
+/* String table for categories. */
+static const char * const tomoyo_category_keywords
+[TOMOYO_MAX_MAC_CATEGORY_INDEX] = {
+ [TOMOYO_MAC_CATEGORY_FILE] = "file",
+};
+
/* Permit policy management by non-root user? */
static bool tomoyo_manage_by_non_root;
@@ -71,11 +146,20 @@
*
* @value: Bool value.
*/
-static const char *tomoyo_yesno(const unsigned int value)
+const char *tomoyo_yesno(const unsigned int value)
{
return value ? "yes" : "no";
}
+/**
+ * tomoyo_addprintf - strncat()-like snprintf().
+ *
+ * @buffer: Buffer to write to. Must be '\0'-terminated.
+ * @len: Size of @buffer.
+ * @fmt: The printf()'s format string, followed by parameters.
+ *
+ * Returns nothing.
+ */
static void tomoyo_addprintf(char *buffer, int len, const char *fmt, ...)
{
va_list args;
@@ -96,7 +180,7 @@
{
while (head->r.w_pos) {
const char *w = head->r.w[0];
- int len = strlen(w);
+ size_t len = strlen(w);
if (len) {
if (len > head->read_user_buf_avail)
len = head->read_user_buf_avail;
@@ -111,7 +195,7 @@
head->r.w[0] = w;
if (*w)
return false;
- /* Add '\0' for query. */
+ /* Add '\0' for audit logs and query. */
if (head->poll) {
if (!head->read_user_buf_avail ||
copy_to_user(head->read_user_buf, "", 1))
@@ -155,8 +239,8 @@
void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
{
va_list args;
- int len;
- int pos = head->r.avail;
+ size_t len;
+ size_t pos = head->r.avail;
int size = head->readbuf_size - pos;
if (size <= 0)
return;
@@ -171,11 +255,25 @@
tomoyo_set_string(head, head->read_buf + pos);
}
+/**
+ * tomoyo_set_space - Put a space to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
static void tomoyo_set_space(struct tomoyo_io_buffer *head)
{
tomoyo_set_string(head, " ");
}
+/**
+ * tomoyo_set_lf - Put a line feed to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns true if all data is flushed out, false otherwise.
+ */
static bool tomoyo_set_lf(struct tomoyo_io_buffer *head)
{
tomoyo_set_string(head, "\n");
@@ -183,6 +281,62 @@
}
/**
+ * tomoyo_set_slash - Put a slash to "struct tomoyo_io_buffer" structure.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_set_slash(struct tomoyo_io_buffer *head)
+{
+ tomoyo_set_string(head, "/");
+}
+
+/* List of namespaces. */
+LIST_HEAD(tomoyo_namespace_list);
+/* True if namespace other than tomoyo_kernel_namespace is defined. */
+static bool tomoyo_namespace_enabled;
+
+/**
+ * tomoyo_init_policy_namespace - Initialize namespace.
+ *
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
+ *
+ * Returns nothing.
+ */
+void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns)
+{
+ unsigned int idx;
+ for (idx = 0; idx < TOMOYO_MAX_ACL_GROUPS; idx++)
+ INIT_LIST_HEAD(&ns->acl_group[idx]);
+ for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++)
+ INIT_LIST_HEAD(&ns->group_list[idx]);
+ for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++)
+ INIT_LIST_HEAD(&ns->policy_list[idx]);
+ ns->profile_version = 20100903;
+ tomoyo_namespace_enabled = !list_empty(&tomoyo_namespace_list);
+ list_add_tail_rcu(&ns->namespace_list, &tomoyo_namespace_list);
+}
+
+/**
+ * tomoyo_print_namespace - Print namespace header.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_namespace(struct tomoyo_io_buffer *head)
+{
+ if (!tomoyo_namespace_enabled)
+ return;
+ tomoyo_set_string(head,
+ container_of(head->r.ns,
+ struct tomoyo_policy_namespace,
+ namespace_list)->name);
+ tomoyo_set_space(head);
+}
+
+/**
* tomoyo_print_name_union - Print a tomoyo_name_union.
*
* @head: Pointer to "struct tomoyo_io_buffer".
@@ -192,7 +346,7 @@
const struct tomoyo_name_union *ptr)
{
tomoyo_set_space(head);
- if (ptr->is_group) {
+ if (ptr->group) {
tomoyo_set_string(head, "@");
tomoyo_set_string(head, ptr->group->group_name->name);
} else {
@@ -201,24 +355,46 @@
}
/**
- * tomoyo_print_number_union - Print a tomoyo_number_union.
+ * tomoyo_print_name_union_quoted - Print a tomoyo_name_union with a quote.
*
- * @head: Pointer to "struct tomoyo_io_buffer".
- * @ptr: Pointer to "struct tomoyo_number_union".
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns nothing.
*/
-static void tomoyo_print_number_union(struct tomoyo_io_buffer *head,
- const struct tomoyo_number_union *ptr)
+static void tomoyo_print_name_union_quoted(struct tomoyo_io_buffer *head,
+ const struct tomoyo_name_union *ptr)
{
- tomoyo_set_space(head);
- if (ptr->is_group) {
+ if (ptr->group) {
+ tomoyo_set_string(head, "@");
+ tomoyo_set_string(head, ptr->group->group_name->name);
+ } else {
+ tomoyo_set_string(head, "\"");
+ tomoyo_set_string(head, ptr->filename->name);
+ tomoyo_set_string(head, "\"");
+ }
+}
+
+/**
+ * tomoyo_print_number_union_nospace - Print a tomoyo_number_union without a space.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_number_union_nospace
+(struct tomoyo_io_buffer *head, const struct tomoyo_number_union *ptr)
+{
+ if (ptr->group) {
tomoyo_set_string(head, "@");
tomoyo_set_string(head, ptr->group->group_name->name);
} else {
int i;
unsigned long min = ptr->values[0];
const unsigned long max = ptr->values[1];
- u8 min_type = ptr->min_type;
- const u8 max_type = ptr->max_type;
+ u8 min_type = ptr->value_type[0];
+ const u8 max_type = ptr->value_type[1];
char buffer[128];
buffer[0] = '\0';
for (i = 0; i < 2; i++) {
@@ -232,8 +408,8 @@
"0%lo", min);
break;
default:
- tomoyo_addprintf(buffer, sizeof(buffer),
- "%lu", min);
+ tomoyo_addprintf(buffer, sizeof(buffer), "%lu",
+ min);
break;
}
if (min == max && min_type == max_type)
@@ -247,35 +423,53 @@
}
/**
+ * tomoyo_print_number_union - Print a tomoyo_number_union.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ptr: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_print_number_union(struct tomoyo_io_buffer *head,
+ const struct tomoyo_number_union *ptr)
+{
+ tomoyo_set_space(head);
+ tomoyo_print_number_union_nospace(head, ptr);
+}
+
+/**
* tomoyo_assign_profile - Create a new profile.
*
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
* @profile: Profile number to create.
*
* Returns pointer to "struct tomoyo_profile" on success, NULL otherwise.
*/
-static struct tomoyo_profile *tomoyo_assign_profile(const unsigned int profile)
+static struct tomoyo_profile *tomoyo_assign_profile
+(struct tomoyo_policy_namespace *ns, const unsigned int profile)
{
struct tomoyo_profile *ptr;
struct tomoyo_profile *entry;
if (profile >= TOMOYO_MAX_PROFILES)
return NULL;
- ptr = tomoyo_profile_ptr[profile];
+ ptr = ns->profile_ptr[profile];
if (ptr)
return ptr;
entry = kzalloc(sizeof(*entry), GFP_NOFS);
if (mutex_lock_interruptible(&tomoyo_policy_lock))
goto out;
- ptr = tomoyo_profile_ptr[profile];
+ ptr = ns->profile_ptr[profile];
if (!ptr && tomoyo_memory_ok(entry)) {
ptr = entry;
- ptr->learning = &tomoyo_default_profile.preference;
- ptr->permissive = &tomoyo_default_profile.preference;
- ptr->enforcing = &tomoyo_default_profile.preference;
- ptr->default_config = TOMOYO_CONFIG_DISABLED;
+ ptr->default_config = TOMOYO_CONFIG_DISABLED |
+ TOMOYO_CONFIG_WANT_GRANT_LOG |
+ TOMOYO_CONFIG_WANT_REJECT_LOG;
memset(ptr->config, TOMOYO_CONFIG_USE_DEFAULT,
sizeof(ptr->config));
+ ptr->pref[TOMOYO_PREF_MAX_AUDIT_LOG] = 1024;
+ ptr->pref[TOMOYO_PREF_MAX_LEARNING_ENTRY] = 2048;
mb(); /* Avoid out-of-order execution. */
- tomoyo_profile_ptr[profile] = ptr;
+ ns->profile_ptr[profile] = ptr;
entry = NULL;
}
mutex_unlock(&tomoyo_policy_lock);
@@ -287,19 +481,29 @@
/**
* tomoyo_profile - Find a profile.
*
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
* @profile: Profile number to find.
*
* Returns pointer to "struct tomoyo_profile".
*/
-struct tomoyo_profile *tomoyo_profile(const u8 profile)
+struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns,
+ const u8 profile)
{
- struct tomoyo_profile *ptr = tomoyo_profile_ptr[profile];
- if (!tomoyo_policy_loaded)
- return &tomoyo_default_profile;
- BUG_ON(!ptr);
+ static struct tomoyo_profile tomoyo_null_profile;
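+ /* Fall back to an all-zero profile until a real one is assigned. */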
+ struct tomoyo_profile *ptr = ns->profile_ptr[profile];
+ if (!ptr)
+ ptr = &tomoyo_null_profile;
return ptr;
}
+/**
+ * tomoyo_find_yesno - Find values for specified keyword.
+ *
+ * @string: String to check.
+ * @find: Name of keyword.
+ *
+ * Returns 1 if "@find=yes" was found, 0 if "@find=no" was found, -1 otherwise.
+ */
static s8 tomoyo_find_yesno(const char *string, const char *find)
{
const char *cp = strstr(string, find);
@@ -313,18 +517,15 @@
return -1;
}
-static void tomoyo_set_bool(bool *b, const char *string, const char *find)
-{
- switch (tomoyo_find_yesno(string, find)) {
- case 1:
- *b = true;
- break;
- case 0:
- *b = false;
- break;
- }
-}
-
+/**
+ * tomoyo_set_uint - Set value for specified preference.
+ *
+ * @i: Pointer to "unsigned int".
+ * @string: String to check.
+ * @find: Name of keyword.
+ *
+ * Returns nothing.
+ */
static void tomoyo_set_uint(unsigned int *i, const char *string,
const char *find)
{
@@ -333,51 +534,16 @@
sscanf(cp + strlen(find), "=%u", i);
}
-static void tomoyo_set_pref(const char *name, const char *value,
- const bool use_default,
- struct tomoyo_profile *profile)
-{
- struct tomoyo_preference **pref;
- bool *verbose;
- if (!strcmp(name, "enforcing")) {
- if (use_default) {
- pref = &profile->enforcing;
- goto set_default;
- }
- profile->enforcing = &profile->preference;
- verbose = &profile->preference.enforcing_verbose;
- goto set_verbose;
- }
- if (!strcmp(name, "permissive")) {
- if (use_default) {
- pref = &profile->permissive;
- goto set_default;
- }
- profile->permissive = &profile->preference;
- verbose = &profile->preference.permissive_verbose;
- goto set_verbose;
- }
- if (!strcmp(name, "learning")) {
- if (use_default) {
- pref = &profile->learning;
- goto set_default;
- }
- profile->learning = &profile->preference;
- tomoyo_set_uint(&profile->preference.learning_max_entry, value,
- "max_entry");
- verbose = &profile->preference.learning_verbose;
- goto set_verbose;
- }
- return;
- set_default:
- *pref = &tomoyo_default_profile.preference;
- return;
- set_verbose:
- tomoyo_set_bool(verbose, value, "verbose");
-}
-
+/**
+ * tomoyo_set_mode - Set mode for specified profile.
+ *
+ * @name: Name of functionality.
+ * @value: Mode for @name.
+ * @profile: Pointer to "struct tomoyo_profile".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_set_mode(char *name, const char *value,
- const bool use_default,
struct tomoyo_profile *profile)
{
u8 i;
@@ -389,7 +555,17 @@
config = 0;
for (i = 0; i < TOMOYO_MAX_MAC_INDEX
+ TOMOYO_MAX_MAC_CATEGORY_INDEX; i++) {
- if (strcmp(name, tomoyo_mac_keywords[i]))
+ int len = 0;
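+ /*
+ * Per-functionality names are "<category>::<keyword>"
+ * (e.g. "file::open"); strip the category prefix first.
+ */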
+ if (i < TOMOYO_MAX_MAC_INDEX) {
+ const u8 c = tomoyo_index2category[i];
+ const char *category =
+ tomoyo_category_keywords[c];
+ len = strlen(category);
+ if (strncmp(name, category, len) ||
+ name[len++] != ':' || name[len++] != ':')
+ continue;
+ }
+ if (strcmp(name + len, tomoyo_mac_keywords[i]))
continue;
config = profile->config[i];
break;
@@ -399,7 +575,7 @@
} else {
return -EINVAL;
}
- if (use_default) {
+ if (strstr(value, "use_default")) {
config = TOMOYO_CONFIG_USE_DEFAULT;
} else {
u8 mode;
@@ -410,6 +586,24 @@
 * 'config' from 'TOMOYO_CONFIG_USE_DEFAULT'.
*/
config = (config & ~7) | mode;
+ if (config != TOMOYO_CONFIG_USE_DEFAULT) {
+ switch (tomoyo_find_yesno(value, "grant_log")) {
+ case 1:
+ config |= TOMOYO_CONFIG_WANT_GRANT_LOG;
+ break;
+ case 0:
+ config &= ~TOMOYO_CONFIG_WANT_GRANT_LOG;
+ break;
+ }
+ switch (tomoyo_find_yesno(value, "reject_log")) {
+ case 1:
+ config |= TOMOYO_CONFIG_WANT_REJECT_LOG;
+ break;
+ case 0:
+ config &= ~TOMOYO_CONFIG_WANT_REJECT_LOG;
+ break;
+ }
+ }
}
if (i < TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX)
profile->config[i] = config;
@@ -429,34 +623,22 @@
{
char *data = head->write_buf;
unsigned int i;
- bool use_default = false;
char *cp;
struct tomoyo_profile *profile;
- if (sscanf(data, "PROFILE_VERSION=%u", &tomoyo_profile_version) == 1)
+ if (sscanf(data, "PROFILE_VERSION=%u", &head->w.ns->profile_version)
+ == 1)
return 0;
i = simple_strtoul(data, &cp, 10);
- if (data == cp) {
- profile = &tomoyo_default_profile;
- } else {
- if (*cp != '-')
- return -EINVAL;
- data = cp + 1;
- profile = tomoyo_assign_profile(i);
- if (!profile)
- return -EINVAL;
- }
+ if (*cp != '-')
+ return -EINVAL;
+ data = cp + 1;
+ profile = tomoyo_assign_profile(head->w.ns, i);
+ if (!profile)
+ return -EINVAL;
cp = strchr(data, '=');
if (!cp)
return -EINVAL;
*cp++ = '\0';
- if (profile != &tomoyo_default_profile)
- use_default = strstr(cp, "use_default") != NULL;
- if (tomoyo_str_starts(&data, "PREFERENCE::")) {
- tomoyo_set_pref(data, cp, use_default, profile);
- return 0;
- }
- if (profile == &tomoyo_default_profile)
- return -EINVAL;
if (!strcmp(data, "COMMENT")) {
static DEFINE_SPINLOCK(lock);
const struct tomoyo_path_info *new_comment
@@ -471,77 +653,62 @@
tomoyo_put_name(old_comment);
return 0;
}
- return tomoyo_set_mode(data, cp, use_default, profile);
+ if (!strcmp(data, "PREFERENCE")) {
+ for (i = 0; i < TOMOYO_MAX_PREF; i++)
+ tomoyo_set_uint(&profile->pref[i], cp,
+ tomoyo_pref_keywords[i]);
+ return 0;
+ }
+ return tomoyo_set_mode(data, cp, profile);
}
-static void tomoyo_print_preference(struct tomoyo_io_buffer *head,
- const int idx)
-{
- struct tomoyo_preference *pref = &tomoyo_default_profile.preference;
- const struct tomoyo_profile *profile = idx >= 0 ?
- tomoyo_profile_ptr[idx] : NULL;
- char buffer[16] = "";
- if (profile) {
- buffer[sizeof(buffer) - 1] = '\0';
- snprintf(buffer, sizeof(buffer) - 1, "%u-", idx);
- }
- if (profile) {
- pref = profile->learning;
- if (pref == &tomoyo_default_profile.preference)
- goto skip1;
- }
- tomoyo_io_printf(head, "%sPREFERENCE::%s={ "
- "verbose=%s max_entry=%u }\n",
- buffer, "learning",
- tomoyo_yesno(pref->learning_verbose),
- pref->learning_max_entry);
- skip1:
- if (profile) {
- pref = profile->permissive;
- if (pref == &tomoyo_default_profile.preference)
- goto skip2;
- }
- tomoyo_io_printf(head, "%sPREFERENCE::%s={ verbose=%s }\n",
- buffer, "permissive",
- tomoyo_yesno(pref->permissive_verbose));
- skip2:
- if (profile) {
- pref = profile->enforcing;
- if (pref == &tomoyo_default_profile.preference)
- return;
- }
- tomoyo_io_printf(head, "%sPREFERENCE::%s={ verbose=%s }\n",
- buffer, "enforcing",
- tomoyo_yesno(pref->enforcing_verbose));
-}
-
+/**
+ * tomoyo_print_config - Print mode for specified functionality.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @config: Mode for that functionality.
+ *
+ * Returns nothing.
+ *
+ * Caller prints functionality's name.
+ */
static void tomoyo_print_config(struct tomoyo_io_buffer *head, const u8 config)
{
- tomoyo_io_printf(head, "={ mode=%s }\n", tomoyo_mode[config & 3]);
+ tomoyo_io_printf(head, "={ mode=%s grant_log=%s reject_log=%s }\n",
+ tomoyo_mode[config & 3],
+ tomoyo_yesno(config & TOMOYO_CONFIG_WANT_GRANT_LOG),
+ tomoyo_yesno(config & TOMOYO_CONFIG_WANT_REJECT_LOG));
}
/**
* tomoyo_read_profile - Read profile table.
*
* @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
*/
static void tomoyo_read_profile(struct tomoyo_io_buffer *head)
{
u8 index;
+ struct tomoyo_policy_namespace *ns =
+ container_of(head->r.ns, typeof(*ns), namespace_list);
const struct tomoyo_profile *profile;
+ if (head->r.eof)
+ return;
next:
index = head->r.index;
- profile = tomoyo_profile_ptr[index];
+ profile = ns->profile_ptr[index];
switch (head->r.step) {
case 0:
- tomoyo_io_printf(head, "PROFILE_VERSION=%s\n", "20090903");
- tomoyo_print_preference(head, -1);
+ tomoyo_print_namespace(head);
+ tomoyo_io_printf(head, "PROFILE_VERSION=%u\n",
+ ns->profile_version);
head->r.step++;
break;
case 1:
for ( ; head->r.index < TOMOYO_MAX_PROFILES;
head->r.index++)
- if (tomoyo_profile_ptr[head->r.index])
+ if (ns->profile_ptr[head->r.index])
break;
if (head->r.index == TOMOYO_MAX_PROFILES)
return;
@@ -549,16 +716,25 @@
break;
case 2:
{
+ u8 i;
const struct tomoyo_path_info *comment =
profile->comment;
+ tomoyo_print_namespace(head);
tomoyo_io_printf(head, "%u-COMMENT=", index);
tomoyo_set_string(head, comment ? comment->name : "");
tomoyo_set_lf(head);
+ tomoyo_io_printf(head, "%u-PREFERENCE={ ", index);
+ for (i = 0; i < TOMOYO_MAX_PREF; i++)
+ tomoyo_io_printf(head, "%s=%u ",
+ tomoyo_pref_keywords[i],
+ profile->pref[i]);
+ tomoyo_set_string(head, "}\n");
head->r.step++;
}
break;
case 3:
{
+ tomoyo_print_namespace(head);
tomoyo_io_printf(head, "%u-%s", index, "CONFIG");
tomoyo_print_config(head, profile->default_config);
head->r.bit = 0;
@@ -572,15 +748,22 @@
const u8 config = profile->config[i];
if (config == TOMOYO_CONFIG_USE_DEFAULT)
continue;
- tomoyo_io_printf(head, "%u-%s%s", index, "CONFIG::",
- tomoyo_mac_keywords[i]);
+ tomoyo_print_namespace(head);
+ if (i < TOMOYO_MAX_MAC_INDEX)
+ tomoyo_io_printf(head, "%u-CONFIG::%s::%s",
+ index,
+ tomoyo_category_keywords
+ [tomoyo_index2category[i]],
+ tomoyo_mac_keywords[i]);
+ else
+ tomoyo_io_printf(head, "%u-CONFIG::%s", index,
+ tomoyo_mac_keywords[i]);
tomoyo_print_config(head, config);
head->r.bit++;
break;
}
if (head->r.bit == TOMOYO_MAX_MAC_INDEX
+ TOMOYO_MAX_MAC_CATEGORY_INDEX) {
- tomoyo_print_preference(head, index);
head->r.index++;
head->r.step = 1;
}
@@ -590,6 +773,14 @@
goto next;
}
+/**
+ * tomoyo_same_manager - Check for duplicated "struct tomoyo_manager" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
static bool tomoyo_same_manager(const struct tomoyo_acl_head *a,
const struct tomoyo_acl_head *b)
{
@@ -611,8 +802,13 @@
const bool is_delete)
{
struct tomoyo_manager e = { };
- int error;
-
+ struct tomoyo_acl_param param = {
+ /* .ns = &tomoyo_kernel_namespace, */
+ .is_delete = is_delete,
+ .list = &tomoyo_kernel_namespace.
+ policy_list[TOMOYO_ID_MANAGER],
+ };
+ int error = is_delete ? -ENOENT : -ENOMEM;
if (tomoyo_domain_def(manager)) {
if (!tomoyo_correct_domain(manager))
return -EINVAL;
@@ -622,12 +818,11 @@
return -EINVAL;
}
e.manager = tomoyo_get_name(manager);
- if (!e.manager)
- return -ENOMEM;
- error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
- &tomoyo_policy_list[TOMOYO_ID_MANAGER],
- tomoyo_same_manager);
- tomoyo_put_name(e.manager);
+ if (e.manager) {
+ error = tomoyo_update_policy(&e.head, sizeof(e), &param,
+ tomoyo_same_manager);
+ tomoyo_put_name(e.manager);
+ }
return error;
}
@@ -643,13 +838,12 @@
static int tomoyo_write_manager(struct tomoyo_io_buffer *head)
{
char *data = head->write_buf;
- bool is_delete = tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE);
if (!strcmp(data, "manage_by_non_root")) {
- tomoyo_manage_by_non_root = !is_delete;
+ tomoyo_manage_by_non_root = !head->w.is_delete;
return 0;
}
- return tomoyo_update_manager_entry(data, is_delete);
+ return tomoyo_update_manager_entry(data, head->w.is_delete);
}
/**
@@ -663,8 +857,8 @@
{
if (head->r.eof)
return;
- list_for_each_cookie(head->r.acl,
- &tomoyo_policy_list[TOMOYO_ID_MANAGER]) {
+ list_for_each_cookie(head->r.acl, &tomoyo_kernel_namespace.
+ policy_list[TOMOYO_ID_MANAGER]) {
struct tomoyo_manager *ptr =
list_entry(head->r.acl, typeof(*ptr), head.list);
if (ptr->head.is_deleted)
@@ -697,8 +891,8 @@
return true;
if (!tomoyo_manage_by_non_root && (task->cred->uid || task->cred->euid))
return false;
- list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_MANAGER],
- head.list) {
+ list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.
+ policy_list[TOMOYO_ID_MANAGER], head.list) {
if (!ptr->head.is_deleted && ptr->is_domain
&& !tomoyo_pathcmp(domainname, ptr->manager)) {
found = true;
@@ -710,8 +904,8 @@
exe = tomoyo_get_exe();
if (!exe)
return false;
- list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_MANAGER],
- head.list) {
+ list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.
+ policy_list[TOMOYO_ID_MANAGER], head.list) {
if (!ptr->head.is_deleted && !ptr->is_domain
&& !strcmp(exe, ptr->manager->name)) {
found = true;
@@ -732,7 +926,7 @@
}
/**
- * tomoyo_select_one - Parse select command.
+ * tomoyo_select_domain - Parse select command.
*
* @head: Pointer to "struct tomoyo_io_buffer".
* @data: String to parse.
@@ -741,16 +935,15 @@
*
* Caller holds tomoyo_read_lock().
*/
-static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data)
+static bool tomoyo_select_domain(struct tomoyo_io_buffer *head,
+ const char *data)
{
unsigned int pid;
struct tomoyo_domain_info *domain = NULL;
bool global_pid = false;
-
- if (!strcmp(data, "allow_execute")) {
- head->r.print_execute_only = true;
- return true;
- }
+ if (strncmp(data, "select ", 7))
+ return false;
+ data += 7;
if (sscanf(data, "pid=%u", &pid) == 1 ||
(global_pid = true, sscanf(data, "global-pid=%u", &pid) == 1)) {
struct task_struct *p;
@@ -769,7 +962,7 @@
domain = tomoyo_find_domain(data + 7);
} else
return false;
- head->write_var1 = domain;
+ head->w.domain = domain;
/* Accessing read_buf is safe because head->io_sem is held. */
if (!head->read_buf)
return true; /* Do nothing if open(O_WRONLY). */
@@ -821,20 +1014,47 @@
/**
* tomoyo_write_domain2 - Write domain policy.
*
- * @head: Pointer to "struct tomoyo_io_buffer".
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
+ * @list: Pointer to "struct list_head".
+ * @data: Policy to be interpreted.
+ * @is_delete: True if it is a delete request.
*
* Returns 0 on success, negative value otherwise.
*
* Caller holds tomoyo_read_lock().
*/
-static int tomoyo_write_domain2(char *data, struct tomoyo_domain_info *domain,
+static int tomoyo_write_domain2(struct tomoyo_policy_namespace *ns,
+ struct list_head *list, char *data,
const bool is_delete)
{
- if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_ALLOW_MOUNT))
- return tomoyo_write_mount(data, domain, is_delete);
- return tomoyo_write_file(data, domain, is_delete);
+ struct tomoyo_acl_param param = {
+ .ns = ns,
+ .list = list,
+ .data = data,
+ .is_delete = is_delete,
+ };
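+ /* Keyword dispatch table; only "file " is handled so far. */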
+ static const struct {
+ const char *keyword;
+ int (*write) (struct tomoyo_acl_param *);
+ } tomoyo_callback[1] = {
+ { "file ", tomoyo_write_file },
+ };
+ u8 i;
+ for (i = 0; i < 1; i++) {
+ if (!tomoyo_str_starts(&param.data,
+ tomoyo_callback[i].keyword))
+ continue;
+ return tomoyo_callback[i].write(&param);
+ }
+ return -EINVAL;
}
+/* String table for domain flags. */
+const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS] = {
+ [TOMOYO_DIF_QUOTA_WARNED] = "quota_exceeded\n",
+ [TOMOYO_DIF_TRANSITION_FAILED] = "transition_failed\n",
+};
+
/**
* tomoyo_write_domain - Write domain policy.
*
@@ -847,69 +1067,198 @@
static int tomoyo_write_domain(struct tomoyo_io_buffer *head)
{
char *data = head->write_buf;
- struct tomoyo_domain_info *domain = head->write_var1;
- bool is_delete = false;
- bool is_select = false;
+ struct tomoyo_policy_namespace *ns;
+ struct tomoyo_domain_info *domain = head->w.domain;
+ const bool is_delete = head->w.is_delete;
+ bool is_select = !is_delete && tomoyo_str_starts(&data, "select ");
unsigned int profile;
-
- if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE))
- is_delete = true;
- else if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_SELECT))
- is_select = true;
- if (is_select && tomoyo_select_one(head, data))
- return 0;
- /* Don't allow updating policies by non manager programs. */
- if (!tomoyo_manager())
- return -EPERM;
- if (tomoyo_domain_def(data)) {
+ if (*data == '<') {
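+ /* A leading '<' means a domainname such as "<kernel> /usr/sbin/foo". */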
domain = NULL;
if (is_delete)
tomoyo_delete_domain(data);
else if (is_select)
domain = tomoyo_find_domain(data);
else
- domain = tomoyo_assign_domain(data, 0);
- head->write_var1 = domain;
+ domain = tomoyo_assign_domain(data, false);
+ head->w.domain = domain;
return 0;
}
if (!domain)
return -EINVAL;
-
- if (sscanf(data, TOMOYO_KEYWORD_USE_PROFILE "%u", &profile) == 1
+ ns = domain->ns;
+ if (sscanf(data, "use_profile %u", &profile) == 1
&& profile < TOMOYO_MAX_PROFILES) {
- if (tomoyo_profile_ptr[profile] || !tomoyo_policy_loaded)
+ if (!tomoyo_policy_loaded || ns->profile_ptr[profile])
domain->profile = (u8) profile;
return 0;
}
- if (!strcmp(data, TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ)) {
- domain->ignore_global_allow_read = !is_delete;
+ if (sscanf(data, "use_group %u\n", &profile) == 1
+ && profile < TOMOYO_MAX_ACL_GROUPS) {
+ if (!is_delete)
+ domain->group = (u8) profile;
return 0;
}
- if (!strcmp(data, TOMOYO_KEYWORD_QUOTA_EXCEEDED)) {
- domain->quota_warned = !is_delete;
+ for (profile = 0; profile < TOMOYO_MAX_DOMAIN_INFO_FLAGS; profile++) {
+ const char *cp = tomoyo_dif[profile];
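+ /* Compare ignoring tomoyo_dif[]'s trailing '\n'. */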
+ if (strncmp(data, cp, strlen(cp) - 1))
+ continue;
+ domain->flags[profile] = !is_delete;
return 0;
}
- if (!strcmp(data, TOMOYO_KEYWORD_TRANSITION_FAILED)) {
- domain->transition_failed = !is_delete;
- return 0;
- }
- return tomoyo_write_domain2(data, domain, is_delete);
+ return tomoyo_write_domain2(ns, &domain->acl_info_list, data,
+ is_delete);
}
/**
- * tomoyo_fns - Find next set bit.
+ * tomoyo_print_condition - Print condition part.
*
- * @perm: 8 bits value.
- * @bit: First bit to find.
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @cond: Pointer to "struct tomoyo_condition".
*
- * Returns next on-bit on success, 8 otherwise.
+ * Returns true on success, false otherwise.
*/
-static u8 tomoyo_fns(const u8 perm, u8 bit)
+static bool tomoyo_print_condition(struct tomoyo_io_buffer *head,
+ const struct tomoyo_condition *cond)
{
- for ( ; bit < 8; bit++)
- if (perm & (1 << bit))
+ switch (head->r.cond_step) {
+ case 0:
+ head->r.cond_index = 0;
+ head->r.cond_step++;
+ /* fall through */
+ case 1:
+ {
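+ /*
+ * The condition elements, number unions, name unions, argv
+ * and envp entries are packed right after
+ * "struct tomoyo_condition", in that order.
+ */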
+ const u16 condc = cond->condc;
+ const struct tomoyo_condition_element *condp =
+ (typeof(condp)) (cond + 1);
+ const struct tomoyo_number_union *numbers_p =
+ (typeof(numbers_p)) (condp + condc);
+ const struct tomoyo_name_union *names_p =
+ (typeof(names_p))
+ (numbers_p + cond->numbers_count);
+ const struct tomoyo_argv *argv =
+ (typeof(argv)) (names_p + cond->names_count);
+ const struct tomoyo_envp *envp =
+ (typeof(envp)) (argv + cond->argc);
+ u16 skip;
+ for (skip = 0; skip < head->r.cond_index; skip++) {
+ const u8 left = condp->left;
+ const u8 right = condp->right;
+ condp++;
+ switch (left) {
+ case TOMOYO_ARGV_ENTRY:
+ argv++;
+ continue;
+ case TOMOYO_ENVP_ENTRY:
+ envp++;
+ continue;
+ case TOMOYO_NUMBER_UNION:
+ numbers_p++;
+ break;
+ }
+ switch (right) {
+ case TOMOYO_NAME_UNION:
+ names_p++;
+ break;
+ case TOMOYO_NUMBER_UNION:
+ numbers_p++;
+ break;
+ }
+ }
+ while (head->r.cond_index < condc) {
+ const u8 match = condp->equals;
+ const u8 left = condp->left;
+ const u8 right = condp->right;
+ if (!tomoyo_flush(head))
+ return false;
+ condp++;
+ head->r.cond_index++;
+ tomoyo_set_space(head);
+ switch (left) {
+ case TOMOYO_ARGV_ENTRY:
+ tomoyo_io_printf(head,
+ "exec.argv[%lu]%s=\"",
+ argv->index, argv->
+ is_not ? "!" : "");
+ tomoyo_set_string(head,
+ argv->value->name);
+ tomoyo_set_string(head, "\"");
+ argv++;
+ continue;
+ case TOMOYO_ENVP_ENTRY:
+ tomoyo_set_string(head,
+ "exec.envp[\"");
+ tomoyo_set_string(head,
+ envp->name->name);
+ tomoyo_io_printf(head, "\"]%s=", envp->
+ is_not ? "!" : "");
+ if (envp->value) {
+ tomoyo_set_string(head, "\"");
+ tomoyo_set_string(head, envp->
+ value->name);
+ tomoyo_set_string(head, "\"");
+ } else {
+ tomoyo_set_string(head,
+ "NULL");
+ }
+ envp++;
+ continue;
+ case TOMOYO_NUMBER_UNION:
+ tomoyo_print_number_union_nospace
+ (head, numbers_p++);
+ break;
+ default:
+ tomoyo_set_string(head,
+ tomoyo_condition_keyword[left]);
+ break;
+ }
+ tomoyo_set_string(head, match ? "=" : "!=");
+ switch (right) {
+ case TOMOYO_NAME_UNION:
+ tomoyo_print_name_union_quoted
+ (head, names_p++);
+ break;
+ case TOMOYO_NUMBER_UNION:
+ tomoyo_print_number_union_nospace
+ (head, numbers_p++);
+ break;
+ default:
+ tomoyo_set_string(head,
+ tomoyo_condition_keyword[right]);
+ break;
+ }
+ }
+ }
+ head->r.cond_step++;
+ /* fall through */
+ case 2:
+ if (!tomoyo_flush(head))
break;
- return bit;
+ head->r.cond_step++;
+ /* fall through */
+ case 3:
+ tomoyo_set_lf(head);
+ return true;
+ }
+ return false;
+}
+
+/**
+ * tomoyo_set_group - Print "acl_group " header keyword and category name.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @category: Category name.
+ *
+ * Returns nothing.
+ */
+static void tomoyo_set_group(struct tomoyo_io_buffer *head,
+ const char *category)
+{
+ if (head->type == TOMOYO_EXCEPTIONPOLICY) {
+ tomoyo_print_namespace(head);
+ tomoyo_io_printf(head, "acl_group %u ",
+ head->r.acl_group_index);
+ }
+ tomoyo_set_string(head, category);
}
/**
@@ -924,63 +1273,96 @@
struct tomoyo_acl_info *acl)
{
const u8 acl_type = acl->type;
+ bool first = true;
u8 bit;
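+ /*
+ * "first" tracks whether the "file " group header has been
+ * printed; later permission names are joined with '/'.
+ */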
+ if (head->r.print_cond_part)
+ goto print_cond_part;
if (acl->is_deleted)
return true;
- next:
- bit = head->r.bit;
if (!tomoyo_flush(head))
return false;
else if (acl_type == TOMOYO_TYPE_PATH_ACL) {
struct tomoyo_path_acl *ptr =
container_of(acl, typeof(*ptr), head);
const u16 perm = ptr->perm;
- for ( ; bit < TOMOYO_MAX_PATH_OPERATION; bit++) {
+ for (bit = 0; bit < TOMOYO_MAX_PATH_OPERATION; bit++) {
if (!(perm & (1 << bit)))
continue;
- if (head->r.print_execute_only &&
+ if (head->r.print_transition_related_only &&
bit != TOMOYO_TYPE_EXECUTE)
continue;
- /* Print "read/write" instead of "read" and "write". */
- if ((bit == TOMOYO_TYPE_READ ||
- bit == TOMOYO_TYPE_WRITE)
- && (perm & (1 << TOMOYO_TYPE_READ_WRITE)))
- continue;
- break;
+ if (first) {
+ tomoyo_set_group(head, "file ");
+ first = false;
+ } else {
+ tomoyo_set_slash(head);
+ }
+ tomoyo_set_string(head, tomoyo_path_keyword[bit]);
}
- if (bit >= TOMOYO_MAX_PATH_OPERATION)
- goto done;
- tomoyo_io_printf(head, "allow_%s", tomoyo_path_keyword[bit]);
+ if (first)
+ return true;
tomoyo_print_name_union(head, &ptr->name);
- } else if (head->r.print_execute_only) {
+ } else if (head->r.print_transition_related_only) {
return true;
} else if (acl_type == TOMOYO_TYPE_PATH2_ACL) {
struct tomoyo_path2_acl *ptr =
container_of(acl, typeof(*ptr), head);
- bit = tomoyo_fns(ptr->perm, bit);
- if (bit >= TOMOYO_MAX_PATH2_OPERATION)
- goto done;
- tomoyo_io_printf(head, "allow_%s", tomoyo_path2_keyword[bit]);
+ const u8 perm = ptr->perm;
+ for (bit = 0; bit < TOMOYO_MAX_PATH2_OPERATION; bit++) {
+ if (!(perm & (1 << bit)))
+ continue;
+ if (first) {
+ tomoyo_set_group(head, "file ");
+ first = false;
+ } else {
+ tomoyo_set_slash(head);
+ }
+ tomoyo_set_string(head, tomoyo_mac_keywords
+ [tomoyo_pp2mac[bit]]);
+ }
+ if (first)
+ return true;
tomoyo_print_name_union(head, &ptr->name1);
tomoyo_print_name_union(head, &ptr->name2);
} else if (acl_type == TOMOYO_TYPE_PATH_NUMBER_ACL) {
struct tomoyo_path_number_acl *ptr =
container_of(acl, typeof(*ptr), head);
- bit = tomoyo_fns(ptr->perm, bit);
- if (bit >= TOMOYO_MAX_PATH_NUMBER_OPERATION)
- goto done;
- tomoyo_io_printf(head, "allow_%s",
- tomoyo_path_number_keyword[bit]);
+ const u8 perm = ptr->perm;
+ for (bit = 0; bit < TOMOYO_MAX_PATH_NUMBER_OPERATION; bit++) {
+ if (!(perm & (1 << bit)))
+ continue;
+ if (first) {
+ tomoyo_set_group(head, "file ");
+ first = false;
+ } else {
+ tomoyo_set_slash(head);
+ }
+ tomoyo_set_string(head, tomoyo_mac_keywords
+ [tomoyo_pn2mac[bit]]);
+ }
+ if (first)
+ return true;
tomoyo_print_name_union(head, &ptr->name);
tomoyo_print_number_union(head, &ptr->number);
} else if (acl_type == TOMOYO_TYPE_MKDEV_ACL) {
struct tomoyo_mkdev_acl *ptr =
container_of(acl, typeof(*ptr), head);
- bit = tomoyo_fns(ptr->perm, bit);
- if (bit >= TOMOYO_MAX_MKDEV_OPERATION)
- goto done;
- tomoyo_io_printf(head, "allow_%s", tomoyo_mkdev_keyword[bit]);
+ const u8 perm = ptr->perm;
+ for (bit = 0; bit < TOMOYO_MAX_MKDEV_OPERATION; bit++) {
+ if (!(perm & (1 << bit)))
+ continue;
+ if (first) {
+ tomoyo_set_group(head, "file ");
+ first = false;
+ } else {
+ tomoyo_set_slash(head);
+ }
+ tomoyo_set_string(head, tomoyo_mac_keywords
+ [tomoyo_pnnn2mac[bit]]);
+ }
+ if (first)
+ return true;
tomoyo_print_name_union(head, &ptr->name);
tomoyo_print_number_union(head, &ptr->mode);
tomoyo_print_number_union(head, &ptr->major);
@@ -988,35 +1370,41 @@
} else if (acl_type == TOMOYO_TYPE_MOUNT_ACL) {
struct tomoyo_mount_acl *ptr =
container_of(acl, typeof(*ptr), head);
- tomoyo_io_printf(head, "allow_mount");
+ tomoyo_set_group(head, "file mount");
tomoyo_print_name_union(head, &ptr->dev_name);
tomoyo_print_name_union(head, &ptr->dir_name);
tomoyo_print_name_union(head, &ptr->fs_type);
tomoyo_print_number_union(head, &ptr->flags);
}
- head->r.bit = bit + 1;
- tomoyo_io_printf(head, "\n");
- if (acl_type != TOMOYO_TYPE_MOUNT_ACL)
- goto next;
- done:
- head->r.bit = 0;
+ if (acl->cond) {
+ head->r.print_cond_part = true;
+ head->r.cond_step = 0;
+ if (!tomoyo_flush(head))
+ return false;
+print_cond_part:
+ if (!tomoyo_print_condition(head, acl->cond))
+ return false;
+ head->r.print_cond_part = false;
+ } else {
+ tomoyo_set_lf(head);
+ }
return true;
}
/**
* tomoyo_read_domain2 - Read domain policy.
*
- * @head: Pointer to "struct tomoyo_io_buffer".
- * @domain: Pointer to "struct tomoyo_domain_info".
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @list: Pointer to "struct list_head".
*
* Caller holds tomoyo_read_lock().
*
* Returns true on success, false otherwise.
*/
static bool tomoyo_read_domain2(struct tomoyo_io_buffer *head,
- struct tomoyo_domain_info *domain)
+ struct list_head *list)
{
- list_for_each_cookie(head->r.acl, &domain->acl_info_list) {
+ list_for_each_cookie(head->r.acl, list) {
struct tomoyo_acl_info *ptr =
list_entry(head->r.acl, typeof(*ptr), list);
if (!tomoyo_print_entry(head, ptr))
@@ -1041,6 +1429,7 @@
struct tomoyo_domain_info *domain =
list_entry(head->r.domain, typeof(*domain), list);
switch (head->r.step) {
+ u8 i;
case 0:
if (domain->is_deleted &&
!head->r.print_this_domain_only)
@@ -1048,22 +1437,18 @@
/* Print domainname and flags. */
tomoyo_set_string(head, domain->domainname->name);
tomoyo_set_lf(head);
- tomoyo_io_printf(head,
- TOMOYO_KEYWORD_USE_PROFILE "%u\n",
+ tomoyo_io_printf(head, "use_profile %u\n",
domain->profile);
- if (domain->quota_warned)
- tomoyo_set_string(head, "quota_exceeded\n");
- if (domain->transition_failed)
- tomoyo_set_string(head, "transition_failed\n");
- if (domain->ignore_global_allow_read)
- tomoyo_set_string(head,
- TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ
- "\n");
+ tomoyo_io_printf(head, "use_group %u\n",
+ domain->group);
+ for (i = 0; i < TOMOYO_MAX_DOMAIN_INFO_FLAGS; i++)
+ if (domain->flags[i])
+ tomoyo_set_string(head, tomoyo_dif[i]);
head->r.step++;
tomoyo_set_lf(head);
/* fall through */
case 1:
- if (!tomoyo_read_domain2(head, domain))
+ if (!tomoyo_read_domain2(head, &domain->acl_info_list))
return;
head->r.step++;
if (!tomoyo_set_lf(head))
@@ -1080,73 +1465,6 @@
}
/**
- * tomoyo_write_domain_profile - Assign profile for specified domain.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns 0 on success, -EINVAL otherwise.
- *
- * This is equivalent to doing
- *
- * ( echo "select " $domainname; echo "use_profile " $profile ) |
- * /usr/sbin/tomoyo-loadpolicy -d
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_write_domain_profile(struct tomoyo_io_buffer *head)
-{
- char *data = head->write_buf;
- char *cp = strchr(data, ' ');
- struct tomoyo_domain_info *domain;
- unsigned long profile;
-
- if (!cp)
- return -EINVAL;
- *cp = '\0';
- domain = tomoyo_find_domain(cp + 1);
- if (strict_strtoul(data, 10, &profile))
- return -EINVAL;
- if (domain && profile < TOMOYO_MAX_PROFILES
- && (tomoyo_profile_ptr[profile] || !tomoyo_policy_loaded))
- domain->profile = (u8) profile;
- return 0;
-}
-
-/**
- * tomoyo_read_domain_profile - Read only domainname and profile.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns list of profile number and domainname pairs.
- *
- * This is equivalent to doing
- *
- * grep -A 1 '^<kernel>' /sys/kernel/security/tomoyo/domain_policy |
- * awk ' { if ( domainname == "" ) { if ( $1 == "<kernel>" )
- * domainname = $0; } else if ( $1 == "use_profile" ) {
- * print $2 " " domainname; domainname = ""; } } ; '
- *
- * Caller holds tomoyo_read_lock().
- */
-static void tomoyo_read_domain_profile(struct tomoyo_io_buffer *head)
-{
- if (head->r.eof)
- return;
- list_for_each_cookie(head->r.domain, &tomoyo_domain_list) {
- struct tomoyo_domain_info *domain =
- list_entry(head->r.domain, typeof(*domain), list);
- if (domain->is_deleted)
- continue;
- if (!tomoyo_flush(head))
- return;
- tomoyo_io_printf(head, "%u ", domain->profile);
- tomoyo_set_string(head, domain->domainname->name);
- tomoyo_set_lf(head);
- }
- head->r.eof = true;
-}
-
-/**
 * tomoyo_write_pid - Specify PID to obtain domainname.
*
* @head: Pointer to "struct tomoyo_io_buffer".
@@ -1204,18 +1522,20 @@
tomoyo_set_string(head, domain->domainname->name);
}
+/* String table for domain transition control keywords. */
static const char *tomoyo_transition_type[TOMOYO_MAX_TRANSITION_TYPE] = {
- [TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE]
- = TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN,
- [TOMOYO_TRANSITION_CONTROL_INITIALIZE]
- = TOMOYO_KEYWORD_INITIALIZE_DOMAIN,
- [TOMOYO_TRANSITION_CONTROL_NO_KEEP] = TOMOYO_KEYWORD_NO_KEEP_DOMAIN,
- [TOMOYO_TRANSITION_CONTROL_KEEP] = TOMOYO_KEYWORD_KEEP_DOMAIN
+ [TOMOYO_TRANSITION_CONTROL_NO_RESET] = "no_reset_domain ",
+ [TOMOYO_TRANSITION_CONTROL_RESET] = "reset_domain ",
+ [TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE] = "no_initialize_domain ",
+ [TOMOYO_TRANSITION_CONTROL_INITIALIZE] = "initialize_domain ",
+ [TOMOYO_TRANSITION_CONTROL_NO_KEEP] = "no_keep_domain ",
+ [TOMOYO_TRANSITION_CONTROL_KEEP] = "keep_domain ",
};
+/* String table for grouping keywords. */
static const char *tomoyo_group_name[TOMOYO_MAX_GROUP] = {
- [TOMOYO_PATH_GROUP] = TOMOYO_KEYWORD_PATH_GROUP,
- [TOMOYO_NUMBER_GROUP] = TOMOYO_KEYWORD_NUMBER_GROUP
+ [TOMOYO_PATH_GROUP] = "path_group ",
+ [TOMOYO_NUMBER_GROUP] = "number_group ",
};
/**
@@ -1229,29 +1549,30 @@
*/
static int tomoyo_write_exception(struct tomoyo_io_buffer *head)
{
- char *data = head->write_buf;
- bool is_delete = tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE);
- u8 i;
- static const struct {
- const char *keyword;
- int (*write) (char *, const bool);
- } tomoyo_callback[4] = {
- { TOMOYO_KEYWORD_AGGREGATOR, tomoyo_write_aggregator },
- { TOMOYO_KEYWORD_FILE_PATTERN, tomoyo_write_pattern },
- { TOMOYO_KEYWORD_DENY_REWRITE, tomoyo_write_no_rewrite },
- { TOMOYO_KEYWORD_ALLOW_READ, tomoyo_write_globally_readable },
+ const bool is_delete = head->w.is_delete;
+ struct tomoyo_acl_param param = {
+ .ns = head->w.ns,
+ .is_delete = is_delete,
+ .data = head->write_buf,
};
-
+ u8 i;
+ if (tomoyo_str_starts(¶m.data, "aggregator "))
+ return tomoyo_write_aggregator(¶m);
for (i = 0; i < TOMOYO_MAX_TRANSITION_TYPE; i++)
- if (tomoyo_str_starts(&data, tomoyo_transition_type[i]))
- return tomoyo_write_transition_control(data, is_delete,
- i);
- for (i = 0; i < 4; i++)
- if (tomoyo_str_starts(&data, tomoyo_callback[i].keyword))
- return tomoyo_callback[i].write(data, is_delete);
+ if (tomoyo_str_starts(&param.data, tomoyo_transition_type[i]))
+ return tomoyo_write_transition_control(&param, i);
for (i = 0; i < TOMOYO_MAX_GROUP; i++)
- if (tomoyo_str_starts(&data, tomoyo_group_name[i]))
- return tomoyo_write_group(data, is_delete, i);
+ if (tomoyo_str_starts(&param.data, tomoyo_group_name[i]))
+ return tomoyo_write_group(&param, i);
+ if (tomoyo_str_starts(&param.data, "acl_group ")) {
+ unsigned int group;
+ char *data;
+ group = simple_strtoul(param.data, &data, 10);
+ if (group < TOMOYO_MAX_ACL_GROUPS && *data++ == ' ')
+ return tomoyo_write_domain2
+ (head->w.ns, &head->w.ns->acl_group[group],
+ data, is_delete);
+ }
return -EINVAL;
}
@@ -1267,9 +1588,12 @@
*/
static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx)
{
- list_for_each_cookie(head->r.group, &tomoyo_group_list[idx]) {
+ struct tomoyo_policy_namespace *ns =
+ container_of(head->r.ns, typeof(*ns), namespace_list);
+ struct list_head *list = &ns->group_list[idx];
+ list_for_each_cookie(head->r.group, list) {
struct tomoyo_group *group =
- list_entry(head->r.group, typeof(*group), list);
+ list_entry(head->r.group, typeof(*group), head.list);
list_for_each_cookie(head->r.acl, &group->member_list) {
struct tomoyo_acl_head *ptr =
list_entry(head->r.acl, typeof(*ptr), list);
@@ -1277,6 +1601,7 @@
continue;
if (!tomoyo_flush(head))
return false;
+ tomoyo_print_namespace(head);
tomoyo_set_string(head, tomoyo_group_name[idx]);
tomoyo_set_string(head, group->group_name->name);
if (idx == TOMOYO_PATH_GROUP) {
@@ -1310,7 +1635,10 @@
*/
static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx)
{
- list_for_each_cookie(head->r.acl, &tomoyo_policy_list[idx]) {
+ struct tomoyo_policy_namespace *ns =
+ container_of(head->r.ns, typeof(*ns), namespace_list);
+ struct list_head *list = &ns->policy_list[idx];
+ list_for_each_cookie(head->r.acl, list) {
struct tomoyo_acl_head *acl =
container_of(head->r.acl, typeof(*acl), list);
if (acl->is_deleted)
@@ -1322,35 +1650,23 @@
{
struct tomoyo_transition_control *ptr =
container_of(acl, typeof(*ptr), head);
- tomoyo_set_string(head,
- tomoyo_transition_type
+ tomoyo_print_namespace(head);
+ tomoyo_set_string(head, tomoyo_transition_type
[ptr->type]);
- if (ptr->program)
- tomoyo_set_string(head,
- ptr->program->name);
- if (ptr->program && ptr->domainname)
- tomoyo_set_string(head, " from ");
- if (ptr->domainname)
- tomoyo_set_string(head,
- ptr->domainname->
- name);
- }
- break;
- case TOMOYO_ID_GLOBALLY_READABLE:
- {
- struct tomoyo_readable_file *ptr =
- container_of(acl, typeof(*ptr), head);
- tomoyo_set_string(head,
- TOMOYO_KEYWORD_ALLOW_READ);
- tomoyo_set_string(head, ptr->filename->name);
+ tomoyo_set_string(head, ptr->program ?
+ ptr->program->name : "any");
+ tomoyo_set_string(head, " from ");
+ tomoyo_set_string(head, ptr->domainname ?
+ ptr->domainname->name :
+ "any");
}
break;
case TOMOYO_ID_AGGREGATOR:
{
struct tomoyo_aggregator *ptr =
container_of(acl, typeof(*ptr), head);
- tomoyo_set_string(head,
- TOMOYO_KEYWORD_AGGREGATOR);
+ tomoyo_print_namespace(head);
+ tomoyo_set_string(head, "aggregator ");
tomoyo_set_string(head,
ptr->original_name->name);
tomoyo_set_space(head);
@@ -1358,24 +1674,6 @@
ptr->aggregated_name->name);
}
break;
- case TOMOYO_ID_PATTERN:
- {
- struct tomoyo_no_pattern *ptr =
- container_of(acl, typeof(*ptr), head);
- tomoyo_set_string(head,
- TOMOYO_KEYWORD_FILE_PATTERN);
- tomoyo_set_string(head, ptr->pattern->name);
- }
- break;
- case TOMOYO_ID_NO_REWRITE:
- {
- struct tomoyo_no_rewrite *ptr =
- container_of(acl, typeof(*ptr), head);
- tomoyo_set_string(head,
- TOMOYO_KEYWORD_DENY_REWRITE);
- tomoyo_set_string(head, ptr->pattern->name);
- }
- break;
default:
continue;
}
@@ -1394,6 +1692,8 @@
*/
static void tomoyo_read_exception(struct tomoyo_io_buffer *head)
{
+ struct tomoyo_policy_namespace *ns =
+ container_of(head->r.ns, typeof(*ns), namespace_list);
if (head->r.eof)
return;
while (head->r.step < TOMOYO_MAX_POLICY &&
@@ -1406,95 +1706,40 @@
head->r.step++;
if (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP)
return;
+ while (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP
+ + TOMOYO_MAX_ACL_GROUPS) {
+ head->r.acl_group_index = head->r.step - TOMOYO_MAX_POLICY
+ - TOMOYO_MAX_GROUP;
+ if (!tomoyo_read_domain2(head, &ns->acl_group
+ [head->r.acl_group_index]))
+ return;
+ head->r.step++;
+ }
head->r.eof = true;
}
-/**
- * tomoyo_print_header - Get header line of audit log.
- *
- * @r: Pointer to "struct tomoyo_request_info".
- *
- * Returns string representation.
- *
- * This function uses kmalloc(), so caller must kfree() if this function
- * didn't return NULL.
- */
-static char *tomoyo_print_header(struct tomoyo_request_info *r)
-{
- struct timeval tv;
- const pid_t gpid = task_pid_nr(current);
- static const int tomoyo_buffer_len = 4096;
- char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS);
- pid_t ppid;
- if (!buffer)
- return NULL;
- do_gettimeofday(&tv);
- rcu_read_lock();
- ppid = task_tgid_vnr(current->real_parent);
- rcu_read_unlock();
- snprintf(buffer, tomoyo_buffer_len - 1,
- "#timestamp=%lu profile=%u mode=%s (global-pid=%u)"
- " task={ pid=%u ppid=%u uid=%u gid=%u euid=%u"
- " egid=%u suid=%u sgid=%u fsuid=%u fsgid=%u }",
- tv.tv_sec, r->profile, tomoyo_mode[r->mode], gpid,
- task_tgid_vnr(current), ppid,
- current_uid(), current_gid(), current_euid(),
- current_egid(), current_suid(), current_sgid(),
- current_fsuid(), current_fsgid());
- return buffer;
-}
-
-/**
- * tomoyo_init_audit_log - Allocate buffer for audit logs.
- *
- * @len: Required size.
- * @r: Pointer to "struct tomoyo_request_info".
- *
- * Returns pointer to allocated memory.
- *
- * The @len is updated to add the header lines' size on success.
- *
- * This function uses kzalloc(), so caller must kfree() if this function
- * didn't return NULL.
- */
-static char *tomoyo_init_audit_log(int *len, struct tomoyo_request_info *r)
-{
- char *buf = NULL;
- const char *header;
- const char *domainname;
- if (!r->domain)
- r->domain = tomoyo_domain();
- domainname = r->domain->domainname->name;
- header = tomoyo_print_header(r);
- if (!header)
- return NULL;
- *len += strlen(domainname) + strlen(header) + 10;
- buf = kzalloc(*len, GFP_NOFS);
- if (buf)
- snprintf(buf, (*len) - 1, "%s\n%s\n", header, domainname);
- kfree(header);
- return buf;
-}
-
-/* Wait queue for tomoyo_query_list. */
+/* Wait queue for kernel -> userspace notification. */
static DECLARE_WAIT_QUEUE_HEAD(tomoyo_query_wait);
-
-/* Lock for manipulating tomoyo_query_list. */
-static DEFINE_SPINLOCK(tomoyo_query_list_lock);
+/* Wait queue for userspace -> kernel notification. */
+static DECLARE_WAIT_QUEUE_HEAD(tomoyo_answer_wait);
/* Structure for query. */
struct tomoyo_query {
struct list_head list;
char *query;
- int query_len;
+ size_t query_len;
unsigned int serial;
- int timer;
- int answer;
+ u8 timer;
+ u8 answer;
+ u8 retry;
};
/* The list for "struct tomoyo_query". */
static LIST_HEAD(tomoyo_query_list);
+/* Lock for manipulating tomoyo_query_list. */
+static DEFINE_SPINLOCK(tomoyo_query_list_lock);
+
/*
* Number of "struct file" referring /sys/kernel/security/tomoyo/query
* interface.
@@ -1502,10 +1747,82 @@
static atomic_t tomoyo_query_observers = ATOMIC_INIT(0);
/**
+ * tomoyo_truncate - Truncate a line.
+ *
+ * @str: String to truncate.
+ *
+ * Returns length of truncated @str.
+ */
+static int tomoyo_truncate(char *str)
+{
+ char *start = str;
+ while (*(unsigned char *) str > (unsigned char) ' ')
+ str++;
+ *str = '\0';
+ return strlen(start) + 1;
+}
+
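To make the semantics above concrete: tomoyo_truncate() cuts the string at
the first byte that is not greater than ' ' (a space, a control character,
or the NUL) and returns the length of the kept token including the trailing
NUL. A standalone sketch of the same logic (truncate_token() is a made-up
name):

#include <stdio.h>
#include <string.h>

static int truncate_token(char *str)
{
        char *start = str;

        /* Stop at the first space, control character, or NUL. */
        while (*(unsigned char *) str > (unsigned char) ' ')
                str++;
        *str = '\0';
        return strlen(start) + 1;
}

int main(void)
{
        char buf[] = "\"/bin/sh\" trailing text";
        int len = truncate_token(buf);

        printf("token=%s len=%d\n", buf, len); /* token="/bin/sh" len=10 */
        return 0;
}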
+/**
+ * tomoyo_add_entry - Add an ACL to current thread's domain. Used by learning mode.
+ *
+ * @domain: Pointer to "struct tomoyo_domain_info".
+ * @header: Lines containing ACL.
+ *
+ * Returns nothing.
+ */
+static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header)
+{
+ char *buffer;
+ char *realpath = NULL;
+ char *argv0 = NULL;
+ char *symlink = NULL;
+ char *cp = strchr(header, '\n');
+ int len;
+ if (!cp)
+ return;
+ cp = strchr(cp + 1, '\n');
+ if (!cp)
+ return;
+ *cp++ = '\0';
+ len = strlen(cp) + 1;
+ /* strstr() will return NULL if ordering is wrong. */
+ if (*cp == 'f') {
+ argv0 = strstr(header, " argv[]={ \"");
+ if (argv0) {
+ argv0 += 10;
+ len += tomoyo_truncate(argv0) + 14;
+ }
+ realpath = strstr(header, " exec={ realpath=\"");
+ if (realpath) {
+ realpath += 8;
+ len += tomoyo_truncate(realpath) + 6;
+ }
+ symlink = strstr(header, " symlink.target=\"");
+ if (symlink)
+ len += tomoyo_truncate(symlink + 1) + 1;
+ }
+ buffer = kmalloc(len, GFP_NOFS);
+ if (!buffer)
+ return;
+ snprintf(buffer, len - 1, "%s", cp);
+ if (realpath)
+ tomoyo_addprintf(buffer, len, " exec.%s", realpath);
+ if (argv0)
+ tomoyo_addprintf(buffer, len, " exec.argv[0]=%s", argv0);
+ if (symlink)
+ tomoyo_addprintf(buffer, len, "%s", symlink);
+ tomoyo_normalize_line(buffer);
+ if (!tomoyo_write_domain2(domain->ns, &domain->acl_info_list, buffer,
+ false))
+ tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES);
+ kfree(buffer);
+}
+
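An illustrative trace of tomoyo_add_entry() (all values made up): if the
audit header contains ' exec={ realpath="/bin/ls" }' and
' argv[]={ "/bin/ls" }' and the third line of the log is
'file execute /bin/ls', the entry appended to the domain becomes roughly
'file execute /bin/ls exec.realpath="/bin/ls" exec.argv[0]="/bin/ls"'.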
+/**
* tomoyo_supervisor - Ask for the supervisor's decision.
*
- * @r: Pointer to "struct tomoyo_request_info".
- * @fmt: The printf()'s format string, followed by parameters.
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @fmt: The printf()'s format string, followed by parameters.
*
* Returns 0 if the supervisor decided to permit the access request which
* violated the policy in enforcing mode, TOMOYO_RETRY_REQUEST if the
@@ -1515,88 +1832,79 @@
int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
{
va_list args;
- int error = -EPERM;
- int pos;
+ int error;
int len;
static unsigned int tomoyo_serial;
- struct tomoyo_query *entry = NULL;
+ struct tomoyo_query entry = { };
bool quota_exceeded = false;
- char *header;
+ va_start(args, fmt);
+ len = vsnprintf((char *) &len, 1, fmt, args) + 1;
+ va_end(args);
+ /* Write /sys/kernel/security/tomoyo/audit. */
+ va_start(args, fmt);
+ tomoyo_write_log2(r, len, fmt, args);
+ va_end(args);
+ /* Nothing more to do if granted. */
+ if (r->granted)
+ return 0;
+ if (r->mode)
+ tomoyo_update_stat(r->mode);
switch (r->mode) {
- char *buffer;
+ case TOMOYO_CONFIG_ENFORCING:
+ error = -EPERM;
+ if (atomic_read(&tomoyo_query_observers))
+ break;
+ goto out;
case TOMOYO_CONFIG_LEARNING:
- if (!tomoyo_domain_quota_is_ok(r))
- return 0;
- va_start(args, fmt);
- len = vsnprintf((char *) &pos, sizeof(pos) - 1, fmt, args) + 4;
- va_end(args);
- buffer = kmalloc(len, GFP_NOFS);
- if (!buffer)
- return 0;
- va_start(args, fmt);
- vsnprintf(buffer, len - 1, fmt, args);
- va_end(args);
- tomoyo_normalize_line(buffer);
- tomoyo_write_domain2(buffer, r->domain, false);
- kfree(buffer);
+ error = 0;
+ /* Check max_learning_entry parameter. */
+ if (tomoyo_domain_quota_is_ok(r))
+ break;
/* fall through */
- case TOMOYO_CONFIG_PERMISSIVE:
+ default:
return 0;
}
- if (!r->domain)
- r->domain = tomoyo_domain();
- if (!atomic_read(&tomoyo_query_observers))
- return -EPERM;
+ /* Get message. */
va_start(args, fmt);
- len = vsnprintf((char *) &pos, sizeof(pos) - 1, fmt, args) + 32;
+ entry.query = tomoyo_init_log(r, len, fmt, args);
va_end(args);
- header = tomoyo_init_audit_log(&len, r);
- if (!header)
+ if (!entry.query)
goto out;
- entry = kzalloc(sizeof(*entry), GFP_NOFS);
- if (!entry)
+ entry.query_len = strlen(entry.query) + 1;
+ if (!error) {
+ tomoyo_add_entry(r->domain, entry.query);
goto out;
- entry->query = kzalloc(len, GFP_NOFS);
- if (!entry->query)
- goto out;
- len = ksize(entry->query);
+ }
+ len = tomoyo_round2(entry.query_len);
spin_lock(&tomoyo_query_list_lock);
- if (tomoyo_quota_for_query && tomoyo_query_memory_size + len +
- sizeof(*entry) >= tomoyo_quota_for_query) {
+ if (tomoyo_memory_quota[TOMOYO_MEMORY_QUERY] &&
+ tomoyo_memory_used[TOMOYO_MEMORY_QUERY] + len
+ >= tomoyo_memory_quota[TOMOYO_MEMORY_QUERY]) {
quota_exceeded = true;
} else {
- tomoyo_query_memory_size += len + sizeof(*entry);
- entry->serial = tomoyo_serial++;
+ entry.serial = tomoyo_serial++;
+ entry.retry = r->retry;
+ tomoyo_memory_used[TOMOYO_MEMORY_QUERY] += len;
+ list_add_tail(&entry.list, &tomoyo_query_list);
}
spin_unlock(&tomoyo_query_list_lock);
if (quota_exceeded)
goto out;
- pos = snprintf(entry->query, len - 1, "Q%u-%hu\n%s",
- entry->serial, r->retry, header);
- kfree(header);
- header = NULL;
- va_start(args, fmt);
- vsnprintf(entry->query + pos, len - 1 - pos, fmt, args);
- entry->query_len = strlen(entry->query) + 1;
- va_end(args);
- spin_lock(&tomoyo_query_list_lock);
- list_add_tail(&entry->list, &tomoyo_query_list);
- spin_unlock(&tomoyo_query_list_lock);
/* Give 10 seconds for supervisor's opinion. */
- for (entry->timer = 0;
- atomic_read(&tomoyo_query_observers) && entry->timer < 100;
- entry->timer++) {
- wake_up(&tomoyo_query_wait);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 10);
- if (entry->answer)
+ while (entry.timer < 10) {
+ wake_up_all(&tomoyo_query_wait);
+ if (wait_event_interruptible_timeout
+ (tomoyo_answer_wait, entry.answer ||
+ !atomic_read(&tomoyo_query_observers), HZ))
break;
+ else
+ entry.timer++;
}
spin_lock(&tomoyo_query_list_lock);
- list_del(&entry->list);
- tomoyo_query_memory_size -= len + sizeof(*entry);
+ list_del(&entry.list);
+ tomoyo_memory_used[TOMOYO_MEMORY_QUERY] -= len;
spin_unlock(&tomoyo_query_list_lock);
- switch (entry->answer) {
+ switch (entry.answer) {
case 3: /* Asked to retry by administrator. */
error = TOMOYO_RETRY_REQUEST;
r->retry++;
@@ -1605,18 +1913,12 @@
/* Granted by administrator. */
error = 0;
break;
- case 0:
- /* Timed out. */
- break;
default:
- /* Rejected by administrator. */
+ /* Timed out or rejected by administrator. */
break;
}
- out:
- if (entry)
- kfree(entry->query);
- kfree(entry);
- kfree(header);
+out:
+ kfree(entry.query);
return error;
}
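
The two vsnprintf() calls at the top of tomoyo_supervisor() use a one-byte
dummy buffer purely as a length probe before the message is actually
formatted by tomoyo_init_log(). In portable C99 the same idiom is usually
written with a NULL buffer and zero size; a hedged sketch (format_message()
is a made-up name):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static char *format_message(const char *fmt, ...)
{
        va_list args;
        int len;
        char *buf;

        /* Probe: vsnprintf() returns the length it would have written. */
        va_start(args, fmt);
        len = vsnprintf(NULL, 0, fmt, args) + 1; /* +1 for the NUL */
        va_end(args);
        buf = malloc(len);
        if (!buf)
                return NULL;
        va_start(args, fmt);
        vsnprintf(buf, len, fmt, args);
        va_end(args);
        return buf;
}

int main(void)
{
        char *msg = format_message("file read %s", "/etc/passwd");

        puts(msg ? msg : "out of memory");
        free(msg);
        return 0;
}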
@@ -1663,8 +1965,8 @@
static void tomoyo_read_query(struct tomoyo_io_buffer *head)
{
struct list_head *tmp;
- int pos = 0;
- int len = 0;
+ unsigned int pos = 0;
+ size_t len = 0;
char *buf;
if (head->r.w_pos)
return;
@@ -1687,7 +1989,7 @@
head->r.query_index = 0;
return;
}
- buf = kzalloc(len, GFP_NOFS);
+ buf = kzalloc(len + 32, GFP_NOFS);
if (!buf)
return;
pos = 0;
@@ -1703,7 +2005,8 @@
* can change, but I don't care.
*/
if (len == ptr->query_len)
- memmove(buf, ptr->query, len);
+ snprintf(buf, len + 31, "Q%u-%hu\n%s", ptr->serial,
+ ptr->retry, ptr->query);
break;
}
spin_unlock(&tomoyo_query_list_lock);
@@ -1760,7 +2063,7 @@
static void tomoyo_read_version(struct tomoyo_io_buffer *head)
{
if (!head->r.eof) {
- tomoyo_io_printf(head, "2.3.0");
+ tomoyo_io_printf(head, "2.4.0");
head->r.eof = true;
}
}
@@ -1785,15 +2088,111 @@
}
}
+/* String table for /sys/kernel/security/tomoyo/stat interface. */
+static const char * const tomoyo_policy_headers[TOMOYO_MAX_POLICY_STAT] = {
+ [TOMOYO_STAT_POLICY_UPDATES] = "update:",
+ [TOMOYO_STAT_POLICY_LEARNING] = "violation in learning mode:",
+ [TOMOYO_STAT_POLICY_PERMISSIVE] = "violation in permissive mode:",
+ [TOMOYO_STAT_POLICY_ENFORCING] = "violation in enforcing mode:",
+};
+
+/* String table for /sys/kernel/security/tomoyo/stat interface. */
+static const char * const tomoyo_memory_headers[TOMOYO_MAX_MEMORY_STAT] = {
+ [TOMOYO_MEMORY_POLICY] = "policy:",
+ [TOMOYO_MEMORY_AUDIT] = "audit log:",
+ [TOMOYO_MEMORY_QUERY] = "query message:",
+};
+
+/* Counter for number of updates. */
+static unsigned int tomoyo_stat_updated[TOMOYO_MAX_POLICY_STAT];
+/* Timestamp of the last update. */
+static unsigned int tomoyo_stat_modified[TOMOYO_MAX_POLICY_STAT];
+
+/**
+ * tomoyo_update_stat - Update statistic counters.
+ *
+ * @index: Index for policy type.
+ *
+ * Returns nothing.
+ */
+void tomoyo_update_stat(const u8 index)
+{
+ struct timeval tv;
+ do_gettimeofday(&tv);
+ /*
+ * I don't use atomic operations because a race condition here is not fatal.
+ */
+ tomoyo_stat_updated[index]++;
+ tomoyo_stat_modified[index] = tv.tv_sec;
+}
+
+/**
+ * tomoyo_read_stat - Read statistic data.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static void tomoyo_read_stat(struct tomoyo_io_buffer *head)
+{
+ u8 i;
+ unsigned int total = 0;
+ if (head->r.eof)
+ return;
+ for (i = 0; i < TOMOYO_MAX_POLICY_STAT; i++) {
+ tomoyo_io_printf(head, "Policy %-30s %10u",
+ tomoyo_policy_headers[i],
+ tomoyo_stat_updated[i]);
+ if (tomoyo_stat_modified[i]) {
+ struct tomoyo_time stamp;
+ tomoyo_convert_time(tomoyo_stat_modified[i], &stamp);
+ tomoyo_io_printf(head, " (Last: %04u/%02u/%02u "
+ "%02u:%02u:%02u)",
+ stamp.year, stamp.month, stamp.day,
+ stamp.hour, stamp.min, stamp.sec);
+ }
+ tomoyo_set_lf(head);
+ }
+ for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++) {
+ unsigned int used = tomoyo_memory_used[i];
+ total += used;
+ tomoyo_io_printf(head, "Memory used by %-22s %10u",
+ tomoyo_memory_headers[i], used);
+ used = tomoyo_memory_quota[i];
+ if (used)
+ tomoyo_io_printf(head, " (Quota: %10u)", used);
+ tomoyo_set_lf(head);
+ }
+ tomoyo_io_printf(head, "Total memory used: %10u\n",
+ total);
+ head->r.eof = true;
+}
+
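With the formats above, reading /sys/kernel/security/tomoyo/stat yields
output roughly like the following (counts and timestamps are made up, and
the "(Quota: ...)" suffix appears only when a quota has been set):

    Policy update:                         42 (Last: 2011/05/11 18:44:09)
    Policy violation in learning mode:      3 (Last: 2011/05/11 18:45:00)
    Policy violation in permissive mode:    0
    Policy violation in enforcing mode:     0
    Memory used by policy:              65536
    Memory used by audit log:               0
    Memory used by query message:           0 (Quota:    1048576)
    Total memory used:                  65536
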
+/**
+ * tomoyo_write_stat - Set memory quota.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns 0.
+ */
+static int tomoyo_write_stat(struct tomoyo_io_buffer *head)
+{
+ char *data = head->write_buf;
+ u8 i;
+ if (tomoyo_str_starts(&data, "Memory used by "))
+ for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++)
+ if (tomoyo_str_starts(&data, tomoyo_memory_headers[i]))
+ sscanf(data, "%u", &tomoyo_memory_quota[i]);
+ return 0;
+}
+
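Because tomoyo_write_stat() just matches a "Memory used by ..." prefix and
sscanf()s the number, an administrator sets a quota by writing one of those
lines back with the desired byte limit (0 means no quota). A minimal
userspace sketch:

#include <stdio.h>

int main(void)
{
        FILE *fp = fopen("/sys/kernel/security/tomoyo/stat", "w");

        if (!fp)
                return 1;
        /* Cap memory used for query messages at 1 MB. */
        fprintf(fp, "Memory used by query message: 1048576\n");
        return fclose(fp) ? 1 : 0;
}
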
/**
* tomoyo_open_control - open() for /sys/kernel/security/tomoyo/ interface.
*
* @type: Type of interface.
* @file: Pointer to "struct file".
*
- * Associates policy handler and returns 0 on success, -ENOMEM otherwise.
- *
- * Caller acquires tomoyo_read_lock().
+ * Returns 0 on success, negative value otherwise.
*/
int tomoyo_open_control(const u8 type, struct file *file)
{
@@ -1814,15 +2213,15 @@
head->write = tomoyo_write_exception;
head->read = tomoyo_read_exception;
break;
+ case TOMOYO_AUDIT:
+ /* /sys/kernel/security/tomoyo/audit */
+ head->poll = tomoyo_poll_log;
+ head->read = tomoyo_read_log;
+ break;
case TOMOYO_SELFDOMAIN:
/* /sys/kernel/security/tomoyo/self_domain */
head->read = tomoyo_read_self_domain;
break;
- case TOMOYO_DOMAIN_STATUS:
- /* /sys/kernel/security/tomoyo/.domain_status */
- head->write = tomoyo_write_domain_profile;
- head->read = tomoyo_read_domain_profile;
- break;
case TOMOYO_PROCESS_STATUS:
/* /sys/kernel/security/tomoyo/.process_status */
head->write = tomoyo_write_pid;
@@ -1833,11 +2232,11 @@
head->read = tomoyo_read_version;
head->readbuf_size = 128;
break;
- case TOMOYO_MEMINFO:
- /* /sys/kernel/security/tomoyo/meminfo */
- head->write = tomoyo_write_memory_quota;
- head->read = tomoyo_read_memory_counter;
- head->readbuf_size = 512;
+ case TOMOYO_STAT:
+ /* /sys/kernel/security/tomoyo/stat */
+ head->write = tomoyo_write_stat;
+ head->read = tomoyo_read_stat;
+ head->readbuf_size = 1024;
break;
case TOMOYO_PROFILE:
/* /sys/kernel/security/tomoyo/profile */
@@ -1887,26 +2286,16 @@
return -ENOMEM;
}
}
- if (type != TOMOYO_QUERY)
- head->reader_idx = tomoyo_read_lock();
- file->private_data = head;
- /*
- * Call the handler now if the file is
- * /sys/kernel/security/tomoyo/self_domain
- * so that the user can use
- * cat < /sys/kernel/security/tomoyo/self_domain"
- * to know the current process's domainname.
- */
- if (type == TOMOYO_SELFDOMAIN)
- tomoyo_read_control(file, NULL, 0);
/*
* If the file is /sys/kernel/security/tomoyo/query , increment the
* observer counter.
* The observer counter is used by tomoyo_supervisor() to see if
* there is some process monitoring /sys/kernel/security/tomoyo/query.
*/
- else if (type == TOMOYO_QUERY)
+ if (type == TOMOYO_QUERY)
atomic_inc(&tomoyo_query_observers);
+ file->private_data = head;
+ tomoyo_notify_gc(head, true);
return 0;
}
@@ -1917,7 +2306,8 @@
* @wait: Pointer to "poll_table".
*
* Waits for read readiness.
- * /sys/kernel/security/tomoyo/query is handled by /usr/sbin/tomoyo-queryd .
+ * /sys/kernel/security/tomoyo/query is handled by /usr/sbin/tomoyo-queryd and
+ * /sys/kernel/security/tomoyo/audit is handled by /usr/sbin/tomoyo-auditd.
*/
int tomoyo_poll_control(struct file *file, poll_table *wait)
{
@@ -1928,21 +2318,58 @@
}
/**
+ * tomoyo_set_namespace_cursor - Set namespace to read.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns nothing.
+ */
+static inline void tomoyo_set_namespace_cursor(struct tomoyo_io_buffer *head)
+{
+ struct list_head *ns;
+ if (head->type != TOMOYO_EXCEPTIONPOLICY &&
+ head->type != TOMOYO_PROFILE)
+ return;
+ /*
+ * If this is the first read, or reading previous namespace finished
+ * and has more namespaces to read, update the namespace cursor.
+ */
+ ns = head->r.ns;
+ if (!ns || (head->r.eof && ns->next != &tomoyo_namespace_list)) {
+ /* Clearing is OK because tomoyo_flush() returned true. */
+ memset(&head->r, 0, sizeof(head->r));
+ head->r.ns = ns ? ns->next : tomoyo_namespace_list.next;
+ }
+}
+
+/**
+ * tomoyo_has_more_namespace - Check for unread namespaces.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ *
+ * Returns true if we have more entries to print, false otherwise.
+ */
+static inline bool tomoyo_has_more_namespace(struct tomoyo_io_buffer *head)
+{
+ return (head->type == TOMOYO_EXCEPTIONPOLICY ||
+ head->type == TOMOYO_PROFILE) && head->r.eof &&
+ head->r.ns->next != &tomoyo_namespace_list;
+}
+
+/**
* tomoyo_read_control - read() for /sys/kernel/security/tomoyo/ interface.
*
- * @file: Pointer to "struct file".
+ * @head: Pointer to "struct tomoyo_io_buffer".
* @buffer: Pointer to buffer to write to.
* @buffer_len: Size of @buffer.
*
* Returns bytes read on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
*/
-int tomoyo_read_control(struct file *file, char __user *buffer,
- const int buffer_len)
+ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer,
+ const int buffer_len)
{
int len;
- struct tomoyo_io_buffer *head = file->private_data;
+ int idx;
if (!head->read)
return -ENOSYS;
@@ -1950,64 +2377,156 @@
return -EINTR;
head->read_user_buf = buffer;
head->read_user_buf_avail = buffer_len;
+ idx = tomoyo_read_lock();
if (tomoyo_flush(head))
/* Call the policy handler. */
- head->read(head);
- tomoyo_flush(head);
+ do {
+ tomoyo_set_namespace_cursor(head);
+ head->read(head);
+ } while (tomoyo_flush(head) &&
+ tomoyo_has_more_namespace(head));
+ tomoyo_read_unlock(idx);
len = head->read_user_buf - buffer;
mutex_unlock(&head->io_sem);
return len;
}
/**
+ * tomoyo_parse_policy - Parse a policy line.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @line: Line to parse.
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+static int tomoyo_parse_policy(struct tomoyo_io_buffer *head, char *line)
+{
+ /* Delete request? */
+ head->w.is_delete = !strncmp(line, "delete ", 7);
+ if (head->w.is_delete)
+ memmove(line, line + 7, strlen(line + 7) + 1);
+ /* Selecting namespace to update. */
+ if (head->type == TOMOYO_EXCEPTIONPOLICY ||
+ head->type == TOMOYO_PROFILE) {
+ if (*line == '<') {
+ char *cp = strchr(line, ' ');
+ if (cp) {
+ *cp++ = '\0';
+ head->w.ns = tomoyo_assign_namespace(line);
+ memmove(line, cp, strlen(cp) + 1);
+ } else
+ head->w.ns = NULL;
+ } else
+ head->w.ns = &tomoyo_kernel_namespace;
+ /* Don't allow updating if namespace is invalid. */
+ if (!head->w.ns)
+ return -ENOENT;
+ }
+ /* Do the update. */
+ return head->write(head);
+}
+
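Putting the pieces together, tomoyo_parse_policy() accepts exception policy
lines such as the following (patterns are illustrative; note that the
"delete " prefix is stripped before the "<namespace>" prefix is examined,
so "delete" always comes first):

    aggregator /usr/bin/cc /usr/libexec/gcc/cc
    delete aggregator /usr/bin/cc /usr/libexec/gcc/cc
    <kernel> path_group HOME-DIR-FILE /home/\*/\*
    delete <kernel> path_group HOME-DIR-FILE /home/\*/\*
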
+/**
* tomoyo_write_control - write() for /sys/kernel/security/tomoyo/ interface.
*
- * @file: Pointer to "struct file".
+ * @head: Pointer to "struct tomoyo_io_buffer".
* @buffer: Pointer to buffer to read from.
* @buffer_len: Size of @buffer.
*
* Returns @buffer_len on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
*/
-int tomoyo_write_control(struct file *file, const char __user *buffer,
- const int buffer_len)
+ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
+ const char __user *buffer, const int buffer_len)
{
- struct tomoyo_io_buffer *head = file->private_data;
int error = buffer_len;
- int avail_len = buffer_len;
+ size_t avail_len = buffer_len;
char *cp0 = head->write_buf;
-
+ int idx;
if (!head->write)
return -ENOSYS;
if (!access_ok(VERIFY_READ, buffer, buffer_len))
return -EFAULT;
- /* Don't allow updating policies by non manager programs. */
- if (head->write != tomoyo_write_pid &&
- head->write != tomoyo_write_domain && !tomoyo_manager())
- return -EPERM;
if (mutex_lock_interruptible(&head->io_sem))
return -EINTR;
+ idx = tomoyo_read_lock();
/* Read a line and dispatch it to the policy handler. */
while (avail_len > 0) {
char c;
- if (head->write_avail >= head->writebuf_size - 1) {
- error = -ENOMEM;
- break;
- } else if (get_user(c, buffer)) {
+ if (head->w.avail >= head->writebuf_size - 1) {
+ const int len = head->writebuf_size * 2;
+ char *cp = kzalloc(len, GFP_NOFS);
+ if (!cp) {
+ error = -ENOMEM;
+ break;
+ }
+ memmove(cp, cp0, head->w.avail);
+ kfree(cp0);
+ head->write_buf = cp;
+ cp0 = cp;
+ head->writebuf_size = len;
+ }
+ if (get_user(c, buffer)) {
error = -EFAULT;
break;
}
buffer++;
avail_len--;
- cp0[head->write_avail++] = c;
+ cp0[head->w.avail++] = c;
if (c != '\n')
continue;
- cp0[head->write_avail - 1] = '\0';
- head->write_avail = 0;
+ cp0[head->w.avail - 1] = '\0';
+ head->w.avail = 0;
tomoyo_normalize_line(cp0);
- head->write(head);
+ if (!strcmp(cp0, "reset")) {
+ head->w.ns = &tomoyo_kernel_namespace;
+ head->w.domain = NULL;
+ memset(&head->r, 0, sizeof(head->r));
+ continue;
+ }
+ /* Don't allow updating policies by non manager programs. */
+ switch (head->type) {
+ case TOMOYO_PROCESS_STATUS:
+ /* This does not write anything. */
+ break;
+ case TOMOYO_DOMAINPOLICY:
+ if (tomoyo_select_domain(head, cp0))
+ continue;
+ /* fall through */
+ case TOMOYO_EXCEPTIONPOLICY:
+ if (!strcmp(cp0, "select transition_only")) {
+ head->r.print_transition_related_only = true;
+ continue;
+ }
+ /* fall through */
+ default:
+ if (!tomoyo_manager()) {
+ error = -EPERM;
+ goto out;
+ }
+ }
+ switch (tomoyo_parse_policy(head, cp0)) {
+ case -EPERM:
+ error = -EPERM;
+ goto out;
+ case 0:
+ switch (head->type) {
+ case TOMOYO_DOMAINPOLICY:
+ case TOMOYO_EXCEPTIONPOLICY:
+ case TOMOYO_STAT:
+ case TOMOYO_PROFILE:
+ case TOMOYO_MANAGER:
+ tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES);
+ break;
+ default:
+ break;
+ }
+ break;
+ }
}
+out:
+ tomoyo_read_unlock(idx);
mutex_unlock(&head->io_sem);
return error;
}
@@ -2015,35 +2534,20 @@
/**
* tomoyo_close_control - close() for /sys/kernel/security/tomoyo/ interface.
*
- * @file: Pointer to "struct file".
+ * @head: Pointer to "struct tomoyo_io_buffer".
*
- * Releases memory and returns 0.
- *
- * Caller looses tomoyo_read_lock().
+ * Returns 0.
*/
-int tomoyo_close_control(struct file *file)
+int tomoyo_close_control(struct tomoyo_io_buffer *head)
{
- struct tomoyo_io_buffer *head = file->private_data;
- const bool is_write = !!head->write_buf;
-
/*
* If the file is /sys/kernel/security/tomoyo/query , decrement the
* observer counter.
*/
- if (head->type == TOMOYO_QUERY)
- atomic_dec(&tomoyo_query_observers);
- else
- tomoyo_read_unlock(head->reader_idx);
- /* Release memory used for policy I/O. */
- kfree(head->read_buf);
- head->read_buf = NULL;
- kfree(head->write_buf);
- head->write_buf = NULL;
- kfree(head);
- head = NULL;
- file->private_data = NULL;
- if (is_write)
- tomoyo_run_gc();
+ if (head->type == TOMOYO_QUERY &&
+ atomic_dec_and_test(&tomoyo_query_observers))
+ wake_up_all(&tomoyo_answer_wait);
+ tomoyo_notify_gc(head, false);
return 0;
}
@@ -2055,27 +2559,90 @@
struct tomoyo_domain_info *domain;
const int idx = tomoyo_read_lock();
tomoyo_policy_loaded = true;
- /* Check all profiles currently assigned to domains are defined. */
+ printk(KERN_INFO "TOMOYO: 2.4.0\n");
list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
const u8 profile = domain->profile;
- if (tomoyo_profile_ptr[profile])
+ const struct tomoyo_policy_namespace *ns = domain->ns;
+ if (ns->profile_version != 20100903)
+ printk(KERN_ERR
+ "Profile version %u is not supported.\n",
+ ns->profile_version);
+ else if (!ns->profile_ptr[profile])
+ printk(KERN_ERR
+ "Profile %u (used by '%s') is not defined.\n",
+ profile, domain->domainname->name);
+ else
continue;
- printk(KERN_ERR "You need to define profile %u before using it.\n",
- profile);
- printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ "
+ printk(KERN_ERR
+ "Userland tools for TOMOYO 2.4 must be installed and "
+ "policy must be initialized.\n");
+ printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.4/ "
"for more information.\n");
- panic("Profile %u (used by '%s') not defined.\n",
- profile, domain->domainname->name);
+ panic("STOP!");
}
tomoyo_read_unlock(idx);
- if (tomoyo_profile_version != 20090903) {
- printk(KERN_ERR "You need to install userland programs for "
- "TOMOYO 2.3 and initialize policy configuration.\n");
- printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ "
- "for more information.\n");
- panic("Profile version %u is not supported.\n",
- tomoyo_profile_version);
- }
- printk(KERN_INFO "TOMOYO: 2.3.0\n");
printk(KERN_INFO "Mandatory Access Control activated.\n");
}
+
+/**
+ * tomoyo_load_builtin_policy - Load built-in policy.
+ *
+ * Returns nothing.
+ */
+void __init tomoyo_load_builtin_policy(void)
+{
+ /*
+ * This include file is manually created and contains the built-in
+ * policies "tomoyo_builtin_profile", "tomoyo_builtin_exception_policy",
+ * "tomoyo_builtin_domain_policy", "tomoyo_builtin_manager" and
+ * "tomoyo_builtin_stat", each in the form of "static char [] __initdata".
+ */
+#include "builtin-policy.h"
+ u8 i;
+ const int idx = tomoyo_read_lock();
+ for (i = 0; i < 5; i++) {
+ struct tomoyo_io_buffer head = { };
+ char *start = "";
+ switch (i) {
+ case 0:
+ start = tomoyo_builtin_profile;
+ head.type = TOMOYO_PROFILE;
+ head.write = tomoyo_write_profile;
+ break;
+ case 1:
+ start = tomoyo_builtin_exception_policy;
+ head.type = TOMOYO_EXCEPTIONPOLICY;
+ head.write = tomoyo_write_exception;
+ break;
+ case 2:
+ start = tomoyo_builtin_domain_policy;
+ head.type = TOMOYO_DOMAINPOLICY;
+ head.write = tomoyo_write_domain;
+ break;
+ case 3:
+ start = tomoyo_builtin_manager;
+ head.type = TOMOYO_MANAGER;
+ head.write = tomoyo_write_manager;
+ break;
+ case 4:
+ start = tomoyo_builtin_stat;
+ head.type = TOMOYO_STAT;
+ head.write = tomoyo_write_stat;
+ break;
+ }
+ while (1) {
+ char *end = strchr(start, '\n');
+ if (!end)
+ break;
+ *end = '\0';
+ tomoyo_normalize_line(start);
+ head.write_buf = start;
+ tomoyo_parse_policy(&head, start);
+ start = end + 1;
+ }
+ }
+ tomoyo_read_unlock(idx);
+#ifdef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+ tomoyo_check_profile();
+#endif
+}
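
For reference, a hand-written builtin-policy.h consumed by the loop above
might look like the sketch below (the PROFILE_VERSION/COMMENT lines assume
the TOMOYO 2.4 profile syntax; empty strings are fine because parsing stops
at the first line that lacks a trailing '\n'):

static char tomoyo_builtin_profile[] __initdata =
        "PROFILE_VERSION=20100903\n"
        "0-COMMENT=-----Disabled Mode-----\n";
static char tomoyo_builtin_exception_policy[] __initdata =
        "aggregator /usr/bin/cc /usr/libexec/gcc/cc\n";
static char tomoyo_builtin_domain_policy[] __initdata = "";
static char tomoyo_builtin_manager[] __initdata = "";
static char tomoyo_builtin_stat[] __initdata = "";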
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 7c66bd8..f7fbaa6 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -21,7 +21,8 @@
#include <linux/list.h>
#include <linux/cred.h>
#include <linux/poll.h>
-struct linux_binprm;
+#include <linux/binfmts.h>
+#include <linux/highmem.h>
/********** Constants definitions. **********/
@@ -38,66 +39,149 @@
/* Profile number is an integer between 0 and 255. */
#define TOMOYO_MAX_PROFILES 256
+/* Group number is an integer between 0 and 255. */
+#define TOMOYO_MAX_ACL_GROUPS 256
+
+/* Index numbers for "struct tomoyo_condition". */
+enum tomoyo_conditions_index {
+ TOMOYO_TASK_UID, /* current_uid() */
+ TOMOYO_TASK_EUID, /* current_euid() */
+ TOMOYO_TASK_SUID, /* current_suid() */
+ TOMOYO_TASK_FSUID, /* current_fsuid() */
+ TOMOYO_TASK_GID, /* current_gid() */
+ TOMOYO_TASK_EGID, /* current_egid() */
+ TOMOYO_TASK_SGID, /* current_sgid() */
+ TOMOYO_TASK_FSGID, /* current_fsgid() */
+ TOMOYO_TASK_PID, /* sys_getpid() */
+ TOMOYO_TASK_PPID, /* sys_getppid() */
+ TOMOYO_EXEC_ARGC, /* "struct linux_binprm *"->argc */
+ TOMOYO_EXEC_ENVC, /* "struct linux_binprm *"->envc */
+ TOMOYO_TYPE_IS_SOCKET, /* S_IFSOCK */
+ TOMOYO_TYPE_IS_SYMLINK, /* S_IFLNK */
+ TOMOYO_TYPE_IS_FILE, /* S_IFREG */
+ TOMOYO_TYPE_IS_BLOCK_DEV, /* S_IFBLK */
+ TOMOYO_TYPE_IS_DIRECTORY, /* S_IFDIR */
+ TOMOYO_TYPE_IS_CHAR_DEV, /* S_IFCHR */
+ TOMOYO_TYPE_IS_FIFO, /* S_IFIFO */
+ TOMOYO_MODE_SETUID, /* S_ISUID */
+ TOMOYO_MODE_SETGID, /* S_ISGID */
+ TOMOYO_MODE_STICKY, /* S_ISVTX */
+ TOMOYO_MODE_OWNER_READ, /* S_IRUSR */
+ TOMOYO_MODE_OWNER_WRITE, /* S_IWUSR */
+ TOMOYO_MODE_OWNER_EXECUTE, /* S_IXUSR */
+ TOMOYO_MODE_GROUP_READ, /* S_IRGRP */
+ TOMOYO_MODE_GROUP_WRITE, /* S_IWGRP */
+ TOMOYO_MODE_GROUP_EXECUTE, /* S_IXGRP */
+ TOMOYO_MODE_OTHERS_READ, /* S_IROTH */
+ TOMOYO_MODE_OTHERS_WRITE, /* S_IWOTH */
+ TOMOYO_MODE_OTHERS_EXECUTE, /* S_IXOTH */
+ TOMOYO_EXEC_REALPATH,
+ TOMOYO_SYMLINK_TARGET,
+ TOMOYO_PATH1_UID,
+ TOMOYO_PATH1_GID,
+ TOMOYO_PATH1_INO,
+ TOMOYO_PATH1_MAJOR,
+ TOMOYO_PATH1_MINOR,
+ TOMOYO_PATH1_PERM,
+ TOMOYO_PATH1_TYPE,
+ TOMOYO_PATH1_DEV_MAJOR,
+ TOMOYO_PATH1_DEV_MINOR,
+ TOMOYO_PATH2_UID,
+ TOMOYO_PATH2_GID,
+ TOMOYO_PATH2_INO,
+ TOMOYO_PATH2_MAJOR,
+ TOMOYO_PATH2_MINOR,
+ TOMOYO_PATH2_PERM,
+ TOMOYO_PATH2_TYPE,
+ TOMOYO_PATH2_DEV_MAJOR,
+ TOMOYO_PATH2_DEV_MINOR,
+ TOMOYO_PATH1_PARENT_UID,
+ TOMOYO_PATH1_PARENT_GID,
+ TOMOYO_PATH1_PARENT_INO,
+ TOMOYO_PATH1_PARENT_PERM,
+ TOMOYO_PATH2_PARENT_UID,
+ TOMOYO_PATH2_PARENT_GID,
+ TOMOYO_PATH2_PARENT_INO,
+ TOMOYO_PATH2_PARENT_PERM,
+ TOMOYO_MAX_CONDITION_KEYWORD,
+ TOMOYO_NUMBER_UNION,
+ TOMOYO_NAME_UNION,
+ TOMOYO_ARGV_ENTRY,
+ TOMOYO_ENVP_ENTRY,
+};
+
+
+/* Index numbers for stat(). */
+enum tomoyo_path_stat_index {
+ /* Do not change this order. */
+ TOMOYO_PATH1,
+ TOMOYO_PATH1_PARENT,
+ TOMOYO_PATH2,
+ TOMOYO_PATH2_PARENT,
+ TOMOYO_MAX_PATH_STAT
+};
+
+/* Index numbers for operation mode. */
enum tomoyo_mode_index {
TOMOYO_CONFIG_DISABLED,
TOMOYO_CONFIG_LEARNING,
TOMOYO_CONFIG_PERMISSIVE,
TOMOYO_CONFIG_ENFORCING,
- TOMOYO_CONFIG_USE_DEFAULT = 255
+ TOMOYO_CONFIG_MAX_MODE,
+ TOMOYO_CONFIG_WANT_REJECT_LOG = 64,
+ TOMOYO_CONFIG_WANT_GRANT_LOG = 128,
+ TOMOYO_CONFIG_USE_DEFAULT = 255,
};
+/* Index numbers for entry type. */
enum tomoyo_policy_id {
TOMOYO_ID_GROUP,
TOMOYO_ID_PATH_GROUP,
TOMOYO_ID_NUMBER_GROUP,
TOMOYO_ID_TRANSITION_CONTROL,
TOMOYO_ID_AGGREGATOR,
- TOMOYO_ID_GLOBALLY_READABLE,
- TOMOYO_ID_PATTERN,
- TOMOYO_ID_NO_REWRITE,
TOMOYO_ID_MANAGER,
+ TOMOYO_ID_CONDITION,
TOMOYO_ID_NAME,
TOMOYO_ID_ACL,
TOMOYO_ID_DOMAIN,
TOMOYO_MAX_POLICY
};
+/* Index numbers for domain's attributes. */
+enum tomoyo_domain_info_flags_index {
+ /* Quota warning flag. */
+ TOMOYO_DIF_QUOTA_WARNED,
+ /*
+ * This domain was unable to create a new domain at
+ * tomoyo_find_next_domain() because the name of the domain to be
+ * created was too long or it could not allocate memory.
+ * More than one process continued execve() without domain transition.
+ */
+ TOMOYO_DIF_TRANSITION_FAILED,
+ TOMOYO_MAX_DOMAIN_INFO_FLAGS
+};
+
+/* Index numbers for group entries. */
enum tomoyo_group_id {
TOMOYO_PATH_GROUP,
TOMOYO_NUMBER_GROUP,
TOMOYO_MAX_GROUP
};
-/* Keywords for ACLs. */
-#define TOMOYO_KEYWORD_AGGREGATOR "aggregator "
-#define TOMOYO_KEYWORD_ALLOW_MOUNT "allow_mount "
-#define TOMOYO_KEYWORD_ALLOW_READ "allow_read "
-#define TOMOYO_KEYWORD_DELETE "delete "
-#define TOMOYO_KEYWORD_DENY_REWRITE "deny_rewrite "
-#define TOMOYO_KEYWORD_FILE_PATTERN "file_pattern "
-#define TOMOYO_KEYWORD_INITIALIZE_DOMAIN "initialize_domain "
-#define TOMOYO_KEYWORD_KEEP_DOMAIN "keep_domain "
-#define TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN "no_initialize_domain "
-#define TOMOYO_KEYWORD_NO_KEEP_DOMAIN "no_keep_domain "
-#define TOMOYO_KEYWORD_PATH_GROUP "path_group "
-#define TOMOYO_KEYWORD_NUMBER_GROUP "number_group "
-#define TOMOYO_KEYWORD_SELECT "select "
-#define TOMOYO_KEYWORD_USE_PROFILE "use_profile "
-#define TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ "ignore_global_allow_read"
-#define TOMOYO_KEYWORD_QUOTA_EXCEEDED "quota_exceeded"
-#define TOMOYO_KEYWORD_TRANSITION_FAILED "transition_failed"
-/* A domain definition starts with <kernel>. */
-#define TOMOYO_ROOT_NAME "<kernel>"
-#define TOMOYO_ROOT_NAME_LEN (sizeof(TOMOYO_ROOT_NAME) - 1)
+/* Index numbers for type of numeric values. */
+enum tomoyo_value_type {
+ TOMOYO_VALUE_TYPE_INVALID,
+ TOMOYO_VALUE_TYPE_DECIMAL,
+ TOMOYO_VALUE_TYPE_OCTAL,
+ TOMOYO_VALUE_TYPE_HEXADECIMAL,
+};
-/* Value type definition. */
-#define TOMOYO_VALUE_TYPE_INVALID 0
-#define TOMOYO_VALUE_TYPE_DECIMAL 1
-#define TOMOYO_VALUE_TYPE_OCTAL 2
-#define TOMOYO_VALUE_TYPE_HEXADECIMAL 3
-
+/* Index numbers for domain transition control keywords. */
enum tomoyo_transition_type {
/* Do not change this order. */
+ TOMOYO_TRANSITION_CONTROL_NO_RESET,
+ TOMOYO_TRANSITION_CONTROL_RESET,
TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE,
TOMOYO_TRANSITION_CONTROL_INITIALIZE,
TOMOYO_TRANSITION_CONTROL_NO_KEEP,
@@ -114,35 +198,29 @@
TOMOYO_TYPE_MOUNT_ACL,
};
-/* Index numbers for File Controls. */
-
-/*
- * TOMOYO_TYPE_READ_WRITE is special. TOMOYO_TYPE_READ_WRITE is automatically
- * set if both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are set.
- * Both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are automatically set if
- * TOMOYO_TYPE_READ_WRITE is set.
- * TOMOYO_TYPE_READ_WRITE is automatically cleared if either TOMOYO_TYPE_READ
- * or TOMOYO_TYPE_WRITE is cleared.
- * Both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are automatically cleared if
- * TOMOYO_TYPE_READ_WRITE is cleared.
- */
-
+/* Index numbers for access controls with one pathname. */
enum tomoyo_path_acl_index {
- TOMOYO_TYPE_READ_WRITE,
TOMOYO_TYPE_EXECUTE,
TOMOYO_TYPE_READ,
TOMOYO_TYPE_WRITE,
+ TOMOYO_TYPE_APPEND,
TOMOYO_TYPE_UNLINK,
+ TOMOYO_TYPE_GETATTR,
TOMOYO_TYPE_RMDIR,
TOMOYO_TYPE_TRUNCATE,
TOMOYO_TYPE_SYMLINK,
- TOMOYO_TYPE_REWRITE,
TOMOYO_TYPE_CHROOT,
TOMOYO_TYPE_UMOUNT,
TOMOYO_MAX_PATH_OPERATION
};
-#define TOMOYO_RW_MASK ((1 << TOMOYO_TYPE_READ) | (1 << TOMOYO_TYPE_WRITE))
+/* Index numbers for /sys/kernel/security/tomoyo/stat interface. */
+enum tomoyo_memory_stat_type {
+ TOMOYO_MEMORY_POLICY,
+ TOMOYO_MEMORY_AUDIT,
+ TOMOYO_MEMORY_QUERY,
+ TOMOYO_MAX_MEMORY_STAT
+};
enum tomoyo_mkdev_acl_index {
TOMOYO_TYPE_MKBLOCK,
@@ -150,6 +228,7 @@
TOMOYO_MAX_MKDEV_OPERATION
};
+/* Index numbers for access controls with two pathnames. */
enum tomoyo_path2_acl_index {
TOMOYO_TYPE_LINK,
TOMOYO_TYPE_RENAME,
@@ -157,6 +236,7 @@
TOMOYO_MAX_PATH2_OPERATION
};
+/* Index numbers for access controls with one pathname and one number. */
enum tomoyo_path_number_acl_index {
TOMOYO_TYPE_CREATE,
TOMOYO_TYPE_MKDIR,
@@ -169,31 +249,45 @@
TOMOYO_MAX_PATH_NUMBER_OPERATION
};
+/* Index numbers for /sys/kernel/security/tomoyo/ interfaces. */
enum tomoyo_securityfs_interface_index {
TOMOYO_DOMAINPOLICY,
TOMOYO_EXCEPTIONPOLICY,
- TOMOYO_DOMAIN_STATUS,
TOMOYO_PROCESS_STATUS,
- TOMOYO_MEMINFO,
+ TOMOYO_STAT,
TOMOYO_SELFDOMAIN,
+ TOMOYO_AUDIT,
TOMOYO_VERSION,
TOMOYO_PROFILE,
TOMOYO_QUERY,
TOMOYO_MANAGER
};
+/* Index numbers for special mount operations. */
+enum tomoyo_special_mount {
+ TOMOYO_MOUNT_BIND, /* mount --bind /source /dest */
+ TOMOYO_MOUNT_MOVE, /* mount --move /old /new */
+ TOMOYO_MOUNT_REMOUNT, /* mount -o remount /dir */
+ TOMOYO_MOUNT_MAKE_UNBINDABLE, /* mount --make-unbindable /dir */
+ TOMOYO_MOUNT_MAKE_PRIVATE, /* mount --make-private /dir */
+ TOMOYO_MOUNT_MAKE_SLAVE, /* mount --make-slave /dir */
+ TOMOYO_MOUNT_MAKE_SHARED, /* mount --make-shared /dir */
+ TOMOYO_MAX_SPECIAL_MOUNT
+};
+
+/* Index numbers for functionality. */
enum tomoyo_mac_index {
TOMOYO_MAC_FILE_EXECUTE,
TOMOYO_MAC_FILE_OPEN,
TOMOYO_MAC_FILE_CREATE,
TOMOYO_MAC_FILE_UNLINK,
+ TOMOYO_MAC_FILE_GETATTR,
TOMOYO_MAC_FILE_MKDIR,
TOMOYO_MAC_FILE_RMDIR,
TOMOYO_MAC_FILE_MKFIFO,
TOMOYO_MAC_FILE_MKSOCK,
TOMOYO_MAC_FILE_TRUNCATE,
TOMOYO_MAC_FILE_SYMLINK,
- TOMOYO_MAC_FILE_REWRITE,
TOMOYO_MAC_FILE_MKBLOCK,
TOMOYO_MAC_FILE_MKCHAR,
TOMOYO_MAC_FILE_LINK,
@@ -209,38 +303,66 @@
TOMOYO_MAX_MAC_INDEX
};
+/* Index numbers for category of functionality. */
enum tomoyo_mac_category_index {
TOMOYO_MAC_CATEGORY_FILE,
TOMOYO_MAX_MAC_CATEGORY_INDEX
};
-#define TOMOYO_RETRY_REQUEST 1 /* Retry this request. */
+/*
+ * Retry this request. Returned by tomoyo_supervisor() if a policy violation
+ * occurred in enforcing mode and the userspace daemon decided to retry.
+ *
+ * We must choose a positive value in order to distinguish "granted" (which is
+ * 0) and "rejected" (which is a negative value) and "retry".
+ */
+#define TOMOYO_RETRY_REQUEST 1
+
+/* Index numbers for /sys/kernel/security/tomoyo/stat interface. */
+enum tomoyo_policy_stat_type {
+ /* Do not change this order. */
+ TOMOYO_STAT_POLICY_UPDATES,
+ TOMOYO_STAT_POLICY_LEARNING, /* == TOMOYO_CONFIG_LEARNING */
+ TOMOYO_STAT_POLICY_PERMISSIVE, /* == TOMOYO_CONFIG_PERMISSIVE */
+ TOMOYO_STAT_POLICY_ENFORCING, /* == TOMOYO_CONFIG_ENFORCING */
+ TOMOYO_MAX_POLICY_STAT
+};
+
+/* Index numbers for profile's PREFERENCE values. */
+enum tomoyo_pref_index {
+ TOMOYO_PREF_MAX_AUDIT_LOG,
+ TOMOYO_PREF_MAX_LEARNING_ENTRY,
+ TOMOYO_MAX_PREF
+};
/********** Structure definitions. **********/
-/*
- * tomoyo_acl_head is a structure which is used for holding elements not in
- * domain policy.
- * It has following fields.
- *
- * (1) "list" which is linked to tomoyo_policy_list[] .
- * (2) "is_deleted" is a bool which is true if marked as deleted, false
- * otherwise.
- */
+/* Common header for holding ACL entries. */
struct tomoyo_acl_head {
struct list_head list;
bool is_deleted;
} __packed;
-/*
- * tomoyo_request_info is a structure which is used for holding
- *
- * (1) Domain information of current process.
- * (2) How many retries are made for this request.
- * (3) Profile number used for this request.
- * (4) Access control mode of the profile.
- */
+/* Common header for shared entries. */
+struct tomoyo_shared_acl_head {
+ struct list_head list;
+ atomic_t users;
+} __packed;
+
+struct tomoyo_policy_namespace;
+
+/* Structure for request info. */
struct tomoyo_request_info {
+ /*
+ * For holding parameters specific to operations which deal with files.
+ * NULL if not dealing with files.
+ */
+ struct tomoyo_obj_info *obj;
+ /*
+ * For holding parameters specific to execve() request.
+ * NULL if not dealing do_execve().
+ */
+ struct tomoyo_execve *ee;
struct tomoyo_domain_info *domain;
/* For holding parameters. */
union {
@@ -248,11 +370,13 @@
const struct tomoyo_path_info *filename;
/* For using wildcards at tomoyo_find_next_domain(). */
const struct tomoyo_path_info *matched_path;
+ /* One of values in "enum tomoyo_path_acl_index". */
u8 operation;
} path;
struct {
const struct tomoyo_path_info *filename1;
const struct tomoyo_path_info *filename2;
+ /* One of values in "enum tomoyo_path2_acl_index". */
u8 operation;
} path2;
struct {
@@ -260,11 +384,16 @@
unsigned int mode;
unsigned int major;
unsigned int minor;
+ /* One of values in "enum tomoyo_mkdev_acl_index". */
u8 operation;
} mkdev;
struct {
const struct tomoyo_path_info *filename;
unsigned long number;
+ /*
+ * One of values in
+ * "enum tomoyo_path_number_acl_index".
+ */
u8 operation;
} path_number;
struct {
@@ -283,26 +412,7 @@
u8 type;
};
-/*
- * tomoyo_path_info is a structure which is used for holding a string data
- * used by TOMOYO.
- * This structure has several fields for supporting pattern matching.
- *
- * (1) "name" is the '\0' terminated string data.
- * (2) "hash" is full_name_hash(name, strlen(name)).
- * This allows tomoyo_pathcmp() to compare by hash before actually compare
- * using strcmp().
- * (3) "const_len" is the length of the initial segment of "name" which
- * consists entirely of non wildcard characters. In other words, the length
- * which we can compare two strings using strncmp().
- * (4) "is_dir" is a bool which is true if "name" ends with "/",
- * false otherwise.
- * TOMOYO distinguishes directory and non-directory. A directory ends with
- * "/" and non-directory does not end with "/".
- * (5) "is_patterned" is a bool which is true if "name" contains wildcard
- * characters, false otherwise. This allows TOMOYO to use "hash" and
- * strcmp() for string comparison if "is_patterned" is false.
- */
+/* Structure for holding a token. */
struct tomoyo_path_info {
const char *name;
u32 hash; /* = full_name_hash(name, strlen(name)) */
@@ -311,36 +421,32 @@
bool is_patterned; /* = tomoyo_path_contains_pattern(name) */
};
-/*
- * tomoyo_name is a structure which is used for linking
- * "struct tomoyo_path_info" into tomoyo_name_list .
- */
+/* Structure for holding string data. */
struct tomoyo_name {
- struct list_head list;
- atomic_t users;
+ struct tomoyo_shared_acl_head head;
struct tomoyo_path_info entry;
};
+/* Structure for holding a word. */
struct tomoyo_name_union {
+ /* Either @filename or @group is NULL. */
const struct tomoyo_path_info *filename;
struct tomoyo_group *group;
- u8 is_group;
};
+/* Structure for holding a number. */
struct tomoyo_number_union {
unsigned long values[2];
- struct tomoyo_group *group;
- u8 min_type;
- u8 max_type;
- u8 is_group;
+ struct tomoyo_group *group; /* Maybe NULL. */
+ /* One of values in "enum tomoyo_value_type". */
+ u8 value_type[2];
};
/* Structure for "path_group"/"number_group" directive. */
struct tomoyo_group {
- struct list_head list;
+ struct tomoyo_shared_acl_head head;
const struct tomoyo_path_info *group_name;
struct list_head member_list;
- atomic_t users;
};
/* Structure for "path_group" directive. */
@@ -355,130 +461,158 @@
struct tomoyo_number_union number;
};
-/*
- * tomoyo_acl_info is a structure which is used for holding
- *
- * (1) "list" which is linked to the ->acl_info_list of
- * "struct tomoyo_domain_info"
- * (2) "is_deleted" is a bool which is true if this domain is marked as
- * "deleted", false otherwise.
- * (3) "type" which tells type of the entry.
- *
- * Packing "struct tomoyo_acl_info" allows
- * "struct tomoyo_path_acl" to embed "u16" and "struct tomoyo_path2_acl"
- * "struct tomoyo_path_number_acl" "struct tomoyo_mkdev_acl" to embed
- * "u8" without enlarging their structure size.
- */
+/* Subset of "struct stat". Used by conditional ACL and audit logs. */
+struct tomoyo_mini_stat {
+ uid_t uid;
+ gid_t gid;
+ ino_t ino;
+ mode_t mode;
+ dev_t dev;
+ dev_t rdev;
+};
+
+/* Structure for dumping argv[] and envp[] of "struct linux_binprm". */
+struct tomoyo_page_dump {
+ struct page *page; /* Previously dumped page. */
+ char *data; /* Contents of "page". Size is PAGE_SIZE. */
+};
+
+/* Structure for attribute checks in addition to pathname checks. */
+struct tomoyo_obj_info {
+ /*
+ * True if tomoyo_get_attributes() was already called, false otherwise.
+ */
+ bool validate_done;
+ /* True if @stat[] is valid. */
+ bool stat_valid[TOMOYO_MAX_PATH_STAT];
+ /* First pathname. Initialized with { NULL, NULL } if no path. */
+ struct path path1;
+ /* Second pathname. Initialized with { NULL, NULL } if no path. */
+ struct path path2;
+ /*
+ * Information on @path1, @path1's parent directory, @path2, @path2's
+ * parent directory.
+ */
+ struct tomoyo_mini_stat stat[TOMOYO_MAX_PATH_STAT];
+ /*
+ * Content of symbolic link to be created. NULL for operations other
+ * than symlink().
+ */
+ struct tomoyo_path_info *symlink_target;
+};
+
+/* Structure for argv[]. */
+struct tomoyo_argv {
+ unsigned long index;
+ const struct tomoyo_path_info *value;
+ bool is_not;
+};
+
+/* Structure for envp[]. */
+struct tomoyo_envp {
+ const struct tomoyo_path_info *name;
+ const struct tomoyo_path_info *value;
+ bool is_not;
+};
+
+/* Structure for execve() operation. */
+struct tomoyo_execve {
+ struct tomoyo_request_info r;
+ struct tomoyo_obj_info obj;
+ struct linux_binprm *bprm;
+ /* For dumping argv[] and envp[]. */
+ struct tomoyo_page_dump dump;
+ /* For temporary use. */
+ char *tmp; /* Size is TOMOYO_EXEC_TMPSIZE bytes */
+};
+
+/* Structure for entries which follows "struct tomoyo_condition". */
+struct tomoyo_condition_element {
+ /*
+ * Left hand operand. A "struct tomoyo_argv" for TOMOYO_ARGV_ENTRY, a
+ * "struct tomoyo_envp" for TOMOYO_ENVP_ENTRY is attached to the tail
+ * of the array of this struct.
+ */
+ u8 left;
+ /*
+ * Right hand operand. A "struct tomoyo_number_union" for
+ * TOMOYO_NUMBER_UNION, a "struct tomoyo_name_union" for
+ * TOMOYO_NAME_UNION is attached to the tail of the array of this
+ * struct.
+ */
+ u8 right;
+ /* Equation operator. True if equals or overlaps, false otherwise. */
+ bool equals;
+};
+
+/* Structure for optional arguments. */
+struct tomoyo_condition {
+ struct tomoyo_shared_acl_head head;
+ u32 size; /* Memory size allocated for this entry. */
+ u16 condc; /* Number of conditions in this struct. */
+ u16 numbers_count; /* Number of "struct tomoyo_number_union" values. */
+ u16 names_count; /* Number of "struct tomoyo_name_union" names. */
+ u16 argc; /* Number of "struct tomoyo_argv". */
+ u16 envc; /* Number of "struct tomoyo_envp". */
+ /*
+ * struct tomoyo_condition_element condition[condc];
+ * struct tomoyo_number_union values[numbers_count];
+ * struct tomoyo_name_union names[names_count];
+ * struct tomoyo_argv argv[argc];
+ * struct tomoyo_envp envp[envc];
+ */
+};
+
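The five variable-sized arrays listed in the comment above are not C
members; they are carved out of the same allocation immediately after the
fixed-size header. A hedged sketch of how a reader locates the first two of
them (the helper names are made up):

static inline const struct tomoyo_condition_element *
tomoyo_condition_elements(const struct tomoyo_condition *cond)
{
        /* The condition[] array starts right after the header. */
        return (const struct tomoyo_condition_element *) (cond + 1);
}

static inline const struct tomoyo_number_union *
tomoyo_condition_numbers(const struct tomoyo_condition *cond)
{
        /* values[] follows the condc condition elements. */
        return (const struct tomoyo_number_union *)
                (tomoyo_condition_elements(cond) + cond->condc);
}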
+/* Common header for individual entries. */
struct tomoyo_acl_info {
struct list_head list;
+ struct tomoyo_condition *cond; /* Maybe NULL. */
bool is_deleted;
- u8 type; /* = one of values in "enum tomoyo_acl_entry_type_index". */
+ u8 type; /* One of values in "enum tomoyo_acl_entry_type_index". */
} __packed;
-/*
- * tomoyo_domain_info is a structure which is used for holding permissions
- * (e.g. "allow_read /lib/libc-2.5.so") given to each domain.
- * It has following fields.
- *
- * (1) "list" which is linked to tomoyo_domain_list .
- * (2) "acl_info_list" which is linked to "struct tomoyo_acl_info".
- * (3) "domainname" which holds the name of the domain.
- * (4) "profile" which remembers profile number assigned to this domain.
- * (5) "is_deleted" is a bool which is true if this domain is marked as
- * "deleted", false otherwise.
- * (6) "quota_warned" is a bool which is used for suppressing warning message
- * when learning mode learned too much entries.
- * (7) "ignore_global_allow_read" is a bool which is true if this domain
- * should ignore "allow_read" directive in exception policy.
- * (8) "transition_failed" is a bool which is set to true when this domain was
- * unable to create a new domain at tomoyo_find_next_domain() because the
- * name of the domain to be created was too long or it could not allocate
- * memory. If set to true, more than one process continued execve()
- * without domain transition.
- * (9) "users" is an atomic_t that holds how many "struct cred"->security
- * are referring this "struct tomoyo_domain_info". If is_deleted == true
- * and users == 0, this struct will be kfree()d upon next garbage
- * collection.
- *
- * A domain's lifecycle is an analogy of files on / directory.
- * Multiple domains with the same domainname cannot be created (as with
- * creating files with the same filename fails with -EEXIST).
- * If a process reached a domain, that process can reside in that domain after
- * that domain is marked as "deleted" (as with a process can access an already
- * open()ed file after that file was unlink()ed).
- */
+/* Structure for domain information. */
struct tomoyo_domain_info {
struct list_head list;
struct list_head acl_info_list;
/* Name of this domain. Never NULL. */
const struct tomoyo_path_info *domainname;
+ /* Namespace for this domain. Never NULL. */
+ struct tomoyo_policy_namespace *ns;
u8 profile; /* Profile number to use. */
+ u8 group; /* Group number to use. */
bool is_deleted; /* Delete flag. */
- bool quota_warned; /* Quota warnning flag. */
- bool ignore_global_allow_read; /* Ignore "allow_read" flag. */
- bool transition_failed; /* Domain transition failed flag. */
+ bool flags[TOMOYO_MAX_DOMAIN_INFO_FLAGS];
atomic_t users; /* Number of referring credentials. */
};
/*
- * tomoyo_path_acl is a structure which is used for holding an
- * entry with one pathname operation (e.g. open(), mkdir()).
- * It has following fields.
- *
- * (1) "head" which is a "struct tomoyo_acl_info".
- * (2) "perm" which is a bitmask of permitted operations.
- * (3) "name" is the pathname.
- *
- * Directives held by this structure are "allow_read/write", "allow_execute",
- * "allow_read", "allow_write", "allow_unlink", "allow_rmdir",
- * "allow_truncate", "allow_symlink", "allow_rewrite", "allow_chroot" and
- * "allow_unmount".
+ * Structure for "file execute", "file read", "file write", "file append",
+ * "file unlink", "file getattr", "file rmdir", "file truncate",
+ * "file symlink", "file chroot" and "file unmount" directive.
*/
struct tomoyo_path_acl {
struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_ACL */
- u16 perm;
+ u16 perm; /* Bitmask of values in "enum tomoyo_path_acl_index". */
struct tomoyo_name_union name;
};
/*
- * tomoyo_path_number_acl is a structure which is used for holding an
- * entry with one pathname and one number operation.
- * It has following fields.
- *
- * (1) "head" which is a "struct tomoyo_acl_info".
- * (2) "perm" which is a bitmask of permitted operations.
- * (3) "name" is the pathname.
- * (4) "number" is the numeric value.
- *
- * Directives held by this structure are "allow_create", "allow_mkdir",
- * "allow_ioctl", "allow_mkfifo", "allow_mksock", "allow_chmod", "allow_chown"
- * and "allow_chgrp".
- *
+ * Structure for "file create", "file mkdir", "file mkfifo", "file mksock",
+ * "file ioctl", "file chmod", "file chown" and "file chgrp" directive.
*/
struct tomoyo_path_number_acl {
struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_NUMBER_ACL */
+ /* Bitmask of values in "enum tomoyo_path_number_acl_index". */
u8 perm;
struct tomoyo_name_union name;
struct tomoyo_number_union number;
};
-/*
- * tomoyo_mkdev_acl is a structure which is used for holding an
- * entry with one pathname and three numbers operation.
- * It has following fields.
- *
- * (1) "head" which is a "struct tomoyo_acl_info".
- * (2) "perm" which is a bitmask of permitted operations.
- * (3) "mode" is the create mode.
- * (4) "major" is the major number of device node.
- * (5) "minor" is the minor number of device node.
- *
- * Directives held by this structure are "allow_mkchar", "allow_mkblock".
- *
- */
+/* Structure for "file mkblock" and "file mkchar" directive. */
struct tomoyo_mkdev_acl {
struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MKDEV_ACL */
- u8 perm;
+ u8 perm; /* Bitmask of values in "enum tomoyo_mkdev_acl_index". */
struct tomoyo_name_union name;
struct tomoyo_number_union mode;
struct tomoyo_number_union major;
@@ -486,38 +620,16 @@
};
/*
- * tomoyo_path2_acl is a structure which is used for holding an
- * entry with two pathnames operation (i.e. link(), rename() and pivot_root()).
- * It has following fields.
- *
- * (1) "head" which is a "struct tomoyo_acl_info".
- * (2) "perm" which is a bitmask of permitted operations.
- * (3) "name1" is the source/old pathname.
- * (4) "name2" is the destination/new pathname.
- *
- * Directives held by this structure are "allow_rename", "allow_link" and
- * "allow_pivot_root".
+ * Structure for "file rename", "file link" and "file pivot_root" directive.
*/
struct tomoyo_path2_acl {
struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH2_ACL */
- u8 perm;
+ u8 perm; /* Bitmask of values in "enum tomoyo_path2_acl_index". */
struct tomoyo_name_union name1;
struct tomoyo_name_union name2;
};
-/*
- * tomoyo_mount_acl is a structure which is used for holding an
- * entry for mount operation.
- * It has following fields.
- *
- * (1) "head" which is a "struct tomoyo_acl_info".
- * (2) "dev_name" is the device name.
- * (3) "dir_name" is the mount point.
- * (4) "fs_type" is the filesystem type.
- * (5) "flags" is the mount flags.
- *
- * Directive held by this structure is "allow_mount".
- */
+/* Structure for "file mount" directive. */
struct tomoyo_mount_acl {
struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MOUNT_ACL */
struct tomoyo_name_union dev_name;
@@ -526,7 +638,15 @@
struct tomoyo_number_union flags;
};
-#define TOMOYO_MAX_IO_READ_QUEUE 32
+/* Structure for holding a line from /sys/kernel/security/tomoyo/ interface. */
+struct tomoyo_acl_param {
+ char *data;
+ struct list_head *list;
+ struct tomoyo_policy_namespace *ns;
+ bool is_delete;
+};
+
+#define TOMOYO_MAX_IO_READ_QUEUE 64
/*
* Structure for reading/writing policy via /sys/kernel/security/tomoyo
@@ -538,95 +658,55 @@
int (*poll) (struct file *file, poll_table *wait);
/* Exclusive lock for this structure. */
struct mutex io_sem;
- /* Index returned by tomoyo_read_lock(). */
- int reader_idx;
char __user *read_user_buf;
- int read_user_buf_avail;
+ size_t read_user_buf_avail;
struct {
+ struct list_head *ns;
struct list_head *domain;
struct list_head *group;
struct list_head *acl;
- int avail;
- int step;
- int query_index;
+ size_t avail;
+ unsigned int step;
+ unsigned int query_index;
u16 index;
+ u16 cond_index;
+ u8 acl_group_index;
+ u8 cond_step;
u8 bit;
u8 w_pos;
bool eof;
bool print_this_domain_only;
- bool print_execute_only;
+ bool print_transition_related_only;
+ bool print_cond_part;
const char *w[TOMOYO_MAX_IO_READ_QUEUE];
} r;
- /* The position currently writing to. */
- struct tomoyo_domain_info *write_var1;
+ struct {
+ struct tomoyo_policy_namespace *ns;
+ /* The position currently writing to. */
+ struct tomoyo_domain_info *domain;
+ /* Bytes available for writing. */
+ size_t avail;
+ bool is_delete;
+ } w;
/* Buffer for reading. */
char *read_buf;
/* Size of read buffer. */
- int readbuf_size;
+ size_t readbuf_size;
/* Buffer for writing. */
char *write_buf;
- /* Bytes available for writing. */
- int write_avail;
/* Size of write buffer. */
- int writebuf_size;
+ size_t writebuf_size;
/* Type of this interface. */
- u8 type;
+ enum tomoyo_securityfs_interface_index type;
+ /* Users counter protected by tomoyo_io_buffer_list_lock. */
+ u8 users;
+ /* List for telling GC not to kfree() elements. */
+ struct list_head list;
};
/*
- * tomoyo_readable_file is a structure which is used for holding
- * "allow_read" entries.
- * It has following fields.
- *
- * (1) "head" is "struct tomoyo_acl_head".
- * (2) "filename" is a pathname which is allowed to open(O_RDONLY).
- */
-struct tomoyo_readable_file {
- struct tomoyo_acl_head head;
- const struct tomoyo_path_info *filename;
-};
-
-/*
- * tomoyo_no_pattern is a structure which is used for holding
- * "file_pattern" entries.
- * It has following fields.
- *
- * (1) "head" is "struct tomoyo_acl_head".
- * (2) "pattern" is a pathname pattern which is used for converting pathnames
- * to pathname patterns during learning mode.
- */
-struct tomoyo_no_pattern {
- struct tomoyo_acl_head head;
- const struct tomoyo_path_info *pattern;
-};
-
-/*
- * tomoyo_no_rewrite is a structure which is used for holding
- * "deny_rewrite" entries.
- * It has following fields.
- *
- * (1) "head" is "struct tomoyo_acl_head".
- * (2) "pattern" is a pathname which is by default not permitted to modify
- * already existing content.
- */
-struct tomoyo_no_rewrite {
- struct tomoyo_acl_head head;
- const struct tomoyo_path_info *pattern;
-};
-
-/*
- * tomoyo_transition_control is a structure which is used for holding
- * "initialize_domain"/"no_initialize_domain"/"keep_domain"/"no_keep_domain"
- * entries.
- * It has following fields.
- *
- * (1) "head" is "struct tomoyo_acl_head".
- * (2) "type" is type of this entry.
- * (3) "is_last_name" is a bool which is true if "domainname" is "the last
- * component of a domainname", false otherwise.
- * (4) "domainname" which is "a domainname" or "the last component of a
- * domainname".
- * (5) "program" which is a program's pathname.
+ * Structure for "initialize_domain"/"no_initialize_domain"/"keep_domain"/
+ * "no_keep_domain" keyword.
*/
struct tomoyo_transition_control {
struct tomoyo_acl_head head;
@@ -637,32 +717,14 @@
const struct tomoyo_path_info *program; /* Maybe NULL */
};
-/*
- * tomoyo_aggregator is a structure which is used for holding
- * "aggregator" entries.
- * It has following fields.
- *
- * (1) "head" is "struct tomoyo_acl_head".
- * (2) "original_name" which is originally requested name.
- * (3) "aggregated_name" which is name to rewrite.
- */
+/* Structure for "aggregator" keyword. */
struct tomoyo_aggregator {
struct tomoyo_acl_head head;
const struct tomoyo_path_info *original_name;
const struct tomoyo_path_info *aggregated_name;
};
-/*
- * tomoyo_manager is a structure which is used for holding list of
- * domainnames or programs which are permitted to modify configuration via
- * /sys/kernel/security/tomoyo/ interface.
- * It has following fields.
- *
- * (1) "head" is "struct tomoyo_acl_head".
- * (2) "is_domain" is a bool which is true if "manager" is a domainname, false
- * otherwise.
- * (3) "manager" is a domainname or a program's pathname.
- */
+/* Structure for policy manager. */
struct tomoyo_manager {
struct tomoyo_acl_head head;
bool is_domain; /* True if manager is a domainname. */
@@ -677,6 +739,7 @@
bool permissive_verbose;
};
+/* Structure for /sys/kernel/security/tomoyo/profile interface. */
struct tomoyo_profile {
const struct tomoyo_path_info *comment;
struct tomoyo_preference *learning;
@@ -685,246 +748,254 @@
struct tomoyo_preference preference;
u8 default_config;
u8 config[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX];
+ unsigned int pref[TOMOYO_MAX_PREF];
+};
+
+/* Structure for representing YYYY/MM/DD hh/mm/ss. */
+struct tomoyo_time {
+ u16 year;
+ u8 month;
+ u8 day;
+ u8 hour;
+ u8 min;
+ u8 sec;
+};
+
+/* Structure for policy namespace. */
+struct tomoyo_policy_namespace {
+ /* Profile table. Memory is allocated as needed. */
+ struct tomoyo_profile *profile_ptr[TOMOYO_MAX_PROFILES];
+ /* List of "struct tomoyo_group". */
+ struct list_head group_list[TOMOYO_MAX_GROUP];
+ /* List of policy. */
+ struct list_head policy_list[TOMOYO_MAX_POLICY];
+ /* The global ACL referred by "use_group" keyword. */
+ struct list_head acl_group[TOMOYO_MAX_ACL_GROUPS];
+ /* List for connecting to tomoyo_namespace_list list. */
+ struct list_head namespace_list;
+ /* Profile version. Currently only 20100903 is defined. */
+ unsigned int profile_version;
+ /* Name of this namespace (e.g. "<kernel>", "</usr/sbin/httpd>"). */
+ const char *name;
};
/********** Function prototypes. **********/
-/* Check whether the given string starts with the given keyword. */
-bool tomoyo_str_starts(char **src, const char *find);
-/* Get tomoyo_realpath() of current process. */
-const char *tomoyo_get_exe(void);
-/* Format string. */
-void tomoyo_normalize_line(unsigned char *buffer);
-/* Print warning or error message on console. */
-void tomoyo_warn_log(struct tomoyo_request_info *r, const char *fmt, ...)
- __attribute__ ((format(printf, 2, 3)));
-/* Check all profiles currently assigned to domains are defined. */
-void tomoyo_check_profile(void);
-/* Open operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_open_control(const u8 type, struct file *file);
-/* Close /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_close_control(struct file *file);
-/* Poll operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_poll_control(struct file *file, poll_table *wait);
-/* Read operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_read_control(struct file *file, char __user *buffer,
- const int buffer_len);
-/* Write operation for /sys/kernel/security/tomoyo/ interface. */
-int tomoyo_write_control(struct file *file, const char __user *buffer,
- const int buffer_len);
-/* Check whether the domain has too many ACL entries to hold. */
-bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r);
-/* Print out of memory warning message. */
-void tomoyo_warn_oom(const char *function);
-/* Check whether the given name matches the given name_union. */
-const struct tomoyo_path_info *
-tomoyo_compare_name_union(const struct tomoyo_path_info *name,
- const struct tomoyo_name_union *ptr);
-/* Check whether the given number matches the given number_union. */
bool tomoyo_compare_number_union(const unsigned long value,
const struct tomoyo_number_union *ptr);
-int tomoyo_get_mode(const u8 profile, const u8 index);
-void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
- __attribute__ ((format(printf, 2, 3)));
-/* Check whether the domainname is correct. */
+bool tomoyo_condition(struct tomoyo_request_info *r,
+ const struct tomoyo_condition *cond);
bool tomoyo_correct_domain(const unsigned char *domainname);
-/* Check whether the token is correct. */
bool tomoyo_correct_path(const char *filename);
bool tomoyo_correct_word(const char *string);
-/* Check whether the token can be a domainname. */
bool tomoyo_domain_def(const unsigned char *buffer);
-bool tomoyo_parse_name_union(const char *filename,
- struct tomoyo_name_union *ptr);
-/* Check whether the given filename matches the given path_group. */
-const struct tomoyo_path_info *
-tomoyo_path_matches_group(const struct tomoyo_path_info *pathname,
- const struct tomoyo_group *group);
-/* Check whether the given value matches the given number_group. */
+bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r);
+bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
+ struct tomoyo_page_dump *dump);
+bool tomoyo_memory_ok(void *ptr);
bool tomoyo_number_matches_group(const unsigned long min,
const unsigned long max,
const struct tomoyo_group *group);
-/* Check whether the given filename matches the given pattern. */
+bool tomoyo_parse_name_union(struct tomoyo_acl_param *param,
+ struct tomoyo_name_union *ptr);
+bool tomoyo_parse_number_union(struct tomoyo_acl_param *param,
+ struct tomoyo_number_union *ptr);
bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename,
const struct tomoyo_path_info *pattern);
-
-bool tomoyo_parse_number_union(char *data, struct tomoyo_number_union *num);
-/* Tokenize a line. */
-bool tomoyo_tokenize(char *buffer, char *w[], size_t size);
-/* Write domain policy violation warning message to console? */
-bool tomoyo_verbose_mode(const struct tomoyo_domain_info *domain);
-/* Fill "struct tomoyo_request_info". */
+bool tomoyo_permstr(const char *string, const char *keyword);
+bool tomoyo_str_starts(char **src, const char *find);
+char *tomoyo_encode(const char *str);
+char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt,
+ va_list args);
+char *tomoyo_read_token(struct tomoyo_acl_param *param);
+char *tomoyo_realpath_from_path(struct path *path);
+char *tomoyo_realpath_nofollow(const char *pathname);
+const char *tomoyo_get_exe(void);
+const char *tomoyo_yesno(const unsigned int value);
+const struct tomoyo_path_info *tomoyo_compare_name_union
+(const struct tomoyo_path_info *name, const struct tomoyo_name_union *ptr);
+const struct tomoyo_path_info *tomoyo_get_name(const char *name);
+const struct tomoyo_path_info *tomoyo_path_matches_group
+(const struct tomoyo_path_info *pathname, const struct tomoyo_group *group);
+int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
+ struct path *path, const int flag);
+int tomoyo_close_control(struct tomoyo_io_buffer *head);
+int tomoyo_find_next_domain(struct linux_binprm *bprm);
+int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile,
+ const u8 index);
int tomoyo_init_request_info(struct tomoyo_request_info *r,
struct tomoyo_domain_info *domain,
const u8 index);
-/* Check permission for mount operation. */
-int tomoyo_mount_permission(char *dev_name, struct path *path, char *type,
- unsigned long flags, void *data_page);
-/* Create "aggregator" entry in exception policy. */
-int tomoyo_write_aggregator(char *data, const bool is_delete);
-int tomoyo_write_transition_control(char *data, const bool is_delete,
- const u8 type);
-/*
- * Create "allow_read/write", "allow_execute", "allow_read", "allow_write",
- * "allow_create", "allow_unlink", "allow_mkdir", "allow_rmdir",
- * "allow_mkfifo", "allow_mksock", "allow_mkblock", "allow_mkchar",
- * "allow_truncate", "allow_symlink", "allow_rewrite", "allow_rename" and
- * "allow_link" entry in domain policy.
- */
-int tomoyo_write_file(char *data, struct tomoyo_domain_info *domain,
- const bool is_delete);
-/* Create "allow_read" entry in exception policy. */
-int tomoyo_write_globally_readable(char *data, const bool is_delete);
-/* Create "allow_mount" entry in domain policy. */
-int tomoyo_write_mount(char *data, struct tomoyo_domain_info *domain,
- const bool is_delete);
-/* Create "deny_rewrite" entry in exception policy. */
-int tomoyo_write_no_rewrite(char *data, const bool is_delete);
-/* Create "file_pattern" entry in exception policy. */
-int tomoyo_write_pattern(char *data, const bool is_delete);
-/* Create "path_group"/"number_group" entry in exception policy. */
-int tomoyo_write_group(char *data, const bool is_delete, const u8 type);
-int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
- __attribute__ ((format(printf, 2, 3)));
-/* Find a domain by the given name. */
-struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname);
-/* Find or create a domain by the given name. */
-struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
- const u8 profile);
-struct tomoyo_profile *tomoyo_profile(const u8 profile);
-/*
- * Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group".
- */
-struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 type);
-
-/* Check mode for specified functionality. */
-unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain,
- const u8 index);
-/* Fill in "struct tomoyo_path_info" members. */
-void tomoyo_fill_path_info(struct tomoyo_path_info *ptr);
-/* Run policy loader when /sbin/init starts. */
-void tomoyo_load_policy(const char *filename);
-
-void tomoyo_put_number_union(struct tomoyo_number_union *ptr);
-
-/* Convert binary string to ascii string. */
-char *tomoyo_encode(const char *str);
-
-/*
- * Returns realpath(3) of the given pathname except that
- * ignores chroot'ed root and does not follow the final symlink.
- */
-char *tomoyo_realpath_nofollow(const char *pathname);
-/*
- * Returns realpath(3) of the given pathname except that
- * ignores chroot'ed root and the pathname is already solved.
- */
-char *tomoyo_realpath_from_path(struct path *path);
-/* Get patterned pathname. */
-const char *tomoyo_pattern(const struct tomoyo_path_info *filename);
-
-/* Check memory quota. */
-bool tomoyo_memory_ok(void *ptr);
-void *tomoyo_commit_ok(void *data, const unsigned int size);
-
-/*
- * Keep the given name on the RAM.
- * The RAM is shared, so NEVER try to modify or kfree() the returned name.
- */
-const struct tomoyo_path_info *tomoyo_get_name(const char *name);
-
-/* Check for memory usage. */
-void tomoyo_read_memory_counter(struct tomoyo_io_buffer *head);
-
-/* Set memory quota. */
-int tomoyo_write_memory_quota(struct tomoyo_io_buffer *head);
-
-/* Initialize mm related code. */
-void __init tomoyo_mm_init(void);
-int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
- const struct tomoyo_path_info *filename);
-int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
- struct path *path, const int flag);
-int tomoyo_path_number_perm(const u8 operation, struct path *path,
- unsigned long number);
int tomoyo_mkdev_perm(const u8 operation, struct path *path,
const unsigned int mode, unsigned int dev);
-int tomoyo_path_perm(const u8 operation, struct path *path);
+int tomoyo_mount_permission(char *dev_name, struct path *path,
+ const char *type, unsigned long flags,
+ void *data_page);
+int tomoyo_open_control(const u8 type, struct file *file);
int tomoyo_path2_perm(const u8 operation, struct path *path1,
struct path *path2);
-int tomoyo_find_next_domain(struct linux_binprm *bprm);
-
-void tomoyo_print_ulong(char *buffer, const int buffer_len,
- const unsigned long value, const u8 type);
-
-/* Drop refcount on tomoyo_name_union. */
-void tomoyo_put_name_union(struct tomoyo_name_union *ptr);
-
-/* Run garbage collector. */
-void tomoyo_run_gc(void);
-
-void tomoyo_memory_free(void *ptr);
-
+int tomoyo_path_number_perm(const u8 operation, struct path *path,
+ unsigned long number);
+int tomoyo_path_perm(const u8 operation, struct path *path,
+ const char *target);
+int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation,
+ const struct tomoyo_path_info *filename);
+int tomoyo_poll_control(struct file *file, poll_table *wait);
+int tomoyo_poll_log(struct file *file, poll_table *wait);
+int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...)
+ __printf(2, 3);
int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
- bool is_delete, struct tomoyo_domain_info *domain,
- bool (*check_duplicate) (const struct tomoyo_acl_info
- *,
- const struct tomoyo_acl_info
- *),
- bool (*merge_duplicate) (struct tomoyo_acl_info *,
- struct tomoyo_acl_info *,
- const bool));
+ struct tomoyo_acl_param *param,
+ bool (*check_duplicate)
+ (const struct tomoyo_acl_info *,
+ const struct tomoyo_acl_info *),
+ bool (*merge_duplicate)
+ (struct tomoyo_acl_info *, struct tomoyo_acl_info *,
+ const bool));
int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
- bool is_delete, struct list_head *list,
- bool (*check_duplicate) (const struct tomoyo_acl_head
- *,
- const struct tomoyo_acl_head
- *));
+ struct tomoyo_acl_param *param,
+ bool (*check_duplicate)
+ (const struct tomoyo_acl_head *,
+ const struct tomoyo_acl_head *));
+int tomoyo_write_aggregator(struct tomoyo_acl_param *param);
+int tomoyo_write_file(struct tomoyo_acl_param *param);
+int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type);
+int tomoyo_write_transition_control(struct tomoyo_acl_param *param,
+ const u8 type);
+ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer,
+ const int buffer_len);
+ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head,
+ const char __user *buffer, const int buffer_len);
+struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param);
+struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
+ const bool transit);
+struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname);
+struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param,
+ const u8 idx);
+struct tomoyo_policy_namespace *tomoyo_assign_namespace
+(const char *domainname);
+struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns,
+ const u8 profile);
+unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain,
+ const u8 index);
+u8 tomoyo_parse_ulong(unsigned long *result, char **str);
+void *tomoyo_commit_ok(void *data, const unsigned int size);
+void __init tomoyo_load_builtin_policy(void);
+void __init tomoyo_mm_init(void);
void tomoyo_check_acl(struct tomoyo_request_info *r,
bool (*check_entry) (struct tomoyo_request_info *,
const struct tomoyo_acl_info *));
+void tomoyo_check_profile(void);
+void tomoyo_convert_time(time_t time, struct tomoyo_time *stamp);
+void tomoyo_del_condition(struct list_head *element);
+void tomoyo_fill_path_info(struct tomoyo_path_info *ptr);
+void tomoyo_get_attributes(struct tomoyo_obj_info *obj);
+void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns);
+void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...)
+ __printf(2, 3);
+void tomoyo_load_policy(const char *filename);
+void tomoyo_memory_free(void *ptr);
+void tomoyo_normalize_line(unsigned char *buffer);
+void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register);
+void tomoyo_print_ulong(char *buffer, const int buffer_len,
+ const unsigned long value, const u8 type);
+void tomoyo_put_name_union(struct tomoyo_name_union *ptr);
+void tomoyo_put_number_union(struct tomoyo_number_union *ptr);
+void tomoyo_read_log(struct tomoyo_io_buffer *head);
+void tomoyo_update_stat(const u8 index);
+void tomoyo_warn_oom(const char *function);
+void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...)
+ __printf(2, 3);
+void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt,
+ va_list args);
/********** External variable definitions. **********/
-/* Lock for GC. */
-extern struct srcu_struct tomoyo_ss;
-
-/* The list for "struct tomoyo_domain_info". */
-extern struct list_head tomoyo_domain_list;
-
-extern struct list_head tomoyo_policy_list[TOMOYO_MAX_POLICY];
-extern struct list_head tomoyo_group_list[TOMOYO_MAX_GROUP];
-extern struct list_head tomoyo_name_list[TOMOYO_MAX_HASH];
-
-/* Lock for protecting policy. */
-extern struct mutex tomoyo_policy_lock;
-
-/* Has /sbin/init started? */
extern bool tomoyo_policy_loaded;
-
-/* The kernel's domain. */
+extern const char * const tomoyo_condition_keyword
+[TOMOYO_MAX_CONDITION_KEYWORD];
+extern const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS];
+extern const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX
+ + TOMOYO_MAX_MAC_CATEGORY_INDEX];
+extern const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE];
+extern const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION];
+extern const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX];
+extern const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION];
+extern const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION];
+extern const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION];
+extern struct list_head tomoyo_condition_list;
+extern struct list_head tomoyo_domain_list;
+extern struct list_head tomoyo_name_list[TOMOYO_MAX_HASH];
+extern struct list_head tomoyo_namespace_list;
+extern struct mutex tomoyo_policy_lock;
+extern struct srcu_struct tomoyo_ss;
extern struct tomoyo_domain_info tomoyo_kernel_domain;
-
-extern const char *tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION];
-extern const char *tomoyo_mkdev_keyword[TOMOYO_MAX_MKDEV_OPERATION];
-extern const char *tomoyo_path2_keyword[TOMOYO_MAX_PATH2_OPERATION];
-extern const char *tomoyo_path_number_keyword[TOMOYO_MAX_PATH_NUMBER_OPERATION];
-
-extern unsigned int tomoyo_quota_for_query;
-extern unsigned int tomoyo_query_memory_size;
+extern struct tomoyo_policy_namespace tomoyo_kernel_namespace;
+extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT];
+extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT];
/********** Inlined functions. **********/
+/**
+ * tomoyo_read_lock - Take lock for protecting policy.
+ *
+ * Returns index number for tomoyo_read_unlock().
+ */
static inline int tomoyo_read_lock(void)
{
return srcu_read_lock(&tomoyo_ss);
}
+/**
+ * tomoyo_read_unlock - Release lock for protecting policy.
+ *
+ * @idx: Index number returned by tomoyo_read_lock().
+ *
+ * Returns nothing.
+ */
static inline void tomoyo_read_unlock(int idx)
{
srcu_read_unlock(&tomoyo_ss, idx);
}
-/* strcmp() for "struct tomoyo_path_info" structure. */
+/**
+ * tomoyo_sys_getppid - Copy of getppid().
+ *
+ * Returns parent process's PID.
+ *
+ * Alpha does not have getppid() defined. To be able to build this module on
+ * Alpha, I have to copy getppid() from kernel/timer.c.
+ */
+static inline pid_t tomoyo_sys_getppid(void)
+{
+ pid_t pid;
+ rcu_read_lock();
+ pid = task_tgid_vnr(current->real_parent);
+ rcu_read_unlock();
+ return pid;
+}
+
+/**
+ * tomoyo_sys_getpid - Copy of getpid().
+ *
+ * Returns current thread's PID.
+ *
+ * Alpha does not have getpid() defined. To be able to build this module on
+ * Alpha, I have to copy getpid() from kernel/timer.c.
+ */
+static inline pid_t tomoyo_sys_getpid(void)
+{
+ return task_tgid_vnr(current);
+}
+
+/**
+ * tomoyo_pathcmp - strcmp() for "struct tomoyo_path_info" structure.
+ *
+ * @a: Pointer to "struct tomoyo_path_info".
+ * @b: Pointer to "struct tomoyo_path_info".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
static inline bool tomoyo_pathcmp(const struct tomoyo_path_info *a,
const struct tomoyo_path_info *b)
{
@@ -932,77 +1003,155 @@
}
/**
- * tomoyo_valid - Check whether the character is a valid char.
+ * tomoyo_put_name - Drop reference on "struct tomoyo_name".
*
- * @c: The character to check.
+ * @name: Pointer to "struct tomoyo_path_info". Maybe NULL.
*
- * Returns true if @c is a valid character, false otherwise.
+ * Returns nothing.
*/
-static inline bool tomoyo_valid(const unsigned char c)
-{
- return c > ' ' && c < 127;
-}
-
-/**
- * tomoyo_invalid - Check whether the character is an invalid char.
- *
- * @c: The character to check.
- *
- * Returns true if @c is an invalid character, false otherwise.
- */
-static inline bool tomoyo_invalid(const unsigned char c)
-{
- return c && (c <= ' ' || c >= 127);
-}
-
static inline void tomoyo_put_name(const struct tomoyo_path_info *name)
{
if (name) {
struct tomoyo_name *ptr =
container_of(name, typeof(*ptr), entry);
- atomic_dec(&ptr->users);
+ atomic_dec(&ptr->head.users);
}
}
+/**
+ * tomoyo_put_condition - Drop reference on "struct tomoyo_condition".
+ *
+ * @cond: Pointer to "struct tomoyo_condition". Maybe NULL.
+ *
+ * Returns nothing.
+ */
+static inline void tomoyo_put_condition(struct tomoyo_condition *cond)
+{
+ if (cond)
+ atomic_dec(&cond->head.users);
+}
+
+/**
+ * tomoyo_put_group - Drop reference on "struct tomoyo_group".
+ *
+ * @group: Pointer to "struct tomoyo_group". Maybe NULL.
+ *
+ * Returns nothing.
+ */
static inline void tomoyo_put_group(struct tomoyo_group *group)
{
if (group)
- atomic_dec(&group->users);
+ atomic_dec(&group->head.users);
}
+/**
+ * tomoyo_domain - Get "struct tomoyo_domain_info" for current thread.
+ *
+ * Returns pointer to "struct tomoyo_domain_info" for current thread.
+ */
static inline struct tomoyo_domain_info *tomoyo_domain(void)
{
return current_cred()->security;
}
+/**
+ * tomoyo_real_domain - Get "struct tomoyo_domain_info" for specified thread.
+ *
+ * @task: Pointer to "struct task_struct".
+ *
+ * Returns pointer to "struct tomoyo_security" for specified thread.
+ */
static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct
*task)
{
return task_cred_xxx(task, security);
}
-static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *p1,
- const struct tomoyo_acl_info *p2)
-{
- return p1->type == p2->type;
-}
-
+/**
+ * tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry.
+ *
+ * @a: Pointer to "struct tomoyo_name_union".
+ * @b: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
static inline bool tomoyo_same_name_union
-(const struct tomoyo_name_union *p1, const struct tomoyo_name_union *p2)
+(const struct tomoyo_name_union *a, const struct tomoyo_name_union *b)
{
- return p1->filename == p2->filename && p1->group == p2->group &&
- p1->is_group == p2->is_group;
+ return a->filename == b->filename && a->group == b->group;
}
+/**
+ * tomoyo_same_number_union - Check for duplicated "struct tomoyo_number_union" entry.
+ *
+ * @a: Pointer to "struct tomoyo_number_union".
+ * @b: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
static inline bool tomoyo_same_number_union
-(const struct tomoyo_number_union *p1, const struct tomoyo_number_union *p2)
+(const struct tomoyo_number_union *a, const struct tomoyo_number_union *b)
{
- return p1->values[0] == p2->values[0] && p1->values[1] == p2->values[1]
- && p1->group == p2->group && p1->min_type == p2->min_type &&
- p1->max_type == p2->max_type && p1->is_group == p2->is_group;
+ return a->values[0] == b->values[0] && a->values[1] == b->values[1] &&
+ a->group == b->group && a->value_type[0] == b->value_type[0] &&
+ a->value_type[1] == b->value_type[1];
}
/**
+ * tomoyo_current_namespace - Get "struct tomoyo_policy_namespace" for current thread.
+ *
+ * Returns pointer to "struct tomoyo_policy_namespace" for current thread.
+ */
+static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void)
+{
+ return tomoyo_domain()->ns;
+}
+
+#if defined(CONFIG_SLOB)
+
+/**
+ * tomoyo_round2 - Round up to power of 2 for calculating memory usage.
+ *
+ * @size: Size to be rounded up.
+ *
+ * Returns @size.
+ *
+ * Since SLOB does not round up, this function simply returns @size.
+ */
+static inline int tomoyo_round2(size_t size)
+{
+ return size;
+}
+
+#else
+
+/**
+ * tomoyo_round2 - Round up to power of 2 for calculating memory usage.
+ *
+ * @size: Size to be rounded up.
+ *
+ * Returns rounded size.
+ *
+ * Strictly speaking, SLAB may be able to allocate (e.g.) 96 bytes instead of
+ * (e.g.) 128 bytes.
+ */
+static inline int tomoyo_round2(size_t size)
+{
+#if PAGE_SIZE == 4096
+ size_t bsize = 32;
+#else
+ size_t bsize = 64;
+#endif
+ if (!size)
+ return 0;
+ while (size > bsize)
+ bsize <<= 1;
+ return bsize;
+}
+
+#endif
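+
+/*
+ * Example (added for illustration): with 4096-byte pages, a minimum
+ * block of 32 bytes is assumed, so tomoyo_round2(100) returns 128 and
+ * tomoyo_round2(32) returns 32.
+ */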
+
+/**
* list_for_each_cookie - iterate over a list with cookie.
* @pos: the &struct list_head to use as a loop cursor.
* @head: the head for your list.
diff --git a/security/tomoyo/condition.c b/security/tomoyo/condition.c
new file mode 100644
index 0000000..8a05f71
--- /dev/null
+++ b/security/tomoyo/condition.c
@@ -0,0 +1,1035 @@
+/*
+ * security/tomoyo/condition.c
+ *
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
+ */
+
+#include "common.h"
+#include <linux/slab.h>
+
+/* List of "struct tomoyo_condition". */
+LIST_HEAD(tomoyo_condition_list);
+
+/**
+ * tomoyo_argv - Check argv[] in "struct linux_binprm".
+ *
+ * @index: Index number of @arg_ptr.
+ * @arg_ptr: Contents of argv[@index].
+ * @argc: Length of @argv.
+ * @argv: Pointer to "struct tomoyo_argv".
+ * @checked: Set to true if @argv[@index] was found.
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_argv(const unsigned int index, const char *arg_ptr,
+ const int argc, const struct tomoyo_argv *argv,
+ u8 *checked)
+{
+ int i;
+ struct tomoyo_path_info arg;
+ arg.name = arg_ptr;
+ for (i = 0; i < argc; argv++, checked++, i++) {
+ bool result;
+ if (index != argv->index)
+ continue;
+ *checked = 1;
+ tomoyo_fill_path_info(&arg);
+ result = tomoyo_path_matches_pattern(&arg, argv->value);
+ if (argv->is_not)
+ result = !result;
+ if (!result)
+ return false;
+ }
+ return true;
+}
+
+/**
+ * tomoyo_envp - Check envp[] in "struct linux_binprm".
+ *
+ * @env_name: The name of environment variable.
+ * @env_value: The value of environment variable.
+ * @envc: Length of @envp.
+ * @envp: Pointer to "struct tomoyo_envp".
+ * @checked: Set to true if @envp[@env_name] was found.
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_envp(const char *env_name, const char *env_value,
+ const int envc, const struct tomoyo_envp *envp,
+ u8 *checked)
+{
+ int i;
+ struct tomoyo_path_info name;
+ struct tomoyo_path_info value;
+ name.name = env_name;
+ tomoyo_fill_path_info(&name);
+ value.name = env_value;
+ tomoyo_fill_path_info(&value);
+ for (i = 0; i < envc; envp++, checked++, i++) {
+ bool result;
+ if (!tomoyo_path_matches_pattern(&name, envp->name))
+ continue;
+ *checked = 1;
+ if (envp->value) {
+ result = tomoyo_path_matches_pattern(&value,
+ envp->value);
+ if (envp->is_not)
+ result = !result;
+ } else {
+ result = true;
+ if (!envp->is_not)
+ result = !result;
+ }
+ if (!result)
+ return false;
+ }
+ return true;
+}
+
+/**
+ * tomoyo_scan_bprm - Scan "struct linux_binprm".
+ *
+ * @ee: Pointer to "struct tomoyo_execve".
+ * @argc: Length of @argv.
+ * @argv: Pointer to "struct tomoyo_argv".
+ * @envc: Length of @envp.
+ * @envp: Pointer to "struct tomoyo_envp".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_scan_bprm(struct tomoyo_execve *ee,
+ const u16 argc, const struct tomoyo_argv *argv,
+ const u16 envc, const struct tomoyo_envp *envp)
+{
+ struct linux_binprm *bprm = ee->bprm;
+ struct tomoyo_page_dump *dump = &ee->dump;
+ char *arg_ptr = ee->tmp;
+ int arg_len = 0;
+ unsigned long pos = bprm->p;
+ int offset = pos % PAGE_SIZE;
+ int argv_count = bprm->argc;
+ int envp_count = bprm->envc;
+ bool result = true;
+ u8 local_checked[32];
+ u8 *checked;
+ if (argc + envc <= sizeof(local_checked)) {
+ checked = local_checked;
+ memset(local_checked, 0, sizeof(local_checked));
+ } else {
+ checked = kzalloc(argc + envc, GFP_NOFS);
+ if (!checked)
+ return false;
+ }
+ while (argv_count || envp_count) {
+ if (!tomoyo_dump_page(bprm, pos, dump)) {
+ result = false;
+ goto out;
+ }
+ pos += PAGE_SIZE - offset;
+ while (offset < PAGE_SIZE) {
+ /* Read. */
+ const char *kaddr = dump->data;
+ const unsigned char c = kaddr[offset++];
+ if (c && arg_len < TOMOYO_EXEC_TMPSIZE - 10) {
+ if (c == '\\') {
+ arg_ptr[arg_len++] = '\\';
+ arg_ptr[arg_len++] = '\\';
+ } else if (c > ' ' && c < 127) {
+ arg_ptr[arg_len++] = c;
+ } else {
+ arg_ptr[arg_len++] = '\\';
+ arg_ptr[arg_len++] = (c >> 6) + '0';
+ arg_ptr[arg_len++] =
+ ((c >> 3) & 7) + '0';
+ arg_ptr[arg_len++] = (c & 7) + '0';
+ }
+ } else {
+ arg_ptr[arg_len] = '\0';
+ }
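+			/*
+			 * Note (added for clarity): non-printable bytes are
+			 * stored as "\ooo" octal escapes, e.g. a newline
+			 * becomes "\012", so patterns can match them as text.
+			 */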
+ if (c)
+ continue;
+ /* Check. */
+ if (argv_count) {
+ if (!tomoyo_argv(bprm->argc - argv_count,
+ arg_ptr, argc, argv,
+ checked)) {
+ result = false;
+ break;
+ }
+ argv_count--;
+ } else if (envp_count) {
+ char *cp = strchr(arg_ptr, '=');
+ if (cp) {
+ *cp = '\0';
+ if (!tomoyo_envp(arg_ptr, cp + 1,
+ envc, envp,
+ checked + argc)) {
+ result = false;
+ break;
+ }
+ }
+ envp_count--;
+ } else {
+ break;
+ }
+ arg_len = 0;
+ }
+ offset = 0;
+ if (!result)
+ break;
+ }
+out:
+ if (result) {
+ int i;
+ /* Check not-yet-checked entries. */
+ for (i = 0; i < argc; i++) {
+ if (checked[i])
+ continue;
+ /*
+ * Return true only if all unchecked indexes in
+ * bprm->argv[] are not matched.
+ */
+ if (argv[i].is_not)
+ continue;
+ result = false;
+ break;
+ }
+ for (i = 0; i < envc; envp++, i++) {
+ if (checked[argc + i])
+ continue;
+ /*
+ * Return true only if all unchecked environ variables
+ * in bprm->envp[] are either undefined or not matched.
+ */
+ if ((!envp->value && !envp->is_not) ||
+ (envp->value && envp->is_not))
+ continue;
+ result = false;
+ break;
+ }
+ }
+ if (checked != local_checked)
+ kfree(checked);
+ return result;
+}
+
+/**
+ * tomoyo_scan_exec_realpath - Check "exec.realpath" parameter of "struct tomoyo_condition".
+ *
+ * @file: Pointer to "struct file".
+ * @ptr: Pointer to "struct tomoyo_name_union".
+ * @match: True if "exec.realpath=", false if "exec.realpath!=".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_scan_exec_realpath(struct file *file,
+ const struct tomoyo_name_union *ptr,
+ const bool match)
+{
+ bool result;
+ struct tomoyo_path_info exe;
+ if (!file)
+ return false;
+ exe.name = tomoyo_realpath_from_path(&file->f_path);
+ if (!exe.name)
+ return false;
+ tomoyo_fill_path_info(&exe);
+ result = tomoyo_compare_name_union(&exe, ptr);
+ kfree(exe.name);
+ return result == match;
+}
+
+/**
+ * tomoyo_get_dqword - tomoyo_get_name() for a quoted string.
+ *
+ * @start: String to save.
+ *
+ * Returns pointer to "struct tomoyo_path_info" on success, NULL otherwise.
+ */
+static const struct tomoyo_path_info *tomoyo_get_dqword(char *start)
+{
+ char *cp = start + strlen(start) - 1;
+ if (cp == start || *start++ != '"' || *cp != '"')
+ return NULL;
+ *cp = '\0';
+ if (*start && !tomoyo_correct_word(start))
+ return NULL;
+ return tomoyo_get_name(start);
+}
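+
+/*
+ * Example (added for illustration): given the token "/bin/sh" including
+ * its surrounding double quotes, tomoyo_get_dqword() strips the quotes
+ * and returns the shared "struct tomoyo_path_info" for /bin/sh; tokens
+ * without both quotes are rejected.
+ */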
+
+/**
+ * tomoyo_parse_name_union_quoted - Parse a quoted word.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_name_union_quoted(struct tomoyo_acl_param *param,
+ struct tomoyo_name_union *ptr)
+{
+ char *filename = param->data;
+ if (*filename == '@')
+ return tomoyo_parse_name_union(param, ptr);
+ ptr->filename = tomoyo_get_dqword(filename);
+ return ptr->filename != NULL;
+}
+
+/**
+ * tomoyo_parse_argv - Parse an argv[] condition part.
+ *
+ * @left: Lefthand value.
+ * @right: Righthand value.
+ * @argv: Pointer to "struct tomoyo_argv".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_argv(char *left, char *right,
+ struct tomoyo_argv *argv)
+{
+ if (tomoyo_parse_ulong(&argv->index, &left) !=
+ TOMOYO_VALUE_TYPE_DECIMAL || *left++ != ']' || *left)
+ return false;
+ argv->value = tomoyo_get_dqword(right);
+ return argv->value != NULL;
+}
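+
+/*
+ * Example (added for illustration): a condition such as
+ * exec.argv[0]="/bin/ls" reaches this parser with @left pointing at
+ * "0]" (the "exec.argv[" prefix is stripped by the caller) and @right
+ * at the quoted "/bin/ls".
+ */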
+
+/**
+ * tomoyo_parse_envp - Parse an envp[] condition part.
+ *
+ * @left: Lefthand value.
+ * @right: Righthand value.
+ * @envp: Pointer to "struct tomoyo_envp".
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool tomoyo_parse_envp(char *left, char *right,
+ struct tomoyo_envp *envp)
+{
+ const struct tomoyo_path_info *name;
+ const struct tomoyo_path_info *value;
+ char *cp = left + strlen(left) - 1;
+ if (*cp-- != ']' || *cp != '"')
+ goto out;
+ *cp = '\0';
+ if (!tomoyo_correct_word(left))
+ goto out;
+ name = tomoyo_get_name(left);
+ if (!name)
+ goto out;
+ if (!strcmp(right, "NULL")) {
+ value = NULL;
+ } else {
+ value = tomoyo_get_dqword(right);
+ if (!value) {
+ tomoyo_put_name(name);
+ goto out;
+ }
+ }
+ envp->name = name;
+ envp->value = value;
+ return true;
+out:
+ return false;
+}
+
+/**
+ * tomoyo_same_condition - Check for duplicated "struct tomoyo_condition" entry.
+ *
+ * @a: Pointer to "struct tomoyo_condition".
+ * @b: Pointer to "struct tomoyo_condition".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static inline bool tomoyo_same_condition(const struct tomoyo_condition *a,
+ const struct tomoyo_condition *b)
+{
+ return a->size == b->size && a->condc == b->condc &&
+ a->numbers_count == b->numbers_count &&
+ a->names_count == b->names_count &&
+ a->argc == b->argc && a->envc == b->envc &&
+ !memcmp(a + 1, b + 1, a->size - sizeof(*a));
+}
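+
+/*
+ * Note (added for clarity): every variable-length part of a condition
+ * lives in one contiguous allocation directly after the header, so a
+ * single memcmp() over "a + 1" compares two conditions completely once
+ * the element counts match.
+ */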
+
+/**
+ * tomoyo_condition_type - Get condition type.
+ *
+ * @word: Keyword string.
+ *
+ * Returns one of values in "enum tomoyo_conditions_index" on success,
+ * TOMOYO_MAX_CONDITION_KEYWORD otherwise.
+ */
+static u8 tomoyo_condition_type(const char *word)
+{
+ u8 i;
+ for (i = 0; i < TOMOYO_MAX_CONDITION_KEYWORD; i++) {
+ if (!strcmp(word, tomoyo_condition_keyword[i]))
+ break;
+ }
+ return i;
+}
+
+/* Define this to enable debug mode. */
+/* #define DEBUG_CONDITION */
+
+#ifdef DEBUG_CONDITION
+#define dprintk printk
+#else
+#define dprintk(...) do { } while (0)
+#endif
+
+/**
+ * tomoyo_commit_condition - Commit "struct tomoyo_condition".
+ *
+ * @entry: Pointer to "struct tomoyo_condition".
+ *
+ * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise.
+ *
+ * This function merges duplicated entries. This function returns NULL if
+ * @entry is not duplicated but memory quota for policy has exceeded.
+ */
+static struct tomoyo_condition *tomoyo_commit_condition
+(struct tomoyo_condition *entry)
+{
+ struct tomoyo_condition *ptr;
+ bool found = false;
+ if (mutex_lock_interruptible(&tomoyo_policy_lock)) {
+ dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__);
+ ptr = NULL;
+ found = true;
+ goto out;
+ }
+ list_for_each_entry_rcu(ptr, &tomoyo_condition_list, head.list) {
+ if (!tomoyo_same_condition(ptr, entry))
+ continue;
+ /* Same entry found. Share this entry. */
+ atomic_inc(&ptr->head.users);
+ found = true;
+ break;
+ }
+ if (!found) {
+ if (tomoyo_memory_ok(entry)) {
+ atomic_set(&entry->head.users, 1);
+ list_add_rcu(&entry->head.list,
+ &tomoyo_condition_list);
+ } else {
+ found = true;
+ ptr = NULL;
+ }
+ }
+ mutex_unlock(&tomoyo_policy_lock);
+out:
+ if (found) {
+ tomoyo_del_condition(&entry->head.list);
+ kfree(entry);
+ entry = ptr;
+ }
+ return entry;
+}
+
+/**
+ * tomoyo_get_condition - Parse condition part.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ *
+ * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise.
+ */
+struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param)
+{
+ struct tomoyo_condition *entry = NULL;
+ struct tomoyo_condition_element *condp = NULL;
+ struct tomoyo_number_union *numbers_p = NULL;
+ struct tomoyo_name_union *names_p = NULL;
+ struct tomoyo_argv *argv = NULL;
+ struct tomoyo_envp *envp = NULL;
+ struct tomoyo_condition e = { };
+ char * const start_of_string = param->data;
+ char * const end_of_string = start_of_string + strlen(start_of_string);
+ char *pos;
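+	/*
+	 * Note (added for clarity): this function makes two passes over
+	 * the string. The first pass (all element pointers NULL) only
+	 * counts entries; after one block large enough for every array
+	 * is allocated, the separators are restored and the loop reruns
+	 * to fill the arrays in place.
+	 */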
+rerun:
+ pos = start_of_string;
+ while (1) {
+ u8 left = -1;
+ u8 right = -1;
+ char *left_word = pos;
+ char *cp;
+ char *right_word;
+ bool is_not;
+ if (!*left_word)
+ break;
+ /*
+ * Since left-hand condition does not allow use of "path_group"
+ * or "number_group" and environment variable's names do not
+ * accept '=', it is guaranteed that the original line consists
+ * of one or more repetitions of $left$operator$right blocks
+ * where "$left is free from '=' and ' '" and "$operator is
+ * either '=' or '!='" and "$right is free from ' '".
+ * Therefore, we can reconstruct the original line at the end
+ * of dry run even if we overwrite $operator with '\0'.
+ */
+ cp = strchr(pos, ' ');
+ if (cp) {
+ *cp = '\0'; /* Will restore later. */
+ pos = cp + 1;
+ } else {
+ pos = "";
+ }
+ right_word = strchr(left_word, '=');
+ if (!right_word || right_word == left_word)
+ goto out;
+ is_not = *(right_word - 1) == '!';
+ if (is_not)
+ *(right_word++ - 1) = '\0'; /* Will restore later. */
+ else if (*(right_word + 1) != '=')
+ *right_word++ = '\0'; /* Will restore later. */
+ else
+ goto out;
+ dprintk(KERN_WARNING "%u: <%s>%s=<%s>\n", __LINE__, left_word,
+ is_not ? "!" : "", right_word);
+ if (!strncmp(left_word, "exec.argv[", 10)) {
+ if (!argv) {
+ e.argc++;
+ e.condc++;
+ } else {
+ e.argc--;
+ e.condc--;
+ left = TOMOYO_ARGV_ENTRY;
+ argv->is_not = is_not;
+ if (!tomoyo_parse_argv(left_word + 10,
+ right_word, argv++))
+ goto out;
+ }
+ goto store_value;
+ }
+ if (!strncmp(left_word, "exec.envp[\"", 11)) {
+ if (!envp) {
+ e.envc++;
+ e.condc++;
+ } else {
+ e.envc--;
+ e.condc--;
+ left = TOMOYO_ENVP_ENTRY;
+ envp->is_not = is_not;
+ if (!tomoyo_parse_envp(left_word + 11,
+ right_word, envp++))
+ goto out;
+ }
+ goto store_value;
+ }
+ left = tomoyo_condition_type(left_word);
+ dprintk(KERN_WARNING "%u: <%s> left=%u\n", __LINE__, left_word,
+ left);
+ if (left == TOMOYO_MAX_CONDITION_KEYWORD) {
+ if (!numbers_p) {
+ e.numbers_count++;
+ } else {
+ e.numbers_count--;
+ left = TOMOYO_NUMBER_UNION;
+ param->data = left_word;
+ if (*left_word == '@' ||
+ !tomoyo_parse_number_union(param,
+ numbers_p++))
+ goto out;
+ }
+ }
+ if (!condp)
+ e.condc++;
+ else
+ e.condc--;
+ if (left == TOMOYO_EXEC_REALPATH ||
+ left == TOMOYO_SYMLINK_TARGET) {
+ if (!names_p) {
+ e.names_count++;
+ } else {
+ e.names_count--;
+ right = TOMOYO_NAME_UNION;
+ param->data = right_word;
+ if (!tomoyo_parse_name_union_quoted(param,
+ names_p++))
+ goto out;
+ }
+ goto store_value;
+ }
+ right = tomoyo_condition_type(right_word);
+ if (right == TOMOYO_MAX_CONDITION_KEYWORD) {
+ if (!numbers_p) {
+ e.numbers_count++;
+ } else {
+ e.numbers_count--;
+ right = TOMOYO_NUMBER_UNION;
+ param->data = right_word;
+ if (!tomoyo_parse_number_union(param,
+ numbers_p++))
+ goto out;
+ }
+ }
+store_value:
+ if (!condp) {
+ dprintk(KERN_WARNING "%u: dry_run left=%u right=%u "
+ "match=%u\n", __LINE__, left, right, !is_not);
+ continue;
+ }
+ condp->left = left;
+ condp->right = right;
+ condp->equals = !is_not;
+ dprintk(KERN_WARNING "%u: left=%u right=%u match=%u\n",
+ __LINE__, condp->left, condp->right,
+ condp->equals);
+ condp++;
+ }
+ dprintk(KERN_INFO "%u: cond=%u numbers=%u names=%u ac=%u ec=%u\n",
+ __LINE__, e.condc, e.numbers_count, e.names_count, e.argc,
+ e.envc);
+ if (entry) {
+ BUG_ON(e.names_count | e.numbers_count | e.argc | e.envc |
+ e.condc);
+ return tomoyo_commit_condition(entry);
+ }
+ e.size = sizeof(*entry)
+ + e.condc * sizeof(struct tomoyo_condition_element)
+ + e.numbers_count * sizeof(struct tomoyo_number_union)
+ + e.names_count * sizeof(struct tomoyo_name_union)
+ + e.argc * sizeof(struct tomoyo_argv)
+ + e.envc * sizeof(struct tomoyo_envp);
+ entry = kzalloc(e.size, GFP_NOFS);
+ if (!entry)
+ return NULL;
+ *entry = e;
+ condp = (struct tomoyo_condition_element *) (entry + 1);
+ numbers_p = (struct tomoyo_number_union *) (condp + e.condc);
+ names_p = (struct tomoyo_name_union *) (numbers_p + e.numbers_count);
+ argv = (struct tomoyo_argv *) (names_p + e.names_count);
+ envp = (struct tomoyo_envp *) (argv + e.argc);
+ {
+ bool flag = false;
+ for (pos = start_of_string; pos < end_of_string; pos++) {
+ if (*pos)
+ continue;
+ if (flag) /* Restore " ". */
+ *pos = ' ';
+ else if (*(pos + 1) == '=') /* Restore "!=". */
+ *pos = '!';
+ else /* Restore "=". */
+ *pos = '=';
+ flag = !flag;
+ }
+ }
+ goto rerun;
+out:
+ dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__);
+ if (entry) {
+ tomoyo_del_condition(&entry->head.list);
+ kfree(entry);
+ }
+ return NULL;
+}
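+
+/*
+ * Usage sketch (illustrative): for a policy line such as
+ * "file read /etc/shadow task.uid=0 task.euid=0", param->data points at
+ * the "task.uid=0 task.euid=0" remainder when this function is called.
+ */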
+
+/**
+ * tomoyo_get_attributes - Revalidate "struct inode".
+ *
+ * @obj: Pointer to "struct tomoyo_obj_info".
+ *
+ * Returns nothing.
+ */
+void tomoyo_get_attributes(struct tomoyo_obj_info *obj)
+{
+ u8 i;
+ struct dentry *dentry = NULL;
+
+ for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) {
+ struct inode *inode;
+ switch (i) {
+ case TOMOYO_PATH1:
+ dentry = obj->path1.dentry;
+ if (!dentry)
+ continue;
+ break;
+ case TOMOYO_PATH2:
+ dentry = obj->path2.dentry;
+ if (!dentry)
+ continue;
+ break;
+ default:
+ if (!dentry)
+ continue;
+ dentry = dget_parent(dentry);
+ break;
+ }
+ inode = dentry->d_inode;
+ if (inode) {
+ struct tomoyo_mini_stat *stat = &obj->stat[i];
+ stat->uid = inode->i_uid;
+ stat->gid = inode->i_gid;
+ stat->ino = inode->i_ino;
+ stat->mode = inode->i_mode;
+ stat->dev = inode->i_sb->s_dev;
+ stat->rdev = inode->i_rdev;
+ obj->stat_valid[i] = true;
+ }
+ if (i & 1) /* i == TOMOYO_PATH1_PARENT ||
+ i == TOMOYO_PATH2_PARENT */
+ dput(dentry);
+ }
+}
+
+/**
+ * tomoyo_condition - Check condition part.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @cond: Pointer to "struct tomoyo_condition". Maybe NULL.
+ *
+ * Returns true on success, false otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+bool tomoyo_condition(struct tomoyo_request_info *r,
+ const struct tomoyo_condition *cond)
+{
+ u32 i;
+ unsigned long min_v[2] = { 0, 0 };
+ unsigned long max_v[2] = { 0, 0 };
+ const struct tomoyo_condition_element *condp;
+ const struct tomoyo_number_union *numbers_p;
+ const struct tomoyo_name_union *names_p;
+ const struct tomoyo_argv *argv;
+ const struct tomoyo_envp *envp;
+ struct tomoyo_obj_info *obj;
+ u16 condc;
+ u16 argc;
+ u16 envc;
+ struct linux_binprm *bprm = NULL;
+ if (!cond)
+ return true;
+ condc = cond->condc;
+ argc = cond->argc;
+ envc = cond->envc;
+ obj = r->obj;
+ if (r->ee)
+ bprm = r->ee->bprm;
+ if (!bprm && (argc || envc))
+ return false;
+ condp = (struct tomoyo_condition_element *) (cond + 1);
+ numbers_p = (const struct tomoyo_number_union *) (condp + condc);
+ names_p = (const struct tomoyo_name_union *)
+ (numbers_p + cond->numbers_count);
+ argv = (const struct tomoyo_argv *) (names_p + cond->names_count);
+ envp = (const struct tomoyo_envp *) (argv + argc);
+ for (i = 0; i < condc; i++) {
+ const bool match = condp->equals;
+ const u8 left = condp->left;
+ const u8 right = condp->right;
+ bool is_bitop[2] = { false, false };
+ u8 j;
+ condp++;
+ /* Check argv[] and envp[] later. */
+ if (left == TOMOYO_ARGV_ENTRY || left == TOMOYO_ENVP_ENTRY)
+ continue;
+ /* Check string expressions. */
+ if (right == TOMOYO_NAME_UNION) {
+ const struct tomoyo_name_union *ptr = names_p++;
+ switch (left) {
+ struct tomoyo_path_info *symlink;
+ struct tomoyo_execve *ee;
+ struct file *file;
+ case TOMOYO_SYMLINK_TARGET:
+ symlink = obj ? obj->symlink_target : NULL;
+ if (!symlink ||
+ !tomoyo_compare_name_union(symlink, ptr)
+ == match)
+ goto out;
+ break;
+ case TOMOYO_EXEC_REALPATH:
+ ee = r->ee;
+ file = ee ? ee->bprm->file : NULL;
+ if (!tomoyo_scan_exec_realpath(file, ptr,
+ match))
+ goto out;
+ break;
+ }
+ continue;
+ }
+ /* Check numeric or bit-op expressions. */
+ for (j = 0; j < 2; j++) {
+ const u8 index = j ? right : left;
+ unsigned long value = 0;
+ switch (index) {
+ case TOMOYO_TASK_UID:
+ value = current_uid();
+ break;
+ case TOMOYO_TASK_EUID:
+ value = current_euid();
+ break;
+ case TOMOYO_TASK_SUID:
+ value = current_suid();
+ break;
+ case TOMOYO_TASK_FSUID:
+ value = current_fsuid();
+ break;
+ case TOMOYO_TASK_GID:
+ value = current_gid();
+ break;
+ case TOMOYO_TASK_EGID:
+ value = current_egid();
+ break;
+ case TOMOYO_TASK_SGID:
+ value = current_sgid();
+ break;
+ case TOMOYO_TASK_FSGID:
+ value = current_fsgid();
+ break;
+ case TOMOYO_TASK_PID:
+ value = tomoyo_sys_getpid();
+ break;
+ case TOMOYO_TASK_PPID:
+ value = tomoyo_sys_getppid();
+ break;
+ case TOMOYO_TYPE_IS_SOCKET:
+ value = S_IFSOCK;
+ break;
+ case TOMOYO_TYPE_IS_SYMLINK:
+ value = S_IFLNK;
+ break;
+ case TOMOYO_TYPE_IS_FILE:
+ value = S_IFREG;
+ break;
+ case TOMOYO_TYPE_IS_BLOCK_DEV:
+ value = S_IFBLK;
+ break;
+ case TOMOYO_TYPE_IS_DIRECTORY:
+ value = S_IFDIR;
+ break;
+ case TOMOYO_TYPE_IS_CHAR_DEV:
+ value = S_IFCHR;
+ break;
+ case TOMOYO_TYPE_IS_FIFO:
+ value = S_IFIFO;
+ break;
+ case TOMOYO_MODE_SETUID:
+ value = S_ISUID;
+ break;
+ case TOMOYO_MODE_SETGID:
+ value = S_ISGID;
+ break;
+ case TOMOYO_MODE_STICKY:
+ value = S_ISVTX;
+ break;
+ case TOMOYO_MODE_OWNER_READ:
+ value = S_IRUSR;
+ break;
+ case TOMOYO_MODE_OWNER_WRITE:
+ value = S_IWUSR;
+ break;
+ case TOMOYO_MODE_OWNER_EXECUTE:
+ value = S_IXUSR;
+ break;
+ case TOMOYO_MODE_GROUP_READ:
+ value = S_IRGRP;
+ break;
+ case TOMOYO_MODE_GROUP_WRITE:
+ value = S_IWGRP;
+ break;
+ case TOMOYO_MODE_GROUP_EXECUTE:
+ value = S_IXGRP;
+ break;
+ case TOMOYO_MODE_OTHERS_READ:
+ value = S_IROTH;
+ break;
+ case TOMOYO_MODE_OTHERS_WRITE:
+ value = S_IWOTH;
+ break;
+ case TOMOYO_MODE_OTHERS_EXECUTE:
+ value = S_IXOTH;
+ break;
+ case TOMOYO_EXEC_ARGC:
+ if (!bprm)
+ goto out;
+ value = bprm->argc;
+ break;
+ case TOMOYO_EXEC_ENVC:
+ if (!bprm)
+ goto out;
+ value = bprm->envc;
+ break;
+ case TOMOYO_NUMBER_UNION:
+ /* Fetch values later. */
+ break;
+ default:
+ if (!obj)
+ goto out;
+ if (!obj->validate_done) {
+ tomoyo_get_attributes(obj);
+ obj->validate_done = true;
+ }
+ {
+ u8 stat_index;
+ struct tomoyo_mini_stat *stat;
+ switch (index) {
+ case TOMOYO_PATH1_UID:
+ case TOMOYO_PATH1_GID:
+ case TOMOYO_PATH1_INO:
+ case TOMOYO_PATH1_MAJOR:
+ case TOMOYO_PATH1_MINOR:
+ case TOMOYO_PATH1_TYPE:
+ case TOMOYO_PATH1_DEV_MAJOR:
+ case TOMOYO_PATH1_DEV_MINOR:
+ case TOMOYO_PATH1_PERM:
+ stat_index = TOMOYO_PATH1;
+ break;
+ case TOMOYO_PATH2_UID:
+ case TOMOYO_PATH2_GID:
+ case TOMOYO_PATH2_INO:
+ case TOMOYO_PATH2_MAJOR:
+ case TOMOYO_PATH2_MINOR:
+ case TOMOYO_PATH2_TYPE:
+ case TOMOYO_PATH2_DEV_MAJOR:
+ case TOMOYO_PATH2_DEV_MINOR:
+ case TOMOYO_PATH2_PERM:
+ stat_index = TOMOYO_PATH2;
+ break;
+ case TOMOYO_PATH1_PARENT_UID:
+ case TOMOYO_PATH1_PARENT_GID:
+ case TOMOYO_PATH1_PARENT_INO:
+ case TOMOYO_PATH1_PARENT_PERM:
+ stat_index =
+ TOMOYO_PATH1_PARENT;
+ break;
+ case TOMOYO_PATH2_PARENT_UID:
+ case TOMOYO_PATH2_PARENT_GID:
+ case TOMOYO_PATH2_PARENT_INO:
+ case TOMOYO_PATH2_PARENT_PERM:
+ stat_index =
+ TOMOYO_PATH2_PARENT;
+ break;
+ default:
+ goto out;
+ }
+ if (!obj->stat_valid[stat_index])
+ goto out;
+ stat = &obj->stat[stat_index];
+ switch (index) {
+ case TOMOYO_PATH1_UID:
+ case TOMOYO_PATH2_UID:
+ case TOMOYO_PATH1_PARENT_UID:
+ case TOMOYO_PATH2_PARENT_UID:
+ value = stat->uid;
+ break;
+ case TOMOYO_PATH1_GID:
+ case TOMOYO_PATH2_GID:
+ case TOMOYO_PATH1_PARENT_GID:
+ case TOMOYO_PATH2_PARENT_GID:
+ value = stat->gid;
+ break;
+ case TOMOYO_PATH1_INO:
+ case TOMOYO_PATH2_INO:
+ case TOMOYO_PATH1_PARENT_INO:
+ case TOMOYO_PATH2_PARENT_INO:
+ value = stat->ino;
+ break;
+ case TOMOYO_PATH1_MAJOR:
+ case TOMOYO_PATH2_MAJOR:
+ value = MAJOR(stat->dev);
+ break;
+ case TOMOYO_PATH1_MINOR:
+ case TOMOYO_PATH2_MINOR:
+ value = MINOR(stat->dev);
+ break;
+ case TOMOYO_PATH1_TYPE:
+ case TOMOYO_PATH2_TYPE:
+ value = stat->mode & S_IFMT;
+ break;
+ case TOMOYO_PATH1_DEV_MAJOR:
+ case TOMOYO_PATH2_DEV_MAJOR:
+ value = MAJOR(stat->rdev);
+ break;
+ case TOMOYO_PATH1_DEV_MINOR:
+ case TOMOYO_PATH2_DEV_MINOR:
+ value = MINOR(stat->rdev);
+ break;
+ case TOMOYO_PATH1_PERM:
+ case TOMOYO_PATH2_PERM:
+ case TOMOYO_PATH1_PARENT_PERM:
+ case TOMOYO_PATH2_PARENT_PERM:
+ value = stat->mode & S_IALLUGO;
+ break;
+ }
+ }
+ break;
+ }
+ max_v[j] = value;
+ min_v[j] = value;
+ switch (index) {
+ case TOMOYO_MODE_SETUID:
+ case TOMOYO_MODE_SETGID:
+ case TOMOYO_MODE_STICKY:
+ case TOMOYO_MODE_OWNER_READ:
+ case TOMOYO_MODE_OWNER_WRITE:
+ case TOMOYO_MODE_OWNER_EXECUTE:
+ case TOMOYO_MODE_GROUP_READ:
+ case TOMOYO_MODE_GROUP_WRITE:
+ case TOMOYO_MODE_GROUP_EXECUTE:
+ case TOMOYO_MODE_OTHERS_READ:
+ case TOMOYO_MODE_OTHERS_WRITE:
+ case TOMOYO_MODE_OTHERS_EXECUTE:
+ is_bitop[j] = true;
+ }
+ }
+ if (left == TOMOYO_NUMBER_UNION) {
+ /* Fetch values now. */
+ const struct tomoyo_number_union *ptr = numbers_p++;
+ min_v[0] = ptr->values[0];
+ max_v[0] = ptr->values[1];
+ }
+ if (right == TOMOYO_NUMBER_UNION) {
+ /* Fetch values now. */
+ const struct tomoyo_number_union *ptr = numbers_p++;
+ if (ptr->group) {
+ if (tomoyo_number_matches_group(min_v[0],
+ max_v[0],
+ ptr->group)
+ == match)
+ continue;
+ } else {
+ if ((min_v[0] <= ptr->values[1] &&
+ max_v[0] >= ptr->values[0]) == match)
+ continue;
+ }
+ goto out;
+ }
+ /*
+ * Bit operation is valid only when counterpart value
+ * represents permission.
+ */
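+		/*
+		 * Example (added for illustration): pairing a mode-bit
+		 * constant such as S_IWUSR with a perm value tests that
+		 * single bit; pairing two bit constants, or a bit constant
+		 * with a non-perm value, fails the check.
+		 */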
+ if (is_bitop[0] && is_bitop[1]) {
+ goto out;
+ } else if (is_bitop[0]) {
+ switch (right) {
+ case TOMOYO_PATH1_PERM:
+ case TOMOYO_PATH1_PARENT_PERM:
+ case TOMOYO_PATH2_PERM:
+ case TOMOYO_PATH2_PARENT_PERM:
+ if (!(max_v[0] & max_v[1]) == !match)
+ continue;
+ }
+ goto out;
+ } else if (is_bitop[1]) {
+ switch (left) {
+ case TOMOYO_PATH1_PERM:
+ case TOMOYO_PATH1_PARENT_PERM:
+ case TOMOYO_PATH2_PERM:
+ case TOMOYO_PATH2_PARENT_PERM:
+ if (!(max_v[0] & max_v[1]) == !match)
+ continue;
+ }
+ goto out;
+ }
+ /* Normal value range comparison. */
+ if ((min_v[0] <= max_v[1] && max_v[0] >= min_v[1]) == match)
+ continue;
+out:
+ return false;
+ }
+ /* Check argv[] and envp[] now. */
+ if (r->ee && (argc || envc))
+ return tomoyo_scan_bprm(r->ee, argc, argv, envc, envp);
+ return true;
+}
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 3538840..cd0f92d 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -1,9 +1,7 @@
/*
* security/tomoyo/domain.c
*
- * Domain transition functions for TOMOYO.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include "common.h"
@@ -20,8 +18,7 @@
*
* @new_entry: Pointer to "struct tomoyo_acl_info".
* @size: Size of @new_entry in bytes.
- * @is_delete: True if it is a delete request.
- * @list: Pointer to "struct list_head".
+ * @param: Pointer to "struct tomoyo_acl_param".
* @check_duplicate: Callback function to find duplicated entry.
*
* Returns 0 on success, negative value otherwise.
@@ -29,25 +26,26 @@
* Caller holds tomoyo_read_lock().
*/
int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
- bool is_delete, struct list_head *list,
+ struct tomoyo_acl_param *param,
bool (*check_duplicate) (const struct tomoyo_acl_head
*,
const struct tomoyo_acl_head
*))
{
- int error = is_delete ? -ENOENT : -ENOMEM;
+ int error = param->is_delete ? -ENOENT : -ENOMEM;
struct tomoyo_acl_head *entry;
+ struct list_head *list = param->list;
if (mutex_lock_interruptible(&tomoyo_policy_lock))
return -ENOMEM;
list_for_each_entry_rcu(entry, list, list) {
if (!check_duplicate(entry, new_entry))
continue;
- entry->is_deleted = is_delete;
+ entry->is_deleted = param->is_delete;
error = 0;
break;
}
- if (error && !is_delete) {
+ if (error && !param->is_delete) {
entry = tomoyo_commit_ok(new_entry, size);
if (entry) {
list_add_tail_rcu(&entry->list, list);
@@ -59,12 +57,25 @@
}
/**
+ * tomoyo_same_acl_head - Check for duplicated "struct tomoyo_acl_info" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *a,
+ const struct tomoyo_acl_info *b)
+{
+ return a->type == b->type && a->cond == b->cond;
+}
+
+/**
* tomoyo_update_domain - Update an entry for domain policy.
*
* @new_entry: Pointer to "struct tomoyo_acl_info".
* @size: Size of @new_entry in bytes.
- * @is_delete: True if it is a delete request.
- * @domain: Pointer to "struct tomoyo_domain_info".
+ * @param: Pointer to "struct tomoyo_acl_param".
* @check_duplicate: Callback function to find duplicated entry.
* @merge_duplicate: Callback function to merge duplicated entry.
*
@@ -73,7 +84,7 @@
* Caller holds tomoyo_read_lock().
*/
int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
- bool is_delete, struct tomoyo_domain_info *domain,
+ struct tomoyo_acl_param *param,
bool (*check_duplicate) (const struct tomoyo_acl_info
*,
const struct tomoyo_acl_info
@@ -82,13 +93,21 @@
struct tomoyo_acl_info *,
const bool))
{
+ const bool is_delete = param->is_delete;
int error = is_delete ? -ENOENT : -ENOMEM;
struct tomoyo_acl_info *entry;
+ struct list_head * const list = param->list;
+ if (param->data[0]) {
+ new_entry->cond = tomoyo_get_condition(param);
+ if (!new_entry->cond)
+ return -EINVAL;
+ }
if (mutex_lock_interruptible(&tomoyo_policy_lock))
- return error;
- list_for_each_entry_rcu(entry, &domain->acl_info_list, list) {
- if (!check_duplicate(entry, new_entry))
+ goto out;
+ list_for_each_entry_rcu(entry, list, list) {
+ if (!tomoyo_same_acl_head(entry, new_entry) ||
+ !check_duplicate(entry, new_entry))
continue;
if (merge_duplicate)
entry->is_deleted = merge_duplicate(entry, new_entry,
@@ -101,28 +120,50 @@
if (error && !is_delete) {
entry = tomoyo_commit_ok(new_entry, size);
if (entry) {
- list_add_tail_rcu(&entry->list, &domain->acl_info_list);
+ list_add_tail_rcu(&entry->list, list);
error = 0;
}
}
mutex_unlock(&tomoyo_policy_lock);
+out:
+ tomoyo_put_condition(new_entry->cond);
return error;
}
+/**
+ * tomoyo_check_acl - Do permission check.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @check_entry: Callback function to check type specific parameters.
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
void tomoyo_check_acl(struct tomoyo_request_info *r,
bool (*check_entry) (struct tomoyo_request_info *,
const struct tomoyo_acl_info *))
{
const struct tomoyo_domain_info *domain = r->domain;
struct tomoyo_acl_info *ptr;
+ bool retried = false;
+ const struct list_head *list = &domain->acl_info_list;
- list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) {
+retry:
+ list_for_each_entry_rcu(ptr, list, list) {
if (ptr->is_deleted || ptr->type != r->param_type)
continue;
- if (check_entry(r, ptr)) {
- r->granted = true;
- return;
- }
+ if (!check_entry(r, ptr))
+ continue;
+ if (!tomoyo_condition(r, ptr->cond))
+ continue;
+ r->granted = true;
+ return;
+ }
+ if (!retried) {
+ retried = true;
+ list = &domain->ns->acl_group[domain->group];
+ goto retry;
}
r->granted = false;
}
@@ -130,24 +171,29 @@
/* The list for "struct tomoyo_domain_info". */
LIST_HEAD(tomoyo_domain_list);
-struct list_head tomoyo_policy_list[TOMOYO_MAX_POLICY];
-struct list_head tomoyo_group_list[TOMOYO_MAX_GROUP];
-
/**
* tomoyo_last_word - Get last component of a domainname.
*
- * @domainname: Domainname to check.
+ * @name: Domainname to check.
*
 * Returns the last word of @name.
*/
static const char *tomoyo_last_word(const char *name)
{
- const char *cp = strrchr(name, ' ');
- if (cp)
- return cp + 1;
- return name;
+ const char *cp = strrchr(name, ' ');
+ if (cp)
+ return cp + 1;
+ return name;
}
+/**
+ * tomoyo_same_transition_control - Check for duplicated "struct tomoyo_transition_control" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
static bool tomoyo_same_transition_control(const struct tomoyo_acl_head *a,
const struct tomoyo_acl_head *b)
{
@@ -163,30 +209,36 @@
}
/**
- * tomoyo_update_transition_control_entry - Update "struct tomoyo_transition_control" list.
+ * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list.
*
- * @domainname: The name of domain. Maybe NULL.
- * @program: The name of program. Maybe NULL.
- * @type: Type of transition.
- * @is_delete: True if it is a delete request.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @type: Type of this entry.
*
* Returns 0 on success, negative value otherwise.
*/
-static int tomoyo_update_transition_control_entry(const char *domainname,
- const char *program,
- const u8 type,
- const bool is_delete)
+int tomoyo_write_transition_control(struct tomoyo_acl_param *param,
+ const u8 type)
{
struct tomoyo_transition_control e = { .type = type };
- int error = is_delete ? -ENOENT : -ENOMEM;
- if (program) {
+ int error = param->is_delete ? -ENOENT : -ENOMEM;
+ char *program = param->data;
+ char *domainname = strstr(program, " from ");
+ if (domainname) {
+ *domainname = '\0';
+ domainname += 6;
+ } else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP ||
+ type == TOMOYO_TRANSITION_CONTROL_KEEP) {
+ domainname = program;
+ program = NULL;
+ }
+ if (program && strcmp(program, "any")) {
if (!tomoyo_correct_path(program))
return -EINVAL;
e.program = tomoyo_get_name(program);
if (!e.program)
goto out;
}
- if (domainname) {
+ if (domainname && strcmp(domainname, "any")) {
if (!tomoyo_correct_domain(domainname)) {
if (!tomoyo_correct_path(domainname))
goto out;
@@ -196,126 +248,136 @@
if (!e.domainname)
goto out;
}
- error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
- &tomoyo_policy_list
- [TOMOYO_ID_TRANSITION_CONTROL],
+ param->list = &param->ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL];
+ error = tomoyo_update_policy(&e.head, sizeof(e), param,
tomoyo_same_transition_control);
- out:
+out:
tomoyo_put_name(e.domainname);
tomoyo_put_name(e.program);
return error;
}
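
The writer above parses policy lines of the form "<program> from <domainname>" in place: strstr() locates the " from " separator, a NUL terminates the program half, and the keyword "any" in either half acts as a wildcard (leaving e.program or e.domainname unset). A stand-alone sketch of that in-place split (hypothetical input, user-space only):

#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[] = "/usr/sbin/sshd from <kernel>";
        char *program = line;
        char *domainname = strstr(program, " from ");

        if (domainname) {
                *domainname = '\0';  /* terminate the program half in place */
                domainname += 6;     /* skip over the " from " separator */
        }
        printf("program='%s'\n", program);
        printf("domainname='%s'\n", domainname ? domainname : "(none)");
        return 0;
}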
/**
- * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list.
+ * tomoyo_scan_transition - Try to find specific domain transition type.
*
- * @data: String to parse.
- * @is_delete: True if it is a delete request.
- * @type: Type of this entry.
+ * @list: Pointer to "struct list_head".
+ * @domainname: The name of current domain.
+ * @program: The name of requested program.
+ * @last_name: The last component of @domainname.
+ * @type: One of the values in "enum tomoyo_transition_type".
*
- * Returns 0 on success, negative value otherwise.
+ * Returns true if found one, false otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
*/
-int tomoyo_write_transition_control(char *data, const bool is_delete,
- const u8 type)
+static inline bool tomoyo_scan_transition
+(const struct list_head *list, const struct tomoyo_path_info *domainname,
+ const struct tomoyo_path_info *program, const char *last_name,
+ const enum tomoyo_transition_type type)
{
- char *domainname = strstr(data, " from ");
- if (domainname) {
- *domainname = '\0';
- domainname += 6;
- } else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP ||
- type == TOMOYO_TRANSITION_CONTROL_KEEP) {
- domainname = data;
- data = NULL;
+ const struct tomoyo_transition_control *ptr;
+ list_for_each_entry_rcu(ptr, list, head.list) {
+ if (ptr->head.is_deleted || ptr->type != type)
+ continue;
+ if (ptr->domainname) {
+ if (!ptr->is_last_name) {
+ if (ptr->domainname != domainname)
+ continue;
+ } else {
+ /*
+ * Use direct strcmp() since this path is
+ * rarely taken.
+ */
+ if (strcmp(ptr->domainname->name, last_name))
+ continue;
+ }
+ }
+ if (ptr->program && tomoyo_pathcmp(ptr->program, program))
+ continue;
+ return true;
}
- return tomoyo_update_transition_control_entry(domainname, data, type,
- is_delete);
+ return false;
}
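
Note the asymmetry inside the loop: whole domainnames are compared by pointer (ptr->domainname != domainname), which works because TOMOYO interns pathnames through tomoyo_get_name() so equal strings share one object, while the last-component case has no interned counterpart and falls back to strcmp(). A toy user-space model of why interning makes pointer equality sufficient (hypothetical helper, simplified, no locking or reference counting):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy intern table: looking up equal text always returns the same pointer. */
static const char *interned[16];
static int count;

static const char *intern(const char *s)
{
        int i;
        char *copy;

        for (i = 0; i < count; i++)
                if (!strcmp(interned[i], s))
                        return interned[i];
        copy = malloc(strlen(s) + 1);
        if (!copy)
                exit(1);
        strcpy(copy, s);
        return interned[count++] = copy;
}

int main(void)
{
        const char *a = intern("<kernel> /usr/sbin/sshd");
        const char *b = intern("<kernel> /usr/sbin/sshd");

        /* Same text, same object: a == b replaces a full strcmp(). */
        printf("a == b? %d\n", a == b);
        return 0;
}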
/**
* tomoyo_transition_type - Get domain transition type.
*
- * @domainname: The name of domain.
- * @program: The name of program.
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
+ * @domainname: The name of current domain.
+ * @program: The name of requested program.
*
- * Returns TOMOYO_TRANSITION_CONTROL_INITIALIZE if executing @program
- * reinitializes domain transition, TOMOYO_TRANSITION_CONTROL_KEEP if executing
- * @program suppresses domain transition, others otherwise.
+ * Returns TOMOYO_TRANSITION_CONTROL_RESET if executing @program causes a
+ * domain transition across namespaces, TOMOYO_TRANSITION_CONTROL_INITIALIZE if
+ * executing @program reinitializes domain transition within that namespace,
+ * TOMOYO_TRANSITION_CONTROL_KEEP if executing @program stays at @domainname,
+ * others otherwise.
*
* Caller holds tomoyo_read_lock().
*/
-static u8 tomoyo_transition_type(const struct tomoyo_path_info *domainname,
- const struct tomoyo_path_info *program)
+static enum tomoyo_transition_type tomoyo_transition_type
+(const struct tomoyo_policy_namespace *ns,
+ const struct tomoyo_path_info *domainname,
+ const struct tomoyo_path_info *program)
{
- const struct tomoyo_transition_control *ptr;
const char *last_name = tomoyo_last_word(domainname->name);
- u8 type;
- for (type = 0; type < TOMOYO_MAX_TRANSITION_TYPE; type++) {
- next:
- list_for_each_entry_rcu(ptr, &tomoyo_policy_list
- [TOMOYO_ID_TRANSITION_CONTROL],
- head.list) {
- if (ptr->head.is_deleted || ptr->type != type)
- continue;
- if (ptr->domainname) {
- if (!ptr->is_last_name) {
- if (ptr->domainname != domainname)
- continue;
- } else {
- /*
- * Use direct strcmp() since this is
- * unlikely used.
- */
- if (strcmp(ptr->domainname->name,
- last_name))
- continue;
- }
- }
- if (ptr->program &&
- tomoyo_pathcmp(ptr->program, program))
- continue;
- if (type == TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE) {
- /*
- * Do not check for initialize_domain if
- * no_initialize_domain matched.
- */
- type = TOMOYO_TRANSITION_CONTROL_NO_KEEP;
- goto next;
- }
- goto done;
+ enum tomoyo_transition_type type = TOMOYO_TRANSITION_CONTROL_NO_RESET;
+ while (type < TOMOYO_MAX_TRANSITION_TYPE) {
+ const struct list_head * const list =
+ &ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL];
+ if (!tomoyo_scan_transition(list, domainname, program,
+ last_name, type)) {
+ type++;
+ continue;
}
+ if (type != TOMOYO_TRANSITION_CONTROL_NO_RESET &&
+ type != TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE)
+ break;
+ /*
+ * Do not check for reset_domain if no_reset_domain matched.
+ * Do not check for initialize_domain if no_initialize_domain
+ * matched.
+ */
+ type++;
+ type++;
}
- done:
return type;
}
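
The bare `type++; type++;` above depends on the ordering of enum tomoyo_transition_type: each "no_..." value immediately precedes the value it suppresses, so when a no_ rule matches, the loop steps past both members of the pair. A compact model of that pairing (the enum below merely mirrors that layout; it is not the kernel definition):

#include <stdio.h>

/* Paired layout: each NO_x immediately precedes x, as in the kernel enum. */
enum transition_type {
        NO_RESET, RESET,
        NO_INITIALIZE, INITIALIZE,
        NO_KEEP, KEEP,
        MAX_TRANSITION_TYPE  /* reached when no rule decides the transition */
};

/* Pretend the policy contains a single no_initialize_domain rule. */
static int matches(enum transition_type t)
{
        return t == NO_INITIALIZE;
}

int main(void)
{
        enum transition_type type = NO_RESET;

        while (type < MAX_TRANSITION_TYPE) {
                if (!matches(type)) {
                        type++;
                        continue;
                }
                if (type != NO_RESET && type != NO_INITIALIZE)
                        break;
                type += 2;  /* no_x matched: skip the suppressed x as well */
        }
        /* INITIALIZE was never consulted; the default transition applies. */
        printf("result=%d, MAX=%d\n", type, MAX_TRANSITION_TYPE);
        return 0;
}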
+/**
+ * tomoyo_same_aggregator - Check for duplicated "struct tomoyo_aggregator" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
static bool tomoyo_same_aggregator(const struct tomoyo_acl_head *a,
const struct tomoyo_acl_head *b)
{
- const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1), head);
- const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2), head);
+ const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1),
+ head);
+ const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2),
+ head);
return p1->original_name == p2->original_name &&
p1->aggregated_name == p2->aggregated_name;
}
/**
- * tomoyo_update_aggregator_entry - Update "struct tomoyo_aggregator" list.
+ * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list.
*
- * @original_name: The original program's name.
- * @aggregated_name: The program name to use.
- * @is_delete: True if it is a delete request.
+ * @param: Pointer to "struct tomoyo_acl_param".
*
* Returns 0 on success, negative value otherwise.
*
* Caller holds tomoyo_read_lock().
*/
-static int tomoyo_update_aggregator_entry(const char *original_name,
- const char *aggregated_name,
- const bool is_delete)
+int tomoyo_write_aggregator(struct tomoyo_acl_param *param)
{
struct tomoyo_aggregator e = { };
- int error = is_delete ? -ENOENT : -ENOMEM;
-
- if (!tomoyo_correct_path(original_name) ||
+ int error = param->is_delete ? -ENOENT : -ENOMEM;
+ const char *original_name = tomoyo_read_token(param);
+ const char *aggregated_name = tomoyo_read_token(param);
+ if (!tomoyo_correct_word(original_name) ||
!tomoyo_correct_path(aggregated_name))
return -EINVAL;
e.original_name = tomoyo_get_name(original_name);
@@ -323,83 +385,181 @@
if (!e.original_name || !e.aggregated_name ||
e.aggregated_name->is_patterned) /* No patterns allowed. */
goto out;
- error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
- &tomoyo_policy_list[TOMOYO_ID_AGGREGATOR],
+ param->list = &param->ns->policy_list[TOMOYO_ID_AGGREGATOR];
+ error = tomoyo_update_policy(&e.head, sizeof(e), param,
tomoyo_same_aggregator);
- out:
+out:
tomoyo_put_name(e.original_name);
tomoyo_put_name(e.aggregated_name);
return error;
}
/**
- * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list.
+ * tomoyo_find_namespace - Find specified namespace.
*
- * @data: String to parse.
- * @is_delete: True if it is a delete request.
+ * @name: Name of namespace to find.
+ * @len: Length of @name.
*
- * Returns 0 on success, negative value otherwise.
+ * Returns pointer to "struct tomoyo_policy_namespace" if found,
+ * NULL otherwise.
*
* Caller holds tomoyo_read_lock().
*/
-int tomoyo_write_aggregator(char *data, const bool is_delete)
+static struct tomoyo_policy_namespace *tomoyo_find_namespace
+(const char *name, const unsigned int len)
{
- char *cp = strchr(data, ' ');
-
- if (!cp)
- return -EINVAL;
- *cp++ = '\0';
- return tomoyo_update_aggregator_entry(data, cp, is_delete);
+ struct tomoyo_policy_namespace *ns;
+ list_for_each_entry(ns, &tomoyo_namespace_list, namespace_list) {
+ if (strncmp(name, ns->name, len) ||
+ (name[len] && name[len] != ' '))
+ continue;
+ return ns;
+ }
+ return NULL;
}
/**
- * tomoyo_assign_domain - Create a domain.
+ * tomoyo_assign_namespace - Create a new namespace.
+ *
+ * @domainname: Name of namespace to create.
+ *
+ * Returns pointer to "struct tomoyo_policy_namespace" on success,
+ * NULL otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+struct tomoyo_policy_namespace *tomoyo_assign_namespace(const char *domainname)
+{
+ struct tomoyo_policy_namespace *ptr;
+ struct tomoyo_policy_namespace *entry;
+ const char *cp = domainname;
+ unsigned int len = 0;
+ while (*cp && *cp++ != ' ')
+ len++;
+ ptr = tomoyo_find_namespace(domainname, len);
+ if (ptr)
+ return ptr;
+ if (len >= TOMOYO_EXEC_TMPSIZE - 10 || !tomoyo_domain_def(domainname))
+ return NULL;
+ entry = kzalloc(sizeof(*entry) + len + 1, GFP_NOFS);
+ if (!entry)
+ return NULL;
+ if (mutex_lock_interruptible(&tomoyo_policy_lock))
+ goto out;
+ ptr = tomoyo_find_namespace(domainname, len);
+ if (!ptr && tomoyo_memory_ok(entry)) {
+ char *name = (char *) (entry + 1);
+ ptr = entry;
+ memmove(name, domainname, len);
+ name[len] = '\0';
+ entry->name = name;
+ tomoyo_init_policy_namespace(entry);
+ entry = NULL;
+ }
+ mutex_unlock(&tomoyo_policy_lock);
+out:
+ kfree(entry);
+ return ptr;
+}
+
+/**
+ * tomoyo_namespace_jump - Check for namespace jump.
+ *
+ * @domainname: Name of domain.
+ *
+ * Returns true if namespace differs, false otherwise.
+ */
+static bool tomoyo_namespace_jump(const char *domainname)
+{
+ const char *namespace = tomoyo_current_namespace()->name;
+ const int len = strlen(namespace);
+ return strncmp(domainname, namespace, len) ||
+ (domainname[len] && domainname[len] != ' ');
+}
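
tomoyo_find_namespace() and tomoyo_namespace_jump() share one token test: the candidate must match for len bytes and the following byte must be a space or NUL, so "<kernel>" cannot accidentally match a namespace named "<kernel2>". A stand-alone illustration of the positive form of that test (hypothetical helper name):

#include <stdio.h>
#include <string.h>

/* True if the first space-delimited token of @domainname equals @ns_name. */
static int same_namespace(const char *domainname, const char *ns_name)
{
        const size_t len = strlen(ns_name);

        return !strncmp(domainname, ns_name, len) &&
               (!domainname[len] || domainname[len] == ' ');
}

int main(void)
{
        printf("%d\n", same_namespace("<kernel> /sbin/init", "<kernel>"));  /* 1 */
        printf("%d\n", same_namespace("<kernel2> /sbin/init", "<kernel>")); /* 0 */
        return 0;
}

tomoyo_namespace_jump() is simply the negation: it reports true when the current namespace name is not such a prefix of @domainname.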
+
+/**
+ * tomoyo_assign_domain - Create a domain or a namespace.
*
* @domainname: The name of domain.
- * @profile: Profile number to assign if the domain was newly created.
+ * @transit: True if the caller transits to the found or created domain.
*
* Returns pointer to "struct tomoyo_domain_info" on success, NULL otherwise.
*
* Caller holds tomoyo_read_lock().
*/
struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname,
- const u8 profile)
+ const bool transit)
{
- struct tomoyo_domain_info *entry;
- struct tomoyo_domain_info *domain = NULL;
- const struct tomoyo_path_info *saved_domainname;
- bool found = false;
-
- if (!tomoyo_correct_domain(domainname))
+ struct tomoyo_domain_info e = { };
+ struct tomoyo_domain_info *entry = tomoyo_find_domain(domainname);
+ bool created = false;
+ if (entry) {
+ if (transit) {
+ /*
+ * Since namespaces are created at runtime, the profile for a
+ * domain may not exist yet at the moment a process transits to
+ * that domain. Do not perform the domain transition if the
+ * profile for that domain has not been created.
+ */
+ if (!entry->ns->profile_ptr[entry->profile])
+ return NULL;
+ }
+ return entry;
+ }
+ /* Requested domain does not exist. */
+ /* Don't create requested domain if domainname is invalid. */
+ if (strlen(domainname) >= TOMOYO_EXEC_TMPSIZE - 10 ||
+ !tomoyo_correct_domain(domainname))
return NULL;
- saved_domainname = tomoyo_get_name(domainname);
- if (!saved_domainname)
+ /*
+ * Since the definitions of profiles and acl_groups may differ across
+ * namespaces, do not inherit "use_profile" and "use_group" settings
+ * across a namespace boundary when automatically creating the
+ * requested domain upon domain transition.
+ */
+ if (transit && tomoyo_namespace_jump(domainname))
return NULL;
- entry = kzalloc(sizeof(*entry), GFP_NOFS);
+ e.ns = tomoyo_assign_namespace(domainname);
+ if (!e.ns)
+ return NULL;
+ /*
+ * "use_profile" and "use_group" settings for automatically created
+ * domains are inherited from current domain. These are 0 for manually
+ * created domains.
+ */
+ if (transit) {
+ const struct tomoyo_domain_info *domain = tomoyo_domain();
+ e.profile = domain->profile;
+ e.group = domain->group;
+ }
+ e.domainname = tomoyo_get_name(domainname);
+ if (!e.domainname)
+ return NULL;
if (mutex_lock_interruptible(&tomoyo_policy_lock))
goto out;
- list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
- if (domain->is_deleted ||
- tomoyo_pathcmp(saved_domainname, domain->domainname))
- continue;
- found = true;
- break;
- }
- if (!found && tomoyo_memory_ok(entry)) {
- INIT_LIST_HEAD(&entry->acl_info_list);
- entry->domainname = saved_domainname;
- saved_domainname = NULL;
- entry->profile = profile;
- list_add_tail_rcu(&entry->list, &tomoyo_domain_list);
- domain = entry;
- entry = NULL;
- found = true;
+ entry = tomoyo_find_domain(domainname);
+ if (!entry) {
+ entry = tomoyo_commit_ok(&e, sizeof(e));
+ if (entry) {
+ INIT_LIST_HEAD(&entry->acl_info_list);
+ list_add_tail_rcu(&entry->list, &tomoyo_domain_list);
+ created = true;
+ }
}
mutex_unlock(&tomoyo_policy_lock);
- out:
- tomoyo_put_name(saved_domainname);
- kfree(entry);
- return found ? domain : NULL;
+out:
+ tomoyo_put_name(e.domainname);
+ if (entry && transit) {
+ if (created) {
+ struct tomoyo_request_info r;
+ tomoyo_init_request_info(&r, entry,
+ TOMOYO_MAC_FILE_EXECUTE);
+ r.granted = false;
+ tomoyo_write_log(&r, "use_profile %u\n",
+ entry->profile);
+ tomoyo_write_log(&r, "use_group %u\n", entry->group);
+ }
+ }
+ return entry;
}
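
tomoyo_assign_domain() follows the classic optimistic-creation idiom: look the domain up without the mutex, prepare the new entry, then look it up again under tomoyo_policy_lock before committing, so two racing creators cannot both insert. (In the kernel the unlocked lookup is made safe by RCU plus tomoyo_read_lock(); the pthread sketch below, with hypothetical names, ignores that detail and only shows the re-check shape.)

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t policy_lock = PTHREAD_MUTEX_INITIALIZER;

struct domain {
        struct domain *next;
        char name[64];
};
static struct domain *domain_list;

static struct domain *find_domain(const char *name)
{
        struct domain *d;

        for (d = domain_list; d; d = d->next)
                if (!strcmp(d->name, name))
                        return d;
        return NULL;
}

static struct domain *assign_domain(const char *name)
{
        struct domain *d = find_domain(name);   /* optimistic fast path */

        if (d)
                return d;
        pthread_mutex_lock(&policy_lock);
        d = find_domain(name);                  /* re-check: a racer may have won */
        if (!d) {
                d = calloc(1, sizeof(*d));
                if (d) {
                        snprintf(d->name, sizeof(d->name), "%s", name);
                        d->next = domain_list;
                        domain_list = d;        /* commit while still locked */
                }
        }
        pthread_mutex_unlock(&policy_lock);
        return d;
}

int main(void)
{
        /* Both calls must yield the same object. */
        printf("%p\n", (void *) assign_domain("<kernel> /sbin/init"));
        printf("%p\n", (void *) assign_domain("<kernel> /sbin/init"));
        return 0;
}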
/**
@@ -413,22 +573,27 @@
*/
int tomoyo_find_next_domain(struct linux_binprm *bprm)
{
- struct tomoyo_request_info r;
- char *tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS);
struct tomoyo_domain_info *old_domain = tomoyo_domain();
struct tomoyo_domain_info *domain = NULL;
const char *original_name = bprm->filename;
- u8 mode;
- bool is_enforce;
int retval = -ENOMEM;
bool need_kfree = false;
+ bool reject_on_transition_failure = false;
struct tomoyo_path_info rn = { }; /* real name */
-
- mode = tomoyo_init_request_info(&r, NULL, TOMOYO_MAC_FILE_EXECUTE);
- is_enforce = (mode == TOMOYO_CONFIG_ENFORCING);
- if (!tmp)
- goto out;
-
+ struct tomoyo_execve *ee = kzalloc(sizeof(*ee), GFP_NOFS);
+ if (!ee)
+ return -ENOMEM;
+ ee->tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS);
+ if (!ee->tmp) {
+ kfree(ee);
+ return -ENOMEM;
+ }
+ /* ee->dump.data is allocated by tomoyo_dump_page(). */
+ tomoyo_init_request_info(&ee->r, NULL, TOMOYO_MAC_FILE_EXECUTE);
+ ee->r.ee = ee;
+ ee->bprm = bprm;
+ ee->r.obj = &ee->obj;
+ ee->obj.path1 = bprm->file->f_path;
retry:
if (need_kfree) {
kfree(rn.name);
@@ -445,8 +610,10 @@
/* Check 'aggregator' directive. */
{
struct tomoyo_aggregator *ptr;
- list_for_each_entry_rcu(ptr, &tomoyo_policy_list
- [TOMOYO_ID_AGGREGATOR], head.list) {
+ struct list_head *list =
+ &old_domain->ns->policy_list[TOMOYO_ID_AGGREGATOR];
+ list_for_each_entry_rcu(ptr, list, head.list) {
if (ptr->head.is_deleted ||
!tomoyo_path_matches_pattern(&rn,
ptr->original_name))
@@ -460,7 +627,7 @@
}
/* Check execute permission. */
- retval = tomoyo_path_permission(&r, TOMOYO_TYPE_EXECUTE, &rn);
+ retval = tomoyo_path_permission(&ee->r, TOMOYO_TYPE_EXECUTE, &rn);
if (retval == TOMOYO_RETRY_REQUEST)
goto retry;
if (retval < 0)
@@ -471,20 +638,30 @@
* wildcard) rather than the pathname passed to execve()
* (which never contains wildcard).
*/
- if (r.param.path.matched_path) {
+ if (ee->r.param.path.matched_path) {
if (need_kfree)
kfree(rn.name);
need_kfree = false;
/* This is OK because it is read only. */
- rn = *r.param.path.matched_path;
+ rn = *ee->r.param.path.matched_path;
}
/* Calculate domain to transit to. */
- switch (tomoyo_transition_type(old_domain->domainname, &rn)) {
+ switch (tomoyo_transition_type(old_domain->ns, old_domain->domainname,
+ &rn)) {
+ case TOMOYO_TRANSITION_CONTROL_RESET:
+ /* Transit to the root of specified namespace. */
+ snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "<%s>", rn.name);
+ /*
+ * Make do_execve() fail if domain transition across namespaces
+ * has failed.
+ */
+ reject_on_transition_failure = true;
+ break;
case TOMOYO_TRANSITION_CONTROL_INITIALIZE:
- /* Transit to the child of tomoyo_kernel_domain domain. */
- snprintf(tmp, TOMOYO_EXEC_TMPSIZE - 1, TOMOYO_ROOT_NAME " "
- "%s", rn.name);
+ /* Transit to the child of current namespace's root. */
+ snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
+ old_domain->ns->name, rn.name);
break;
case TOMOYO_TRANSITION_CONTROL_KEEP:
/* Keep current domain. */
@@ -502,33 +679,32 @@
domain = old_domain;
} else {
/* Normal domain transition. */
- snprintf(tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
+ snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s",
old_domain->domainname->name, rn.name);
}
break;
}
- if (domain || strlen(tmp) >= TOMOYO_EXEC_TMPSIZE - 10)
- goto done;
- domain = tomoyo_find_domain(tmp);
+ if (!domain)
+ domain = tomoyo_assign_domain(ee->tmp, true);
if (domain)
- goto done;
- if (is_enforce) {
- int error = tomoyo_supervisor(&r, "# wants to create domain\n"
- "%s\n", tmp);
- if (error == TOMOYO_RETRY_REQUEST)
- goto retry;
- if (error < 0)
- goto done;
+ retval = 0;
+ else if (reject_on_transition_failure) {
+ printk(KERN_WARNING "ERROR: Domain '%s' not ready.\n",
+ ee->tmp);
+ retval = -ENOMEM;
+ } else if (ee->r.mode == TOMOYO_CONFIG_ENFORCING)
+ retval = -ENOMEM;
+ else {
+ retval = 0;
+ if (!old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED]) {
+ old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED] = true;
+ ee->r.granted = false;
+ tomoyo_write_log(&ee->r, "%s", tomoyo_dif
+ [TOMOYO_DIF_TRANSITION_FAILED]);
+ printk(KERN_WARNING
+ "ERROR: Domain '%s' not defined.\n", ee->tmp);
+ }
}
- domain = tomoyo_assign_domain(tmp, old_domain->profile);
- done:
- if (domain)
- goto out;
- printk(KERN_WARNING "TOMOYO-ERROR: Domain '%s' not defined.\n", tmp);
- if (is_enforce)
- retval = -EPERM;
- else
- old_domain->transition_failed = true;
out:
if (!domain)
domain = old_domain;
@@ -537,6 +713,54 @@
bprm->cred->security = domain;
if (need_kfree)
kfree(rn.name);
- kfree(tmp);
+ kfree(ee->tmp);
+ kfree(ee->dump.data);
+ kfree(ee);
return retval;
}
+
+/**
+ * tomoyo_dump_page - Dump a page to buffer.
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ * @pos: Location to dump.
+ * @dump: Pointer to "struct tomoyo_page_dump".
+ *
+ * Returns true on success, false otherwise.
+ */
+bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
+ struct tomoyo_page_dump *dump)
+{
+ struct page *page;
+ /* dump->data is released by tomoyo_finish_execve(). */
+ if (!dump->data) {
+ dump->data = kzalloc(PAGE_SIZE, GFP_NOFS);
+ if (!dump->data)
+ return false;
+ }
+ /* Same with get_arg_page(bprm, pos, 0) in fs/exec.c */
+#ifdef CONFIG_MMU
+ if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0)
+ return false;
+#else
+ page = bprm->page[pos / PAGE_SIZE];
+#endif
+ if (page != dump->page) {
+ const unsigned int offset = pos % PAGE_SIZE;
+ /*
+ * Maybe kmap()/kunmap() should be used here.
+ * But remove_arg_zero() uses kmap_atomic()/kunmap_atomic(),
+ * so we do the same here.
+ */
+ char *kaddr = kmap_atomic(page, KM_USER0);
+ dump->page = page;
+ memcpy(dump->data + offset, kaddr + offset,
+ PAGE_SIZE - offset);
+ kunmap_atomic(kaddr, KM_USER0);
+ }
+ /* Same with put_arg_page(page) in fs/exec.c */
+#ifdef CONFIG_MMU
+ put_page(page);
+#endif
+ return true;
+}
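
The memcpy() in tomoyo_dump_page() starts at the in-page offset rather than at the page start: callers walk the argv/envp area forward from the requested position, so (presumably) only bytes at or beyond @pos are ever consumed and the head of the page need not be copied. A user-space model of just the offset arithmetic (toy page size, hypothetical data):

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 16  /* toy page size for illustration */

int main(void)
{
        char page[PAGE_SIZE] = "arg0\0arg1\0arg2\0";
        char data[PAGE_SIZE] = { 0 };
        unsigned long pos = 5;                  /* absolute dump position */
        unsigned int offset = pos % PAGE_SIZE;  /* position within this page */

        /* Copy only from @offset to the end of the page, as the kernel does. */
        memcpy(data + offset, page + offset, PAGE_SIZE - offset);
        printf("copied %u bytes from offset %u: '%s'\n",
               PAGE_SIZE - offset, offset, data + offset);
        return 0;
}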
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index d64e8ec..743c35f 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -1,80 +1,51 @@
/*
* security/tomoyo/file.c
*
- * Pathname restriction functions.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include "common.h"
#include <linux/slab.h>
-/* Keyword array for operations with one pathname. */
-const char *tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = {
- [TOMOYO_TYPE_READ_WRITE] = "read/write",
- [TOMOYO_TYPE_EXECUTE] = "execute",
- [TOMOYO_TYPE_READ] = "read",
- [TOMOYO_TYPE_WRITE] = "write",
- [TOMOYO_TYPE_UNLINK] = "unlink",
- [TOMOYO_TYPE_RMDIR] = "rmdir",
- [TOMOYO_TYPE_TRUNCATE] = "truncate",
- [TOMOYO_TYPE_SYMLINK] = "symlink",
- [TOMOYO_TYPE_REWRITE] = "rewrite",
- [TOMOYO_TYPE_CHROOT] = "chroot",
- [TOMOYO_TYPE_UMOUNT] = "unmount",
-};
-
-/* Keyword array for operations with one pathname and three numbers. */
-const char *tomoyo_mkdev_keyword[TOMOYO_MAX_MKDEV_OPERATION] = {
- [TOMOYO_TYPE_MKBLOCK] = "mkblock",
- [TOMOYO_TYPE_MKCHAR] = "mkchar",
-};
-
-/* Keyword array for operations with two pathnames. */
-const char *tomoyo_path2_keyword[TOMOYO_MAX_PATH2_OPERATION] = {
- [TOMOYO_TYPE_LINK] = "link",
- [TOMOYO_TYPE_RENAME] = "rename",
- [TOMOYO_TYPE_PIVOT_ROOT] = "pivot_root",
-};
-
-/* Keyword array for operations with one pathname and one number. */
-const char *tomoyo_path_number_keyword[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
- [TOMOYO_TYPE_CREATE] = "create",
- [TOMOYO_TYPE_MKDIR] = "mkdir",
- [TOMOYO_TYPE_MKFIFO] = "mkfifo",
- [TOMOYO_TYPE_MKSOCK] = "mksock",
- [TOMOYO_TYPE_IOCTL] = "ioctl",
- [TOMOYO_TYPE_CHMOD] = "chmod",
- [TOMOYO_TYPE_CHOWN] = "chown",
- [TOMOYO_TYPE_CHGRP] = "chgrp",
-};
-
+/*
+ * Mapping table from "enum tomoyo_path_acl_index" to "enum tomoyo_mac_index".
+ */
static const u8 tomoyo_p2mac[TOMOYO_MAX_PATH_OPERATION] = {
- [TOMOYO_TYPE_READ_WRITE] = TOMOYO_MAC_FILE_OPEN,
[TOMOYO_TYPE_EXECUTE] = TOMOYO_MAC_FILE_EXECUTE,
[TOMOYO_TYPE_READ] = TOMOYO_MAC_FILE_OPEN,
[TOMOYO_TYPE_WRITE] = TOMOYO_MAC_FILE_OPEN,
+ [TOMOYO_TYPE_APPEND] = TOMOYO_MAC_FILE_OPEN,
[TOMOYO_TYPE_UNLINK] = TOMOYO_MAC_FILE_UNLINK,
+ [TOMOYO_TYPE_GETATTR] = TOMOYO_MAC_FILE_GETATTR,
[TOMOYO_TYPE_RMDIR] = TOMOYO_MAC_FILE_RMDIR,
[TOMOYO_TYPE_TRUNCATE] = TOMOYO_MAC_FILE_TRUNCATE,
[TOMOYO_TYPE_SYMLINK] = TOMOYO_MAC_FILE_SYMLINK,
- [TOMOYO_TYPE_REWRITE] = TOMOYO_MAC_FILE_REWRITE,
[TOMOYO_TYPE_CHROOT] = TOMOYO_MAC_FILE_CHROOT,
[TOMOYO_TYPE_UMOUNT] = TOMOYO_MAC_FILE_UMOUNT,
};
-static const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_mkdev_acl_index" to "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = {
[TOMOYO_TYPE_MKBLOCK] = TOMOYO_MAC_FILE_MKBLOCK,
[TOMOYO_TYPE_MKCHAR] = TOMOYO_MAC_FILE_MKCHAR,
};
-static const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_path2_acl_index" to "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = {
[TOMOYO_TYPE_LINK] = TOMOYO_MAC_FILE_LINK,
[TOMOYO_TYPE_RENAME] = TOMOYO_MAC_FILE_RENAME,
[TOMOYO_TYPE_PIVOT_ROOT] = TOMOYO_MAC_FILE_PIVOT_ROOT,
};
-static const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
+/*
+ * Mapping table from "enum tomoyo_path_number_acl_index" to
+ * "enum tomoyo_mac_index".
+ */
+const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = {
[TOMOYO_TYPE_CREATE] = TOMOYO_MAC_FILE_CREATE,
[TOMOYO_TYPE_MKDIR] = TOMOYO_MAC_FILE_MKDIR,
[TOMOYO_TYPE_MKFIFO] = TOMOYO_MAC_FILE_MKFIFO,
@@ -85,41 +56,76 @@
[TOMOYO_TYPE_CHGRP] = TOMOYO_MAC_FILE_CHGRP,
};
+/**
+ * tomoyo_put_name_union - Drop reference on "struct tomoyo_name_union".
+ *
+ * @ptr: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns nothing.
+ */
void tomoyo_put_name_union(struct tomoyo_name_union *ptr)
{
- if (!ptr)
- return;
- if (ptr->is_group)
- tomoyo_put_group(ptr->group);
- else
- tomoyo_put_name(ptr->filename);
+ tomoyo_put_group(ptr->group);
+ tomoyo_put_name(ptr->filename);
}
+/**
+ * tomoyo_compare_name_union - Check whether a name matches "struct tomoyo_name_union" or not.
+ *
+ * @name: Pointer to "struct tomoyo_path_info".
+ * @ptr: Pointer to "struct tomoyo_name_union".
+ *
+ * Returns "struct tomoyo_path_info" if @name matches @ptr, NULL otherwise.
+ */
const struct tomoyo_path_info *
tomoyo_compare_name_union(const struct tomoyo_path_info *name,
const struct tomoyo_name_union *ptr)
{
- if (ptr->is_group)
+ if (ptr->group)
return tomoyo_path_matches_group(name, ptr->group);
if (tomoyo_path_matches_pattern(name, ptr->filename))
return ptr->filename;
return NULL;
}
+/**
+ * tomoyo_put_number_union - Drop reference on "struct tomoyo_number_union".
+ *
+ * @ptr: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns nothing.
+ */
void tomoyo_put_number_union(struct tomoyo_number_union *ptr)
{
- if (ptr && ptr->is_group)
- tomoyo_put_group(ptr->group);
+ tomoyo_put_group(ptr->group);
}
+/**
+ * tomoyo_compare_number_union - Check whether a value matches "struct tomoyo_number_union" or not.
+ *
+ * @value: Number to check.
+ * @ptr: Pointer to "struct tomoyo_number_union".
+ *
+ * Returns true if @value matches @ptr, false otherwise.
+ */
bool tomoyo_compare_number_union(const unsigned long value,
const struct tomoyo_number_union *ptr)
{
- if (ptr->is_group)
+ if (ptr->group)
return tomoyo_number_matches_group(value, value, ptr->group);
return value >= ptr->values[0] && value <= ptr->values[1];
}
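
tomoyo_compare_number_union() treats a single number as the degenerate range [v, v]: values[0] and values[1] bound an inclusive interval, and a non-NULL group pointer takes precedence over the range. A minimal model of the range half (hypothetical struct, not the kernel one):

#include <stdbool.h>
#include <stdio.h>

/* Toy number union: just the inclusive range; the group case is omitted. */
struct number_union {
        unsigned long values[2];  /* values[0] == values[1] for a single value */
};

static bool compare_number_union(unsigned long value,
                                 const struct number_union *ptr)
{
        return value >= ptr->values[0] && value <= ptr->values[1];
}

int main(void)
{
        struct number_union mode_range = { { 0644, 0755 } };

        printf("%d %d\n", compare_number_union(0700, &mode_range),   /* 1 */
               compare_number_union(0600, &mode_range));             /* 0 */
        return 0;
}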
+/**
+ * tomoyo_add_slash - Add trailing '/' if needed.
+ *
+ * @buf: Pointer to "struct tomoyo_path_info".
+ *
+ * Returns nothing.
+ *
+ * @buf must be generated by tomoyo_encode() because this function does not
+ * allocate memory for adding '/'.
+ */
static void tomoyo_add_slash(struct tomoyo_path_info *buf)
{
if (buf->is_dir)
@@ -132,24 +138,6 @@
}
/**
- * tomoyo_strendswith - Check whether the token ends with the given token.
- *
- * @name: The token to check.
- * @tail: The token to find.
- *
- * Returns true if @name ends with @tail, false otherwise.
- */
-static bool tomoyo_strendswith(const char *name, const char *tail)
-{
- int len;
-
- if (!name || !tail)
- return false;
- len = strlen(name) - strlen(tail);
- return len >= 0 && !strcmp(name + len, tail);
-}
-
-/**
* tomoyo_get_realpath - Get realpath.
*
* @buf: Pointer to "struct tomoyo_path_info".
@@ -164,7 +152,7 @@
tomoyo_fill_path_info(buf);
return true;
}
- return false;
+ return false;
}
/**
@@ -176,13 +164,9 @@
*/
static int tomoyo_audit_path_log(struct tomoyo_request_info *r)
{
- const char *operation = tomoyo_path_keyword[r->param.path.operation];
- const struct tomoyo_path_info *filename = r->param.path.filename;
- if (r->granted)
- return 0;
- tomoyo_warn_log(r, "%s %s", operation, filename->name);
- return tomoyo_supervisor(r, "allow_%s %s\n", operation,
- tomoyo_pattern(filename));
+ return tomoyo_supervisor(r, "file %s %s\n", tomoyo_path_keyword
+ [r->param.path.operation],
+ r->param.path.filename->name);
}
/**
@@ -194,16 +178,10 @@
*/
static int tomoyo_audit_path2_log(struct tomoyo_request_info *r)
{
- const char *operation = tomoyo_path2_keyword[r->param.path2.operation];
- const struct tomoyo_path_info *filename1 = r->param.path2.filename1;
- const struct tomoyo_path_info *filename2 = r->param.path2.filename2;
- if (r->granted)
- return 0;
- tomoyo_warn_log(r, "%s %s %s", operation, filename1->name,
- filename2->name);
- return tomoyo_supervisor(r, "allow_%s %s %s\n", operation,
- tomoyo_pattern(filename1),
- tomoyo_pattern(filename2));
+ return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords
+ [tomoyo_pp2mac[r->param.path2.operation]],
+ r->param.path2.filename1->name,
+ r->param.path2.filename2->name);
}
/**
@@ -215,24 +193,18 @@
*/
static int tomoyo_audit_mkdev_log(struct tomoyo_request_info *r)
{
- const char *operation = tomoyo_mkdev_keyword[r->param.mkdev.operation];
- const struct tomoyo_path_info *filename = r->param.mkdev.filename;
- const unsigned int major = r->param.mkdev.major;
- const unsigned int minor = r->param.mkdev.minor;
- const unsigned int mode = r->param.mkdev.mode;
- if (r->granted)
- return 0;
- tomoyo_warn_log(r, "%s %s 0%o %u %u", operation, filename->name, mode,
- major, minor);
- return tomoyo_supervisor(r, "allow_%s %s 0%o %u %u\n", operation,
- tomoyo_pattern(filename), mode, major, minor);
+ return tomoyo_supervisor(r, "file %s %s 0%o %u %u\n",
+ tomoyo_mac_keywords
+ [tomoyo_pnnn2mac[r->param.mkdev.operation]],
+ r->param.mkdev.filename->name,
+ r->param.mkdev.mode, r->param.mkdev.major,
+ r->param.mkdev.minor);
}
/**
* tomoyo_audit_path_number_log - Audit path/number request log.
*
- * @r: Pointer to "struct tomoyo_request_info".
- * @error: Error code.
+ * @r: Pointer to "struct tomoyo_request_info".
*
* Returns 0 on success, negative value otherwise.
*/
@@ -240,11 +212,7 @@
{
const u8 type = r->param.path_number.operation;
u8 radix;
- const struct tomoyo_path_info *filename = r->param.path_number.filename;
- const char *operation = tomoyo_path_number_keyword[type];
char buffer[64];
- if (r->granted)
- return 0;
switch (type) {
case TOMOYO_TYPE_CREATE:
case TOMOYO_TYPE_MKDIR:
@@ -262,251 +230,23 @@
}
tomoyo_print_ulong(buffer, sizeof(buffer), r->param.path_number.number,
radix);
- tomoyo_warn_log(r, "%s %s %s", operation, filename->name, buffer);
- return tomoyo_supervisor(r, "allow_%s %s %s\n", operation,
- tomoyo_pattern(filename), buffer);
-}
-
-static bool tomoyo_same_globally_readable(const struct tomoyo_acl_head *a,
- const struct tomoyo_acl_head *b)
-{
- return container_of(a, struct tomoyo_readable_file,
- head)->filename ==
- container_of(b, struct tomoyo_readable_file,
- head)->filename;
+ return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords
+ [tomoyo_pn2mac[type]],
+ r->param.path_number.filename->name, buffer);
}
/**
- * tomoyo_update_globally_readable_entry - Update "struct tomoyo_readable_file" list.
+ * tomoyo_check_path_acl - Check permission for path operation.
*
- * @filename: Filename unconditionally permitted to open() for reading.
- * @is_delete: True if it is a delete request.
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
*
- * Returns 0 on success, negative value otherwise.
+ * Returns true if granted, false otherwise.
*
- * Caller holds tomoyo_read_lock().
+ * To be able to use wildcard for domain transition, this function sets
+ * matching entry on success. Since the caller holds tomoyo_read_lock(),
+ * it is safe to set matching entry.
*/
-static int tomoyo_update_globally_readable_entry(const char *filename,
- const bool is_delete)
-{
- struct tomoyo_readable_file e = { };
- int error;
-
- if (!tomoyo_correct_word(filename))
- return -EINVAL;
- e.filename = tomoyo_get_name(filename);
- if (!e.filename)
- return -ENOMEM;
- error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
- &tomoyo_policy_list
- [TOMOYO_ID_GLOBALLY_READABLE],
- tomoyo_same_globally_readable);
- tomoyo_put_name(e.filename);
- return error;
-}
-
-/**
- * tomoyo_globally_readable_file - Check if the file is unconditionnaly permitted to be open()ed for reading.
- *
- * @filename: The filename to check.
- *
- * Returns true if any domain can open @filename for reading, false otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static bool tomoyo_globally_readable_file(const struct tomoyo_path_info *
- filename)
-{
- struct tomoyo_readable_file *ptr;
- bool found = false;
-
- list_for_each_entry_rcu(ptr, &tomoyo_policy_list
- [TOMOYO_ID_GLOBALLY_READABLE], head.list) {
- if (!ptr->head.is_deleted &&
- tomoyo_path_matches_pattern(filename, ptr->filename)) {
- found = true;
- break;
- }
- }
- return found;
-}
-
-/**
- * tomoyo_write_globally_readable - Write "struct tomoyo_readable_file" list.
- *
- * @data: String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_globally_readable(char *data, const bool is_delete)
-{
- return tomoyo_update_globally_readable_entry(data, is_delete);
-}
-
-static bool tomoyo_same_pattern(const struct tomoyo_acl_head *a,
- const struct tomoyo_acl_head *b)
-{
- return container_of(a, struct tomoyo_no_pattern, head)->pattern ==
- container_of(b, struct tomoyo_no_pattern, head)->pattern;
-}
-
-/**
- * tomoyo_update_file_pattern_entry - Update "struct tomoyo_no_pattern" list.
- *
- * @pattern: Pathname pattern.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_update_file_pattern_entry(const char *pattern,
- const bool is_delete)
-{
- struct tomoyo_no_pattern e = { };
- int error;
-
- if (!tomoyo_correct_word(pattern))
- return -EINVAL;
- e.pattern = tomoyo_get_name(pattern);
- if (!e.pattern)
- return -ENOMEM;
- error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
- &tomoyo_policy_list[TOMOYO_ID_PATTERN],
- tomoyo_same_pattern);
- tomoyo_put_name(e.pattern);
- return error;
-}
-
-/**
- * tomoyo_pattern - Get patterned pathname.
- *
- * @filename: The filename to find patterned pathname.
- *
- * Returns pointer to pathname pattern if matched, @filename otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-const char *tomoyo_pattern(const struct tomoyo_path_info *filename)
-{
- struct tomoyo_no_pattern *ptr;
- const struct tomoyo_path_info *pattern = NULL;
-
- list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_PATTERN],
- head.list) {
- if (ptr->head.is_deleted)
- continue;
- if (!tomoyo_path_matches_pattern(filename, ptr->pattern))
- continue;
- pattern = ptr->pattern;
- if (tomoyo_strendswith(pattern->name, "/\\*")) {
- /* Do nothing. Try to find the better match. */
- } else {
- /* This would be the better match. Use this. */
- break;
- }
- }
- if (pattern)
- filename = pattern;
- return filename->name;
-}
-
-/**
- * tomoyo_write_pattern - Write "struct tomoyo_no_pattern" list.
- *
- * @data: String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_pattern(char *data, const bool is_delete)
-{
- return tomoyo_update_file_pattern_entry(data, is_delete);
-}
-
-static bool tomoyo_same_no_rewrite(const struct tomoyo_acl_head *a,
- const struct tomoyo_acl_head *b)
-{
- return container_of(a, struct tomoyo_no_rewrite, head)->pattern
- == container_of(b, struct tomoyo_no_rewrite, head)
- ->pattern;
-}
-
-/**
- * tomoyo_update_no_rewrite_entry - Update "struct tomoyo_no_rewrite" list.
- *
- * @pattern: Pathname pattern that are not rewritable by default.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static int tomoyo_update_no_rewrite_entry(const char *pattern,
- const bool is_delete)
-{
- struct tomoyo_no_rewrite e = { };
- int error;
-
- if (!tomoyo_correct_word(pattern))
- return -EINVAL;
- e.pattern = tomoyo_get_name(pattern);
- if (!e.pattern)
- return -ENOMEM;
- error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
- &tomoyo_policy_list[TOMOYO_ID_NO_REWRITE],
- tomoyo_same_no_rewrite);
- tomoyo_put_name(e.pattern);
- return error;
-}
-
-/**
- * tomoyo_no_rewrite_file - Check if the given pathname is not permitted to be rewrited.
- *
- * @filename: Filename to check.
- *
- * Returns true if @filename is specified by "deny_rewrite" directive,
- * false otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-static bool tomoyo_no_rewrite_file(const struct tomoyo_path_info *filename)
-{
- struct tomoyo_no_rewrite *ptr;
- bool found = false;
-
- list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_NO_REWRITE],
- head.list) {
- if (ptr->head.is_deleted)
- continue;
- if (!tomoyo_path_matches_pattern(filename, ptr->pattern))
- continue;
- found = true;
- break;
- }
- return found;
-}
-
-/**
- * tomoyo_write_no_rewrite - Write "struct tomoyo_no_rewrite" list.
- *
- * @data: String to parse.
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_no_rewrite(char *data, const bool is_delete)
-{
- return tomoyo_update_no_rewrite_entry(data, is_delete);
-}
-
static bool tomoyo_check_path_acl(struct tomoyo_request_info *r,
const struct tomoyo_acl_info *ptr)
{
@@ -521,6 +261,14 @@
return false;
}
+/**
+ * tomoyo_check_path_number_acl - Check permission for path number operation (one pathname and one number).
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
static bool tomoyo_check_path_number_acl(struct tomoyo_request_info *r,
const struct tomoyo_acl_info *ptr)
{
@@ -533,6 +281,14 @@
&acl->name);
}
+/**
+ * tomoyo_check_path2_acl - Check permission for path path operation (two pathnames).
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
static bool tomoyo_check_path2_acl(struct tomoyo_request_info *r,
const struct tomoyo_acl_info *ptr)
{
@@ -544,8 +300,16 @@
&acl->name2);
}
+/**
+ * tomoyo_check_mkdev_acl - Check permission for path number number number operation (one pathname and three numbers).
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
static bool tomoyo_check_mkdev_acl(struct tomoyo_request_info *r,
- const struct tomoyo_acl_info *ptr)
+ const struct tomoyo_acl_info *ptr)
{
const struct tomoyo_mkdev_acl *acl =
container_of(ptr, typeof(*acl), head);
@@ -560,15 +324,31 @@
&acl->name);
}
+/**
+ * tomoyo_same_path_acl - Check for duplicated "struct tomoyo_path_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
static bool tomoyo_same_path_acl(const struct tomoyo_acl_info *a,
const struct tomoyo_acl_info *b)
{
const struct tomoyo_path_acl *p1 = container_of(a, typeof(*p1), head);
const struct tomoyo_path_acl *p2 = container_of(b, typeof(*p2), head);
- return tomoyo_same_acl_head(&p1->head, &p2->head) &&
- tomoyo_same_name_union(&p1->name, &p2->name);
+ return tomoyo_same_name_union(&p1->name, &p2->name);
}
+/**
+ * tomoyo_merge_path_acl - Merge duplicated "struct tomoyo_path_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a,
struct tomoyo_acl_info *b,
const bool is_delete)
@@ -577,19 +357,10 @@
->perm;
u16 perm = *a_perm;
const u16 b_perm = container_of(b, struct tomoyo_path_acl, head)->perm;
- if (is_delete) {
+ if (is_delete)
perm &= ~b_perm;
- if ((perm & TOMOYO_RW_MASK) != TOMOYO_RW_MASK)
- perm &= ~(1 << TOMOYO_TYPE_READ_WRITE);
- else if (!(perm & (1 << TOMOYO_TYPE_READ_WRITE)))
- perm &= ~TOMOYO_RW_MASK;
- } else {
+ else
perm |= b_perm;
- if ((perm & TOMOYO_RW_MASK) == TOMOYO_RW_MASK)
- perm |= (1 << TOMOYO_TYPE_READ_WRITE);
- else if (perm & (1 << TOMOYO_TYPE_READ_WRITE))
- perm |= TOMOYO_RW_MASK;
- }
*a_perm = perm;
return !perm;
}
@@ -597,52 +368,62 @@
/**
* tomoyo_update_path_acl - Update "struct tomoyo_path_acl" list.
*
- * @type: Type of operation.
- * @filename: Filename.
- * @domain: Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm: Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
*
* Returns 0 on success, negative value otherwise.
*
* Caller holds tomoyo_read_lock().
*/
-static int tomoyo_update_path_acl(const u8 type, const char *filename,
- struct tomoyo_domain_info * const domain,
- const bool is_delete)
+static int tomoyo_update_path_acl(const u16 perm,
+ struct tomoyo_acl_param *param)
{
struct tomoyo_path_acl e = {
.head.type = TOMOYO_TYPE_PATH_ACL,
- .perm = 1 << type
+ .perm = perm
};
int error;
- if (e.perm == (1 << TOMOYO_TYPE_READ_WRITE))
- e.perm |= TOMOYO_RW_MASK;
- if (!tomoyo_parse_name_union(filename, &e.name))
- return -EINVAL;
- error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
- tomoyo_same_path_acl,
- tomoyo_merge_path_acl);
+ if (!tomoyo_parse_name_union(param, &e.name))
+ error = -EINVAL;
+ else
+ error = tomoyo_update_domain(&e.head, sizeof(e), param,
+ tomoyo_same_path_acl,
+ tomoyo_merge_path_acl);
tomoyo_put_name_union(&e.name);
return error;
}
+/**
+ * tomoyo_same_mkdev_acl - Check for duplicated "struct tomoyo_mkdev_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
static bool tomoyo_same_mkdev_acl(const struct tomoyo_acl_info *a,
const struct tomoyo_acl_info *b)
{
- const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1),
- head);
- const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2),
- head);
- return tomoyo_same_acl_head(&p1->head, &p2->head)
- && tomoyo_same_name_union(&p1->name, &p2->name)
- && tomoyo_same_number_union(&p1->mode, &p2->mode)
- && tomoyo_same_number_union(&p1->major, &p2->major)
- && tomoyo_same_number_union(&p1->minor, &p2->minor);
+ const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1), head);
+ const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2), head);
+ return tomoyo_same_name_union(&p1->name, &p2->name) &&
+ tomoyo_same_number_union(&p1->mode, &p2->mode) &&
+ tomoyo_same_number_union(&p1->major, &p2->major) &&
+ tomoyo_same_number_union(&p1->minor, &p2->minor);
}
+/**
+ * tomoyo_merge_mkdev_acl - Merge duplicated "struct tomoyo_mkdev_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a,
- struct tomoyo_acl_info *b,
- const bool is_delete)
+ struct tomoyo_acl_info *b,
+ const bool is_delete)
{
u8 *const a_perm = &container_of(a, struct tomoyo_mkdev_acl,
head)->perm;
@@ -660,37 +441,30 @@
/**
* tomoyo_update_mkdev_acl - Update "struct tomoyo_mkdev_acl" list.
*
- * @type: Type of operation.
- * @filename: Filename.
- * @mode: Create mode.
- * @major: Device major number.
- * @minor: Device minor number.
- * @domain: Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm: Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
*
* Returns 0 on success, negative value otherwise.
*
* Caller holds tomoyo_read_lock().
*/
-static int tomoyo_update_mkdev_acl(const u8 type, const char *filename,
- char *mode, char *major, char *minor,
- struct tomoyo_domain_info * const
- domain, const bool is_delete)
+static int tomoyo_update_mkdev_acl(const u8 perm,
+ struct tomoyo_acl_param *param)
{
struct tomoyo_mkdev_acl e = {
.head.type = TOMOYO_TYPE_MKDEV_ACL,
- .perm = 1 << type
+ .perm = perm
};
- int error = is_delete ? -ENOENT : -ENOMEM;
- if (!tomoyo_parse_name_union(filename, &e.name) ||
- !tomoyo_parse_number_union(mode, &e.mode) ||
- !tomoyo_parse_number_union(major, &e.major) ||
- !tomoyo_parse_number_union(minor, &e.minor))
- goto out;
- error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
- tomoyo_same_mkdev_acl,
- tomoyo_merge_mkdev_acl);
- out:
+ int error;
+ if (!tomoyo_parse_name_union(param, &e.name) ||
+ !tomoyo_parse_number_union(param, &e.mode) ||
+ !tomoyo_parse_number_union(param, &e.major) ||
+ !tomoyo_parse_number_union(param, &e.minor))
+ error = -EINVAL;
+ else
+ error = tomoyo_update_domain(&e.head, sizeof(e), param,
+ tomoyo_same_mkdev_acl,
+ tomoyo_merge_mkdev_acl);
tomoyo_put_name_union(&e.name);
tomoyo_put_number_union(&e.mode);
tomoyo_put_number_union(&e.major);
@@ -698,16 +472,32 @@
return error;
}
+/**
+ * tomoyo_same_path2_acl - Check for duplicated "struct tomoyo_path2_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
static bool tomoyo_same_path2_acl(const struct tomoyo_acl_info *a,
const struct tomoyo_acl_info *b)
{
const struct tomoyo_path2_acl *p1 = container_of(a, typeof(*p1), head);
const struct tomoyo_path2_acl *p2 = container_of(b, typeof(*p2), head);
- return tomoyo_same_acl_head(&p1->head, &p2->head)
- && tomoyo_same_name_union(&p1->name1, &p2->name1)
- && tomoyo_same_name_union(&p1->name2, &p2->name2);
+ return tomoyo_same_name_union(&p1->name1, &p2->name1) &&
+ tomoyo_same_name_union(&p1->name2, &p2->name2);
}
+/**
+ * tomoyo_merge_path2_acl - Merge duplicated "struct tomoyo_path2_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a,
struct tomoyo_acl_info *b,
const bool is_delete)
@@ -727,33 +517,28 @@
/**
* tomoyo_update_path2_acl - Update "struct tomoyo_path2_acl" list.
*
- * @type: Type of operation.
- * @filename1: First filename.
- * @filename2: Second filename.
- * @domain: Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm: Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
*
* Returns 0 on success, negative value otherwise.
*
* Caller holds tomoyo_read_lock().
*/
-static int tomoyo_update_path2_acl(const u8 type, const char *filename1,
- const char *filename2,
- struct tomoyo_domain_info * const domain,
- const bool is_delete)
+static int tomoyo_update_path2_acl(const u8 perm,
+ struct tomoyo_acl_param *param)
{
struct tomoyo_path2_acl e = {
.head.type = TOMOYO_TYPE_PATH2_ACL,
- .perm = 1 << type
+ .perm = perm
};
- int error = is_delete ? -ENOENT : -ENOMEM;
- if (!tomoyo_parse_name_union(filename1, &e.name1) ||
- !tomoyo_parse_name_union(filename2, &e.name2))
- goto out;
- error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
- tomoyo_same_path2_acl,
- tomoyo_merge_path2_acl);
- out:
+ int error;
+ if (!tomoyo_parse_name_union(param, &e.name1) ||
+ !tomoyo_parse_name_union(param, &e.name2))
+ error = -EINVAL;
+ else
+ error = tomoyo_update_domain(&e.head, sizeof(e), param,
+ tomoyo_same_path2_acl,
+ tomoyo_merge_path2_acl);
tomoyo_put_name_union(&e.name1);
tomoyo_put_name_union(&e.name2);
return error;
@@ -775,9 +560,8 @@
{
int error;
- next:
r->type = tomoyo_p2mac[operation];
- r->mode = tomoyo_get_mode(r->profile, r->type);
+ r->mode = tomoyo_get_mode(r->domain->ns, r->profile, r->type);
if (r->mode == TOMOYO_CONFIG_DISABLED)
return 0;
r->param_type = TOMOYO_TYPE_PATH_ACL;
@@ -785,10 +569,6 @@
r->param.path.operation = operation;
do {
tomoyo_check_acl(r, tomoyo_check_path_acl);
- if (!r->granted && operation == TOMOYO_TYPE_READ &&
- !r->domain->ignore_global_allow_read &&
- tomoyo_globally_readable_file(filename))
- r->granted = true;
error = tomoyo_audit_path_log(r);
/*
* Do not retry for execute request, for alias may have
@@ -796,19 +576,17 @@
*/
} while (error == TOMOYO_RETRY_REQUEST &&
operation != TOMOYO_TYPE_EXECUTE);
- /*
- * Since "allow_truncate" doesn't imply "allow_rewrite" permission,
- * we need to check "allow_rewrite" permission if the filename is
- * specified by "deny_rewrite" keyword.
- */
- if (!error && operation == TOMOYO_TYPE_TRUNCATE &&
- tomoyo_no_rewrite_file(filename)) {
- operation = TOMOYO_TYPE_REWRITE;
- goto next;
- }
return error;
}
+/**
+ * tomoyo_same_path_number_acl - Check for duplicated "struct tomoyo_path_number_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b except permission bits, false otherwise.
+ */
static bool tomoyo_same_path_number_acl(const struct tomoyo_acl_info *a,
const struct tomoyo_acl_info *b)
{
@@ -816,11 +594,19 @@
head);
const struct tomoyo_path_number_acl *p2 = container_of(b, typeof(*p2),
head);
- return tomoyo_same_acl_head(&p1->head, &p2->head)
- && tomoyo_same_name_union(&p1->name, &p2->name)
- && tomoyo_same_number_union(&p1->number, &p2->number);
+ return tomoyo_same_name_union(&p1->name, &p2->name) &&
+ tomoyo_same_number_union(&p1->number, &p2->number);
}
+/**
+ * tomoyo_merge_path_number_acl - Merge duplicated "struct tomoyo_path_number_acl" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ * @is_delete: True for @a &= ~@b, false for @a |= @b.
+ *
+ * Returns true if @a is empty, false otherwise.
+ */
static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a,
struct tomoyo_acl_info *b,
const bool is_delete)
@@ -841,33 +627,26 @@
/**
* tomoyo_update_path_number_acl - Update ioctl/chmod/chown/chgrp ACL.
*
- * @type: Type of operation.
- * @filename: Filename.
- * @number: Number.
- * @domain: Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @perm: Permission.
+ * @param: Pointer to "struct tomoyo_acl_param".
*
* Returns 0 on success, negative value otherwise.
*/
-static int tomoyo_update_path_number_acl(const u8 type, const char *filename,
- char *number,
- struct tomoyo_domain_info * const
- domain,
- const bool is_delete)
+static int tomoyo_update_path_number_acl(const u8 perm,
+ struct tomoyo_acl_param *param)
{
struct tomoyo_path_number_acl e = {
.head.type = TOMOYO_TYPE_PATH_NUMBER_ACL,
- .perm = 1 << type
+ .perm = perm
};
- int error = is_delete ? -ENOENT : -ENOMEM;
- if (!tomoyo_parse_name_union(filename, &e.name))
- return -EINVAL;
- if (!tomoyo_parse_number_union(number, &e.number))
- goto out;
- error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
- tomoyo_same_path_number_acl,
- tomoyo_merge_path_number_acl);
- out:
+ int error;
+ if (!tomoyo_parse_name_union(param, &e.name) ||
+ !tomoyo_parse_number_union(param, &e.number))
+ error = -EINVAL;
+ else
+ error = tomoyo_update_domain(&e.head, sizeof(e), param,
+ tomoyo_same_path_number_acl,
+ tomoyo_merge_path_number_acl);
tomoyo_put_name_union(&e.name);
tomoyo_put_number_union(&e.number);
return error;
@@ -886,16 +665,20 @@
unsigned long number)
{
struct tomoyo_request_info r;
+ struct tomoyo_obj_info obj = {
+ .path1 = *path,
+ };
int error = -ENOMEM;
struct tomoyo_path_info buf;
int idx;
if (tomoyo_init_request_info(&r, NULL, tomoyo_pn2mac[type])
- == TOMOYO_CONFIG_DISABLED || !path->mnt || !path->dentry)
+ == TOMOYO_CONFIG_DISABLED || !path->dentry)
return 0;
idx = tomoyo_read_lock();
if (!tomoyo_get_realpath(&buf, path))
goto out;
+ r.obj = &obj;
if (type == TOMOYO_TYPE_MKDIR)
tomoyo_add_slash(&buf);
r.param_type = TOMOYO_TYPE_PATH_NUMBER_ACL;
@@ -930,45 +713,30 @@
int error = 0;
struct tomoyo_path_info buf;
struct tomoyo_request_info r;
+ struct tomoyo_obj_info obj = {
+ .path1 = *path,
+ };
int idx;
- if (!path->mnt ||
- (path->dentry->d_inode && S_ISDIR(path->dentry->d_inode->i_mode)))
- return 0;
buf.name = NULL;
r.mode = TOMOYO_CONFIG_DISABLED;
idx = tomoyo_read_lock();
- /*
- * If the filename is specified by "deny_rewrite" keyword,
- * we need to check "allow_rewrite" permission when the filename is not
- * opened for append mode or the filename is truncated at open time.
- */
- if ((acc_mode & MAY_WRITE) && !(flag & O_APPEND)
- && tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_REWRITE)
+ if (acc_mode &&
+ tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN)
!= TOMOYO_CONFIG_DISABLED) {
if (!tomoyo_get_realpath(&buf, path)) {
error = -ENOMEM;
goto out;
}
- if (tomoyo_no_rewrite_file(&buf))
- error = tomoyo_path_permission(&r, TOMOYO_TYPE_REWRITE,
+ r.obj = &obj;
+ if (acc_mode & MAY_READ)
+ error = tomoyo_path_permission(&r, TOMOYO_TYPE_READ,
&buf);
- }
- if (!error && acc_mode &&
- tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN)
- != TOMOYO_CONFIG_DISABLED) {
- u8 operation;
- if (!buf.name && !tomoyo_get_realpath(&buf, path)) {
- error = -ENOMEM;
- goto out;
- }
- if (acc_mode == (MAY_READ | MAY_WRITE))
- operation = TOMOYO_TYPE_READ_WRITE;
- else if (acc_mode == MAY_READ)
- operation = TOMOYO_TYPE_READ;
- else
- operation = TOMOYO_TYPE_WRITE;
- error = tomoyo_path_permission(&r, operation, &buf);
+ if (!error && (acc_mode & MAY_WRITE))
+ error = tomoyo_path_permission(&r, (flag & O_APPEND) ?
+ TOMOYO_TYPE_APPEND :
+ TOMOYO_TYPE_WRITE,
+ &buf);
}
out:
kfree(buf.name);
@@ -979,46 +747,57 @@
}
/**
- * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "rewrite", "chroot" and "unmount".
+ * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "append", "chroot" and "unmount".
*
* @operation: Type of operation.
* @path: Pointer to "struct path".
+ * @target: Symlink's target if @operation is TOMOYO_TYPE_SYMLINK,
+ * NULL otherwise.
*
* Returns 0 on success, negative value otherwise.
*/
-int tomoyo_path_perm(const u8 operation, struct path *path)
+int tomoyo_path_perm(const u8 operation, struct path *path, const char *target)
{
- int error = -ENOMEM;
- struct tomoyo_path_info buf;
struct tomoyo_request_info r;
+ struct tomoyo_obj_info obj = {
+ .path1 = *path,
+ };
+ int error;
+ struct tomoyo_path_info buf;
+ bool is_enforce;
+ struct tomoyo_path_info symlink_target;
int idx;
- if (!path->mnt)
- return 0;
if (tomoyo_init_request_info(&r, NULL, tomoyo_p2mac[operation])
== TOMOYO_CONFIG_DISABLED)
return 0;
+ is_enforce = (r.mode == TOMOYO_CONFIG_ENFORCING);
+ error = -ENOMEM;
buf.name = NULL;
idx = tomoyo_read_lock();
if (!tomoyo_get_realpath(&buf, path))
goto out;
+ r.obj = &obj;
switch (operation) {
- case TOMOYO_TYPE_REWRITE:
- if (!tomoyo_no_rewrite_file(&buf)) {
- error = 0;
- goto out;
- }
- break;
case TOMOYO_TYPE_RMDIR:
case TOMOYO_TYPE_CHROOT:
tomoyo_add_slash(&buf);
break;
+ case TOMOYO_TYPE_SYMLINK:
+ symlink_target.name = tomoyo_encode(target);
+ if (!symlink_target.name)
+ goto out;
+ tomoyo_fill_path_info(&symlink_target);
+ obj.symlink_target = &symlink_target;
+ break;
}
error = tomoyo_path_permission(&r, operation, &buf);
+ if (operation == TOMOYO_TYPE_SYMLINK)
+ kfree(symlink_target.name);
out:
kfree(buf.name);
tomoyo_read_unlock(idx);
- if (r.mode != TOMOYO_CONFIG_ENFORCING)
+ if (!is_enforce)
error = 0;
return error;
}
@@ -1034,20 +813,23 @@
* Returns 0 on success, negative value otherwise.
*/
int tomoyo_mkdev_perm(const u8 operation, struct path *path,
- const unsigned int mode, unsigned int dev)
+ const unsigned int mode, unsigned int dev)
{
struct tomoyo_request_info r;
+ struct tomoyo_obj_info obj = {
+ .path1 = *path,
+ };
int error = -ENOMEM;
struct tomoyo_path_info buf;
int idx;
- if (!path->mnt ||
- tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation])
+ if (tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation])
== TOMOYO_CONFIG_DISABLED)
return 0;
idx = tomoyo_read_lock();
error = -ENOMEM;
if (tomoyo_get_realpath(&buf, path)) {
+ r.obj = &obj;
dev = new_decode_dev(dev);
r.param_type = TOMOYO_TYPE_MKDEV_ACL;
r.param.mkdev.filename = &buf;
@@ -1081,10 +863,13 @@
struct tomoyo_path_info buf1;
struct tomoyo_path_info buf2;
struct tomoyo_request_info r;
+ struct tomoyo_obj_info obj = {
+ .path1 = *path1,
+ .path2 = *path2,
+ };
int idx;
- if (!path1->mnt || !path2->mnt ||
- tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation])
+ if (tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation])
== TOMOYO_CONFIG_DISABLED)
return 0;
buf1.name = NULL;
@@ -1096,16 +881,17 @@
switch (operation) {
struct dentry *dentry;
case TOMOYO_TYPE_RENAME:
- case TOMOYO_TYPE_LINK:
+ case TOMOYO_TYPE_LINK:
dentry = path1->dentry;
- if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
- break;
- /* fall through */
- case TOMOYO_TYPE_PIVOT_ROOT:
- tomoyo_add_slash(&buf1);
- tomoyo_add_slash(&buf2);
+ if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
+ break;
+ /* fall through */
+ case TOMOYO_TYPE_PIVOT_ROOT:
+ tomoyo_add_slash(&buf1);
+ tomoyo_add_slash(&buf2);
break;
- }
+ }
+ r.obj = &obj;
r.param_type = TOMOYO_TYPE_PATH2_ACL;
r.param.path2.operation = operation;
r.param.path2.filename1 = &buf1;
@@ -1124,53 +910,91 @@
}
/**
- * tomoyo_write_file - Update file related list.
+ * tomoyo_same_mount_acl - Check for duplicated "struct tomoyo_mount_acl" entry.
*
- * @data: String to parse.
- * @domain: Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
+ * @a: Pointer to "struct tomoyo_acl_info".
+ * @b: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
+static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a,
+ const struct tomoyo_acl_info *b)
+{
+ const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head);
+ const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head);
+ return tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) &&
+ tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) &&
+ tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) &&
+ tomoyo_same_number_union(&p1->flags, &p2->flags);
+}
+
+/**
+ * tomoyo_update_mount_acl - Write "struct tomoyo_mount_acl" list.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
*
* Returns 0 on success, negative value otherwise.
*
* Caller holds tomoyo_read_lock().
*/
-int tomoyo_write_file(char *data, struct tomoyo_domain_info *domain,
- const bool is_delete)
+static int tomoyo_update_mount_acl(struct tomoyo_acl_param *param)
{
- char *w[5];
+ struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL };
+ int error;
+ if (!tomoyo_parse_name_union(param, &e.dev_name) ||
+ !tomoyo_parse_name_union(param, &e.dir_name) ||
+ !tomoyo_parse_name_union(param, &e.fs_type) ||
+ !tomoyo_parse_number_union(param, &e.flags))
+ error = -EINVAL;
+ else
+ error = tomoyo_update_domain(&e.head, sizeof(e), param,
+ tomoyo_same_mount_acl, NULL);
+ tomoyo_put_name_union(&e.dev_name);
+ tomoyo_put_name_union(&e.dir_name);
+ tomoyo_put_name_union(&e.fs_type);
+ tomoyo_put_number_union(&e.flags);
+ return error;
+}
+
+/**
+ * tomoyo_write_file - Update file related list.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ *
+ * Returns 0 on success, negative value otherwise.
+ *
+ * Caller holds tomoyo_read_lock().
+ */
+int tomoyo_write_file(struct tomoyo_acl_param *param)
+{
+ u16 perm = 0;
u8 type;
- if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[1][0])
- return -EINVAL;
- if (strncmp(w[0], "allow_", 6))
- goto out;
- w[0] += 6;
- for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++) {
- if (strcmp(w[0], tomoyo_path_keyword[type]))
- continue;
- return tomoyo_update_path_acl(type, w[1], domain, is_delete);
- }
- if (!w[2][0])
- goto out;
- for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++) {
- if (strcmp(w[0], tomoyo_path2_keyword[type]))
- continue;
- return tomoyo_update_path2_acl(type, w[1], w[2], domain,
- is_delete);
- }
- for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++) {
- if (strcmp(w[0], tomoyo_path_number_keyword[type]))
- continue;
- return tomoyo_update_path_number_acl(type, w[1], w[2], domain,
- is_delete);
- }
- if (!w[3][0] || !w[4][0])
- goto out;
- for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++) {
- if (strcmp(w[0], tomoyo_mkdev_keyword[type]))
- continue;
- return tomoyo_update_mkdev_acl(type, w[1], w[2], w[3],
- w[4], domain, is_delete);
- }
- out:
+ const char *operation = tomoyo_read_token(param);
+ for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++)
+ if (tomoyo_permstr(operation, tomoyo_path_keyword[type]))
+ perm |= 1 << type;
+ if (perm)
+ return tomoyo_update_path_acl(perm, param);
+ for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++)
+ if (tomoyo_permstr(operation,
+ tomoyo_mac_keywords[tomoyo_pp2mac[type]]))
+ perm |= 1 << type;
+ if (perm)
+ return tomoyo_update_path2_acl(perm, param);
+ for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++)
+ if (tomoyo_permstr(operation,
+ tomoyo_mac_keywords[tomoyo_pn2mac[type]]))
+ perm |= 1 << type;
+ if (perm)
+ return tomoyo_update_path_number_acl(perm, param);
+ for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++)
+ if (tomoyo_permstr(operation,
+ tomoyo_mac_keywords[tomoyo_pnnn2mac[type]]))
+ perm |= 1 << type;
+ if (perm)
+ return tomoyo_update_mkdev_acl(perm, param);
+ if (tomoyo_permstr(operation,
+ tomoyo_mac_keywords[TOMOYO_MAC_FILE_MOUNT]))
+ return tomoyo_update_mount_acl(param);
return -EINVAL;
}
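
The rewritten tomoyo_write_file() above maps a '/'-separated operation string onto a per-type permission bitmask by scanning keyword tables. A minimal userspace sketch of that idea, assuming a simplified keyword table and a stand-in permstr(); the kernel's tomoyo_permstr() and tomoyo_mac_keywords[] differ:

    #include <stdio.h>
    #include <string.h>

    static const char * const path_keywords[] = {
        "read", "write", "append", "unlink", "rmdir",
    };

    /* Return 1 if the '/'-separated list @perm contains the word @str. */
    static int permstr(const char *perm, const char *str)
    {
        const char *pos = strstr(perm, str);
        const size_t len = strlen(str);
        return pos && (pos == perm || pos[-1] == '/') &&
               (pos[len] == '/' || pos[len] == '\0');
    }

    int main(void)
    {
        const char *operation = "read/unlink";
        unsigned int perm = 0;
        unsigned int type;
        for (type = 0; type < sizeof(path_keywords) / sizeof(path_keywords[0]); type++)
            if (permstr(operation, path_keywords[type]))
                perm |= 1u << type;
        printf("perm bitmask: 0x%x\n", perm); /* prints 0x9 (read|unlink) */
        return 0;
    }
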
diff --git a/security/tomoyo/gc.c b/security/tomoyo/gc.c
index a877e4c..ae135fb 100644
--- a/security/tomoyo/gc.c
+++ b/security/tomoyo/gc.c
@@ -1,58 +1,205 @@
/*
* security/tomoyo/gc.c
*
- * Implementation of the Domain-Based Mandatory Access Control.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
- *
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include "common.h"
#include <linux/kthread.h>
#include <linux/slab.h>
+/* The list for "struct tomoyo_io_buffer". */
+static LIST_HEAD(tomoyo_io_buffer_list);
+/* Lock for protecting tomoyo_io_buffer_list. */
+static DEFINE_SPINLOCK(tomoyo_io_buffer_list_lock);
+
+/* Size of an element. */
+static const u8 tomoyo_element_size[TOMOYO_MAX_POLICY] = {
+ [TOMOYO_ID_GROUP] = sizeof(struct tomoyo_group),
+ [TOMOYO_ID_PATH_GROUP] = sizeof(struct tomoyo_path_group),
+ [TOMOYO_ID_NUMBER_GROUP] = sizeof(struct tomoyo_number_group),
+ [TOMOYO_ID_AGGREGATOR] = sizeof(struct tomoyo_aggregator),
+ [TOMOYO_ID_TRANSITION_CONTROL] =
+ sizeof(struct tomoyo_transition_control),
+ [TOMOYO_ID_MANAGER] = sizeof(struct tomoyo_manager),
+ /* [TOMOYO_ID_CONDITION] = "struct tomoyo_condition"->size, */
+ /* [TOMOYO_ID_NAME] = "struct tomoyo_name"->size, */
+ /* [TOMOYO_ID_ACL] =
+ tomoyo_acl_size["struct tomoyo_acl_info"->type], */
+ [TOMOYO_ID_DOMAIN] = sizeof(struct tomoyo_domain_info),
+};
+
+/* Size of a domain ACL element. */
+static const u8 tomoyo_acl_size[] = {
+ [TOMOYO_TYPE_PATH_ACL] = sizeof(struct tomoyo_path_acl),
+ [TOMOYO_TYPE_PATH2_ACL] = sizeof(struct tomoyo_path2_acl),
+ [TOMOYO_TYPE_PATH_NUMBER_ACL] = sizeof(struct tomoyo_path_number_acl),
+ [TOMOYO_TYPE_MKDEV_ACL] = sizeof(struct tomoyo_mkdev_acl),
+ [TOMOYO_TYPE_MOUNT_ACL] = sizeof(struct tomoyo_mount_acl),
+};
+
+/**
+ * tomoyo_struct_used_by_io_buffer - Check whether the list element is used by /sys/kernel/security/tomoyo/ users or not.
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true if @element is used by /sys/kernel/security/tomoyo/ users,
+ * false otherwise.
+ */
+static bool tomoyo_struct_used_by_io_buffer(const struct list_head *element)
+{
+ struct tomoyo_io_buffer *head;
+ bool in_use = false;
+
+ spin_lock(&tomoyo_io_buffer_list_lock);
+ list_for_each_entry(head, &tomoyo_io_buffer_list, list) {
+ head->users++;
+ spin_unlock(&tomoyo_io_buffer_list_lock);
+ if (mutex_lock_interruptible(&head->io_sem)) {
+ in_use = true;
+ goto out;
+ }
+ if (head->r.domain == element || head->r.group == element ||
+ head->r.acl == element || &head->w.domain->list == element)
+ in_use = true;
+ mutex_unlock(&head->io_sem);
+out:
+ spin_lock(&tomoyo_io_buffer_list_lock);
+ head->users--;
+ if (in_use)
+ break;
+ }
+ spin_unlock(&tomoyo_io_buffer_list_lock);
+ return in_use;
+}
+
+/**
+ * tomoyo_name_used_by_io_buffer - Check whether the string is used by /sys/kernel/security/tomoyo/ users or not.
+ *
+ * @string: String to check.
+ * @size: Memory allocated for @string.
+ *
+ * Returns true if @string is used by /sys/kernel/security/tomoyo/ users,
+ * false otherwise.
+ */
+static bool tomoyo_name_used_by_io_buffer(const char *string,
+ const size_t size)
+{
+ struct tomoyo_io_buffer *head;
+ bool in_use = false;
+
+ spin_lock(&tomoyo_io_buffer_list_lock);
+ list_for_each_entry(head, &tomoyo_io_buffer_list, list) {
+ int i;
+ head->users++;
+ spin_unlock(&tomoyo_io_buffer_list_lock);
+ if (mutex_lock_interruptible(&head->io_sem)) {
+ in_use = true;
+ goto out;
+ }
+ for (i = 0; i < TOMOYO_MAX_IO_READ_QUEUE; i++) {
+ const char *w = head->r.w[i];
+ if (w < string || w > string + size)
+ continue;
+ in_use = true;
+ break;
+ }
+ mutex_unlock(&head->io_sem);
+out:
+ spin_lock(&tomoyo_io_buffer_list_lock);
+ head->users--;
+ if (in_use)
+ break;
+ }
+ spin_unlock(&tomoyo_io_buffer_list_lock);
+ return in_use;
+}
+
+/* Structure for garbage collection. */
struct tomoyo_gc {
struct list_head list;
- int type;
+ enum tomoyo_policy_id type;
+ size_t size;
struct list_head *element;
};
-static LIST_HEAD(tomoyo_gc_queue);
-static DEFINE_MUTEX(tomoyo_gc_mutex);
+/* List of entries to be deleted. */
+static LIST_HEAD(tomoyo_gc_list);
+/* Length of tomoyo_gc_list. */
+static int tomoyo_gc_list_len;
-/* Caller holds tomoyo_policy_lock mutex. */
+/**
+ * tomoyo_add_to_gc - Add an entry to the to-be-deleted list.
+ *
+ * @type: One of values in "enum tomoyo_policy_id".
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true on success, false otherwise.
+ *
+ * Caller holds tomoyo_policy_lock mutex.
+ *
+ * Adding an entry requires kmalloc(). Thus, if we tried to add thousands of
+ * entries at once, it would take too long. Therefore, do not add more than
+ * 128 entries per scan. But to be able to handle the worst case where all
+ * entries are in use, we accept one more entry per scan.
+ *
+ * If we used a singly linked list via "struct list_head"->prev (which is
+ * LIST_POISON2), we could avoid the kmalloc().
+ */
static bool tomoyo_add_to_gc(const int type, struct list_head *element)
{
struct tomoyo_gc *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
return false;
entry->type = type;
+ if (type == TOMOYO_ID_ACL)
+ entry->size = tomoyo_acl_size[
+ container_of(element,
+ typeof(struct tomoyo_acl_info),
+ list)->type];
+ else if (type == TOMOYO_ID_NAME)
+ entry->size = strlen(container_of(element,
+ typeof(struct tomoyo_name),
+ head.list)->entry.name) + 1;
+ else if (type == TOMOYO_ID_CONDITION)
+ entry->size =
+ container_of(element, typeof(struct tomoyo_condition),
+ head.list)->size;
+ else
+ entry->size = tomoyo_element_size[type];
entry->element = element;
- list_add(&entry->list, &tomoyo_gc_queue);
+ list_add(&entry->list, &tomoyo_gc_list);
list_del_rcu(element);
- return true;
+ return tomoyo_gc_list_len++ < 128;
}
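
The batching rule described above can be captured in a few lines. A freestanding sketch with illustrative names, calloc() standing in for the kernel's kzalloc(GFP_ATOMIC) and a singly linked list for list_head:

    #include <stdbool.h>
    #include <stdlib.h>

    struct gc_entry {
        struct gc_entry *next;
        void *object;
    };

    static struct gc_entry *gc_list;
    static int gc_list_len;

    enum { GC_BATCH_LIMIT = 128 };

    /* Return true while the caller may keep queueing entries in this scan. */
    static bool add_to_gc(void *object)
    {
        struct gc_entry *e = calloc(1, sizeof(*e));
        if (!e)
            return false; /* allocation failure also ends the scan */
        e->object = object;
        e->next = gc_list;
        gc_list = e;
        /* Accept one extra entry so a fully in-use list still progresses. */
        return gc_list_len++ < GC_BATCH_LIMIT;
    }
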
-static void tomoyo_del_allow_read(struct list_head *element)
+/**
+ * tomoyo_element_linked_by_gc - Check whether an element is pointed to from the GC queue.
+ *
+ * @element: Pointer to an element.
+ * @size: Size of @element in bytes.
+ *
+ * Returns true if @element is linked by other elements in the garbage
+ * collector's queue, false otherwise.
+ */
+static bool tomoyo_element_linked_by_gc(const u8 *element, const size_t size)
{
- struct tomoyo_readable_file *ptr =
- container_of(element, typeof(*ptr), head.list);
- tomoyo_put_name(ptr->filename);
+ struct tomoyo_gc *p;
+ list_for_each_entry(p, &tomoyo_gc_list, list) {
+ const u8 *ptr = (const u8 *) p->element->next;
+ if (ptr < element || element + size < ptr)
+ continue;
+ return true;
+ }
+ return false;
}
-static void tomoyo_del_file_pattern(struct list_head *element)
-{
- struct tomoyo_no_pattern *ptr =
- container_of(element, typeof(*ptr), head.list);
- tomoyo_put_name(ptr->pattern);
-}
-
-static void tomoyo_del_no_rewrite(struct list_head *element)
-{
- struct tomoyo_no_rewrite *ptr =
- container_of(element, typeof(*ptr), head.list);
- tomoyo_put_name(ptr->pattern);
-}
-
+/**
+ * tomoyo_del_transition_control - Delete members in "struct tomoyo_transition_control".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
static void tomoyo_del_transition_control(struct list_head *element)
{
struct tomoyo_transition_control *ptr =
@@ -61,6 +208,13 @@
tomoyo_put_name(ptr->program);
}
+/**
+ * tomoyo_del_aggregator - Delete members in "struct tomoyo_aggregator".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
static void tomoyo_del_aggregator(struct list_head *element)
{
struct tomoyo_aggregator *ptr =
@@ -69,6 +223,13 @@
tomoyo_put_name(ptr->aggregated_name);
}
+/**
+ * tomoyo_del_manager - Delete members in "struct tomoyo_manager".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
static void tomoyo_del_manager(struct list_head *element)
{
struct tomoyo_manager *ptr =
@@ -76,10 +237,18 @@
tomoyo_put_name(ptr->manager);
}
+/**
+ * tomoyo_del_acl - Delete members in "struct tomoyo_acl_info".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
static void tomoyo_del_acl(struct list_head *element)
{
struct tomoyo_acl_info *acl =
container_of(element, typeof(*acl), list);
+ tomoyo_put_condition(acl->cond);
switch (acl->type) {
case TOMOYO_TYPE_PATH_ACL:
{
@@ -127,6 +296,13 @@
}
}
+/**
+ * tomoyo_del_domain - Delete members in "struct tomoyo_domain_info".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns true if deleted, false otherwise.
+ */
static bool tomoyo_del_domain(struct list_head *element)
{
struct tomoyo_domain_info *domain =
@@ -165,13 +341,65 @@
return true;
}
+/**
+ * tomoyo_del_condition - Delete members in "struct tomoyo_condition".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
+void tomoyo_del_condition(struct list_head *element)
+{
+ struct tomoyo_condition *cond = container_of(element, typeof(*cond),
+ head.list);
+ const u16 condc = cond->condc;
+ const u16 numbers_count = cond->numbers_count;
+ const u16 names_count = cond->names_count;
+ const u16 argc = cond->argc;
+ const u16 envc = cond->envc;
+ unsigned int i;
+ const struct tomoyo_condition_element *condp
+ = (const struct tomoyo_condition_element *) (cond + 1);
+ struct tomoyo_number_union *numbers_p
+ = (struct tomoyo_number_union *) (condp + condc);
+ struct tomoyo_name_union *names_p
+ = (struct tomoyo_name_union *) (numbers_p + numbers_count);
+ const struct tomoyo_argv *argv
+ = (const struct tomoyo_argv *) (names_p + names_count);
+ const struct tomoyo_envp *envp
+ = (const struct tomoyo_envp *) (argv + argc);
+ for (i = 0; i < numbers_count; i++)
+ tomoyo_put_number_union(numbers_p++);
+ for (i = 0; i < names_count; i++)
+ tomoyo_put_name_union(names_p++);
+ for (i = 0; i < argc; argv++, i++)
+ tomoyo_put_name(argv->value);
+ for (i = 0; i < envc; envp++, i++) {
+ tomoyo_put_name(envp->name);
+ tomoyo_put_name(envp->value);
+ }
+}
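
tomoyo_del_condition() above walks a single allocation that packs several variable-length arrays behind a header, locating each purely by pointer arithmetic. A small sketch of that layout technique; the types, fields, and counts below are illustrative, not the kernel's:

    #include <stdlib.h>

    struct header {
        unsigned short condc;
        unsigned short numbers_count;
    };
    struct cond_elem { int lhs; int rhs; };
    struct number { unsigned int value; };

    static void walk(struct header *h)
    {
        /* The first array begins immediately after the header... */
        struct cond_elem *condp = (struct cond_elem *) (h + 1);
        /* ...and the next one begins after condc elements of the first. */
        struct number *numbers = (struct number *) (condp + h->condc);
        unsigned int i;
        for (i = 0; i < h->numbers_count; i++)
            (void) numbers[i].value; /* drop per-element references here */
    }

    int main(void)
    {
        struct header *h = calloc(1, sizeof(*h) + 4 * sizeof(struct cond_elem)
                                  + 2 * sizeof(struct number));
        if (!h)
            return 1;
        h->condc = 4;
        h->numbers_count = 2;
        walk(h);
        free(h);
        return 0;
    }
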
+/**
+ * tomoyo_del_name - Delete members in "struct tomoyo_name".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
static void tomoyo_del_name(struct list_head *element)
{
const struct tomoyo_name *ptr =
- container_of(element, typeof(*ptr), list);
+ container_of(element, typeof(*ptr), head.list);
}
+/**
+ * tomoyo_del_path_group - Delete members in "struct tomoyo_path_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
static void tomoyo_del_path_group(struct list_head *element)
{
struct tomoyo_path_group *member =
@@ -179,20 +407,43 @@
tomoyo_put_name(member->member_name);
}
+/**
+ * tomoyo_del_group - Delete "struct tomoyo_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
static void tomoyo_del_group(struct list_head *element)
{
struct tomoyo_group *group =
- container_of(element, typeof(*group), list);
+ container_of(element, typeof(*group), head.list);
tomoyo_put_name(group->group_name);
}
+/**
+ * tomoyo_del_number_group - Delete members in "struct tomoyo_number_group".
+ *
+ * @element: Pointer to "struct list_head".
+ *
+ * Returns nothing.
+ */
static void tomoyo_del_number_group(struct list_head *element)
{
struct tomoyo_number_group *member =
container_of(element, typeof(*member), head.list);
}
-static bool tomoyo_collect_member(struct list_head *member_list, int id)
+/**
+ * tomoyo_collect_member - Delete elements with "struct tomoyo_acl_head".
+ *
+ * @id: One of values in "enum tomoyo_policy_id".
+ * @member_list: Pointer to "struct list_head".
+ *
+ * Returns true if some elements are deleted, false otherwise.
+ */
+static bool tomoyo_collect_member(const enum tomoyo_policy_id id,
+ struct list_head *member_list)
{
struct tomoyo_acl_head *member;
list_for_each_entry(member, member_list, list) {
@@ -201,13 +452,20 @@
if (!tomoyo_add_to_gc(id, &member->list))
return false;
}
- return true;
+ return true;
}
-static bool tomoyo_collect_acl(struct tomoyo_domain_info *domain)
+/**
+ * tomoyo_collect_acl - Delete elements in "struct tomoyo_domain_info".
+ *
+ * @list: Pointer to "struct list_head".
+ *
+ * Returns true if some elements are deleted, false otherwise.
+ */
+static bool tomoyo_collect_acl(struct list_head *list)
{
struct tomoyo_acl_info *acl;
- list_for_each_entry(acl, &domain->acl_info_list, list) {
+ list_for_each_entry(acl, list, list) {
if (!acl->is_deleted)
continue;
if (!tomoyo_add_to_gc(TOMOYO_ID_ACL, &acl->list))
@@ -216,19 +474,24 @@
return true;
}
+/**
+ * tomoyo_collect_entry - Scan lists for deleted elements.
+ *
+ * Returns nothing.
+ */
static void tomoyo_collect_entry(void)
{
int i;
+ enum tomoyo_policy_id id;
+ struct tomoyo_policy_namespace *ns;
+ int idx;
if (mutex_lock_interruptible(&tomoyo_policy_lock))
return;
- for (i = 0; i < TOMOYO_MAX_POLICY; i++) {
- if (!tomoyo_collect_member(&tomoyo_policy_list[i], i))
- goto unlock;
- }
+ idx = tomoyo_read_lock();
{
struct tomoyo_domain_info *domain;
list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
- if (!tomoyo_collect_acl(domain))
+ if (!tomoyo_collect_acl(&domain->acl_info_list))
goto unlock;
if (!domain->is_deleted || atomic_read(&domain->users))
continue;
@@ -241,48 +504,93 @@
goto unlock;
}
}
- for (i = 0; i < TOMOYO_MAX_HASH; i++) {
- struct tomoyo_name *ptr;
- list_for_each_entry_rcu(ptr, &tomoyo_name_list[i], list) {
+ list_for_each_entry_rcu(ns, &tomoyo_namespace_list, namespace_list) {
+ for (id = 0; id < TOMOYO_MAX_POLICY; id++)
+ if (!tomoyo_collect_member(id, &ns->policy_list[id]))
+ goto unlock;
+ for (i = 0; i < TOMOYO_MAX_ACL_GROUPS; i++)
+ if (!tomoyo_collect_acl(&ns->acl_group[i]))
+ goto unlock;
+ for (i = 0; i < TOMOYO_MAX_GROUP; i++) {
+ struct list_head *list = &ns->group_list[i];
+ struct tomoyo_group *group;
+ switch (i) {
+ case 0:
+ id = TOMOYO_ID_PATH_GROUP;
+ break;
+ default:
+ id = TOMOYO_ID_NUMBER_GROUP;
+ break;
+ }
+ list_for_each_entry(group, list, head.list) {
+ if (!tomoyo_collect_member
+ (id, &group->member_list))
+ goto unlock;
+ if (!list_empty(&group->member_list) ||
+ atomic_read(&group->head.users))
+ continue;
+ if (!tomoyo_add_to_gc(TOMOYO_ID_GROUP,
+ &group->head.list))
+ goto unlock;
+ }
+ }
+ }
+ id = TOMOYO_ID_CONDITION;
+ for (i = 0; i < TOMOYO_MAX_HASH + 1; i++) {
+ struct list_head *list = !i ?
+ &tomoyo_condition_list : &tomoyo_name_list[i - 1];
+ struct tomoyo_shared_acl_head *ptr;
+ list_for_each_entry(ptr, list, list) {
if (atomic_read(&ptr->users))
continue;
- if (!tomoyo_add_to_gc(TOMOYO_ID_NAME, &ptr->list))
+ if (!tomoyo_add_to_gc(id, &ptr->list))
goto unlock;
}
+ id = TOMOYO_ID_NAME;
}
- for (i = 0; i < TOMOYO_MAX_GROUP; i++) {
- struct list_head *list = &tomoyo_group_list[i];
- int id;
- struct tomoyo_group *group;
- switch (i) {
- case 0:
- id = TOMOYO_ID_PATH_GROUP;
- break;
- default:
- id = TOMOYO_ID_NUMBER_GROUP;
- break;
- }
- list_for_each_entry(group, list, list) {
- if (!tomoyo_collect_member(&group->member_list, id))
- goto unlock;
- if (!list_empty(&group->member_list) ||
- atomic_read(&group->users))
- continue;
- if (!tomoyo_add_to_gc(TOMOYO_ID_GROUP, &group->list))
- goto unlock;
- }
- }
- unlock:
+unlock:
+ tomoyo_read_unlock(idx);
mutex_unlock(&tomoyo_policy_lock);
}
-static void tomoyo_kfree_entry(void)
+/**
+ * tomoyo_kfree_entry - Delete entries in tomoyo_gc_list.
+ *
+ * Returns true if some entries were kfree()d, false otherwise.
+ */
+static bool tomoyo_kfree_entry(void)
{
struct tomoyo_gc *p;
struct tomoyo_gc *tmp;
+ bool result = false;
- list_for_each_entry_safe(p, tmp, &tomoyo_gc_queue, list) {
+ list_for_each_entry_safe(p, tmp, &tomoyo_gc_list, list) {
struct list_head *element = p->element;
+
+ /*
+ * list_del_rcu() in tomoyo_add_to_gc() guarantees that the
+ * list element is no longer reachable from the list on which
+ * the element was originally linked (e.g. tomoyo_domain_list).
+ * Also, synchronize_srcu() in tomoyo_gc_thread() guarantees
+ * that the list element is no longer referenced by syscall
+ * users.
+ *
+ * However, there are three users which may still be using the
+ * list element. We need to defer until all of these users
+ * forget the list element.
+ *
+ * Firstly, defer until "struct tomoyo_io_buffer"->r.{domain,
+ * group,acl} and "struct tomoyo_io_buffer"->w.domain forget
+ * the list element.
+ */
+ if (tomoyo_struct_used_by_io_buffer(element))
+ continue;
+ /*
+ * Secondly, defer until all other elements in the
+ * tomoyo_gc_list list forget the list element.
+ */
+ if (tomoyo_element_linked_by_gc((const u8 *) element, p->size))
+ continue;
switch (p->type) {
case TOMOYO_ID_TRANSITION_CONTROL:
tomoyo_del_transition_control(element);
@@ -290,19 +598,21 @@
case TOMOYO_ID_AGGREGATOR:
tomoyo_del_aggregator(element);
break;
- case TOMOYO_ID_GLOBALLY_READABLE:
- tomoyo_del_allow_read(element);
- break;
- case TOMOYO_ID_PATTERN:
- tomoyo_del_file_pattern(element);
- break;
- case TOMOYO_ID_NO_REWRITE:
- tomoyo_del_no_rewrite(element);
- break;
case TOMOYO_ID_MANAGER:
tomoyo_del_manager(element);
break;
+ case TOMOYO_ID_CONDITION:
+ tomoyo_del_condition(element);
+ break;
case TOMOYO_ID_NAME:
+ /*
+ * Thirdly, defer until all "struct tomoyo_io_buffer"
+ * ->r.w[] forget the list element.
+ */
+ if (tomoyo_name_used_by_io_buffer(
+ container_of(element, typeof(struct tomoyo_name),
+ head.list)->entry.name, p->size))
+ continue;
tomoyo_del_name(element);
break;
case TOMOYO_ID_ACL:
@@ -321,34 +631,95 @@
case TOMOYO_ID_NUMBER_GROUP:
tomoyo_del_number_group(element);
break;
+ case TOMOYO_MAX_POLICY:
+ break;
}
tomoyo_memory_free(element);
list_del(&p->list);
kfree(p);
+ tomoyo_gc_list_len--;
+ result = true;
}
+ return result;
}
+/**
+ * tomoyo_gc_thread - Garbage collector thread function.
+ *
+ * @unused: Unused.
+ *
+ * In case the OOM killer chooses this thread for termination, we create it
+ * as a short-lived thread whenever the /sys/kernel/security/tomoyo/
+ * interface is close()d.
+ *
+ * Returns 0.
+ */
static int tomoyo_gc_thread(void *unused)
{
+ /* Garbage collector thread is exclusive. */
+ static DEFINE_MUTEX(tomoyo_gc_mutex);
+ if (!mutex_trylock(&tomoyo_gc_mutex))
+ goto out;
daemonize("GC for TOMOYO");
- if (mutex_trylock(&tomoyo_gc_mutex)) {
- int i;
- for (i = 0; i < 10; i++) {
- tomoyo_collect_entry();
- if (list_empty(&tomoyo_gc_queue))
- break;
- synchronize_srcu(&tomoyo_ss);
- tomoyo_kfree_entry();
+ do {
+ tomoyo_collect_entry();
+ if (list_empty(&tomoyo_gc_list))
+ break;
+ synchronize_srcu(&tomoyo_ss);
+ } while (tomoyo_kfree_entry());
+ {
+ struct tomoyo_io_buffer *head;
+ struct tomoyo_io_buffer *tmp;
+
+ spin_lock(&tomoyo_io_buffer_list_lock);
+ list_for_each_entry_safe(head, tmp, &tomoyo_io_buffer_list,
+ list) {
+ if (head->users)
+ continue;
+ list_del(&head->list);
+ kfree(head->read_buf);
+ kfree(head->write_buf);
+ kfree(head);
}
- mutex_unlock(&tomoyo_gc_mutex);
+ spin_unlock(&tomoyo_io_buffer_list_lock);
}
- do_exit(0);
+ mutex_unlock(&tomoyo_gc_mutex);
+out:
+ /* This acts as do_exit(0). */
+ return 0;
}
-void tomoyo_run_gc(void)
+/**
+ * tomoyo_notify_gc - Register/unregister /sys/kernel/security/tomoyo/ users.
+ *
+ * @head: Pointer to "struct tomoyo_io_buffer".
+ * @is_register: True if register, false if unregister.
+ *
+ * Returns nothing.
+ */
+void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register)
{
- struct task_struct *task = kthread_create(tomoyo_gc_thread, NULL,
- "GC for TOMOYO");
- if (!IS_ERR(task))
- wake_up_process(task);
+ bool is_write = false;
+
+ spin_lock(&tomoyo_io_buffer_list_lock);
+ if (is_register) {
+ head->users = 1;
+ list_add(&head->list, &tomoyo_io_buffer_list);
+ } else {
+ is_write = head->write_buf != NULL;
+ if (!--head->users) {
+ list_del(&head->list);
+ kfree(head->read_buf);
+ kfree(head->write_buf);
+ kfree(head);
+ }
+ }
+ spin_unlock(&tomoyo_io_buffer_list_lock);
+ if (is_write) {
+ struct task_struct *task = kthread_create(tomoyo_gc_thread,
+ NULL,
+ "GC for TOMOYO");
+ if (!IS_ERR(task))
+ wake_up_process(task);
+ }
}
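
tomoyo_notify_gc() above keeps a registry of open I/O buffers so the collector can tell which entries are still referenced; the last unregistering user frees the buffer. A userspace sketch of that register/unregister pattern, with a pthread mutex standing in for the spinlock and a singly linked list for list_head:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct io_buffer {
        struct io_buffer *next;
        int users;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct io_buffer *buffer_list;

    static void notify(struct io_buffer *head, bool is_register)
    {
        pthread_mutex_lock(&list_lock);
        if (is_register) {
            head->users = 1;
            head->next = buffer_list;
            buffer_list = head;
        } else if (--head->users == 0) {
            struct io_buffer **p = &buffer_list;
            while (*p && *p != head)
                p = &(*p)->next;
            if (*p)
                *p = head->next; /* unlink before freeing */
            free(head);
        }
        pthread_mutex_unlock(&list_lock);
    }
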
diff --git a/security/tomoyo/group.c b/security/tomoyo/group.c
index e94352c..5fb0e12 100644
--- a/security/tomoyo/group.c
+++ b/security/tomoyo/group.c
@@ -1,21 +1,37 @@
/*
* security/tomoyo/group.c
*
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include <linux/slab.h>
#include "common.h"
+/**
+ * tomoyo_same_path_group - Check for duplicated "struct tomoyo_path_group" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
static bool tomoyo_same_path_group(const struct tomoyo_acl_head *a,
- const struct tomoyo_acl_head *b)
+ const struct tomoyo_acl_head *b)
{
return container_of(a, struct tomoyo_path_group, head)->member_name ==
container_of(b, struct tomoyo_path_group, head)->member_name;
}
+/**
+ * tomoyo_same_number_group - Check for duplicated "struct tomoyo_number_group" entry.
+ *
+ * @a: Pointer to "struct tomoyo_acl_head".
+ * @b: Pointer to "struct tomoyo_acl_head".
+ *
+ * Returns true if @a == @b, false otherwise.
+ */
static bool tomoyo_same_number_group(const struct tomoyo_acl_head *a,
- const struct tomoyo_acl_head *b)
+ const struct tomoyo_acl_head *b)
{
return !memcmp(&container_of(a, struct tomoyo_number_group, head)
->number,
@@ -28,48 +44,41 @@
/**
* tomoyo_write_group - Write "struct tomoyo_path_group"/"struct tomoyo_number_group" list.
*
- * @data: String to parse.
- * @is_delete: True if it is a delete request.
- * @type: Type of this group.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @type: Type of this group.
*
* Returns 0 on success, negative value otherwise.
*/
-int tomoyo_write_group(char *data, const bool is_delete, const u8 type)
+int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type)
{
- struct tomoyo_group *group;
- struct list_head *member;
- char *w[2];
+ struct tomoyo_group *group = tomoyo_get_group(param, type);
int error = -EINVAL;
- if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[1][0])
- return -EINVAL;
- group = tomoyo_get_group(w[0], type);
if (!group)
return -ENOMEM;
- member = &group->member_list;
+ param->list = &group->member_list;
if (type == TOMOYO_PATH_GROUP) {
struct tomoyo_path_group e = { };
- e.member_name = tomoyo_get_name(w[1]);
+ e.member_name = tomoyo_get_name(tomoyo_read_token(param));
if (!e.member_name) {
error = -ENOMEM;
goto out;
}
- error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
- member, tomoyo_same_path_group);
+ error = tomoyo_update_policy(&e.head, sizeof(e), param,
+ tomoyo_same_path_group);
tomoyo_put_name(e.member_name);
} else if (type == TOMOYO_NUMBER_GROUP) {
struct tomoyo_number_group e = { };
- if (w[1][0] == '@'
- || !tomoyo_parse_number_union(w[1], &e.number)
- || e.number.values[0] > e.number.values[1])
+ if (param->data[0] == '@' ||
+ !tomoyo_parse_number_union(param, &e.number))
goto out;
- error = tomoyo_update_policy(&e.head, sizeof(e), is_delete,
- member, tomoyo_same_number_group);
+ error = tomoyo_update_policy(&e.head, sizeof(e), param,
+ tomoyo_same_number_group);
/*
* tomoyo_put_number_union() is not needed because
- * w[1][0] != '@'.
+ * param->data[0] != '@'.
*/
}
- out:
+out:
tomoyo_put_group(group);
return error;
}
@@ -77,8 +86,8 @@
/**
* tomoyo_path_matches_group - Check whether the given pathname matches members of the given pathname group.
*
- * @pathname: The name of pathname.
- * @group: Pointer to "struct tomoyo_path_group".
+ * @pathname: The name of pathname.
+ * @group: Pointer to "struct tomoyo_path_group".
*
* Returns matched member's pathname if @pathname matches pathnames in @group,
* NULL otherwise.
diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c
index 3312e56..6797540 100644
--- a/security/tomoyo/load_policy.c
+++ b/security/tomoyo/load_policy.c
@@ -1,15 +1,32 @@
/*
* security/tomoyo/load_policy.c
*
- * Policy loader launcher for TOMOYO.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include "common.h"
-/* path to policy loader */
-static const char *tomoyo_loader = "/sbin/tomoyo-init";
+#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
+
+/*
+ * Path to the policy loader. (default = CONFIG_SECURITY_TOMOYO_POLICY_LOADER)
+ */
+static const char *tomoyo_loader;
+
+/**
+ * tomoyo_loader_setup - Set policy loader.
+ *
+ * @str: Program to use as a policy loader (e.g. /sbin/tomoyo-init).
+ *
+ * Returns 0.
+ */
+static int __init tomoyo_loader_setup(char *str)
+{
+ tomoyo_loader = str;
+ return 0;
+}
+
+__setup("TOMOYO_loader=", tomoyo_loader_setup);
/**
* tomoyo_policy_loader_exists - Check whether /sbin/tomoyo-init exists.
@@ -18,24 +35,38 @@
*/
static bool tomoyo_policy_loader_exists(void)
{
- /*
- * Don't activate MAC if the policy loader doesn't exist.
- * If the initrd includes /sbin/init but real-root-dev has not
- * mounted on / yet, activating MAC will block the system since
- * policies are not loaded yet.
- * Thus, let do_execve() call this function every time.
- */
struct path path;
-
+ if (!tomoyo_loader)
+ tomoyo_loader = CONFIG_SECURITY_TOMOYO_POLICY_LOADER;
if (kern_path(tomoyo_loader, LOOKUP_FOLLOW, &path)) {
- printk(KERN_INFO "Not activating Mandatory Access Control now "
- "since %s doesn't exist.\n", tomoyo_loader);
+ printk(KERN_INFO "Not activating Mandatory Access Control "
+ "as %s does not exist.\n", tomoyo_loader);
return false;
}
path_put(&path);
return true;
}
+/*
+ * Path to the trigger. (default = CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER)
+ */
+static const char *tomoyo_trigger;
+
+/**
+ * tomoyo_trigger_setup - Set trigger for activation.
+ *
+ * @str: Program to use as an activation trigger (e.g. /sbin/init).
+ *
+ * Returns 0.
+ */
+static int __init tomoyo_trigger_setup(char *str)
+{
+ tomoyo_trigger = str;
+ return 0;
+}
+
+__setup("TOMOYO_trigger=", tomoyo_trigger_setup);
+
/**
* tomoyo_load_policy - Run external policy loader to load policy.
*
@@ -51,24 +82,19 @@
*/
void tomoyo_load_policy(const char *filename)
{
+ static bool done;
char *argv[2];
char *envp[3];
- if (tomoyo_policy_loaded)
+ if (tomoyo_policy_loaded || done)
return;
- /*
- * Check filename is /sbin/init or /sbin/tomoyo-start.
- * /sbin/tomoyo-start is a dummy filename in case where /sbin/init can't
- * be passed.
- * You can create /sbin/tomoyo-start by
- * "ln -s /bin/true /sbin/tomoyo-start".
- */
- if (strcmp(filename, "/sbin/init") &&
- strcmp(filename, "/sbin/tomoyo-start"))
+ if (!tomoyo_trigger)
+ tomoyo_trigger = CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER;
+ if (strcmp(filename, tomoyo_trigger))
return;
if (!tomoyo_policy_loader_exists())
return;
-
+ done = true;
printk(KERN_INFO "Calling %s to load policy. Please wait.\n",
tomoyo_loader);
argv[0] = (char *) tomoyo_loader;
@@ -79,3 +105,5 @@
call_usermodehelper(argv[0], argv, envp, 1);
tomoyo_check_profile();
}
+
+#endif
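
The reworked loader above fires exactly once: the first exec() of the configured trigger runs the policy loader and then latches a static flag, so later execs are ignored even if loading failed. A userspace sketch of that one-shot logic, with illustrative default paths rather than the actual Kconfig values:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    static const char *trigger = "/sbin/init"; /* illustrative default */
    static bool done;

    static void load_policy(const char *filename)
    {
        if (done || strcmp(filename, trigger))
            return;
        done = true; /* never re-run, even if the loader fails */
        printf("Calling /sbin/tomoyo-init to load policy.\n");
    }

    int main(void)
    {
        load_policy("/bin/sh");    /* ignored: not the trigger */
        load_policy("/sbin/init"); /* loads policy once */
        load_policy("/sbin/init"); /* ignored: already done */
        return 0;
    }
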
diff --git a/security/tomoyo/memory.c b/security/tomoyo/memory.c
index 42a7b1b..7a56051 100644
--- a/security/tomoyo/memory.c
+++ b/security/tomoyo/memory.c
@@ -1,9 +1,7 @@
/*
* security/tomoyo/memory.c
*
- * Memory management functions for TOMOYO.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include <linux/hash.h>
@@ -29,10 +27,12 @@
panic("MAC Initialization failed.\n");
}
-/* Memory allocated for policy. */
-static atomic_t tomoyo_policy_memory_size;
-/* Quota for holding policy. */
-static unsigned int tomoyo_quota_for_policy;
+/* Lock for protecting tomoyo_memory_used. */
+static DEFINE_SPINLOCK(tomoyo_policy_memory_lock);
+/* Memory currently used by policy/audit log/query. */
+unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT];
+/* Memory quota for "policy"/"audit log"/"query". */
+unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT];
/**
* tomoyo_memory_ok - Check memory quota.
@@ -45,15 +45,20 @@
*/
bool tomoyo_memory_ok(void *ptr)
{
- size_t s = ptr ? ksize(ptr) : 0;
- atomic_add(s, &tomoyo_policy_memory_size);
- if (ptr && (!tomoyo_quota_for_policy ||
- atomic_read(&tomoyo_policy_memory_size)
- <= tomoyo_quota_for_policy)) {
- memset(ptr, 0, s);
- return true;
+ if (ptr) {
+ const size_t s = ksize(ptr);
+ bool result;
+ spin_lock(&tomoyo_policy_memory_lock);
+ tomoyo_memory_used[TOMOYO_MEMORY_POLICY] += s;
+ result = !tomoyo_memory_quota[TOMOYO_MEMORY_POLICY] ||
+ tomoyo_memory_used[TOMOYO_MEMORY_POLICY] <=
+ tomoyo_memory_quota[TOMOYO_MEMORY_POLICY];
+ if (!result)
+ tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s;
+ spin_unlock(&tomoyo_policy_memory_lock);
+ if (result)
+ return true;
}
- atomic_sub(s, &tomoyo_policy_memory_size);
tomoyo_warn_oom(__func__);
return false;
}
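
The new tomoyo_memory_ok() charges the allocation first, then rolls the charge back if a non-zero quota would be exceeded. A sketch of that charge-then-rollback check, with a pthread mutex and plain counters standing in for the kernel's spinlock and the tomoyo_memory_used[]/tomoyo_memory_quota[] arrays:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stddef.h>

    static pthread_mutex_t mem_lock = PTHREAD_MUTEX_INITIALIZER;
    static size_t memory_used;
    static size_t memory_quota; /* 0 means "no limit" */

    static bool memory_ok(size_t size)
    {
        bool result;
        pthread_mutex_lock(&mem_lock);
        memory_used += size;
        result = !memory_quota || memory_used <= memory_quota;
        if (!result)
            memory_used -= size; /* roll back the failed charge */
        pthread_mutex_unlock(&mem_lock);
        return result;
    }
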
@@ -86,22 +91,28 @@
*/
void tomoyo_memory_free(void *ptr)
{
- atomic_sub(ksize(ptr), &tomoyo_policy_memory_size);
+ size_t s = ksize(ptr);
+ spin_lock(&tomoyo_policy_memory_lock);
+ tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s;
+ spin_unlock(&tomoyo_policy_memory_lock);
kfree(ptr);
}
/**
* tomoyo_get_group - Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group".
*
- * @group_name: The name of address group.
- * @idx: Index number.
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @idx: Index number.
*
* Returns pointer to "struct tomoyo_group" on success, NULL otherwise.
*/
-struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 idx)
+struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param,
+ const u8 idx)
{
struct tomoyo_group e = { };
struct tomoyo_group *group = NULL;
+ struct list_head *list;
+ const char *group_name = tomoyo_read_token(param);
bool found = false;
if (!tomoyo_correct_word(group_name) || idx >= TOMOYO_MAX_GROUP)
return NULL;
@@ -110,10 +121,11 @@
return NULL;
if (mutex_lock_interruptible(&tomoyo_policy_lock))
goto out;
- list_for_each_entry(group, &tomoyo_group_list[idx], list) {
+ list = ¶m->ns->group_list[idx];
+ list_for_each_entry(group, list, head.list) {
if (e.group_name != group->group_name)
continue;
- atomic_inc(&group->users);
+ atomic_inc(&group->head.users);
found = true;
break;
}
@@ -121,15 +133,14 @@
struct tomoyo_group *entry = tomoyo_commit_ok(&e, sizeof(e));
if (entry) {
INIT_LIST_HEAD(&entry->member_list);
- atomic_set(&entry->users, 1);
- list_add_tail_rcu(&entry->list,
- &tomoyo_group_list[idx]);
+ atomic_set(&entry->head.users, 1);
+ list_add_tail_rcu(&entry->head.list, list);
group = entry;
found = true;
}
}
mutex_unlock(&tomoyo_policy_lock);
- out:
+out:
tomoyo_put_name(e.group_name);
return found ? group : NULL;
}
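
tomoyo_get_group() above is a find-or-create lookup: search under a mutex, take a reference on a hit, insert a refcounted entry on a miss. A userspace sketch of the idiom, with pthread locking standing in for tomoyo_policy_lock and illustrative types:

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    struct group {
        struct group *next;
        int users;
        const char *name;
    };

    static pthread_mutex_t policy_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct group *group_list;

    static struct group *get_group(const char *name)
    {
        struct group *g;
        pthread_mutex_lock(&policy_lock);
        for (g = group_list; g; g = g->next)
            if (!strcmp(g->name, name)) {
                g->users++; /* found: take a reference */
                goto out;
            }
        g = calloc(1, sizeof(*g));
        if (g) {
            g->users = 1;
            g->name = name; /* caller guarantees the string outlives g */
            g->next = group_list;
            group_list = g;
        }
    out:
        pthread_mutex_unlock(&policy_lock);
        return g;
    }
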
@@ -154,7 +165,6 @@
struct tomoyo_name *ptr;
unsigned int hash;
int len;
- int allocated_len;
struct list_head *head;
if (!name)
@@ -164,120 +174,43 @@
head = &tomoyo_name_list[hash_long(hash, TOMOYO_HASH_BITS)];
if (mutex_lock_interruptible(&tomoyo_policy_lock))
return NULL;
- list_for_each_entry(ptr, head, list) {
+ list_for_each_entry(ptr, head, head.list) {
if (hash != ptr->entry.hash || strcmp(name, ptr->entry.name))
continue;
- atomic_inc(&ptr->users);
+ atomic_inc(&ptr->head.users);
goto out;
}
ptr = kzalloc(sizeof(*ptr) + len, GFP_NOFS);
- allocated_len = ptr ? ksize(ptr) : 0;
- if (!ptr || (tomoyo_quota_for_policy &&
- atomic_read(&tomoyo_policy_memory_size) + allocated_len
- > tomoyo_quota_for_policy)) {
+ if (tomoyo_memory_ok(ptr)) {
+ ptr->entry.name = ((char *) ptr) + sizeof(*ptr);
+ memmove((char *) ptr->entry.name, name, len);
+ atomic_set(&ptr->head.users, 1);
+ tomoyo_fill_path_info(&ptr->entry);
+ list_add_tail(&ptr->head.list, head);
+ } else {
kfree(ptr);
ptr = NULL;
- tomoyo_warn_oom(__func__);
- goto out;
}
- atomic_add(allocated_len, &tomoyo_policy_memory_size);
- ptr->entry.name = ((char *) ptr) + sizeof(*ptr);
- memmove((char *) ptr->entry.name, name, len);
- atomic_set(&ptr->users, 1);
- tomoyo_fill_path_info(&ptr->entry);
- list_add_tail(&ptr->list, head);
- out:
+out:
mutex_unlock(&tomoyo_policy_lock);
return ptr ? &ptr->entry : NULL;
}
+/* Initial namespace. */
+struct tomoyo_policy_namespace tomoyo_kernel_namespace;
+
/**
* tomoyo_mm_init - Initialize mm related code.
*/
void __init tomoyo_mm_init(void)
{
int idx;
-
- for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++)
- INIT_LIST_HEAD(&tomoyo_policy_list[idx]);
- for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++)
- INIT_LIST_HEAD(&tomoyo_group_list[idx]);
for (idx = 0; idx < TOMOYO_MAX_HASH; idx++)
INIT_LIST_HEAD(&tomoyo_name_list[idx]);
+ tomoyo_kernel_namespace.name = "<kernel>";
+ tomoyo_init_policy_namespace(&tomoyo_kernel_namespace);
+ tomoyo_kernel_domain.ns = &tomoyo_kernel_namespace;
INIT_LIST_HEAD(&tomoyo_kernel_domain.acl_info_list);
- tomoyo_kernel_domain.domainname = tomoyo_get_name(TOMOYO_ROOT_NAME);
+ tomoyo_kernel_domain.domainname = tomoyo_get_name("<kernel>");
list_add_tail_rcu(&tomoyo_kernel_domain.list, &tomoyo_domain_list);
- idx = tomoyo_read_lock();
- if (tomoyo_find_domain(TOMOYO_ROOT_NAME) != &tomoyo_kernel_domain)
- panic("Can't register tomoyo_kernel_domain");
- {
- /* Load built-in policy. */
- tomoyo_write_transition_control("/sbin/hotplug", false,
- TOMOYO_TRANSITION_CONTROL_INITIALIZE);
- tomoyo_write_transition_control("/sbin/modprobe", false,
- TOMOYO_TRANSITION_CONTROL_INITIALIZE);
- }
- tomoyo_read_unlock(idx);
-}
-
-
-/* Memory allocated for query lists. */
-unsigned int tomoyo_query_memory_size;
-/* Quota for holding query lists. */
-unsigned int tomoyo_quota_for_query;
-
-/**
- * tomoyo_read_memory_counter - Check for memory usage in bytes.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns memory usage.
- */
-void tomoyo_read_memory_counter(struct tomoyo_io_buffer *head)
-{
- if (!head->r.eof) {
- const unsigned int policy
- = atomic_read(&tomoyo_policy_memory_size);
- const unsigned int query = tomoyo_query_memory_size;
- char buffer[64];
-
- memset(buffer, 0, sizeof(buffer));
- if (tomoyo_quota_for_policy)
- snprintf(buffer, sizeof(buffer) - 1,
- " (Quota: %10u)",
- tomoyo_quota_for_policy);
- else
- buffer[0] = '\0';
- tomoyo_io_printf(head, "Policy: %10u%s\n", policy,
- buffer);
- if (tomoyo_quota_for_query)
- snprintf(buffer, sizeof(buffer) - 1,
- " (Quota: %10u)",
- tomoyo_quota_for_query);
- else
- buffer[0] = '\0';
- tomoyo_io_printf(head, "Query lists: %10u%s\n", query,
- buffer);
- tomoyo_io_printf(head, "Total: %10u\n", policy + query);
- head->r.eof = true;
- }
-}
-
-/**
- * tomoyo_write_memory_quota - Set memory quota.
- *
- * @head: Pointer to "struct tomoyo_io_buffer".
- *
- * Returns 0.
- */
-int tomoyo_write_memory_quota(struct tomoyo_io_buffer *head)
-{
- char *data = head->write_buf;
- unsigned int size;
-
- if (sscanf(data, "Policy: %u", &size) == 1)
- tomoyo_quota_for_policy = size;
- else if (sscanf(data, "Query lists: %u", &size) == 1)
- tomoyo_quota_for_query = size;
- return 0;
}
diff --git a/security/tomoyo/mount.c b/security/tomoyo/mount.c
index 9fc2e15..bee09d0 100644
--- a/security/tomoyo/mount.c
+++ b/security/tomoyo/mount.c
@@ -1,28 +1,22 @@
/*
* security/tomoyo/mount.c
*
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include <linux/slab.h>
#include "common.h"
-/* Keywords for mount restrictions. */
-
-/* Allow to call 'mount --bind /source_dir /dest_dir' */
-#define TOMOYO_MOUNT_BIND_KEYWORD "--bind"
-/* Allow to call 'mount --move /old_dir /new_dir ' */
-#define TOMOYO_MOUNT_MOVE_KEYWORD "--move"
-/* Allow to call 'mount -o remount /dir ' */
-#define TOMOYO_MOUNT_REMOUNT_KEYWORD "--remount"
-/* Allow to call 'mount --make-unbindable /dir' */
-#define TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD "--make-unbindable"
-/* Allow to call 'mount --make-private /dir' */
-#define TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD "--make-private"
-/* Allow to call 'mount --make-slave /dir' */
-#define TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD "--make-slave"
-/* Allow to call 'mount --make-shared /dir' */
-#define TOMOYO_MOUNT_MAKE_SHARED_KEYWORD "--make-shared"
+/* String table for special mount operations. */
+static const char * const tomoyo_mounts[TOMOYO_MAX_SPECIAL_MOUNT] = {
+ [TOMOYO_MOUNT_BIND] = "--bind",
+ [TOMOYO_MOUNT_MOVE] = "--move",
+ [TOMOYO_MOUNT_REMOUNT] = "--remount",
+ [TOMOYO_MOUNT_MAKE_UNBINDABLE] = "--make-unbindable",
+ [TOMOYO_MOUNT_MAKE_PRIVATE] = "--make-private",
+ [TOMOYO_MOUNT_MAKE_SLAVE] = "--make-slave",
+ [TOMOYO_MOUNT_MAKE_SHARED] = "--make-shared",
+};
/**
* tomoyo_audit_mount_log - Audit mount log.
@@ -33,50 +27,42 @@
*/
static int tomoyo_audit_mount_log(struct tomoyo_request_info *r)
{
- const char *dev = r->param.mount.dev->name;
- const char *dir = r->param.mount.dir->name;
- const char *type = r->param.mount.type->name;
- const unsigned long flags = r->param.mount.flags;
- if (r->granted)
- return 0;
- if (!strcmp(type, TOMOYO_MOUNT_REMOUNT_KEYWORD))
- tomoyo_warn_log(r, "mount -o remount %s 0x%lX", dir, flags);
- else if (!strcmp(type, TOMOYO_MOUNT_BIND_KEYWORD)
- || !strcmp(type, TOMOYO_MOUNT_MOVE_KEYWORD))
- tomoyo_warn_log(r, "mount %s %s %s 0x%lX", type, dev, dir,
- flags);
- else if (!strcmp(type, TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD) ||
- !strcmp(type, TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD) ||
- !strcmp(type, TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD) ||
- !strcmp(type, TOMOYO_MOUNT_MAKE_SHARED_KEYWORD))
- tomoyo_warn_log(r, "mount %s %s 0x%lX", type, dir, flags);
- else
- tomoyo_warn_log(r, "mount -t %s %s %s 0x%lX", type, dev, dir,
- flags);
- return tomoyo_supervisor(r,
- TOMOYO_KEYWORD_ALLOW_MOUNT "%s %s %s 0x%lX\n",
- tomoyo_pattern(r->param.mount.dev),
- tomoyo_pattern(r->param.mount.dir), type,
- flags);
+ return tomoyo_supervisor(r, "file mount %s %s %s 0x%lX\n",
+ r->param.mount.dev->name,
+ r->param.mount.dir->name,
+ r->param.mount.type->name,
+ r->param.mount.flags);
}
+/**
+ * tomoyo_check_mount_acl - Check permission for mount() operation.
+ *
+ * @r: Pointer to "struct tomoyo_request_info".
+ * @ptr: Pointer to "struct tomoyo_acl_info".
+ *
+ * Returns true if granted, false otherwise.
+ */
static bool tomoyo_check_mount_acl(struct tomoyo_request_info *r,
const struct tomoyo_acl_info *ptr)
{
const struct tomoyo_mount_acl *acl =
container_of(ptr, typeof(*acl), head);
- return tomoyo_compare_number_union(r->param.mount.flags, &acl->flags) &&
- tomoyo_compare_name_union(r->param.mount.type, &acl->fs_type) &&
- tomoyo_compare_name_union(r->param.mount.dir, &acl->dir_name) &&
+ return tomoyo_compare_number_union(r->param.mount.flags,
+ &acl->flags) &&
+ tomoyo_compare_name_union(r->param.mount.type,
+ &acl->fs_type) &&
+ tomoyo_compare_name_union(r->param.mount.dir,
+ &acl->dir_name) &&
(!r->param.mount.need_dev ||
- tomoyo_compare_name_union(r->param.mount.dev, &acl->dev_name));
+ tomoyo_compare_name_union(r->param.mount.dev,
+ &acl->dev_name));
}
/**
* tomoyo_mount_acl - Check permission for mount() operation.
*
* @r: Pointer to "struct tomoyo_request_info".
- * @dev_name: Name of device file.
+ * @dev_name: Name of device file. May be NULL.
* @dir: Pointer to "struct path".
* @type: Name of filesystem type.
* @flags: Mount options.
@@ -86,8 +72,10 @@
* Caller holds tomoyo_read_lock().
*/
static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name,
- struct path *dir, char *type, unsigned long flags)
+ struct path *dir, const char *type,
+ unsigned long flags)
{
+ struct tomoyo_obj_info obj = { };
struct path path;
struct file_system_type *fstype = NULL;
const char *requested_type = NULL;
@@ -98,6 +86,7 @@
struct tomoyo_path_info rdir;
int need_dev = 0;
int error = -ENOMEM;
+ r->obj = &obj;
/* Get fstype. */
requested_type = tomoyo_encode(type);
@@ -107,6 +96,7 @@
tomoyo_fill_path_info(&rtype);
/* Get mount point. */
+ obj.path2 = *dir;
requested_dir_name = tomoyo_realpath_from_path(dir);
if (!requested_dir_name) {
error = -ENOMEM;
@@ -116,15 +106,15 @@
tomoyo_fill_path_info(&rdir);
/* Compare fs name. */
- if (!strcmp(type, TOMOYO_MOUNT_REMOUNT_KEYWORD)) {
+ if (type == tomoyo_mounts[TOMOYO_MOUNT_REMOUNT]) {
/* dev_name is ignored. */
- } else if (!strcmp(type, TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD) ||
- !strcmp(type, TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD) ||
- !strcmp(type, TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD) ||
- !strcmp(type, TOMOYO_MOUNT_MAKE_SHARED_KEYWORD)) {
+ } else if (type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_UNBINDABLE] ||
+ type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_PRIVATE] ||
+ type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_SLAVE] ||
+ type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_SHARED]) {
/* dev_name is ignored. */
- } else if (!strcmp(type, TOMOYO_MOUNT_BIND_KEYWORD) ||
- !strcmp(type, TOMOYO_MOUNT_MOVE_KEYWORD)) {
+ } else if (type == tomoyo_mounts[TOMOYO_MOUNT_BIND] ||
+ type == tomoyo_mounts[TOMOYO_MOUNT_MOVE]) {
need_dev = -1; /* dev_name is a directory */
} else {
fstype = get_fs_type(type);
@@ -142,8 +132,8 @@
error = -ENOENT;
goto out;
}
+ obj.path1 = path;
requested_dev_name = tomoyo_realpath_from_path(&path);
- path_put(&path);
if (!requested_dev_name) {
error = -ENOENT;
goto out;
@@ -176,22 +166,26 @@
if (fstype)
put_filesystem(fstype);
kfree(requested_type);
+ /* Drop refcount obtained by kern_path(). */
+ if (obj.path1.dentry)
+ path_put(&obj.path1);
return error;
}
/**
* tomoyo_mount_permission - Check permission for mount() operation.
*
- * @dev_name: Name of device file.
+ * @dev_name: Name of device file. May be NULL.
* @path: Pointer to "struct path".
- * @type: Name of filesystem type. May be NULL.
+ * @type: Name of filesystem type. May be NULL.
* @flags: Mount options.
- * @data_page: Optional data. May be NULL.
+ * @data_page: Optional data. May be NULL.
*
* Returns 0 on success, negative value otherwise.
*/
-int tomoyo_mount_permission(char *dev_name, struct path *path, char *type,
- unsigned long flags, void *data_page)
+int tomoyo_mount_permission(char *dev_name, struct path *path,
+ const char *type, unsigned long flags,
+ void *data_page)
{
struct tomoyo_request_info r;
int error;
@@ -203,31 +197,31 @@
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;
if (flags & MS_REMOUNT) {
- type = TOMOYO_MOUNT_REMOUNT_KEYWORD;
+ type = tomoyo_mounts[TOMOYO_MOUNT_REMOUNT];
flags &= ~MS_REMOUNT;
}
if (flags & MS_MOVE) {
- type = TOMOYO_MOUNT_MOVE_KEYWORD;
+ type = tomoyo_mounts[TOMOYO_MOUNT_MOVE];
flags &= ~MS_MOVE;
}
if (flags & MS_BIND) {
- type = TOMOYO_MOUNT_BIND_KEYWORD;
+ type = tomoyo_mounts[TOMOYO_MOUNT_BIND];
flags &= ~MS_BIND;
}
if (flags & MS_UNBINDABLE) {
- type = TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD;
+ type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_UNBINDABLE];
flags &= ~MS_UNBINDABLE;
}
if (flags & MS_PRIVATE) {
- type = TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD;
+ type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_PRIVATE];
flags &= ~MS_PRIVATE;
}
if (flags & MS_SLAVE) {
- type = TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD;
+ type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_SLAVE];
flags &= ~MS_SLAVE;
}
if (flags & MS_SHARED) {
- type = TOMOYO_MOUNT_MAKE_SHARED_KEYWORD;
+ type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_SHARED];
flags &= ~MS_SHARED;
}
if (!type)
@@ -237,49 +231,3 @@
tomoyo_read_unlock(idx);
return error;
}
-
-static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a,
- const struct tomoyo_acl_info *b)
-{
- const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head);
- const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head);
- return tomoyo_same_acl_head(&p1->head, &p2->head) &&
- tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) &&
- tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) &&
- tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) &&
- tomoyo_same_number_union(&p1->flags, &p2->flags);
-}
-
-/**
- * tomoyo_write_mount - Write "struct tomoyo_mount_acl" list.
- *
- * @data: String to parse.
- * @domain: Pointer to "struct tomoyo_domain_info".
- * @is_delete: True if it is a delete request.
- *
- * Returns 0 on success, negative value otherwise.
- *
- * Caller holds tomoyo_read_lock().
- */
-int tomoyo_write_mount(char *data, struct tomoyo_domain_info *domain,
- const bool is_delete)
-{
- struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL };
- int error = is_delete ? -ENOENT : -ENOMEM;
- char *w[4];
- if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[3][0])
- return -EINVAL;
- if (!tomoyo_parse_name_union(w[0], &e.dev_name) ||
- !tomoyo_parse_name_union(w[1], &e.dir_name) ||
- !tomoyo_parse_name_union(w[2], &e.fs_type) ||
- !tomoyo_parse_number_union(w[3], &e.flags))
- goto out;
- error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain,
- tomoyo_same_mount_acl, NULL);
- out:
- tomoyo_put_name_union(&e.dev_name);
- tomoyo_put_name_union(&e.dir_name);
- tomoyo_put_name_union(&e.fs_type);
- tomoyo_put_number_union(&e.flags);
- return error;
-}
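
tomoyo_mount_permission() above peels special mount flags off one at a time, substituting the corresponding keyword from tomoyo_mounts[] for the filesystem type. A trimmed sketch of that translation, handling only two flags with illustrative values (see <sys/mount.h> for the real ones):

    #include <stdio.h>

    #define MS_REMOUNT 32   /* illustrative values for the sketch */
    #define MS_BIND    4096

    static const char * const mounts[] = { "--remount", "--bind" };

    static const char *classify(unsigned long *flags, const char *type)
    {
        if (*flags & MS_REMOUNT) {
            *flags &= ~(unsigned long) MS_REMOUNT;
            return mounts[0];
        }
        if (*flags & MS_BIND) {
            *flags &= ~(unsigned long) MS_BIND;
            return mounts[1];
        }
        return type; /* an ordinary mount keeps its filesystem type */
    }

    int main(void)
    {
        unsigned long flags = MS_BIND;
        printf("%s\n", classify(&flags, "ext4")); /* prints "--bind" */
        return 0;
    }
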
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 8d95e91c9..6c601bd 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -1,9 +1,7 @@
/*
* security/tomoyo/realpath.c
*
- * Pathname calculation functions for TOMOYO.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include <linux/types.h>
@@ -70,6 +68,161 @@
}
/**
+ * tomoyo_get_absolute_path - Get the path of a dentry, ignoring the chroot'ed root.
+ *
+ * @path: Pointer to "struct path".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ *
+ * If the dentry is a directory, a trailing '/' is appended.
+ */
+static char *tomoyo_get_absolute_path(struct path *path, char * const buffer,
+ const int buflen)
+{
+ char *pos = ERR_PTR(-ENOMEM);
+ if (buflen >= 256) {
+ struct path ns_root = { };
+ /* go to whatever namespace root we are under */
+ pos = __d_path(path, &ns_root, buffer, buflen - 1);
+ if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
+ struct inode *inode = path->dentry->d_inode;
+ if (inode && S_ISDIR(inode->i_mode)) {
+ buffer[buflen - 2] = '/';
+ buffer[buflen - 1] = '\0';
+ }
+ }
+ }
+ return pos;
+}
+
+/**
+ * tomoyo_get_dentry_path - Get the path of a dentry.
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ *
+ * If the dentry is a directory, a trailing '/' is appended.
+ */
+static char *tomoyo_get_dentry_path(struct dentry *dentry, char * const buffer,
+ const int buflen)
+{
+ char *pos = ERR_PTR(-ENOMEM);
+ if (buflen >= 256) {
+ pos = dentry_path_raw(dentry, buffer, buflen - 1);
+ if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
+ struct inode *inode = dentry->d_inode;
+ if (inode && S_ISDIR(inode->i_mode)) {
+ buffer[buflen - 2] = '/';
+ buffer[buflen - 1] = '\0';
+ }
+ }
+ }
+ return pos;
+}
+
+/**
+ * tomoyo_get_local_path - Get the path of a dentry.
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer on success, an error code otherwise.
+ */
+static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer,
+ const int buflen)
+{
+ struct super_block *sb = dentry->d_sb;
+ char *pos = tomoyo_get_dentry_path(dentry, buffer, buflen);
+ if (IS_ERR(pos))
+ return pos;
+ /* Convert from $PID to self if $PID is current thread. */
+ if (sb->s_magic == PROC_SUPER_MAGIC && *pos == '/') {
+ char *ep;
+ const pid_t pid = (pid_t) simple_strtoul(pos + 1, &ep, 10);
+ if (*ep == '/' && pid && pid ==
+ task_tgid_nr_ns(current, sb->s_fs_info)) {
+ pos = ep - 5;
+ if (pos < buffer)
+ goto out;
+ memmove(pos, "/self", 5);
+ }
+ goto prepend_filesystem_name;
+ }
+ /* Use filesystem name for unnamed devices. */
+ if (!MAJOR(sb->s_dev))
+ goto prepend_filesystem_name;
+ {
+ struct inode *inode = sb->s_root->d_inode;
+ /*
+ * Use filesystem name if filesystem does not support rename()
+ * operation.
+ */
+ if (inode->i_op && !inode->i_op->rename)
+ goto prepend_filesystem_name;
+ }
+ /* Prepend device name. */
+ {
+ char name[64];
+ int name_len;
+ const dev_t dev = sb->s_dev;
+ name[sizeof(name) - 1] = '\0';
+ snprintf(name, sizeof(name) - 1, "dev(%u,%u):", MAJOR(dev),
+ MINOR(dev));
+ name_len = strlen(name);
+ pos -= name_len;
+ if (pos < buffer)
+ goto out;
+ memmove(pos, name, name_len);
+ return pos;
+ }
+ /* Prepend filesystem name. */
+prepend_filesystem_name:
+ {
+ const char *name = sb->s_type->name;
+ const int name_len = strlen(name);
+ pos -= name_len + 1;
+ if (pos < buffer)
+ goto out;
+ memmove(pos, name, name_len);
+ pos[name_len] = ':';
+ }
+ return pos;
+out:
+ return ERR_PTR(-ENOMEM);
+}
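
tomoyo_get_local_path() above builds names right-to-left: the dentry helpers return a position near the end of the buffer, and each prefix is copied in front of that position, failing if the buffer would underflow. A sketch of the prepend technique with an illustrative helper:

    #include <stdio.h>
    #include <string.h>

    static char *prepend(char *pos, char *buffer, const char *s)
    {
        const size_t len = strlen(s);
        if ((size_t) (pos - buffer) < len)
            return NULL; /* not enough headroom left */
        pos -= len;
        memmove(pos, s, len);
        return pos;
    }

    int main(void)
    {
        char buf[32];
        char *pos = buf + sizeof(buf) - 1;
        *pos = '\0';
        pos = prepend(pos, buf, "/self");
        if (pos)
            pos = prepend(pos, buf, "proc:");
        if (pos)
            printf("%s\n", pos); /* prints "proc:/self" */
        return 0;
    }
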
+
+/**
+ * tomoyo_get_socket_name - Get the name of a socket.
+ *
+ * @path: Pointer to "struct path".
+ * @buffer: Pointer to buffer to return value in.
+ * @buflen: Size of @buffer.
+ *
+ * Returns the buffer.
+ */
+static char *tomoyo_get_socket_name(struct path *path, char * const buffer,
+ const int buflen)
+{
+ struct inode *inode = path->dentry->d_inode;
+ struct socket *sock = inode ? SOCKET_I(inode) : NULL;
+ struct sock *sk = sock ? sock->sk : NULL;
+ if (sk) {
+ snprintf(buffer, buflen, "socket:[family=%u:type=%u:"
+ "protocol=%u]", sk->sk_family, sk->sk_type,
+ sk->sk_protocol);
+ } else {
+ snprintf(buffer, buflen, "socket:[unknown]");
+ }
+ return buffer;
+}
+
+/**
* tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root.
*
* @path: Pointer to "struct path".
@@ -90,55 +243,42 @@
char *name = NULL;
unsigned int buf_len = PAGE_SIZE / 2;
struct dentry *dentry = path->dentry;
- bool is_dir;
+ struct super_block *sb;
if (!dentry)
return NULL;
- is_dir = dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode);
+ sb = dentry->d_sb;
while (1) {
- struct path ns_root = { .mnt = NULL, .dentry = NULL };
char *pos;
+ struct inode *inode;
buf_len <<= 1;
kfree(buf);
buf = kmalloc(buf_len, GFP_NOFS);
if (!buf)
break;
+ /* To make sure that pos is '\0' terminated. */
+ buf[buf_len - 1] = '\0';
/* Get better name for socket. */
- if (dentry->d_sb->s_magic == SOCKFS_MAGIC) {
- struct inode *inode = dentry->d_inode;
- struct socket *sock = inode ? SOCKET_I(inode) : NULL;
- struct sock *sk = sock ? sock->sk : NULL;
- if (sk) {
- snprintf(buf, buf_len - 1, "socket:[family=%u:"
- "type=%u:protocol=%u]", sk->sk_family,
- sk->sk_type, sk->sk_protocol);
- } else {
- snprintf(buf, buf_len - 1, "socket:[unknown]");
- }
- name = tomoyo_encode(buf);
- break;
+ if (sb->s_magic == SOCKFS_MAGIC) {
+ pos = tomoyo_get_socket_name(path, buf, buf_len - 1);
+ goto encode;
}
- /* For "socket:[\$]" and "pipe:[\$]". */
+ /* For "pipe:[\$]". */
if (dentry->d_op && dentry->d_op->d_dname) {
pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
- if (IS_ERR(pos))
- continue;
- name = tomoyo_encode(pos);
- break;
+ goto encode;
}
- /* If we don't have a vfsmount, we can't calculate. */
- if (!path->mnt)
- break;
- /* go to whatever namespace root we are under */
- pos = __d_path(path, &ns_root, buf, buf_len);
- /* Prepend "/proc" prefix if using internal proc vfs mount. */
- if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) &&
- (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) {
- pos -= 5;
- if (pos >= buf)
- memcpy(pos, "/proc", 5);
- else
- pos = ERR_PTR(-ENOMEM);
- }
+ inode = sb->s_root->d_inode;
+ /*
+ * Get local name for filesystems without rename() operation
+ * or dentry without vfsmount.
+ */
+ if (!path->mnt || (inode->i_op && !inode->i_op->rename))
+ pos = tomoyo_get_local_path(path->dentry, buf,
+ buf_len - 1);
+ /* Get absolute name for the rest. */
+ else
+ pos = tomoyo_get_absolute_path(path, buf, buf_len - 1);
+encode:
if (IS_ERR(pos))
continue;
name = tomoyo_encode(pos);
@@ -147,16 +287,6 @@
kfree(buf);
if (!name)
tomoyo_warn_oom(__func__);
- else if (is_dir && *name) {
- /* Append trailing '/' if dentry is a directory. */
- char *pos = name + strlen(name) - 1;
- if (*pos != '/')
- /*
- * This is OK because tomoyo_encode() reserves space
- * for appending "/".
- */
- *++pos = '/';
- }
return name;
}
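
The loop in tomoyo_realpath_from_path() above retries with a doubled buffer until the name fits, since path lengths are unknown up front. A sketch of the same retry pattern, with snprintf() standing in for the kernel path helpers:

    #include <stdio.h>
    #include <stdlib.h>

    static char *format_name(const char *input)
    {
        size_t len = 32;
        char *buf = NULL;
        while (1) {
            int n;
            len <<= 1;
            free(buf);
            buf = malloc(len);
            if (!buf)
                return NULL;
            n = snprintf(buf, len, "prefix:%s", input);
            if (n >= 0 && (size_t) n < len)
                return buf; /* it fits; caller frees */
        }
    }

    int main(void)
    {
        char *name = format_name("/very/long/path");
        if (name)
            printf("%s\n", name);
        free(name);
        return 0;
    }
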
diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c
index e43d555..a49c3bf 100644
--- a/security/tomoyo/securityfs_if.c
+++ b/security/tomoyo/securityfs_if.c
@@ -1,9 +1,7 @@
/*
- * security/tomoyo/common.c
+ * security/tomoyo/securityfs_if.c
*
- * Securityfs interface for TOMOYO.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include <linux/security.h>
@@ -34,11 +32,11 @@
*/
static int tomoyo_release(struct inode *inode, struct file *file)
{
- return tomoyo_close_control(file);
+ return tomoyo_close_control(file->private_data);
}
/**
- * tomoyo_poll - poll() for /proc/ccs/ interface.
+ * tomoyo_poll - poll() for /sys/kernel/security/tomoyo/ interface.
*
* @file: Pointer to "struct file".
* @wait: Pointer to "poll_table".
@@ -63,7 +61,7 @@
static ssize_t tomoyo_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
- return tomoyo_read_control(file, buf, count);
+ return tomoyo_read_control(file->private_data, buf, count);
}
/**
@@ -79,7 +77,7 @@
static ssize_t tomoyo_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- return tomoyo_write_control(file, buf, count);
+ return tomoyo_write_control(file->private_data, buf, count);
}
/*
@@ -135,14 +133,14 @@
TOMOYO_DOMAINPOLICY);
tomoyo_create_entry("exception_policy", 0600, tomoyo_dir,
TOMOYO_EXCEPTIONPOLICY);
+ tomoyo_create_entry("audit", 0400, tomoyo_dir,
+ TOMOYO_AUDIT);
tomoyo_create_entry("self_domain", 0400, tomoyo_dir,
TOMOYO_SELFDOMAIN);
- tomoyo_create_entry(".domain_status", 0600, tomoyo_dir,
- TOMOYO_DOMAIN_STATUS);
tomoyo_create_entry(".process_status", 0600, tomoyo_dir,
TOMOYO_PROCESS_STATUS);
- tomoyo_create_entry("meminfo", 0600, tomoyo_dir,
- TOMOYO_MEMINFO);
+ tomoyo_create_entry("stat", 0644, tomoyo_dir,
+ TOMOYO_STAT);
tomoyo_create_entry("profile", 0600, tomoyo_dir,
TOMOYO_PROFILE);
tomoyo_create_entry("manager", 0600, tomoyo_dir,
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 95d3f95..f776400 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -1,20 +1,35 @@
/*
* security/tomoyo/tomoyo.c
*
- * LSM hooks for TOMOYO Linux.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include <linux/security.h>
#include "common.h"
+/**
+ * tomoyo_cred_alloc_blank - Target for security_cred_alloc_blank().
+ *
+ * @new: Pointer to "struct cred".
+ * @gfp: Memory allocation flags.
+ *
+ * Returns 0.
+ */
static int tomoyo_cred_alloc_blank(struct cred *new, gfp_t gfp)
{
new->security = NULL;
return 0;
}
+/**
+ * tomoyo_cred_prepare - Target for security_prepare_creds().
+ *
+ * @new: Pointer to "struct cred".
+ * @old: Pointer to "struct cred".
+ * @gfp: Memory allocation flags.
+ *
+ * Returns 0.
+ */
static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
gfp_t gfp)
{
@@ -25,11 +40,22 @@
return 0;
}
+/**
+ * tomoyo_cred_transfer - Target for security_transfer_creds().
+ *
+ * @new: Pointer to "struct cred".
+ * @old: Pointer to "struct cred".
+ */
static void tomoyo_cred_transfer(struct cred *new, const struct cred *old)
{
tomoyo_cred_prepare(new, old, 0);
}
+/**
+ * tomoyo_cred_free - Target for security_cred_free().
+ *
+ * @cred: Pointer to "struct cred".
+ */
static void tomoyo_cred_free(struct cred *cred)
{
struct tomoyo_domain_info *domain = cred->security;
@@ -37,6 +63,13 @@
atomic_dec(&domain->users);
}
+/**
+ * tomoyo_bprm_set_creds - Target for security_bprm_set_creds().
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
{
int rc;
@@ -51,12 +84,14 @@
*/
if (bprm->cred_prepared)
return 0;
+#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
/*
* Load policy if /sbin/tomoyo-init exists and /sbin/init is requested
* for the first time.
*/
if (!tomoyo_policy_loaded)
tomoyo_load_policy(bprm->filename);
+#endif
/*
* Release reference to "struct tomoyo_domain_info" stored inside
* "bprm->cred->security". New reference to "struct tomoyo_domain_info"
@@ -73,6 +108,13 @@
return 0;
}
+/**
+ * tomoyo_bprm_check_security - Target for security_bprm_check().
+ *
+ * @bprm: Pointer to "struct linux_binprm".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
{
struct tomoyo_domain_info *domain = bprm->cred->security;
@@ -90,20 +132,59 @@
/*
* Read permission is checked against interpreters using next domain.
*/
- return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY);
+ return tomoyo_check_open_permission(domain, &bprm->file->f_path,
+ O_RDONLY);
}
+/**
+ * tomoyo_inode_getattr - Target for security_inode_getattr().
+ *
+ * @mnt: Pointer to "struct vfsmount".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
+static int tomoyo_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+{
+ struct path path = { mnt, dentry };
+ return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, &path, NULL);
+}
+
+/**
+ * tomoyo_path_truncate - Target for security_path_truncate().
+ *
+ * @path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_truncate(struct path *path)
{
- return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path);
+ return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path, NULL);
}
+/**
+ * tomoyo_path_unlink - Target for security_path_unlink().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_unlink(struct path *parent, struct dentry *dentry)
{
struct path path = { parent->mnt, dentry };
- return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path);
+ return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path, NULL);
}
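/*
 * Editor's note (illustrative, not part of the original patch): the
 * extra NULL argument added to the tomoyo_path_perm() callers in this
 * file is the symlink target; only tomoyo_path_symlink() below passes
 * a real value (old_name).
 */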
+/**
+ * tomoyo_path_mkdir - Target for security_path_mkdir().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ * @mode: DAC permission mode.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry,
int mode)
{
@@ -112,19 +193,46 @@
mode & S_IALLUGO);
}
+/**
+ * tomoyo_path_rmdir - Target for security_path_rmdir().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_rmdir(struct path *parent, struct dentry *dentry)
{
struct path path = { parent->mnt, dentry };
- return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path);
+ return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path, NULL);
}
+/**
+ * tomoyo_path_symlink - Target for security_path_symlink().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ * @old_name: Symlink's content.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_symlink(struct path *parent, struct dentry *dentry,
const char *old_name)
{
struct path path = { parent->mnt, dentry };
- return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path);
+ return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path, old_name);
}
+/**
+ * tomoyo_path_mknod - Target for security_path_mknod().
+ *
+ * @parent: Pointer to "struct path".
+ * @dentry: Pointer to "struct dentry".
+ * @mode: DAC permission mode.
+ * @dev: Device attributes.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry,
int mode, unsigned int dev)
{
@@ -155,6 +263,15 @@
return tomoyo_path_number_perm(type, &path, perm);
}
+/**
+ * tomoyo_path_link - Target for security_path_link().
+ *
+ * @old_dentry: Pointer to "struct dentry".
+ * @new_dir: Pointer to "struct path".
+ * @new_dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_link(struct dentry *old_dentry, struct path *new_dir,
struct dentry *new_dentry)
{
@@ -163,6 +280,16 @@
return tomoyo_path2_perm(TOMOYO_TYPE_LINK, &path1, &path2);
}
+/**
+ * tomoyo_path_rename - Target for security_path_rename().
+ *
+ * @old_parent: Pointer to "struct path".
+ * @old_dentry: Pointer to "struct dentry".
+ * @new_parent: Pointer to "struct path".
+ * @new_dentry: Pointer to "struct dentry".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_rename(struct path *old_parent,
struct dentry *old_dentry,
struct path *new_parent,
@@ -173,14 +300,32 @@
return tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path1, &path2);
}
+/**
+ * tomoyo_file_fcntl - Target for security_file_fcntl().
+ *
+ * @file: Pointer to "struct file".
+ * @cmd: Command for fcntl().
+ * @arg: Argument for @cmd.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- if (cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND))
- return tomoyo_path_perm(TOMOYO_TYPE_REWRITE, &file->f_path);
- return 0;
+ if (!(cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND)))
+ return 0;
+ return tomoyo_check_open_permission(tomoyo_domain(), &file->f_path,
+ O_WRONLY | (arg & O_APPEND));
}
+/**
+ * tomoyo_dentry_open - Target for security_dentry_open().
+ *
+ * @f: Pointer to "struct file".
+ * @cred: Pointer to "struct cred".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
{
int flags = f->f_flags;
@@ -190,12 +335,30 @@
return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags);
}
+/**
+ * tomoyo_file_ioctl - Target for security_file_ioctl().
+ *
+ * @file: Pointer to "struct file".
+ * @cmd: Command for ioctl().
+ * @arg: Argument for @cmd.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
return tomoyo_path_number_perm(TOMOYO_TYPE_IOCTL, &file->f_path, cmd);
}
+/**
+ * tomoyo_path_chmod - Target for security_path_chmod().
+ *
+ * @dentry: Pointer to "struct dentry".
+ * @mnt: Pointer to "struct vfsmount".
+ * @mode: DAC permission mode.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
mode_t mode)
{
@@ -204,6 +367,15 @@
mode & S_IALLUGO);
}
+/**
+ * tomoyo_path_chown - Target for security_path_chown().
+ *
+ * @path: Pointer to "struct path".
+ * @uid: Owner ID.
+ * @gid: Group ID.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_chown(struct path *path, uid_t uid, gid_t gid)
{
int error = 0;
@@ -214,23 +386,57 @@
return error;
}
+/**
+ * tomoyo_path_chroot - Target for security_path_chroot().
+ *
+ * @path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_path_chroot(struct path *path)
{
- return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path);
+ return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path, NULL);
}
+/**
+ * tomoyo_sb_mount - Target for security_sb_mount().
+ *
+ * @dev_name: Name of device file. May be NULL.
+ * @path: Pointer to "struct path".
+ * @type: Name of filesystem type. May be NULL.
+ * @flags: Mount options.
+ * @data: Optional data. May be NULL.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_sb_mount(char *dev_name, struct path *path,
char *type, unsigned long flags, void *data)
{
return tomoyo_mount_permission(dev_name, path, type, flags, data);
}
+/**
+ * tomoyo_sb_umount - Target for security_sb_umount().
+ *
+ * @mnt: Pointer to "struct vfsmount".
+ * @flags: Unmount options.
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_sb_umount(struct vfsmount *mnt, int flags)
{
struct path path = { mnt, mnt->mnt_root };
- return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path);
+ return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path, NULL);
}
+/**
+ * tomoyo_sb_pivotroot - Target for security_sb_pivotroot().
+ *
+ * @old_path: Pointer to "struct path".
+ * @new_path: Pointer to "struct path".
+ *
+ * Returns 0 on success, negative value otherwise.
+ */
static int tomoyo_sb_pivotroot(struct path *old_path, struct path *new_path)
{
return tomoyo_path2_perm(TOMOYO_TYPE_PIVOT_ROOT, new_path, old_path);
@@ -258,6 +464,7 @@
.path_mknod = tomoyo_path_mknod,
.path_link = tomoyo_path_link,
.path_rename = tomoyo_path_rename,
+ .inode_getattr = tomoyo_inode_getattr,
.file_ioctl = tomoyo_file_ioctl,
.path_chmod = tomoyo_path_chmod,
.path_chown = tomoyo_path_chown,
@@ -270,6 +477,11 @@
/* Lock for GC. */
struct srcu_struct tomoyo_ss;
+/**
+ * tomoyo_init - Register TOMOYO Linux as a LSM module.
+ *
+ * Returns 0.
+ */
static int __init tomoyo_init(void)
{
struct cred *cred = (struct cred *) current_cred();
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index 6d53932..c36bd11 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -1,9 +1,7 @@
/*
* security/tomoyo/util.c
*
- * Utility functions for TOMOYO.
- *
- * Copyright (C) 2005-2010 NTT DATA CORPORATION
+ * Copyright (C) 2005-2011 NTT DATA CORPORATION
*/
#include <linux/slab.h>
@@ -15,18 +13,130 @@
/* Has /sbin/init started? */
bool tomoyo_policy_loaded;
+/*
+ * Mapping table from "enum tomoyo_mac_index" to
+ * "enum tomoyo_mac_category_index".
+ */
+const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX] = {
+ /* CONFIG::file group */
+ [TOMOYO_MAC_FILE_EXECUTE] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_OPEN] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_CREATE] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_UNLINK] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_GETATTR] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_MKDIR] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_RMDIR] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_MKFIFO] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_MKSOCK] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_TRUNCATE] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_SYMLINK] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_MKBLOCK] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_MKCHAR] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_LINK] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_RENAME] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_CHMOD] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_CHOWN] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_CHGRP] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_IOCTL] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_CHROOT] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_MOUNT] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_UMOUNT] = TOMOYO_MAC_CATEGORY_FILE,
+ [TOMOYO_MAC_FILE_PIVOT_ROOT] = TOMOYO_MAC_CATEGORY_FILE,
+};
+
+/**
+ * tomoyo_convert_time - Convert time_t to YYYY/MM/DD hh:mm:ss.
+ *
+ * @time: Seconds since 1970/01/01 00:00:00.
+ * @stamp: Pointer to "struct tomoyo_time".
+ *
+ * Returns nothing.
+ *
+ * This function does not handle the Y2038 problem.
+ */
+void tomoyo_convert_time(time_t time, struct tomoyo_time *stamp)
+{
+ static const u16 tomoyo_eom[2][12] = {
+ { 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+ { 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+ };
+ u16 y;
+ u8 m;
+ bool r;
+ stamp->sec = time % 60;
+ time /= 60;
+ stamp->min = time % 60;
+ time /= 60;
+ stamp->hour = time % 24;
+ time /= 24;
+ for (y = 1970; ; y++) {
+ const unsigned short days = (y & 3) ? 365 : 366;
+ if (time < days)
+ break;
+ time -= days;
+ }
+ r = (y & 3) == 0;
+ for (m = 0; m < 11 && time >= tomoyo_eom[r][m]; m++)
+ ;
+ if (m)
+ time -= tomoyo_eom[r][m - 1];
+ stamp->year = y;
+ stamp->month = ++m;
+ stamp->day = ++time;
+}
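/*
 * Illustrative check (editor's note, not part of the original patch):
 * feeding the well-known epoch value 1234567890 through the algorithm
 * above yields 2009/02/13 23:31:30 (UTC):
 *
 *	struct tomoyo_time stamp;
 *	tomoyo_convert_time(1234567890, &stamp);
 *	// stamp = { .sec = 30, .min = 31, .hour = 23,
 *	//           .day = 13, .month = 2, .year = 2009 }
 */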
+
+/**
+ * tomoyo_permstr - Find permission keywords.
+ *
+ * @string: String representation for permissions in foo/bar/buz format.
+ * @keyword: Keyword to find in @string.
+ *
+ * Returns true if @keyword was found in @string, false otherwise.
+ *
+ * This function assumes that strncmp(w1, w2, strlen(w1)) != 0 if w1 != w2.
+ */
+bool tomoyo_permstr(const char *string, const char *keyword)
+{
+ const char *cp = strstr(string, keyword);
+ if (cp)
+ return cp == string || *(cp - 1) == '/';
+ return false;
+}
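/*
 * Illustrative usage (editor's note, not part of the original patch):
 * matches are anchored at the start of a '/'-separated component only;
 * the end of the match is not checked, which is why the stated
 * assumption that no keyword is a prefix of another keyword matters.
 *
 *	tomoyo_permstr("read/write/execute", "write");	// true
 *	tomoyo_permstr("read/write/execute", "ite");	// false
 */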
+
+/**
+ * tomoyo_read_token - Read a word from a line.
+ *
+ * @param: Pointer to "struct tomoyo_acl_param".
+ *
+ * Returns a word on success, "" otherwise.
+ *
+ * To allow the caller to skip the NULL check, this function returns ""
+ * rather than NULL if there are no more words to read.
+ */
+char *tomoyo_read_token(struct tomoyo_acl_param *param)
+{
+ char *pos = param->data;
+ char *del = strchr(pos, ' ');
+ if (del)
+ *del++ = '\0';
+ else
+ del = pos + strlen(pos);
+ param->data = del;
+ return pos;
+}
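/*
 * Illustrative usage (editor's note, not part of the original patch),
 * assuming param->data points at the line "file read /etc/fstab":
 * successive calls return "file", "read" and "/etc/fstab"; every call
 * after that returns "" rather than NULL, as documented above.
 */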
+
/**
* tomoyo_parse_ulong - Parse an "unsigned long" value.
*
* @result: Pointer to "unsigned long".
* @str: Pointer to string to parse.
*
- * Returns value type on success, 0 otherwise.
+ * Returns one of the values in "enum tomoyo_value_type".
*
 * The @str is updated to point to the first character after the value
 * on success.
*/
-static u8 tomoyo_parse_ulong(unsigned long *result, char **str)
+u8 tomoyo_parse_ulong(unsigned long *result, char **str)
{
const char *cp = *str;
char *ep;
@@ -43,7 +153,7 @@
}
*result = simple_strtoul(cp, &ep, base);
if (cp == ep)
- return 0;
+ return TOMOYO_VALUE_TYPE_INVALID;
*str = ep;
switch (base) {
case 16:
@@ -81,63 +191,65 @@
/**
* tomoyo_parse_name_union - Parse a tomoyo_name_union.
*
- * @filename: Name or name group.
- * @ptr: Pointer to "struct tomoyo_name_union".
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr: Pointer to "struct tomoyo_name_union".
*
* Returns true on success, false otherwise.
*/
-bool tomoyo_parse_name_union(const char *filename,
+bool tomoyo_parse_name_union(struct tomoyo_acl_param *param,
struct tomoyo_name_union *ptr)
{
- if (!tomoyo_correct_word(filename))
- return false;
- if (filename[0] == '@') {
- ptr->group = tomoyo_get_group(filename + 1, TOMOYO_PATH_GROUP);
- ptr->is_group = true;
+ char *filename;
+ if (param->data[0] == '@') {
+ param->data++;
+ ptr->group = tomoyo_get_group(param, TOMOYO_PATH_GROUP);
return ptr->group != NULL;
}
+ filename = tomoyo_read_token(param);
+ if (!tomoyo_correct_word(filename))
+ return false;
ptr->filename = tomoyo_get_name(filename);
- ptr->is_group = false;
return ptr->filename != NULL;
}
/**
* tomoyo_parse_number_union - Parse a tomoyo_number_union.
*
- * @data: Number or number range or number group.
- * @ptr: Pointer to "struct tomoyo_number_union".
+ * @param: Pointer to "struct tomoyo_acl_param".
+ * @ptr: Pointer to "struct tomoyo_number_union".
*
* Returns true on success, false otherwise.
*/
-bool tomoyo_parse_number_union(char *data, struct tomoyo_number_union *num)
+bool tomoyo_parse_number_union(struct tomoyo_acl_param *param,
+ struct tomoyo_number_union *ptr)
{
+ char *data;
u8 type;
unsigned long v;
- memset(num, 0, sizeof(*num));
- if (data[0] == '@') {
- if (!tomoyo_correct_word(data))
- return false;
- num->group = tomoyo_get_group(data + 1, TOMOYO_NUMBER_GROUP);
- num->is_group = true;
- return num->group != NULL;
+ memset(ptr, 0, sizeof(*ptr));
+ if (param->data[0] == '@') {
+ param->data++;
+ ptr->group = tomoyo_get_group(param, TOMOYO_NUMBER_GROUP);
+ return ptr->group != NULL;
}
+ data = tomoyo_read_token(param);
type = tomoyo_parse_ulong(&v, &data);
- if (!type)
+ if (type == TOMOYO_VALUE_TYPE_INVALID)
return false;
- num->values[0] = v;
- num->min_type = type;
+ ptr->values[0] = v;
+ ptr->value_type[0] = type;
if (!*data) {
- num->values[1] = v;
- num->max_type = type;
+ ptr->values[1] = v;
+ ptr->value_type[1] = type;
return true;
}
if (*data++ != '-')
return false;
type = tomoyo_parse_ulong(&v, &data);
- if (!type || *data)
+ if (type == TOMOYO_VALUE_TYPE_INVALID || *data || ptr->values[0] > v)
return false;
- num->values[1] = v;
- num->max_type = type;
+ ptr->values[1] = v;
+ ptr->value_type[1] = type;
return true;
}
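/*
 * Illustrative inputs (editor's note, not part of the original patch):
 *	"10"		-> values[] = { 10, 10 }, both decimal
 *	"0x10-0x20"	-> values[] = { 16, 32 }, both hexadecimal
 *	"20-10"		-> rejected by the new values[0] > v check
 *	"@GRP"		-> looked up as a TOMOYO_NUMBER_GROUP reference
 */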
@@ -185,6 +297,30 @@
}
/**
+ * tomoyo_valid - Check whether the character is a valid char.
+ *
+ * @c: The character to check.
+ *
+ * Returns true if @c is a valid character, false otherwise.
+ */
+static inline bool tomoyo_valid(const unsigned char c)
+{
+ return c > ' ' && c < 127;
+}
+
+/**
+ * tomoyo_invalid - Check whether the character is an invalid char.
+ *
+ * @c: The character to check.
+ *
+ * Returns true if @c is an invalid character, false otherwise.
+ */
+static inline bool tomoyo_invalid(const unsigned char c)
+{
+ return c && (c <= ' ' || c >= 127);
+}
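/*
 * Editor's note (illustrative, not part of the original patch): the two
 * helpers above are not exact complements; '\0' is neither valid nor
 * invalid, so scanners can treat the terminator as plain end-of-input.
 */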
+
+/**
* tomoyo_str_starts - Check whether the given string starts with the given keyword.
*
* @src: Pointer to pointer to the string.
@@ -238,36 +374,9 @@
}
/**
- * tomoyo_tokenize - Tokenize string.
- *
- * @buffer: The line to tokenize.
- * @w: Pointer to "char *".
- * @size: Sizeof @w .
- *
- * Returns true on success, false otherwise.
- */
-bool tomoyo_tokenize(char *buffer, char *w[], size_t size)
-{
- int count = size / sizeof(char *);
- int i;
- for (i = 0; i < count; i++)
- w[i] = "";
- for (i = 0; i < count; i++) {
- char *cp = strchr(buffer, ' ');
- if (cp)
- *cp = '\0';
- w[i] = buffer;
- if (!cp)
- break;
- buffer = cp + 1;
- }
- return i < count || !*buffer;
-}
-
-/**
* tomoyo_correct_word2 - Validate a string.
*
 * @string: The string to check. May be non-'\0'-terminated.
* @len: Length of @string.
*
* Check whether the given string follows the naming rules.
@@ -377,26 +486,21 @@
*/
bool tomoyo_correct_domain(const unsigned char *domainname)
{
- if (!domainname || strncmp(domainname, TOMOYO_ROOT_NAME,
- TOMOYO_ROOT_NAME_LEN))
- goto out;
- domainname += TOMOYO_ROOT_NAME_LEN;
- if (!*domainname)
+ if (!domainname || !tomoyo_domain_def(domainname))
+ return false;
+ domainname = strchr(domainname, ' ');
+ if (!domainname++)
return true;
- if (*domainname++ != ' ')
- goto out;
while (1) {
const unsigned char *cp = strchr(domainname, ' ');
if (!cp)
break;
if (*domainname != '/' ||
!tomoyo_correct_word2(domainname, cp - domainname))
- goto out;
+ return false;
domainname = cp + 1;
}
return tomoyo_correct_path(domainname);
- out:
- return false;
}
/**
@@ -408,7 +512,19 @@
*/
bool tomoyo_domain_def(const unsigned char *buffer)
{
- return !strncmp(buffer, TOMOYO_ROOT_NAME, TOMOYO_ROOT_NAME_LEN);
+ const unsigned char *cp;
+ int len;
+ if (*buffer != '<')
+ return false;
+ cp = strchr(buffer, ' ');
+ if (!cp)
+ len = strlen(buffer);
+ else
+ len = cp - buffer;
+ if (buffer[len - 1] != '>' ||
+ !tomoyo_correct_word2(buffer + 1, len - 2))
+ return false;
+ return true;
}
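/*
 * Illustrative examples (editor's note, not part of the original patch):
 * "<kernel>" and "<kernel> /sbin/init" now satisfy tomoyo_domain_def(),
 * while a bare "kernel" (no angle brackets) does not.
 */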
/**
@@ -794,22 +910,24 @@
/**
* tomoyo_get_mode - Get MAC mode.
*
+ * @ns: Pointer to "struct tomoyo_policy_namespace".
* @profile: Profile number.
* @index: Index number of functionality.
*
* Returns mode.
*/
-int tomoyo_get_mode(const u8 profile, const u8 index)
+int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile,
+ const u8 index)
{
u8 mode;
const u8 category = TOMOYO_MAC_CATEGORY_FILE;
if (!tomoyo_policy_loaded)
return TOMOYO_CONFIG_DISABLED;
- mode = tomoyo_profile(profile)->config[index];
+ mode = tomoyo_profile(ns, profile)->config[index];
if (mode == TOMOYO_CONFIG_USE_DEFAULT)
- mode = tomoyo_profile(profile)->config[category];
+ mode = tomoyo_profile(ns, profile)->config[category];
if (mode == TOMOYO_CONFIG_USE_DEFAULT)
- mode = tomoyo_profile(profile)->default_config;
+ mode = tomoyo_profile(ns, profile)->default_config;
return mode & 3;
}
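/*
 * Editor's note (illustrative, not part of the original patch): the
 * lookup falls back in order: per-functionality config, then the
 * CONFIG::file category config, then the profile's default_config;
 * the final "& 3" keeps only the two mode bits.
 */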
@@ -833,65 +951,11 @@
profile = domain->profile;
r->profile = profile;
r->type = index;
- r->mode = tomoyo_get_mode(profile, index);
+ r->mode = tomoyo_get_mode(domain->ns, profile, index);
return r->mode;
}
/**
- * tomoyo_last_word - Get last component of a line.
- *
- * @line: A line.
- *
- * Returns the last word of a line.
- */
-const char *tomoyo_last_word(const char *name)
-{
- const char *cp = strrchr(name, ' ');
- if (cp)
- return cp + 1;
- return name;
-}
-
-/**
- * tomoyo_warn_log - Print warning or error message on console.
- *
- * @r: Pointer to "struct tomoyo_request_info".
- * @fmt: The printf()'s format string, followed by parameters.
- */
-void tomoyo_warn_log(struct tomoyo_request_info *r, const char *fmt, ...)
-{
- va_list args;
- char *buffer;
- const struct tomoyo_domain_info * const domain = r->domain;
- const struct tomoyo_profile *profile = tomoyo_profile(domain->profile);
- switch (r->mode) {
- case TOMOYO_CONFIG_ENFORCING:
- if (!profile->enforcing->enforcing_verbose)
- return;
- break;
- case TOMOYO_CONFIG_PERMISSIVE:
- if (!profile->permissive->permissive_verbose)
- return;
- break;
- case TOMOYO_CONFIG_LEARNING:
- if (!profile->learning->learning_verbose)
- return;
- break;
- }
- buffer = kmalloc(4096, GFP_NOFS);
- if (!buffer)
- return;
- va_start(args, fmt);
- vsnprintf(buffer, 4095, fmt, args);
- va_end(args);
- buffer[4095] = '\0';
- printk(KERN_WARNING "%s: Access %s denied for %s\n",
- r->mode == TOMOYO_CONFIG_ENFORCING ? "ERROR" : "WARNING", buffer,
- tomoyo_last_word(domain->domainname->name));
- kfree(buffer);
-}
-
-/**
* tomoyo_domain_quota_is_ok - Check for domain's quota.
*
* @r: Pointer to "struct tomoyo_request_info".
@@ -911,52 +975,43 @@
if (!domain)
return true;
list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) {
+ u16 perm;
+ u8 i;
if (ptr->is_deleted)
continue;
switch (ptr->type) {
- u16 perm;
- u8 i;
case TOMOYO_TYPE_PATH_ACL:
perm = container_of(ptr, struct tomoyo_path_acl, head)
->perm;
- for (i = 0; i < TOMOYO_MAX_PATH_OPERATION; i++)
- if (perm & (1 << i))
- count++;
- if (perm & (1 << TOMOYO_TYPE_READ_WRITE))
- count -= 2;
break;
case TOMOYO_TYPE_PATH2_ACL:
perm = container_of(ptr, struct tomoyo_path2_acl, head)
->perm;
- for (i = 0; i < TOMOYO_MAX_PATH2_OPERATION; i++)
- if (perm & (1 << i))
- count++;
break;
case TOMOYO_TYPE_PATH_NUMBER_ACL:
perm = container_of(ptr, struct tomoyo_path_number_acl,
head)->perm;
- for (i = 0; i < TOMOYO_MAX_PATH_NUMBER_OPERATION; i++)
- if (perm & (1 << i))
- count++;
break;
case TOMOYO_TYPE_MKDEV_ACL:
perm = container_of(ptr, struct tomoyo_mkdev_acl,
head)->perm;
- for (i = 0; i < TOMOYO_MAX_MKDEV_OPERATION; i++)
- if (perm & (1 << i))
- count++;
break;
default:
- count++;
+ perm = 1;
}
+ for (i = 0; i < 16; i++)
+ if (perm & (1 << i))
+ count++;
}
- if (count < tomoyo_profile(domain->profile)->learning->
- learning_max_entry)
+ if (count < tomoyo_profile(domain->ns, domain->profile)->
+ pref[TOMOYO_PREF_MAX_LEARNING_ENTRY])
return true;
- if (!domain->quota_warned) {
- domain->quota_warned = true;
- printk(KERN_WARNING "TOMOYO-WARNING: "
- "Domain '%s' has so many ACLs to hold. "
+ if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) {
+ domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true;
+ /* r->granted = false; */
+ tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]);
+ printk(KERN_WARNING "WARNING: "
+ "Domain '%s' has too many ACLs to hold. "
"Stopped learning mode.\n", domain->domainname->name);
}
return false;
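/*
 * Editor's note (illustrative, not part of the original patch): the
 * rework counts each set permission bit with one uniform 16-bit loop,
 * and ACL types without a perm bitmap are counted as a single entry
 * via the "perm = 1" default.
 */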
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index f134130..86d0caf 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -128,7 +128,8 @@
}
}
-static void pcm_debug_name(struct snd_pcm_substream *substream,
+#ifdef CONFIG_SND_DEBUG
+void snd_pcm_debug_name(struct snd_pcm_substream *substream,
char *name, size_t len)
{
snprintf(name, len, "pcmC%dD%d%c:%d",
@@ -137,6 +138,8 @@
substream->stream ? 'c' : 'p',
substream->number);
}
+EXPORT_SYMBOL(snd_pcm_debug_name);
+#endif
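/*
 * Illustrative output (editor's note, not part of the original patch):
 * card 0, device 1, playback substream 2 formats as "pcmC0D1p:2";
 * a capture substream uses 'c' in place of 'p'.
 */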
#define XRUN_DEBUG_BASIC (1<<0)
#define XRUN_DEBUG_STACK (1<<1) /* dump also stack */
@@ -168,7 +171,7 @@
snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN);
if (xrun_debug(substream, XRUN_DEBUG_BASIC)) {
char name[16];
- pcm_debug_name(substream, name, sizeof(name));
+ snd_pcm_debug_name(substream, name, sizeof(name));
snd_printd(KERN_DEBUG "XRUN: %s\n", name);
dump_stack_on_xrun(substream);
}
@@ -243,7 +246,7 @@
return;
if (xrun_debug(substream, XRUN_DEBUG_LOGONCE) && log->hit)
return;
- pcm_debug_name(substream, name, sizeof(name));
+ snd_pcm_debug_name(substream, name, sizeof(name));
for (cnt = 0, idx = log->idx; cnt < XRUN_LOG_CNT; cnt++) {
entry = &log->entries[idx];
if (entry->period_size == 0)
@@ -319,7 +322,7 @@
if (pos >= runtime->buffer_size) {
if (printk_ratelimit()) {
char name[16];
- pcm_debug_name(substream, name, sizeof(name));
+ snd_pcm_debug_name(substream, name, sizeof(name));
xrun_log_show(substream);
snd_printd(KERN_ERR "BUG: %s, pos = %ld, "
"buffer size = %ld, period size = %ld\n",
@@ -364,7 +367,7 @@
if (xrun_debug(substream, in_interrupt ?
XRUN_DEBUG_PERIODUPDATE : XRUN_DEBUG_HWPTRUPDATE)) {
char name[16];
- pcm_debug_name(substream, name, sizeof(name));
+ snd_pcm_debug_name(substream, name, sizeof(name));
snd_printd("%s_update: %s: pos=%u/%u/%u, "
"hwptr=%ld/%ld/%ld/%ld\n",
in_interrupt ? "period" : "hwptr",
diff --git a/sound/isa/msnd/msnd.h b/sound/isa/msnd/msnd.h
index 3773e24..a168ba3 100644
--- a/sound/isa/msnd/msnd.h
+++ b/sound/isa/msnd/msnd.h
@@ -249,7 +249,7 @@
/* State variables */
enum { msndClassic, msndPinnacle } type;
- mode_t mode;
+ fmode_t mode;
unsigned long flags;
#define F_RESETTING 0
#define F_HAVEDIGITAL 1
diff --git a/sound/oss/ad1848.c b/sound/oss/ad1848.c
index 4d2a6ae9..8a197fd 100644
--- a/sound/oss/ad1848.c
+++ b/sound/oss/ad1848.c
@@ -458,7 +458,7 @@
return mask;
}
-static void change_bits(ad1848_info * devc, unsigned char *regval,
+static void oss_change_bits(ad1848_info *devc, unsigned char *regval,
unsigned char *muteval, int dev, int chn, int newval)
{
unsigned char mask;
@@ -516,10 +516,10 @@
if (muteregoffs != regoffs) {
muteval = ad_read(devc, muteregoffs);
- change_bits(devc, &val, &muteval, dev, channel, value);
+ oss_change_bits(devc, &val, &muteval, dev, channel, value);
}
else
- change_bits(devc, &val, &val, dev, channel, value);
+ oss_change_bits(devc, &val, &val, dev, channel, value);
spin_lock_irqsave(&devc->lock,flags);
ad_write(devc, regoffs, val);
diff --git a/sound/oss/sb_mixer.c b/sound/oss/sb_mixer.c
index 2039d31..f8f3b7a 100644
--- a/sound/oss/sb_mixer.c
+++ b/sound/oss/sb_mixer.c
@@ -232,7 +232,7 @@
return 1;
}
-static void change_bits(sb_devc * devc, unsigned char *regval, int dev, int chn, int newval)
+static void oss_change_bits(sb_devc *devc, unsigned char *regval, int dev, int chn, int newval)
{
unsigned char mask;
int shift;
@@ -284,7 +284,7 @@
return -EINVAL;
val = sb_getmixer(devc, regoffs);
- change_bits(devc, &val, dev, LEFT_CHN, left);
+ oss_change_bits(devc, &val, dev, LEFT_CHN, left);
if ((*devc->iomap)[dev][RIGHT_CHN].regno != regoffs) /*
* Change register
@@ -304,7 +304,7 @@
* Read the new one
*/
}
- change_bits(devc, &val, dev, RIGHT_CHN, right);
+ oss_change_bits(devc, &val, dev, RIGHT_CHN, right);
sb_setmixer(devc, regoffs, val);
diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c
index b941d25..eae62eb 100644
--- a/sound/pci/asihpi/asihpi.c
+++ b/sound/pci/asihpi/asihpi.c
@@ -41,31 +41,10 @@
#include <sound/tlv.h>
#include <sound/hwdep.h>
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("AudioScience inc. <support@audioscience.com>");
MODULE_DESCRIPTION("AudioScience ALSA ASI5000 ASI6000 ASI87xx ASI89xx");
-#if defined CONFIG_SND_DEBUG
-/* copied from pcm_lib.c, hope later patch will make that version public
-and this copy can be removed */
-static inline void
-snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
-{
- snprintf(buf, size, "pcmC%dD%d%c:%d",
- substream->pcm->card->number,
- substream->pcm->device,
- substream->stream ? 'c' : 'p',
- substream->number);
-}
-#else
-static inline void
-snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size)
-{
- *buf = 0;
-}
-#endif
-
#if defined CONFIG_SND_DEBUG_VERBOSE
/**
* snd_printddd - very verbose debug printk
diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c
index 65fcf47..9683f84 100644
--- a/sound/pci/asihpi/hpioctl.c
+++ b/sound/pci/asihpi/hpioctl.c
@@ -107,7 +107,6 @@
union hpi_response_buffer_v1 *hr;
u16 res_max_size;
u32 uncopied_bytes;
- struct hpi_adapter *pa = NULL;
int err = 0;
if (cmd != HPI_IOCTL_LINUX)
@@ -182,8 +181,9 @@
/* -1=no data 0=read from user mem, 1=write to user mem */
int wrflag = -1;
u32 adapter = hm->h.adapter_index;
+ struct hpi_adapter *pa = &adapters[adapter];
- if ((adapter > HPI_MAX_ADAPTERS) || (!pa->type)) {
+ if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) {
hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER,
HPI_ADAPTER_OPEN,
HPI_ERROR_BAD_ADAPTER_NUMBER);
@@ -197,9 +197,7 @@
goto out;
}
- pa = &adapters[adapter];
-
- if (mutex_lock_interruptible(&adapters[adapter].mutex)) {
+ if (mutex_lock_interruptible(&pa->mutex)) {
err = -EINTR;
goto out;
}
@@ -235,8 +233,7 @@
"stream buffer size %d\n",
size);
- mutex_unlock(&adapters
- [adapter].mutex);
+ mutex_unlock(&pa->mutex);
err = -EINVAL;
goto out;
}
@@ -277,7 +274,7 @@
uncopied_bytes, size);
}
- mutex_unlock(&adapters[adapter].mutex);
+ mutex_unlock(&pa->mutex);
}
/* on return response size must be set */
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index 7489b46..bb7e102d6 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -243,6 +243,7 @@
config SND_HDA_POWER_SAVE
bool "Aggressive power-saving on HD-audio"
+ depends on PM
help
Say Y here to enable more aggressive power-saving mode on
HD-audio driver. The power-saving timeout can be configured
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 9c27a3a..3e7850c 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -91,8 +91,10 @@
#ifdef CONFIG_SND_HDA_POWER_SAVE
static void hda_power_work(struct work_struct *work);
static void hda_keep_power_on(struct hda_codec *codec);
+#define hda_codec_is_power_on(codec) ((codec)->power_on)
#else
static inline void hda_keep_power_on(struct hda_codec *codec) {}
+#define hda_codec_is_power_on(codec) 1
#endif
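/*
 * Editor's note (illustrative, not part of the original patch): with
 * CONFIG_SND_HDA_POWER_SAVE disabled the macro is constant 1, so the
 * hda_codec_is_power_on() checks added below compile away entirely.
 */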
/**
@@ -1101,7 +1103,7 @@
}
EXPORT_SYMBOL_HDA(snd_hda_shutup_pins);
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
/* Restore the pin controls cleared previously via snd_hda_shutup_pins() */
static void restore_shutup_pins(struct hda_codec *codec)
{
@@ -1499,7 +1501,7 @@
}
}
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
/* clean up all streams; called from suspend */
static void hda_cleanup_all_streams(struct hda_codec *codec)
{
@@ -1838,7 +1840,7 @@
}
EXPORT_SYMBOL_HDA(snd_hda_codec_amp_stereo);
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
/**
* snd_hda_codec_resume_amp - Resume all AMP commands from the cache
* @codec: HD-audio codec
@@ -1868,7 +1870,7 @@
}
}
EXPORT_SYMBOL_HDA(snd_hda_codec_resume_amp);
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
static u32 get_amp_max_value(struct hda_codec *codec, hda_nid_t nid, int dir,
unsigned int ofs)
@@ -3082,7 +3084,7 @@
}
EXPORT_SYMBOL_HDA(snd_hda_create_spdif_in_ctls);
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
/*
* command cache
*/
@@ -3199,53 +3201,32 @@
seq->param);
}
EXPORT_SYMBOL_HDA(snd_hda_sequence_write_cache);
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
-/*
- * set power state of the codec
- */
-static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
- unsigned int power_state)
+void snd_hda_codec_set_power_to_all(struct hda_codec *codec, hda_nid_t fg,
+ unsigned int power_state,
+ bool eapd_workaround)
{
- hda_nid_t nid;
+ hda_nid_t nid = codec->start_nid;
int i;
- /* this delay seems necessary to avoid click noise at power-down */
- if (power_state == AC_PWRST_D3)
- msleep(100);
- snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
- power_state);
- /* partial workaround for "azx_get_response timeout" */
- if (power_state == AC_PWRST_D0 &&
- (codec->vendor_id & 0xffff0000) == 0x14f10000)
- msleep(10);
-
- nid = codec->start_nid;
for (i = 0; i < codec->num_nodes; i++, nid++) {
unsigned int wcaps = get_wcaps(codec, nid);
- if (wcaps & AC_WCAP_POWER) {
- unsigned int wid_type = get_wcaps_type(wcaps);
- if (power_state == AC_PWRST_D3 &&
- wid_type == AC_WID_PIN) {
- unsigned int pincap;
- /*
- * don't power down the widget if it controls
- * eapd and EAPD_BTLENABLE is set.
- */
- pincap = snd_hda_query_pin_caps(codec, nid);
- if (pincap & AC_PINCAP_EAPD) {
- int eapd = snd_hda_codec_read(codec,
- nid, 0,
+ if (!(wcaps & AC_WCAP_POWER))
+ continue;
+ /* don't power down the widget if it controls eapd and
+ * EAPD_BTLENABLE is set.
+ */
+ if (eapd_workaround && power_state == AC_PWRST_D3 &&
+ get_wcaps_type(wcaps) == AC_WID_PIN &&
+ (snd_hda_query_pin_caps(codec, nid) & AC_PINCAP_EAPD)) {
+ int eapd = snd_hda_codec_read(codec, nid, 0,
AC_VERB_GET_EAPD_BTLENABLE, 0);
- eapd &= 0x02;
- if (eapd)
- continue;
- }
- }
- snd_hda_codec_write(codec, nid, 0,
- AC_VERB_SET_POWER_STATE,
- power_state);
+ if (eapd & 0x02)
+ continue;
}
+ snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE,
+ power_state);
}
if (power_state == AC_PWRST_D0) {
@@ -3262,6 +3243,26 @@
} while (time_after_eq(end_time, jiffies));
}
}
+EXPORT_SYMBOL_HDA(snd_hda_codec_set_power_to_all);
+
+/*
+ * set power state of the codec
+ */
+static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
+ unsigned int power_state)
+{
+ if (codec->patch_ops.set_power_state) {
+ codec->patch_ops.set_power_state(codec, fg, power_state);
+ return;
+ }
+
+ /* this delay seems necessary to avoid click noise at power-down */
+ if (power_state == AC_PWRST_D3)
+ msleep(100);
+ snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+ power_state);
+ snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
#ifdef CONFIG_SND_HDA_HWDEP
/* execute additional init verbs */
@@ -3274,7 +3275,7 @@
static inline void hda_exec_init_verbs(struct hda_codec *codec) {}
#endif
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
/*
* call suspend and power-down; used both from PM and power-save
*/
@@ -3315,7 +3316,7 @@
snd_hda_codec_resume_cache(codec);
}
}
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
/**
@@ -4071,9 +4072,6 @@
EXPORT_SYMBOL_HDA(snd_hda_add_new_ctls);
#ifdef CONFIG_SND_HDA_POWER_SAVE
-static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
- unsigned int power_state);
-
static void hda_power_work(struct work_struct *work)
{
struct hda_codec *codec =
@@ -4376,11 +4374,8 @@
if (!bus)
return;
list_for_each_entry(codec, &bus->codec_list, list) {
-#ifdef CONFIG_SND_HDA_POWER_SAVE
- if (!codec->power_on)
- continue;
-#endif
- if (codec->patch_ops.reboot_notify)
+ if (hda_codec_is_power_on(codec) &&
+ codec->patch_ops.reboot_notify)
codec->patch_ops.reboot_notify(codec);
}
}
@@ -5079,11 +5074,10 @@
struct hda_codec *codec;
list_for_each_entry(codec, &bus->codec_list, list) {
-#ifdef CONFIG_SND_HDA_POWER_SAVE
- if (!codec->power_on)
- continue;
-#endif
- hda_call_codec_suspend(codec);
+ if (hda_codec_is_power_on(codec))
+ hda_call_codec_suspend(codec);
+ if (codec->patch_ops.post_suspend)
+ codec->patch_ops.post_suspend(codec);
}
return 0;
}
@@ -5103,6 +5097,8 @@
struct hda_codec *codec;
list_for_each_entry(codec, &bus->codec_list, list) {
+ if (codec->patch_ops.pre_resume)
+ codec->patch_ops.pre_resume(codec);
if (snd_hda_codec_needs_resume(codec))
hda_call_codec_resume(codec);
}
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index f465e07..755f2b0 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -26,10 +26,6 @@
#include <sound/pcm.h>
#include <sound/hwdep.h>
-#if defined(CONFIG_PM) || defined(CONFIG_SND_HDA_POWER_SAVE)
-#define SND_HDA_NEEDS_RESUME /* resume control code is required */
-#endif
-
/*
* nodes
*/
@@ -704,8 +700,12 @@
int (*init)(struct hda_codec *codec);
void (*free)(struct hda_codec *codec);
void (*unsol_event)(struct hda_codec *codec, unsigned int res);
-#ifdef SND_HDA_NEEDS_RESUME
+ void (*set_power_state)(struct hda_codec *codec, hda_nid_t fg,
+ unsigned int power_state);
+#ifdef CONFIG_PM
int (*suspend)(struct hda_codec *codec, pm_message_t state);
+ int (*post_suspend)(struct hda_codec *codec);
+ int (*pre_resume)(struct hda_codec *codec);
int (*resume)(struct hda_codec *codec);
#endif
#ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -927,7 +927,7 @@
int snd_hda_queue_unsol_event(struct hda_bus *bus, u32 res, u32 res_ex);
/* cached write */
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
int snd_hda_codec_write_cache(struct hda_codec *codec, hda_nid_t nid,
int direct, unsigned int verb, unsigned int parm);
void snd_hda_sequence_write_cache(struct hda_codec *codec,
@@ -1008,6 +1008,9 @@
*/
void snd_hda_get_codec_name(struct hda_codec *codec, char *name, int namelen);
void snd_hda_bus_reboot_notify(struct hda_bus *bus);
+void snd_hda_codec_set_power_to_all(struct hda_codec *codec, hda_nid_t fg,
+ unsigned int power_state,
+ bool eapd_workaround);
/*
* power management
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 88b277e..2e7ac31 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -131,7 +131,7 @@
int direction, int idx, int mask, int val);
int snd_hda_codec_amp_stereo(struct hda_codec *codec, hda_nid_t nid,
int dir, int idx, int mask, int val);
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
void snd_hda_codec_resume_amp(struct hda_codec *codec);
#endif
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 1362c8b..8648917 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -563,7 +563,7 @@
snd_hda_detach_beep_device(codec);
}
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
static int ad198x_suspend(struct hda_codec *codec, pm_message_t state)
{
ad198x_shutup(codec);
@@ -579,7 +579,7 @@
#ifdef CONFIG_SND_HDA_POWER_SAVE
.check_power_status = ad198x_check_power_status,
#endif
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
.suspend = ad198x_suspend,
#endif
.reboot_notify = ad198x_shutup,
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 7f93739..47d6ffc 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -25,6 +25,7 @@
#include <sound/core.h>
#include "hda_codec.h"
#include "hda_local.h"
+#include <sound/tlv.h>
/*
*/
@@ -61,9 +62,15 @@
unsigned int hp_detect:1;
unsigned int mic_detect:1;
+ /* CS421x */
+ unsigned int spdif_detect:1;
+ unsigned int sense_b:1;
+ hda_nid_t vendor_nid;
+ struct hda_input_mux input_mux;
+ unsigned int last_input;
};
-/* available models */
+/* available models with CS420x */
enum {
CS420X_MBP53,
CS420X_MBP55,
@@ -72,6 +79,12 @@
CS420X_MODELS
};
+/* CS421x boards */
+enum {
+ CS421X_CDB4210,
+ CS421X_MODELS
+};
+
/* Vendor-specific processing widget */
#define CS420X_VENDOR_NID 0x11
#define CS_DIG_OUT1_PIN_NID 0x10
@@ -111,21 +124,42 @@
/* 0x0009 - 0x0014 -> 12 test regs */
/* 0x0015 - visibility reg */
+/*
+ * Cirrus Logic CS4210
+ *
+ * 1 DAC => HP(sense) / Speakers,
+ * 1 ADC <= LineIn(sense) / MicIn / DMicIn,
+ * 1 SPDIF OUT => SPDIF Transmitter (sense)
+ */
+#define CS4210_DAC_NID 0x02
+#define CS4210_ADC_NID 0x03
+#define CS421X_VENDOR_NID 0x0B
+#define CS421X_DMIC_PIN_NID 0x09 /* Port E */
+#define CS421X_SPDIF_PIN_NID 0x0A /* Port H */
+
+#define CS421X_IDX_DEV_CFG 0x01
+#define CS421X_IDX_ADC_CFG 0x02
+#define CS421X_IDX_DAC_CFG 0x03
+#define CS421X_IDX_SPK_CTL 0x04
+
+#define SPDIF_EVENT 0x04
static inline int cs_vendor_coef_get(struct hda_codec *codec, unsigned int idx)
{
- snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+ struct cs_spec *spec = codec->spec;
+ snd_hda_codec_write(codec, spec->vendor_nid, 0,
AC_VERB_SET_COEF_INDEX, idx);
- return snd_hda_codec_read(codec, CS420X_VENDOR_NID, 0,
+ return snd_hda_codec_read(codec, spec->vendor_nid, 0,
AC_VERB_GET_PROC_COEF, 0);
}
static inline void cs_vendor_coef_set(struct hda_codec *codec, unsigned int idx,
unsigned int coef)
{
- snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+ struct cs_spec *spec = codec->spec;
+ snd_hda_codec_write(codec, spec->vendor_nid, 0,
AC_VERB_SET_COEF_INDEX, idx);
- snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0,
+ snd_hda_codec_write(codec, spec->vendor_nid, 0,
AC_VERB_SET_PROC_COEF, coef);
}
@@ -347,15 +381,12 @@
nid = codec->start_nid;
for (i = 0; i < codec->num_nodes; i++, nid++) {
unsigned int type;
- int idx;
type = get_wcaps_type(get_wcaps(codec, nid));
if (type != AC_WID_AUD_IN)
continue;
- idx = snd_hda_get_conn_index(codec, nid, pin, 0);
- if (idx >= 0) {
- *idxp = idx;
+ *idxp = snd_hda_get_conn_index(codec, nid, pin, false);
+ if (*idxp >= 0)
return nid;
- }
}
return 0;
}
@@ -835,6 +866,8 @@
/*
* auto-mute and auto-mic switching
+ * CS421x auto-output redirecting
+ * HP/SPK/SPDIF
*/
static void cs_automute(struct hda_codec *codec)
@@ -842,9 +875,25 @@
struct cs_spec *spec = codec->spec;
struct auto_pin_cfg *cfg = &spec->autocfg;
unsigned int hp_present;
+ unsigned int spdif_present;
hda_nid_t nid;
int i;
+ spdif_present = 0;
+ if (cfg->dig_outs) {
+ nid = cfg->dig_out_pins[0];
+ if (is_jack_detectable(codec, nid)) {
+ /*
+ TODO: SPDIF output redirect when SENSE_B is enabled.
+ Shared (SENSE_A) jack (e.g. HP/mini-TOSLINK)
+ assumed.
+ */
+ if (snd_hda_jack_detect(codec, nid)
+ /* && spec->sense_b */)
+ spdif_present = 1;
+ }
+ }
+
hp_present = 0;
for (i = 0; i < cfg->hp_outs; i++) {
nid = cfg->hp_pins[i];
@@ -854,11 +903,19 @@
if (hp_present)
break;
}
+
+ /* mute speakers if spdif or hp jack is plugged in */
for (i = 0; i < cfg->speaker_outs; i++) {
nid = cfg->speaker_pins[i];
snd_hda_codec_write(codec, nid, 0,
AC_VERB_SET_PIN_WIDGET_CONTROL,
hp_present ? 0 : PIN_OUT);
+ /* SPDIF detection is specific to CS421x */
+ if (spec->vendor_nid == CS421X_VENDOR_NID) {
+ snd_hda_codec_write(codec, nid, 0,
+ AC_VERB_SET_PIN_WIDGET_CONTROL,
+ spdif_present ? 0 : PIN_OUT);
+ }
}
if (spec->board_config == CS420X_MBP53 ||
spec->board_config == CS420X_MBP55 ||
@@ -867,21 +924,62 @@
snd_hda_codec_write(codec, 0x01, 0,
AC_VERB_SET_GPIO_DATA, gpio);
}
+
+ /* specific to CS421x */
+ if (spec->vendor_nid == CS421X_VENDOR_NID) {
+ /* mute HPs if spdif jack (SENSE_B) is present */
+ for (i = 0; i < cfg->hp_outs; i++) {
+ nid = cfg->hp_pins[i];
+ snd_hda_codec_write(codec, nid, 0,
+ AC_VERB_SET_PIN_WIDGET_CONTROL,
+ (spdif_present && spec->sense_b) ? 0 : PIN_HP);
+ }
+
+ /* SPDIF TX on/off */
+ if (cfg->dig_outs) {
+ nid = cfg->dig_out_pins[0];
+ snd_hda_codec_write(codec, nid, 0,
+ AC_VERB_SET_PIN_WIDGET_CONTROL,
+ spdif_present ? PIN_OUT : 0);
+
+ }
+ /* Update board GPIOs if necessary ... */
+ }
}
+/*
+ * Auto-input redirect for CS421x
+ * Switch at most 3 inputs of a single ADC (nid 3)
+ */
+
static void cs_automic(struct hda_codec *codec)
{
struct cs_spec *spec = codec->spec;
struct auto_pin_cfg *cfg = &spec->autocfg;
hda_nid_t nid;
unsigned int present;
-
+
nid = cfg->inputs[spec->automic_idx].pin;
present = snd_hda_jack_detect(codec, nid);
- if (present)
- change_cur_input(codec, spec->automic_idx, 0);
- else
- change_cur_input(codec, !spec->automic_idx, 0);
+
+ /* specific to CS421x, single ADC */
+ if (spec->vendor_nid == CS421X_VENDOR_NID) {
+ if (present) {
+ spec->last_input = spec->cur_input;
+ spec->cur_input = spec->automic_idx;
+ } else {
+ spec->cur_input = spec->last_input;
+ }
+
+ snd_hda_codec_write_cache(codec, spec->cur_adc, 0,
+ AC_VERB_SET_CONNECT_SEL,
+ spec->adc_idx[spec->cur_input]);
+ } else {
+ if (present)
+ change_cur_input(codec, spec->automic_idx, 0);
+ else
+ change_cur_input(codec, !spec->automic_idx, 0);
+ }
}
/*
@@ -911,23 +1009,28 @@
for (i = 0; i < cfg->line_outs; i++)
snd_hda_codec_write(codec, cfg->line_out_pins[i], 0,
AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
+ /* HP */
for (i = 0; i < cfg->hp_outs; i++) {
hda_nid_t nid = cfg->hp_pins[i];
snd_hda_codec_write(codec, nid, 0,
AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP);
if (!cfg->speaker_outs)
continue;
- if (is_jack_detectable(codec, nid)) {
+ if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) {
snd_hda_codec_write(codec, nid, 0,
AC_VERB_SET_UNSOLICITED_ENABLE,
AC_USRSP_EN | HP_EVENT);
spec->hp_detect = 1;
}
}
+
+ /* Speaker */
for (i = 0; i < cfg->speaker_outs; i++)
snd_hda_codec_write(codec, cfg->speaker_pins[i], 0,
AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
- if (spec->hp_detect)
+
+ /* SPDIF is enabled on presence detect for CS421x */
+ if (spec->hp_detect || spec->spdif_detect)
cs_automute(codec);
}
@@ -961,19 +1064,31 @@
AC_VERB_SET_UNSOLICITED_ENABLE,
AC_USRSP_EN | MIC_EVENT);
}
- change_cur_input(codec, spec->cur_input, 1);
- if (spec->mic_detect)
- cs_automic(codec);
+ /* specific to CS421x */
+ if (spec->vendor_nid == CS421X_VENDOR_NID) {
+ if (spec->mic_detect)
+ cs_automic(codec);
+ else {
+ spec->cur_adc = spec->adc_nid[spec->cur_input];
+ snd_hda_codec_write(codec, spec->cur_adc, 0,
+ AC_VERB_SET_CONNECT_SEL,
+ spec->adc_idx[spec->cur_input]);
+ }
+ } else {
+ change_cur_input(codec, spec->cur_input, 1);
+ if (spec->mic_detect)
+ cs_automic(codec);
- coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
- if (is_active_pin(codec, CS_DMIC2_PIN_NID))
- coef |= 0x0500; /* DMIC2 enable 2 channels, disable GPIO1 */
- if (is_active_pin(codec, CS_DMIC1_PIN_NID))
- coef |= 0x1800; /* DMIC1 enable 2 channels, disable GPIO0
- * No effect if SPDIF_OUT2 is selected in
- * IDX_SPDIF_CTL.
- */
- cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+ coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
+ if (is_active_pin(codec, CS_DMIC2_PIN_NID))
+ coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */
+ if (is_active_pin(codec, CS_DMIC1_PIN_NID))
+ coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off
+ * No effect if SPDIF_OUT2 is
+ * selected in IDX_SPDIF_CTL.
+ */
+ cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+ }
}
static const struct hda_verb cs_coef_init_verbs[] = {
@@ -1221,16 +1336,16 @@
[CS420X_IMAC27] = imac27_pincfgs,
};
-static void fix_pincfg(struct hda_codec *codec, int model)
+static void fix_pincfg(struct hda_codec *codec, int model,
+ const struct cs_pincfg **pin_configs)
{
- const struct cs_pincfg *cfg = cs_pincfgs[model];
+ const struct cs_pincfg *cfg = pin_configs[model];
if (!cfg)
return;
for (; cfg->nid; cfg++)
snd_hda_codec_set_pincfg(codec, cfg->nid, cfg->val);
}
-
static int patch_cs420x(struct hda_codec *codec)
{
struct cs_spec *spec;
@@ -1241,11 +1356,13 @@
return -ENOMEM;
codec->spec = spec;
+ spec->vendor_nid = CS420X_VENDOR_NID;
+
spec->board_config =
snd_hda_check_board_config(codec, CS420X_MODELS,
cs420x_models, cs420x_cfg_tbl);
if (spec->board_config >= 0)
- fix_pincfg(codec, spec->board_config);
+ fix_pincfg(codec, spec->board_config, cs_pincfgs);
switch (spec->board_config) {
case CS420X_IMAC27:
@@ -1272,6 +1389,562 @@
return err;
}
+/*
+ * Cirrus Logic CS4210
+ *
+ * 1 DAC => HP(sense) / Speakers,
+ * 1 ADC <= LineIn(sense) / MicIn / DMicIn,
+ * 1 SPDIF OUT => SPDIF Transmitter (sense)
+ */
+
+/* CS4210 board names */
+static const char *cs421x_models[CS421X_MODELS] = {
+ [CS421X_CDB4210] = "cdb4210",
+};
+
+static const struct snd_pci_quirk cs421x_cfg_tbl[] = {
+ /* Test Intel board + CDB2410 */
+ SND_PCI_QUIRK(0x8086, 0x5001, "DP45SG/CDB4210", CS421X_CDB4210),
+ {} /* terminator */
+};
+
+/* CS4210 board pinconfigs */
+/* Default CS4210 (CDB4210)*/
+static const struct cs_pincfg cdb4210_pincfgs[] = {
+ { 0x05, 0x0321401f },
+ { 0x06, 0x90170010 },
+ { 0x07, 0x03813031 },
+ { 0x08, 0xb7a70037 },
+ { 0x09, 0xb7a6003e },
+ { 0x0a, 0x034510f0 },
+ {} /* terminator */
+};
+
+static const struct cs_pincfg *cs421x_pincfgs[CS421X_MODELS] = {
+ [CS421X_CDB4210] = cdb4210_pincfgs,
+};
+
+static const struct hda_verb cs421x_coef_init_verbs[] = {
+ {0x0B, AC_VERB_SET_PROC_STATE, 1},
+ {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_DEV_CFG},
+ /*
+ Disable Coefficient Index Auto-Increment(DAI)=1,
+ PDREF=0
+ */
+ {0x0B, AC_VERB_SET_PROC_COEF, 0x0001 },
+
+ {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_ADC_CFG},
+ /* ADC SZCMode = Digital Soft Ramp */
+ {0x0B, AC_VERB_SET_PROC_COEF, 0x0002 },
+
+ {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_DAC_CFG},
+ {0x0B, AC_VERB_SET_PROC_COEF,
+ (0x0002 /* DAC SZCMode = Digital Soft Ramp */
+ | 0x0004 /* Mute DAC on FIFO error */
+ | 0x0008 /* Enable DAC High Pass Filter */
+ )},
+ {} /* terminator */
+};
+
+/* Errata: CS4210 rev A1 Silicon
+ *
+ * http://www.cirrus.com/en/pubs/errata/
+ *
+ * Description:
+ * 1. Performance degradation is present in the ADC.
+ * 2. Speaker output is not completely muted upon HP detect.
+ * 3. Noise is present when clipping occurs on the amplified
+ * speaker outputs.
+ *
+ * Workaround:
+ * The following verb sequence written to the registers during
+ * initialization will correct the issues listed above.
+ */
+
+static const struct hda_verb cs421x_coef_init_verbs_A1_silicon_fixes[] = {
+ {0x0B, AC_VERB_SET_PROC_STATE, 0x01}, /* VPW: processing on */
+
+ {0x0B, AC_VERB_SET_COEF_INDEX, 0x0006},
+ {0x0B, AC_VERB_SET_PROC_COEF, 0x9999}, /* Test mode: on */
+
+ {0x0B, AC_VERB_SET_COEF_INDEX, 0x000A},
+ {0x0B, AC_VERB_SET_PROC_COEF, 0x14CB}, /* Chop double */
+
+ {0x0B, AC_VERB_SET_COEF_INDEX, 0x0011},
+ {0x0B, AC_VERB_SET_PROC_COEF, 0xA2D0}, /* Increase ADC current */
+
+ {0x0B, AC_VERB_SET_COEF_INDEX, 0x001A},
+ {0x0B, AC_VERB_SET_PROC_COEF, 0x02A9}, /* Mute speaker */
+
+ {0x0B, AC_VERB_SET_COEF_INDEX, 0x001B},
+ {0x0B, AC_VERB_SET_PROC_COEF, 0x1006}, /* Remove noise */
+
+ {} /* terminator */
+};
+
+/* Speaker Amp Gain is controlled by the vendor widget's coef 4 */
+static const DECLARE_TLV_DB_SCALE(cs421x_speaker_boost_db_scale, 900, 300, 0);
+
+static int cs421x_boost_vol_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+ uinfo->count = 1;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = 3;
+ return 0;
+}
+
+static int cs421x_boost_vol_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+
+ ucontrol->value.integer.value[0] =
+ cs_vendor_coef_get(codec, CS421X_IDX_SPK_CTL) & 0x0003;
+ return 0;
+}
+
+static int cs421x_boost_vol_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+
+ unsigned int vol = ucontrol->value.integer.value[0];
+ unsigned int coef =
+ cs_vendor_coef_get(codec, CS421X_IDX_SPK_CTL);
+ unsigned int original_coef = coef;
+
+ coef &= ~0x0003;
+ coef |= (vol & 0x0003);
+ if (original_coef == coef)
+ return 0;
+ else {
+ cs_vendor_coef_set(codec, CS421X_IDX_SPK_CTL, coef);
+ return 1;
+ }
+}
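/*
 * Illustrative mapping (editor's note, not part of the original patch):
 * the control's 0..3 steps select +9/+12/+15/+18 dB per the TLV scale
 * declared above (base 900 = 9.00 dB, step 300 = 3.00 dB).
 */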
+
+static const struct snd_kcontrol_new cs421x_speaker_boost_ctl = {
+ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+ .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE |
+ SNDRV_CTL_ELEM_ACCESS_TLV_READ),
+ .name = "Speaker Boost Playback Volume",
+ .info = cs421x_boost_vol_info,
+ .get = cs421x_boost_vol_get,
+ .put = cs421x_boost_vol_put,
+ .tlv = { .p = cs421x_speaker_boost_db_scale },
+};
+
+static void cs421x_pinmux_init(struct hda_codec *codec)
+{
+ struct cs_spec *spec = codec->spec;
+ unsigned int def_conf, coef;
+
+ /* GPIO, DMIC_SCL, DMIC_SDA and SENSE_B are multiplexed */
+ coef = cs_vendor_coef_get(codec, CS421X_IDX_DEV_CFG);
+
+ if (spec->gpio_mask)
+ coef |= 0x0008; /* B1,B2 are GPIOs */
+ else
+ coef &= ~0x0008;
+
+ if (spec->sense_b)
+ coef |= 0x0010; /* B2 is SENSE_B, not inverted */
+ else
+ coef &= ~0x0010;
+
+ cs_vendor_coef_set(codec, CS421X_IDX_DEV_CFG, coef);
+
+ if ((spec->gpio_mask || spec->sense_b) &&
+ is_active_pin(codec, CS421X_DMIC_PIN_NID)) {
+
+ /*
+ GPIO or SENSE_B forced - disconnect the DMIC pin.
+ */
+ def_conf = snd_hda_codec_get_pincfg(codec, CS421X_DMIC_PIN_NID);
+ def_conf &= ~AC_DEFCFG_PORT_CONN;
+ def_conf |= (AC_JACK_PORT_NONE << AC_DEFCFG_PORT_CONN_SHIFT);
+ snd_hda_codec_set_pincfg(codec, CS421X_DMIC_PIN_NID, def_conf);
+ }
+}
+
+static void init_cs421x_digital(struct hda_codec *codec)
+{
+ struct cs_spec *spec = codec->spec;
+ struct auto_pin_cfg *cfg = &spec->autocfg;
+ int i;
+
+ for (i = 0; i < cfg->dig_outs; i++) {
+ hda_nid_t nid = cfg->dig_out_pins[i];
+ if (!cfg->speaker_outs)
+ continue;
+ if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) {
+
+ snd_hda_codec_write(codec, nid, 0,
+ AC_VERB_SET_UNSOLICITED_ENABLE,
+ AC_USRSP_EN | SPDIF_EVENT);
+ spec->spdif_detect = 1;
+ }
+ }
+}
+
+static int cs421x_init(struct hda_codec *codec)
+{
+ struct cs_spec *spec = codec->spec;
+
+ snd_hda_sequence_write(codec, cs421x_coef_init_verbs);
+ snd_hda_sequence_write(codec, cs421x_coef_init_verbs_A1_silicon_fixes);
+
+ cs421x_pinmux_init(codec);
+
+ if (spec->gpio_mask) {
+ snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK,
+ spec->gpio_mask);
+ snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION,
+ spec->gpio_dir);
+ snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA,
+ spec->gpio_data);
+ }
+
+ init_output(codec);
+ init_input(codec);
+ init_cs421x_digital(codec);
+
+ return 0;
+}
+
+/*
+ * CS4210 Input MUX (1 ADC)
+ */
+static int cs421x_mux_enum_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+{
+ struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+ struct cs_spec *spec = codec->spec;
+
+ return snd_hda_input_mux_info(&spec->input_mux, uinfo);
+}
+
+static int cs421x_mux_enum_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+ struct cs_spec *spec = codec->spec;
+
+ ucontrol->value.enumerated.item[0] = spec->cur_input;
+ return 0;
+}
+
+static int cs421x_mux_enum_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+ struct cs_spec *spec = codec->spec;
+
+ return snd_hda_input_mux_put(codec, &spec->input_mux, ucontrol,
+ spec->adc_nid[0], &spec->cur_input);
+
+}
+
+static struct snd_kcontrol_new cs421x_capture_source = {
+
+ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+ .name = "Capture Source",
+ .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+ .info = cs421x_mux_enum_info,
+ .get = cs421x_mux_enum_get,
+ .put = cs421x_mux_enum_put,
+};
+
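+/*
+ * Add an input-boost volume control for the given input pin; skipped
+ * if the pin has no input amp or the amp has only a single step.
+ */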
+static int cs421x_add_input_volume_control(struct hda_codec *codec, int item)
+{
+ struct cs_spec *spec = codec->spec;
+ struct auto_pin_cfg *cfg = &spec->autocfg;
+ const struct hda_input_mux *imux = &spec->input_mux;
+ hda_nid_t pin = cfg->inputs[item].pin;
+ struct snd_kcontrol *kctl;
+ u32 caps;
+
+ if (!(get_wcaps(codec, pin) & AC_WCAP_IN_AMP))
+ return 0;
+
+ caps = query_amp_caps(codec, pin, HDA_INPUT);
+ caps = (caps & AC_AMPCAP_NUM_STEPS) >> AC_AMPCAP_NUM_STEPS_SHIFT;
+ if (caps <= 1)
+ return 0;
+
+ return add_volume(codec, imux->items[item].label, 0,
+ HDA_COMPOSE_AMP_VAL(pin, 3, 0, HDA_INPUT), 1, &kctl);
+}
+
+/* build the capture controls: bind switch/volume, input MUX and per-input boost volumes */
+static int build_cs421x_input(struct hda_codec *codec)
+{
+ struct cs_spec *spec = codec->spec;
+ struct auto_pin_cfg *cfg = &spec->autocfg;
+ struct hda_input_mux *imux = &spec->input_mux;
+ int i, err, type_idx;
+ const char *label;
+
+ if (!spec->num_inputs)
+ return 0;
+
+ /* make bind-capture */
+ spec->capture_bind[0] = make_bind_capture(codec, &snd_hda_bind_sw);
+ spec->capture_bind[1] = make_bind_capture(codec, &snd_hda_bind_vol);
+ for (i = 0; i < 2; i++) {
+ struct snd_kcontrol *kctl;
+ int n;
+ if (!spec->capture_bind[i])
+ return -ENOMEM;
+ kctl = snd_ctl_new1(&cs_capture_ctls[i], codec);
+ if (!kctl)
+ return -ENOMEM;
+ kctl->private_value = (long)spec->capture_bind[i];
+ err = snd_hda_ctl_add(codec, 0, kctl);
+ if (err < 0)
+ return err;
+ for (n = 0; n < AUTO_PIN_LAST; n++) {
+ if (!spec->adc_nid[n])
+ continue;
+ err = snd_hda_add_nid(codec, kctl, 0, spec->adc_nid[n]);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ /* Add Input MUX Items + Capture Volume/Switch */
+ for (i = 0; i < spec->num_inputs; i++) {
+ label = hda_get_autocfg_input_label(codec, cfg, i);
+ snd_hda_add_imux_item(imux, label, spec->adc_idx[i], &type_idx);
+
+ err = cs421x_add_input_volume_control(codec, i);
+ if (err < 0)
+ return err;
+ }
+
+	/*
+	 * Add a 'Capture Source' switch if there are either
+	 * two inputs without mic detection, or three inputs.
+	 */
+ if ((spec->num_inputs == 2 && !spec->mic_detect) ||
+ (spec->num_inputs == 3)) {
+
+ err = snd_hda_ctl_add(codec, spec->adc_nid[0],
+ snd_ctl_new1(&cs421x_capture_source, codec));
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+/* Single DAC (Mute/Gain) */
+static int build_cs421x_output(struct hda_codec *codec)
+{
+ hda_nid_t dac = CS4210_DAC_NID;
+ struct cs_spec *spec = codec->spec;
+ struct auto_pin_cfg *cfg = &spec->autocfg;
+ struct snd_kcontrol *kctl;
+ int err;
+	const char *name = "HP/Speakers";
+
+ fix_volume_caps(codec, dac);
+ if (!spec->vmaster_sw) {
+ err = add_vmaster(codec, dac);
+ if (err < 0)
+ return err;
+ }
+
+ err = add_mute(codec, name, 0,
+ HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT), 0, &kctl);
+ if (err < 0)
+ return err;
+ err = snd_ctl_add_slave(spec->vmaster_sw, kctl);
+ if (err < 0)
+ return err;
+
+ err = add_volume(codec, name, 0,
+ HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT), 0, &kctl);
+ if (err < 0)
+ return err;
+ err = snd_ctl_add_slave(spec->vmaster_vol, kctl);
+ if (err < 0)
+ return err;
+
+ if (cfg->speaker_outs) {
+ err = snd_hda_ctl_add(codec, 0,
+			snd_ctl_new1(&cs421x_speaker_boost_ctl, codec));
+ if (err < 0)
+ return err;
+ }
+ return err;
+}
+
+static int cs421x_build_controls(struct hda_codec *codec)
+{
+ int err;
+
+ err = build_cs421x_output(codec);
+ if (err < 0)
+ return err;
+ err = build_cs421x_input(codec);
+ if (err < 0)
+ return err;
+ err = build_digital_output(codec);
+ if (err < 0)
+ return err;
+ return cs421x_init(codec);
+}
+
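+/* route HP/SPDIF unsolicited events to automute, mic events to automic */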
+static void cs421x_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+ switch ((res >> 26) & 0x3f) {
+ case HP_EVENT:
+ case SPDIF_EVENT:
+ cs_automute(codec);
+ break;
+
+ case MIC_EVENT:
+ cs_automic(codec);
+ break;
+ }
+}
+
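+/*
+ * Record the ADC behind each input pin; with at least two inputs and
+ * an external mic among them, enable the automatic mic switch.
+ */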
+static int parse_cs421x_input(struct hda_codec *codec)
+{
+ struct cs_spec *spec = codec->spec;
+ struct auto_pin_cfg *cfg = &spec->autocfg;
+ int i;
+
+ for (i = 0; i < cfg->num_inputs; i++) {
+ hda_nid_t pin = cfg->inputs[i].pin;
+ spec->adc_nid[i] = get_adc(codec, pin, &spec->adc_idx[i]);
+ spec->cur_input = spec->last_input = i;
+ spec->num_inputs++;
+
+ /* check whether the automatic mic switch is available */
+ if (is_ext_mic(codec, i) && cfg->num_inputs >= 2) {
+ spec->mic_detect = 1;
+ spec->automic_idx = i;
+ }
+ }
+ return 0;
+}
+
+static int cs421x_parse_auto_config(struct hda_codec *codec)
+{
+ struct cs_spec *spec = codec->spec;
+ int err;
+
+ err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL);
+ if (err < 0)
+ return err;
+ err = parse_output(codec);
+ if (err < 0)
+ return err;
+ err = parse_cs421x_input(codec);
+ if (err < 0)
+ return err;
+ err = parse_digital_output(codec);
+ if (err < 0)
+ return err;
+ return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * Manage PDREF when transitioning to D3hot:
+ * put DAC and ADC into D3, set PDREF=1, then let the AFG enter D3.
+ */
+static int cs421x_suspend(struct hda_codec *codec, pm_message_t state)
+{
+ unsigned int coef;
+
+ snd_hda_shutup_pins(codec);
+
+ snd_hda_codec_write(codec, CS4210_DAC_NID, 0,
+ AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+ snd_hda_codec_write(codec, CS4210_ADC_NID, 0,
+ AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+
+ coef = cs_vendor_coef_get(codec, CS421X_IDX_DEV_CFG);
+ coef |= 0x0004; /* PDREF */
+ cs_vendor_coef_set(codec, CS421X_IDX_DEV_CFG, coef);
+
+ return 0;
+}
+#endif
+
+static struct hda_codec_ops cs4210_patch_ops = {
+ .build_controls = cs421x_build_controls,
+ .build_pcms = cs_build_pcms,
+ .init = cs421x_init,
+ .free = cs_free,
+ .unsol_event = cs421x_unsol_event,
+#ifdef CONFIG_PM
+ .suspend = cs421x_suspend,
+#endif
+};
+
+static int patch_cs421x(struct hda_codec *codec)
+{
+ struct cs_spec *spec;
+ int err;
+
+ spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+ codec->spec = spec;
+
+ spec->vendor_nid = CS421X_VENDOR_NID;
+
+ spec->board_config =
+ snd_hda_check_board_config(codec, CS421X_MODELS,
+ cs421x_models, cs421x_cfg_tbl);
+ if (spec->board_config >= 0)
+ fix_pincfg(codec, spec->board_config, cs421x_pincfgs);
+	/* set up GPIO/SENSE for each board (if used) */
+ switch (spec->board_config) {
+ case CS421X_CDB4210:
+ snd_printd("CS4210 board: %s\n",
+ cs421x_models[spec->board_config]);
+/* spec->gpio_mask = 3;
+ spec->gpio_dir = 3;
+ spec->gpio_data = 3;
+*/
+ spec->sense_b = 1;
+
+ break;
+ }
+
+	/*
+	 * Update the GPIO/DMIC/SENSE_B pinmux before the configuration
+	 * is auto-parsed; if GPIO or SENSE_B is forced, the DMIC input
+	 * is disabled.
+	 */
+ cs421x_pinmux_init(codec);
+
+ err = cs421x_parse_auto_config(codec);
+ if (err < 0)
+ goto error;
+
+ codec->patch_ops = cs4210_patch_ops;
+
+ return 0;
+
+ error:
+ kfree(codec->spec);
+ codec->spec = NULL;
+ return err;
+}
+
/*
* patch entries
@@ -1279,11 +1952,13 @@
 */
static const struct hda_codec_preset snd_hda_preset_cirrus[] = {
{ .id = 0x10134206, .name = "CS4206", .patch = patch_cs420x },
{ .id = 0x10134207, .name = "CS4207", .patch = patch_cs420x },
+ { .id = 0x10134210, .name = "CS4210", .patch = patch_cs421x },
{} /* terminator */
};
MODULE_ALIAS("snd-hda-codec-id:10134206");
MODULE_ALIAS("snd-hda-codec-id:10134207");
+MODULE_ALIAS("snd-hda-codec-id:10134210");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Cirrus Logic HD-audio codec");
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 884f67b..502fc94 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -446,6 +446,19 @@
return 0;
}
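+/*
+ * power-state helper: settle before entering D3, delay briefly after
+ * returning to D0 (partial workaround for "azx_get_response timeout"),
+ * then propagate the state to all widgets
+ */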
+static void conexant_set_power(struct hda_codec *codec, hda_nid_t fg,
+ unsigned int power_state)
+{
+ if (power_state == AC_PWRST_D3)
+ msleep(100);
+ snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+ power_state);
+ /* partial workaround for "azx_get_response timeout" */
+ if (power_state == AC_PWRST_D0)
+ msleep(10);
+ snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
+
static int conexant_init(struct hda_codec *codec)
{
struct conexant_spec *spec = codec->spec;
@@ -588,6 +601,7 @@
.build_pcms = conexant_build_pcms,
.init = conexant_init,
.free = conexant_free,
+ .set_power_state = conexant_set_power,
#ifdef CONFIG_SND_HDA_POWER_SAVE
.suspend = conexant_suspend,
#endif
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 52ce075..e125c60 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -895,13 +895,15 @@
if (present == 3)
spec->automute_hp_lo = 1; /* both HP and LO automute */
- if (!cfg->speaker_pins[0]) {
+ if (!cfg->speaker_pins[0] &&
+ cfg->line_out_type == AUTO_PIN_SPEAKER_OUT) {
memcpy(cfg->speaker_pins, cfg->line_out_pins,
sizeof(cfg->speaker_pins));
cfg->speaker_outs = cfg->line_outs;
}
- if (!cfg->hp_pins[0]) {
+ if (!cfg->hp_pins[0] &&
+ cfg->line_out_type == AUTO_PIN_HP_OUT) {
memcpy(cfg->hp_pins, cfg->line_out_pins,
sizeof(cfg->hp_pins));
cfg->hp_outs = cfg->line_outs;
@@ -920,6 +922,7 @@
spec->automute_mode = ALC_AUTOMUTE_PIN;
}
if (spec->automute && cfg->line_out_pins[0] &&
+ cfg->speaker_pins[0] &&
cfg->line_out_pins[0] != cfg->hp_pins[0] &&
cfg->line_out_pins[0] != cfg->speaker_pins[0]) {
for (i = 0; i < cfg->line_outs; i++) {
@@ -1911,7 +1914,7 @@
return err;
}
}
- if (spec->cap_mixer) {
+ if (spec->cap_mixer && spec->adc_nids) {
const char *kname = kctl ? kctl->id.name : NULL;
for (knew = spec->cap_mixer; knew->name; knew++) {
if (kname && strcmp(knew->name, kname) == 0)
@@ -2386,7 +2389,7 @@
}
#endif
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
static int alc_resume(struct hda_codec *codec)
{
msleep(150); /* to avoid pop noise */
@@ -2406,7 +2409,7 @@
.init = alc_init,
.free = alc_free,
.unsol_event = alc_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
.resume = alc_resume,
#endif
#ifdef CONFIG_SND_HDA_POWER_SAVE
@@ -2801,7 +2804,8 @@
int i;
again:
- spec->multiout.num_dacs = 0;
+ /* set num_dacs once to full for alc_auto_look_for_dac() */
+ spec->multiout.num_dacs = cfg->line_outs;
spec->multiout.hp_nid = 0;
spec->multiout.extra_out_nid[0] = 0;
memset(spec->private_dac_nids, 0, sizeof(spec->private_dac_nids));
@@ -2834,6 +2838,8 @@
}
}
+ /* re-count num_dacs and squash invalid entries */
+ spec->multiout.num_dacs = 0;
for (i = 0; i < cfg->line_outs; i++) {
if (spec->private_dac_nids[i])
spec->multiout.num_dacs++;
@@ -3674,7 +3680,7 @@
if (board_config != ALC_MODEL_AUTO)
setup_preset(codec, &alc880_presets[board_config]);
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -3801,7 +3807,7 @@
if (board_config != ALC_MODEL_AUTO)
setup_preset(codec, &alc260_presets[board_config]);
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -3980,7 +3986,7 @@
if (board_config != ALC_MODEL_AUTO)
setup_preset(codec, &alc882_presets[board_config]);
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -4134,7 +4140,7 @@
if (board_config != ALC_MODEL_AUTO)
setup_preset(codec, &alc262_presets[board_config]);
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -4290,7 +4296,7 @@
(0 << AC_AMPCAP_MUTE_SHIFT));
}
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -4410,7 +4416,7 @@
}
}
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
static int alc269_resume(struct hda_codec *codec)
{
if ((alc_read_coef_idx(codec, 0) & 0x00ff) == 0x018) {
@@ -4433,7 +4439,7 @@
hda_call_check_power_status(codec, 0x01);
return 0;
}
-#endif /* SND_HDA_NEEDS_RESUME */
+#endif /* CONFIG_PM */
static void alc269_fixup_hweq(struct hda_codec *codec,
const struct alc_fixup *fix, int action)
@@ -4702,7 +4708,7 @@
if (board_config != ALC_MODEL_AUTO)
setup_preset(codec, &alc269_presets[board_config]);
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -4725,7 +4731,7 @@
spec->vmaster_nid = 0x02;
codec->patch_ops = alc_patch_ops;
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
codec->patch_ops.resume = alc269_resume;
#endif
if (board_config == ALC_MODEL_AUTO)
@@ -4840,7 +4846,7 @@
if (board_config != ALC_MODEL_AUTO)
setup_preset(codec, &alc861_presets[board_config]);
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -4981,7 +4987,7 @@
add_verb(spec, alc660vd_eapd_verbs);
}
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -5197,7 +5203,7 @@
if (board_config != ALC_MODEL_AUTO)
setup_preset(codec, &alc662_presets[board_config]);
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
@@ -5333,7 +5339,7 @@
#endif
}
- if (!spec->no_analog && !spec->adc_nids && spec->input_mux) {
+ if (!spec->no_analog && !spec->adc_nids) {
alc_auto_fill_adc_caps(codec);
alc_rebuild_imux_for_auto_mic(codec);
alc_remove_invalid_adc_nids(codec);
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 56425a5..aa376b5 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -95,6 +95,7 @@
STAC_92HD83XXX_PWR_REF,
STAC_DELL_S14,
STAC_92HD83XXX_HP,
+ STAC_92HD83XXX_HP_cNB11_INTQUAD,
STAC_HP_DV7_4000,
STAC_92HD83XXX_MODELS
};
@@ -212,6 +213,7 @@
unsigned int gpio_mute;
unsigned int gpio_led;
unsigned int gpio_led_polarity;
+ unsigned int vref_led;
/* stream */
unsigned int stream_delay;
@@ -671,6 +673,30 @@
return 0;
}
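+/*
+ * Rewrite only the VREF bits of a pin's widget control; used further
+ * down to drive the mute LED from a vref-out pin (gpio_led > 8).
+ */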
+static int stac_vrefout_set(struct hda_codec *codec,
+ hda_nid_t nid, unsigned int new_vref)
+{
+ int error, pinctl;
+
+ snd_printdd("%s, nid %x ctl %x\n", __func__, nid, new_vref);
+ pinctl = snd_hda_codec_read(codec, nid, 0,
+ AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+
+ if (pinctl < 0)
+ return pinctl;
+
+ pinctl &= 0xff;
+ pinctl &= ~AC_PINCTL_VREFEN;
+ pinctl |= (new_vref & AC_PINCTL_VREFEN);
+
+ error = snd_hda_codec_write_cache(codec, nid, 0,
+ AC_VERB_SET_PIN_WIDGET_CONTROL, pinctl);
+ if (error < 0)
+ return error;
+
+ return 1;
+}
+
static unsigned int stac92xx_vref_set(struct hda_codec *codec,
hda_nid_t nid, unsigned int new_vref)
{
@@ -1636,10 +1662,17 @@
0x40f000f0, 0x40f000f0,
};
+static const unsigned int hp_cNB11_intquad_pin_configs[10] = {
+ 0x40f000f0, 0x0221101f, 0x02a11020, 0x92170110,
+ 0x40f000f0, 0x92170110, 0x40f000f0, 0xd5a30130,
+ 0x40f000f0, 0x40f000f0,
+};
+
static const unsigned int *stac92hd83xxx_brd_tbl[STAC_92HD83XXX_MODELS] = {
[STAC_92HD83XXX_REF] = ref92hd83xxx_pin_configs,
[STAC_92HD83XXX_PWR_REF] = ref92hd83xxx_pin_configs,
[STAC_DELL_S14] = dell_s14_pin_configs,
+ [STAC_92HD83XXX_HP_cNB11_INTQUAD] = hp_cNB11_intquad_pin_configs,
[STAC_HP_DV7_4000] = hp_dv7_4000_pin_configs,
};
@@ -1649,6 +1682,7 @@
[STAC_92HD83XXX_PWR_REF] = "mic-ref",
[STAC_DELL_S14] = "dell-s14",
[STAC_92HD83XXX_HP] = "hp",
+ [STAC_92HD83XXX_HP_cNB11_INTQUAD] = "hp_cNB11_intquad",
[STAC_HP_DV7_4000] = "hp-dv7-4000",
};
@@ -1661,7 +1695,47 @@
SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02ba,
"unknown Dell", STAC_DELL_S14),
SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xff00, 0x3600,
- "HP", STAC_92HD83XXX_HP),
+ "HP", STAC_92HD83XXX_HP),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1656,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1657,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1658,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1659,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165A,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165B,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3388,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3389,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355B,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355C,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355D,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355E,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355F,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3560,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358B,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358C,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358D,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3591,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3592,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
+ SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3593,
+ "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD),
{} /* terminator */
};
@@ -4020,6 +4094,8 @@
{
unsigned int gpiostate, gpiomask, gpiodir;
+ snd_printdd("%s msk %x dir %x gpio %x\n", __func__, mask, dir_mask, data);
+
gpiostate = snd_hda_codec_read(codec, codec->afg, 0,
AC_VERB_GET_GPIO_DATA, 0);
gpiostate = (gpiostate & ~dir_mask) | (data & dir_mask);
@@ -4209,10 +4285,12 @@
spec->eapd_switch = val;
get_int_hint(codec, "gpio_led_polarity", &spec->gpio_led_polarity);
if (get_int_hint(codec, "gpio_led", &spec->gpio_led)) {
- spec->gpio_mask |= spec->gpio_led;
- spec->gpio_dir |= spec->gpio_led;
- if (spec->gpio_led_polarity)
- spec->gpio_data |= spec->gpio_led;
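+		/* values above 8 are pin NIDs driven via vref-out, not GPIO bits */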
+ if (spec->gpio_led <= 8) {
+ spec->gpio_mask |= spec->gpio_led;
+ spec->gpio_dir |= spec->gpio_led;
+ if (spec->gpio_led_polarity)
+ spec->gpio_data |= spec->gpio_led;
+ }
}
}
@@ -4382,11 +4460,26 @@
snd_array_free(&spec->kctls);
}
+static void stac92xx_shutup_pins(struct hda_codec *codec)
+{
+ unsigned int i, def_conf;
+
+ if (codec->bus->shutdown)
+ return;
+ for (i = 0; i < codec->init_pins.used; i++) {
+ struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
+ def_conf = snd_hda_codec_get_pincfg(codec, pin->nid);
+ if (get_defcfg_connect(def_conf) != AC_JACK_PORT_NONE)
+ snd_hda_codec_write(codec, pin->nid, 0,
+ AC_VERB_SET_PIN_WIDGET_CONTROL, 0);
+ }
+}
+
static void stac92xx_shutup(struct hda_codec *codec)
{
struct sigmatel_spec *spec = codec->spec;
- snd_hda_shutup_pins(codec);
+ stac92xx_shutup_pins(codec);
if (spec->eapd_mask)
stac_gpio_set(codec, spec->gpio_mask,
@@ -4784,10 +4877,11 @@
if ((codec->subsystem_id >> 16) == PCI_VENDOR_ID_HP) {
while ((dev = dmi_find_device(DMI_DEV_TYPE_OEM_STRING,
NULL, dev))) {
- if (sscanf(dev->name, "HP_Mute_LED_%d_%d",
+ if (sscanf(dev->name, "HP_Mute_LED_%d_%x",
&spec->gpio_led_polarity,
&spec->gpio_led) == 2) {
- spec->gpio_led = 1 << spec->gpio_led;
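+				/* small values are GPIO numbers; larger
+				 * ones are pin NIDs for vref-out control
+				 */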
+ if (spec->gpio_led < 4)
+ spec->gpio_led = 1 << spec->gpio_led;
return 1;
}
if (sscanf(dev->name, "HP_Mute_LED_%d",
@@ -4885,7 +4979,7 @@
#define stac927x_proc_hook NULL
#endif
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
static int stac92xx_resume(struct hda_codec *codec)
{
struct sigmatel_spec *spec = codec->spec;
@@ -4901,29 +4995,81 @@
stac_issue_unsol_event(codec,
spec->autocfg.line_out_pins[0]);
}
- /* sync mute LED */
- if (spec->gpio_led)
- hda_call_check_power_status(codec, 0x01);
return 0;
}
-/*
- * using power check for controlling mute led of HP notebooks
- * check for mute state only on Speakers (nid = 0x10)
- *
- * For this feature CONFIG_SND_HDA_POWER_SAVE is needed, otherwise
- * the LED is NOT working properly !
- *
- * Changed name to reflect that it now works for any designated
- * model, not just HP HDX.
- */
+static int stac92xx_suspend(struct hda_codec *codec, pm_message_t state)
+{
+ stac92xx_shutup(codec);
+ return 0;
+}
#ifdef CONFIG_SND_HDA_POWER_SAVE
-static int stac92xx_hp_check_power_status(struct hda_codec *codec,
- hda_nid_t nid)
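+/* restore the mute-LED state (GPIO or vref-out) before resuming */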
+static int stac92xx_pre_resume(struct hda_codec *codec)
{
struct sigmatel_spec *spec = codec->spec;
- int i, muted = 1;
+
+ /* sync mute LED */
+ if (spec->gpio_led) {
+ if (spec->gpio_led <= 8) {
+ stac_gpio_set(codec, spec->gpio_mask,
+ spec->gpio_dir, spec->gpio_data);
+ } else {
+ stac_vrefout_set(codec,
+ spec->gpio_led, spec->vref_led);
+ }
+ }
+ return 0;
+}
+
+static int stac92xx_post_suspend(struct hda_codec *codec)
+{
+ struct sigmatel_spec *spec = codec->spec;
+ if (spec->gpio_led > 8) {
+		/* with a vref-out pin used for mute LED control the codec
+		 * AFG is normally kept out of D3, but on system suspend
+		 * D3 can (and should) be used
+		 */
+ snd_hda_codec_read(codec, codec->afg, 0,
+ AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+ }
+ return 0;
+}
+
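+/*
+ * keep the AFG at D1 instead of D3 while a vref-out pin drives the
+ * mute LED; stac92xx_post_suspend() handles the system-suspend case
+ */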
+static void stac92xx_set_power_state(struct hda_codec *codec, hda_nid_t fg,
+ unsigned int power_state)
+{
+ unsigned int afg_power_state = power_state;
+ struct sigmatel_spec *spec = codec->spec;
+
+ if (power_state == AC_PWRST_D3) {
+ if (spec->gpio_led > 8) {
+			/* with a vref-out pin used for mute LED control
+			 * the codec AFG must be kept out of D3
+			 */
+ afg_power_state = AC_PWRST_D1;
+ }
+ /* this delay seems necessary to avoid click noise at power-down */
+ msleep(100);
+ }
+ snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE,
+ afg_power_state);
+ snd_hda_codec_set_power_to_all(codec, fg, power_state, true);
+}
+
+/*
+ * This feature requires CONFIG_SND_HDA_POWER_SAVE, as the mute LED
+ * state is updated in the check_power_status hook
+ */
+static int stac92xx_update_led_status(struct hda_codec *codec)
+{
+ struct sigmatel_spec *spec = codec->spec;
+ int i, num_ext_dacs, muted = 1;
+ unsigned int muted_lvl, notmtd_lvl;
+ hda_nid_t nid;
+
+ if (!spec->gpio_led)
+ return 0;
for (i = 0; i < spec->multiout.num_dacs; i++) {
nid = spec->multiout.dac_nids[i];
@@ -4933,27 +5079,58 @@
break;
}
}
- if (muted)
- spec->gpio_data &= ~spec->gpio_led; /* orange */
- else
- spec->gpio_data |= spec->gpio_led; /* white */
-
- if (!spec->gpio_led_polarity) {
- /* LED state is inverted on these systems */
- spec->gpio_data ^= spec->gpio_led;
+ if (muted && spec->multiout.hp_nid)
+ if (!(snd_hda_codec_amp_read(codec,
+ spec->multiout.hp_nid, 0, HDA_OUTPUT, 0) &
+ HDA_AMP_MUTE)) {
+ muted = 0; /* HP is not muted */
+ }
+ num_ext_dacs = ARRAY_SIZE(spec->multiout.extra_out_nid);
+ for (i = 0; muted && i < num_ext_dacs; i++) {
+ nid = spec->multiout.extra_out_nid[i];
+ if (nid == 0)
+ break;
+ if (!(snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) &
+ HDA_AMP_MUTE)) {
+ muted = 0; /* extra output is not muted */
+ }
}
+	/* the polarity defines the level of the not-muted state */
+ if (spec->gpio_led <= 8) {
+ if (muted)
+ spec->gpio_data &= ~spec->gpio_led; /* orange */
+ else
+ spec->gpio_data |= spec->gpio_led; /* white */
- stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data);
+ if (!spec->gpio_led_polarity) {
+ /* LED state is inverted on these systems */
+ spec->gpio_data ^= spec->gpio_led;
+ }
+ stac_gpio_set(codec, spec->gpio_mask,
+ spec->gpio_dir, spec->gpio_data);
+ } else {
+ notmtd_lvl = spec->gpio_led_polarity ?
+ AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_GRD;
+ muted_lvl = spec->gpio_led_polarity ?
+ AC_PINCTL_VREF_GRD : AC_PINCTL_VREF_HIZ;
+ spec->vref_led = muted ? muted_lvl : notmtd_lvl;
+ stac_vrefout_set(codec, spec->gpio_led, spec->vref_led);
+ }
return 0;
}
-#endif
-static int stac92xx_suspend(struct hda_codec *codec, pm_message_t state)
+/*
+ * use the power check for controlling the mute LED on HP notebooks
+ */
+static int stac92xx_check_power_status(struct hda_codec *codec,
+ hda_nid_t nid)
{
- stac92xx_shutup(codec);
+ stac92xx_update_led_status(codec);
+
return 0;
}
-#endif
+#endif /* CONFIG_SND_HDA_POWER_SAVE */
+#endif /* CONFIG_PM */
static const struct hda_codec_ops stac92xx_patch_ops = {
.build_controls = stac92xx_build_controls,
@@ -4961,7 +5138,7 @@
.init = stac92xx_init,
.free = stac92xx_free,
.unsol_event = stac92xx_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
.suspend = stac92xx_suspend,
.resume = stac92xx_resume,
#endif
@@ -5477,12 +5654,19 @@
#ifdef CONFIG_SND_HDA_POWER_SAVE
if (spec->gpio_led) {
- spec->gpio_mask |= spec->gpio_led;
- spec->gpio_dir |= spec->gpio_led;
- spec->gpio_data |= spec->gpio_led;
- /* register check_power_status callback. */
+ if (spec->gpio_led <= 8) {
+ spec->gpio_mask |= spec->gpio_led;
+ spec->gpio_dir |= spec->gpio_led;
+ spec->gpio_data |= spec->gpio_led;
+ } else {
+ codec->patch_ops.set_power_state =
+ stac92xx_set_power_state;
+ codec->patch_ops.post_suspend =
+ stac92xx_post_suspend;
+ }
+ codec->patch_ops.pre_resume = stac92xx_pre_resume;
codec->patch_ops.check_power_status =
- stac92xx_hp_check_power_status;
+ stac92xx_check_power_status;
}
#endif
@@ -5805,12 +5989,19 @@
#ifdef CONFIG_SND_HDA_POWER_SAVE
if (spec->gpio_led) {
- spec->gpio_mask |= spec->gpio_led;
- spec->gpio_dir |= spec->gpio_led;
- spec->gpio_data |= spec->gpio_led;
- /* register check_power_status callback. */
+ if (spec->gpio_led <= 8) {
+ spec->gpio_mask |= spec->gpio_led;
+ spec->gpio_dir |= spec->gpio_led;
+ spec->gpio_data |= spec->gpio_led;
+ } else {
+ codec->patch_ops.set_power_state =
+ stac92xx_set_power_state;
+ codec->patch_ops.post_suspend =
+ stac92xx_post_suspend;
+ }
+ codec->patch_ops.pre_resume = stac92xx_pre_resume;
codec->patch_ops.check_power_status =
- stac92xx_hp_check_power_status;
+ stac92xx_check_power_status;
}
#endif
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index f38160b..84d8798 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -1708,7 +1708,7 @@
via_gpio_control(codec);
}
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
static int via_suspend(struct hda_codec *codec, pm_message_t state)
{
struct via_spec *spec = codec->spec;
@@ -1736,7 +1736,7 @@
.init = via_init,
.free = via_free,
.unsol_event = via_unsol_event,
-#ifdef SND_HDA_NEEDS_RESUME
+#ifdef CONFIG_PM
.suspend = via_suspend,
#endif
#ifdef CONFIG_SND_HDA_POWER_SAVE
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index ff29380..76258f2 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -907,6 +907,7 @@
struct regulator_init_data *init_data,
int voltage)
{
+ dev_err(codec->dev, "this setup needs regulator support in the kernel\n");
return -EINVAL;
}
@@ -1218,6 +1219,34 @@
return 0;
}
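+/*
+ * Register the codec-internal LDO as the VDDD supply and re-request
+ * the supply list; used when no external VDDD regulator is available
+ * and when rolling back to the internal LDO on rev >= 0x11 parts.
+ */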
+static int sgtl5000_replace_vddd_with_ldo(struct snd_soc_codec *codec)
+{
+ struct sgtl5000_priv *sgtl5000 = snd_soc_codec_get_drvdata(codec);
+ int ret;
+
+ /* set internal ldo to 1.2v */
+ ret = ldo_regulator_register(codec, &ldo_init_data, LDO_VOLTAGE);
+ if (ret) {
+ dev_err(codec->dev,
+ "Failed to register vddd internal supplies: %d\n", ret);
+ return ret;
+ }
+
+ sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
+
+ ret = regulator_bulk_get(codec->dev, ARRAY_SIZE(sgtl5000->supplies),
+ sgtl5000->supplies);
+
+ if (ret) {
+ ldo_regulator_remove(codec);
+ dev_err(codec->dev, "Failed to request supplies: %d\n", ret);
+ return ret;
+ }
+
+ dev_info(codec->dev, "Using internal LDO instead of VDDD\n");
+ return 0;
+}
+
static int sgtl5000_enable_regulators(struct snd_soc_codec *codec)
{
u16 reg;
@@ -1235,30 +1264,9 @@
if (!ret)
external_vddd = 1;
else {
- /* set internal ldo to 1.2v */
- int voltage = LDO_VOLTAGE;
-
- ret = ldo_regulator_register(codec, &ldo_init_data, voltage);
- if (ret) {
- dev_err(codec->dev,
- "Failed to register vddd internal supplies: %d\n",
- ret);
+ ret = sgtl5000_replace_vddd_with_ldo(codec);
+ if (ret)
return ret;
- }
-
- sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
-
- ret = regulator_bulk_get(codec->dev,
- ARRAY_SIZE(sgtl5000->supplies),
- sgtl5000->supplies);
-
- if (ret) {
- ldo_regulator_remove(codec);
- dev_err(codec->dev,
- "Failed to request supplies: %d\n", ret);
-
- return ret;
- }
}
ret = regulator_bulk_enable(ARRAY_SIZE(sgtl5000->supplies),
@@ -1287,7 +1295,6 @@
* roll back to use internal LDO
*/
if (external_vddd && rev >= 0x11) {
- int voltage = LDO_VOLTAGE;
/* disable all regulator first */
regulator_bulk_disable(ARRAY_SIZE(sgtl5000->supplies),
sgtl5000->supplies);
@@ -1295,23 +1302,10 @@
regulator_bulk_free(ARRAY_SIZE(sgtl5000->supplies),
sgtl5000->supplies);
- ret = ldo_regulator_register(codec, &ldo_init_data, voltage);
+ ret = sgtl5000_replace_vddd_with_ldo(codec);
if (ret)
return ret;
- sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME;
-
- ret = regulator_bulk_get(codec->dev,
- ARRAY_SIZE(sgtl5000->supplies),
- sgtl5000->supplies);
- if (ret) {
- ldo_regulator_remove(codec);
- dev_err(codec->dev,
- "Failed to request supplies: %d\n", ret);
-
- return ret;
- }
-
ret = regulator_bulk_enable(ARRAY_SIZE(sgtl5000->supplies),
sgtl5000->supplies);
if (ret)
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 8499c56..60d740e 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -3409,6 +3409,9 @@
active = snd_soc_read(codec, WM8962_INTERRUPT_STATUS_2);
active &= ~mask;
+	/* Acknowledge the interrupts up front; events arriving later will
+	 * re-assert the status bits rather than be wiped by a late ack
+	 */
+ snd_soc_write(codec, WM8962_INTERRUPT_STATUS_2, active);
+
if (active & WM8962_FLL_LOCK_EINT) {
dev_dbg(codec->dev, "FLL locked\n");
complete(&wm8962->fll_lock);
@@ -3433,9 +3436,6 @@
msecs_to_jiffies(250));
}
- /* Acknowledge the interrupts */
- snd_soc_write(codec, WM8962_INTERRUPT_STATUS_2, active);
-
return IRQ_HANDLED;
}
diff --git a/sound/soc/davinci/davinci-vcif.c b/sound/soc/davinci/davinci-vcif.c
index 9259f1f..1f11525 100644
--- a/sound/soc/davinci/davinci-vcif.c
+++ b/sound/soc/davinci/davinci-vcif.c
@@ -62,9 +62,9 @@
w = readl(davinci_vc->base + DAVINCI_VC_CTRL);
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
- MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 1);
+ MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 0);
else
- MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 1);
+ MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 0);
writel(w, davinci_vc->base + DAVINCI_VC_CTRL);
}
@@ -80,9 +80,9 @@
/* Reset transmitter/receiver and sample rate/frame sync generators */
w = readl(davinci_vc->base + DAVINCI_VC_CTRL);
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
- MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 0);
+ MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 1);
else
- MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 0);
+ MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 1);
writel(w, davinci_vc->base + DAVINCI_VC_CTRL);
}
@@ -159,6 +159,7 @@
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
davinci_vcif_start(substream);
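+		/* don't fall through into the stop cases */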
+ break;
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 1568eea..c086b78 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -21,6 +21,7 @@
#include <plat/audio.h>
#include "dma.h"
+#include "idma.h"
#include "i2s.h"
#include "i2s-regs.h"
@@ -60,6 +61,7 @@
/* DMA parameters */
struct s3c_dma_params dma_playback;
struct s3c_dma_params dma_capture;
+ struct s3c_dma_params idma_playback;
u32 quirks;
u32 suspend_i2smod;
u32 suspend_i2scon;
@@ -877,6 +879,10 @@
if (i2s->quirks & QUIRK_NEED_RSTCLR)
writel(CON_RSTCLR, i2s->addr + I2SCON);
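+	/* hand the register base and the secondary DAI's playback
+	 * buffer address to the internal-DMA (idma) driver
+	 */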
+ if (i2s->quirks & QUIRK_SEC_DAI)
+ idma_reg_addr_init((void *)i2s->addr,
+ i2s->sec_dai->idma_playback.dma_addr);
+
probe_exit:
/* Reset any constraint on RFS and BFS */
i2s->rfs = 0;
@@ -1077,6 +1083,7 @@
sec_dai->dma_playback.dma_size = 4;
sec_dai->base = regs_base;
sec_dai->quirks = quirks;
+ sec_dai->idma_playback.dma_addr = i2s_cfg->idma_addr;
sec_dai->pri_dai = pri_dai;
pri_dai->sec_dai = sec_dai;
}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index e44267f..83ad8ca 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -577,6 +577,7 @@
case SND_SOC_BIAS_OFF:
codec->driver->suspend(codec, PMSG_SUSPEND);
codec->suspended = 1;
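+			/* mark the register cache dirty so it is synced on resume */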
+ codec->cache_sync = 1;
break;
default:
dev_dbg(codec->dev, "CODEC is on over suspend\n");
@@ -1140,7 +1141,7 @@
}
}
cpu_dai->probed = 1;
- /* mark cpu_dai as probed and add to card cpu_dai list */
+ /* mark cpu_dai as probed and add to card dai list */
list_add(&cpu_dai->card_list, &card->dai_dev_list);
}
@@ -1171,7 +1172,7 @@
}
}
- /* mark cpu_dai as probed and add to card cpu_dai list */
+ /* mark codec_dai as probed and add to card dai list */
codec_dai->probed = 1;
list_add(&codec_dai->card_list, &card->dai_dev_list);
}
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index fbfcda0..7e15914 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -124,6 +124,36 @@
return kmemdup(_widget, sizeof(*_widget), GFP_KERNEL);
}
+/* get snd_card from DAPM context */
+static inline struct snd_card *dapm_get_snd_card(
+ struct snd_soc_dapm_context *dapm)
+{
+ if (dapm->codec)
+ return dapm->codec->card->snd_card;
+ else if (dapm->platform)
+ return dapm->platform->card->snd_card;
+ else
+ BUG();
+
+ /* unreachable */
+ return NULL;
+}
+
+/* get soc_card from DAPM context */
+static inline struct snd_soc_card *dapm_get_soc_card(
+ struct snd_soc_dapm_context *dapm)
+{
+ if (dapm->codec)
+ return dapm->codec->card;
+ else if (dapm->platform)
+ return dapm->platform->card;
+ else
+ BUG();
+
+ /* unreachable */
+ return NULL;
+}
+
static int soc_widget_read(struct snd_soc_dapm_widget *w, int reg)
{
if (w->codec)