Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 Pull ext4 bugfixes from Ted Ts'o: "These changes contains a fix for overlayfs interacting with some (badly behaved) dentry code in various file systems. These have been reviewed by Al and the respective file system mtinainers and are going through the ext4 tree for convenience. This also has a few ext4 encryption bug fixes that were discovered in Android testing (yes, we will need to get these sync'ed up with the fs/crypto code; I'll take care of that). It also has some bug fixes and a change to ignore the legacy quota options to allow for xfstests regression testing of ext4's internal quota feature and to be more consistent with how xfs handles this case" * tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4: ignore quota mount options if the quota feature is enabled ext4 crypto: fix some error handling ext4: avoid calling dquot_get_next_id() if quota is not enabled ext4: retry block allocation for failed DIO and DAX writes ext4: add lockdep annotations for i_data_sem ext4: allow readdir()'s of large empty directories to be interrupted btrfs: fix crash/invalid memory access on fsync when using overlayfs ext4 crypto: use dget_parent() in ext4_d_revalidate() ext4: use file_dentry() ext4: use dget_parent() in ext4_file_open() nfs: use file_dentry() fs: add file_dentry() ext4 crypto: don't let data integrity writebacks fail with ENOMEM ext4: check if in-inode xattr is corrupted in ext4_expand_extra_isize_ea()

commit: 93061f390f107c37bad7e3bf9eb07bda58a4a99f [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Thu Apr 07 17:22:20 2016 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Thu Apr 07 17:22:20 2016 -0700
tree: 6cd3ad605900fa7145a700c22062fabe36ccff95
parent: 1c915b3ac4ecf6ff67573eed24458d862e842cb6 [diff]
parent: c325a67c72903e1cc30e990a15ce745bda0dbfde [diff]
diff --git a/.mailmap b/.mailmap
index 7e6c533..90c0aef 100644
--- a/.mailmap
+++ b/.mailmap

@@ -33,6 +33,7 @@
 Brian Avery <b.avery@hp.com>
 Brian King <brking@us.ibm.com>
 Christoph Hellwig <hch@lst.de>
+Christophe Ricard <christophe.ricard@gmail.com>
 Corey Minyard <minyard@acm.org>
 Damian Hobson-Garcia <dhobsong@igel.co.jp>
 David Brownell <david-b@pacbell.net>

diff --git a/Documentation/ABI/stable/sysfs-fs-orangefs b/Documentation/ABI/stable/sysfs-fs-orangefs
new file mode 100644
index 0000000..affdb11
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-fs-orangefs

@@ -0,0 +1,87 @@
+What:			/sys/fs/orangefs/perf_counters/*
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			Counters and settings for various caches.
+			Read only.
+
+
+What:			/sys/fs/orangefs/perf_counter_reset
+Date:			June 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			echo a 0 or a 1 into perf_counter_reset to
+			reset all the counters in
+			/sys/fs/orangefs/perf_counters
+			except ones with PINT_PERF_PRESERVE set.
+
+
+What:			/sys/fs/orangefs/perf_time_interval_secs
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			Length of perf counter intervals in
+			seconds.
+
+
+What:			/sys/fs/orangefs/perf_history_size
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			The perf_counters cache statistics have N, or
+			perf_history_size, samples. The default is
+			one.
+
+			Every perf_time_interval_secs the (first)
+			samples are reset.
+
+			If N is greater than one, the "current" set
+			of samples is reset, and the samples from the
+			other N-1 intervals remain available.
+
+
+What:			/sys/fs/orangefs/op_timeout_secs
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			Service operation timeout in seconds.
+
+
+What:			/sys/fs/orangefs/slot_timeout_secs
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			"Slot" timeout in seconds. A "slot"
+			is an indexed buffer in the shared
+			memory segment used for communication
+			between the kernel module and userspace.
+			Slots are requested and waited for,
+			the wait times out after slot_timeout_secs.
+
+
+What:			/sys/fs/orangefs/acache/*
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			Attribute cache configurable settings.
+
+
+What:			/sys/fs/orangefs/ncache/*
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			Name cache configurable settings.
+
+
+What:			/sys/fs/orangefs/capcache/*
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			Capability cache configurable settings.
+
+
+What:			/sys/fs/orangefs/ccache/*
+Date:			Jun 2015
+Contact:		Mike Marshall <hubcap@omnibond.com>
+Description:
+			Credential cache configurable settings.

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index b683e8e..1650133 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu

@@ -271,3 +271,72 @@
 			- WriteBack: data is written only to the cache line and
 				     the modified cache line is written to main
 				     memory only when it is replaced
+
+What:		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats
+		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
+		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat
+		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/unthrottle
+		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/powercap
+		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overtemp
+		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/supply_fault
+		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overcurrent
+		/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset
+Date:		March 2016
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+		Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Description:	POWERNV CPUFreq driver's frequency throttle stats directory and
+		attributes
+
+		'cpuX/cpufreq/throttle_stats' directory contains the CPU frequency
+		throttle stat attributes for the chip. The throttle stats of a cpu
+		is common across all the cpus belonging to a chip. Below are the
+		throttle attributes exported in the 'throttle_stats' directory:
+
+		- turbo_stat : This file gives the total number of times the max
+		frequency is throttled to lower frequency in turbo (at and above
+		nominal frequency) range of frequencies.
+
+		- sub_turbo_stat : This file gives the total number of times the
+		max frequency is throttled to lower frequency in sub-turbo(below
+		nominal frequency) range of frequencies.
+
+		- unthrottle : This file gives the total number of times the max
+		frequency is unthrottled after being throttled.
+
+		- powercap : This file gives the total number of times the max
+		frequency is throttled due to 'Power Capping'.
+
+		- overtemp : This file gives the total number of times the max
+		frequency is throttled due to 'CPU Over Temperature'.
+
+		- supply_fault : This file gives the total number of times the
+		max frequency is throttled due to 'Power Supply Failure'.
+
+		- overcurrent : This file gives the total number of times the
+		max frequency is throttled due to 'Overcurrent'.
+
+		- occ_reset : This file gives the total number of times the max
+		frequency is throttled due to 'OCC Reset'.
+
+		The sysfs attributes representing different throttle reasons like
+		powercap, overtemp, supply_fault, overcurrent and occ_reset map to
+		the reasons provided by OCC firmware for throttling the frequency.
+
+What:		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats
+		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/turbo_stat
+		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/sub_turbo_stat
+		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/unthrottle
+		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/powercap
+		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/overtemp
+		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/supply_fault
+		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/overcurrent
+		/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/occ_reset
+Date:		March 2016
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+		Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Description:	POWERNV CPUFreq driver's frequency throttle stats directory and
+		attributes
+
+		'policyX/throttle_stats' directory and all the attributes are same as
+		the /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats directory and
+		attributes which give the frequency throttle information of the chip.

diff --git a/Documentation/ABI/testing/sysfs-driver-toshiba_acpi b/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
index eed922e..f34221b 100644
--- a/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
+++ b/Documentation/ABI/testing/sysfs-driver-toshiba_acpi

@@ -179,3 +179,19 @@
 		Note that toggling this value requires a reboot for changes to
 		take effect.
 Users:		KToshiba
+
+What:		/sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/cooling_method
+Date:		2016
+KernelVersion:	4.6
+Contact:	Azael Avalos <coproscefalo@gmail.com>
+Description:	This file controls the Cooling Method feature.
+		Reading this file prints two values, the first is the actual cooling method
+		and the second is the maximum cooling method supported.
+		When the maximum cooling method is ONE, valid values are:
+			* 0 -> Maximum Performance
+			* 1 -> Battery Optimized
+		When the maximum cooling method is TWO, valid values are:
+			* 0 -> Maximum Performance
+			* 1 -> Performance
+			* 2 -> Battery Optimized
+Users:		KToshiba

diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
index 20e1704..fb40da3 100644
--- a/Documentation/devicetree/bindings/clock/axi-clkgen.txt
+++ b/Documentation/devicetree/bindings/clock/axi-clkgen.txt

@@ -8,7 +8,10 @@
 - compatible : shall be "adi,axi-clkgen-1.00.a" or "adi,axi-clkgen-2.00.a".
 - #clock-cells : from common clock binding; Should always be set to 0.
 - reg : Address and length of the axi-clkgen register set.
-- clocks : Phandle and clock specifier for the parent clock.
+- clocks : Phandle and clock specifier for the parent clock(s). This must
+	either reference one clock if only the first clock input is connected or two
+	if both clock inputs are connected. For the later case the clock connected
+	to the first input must be specified first.
 
 Optional properties:
 - clock-output-names : From common clock binding.

diff --git a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.txt b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.txt
index 0b35e71..6f66e9a 100644
--- a/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.txt
+++ b/Documentation/devicetree/bindings/clock/brcm,iproc-clocks.txt

@@ -92,6 +92,7 @@
     "brcm,cygnus-lcpll0"
     "brcm,cygnus-mipipll"
     "brcm,cygnus-asiu-clk"
+    "brcm,cygnus-audiopll"
 
 The following table defines the set of PLL/clock index and ID for Cygnus.
 These clock IDs are defined in:
@@ -131,6 +132,11 @@
     ch4_unused mipipll          5       BCM_CYGNUS_MIPIPLL_CH4_UNUSED
     ch5_unused mipipll          6       BCM_CYGNUS_MIPIPLL_CH5_UNUSED
 
+    audiopll   crystal          0       BCM_CYGNUS_AUDIOPLL
+    ch0_audio  audiopll         1       BCM_CYGNUS_AUDIOPLL_CH0
+    ch1_audio  audiopll         2       BCM_CYGNUS_AUDIOPLL_CH1
+    ch2_audio  audiopll         3       BCM_CYGNUS_AUDIOPLL_CH2
+
 Northstar and Northstar Plus
 ------
 PLL and leaf clock compatible strings for Northstar and Northstar Plus are:

diff --git a/Documentation/devicetree/bindings/clock/lpc1850-creg-clk.txt b/Documentation/devicetree/bindings/clock/lpc1850-creg-clk.txt
new file mode 100644
index 0000000..6f1c7b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/lpc1850-creg-clk.txt

@@ -0,0 +1,52 @@
+* NXP LPC1850 CREG clocks
+
+The NXP LPC18xx/43xx CREG (Configuration Registers) block contains
+control registers for two low speed clocks. One of the clocks is a
+32 kHz oscillator driver with power up/down and clock gating. Next
+is a fixed divider that creates a 1 kHz clock from the 32 kHz osc.
+
+These clocks are used by the RTC and the Event Router peripherials.
+The 32 kHz can also be routed to other peripherials to enable low
+power modes.
+
+This binding uses the common clock binding:
+    Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible:
+	Should be "nxp,lpc1850-creg-clk"
+- #clock-cells:
+	Shall have value <1>.
+- clocks:
+	Shall contain a phandle to the fixed 32 kHz crystal.
+
+The creg-clk node must be a child of the creg syscon node.
+
+The following clocks are available from the clock node.
+
+Clock ID	Name
+   0		 1 kHz clock
+   1		32 kHz Oscillator
+
+Example:
+soc {
+	creg: syscon@40043000 {
+		compatible = "nxp,lpc1850-creg", "syscon", "simple-mfd";
+		reg = <0x40043000 0x1000>;
+
+		creg_clk: clock-controller {
+			compatible = "nxp,lpc1850-creg-clk";
+			clocks = <&xtal32>;
+			#clock-cells = <1>;
+		};
+
+		...
+	};
+
+	rtc: rtc@40046000 {
+		...
+		clocks = <&creg_clk 0>, <&ccu1 CLK_CPU_BUS>;
+		clock-names = "rtc", "reg";
+		...
+	};
+};

diff --git a/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt b/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt
index e0fc2c1..241fb05 100644
--- a/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt
+++ b/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt

@@ -3,7 +3,7 @@
 The PPL controller provides the 3 main clocks of the SoC: CPU, DDR and AHB.
 
 Required Properties:
-- compatible: has to be "qca,<soctype>-cpu-intc" and one of the following
+- compatible: has to be "qca,<soctype>-pll" and one of the following
   fallbacks:
   - "qca,ar7100-pll"
   - "qca,ar7240-pll"
@@ -21,8 +21,8 @@
 
 Example:
 
-	memory-controller@18050000 {
-		compatible = "qca,ar9132-ppl", "qca,ar9130-pll";
+	pll-controller@18050000 {
+		compatible = "qca,ar9132-pll", "qca,ar9130-pll";
 		reg = <0x18050000 0x20>;
 
 		clock-names = "ref";

diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
index 72f82f4..9a60fde 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt

@@ -7,6 +7,7 @@
 			"qcom,gcc-apq8064"
 			"qcom,gcc-apq8084"
 			"qcom,gcc-ipq8064"
+			"qcom,gcc-ipq4019"
 			"qcom,gcc-msm8660"
 			"qcom,gcc-msm8916"
 			"qcom,gcc-msm8960"

diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
index 59297d3..fefb802 100644
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt

@@ -61,7 +61,7 @@
 		reg = <0 0xe6e88000 0 64>;
 		interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&cpg CPG_MOD 310>;
-		clock-names = "sci_ick";
+		clock-names = "fck";
 		dmas = <&dmac1 0x13>, <&dmac1 0x12>;
 		dma-names = "tx", "rx";
 		power-domains = <&cpg>;

diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
index e59f57b..834436f 100644
--- a/Documentation/devicetree/bindings/clock/sunxi.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi.txt

@@ -18,6 +18,7 @@
 	"allwinner,sun4i-a10-cpu-clk" - for the CPU multiplexer clock
 	"allwinner,sun4i-a10-axi-clk" - for the AXI clock
 	"allwinner,sun8i-a23-axi-clk" - for the AXI clock on A23
+	"allwinner,sun4i-a10-gates-clk" - for generic gates on all compatible SoCs
 	"allwinner,sun4i-a10-axi-gates-clk" - for the AXI gates
 	"allwinner,sun4i-a10-ahb-clk" - for the AHB clock
 	"allwinner,sun5i-a13-ahb-clk" - for the AHB clock on A13
@@ -39,12 +40,14 @@
 	"allwinner,sun6i-a31-apb0-clk" - for the APB0 clock on A31
 	"allwinner,sun8i-a23-apb0-clk" - for the APB0 clock on A23
 	"allwinner,sun9i-a80-apb0-clk" - for the APB0 bus clock on A80
+	"allwinner,sun8i-a83t-apb0-gates-clk" - for the APB0 gates on A83T
 	"allwinner,sun4i-a10-apb0-gates-clk" - for the APB0 gates on A10
 	"allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13
 	"allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s
 	"allwinner,sun6i-a31-apb0-gates-clk" - for the APB0 gates on A31
 	"allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20
 	"allwinner,sun8i-a23-apb0-gates-clk" - for the APB0 gates on A23
+	"allwinner,sun8i-h3-apb0-gates-clk" - for the APB0 gates on H3
 	"allwinner,sun9i-a80-apb0-gates-clk" - for the APB0 gates on A80
 	"allwinner,sun4i-a10-apb1-clk" - for the APB1 clock
 	"allwinner,sun9i-a80-apb1-clk" - for the APB1 bus clock on A80
@@ -57,6 +60,7 @@
 	"allwinner,sun9i-a80-apb1-gates-clk" - for the APB1 gates on A80
 	"allwinner,sun6i-a31-apb2-gates-clk" - for the APB2 gates on A31
 	"allwinner,sun8i-a23-apb2-gates-clk" - for the APB2 gates on A23
+	"allwinner,sun8i-a83t-bus-gates-clk" - for the bus gates on A83T
 	"allwinner,sun8i-h3-bus-gates-clk" - for the bus gates on H3
 	"allwinner,sun9i-a80-apbs-gates-clk" - for the APBS gates on A80
 	"allwinner,sun4i-a10-dram-gates-clk" - for the DRAM gates on A10

diff --git a/Documentation/devicetree/bindings/clock/ti/adpll.txt b/Documentation/devicetree/bindings/clock/ti/adpll.txt
new file mode 100644
index 0000000..4c8a2ce
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/adpll.txt

@@ -0,0 +1,41 @@
+Binding for Texas Instruments ADPLL clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. It assumes a
+register-mapped ADPLL with two to three selectable input clocks
+and three to four children.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of "ti,dm814-adpll-s-clock" or
+  "ti,dm814-adpll-lj-clock" depending on the type of the ADPLL
+- #clock-cells : from common clock binding; shall be set to 1.
+- clocks : link phandles of parent clocks clkinp and clkinpulow, note
+  that the adpll-s-clock also has an optional clkinphif
+- reg : address and length of the register set for controlling the ADPLL.
+
+Examples:
+	adpll_mpu_ck: adpll@40 {
+		#clock-cells = <1>;
+		compatible = "ti,dm814-adpll-s-clock";
+		reg = <0x40 0x40>;
+		clocks = <&devosc_ck &devosc_ck &devosc_ck>;
+		clock-names = "clkinp", "clkinpulow", "clkinphif";
+		clock-output-names = "481c5040.adpll.dcoclkldo",
+				     "481c5040.adpll.clkout",
+				     "481c5040.adpll.clkoutx2",
+				     "481c5040.adpll.clkouthif";
+	};
+
+	adpll_dsp_ck: adpll@80 {
+		#clock-cells = <1>;
+		compatible = "ti,dm814-adpll-lj-clock";
+		reg = <0x80 0x30>;
+		clocks = <&devosc_ck &devosc_ck>;
+		clock-names = "clkinp", "clkinpulow";
+		clock-output-names = "481c5080.adpll.dcoclkldo",
+				     "481c5080.adpll.clkout",
+				     "481c5080.adpll.clkoutldo";
+	};

diff --git a/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt b/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt
index 720f7c9..3b2f4c4 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt

@@ -21,6 +21,8 @@
 - reg: Specifies the base physical address and size of the Mbigen
   registers.
 
+Mbigen sub node required properties:
+------------------------------------------
 - interrupt controller: Identifies the node as an interrupt controller
 
 - msi-parent: Specifies the MSI controller this mbigen use.
@@ -45,13 +47,23 @@
 
 Examples:
 
-	mbigen_device_gmac:intc {
+	mbigen_chip_dsa {
 			compatible = "hisilicon,mbigen-v2";
 			reg = <0x0 0xc0080000 0x0 0x10000>;
-			interrupt-controller;
-			msi-parent = <&its_dsa 0x40b1c>;
-			num-pins = <9>;
-			#interrupt-cells = <2>;
+
+			mbigen_gmac:intc_gmac {
+				interrupt-controller;
+				msi-parent = <&its_dsa 0x40b1c>;
+				num-pins = <9>;
+				#interrupt-cells = <2>;
+			};
+
+			mbigen_i2c:intc_i2c {
+				interrupt-controller;
+				msi-parent = <&its_dsa 0x40b0e>;
+				num-pins = <2>;
+				#interrupt-cells = <2>;
+			};
 	};
 
 Devices connect to mbigen required properties:

diff --git a/Documentation/devicetree/bindings/mailbox/ti,message-manager.txt b/Documentation/devicetree/bindings/mailbox/ti,message-manager.txt
new file mode 100644
index 0000000..b449d02
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/ti,message-manager.txt

@@ -0,0 +1,50 @@
+Texas Instruments' Message Manager Driver
+========================================
+
+The Texas Instruments' Message Manager is a mailbox controller that has
+configurable queues selectable at SoC(System on Chip) integration. The Message
+manager is broken up into queues in different address regions that are called
+"proxies" - each instance is unidirectional and is instantiated at SoC
+integration level to indicate receive or transmit path.
+
+Message Manager Device Node:
+===========================
+Required properties:
+--------------------
+- compatible:		Shall be: "ti,k2g-message-manager"
+- reg-names 		queue_proxy_region - Map the queue proxy region.
+			queue_state_debug_region - Map the queue state debug
+			region.
+- reg:			Contains the register map per reg-names.
+- #mbox-cells		Shall be 2. Contains the queue ID and proxy ID in that
+		        order referring to the transfer path.
+- interrupt-names:	Contains interrupt names matching the rx transfer path
+			for a given SoC. Receive interrupts shall be of the
+			format: "rx_<QID>_<PID>".
+			For ti,k2g-message-manager, this shall contain:
+				"rx_005_002", "rx_057_002"
+- interrupts:		Contains the interrupt information corresponding to
+			interrupt-names property.
+
+Example(K2G):
+------------
+
+	msgmgr: msgmgr@02a00000 {
+		compatible = "ti,k2g-message-manager";
+		#mbox-cells = <2>;
+		reg-names = "queue_proxy_region", "queue_state_debug_region";
+		reg = <0x02a00000 0x400000>, <0x028c3400 0x400>;
+		interrupt-names = "rx_005", "rx_057";
+		interrupts = <GIC_SPI 324 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 327 IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	pmmc: pmmc {
+		[...]
+		mbox-names = "rx", "tx";
+		# RX queue ID is 5, proxy ID is 2
+		# TX queue ID is 0, proxy ID is 0
+		mboxes= <&msgmgr 5 2>,
+			<&msgmgr 0 0>;
+		[...]
+	};

diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
index 7d4c8eb..d53aba9 100644
--- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt

@@ -1,7 +1,10 @@
 Atmel NAND flash
 
 Required properties:
-- compatible : should be "atmel,at91rm9200-nand" or "atmel,sama5d4-nand".
+- compatible: The possible values are:
+	"atmel,at91rm9200-nand"
+	"atmel,sama5d2-nand"
+	"atmel,sama5d4-nand"
 - reg : should specify localbus address and size used for the chip,
 	and hardware ECC controller if available.
 	If the hardware ECC is PMECC, it should contain address and size for
@@ -21,10 +24,11 @@
 - nand-ecc-mode : String, operation mode of the NAND ecc mode, soft by default.
   Supported values are: "none", "soft", "hw", "hw_syndrome", "hw_oob_first",
   "soft_bch".
-- atmel,has-pmecc : boolean to enable Programmable Multibit ECC hardware.
-  Only supported by at91sam9x5 or later sam9 product.
+- atmel,has-pmecc : boolean to enable Programmable Multibit ECC hardware,
+  capable of BCH encoding and decoding, on devices where it is present.
 - atmel,pmecc-cap : error correct capability for Programmable Multibit ECC
-  Controller. Supported values are: 2, 4, 8, 12, 24.
+  Controller. Supported values are: 2, 4, 8, 12, 24. If the compatible string
+  is "atmel,sama5d2-nand", 32 is also valid.
 - atmel,pmecc-sector-size : sector size for ECC computation. Supported values
   are: 512, 1024.
 - atmel,pmecc-lookup-table-offset : includes two offsets of lookup table in ROM
@@ -32,15 +36,16 @@
   sector size 1024. If not specified, driver will build the table in runtime.
 - nand-bus-width : 8 or 16 bus width if not present 8
 - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
-- Nand Flash Controller(NFC) is a slave driver under Atmel nand flash
-  - Required properties:
-    - compatible : "atmel,sama5d3-nfc".
-    - reg : should specify the address and size used for NFC command registers,
-            NFC registers and NFC Sram. NFC Sram address and size can be absent
-            if don't want to use it.
-    - clocks: phandle to the peripheral clock
-  - Optional properties:
-    - atmel,write-by-sram: boolean to enable NFC write by sram.
+
+Nand Flash Controller(NFC) is an optional sub-node
+Required properties:
+- compatible : "atmel,sama5d3-nfc" or "atmel,sama5d4-nfc".
+- reg : should specify the address and size used for NFC command registers,
+        NFC registers and NFC SRAM. NFC SRAM address and size can be absent
+        if don't want to use it.
+- clocks: phandle to the peripheral clock
+Optional properties:
+- atmel,write-by-sram: boolean to enable NFC write by SRAM.
 
 Examples:
 nand0: nand@40000000,0 {

diff --git a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
index 00c587b..0333ec8 100644
--- a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
+++ b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt

@@ -3,7 +3,9 @@
 Required properties:
   - compatible : Should be "fsl,vf610-qspi", "fsl,imx6sx-qspi",
 		 "fsl,imx7d-qspi", "fsl,imx6ul-qspi",
-		 "fsl,ls1021-qspi"
+		 "fsl,ls1021a-qspi"
+		 or
+		 "fsl,ls2080a-qspi" followed by "fsl,ls1021a-qspi"
   - reg : the first contains the register location and length,
           the second contains the memory mapping address and length
   - reg-names: Should contain the reg names "QuadSPI" and "QuadSPI-memory"
@@ -19,6 +21,7 @@
 			      But if there are two NOR flashes connected to the
 			      bus, you should enable this property.
 			      (Please check the board's schematic.)
+  - big-endian : That means the IP register is big endian
 
 Example:
 

diff --git a/Documentation/devicetree/bindings/mtd/qcom_nandc.txt b/Documentation/devicetree/bindings/mtd/qcom_nandc.txt
new file mode 100644
index 0000000..70dd511
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/qcom_nandc.txt

@@ -0,0 +1,86 @@
+* Qualcomm NAND controller
+
+Required properties:
+- compatible:		should be "qcom,ipq806x-nand"
+- reg:			MMIO address range
+- clocks:		must contain core clock and always on clock
+- clock-names:		must contain "core" for the core clock and "aon" for the
+			always on clock
+- dmas:			DMA specifier, consisting of a phandle to the ADM DMA
+			controller node and the channel number to be used for
+			NAND. Refer to dma.txt and qcom_adm.txt for more details
+- dma-names:		must be "rxtx"
+- qcom,cmd-crci:	must contain the ADM command type CRCI block instance
+			number specified for the NAND controller on the given
+			platform
+- qcom,data-crci:	must contain the ADM data type CRCI block instance
+			number specified for the NAND controller on the given
+			platform
+- #address-cells:	<1> - subnodes give the chip-select number
+- #size-cells:		<0>
+
+* NAND chip-select
+
+Each controller may contain one or more subnodes to represent enabled
+chip-selects which (may) contain NAND flash chips. Their properties are as
+follows.
+
+Required properties:
+- compatible:		should contain "qcom,nandcs"
+- reg:			a single integer representing the chip-select
+			number (e.g., 0, 1, 2, etc.)
+- #address-cells:	see partition.txt
+- #size-cells:		see partition.txt
+- nand-ecc-strength:	see nand.txt
+- nand-ecc-step-size:	must be 512. see nand.txt for more details.
+
+Optional properties:
+- nand-bus-width:	see nand.txt
+
+Each nandcs device node may optionally contain a 'partitions' sub-node, which
+further contains sub-nodes describing the flash partition mapping. See
+partition.txt for more detail.
+
+Example:
+
+nand@1ac00000 {
+	compatible = "qcom,ebi2-nandc";
+	reg = <0x1ac00000 0x800>;
+
+	clocks = <&gcc EBI2_CLK>,
+		 <&gcc EBI2_AON_CLK>;
+	clock-names = "core", "aon";
+
+	dmas = <&adm_dma 3>;
+	dma-names = "rxtx";
+	qcom,cmd-crci = <15>;
+	qcom,data-crci = <3>;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	nandcs@0 {
+		compatible = "qcom,nandcs";
+		reg = <0>;
+
+		nand-ecc-strength = <4>;
+		nand-ecc-step-size = <512>;
+		nand-bus-width = <8>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "boot-nand";
+				reg = <0 0x58a0000>;
+			};
+
+			partition@58a0000 {
+				label = "fs-nand";
+				reg = <0x58a0000 0x4000000>;
+			};
+		};
+	};
+};

diff --git a/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt
index 08a4a32..0326154 100644
--- a/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt

@@ -134,12 +134,12 @@
 mfio81		dreq0, mips_trace_data, eth_debug
 mfio82		dreq1, mips_trace_data, eth_debug
 mfio83		mips_pll_lock, mips_trace_data, usb_debug
-mfio84		sys_pll_lock, mips_trace_data, usb_debug
-mfio85		wifi_pll_lock, mips_trace_data, sdhost_debug
-mfio86		bt_pll_lock, mips_trace_data, sdhost_debug
-mfio87		rpu_v_pll_lock, dreq2, socif_debug
-mfio88		rpu_l_pll_lock, dreq3, socif_debug
-mfio89		audio_pll_lock, dreq4, dreq5
+mfio84		audio_pll_lock, mips_trace_data, usb_debug
+mfio85		rpu_v_pll_lock, mips_trace_data, sdhost_debug
+mfio86		rpu_l_pll_lock, mips_trace_data, sdhost_debug
+mfio87		sys_pll_lock, dreq2, socif_debug
+mfio88		wifi_pll_lock, dreq3, socif_debug
+mfio89		bt_pll_lock, dreq4, dreq5
 tck
 trstn
 tdi

diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
index b8627e7..c84fb47 100644
--- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt

@@ -35,6 +35,8 @@
   - "rockchip,rk3288-io-voltage-domain" for rk3288
   - "rockchip,rk3368-io-voltage-domain" for rk3368
   - "rockchip,rk3368-pmu-io-voltage-domain" for rk3368 pmu-domains
+  - "rockchip,rk3399-io-voltage-domain" for rk3399
+  - "rockchip,rk3399-pmu-io-voltage-domain" for rk3399 pmu-domains
 - rockchip,grf: phandle to the syscon managing the "general register files"
 
 
@@ -79,6 +81,15 @@
 - pmu-supply:    The supply connected to PMUIO_VDD.
 - vop-supply:    The supply connected to LCDC_VDD.
 
+Possible supplies for rk3399:
+- bt656-supply:  The supply connected to APIO2_VDD.
+- audio-supply:  The supply connected to APIO5_VDD.
+- sdmmc-supply:  The supply connected to SDMMC0_VDD.
+- gpio1830       The supply connected to APIO4_VDD.
+
+Possible supplies for rk3399 pmu-domains:
+- pmu1830-supply:The supply connected to PMUIO2_VDD.
+
 Example:
 
 	io-domains {

diff --git a/Documentation/devicetree/bindings/rtc/maxim,mcp795.txt b/Documentation/devicetree/bindings/rtc/maxim,mcp795.txt
new file mode 100644
index 0000000..a59fdd8
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/maxim,mcp795.txt

@@ -0,0 +1,11 @@
+* Maxim MCP795		SPI Serial Real-Time Clock
+
+Required properties:
+- compatible: Should contain "maxim,mcp795".
+- reg: SPI address for chip
+
+Example:
+	mcp795: rtc@0 {
+		compatible = "maxim,mcp795";
+		reg = <0>;
+	};

diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
index 695150a..70b4c16 100644
--- a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt

@@ -11,6 +11,7 @@
 	       "samsung,exynos5420-tmu" for TMU channel 0, 1 on Exynos5420
 	       "samsung,exynos5420-tmu-ext-triminfo" for TMU channels 2, 3 and 4
 			Exynos5420 (Must pass triminfo base and triminfo clock)
+               "samsung,exynos5433-tmu"
 	       "samsung,exynos5440-tmu"
 	       "samsung,exynos7-tmu"
 - interrupt-parent : The phandle for the interrupt controller
@@ -40,9 +41,14 @@
 		for current TMU channel
 	-- "tmu_sclk" clock for functional operation of the current TMU
 		channel
-- vtmu-supply: This entry is optional and provides the regulator node supplying
-		voltage to TMU. If needed this entry can be placed inside
-		board/platform specific dts file.
+
+The Exynos TMU supports generating interrupts when reaching given
+temperature thresholds. Number of supported thermal trip points depends
+on the SoC (only first trip points defined in DT will be configured):
+ - most of SoC: 4
+ - samsung,exynos5433-tmu: 8
+ - samsung,exynos7-tmu: 8
+
 Following properties are mandatory (depending on SoC):
 - samsung,tmu_gain: Gain value for internal TMU operation.
 - samsung,tmu_reference_voltage: Value of TMU IP block's reference voltage
@@ -56,6 +62,12 @@
 - samsung,tmu_default_temp_offset: Default temperature offset
 - samsung,tmu_cal_type: Callibration type
 
+** Optional properties:
+
+- vtmu-supply: This entry is optional and provides the regulator node supplying
+		voltage to TMU. If needed this entry can be placed inside
+		board/platform specific dts file.
+
 Example 1):
 
 	tmu@100C0000 {

diff --git a/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt b/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
new file mode 100644
index 0000000..81f9a51
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt

@@ -0,0 +1,43 @@
+* Mediatek Thermal
+
+This describes the device tree binding for the Mediatek thermal controller
+which measures the on-SoC temperatures. This device does not have its own ADC,
+instead it directly controls the AUXADC via AHB bus accesses. For this reason
+this device needs phandles to the AUXADC. Also it controls a mux in the
+apmixedsys register space via AHB bus accesses, so a phandle to the APMIXEDSYS
+is also needed.
+
+Required properties:
+- compatible: "mediatek,mt8173-thermal"
+- reg: Address range of the thermal controller
+- interrupts: IRQ for the thermal controller
+- clocks, clock-names: Clocks needed for the thermal controller. required
+                       clocks are:
+		       "therm":	 Main clock needed for register access
+		       "auxadc": The AUXADC clock
+- resets: Reference to the reset controller controlling the thermal controller.
+- mediatek,auxadc: A phandle to the AUXADC which the thermal controller uses
+- mediatek,apmixedsys: A phandle to the APMIXEDSYS controller.
+- #thermal-sensor-cells : Should be 0. See ./thermal.txt for a description.
+
+Optional properties:
+- nvmem-cells: A phandle to the calibration data provided by a nvmem device. If
+               unspecified default values shall be used.
+- nvmem-cell-names: Should be "calibration-data"
+
+Example:
+
+	thermal: thermal@1100b000 {
+		#thermal-sensor-cells = <1>;
+		compatible = "mediatek,mt8173-thermal";
+		reg = <0 0x1100b000 0 0x1000>;
+		interrupts = <0 70 IRQ_TYPE_LEVEL_LOW>;
+		clocks = <&pericfg CLK_PERI_THERM>, <&pericfg CLK_PERI_AUXADC>;
+		clock-names = "therm", "auxadc";
+		resets = <&pericfg MT8173_PERI_THERM_SW_RST>;
+		reset-names = "therm";
+		mediatek,auxadc = <&auxadc>;
+		mediatek,apmixedsys = <&apmixedsys>;
+		nvmem-cells = <&thermal_calibration_data>;
+		nvmem-cell-names = "calibration-data";
+	};

diff --git a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
index 03c0e98..66f6adf 100644
--- a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
+++ b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt

@@ -38,6 +38,9 @@
 			  defined or a value in the array is "0" then it is assumed
 			  that the frequency is set by the parent clock or a
 			  fixed rate clock source.
+-lanes-per-direction	: number of lanes available per direction - either 1 or 2.
+			  Note that it is assume same number of lanes is used both
+			  directions at once. If not specified, default is 2 lanes per direction.
 
 Note: If above properties are not defined it can be assumed that the supply
 regulators or clocks are always on.

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 42adb41..86740d4 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt

@@ -10,6 +10,7 @@
 adapteva	Adapteva, Inc.
 adh	AD Holdings Plc.
 adi	Analog Devices, Inc.
+advantech	Advantech Corporation
 aeroflexgaisler	Aeroflex Gaisler AB
 al	Annapurna Labs
 allwinner	Allwinner Technology Co., Ltd.
@@ -89,6 +90,7 @@
 firefly	Firefly
 focaltech	FocalTech Systems Co.,Ltd
 fsl	Freescale Semiconductor
+ge	General Electric Company
 GEFanuc	GE Fanuc Intelligent Platforms Embedded Systems, Inc.
 gef	GE Fanuc Intelligent Platforms Embedded Systems, Inc.
 geniatech	Geniatech, Inc.

diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 32ac32e..ca44c58 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt

@@ -352,7 +352,8 @@
 
    No special interfaces, userspace simply calls mmap on the dma-buf fd, making
    sure that the cache synchronization ioctl (DMA_BUF_IOCTL_SYNC) is *always*
-   used when the access happens. This is discussed next paragraphs.
+   used when the access happens. Note that DMA_BUF_IOCTL_SYNC can fail with
+   -EAGAIN or -EINTR, in which case it must be restarted.
 
    Some systems might need some sort of cache coherency management e.g. when
    CPU and GPU domains are being accessed through dma-buf at the same time. To
@@ -366,10 +367,10 @@
        want (with the new data being consumed by the GPU or say scanout device)
      - munmap once you don't need the buffer any more
 
-    Therefore, for correctness and optimal performance, systems with the memory
-    cache shared by the GPU and CPU i.e. the "coherent" and also the
-    "incoherent" are always required to use SYNC_START and SYNC_END before and
-    after, respectively, when accessing the mapped address.
+    For correctness and optimal performance, it is always required to use
+    SYNC_START and SYNC_END before and after, respectively, when accessing the
+    mapped address. Userspace cannot rely on coherent access, even when there
+    are systems where it just works without calling these ioctls.
 
 2. Supporting existing mmap interfaces in importers
 

diff --git a/Documentation/filesystems/cramfs.txt b/Documentation/filesystems/cramfs.txt
index 31f53f0..4006298 100644
--- a/Documentation/filesystems/cramfs.txt
+++ b/Documentation/filesystems/cramfs.txt

@@ -38,7 +38,7 @@
 which the timestamp reverts to 1970, i.e. moves backwards in time.
 
 Currently, cramfs must be written and read with architectures of the
-same endianness, and can be read only by kernels with PAGE_CACHE_SIZE
+same endianness, and can be read only by kernels with PAGE_SIZE
 == 4096.  At least the latter of these is a bug, but it hasn't been
 decided what the best fix is.  For the moment if you have larger pages
 you can just change the #define in mkcramfs.c, so long as you don't

diff --git a/Documentation/filesystems/nfs/pnfs-scsi-server.txt b/Documentation/filesystems/nfs/pnfs-scsi-server.txt
new file mode 100644
index 0000000..5bef726
--- /dev/null
+++ b/Documentation/filesystems/nfs/pnfs-scsi-server.txt

@@ -0,0 +1,23 @@
+
+pNFS SCSI layout server user guide
+==================================
+
+This document describes support for pNFS SCSI layouts in the Linux NFS server.
+With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS,
+which in addition to handling all the metadata access to the NFS export,
+also hands out layouts to the clients so that they can directly access the
+underlying SCSI LUNs that are shared with the client.
+
+To use pNFS SCSI layouts with with the Linux NFS server, the exported file
+system needs to support the pNFS SCSI layouts (currently just XFS), and the
+file system must sit on a SCSI LUN that is accessible to the clients in
+addition to the MDS.  As of now the file system needs to sit directly on the
+exported LUN, striping or concatenation of LUNs on the MDS and clients
+is not supported yet.
+
+On a server built with CONFIG_NFSD_SCSI, the pNFS SCSI volume support is
+automatically enabled if the file system is exported using the "pnfs"
+option and the underlying SCSI device support persistent reservations.
+On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
+enabled, and the file system is mounted using the NFSv4.1 protocol
+version (mount -o vers=4.1).

diff --git a/Documentation/filesystems/ocfs2-online-filecheck.txt b/Documentation/filesystems/ocfs2-online-filecheck.txt
new file mode 100644
index 0000000..1ab0786
--- /dev/null
+++ b/Documentation/filesystems/ocfs2-online-filecheck.txt

@@ -0,0 +1,94 @@
+		    OCFS2 online file check
+		    -----------------------
+
+This document will describe OCFS2 online file check feature.
+
+Introduction
+============
+OCFS2 is often used in high-availaibility systems. However, OCFS2 usually
+converts the filesystem to read-only when encounters an error. This may not be
+necessary, since turning the filesystem read-only would affect other running
+processes as well, decreasing availability.
+Then, a mount option (errors=continue) is introduced, which would return the
+-EIO errno to the calling process and terminate furhter processing so that the
+filesystem is not corrupted further. The filesystem is not converted to
+read-only, and the problematic file's inode number is reported in the kernel
+log. The user can try to check/fix this file via online filecheck feature.
+
+Scope
+=====
+This effort is to check/fix small issues which may hinder day-to-day operations
+of a cluster filesystem by turning the filesystem read-only. The scope of
+checking/fixing is at the file level, initially for regular files and eventually
+to all files (including system files) of the filesystem.
+
+In case of directory to file links is incorrect, the directory inode is
+reported as erroneous.
+
+This feature is not suited for extravagant checks which involve dependency of
+other components of the filesystem, such as but not limited to, checking if the
+bits for file blocks in the allocation has been set. In case of such an error,
+the offline fsck should/would be recommended.
+
+Finally, such an operation/feature should not be automated lest the filesystem
+may end up with more damage than before the repair attempt. So, this has to
+be performed using user interaction and consent.
+
+User interface
+==============
+When there are errors in the OCFS2 filesystem, they are usually accompanied
+by the inode number which caused the error. This inode number would be the
+input to check/fix the file.
+
+There is a sysfs directory for each OCFS2 file system mounting:
+
+  /sys/fs/ocfs2/<devname>/filecheck
+
+Here, <devname> indicates the name of OCFS2 volumn device which has been already
+mounted. The file above would accept inode numbers. This could be used to
+communicate with kernel space, tell which file(inode number) will be checked or
+fixed. Currently, three operations are supported, which includes checking
+inode, fixing inode and setting the size of result record history.
+
+1. If you want to know what error exactly happened to <inode> before fixing, do
+
+  # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/check
+  # cat /sys/fs/ocfs2/<devname>/filecheck/check
+
+The output is like this:
+  INO		DONE	ERROR
+39502		1	GENERATION
+
+<INO> lists the inode numbers.
+<DONE> indicates whether the operation has been finished.
+<ERROR> says what kind of errors was found. For the detailed error numbers,
+please refer to the file linux/fs/ocfs2/filecheck.h.
+
+2. If you determine to fix this inode, do
+
+  # echo "<inode>" > /sys/fs/ocfs2/<devname>/filecheck/fix
+  # cat /sys/fs/ocfs2/<devname>/filecheck/fix
+
+The output is like this:
+  INO		DONE	ERROR
+39502		1	SUCCESS
+
+This time, the <ERROR> column indicates whether this fix is successful or not.
+
+3. The record cache is used to store the history of check/fix results. It's
+defalut size is 10, and can be adjust between the range of 10 ~ 100. You can
+adjust the size like this:
+
+  # echo "<size>" > /sys/fs/ocfs2/<devname>/filecheck/set
+
+Fixing stuff
+============
+On receivng the inode, the filesystem would read the inode and the
+file metadata. In case of errors, the filesystem would fix the errors
+and report the problems it fixed in the kernel log. As a precautionary measure,
+the inode must first be checked for errors before performing a final fix.
+
+The inode and the result history will be maintained temporarily in a
+small linked list buffer which would contain the last (N) inodes
+fixed/checked, the detailed errors which were fixed/checked are printed in the
+kernel log.

diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.txt
new file mode 100644
index 0000000..e1a0056
--- /dev/null
+++ b/Documentation/filesystems/orangefs.txt

@@ -0,0 +1,406 @@
+ORANGEFS
+========
+
+OrangeFS is an LGPL userspace scale-out parallel storage system. It is ideal
+for large storage problems faced by HPC, BigData, Streaming Video,
+Genomics, Bioinformatics.
+
+Orangefs, originally called PVFS, was first developed in 1993 by
+Walt Ligon and Eric Blumer as a parallel file system for Parallel
+Virtual Machine (PVM) as part of a NASA grant to study the I/O patterns
+of parallel programs.
+
+Orangefs features include:
+
+  * Distributes file data among multiple file servers
+  * Supports simultaneous access by multiple clients
+  * Stores file data and metadata on servers using local file system
+    and access methods
+  * Userspace implementation is easy to install and maintain
+  * Direct MPI support
+  * Stateless
+
+
+MAILING LIST
+============
+
+http://beowulf-underground.org/mailman/listinfo/pvfs2-users
+
+
+DOCUMENTATION
+=============
+
+http://www.orangefs.org/documentation/
+
+
+USERSPACE FILESYSTEM SOURCE
+===========================
+
+http://www.orangefs.org/download
+
+Orangefs versions prior to 2.9.3 would not be compatible with the
+upstream version of the kernel client.
+
+
+BUILDING THE USERSPACE FILESYSTEM ON A SINGLE SERVER
+====================================================
+
+When Orangefs is upstream, "--with-kernel" shouldn't be needed, but
+until then the path to where the kernel with the Orangefs kernel client
+patch was built is needed to ensure that pvfs2-client-core (the bridge
+between kernel space and user space) will build properly. You can omit
+--prefix if you don't care that things are sprinkled around in
+/usr/local.
+
+./configure --prefix=/opt/ofs --with-kernel=/path/to/orangefs/kernel
+
+make
+
+make install
+
+Create an orangefs config file:
+/opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf
+
+  for "Enter hostnames", use the hostname, don't let it default to
+  localhost.
+
+create a pvfs2tab file in /etc:
+cat /etc/pvfs2tab
+tcp://myhostname:3334/orangefs /mymountpoint pvfs2 defaults,noauto 0 0
+
+create the mount point you specified in the tab file if needed:
+mkdir /mymountpoint
+
+bootstrap the server:
+/opt/ofs/sbin/pvfs2-server /etc/pvfs2.conf -f
+
+start the server:
+/opt/osf/sbin/pvfs2-server /etc/pvfs2.conf
+
+Now the server is running. At this point you might like to
+prove things are working with:
+
+/opt/osf/bin/pvfs2-ls /mymountpoint
+
+You might not want to enforce selinux, it doesn't seem to matter by
+linux 3.11...
+
+If stuff seems to be working, turn on the client core:
+/opt/osf/sbin/pvfs2-client -p /opt/osf/sbin/pvfs2-client-core
+
+Mount your filesystem.
+mount -t pvfs2 tcp://myhostname:3334/orangefs /mymountpoint
+
+
+OPTIONS
+=======
+
+The following mount options are accepted:
+
+  acl
+    Allow the use of Access Control Lists on files and directories.
+
+  intr
+    Some operations between the kernel client and the user space
+    filesystem can be interruptible, such as changes in debug levels
+    and the setting of tunable parameters.
+
+  local_lock
+    Enable posix locking from the perspective of "this" kernel. The
+    default file_operations lock action is to return ENOSYS. Posix
+    locking kicks in if the filesystem is mounted with -o local_lock.
+    Distributed locking is being worked on for the future.
+
+
+DEBUGGING
+=========
+
+If you want the debug (GOSSIP) statements in a particular
+source file (inode.c for example) go to syslog:
+
+  echo inode > /sys/kernel/debug/orangefs/kernel-debug
+
+No debugging (the default):
+
+  echo none > /sys/kernel/debug/orangefs/kernel-debug
+
+Debugging from several source files:
+
+  echo inode,dir > /sys/kernel/debug/orangefs/kernel-debug
+
+All debugging:
+
+  echo all > /sys/kernel/debug/orangefs/kernel-debug
+
+Get a list of all debugging keywords:
+
+  cat /sys/kernel/debug/orangefs/debug-help
+
+
+PROTOCOL BETWEEN KERNEL MODULE AND USERSPACE
+============================================
+
+Orangefs is a user space filesystem and an associated kernel module.
+We'll just refer to the user space part of Orangefs as "userspace"
+from here on out. Orangefs descends from PVFS, and userspace code
+still uses PVFS for function and variable names. Userspace typedefs
+many of the important structures. Function and variable names in
+the kernel module have been transitioned to "orangefs", and The Linux
+Coding Style avoids typedefs, so kernel module structures that
+correspond to userspace structures are not typedefed.
+
+The kernel module implements a pseudo device that userspace
+can read from and write to. Userspace can also manipulate the
+kernel module through the pseudo device with ioctl.
+
+THE BUFMAP:
+
+At startup userspace allocates two page-size-aligned (posix_memalign)
+mlocked memory buffers, one is used for IO and one is used for readdir
+operations. The IO buffer is 41943040 bytes and the readdir buffer is
+4194304 bytes. Each buffer contains logical chunks, or partitions, and
+a pointer to each buffer is added to its own PVFS_dev_map_desc structure
+which also describes its total size, as well as the size and number of
+the partitions.
+
+A pointer to the IO buffer's PVFS_dev_map_desc structure is sent to a
+mapping routine in the kernel module with an ioctl. The structure is
+copied from user space to kernel space with copy_from_user and is used
+to initialize the kernel module's "bufmap" (struct orangefs_bufmap), which
+then contains:
+
+  * refcnt - a reference counter
+  * desc_size - PVFS2_BUFMAP_DEFAULT_DESC_SIZE (4194304) - the IO buffer's
+    partition size, which represents the filesystem's block size and
+    is used for s_blocksize in super blocks.
+  * desc_count - PVFS2_BUFMAP_DEFAULT_DESC_COUNT (10) - the number of
+    partitions in the IO buffer.
+  * desc_shift - log2(desc_size), used for s_blocksize_bits in super blocks.
+  * total_size - the total size of the IO buffer.
+  * page_count - the number of 4096 byte pages in the IO buffer.
+  * page_array - a pointer to page_count * (sizeof(struct page*)) bytes
+    of kcalloced memory. This memory is used as an array of pointers
+    to each of the pages in the IO buffer through a call to get_user_pages.
+  * desc_array - a pointer to desc_count * (sizeof(struct orangefs_bufmap_desc))
+    bytes of kcalloced memory. This memory is further intialized:
+
+      user_desc is the kernel's copy of the IO buffer's ORANGEFS_dev_map_desc
+      structure. user_desc->ptr points to the IO buffer.
+
+      pages_per_desc = bufmap->desc_size / PAGE_SIZE
+      offset = 0
+
+        bufmap->desc_array[0].page_array = &bufmap->page_array[offset]
+        bufmap->desc_array[0].array_count = pages_per_desc = 1024
+        bufmap->desc_array[0].uaddr = (user_desc->ptr) + (0 * 1024 * 4096)
+        offset += 1024
+                           .
+                           .
+                           .
+        bufmap->desc_array[9].page_array = &bufmap->page_array[offset]
+        bufmap->desc_array[9].array_count = pages_per_desc = 1024
+        bufmap->desc_array[9].uaddr = (user_desc->ptr) +
+                                               (9 * 1024 * 4096)
+        offset += 1024
+
+  * buffer_index_array - a desc_count sized array of ints, used to
+    indicate which of the IO buffer's partitions are available to use.
+  * buffer_index_lock - a spinlock to protect buffer_index_array during update.
+  * readdir_index_array - a five (ORANGEFS_READDIR_DEFAULT_DESC_COUNT) element
+    int array used to indicate which of the readdir buffer's partitions are
+    available to use.
+  * readdir_index_lock - a spinlock to protect readdir_index_array during
+    update.
+
+OPERATIONS:
+
+The kernel module builds an "op" (struct orangefs_kernel_op_s) when it
+needs to communicate with userspace. Part of the op contains the "upcall"
+which expresses the request to userspace. Part of the op eventually
+contains the "downcall" which expresses the results of the request.
+
+The slab allocator is used to keep a cache of op structures handy.
+
+At init time the kernel module defines and initializes a request list
+and an in_progress hash table to keep track of all the ops that are
+in flight at any given time.
+
+Ops are stateful:
+
+ * unknown  - op was just initialized
+ * waiting  - op is on request_list (upward bound)
+ * inprogr  - op is in progress (waiting for downcall)
+ * serviced - op has matching downcall; ok
+ * purged   - op has to start a timer since client-core
+              exited uncleanly before servicing op
+ * given up - submitter has given up waiting for it
+
+When some arbitrary userspace program needs to perform a
+filesystem operation on Orangefs (readdir, I/O, create, whatever)
+an op structure is initialized and tagged with a distinguishing ID
+number. The upcall part of the op is filled out, and the op is
+passed to the "service_operation" function.
+
+Service_operation changes the op's state to "waiting", puts
+it on the request list, and signals the Orangefs file_operations.poll
+function through a wait queue. Userspace is polling the pseudo-device
+and thus becomes aware of the upcall request that needs to be read.
+
+When the Orangefs file_operations.read function is triggered, the
+request list is searched for an op that seems ready-to-process.
+The op is removed from the request list. The tag from the op and
+the filled-out upcall struct are copy_to_user'ed back to userspace.
+
+If any of these (and some additional protocol) copy_to_users fail,
+the op's state is set to "waiting" and the op is added back to
+the request list. Otherwise, the op's state is changed to "in progress",
+and the op is hashed on its tag and put onto the end of a list in the
+in_progress hash table at the index the tag hashed to.
+
+When userspace has assembled the response to the upcall, it
+writes the response, which includes the distinguishing tag, back to
+the pseudo device in a series of io_vecs. This triggers the Orangefs
+file_operations.write_iter function to find the op with the associated
+tag and remove it from the in_progress hash table. As long as the op's
+state is not "canceled" or "given up", its state is set to "serviced".
+The file_operations.write_iter function returns to the waiting vfs,
+and back to service_operation through wait_for_matching_downcall.
+
+Service operation returns to its caller with the op's downcall
+part (the response to the upcall) filled out.
+
+The "client-core" is the bridge between the kernel module and
+userspace. The client-core is a daemon. The client-core has an
+associated watchdog daemon. If the client-core is ever signaled
+to die, the watchdog daemon restarts the client-core. Even though
+the client-core is restarted "right away", there is a period of
+time during such an event that the client-core is dead. A dead client-core
+can't be triggered by the Orangefs file_operations.poll function.
+Ops that pass through service_operation during a "dead spell" can timeout
+on the wait queue and one attempt is made to recycle them. Obviously,
+if the client-core stays dead too long, the arbitrary userspace processes
+trying to use Orangefs will be negatively affected. Waiting ops
+that can't be serviced will be removed from the request list and
+have their states set to "given up". In-progress ops that can't 
+be serviced will be removed from the in_progress hash table and
+have their states set to "given up".
+
+Readdir and I/O ops are atypical with respect to their payloads.
+
+  - readdir ops use the smaller of the two pre-allocated pre-partitioned
+    memory buffers. The readdir buffer is only available to userspace.
+    The kernel module obtains an index to a free partition before launching
+    a readdir op. Userspace deposits the results into the indexed partition
+    and then writes them to back to the pvfs device.
+
+  - io (read and write) ops use the larger of the two pre-allocated
+    pre-partitioned memory buffers. The IO buffer is accessible from
+    both userspace and the kernel module. The kernel module obtains an
+    index to a free partition before launching an io op. The kernel module
+    deposits write data into the indexed partition, to be consumed
+    directly by userspace. Userspace deposits the results of read
+    requests into the indexed partition, to be consumed directly
+    by the kernel module.
+
+Responses to kernel requests are all packaged in pvfs2_downcall_t
+structs. Besides a few other members, pvfs2_downcall_t contains a
+union of structs, each of which is associated with a particular
+response type.
+
+The several members outside of the union are:
+ - int32_t type - type of operation.
+ - int32_t status - return code for the operation.
+ - int64_t trailer_size - 0 unless readdir operation.
+ - char *trailer_buf - initialized to NULL, used during readdir operations.
+
+The appropriate member inside the union is filled out for any
+particular response.
+
+  PVFS2_VFS_OP_FILE_IO
+    fill a pvfs2_io_response_t
+
+  PVFS2_VFS_OP_LOOKUP
+    fill a PVFS_object_kref
+
+  PVFS2_VFS_OP_CREATE
+    fill a PVFS_object_kref
+
+  PVFS2_VFS_OP_SYMLINK
+    fill a PVFS_object_kref
+
+  PVFS2_VFS_OP_GETATTR
+    fill in a PVFS_sys_attr_s (tons of stuff the kernel doesn't need)
+    fill in a string with the link target when the object is a symlink.
+
+  PVFS2_VFS_OP_MKDIR
+    fill a PVFS_object_kref
+
+  PVFS2_VFS_OP_STATFS
+    fill a pvfs2_statfs_response_t with useless info <g>. It is hard for
+    us to know, in a timely fashion, these statistics about our
+    distributed network filesystem. 
+
+  PVFS2_VFS_OP_FS_MOUNT
+    fill a pvfs2_fs_mount_response_t which is just like a PVFS_object_kref
+    except its members are in a different order and "__pad1" is replaced
+    with "id".
+
+  PVFS2_VFS_OP_GETXATTR
+    fill a pvfs2_getxattr_response_t
+
+  PVFS2_VFS_OP_LISTXATTR
+    fill a pvfs2_listxattr_response_t
+
+  PVFS2_VFS_OP_PARAM
+    fill a pvfs2_param_response_t
+
+  PVFS2_VFS_OP_PERF_COUNT
+    fill a pvfs2_perf_count_response_t
+
+  PVFS2_VFS_OP_FSKEY
+    file a pvfs2_fs_key_response_t
+
+  PVFS2_VFS_OP_READDIR
+    jamb everything needed to represent a pvfs2_readdir_response_t into
+    the readdir buffer descriptor specified in the upcall.
+
+Userspace uses writev() on /dev/pvfs2-req to pass responses to the requests
+made by the kernel side.
+
+A buffer_list containing:
+  - a pointer to the prepared response to the request from the
+    kernel (struct pvfs2_downcall_t).
+  - and also, in the case of a readdir request, a pointer to a
+    buffer containing descriptors for the objects in the target
+    directory.
+... is sent to the function (PINT_dev_write_list) which performs
+the writev.
+
+PINT_dev_write_list has a local iovec array: struct iovec io_array[10];
+
+The first four elements of io_array are initialized like this for all
+responses:
+
+  io_array[0].iov_base = address of local variable "proto_ver" (int32_t)
+  io_array[0].iov_len = sizeof(int32_t)
+
+  io_array[1].iov_base = address of global variable "pdev_magic" (int32_t)
+  io_array[1].iov_len = sizeof(int32_t)
+  
+  io_array[2].iov_base = address of parameter "tag" (PVFS_id_gen_t)
+  io_array[2].iov_len = sizeof(int64_t)
+
+  io_array[3].iov_base = address of out_downcall member (pvfs2_downcall_t)
+                         of global variable vfs_request (vfs_request_t)
+  io_array[3].iov_len = sizeof(pvfs2_downcall_t)
+
+Readdir responses initialize the fifth element io_array like this:
+
+  io_array[4].iov_base = contents of member trailer_buf (char *)
+                         from out_downcall member of global variable
+                         vfs_request
+  io_array[4].iov_len = contents of member trailer_size (PVFS_size)
+                        from out_downcall member of global variable
+                        vfs_request
+  
+

diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index d392e15..d9c11d2 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt

@@ -60,7 +60,7 @@
            default is half of your physical RAM without swap. If you
            oversize your tmpfs instances the machine will deadlock
            since the OOM handler will not be able to free that memory.
-nr_blocks: The same as size, but in blocks of PAGE_CACHE_SIZE.
+nr_blocks: The same as size, but in blocks of PAGE_SIZE.
 nr_inodes: The maximum number of inodes for this instance. The default
            is half of the number of your physical RAM pages, or (on a
            machine with highmem) the number of lowmem RAM pages,

diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 223c321..cf51360 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt

@@ -56,9 +56,10 @@
 		 you should consider the following option instead.
 
 utf8=<bool>   -- UTF-8 is the filesystem safe version of Unicode that
-		 is used by the console.  It can be enabled for the
-		 filesystem with this option. If 'uni_xlate' gets set,
-		 UTF-8 gets disabled.
+		 is used by the console. It can be enabled or disabled
+		 for the filesystem with this option.
+		 If 'uni_xlate' gets set, UTF-8 gets disabled.
+		 By default, FAT_DEFAULT_UTF8 setting is used.
 
 uni_xlate=<bool> -- Translate unhandled Unicode characters to special
 		 escaped sequences.  This would let you backup and

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index b02a7d5..4164bd6 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt

@@ -708,9 +708,9 @@
 	from the address space.  This generally corresponds to either a
 	truncation, punch hole  or a complete invalidation of the address
 	space (in the latter case 'offset' will always be 0 and 'length'
-	will be PAGE_CACHE_SIZE). Any private data associated with the page
+	will be PAGE_SIZE). Any private data associated with the page
 	should be updated to reflect this truncation.  If offset is 0 and
-	length is PAGE_CACHE_SIZE, then the private data should be released,
+	length is PAGE_SIZE, then the private data should be released,
 	because the page must be able to be completely discarded.  This may
 	be done by calling the ->releasepage function, but in this case the
 	release MUST succeed.

diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt
index 9028b02..3ecf0c3 100644
--- a/Documentation/infiniband/sysfs.txt
+++ b/Documentation/infiniband/sysfs.txt

@@ -78,9 +78,10 @@
    chip_reset - diagnostic (root only)
    boardversion - board version
    ports/1/
-          CMgtA/
+          CCMgtA/
                cc_settings_bin - CCA tables used by PSM2
                cc_table_bin
+               cc_prescan - enable prescaning for faster BECN response
           sc2v/ - 32 files (0 - 31) used to translate sl->vl
           sl2sc/ - 32 files (0 - 31) used to translate sl->sc
           vl2mtu/ - 16 (0 - 15) files used to determine MTU for vl

diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt
index aa1e0c9..7dd95b3 100644
--- a/Documentation/kasan.txt
+++ b/Documentation/kasan.txt

@@ -12,8 +12,7 @@
 therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
 required for detection of out-of-bounds accesses to stack or global variables.
 
-Currently KASAN is supported only for x86_64 architecture and requires the
-kernel to be built with the SLUB allocator.
+Currently KASAN is supported only for x86_64 architecture.
 
 1. Usage
 ========
@@ -27,7 +26,7 @@
 the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
 version 5.0 or later.
 
-Currently KASAN works only with the SLUB memory allocator.
+KASAN works with both SLUB and SLAB memory allocators.
 For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
 
 To disable instrumentation for specific files or directories, add a line

diff --git a/Documentation/kcov.txt b/Documentation/kcov.txt
new file mode 100644
index 0000000..779ff4a
--- /dev/null
+++ b/Documentation/kcov.txt

@@ -0,0 +1,111 @@
+kcov: code coverage for fuzzing
+===============================
+
+kcov exposes kernel code coverage information in a form suitable for coverage-
+guided fuzzing (randomized testing). Coverage data of a running kernel is
+exported via the "kcov" debugfs file. Coverage collection is enabled on a task
+basis, and thus it can capture precise coverage of a single system call.
+
+Note that kcov does not aim to collect as much coverage as possible. It aims
+to collect more or less stable coverage that is function of syscall inputs.
+To achieve this goal it does not collect coverage in soft/hard interrupts
+and instrumentation of some inherently non-deterministic parts of kernel is
+disbled (e.g. scheduler, locking).
+
+Usage:
+======
+
+Configure kernel with:
+
+        CONFIG_KCOV=y
+
+CONFIG_KCOV requires gcc built on revision 231296 or later.
+Profiling data will only become accessible once debugfs has been mounted:
+
+        mount -t debugfs none /sys/kernel/debug
+
+The following program demonstrates kcov usage from within a test program:
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
+#define KCOV_ENABLE			_IO('c', 100)
+#define KCOV_DISABLE			_IO('c', 101)
+#define COVER_SIZE			(64<<10)
+
+int main(int argc, char **argv)
+{
+	int fd;
+	unsigned long *cover, n, i;
+
+	/* A single fd descriptor allows coverage collection on a single
+	 * thread.
+	 */
+	fd = open("/sys/kernel/debug/kcov", O_RDWR);
+	if (fd == -1)
+		perror("open"), exit(1);
+	/* Setup trace mode and trace size. */
+	if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE))
+		perror("ioctl"), exit(1);
+	/* Mmap buffer shared between kernel- and user-space. */
+	cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long),
+				     PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if ((void*)cover == MAP_FAILED)
+		perror("mmap"), exit(1);
+	/* Enable coverage collection on the current thread. */
+	if (ioctl(fd, KCOV_ENABLE, 0))
+		perror("ioctl"), exit(1);
+	/* Reset coverage from the tail of the ioctl() call. */
+	__atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED);
+	/* That's the target syscal call. */
+	read(-1, NULL, 0);
+	/* Read number of PCs collected. */
+	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+	for (i = 0; i < n; i++)
+		printf("0x%lx\n", cover[i + 1]);
+	/* Disable coverage collection for the current thread. After this call
+	 * coverage can be enabled for a different thread.
+	 */
+	if (ioctl(fd, KCOV_DISABLE, 0))
+		perror("ioctl"), exit(1);
+	/* Free resources. */
+	if (munmap(cover, COVER_SIZE * sizeof(unsigned long)))
+		perror("munmap"), exit(1);
+	if (close(fd))
+		perror("close"), exit(1);
+	return 0;
+}
+
+After piping through addr2line output of the program looks as follows:
+
+SyS_read
+fs/read_write.c:562
+__fdget_pos
+fs/file.c:774
+__fget_light
+fs/file.c:746
+__fget_light
+fs/file.c:750
+__fget_light
+fs/file.c:760
+__fdget_pos
+fs/file.c:784
+SyS_read
+fs/read_write.c:562
+
+If a program needs to collect coverage from several threads (independently),
+it needs to open /sys/kernel/debug/kcov in each thread separately.
+
+The interface is fine-grained to allow efficient forking of test processes.
+That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode,
+mmaps coverage buffer and then forks child processes in a loop. Child processes
+only need to enable coverage (disable happens automatically on thread end).

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 904ee42..3729cbe 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt

@@ -232,7 +232,7 @@
      with memory references that are not protected by READ_ONCE() and
      WRITE_ONCE().  Without them, the compiler is within its rights to
      do all sorts of "creative" transformations, which are covered in
-     the Compiler Barrier section.
+     the COMPILER BARRIER section.
 
  (*) It _must_not_ be assumed that independent loads and stores will be issued
      in the order given.  This means that for:
@@ -555,6 +555,30 @@
 This enforces the occurrence of one of the two implications, and prevents the
 third possibility from arising.
 
+A data-dependency barrier must also order against dependent writes:
+
+	CPU 1		      CPU 2
+	===============	      ===============
+	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
+	B = 4;
+	<write barrier>
+	WRITE_ONCE(P, &B);
+			      Q = READ_ONCE(P);
+			      <data dependency barrier>
+			      *Q = 5;
+
+The data-dependency barrier must order the read into Q with the store
+into *Q.  This prohibits this outcome:
+
+	(Q == B) && (B == 4)
+
+Please note that this pattern should be rare.  After all, the whole point
+of dependency ordering is to -prevent- writes to the data structure, along
+with the expensive cache misses associated with those writes.  This pattern
+can be used to record rare error conditions and the like, and the ordering
+prevents such records from being lost.
+
+
 [!] Note that this extremely counterintuitive situation arises most easily on
 machines with split caches, so that, for example, one cache bank processes
 even-numbered cache lines and the other bank processes odd-numbered cache
@@ -565,21 +589,6 @@
 but the old value of the variable B (2).
 
 
-Another example of where data dependency barriers might be required is where a
-number is read from memory and then used to calculate the index for an array
-access:
-
-	CPU 1		      CPU 2
-	===============	      ===============
-	{ M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
-	M[1] = 4;
-	<write barrier>
-	WRITE_ONCE(P, 1);
-			      Q = READ_ONCE(P);
-			      <data dependency barrier>
-			      D = M[Q];
-
-
 The data dependency barrier is very important to the RCU system,
 for example.  See rcu_assign_pointer() and rcu_dereference() in
 include/linux/rcupdate.h.  This permits the current target of an RCU'd
@@ -800,9 +809,13 @@
       use smp_rmb(), smp_wmb(), or, in the case of prior stores and
       later loads, smp_mb().
 
-  (*) If both legs of the "if" statement begin with identical stores
-      to the same variable, a barrier() statement is required at the
-      beginning of each leg of the "if" statement.
+  (*) If both legs of the "if" statement begin with identical stores to
+      the same variable, then those stores must be ordered, either by
+      preceding both of them with smp_mb() or by using smp_store_release()
+      to carry out the stores.  Please note that it is -not- sufficient
+      to use barrier() at beginning of each leg of the "if" statement,
+      as optimizing compilers do not necessarily respect barrier()
+      in this case.
 
   (*) Control dependencies require at least one run-time conditional
       between the prior load and the subsequent store, and this
@@ -814,7 +827,7 @@
   (*) Control dependencies require that the compiler avoid reordering the
       dependency into nonexistence.  Careful use of READ_ONCE() or
       atomic{,64}_read() can help to preserve your control dependency.
-      Please see the Compiler Barrier section for more information.
+      Please see the COMPILER BARRIER section for more information.
 
   (*) Control dependencies pair normally with other types of barriers.
 
@@ -1257,7 +1270,7 @@
 
 Transitivity is a deeply intuitive notion about ordering that is not
 always provided by real computer systems.  The following example
-demonstrates transitivity (also called "cumulativity"):
+demonstrates transitivity:
 
 	CPU 1			CPU 2			CPU 3
 	=======================	=======================	=======================
@@ -1305,8 +1318,86 @@
 General barriers are therefore required to ensure that all CPUs agree
 on the combined order of CPU 1's and CPU 2's accesses.
 
-To reiterate, if your code requires transitivity, use general barriers
-throughout.
+General barriers provide "global transitivity", so that all CPUs will
+agree on the order of operations.  In contrast, a chain of release-acquire
+pairs provides only "local transitivity", so that only those CPUs on
+the chain are guaranteed to agree on the combined order of the accesses.
+For example, switching to C code in deference to Herman Hollerith:
+
+	int u, v, x, y, z;
+
+	void cpu0(void)
+	{
+		r0 = smp_load_acquire(&x);
+		WRITE_ONCE(u, 1);
+		smp_store_release(&y, 1);
+	}
+
+	void cpu1(void)
+	{
+		r1 = smp_load_acquire(&y);
+		r4 = READ_ONCE(v);
+		r5 = READ_ONCE(u);
+		smp_store_release(&z, 1);
+	}
+
+	void cpu2(void)
+	{
+		r2 = smp_load_acquire(&z);
+		smp_store_release(&x, 1);
+	}
+
+	void cpu3(void)
+	{
+		WRITE_ONCE(v, 1);
+		smp_mb();
+		r3 = READ_ONCE(u);
+	}
+
+Because cpu0(), cpu1(), and cpu2() participate in a local transitive
+chain of smp_store_release()/smp_load_acquire() pairs, the following
+outcome is prohibited:
+
+	r0 == 1 && r1 == 1 && r2 == 1
+
+Furthermore, because of the release-acquire relationship between cpu0()
+and cpu1(), cpu1() must see cpu0()'s writes, so that the following
+outcome is prohibited:
+
+	r1 == 1 && r5 == 0
+
+However, the transitivity of release-acquire is local to the participating
+CPUs and does not apply to cpu3().  Therefore, the following outcome
+is possible:
+
+	r0 == 0 && r1 == 1 && r2 == 1 && r3 == 0 && r4 == 0
+
+As an aside, the following outcome is also possible:
+
+	r0 == 0 && r1 == 1 && r2 == 1 && r3 == 0 && r4 == 0 && r5 == 1
+
+Although cpu0(), cpu1(), and cpu2() will see their respective reads and
+writes in order, CPUs not involved in the release-acquire chain might
+well disagree on the order.  This disagreement stems from the fact that
+the weak memory-barrier instructions used to implement smp_load_acquire()
+and smp_store_release() are not required to order prior stores against
+subsequent loads in all cases.  This means that cpu3() can see cpu0()'s
+store to u as happening -after- cpu1()'s load from v, even though
+both cpu0() and cpu1() agree that these two operations occurred in the
+intended order.
+
+However, please keep in mind that smp_load_acquire() is not magic.
+In particular, it simply reads from its argument with ordering.  It does
+-not- ensure that any particular value will be read.  Therefore, the
+following outcome is possible:
+
+	r0 == 0 && r1 == 0 && r2 == 0 && r5 == 0
+
+Note that this outcome can happen even on a mythical sequentially
+consistent system where nothing is ever reordered.
+
+To reiterate, if your code requires global transitivity, use general
+barriers throughout.
 
 
 ========================
@@ -1459,7 +1550,7 @@
      the following:
 
 	a = 0;
-	/* Code that does not store to variable a. */
+	... Code that does not store to variable a ...
 	a = 0;
 
      The compiler sees that the value of variable 'a' is already zero, so
@@ -1471,7 +1562,7 @@
      wrong guess:
 
 	WRITE_ONCE(a, 0);
-	/* Code that does not store to variable a. */
+	... Code that does not store to variable a ...
 	WRITE_ONCE(a, 0);
 
  (*) The compiler is within its rights to reorder memory accesses unless

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d5df40c..b183e2b 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt

@@ -946,15 +946,20 @@
 	The value 5459 assumes no IP header options, so in practice
 	this number may be lower.
 
-	conf/interface/*  changes special settings per interface (where
-	"interface" is the name of your network interface)
-
-	conf/all/*	  is special, changes the settings for all interfaces
+igmp_max_msf - INTEGER
+	Maximum number of addresses allowed in the source filter list for a
+	multicast group.
+	Default: 10
 
 igmp_qrv - INTEGER
-	 Controls the IGMP query robustness variable (see RFC2236 8.1).
-	 Default: 2 (as specified by RFC2236 8.1)
-	 Minimum: 1 (as specified by RFC6636 4.5)
+	Controls the IGMP query robustness variable (see RFC2236 8.1).
+	Default: 2 (as specified by RFC2236 8.1)
+	Minimum: 1 (as specified by RFC6636 4.5)
+
+conf/interface/*  changes special settings per interface (where
+"interface" is the name of your network interface)
+
+conf/all/*	  is special, changes the settings for all interfaces
 
 log_martians - BOOLEAN
 	Log packets with impossible addresses to kernel log.

diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index fad6313..2f65912 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt

@@ -386,7 +386,7 @@
 memory allocation, etc. The goal is to handle the stuff that is not unlikely
 to fail here. The second phase is to "commit" the actual changes.
 
-Switchdev provides an inftrastructure for sharing items (for example memory
+Switchdev provides an infrastructure for sharing items (for example memory
 allocations) between the two phases.
 
 The object created by a driver in "prepare" phase and it is queued up by:

diff --git a/Documentation/rapidio/mport_cdev.txt b/Documentation/rapidio/mport_cdev.txt
new file mode 100644
index 0000000..20c120d
--- /dev/null
+++ b/Documentation/rapidio/mport_cdev.txt

@@ -0,0 +1,104 @@
+RapidIO subsystem mport character device driver (rio_mport_cdev.c)
+==================================================================
+
+Version History:
+----------------
+  1.0.0 - Initial driver release.
+
+==================================================================
+
+I. Overview
+
+This device driver is the result of collaboration within the RapidIO.org
+Software Task Group (STG) between Texas Instruments, Freescale,
+Prodrive Technologies, Nokia Networks, BAE and IDT.  Additional input was
+received from other members of RapidIO.org. The objective was to create a
+character mode driver interface which exposes the capabilities of RapidIO
+devices directly to applications, in a manner that allows the numerous and
+varied RapidIO implementations to interoperate.
+
+This driver (MPORT_CDEV) provides access to basic RapidIO subsystem operations
+for user-space applications. Most of RapidIO operations are supported through
+'ioctl' system calls.
+
+When loaded this device driver creates filesystem nodes named rio_mportX in /dev
+directory for each registered RapidIO mport device. 'X' in the node name matches
+to unique port ID assigned to each local mport device.
+
+Using available set of ioctl commands user-space applications can perform
+following RapidIO bus and subsystem operations:
+
+- Reads and writes from/to configuration registers of mport devices
+    (RIO_MPORT_MAINT_READ_LOCAL/RIO_MPORT_MAINT_WRITE_LOCAL)
+- Reads and writes from/to configuration registers of remote RapidIO devices.
+  This operations are defined as RapidIO Maintenance reads/writes in RIO spec.
+    (RIO_MPORT_MAINT_READ_REMOTE/RIO_MPORT_MAINT_WRITE_REMOTE)
+- Set RapidIO Destination ID for mport devices (RIO_MPORT_MAINT_HDID_SET)
+- Set RapidIO Component Tag for mport devices (RIO_MPORT_MAINT_COMPTAG_SET)
+- Query logical index of mport devices (RIO_MPORT_MAINT_PORT_IDX_GET)
+- Query capabilities and RapidIO link configuration of mport devices
+    (RIO_MPORT_GET_PROPERTIES)
+- Enable/Disable reporting of RapidIO doorbell events to user-space applications
+    (RIO_ENABLE_DOORBELL_RANGE/RIO_DISABLE_DOORBELL_RANGE)
+- Enable/Disable reporting of RIO port-write events to user-space applications
+    (RIO_ENABLE_PORTWRITE_RANGE/RIO_DISABLE_PORTWRITE_RANGE)
+- Query/Control type of events reported through this driver: doorbells,
+  port-writes or both (RIO_SET_EVENT_MASK/RIO_GET_EVENT_MASK)
+- Configure/Map mport's outbound requests window(s) for specific size,
+  RapidIO destination ID, hopcount and request type
+    (RIO_MAP_OUTBOUND/RIO_UNMAP_OUTBOUND)
+- Configure/Map mport's inbound requests window(s) for specific size,
+  RapidIO base address and local memory base address
+    (RIO_MAP_INBOUND/RIO_UNMAP_INBOUND)
+- Allocate/Free contiguous DMA coherent memory buffer for DMA data transfers
+  to/from remote RapidIO devices (RIO_ALLOC_DMA/RIO_FREE_DMA)
+- Initiate DMA data transfers to/from remote RapidIO devices (RIO_TRANSFER).
+  Supports blocking, asynchronous and posted (a.k.a 'fire-and-forget') data
+  transfer modes.
+- Check/Wait for completion of asynchronous DMA data transfer
+    (RIO_WAIT_FOR_ASYNC)
+- Manage device objects supported by RapidIO subsystem (RIO_DEV_ADD/RIO_DEV_DEL).
+  This allows implementation of various RapidIO fabric enumeration algorithms
+  as user-space applications while using remaining functionality provided by
+  kernel RapidIO subsystem.
+
+II. Hardware Compatibility
+
+This device driver uses standard interfaces defined by kernel RapidIO subsystem
+and therefore it can be used with any mport device driver registered by RapidIO
+subsystem with limitations set by available mport implementation.
+
+At this moment the most common limitation is availability of RapidIO-specific
+DMA engine framework for specific mport device. Users should verify available
+functionality of their platform when planning to use this driver:
+
+- IDT Tsi721 PCIe-to-RapidIO bridge device and its mport device driver are fully
+  compatible with this driver.
+- Freescale SoCs 'fsl_rio' mport driver does not have implementation for RapidIO
+  specific DMA engine support and therefore DMA data transfers mport_cdev driver
+  are not available.
+
+III. Module parameters
+
+- 'dbg_level' - This parameter allows to control amount of debug information
+        generated by this device driver. This parameter is formed by set of
+        This parameter can be changed bit masks that correspond to the specific
+        functional block.
+        For mask definitions see 'drivers/rapidio/devices/rio_mport_cdev.c'
+        This parameter can be changed dynamically.
+        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
+
+IV. Known problems
+
+  None.
+
+V. User-space Applications and API
+
+API library and applications that use this device driver are available from
+RapidIO.org.
+
+VI. TODO List
+
+- Add support for sending/receiving "raw" RapidIO messaging packets.
+- Add memory mapped DMA data transfers as an option when RapidIO-specific DMA
+  is not available.

diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt
index 626052f..7c1c7bf 100644
--- a/Documentation/rapidio/tsi721.txt
+++ b/Documentation/rapidio/tsi721.txt

@@ -16,6 +16,15 @@
 into the corresponding message queue. Messaging callbacks are implemented to be
 fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
 
+1. Module parameters:
+- 'dbg_level' - This parameter allows to control amount of debug information
+        generated by this device driver. This parameter is formed by set of
+        This parameter can be changed bit masks that correspond to the specific
+        functional block.
+        For mask definitions see 'drivers/rapidio/devices/tsi721.h'
+        This parameter can be changed dynamically.
+        Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level.
+
 II. Known problems
 
   None.

diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index 8c745c8..ed419d6 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt

@@ -72,6 +72,74 @@
     It deletes the corresponding entry form /sys/class/thermal folder and
     unbind all the thermal cooling devices it uses.
 
+1.1.3 struct thermal_zone_device *thermal_zone_of_sensor_register(
+		struct device *dev, int sensor_id, void *data,
+		const struct thermal_zone_of_device_ops *ops)
+
+	This interface adds a new sensor to a DT thermal zone.
+	This function will search the list of thermal zones described in
+	device tree and look for the zone that refer to the sensor device
+	pointed by dev->of_node as temperature providers. For the zone
+	pointing to the sensor node, the sensor will be added to the DT
+	thermal zone device.
+
+	The parameters for this interface are:
+	dev:		Device node of sensor containing valid node pointer in
+			dev->of_node.
+	sensor_id:	a sensor identifier, in case the sensor IP has more
+			than one sensors
+	data:		a private pointer (owned by the caller) that will be
+			passed back, when a temperature reading is needed.
+	ops:		struct thermal_zone_of_device_ops *.
+
+			get_temp:	a pointer to a function that reads the
+					sensor temperature. This is mandatory
+					callback provided by sensor driver.
+			get_trend: 	a pointer to a function that reads the
+					sensor temperature trend.
+			set_emul_temp:	a pointer to a function that sets
+					sensor emulated temperature.
+	The thermal zone temperature is provided by the get_temp() function
+	pointer of thermal_zone_of_device_ops. When called, it will
+	have the private pointer @data back.
+
+	It returns error pointer if fails otherwise valid thermal zone device
+	handle. Caller should check the return handle with IS_ERR() for finding
+	whether success or not.
+
+1.1.4 void thermal_zone_of_sensor_unregister(struct device *dev,
+		struct thermal_zone_device *tzd)
+
+	This interface unregisters a sensor from a DT thermal zone which was
+	successfully added by interface thermal_zone_of_sensor_register().
+	This function removes the sensor callbacks and private data from the
+	thermal zone device registered with thermal_zone_of_sensor_register()
+	interface. It will also silent the zone by remove the .get_temp() and
+	get_trend() thermal zone device callbacks.
+
+1.1.5 struct thermal_zone_device *devm_thermal_zone_of_sensor_register(
+		struct device *dev, int sensor_id,
+		void *data, const struct thermal_zone_of_device_ops *ops)
+
+	This interface is resource managed version of
+	thermal_zone_of_sensor_register().
+	All details of thermal_zone_of_sensor_register() described in
+	section 1.1.3 is applicable here.
+	The benefit of using this interface to register sensor is that it
+	is not require to explicitly call thermal_zone_of_sensor_unregister()
+	in error path or during driver unbinding as this is done by driver
+	resource manager.
+
+1.1.6 void devm_thermal_zone_of_sensor_unregister(struct device *dev,
+		struct thermal_zone_device *tzd)
+
+	This interface is resource managed version of
+	thermal_zone_of_sensor_unregister().
+	All details of thermal_zone_of_sensor_unregister() described in
+	section 1.1.4 is applicable here.
+	Normally this function will not need to be called and the resource
+	management code will ensure that the resource is freed.
+
 1.2 thermal cooling device interface
 1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name,
 		void *devdata, struct thermal_cooling_device_ops *)

diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt
new file mode 100644
index 0000000..06afac2
--- /dev/null
+++ b/Documentation/x86/topology.txt

@@ -0,0 +1,208 @@
+x86 Topology
+============
+
+This documents and clarifies the main aspects of x86 topology modelling and
+representation in the kernel. Update/change when doing changes to the
+respective code.
+
+The architecture-agnostic topology definitions are in
+Documentation/cputopology.txt. This file holds x86-specific
+differences/specialities which must not necessarily apply to the generic
+definitions. Thus, the way to read up on Linux topology on x86 is to start
+with the generic one and look at this one in parallel for the x86 specifics.
+
+Needless to say, code should use the generic functions - this file is *only*
+here to *document* the inner workings of x86 topology.
+
+Started by Thomas Gleixner <tglx@linutronix.de> and Borislav Petkov <bp@alien8.de>.
+
+The main aim of the topology facilities is to present adequate interfaces to
+code which needs to know/query/use the structure of the running system wrt
+threads, cores, packages, etc.
+
+The kernel does not care about the concept of physical sockets because a
+socket has no relevance to software. It's an electromechanical component. In
+the past a socket always contained a single package (see below), but with the
+advent of Multi Chip Modules (MCM) a socket can hold more than one package. So
+there might be still references to sockets in the code, but they are of
+historical nature and should be cleaned up.
+
+The topology of a system is described in the units of:
+
+    - packages
+    - cores
+    - threads
+
+* Package:
+
+  Packages contain a number of cores plus shared resources, e.g. DRAM
+  controller, shared caches etc.
+
+  AMD nomenclature for package is 'Node'.
+
+  Package-related topology information in the kernel:
+
+  - cpuinfo_x86.x86_max_cores:
+
+    The number of cores in a package. This information is retrieved via CPUID.
+
+  - cpuinfo_x86.phys_proc_id:
+
+    The physical ID of the package. This information is retrieved via CPUID
+    and deduced from the APIC IDs of the cores in the package.
+
+  - cpuinfo_x86.logical_id:
+
+    The logical ID of the package. As we do not trust BIOSes to enumerate the
+    packages in a consistent way, we introduced the concept of logical package
+    ID so we can sanely calculate the number of maximum possible packages in
+    the system and have the packages enumerated linearly.
+
+  - topology_max_packages():
+
+    The maximum possible number of packages in the system. Helpful for per
+    package facilities to preallocate per package information.
+
+
+* Cores:
+
+  A core consists of 1 or more threads. It does not matter whether the threads
+  are SMT- or CMT-type threads.
+
+  AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses
+  "core".
+
+  Core-related topology information in the kernel:
+
+  - smp_num_siblings:
+
+    The number of threads in a core. The number of threads in a package can be
+    calculated by:
+
+	threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings
+
+
+* Threads:
+
+  A thread is a single scheduling unit. It's the equivalent to a logical Linux
+  CPU.
+
+  AMDs nomenclature for CMT threads is "Compute Unit Core". The kernel always
+  uses "thread".
+
+  Thread-related topology information in the kernel:
+
+  - topology_core_cpumask():
+
+    The cpumask contains all online threads in the package to which a thread
+    belongs.
+
+    The number of online threads is also printed in /proc/cpuinfo "siblings."
+
+  - topology_sibling_mask():
+
+    The cpumask contains all online threads in the core to which a thread
+    belongs.
+
+   - topology_logical_package_id():
+
+    The logical package ID to which a thread belongs.
+
+   - topology_physical_package_id():
+
+    The physical package ID to which a thread belongs.
+
+   - topology_core_id();
+
+    The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
+    "core_id."
+
+
+
+System topology examples
+
+Note:
+
+The alternative Linux CPU enumeration depends on how the BIOS enumerates the
+threads. Many BIOSes enumerate all threads 0 first and then all threads 1.
+That has the "advantage" that the logical Linux CPU numbers of threads 0 stay
+the same whether threads are enabled or not. That's merely an implementation
+detail and has no practical impact.
+
+1) Single Package, Single Core
+
+   [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+
+2) Single Package, Dual Core
+
+   a) One thread per core
+
+	[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+		    -> [core 1] -> [thread 0] -> Linux CPU 1
+
+   b) Two threads per core
+
+	[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+				-> [thread 1] -> Linux CPU 1
+		    -> [core 1] -> [thread 0] -> Linux CPU 2
+				-> [thread 1] -> Linux CPU 3
+
+      Alternative enumeration:
+
+	[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+				-> [thread 1] -> Linux CPU 2
+		    -> [core 1] -> [thread 0] -> Linux CPU 1
+				-> [thread 1] -> Linux CPU 3
+
+      AMD nomenclature for CMT systems:
+
+	[node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0
+				     -> [Compute Unit Core 1] -> Linux CPU 1
+		 -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 2
+				     -> [Compute Unit Core 1] -> Linux CPU 3
+
+4) Dual Package, Dual Core
+
+   a) One thread per core
+
+	[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+		    -> [core 1] -> [thread 0] -> Linux CPU 1
+
+	[package 1] -> [core 0] -> [thread 0] -> Linux CPU 2
+		    -> [core 1] -> [thread 0] -> Linux CPU 3
+
+   b) Two threads per core
+
+	[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+				-> [thread 1] -> Linux CPU 1
+		    -> [core 1] -> [thread 0] -> Linux CPU 2
+				-> [thread 1] -> Linux CPU 3
+
+	[package 1] -> [core 0] -> [thread 0] -> Linux CPU 4
+				-> [thread 1] -> Linux CPU 5
+		    -> [core 1] -> [thread 0] -> Linux CPU 6
+				-> [thread 1] -> Linux CPU 7
+
+      Alternative enumeration:
+
+	[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+				-> [thread 1] -> Linux CPU 4
+		    -> [core 1] -> [thread 0] -> Linux CPU 1
+				-> [thread 1] -> Linux CPU 5
+
+	[package 1] -> [core 0] -> [thread 0] -> Linux CPU 2
+				-> [thread 1] -> Linux CPU 6
+		    -> [core 1] -> [thread 0] -> Linux CPU 3
+				-> [thread 1] -> Linux CPU 7
+
+      AMD nomenclature for CMT systems:
+
+	[node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0
+				     -> [Compute Unit Core 1] -> Linux CPU 1
+		 -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 2
+				     -> [Compute Unit Core 1] -> Linux CPU 3
+
+	[node 1] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 4
+				     -> [Compute Unit Core 1] -> Linux CPU 5
+		 -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 6
+				     -> [Compute Unit Core 1] -> Linux CPU 7

diff --git a/MAINTAINERS b/MAINTAINERS
index 0f3063c..d8c901b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -228,13 +228,13 @@
 
 ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/abituguru.c
 
 ABIT UGURU 3 HARDWARE MONITOR DRIVER
 M:	Alistair John Strachan <alistair@devzero.co.uk>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/abituguru3.c
 
@@ -392,14 +392,14 @@
 
 ADM1025 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/adm1025
 F:	drivers/hwmon/adm1025.c
 
 ADM1029 HARDWARE MONITOR DRIVER
 M:	Corentin Labbe <clabbe.montjoie@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/adm1029.c
 
@@ -444,7 +444,7 @@
 
 ADS1015 HARDWARE MONITOR DRIVER
 M:	Dirk Eibach <eibach@gdsys.de>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ads1015
 F:	drivers/hwmon/ads1015.c
@@ -457,7 +457,7 @@
 
 ADT7475 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/adt7475
 F:	drivers/hwmon/adt7475.c
@@ -634,7 +634,7 @@
 
 AMD FAM15H PROCESSOR POWER MONITORING DRIVER
 M:	Huang Rui <ray.huang@amd.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Supported
 F:	Documentation/hwmon/fam15h_power
 F:	drivers/hwmon/fam15h_power.c
@@ -806,7 +806,7 @@
 
 APPLE SMC DRIVER
 M:	Henrik Rydberg <rydberg@bitmath.org>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Odd fixes
 F:	drivers/hwmon/applesmc.c
 
@@ -1865,7 +1865,7 @@
 
 ASC7621 HARDWARE MONITOR DRIVER
 M:	George Joseph <george.joseph@fairview5.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/asc7621
 F:	drivers/hwmon/asc7621.c
@@ -1958,7 +1958,7 @@
 
 ATK0110 HWMON DRIVER
 M:	Luca Tettamanti <kronos.it@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/asus_atk0110.c
 
@@ -3094,7 +3094,7 @@
 
 CORETEMP HARDWARE MONITORING DRIVER
 M:	Fenghua Yu <fenghua.yu@intel.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/coretemp
 F:	drivers/hwmon/coretemp.c
@@ -3683,7 +3683,7 @@
 
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/dme1737
 F:	drivers/hwmon/dme1737.c
@@ -4381,7 +4381,7 @@
 
 F71805F HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/f71805f
 F:	drivers/hwmon/f71805f.c
@@ -4460,7 +4460,7 @@
 
 FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
 M:	Riku Voipio <riku.voipio@iki.fi>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/f75375s.c
 F:	include/linux/f75375s.h
@@ -5021,8 +5021,8 @@
 HARDWARE MONITORING
 M:	Jean Delvare <jdelvare@suse.com>
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
-W:	http://www.lm-sensors.org/
+L:	linux-hwmon@vger.kernel.org
+W:	http://hwmon.wiki.kernel.org/
 T:	quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
 S:	Maintained
@@ -5042,6 +5042,7 @@
 HARDWARE SPINLOCK CORE
 M:	Ohad Ben-Cohen <ohad@wizery.com>
 M:	Bjorn Andersson <bjorn.andersson@linaro.org>
+L:	linux-remoteproc@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/hwspinlock.git
 F:	Documentation/hwspinlock.txt
@@ -5554,7 +5555,7 @@
 
 INA209 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ina209
 F:	Documentation/devicetree/bindings/i2c/ina209.txt
@@ -5562,7 +5563,7 @@
 
 INA2XX HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ina2xx
 F:	drivers/hwmon/ina2xx.c
@@ -5770,6 +5771,16 @@
 F:	drivers/net/ethernet/intel/
 F:	drivers/net/ethernet/intel/*/
 
+INTEL RDMA RNIC DRIVER
+M:     Faisal Latif <faisal.latif@intel.com>
+R:     Chien Tin Tung <chien.tin.tung@intel.com>
+R:     Mustafa Ismail <mustafa.ismail@intel.com>
+R:     Shiraz Saleem <shiraz.saleem@intel.com>
+R:     Tatyana Nikolova <tatyana.e.nikolova@intel.com>
+L:     linux-rdma@vger.kernel.org
+S:     Supported
+F:     drivers/infiniband/hw/i40iw/
+
 INTEL-MID GPIO DRIVER
 M:	David Cohen <david.a.cohen@linux.intel.com>
 L:	linux-gpio@vger.kernel.org
@@ -6057,7 +6068,7 @@
 
 IT87 HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/it87
 F:	drivers/hwmon/it87.c
@@ -6093,7 +6104,7 @@
 
 JC42.4 TEMPERATURE SENSOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/jc42.c
 F:	Documentation/hwmon/jc42
@@ -6143,18 +6154,32 @@
 
 K10TEMP HARDWARE MONITORING DRIVER
 M:	Clemens Ladisch <clemens@ladisch.de>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/k10temp
 F:	drivers/hwmon/k10temp.c
 
 K8TEMP HARDWARE MONITORING DRIVER
 M:	Rudolf Marek <r.marek@assembler.cz>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/k8temp
 F:	drivers/hwmon/k8temp.c
 
+KASAN
+M:	Andrey Ryabinin <aryabinin@virtuozzo.com>
+R:	Alexander Potapenko <glider@google.com>
+R:	Dmitry Vyukov <dvyukov@google.com>
+L:	kasan-dev@googlegroups.com
+S:	Maintained
+F:	arch/*/include/asm/kasan.h
+F:	arch/*/mm/kasan_init*
+F:	Documentation/kasan.txt
+F:	include/linux/kasan.h
+F:	lib/test_kasan.c
+F:	mm/kasan/
+F:	scripts/Makefile.kasan
+
 KCONFIG
 M:	"Yann E. MORIN" <yann.morin.1998@free.fr>
 L:	linux-kbuild@vger.kernel.org
@@ -6378,7 +6403,7 @@
 M:	Ananth N Mavinakayanahalli <ananth@in.ibm.com>
 M:	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 M:	"David S. Miller" <davem@davemloft.net>
-M:	Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+M:	Masami Hiramatsu <mhiramat@kernel.org>
 S:	Maintained
 F:	Documentation/kprobes.txt
 F:	include/linux/kprobes.h
@@ -6683,27 +6708,27 @@
 
 LM73 HARDWARE MONITOR DRIVER
 M:	Guillaume Ligneul <guillaume.ligneul@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/lm73.c
 
 LM78 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/lm78
 F:	drivers/hwmon/lm78.c
 
 LM83 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/lm83
 F:	drivers/hwmon/lm83.c
 
 LM90 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/lm90
 F:	Documentation/devicetree/bindings/hwmon/lm90.txt
@@ -6711,7 +6736,7 @@
 
 LM95234 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/lm95234
 F:	drivers/hwmon/lm95234.c
@@ -6777,7 +6802,7 @@
 
 LTC4261 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/ltc4261
 F:	drivers/hwmon/ltc4261.c
@@ -6947,28 +6972,28 @@
 
 MAX16065 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/max16065
 F:	drivers/hwmon/max16065.c
 
 MAX20751 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/max20751
 F:	drivers/hwmon/max20751.c
 
 MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
 M:	"Hans J. Koch" <hjk@hansjkoch.de>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/max6650
 F:	drivers/hwmon/max6650.c
 
 MAX6697 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/max6697
 F:	Documentation/devicetree/bindings/i2c/max6697.txt
@@ -7537,7 +7562,7 @@
 
 NCT6775 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/nct6775
 F:	drivers/hwmon/nct6775.c
@@ -8227,6 +8252,14 @@
 F:	fs/overlayfs/
 F:	Documentation/filesystems/overlayfs.txt
 
+ORANGEFS FILESYSTEM
+M:	Mike Marshall <hubcap@omnibond.com>
+L:	pvfs2-developers@beowulf-underground.org (subscribers-only)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux.git
+S:	Supported
+F:	fs/orangefs/
+F:	Documentation/filesystems/orangefs.txt
+
 P54 WIRELESS DRIVER
 M:	Christian Lamparter <chunkeey@googlemail.com>
 L:	linux-wireless@vger.kernel.org
@@ -8327,7 +8360,7 @@
 
 PC87360 HARDWARE MONITORING DRIVER
 M:	Jim Cromie <jim.cromie@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/pc87360
 F:	drivers/hwmon/pc87360.c
@@ -8339,7 +8372,7 @@
 
 PC87427 HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/pc87427
 F:	drivers/hwmon/pc87427.c
@@ -8679,6 +8712,8 @@
 
 PIN CONTROLLER - SAMSUNG
 M:	Tomasz Figa <tomasz.figa@gmail.com>
+M:	Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:	Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:	Maintained
@@ -8719,8 +8754,8 @@
 
 PMBUS HARDWARE MONITORING DRIVERS
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
-W:	http://www.lm-sensors.org/
+L:	linux-hwmon@vger.kernel.org
+W:	http://hwmon.wiki.kernel.org/
 W:	http://www.roeck-us.net/linux/drivers/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
 S:	Maintained
@@ -8925,7 +8960,7 @@
 
 PWM FAN DRIVER
 M:	Kamil Debski <k.debski@samsung.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/hwmon/pwm-fan.txt
 F:	Documentation/hwmon/pwm-fan
@@ -9107,6 +9142,13 @@
 S:	Supported
 F:	drivers/net/wireless/ath/wcn36xx/
 
+QEMU MACHINE EMULATOR AND VIRTUALIZER SUPPORT
+M:	Gabriel Somlo <somlo@cmu.edu>
+M:	"Michael S. Tsirkin" <mst@redhat.com>
+L:	qemu-devel@nongnu.org
+S:	Maintained
+F:	drivers/firmware/qemu_fw_cfg.c
+
 RADOS BLOCK DEVICE (RBD)
 M:	Ilya Dryomov <idryomov@gmail.com>
 M:	Sage Weil <sage@redhat.com>
@@ -9224,6 +9266,12 @@
 F:	net/rds/
 F:	Documentation/networking/rds.txt
 
+RDMAVT - RDMA verbs software
+M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/sw/rdmavt
+
 READ-COPY UPDATE (RCU)
 M:	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
 M:	Josh Triplett <josh@joshtriplett.org>
@@ -9276,6 +9324,7 @@
 REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
 M:	Ohad Ben-Cohen <ohad@wizery.com>
 M:	Bjorn Andersson <bjorn.andersson@linaro.org>
+L:	linux-remoteproc@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/remoteproc.git
 S:	Maintained
 F:	drivers/remoteproc/
@@ -9285,6 +9334,7 @@
 REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM
 M:	Ohad Ben-Cohen <ohad@wizery.com>
 M:	Bjorn Andersson <bjorn.andersson@linaro.org>
+L:	linux-remoteproc@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ohad/rpmsg.git
 S:	Maintained
 F:	drivers/rpmsg/
@@ -10240,28 +10290,28 @@
 
 SMM665 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/smm665
 F:	drivers/hwmon/smm665.c
 
 SMSC EMC2103 HARDWARE MONITOR DRIVER
 M:	Steve Glendinning <steve.glendinning@shawell.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/emc2103
 F:	drivers/hwmon/emc2103.c
 
 SMSC SCH5627 HARDWARE MONITOR DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Supported
 F:	Documentation/hwmon/sch5627
 F:	drivers/hwmon/sch5627.c
 
 SMSC47B397 HARDWARE MONITOR DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/smsc47b397
 F:	drivers/hwmon/smsc47b397.c
@@ -11099,8 +11149,8 @@
 F:	net/tipc/
 
 TILE ARCHITECTURE
-M:	Chris Metcalf <cmetcalf@ezchip.com>
-W:	http://www.ezchip.com/scm/
+M:	Chris Metcalf <cmetcalf@mellanox.com>
+W:	http://www.mellanox.com/repository/solutions/tile-scm/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile.git
 S:	Supported
 F:	arch/tile/
@@ -11189,7 +11239,7 @@
 
 TMP401 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/tmp401
 F:	drivers/hwmon/tmp401.c
@@ -11310,12 +11360,13 @@
 F:	drivers/scsi/u14-34f.c
 
 UBI FILE SYSTEM (UBIFS)
+M:	Richard Weinberger <richard@nod.at>
 M:	Artem Bityutskiy <dedekind1@gmail.com>
 M:	Adrian Hunter <adrian.hunter@intel.com>
 L:	linux-mtd@lists.infradead.org
 T:	git git://git.infradead.org/ubifs-2.6.git
 W:	http://www.linux-mtd.infradead.org/doc/ubifs.html
-S:	Maintained
+S:	Supported
 F:	Documentation/filesystems/ubifs.txt
 F:	fs/ubifs/
 
@@ -11941,14 +11992,14 @@
 
 VT1211 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/vt1211
 F:	drivers/hwmon/vt1211.c
 
 VT8231 HARDWARE MONITOR DRIVER
 M:	Roger Lucas <vt8231@hiddenengine.co.uk>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/vt8231.c
 
@@ -11967,21 +12018,21 @@
 
 W83791D HARDWARE MONITORING DRIVER
 M:	Marc Hulsman <m.hulsman@tudelft.nl>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/w83791d
 F:	drivers/hwmon/w83791d.c
 
 W83793 HARDWARE MONITORING DRIVER
 M:	Rudolf Marek <r.marek@assembler.cz>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	Documentation/hwmon/w83793
 F:	drivers/hwmon/w83793.c
 
 W83795 HARDWARE MONITORING DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
-L:	lm-sensors@lm-sensors.org
+L:	linux-hwmon@vger.kernel.org
 S:	Maintained
 F:	drivers/hwmon/w83795.c
 
@@ -12163,9 +12214,9 @@
 F:	drivers/media/tuners/tuner-xc2028.*
 
 XEN HYPERVISOR INTERFACE
-M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 M:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
 M:	David Vrabel <david.vrabel@citrix.com>
+M:	Juergen Gross <jgross@suse.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git
 S:	Supported
@@ -12177,16 +12228,16 @@
 F:	include/uapi/xen/
 
 XEN HYPERVISOR ARM
-M:	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+M:	Stefano Stabellini <sstabellini@kernel.org>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
-S:	Supported
+S:	Maintained
 F:	arch/arm/xen/
 F:	arch/arm/include/asm/xen/
 
 XEN HYPERVISOR ARM64
-M:	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+M:	Stefano Stabellini <sstabellini@kernel.org>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
-S:	Supported
+S:	Maintained
 F:	arch/arm64/xen/
 F:	arch/arm64/include/asm/xen/
 

diff --git a/Makefile b/Makefile
index e055b96..173437d 100644
--- a/Makefile
+++ b/Makefile

@@ -1,7 +1,7 @@
 VERSION = 4
-PATCHLEVEL = 5
+PATCHLEVEL = 6
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc2
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
@@ -365,6 +365,7 @@
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
 CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage
+CFLAGS_KCOV	= -fsanitize-coverage=trace-pc
 
 
 # Use USERINCLUDE when you must reference the UAPI directories only.
@@ -411,7 +412,7 @@
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
 export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
-export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN CFLAGS_UBSAN
+export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KCOV CFLAGS_KASAN CFLAGS_UBSAN
 export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
 export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
@@ -673,6 +674,14 @@
 endif
 KBUILD_CFLAGS += $(stackp-flag)
 
+ifdef CONFIG_KCOV
+  ifeq ($(call cc-option, $(CFLAGS_KCOV)),)
+    $(warning Cannot use CONFIG_KCOV: \
+             -fsanitize-coverage=trace-pc is not supported by compiler)
+    CFLAGS_KCOV =
+  endif
+endif
+
 ifeq ($(cc-name),clang)
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
 KBUILD_CPPFLAGS += $(call cc-option,-Wno-unknown-warning-option,)
@@ -773,6 +782,9 @@
 # Prohibit date/time macros, which would make the build non-deterministic
 KBUILD_CFLAGS   += $(call cc-option,-Werror=date-time)
 
+# enforce correct pointer usage
+KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
+
 # use the deterministic mode of AR if available
 KBUILD_ARFLAGS := $(call ar-option,D)
 

diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index 9b0d400..c419b43 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h

@@ -483,7 +483,13 @@
 	(pc) + (_fixup)->fixup.bits.nextinsn;			\
 })
 
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+#define swap_ex_entry_fixup(a, b, tmp, delta)			\
+	do {							\
+		(a)->fixup.unit = (b)->fixup.unit;		\
+		(b)->fixup.unit = (tmp).fixup.unit;		\
+	} while (0)
+
 
 #endif /* __ALPHA_UACCESS_H */

diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile
index c993d3f..5a98079 100644
--- a/arch/alpha/mm/Makefile
+++ b/arch/alpha/mm/Makefile

@@ -4,6 +4,6 @@
 
 ccflags-y := -Werror
 
-obj-y	:= init.o fault.o extable.o
+obj-y	:= init.o fault.o
 
 obj-$(CONFIG_DISCONTIGMEM) += numa.o

diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c
deleted file mode 100644
index 813c9b6..0000000
--- a/arch/alpha/mm/extable.c
+++ /dev/null

@@ -1,92 +0,0 @@
-/*
- * linux/arch/alpha/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/sort.h>
-#include <asm/uaccess.h>
-
-static inline unsigned long ex_to_addr(const struct exception_table_entry *x)
-{
-	return (unsigned long)&x->insn + x->insn;
-}
-
-static void swap_ex(void *a, void *b, int size)
-{
-	struct exception_table_entry *ex_a = a, *ex_b = b;
-	unsigned long addr_a = ex_to_addr(ex_a), addr_b = ex_to_addr(ex_b);
-	unsigned int t = ex_a->fixup.unit;
-
-	ex_a->fixup.unit = ex_b->fixup.unit;
-	ex_b->fixup.unit = t;
-	ex_a->insn = (int)(addr_b - (unsigned long)&ex_a->insn);
-	ex_b->insn = (int)(addr_a - (unsigned long)&ex_b->insn);
-}
-
-/*
- * The exception table needs to be sorted so that the binary
- * search that we use to find entries in it works properly.
- * This is used both for the kernel exception table and for
- * the exception tables of modules that get loaded.
- */
-static int cmp_ex(const void *a, const void *b)
-{
-	const struct exception_table_entry *x = a, *y = b;
-
-	/* avoid overflow */
-	if (ex_to_addr(x) > ex_to_addr(y))
-		return 1;
-	if (ex_to_addr(x) < ex_to_addr(y))
-		return -1;
-	return 0;
-}
-
-void sort_extable(struct exception_table_entry *start,
-		  struct exception_table_entry *finish)
-{
-	sort(start, finish - start, sizeof(struct exception_table_entry),
-	     cmp_ex, swap_ex);
-}
-
-#ifdef CONFIG_MODULES
-/*
- * Any entry referring to the module init will be at the beginning or
- * the end.
- */
-void trim_init_extable(struct module *m)
-{
-	/*trim the beginning*/
-	while (m->num_exentries &&
-	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
-		m->extable++;
-		m->num_exentries--;
-	}
-	/*trim the end*/
-	while (m->num_exentries &&
-	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
-				  m))
-		m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */
-
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
-	       unsigned long value)
-{
-        while (first <= last) {
-		const struct exception_table_entry *mid;
-		unsigned long mid_value;
-
-		mid = (last - first) / 2 + first;
-		mid_value = ex_to_addr(mid);
-                if (mid_value == value)
-                        return mid;
-                else if (mid_value < value)
-                        first = mid+1;
-                else
-                        last = mid-1;
-        }
-
-        return NULL;
-}

diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 9a7cf52..36da89e 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h

@@ -12,9 +12,6 @@
 
 #ifndef __ASSEMBLY__
 
-#define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)	free_page(addr)
-
 #define clear_page(paddr)		memset((paddr), 0, PAGE_SIZE)
 #define copy_page(to, from)		memcpy((to), (from), PAGE_SIZE)
 

diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index d7709e3..9e5eddb 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c

@@ -628,7 +628,7 @@
 
 		/* kernel reading from page with U-mapping */
 		phys_addr_t paddr = (unsigned long)page_address(page);
-		unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
+		unsigned long vaddr = page->index << PAGE_SHIFT;
 
 		if (addr_not_cache_congruent(paddr, vaddr))
 			__flush_dcache_page(paddr, vaddr);

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 52b34a0..95c1923 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile

@@ -325,12 +325,17 @@
 	imx6dl-udoo.dtb \
 	imx6dl-wandboard.dtb \
 	imx6dl-wandboard-revb1.dtb \
+	imx6q-apalis-ixora.dtb \
 	imx6q-apf6dev.dtb \
 	imx6q-arm2.dtb \
+	imx6q-b450v3.dtb \
+	imx6q-b650v3.dtb \
+	imx6q-b850v3.dtb \
 	imx6q-cm-fx6.dtb \
 	imx6q-cubox-i.dtb \
 	imx6q-dfi-fs700-m60.dtb \
 	imx6q-dmo-edmqmx6.dtb \
+	imx6q-evi.dtb \
 	imx6q-gk802.dtb \
 	imx6q-gw51xx.dtb \
 	imx6q-gw52xx.dtb \
@@ -340,6 +345,7 @@
 	imx6q-gw551x.dtb \
 	imx6q-gw552x.dtb \
 	imx6q-hummingboard.dtb \
+	imx6q-icore-rqs.dtb \
 	imx6q-nitrogen6x.dtb \
 	imx6q-nitrogen6_max.dtb \
 	imx6q-novena.dtb \
@@ -357,7 +363,9 @@
 	imx6q-tx6q-1110.dtb \
 	imx6q-udoo.dtb \
 	imx6q-wandboard.dtb \
-	imx6q-wandboard-revb1.dtb
+	imx6q-wandboard-revb1.dtb \
+	imx6qp-sabreauto.dtb \
+	imx6qp-sabresd.dtb
 dtb-$(CONFIG_SOC_IMX6SL) += \
 	imx6sl-evk.dtb \
 	imx6sl-warp.dtb
@@ -752,7 +760,9 @@
 dtb-$(CONFIG_ARCH_UNIPHIER) += \
 	uniphier-ph1-ld4-ref.dtb \
 	uniphier-ph1-ld6b-ref.dtb \
+	uniphier-ph1-pro4-ace.dtb \
 	uniphier-ph1-pro4-ref.dtb \
+	uniphier-ph1-pro4-sanji.dtb \
 	uniphier-ph1-sld3-ref.dtb \
 	uniphier-ph1-sld8-ref.dtb \
 	uniphier-proxstream2-gentil.dtb \

diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
index ff888d2..f3e2b96 100644
--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts

@@ -303,6 +303,7 @@
 		regulator-name = "mmc0-card-supply";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
 	};
 
 	gpio_keys {

diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
index 569026e..da84e65 100644
--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts

@@ -268,5 +268,6 @@
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
 		vin-supply = <&vcc_3v3_reg>;
+		regulator-always-on;
 	};
 };

diff --git a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard.dts b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard.dts
index ed1d0b4..cda6907 100644
--- a/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard.dts
+++ b/arch/arm/boot/dts/imx25-eukrea-mbimxsd25-baseboard.dts

@@ -30,7 +30,7 @@
 			label = "BP1";
 			gpios = <&gpio3 18 GPIO_ACTIVE_LOW>;
 			linux,code = <BTN_MISC>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 

diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index cde329e..6b1f4bb 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi

@@ -269,13 +269,36 @@
 				status = "disabled";
 			};
 
-			tsc: tsc@50030000 {
-				compatible = "fsl,imx25-adc", "fsl,imx21-tsc";
-				reg = <0x50030000 0x4000>;
+			tscadc: tscadc@50030000 {
+				compatible = "fsl,imx25-tsadc";
+				reg = <0x50030000 0xc>;
 				interrupts = <46>;
 				clocks = <&clks 119>;
 				clock-names = "ipg";
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				#address-cells = <1>;
+				#size-cells = <1>;
 				status = "disabled";
+
+				adc: adc@50030800 {
+					compatible = "fsl,imx25-gcq";
+					reg = <0x50030800 0x60>;
+					interrupt-parent = <&tscadc>;
+					interrupts = <1>;
+					#address-cells = <1>;
+					#size-cells = <0>;
+					status = "disabled";
+				};
+
+				tsc: tcq@50030400 {
+					compatible = "fsl,imx25-tcq";
+					reg = <0x50030400 0x60>;
+					interrupt-parent = <&tscadc>;
+					interrupts = <0>;
+					fsl,wires = <4>;
+					status = "disabled";
+				};
 			};
 
 			ssi1: ssi@50034000 {
@@ -497,7 +520,8 @@
 				compatible = "fsl,imx25-usb", "fsl,imx27-usb";
 				reg = <0x53ff4000 0x0200>;
 				interrupts = <37>;
-				clocks = <&clks 70>;
+				clocks = <&clks 9>, <&clks 70>, <&clks 8>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 0>;
 				fsl,usbphy = <&usbphy0>;
 				status = "disabled";
@@ -507,7 +531,8 @@
 				compatible = "fsl,imx25-usb", "fsl,imx27-usb";
 				reg = <0x53ff4400 0x0200>;
 				interrupts = <35>;
-				clocks = <&clks 70>;
+				clocks = <&clks 9>, <&clks 70>, <&clks 8>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 1>;
 				fsl,usbphy = <&usbphy1>;
 				status = "disabled";
@@ -516,8 +541,6 @@
 			usbmisc: usbmisc@53ff4600 {
 				#index-cells = <1>;
 				compatible = "fsl,imx25-usbmisc";
-				clocks = <&clks 9>, <&clks 70>, <&clks 8>;
-				clock-names = "ipg", "ahb", "per";
 				reg = <0x53ff4600 0x00f>;
 			};
 

diff --git a/arch/arm/boot/dts/imx28-apf28dev.dts b/arch/arm/boot/dts/imx28-apf28dev.dts
index 7ac4f1a..1eaa131 100644
--- a/arch/arm/boot/dts/imx28-apf28dev.dts
+++ b/arch/arm/boot/dts/imx28-apf28dev.dts

@@ -225,7 +225,7 @@
 			label = "User button";
 			gpios = <&gpio0 17 GPIO_ACTIVE_LOW>;
 			linux,code = <0x100>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 };

diff --git a/arch/arm/boot/dts/imx28-eukrea-mbmx28lc.dtsi b/arch/arm/boot/dts/imx28-eukrea-mbmx28lc.dtsi
index 927b391..8859474 100644
--- a/arch/arm/boot/dts/imx28-eukrea-mbmx28lc.dtsi
+++ b/arch/arm/boot/dts/imx28-eukrea-mbmx28lc.dtsi

@@ -36,7 +36,7 @@
 			label = "SW3";
 			gpios = <&gpio1 21 GPIO_ACTIVE_LOW>;
 			linux,code = <BTN_MISC>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
@@ -49,7 +49,7 @@
 			label = "SW4";
 			gpios = <&gpio1 20 GPIO_ACTIVE_LOW>;
 			linux,code = <BTN_MISC>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 

diff --git a/arch/arm/boot/dts/imx28-tx28.dts b/arch/arm/boot/dts/imx28-tx28.dts
index 4ea8934..fd20e99 100644
--- a/arch/arm/boot/dts/imx28-tx28.dts
+++ b/arch/arm/boot/dts/imx28-tx28.dts

@@ -130,7 +130,7 @@
 			compatible = "fixed-clock";
 			reg = <0>;
 			#clock-cells = <0>;
-			clock-frequency = <27000000>;
+			clock-frequency = <26000000>;
 		};
 	};
 
@@ -202,7 +202,7 @@
 			0x02020049 /* row 2, col 2, KEY_KP9 */
 		>;
 		gpio-activelow;
-		linux,wakeup;
+		wakeup-source;
 		debounce-delay-ms = <100>;
 		col-scan-delay-us = <5000>;
 		linux,no-autorepeat;

diff --git a/arch/arm/boot/dts/imx35-eukrea-mbimxsd35-baseboard.dts b/arch/arm/boot/dts/imx35-eukrea-mbimxsd35-baseboard.dts
index 75b0367..4727bbb 100644
--- a/arch/arm/boot/dts/imx35-eukrea-mbimxsd35-baseboard.dts
+++ b/arch/arm/boot/dts/imx35-eukrea-mbimxsd35-baseboard.dts

@@ -30,7 +30,7 @@
 			label = "BP1";
 			gpios = <&gpio3 25 GPIO_ACTIVE_LOW>;
 			linux,code = <BTN_MISC>;
-			gpio-key,wakeup;
+			wakeup-source;
 			linux,input-type = <1>;
 		};
 	};

diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi
index ed3dc33..14e1320 100644
--- a/arch/arm/boot/dts/imx35.dtsi
+++ b/arch/arm/boot/dts/imx35.dtsi

@@ -305,7 +305,8 @@
 				compatible = "fsl,imx35-usb", "fsl,imx27-usb";
 				reg = <0x53ff4000 0x0200>;
 				interrupts = <37>;
-				clocks = <&clks 73>;
+				clocks = <&clks 9>, <&clks 73>, <&clks 28>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 0>;
 				fsl,usbphy = <&usbphy0>;
 				status = "disabled";
@@ -315,7 +316,8 @@
 				compatible = "fsl,imx35-usb", "fsl,imx27-usb";
 				reg = <0x53ff4400 0x0200>;
 				interrupts = <35>;
-				clocks = <&clks 73>;
+				clocks = <&clks 9>, <&clks 73>, <&clks 28>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 1>;
 				fsl,usbphy = <&usbphy1>;
 				dr_mode = "host";
@@ -325,8 +327,6 @@
 			usbmisc: usbmisc@53ff4600 {
 				#index-cells = <1>;
 				compatible = "fsl,imx35-usbmisc";
-				clocks = <&clks 9>, <&clks 73>, <&clks 28>;
-				clock-names = "ipg", "ahb", "per";
 				reg = <0x53ff4600 0x00f>;
 			};
 		};

diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts
index 649befe..018d24e 100644
--- a/arch/arm/boot/dts/imx51-babbage.dts
+++ b/arch/arm/boot/dts/imx51-babbage.dts

@@ -107,7 +107,7 @@
 			label = "Power Button";
 			gpios = <&gpio2 21 GPIO_ACTIVE_HIGH>;
 			linux,code = <KEY_POWER>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 

diff --git a/arch/arm/boot/dts/imx51-digi-connectcore-som.dtsi b/arch/arm/boot/dts/imx51-digi-connectcore-som.dtsi
index 321662f5..16fc69c 100644
--- a/arch/arm/boot/dts/imx51-digi-connectcore-som.dtsi
+++ b/arch/arm/boot/dts/imx51-digi-connectcore-som.dtsi

@@ -156,7 +156,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_esdhc2>;
 	cap-sdio-irq;
-	enable-sdio-wakeup;
+	wakeup-source;
 	keep-power-in-suspend;
 	max-frequency = <50000000>;
 	no-1-8-v;

diff --git a/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts b/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts
index 34599c5..d270df3 100644
--- a/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts
+++ b/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts

@@ -41,7 +41,7 @@
 			label = "BP1";
 			gpios = <&gpio3 31 GPIO_ACTIVE_LOW>;
 			linux,code = <256>;
-			gpio-key,wakeup;
+			wakeup-source;
 			linux,input-type = <1>;
 		};
 	};

diff --git a/arch/arm/boot/dts/imx51-pinfunc.h b/arch/arm/boot/dts/imx51-pinfunc.h
index 9eb92ab..82eae3c 100644
--- a/arch/arm/boot/dts/imx51-pinfunc.h
+++ b/arch/arm/boot/dts/imx51-pinfunc.h

@@ -536,7 +536,6 @@
 #define MX51_PAD_DISPB2_SER_CLK__DISP1_PIN7		0x2c4 0x6c4 0x000 0x3 0x0
 #define MX51_PAD_DISPB2_SER_CLK__DISPB2_SER_CLK		0x2c4 0x6c4 0x000 0x0 0x0
 #define MX51_PAD_DISPB2_SER_CLK__GPIO3_7		0x2c4 0x6c4 0x990 0x4 0x1
-#define MX51_PAD_DISPB2_SER_RS__DISP1_EXT_CLK		0x2c8 0x6c8 0x000 0x2 0x0
 #define MX51_PAD_DISPB2_SER_RS__DISP1_PIN16		0x2c8 0x6c8 0x000 0x2 0x0
 #define MX51_PAD_DISPB2_SER_RS__DISP1_PIN8		0x2c8 0x6c8 0x000 0x3 0x0
 #define MX51_PAD_DISPB2_SER_RS__DISPB2_SER_RS		0x2c8 0x6c8 0x000 0x0 0x0

diff --git a/arch/arm/boot/dts/imx53-ard.dts b/arch/arm/boot/dts/imx53-ard.dts
index 3bc1883..4486bc4 100644
--- a/arch/arm/boot/dts/imx53-ard.dts
+++ b/arch/arm/boot/dts/imx53-ard.dts

@@ -69,21 +69,21 @@
 			label = "Home";
 			gpios = <&gpio5 10 0>;
 			linux,code = <102>; /* KEY_HOME */
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		back {
 			label = "Back";
 			gpios = <&gpio5 11 0>;
 			linux,code = <158>; /* KEY_BACK */
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		program {
 			label = "Program";
 			gpios = <&gpio5 12 0>;
 			linux,code = <362>; /* KEY_PROGRAM */
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		volume-up {

diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi
index 53fd75c..c05e7cf 100644
--- a/arch/arm/boot/dts/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi

@@ -59,22 +59,22 @@
 
 		power {
 			label = "Power Button";
-			gpios = <&gpio1 8 0>;
-			linux,code = <116>; /* KEY_POWER */
+			gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_POWER>;
 		};
 
 		volume-up {
 			label = "Volume Up";
-			gpios = <&gpio2 14 0>;
-			linux,code = <115>; /* KEY_VOLUMEUP */
-			gpio-key,wakeup;
+			gpios = <&gpio2 14 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_VOLUMEUP>;
+			wakeup-source;
 		};
 
 		volume-down {
 			label = "Volume Down";
-			gpios = <&gpio2 15 0>;
-			linux,code = <114>; /* KEY_VOLUMEDOWN */
-			gpio-key,wakeup;
+			gpios = <&gpio2 15 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_VOLUMEDOWN>;
+			wakeup-source;
 		};
 	};
 

diff --git a/arch/arm/boot/dts/imx53-tx53-x03x.dts b/arch/arm/boot/dts/imx53-tx53-x03x.dts
index 13e842b..0ecb43d 100644
--- a/arch/arm/boot/dts/imx53-tx53-x03x.dts
+++ b/arch/arm/boot/dts/imx53-tx53-x03x.dts

@@ -231,7 +231,7 @@
 		interrupts = <26 0>;
 		gpios = <&gpio3 26 GPIO_ACTIVE_LOW>;
 		ti,x-plate-ohms = <660>;
-		linux,wakeup;
+		wakeup-source;
 	};
 };
 

diff --git a/arch/arm/boot/dts/imx53-tx53-x13x.dts b/arch/arm/boot/dts/imx53-tx53-x13x.dts
index 6480471..3cf682a 100644
--- a/arch/arm/boot/dts/imx53-tx53-x13x.dts
+++ b/arch/arm/boot/dts/imx53-tx53-x13x.dts

@@ -101,7 +101,7 @@
 		interrupt-parent = <&gpio3>;
 		interrupts = <23 0>;
 		wakeup-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
-		linux,wakeup;
+		wakeup-source;
 	};
 };
 
@@ -126,7 +126,7 @@
 		interrupt-parent = <&gpio3>;
 		interrupts = <22 0>;
 		wakeup-gpios = <&gpio3 22 GPIO_ACTIVE_HIGH>;
-		linux,wakeup;
+		wakeup-source;
 	};
 };
 
@@ -183,13 +183,14 @@
 	status = "okay";
 
 	lvds0: lvds-channel@0 {
-		fsl,data-mapping = "jeida";
-		fsl,data-width = <24>;
+		fsl,data-mapping = "spwg";
+		fsl,data-width = <18>;
 		status = "okay";
 
 		display-timings {
-			native-mode = <&lvds_timing0>;
-			lvds_timing0: hsd100pxn1 {
+			native-mode = <&lvds0_timing0>;
+
+			lvds0_timing0: hsd100pxn1 {
 				clock-frequency = <65000000>;
 				hactive = <1024>;
 				vactive = <768>;
@@ -202,19 +203,36 @@
 				hsync-active = <0>;
 				vsync-active = <0>;
 				de-active = <1>;
-				pixelclk-active = <0>;
+				pixelclk-active = <1>;
+			};
+
+			lvds0_timing1: nl12880bc20 {
+				clock-frequency = <71000000>;
+				hactive = <1280>;
+				vactive = <800>;
+				hback-porch = <50>;
+				hsync-len = <60>;
+				hfront-porch = <50>;
+				vback-porch = <5>;
+				vsync-len = <13>;
+				vfront-porch = <5>;
+				hsync-active = <0>;
+				vsync-active = <0>;
+				de-active = <1>;
+				pixelclk-active = <1>;
 			};
 		};
 	};
 
 	lvds1: lvds-channel@1 {
-		fsl,data-mapping = "jeida";
-		fsl,data-width = <24>;
+		fsl,data-mapping = "spwg";
+		fsl,data-width = <18>;
 		status = "okay";
 
 		display-timings {
-			native-mode = <&lvds_timing1>;
-			lvds_timing1: hsd100pxn1 {
+			native-mode = <&lvds1_timing0>;
+
+			lvds1_timing0: hsd100pxn1 {
 				clock-frequency = <65000000>;
 				hactive = <1024>;
 				vactive = <768>;
@@ -227,7 +245,7 @@
 				hsync-active = <0>;
 				vsync-active = <0>;
 				de-active = <1>;
-				pixelclk-active = <0>;
+				pixelclk-active = <1>;
 			};
 		};
 	};

diff --git a/arch/arm/boot/dts/imx53-tx53.dtsi b/arch/arm/boot/dts/imx53-tx53.dtsi
index d3e50b2..bd3dfef 100644
--- a/arch/arm/boot/dts/imx53-tx53.dtsi
+++ b/arch/arm/boot/dts/imx53-tx53.dtsi

@@ -37,7 +37,7 @@
 			compatible = "fixed-clock";
 			reg = <0>;
 			#clock-cells = <0>;
-			clock-frequency = <27000000>;
+			clock-frequency = <26000000>;
 		};
 	};
 
@@ -50,7 +50,7 @@
 			label = "Power Button";
 			gpios = <&gpio5 2 GPIO_ACTIVE_HIGH>;
 			linux,code = <116>; /* KEY_POWER */
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 

diff --git a/arch/arm/boot/dts/imx6dl-tx6u-811x.dts b/arch/arm/boot/dts/imx6dl-tx6u-811x.dts
index c275eec..d35a5cd 100644
--- a/arch/arm/boot/dts/imx6dl-tx6u-811x.dts
+++ b/arch/arm/boot/dts/imx6dl-tx6u-811x.dts

@@ -77,7 +77,7 @@
 		interrupt-parent = <&gpio3>;
 		interrupts = <22 0>;
 		wakeup-gpios = <&gpio3 22 GPIO_ACTIVE_HIGH>;
-		linux,wakeup;
+		wakeup-source;
 	};
 };
 

diff --git a/arch/arm/boot/dts/imx6dl-wandboard-revb1.dts b/arch/arm/boot/dts/imx6dl-wandboard-revb1.dts
index f607d4f..8c314ee 100644
--- a/arch/arm/boot/dts/imx6dl-wandboard-revb1.dts
+++ b/arch/arm/boot/dts/imx6dl-wandboard-revb1.dts

@@ -13,7 +13,7 @@
 #include "imx6qdl-wandboard-revb1.dtsi"
 
 / {
-	model = "Wandboard i.MX6 Dual Lite Board";
+	model = "Wandboard i.MX6 Dual Lite Board rev B1";
 	compatible = "wand,imx6dl-wandboard", "fsl,imx6dl";
 
 	memory {

diff --git a/arch/arm/boot/dts/imx6q-apalis-ixora.dts b/arch/arm/boot/dts/imx6q-apalis-ixora.dts
new file mode 100644
index 0000000..2cba82d
--- /dev/null
+++ b/arch/arm/boot/dts/imx6q-apalis-ixora.dts

@@ -0,0 +1,272 @@
+/*
+ * Copyright 2014-2016 Toradex AG
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2011 Linaro Ltd.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include "imx6q.dtsi"
+#include "imx6qdl-apalis.dtsi"
+
+/ {
+	model = "Toradex Apalis iMX6Q/D Module on Ixora Carrier Board";
+	compatible = "toradex,apalis_imx6q-ixora", "toradex,apalis_imx6q",
+		     "fsl,imx6q";
+
+	aliases {
+		i2c0 = &i2cddc;
+		i2c1 = &i2c1;
+		i2c2 = &i2c2;
+		i2c3 = &i2c3;
+	};
+
+	aliases {
+		rtc0 = &rtc_i2c;
+		rtc1 = &snvs_rtc;
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_gpio_keys>;
+
+		wakeup {
+			label = "Wake-Up";
+			gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+			linux,code = <KEY_WAKEUP>;
+			debounce-interval = <10>;
+			wakeup-source;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_leds_ixora>;
+
+		led4-green {
+			label = "LED_4_GREEN";
+			gpios = <&gpio1 17 GPIO_ACTIVE_HIGH>;
+		};
+
+		led4-red {
+			label = "LED_4_RED";
+			gpios = <&gpio1 21 GPIO_ACTIVE_HIGH>;
+		};
+
+		led5-green {
+			label = "LED_5_GREEN";
+			gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
+		};
+
+		led5-red {
+			label = "LED_5_RED";
+			gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>;
+		};
+	};
+
+	pwmleds {
+		compatible = "pwm-leds";
+
+		ledpwm1 {
+			label = "PWM1";
+			pwms = <&pwm1 0 50000>;
+			max-brightness = <255>;
+		};
+
+		ledpwm2 {
+			label = "PWM2";
+			pwms = <&pwm2 0 50000>;
+			max-brightness = <255>;
+		};
+
+		ledpwm3 {
+			label = "PWM3";
+			pwms = <&pwm3 0 50000>;
+			max-brightness = <255>;
+		};
+	};
+};
+
+&backlight {
+	brightness-levels = <0 127 191 223 239 247 251 255>;
+	default-brightness-level = <1>;
+	status = "okay";
+};
+
+&can1 {
+	status = "okay";
+};
+
+&can2 {
+	status = "okay";
+};
+
+&hdmi {
+	ddc-i2c-bus = <&i2cddc>;
+	status = "okay";
+};
+
+&i2cddc {
+	status = "okay";
+};
+
+/* GEN1_I2C: I2C1_SDA/SCL on MXM3 209/211 (e.g. RTC on carrier board) */
+&i2c1 {
+	status = "okay";
+
+	eeprom@50 {
+		compatible = "atmel,24c02";
+		reg = <0x50>;
+	};
+
+	/* M41T0M6 real time clock on carrier board */
+	rtc_i2c: rtc@68 {
+		compatible = "st,m41t00";
+		reg = <0x68>;
+	};
+};
+
+&ldb {
+	status = "okay";
+};
+
+&pcie {
+	/* active-low meaning opposite of regular PERST# active-low polarity */
+	reset-gpio = <&gpio1 28 GPIO_ACTIVE_LOW>;
+	status = "okay";
+};
+
+&pwm1 {
+	status = "okay";
+};
+
+&pwm2 {
+	status = "okay";
+};
+
+&pwm3 {
+	status = "okay";
+};
+
+&pwm4 {
+	status = "okay";
+};
+
+&reg_usb_otg_vbus {
+	status = "okay";
+};
+
+&reg_usb_host_vbus {
+	status = "okay";
+};
+
+&sata {
+	status = "okay";
+};
+
+&sound_spdif {
+	status = "okay";
+};
+
+&spdif {
+	status = "okay";
+};
+
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&uart4 {
+	status = "okay";
+};
+
+&uart5 {
+	status = "okay";
+};
+
+&usbh1 {
+	vbus-supply = <&reg_usb_host_vbus>;
+	status = "okay";
+};
+
+&usbotg {
+	vbus-supply = <&reg_usb_otg_vbus>;
+	status = "okay";
+};
+
+/* SD1 */
+&usdhc2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_sd_cd>;
+	cd-gpios = <&gpio6 14 GPIO_ACTIVE_LOW>;
+	status = "okay";
+};
+
+&iomuxc {
+	/*
+	 * Mux the Apalis GPIOs
+	 * GPIO5, 6 used by optional fusion_F0710A kernel module
+	 */
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_apalis_gpio1 &pinctrl_apalis_gpio2
+		     &pinctrl_apalis_gpio3 &pinctrl_apalis_gpio4
+		     &pinctrl_apalis_gpio5 &pinctrl_apalis_gpio6
+		     &pinctrl_apalis_gpio7 &pinctrl_apalis_gpio8
+		    >;
+
+	pinctrl_leds_ixora: ledsixoragrp {
+		fsl,pins = <
+			MX6QDL_PAD_SD1_DAT1__GPIO1_IO17 0x1b0b0
+			MX6QDL_PAD_SD1_DAT3__GPIO1_IO21 0x1b0b0
+			MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x1b0b0
+			MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x1b0b0
+		>;
+	};
+};

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts b/arch/arm/boot/dts/imx6q-b450v3.dts
similarity index 61%
copy from arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
copy to arch/arm/boot/dts/imx6q-b450v3.dts
index 3e53317..3101be5 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
+++ b/arch/arm/boot/dts/imx6q-b450v3.dts

@@ -1,7 +1,6 @@
 /*
- * Device Tree Source for UniPhier PH1-LD10 Reference Board
- *
- * Copyright (C) 2015 Masahiro Yamada <yamada.masahiro@socionext.com>
+ * Copyright 2015 Timesys Corporation.
+ * Copyright 2015 General Electric Company
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -9,21 +8,20 @@
  * whole.
  *
  *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of the
- *     License, or (at your option) any later version.
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
  *
- *     This file is distributed in the hope that it will be useful,
+ *     This file is distributed in the hope that it will be useful
  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *     GNU General Public License for more details.
  *
- * Or, alternatively,
+ * Or, alternatively
  *
  *  b) Permission is hereby granted, free of charge, to any person
  *     obtaining a copy of this software and associated documentation
  *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
+ *     restriction, including without limitation the rights to use
  *     copy, modify, merge, publish, distribute, sublicense, and/or
  *     sell copies of the Software, and to permit persons to whom the
  *     Software is furnished to do so, subject to the following
@@ -32,64 +30,59 @@
  *     The above copyright notice and this permission notice shall be
  *     included in all copies or substantial portions of the Software.
  *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
  *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
  *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /dts-v1/;
-/include/ "uniphier-ph1-ld10.dtsi"
-/include/ "uniphier-support-card.dtsi"
+
+#include "imx6q-bx50v3.dtsi"
 
 / {
-	model = "UniPhier PH1-LD10 Reference Board";
-	compatible = "socionext,ph1-ld10-ref", "socionext,ph1-ld10";
-
-	memory {
-		device_type = "memory";
-		reg = <0 0x80000000 0 0xc0000000>;
-	};
+	model = "General Electric B450v3";
+	compatible = "ge,imx6q-b450v3", "advantech,imx6q-ba16", "fsl,imx6q";
 
 	chosen {
-		stdout-path = "serial0:115200n8";
+		stdout-path = &uart3;
 	};
 
-	aliases {
-		serial0 = &serial0;
-		serial1 = &serial1;
-		serial2 = &serial2;
-		serial3 = &serial3;
-		i2c0 = &i2c0;
-		i2c1 = &i2c1;
-		i2c2 = &i2c2;
-		i2c3 = &i2c3;
-		i2c4 = &i2c4;
-		i2c5 = &i2c5;
-		i2c6 = &i2c6;
+	panel-lvds0 {
+		compatible = "innolux,g121x1-l03";
+		backlight = <&backlight_lvds>;
+		power-supply = <&reg_lvds>;
+
+		port {
+			panel_in_lvds0: endpoint {
+				remote-endpoint = <&lvds0_out>;
+			};
+		};
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
-&ethsc {
-	interrupts = <0 48 4>;
-};
-
-&serial0 {
+&ldb {
+	assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>,
+			  <&clks IMX6QDL_CLK_LDB_DI1_SEL>;
+	assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>,
+				 <&clks IMX6QDL_CLK_PLL3_USB_OTG>;
 	status = "okay";
-};
 
-&i2c0 {
-	status = "okay";
+	lvds0: lvds-channel@0 {
+		fsl,data-mapping = "spwg";
+		fsl,data-width = <24>;
+		status = "okay";
+
+		port@4 {
+			reg = <4>;
+
+			lvds0_out: endpoint {
+				remote-endpoint = <&panel_in_lvds0>;
+			};
+		};
+	};
 };

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts b/arch/arm/boot/dts/imx6q-b650v3.dts
similarity index 61%
copy from arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
copy to arch/arm/boot/dts/imx6q-b650v3.dts
index 3e53317..823f55c 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
+++ b/arch/arm/boot/dts/imx6q-b650v3.dts

@@ -1,7 +1,6 @@
 /*
- * Device Tree Source for UniPhier PH1-LD10 Reference Board
- *
- * Copyright (C) 2015 Masahiro Yamada <yamada.masahiro@socionext.com>
+ * Copyright 2015 Timesys Corporation.
+ * Copyright 2015 General Electric Company
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -9,21 +8,20 @@
  * whole.
  *
  *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of the
- *     License, or (at your option) any later version.
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
  *
- *     This file is distributed in the hope that it will be useful,
+ *     This file is distributed in the hope that it will be useful
  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *     GNU General Public License for more details.
  *
- * Or, alternatively,
+ * Or, alternatively
  *
  *  b) Permission is hereby granted, free of charge, to any person
  *     obtaining a copy of this software and associated documentation
  *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
+ *     restriction, including without limitation the rights to use
  *     copy, modify, merge, publish, distribute, sublicense, and/or
  *     sell copies of the Software, and to permit persons to whom the
  *     Software is furnished to do so, subject to the following
@@ -32,64 +30,59 @@
  *     The above copyright notice and this permission notice shall be
  *     included in all copies or substantial portions of the Software.
  *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
  *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
  *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /dts-v1/;
-/include/ "uniphier-ph1-ld10.dtsi"
-/include/ "uniphier-support-card.dtsi"
+
+#include "imx6q-bx50v3.dtsi"
 
 / {
-	model = "UniPhier PH1-LD10 Reference Board";
-	compatible = "socionext,ph1-ld10-ref", "socionext,ph1-ld10";
-
-	memory {
-		device_type = "memory";
-		reg = <0 0x80000000 0 0xc0000000>;
-	};
+	model = "General Electric B650v3";
+	compatible = "ge,imx6q-b650v3", "advantech,imx6q-ba16", "fsl,imx6q";
 
 	chosen {
-		stdout-path = "serial0:115200n8";
+		stdout-path = &uart3;
 	};
 
-	aliases {
-		serial0 = &serial0;
-		serial1 = &serial1;
-		serial2 = &serial2;
-		serial3 = &serial3;
-		i2c0 = &i2c0;
-		i2c1 = &i2c1;
-		i2c2 = &i2c2;
-		i2c3 = &i2c3;
-		i2c4 = &i2c4;
-		i2c5 = &i2c5;
-		i2c6 = &i2c6;
+	panel-lvds0 {
+		compatible = "innolux,g121x1-l03";
+		backlight = <&backlight_lvds>;
+		power-supply = <&reg_lvds>;
+
+		port {
+			panel_in_lvds0: endpoint {
+				remote-endpoint = <&lvds0_out>;
+			};
+		};
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
-&ethsc {
-	interrupts = <0 48 4>;
-};
-
-&serial0 {
+&ldb {
+	assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>,
+			  <&clks IMX6QDL_CLK_LDB_DI1_SEL>;
+	assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>,
+				 <&clks IMX6QDL_CLK_PLL3_USB_OTG>;
 	status = "okay";
-};
 
-&i2c0 {
-	status = "okay";
+	lvds0: lvds-channel@0 {
+		fsl,data-mapping = "spwg";
+		fsl,data-width = <24>;
+		status = "okay";
+
+		port@4 {
+			reg = <4>;
+
+			lvds0_out: endpoint {
+				remote-endpoint = <&panel_in_lvds0>;
+			};
+		};
+	};
 };

diff --git a/arch/arm/boot/dts/imx6q-b850v3.dts b/arch/arm/boot/dts/imx6q-b850v3.dts
new file mode 100644
index 0000000..984d000
--- /dev/null
+++ b/arch/arm/boot/dts/imx6q-b850v3.dts

@@ -0,0 +1,157 @@
+/*
+ * Copyright 2015 Timesys Corporation.
+ * Copyright 2015 General Electric Company
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/dts-v1/;
+
+#include "imx6q-bx50v3.dtsi"
+
+/ {
+	model = "General Electric B850v3";
+	compatible = "ge,imx6q-b850v3", "advantech,imx6q-ba16", "fsl,imx6q";
+
+	chosen {
+		stdout-path = &uart3;
+	};
+
+	panel-lvds0 {
+		compatible = "auo,b133htn01";
+		backlight = <&backlight_lvds>;
+		ddc-i2c-bus = <&mux2_i2c2>;
+
+		port {
+			panel_in_lvds0: endpoint {
+				remote-endpoint = <&lvds0_out>;
+			};
+		};
+	};
+};
+
+&ldb {
+	assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>,
+			  <&clks IMX6QDL_CLK_LDB_DI1_SEL>;
+	assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>,
+				 <&clks IMX6QDL_CLK_PLL3_USB_OTG>;
+	fsl,dual-channel;
+	status = "okay";
+
+	lvds0: lvds-channel@0 {
+		fsl,data-mapping = "spwg";
+		fsl,data-width = <24>;
+		status = "okay";
+
+		port@4 {
+			reg = <4>;
+
+			lvds0_out: endpoint {
+				remote-endpoint = <&panel_in_lvds0>;
+			};
+		};
+	};
+};
+
+&i2c2 {
+	pca9547_ddc: mux@70 {
+		compatible = "nxp,pca9547";
+		reg = <0x70>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mux2_i2c1: i2c@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x0>;
+		};
+
+		mux2_i2c2: i2c@1 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x1>;
+		};
+
+		mux2_i2c3: i2c@2 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x2>;
+		};
+
+		mux2_i2c4: i2c@3 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x3>;
+		};
+
+		mux2_i2c5: i2c@4 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x4>;
+		};
+
+		mux2_i2c6: i2c@5 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x5>;
+		};
+
+		mux2_i2c7: i2c@6 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x6>;
+		};
+
+		mux2_i2c8: i2c@7 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x7>;
+		};
+	};
+};
+
+&hdmi {
+	ddc-i2c-bus = <&mux2_i2c1>;
+};
+
+&mux1_i2c1 {
+	ads7830@4a {
+		compatible = "ti,ads7830";
+		reg = <0x4a>;
+	};
+};

diff --git a/arch/arm/boot/dts/imx6q-ba16.dtsi b/arch/arm/boot/dts/imx6q-ba16.dtsi
new file mode 100644
index 0000000..8f6e603
--- /dev/null
+++ b/arch/arm/boot/dts/imx6q-ba16.dtsi

@@ -0,0 +1,632 @@
+/*
+ * Support for imx6 based Advantech DMS-BA16 Qseven module
+ *
+ * Copyright 2015 Timesys Corporation.
+ * Copyright 2015 General Electric Company
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "imx6q.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+	memory {
+		reg = <0x10000000 0x40000000>;
+	};
+
+	backlight_lvds: backlight {
+		compatible = "pwm-backlight";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_display>;
+		pwms = <&pwm1 0 5000000>;
+		brightness-levels = <  0   1   2   3   4   5   6   7   8   9
+				      10  11  12  13  14  15  16  17  18  19
+				      20  21  22  23  24  25  26  27  28  29
+				      30  31  32  33  34  35  36  37  38  39
+				      40  41  42  43  44  45  46  47  48  49
+				      50  51  52  53  54  55  56  57  58  59
+				      60  61  62  63  64  65  66  67  68  69
+				      70  71  72  73  74  75  76  77  78  79
+				      80  81  82  83  84  85  86  87  88  89
+				      90  91  92  93  94  95  96  97  98  99
+				     100 101 102 103 104 105 106 107 108 109
+				     110 111 112 113 114 115 116 117 118 119
+				     120 121 122 123 124 125 126 127 128 129
+				     130 131 132 133 134 135 136 137 138 139
+				     140 141 142 143 144 145 146 147 148 149
+				     150 151 152 153 154 155 156 157 158 159
+				     160 161 162 163 164 165 166 167 168 169
+				     170 171 172 173 174 175 176 177 178 179
+				     180 181 182 183 184 185 186 187 188 189
+				     190 191 192 193 194 195 196 197 198 199
+				     200 201 202 203 204 205 206 207 208 209
+				     210 211 212 213 214 215 216 217 218 219
+				     220 221 222 223 224 225 226 227 228 229
+				     230 231 232 233 234 235 236 237 238 239
+				     240 241 242 243 244 245 246 247 248 249
+				     250 251 252 253 254 255>;
+		default-brightness-level = <255>;
+		enable-gpios = <&gpio1 0 GPIO_ACTIVE_HIGH>;
+	};
+
+	reg_1p8v: regulator-1p8v {
+		compatible = "regulator-fixed";
+		regulator-name = "1P8V";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		regulator-always-on;
+	};
+
+	reg_3p3v: regulator-3p3v {
+		compatible = "regulator-fixed";
+		regulator-name = "3P3V";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+	};
+
+	reg_lvds: regulator-lvds {
+		compatible = "regulator-fixed";
+		regulator-name = "lvds_ppen";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+	};
+
+	reg_usb_h1_vbus: regulator-usbh1vbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usb_h1_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
+
+	reg_usb_otg_vbus: regulator-usbotgvbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usb_otg_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
+};
+
+&audmux {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_audmux>;
+	status = "okay";
+};
+
+&ecspi1 {
+	fsl,spi-num-chipselects = <1>;
+	cs-gpios = <&gpio2 30 GPIO_ACTIVE_HIGH>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi1>;
+	status = "okay";
+
+	flash: n25q032@0 {
+		compatible = "jedec,spi-nor";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		spi-max-frequency = <20000000>;
+		reg = <0>;
+
+		partition@0 {
+			label = "U-Boot";
+			reg = <0x0 0xc0000>;
+		};
+
+		partition@c0000 {
+			label = "env";
+			reg = <0xc0000 0x10000>;
+		};
+
+		partition@d0000 {
+			label = "spare";
+			reg = <0xd0000 0x130000>;
+		};
+	};
+};
+
+&fec {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet>;
+	phy-mode = "rgmii";
+	status = "okay";
+};
+
+&hdmi {
+	ddc-i2c-bus = <&i2c2>;
+	status = "okay";
+};
+
+&i2c1 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c1>;
+	status = "okay";
+};
+
+&i2c2 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c2>;
+	status = "okay";
+};
+
+&i2c3 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c3>;
+	status = "okay";
+
+	pmic@58 {
+		compatible = "dlg,da9063";
+		reg = <0x58>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_pmic>;
+		interrupt-parent = <&gpio7>;
+		interrupts = <13 IRQ_TYPE_LEVEL_LOW>;
+
+		onkey {
+			compatible = "dlg,da9063-onkey";
+		};
+
+		regulators {
+			vdd_bcore1: bcore1 {
+				regulator-min-microvolt = <1420000>;
+				regulator-max-microvolt = <1420000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			vdd_bcore2: bcore2 {
+				regulator-min-microvolt = <1420000>;
+				regulator-max-microvolt = <1420000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			vdd_bpro: bpro {
+				regulator-min-microvolt = <1500000>;
+				regulator-max-microvolt = <1500000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			vdd_bmem: bmem {
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			vdd_bio: bio {
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			vdd_bperi: bperi {
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+
+			vdd_ldo1: ldo1 {
+				regulator-min-microvolt = <600000>;
+				regulator-max-microvolt = <1860000>;
+			};
+
+			vdd_ldo2: ldo2 {
+				regulator-min-microvolt = <600000>;
+				regulator-max-microvolt = <1860000>;
+			};
+
+			vdd_ldo3: ldo3 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <3440000>;
+			};
+
+			vdd_ldo4: ldo4 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <3440000>;
+			};
+
+			vdd_ldo5: ldo5 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <3600000>;
+			};
+
+			vdd_ldo6: ldo6 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <3600000>;
+			};
+
+			vdd_ldo7: ldo7 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <3600000>;
+			};
+
+			vdd_ldo8: ldo8 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <3600000>;
+			};
+
+			vdd_ldo9: ldo9 {
+				regulator-min-microvolt = <950000>;
+				regulator-max-microvolt = <3600000>;
+			};
+
+			vdd_ldo10: ldo10 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <3600000>;
+			};
+
+			vdd_ldo11: ldo11 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <3600000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+		};
+	};
+
+	rtc@32 {
+		compatible = "epson,rx8010";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_rtc>;
+		reg = <0x32>;
+		interrupt-parent = <&gpio4>;
+		interrupts = <10 IRQ_TYPE_LEVEL_HIGH>;
+	};
+};
+
+&pcie {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pcie>;
+	reset-gpio = <&gpio7 12 GPIO_ACTIVE_HIGH>;
+	status = "okay";
+};
+
+&pwm1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm1>;
+	status = "okay";
+};
+
+&pwm2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm2>;
+	status = "okay";
+};
+
+&sata {
+	status = "okay";
+};
+
+&ssi1 {
+	status = "okay";
+};
+
+&uart3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart3>;
+	fsl,uart-has-rtscts;
+	status = "okay";
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart4>;
+	status = "okay";
+};
+
+&usbh1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbhub>;
+	vbus-supply = <&reg_usb_h1_vbus>;
+	reset-gpios = <&gpio7 11 GPIO_ACTIVE_HIGH>;
+	status = "okay";
+};
+
+&usbotg {
+	vbus-supply = <&reg_usb_otg_vbus>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbotg>;
+	disable-over-current;
+	status = "okay";
+};
+
+&usdhc2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc2>;
+	cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+	no-1-8-v;
+	keep-power-in-suspend;
+	wakeup-source;
+	status = "okay";
+};
+
+&usdhc3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc3 &pinctrl_usdhc3_reset>;
+	bus-width = <8>;
+	vmmc-supply = <&vdd_bperi>;
+	vqmmc-supply = <&vdd_bio>;
+	non-removable;
+	keep-power-in-suspend;
+	status = "okay";
+};
+
+&wdog1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_wdog>;
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_hog>;
+
+	pinctrl_audmux: audmuxgrp {
+		fsl,pins = <
+			MX6QDL_PAD_DISP0_DAT20__AUD4_TXC  0x130b0
+			MX6QDL_PAD_DISP0_DAT21__AUD4_TXD  0x130b0
+			MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS 0x130b0
+			MX6QDL_PAD_DISP0_DAT23__AUD4_RXD  0x130b0
+		>;
+	};
+
+	pinctrl_display: dispgrp {
+		fsl,pins = <
+			/* BLEN_OUT */
+			MX6QDL_PAD_GPIO_0__GPIO1_IO00    0x1b0b0
+			/* LVDS_PPEN_OUT */
+			MX6QDL_PAD_EIM_D22__GPIO3_IO22   0x1b0b0
+		>;
+	};
+
+	pinctrl_ecspi1: ecspi1grp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D17__ECSPI1_MISO	0x100b1
+			MX6QDL_PAD_EIM_D18__ECSPI1_MOSI	0x100b1
+			MX6QDL_PAD_EIM_D16__ECSPI1_SCLK	0x100b1
+			/* SPI1 CS */
+			MX6QDL_PAD_EIM_EB2__GPIO2_IO30	0x1b0b0
+		>;
+	};
+
+	pinctrl_ecspi5: ecspi5grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD1_DAT0__ECSPI5_MISO	0x1b0b0
+			MX6QDL_PAD_SD1_CMD__ECSPI5_MOSI		0x1b0b0
+			MX6QDL_PAD_SD1_CLK__ECSPI5_SCLK		0x1b0b0
+			MX6QDL_PAD_SD1_DAT1__GPIO1_IO17		0x1b0b0
+		>;
+	};
+
+	pinctrl_enet: enetgrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_MDIO__ENET_MDIO       0x100b0
+			MX6QDL_PAD_ENET_MDC__ENET_MDC         0x100b0
+			MX6QDL_PAD_RGMII_TXC__RGMII_TXC       0x100b0
+			MX6QDL_PAD_RGMII_TD0__RGMII_TD0       0x100b0
+			MX6QDL_PAD_RGMII_TD1__RGMII_TD1       0x100b0
+			MX6QDL_PAD_RGMII_TD2__RGMII_TD2       0x100b0
+			MX6QDL_PAD_RGMII_TD3__RGMII_TD3       0x100b0
+			MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL 0x100b0
+			MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK  0x100b0
+			MX6QDL_PAD_RGMII_RXC__RGMII_RXC       0x1b0b0
+			MX6QDL_PAD_RGMII_RD0__RGMII_RD0       0x1b0b0
+			MX6QDL_PAD_RGMII_RD1__RGMII_RD1       0x1b0b0
+			MX6QDL_PAD_RGMII_RD2__RGMII_RD2       0x1b0b0
+			MX6QDL_PAD_RGMII_RD3__RGMII_RD3       0x1b0b0
+			MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b0b0
+			/* FEC Reset */
+			MX6QDL_PAD_ENET_TX_EN__GPIO1_IO28     0x1b0b0
+			/* AR8033 Interrupt */
+			MX6QDL_PAD_GPIO_19__GPIO4_IO05        0x1b0b0
+		>;
+	};
+
+	pinctrl_hog: hoggrp {
+		fsl,pins = <
+			/* GPIO 0-7 */
+			MX6QDL_PAD_NANDF_D0__GPIO2_IO00  0x1b0b0
+			MX6QDL_PAD_NANDF_D1__GPIO2_IO01  0x1b0b0
+			MX6QDL_PAD_NANDF_D2__GPIO2_IO02  0x1b0b0
+			MX6QDL_PAD_NANDF_D3__GPIO2_IO03  0x1b0b0
+			MX6QDL_PAD_NANDF_D4__GPIO2_IO04  0x1b0b0
+			MX6QDL_PAD_NANDF_D5__GPIO2_IO05  0x1b0b0
+			MX6QDL_PAD_NANDF_D6__GPIO2_IO06  0x1b0b0
+			MX6QDL_PAD_NANDF_D7__GPIO2_IO07  0x1b0b0
+			/* SUS_S3_OUT to CPLD */
+			MX6QDL_PAD_KEY_ROW2__GPIO4_IO11  0x1b0b0
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT8__I2C1_SDA	0x4001b8b1
+			MX6QDL_PAD_CSI0_DAT9__I2C1_SCL	0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL3__I2C2_SCL	0x4001b8b1
+			MX6QDL_PAD_KEY_ROW3__I2C2_SDA	0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c3: i2c3grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_3__I2C3_SCL	0x4001b8b1
+			MX6QDL_PAD_GPIO_6__I2C3_SDA	0x4001b8b1
+		>;
+	};
+
+	pinctrl_pcie: pciegrp {
+		fsl,pins = <
+			/* PCIe Reset */
+			MX6QDL_PAD_GPIO_17__GPIO7_IO12	0x1b0b0
+			/* PCIe Wake */
+			MX6QDL_PAD_GPIO_5__GPIO1_IO05	0x1b0b0
+		>;
+	};
+
+	pinctrl_pmic: pmicgrp {
+		fsl,pins = <
+			/* PMIC Interrupt */
+			MX6QDL_PAD_GPIO_18__GPIO7_IO13	0x1b0b0
+		>;
+	};
+
+	pinctrl_pwm1: pwm1grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD1_DAT3__PWM1_OUT	0x1b0b1
+		>;
+	};
+
+	pinctrl_pwm2: pwm2grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_1__PWM2_OUT	0x1b0b1
+		>;
+	};
+
+	pinctrl_rtc: rtcgrp {
+		fsl,pins = <
+			/* RTC_INT */
+			MX6QDL_PAD_KEY_COL2__GPIO4_IO10	0x1b0b0
+		>;
+	};
+
+	pinctrl_uart3: uart3grp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D25__UART3_RX_DATA 0x1b0b1
+			MX6QDL_PAD_EIM_D24__UART3_TX_DATA 0x1b0b1
+			MX6QDL_PAD_EIM_D23__UART3_CTS_B   0x1b0b1
+			MX6QDL_PAD_EIM_D31__UART3_RTS_B   0x1b0b1
+		>;
+	};
+
+	pinctrl_uart4: uart4grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL0__UART4_TX_DATA 0x1b0b1
+			MX6QDL_PAD_KEY_ROW0__UART4_RX_DATA 0x1b0b1
+		>;
+	};
+
+	pinctrl_usbhub: usbhubgrp {
+		fsl,pins = <
+			/* HUB_RESET */
+			MX6QDL_PAD_GPIO_16__GPIO7_IO11	0x1b0b0
+		>;
+	};
+
+	pinctrl_usbotg: usbotggrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_RX_ER__USB_OTG_ID 0x17059
+		>;
+	};
+
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD2_CMD__SD2_CMD	0x17059
+			MX6QDL_PAD_SD2_CLK__SD2_CLK	0x10059
+			MX6QDL_PAD_SD2_DAT0__SD2_DATA0	0x17059
+			MX6QDL_PAD_SD2_DAT1__SD2_DATA1	0x17059
+			MX6QDL_PAD_SD2_DAT2__SD2_DATA2	0x17059
+			MX6QDL_PAD_SD2_DAT3__SD2_DATA3	0x17059
+			/* uSDHC2 CD */
+			MX6QDL_PAD_GPIO_4__GPIO1_IO04	0x1b0b0
+		>;
+	};
+
+	pinctrl_usdhc3: usdhc3grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD	0x17059
+			MX6QDL_PAD_SD3_CLK__SD3_CLK	0x10059
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0	0x17059
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1	0x17059
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2	0x17059
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3	0x17059
+			MX6QDL_PAD_SD3_DAT4__SD3_DATA4	0x17059
+			MX6QDL_PAD_SD3_DAT5__SD3_DATA5	0x17059
+			MX6QDL_PAD_SD3_DAT6__SD3_DATA6	0x17059
+			MX6QDL_PAD_SD3_DAT7__SD3_DATA7	0x17059
+		>;
+	};
+
+	pinctrl_usdhc3_reset: usdhc3grp-reset {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_RST__SD3_RESET   0x170F9
+		>;
+	};
+
+	pinctrl_usdhc4: usdhc4grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_CMD__SD4_CMD	0x17059
+			MX6QDL_PAD_SD4_CLK__SD4_CLK	0x17059
+			MX6QDL_PAD_SD4_DAT0__SD4_DATA0	0x17059
+			MX6QDL_PAD_SD4_DAT1__SD4_DATA1	0x17059
+			MX6QDL_PAD_SD4_DAT2__SD4_DATA2	0x17059
+			MX6QDL_PAD_SD4_DAT3__SD4_DATA3	0x17059
+			MX6QDL_PAD_SD4_DAT4__SD4_DATA4	0x17059
+			MX6QDL_PAD_SD4_DAT5__SD4_DATA5	0x17059
+			MX6QDL_PAD_SD4_DAT6__SD4_DATA6	0x17059
+			MX6QDL_PAD_SD4_DAT7__SD4_DATA7	0x17059
+			/* uSDHC4 CD */
+			MX6QDL_PAD_NANDF_CS0__GPIO6_IO11 0x1b0b0
+			/* uSDHC4 SDIO PWR */
+			MX6QDL_PAD_NANDF_CS1__GPIO6_IO14 0x1b0b0
+			/* uSDHC4 SDIO WP */
+			MX6QDL_PAD_NANDF_CS2__GPIO6_IO15 0x1b0b0
+			/* uSDHC4 SDIO LED */
+			MX6QDL_PAD_NANDF_CS3__GPIO6_IO16 0x1b0b0
+		>;
+	};
+
+	pinctrl_wdog: wdoggrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_9__WDOG1_B	0x1b0b0
+		>;
+	};
+};

diff --git a/arch/arm/boot/dts/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/imx6q-bx50v3.dtsi
new file mode 100644
index 0000000..bb66dfd
--- /dev/null
+++ b/arch/arm/boot/dts/imx6q-bx50v3.dtsi

@@ -0,0 +1,225 @@
+/*
+ * Copyright 2015 Timesys Corporation.
+ * Copyright 2015 General Electric Company
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "imx6q-ba16.dtsi"
+
+/ {
+	clocks {
+		mclk: clock@0 {
+			compatible = "fixed-clock";
+			reg = <0>;
+			#clock-cells = <0>;
+			clock-frequency = <22000000>;
+		};
+	};
+
+	reg_wl18xx_vmmc: regulator-wl18xx {
+		compatible = "regulator-fixed";
+		regulator-name = "vwl1807";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		gpio = <&pca9539 3 GPIO_ACTIVE_HIGH>;
+		startup-delay-us = <70000>;
+		enable-active-high;
+	};
+
+	reg_wlan: regulator-wlan {
+		compatible = "regulator-fixed";
+		regulator-name = "3P3V_wlan";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+		regulator-boot-on;
+		gpio = <&gpio6 14 GPIO_ACTIVE_HIGH>;
+	};
+
+	sound {
+		compatible = "fsl,imx6q-ba16-sgtl5000",
+			     "fsl,imx-audio-sgtl5000";
+		model = "imx6q-ba16-sgtl5000";
+		ssi-controller = <&ssi1>;
+		audio-codec = <&sgtl5000>;
+		audio-routing =
+			"MIC_IN", "Mic Jack",
+			"Mic Jack", "Mic Bias",
+			"LINE_IN", "Line In Jack",
+			"Headphone Jack", "HP_OUT";
+		mux-int-port = <1>;
+		mux-ext-port = <4>;
+	};
+};
+
+&ecspi5 {
+	fsl,spi-num-chipselects = <1>;
+	cs-gpios = <&gpio1 17 GPIO_ACTIVE_HIGH>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi5>;
+	status = "okay";
+
+	m25_eeprom: m25p80@0 {
+		compatible = "atmel,at25";
+		spi-max-frequency = <20000000>;
+		size = <0x8000>;
+		pagesize = <64>;
+		reg = <0>;
+		address-width = <16>;
+	};
+};
+
+&i2c1 {
+	pca9547: mux@70 {
+		compatible = "nxp,pca9547";
+		reg = <0x70>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		mux1_i2c1: i2c@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x0>;
+
+			ads7830: ads7830@48 {
+				compatible = "ti,ads7830";
+				reg = <0x48>;
+			};
+
+			mma8453: mma8453@1c {
+				compatible = "fsl,mma8453";
+				reg = <0x1c>;
+			};
+		};
+
+		mux1_i2c2: i2c@1 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x1>;
+
+			eeprom: eeprom@50 {
+				compatible = "atmel,24c08";
+				reg = <0x50>;
+			};
+
+			mpl3115: mpl3115@60 {
+				compatible = "fsl,mpl3115";
+				reg = <0x60>;
+			};
+		};
+
+		mux1_i2c3: i2c@2 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x2>;
+		};
+
+		mux1_i2c4: i2c@3 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x3>;
+
+			sgtl5000: codec@0a {
+				compatible = "fsl,sgtl5000";
+				reg = <0x0a>;
+				clocks = <&mclk>;
+				VDDA-supply = <&reg_1p8v>;
+				VDDIO-supply = <&reg_3p3v>;
+			};
+		};
+
+		mux1_i2c5: i2c@4 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x4>;
+
+			pca9539: pca9539@74 {
+				compatible = "nxp,pca9539";
+				reg = <0x74>;
+				gpio-controller;
+				#gpio-cells = <2>;
+				interrupt-controller;
+				interrupt-parent = <&gpio2>;
+				interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+			};
+		};
+
+		mux1_i2c6: i2c@5 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x5>;
+		};
+
+		mux1_i2c7: i2c@6 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x6>;
+		};
+
+		mux1_i2c8: i2c@7 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x7>;
+		};
+	};
+};
+
+&usdhc4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc4>;
+	bus-width = <4>;
+	vmmc-supply = <&reg_wl18xx_vmmc>;
+	no-1-8-v;
+	non-removable;
+	wakeup-source;
+	keep-power-in-suspend;
+	cap-power-off-card;
+	max-frequency = <25000000>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+	status = "okay";
+
+	wlcore: wlcore@2 {
+		compatible = "ti,wl1837";
+		reg = <2>;
+		interrupt-parent = <&gpio2>;
+		interrupts = <6 IRQ_TYPE_LEVEL_HIGH>;
+		tcxo-clock-frequency = <26000000>;
+	};
+};

diff --git a/arch/arm/boot/dts/imx6q-evi.dts b/arch/arm/boot/dts/imx6q-evi.dts
new file mode 100644
index 0000000..4fa5601
--- /dev/null
+++ b/arch/arm/boot/dts/imx6q-evi.dts

@@ -0,0 +1,502 @@
+/*
+ * Copyright 2016 United Western Technologies.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/dts-v1/;
+#include "imx6q.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	model = "Uniwest Evi";
+	compatible = "uniwest,imx6q-evi", "fsl,imx6q";
+
+	memory {
+		reg = <0x10000000 0x40000000>;
+	};
+
+	reg_usbh1_vbus: regulator-usbhubreset {
+		compatible = "regulator-fixed";
+		regulator-name = "usbh1_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		enable-active-high;
+		startup-delay-us = <2>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_usbh1_hubreset>;
+		gpio = <&gpio7 12 GPIO_ACTIVE_HIGH>;
+	};
+
+	reg_usb_otg_vbus: regulator-usbotgvbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usb_otg_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_usbotgvbus>;
+		gpio = <&gpio4 15 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+		regulator-always-on;
+	};
+
+	panel {
+		compatible = "sharp,lq101k1ly04";
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&lvds0_out>;
+			};
+		};
+	};
+};
+
+&ecspi1 {
+	fsl,spi-num-chipselects = <1>;
+	cs-gpios = <&gpio4 10 GPIO_ACTIVE_LOW>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi1 &pinctrl_ecspi1cs>;
+	status = "okay";
+};
+
+&ecspi3 {
+	fsl,spi-num-chipselects = <3>;
+	cs-gpios = <&gpio4 24 GPIO_ACTIVE_LOW>,
+		<&gpio4 25 GPIO_ACTIVE_LOW>,
+		<&gpio4 26 GPIO_ACTIVE_LOW>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi3 &pinctrl_ecspi3cs>;
+	status = "okay";
+};
+
+&ecspi5 {
+	fsl,spi-num-chipselects = <4>;
+	cs-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>,
+		<&gpio1 13 GPIO_ACTIVE_LOW>,
+		<&gpio1 12 GPIO_ACTIVE_LOW>,
+		<&gpio2 9 GPIO_ACTIVE_HIGH>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi5 &pinctrl_ecspi5cs>;
+	status = "okay";
+
+	eeprom: m95m02@1 {
+		compatible = "st,m95m02", "atmel,at25";
+		size = <262144>;
+		pagesize = <256>;
+		address-width = <24>;
+		spi-max-frequency = <5000000>;
+		reg = <1>;
+	};
+
+	pb_rtc: rtc@3 {
+		compatible = "nxp,rtc-pcf2123";
+		spi-max-frequency = <2450000>;
+		spi-cs-high;
+		reg = <3>;
+	};
+};
+
+&fec {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet>;
+	phy-mode = "rgmii";
+	phy-reset-gpios = <&gpio1 25 0>;
+	status = "okay";
+};
+
+&gpmi {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_gpminand>;
+	status = "okay";
+};
+
+&i2c2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c2>;
+	clock-frequency = <100000>;
+	status = "okay";
+};
+
+&i2c3 {
+	pinctrl-names = "default", "gpio";
+	pinctrl-0 = <&pinctrl_i2c3>;
+	pinctrl-1 = <&pinctrl_i2c3_gpio>;
+	clock-frequency = <100000>;
+	scl-gpios = <&gpio1 5 GPIO_ACTIVE_HIGH>;
+	sda-gpios = <&gpio7 11 GPIO_ACTIVE_HIGH>;
+	status = "okay";
+
+	battery: sbs-battery@b {
+		compatible = "sbs,sbs-battery";
+		reg = <0x0b>;
+		sbs,poll-retry-count = <100>;
+		sbs,i2c-retry-count = <100>;
+	};
+};
+
+&ldb {
+	status = "okay";
+
+	lvds0: lvds-channel@0 {
+		status = "okay";
+
+		port@4 {
+			reg = <4>;
+			lvds0_out: endpoint {
+				remote-endpoint = <&panel_in>;
+			};
+		};
+	};
+};
+
+&ssi1 {
+	status = "okay";
+};
+
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart1>;
+	status = "okay";
+};
+
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart2>;
+	status = "okay";
+};
+
+&usbh1 {
+	vbus-supply = <&reg_usbh1_vbus>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbh1>;
+	dr_mode = "host";
+	disable-over-current;
+	status = "okay";
+};
+
+&usbotg {
+	vbus-supply = <&reg_usb_otg_vbus>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbotg>;
+	disable-over-current;
+	dr_mode = "otg";
+	status = "okay";
+};
+
+&usdhc1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc1>;
+	non-removable;
+	status = "okay";
+};
+
+&weim {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	ranges = <0 0 0x08000000 0x08000000>;
+	fsl,weim-cs-gpr = <&gpr>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_weimfpga &pinctrl_weimcs>;
+	status = "okay";
+};
+
+&iomuxc {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_hog>;
+
+	pinctrl_hog: hoggrp {
+		fsl,pins = <
+			/* pwr mcu alert irq */
+			MX6QDL_PAD_SD4_DAT2__GPIO2_IO10 0x1b0b0
+			/* remainder ???? */
+			MX6QDL_PAD_CSI0_MCLK__GPIO5_IO19 0x1b0b0
+		>;
+	};
+
+	pinctrl_ecspi1: ecspi1grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL1__ECSPI1_MISO 0x100b1
+			MX6QDL_PAD_KEY_ROW0__ECSPI1_MOSI 0x100b1
+			MX6QDL_PAD_KEY_COL0__ECSPI1_SCLK 0x100b1
+		>;
+	};
+
+	pinctrl_ecspi1cs: ecspi1csgrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL2__GPIO4_IO10 0x1b0b0
+		>;
+	};
+
+	pinctrl_ecspi3: ecspi3grp {
+		fsl,pins = <
+			MX6QDL_PAD_DISP0_DAT0__ECSPI3_SCLK 0x10068
+			MX6QDL_PAD_DISP0_DAT1__ECSPI3_MOSI 0x10068
+			MX6QDL_PAD_DISP0_DAT2__ECSPI3_MISO 0x1f068
+		>;
+	};
+
+	pinctrl_ecspi3cs: ecspi3csgrp {
+		fsl,pins = <
+			MX6QDL_PAD_DISP0_DAT3__GPIO4_IO24 0x1b0b0
+			MX6QDL_PAD_DISP0_DAT4__GPIO4_IO25 0x1b0b0
+			MX6QDL_PAD_DISP0_DAT5__GPIO4_IO26 0x1b0b0
+			MX6QDL_PAD_DISP0_DAT6__GPIO4_IO27 0x1b0b0
+		>;
+	};
+
+	pinctrl_ecspi5: ecspi5grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD2_CLK__ECSPI5_SCLK 0x100b1
+			MX6QDL_PAD_SD2_CMD__ECSPI5_MOSI 0x100b1
+			MX6QDL_PAD_SD2_DAT0__ECSPI5_MISO 0x100b1
+		>;
+	};
+
+	pinctrl_ecspi5cs: ecspi5csgrp {
+		fsl,pins = <
+			MX6QDL_PAD_SD2_DAT1__GPIO1_IO14 0x1b0b0
+			MX6QDL_PAD_SD2_DAT2__GPIO1_IO13 0x1b0b0
+			MX6QDL_PAD_SD2_DAT3__GPIO1_IO12 0x1b0b0
+			MX6QDL_PAD_SD4_DAT1__GPIO2_IO09 0x1b0b0
+		>;
+	};
+
+	pinctrl_enet: enetgrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_MDIO__ENET_MDIO 0x1b0b0
+			MX6QDL_PAD_ENET_MDC__ENET_MDC 0x1b0b0
+			MX6QDL_PAD_RGMII_TXC__RGMII_TXC 0x1b0b0
+			MX6QDL_PAD_RGMII_TD0__RGMII_TD0 0x1b0b0
+			MX6QDL_PAD_RGMII_TD1__RGMII_TD1 0x1b0b0
+			MX6QDL_PAD_RGMII_TD2__RGMII_TD2 0x1b0b0
+			MX6QDL_PAD_RGMII_TD3__RGMII_TD3 0x1b0b0
+			MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL 0x1b0b0
+			MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK 0x1b0b0
+			MX6QDL_PAD_RGMII_RXC__RGMII_RXC 0x1b0b0
+			MX6QDL_PAD_RGMII_RD0__RGMII_RD0 0x1b0b0
+			MX6QDL_PAD_RGMII_RD1__RGMII_RD1 0x1b0b0
+			MX6QDL_PAD_RGMII_RD2__RGMII_RD2 0x1b0b0
+			MX6QDL_PAD_RGMII_RD3__RGMII_RD3 0x1b0b0
+			MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b0b0
+			MX6QDL_PAD_ENET_TX_EN__ENET_TX_EN 0x4001b0a8
+			MX6QDL_PAD_ENET_CRS_DV__GPIO1_IO25 0x1b0b0
+		>;
+	};
+
+	pinctrl_gpminand: gpminandgrp {
+		fsl,pins = <
+			MX6QDL_PAD_NANDF_CLE__NAND_CLE 0xb0b1
+			MX6QDL_PAD_NANDF_ALE__NAND_ALE 0xb0b1
+			MX6QDL_PAD_NANDF_WP_B__NAND_WP_B 0xb0b1
+			MX6QDL_PAD_NANDF_RB0__NAND_READY_B 0xb000
+			MX6QDL_PAD_NANDF_CS0__NAND_CE0_B 0xb0b1
+			MX6QDL_PAD_NANDF_CS1__NAND_CE1_B 0xb0b1
+			MX6QDL_PAD_SD4_CMD__NAND_RE_B 0xb0b1
+			MX6QDL_PAD_SD4_CLK__NAND_WE_B 0xb0b1
+			MX6QDL_PAD_NANDF_D0__NAND_DATA00 0xb0b1
+			MX6QDL_PAD_NANDF_D1__NAND_DATA01 0xb0b1
+			MX6QDL_PAD_NANDF_D2__NAND_DATA02 0xb0b1
+			MX6QDL_PAD_NANDF_D3__NAND_DATA03 0xb0b1
+			MX6QDL_PAD_NANDF_D4__NAND_DATA04 0xb0b1
+			MX6QDL_PAD_NANDF_D5__NAND_DATA05 0xb0b1
+			MX6QDL_PAD_NANDF_D6__NAND_DATA06 0xb0b1
+			MX6QDL_PAD_NANDF_D7__NAND_DATA07 0xb0b1
+			MX6QDL_PAD_SD4_DAT0__NAND_DQS 0x00b1
+		>;
+	};
+
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_ROW3__I2C2_SDA 0x4001b8b1
+			MX6QDL_PAD_KEY_COL3__I2C2_SCL 0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c3: i2c3grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_5__I2C3_SCL 0x4001b8b1
+			MX6QDL_PAD_GPIO_16__I2C3_SDA 0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c3_gpio: i2c3gpiogrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_5__GPIO1_IO05 0x4001b8b1
+			MX6QDL_PAD_GPIO_16__GPIO7_IO11 0x4001b8b1
+		>;
+	};
+
+	pinctrl_weimcs: weimcsgrp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_CS0__EIM_CS0_B 0xb0b1
+			MX6QDL_PAD_EIM_CS1__EIM_CS1_B 0xb0b1
+		>;
+	};
+
+	pinctrl_weimfpga: weimfpgagrp {
+		fsl,pins = <
+			/* weim misc */
+			MX6QDL_PAD_EIM_OE__EIM_OE_B 0xb0b1
+			MX6QDL_PAD_EIM_RW__EIM_RW 0xb0b1
+			MX6QDL_PAD_EIM_WAIT__EIM_WAIT_B 0xb060
+			MX6QDL_PAD_EIM_BCLK__EIM_BCLK 0xb0b1
+			MX6QDL_PAD_EIM_LBA__EIM_LBA_B 0xb0b1
+			MX6QDL_PAD_EIM_EB0__EIM_EB0_B 0xb0b1
+			MX6QDL_PAD_EIM_EB1__EIM_EB1_B 0xb0b1
+			MX6QDL_PAD_EIM_EB2__EIM_EB2_B 0xb0b1
+			MX6QDL_PAD_EIM_EB3__EIM_EB3_B 0xb0b1
+			/* weim data */
+			MX6QDL_PAD_CSI0_DATA_EN__EIM_DATA00 0x1b0b0
+			MX6QDL_PAD_CSI0_VSYNC__EIM_DATA01 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT4__EIM_DATA02 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT5__EIM_DATA03 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT6__EIM_DATA04 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT7__EIM_DATA05 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT8__EIM_DATA06 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT9__EIM_DATA07 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT12__EIM_DATA08 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT13__EIM_DATA09 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT14__EIM_DATA10 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT15__EIM_DATA11 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT16__EIM_DATA12 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT17__EIM_DATA13 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT18__EIM_DATA14 0x1b0b0
+			MX6QDL_PAD_CSI0_DAT19__EIM_DATA15 0x1b0b0
+			MX6QDL_PAD_EIM_D16__EIM_DATA16 0x1b0b0
+			MX6QDL_PAD_EIM_D17__EIM_DATA17 0x1b0b0
+			MX6QDL_PAD_EIM_D18__EIM_DATA18 0x1b0b0
+			MX6QDL_PAD_EIM_D19__EIM_DATA19 0x1b0b0
+			MX6QDL_PAD_EIM_D20__EIM_DATA20 0x1b0b0
+			MX6QDL_PAD_EIM_D21__EIM_DATA21 0x1b0b0
+			MX6QDL_PAD_EIM_D22__EIM_DATA22 0x1b0b0
+			MX6QDL_PAD_EIM_D23__EIM_DATA23 0x1b0b0
+			MX6QDL_PAD_EIM_D24__EIM_DATA24 0x1b0b0
+			MX6QDL_PAD_EIM_D25__EIM_DATA25 0x1b0b0
+			MX6QDL_PAD_EIM_D26__EIM_DATA26 0x1b0b0
+			MX6QDL_PAD_EIM_D27__EIM_DATA27 0x1b0b0
+			MX6QDL_PAD_EIM_D28__EIM_DATA28 0x1b0b0
+			MX6QDL_PAD_EIM_D29__EIM_DATA29 0x1b0b0
+			MX6QDL_PAD_EIM_D30__EIM_DATA30 0x1b0b0
+			MX6QDL_PAD_EIM_D31__EIM_DATA31 0x1b0b0
+			/* weim address */
+			MX6QDL_PAD_EIM_A25__EIM_ADDR25 0xb0b1
+			MX6QDL_PAD_EIM_A24__EIM_ADDR24 0xb0b1
+			MX6QDL_PAD_EIM_A23__EIM_ADDR23 0xb0b1
+			MX6QDL_PAD_EIM_A22__EIM_ADDR22 0xb0b1
+			MX6QDL_PAD_EIM_A21__EIM_ADDR21 0xb0b1
+			MX6QDL_PAD_EIM_A20__EIM_ADDR20 0xb0b1
+			MX6QDL_PAD_EIM_A19__EIM_ADDR19 0xb0b1
+			MX6QDL_PAD_EIM_A18__EIM_ADDR18 0xb0b1
+			MX6QDL_PAD_EIM_A17__EIM_ADDR17 0xb0b1
+			MX6QDL_PAD_EIM_A16__EIM_ADDR16 0xb0b1
+			MX6QDL_PAD_EIM_DA15__EIM_AD15 0xb0b1
+			MX6QDL_PAD_EIM_DA14__EIM_AD14 0xb0b1
+			MX6QDL_PAD_EIM_DA13__EIM_AD13 0xb0b1
+			MX6QDL_PAD_EIM_DA12__EIM_AD12 0xb0b1
+			MX6QDL_PAD_EIM_DA11__EIM_AD11 0xb0b1
+			MX6QDL_PAD_EIM_DA10__EIM_AD10 0xb0b1
+			MX6QDL_PAD_EIM_DA9__EIM_AD09 0xb0b1
+			MX6QDL_PAD_EIM_DA8__EIM_AD08 0xb0b1
+			MX6QDL_PAD_EIM_DA7__EIM_AD07 0xb0b1
+			MX6QDL_PAD_EIM_DA6__EIM_AD06 0xb0b1
+			MX6QDL_PAD_EIM_DA5__EIM_AD05 0xb0b1
+			MX6QDL_PAD_EIM_DA4__EIM_AD04 0xb0b1
+			MX6QDL_PAD_EIM_DA3__EIM_AD03 0xb0b1
+			MX6QDL_PAD_EIM_DA2__EIM_AD02 0xb0b1
+			MX6QDL_PAD_EIM_DA1__EIM_AD01 0xb0b1
+			MX6QDL_PAD_EIM_DA0__EIM_AD00 0xb0b1
+		>;
+	};
+
+	pinctrl_uart1: uart1grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT10__UART1_TX_DATA 0x1b0b1
+			MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA 0x1b0b1
+		>;
+	};
+
+	pinctrl_uart2: uart2grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_DAT5__UART2_TX_DATA 0x1b0b1
+			MX6QDL_PAD_SD3_DAT4__UART2_RX_DATA 0x1b0b1
+			MX6QDL_PAD_SD3_CLK__UART2_RTS_B 0x1b0b1
+			MX6QDL_PAD_SD3_CMD__UART2_CTS_B 0x1b0b1
+		>;
+	};
+
+	pinctrl_usbh1: usbh1grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_3__USB_H1_OC 0x1b0b0
+			/* usbh1_b OC */
+			MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x1b0b0
+		>;
+	};
+
+	pinctrl_usbh1_hubreset: usbh1hubresetgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x1b0b0
+		>;
+	};
+
+	pinctrl_usbotg: usbotggrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x17059
+			MX6QDL_PAD_KEY_COL4__USB_OTG_OC 0x1b0b0
+		>;
+	};
+
+	pinctrl_usbotgvbus: usbotgvbusgrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_ROW4__GPIO4_IO15 0x000b0
+		>;
+	};
+
+	pinctrl_usdhc1: usdhc1grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD1_CMD__SD1_CMD 0x17059
+			MX6QDL_PAD_SD1_CLK__SD1_CLK 0x10059
+			MX6QDL_PAD_SD1_DAT0__SD1_DATA0 0x17059
+			MX6QDL_PAD_SD1_DAT1__SD1_DATA1 0x17059
+			MX6QDL_PAD_SD1_DAT2__SD1_DATA2 0x17059
+			MX6QDL_PAD_SD1_DAT3__SD1_DATA3 0x17059
+		>;
+	};
+};

diff --git a/arch/arm/boot/dts/imx6q-gk802.dts b/arch/arm/boot/dts/imx6q-gk802.dts
index 00bd63e..b715deb 100644
--- a/arch/arm/boot/dts/imx6q-gk802.dts
+++ b/arch/arm/boot/dts/imx6q-gk802.dts

@@ -44,7 +44,7 @@
 			label = "recovery";
 			gpios = <&gpio3 16 1>;
 			linux,code = <0x198>; /* KEY_RESTART */
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 };

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts b/arch/arm/boot/dts/imx6q-icore-rqs.dts
similarity index 61%
copy from arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
copy to arch/arm/boot/dts/imx6q-icore-rqs.dts
index 3e53317..0053188 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
+++ b/arch/arm/boot/dts/imx6q-icore-rqs.dts

@@ -1,7 +1,5 @@
 /*
- * Device Tree Source for UniPhier PH1-LD10 Reference Board
- *
- * Copyright (C) 2015 Masahiro Yamada <yamada.masahiro@socionext.com>
+ * Copyright (C) 2015 Amarula Solutions B.V.
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -9,21 +7,20 @@
  * whole.
  *
  *  a) This file is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of the
- *     License, or (at your option) any later version.
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
  *
- *     This file is distributed in the hope that it will be useful,
+ *     This file is distributed in the hope that it will be useful
  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *     GNU General Public License for more details.
  *
- * Or, alternatively,
+ * Or, alternatively
  *
  *  b) Permission is hereby granted, free of charge, to any person
  *     obtaining a copy of this software and associated documentation
  *     files (the "Software"), to deal in the Software without
- *     restriction, including without limitation the rights to use,
+ *     restriction, including without limitation the rights to use
  *     copy, modify, merge, publish, distribute, sublicense, and/or
  *     sell copies of the Software, and to permit persons to whom the
  *     Software is furnished to do so, subject to the following
@@ -32,64 +29,50 @@
  *     The above copyright notice and this permission notice shall be
  *     included in all copies or substantial portions of the Software.
  *
- *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
  *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
  *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /dts-v1/;
-/include/ "uniphier-ph1-ld10.dtsi"
-/include/ "uniphier-support-card.dtsi"
+
+#include "imx6q.dtsi"
+#include "imx6qdl-icore-rqs.dtsi"
 
 / {
-	model = "UniPhier PH1-LD10 Reference Board";
-	compatible = "socionext,ph1-ld10-ref", "socionext,ph1-ld10";
+	model = "Engicam i.CoreM6 Quad SOM";
+	compatible = "engicam,imx6-icore-rqs", "fsl,imx6q";
 
-	memory {
-		device_type = "memory";
-		reg = <0 0x80000000 0 0xc0000000>;
-	};
-
-	chosen {
-		stdout-path = "serial0:115200n8";
-	};
-
-	aliases {
-		serial0 = &serial0;
-		serial1 = &serial1;
-		serial2 = &serial2;
-		serial3 = &serial3;
-		i2c0 = &i2c0;
-		i2c1 = &i2c1;
-		i2c2 = &i2c2;
-		i2c3 = &i2c3;
-		i2c4 = &i2c4;
-		i2c5 = &i2c5;
-		i2c6 = &i2c6;
+	sound {
+		compatible = "fsl,imx-audio-sgtl5000";
+		model = "imx-audio-sgtl5000";
+		ssi-controller = <&ssi1>;
+		audio-codec = <&codec>;
+		audio-routing =
+			"MIC_IN", "Mic Jack",
+			"Mic Jack", "Mic Bias",
+			"Headphone Jack", "HP_OUT";
+		mux-int-port = <1>;
+		mux-ext-port = <4>;
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
+&i2c3 {
+	codec: sgtl5000@0a {
+		compatible = "fsl,sgtl5000";
+		reg = <0x0a>;
+		clocks = <&clks IMX6QDL_CLK_CKO>;
+		VDDA-supply = <&reg_2p5v>;
+		VDDIO-supply = <&reg_3p3v>;
+		VDDD-supply = <&reg_1p8v>;
+	};
 };
 
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
-&ethsc {
-	interrupts = <0 48 4>;
-};
-
-&serial0 {
-	status = "okay";
-};
-
-&i2c0 {
+&sata {
 	status = "okay";
 };

diff --git a/arch/arm/boot/dts/imx6q-tbs2910.dts b/arch/arm/boot/dts/imx6q-tbs2910.dts
index 5645d52..0da81bc 100644
--- a/arch/arm/boot/dts/imx6q-tbs2910.dts
+++ b/arch/arm/boot/dts/imx6q-tbs2910.dts

@@ -91,34 +91,25 @@
 		};
 	};
 
-	regulators {
-		compatible = "simple-bus";
-		#address-cells = <1>;
-		#size-cells = <0>;
+	reg_2p5v: regulator-2p5v {
+		compatible = "regulator-fixed";
+		regulator-name = "2P5V";
+		regulator-min-microvolt = <2500000>;
+		regulator-max-microvolt = <2500000>;
+	};
 
-		reg_2p5v: regulator@0 {
-			compatible = "regulator-fixed";
-			reg = <0>;
-			regulator-name = "2P5V";
-			regulator-min-microvolt = <2500000>;
-			regulator-max-microvolt = <2500000>;
-		};
+	reg_3p3v: regulator-3p3v {
+		compatible = "regulator-fixed";
+		regulator-name = "3P3V";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
 
-		reg_3p3v: regulator@1 {
-			compatible = "regulator-fixed";
-			reg = <1>;
-			regulator-name = "3P3V";
-			regulator-min-microvolt = <3300000>;
-			regulator-max-microvolt = <3300000>;
-		};
-
-		reg_5p0v: regulator@2 {
-			compatible = "regulator-fixed";
-			reg = <2>;
-			regulator-name = "5P0V";
-			regulator-min-microvolt = <5000000>;
-			regulator-max-microvolt = <5000000>;
-		};
+	reg_5p0v: regulator-5p0v {
+		compatible = "regulator-fixed";
+		regulator-name = "5P0V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
 	};
 
 	sound-sgtl5000 {
@@ -205,6 +196,10 @@
 };
 
 &sata {
+	fsl,transmit-level-mV = <1104>;
+	fsl,transmit-boost-mdB = <3330>;
+	fsl,transmit-atten-16ths = <16>;
+	fsl,receive-eq-mdB = <3000>;
 	status = "okay";
 };
 
@@ -253,6 +248,9 @@
 	bus-width = <4>;
 	cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>;
 	vmmc-supply = <&reg_3p3v>;
+	vqmmc-supply = <&reg_3p3v>;
+	voltage-ranges = <3300 3300>;
+	no-1-8-v;
 	status = "okay";
 };
 
@@ -263,6 +261,9 @@
 	cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
 	vmmc-supply = <&reg_3p3v>;
+	vqmmc-supply = <&reg_3p3v>;
+	voltage-ranges = <3300 3300>;
+	no-1-8-v;
 	status = "okay";
 };
 
@@ -270,163 +271,160 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc4>;
 	bus-width = <8>;
+	vmmc-supply = <&reg_3p3v>;
+	vqmmc-supply = <&reg_3p3v>;
+	voltage-ranges = <3300 3300>;
 	non-removable;
 	no-1-8-v;
 	status = "okay";
 };
 
 &iomuxc {
-	imx6q-tbs2910 {
-		pinctrl_enet: enetgrp {
-			fsl,pins = <
-				MX6QDL_PAD_ENET_MDIO__ENET_MDIO       0x1b0b0
-				MX6QDL_PAD_ENET_MDC__ENET_MDC         0x1b0b0
-				MX6QDL_PAD_RGMII_TXC__RGMII_TXC       0x1b0b0
-				MX6QDL_PAD_RGMII_TD0__RGMII_TD0       0x1b0b0
-				MX6QDL_PAD_RGMII_TD1__RGMII_TD1       0x1b0b0
-				MX6QDL_PAD_RGMII_TD2__RGMII_TD2       0x1b0b0
-				MX6QDL_PAD_RGMII_TD3__RGMII_TD3       0x1b0b0
-				MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL 0x1b0b0
-				MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK  0x1b0b0
-				MX6QDL_PAD_RGMII_RXC__RGMII_RXC       0x1b0b0
-				MX6QDL_PAD_RGMII_RD0__RGMII_RD0       0x1b0b0
-				MX6QDL_PAD_RGMII_RD1__RGMII_RD1       0x1b0b0
-				MX6QDL_PAD_RGMII_RD2__RGMII_RD2       0x1b0b0
-				MX6QDL_PAD_RGMII_RD3__RGMII_RD3       0x1b0b0
-				MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b0b0
-				MX6QDL_PAD_GPIO_16__ENET_REF_CLK      0x4001b0a8
-				MX6QDL_PAD_ENET_CRS_DV__GPIO1_IO25    0x1b059
-			>;
-		};
-
-		pinctrl_hdmi: hdmigrp {
-			fsl,pins = <
-				MX6QDL_PAD_KEY_ROW2__HDMI_TX_CEC_LINE 0x1f8b0
-			>;
-		};
-
-		pinctrl_i2c1: i2c1grp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT9__I2C1_SCL        0x4001b8b1
-				MX6QDL_PAD_CSI0_DAT8__I2C1_SDA        0x4001b8b1
-			>;
-		};
-
-		pinctrl_i2c2: i2c2grp {
-			fsl,pins = <
-				MX6QDL_PAD_KEY_COL3__I2C2_SCL         0x4001b8b1
-				MX6QDL_PAD_KEY_ROW3__I2C2_SDA         0x4001b8b1
-			>;
-		};
-
-		pinctrl_i2c3: i2c3grp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_3__I2C3_SCL           0x4001b8b1
-				MX6QDL_PAD_GPIO_6__I2C3_SDA           0x4001b8b1
-			>;
-		};
-
-		pinctrl_ir: irgrp {
-			fsl,pins = <
-				MX6QDL_PAD_EIM_D18__GPIO3_IO18        0x17059
-			>;
-		};
-
-		pinctrl_pcie: pciegrp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_17__GPIO7_IO12        0x17059
-			>;
-		};
-
-		pinctrl_sgtl5000: sgtl5000grp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT7__AUD3_RXD        0x130b0
-				MX6QDL_PAD_CSI0_DAT4__AUD3_TXC        0x130b0
-				MX6QDL_PAD_CSI0_DAT5__AUD3_TXD        0x110b0
-				MX6QDL_PAD_CSI0_DAT6__AUD3_TXFS       0x130b0
-				MX6QDL_PAD_GPIO_0__CCM_CLKO1          0x130b0
-			>;
-		};
-
-		pinctrl_spdif: spdifgrp {
-			fsl,pins = <MX6QDL_PAD_GPIO_19__SPDIF_OUT     0x13091
-			>;
-		};
-
-		pinctrl_uart1: uart1grp {
-			fsl,pins = <
-				MX6QDL_PAD_CSI0_DAT10__UART1_TX_DATA  0x1b0b1
-				MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA  0x1b0b1
-			>;
-		};
-
-		pinctrl_uart2: uart2grp {
-			fsl,pins = <
-				MX6QDL_PAD_EIM_D26__UART2_TX_DATA     0x1b0b1
-				MX6QDL_PAD_EIM_D27__UART2_RX_DATA     0x1b0b1
-			>;
-		};
-
-		pinctrl_usbotg: usbotggrp {
-			fsl,pins = <
-				MX6QDL_PAD_ENET_RX_ER__USB_OTG_ID     0x17059
-			>;
-		};
-
-		pinctrl_usdhc2: usdhc2grp {
-			fsl,pins = <
-				MX6QDL_PAD_SD2_CMD__SD2_CMD           0x17059
-				MX6QDL_PAD_SD2_CLK__SD2_CLK           0x10059
-				MX6QDL_PAD_SD2_DAT0__SD2_DATA0        0x17059
-				MX6QDL_PAD_SD2_DAT1__SD2_DATA1        0x17059
-				MX6QDL_PAD_SD2_DAT2__SD2_DATA2        0x17059
-				MX6QDL_PAD_SD2_DAT3__SD2_DATA3        0x17059
-				MX6QDL_PAD_NANDF_D2__GPIO2_IO02       0x17059
-			>;
-		};
-
-		pinctrl_usdhc3: usdhc3grp {
-			fsl,pins = <
-				MX6QDL_PAD_SD3_CMD__SD3_CMD           0x17059
-				MX6QDL_PAD_SD3_CLK__SD3_CLK           0x10059
-				MX6QDL_PAD_SD3_DAT0__SD3_DATA0        0x17059
-				MX6QDL_PAD_SD3_DAT1__SD3_DATA1        0x17059
-				MX6QDL_PAD_SD3_DAT2__SD3_DATA2        0x17059
-				MX6QDL_PAD_SD3_DAT3__SD3_DATA3        0x17059
-				MX6QDL_PAD_NANDF_D0__GPIO2_IO00       0x17059
-				MX6QDL_PAD_NANDF_D1__GPIO2_IO01       0x17059
-			>;
-		};
-
-		pinctrl_usdhc4: usdhc4grp {
-			fsl,pins = <
-				MX6QDL_PAD_SD4_CMD__SD4_CMD           0x17059
-				MX6QDL_PAD_SD4_CLK__SD4_CLK           0x10059
-				MX6QDL_PAD_SD4_DAT0__SD4_DATA0        0x17059
-				MX6QDL_PAD_SD4_DAT1__SD4_DATA1        0x17059
-				MX6QDL_PAD_SD4_DAT2__SD4_DATA2        0x17059
-				MX6QDL_PAD_SD4_DAT3__SD4_DATA3        0x17059
-				MX6QDL_PAD_SD4_DAT4__SD4_DATA4        0x17059
-				MX6QDL_PAD_SD4_DAT5__SD4_DATA5        0x17059
-				MX6QDL_PAD_SD4_DAT6__SD4_DATA6        0x17059
-				MX6QDL_PAD_SD4_DAT7__SD4_DATA7        0x17059
-			>;
-		};
+	pinctrl_enet: enetgrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_MDIO__ENET_MDIO       0x1b0b0
+			MX6QDL_PAD_ENET_MDC__ENET_MDC         0x1b0b0
+			MX6QDL_PAD_RGMII_TXC__RGMII_TXC       0x1b0b0
+			MX6QDL_PAD_RGMII_TD0__RGMII_TD0       0x1b0b0
+			MX6QDL_PAD_RGMII_TD1__RGMII_TD1       0x1b0b0
+			MX6QDL_PAD_RGMII_TD2__RGMII_TD2       0x1b0b0
+			MX6QDL_PAD_RGMII_TD3__RGMII_TD3       0x1b0b0
+			MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL 0x1b0b0
+			MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK  0x1b0b0
+			MX6QDL_PAD_RGMII_RXC__RGMII_RXC       0x1b0b0
+			MX6QDL_PAD_RGMII_RD0__RGMII_RD0       0x1b0b0
+			MX6QDL_PAD_RGMII_RD1__RGMII_RD1       0x1b0b0
+			MX6QDL_PAD_RGMII_RD2__RGMII_RD2       0x1b0b0
+			MX6QDL_PAD_RGMII_RD3__RGMII_RD3       0x1b0b0
+			MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b0b0
+			MX6QDL_PAD_GPIO_16__ENET_REF_CLK      0x4001b0a8
+			MX6QDL_PAD_ENET_CRS_DV__GPIO1_IO25    0x1b059
+		>;
 	};
 
-	gpio_fan {
-		pinctrl_gpio_fan: gpiofangrp {
-			fsl,pins = <
-				MX6QDL_PAD_EIM_D28__GPIO3_IO28        0x130b1
-			>;
-		};
+	pinctrl_gpio_fan: gpiofangrp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D28__GPIO3_IO28        0x130b1
+		>;
 	};
 
-	gpio_leds {
-		pinctrl_gpio_leds: gpioledsgrp {
-			fsl,pins = <
-				MX6QDL_PAD_GPIO_2__GPIO1_IO02         0x130b1
-			>;
-		};
+	pinctrl_gpio_leds: gpioledsgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_2__GPIO1_IO02         0x130b1
+		>;
+	};
+
+	pinctrl_hdmi: hdmigrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_ROW2__HDMI_TX_CEC_LINE 0x1f8b0
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT9__I2C1_SCL        0x4001b8b1
+			MX6QDL_PAD_CSI0_DAT8__I2C1_SDA        0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL3__I2C2_SCL         0x4001b8b1
+			MX6QDL_PAD_KEY_ROW3__I2C2_SDA         0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c3: i2c3grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_3__I2C3_SCL           0x4001b8b1
+			MX6QDL_PAD_GPIO_6__I2C3_SDA           0x4001b8b1
+		>;
+	};
+
+	pinctrl_ir: irgrp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D18__GPIO3_IO18        0x17059
+		>;
+	};
+
+	pinctrl_pcie: pciegrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_17__GPIO7_IO12        0x17059
+		>;
+	};
+
+	pinctrl_sgtl5000: sgtl5000grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT7__AUD3_RXD        0x130b0
+			MX6QDL_PAD_CSI0_DAT4__AUD3_TXC        0x130b0
+			MX6QDL_PAD_CSI0_DAT5__AUD3_TXD        0x110b0
+			MX6QDL_PAD_CSI0_DAT6__AUD3_TXFS       0x130b0
+			MX6QDL_PAD_GPIO_0__CCM_CLKO1          0x130b0
+		>;
+	};
+
+	pinctrl_spdif: spdifgrp {
+		fsl,pins = <MX6QDL_PAD_GPIO_19__SPDIF_OUT     0x13091
+		>;
+	};
+
+	pinctrl_uart1: uart1grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT10__UART1_TX_DATA  0x1b0b1
+			MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA  0x1b0b1
+		>;
+	};
+
+	pinctrl_uart2: uart2grp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D26__UART2_TX_DATA     0x1b0b1
+			MX6QDL_PAD_EIM_D27__UART2_RX_DATA     0x1b0b1
+		>;
+	};
+
+	pinctrl_usbotg: usbotggrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_RX_ER__USB_OTG_ID     0x17059
+		>;
+	};
+
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD2_CMD__SD2_CMD           0x17059
+			MX6QDL_PAD_SD2_CLK__SD2_CLK           0x10059
+			MX6QDL_PAD_SD2_DAT0__SD2_DATA0        0x17059
+			MX6QDL_PAD_SD2_DAT1__SD2_DATA1        0x17059
+			MX6QDL_PAD_SD2_DAT2__SD2_DATA2        0x17059
+			MX6QDL_PAD_SD2_DAT3__SD2_DATA3        0x17059
+			MX6QDL_PAD_NANDF_D2__GPIO2_IO02       0x17059
+		>;
+	};
+
+	pinctrl_usdhc3: usdhc3grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD           0x17059
+			MX6QDL_PAD_SD3_CLK__SD3_CLK           0x10059
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0        0x17059
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1        0x17059
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2        0x17059
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3        0x17059
+			MX6QDL_PAD_NANDF_D0__GPIO2_IO00       0x17059
+			MX6QDL_PAD_NANDF_D1__GPIO2_IO01       0x17059
+		>;
+	};
+
+	pinctrl_usdhc4: usdhc4grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_CMD__SD4_CMD           0x17059
+			MX6QDL_PAD_SD4_CLK__SD4_CLK           0x10059
+			MX6QDL_PAD_SD4_DAT0__SD4_DATA0        0x17059
+			MX6QDL_PAD_SD4_DAT1__SD4_DATA1        0x17059
+			MX6QDL_PAD_SD4_DAT2__SD4_DATA2        0x17059
+			MX6QDL_PAD_SD4_DAT3__SD4_DATA3        0x17059
+			MX6QDL_PAD_SD4_DAT4__SD4_DATA4        0x17059
+			MX6QDL_PAD_SD4_DAT5__SD4_DATA5        0x17059
+			MX6QDL_PAD_SD4_DAT6__SD4_DATA6        0x17059
+			MX6QDL_PAD_SD4_DAT7__SD4_DATA7        0x17059
+		>;
 	};
 };

diff --git a/arch/arm/boot/dts/imx6q-tx6q-1110.dts b/arch/arm/boot/dts/imx6q-tx6q-1110.dts
index 88aa1e4..2792da9 100644
--- a/arch/arm/boot/dts/imx6q-tx6q-1110.dts
+++ b/arch/arm/boot/dts/imx6q-tx6q-1110.dts

@@ -77,7 +77,7 @@
 		interrupt-parent = <&gpio3>;
 		interrupts = <22 0>;
 		wakeup-gpios = <&gpio3 22 GPIO_ACTIVE_HIGH>;
-		linux,wakeup;
+		wakeup-source;
 	};
 };
 

diff --git a/arch/arm/boot/dts/imx6q-wandboard-revb1.dts b/arch/arm/boot/dts/imx6q-wandboard-revb1.dts
index 20bf3c2..9207d80 100644
--- a/arch/arm/boot/dts/imx6q-wandboard-revb1.dts
+++ b/arch/arm/boot/dts/imx6q-wandboard-revb1.dts

@@ -13,7 +13,7 @@
 #include "imx6qdl-wandboard-revb1.dtsi"
 
 / {
-	model = "Wandboard i.MX6 Quad Board";
+	model = "Wandboard i.MX6 Quad Board rev B1";
 	compatible = "wand,imx6q-wandboard", "fsl,imx6q";
 
 	memory {

diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
index 0d93c0e..cd10c8d 100644
--- a/arch/arm/boot/dts/imx6q.dtsi
+++ b/arch/arm/boot/dts/imx6q.dtsi

@@ -22,7 +22,7 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		cpu@0 {
+		cpu0: cpu@0 {
 			compatible = "arm,cortex-a9";
 			device_type = "cpu";
 			reg = <0>;
@@ -162,6 +162,7 @@
 				};
 
 				ipu2_di0_mipi: endpoint@2 {
+					remote-endpoint = <&mipi_mux_2>;
 				};
 
 				ipu2_di0_lvds0: endpoint@3 {
@@ -183,6 +184,7 @@
 				};
 
 				ipu2_di1_mipi: endpoint@2 {
+					remote-endpoint = <&mipi_mux_3>;
 				};
 
 				ipu2_di1_lvds0: endpoint@3 {

diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
new file mode 100644
index 0000000..b33e5a9
--- /dev/null
+++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi

@@ -0,0 +1,984 @@
+/*
+ * Copyright 2014-2016 Toradex AG
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2011 Linaro Ltd.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+	model = "Toradex Apalis iMX6Q/D Module";
+	compatible = "toradex,apalis_imx6q", "fsl,imx6q";
+
+	backlight: backlight {
+		compatible = "pwm-backlight";
+		pwms = <&pwm4 0 5000000>;
+		status = "disabled";
+	};
+
+	/* DDC_I2C: I2C2_SDA/SCL on MXM3 205/207 */
+	i2cddc: i2c@0 {
+		compatible = "i2c-gpio";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2c_ddc>;
+		gpios = <&gpio3 16 GPIO_ACTIVE_HIGH /* sda */
+			 &gpio2 30 GPIO_ACTIVE_HIGH /* scl */
+			>;
+		i2c-gpio,delay-us = <2>;	/* ~100 kHz */
+		status = "disabled";
+	};
+
+	reg_1p8v: regulator-1p8v {
+		compatible = "regulator-fixed";
+		regulator-name = "1P8V";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		regulator-always-on;
+	};
+
+	reg_2p5v: regulator-2p5v {
+		compatible = "regulator-fixed";
+		regulator-name = "2P5V";
+		regulator-min-microvolt = <2500000>;
+		regulator-max-microvolt = <2500000>;
+		regulator-always-on;
+	};
+
+	reg_3p3v: regulator-3p3v {
+		compatible = "regulator-fixed";
+		regulator-name = "3P3V";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+	};
+
+	reg_usb_otg_vbus: regulator-usb-otg-vbus {
+		compatible = "regulator-fixed";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_regulator_usbotg_pwr>;
+		regulator-name = "usb_otg_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+		status = "disabled";
+	};
+
+	/* on module USB hub */
+	reg_usb_host_vbus_hub: regulator-usb-host-vbus-hub {
+		compatible = "regulator-fixed";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_regulator_usbhub_pwr>;
+		regulator-name = "usb_host_vbus_hub";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio = <&gpio3 28 GPIO_ACTIVE_HIGH>;
+		startup-delay-us = <2000>;
+		enable-active-high;
+		status = "okay";
+	};
+
+	reg_usb_host_vbus: regulator-usb-host-vbus {
+		compatible = "regulator-fixed";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_regulator_usbh_pwr>;
+		regulator-name = "usb_host_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio =  <&gpio1 0 GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+		vin-supply = <&reg_usb_host_vbus_hub>;
+		status = "disabled";
+	};
+
+	sound {
+		compatible = "fsl,imx-audio-sgtl5000";
+		model = "imx6q-apalis-sgtl5000";
+		ssi-controller = <&ssi1>;
+		audio-codec = <&codec>;
+		audio-routing =
+			"LINE_IN", "Line In Jack",
+			"MIC_IN", "Mic Jack",
+			"Mic Jack", "Mic Bias",
+			"Headphone Jack", "HP_OUT";
+		mux-int-port = <1>;
+		mux-ext-port = <4>;
+	};
+
+	sound_spdif: sound-spdif {
+		compatible = "fsl,imx-audio-spdif";
+		model = "imx-spdif";
+		spdif-controller = <&spdif>;
+		spdif-in;
+		spdif-out;
+		status = "disabled";
+	};
+};
+
+&audmux {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_audmux>;
+	status = "okay";
+};
+
+&can1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_flexcan1>;
+	status = "disabled";
+};
+
+&can2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_flexcan2>;
+	status = "disabled";
+};
+
+/* Apalis SPI1 */
+&ecspi1 {
+	fsl,spi-num-chipselects = <1>;
+	cs-gpios = <&gpio5 25 GPIO_ACTIVE_HIGH>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi1>;
+	status = "disabled";
+};
+
+/* Apalis SPI2 */
+&ecspi2 {
+	fsl,spi-num-chipselects = <1>;
+	cs-gpios = <&gpio2 26 GPIO_ACTIVE_HIGH>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_ecspi2>;
+	status = "disabled";
+};
+
+&fec {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet>;
+	phy-mode = "rgmii";
+	phy-handle = <&ethphy>;
+	phy-reset-duration = <10>;
+	phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
+	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ethphy: ethernet-phy@7 {
+			interrupt-parent = <&gpio1>;
+			interrupts = <30 IRQ_TYPE_LEVEL_LOW>;
+			reg = <7>;
+		};
+	};
+};
+
+/*
+ * GEN1_I2C: I2C1_SDA/SCL on MXM3 209/211 (e.g. RTC on carrier
+ * board)
+ */
+&i2c1 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c1>;
+	status = "disabled";
+};
+
+/*
+ * PWR_I2C: power I2C to audio codec, PMIC, temperature sensor and
+ * touch screen controller
+ */
+&i2c2 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c2>;
+	status = "okay";
+
+	pmic: pfuze100@08 {
+		compatible = "fsl,pfuze100";
+		reg = <0x08>;
+
+		regulators {
+			sw1a_reg: sw1ab {
+				regulator-min-microvolt = <300000>;
+				regulator-max-microvolt = <1875000>;
+				regulator-boot-on;
+				regulator-always-on;
+				regulator-ramp-delay = <6250>;
+			};
+
+			sw1c_reg: sw1c {
+				regulator-min-microvolt = <300000>;
+				regulator-max-microvolt = <1875000>;
+				regulator-boot-on;
+				regulator-always-on;
+				regulator-ramp-delay = <6250>;
+			};
+
+			sw3a_reg: sw3a {
+				regulator-min-microvolt = <400000>;
+				regulator-max-microvolt = <1975000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			swbst_reg: swbst {
+				regulator-min-microvolt = <5000000>;
+				regulator-max-microvolt = <5150000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			snvs_reg: vsnvs {
+				regulator-min-microvolt = <1000000>;
+				regulator-max-microvolt = <3000000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			vref_reg: vrefddr {
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			vgen1_reg: vgen1 {
+				regulator-min-microvolt = <800000>;
+				regulator-max-microvolt = <1550000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			vgen2_reg: vgen2 {
+				regulator-min-microvolt = <800000>;
+				regulator-max-microvolt = <1550000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			vgen3_reg: vgen3 {
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			vgen4_reg: vgen4 {
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			vgen5_reg: vgen5 {
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			vgen6_reg: vgen6 {
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <3300000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+		};
+	};
+
+	codec: sgtl5000@0a {
+		compatible = "fsl,sgtl5000";
+		reg = <0x0a>;
+		clocks = <&clks 201>;
+		VDDA-supply = <&reg_2p5v>;
+		VDDIO-supply = <&reg_3p3v>;
+	};
+
+	/* STMPE811 touch screen controller */
+	stmpe811@41 {
+		compatible = "st,stmpe811";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_touch_int>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x41>;
+		interrupts = <10 IRQ_TYPE_LEVEL_LOW>;
+		interrupt-parent = <&gpio4>;
+		interrupt-controller;
+		id = <0>;
+		blocks = <0x5>;
+		irq-trigger = <0x1>;
+
+		stmpe_touchscreen {
+			compatible = "st,stmpe-ts";
+			reg = <0>;
+			/* 3.25 MHz ADC clock speed */
+			st,adc-freq = <1>;
+			/* 8 sample average control */
+			st,ave-ctrl = <3>;
+			/* 7 length fractional part in z */
+			st,fraction-z = <7>;
+			/*
+			 * 50 mA typical 80 mA max touchscreen drivers
+			 * current limit value
+			 */
+			st,i-drive = <1>;
+			/* 12-bit ADC */
+			st,mod-12b = <1>;
+			/* internal ADC reference */
+			st,ref-sel = <0>;
+			/* ADC converstion time: 80 clocks */
+			st,sample-time = <4>;
+			/* 1 ms panel driver settling time */
+			st,settling = <3>;
+			/* 5 ms touch detect interrupt delay */
+			st,touch-det-delay = <5>;
+		};
+	};
+};
+
+/*
+ * GEN2_I2C, CAM: I2C3_SDA/SCL on MXM3 201/203 (unused)
+ */
+&i2c3 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default", "recovery";
+	pinctrl-0 = <&pinctrl_i2c3>;
+	pinctrl-1 = <&pinctrl_i2c3_recovery>;
+	scl-gpios = <&gpio3 17 GPIO_ACTIVE_HIGH>;
+	sda-gpios = <&gpio3 18 GPIO_ACTIVE_HIGH>;
+	status = "disabled";
+};
+
+&pwm1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm1>;
+	status = "disabled";
+};
+
+&pwm2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm2>;
+	status = "disabled";
+};
+
+&pwm3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm3>;
+	status = "disabled";
+};
+
+&pwm4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pwm4>;
+	status = "disabled";
+};
+
+&spdif {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_spdif>;
+	status = "disabled";
+};
+
+&ssi1 {
+	status = "okay";
+};
+
+&uart1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart1_dte &pinctrl_uart1_ctrl>;
+	fsl,dte-mode;
+	fsl,uart-has-rtscts;
+	status = "disabled";
+};
+
+&uart2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart2_dte>;
+	fsl,dte-mode;
+	fsl,uart-has-rtscts;
+	status = "disabled";
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart4_dte>;
+	fsl,dte-mode;
+	status = "disabled";
+};
+
+&uart5 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart5_dte>;
+	fsl,dte-mode;
+	status = "disabled";
+};
+
+&usbotg {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbotg>;
+	disable-over-current;
+	status = "disabled";
+};
+
+/* MMC1 */
+&usdhc1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc1>;
+	vqmmc-supply = <&reg_3p3v>;
+	bus-width = <8>;
+	voltage-ranges = <3300 3300>;
+	status = "disabled";
+};
+
+/* SD1 */
+&usdhc2 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc2>;
+	vqmmc-supply = <&reg_3p3v>;
+	bus-width = <4>;
+	voltage-ranges = <3300 3300>;
+	status = "disabled";
+};
+
+/* eMMC */
+&usdhc3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc3>;
+	vqmmc-supply = <&reg_3p3v>;
+	bus-width = <8>;
+	voltage-ranges = <3300 3300>;
+	non-removable;
+	status = "okay";
+};
+
+&weim {
+	status = "disabled";
+};
+
+&iomuxc {
+	/* pins used on module */
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_reset_moci>;
+
+	pinctrl_apalis_gpio1: gpio2io04grp {
+		fsl,pins = <
+			MX6QDL_PAD_NANDF_D4__GPIO2_IO04 0x130b0
+		>;
+	};
+
+	pinctrl_apalis_gpio2: gpio2io05grp {
+		fsl,pins = <
+			MX6QDL_PAD_NANDF_D5__GPIO2_IO05 0x130b0
+		>;
+	};
+
+	pinctrl_apalis_gpio3: gpio2io06grp {
+		fsl,pins = <
+			MX6QDL_PAD_NANDF_D6__GPIO2_IO06 0x130b0
+		>;
+	};
+
+	pinctrl_apalis_gpio4: gpio2io07grp {
+		fsl,pins = <
+			MX6QDL_PAD_NANDF_D7__GPIO2_IO07 0x130b0
+		>;
+	};
+
+	pinctrl_apalis_gpio5: gpio6io10grp {
+		fsl,pins = <
+			MX6QDL_PAD_NANDF_RB0__GPIO6_IO10 0x130b0
+		>;
+	};
+
+	pinctrl_apalis_gpio6: gpio6io09grp {
+		fsl,pins = <
+			MX6QDL_PAD_NANDF_WP_B__GPIO6_IO09 0x130b0
+		>;
+	};
+
+	pinctrl_apalis_gpio7: gpio1io02grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_2__GPIO1_IO02 0x130b0
+		>;
+	};
+
+	pinctrl_apalis_gpio8: gpio1io06grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_6__GPIO1_IO06 0x130b0
+		>;
+	};
+
+	pinctrl_audmux: audmuxgrp {
+		fsl,pins = <
+			MX6QDL_PAD_DISP0_DAT20__AUD4_TXC	0x130b0
+			MX6QDL_PAD_DISP0_DAT21__AUD4_TXD	0x130b0
+			MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS	0x130b0
+			MX6QDL_PAD_DISP0_DAT23__AUD4_RXD	0x130b0
+			/* SGTL5000 sys_mclk */
+			MX6QDL_PAD_GPIO_5__CCM_CLKO1		0x130b0
+		>;
+	};
+
+	pinctrl_cam_mclk: cammclkgrp {
+		fsl,pins = <
+			/* CAM sys_mclk */
+			MX6QDL_PAD_NANDF_CS2__CCM_CLKO2 0x00b0
+		>;
+	};
+
+	pinctrl_ecspi1: ecspi1grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT6__ECSPI1_MISO 0x100b1
+			MX6QDL_PAD_CSI0_DAT5__ECSPI1_MOSI 0x100b1
+			MX6QDL_PAD_CSI0_DAT4__ECSPI1_SCLK 0x100b1
+			/* SPI1 cs */
+			MX6QDL_PAD_CSI0_DAT7__GPIO5_IO25 0x000b1
+		>;
+	};
+
+	pinctrl_ecspi2: ecspi2grp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_OE__ECSPI2_MISO 0x100b1
+			MX6QDL_PAD_EIM_CS1__ECSPI2_MOSI 0x100b1
+			MX6QDL_PAD_EIM_CS0__ECSPI2_SCLK 0x100b1
+			/* SPI2 cs */
+			MX6QDL_PAD_EIM_RW__GPIO2_IO26 0x000b1
+		>;
+	};
+
+	pinctrl_enet: enetgrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_MDIO__ENET_MDIO		0x100b0
+			MX6QDL_PAD_ENET_MDC__ENET_MDC		0x100b0
+			MX6QDL_PAD_RGMII_TXC__RGMII_TXC		0x100b0
+			MX6QDL_PAD_RGMII_TD0__RGMII_TD0		0x100b0
+			MX6QDL_PAD_RGMII_TD1__RGMII_TD1		0x100b0
+			MX6QDL_PAD_RGMII_TD2__RGMII_TD2		0x100b0
+			MX6QDL_PAD_RGMII_TD3__RGMII_TD3		0x100b0
+			MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL	0x100b0
+			MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK	0x100b0
+			MX6QDL_PAD_RGMII_RXC__RGMII_RXC		0x1b0b0
+			MX6QDL_PAD_RGMII_RD0__RGMII_RD0		0x1b0b0
+			MX6QDL_PAD_RGMII_RD1__RGMII_RD1		0x1b0b0
+			MX6QDL_PAD_RGMII_RD2__RGMII_RD2		0x1b0b0
+			MX6QDL_PAD_RGMII_RD3__RGMII_RD3		0x1b0b0
+			MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL	0x1b0b0
+			/* Ethernet PHY reset */
+			MX6QDL_PAD_ENET_CRS_DV__GPIO1_IO25	0x000b0
+			/* Ethernet PHY interrupt */
+			MX6QDL_PAD_ENET_TXD0__GPIO1_IO30	0x000b1
+		>;
+	};
+
+	pinctrl_flexcan1: flexcan1grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_7__FLEXCAN1_TX 0x1b0b0
+			MX6QDL_PAD_GPIO_8__FLEXCAN1_RX 0x1b0b0
+		>;
+	};
+
+	pinctrl_flexcan2: flexcan2grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL4__FLEXCAN2_TX 0x1b0b0
+			MX6QDL_PAD_KEY_ROW4__FLEXCAN2_RX 0x1b0b0
+		>;
+	};
+
+	pinctrl_gpio_keys: gpio1io04grp {
+		fsl,pins = <
+			/* Power button */
+			MX6QDL_PAD_GPIO_4__GPIO1_IO04 0x1b0b0
+		>;
+	};
+
+	pinctrl_hdmi_cec: hdmicecgrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_ROW2__HDMI_TX_CEC_LINE 0x1f8b0
+		>;
+	};
+
+	pinctrl_i2c_ddc: gpioi2cddcgrp {
+		fsl,pins = <
+			/* DDC bitbang */
+			MX6QDL_PAD_EIM_EB2__GPIO2_IO30 0x1b0b0
+			MX6QDL_PAD_EIM_D16__GPIO3_IO16 0x1b0b0
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT8__I2C1_SDA 0x4001b8b1
+			MX6QDL_PAD_CSI0_DAT9__I2C1_SCL 0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL3__I2C2_SCL 0x4001b8b1
+			MX6QDL_PAD_KEY_ROW3__I2C2_SDA 0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c3: i2c3grp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D17__I2C3_SCL 0x4001b8b1
+			MX6QDL_PAD_EIM_D18__I2C3_SDA 0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c3_recovery: i2c3recoverygrp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D17__GPIO3_IO17 0x4001b8b1
+			MX6QDL_PAD_EIM_D18__GPIO3_IO18 0x4001b8b1
+		>;
+	};
+
+	pinctrl_ipu1_csi0: ipu1csi0grp { /* parallel camera */
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT12__IPU1_CSI0_DATA12  0xb0b1
+			MX6QDL_PAD_CSI0_DAT13__IPU1_CSI0_DATA13  0xb0b1
+			MX6QDL_PAD_CSI0_DAT14__IPU1_CSI0_DATA14  0xb0b1
+			MX6QDL_PAD_CSI0_DAT15__IPU1_CSI0_DATA15  0xb0b1
+			MX6QDL_PAD_CSI0_DAT16__IPU1_CSI0_DATA16  0xb0b1
+			MX6QDL_PAD_CSI0_DAT17__IPU1_CSI0_DATA17  0xb0b1
+			MX6QDL_PAD_CSI0_DAT18__IPU1_CSI0_DATA18  0xb0b1
+			MX6QDL_PAD_CSI0_DAT19__IPU1_CSI0_DATA19  0xb0b1
+			MX6QDL_PAD_CSI0_PIXCLK__IPU1_CSI0_PIXCLK 0xb0b1
+			MX6QDL_PAD_CSI0_MCLK__IPU1_CSI0_HSYNC    0xb0b1
+			MX6QDL_PAD_CSI0_VSYNC__IPU1_CSI0_VSYNC   0xb0b1
+		>;
+	};
+
+	pinctrl_ipu1_lcdif: ipu1lcdifgrp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_A16__IPU1_DI1_DISP_CLK	0x61
+			/* DE */
+			MX6QDL_PAD_EIM_DA10__IPU1_DI1_PIN15	0x61
+			/* HSync */
+			MX6QDL_PAD_EIM_DA11__IPU1_DI1_PIN02	0x61
+			/* VSync */
+			MX6QDL_PAD_EIM_DA12__IPU1_DI1_PIN03	0x61
+			MX6QDL_PAD_EIM_DA9__IPU1_DISP1_DATA00	0x61
+			MX6QDL_PAD_EIM_DA8__IPU1_DISP1_DATA01	0x61
+			MX6QDL_PAD_EIM_DA7__IPU1_DISP1_DATA02	0x61
+			MX6QDL_PAD_EIM_DA6__IPU1_DISP1_DATA03	0x61
+			MX6QDL_PAD_EIM_DA5__IPU1_DISP1_DATA04	0x61
+			MX6QDL_PAD_EIM_DA4__IPU1_DISP1_DATA05	0x61
+			MX6QDL_PAD_EIM_DA3__IPU1_DISP1_DATA06	0x61
+			MX6QDL_PAD_EIM_DA2__IPU1_DISP1_DATA07	0x61
+			MX6QDL_PAD_EIM_DA1__IPU1_DISP1_DATA08	0x61
+			MX6QDL_PAD_EIM_DA0__IPU1_DISP1_DATA09	0x61
+			MX6QDL_PAD_EIM_EB1__IPU1_DISP1_DATA10	0x61
+			MX6QDL_PAD_EIM_EB0__IPU1_DISP1_DATA11	0x61
+			MX6QDL_PAD_EIM_A17__IPU1_DISP1_DATA12	0x61
+			MX6QDL_PAD_EIM_A18__IPU1_DISP1_DATA13	0x61
+			MX6QDL_PAD_EIM_A19__IPU1_DISP1_DATA14	0x61
+			MX6QDL_PAD_EIM_A20__IPU1_DISP1_DATA15	0x61
+			MX6QDL_PAD_EIM_A21__IPU1_DISP1_DATA16	0x61
+			MX6QDL_PAD_EIM_A22__IPU1_DISP1_DATA17	0x61
+			MX6QDL_PAD_EIM_A23__IPU1_DISP1_DATA18	0x61
+			MX6QDL_PAD_EIM_A24__IPU1_DISP1_DATA19	0x61
+			MX6QDL_PAD_EIM_D31__IPU1_DISP1_DATA20	0x61
+			MX6QDL_PAD_EIM_D30__IPU1_DISP1_DATA21	0x61
+			MX6QDL_PAD_EIM_D26__IPU1_DISP1_DATA22	0x61
+			MX6QDL_PAD_EIM_D27__IPU1_DISP1_DATA23	0x61
+		>;
+	};
+
+	pinctrl_ipu2_vdac: ipu2vdacgrp {
+		fsl,pins = <
+			MX6QDL_PAD_DI0_DISP_CLK__IPU2_DI0_DISP_CLK 0xd1
+			MX6QDL_PAD_DI0_PIN15__IPU2_DI0_PIN15       0xd1
+			MX6QDL_PAD_DI0_PIN2__IPU2_DI0_PIN02        0xd1
+			MX6QDL_PAD_DI0_PIN3__IPU2_DI0_PIN03        0xd1
+			MX6QDL_PAD_DISP0_DAT0__IPU2_DISP0_DATA00   0xf9
+			MX6QDL_PAD_DISP0_DAT1__IPU2_DISP0_DATA01   0xf9
+			MX6QDL_PAD_DISP0_DAT2__IPU2_DISP0_DATA02   0xf9
+			MX6QDL_PAD_DISP0_DAT3__IPU2_DISP0_DATA03   0xf9
+			MX6QDL_PAD_DISP0_DAT4__IPU2_DISP0_DATA04   0xf9
+			MX6QDL_PAD_DISP0_DAT5__IPU2_DISP0_DATA05   0xf9
+			MX6QDL_PAD_DISP0_DAT6__IPU2_DISP0_DATA06   0xf9
+			MX6QDL_PAD_DISP0_DAT7__IPU2_DISP0_DATA07   0xf9
+			MX6QDL_PAD_DISP0_DAT8__IPU2_DISP0_DATA08   0xf9
+			MX6QDL_PAD_DISP0_DAT9__IPU2_DISP0_DATA09   0xf9
+			MX6QDL_PAD_DISP0_DAT10__IPU2_DISP0_DATA10  0xf9
+			MX6QDL_PAD_DISP0_DAT11__IPU2_DISP0_DATA11  0xf9
+			MX6QDL_PAD_DISP0_DAT12__IPU2_DISP0_DATA12  0xf9
+			MX6QDL_PAD_DISP0_DAT13__IPU2_DISP0_DATA13  0xf9
+			MX6QDL_PAD_DISP0_DAT14__IPU2_DISP0_DATA14  0xf9
+			MX6QDL_PAD_DISP0_DAT15__IPU2_DISP0_DATA15  0xf9
+		>;
+	};
+
+	pinctrl_mmc_cd: gpiommccdgrp {
+		fsl,pins = <
+			 /* MMC1 CD */
+			MX6QDL_PAD_DI0_PIN4__GPIO4_IO20 0x000b0
+		>;
+	};
+
+	pinctrl_pwm1: pwm1grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_9__PWM1_OUT 0x1b0b1
+		>;
+	};
+
+	pinctrl_pwm2: pwm2grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_1__PWM2_OUT 0x1b0b1
+		>;
+	};
+
+	pinctrl_pwm3: pwm3grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1
+		>;
+	};
+
+	pinctrl_pwm4: pwm4grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_DAT2__PWM4_OUT 0x1b0b1
+		>;
+	};
+
+	pinctrl_regulator_usbh_pwr: gpioregusbhpwrgrp {
+		fsl,pins = <
+			/* USBH_EN */
+			MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x0f058
+		>;
+	};
+
+	pinctrl_regulator_usbhub_pwr: gpioregusbhubpwrgrp {
+		fsl,pins = <
+			/* USBH_HUB_EN */
+			MX6QDL_PAD_EIM_D28__GPIO3_IO28 0x0f058
+		>;
+	};
+
+	pinctrl_regulator_usbotg_pwr: gpioregusbotgpwrgrp {
+		fsl,pins = <
+			/* USBO1 power en */
+			MX6QDL_PAD_EIM_D22__GPIO3_IO22 0x0f058
+		>;
+	};
+
+	pinctrl_reset_moci: gpioresetmocigrp {
+		fsl,pins = <
+			/* RESET_MOCI control */
+			MX6QDL_PAD_ENET_TX_EN__GPIO1_IO28 0x0f058
+		>;
+	};
+
+	pinctrl_sd_cd: gpiosdcdgrp {
+		fsl,pins = <
+			/* SD1 CD */
+			MX6QDL_PAD_NANDF_CS1__GPIO6_IO14 0x000b0
+		>;
+	};
+
+	pinctrl_spdif: spdifgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_16__SPDIF_IN  0x1b0b0
+			MX6QDL_PAD_GPIO_17__SPDIF_OUT 0x1b0b0
+		>;
+	};
+
+	pinctrl_touch_int: gpiotouchintgrp {
+		fsl,pins = <
+			/* STMPE811 interrupt */
+			MX6QDL_PAD_KEY_COL2__GPIO4_IO10 0x1b0b0
+		>;
+	};
+
+	pinctrl_uart1_dce: uart1dcegrp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT10__UART1_TX_DATA 0x1b0b1
+			MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA 0x1b0b1
+		>;
+	};
+
+	/* DTE mode */
+	pinctrl_uart1_dte: uart1dtegrp {
+		fsl,pins = <
+			MX6QDL_PAD_CSI0_DAT10__UART1_RX_DATA 0x1b0b1
+			MX6QDL_PAD_CSI0_DAT11__UART1_TX_DATA 0x1b0b1
+			MX6QDL_PAD_EIM_D19__UART1_RTS_B 0x1b0b1
+			MX6QDL_PAD_EIM_D20__UART1_CTS_B 0x1b0b1
+		>;
+	};
+
+	/* Additional DTR, DSR, DCD */
+	pinctrl_uart1_ctrl: uart1ctrlgrp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D23__UART1_DCD_B 0x1b0b0
+			MX6QDL_PAD_EIM_D24__UART1_DTR_B 0x1b0b0
+			MX6QDL_PAD_EIM_D25__UART1_DSR_B 0x1b0b0
+		>;
+	};
+
+	pinctrl_uart2_dce: uart2dcegrp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_DAT4__UART2_RX_DATA	0x1b0b1
+			MX6QDL_PAD_SD4_DAT7__UART2_TX_DATA	0x1b0b1
+		>;
+	};
+
+	/* DTE mode */
+	pinctrl_uart2_dte: uart2dtegrp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_DAT4__UART2_TX_DATA	0x1b0b1
+			MX6QDL_PAD_SD4_DAT7__UART2_RX_DATA	0x1b0b1
+			MX6QDL_PAD_SD4_DAT6__UART2_RTS_B	0x1b0b1
+			MX6QDL_PAD_SD4_DAT5__UART2_CTS_B	0x1b0b1
+		>;
+	};
+
+	pinctrl_uart4_dce: uart4dcegrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL0__UART4_TX_DATA 0x1b0b1
+			MX6QDL_PAD_KEY_ROW0__UART4_RX_DATA 0x1b0b1
+		>;
+	};
+
+	/* DTE mode */
+	pinctrl_uart4_dte: uart4dtegrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL0__UART4_RX_DATA 0x1b0b1
+			MX6QDL_PAD_KEY_ROW0__UART4_TX_DATA 0x1b0b1
+		>;
+	};
+
+	pinctrl_uart5_dce: uart5dcegrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL1__UART5_TX_DATA 0x1b0b1
+			MX6QDL_PAD_KEY_ROW1__UART5_RX_DATA 0x1b0b1
+		>;
+	};
+
+	/* DTE mode */
+	pinctrl_uart5_dte: uart5dtegrp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL1__UART5_RX_DATA 0x1b0b1
+			MX6QDL_PAD_KEY_ROW1__UART5_TX_DATA 0x1b0b1
+		>;
+	};
+
+	pinctrl_usbotg: usbotggrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_RX_ER__USB_OTG_ID 0x17059
+		>;
+	};
+
+	pinctrl_usdhc1: usdhc1grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD1_CMD__SD1_CMD    0x17071
+			MX6QDL_PAD_SD1_CLK__SD1_CLK    0x10071
+			MX6QDL_PAD_SD1_DAT0__SD1_DATA0 0x17071
+			MX6QDL_PAD_SD1_DAT1__SD1_DATA1 0x17071
+			MX6QDL_PAD_SD1_DAT2__SD1_DATA2 0x17071
+			MX6QDL_PAD_SD1_DAT3__SD1_DATA3 0x17071
+			MX6QDL_PAD_NANDF_D0__SD1_DATA4 0x17071
+			MX6QDL_PAD_NANDF_D1__SD1_DATA5 0x17071
+			MX6QDL_PAD_NANDF_D2__SD1_DATA6 0x17071
+			MX6QDL_PAD_NANDF_D3__SD1_DATA7 0x17071
+		>;
+	};
+
+	pinctrl_usdhc2: usdhc2grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD2_CMD__SD2_CMD    0x17071
+			MX6QDL_PAD_SD2_CLK__SD2_CLK    0x10071
+			MX6QDL_PAD_SD2_DAT0__SD2_DATA0 0x17071
+			MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17071
+			MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17071
+			MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17071
+		>;
+	};
+
+	pinctrl_usdhc3: usdhc3grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD    0x17059
+			MX6QDL_PAD_SD3_CLK__SD3_CLK    0x10059
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0 0x17059
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059
+			MX6QDL_PAD_SD3_DAT4__SD3_DATA4 0x17059
+			MX6QDL_PAD_SD3_DAT5__SD3_DATA5 0x17059
+			MX6QDL_PAD_SD3_DAT6__SD3_DATA6 0x17059
+			MX6QDL_PAD_SD3_DAT7__SD3_DATA7 0x17059
+			/* eMMC reset */
+			MX6QDL_PAD_SD3_RST__SD3_RESET  0x17059
+		>;
+	};
+
+	pinctrl_usdhc3_100mhz: usdhc3100mhzgrp {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD    0x170b9
+			MX6QDL_PAD_SD3_CLK__SD3_CLK    0x100b9
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0 0x170b9
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x170b9
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x170b9
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x170b9
+			MX6QDL_PAD_SD3_DAT4__SD3_DATA4 0x170b9
+			MX6QDL_PAD_SD3_DAT5__SD3_DATA5 0x170b9
+			MX6QDL_PAD_SD3_DAT6__SD3_DATA6 0x170b9
+			MX6QDL_PAD_SD3_DAT7__SD3_DATA7 0x170b9
+			/* eMMC reset */
+			MX6QDL_PAD_SD3_RST__SD3_RESET  0x170b9
+		>;
+	};
+
+	pinctrl_usdhc3_200mhz: usdhc3200mhzgrp {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD    0x170f9
+			MX6QDL_PAD_SD3_CLK__SD3_CLK    0x100f9
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0 0x170f9
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x170f9
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x170f9
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x170f9
+			MX6QDL_PAD_SD3_DAT4__SD3_DATA4 0x170f9
+			MX6QDL_PAD_SD3_DAT5__SD3_DATA5 0x170f9
+			MX6QDL_PAD_SD3_DAT6__SD3_DATA6 0x170f9
+			MX6QDL_PAD_SD3_DAT7__SD3_DATA7 0x170f9
+			/* eMMC reset */
+			MX6QDL_PAD_SD3_RST__SD3_RESET  0x170f9
+		>;
+	};
+};

diff --git a/arch/arm/boot/dts/imx6qdl-apf6dev.dtsi b/arch/arm/boot/dts/imx6qdl-apf6dev.dtsi
index e26ebeb..a8f3500 100644
--- a/arch/arm/boot/dts/imx6qdl-apf6dev.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-apf6dev.dtsi

@@ -94,7 +94,7 @@
 			label = "User button";
 			gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
 			linux,code = <BTN_MISC>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 

diff --git a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
index 5cd16f2..9d7ab6c 100644
--- a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi

@@ -320,13 +320,13 @@
 
 		pinctrl_pwm3: pwm3grp {
 			fsl,pins = <
-				MX6QDL_PAD_SD4_DAT1__PWM3_OUT		0x1b0b1
+				MX6QDL_PAD_SD1_DAT1__PWM3_OUT		0x1b0b1
 			>;
 		};
 
 		pinctrl_pwm4: pwm4grp {
 			fsl,pins = <
-				MX6QDL_PAD_SD4_DAT2__PWM4_OUT		0x1b0b1
+				MX6QDL_PAD_SD1_CMD__PWM4_OUT		0x1b0b1
 			>;
 		};
 

diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
index 9fa8a10..8dd74e9 100644
--- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi

@@ -473,7 +473,7 @@
 
 		pinctrl_pwm3: pwm3grp {
 			fsl,pins = <
-				MX6QDL_PAD_SD4_DAT1__PWM3_OUT		0x1b0b1
+				MX6QDL_PAD_SD1_DAT1__PWM3_OUT		0x1b0b1
 			>;
 		};
 

diff --git a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
index e8375e1..ec3fe74 100644
--- a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi

@@ -462,7 +462,7 @@
 
 		pinctrl_pwm3: pwm3grp {
 			fsl,pins = <
-				MX6QDL_PAD_SD4_DAT1__PWM3_OUT		0x1b0b1
+				MX6QDL_PAD_SD1_DAT1__PWM3_OUT		0x1b0b1
 			>;
 		};
 

diff --git a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
index 66983dc..367cc49 100644
--- a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi

@@ -397,8 +397,9 @@
 };
 
 &pwm4 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_pwm4>;
+	pinctrl-names = "default", "state_dio";
+	pinctrl-0 = <&pinctrl_pwm4_backlight>;
+	pinctrl-1 = <&pinctrl_pwm4_dio>;
 	status = "okay";
 };
 
@@ -573,12 +574,20 @@
 			>;
 		};
 
-		pinctrl_pwm4: pwm4grp {
+		pinctrl_pwm4_backlight: pwm4grpbacklight {
 			fsl,pins = <
+				/* LVDS_PWM J6.5 */
 				MX6QDL_PAD_SD1_CMD__PWM4_OUT		0x1b0b1
 			>;
 		};
 
+		pinctrl_pwm4_dio: pwm4grpdio {
+			fsl,pins = <
+				/* DIO3 J16.4 */
+				MX6QDL_PAD_SD4_DAT2__PWM4_OUT		0x1b0b1
+			>;
+		};
+
 		pinctrl_uart1: uart1grp {
 			fsl,pins = <
 				MX6QDL_PAD_SD3_DAT7__UART1_TX_DATA	0x1b0b1

diff --git a/arch/arm/boot/dts/imx6qdl-gw552x.dtsi b/arch/arm/boot/dts/imx6qdl-gw552x.dtsi
index cca39f1..f27f184 100644
--- a/arch/arm/boot/dts/imx6qdl-gw552x.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw552x.dtsi

@@ -262,7 +262,7 @@
 
 		pinctrl_pwm3: pwm3grp {
 			fsl,pins = <
-				MX6QDL_PAD_SD4_DAT1__PWM3_OUT		0x1b0b1
+				MX6QDL_PAD_SD1_DAT1__PWM3_OUT		0x1b0b1
 			>;
 		};
 

diff --git a/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi b/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi
index 6dd0b76..d6c2358 100644
--- a/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-hummingboard.dtsi

@@ -48,7 +48,7 @@
 
 	ir_recv: ir-receiver {
 		compatible = "gpio-ir-receiver";
-		gpios = <&gpio3 5 1>;
+		gpios = <&gpio3 5 GPIO_ACTIVE_LOW>;
 		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_hummingboard_gpio3_5>;
 	};
@@ -67,7 +67,7 @@
 		reg_usbh1_vbus: usb-h1-vbus {
 			compatible = "regulator-fixed";
 			enable-active-high;
-			gpio = <&gpio1 0 0>;
+			gpio = <&gpio1 0 GPIO_ACTIVE_HIGH>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_hummingboard_usbh1_vbus>;
 			regulator-name = "usb_h1_vbus";
@@ -78,7 +78,7 @@
 		reg_usbotg_vbus: usb-otg-vbus {
 			compatible = "regulator-fixed";
 			enable-active-high;
-			gpio = <&gpio3 22 0>;
+			gpio = <&gpio3 22 GPIO_ACTIVE_HIGH>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_hummingboard_usbotg_vbus>;
 			regulator-name = "usb_otg_vbus";
@@ -253,7 +253,7 @@
 &pcie {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_hummingboard_pcie_reset>;
-	reset-gpio = <&gpio3 4 0>;
+	reset-gpio = <&gpio3 4 GPIO_ACTIVE_LOW>;
 	status = "okay";
 };
 

diff --git a/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi b/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi
new file mode 100644
index 0000000..f8d945a
--- /dev/null
+++ b/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi

@@ -0,0 +1,411 @@
+/*
+ * Copyright (C) 2015 Amarula Solutions B.V.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/clock/imx6qdl-clock.h>
+
+/ {
+	memory {
+		reg = <0x10000000 0x80000000>;
+	};
+
+	reg_1p8v: regulator-1p8v {
+		compatible = "regulator-fixed";
+		regulator-name = "1P8V";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_2p5v: regulator-2p5v {
+		compatible = "regulator-fixed";
+		regulator-name = "2P5V";
+		regulator-min-microvolt = <2500000>;
+		regulator-max-microvolt = <2500000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_3p3v: regulator-3p3v {
+		compatible = "regulator-fixed";
+		regulator-name = "3P3V";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_sd3_vmmc: regulator-sd3-vmmc {
+		compatible = "regulator-fixed";
+		regulator-name = "P3V3_SD3_SWITCHED";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		gpio = <&gpio1 4 GPIO_ACTIVE_LOW>;
+		enable-active-high;
+	};
+
+	reg_sd4_vmmc: regulator-sd4-vmmc {
+		compatible = "regulator-fixed";
+		regulator-name = "P3V3_SD4_SWITCHED";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_usb_h1_vbus: regulator-usb-h1-vbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usb_h1_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_usb_otg_vbus: regulator-usb-otg-vbus {
+		compatible = "regulator-fixed";
+		regulator-name = "usb_otg_vbus";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	usb_hub: usb-hub {
+		compatible = "smsc,usb3503a";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_usbhub>;
+		reset-gpios = <&gpio1 6 GPIO_ACTIVE_LOW>;
+		clocks = <&clks IMX6QDL_CLK_LVDS2_GATE>;
+		clock-names = "refclk";
+	};
+};
+
+&clks {
+	assigned-clocks = <&clks IMX6QDL_CLK_LVDS2_SEL>;
+	assigned-clock-parents = <&clks IMX6QDL_CLK_OSC>;
+};
+
+&audmux {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_audmux>;
+	status = "okay";
+};
+
+&fec {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_enet>;
+	phy-handle = <&eth_phy>;
+	phy-mode = "rgmii";
+	status = "okay";
+
+	mdio {
+		eth_phy: ethernet-phy {
+			rxc-skew-ps = <1140>;
+			txc-skew-ps = <1140>;
+			txen-skew-ps = <600>;
+			rxdv-skew-ps = <240>;
+			rxd0-skew-ps = <420>;
+			rxd1-skew-ps = <600>;
+			rxd2-skew-ps = <420>;
+			rxd3-skew-ps = <240>;
+			txd0-skew-ps = <60>;
+			txd1-skew-ps = <60>;
+			txd2-skew-ps = <60>;
+			txd3-skew-ps = <240>;
+		};
+	};
+};
+
+&i2c1 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c1>;
+	status = "okay";
+};
+
+&i2c2 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c2>;
+	status = "okay";
+};
+
+&i2c3 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c3>;
+	status = "okay";
+};
+
+&pcie {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_pcie>;
+	reset-gpio = <&gpio3 29 GPIO_ACTIVE_LOW>;
+	status = "okay";
+};
+
+&ssi1 {
+	fsl,mode = "i2s-slave";
+	status = "okay";
+};
+
+&uart4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_uart4>;
+	status = "okay";
+};
+
+&usbh1 {
+	vbus-supply = <&reg_usb_h1_vbus>;
+	disable-over-current;
+	clocks = <&clks IMX6QDL_CLK_USBOH3>;
+	status = "okay";
+};
+
+&usbotg {
+	vbus-supply = <&reg_usb_otg_vbus>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usbotg>;
+	disable-over-current;
+	status = "okay";
+};
+
+&usdhc1 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usdhc1>;
+	no-1-8-v;
+	status = "okay";
+};
+
+&usdhc3 {
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc3>;
+	pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
+	vmcc-supply = <&reg_sd3_vmmc>;
+	cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
+	bus-witdh=<4>;
+	no-1-8-v;
+	status = "okay";
+};
+
+&usdhc4 {
+	pinctrl-names = "default", "state_100mhz", "state_200mhz";
+	pinctrl-0 = <&pinctrl_usdhc4>;
+	pinctrl-1 = <&pinctrl_usdhc4_100mhz>;
+	pinctrl-2 = <&pinctrl_usdhc4_200mhz>;
+	vmcc-supply = <&reg_sd4_vmmc>;
+	bus-witdh=<8>;
+	no-1-8-v;
+	non-removable;
+	status = "okay";
+};
+
+&iomuxc {
+	pinctrl_audmux: audmux {
+		fsl,pins = <
+			MX6QDL_PAD_DISP0_DAT20__AUD4_TXC  0x130b0
+			MX6QDL_PAD_DISP0_DAT21__AUD4_TXD  0x110b0
+			MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS 0x130b0
+			MX6QDL_PAD_DISP0_DAT23__AUD4_RXD  0x130b0
+		>;
+	};
+
+	pinctrl_enet: enetgrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_MDIO__ENET_MDIO       0x1b0b0
+			MX6QDL_PAD_ENET_MDC__ENET_MDC         0x1b0b0
+			MX6QDL_PAD_RGMII_TXC__RGMII_TXC       0x1b0b0
+			MX6QDL_PAD_RGMII_TD0__RGMII_TD0       0x1b0b0
+			MX6QDL_PAD_RGMII_TD1__RGMII_TD1       0x1b0b0
+			MX6QDL_PAD_RGMII_TD2__RGMII_TD2       0x1b0b0
+			MX6QDL_PAD_RGMII_TD3__RGMII_TD3       0x1b0b0
+			MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL 0x1b0b0
+			MX6QDL_PAD_ENET_REF_CLK__ENET_TX_CLK  0x1b0b0
+			MX6QDL_PAD_RGMII_RXC__RGMII_RXC       0x1b0b0
+			MX6QDL_PAD_RGMII_RD0__RGMII_RD0       0x1b0b0
+			MX6QDL_PAD_RGMII_RD1__RGMII_RD1       0x1b0b0
+			MX6QDL_PAD_RGMII_RD2__RGMII_RD2       0x1b0b0
+			MX6QDL_PAD_RGMII_RD3__RGMII_RD3       0x1b0b0
+			MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b0b0
+			MX6QDL_PAD_ENET_TX_EN__ENET_TX_EN     0x1b0b0
+		>;
+	};
+
+	pinctrl_i2c1: i2c1grp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D21__I2C1_SCL 0x4001b8b1
+			MX6QDL_PAD_EIM_D28__I2C1_SDA 0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c2: i2c2grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL3__I2C2_SCL 0x4001b8b1
+			MX6QDL_PAD_KEY_ROW3__I2C2_SDA 0x4001b8b1
+		>;
+	};
+
+	pinctrl_i2c3: i2c3grp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_5__I2C3_SCL  0x4001b8b1
+			MX6QDL_PAD_EIM_D18__I2C3_SDA 0x4001b8b1
+			MX6QDL_PAD_GPIO_0__CCM_CLKO1    0x130b0
+		>;
+	};
+
+	pinctrl_pcie: pciegrp {
+		fsl,pins = <
+			MX6QDL_PAD_EIM_D29__GPIO3_IO29 0x1f059	/* PCIe Reset */
+		>;
+	};
+
+	pinctrl_uart4: uart4grp {
+		fsl,pins = <
+			MX6QDL_PAD_KEY_COL0__UART4_TX_DATA 0x1b0b1
+			MX6QDL_PAD_KEY_ROW0__UART4_RX_DATA 0x1b0b1
+		>;
+	};
+
+	pinctrl_usbhub: usbhubgrp {
+		fsl,pins = <
+			MX6QDL_PAD_GPIO_6__GPIO1_IO06  0x1f059	/* HUB USB Reset */
+		>;
+	};
+
+	pinctrl_usbotg: usbotggrp {
+		fsl,pins = <
+			MX6QDL_PAD_ENET_RX_ER__USB_OTG_ID 0x17059
+		>;
+	};
+
+	pinctrl_usdhc1: usdhc1grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD1_CMD__SD1_CMD    0x17071
+			MX6QDL_PAD_SD1_CLK__SD1_CLK    0x10071
+			MX6QDL_PAD_SD1_DAT0__SD1_DATA0 0x17071
+			MX6QDL_PAD_SD1_DAT1__SD1_DATA1 0x17071
+			MX6QDL_PAD_SD1_DAT2__SD1_DATA2 0x17071
+			MX6QDL_PAD_SD1_DAT3__SD1_DATA3 0x17071
+		>;
+	};
+
+	pinctrl_usdhc3: usdhc3grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD    0x17070
+			MX6QDL_PAD_SD3_CLK__SD3_CLK    0x10070
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0 0x17070
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17070
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17070
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17070
+			MX6QDL_PAD_GPIO_1__GPIO1_IO01  0x1f059	/* CD */
+			MX6QDL_PAD_GPIO_4__GPIO1_IO04  0x1f059	/* PWR */
+		>;
+	};
+
+	pinctrl_usdhc3_100mhz: usdhc3grp_100mhz {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD    0x170B1
+			MX6QDL_PAD_SD3_CLK__SD3_CLK    0x100B1
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0 0x170B1
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x170B1
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x170B1
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x170B1
+		>;
+	};
+
+	pinctrl_usdhc3_200mhz: usdhc3grp_200mhz {
+		fsl,pins = <
+			MX6QDL_PAD_SD3_CMD__SD3_CMD    0x170F9
+			MX6QDL_PAD_SD3_CLK__SD3_CLK    0x100F9
+			MX6QDL_PAD_SD3_DAT0__SD3_DATA0 0x170F9
+			MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x170F9
+			MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x170F9
+			MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x170F9
+		>;
+	};
+
+	pinctrl_usdhc4: usdhc4grp {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_CMD__SD4_CMD    0x17070
+			MX6QDL_PAD_SD4_CLK__SD4_CLK    0x10070
+			MX6QDL_PAD_SD4_DAT0__SD4_DATA0 0x17070
+			MX6QDL_PAD_SD4_DAT1__SD4_DATA1 0x17070
+			MX6QDL_PAD_SD4_DAT2__SD4_DATA2 0x17070
+			MX6QDL_PAD_SD4_DAT3__SD4_DATA3 0x17070
+			MX6QDL_PAD_SD4_DAT4__SD4_DATA4 0x17070
+			MX6QDL_PAD_SD4_DAT5__SD4_DATA5 0x17070
+			MX6QDL_PAD_SD4_DAT6__SD4_DATA6 0x17070
+			MX6QDL_PAD_SD4_DAT7__SD4_DATA7 0x17070
+		>;
+	};
+
+	pinctrl_usdhc4_100mhz: usdhc4grp_100mhz {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_CMD__SD4_CMD    0x170B1
+			MX6QDL_PAD_SD4_CLK__SD4_CLK    0x100B1
+			MX6QDL_PAD_SD4_DAT0__SD4_DATA0 0x170B1
+			MX6QDL_PAD_SD4_DAT1__SD4_DATA1 0x170B1
+			MX6QDL_PAD_SD4_DAT2__SD4_DATA2 0x170B1
+			MX6QDL_PAD_SD4_DAT3__SD4_DATA3 0x170B1
+			MX6QDL_PAD_SD4_DAT4__SD4_DATA4 0x170B1
+			MX6QDL_PAD_SD4_DAT5__SD4_DATA5 0x170B1
+			MX6QDL_PAD_SD4_DAT6__SD4_DATA6 0x170B1
+			MX6QDL_PAD_SD4_DAT7__SD4_DATA7 0x170B1
+		>;
+	};
+
+	pinctrl_usdhc4_200mhz: usdhc4grp_200mhz {
+		fsl,pins = <
+			MX6QDL_PAD_SD4_CMD__SD4_CMD    0x170F9
+			MX6QDL_PAD_SD4_CLK__SD4_CLK    0x100F9
+			MX6QDL_PAD_SD4_DAT0__SD4_DATA0 0x170F9
+			MX6QDL_PAD_SD4_DAT1__SD4_DATA1 0x170F9
+			MX6QDL_PAD_SD4_DAT2__SD4_DATA2 0x170F9
+			MX6QDL_PAD_SD4_DAT3__SD4_DATA3 0x170F9
+			MX6QDL_PAD_SD4_DAT4__SD4_DATA4 0x170F9
+			MX6QDL_PAD_SD4_DAT5__SD4_DATA5 0x170F9
+			MX6QDL_PAD_SD4_DAT6__SD4_DATA6 0x170F9
+			MX6QDL_PAD_SD4_DAT7__SD4_DATA7 0x170F9
+		>;
+	};
+};

diff --git a/arch/arm/boot/dts/imx6qdl-microsom.dtsi b/arch/arm/boot/dts/imx6qdl-microsom.dtsi
index 6d4069c..86460e4 100644
--- a/arch/arm/boot/dts/imx6qdl-microsom.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-microsom.dtsi

@@ -154,6 +154,7 @@
 	bus-width = <4>;
 	mmc-pwrseq = <&usdhc1_pwrseq>;
 	keep-power-in-suspend;
+	no-1-8-v;
 	non-removable;
 	vmmc-supply = <&reg_brcm>;
 	status = "okay";

diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
index a35d54f..dc74aa3 100644
--- a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi

@@ -138,7 +138,7 @@
 			label = "Power Button";
 			gpios = <&gpio2 3 GPIO_ACTIVE_LOW>;
 			linux,code = <KEY_POWER>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		menu {

diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
index caeed56..c6c590d 100644
--- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi

@@ -119,7 +119,7 @@
 			label = "Power Button";
 			gpios = <&gpio2 3 GPIO_ACTIVE_LOW>;
 			linux,code = <KEY_POWER>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		menu {

diff --git a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
index 1a69a34..0f1aca4 100644
--- a/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabrelite.dtsi

@@ -105,7 +105,7 @@
 			label = "Power Button";
 			gpios = <&gpio2 3 GPIO_ACTIVE_LOW>;
 			linux,code = <KEY_POWER>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		menu {

diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index a6d445c..0b5c4de 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi

@@ -79,21 +79,21 @@
 		power {
 			label = "Power Button";
 			gpios = <&gpio3 29 GPIO_ACTIVE_LOW>;
-			gpio-key,wakeup;
+			wakeup-source;
 			linux,code = <KEY_POWER>;
 		};
 
 		volume-up {
 			label = "Volume Up";
 			gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
-			gpio-key,wakeup;
+			wakeup-source;
 			linux,code = <KEY_VOLUMEUP>;
 		};
 
 		volume-down {
 			label = "Volume Down";
 			gpios = <&gpio1 5 GPIO_ACTIVE_LOW>;
-			gpio-key,wakeup;
+			wakeup-source;
 			linux,code = <KEY_VOLUMEDOWN>;
 		};
 	};
@@ -238,6 +238,7 @@
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
 				regulator-always-on;
+				regulator-ramp-delay = <6250>;
 			};
 
 			sw3a_reg: sw3a {

diff --git a/arch/arm/boot/dts/imx6qdl-tx6.dtsi b/arch/arm/boot/dts/imx6qdl-tx6.dtsi
index 13cb7cc..efd06b5 100644
--- a/arch/arm/boot/dts/imx6qdl-tx6.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-tx6.dtsi

@@ -41,7 +41,7 @@
 			compatible = "fixed-clock";
 			reg = <0>;
 			#clock-cells = <0>;
-			clock-frequency = <27000000>;
+			clock-frequency = <26000000>;
 		};
 	};
 
@@ -52,7 +52,7 @@
 			label = "Power Button";
 			gpios = <&gpio5 2 GPIO_ACTIVE_HIGH>;
 			linux,code = <KEY_POWER>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
@@ -227,6 +227,11 @@
 &fec {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
+	clocks = <&clks IMX6QDL_CLK_ENET>,
+		 <&clks IMX6QDL_CLK_ENET>,
+		 <&clks IMX6QDL_CLK_ENET_REF>,
+		 <&clks IMX6QDL_CLK_ENET_REF>;
+	clock-names = "ipg", "ahb", "ptp", "enet_out";
 	phy-mode = "rmii";
 	phy-reset-gpios = <&gpio7 6 GPIO_ACTIVE_HIGH>;
 	phy-supply = <&reg_3v3_etn>;
@@ -276,7 +281,7 @@
 		interrupts = <15 IRQ_TYPE_EDGE_FALLING>;
 		reset-gpios = <&gpio2 22 GPIO_ACTIVE_LOW>;
 		wake-gpios = <&gpio2 21 GPIO_ACTIVE_HIGH>;
-		linux,wakeup;
+		wakeup-source;
 	};
 
 	touchscreen: tsc2007@48 {
@@ -288,7 +293,7 @@
 		interrupts = <26 0>;
 		gpios = <&gpio3 26 GPIO_ACTIVE_LOW>;
 		ti,x-plate-ohms = <660>;
-		linux,wakeup;
+		wakeup-source;
 	};
 };
 

diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
index 1211da8..d3e54e4 100644
--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi

@@ -34,6 +34,18 @@
 			gpio = <&gpio7 12 0>;
 		};
 	};
+
+	sound {
+		compatible = "fsl,imx6q-udoo-ac97",
+			     "fsl,imx-audio-ac97";
+		model = "fsl,imx6q-udoo-ac97";
+		audio-cpu = <&ssi1>;
+		audio-routing =
+			"RX", "Mic Jack",
+			"Headphone Jack", "TX";
+		mux-int-port = <1>;
+		mux-ext-port = <6>;
+	};
 };
 
 &fec {
@@ -109,6 +121,36 @@
 				MX6QDL_PAD_SD3_DAT3__SD3_DATA3		0x17059
 			>;
 		};
+
+		pinctrl_ac97_running: ac97running {
+			fsl,pins = <
+				MX6QDL_PAD_DI0_PIN2__AUD6_TXD		0x1b0b0
+				MX6QDL_PAD_DI0_PIN3__AUD6_TXFS		0x1b0b0
+				MX6QDL_PAD_DI0_PIN4__AUD6_RXD		0x1b0b0
+				MX6QDL_PAD_DI0_PIN15__AUD6_TXC		0x1b0b0
+				MX6QDL_PAD_EIM_EB2__GPIO2_IO30		0x1b0b0
+			>;
+		};
+
+		pinctrl_ac97_warm_reset: ac97warmreset {
+			fsl,pins = <
+				MX6QDL_PAD_DI0_PIN2__AUD6_TXD		0x1b0b0
+				MX6QDL_PAD_DI0_PIN3__GPIO4_IO19		0x1b0b0
+				MX6QDL_PAD_DI0_PIN4__AUD6_RXD		0x1b0b0
+				MX6QDL_PAD_DI0_PIN15__AUD6_TXC		0x1b0b0
+				MX6QDL_PAD_EIM_EB2__GPIO2_IO30		0x1b0b0
+			>;
+		};
+
+		pinctrl_ac97_reset: ac97reset {
+			fsl,pins = <
+				MX6QDL_PAD_DI0_PIN2__GPIO4_IO18		0x1b0b0
+				MX6QDL_PAD_DI0_PIN3__GPIO4_IO19		0x1b0b0
+				MX6QDL_PAD_DI0_PIN4__AUD6_RXD		0x1b0b0
+				MX6QDL_PAD_DI0_PIN15__AUD6_TXC		0x1b0b0
+				MX6QDL_PAD_EIM_EB2__GPIO2_IO30		0x1b0b0
+			>;
+		};
 	};
 };
 
@@ -132,3 +174,18 @@
 	non-removable;
 	status = "okay";
 };
+
+&audmux {
+	status = "okay";
+};
+
+&ssi1 {
+	cell-index = <0>;
+	fsl,mode = "ac97-slave";
+	pinctrl-names = "ac97-running", "ac97-reset", "ac97-warm-reset";
+	pinctrl-0 = <&pinctrl_ac97_running>;
+	pinctrl-1 = <&pinctrl_ac97_reset>;
+	pinctrl-2 = <&pinctrl_ac97_warm_reset>;
+	ac97-gpios = <&gpio4 19 0 &gpio4 18 0 &gpio2 30 0>;
+	status = "okay";
+};

diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index f74d3db..b42822a 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi

@@ -261,7 +261,7 @@
 					clocks = <&clks IMX6QDL_CLK_ECSPI1>,
 						 <&clks IMX6QDL_CLK_ECSPI1>;
 					clock-names = "ipg", "per";
-					dmas = <&sdma 3 7 1>, <&sdma 4 7 2>;
+					dmas = <&sdma 3 8 1>, <&sdma 4 8 2>;
 					dma-names = "rx", "tx";
 					status = "disabled";
 				};
@@ -275,7 +275,7 @@
 					clocks = <&clks IMX6QDL_CLK_ECSPI2>,
 						 <&clks IMX6QDL_CLK_ECSPI2>;
 					clock-names = "ipg", "per";
-					dmas = <&sdma 5 7 1>, <&sdma 6 7 2>;
+					dmas = <&sdma 5 8 1>, <&sdma 6 8 2>;
 					dma-names = "rx", "tx";
 					status = "disabled";
 				};
@@ -289,7 +289,7 @@
 					clocks = <&clks IMX6QDL_CLK_ECSPI3>,
 						 <&clks IMX6QDL_CLK_ECSPI3>;
 					clock-names = "ipg", "per";
-					dmas = <&sdma 7 7 1>, <&sdma 8 7 2>;
+					dmas = <&sdma 7 8 1>, <&sdma 8 8 2>;
 					dma-names = "rx", "tx";
 					status = "disabled";
 				};
@@ -303,7 +303,7 @@
 					clocks = <&clks IMX6QDL_CLK_ECSPI4>,
 						 <&clks IMX6QDL_CLK_ECSPI4>;
 					clock-names = "ipg", "per";
-					dmas = <&sdma 9 7 1>, <&sdma 10 7 2>;
+					dmas = <&sdma 9 8 1>, <&sdma 10 8 2>;
 					dma-names = "rx", "tx";
 					status = "disabled";
 				};

diff --git a/arch/arm/boot/dts/imx6qp-sabreauto.dts b/arch/arm/boot/dts/imx6qp-sabreauto.dts
new file mode 100644
index 0000000..5ce3840
--- /dev/null
+++ b/arch/arm/boot/dts/imx6qp-sabreauto.dts

@@ -0,0 +1,93 @@
+/*
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/dts-v1/;
+
+#include "imx6qp.dtsi"
+#include "imx6qdl-sabreauto.dtsi"
+
+/ {
+	model = "Freescale i.MX6 Quad Plus SABRE Automotive Board";
+	compatible = "fsl,imx6qp-sabreauto", "fsl,imx6qp";
+};
+
+&i2c2 {
+	max7322: gpio@68 {
+		compatible = "maxim,max7322";
+		reg = <0x68>;
+		gpio-controller;
+		#gpio-cells = <2>;
+	};
+};
+
+&iomuxc {
+	imx6qdl-sabreauto {
+		pinctrl_enet: enetgrp {
+			fsl,pins = <
+				MX6QDL_PAD_KEY_COL1__ENET_MDIO          0x1b0b0
+				MX6QDL_PAD_KEY_COL2__ENET_MDC           0x1b0b0
+				MX6QDL_PAD_RGMII_TXC__RGMII_TXC         0x1b018
+				MX6QDL_PAD_RGMII_TD0__RGMII_TD0         0x1b018
+				MX6QDL_PAD_RGMII_TD1__RGMII_TD1         0x1b018
+				MX6QDL_PAD_RGMII_TD2__RGMII_TD2         0x1b018
+				MX6QDL_PAD_RGMII_TD3__RGMII_TD3         0x1b018
+				MX6QDL_PAD_RGMII_TX_CTL__RGMII_TX_CTL   0x1b018
+				MX6QDL_PAD_RGMII_RXC__RGMII_RXC         0x1b018
+				MX6QDL_PAD_RGMII_RD0__RGMII_RD0         0x1b018
+				MX6QDL_PAD_RGMII_RD1__RGMII_RD1         0x1b018
+				MX6QDL_PAD_RGMII_RD2__RGMII_RD2         0x1b018
+				MX6QDL_PAD_RGMII_RD3__RGMII_RD3         0x1b018
+				MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL   0x1b018
+				MX6QDL_PAD_GPIO_16__ENET_REF_CLK        0x4001b0a8
+				MX6QDL_PAD_GPIO_6__ENET_IRQ		0x000b1
+			>;
+		};
+	};
+};
+
+&pcie {
+	status = "disabled";
+};
+
+&vgen3_reg {
+	regulator-always-on;
+};

diff --git a/arch/arm/boot/dts/imx6qp-sabresd.dts b/arch/arm/boot/dts/imx6qp-sabresd.dts
new file mode 100644
index 0000000..b234580
--- /dev/null
+++ b/arch/arm/boot/dts/imx6qp-sabresd.dts

@@ -0,0 +1,93 @@
+/*
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/dts-v1/;
+
+#include "imx6qp.dtsi"
+#include "imx6qdl-sabresd.dtsi"
+
+/ {
+	model = "Freescale i.MX6 Quad Plus SABRE Smart Device Board";
+	compatible = "fsl,imx6qp-sabresd", "fsl,imx6qp";
+};
+
+&cpu0 {
+	arm-supply = <&sw2_reg>;
+};
+
+&iomuxc {
+	imx6qdl-sabresd {
+		pinctrl_usdhc2: usdhc2grp {
+			fsl,pins = <
+				MX6QDL_PAD_SD2_CMD__SD2_CMD		0x17059
+				MX6QDL_PAD_SD2_CLK__SD2_CLK		0x10071
+				MX6QDL_PAD_SD2_DAT0__SD2_DATA0		0x17059
+				MX6QDL_PAD_SD2_DAT1__SD2_DATA1		0x17059
+				MX6QDL_PAD_SD2_DAT2__SD2_DATA2		0x17059
+				MX6QDL_PAD_SD2_DAT3__SD2_DATA3		0x17059
+				MX6QDL_PAD_NANDF_D4__SD2_DATA4		0x17059
+				MX6QDL_PAD_NANDF_D5__SD2_DATA5		0x17059
+				MX6QDL_PAD_NANDF_D6__SD2_DATA6		0x17059
+				MX6QDL_PAD_NANDF_D7__SD2_DATA7		0x17059
+			>;
+		};
+
+		pinctrl_usdhc3: usdhc3grp {
+			fsl,pins = <
+				MX6QDL_PAD_SD3_CMD__SD3_CMD		0x17059
+				MX6QDL_PAD_SD3_CLK__SD3_CLK		0x10071
+				MX6QDL_PAD_SD3_DAT0__SD3_DATA0		0x17059
+				MX6QDL_PAD_SD3_DAT1__SD3_DATA1		0x17059
+				MX6QDL_PAD_SD3_DAT2__SD3_DATA2		0x17059
+				MX6QDL_PAD_SD3_DAT3__SD3_DATA3		0x17059
+				MX6QDL_PAD_SD3_DAT4__SD3_DATA4		0x17059
+				MX6QDL_PAD_SD3_DAT5__SD3_DATA5		0x17059
+				MX6QDL_PAD_SD3_DAT6__SD3_DATA6		0x17059
+				MX6QDL_PAD_SD3_DAT7__SD3_DATA7		0x17059
+			>;
+		};
+	};
+};
+
+&pcie {
+	status = "disabled";
+};

diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi
new file mode 100644
index 0000000..1ada714
--- /dev/null
+++ b/arch/arm/boot/dts/imx6qp.dtsi

@@ -0,0 +1,86 @@
+/*
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "imx6q.dtsi"
+
+/ {
+	soc {
+		ocram2: sram@00940000 {
+			compatible = "mmio-sram";
+			reg = <0x00940000 0x20000>;
+			clocks = <&clks IMX6QDL_CLK_OCRAM>;
+		};
+
+		ocram3: sram@00960000 {
+			compatible = "mmio-sram";
+			reg = <0x00960000 0x20000>;
+			clocks = <&clks IMX6QDL_CLK_OCRAM>;
+		};
+
+		ipu1: ipu@02400000 {
+			compatible = "fsl,imx6qp-ipu", "fsl,imx6q-ipu";
+			clocks = <&clks IMX6QDL_CLK_IPU1>,
+				 <&clks IMX6QDL_CLK_IPU1_DI0>, <&clks IMX6QDL_CLK_IPU1_DI1>,
+				 <&clks IMX6QDL_CLK_IPU1_DI0_SEL>, <&clks IMX6QDL_CLK_IPU1_DI1_SEL>,
+				 <&clks IMX6QDL_CLK_LDB_DI0_PODF>, <&clks IMX6QDL_CLK_LDB_DI1_PODF>,
+				 <&clks IMX6QDL_CLK_PRG0_APB>;
+			clock-names = "bus",
+				      "di0", "di1",
+				      "di0_sel", "di1_sel",
+				      "ldb_di0", "ldb_di1", "prg";
+		};
+
+		ipu2: ipu@02800000 {
+			compatible = "fsl,imx6qp-ipu", "fsl,imx6q-ipu";
+			clocks = <&clks IMX6QDL_CLK_IPU2>,
+				 <&clks IMX6QDL_CLK_IPU2_DI0>, <&clks IMX6QDL_CLK_IPU2_DI1>,
+				 <&clks IMX6QDL_CLK_IPU2_DI0_SEL>, <&clks IMX6QDL_CLK_IPU2_DI1_SEL>,
+				 <&clks IMX6QDL_CLK_LDB_DI0_PODF>, <&clks IMX6QDL_CLK_LDB_DI1_PODF>,
+				 <&clks IMX6QDL_CLK_PRG1_APB>;
+			clock-names = "bus",
+				      "di0", "di1",
+				      "di0_sel", "di1_sel",
+				      "ldb_di0", "ldb_di1", "prg";
+		};
+
+	};
+};

diff --git a/arch/arm/boot/dts/imx6sl-warp.dts b/arch/arm/boot/dts/imx6sl-warp.dts
index 10c6996..058bcdc 100644
--- a/arch/arm/boot/dts/imx6sl-warp.dts
+++ b/arch/arm/boot/dts/imx6sl-warp.dts

@@ -118,7 +118,7 @@
 	bus-width = <4>;
 	non-removable;
 	keep-power-in-suspend;
-	enable-sdio-wakeup;
+	wakeup-source;
 	mmc-pwrseq = <&usdhc3_pwrseq>;
 	status = "okay";
 };

diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts
index 115f3fd..96ea936 100644
--- a/arch/arm/boot/dts/imx6sx-sabreauto.dts
+++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts

@@ -52,7 +52,7 @@
 	cd-gpios = <&gpio7 10 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
 	keep-power-in-suspend;
-	enable-sdio-wakeup;
+	wakeup-source;
 	vmmc-supply = <&vcc_sd3>;
 	status = "okay";
 };

diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi
index 94ac400..f1d3730 100644
--- a/arch/arm/boot/dts/imx6sx-sdb.dtsi
+++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi

@@ -184,6 +184,13 @@
 	status = "okay";
 };
 
+&i2c3 {
+	clock-frequency = <100000>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_i2c3>;
+	status = "okay";
+};
+
 &i2c4 {
         clock-frequency = <100000>;
         pinctrl-names = "default";
@@ -283,7 +290,7 @@
 	non-removable;
 	no-1-8-v;
 	keep-power-in-suspend;
-	enable-sdio-wakeup;
+	wakeup-source;
 	status = "okay";
 };
 
@@ -296,7 +303,7 @@
 	cd-gpios = <&gpio2 10 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio2 15 GPIO_ACTIVE_HIGH>;
 	keep-power-in-suspend;
-	enable-sdio-wakeup;
+	wakeup-source;
 	vmmc-supply = <&vcc_sd3>;
 	status = "okay";
 };
@@ -378,6 +385,13 @@
 			>;
 		};
 
+		pinctrl_i2c3: i2c3grp {
+			fsl,pins = <
+				MX6SX_PAD_KEY_ROW4__I2C3_SDA		0x4001b8b1
+				MX6SX_PAD_KEY_COL4__I2C3_SCL		0x4001b8b1
+			>;
+		};
+
 		pinctrl_i2c4: i2c4grp {
 			fsl,pins = <
 				MX6SX_PAD_CSI_DATA07__I2C4_SDA		0x4001b8b1

diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dts b/arch/arm/boot/dts/imx6ul-14x14-evk.dts
index 6aaa5ec..7207280 100644
--- a/arch/arm/boot/dts/imx6ul-14x14-evk.dts
+++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dts

@@ -8,7 +8,6 @@
 
 /dts-v1/;
 
-#include <dt-bindings/input/input.h>
 #include "imx6ul.dtsi"
 
 / {
@@ -131,7 +130,7 @@
 	pinctrl-2 = <&pinctrl_usdhc1_200mhz>;
 	cd-gpios = <&gpio1 19 GPIO_ACTIVE_LOW>;
 	keep-power-in-suspend;
-	enable-sdio-wakeup;
+	wakeup-source;
 	vmmc-supply = <&reg_sd1_vmmc>;
 	status = "okay";
 };
@@ -141,7 +140,7 @@
 	pinctrl-0 = <&pinctrl_usdhc2>;
 	no-1-8-v;
 	keep-power-in-suspend;
-	enable-sdio-wakeup;
+	wakeup-source;
 	status = "okay";
 };
 

diff --git a/arch/arm/boot/dts/imx6ul-pinfunc.h b/arch/arm/boot/dts/imx6ul-pinfunc.h
index 20c7da1..0034eeb 100644
--- a/arch/arm/boot/dts/imx6ul-pinfunc.h
+++ b/arch/arm/boot/dts/imx6ul-pinfunc.h

@@ -14,925 +14,925 @@
  * The pin function ID is a tuple of
  * <mux_reg conf_reg input_reg mux_mode input_val>
  */
-#define MX6UL_PAD_BOOT_MODE0__GPIO5_IO10				0x0014 0x02a0 0x0000 5 0
-#define MX6UL_PAD_BOOT_MODE1__GPIO5_IO11				0x0018 0x02a4 0x0000 5 0
+#define MX6UL_PAD_BOOT_MODE0__GPIO5_IO10		0x0014 0x02a0 0x0000 5 0
+#define MX6UL_PAD_BOOT_MODE1__GPIO5_IO11		0x0018 0x02a4 0x0000 5 0
 
-#define MX6UL_PAD_SNVS_TAMPER0__GPIO5_IO00				0x001c 0x02a8 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER1__GPIO5_IO01				0x0020 0x02ac 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02				0x0024 0x02b0 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER3__GPIO5_IO03				0x0028 0x02b4 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER4__GPIO5_IO04				0x002c 0x02b8 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER5__GPIO5_IO05				0x0030 0x02bc 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER6__GPIO5_IO06				0x0034 0x02c0 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER7__GPIO5_IO07				0x0038 0x02c4 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER8__GPIO5_IO08				0x003c 0x02c8 0x0000 5 0
-#define MX6UL_PAD_SNVS_TAMPER9__GPIO5_IO09				0x0040 0x02cc 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER0__GPIO5_IO00		0x001c 0x02a8 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER1__GPIO5_IO01		0x0020 0x02ac 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02		0x0024 0x02b0 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER3__GPIO5_IO03		0x0028 0x02b4 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER4__GPIO5_IO04		0x002c 0x02b8 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER5__GPIO5_IO05		0x0030 0x02bc 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER6__GPIO5_IO06		0x0034 0x02c0 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER7__GPIO5_IO07		0x0038 0x02c4 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER8__GPIO5_IO08		0x003c 0x02c8 0x0000 5 0
+#define MX6UL_PAD_SNVS_TAMPER9__GPIO5_IO09		0x0040 0x02cc 0x0000 5 0
 
-#define MX6UL_PAD_JTAG_MOD__SJC_MOD                              	0x0044 0x02d0 0x0000 0 0
-#define MX6UL_PAD_JTAG_MOD__GPT2_CLK                             	0x0044 0x02d0 0x05a0 1 0
-#define MX6UL_PAD_JTAG_MOD__SPDIF_OUT                            	0x0044 0x02d0 0x0000 2 0
-#define MX6UL_PAD_JTAG_MOD__ENET1_REF_CLK_25M                    	0x0044 0x02d0 0x0000 3 0
-#define MX6UL_PAD_JTAG_MOD__CCM_PMIC_RDY                         	0x0044 0x02d0 0x04c0 4 0
-#define MX6UL_PAD_JTAG_MOD__GPIO1_IO10                           	0x0044 0x02d0 0x0000 5 0
-#define MX6UL_PAD_JTAG_MOD__SDMA_EXT_EVENT00                     	0x0044 0x02d0 0x0000 6 0
-#define MX6UL_PAD_JTAG_TMS__SJC_TMS                              	0x0048 0x02d4 0x0000 0 0
-#define MX6UL_PAD_JTAG_TMS__GPT2_CAPTURE1                        	0x0048 0x02d4 0x0598 1 0
-#define MX6UL_PAD_JTAG_TMS__SAI2_MCLK                            	0x0048 0x02d4 0x0000 2 0
-#define MX6UL_PAD_JTAG_TMS__CCM_CLKO1                            	0x0048 0x02d4 0x0000 3 0
-#define MX6UL_PAD_JTAG_TMS__CCM_WAIT                             	0x0048 0x02d4 0x0000 4 0
-#define MX6UL_PAD_JTAG_TMS__GPIO1_IO11                           	0x0048 0x02d4 0x0000 5 0
-#define MX6UL_PAD_JTAG_TMS__SDMA_EXT_EVENT01                     	0x0048 0x02d4 0x0000 6 0
-#define MX6UL_PAD_JTAG_TMS__EPIT1_OUT                            	0x0048 0x02d4 0x0000 8 0
-#define MX6UL_PAD_JTAG_TDO__SJC_TDO                              	0x004c 0x02d8 0x0000 0 0
-#define MX6UL_PAD_JTAG_TDO__GPT2_CAPTURE2                        	0x004c 0x02d8 0x059c 1 0
-#define MX6UL_PAD_JTAG_TDO__SAI2_TX_SYNC                         	0x004c 0x02d8 0x05fc 2 0
-#define MX6UL_PAD_JTAG_TDO__CCM_CLKO2                            	0x004c 0x02d8 0x0000 3 0
-#define MX6UL_PAD_JTAG_TDO__CCM_STOP                             	0x004c 0x02d8 0x0000 4 0
-#define MX6UL_PAD_JTAG_TDO__GPIO1_IO12                           	0x004c 0x02d8 0x0000 5 0
-#define MX6UL_PAD_JTAG_TDO__MQS_RIGHT                            	0x004c 0x02d8 0x0000 6 0
-#define MX6UL_PAD_JTAG_TDO__EPIT2_OUT                            	0x004c 0x02d8 0x0000 8 0
-#define MX6UL_PAD_JTAG_TDI__SJC_TDI                              	0x0050 0x02dc 0x0000 0 0
-#define MX6UL_PAD_JTAG_TDI__GPT2_COMPARE1                        	0x0050 0x02dc 0x0000 1 0
-#define MX6UL_PAD_JTAG_TDI__SAI2_TX_BCLK                         	0x0050 0x02dc 0x05f8 2 0
-#define MX6UL_PAD_JTAG_TDI__PWM6_OUT                             	0x0050 0x02dc 0x0000 4 0
-#define MX6UL_PAD_JTAG_TDI__GPIO1_IO13                           	0x0050 0x02dc 0x0000 5 0
-#define MX6UL_PAD_JTAG_TDI__MQS_LEFT                             	0x0050 0x02dc 0x0000 6 0
-#define MX6UL_PAD_JTAG_TDI__SIM1_POWER_FAIL                      	0x0050 0x02dc 0x0000 8 0
-#define MX6UL_PAD_JTAG_TCK__SJC_TCK                              	0x0054 0x02e0 0x0000 0 0
-#define MX6UL_PAD_JTAG_TCK__GPT2_COMPARE2                        	0x0054 0x02e0 0x0000 1 0
-#define MX6UL_PAD_JTAG_TCK__SAI2_RX_DATA                         	0x0054 0x02e0 0x0000 2 0
-#define MX6UL_PAD_JTAG_TCK__PWM7_OUT                             	0x0054 0x02e0 0x0000 4 0
-#define MX6UL_PAD_JTAG_TCK__GPIO1_IO14                           	0x0054 0x02e0 0x0000 5 0
-#define MX6UL_PAD_JTAG_TCK__SIM2_POWER_FAIL                      	0x0054 0x02e0 0x0000 8 0
-#define MX6UL_PAD_JTAG_TRST_B__SJC_TRSTB                         	0x0058 0x02e4 0x0000 0 0
-#define MX6UL_PAD_JTAG_TRST_B__GPT2_COMPARE3                     	0x0058 0x02e4 0x0000 1 0
-#define MX6UL_PAD_JTAG_TRST_B__SAI2_TX_DATA                      	0x0058 0x02e4 0x0000 2 0
-#define MX6UL_PAD_JTAG_TRST_B__PWM8_OUT                          	0x0058 0x02e4 0x0000 4 0
-#define MX6UL_PAD_JTAG_TRST_B__GPIO1_IO15                        	0x0058 0x02e4 0x0000 5 0
-#define MX6UL_PAD_JTAG_TRST_B__CAAM_RNG_OSC_OBS                  	0x0058 0x02e4 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO00__I2C2_SCL                           	0x005c 0x02e8 0x05ac 0 1
-#define MX6UL_PAD_GPIO1_IO00__GPT1_CAPTURE1                      	0x005c 0x02e8 0x058c 1 0
-#define MX6UL_PAD_GPIO1_IO00__ANATOP_OTG1_ID                     	0x005c 0x02e8 0x04b8 2 0
-#define MX6UL_PAD_GPIO1_IO00__ENET1_REF_CLK1                     	0x005c 0x02e8 0x0574 3 0
-#define MX6UL_PAD_GPIO1_IO00__MQS_RIGHT                          	0x005c 0x02e8 0x0000 4 0
-#define MX6UL_PAD_GPIO1_IO00__GPIO1_IO00                         	0x005c 0x02e8 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO00__ENET1_1588_EVENT0_IN               	0x005c 0x02e8 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO00__SRC_SYSTEM_RESET                   	0x005c 0x02e8 0x0000 7 0
-#define MX6UL_PAD_GPIO1_IO00__WDOG3_WDOG_B                       	0x005c 0x02e8 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO01__I2C2_SDA                           	0x0060 0x02ec 0x05b0 0 1
-#define MX6UL_PAD_GPIO1_IO01__GPT1_COMPARE1                      	0x0060 0x02ec 0x0000 1 0
-#define MX6UL_PAD_GPIO1_IO01__USB_OTG1_OC                        	0x0060 0x02ec 0x0664 2 0
-#define MX6UL_PAD_GPIO1_IO01__ENET2_REF_CLK2                     	0x0060 0x02ec 0x057c 3 0
-#define MX6UL_PAD_GPIO1_IO01__MQS_LEFT                           	0x0060 0x02ec 0x0000 4 0
-#define MX6UL_PAD_GPIO1_IO01__GPIO1_IO01                         	0x0060 0x02ec 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO01__ENET1_1588_EVENT0_OUT              	0x0060 0x02ec 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO01__SRC_EARLY_RESET                    	0x0060 0x02ec 0x0000 7 0
-#define MX6UL_PAD_GPIO1_IO01__WDOG1_WDOG_B                       	0x0060 0x02ec 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO02__I2C1_SCL                           	0x0064 0x02f0 0x05a4 0 0
-#define MX6UL_PAD_GPIO1_IO02__GPT1_COMPARE2                      	0x0064 0x02f0 0x0000 1 0
-#define MX6UL_PAD_GPIO1_IO02__USB_OTG2_PWR                       	0x0064 0x02f0 0x0000 2 0
-#define MX6UL_PAD_GPIO1_IO02__ENET1_REF_CLK_25M                  	0x0064 0x02f0 0x0000 3 0
-#define MX6UL_PAD_GPIO1_IO02__USDHC1_WP                          	0x0064 0x02f0 0x066c 4 0
-#define MX6UL_PAD_GPIO1_IO02__GPIO1_IO02                         	0x0064 0x02f0 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO02__SDMA_EXT_EVENT00                   	0x0064 0x02f0 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO02__SRC_ANY_PU_RESET                   	0x0064 0x02f0 0x0000 7 0
-#define MX6UL_PAD_GPIO1_IO02__UART1_DCE_TX                           	0x0064 0x02f0 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO02__UART1_DTE_RX                           	0x0064 0x02f0 0x0624 8 0
-#define MX6UL_PAD_GPIO1_IO03__I2C1_SDA                           	0x0068 0x02f4 0x05a8 0 1
-#define MX6UL_PAD_GPIO1_IO03__GPT1_COMPARE3                      	0x0068 0x02f4 0x0000 1 0
-#define MX6UL_PAD_GPIO1_IO03__USB_OTG2_OC                        	0x0068 0x02f4 0x0660 2 0
-#define MX6UL_PAD_GPIO1_IO03__USDHC1_CD_B                        	0x0068 0x02f4 0x0668 4 0
-#define MX6UL_PAD_GPIO1_IO03__GPIO1_IO03                         	0x0068 0x02f4 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO03__CCM_DI0_eXT_CLK                    	0x0068 0x02f4 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO03__SRC_TESTER_ACK                     	0x0068 0x02f4 0x0000 7 0
-#define MX6UL_PAD_GPIO1_IO03__UART1_DTE_TX                           	0x0068 0x02f4 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO03__UART1_DCE_RX                           	0x0068 0x02f4 0x0624 8 1
-#define MX6UL_PAD_GPIO1_IO04__ENET1_REF_CLK1                     	0x006c 0x02f8 0x0574 0 1
-#define MX6UL_PAD_GPIO1_IO04__PWM3_OUT                           	0x006c 0x02f8 0x0000 1 0
-#define MX6UL_PAD_GPIO1_IO04__USB_OTG1_PWR                       	0x006c 0x02f8 0x0000 2 0
-#define MX6UL_PAD_GPIO1_IO04__USDHC1_RESET_B                     	0x006c 0x02f8 0x0000 4 0
-#define MX6UL_PAD_GPIO1_IO04__GPIO1_IO04                         	0x006c 0x02f8 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO04__ENET2_1588_EVENT0_IN               	0x006c 0x02f8 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO04__UART5_DCE_TX                           	0x006c 0x02f8 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO04__UART5_DTE_RX                           	0x006c 0x02f8 0x0644 8 2
-#define MX6UL_PAD_GPIO1_IO05__ENET2_REF_CLK2                     	0x0070 0x02fc 0x057c 0 1
-#define MX6UL_PAD_GPIO1_IO05__PWM4_OUT                           	0x0070 0x02fc 0x0000 1 0
-#define MX6UL_PAD_GPIO1_IO05__ANATOP_OTG2_ID                     	0x0070 0x02fc 0x04bc 2 0
-#define MX6UL_PAD_GPIO1_IO05__CSI_FIELD                          	0x0070 0x02fc 0x0530 3 0
-#define MX6UL_PAD_GPIO1_IO05__USDHC1_VSELECT                     	0x0070 0x02fc 0x0000 4 0
-#define MX6UL_PAD_GPIO1_IO05__GPIO1_IO05                         	0x0070 0x02fc 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO05__ENET2_1588_EVENT0_OUT              	0x0070 0x02fc 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO05__UART5_DCE_RX                           	0x0070 0x02fc 0x0644 8 3
-#define MX6UL_PAD_GPIO1_IO05__UART5_DTE_TX                           	0x0070 0x02fc 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO06__ENET1_MDIO                         	0x0074 0x0300 0x0578 0 0
-#define MX6UL_PAD_GPIO1_IO06__ENET2_MDIO                         	0x0074 0x0300 0x0580 1 0
-#define MX6UL_PAD_GPIO1_IO06__USB_OTG_PWR_WAKE                   	0x0074 0x0300 0x0000 2 0
-#define MX6UL_PAD_GPIO1_IO06__CSI_MCLK                           	0x0074 0x0300 0x0000 3 0
-#define MX6UL_PAD_GPIO1_IO06__USDHC2_WP                          	0x0074 0x0300 0x069c 4 0
-#define MX6UL_PAD_GPIO1_IO06__GPIO1_IO06                         	0x0074 0x0300 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO06__CCM_WAIT                           	0x0074 0x0300 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO06__CCM_REF_EN_B                       	0x0074 0x0300 0x0000 7 0
-#define MX6UL_PAD_GPIO1_IO06__UART1_DCE_CTS                      	0x0074 0x0300 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO06__UART1_DTE_RTS                      	0x0074 0x0300 0x0620 8 0
-#define MX6UL_PAD_GPIO1_IO07__ENET1_MDC                          	0x0078 0x0304 0x0000 0 0
-#define MX6UL_PAD_GPIO1_IO07__ENET2_MDC                          	0x0078 0x0304 0x0000 1 0
-#define MX6UL_PAD_GPIO1_IO07__USB_OTG_HOST_MODE                  	0x0078 0x0304 0x0000 2 0
-#define MX6UL_PAD_GPIO1_IO07__CSI_PIXCLK                         	0x0078 0x0304 0x0528 3 0
-#define MX6UL_PAD_GPIO1_IO07__USDHC2_CD_B                        	0x0078 0x0304 0x0674 4 1
-#define MX6UL_PAD_GPIO1_IO07__GPIO1_IO07                         	0x0078 0x0304 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO07__CCM_STOP                           	0x0078 0x0304 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO07__UART1_DCE_RTS                      	0x0078 0x0304 0x0620 8 1
-#define MX6UL_PAD_GPIO1_IO07__UART1_DTE_CTS                      	0x0078 0x0304 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO08__PWM1_OUT                           	0x007c 0x0308 0x0000 0 0
-#define MX6UL_PAD_GPIO1_IO08__WDOG1_WDOG_B                       	0x007c 0x0308 0x0000 1 0
-#define MX6UL_PAD_GPIO1_IO08__SPDIF_OUT                          	0x007c 0x0308 0x0000 2 0
-#define MX6UL_PAD_GPIO1_IO08__CSI_VSYNC                          	0x007c 0x0308 0x052c 3 1
-#define MX6UL_PAD_GPIO1_IO08__USDHC2_VSELECT                     	0x007c 0x0308 0x0000 4 0
-#define MX6UL_PAD_GPIO1_IO08__GPIO1_IO08                         	0x007c 0x0308 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO08__CCM_PMIC_RDY                       	0x007c 0x0308 0x04c0 6 1
-#define MX6UL_PAD_GPIO1_IO08__UART5_DCE_RTS                      	0x007c 0x0308 0x0640 8 1
-#define MX6UL_PAD_GPIO1_IO08__UART5_DTE_CTS                      	0x007c 0x0308 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO09__PWM2_OUT                           	0x0080 0x030c 0x0000 0 0
-#define MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY                     	0x0080 0x030c 0x0000 1 0
-#define MX6UL_PAD_GPIO1_IO09__SPDIF_IN                           	0x0080 0x030c 0x0618 2 0
-#define MX6UL_PAD_GPIO1_IO09__CSI_HSYNC                          	0x0080 0x030c 0x0524 3 1
-#define MX6UL_PAD_GPIO1_IO09__USDHC2_RESET_B                     	0x0080 0x030c 0x0000 4 0
-#define MX6UL_PAD_GPIO1_IO09__GPIO1_IO09                         	0x0080 0x030c 0x0000 5 0
-#define MX6UL_PAD_GPIO1_IO09__USDHC1_RESET_B                     	0x0080 0x030c 0x0000 6 0
-#define MX6UL_PAD_GPIO1_IO09__UART5_DCE_CTS                      	0x0080 0x030c 0x0000 8 0
-#define MX6UL_PAD_GPIO1_IO09__UART5_DTE_RTS                      	0x0080 0x030c 0x0640 8 2
-#define MX6UL_PAD_UART1_TX_DATA__UART1_DCE_TX                        	0x0084 0x0310 0x0000 0 0
-#define MX6UL_PAD_UART1_TX_DATA__UART1_DTE_RX                        	0x0084 0x0310 0x0624 0 2
-#define MX6UL_PAD_UART1_TX_DATA__ENET1_RDATA02                   	0x0084 0x0310 0x0000 1 0
-#define MX6UL_PAD_UART1_TX_DATA__I2C3_SCL                        	0x0084 0x0310 0x05b4 2 0
-#define MX6UL_PAD_UART1_TX_DATA__CSI_DATA02                      	0x0084 0x0310 0x0000 3 0
-#define MX6UL_PAD_UART1_TX_DATA__GPT1_COMPARE1                   	0x0084 0x0310 0x0000 4 0
-#define MX6UL_PAD_UART1_TX_DATA__GPIO1_IO16                      	0x0084 0x0310 0x0000 5 0
-#define MX6UL_PAD_UART1_TX_DATA__SPDIF_OUT                       	0x0084 0x0310 0x0000 8 0
-#define MX6UL_PAD_UART1_RX_DATA__UART1_DCE_RX                        	0x0088 0x0314 0x0624 0 3
-#define MX6UL_PAD_UART1_RX_DATA__UART1_DTE_TX                        	0x0088 0x0314 0x0000 0 0
-#define MX6UL_PAD_UART1_RX_DATA__ENET1_RDATA03                   	0x0088 0x0314 0x0000 1 0
-#define MX6UL_PAD_UART1_RX_DATA__I2C3_SDA                        	0x0088 0x0314 0x05b8 2 0
-#define MX6UL_PAD_UART1_RX_DATA__CSI_DATA03                      	0x0088 0x0314 0x0000 3 0
-#define MX6UL_PAD_UART1_RX_DATA__GPT1_CLK                        	0x0088 0x0314 0x0594 4 0
-#define MX6UL_PAD_UART1_RX_DATA__GPIO1_IO17                      	0x0088 0x0314 0x0000 5 0
-#define MX6UL_PAD_UART1_RX_DATA__SPDIF_IN                        	0x0088 0x0314 0x0000 8 0
-#define MX6UL_PAD_UART1_CTS_B__UART1_DCE_CTS                     	0x008c 0x0318 0x0000 0 0
-#define MX6UL_PAD_UART1_CTS_B__UART1_DTE_RTS                     	0x008c 0x0318 0x0620 0 2
-#define MX6UL_PAD_UART1_CTS_B__ENET1_RX_CLK                      	0x008c 0x0318 0x0000 1 0
-#define MX6UL_PAD_UART1_CTS_B__USDHC1_WP                         	0x008c 0x0318 0x066c 2 1
-#define MX6UL_PAD_UART1_CTS_B__CSI_DATA04                        	0x008c 0x0318 0x0000 3 0
-#define MX6UL_PAD_UART1_CTS_B__ENET2_1588_EVENT1_IN              	0x008c 0x0318 0x0000 4 0
-#define MX6UL_PAD_UART1_CTS_B__GPIO1_IO18                        	0x008c 0x0318 0x0000 5 0
-#define MX6UL_PAD_UART1_CTS_B__USDHC2_WP                         	0x008c 0x0318 0x0000 8 0
-#define MX6UL_PAD_UART1_RTS_B__UART1_DCE_RTS                     	0x0090 0x031c 0x0620 0 3
-#define MX6UL_PAD_UART1_RTS_B__UART1_DTE_CTS                     	0x0090 0x031c 0x0000 0 0
-#define MX6UL_PAD_UART1_RTS_B__ENET1_TX_ER                       	0x0090 0x031c 0x0000 1 0
-#define MX6UL_PAD_UART1_RTS_B__USDHC1_CD_B                       	0x0090 0x031c 0x0668 2 1
-#define MX6UL_PAD_UART1_RTS_B__CSI_DATA05                        	0x0090 0x031c 0x0000 3 0
-#define MX6UL_PAD_UART1_RTS_B__ENET2_1588_EVENT1_OUT             	0x0090 0x031c 0x0000 4 0
-#define MX6UL_PAD_UART1_RTS_B__GPIO1_IO19                        	0x0090 0x031c 0x0000 5 0
-#define MX6UL_PAD_UART1_RTS_B__USDHC2_CD_B                       	0x0090 0x031c 0x0000 8 0
-#define MX6UL_PAD_UART2_TX_DATA__UART2_DCE_TX                        	0x0094 0x0320 0x0000 0 0
-#define MX6UL_PAD_UART2_TX_DATA__UART2_DTE_RX                        	0x0094 0x0320 0x062c 0 0
-#define MX6UL_PAD_UART2_TX_DATA__ENET1_TDATA02                   	0x0094 0x0320 0x0000 1 0
-#define MX6UL_PAD_UART2_TX_DATA__I2C4_SCL                        	0x0094 0x0320 0x05bc 2 0
-#define MX6UL_PAD_UART2_TX_DATA__CSI_DATA06                      	0x0094 0x0320 0x0000 3 0
-#define MX6UL_PAD_UART2_TX_DATA__GPT1_CAPTURE1                   	0x0094 0x0320 0x058c 4 1
-#define MX6UL_PAD_UART2_TX_DATA__GPIO1_IO20                      	0x0094 0x0320 0x0000 5 0
-#define MX6UL_PAD_UART2_TX_DATA__ECSPI3_SS0                      	0x0094 0x0320 0x0000 8 0
-#define MX6UL_PAD_UART2_RX_DATA__UART2_DCE_RX                        	0x0098 0x0324 0x062c 0 1
-#define MX6UL_PAD_UART2_RX_DATA__UART2_DTE_TX                        	0x0098 0x0324 0x0000 0 0
-#define MX6UL_PAD_UART2_RX_DATA__ENET1_TDATA03                   	0x0098 0x0324 0x0000 1 0
-#define MX6UL_PAD_UART2_RX_DATA__I2C4_SDA                        	0x0098 0x0324 0x05c0 2 0
-#define MX6UL_PAD_UART2_RX_DATA__CSI_DATA07                      	0x0098 0x0324 0x0000 3 0
-#define MX6UL_PAD_UART2_RX_DATA__GPT1_CAPTURE2                   	0x0098 0x0324 0x0590 4 0
-#define MX6UL_PAD_UART2_RX_DATA__GPIO1_IO21                      	0x0098 0x0324 0x0000 5 0
-#define MX6UL_PAD_UART2_RX_DATA__SJC_DONE                        	0x0098 0x0324 0x0000 7 0
-#define MX6UL_PAD_UART2_RX_DATA__ECSPI3_SCLK                     	0x0098 0x0324 0x0000 8 0
-#define MX6UL_PAD_UART2_CTS_B__UART2_DCE_CTS                     	0x009c 0x0328 0x0000 0 0
-#define MX6UL_PAD_UART2_CTS_B__UART2_DTE_RTS                     	0x009c 0x0328 0x0628 0 0
-#define MX6UL_PAD_UART2_CTS_B__ENET1_CRS                         	0x009c 0x0328 0x0000 1 0
-#define MX6UL_PAD_UART2_CTS_B__FLEXCAN2_TX                       	0x009c 0x0328 0x0000 2 0
-#define MX6UL_PAD_UART2_CTS_B__CSI_DATA08                        	0x009c 0x0328 0x0000 3 0
-#define MX6UL_PAD_UART2_CTS_B__GPT1_COMPARE2                     	0x009c 0x0328 0x0000 4 0
-#define MX6UL_PAD_UART2_CTS_B__GPIO1_IO22                        	0x009c 0x0328 0x0000 5 0
-#define MX6UL_PAD_UART2_CTS_B__SJC_DE_B                          	0x009c 0x0328 0x0000 7 0
-#define MX6UL_PAD_UART2_CTS_B__ECSPI3_MOSI                       	0x009c 0x0328 0x0000 8 0
-#define MX6UL_PAD_UART2_RTS_B__UART2_DCE_RTS                     	0x00a0 0x032c 0x0628 0 1
-#define MX6UL_PAD_UART2_RTS_B__UART2_DTE_CTS                     	0x00a0 0x032c 0x0000 0 0
-#define MX6UL_PAD_UART2_RTS_B__ENET1_COL                         	0x00a0 0x032c 0x0000 1 0
-#define MX6UL_PAD_UART2_RTS_B__FLEXCAN2_RX                       	0x00a0 0x032c 0x0588 2 0
-#define MX6UL_PAD_UART2_RTS_B__CSI_DATA09                        	0x00a0 0x032c 0x0000 3 0
-#define MX6UL_PAD_UART2_RTS_B__GPT1_COMPARE3                     	0x00a0 0x032c 0x0000 4 0
-#define MX6UL_PAD_UART2_RTS_B__GPIO1_IO23                        	0x00a0 0x032c 0x0000 5 0
-#define MX6UL_PAD_UART2_RTS_B__SJC_FAIL                          	0x00a0 0x032c 0x0000 7 0
-#define MX6UL_PAD_UART2_RTS_B__ECSPI3_MISO                       	0x00a0 0x032c 0x0000 8 0
-#define MX6UL_PAD_UART3_TX_DATA__UART3_DCE_TX                        	0x00a4 0x0330 0x0000 0 0
-#define MX6UL_PAD_UART3_TX_DATA__UART3_DTE_RX                        	0x00a4 0x0330 0x0634 0 0
-#define MX6UL_PAD_UART3_TX_DATA__ENET2_RDATA02                   	0x00a4 0x0330 0x0000 1 0
-#define MX6UL_PAD_UART3_TX_DATA__SIM1_PORT0_PD                   	0x00a4 0x0330 0x0000 2 0
-#define MX6UL_PAD_UART3_TX_DATA__CSI_DATA01                      	0x00a4 0x0330 0x0000 3 0
-#define MX6UL_PAD_UART3_TX_DATA__UART2_DCE_CTS                   	0x00a4 0x0330 0x0000 4 0
-#define MX6UL_PAD_UART3_TX_DATA__UART2_DTE_RTS                   	0x00a4 0x0330 0x0628 4 2
-#define MX6UL_PAD_UART3_TX_DATA__GPIO1_IO24                      	0x00a4 0x0330 0x0000 5 0
-#define MX6UL_PAD_UART3_TX_DATA__SJC_JTAG_ACT                    	0x00a4 0x0330 0x0000 7 0
-#define MX6UL_PAD_UART3_TX_DATA__ANATOP_OTG1_ID                  	0x00a4 0x0330 0x0000 8 0
-#define MX6UL_PAD_UART3_RX_DATA__UART3_DCE_RX                        	0x00a8 0x0334 0x0634 0 1
-#define MX6UL_PAD_UART3_RX_DATA__UART3_DTE_TX                        	0x00a8 0x0334 0x0000 0 0
-#define MX6UL_PAD_UART3_RX_DATA__ENET2_RDATA03                   	0x00a8 0x0334 0x0000 1 0
-#define MX6UL_PAD_UART3_RX_DATA__SIM2_PORT0_PD                   	0x00a8 0x0334 0x0000 2 0
-#define MX6UL_PAD_UART3_RX_DATA__CSI_DATA00                      	0x00a8 0x0334 0x0000 3 0
-#define MX6UL_PAD_UART3_RX_DATA__UART2_DCE_RTS                   	0x00a8 0x0334 0x0628 4 3
-#define MX6UL_PAD_UART3_RX_DATA__UART2_DTE_CTS                   	0x00a8 0x0334 0x0000 4 0
-#define MX6UL_PAD_UART3_RX_DATA__GPIO1_IO25                      	0x00a8 0x0334 0x0000 5 0
-#define MX6UL_PAD_UART3_RX_DATA__EPIT1_OUT                       	0x00a8 0x0334 0x0000 8 0
-#define MX6UL_PAD_UART3_CTS_B__UART3_DCE_CTS                     	0x00ac 0x0338 0x0000 0 0
-#define MX6UL_PAD_UART3_CTS_B__UART3_DTE_RTS                     	0x00ac 0x0338 0x0630 0 0
-#define MX6UL_PAD_UART3_CTS_B__ENET2_RX_CLK                      	0x00ac 0x0338 0x0000 1 0
-#define MX6UL_PAD_UART3_CTS_B__FLEXCAN1_TX                       	0x00ac 0x0338 0x0000 2 0
-#define MX6UL_PAD_UART3_CTS_B__CSI_DATA10                        	0x00ac 0x0338 0x0000 3 0
-#define MX6UL_PAD_UART3_CTS_B__ENET1_1588_EVENT1_IN              	0x00ac 0x0338 0x0000 4 0
-#define MX6UL_PAD_UART3_CTS_B__GPIO1_IO26                        	0x00ac 0x0338 0x0000 5 0
-#define MX6UL_PAD_UART3_CTS_B__EPIT2_OUT                         	0x00ac 0x0338 0x0000 8 0
-#define MX6UL_PAD_UART3_RTS_B__UART3_DCE_RTS                     	0x00b0 0x033c 0x0630 0 1
-#define MX6UL_PAD_UART3_RTS_B__UART3_DTE_CTS                     	0x00b0 0x033c 0x0000 0 0
-#define MX6UL_PAD_UART3_RTS_B__ENET2_TX_ER                       	0x00b0 0x033c 0x0000 1 0
-#define MX6UL_PAD_UART3_RTS_B__FLEXCAN1_RX                       	0x00b0 0x033c 0x0584 2 0
-#define MX6UL_PAD_UART3_RTS_B__CSI_DATA11                        	0x00b0 0x033c 0x0000 3 0
-#define MX6UL_PAD_UART3_RTS_B__ENET1_1588_EVENT1_OUT             	0x00b0 0x033c 0x0000 4 0
-#define MX6UL_PAD_UART3_RTS_B__GPIO1_IO27                        	0x00b0 0x033c 0x0000 5 0
-#define MX6UL_PAD_UART3_RTS_B__WDOG1_WDOG_B                      	0x00b0 0x033c 0x0000 8 0
-#define MX6UL_PAD_UART4_TX_DATA__UART4_DCE_TX                        	0x00b4 0x0340 0x0000 0 0
-#define MX6UL_PAD_UART4_TX_DATA__UART4_DTE_RX                        	0x00b4 0x0340 0x063c 0 0
-#define MX6UL_PAD_UART4_TX_DATA__ENET2_TDATA02                   	0x00b4 0x0340 0x0000 1 0
-#define MX6UL_PAD_UART4_TX_DATA__I2C1_SCL                        	0x00b4 0x0340 0x05a4 2 1
-#define MX6UL_PAD_UART4_TX_DATA__CSI_DATA12                      	0x00b4 0x0340 0x0000 3 0
-#define MX6UL_PAD_UART4_TX_DATA__CSU_CSU_ALARM_AUT02             	0x00b4 0x0340 0x0000 4 0
-#define MX6UL_PAD_UART4_TX_DATA__GPIO1_IO28                      	0x00b4 0x0340 0x0000 5 0
-#define MX6UL_PAD_UART4_TX_DATA__ECSPI2_SCLK                     	0x00b4 0x0340 0x0000 8 0
-#define MX6UL_PAD_UART4_RX_DATA__UART4_DCE_RX                        	0x00b8 0x0344 0x063c 0 1
-#define MX6UL_PAD_UART4_RX_DATA__UART4_DTE_TX                        	0x00b8 0x0344 0x0000 0 0
-#define MX6UL_PAD_UART4_RX_DATA__ENET2_TDATA03                   	0x00b8 0x0344 0x0000 1 0
-#define MX6UL_PAD_UART4_RX_DATA__I2C1_SDA                        	0x00b8 0x0344 0x05a8 2 2
-#define MX6UL_PAD_UART4_RX_DATA__CSI_DATA13                      	0x00b8 0x0344 0x0000 3 0
-#define MX6UL_PAD_UART4_RX_DATA__CSU_CSU_ALARM_AUT01             	0x00b8 0x0344 0x0000 4 0
-#define MX6UL_PAD_UART4_RX_DATA__GPIO1_IO29                      	0x00b8 0x0344 0x0000 5 0
-#define MX6UL_PAD_UART4_RX_DATA__ECSPI2_SS0                      	0x00b8 0x0344 0x0000 8 0
-#define MX6UL_PAD_UART5_TX_DATA__GPIO1_IO30                      	0x00bc 0x0348 0x0000 5 0
-#define MX6UL_PAD_UART5_TX_DATA__ECSPI2_MOSI                     	0x00bc 0x0348 0x0000 8 0
-#define MX6UL_PAD_UART5_TX_DATA__UART5_DCE_TX                        	0x00bc 0x0348 0x0000 0 0
-#define MX6UL_PAD_UART5_TX_DATA__UART5_DTE_RX                        	0x00bc 0x0348 0x0644 0 4
-#define MX6UL_PAD_UART5_TX_DATA__ENET2_CRS                       	0x00bc 0x0348 0x0000 1 0
-#define MX6UL_PAD_UART5_TX_DATA__I2C2_SCL                        	0x00bc 0x0348 0x05ac 2 2
-#define MX6UL_PAD_UART5_TX_DATA__CSI_DATA14                      	0x00bc 0x0348 0x0000 3 0
-#define MX6UL_PAD_UART5_TX_DATA__CSU_CSU_ALARM_AUT00             	0x00bc 0x0348 0x0000 4 0
-#define MX6UL_PAD_UART5_RX_DATA__UART5_DCE_RX                        	0x00c0 0x034c 0x0644 0 5
-#define MX6UL_PAD_UART5_RX_DATA__UART5_DTE_TX                        	0x00c0 0x034c 0x0000 0 0
-#define MX6UL_PAD_UART5_RX_DATA__ENET2_COL                       	0x00c0 0x034c 0x0000 1 0
-#define MX6UL_PAD_UART5_RX_DATA__I2C2_SDA                        	0x00c0 0x034c 0x05b0 2 2
-#define MX6UL_PAD_UART5_RX_DATA__CSI_DATA15                      	0x00c0 0x034c 0x0000 3 0
-#define MX6UL_PAD_UART5_RX_DATA__CSU_CSU_INT_DEB                 	0x00c0 0x034c 0x0000 4 0
-#define MX6UL_PAD_UART5_RX_DATA__GPIO1_IO31                      	0x00c0 0x034c 0x0000 5 0
-#define MX6UL_PAD_UART5_RX_DATA__ECSPI2_MISO                     	0x00c0 0x034c 0x0000 8 0
-#define MX6UL_PAD_ENET1_RX_DATA0__ENET1_RDATA00                  	0x00c4 0x0350 0x0000 0 0
-#define MX6UL_PAD_ENET1_RX_DATA0__UART4_DCE_RTS                  	0x00c4 0x0350 0x0638 1 0
-#define MX6UL_PAD_ENET1_RX_DATA0__UART4_DTE_CTS                  	0x00c4 0x0350 0x0000 1 0
-#define MX6UL_PAD_ENET1_RX_DATA0__PWM1_OUT                       	0x00c4 0x0350 0x0000 2 0
-#define MX6UL_PAD_ENET1_RX_DATA0__CSI_DATA16                     	0x00c4 0x0350 0x0000 3 0
-#define MX6UL_PAD_ENET1_RX_DATA0__FLEXCAN1_TX                    	0x00c4 0x0350 0x0000 4 0
-#define MX6UL_PAD_ENET1_RX_DATA0__GPIO2_IO00                     	0x00c4 0x0350 0x0000 5 0
-#define MX6UL_PAD_ENET1_RX_DATA0__KPP_ROW00                      	0x00c4 0x0350 0x0000 6 0
-#define MX6UL_PAD_ENET1_RX_DATA0__USDHC1_LCTL                    	0x00c4 0x0350 0x0000 8 0
-#define MX6UL_PAD_ENET1_RX_DATA1__ENET1_RDATA01                  	0x00c8 0x0354 0x0000 0 0
-#define MX6UL_PAD_ENET1_RX_DATA1__UART4_DCE_CTS                  	0x00c8 0x0354 0x0000 1 0
-#define MX6UL_PAD_ENET1_RX_DATA1__UART4_DTE_RTS                  	0x00c8 0x0354 0x0638 1 1
-#define MX6UL_PAD_ENET1_RX_DATA1__PWM2_OUT                       	0x00c8 0x0354 0x0000 2 0
-#define MX6UL_PAD_ENET1_RX_DATA1__CSI_DATA17                     	0x00c8 0x0354 0x0000 3 0
-#define MX6UL_PAD_ENET1_RX_DATA1__FLEXCAN1_RX                    	0x00c8 0x0354 0x0584 4 1
-#define MX6UL_PAD_ENET1_RX_DATA1__GPIO2_IO01                     	0x00c8 0x0354 0x0000 5 0
-#define MX6UL_PAD_ENET1_RX_DATA1__KPP_COL00                      	0x00c8 0x0354 0x0000 6 0
-#define MX6UL_PAD_ENET1_RX_DATA1__USDHC2_LCTL                    	0x00c8 0x0354 0x0000 8 0
-#define MX6UL_PAD_ENET1_RX_EN__ENET1_RX_EN                       	0x00cc 0x0358 0x0000 0 0
-#define MX6UL_PAD_ENET1_RX_EN__UART5_DCE_RTS                     	0x00cc 0x0358 0x0640 1 3
-#define MX6UL_PAD_ENET1_RX_EN__UART5_DTE_CTS                     	0x00cc 0x0358 0x0000 1 0
-#define MX6UL_PAD_ENET1_RX_EN__CSI_DATA18                        	0x00cc 0x0358 0x0000 3 0
-#define MX6UL_PAD_ENET1_RX_EN__FLEXCAN2_TX                       	0x00cc 0x0358 0x0000 4 0
-#define MX6UL_PAD_ENET1_RX_EN__GPIO2_IO02                        	0x00cc 0x0358 0x0000 5 0
-#define MX6UL_PAD_ENET1_RX_EN__KPP_ROW01                         	0x00cc 0x0358 0x0000 6 0
-#define MX6UL_PAD_ENET1_RX_EN__USDHC1_VSELECT                    	0x00cc 0x0358 0x0000 8 0
-#define MX6UL_PAD_ENET1_TX_DATA0__ENET1_TDATA00                  	0x00d0 0x035c 0x0000 0 0
-#define MX6UL_PAD_ENET1_TX_DATA0__UART5_DCE_CTS                  	0x00d0 0x035c 0x0000 1 0
-#define MX6UL_PAD_ENET1_TX_DATA0__UART5_DTE_RTS                  	0x00d0 0x035c 0x0640 1 4
-#define MX6UL_PAD_ENET1_TX_DATA0__CSI_DATA19                     	0x00d0 0x035c 0x0000 3 0
-#define MX6UL_PAD_ENET1_TX_DATA0__FLEXCAN2_RX                    	0x00d0 0x035c 0x0588 4 1
-#define MX6UL_PAD_ENET1_TX_DATA0__GPIO2_IO03                     	0x00d0 0x035c 0x0000 5 0
-#define MX6UL_PAD_ENET1_TX_DATA0__KPP_COL01                      	0x00d0 0x035c 0x0000 6 0
-#define MX6UL_PAD_ENET1_TX_DATA0__USDHC2_VSELECT                 	0x00d0 0x035c 0x0000 8 0
-#define MX6UL_PAD_ENET1_TX_DATA1__ENET1_TDATA01                  	0x00d4 0x0360 0x0000 0 0
-#define MX6UL_PAD_ENET1_TX_DATA1__UART6_DCE_CTS                  	0x00d4 0x0360 0x0000 1 0
-#define MX6UL_PAD_ENET1_TX_DATA1__UART6_DTE_RTS                  	0x00d4 0x0360 0x0648 1 2
-#define MX6UL_PAD_ENET1_TX_DATA1__PWM5_OUT                       	0x00d4 0x0360 0x0000 2 0
-#define MX6UL_PAD_ENET1_TX_DATA1__CSI_DATA20                     	0x00d4 0x0360 0x0000 3 0
-#define MX6UL_PAD_ENET1_TX_DATA1__ENET2_MDIO                     	0x00d4 0x0360 0x0580 4 1
-#define MX6UL_PAD_ENET1_TX_DATA1__GPIO2_IO04                     	0x00d4 0x0360 0x0000 5 0
-#define MX6UL_PAD_ENET1_TX_DATA1__KPP_ROW02                      	0x00d4 0x0360 0x0000 6 0
-#define MX6UL_PAD_ENET1_TX_DATA1__WDOG1_WDOG_RST_B_DEB           	0x00d4 0x0360 0x0000 8 0
-#define MX6UL_PAD_ENET1_TX_EN__ENET1_TX_EN                       	0x00d8 0x0364 0x0000 0 0
-#define MX6UL_PAD_ENET1_TX_EN__UART6_DCE_RTS                     	0x00d8 0x0364 0x0648 1 3
-#define MX6UL_PAD_ENET1_TX_EN__UART6_DTE_CTS                     	0x00d8 0x0364 0x0000 1 0
-#define MX6UL_PAD_ENET1_TX_EN__PWM6_OUT                          	0x00d8 0x0364 0x0000 2 0
-#define MX6UL_PAD_ENET1_TX_EN__CSI_DATA21                        	0x00d8 0x0364 0x0000 3 0
-#define MX6UL_PAD_ENET1_TX_EN__ENET2_MDC                         	0x00d8 0x0364 0x0000 4 0
-#define MX6UL_PAD_ENET1_TX_EN__GPIO2_IO05                        	0x00d8 0x0364 0x0000 5 0
-#define MX6UL_PAD_ENET1_TX_EN__KPP_COL02                         	0x00d8 0x0364 0x0000 6 0
-#define MX6UL_PAD_ENET1_TX_EN__WDOG2_WDOG_RST_B_DEB              	0x00d8 0x0364 0x0000 8 0
-#define MX6UL_PAD_ENET1_TX_CLK__ENET1_TX_CLK                     	0x00dc 0x0368 0x0000 0 0
-#define MX6UL_PAD_ENET1_TX_CLK__UART7_DCE_CTS                    	0x00dc 0x0368 0x0000 1 0
-#define MX6UL_PAD_ENET1_TX_CLK__UART7_DTE_RTS                    	0x00dc 0x0368 0x0650 1 0
-#define MX6UL_PAD_ENET1_TX_CLK__PWM7_OUT                         	0x00dc 0x0368 0x0000 2 0
-#define MX6UL_PAD_ENET1_TX_CLK__CSI_DATA22                       	0x00dc 0x0368 0x0000 3 0
-#define MX6UL_PAD_ENET1_TX_CLK__ENET1_REF_CLK1                   	0x00dc 0x0368 0x0574 4 2
-#define MX6UL_PAD_ENET1_TX_CLK__GPIO2_IO06                       	0x00dc 0x0368 0x0000 5 0
-#define MX6UL_PAD_ENET1_TX_CLK__KPP_ROW03                        	0x00dc 0x0368 0x0000 6 0
-#define MX6UL_PAD_ENET1_TX_CLK__GPT1_CLK                         	0x00dc 0x0368 0x0000 8 0
-#define MX6UL_PAD_ENET1_RX_ER__ENET1_RX_ER                       	0x00e0 0x036c 0x0000 0 0
-#define MX6UL_PAD_ENET1_RX_ER__UART7_DCE_RTS                     	0x00e0 0x036c 0x0650 1 1
-#define MX6UL_PAD_ENET1_RX_ER__UART7_DTE_CTS                     	0x00e0 0x036c 0x0000 1 0
-#define MX6UL_PAD_ENET1_RX_ER__PWM8_OUT                          	0x00e0 0x036c 0x0000 2 0
-#define MX6UL_PAD_ENET1_RX_ER__CSI_DATA23                        	0x00e0 0x036c 0x0000 3 0
-#define MX6UL_PAD_ENET1_RX_ER__EIM_CRE                           	0x00e0 0x036c 0x0000 4 0
-#define MX6UL_PAD_ENET1_RX_ER__GPIO2_IO07                        	0x00e0 0x036c 0x0000 5 0
-#define MX6UL_PAD_ENET1_RX_ER__KPP_COL03                         	0x00e0 0x036c 0x0000 6 0
-#define MX6UL_PAD_ENET1_RX_ER__GPT1_CAPTURE2                     	0x00e0 0x036c 0x0000 8 0
-#define MX6UL_PAD_ENET2_RX_DATA0__ENET2_RDATA00                  	0x00e4 0x0370 0x0000 0 0
-#define MX6UL_PAD_ENET2_RX_DATA0__UART6_DCE_TX                       	0x00e4 0x0370 0x0000 1 0
-#define MX6UL_PAD_ENET2_RX_DATA0__UART6_DTE_RX                    	0x00e4 0x0370 0x064c 1 1
-#define MX6UL_PAD_ENET2_RX_DATA0__SIM1_PORT0_TRXD                	0x00e4 0x0370 0x0000 2 0
-#define MX6UL_PAD_ENET2_RX_DATA0__I2C3_SCL                       	0x00e4 0x0370 0x05b4 3 1
-#define MX6UL_PAD_ENET2_RX_DATA0__ENET1_MDIO                     	0x00e4 0x0370 0x0578 4 1
-#define MX6UL_PAD_ENET2_RX_DATA0__GPIO2_IO08                     	0x00e4 0x0370 0x0000 5 0
-#define MX6UL_PAD_ENET2_RX_DATA0__KPP_ROW04                      	0x00e4 0x0370 0x0000 6 0
-#define MX6UL_PAD_ENET2_RX_DATA0__USB_OTG1_PWR                   	0x00e4 0x0370 0x0000 8 0
-#define MX6UL_PAD_ENET2_RX_DATA1__ENET2_RDATA01                  	0x00e8 0x0374 0x0000 0 0
-#define MX6UL_PAD_ENET2_RX_DATA1__UART6_DCE_RX                       	0x00e8 0x0374 0x064c 1 2
-#define MX6UL_PAD_ENET2_RX_DATA1__UART6_DTE_TX                       	0x00e8 0x0374 0x0000 1 0
-#define MX6UL_PAD_ENET2_RX_DATA1__SIM1_PORT0_cLK                 	0x00e8 0x0374 0x0000 2 0
-#define MX6UL_PAD_ENET2_RX_DATA1__I2C3_SDA                       	0x00e8 0x0374 0x05b8 3 1
-#define MX6UL_PAD_ENET2_RX_DATA1__ENET1_MDC                      	0x00e8 0x0374 0x0000 4 0
-#define MX6UL_PAD_ENET2_RX_DATA1__GPIO2_IO09                     	0x00e8 0x0374 0x0000 5 0
-#define MX6UL_PAD_ENET2_RX_DATA1__KPP_COL04                      	0x00e8 0x0374 0x0000 6 0
-#define MX6UL_PAD_ENET2_RX_DATA1__USB_OTG1_OC                    	0x00e8 0x0374 0x0000 8 0
-#define MX6UL_PAD_ENET2_RX_EN__ENET2_RX_EN                       	0x00ec 0x0378 0x0000 0 0
-#define MX6UL_PAD_ENET2_RX_EN__UART7_DCE_TX                          	0x00ec 0x0378 0x0000 1 0
-#define MX6UL_PAD_ENET2_RX_EN__UART7_DTE_RX                          	0x00ec 0x0378 0x0654 1 0
-#define MX6UL_PAD_ENET2_RX_EN__SIM1_PORT0_RST_B                  	0x00ec 0x0378 0x0000 2 0
-#define MX6UL_PAD_ENET2_RX_EN__I2C4_SCL                          	0x00ec 0x0378 0x05bc 3 1
-#define MX6UL_PAD_ENET2_RX_EN__EIM_ADDR26                        	0x00ec 0x0378 0x0000 4 0
-#define MX6UL_PAD_ENET2_RX_EN__GPIO2_IO10                        	0x00ec 0x0378 0x0000 5 0
-#define MX6UL_PAD_ENET2_RX_EN__KPP_ROW05                         	0x00ec 0x0378 0x0000 6 0
-#define MX6UL_PAD_ENET2_RX_EN__ENET1_REF_CLK_25M                 	0x00ec 0x0378 0x0000 8 0
-#define MX6UL_PAD_ENET2_TX_DATA0__ENET2_TDATA00                  	0x00f0 0x037c 0x0000 0 0
-#define MX6UL_PAD_ENET2_TX_DATA0__UART7_DCE_RX                       	0x00f0 0x037c 0x0654 1 1
-#define MX6UL_PAD_ENET2_TX_DATA0__UART7_DTE_TX                       	0x00f0 0x037c 0x0000 1 0
-#define MX6UL_PAD_ENET2_TX_DATA0__SIM1_PORT0_SVEN                	0x00f0 0x037c 0x0000 2 0
-#define MX6UL_PAD_ENET2_TX_DATA0__I2C4_SDA                       	0x00f0 0x037c 0x05c0 3 1
-#define MX6UL_PAD_ENET2_TX_DATA0__EIM_EB_B02                     	0x00f0 0x037c 0x0000 4 0
-#define MX6UL_PAD_ENET2_TX_DATA0__GPIO2_IO11                     	0x00f0 0x037c 0x0000 5 0
-#define MX6UL_PAD_ENET2_TX_DATA0__KPP_COL05                      	0x00f0 0x037c 0x0000 6 0
-#define MX6UL_PAD_ENET2_TX_DATA1__ENET2_TDATA01                  	0x00f4 0x0380 0x0000 0 0
-#define MX6UL_PAD_ENET2_TX_DATA1__UART8_DCE_TX                       	0x00f4 0x0380 0x0000 1 0
-#define MX6UL_PAD_ENET2_TX_DATA1__UART8_DTE_RX                       	0x00f4 0x0380 0x065c 1 0
-#define MX6UL_PAD_ENET2_TX_DATA1__SIM2_PORT0_TRXD                	0x00f4 0x0380 0x0000 2 0
-#define MX6UL_PAD_ENET2_TX_DATA1__ECSPI4_SCLK                    	0x00f4 0x0380 0x0564 3 0
-#define MX6UL_PAD_ENET2_TX_DATA1__EIM_EB_B03                     	0x00f4 0x0380 0x0000 4 0
-#define MX6UL_PAD_ENET2_TX_DATA1__GPIO2_IO12                     	0x00f4 0x0380 0x0000 5 0
-#define MX6UL_PAD_ENET2_TX_DATA1__KPP_ROW06                      	0x00f4 0x0380 0x0000 6 0
-#define MX6UL_PAD_ENET2_TX_DATA1__USB_OTG2_PWR                   	0x00f4 0x0380 0x0000 8 0
-#define MX6UL_PAD_ENET2_TX_EN__ENET2_TX_EN                       	0x00f8 0x0384 0x0000 0 0
-#define MX6UL_PAD_ENET2_TX_EN__UART8_DCE_RX                          	0x00f8 0x0384 0x065c 1 1
-#define MX6UL_PAD_ENET2_TX_EN__UART8_DTE_TX                          	0x00f8 0x0384 0x0000 1 0
-#define MX6UL_PAD_ENET2_TX_EN__SIM2_PORT0_cLK                    	0x00f8 0x0384 0x0000 2 0
-#define MX6UL_PAD_ENET2_TX_EN__ECSPI4_MOSI                       	0x00f8 0x0384 0x056c 3 0
-#define MX6UL_PAD_ENET2_TX_EN__EIM_ACLK_FREERUN                  	0x00f8 0x0384 0x0000 4 0
-#define MX6UL_PAD_ENET2_TX_EN__GPIO2_IO13                        	0x00f8 0x0384 0x0000 5 0
-#define MX6UL_PAD_ENET2_TX_EN__KPP_COL06                         	0x00f8 0x0384 0x0000 6 0
-#define MX6UL_PAD_ENET2_TX_EN__USB_OTG2_OC                       	0x00f8 0x0384 0x0000 8 0
-#define MX6UL_PAD_ENET2_TX_CLK__ENET2_TX_CLK                     	0x00fc 0x0388 0x0000 0 0
-#define MX6UL_PAD_ENET2_TX_CLK__UART8_DCE_CTS                    	0x00fc 0x0388 0x0000 1 0
-#define MX6UL_PAD_ENET2_TX_CLK__UART8_DTE_RTS                    	0x00fc 0x0388 0x0658 1 0
-#define MX6UL_PAD_ENET2_TX_CLK__SIM2_PORT0_RST_B                 	0x00fc 0x0388 0x0000 2 0
-#define MX6UL_PAD_ENET2_TX_CLK__ECSPI4_MISO                      	0x00fc 0x0388 0x0568 3 0
-#define MX6UL_PAD_ENET2_TX_CLK__ENET2_REF_CLK2                   	0x00fc 0x0388 0x057c 4 2
-#define MX6UL_PAD_ENET2_TX_CLK__GPIO2_IO14                       	0x00fc 0x0388 0x0000 5 0
-#define MX6UL_PAD_ENET2_TX_CLK__KPP_ROW07                        	0x00fc 0x0388 0x0000 6 0
-#define MX6UL_PAD_ENET2_TX_CLK__ANATOP_OTG2_ID                   	0x00fc 0x0388 0x0000 8 0
-#define MX6UL_PAD_ENET2_RX_ER__ENET2_RX_ER                       	0x0100 0x038c 0x0000 0 0
-#define MX6UL_PAD_ENET2_RX_ER__UART8_DCE_RTS                     	0x0100 0x038c 0x0658 1 1
-#define MX6UL_PAD_ENET2_RX_ER__UART8_DTE_CTS                     	0x0100 0x038c 0x0000 1 0
-#define MX6UL_PAD_ENET2_RX_ER__SIM2_PORT0_SVEN                   	0x0100 0x038c 0x0000 2 0
-#define MX6UL_PAD_ENET2_RX_ER__ECSPI4_SS0                        	0x0100 0x038c 0x0000 3 0
-#define MX6UL_PAD_ENET2_RX_ER__EIM_ADDR25                        	0x0100 0x038c 0x0000 4 0
-#define MX6UL_PAD_ENET2_RX_ER__GPIO2_IO15                        	0x0100 0x038c 0x0000 5 0
-#define MX6UL_PAD_ENET2_RX_ER__KPP_COL07                         	0x0100 0x038c 0x0000 6 0
-#define MX6UL_PAD_ENET2_RX_ER__WDOG1_WDOG_ANY                    	0x0100 0x038c 0x0000 8 0
-#define MX6UL_PAD_LCD_CLK__LCDIF_CLK                             	0x0104 0x0390 0x0000 0 0
-#define MX6UL_PAD_LCD_CLK__LCDIF_WR_RWN                          	0x0104 0x0390 0x0000 1 0
-#define MX6UL_PAD_LCD_CLK__UART4_DCE_TX                              	0x0104 0x0390 0x0000 2 0
-#define MX6UL_PAD_LCD_CLK__UART4_DTE_RX                              	0x0104 0x0390 0x063c 2 2
-#define MX6UL_PAD_LCD_CLK__SAI3_MCLK                             	0x0104 0x0390 0x0000 3 0
-#define MX6UL_PAD_LCD_CLK__EIM_CS2_B                             	0x0104 0x0390 0x0000 4 0
-#define MX6UL_PAD_LCD_CLK__GPIO3_IO00                            	0x0104 0x0390 0x0000 5 0
-#define MX6UL_PAD_LCD_CLK__WDOG1_WDOG_RST_B_DEB                  	0x0104 0x0390 0x0000 8 0
-#define MX6UL_PAD_LCD_ENABLE__LCDIF_ENABLE                       	0x0108 0x0394 0x0000 0 0
-#define MX6UL_PAD_LCD_ENABLE__LCDIF_RD_E                         	0x0108 0x0394 0x0000 1 0
-#define MX6UL_PAD_LCD_ENABLE__UART4_DCE_RX                           	0x0108 0x0394 0x063c 2 3
-#define MX6UL_PAD_LCD_ENABLE__UART4_DTE_TX                           	0x0108 0x0394 0x0000 2 0
-#define MX6UL_PAD_LCD_ENABLE__SAI3_TX_SYNC                       	0x0108 0x0394 0x060c 3 0
-#define MX6UL_PAD_LCD_ENABLE__EIM_CS3_B                          	0x0108 0x0394 0x0000 4 0
-#define MX6UL_PAD_LCD_ENABLE__GPIO3_IO01                         	0x0108 0x0394 0x0000 5 0
-#define MX6UL_PAD_LCD_ENABLE__ECSPI2_RDY                         	0x0108 0x0394 0x0000 8 0
-#define MX6UL_PAD_LCD_HSYNC__LCDIF_HSYNC                         	0x010c 0x0398 0x05dc 0 0
-#define MX6UL_PAD_LCD_HSYNC__LCDIF_RS                            	0x010c 0x0398 0x0000 1 0
-#define MX6UL_PAD_LCD_HSYNC__UART4_DCE_CTS                       	0x010c 0x0398 0x0000 2 0
-#define MX6UL_PAD_LCD_HSYNC__UART4_DTE_RTS                       	0x010c 0x0398 0x0638 2 2
-#define MX6UL_PAD_LCD_HSYNC__SAI3_TX_BCLK                        	0x010c 0x0398 0x0608 3 0
-#define MX6UL_PAD_LCD_HSYNC__WDOG3_WDOG_RST_B_DEB                	0x010c 0x0398 0x0000 4 0
-#define MX6UL_PAD_LCD_HSYNC__GPIO3_IO02                          	0x010c 0x0398 0x0000 5 0
-#define MX6UL_PAD_LCD_HSYNC__ECSPI2_SS1                          	0x010c 0x0398 0x0000 8 0
-#define MX6UL_PAD_LCD_VSYNC__LCDIF_VSYNC                         	0x0110 0x039c 0x0000 0 0
-#define MX6UL_PAD_LCD_VSYNC__LCDIF_BUSY                          	0x0110 0x039c 0x05dc 1 1
-#define MX6UL_PAD_LCD_VSYNC__UART4_DCE_RTS                       	0x0110 0x039c 0x0638 2 3
-#define MX6UL_PAD_LCD_VSYNC__UART4_DTE_CTS                       	0x0110 0x039c 0x0000 2 0
-#define MX6UL_PAD_LCD_VSYNC__SAI3_RX_DATA                        	0x0110 0x039c 0x0000 3 0
-#define MX6UL_PAD_LCD_VSYNC__WDOG2_WDOG_B                        	0x0110 0x039c 0x0000 4 0
-#define MX6UL_PAD_LCD_VSYNC__GPIO3_IO03                          	0x0110 0x039c 0x0000 5 0
-#define MX6UL_PAD_LCD_VSYNC__ECSPI2_SS2                          	0x0110 0x039c 0x0000 8 0
-#define MX6UL_PAD_LCD_RESET__LCDIF_RESET                         	0x0114 0x03a0 0x0000 0 0
-#define MX6UL_PAD_LCD_RESET__LCDIF_CS                            	0x0114 0x03a0 0x0000 1 0
-#define MX6UL_PAD_LCD_RESET__CA7_MX6UL_EVENTI                    	0x0114 0x03a0 0x0000 2 0
-#define MX6UL_PAD_LCD_RESET__SAI3_TX_DATA                        	0x0114 0x03a0 0x0000 3 0
-#define MX6UL_PAD_LCD_RESET__WDOG1_WDOG_ANY                      	0x0114 0x03a0 0x0000 4 0
-#define MX6UL_PAD_LCD_RESET__GPIO3_IO04                          	0x0114 0x03a0 0x0000 5 0
-#define MX6UL_PAD_LCD_RESET__ECSPI2_SS3                          	0x0114 0x03a0 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA00__LCDIF_DATA00                       	0x0118 0x03a4 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA00__PWM1_OUT                           	0x0118 0x03a4 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA00__ENET1_1588_EVENT2_IN               	0x0118 0x03a4 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA00__I2C3_SDA                           	0x0118 0x03a4 0x05b8 4 2
-#define MX6UL_PAD_LCD_DATA00__GPIO3_IO05                         	0x0118 0x03a4 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA00__SRC_BT_CFG00                       	0x0118 0x03a4 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA00__SAI1_MCLK                          	0x0118 0x03a4 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA01__LCDIF_DATA01                       	0x011c 0x03a8 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA01__PWM2_OUT                           	0x011c 0x03a8 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA01__ENET1_1588_EVENT2_OUT              	0x011c 0x03a8 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA01__I2C3_SCL                           	0x011c 0x03a8 0x05b4 4 2
-#define MX6UL_PAD_LCD_DATA01__GPIO3_IO06                         	0x011c 0x03a8 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA01__SRC_BT_CFG01                       	0x011c 0x03a8 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA01__SAI1_TX_SYNC                       	0x011c 0x03a8 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA02__LCDIF_DATA02                       	0x0120 0x03ac 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA02__PWM3_OUT                           	0x0120 0x03ac 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA02__ENET1_1588_EVENT3_IN               	0x0120 0x03ac 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA02__I2C4_SDA                           	0x0120 0x03ac 0x05c0 4 2
-#define MX6UL_PAD_LCD_DATA02__GPIO3_IO07                         	0x0120 0x03ac 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA02__SRC_BT_CFG02                       	0x0120 0x03ac 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA02__SAI1_TX_BCLK                       	0x0120 0x03ac 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA03__LCDIF_DATA03                       	0x0124 0x03b0 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA03__PWM4_OUT                           	0x0124 0x03b0 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA03__ENET1_1588_EVENT3_OUT              	0x0124 0x03b0 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA03__I2C4_SCL                           	0x0124 0x03b0 0x05bc 4 2
-#define MX6UL_PAD_LCD_DATA03__GPIO3_IO08                         	0x0124 0x03b0 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA03__SRC_BT_CFG03                       	0x0124 0x03b0 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA03__SAI1_RX_DATA                       	0x0124 0x03b0 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA04__LCDIF_DATA04                       	0x0128 0x03b4 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA04__UART8_DCE_CTS                      	0x0128 0x03b4 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA04__UART8_DTE_RTS                      	0x0128 0x03b4 0x0658 1 2
-#define MX6UL_PAD_LCD_DATA04__ENET2_1588_EVENT2_IN               	0x0128 0x03b4 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA04__SPDIF_SR_CLK                       	0x0128 0x03b4 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA04__GPIO3_IO09                         	0x0128 0x03b4 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA04__SRC_BT_CFG04                       	0x0128 0x03b4 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA04__SAI1_TX_DATA                       	0x0128 0x03b4 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA05__LCDIF_DATA05                       	0x012c 0x03b8 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA05__UART8_DCE_RTS                      	0x012c 0x03b8 0x0658 1 3
-#define MX6UL_PAD_LCD_DATA05__UART8_DTE_CTS                      	0x012c 0x03b8 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA05__ENET2_1588_EVENT2_OUT              	0x012c 0x03b8 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA05__SPDIF_OUT                          	0x012c 0x03b8 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA05__GPIO3_IO10                         	0x012c 0x03b8 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA05__SRC_BT_CFG05                       	0x012c 0x03b8 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA05__ECSPI1_SS1                         	0x012c 0x03b8 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA06__LCDIF_DATA06                       	0x0130 0x03bc 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA06__UART7_DCE_CTS                      	0x0130 0x03bc 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA06__UART7_DTE_RTS                      	0x0130 0x03bc 0x0650 1 2
-#define MX6UL_PAD_LCD_DATA06__ENET2_1588_EVENT3_IN               	0x0130 0x03bc 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA06__SPDIF_LOCK                         	0x0130 0x03bc 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA06__GPIO3_IO11                         	0x0130 0x03bc 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA06__SRC_BT_CFG06                       	0x0130 0x03bc 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA06__ECSPI1_SS2                         	0x0130 0x03bc 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA07__LCDIF_DATA07                       	0x0134 0x03c0 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA07__UART7_DCE_RTS                      	0x0134 0x03c0 0x0650 1 3
-#define MX6UL_PAD_LCD_DATA07__UART7_DTE_CTS                      	0x0134 0x03c0 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA07__ENET2_1588_EVENT3_OUT              	0x0134 0x03c0 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA07__SPDIF_EXT_CLK                      	0x0134 0x03c0 0x061c 4 0
-#define MX6UL_PAD_LCD_DATA07__GPIO3_IO12                         	0x0134 0x03c0 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA07__SRC_BT_CFG07                       	0x0134 0x03c0 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA07__ECSPI1_SS3                         	0x0134 0x03c0 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA08__LCDIF_DATA08                       	0x0138 0x03c4 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA08__SPDIF_IN                           	0x0138 0x03c4 0x0618 1 2
-#define MX6UL_PAD_LCD_DATA08__CSI_DATA16                         	0x0138 0x03c4 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA08__EIM_DATA00                         	0x0138 0x03c4 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA08__GPIO3_IO13                         	0x0138 0x03c4 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA08__SRC_BT_CFG08                       	0x0138 0x03c4 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA08__FLEXCAN1_TX                        	0x0138 0x03c4 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA09__LCDIF_DATA09                       	0x013c 0x03c8 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA09__SAI3_MCLK                          	0x013c 0x03c8 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA09__CSI_DATA17                         	0x013c 0x03c8 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA09__EIM_DATA01                         	0x013c 0x03c8 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA09__GPIO3_IO14                         	0x013c 0x03c8 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA09__SRC_BT_CFG09                       	0x013c 0x03c8 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA09__FLEXCAN1_RX                        	0x013c 0x03c8 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA10__LCDIF_DATA10                       	0x0140 0x03cc 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA10__SAI3_RX_SYNC                       	0x0140 0x03cc 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA10__CSI_DATA18                         	0x0140 0x03cc 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA10__EIM_DATA02                         	0x0140 0x03cc 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA10__GPIO3_IO15                         	0x0140 0x03cc 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA10__SRC_BT_CFG10                       	0x0140 0x03cc 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA10__FLEXCAN2_TX                        	0x0140 0x03cc 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA11__LCDIF_DATA11                       	0x0144 0x03d0 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA11__SAI3_RX_BCLK                       	0x0144 0x03d0 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA11__CSI_DATA19                         	0x0144 0x03d0 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA11__EIM_DATA03                         	0x0144 0x03d0 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA11__GPIO3_IO16                         	0x0144 0x03d0 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA11__SRC_BT_CFG11                       	0x0144 0x03d0 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA11__FLEXCAN2_RX                        	0x0144 0x03d0 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA12__LCDIF_DATA12                       	0x0148 0x03d4 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA12__SAI3_TX_SYNC                       	0x0148 0x03d4 0x060c 1 1
-#define MX6UL_PAD_LCD_DATA12__CSI_DATA20                         	0x0148 0x03d4 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA12__EIM_DATA04                         	0x0148 0x03d4 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA12__GPIO3_IO17                         	0x0148 0x03d4 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA12__SRC_BT_CFG12                       	0x0148 0x03d4 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA12__ECSPI1_RDY                         	0x0148 0x03d4 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA13__LCDIF_DATA13                       	0x014c 0x03d8 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA13__SAI3_TX_BCLK                       	0x014c 0x03d8 0x0608 1 1
-#define MX6UL_PAD_LCD_DATA13__CSI_DATA21                         	0x014c 0x03d8 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA13__EIM_DATA05                         	0x014c 0x03d8 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA13__GPIO3_IO18                         	0x014c 0x03d8 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA13__SRC_BT_CFG13                       	0x014c 0x03d8 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA13__USDHC2_RESET_B                     	0x014c 0x03d8 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA14__LCDIF_DATA14                       	0x0150 0x03dc 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA14__SAI3_RX_DATA                       	0x0150 0x03dc 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA14__CSI_DATA22                         	0x0150 0x03dc 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA14__EIM_DATA06                         	0x0150 0x03dc 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA14__GPIO3_IO19                         	0x0150 0x03dc 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA14__SRC_BT_CFG14                       	0x0150 0x03dc 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA14__USDHC2_DATA4                       	0x0150 0x03dc 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA15__LCDIF_DATA15                       	0x0154 0x03e0 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA15__SAI3_TX_DATA                       	0x0154 0x03e0 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA15__CSI_DATA23                         	0x0154 0x03e0 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA15__EIM_DATA07                         	0x0154 0x03e0 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA15__GPIO3_IO20                         	0x0154 0x03e0 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA15__SRC_BT_CFG15                       	0x0154 0x03e0 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA15__USDHC2_DATA5                       	0x0154 0x03e0 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA16__LCDIF_DATA16                       	0x0158 0x03e4 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA16__UART7_DCE_TX                           	0x0158 0x03e4 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA16__UART7_DTE_RX                           	0x0158 0x03e4 0x0654 1 2
-#define MX6UL_PAD_LCD_DATA16__CSI_DATA01                         	0x0158 0x03e4 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA16__EIM_DATA08                         	0x0158 0x03e4 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA16__GPIO3_IO21                         	0x0158 0x03e4 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA16__SRC_BT_CFG24                       	0x0158 0x03e4 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA16__USDHC2_DATA6                       	0x0158 0x03e4 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA17__LCDIF_DATA17                       	0x015c 0x03e8 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA17__UART7_DCE_RX                           	0x015c 0x03e8 0x0654 1 3
-#define MX6UL_PAD_LCD_DATA17__UART7_DTE_TX                           	0x015c 0x03e8 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA17__CSI_DATA00                         	0x015c 0x03e8 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA17__EIM_DATA09                         	0x015c 0x03e8 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA17__GPIO3_IO22                         	0x015c 0x03e8 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA17__SRC_BT_CFG25                       	0x015c 0x03e8 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA17__USDHC2_DATA7                       	0x015c 0x03e8 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA18__LCDIF_DATA18                       	0x0160 0x03ec 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA18__PWM5_OUT                           	0x0160 0x03ec 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA18__CA7_MX6UL_EVENTO                   	0x0160 0x03ec 0x0000 2 0
-#define MX6UL_PAD_LCD_DATA18__CSI_DATA10                         	0x0160 0x03ec 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA18__EIM_DATA10                         	0x0160 0x03ec 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA18__GPIO3_IO23                         	0x0160 0x03ec 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA18__SRC_BT_CFG26                       	0x0160 0x03ec 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA18__USDHC2_CMD                         	0x0160 0x03ec 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA19__EIM_DATA11                         	0x0164 0x03f0 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA19__GPIO3_IO24                         	0x0164 0x03f0 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA19__SRC_BT_CFG27                       	0x0164 0x03f0 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA19__USDHC2_CLK                         	0x0164 0x03f0 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA19__LCDIF_DATA19                       	0x0164 0x03f0 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA19__PWM6_OUT                           	0x0164 0x03f0 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA19__WDOG1_WDOG_ANY                     	0x0164 0x03f0 0x0000 2 0
-#define MX6UL_PAD_LCD_DATA19__CSI_DATA11                         	0x0164 0x03f0 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA20__EIM_DATA12                         	0x0168 0x03f4 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA20__GPIO3_IO25                         	0x0168 0x03f4 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA20__SRC_BT_CFG28                       	0x0168 0x03f4 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA20__USDHC2_DATA0                       	0x0168 0x03f4 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA20__LCDIF_DATA20                       	0x0168 0x03f4 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA20__UART8_DCE_TX                           	0x0168 0x03f4 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA20__UART8_DTE_RX                           	0x0168 0x03f4 0x065c 1 2
-#define MX6UL_PAD_LCD_DATA20__ECSPI1_SCLK                        	0x0168 0x03f4 0x0534 2 0
-#define MX6UL_PAD_LCD_DATA20__CSI_DATA12                         	0x0168 0x03f4 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA21__LCDIF_DATA21                       	0x016c 0x03f8 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA21__UART8_DCE_RX                           	0x016c 0x03f8 0x065c 1 3
-#define MX6UL_PAD_LCD_DATA21__UART8_DTE_TX                           	0x016c 0x03f8 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA21__ECSPI1_SS0                         	0x016c 0x03f8 0x0000 2 0
-#define MX6UL_PAD_LCD_DATA21__CSI_DATA13                         	0x016c 0x03f8 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA21__EIM_DATA13                         	0x016c 0x03f8 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA21__GPIO3_IO26                         	0x016c 0x03f8 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA21__SRC_BT_CFG29                       	0x016c 0x03f8 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA21__USDHC2_DATA1                       	0x016c 0x03f8 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA22__LCDIF_DATA22                       	0x0170 0x03fc 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA22__MQS_RIGHT                          	0x0170 0x03fc 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA22__ECSPI1_MOSI                        	0x0170 0x03fc 0x053c 2 0
-#define MX6UL_PAD_LCD_DATA22__CSI_DATA14                         	0x0170 0x03fc 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA22__EIM_DATA14                         	0x0170 0x03fc 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA22__GPIO3_IO27                         	0x0170 0x03fc 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA22__SRC_BT_CFG30                       	0x0170 0x03fc 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA22__USDHC2_DATA2                       	0x0170 0x03fc 0x0000 8 0
-#define MX6UL_PAD_LCD_DATA23__LCDIF_DATA23                       	0x0174 0x0400 0x0000 0 0
-#define MX6UL_PAD_LCD_DATA23__MQS_LEFT                           	0x0174 0x0400 0x0000 1 0
-#define MX6UL_PAD_LCD_DATA23__ECSPI1_MISO                        	0x0174 0x0400 0x0538 2 0
-#define MX6UL_PAD_LCD_DATA23__CSI_DATA15                         	0x0174 0x0400 0x0000 3 0
-#define MX6UL_PAD_LCD_DATA23__EIM_DATA15                         	0x0174 0x0400 0x0000 4 0
-#define MX6UL_PAD_LCD_DATA23__GPIO3_IO28                         	0x0174 0x0400 0x0000 5 0
-#define MX6UL_PAD_LCD_DATA23__SRC_BT_CFG31                       	0x0174 0x0400 0x0000 6 0
-#define MX6UL_PAD_LCD_DATA23__USDHC2_DATA3                       	0x0174 0x0400 0x0000 8 0
-#define MX6UL_PAD_NAND_RE_B__RAWNAND_RE_B                        	0x0178 0x0404 0x0000 0 0
-#define MX6UL_PAD_NAND_RE_B__USDHC2_CLK                          	0x0178 0x0404 0x0670 1 2
-#define MX6UL_PAD_NAND_RE_B__QSPI_B_SCLK                         	0x0178 0x0404 0x0000 2 0
-#define MX6UL_PAD_NAND_RE_B__KPP_ROW00                           	0x0178 0x0404 0x0000 3 0
-#define MX6UL_PAD_NAND_RE_B__EIM_EB_B00                          	0x0178 0x0404 0x0000 4 0
-#define MX6UL_PAD_NAND_RE_B__GPIO4_IO00                          	0x0178 0x0404 0x0000 5 0
-#define MX6UL_PAD_NAND_RE_B__ECSPI3_SS2                          	0x0178 0x0404 0x0000 8 0
-#define MX6UL_PAD_NAND_WE_B__RAWNAND_WE_B                        	0x017c 0x0408 0x0000 0 0
-#define MX6UL_PAD_NAND_WE_B__USDHC2_CMD                          	0x017c 0x0408 0x0678 1 2
-#define MX6UL_PAD_NAND_WE_B__QSPI_B_SS0_B                        	0x017c 0x0408 0x0000 2 0
-#define MX6UL_PAD_NAND_WE_B__KPP_COL00                           	0x017c 0x0408 0x0000 3 0
-#define MX6UL_PAD_NAND_WE_B__EIM_EB_B01                          	0x017c 0x0408 0x0000 4 0
-#define MX6UL_PAD_NAND_WE_B__GPIO4_IO01                          	0x017c 0x0408 0x0000 5 0
-#define MX6UL_PAD_NAND_WE_B__ECSPI3_SS3                          	0x017c 0x0408 0x0000 8 0
-#define MX6UL_PAD_NAND_DATA00__RAWNAND_DATA00                    	0x0180 0x040c 0x0000 0 0
-#define MX6UL_PAD_NAND_DATA00__USDHC2_DATA0                      	0x0180 0x040c 0x067c 1 2
-#define MX6UL_PAD_NAND_DATA00__QSPI_B_SS1_B                      	0x0180 0x040c 0x0000 2 0
-#define MX6UL_PAD_NAND_DATA00__KPP_ROW01                         	0x0180 0x040c 0x0000 3 0
-#define MX6UL_PAD_NAND_DATA00__EIM_AD08                          	0x0180 0x040c 0x0000 4 0
-#define MX6UL_PAD_NAND_DATA00__GPIO4_IO02                        	0x0180 0x040c 0x0000 5 0
-#define MX6UL_PAD_NAND_DATA00__ECSPI4_RDY                        	0x0180 0x040c 0x0000 8 0
-#define MX6UL_PAD_NAND_DATA01__RAWNAND_DATA01                    	0x0184 0x0410 0x0000 0 0
-#define MX6UL_PAD_NAND_DATA01__USDHC2_DATA1                      	0x0184 0x0410 0x0680 1 2
-#define MX6UL_PAD_NAND_DATA01__QSPI_B_DQS                        	0x0184 0x0410 0x0000 2 0
-#define MX6UL_PAD_NAND_DATA01__KPP_COL01                         	0x0184 0x0410 0x0000 3 0
-#define MX6UL_PAD_NAND_DATA01__EIM_AD09                          	0x0184 0x0410 0x0000 4 0
-#define MX6UL_PAD_NAND_DATA01__GPIO4_IO03                        	0x0184 0x0410 0x0000 5 0
-#define MX6UL_PAD_NAND_DATA01__ECSPI4_SS1                        	0x0184 0x0410 0x0000 8 0
-#define MX6UL_PAD_NAND_DATA02__RAWNAND_DATA02                    	0x0188 0x0414 0x0000 0 0
-#define MX6UL_PAD_NAND_DATA02__USDHC2_DATA2                      	0x0188 0x0414 0x0684 1 1
-#define MX6UL_PAD_NAND_DATA02__QSPI_B_DATA00                     	0x0188 0x0414 0x0000 2 0
-#define MX6UL_PAD_NAND_DATA02__KPP_ROW02                         	0x0188 0x0414 0x0000 3 0
-#define MX6UL_PAD_NAND_DATA02__EIM_AD10                          	0x0188 0x0414 0x0000 4 0
-#define MX6UL_PAD_NAND_DATA02__GPIO4_IO04                        	0x0188 0x0414 0x0000 5 0
-#define MX6UL_PAD_NAND_DATA02__ECSPI4_SS2                        	0x0188 0x0414 0x0000 8 0
-#define MX6UL_PAD_NAND_DATA03__RAWNAND_DATA03                    	0x018c 0x0418 0x0000 0 0
-#define MX6UL_PAD_NAND_DATA03__USDHC2_DATA3                      	0x018c 0x0418 0x0688 1 2
-#define MX6UL_PAD_NAND_DATA03__QSPI_B_DATA01                     	0x018c 0x0418 0x0000 2 0
-#define MX6UL_PAD_NAND_DATA03__KPP_COL02                         	0x018c 0x0418 0x0000 3 0
-#define MX6UL_PAD_NAND_DATA03__EIM_AD11                          	0x018c 0x0418 0x0000 4 0
-#define MX6UL_PAD_NAND_DATA03__GPIO4_IO05                        	0x018c 0x0418 0x0000 5 0
-#define MX6UL_PAD_NAND_DATA03__ECSPI4_SS3                        	0x018c 0x0418 0x0000 8 0
-#define MX6UL_PAD_NAND_DATA04__RAWNAND_DATA04                    	0x0190 0x041c 0x0000 0 0
-#define MX6UL_PAD_NAND_DATA04__USDHC2_DATA4                      	0x0190 0x041c 0x068c 1 1
-#define MX6UL_PAD_NAND_DATA04__QSPI_B_DATA02                     	0x0190 0x041c 0x0000 2 0
-#define MX6UL_PAD_NAND_DATA04__ECSPI4_SCLK                       	0x0190 0x041c 0x0564 3 1
-#define MX6UL_PAD_NAND_DATA04__EIM_AD12                          	0x0190 0x041c 0x0000 4 0
-#define MX6UL_PAD_NAND_DATA04__GPIO4_IO06                        	0x0190 0x041c 0x0000 5 0
-#define MX6UL_PAD_NAND_DATA04__UART2_DCE_TX                          	0x0190 0x041c 0x0000 8 0
-#define MX6UL_PAD_NAND_DATA04__UART2_DTE_RX                          	0x0190 0x041c 0x062c 8 2
-#define MX6UL_PAD_NAND_DATA05__RAWNAND_DATA05                    	0x0194 0x0420 0x0000 0 0
-#define MX6UL_PAD_NAND_DATA05__USDHC2_DATA5                      	0x0194 0x0420 0x0690 1 1
-#define MX6UL_PAD_NAND_DATA05__QSPI_B_DATA03                     	0x0194 0x0420 0x0000 2 0
-#define MX6UL_PAD_NAND_DATA05__ECSPI4_MOSI                       	0x0194 0x0420 0x056c 3 1
-#define MX6UL_PAD_NAND_DATA05__EIM_AD13                          	0x0194 0x0420 0x0000 4 0
-#define MX6UL_PAD_NAND_DATA05__GPIO4_IO07                        	0x0194 0x0420 0x0000 5 0
-#define MX6UL_PAD_NAND_DATA05__UART2_DCE_RX                          	0x0194 0x0420 0x062c 8 3
-#define MX6UL_PAD_NAND_DATA05__UART2_DTE_TX                          	0x0194 0x0420 0x0000 8 0
-#define MX6UL_PAD_NAND_DATA06__RAWNAND_DATA06                    	0x0198 0x0424 0x0000 0 0
-#define MX6UL_PAD_NAND_DATA06__USDHC2_DATA6                      	0x0198 0x0424 0x0694 1 1
-#define MX6UL_PAD_NAND_DATA06__SAI2_RX_BCLK                      	0x0198 0x0424 0x0000 2 0
-#define MX6UL_PAD_NAND_DATA06__ECSPI4_MISO                       	0x0198 0x0424 0x0568 3 1
-#define MX6UL_PAD_NAND_DATA06__EIM_AD14                          	0x0198 0x0424 0x0000 4 0
-#define MX6UL_PAD_NAND_DATA06__GPIO4_IO08                        	0x0198 0x0424 0x0000 5 0
-#define MX6UL_PAD_NAND_DATA06__UART2_DCE_CTS                     	0x0198 0x0424 0x0000 8 0
-#define MX6UL_PAD_NAND_DATA06__UART2_DTE_RTS                     	0x0198 0x0424 0x0628 8 4
-#define MX6UL_PAD_NAND_DATA07__RAWNAND_DATA07                    	0x019c 0x0428 0x0000 0 0
-#define MX6UL_PAD_NAND_DATA07__USDHC2_DATA7                      	0x019c 0x0428 0x0698 1 1
-#define MX6UL_PAD_NAND_DATA07__QSPI_A_SS1_B                      	0x019c 0x0428 0x0000 2 0
-#define MX6UL_PAD_NAND_DATA07__ECSPI4_SS0                        	0x019c 0x0428 0x0000 3 0
-#define MX6UL_PAD_NAND_DATA07__EIM_AD15                          	0x019c 0x0428 0x0000 4 0
-#define MX6UL_PAD_NAND_DATA07__GPIO4_IO09                        	0x019c 0x0428 0x0000 5 0
-#define MX6UL_PAD_NAND_DATA07__UART2_DCE_RTS                     	0x019c 0x0428 0x0628 8 5
-#define MX6UL_PAD_NAND_DATA07__UART2_DTE_CTS                     	0x019c 0x0428 0x0000 8 0
-#define MX6UL_PAD_NAND_ALE__RAWNAND_ALE                          	0x01a0 0x042c 0x0000 0 0
-#define MX6UL_PAD_NAND_ALE__USDHC2_RESET_B                       	0x01a0 0x042c 0x0000 1 0
-#define MX6UL_PAD_NAND_ALE__QSPI_A_DQS                           	0x01a0 0x042c 0x0000 2 0
-#define MX6UL_PAD_NAND_ALE__PWM3_OUT                             	0x01a0 0x042c 0x0000 3 0
-#define MX6UL_PAD_NAND_ALE__EIM_ADDR17                           	0x01a0 0x042c 0x0000 4 0
-#define MX6UL_PAD_NAND_ALE__GPIO4_IO10                           	0x01a0 0x042c 0x0000 5 0
-#define MX6UL_PAD_NAND_ALE__ECSPI3_SS1                           	0x01a0 0x042c 0x0000 8 0
-#define MX6UL_PAD_NAND_WP_B__RAWNAND_WP_B                        	0x01a4 0x0430 0x0000 0 0
-#define MX6UL_PAD_NAND_WP_B__USDHC1_RESET_B                      	0x01a4 0x0430 0x0000 1 0
-#define MX6UL_PAD_NAND_WP_B__QSPI_A_SCLK                         	0x01a4 0x0430 0x0000 2 0
-#define MX6UL_PAD_NAND_WP_B__PWM4_OUT                            	0x01a4 0x0430 0x0000 3 0
-#define MX6UL_PAD_NAND_WP_B__EIM_BCLK                            	0x01a4 0x0430 0x0000 4 0
-#define MX6UL_PAD_NAND_WP_B__GPIO4_IO11                          	0x01a4 0x0430 0x0000 5 0
-#define MX6UL_PAD_NAND_WP_B__ECSPI3_RDY                          	0x01a4 0x0430 0x0000 8 0
-#define MX6UL_PAD_NAND_READY_B__RAWNAND_READY_B                  	0x01a8 0x0434 0x0000 0 0
-#define MX6UL_PAD_NAND_READY_B__USDHC1_DATA4                     	0x01a8 0x0434 0x0000 1 0
-#define MX6UL_PAD_NAND_READY_B__QSPI_A_DATA00                    	0x01a8 0x0434 0x0000 2 0
-#define MX6UL_PAD_NAND_READY_B__ECSPI3_SS0                       	0x01a8 0x0434 0x0000 3 0
-#define MX6UL_PAD_NAND_READY_B__EIM_CS1_B                        	0x01a8 0x0434 0x0000 4 0
-#define MX6UL_PAD_NAND_READY_B__GPIO4_IO12                       	0x01a8 0x0434 0x0000 5 0
-#define MX6UL_PAD_NAND_READY_B__UART3_DCE_TX                         	0x01a8 0x0434 0x0000 8 0
-#define MX6UL_PAD_NAND_READY_B__UART3_DTE_RX                         	0x01a8 0x0434 0x0634 8 2
-#define MX6UL_PAD_NAND_CE0_B__RAWNAND_CE0_B                      	0x01ac 0x0438 0x0000 0 0
-#define MX6UL_PAD_NAND_CE0_B__USDHC1_DATA5                       	0x01ac 0x0438 0x0000 1 0
-#define MX6UL_PAD_NAND_CE0_B__QSPI_A_DATA01                      	0x01ac 0x0438 0x0000 2 0
-#define MX6UL_PAD_NAND_CE0_B__ECSPI3_SCLK                        	0x01ac 0x0438 0x0554 3 1
-#define MX6UL_PAD_NAND_CE0_B__EIM_DTACK_B                        	0x01ac 0x0438 0x0000 4 0
-#define MX6UL_PAD_NAND_CE0_B__GPIO4_IO13                         	0x01ac 0x0438 0x0000 5 0
-#define MX6UL_PAD_NAND_CE0_B__UART3_DCE_RX                           	0x01ac 0x0438 0x0634 8 3
-#define MX6UL_PAD_NAND_CE0_B__UART3_DTE_TX                           	0x01ac 0x0438 0x0000 8 0
-#define MX6UL_PAD_NAND_CE1_B__RAWNAND_CE1_B                      	0x01b0 0x043c 0x0000 0 0
-#define MX6UL_PAD_NAND_CE1_B__USDHC1_DATA6                       	0x01b0 0x043c 0x0000 1 0
-#define MX6UL_PAD_NAND_CE1_B__QSPI_A_DATA02                      	0x01b0 0x043c 0x0000 2 0
-#define MX6UL_PAD_NAND_CE1_B__ECSPI3_MOSI                        	0x01b0 0x043c 0x055c 3 1
-#define MX6UL_PAD_NAND_CE1_B__EIM_ADDR18                         	0x01b0 0x043c 0x0000 4 0
-#define MX6UL_PAD_NAND_CE1_B__GPIO4_IO14                         	0x01b0 0x043c 0x0000 5 0
-#define MX6UL_PAD_NAND_CE1_B__UART3_DCE_CTS                      	0x01b0 0x043c 0x0000 8 0
-#define MX6UL_PAD_NAND_CE1_B__UART3_DTE_RTS                      	0x01b0 0x043c 0x0630 8 2
-#define MX6UL_PAD_NAND_CLE__RAWNAND_CLE                          	0x01b4 0x0440 0x0000 0 0
-#define MX6UL_PAD_NAND_CLE__USDHC1_DATA7                         	0x01b4 0x0440 0x0000 1 0
-#define MX6UL_PAD_NAND_CLE__QSPI_A_DATA03                        	0x01b4 0x0440 0x0000 2 0
-#define MX6UL_PAD_NAND_CLE__ECSPI3_MISO                          	0x01b4 0x0440 0x0558 3 1
-#define MX6UL_PAD_NAND_CLE__EIM_ADDR16                           	0x01b4 0x0440 0x0000 4 0
-#define MX6UL_PAD_NAND_CLE__GPIO4_IO15                           	0x01b4 0x0440 0x0000 5 0
-#define MX6UL_PAD_NAND_CLE__UART3_DCE_RTS                        	0x01b4 0x0440 0x0630 8 3
-#define MX6UL_PAD_NAND_CLE__UART3_DTE_CTS                        	0x01b4 0x0440 0x0000 8 0
-#define MX6UL_PAD_NAND_DQS__RAWNAND_DQS                          	0x01b8 0x0444 0x0000 0 0
-#define MX6UL_PAD_NAND_DQS__CSI_FIELD                            	0x01b8 0x0444 0x0530 1 1
-#define MX6UL_PAD_NAND_DQS__QSPI_A_SS0_B                         	0x01b8 0x0444 0x0000 2 0
-#define MX6UL_PAD_NAND_DQS__PWM5_OUT                             	0x01b8 0x0444 0x0000 3 0
-#define MX6UL_PAD_NAND_DQS__EIM_WAIT                             	0x01b8 0x0444 0x0000 4 0
-#define MX6UL_PAD_NAND_DQS__GPIO4_IO16                           	0x01b8 0x0444 0x0000 5 0
-#define MX6UL_PAD_NAND_DQS__SDMA_EXT_EVENT01                     	0x01b8 0x0444 0x0000 6 0
-#define MX6UL_PAD_NAND_DQS__SPDIF_EXT_CLK                        	0x01b8 0x0444 0x0000 8 0
-#define MX6UL_PAD_SD1_CMD__USDHC1_CMD                            	0x01bc 0x0448 0x0000 0 0
-#define MX6UL_PAD_SD1_CMD__GPT2_COMPARE1                         	0x01bc 0x0448 0x0000 1 0
-#define MX6UL_PAD_SD1_CMD__SAI2_RX_SYNC                          	0x01bc 0x0448 0x0000 2 0
-#define MX6UL_PAD_SD1_CMD__SPDIF_OUT                             	0x01bc 0x0448 0x0000 3 0
-#define MX6UL_PAD_SD1_CMD__EIM_ADDR19                            	0x01bc 0x0448 0x0000 4 0
-#define MX6UL_PAD_SD1_CMD__GPIO2_IO16                            	0x01bc 0x0448 0x0000 5 0
-#define MX6UL_PAD_SD1_CMD__SDMA_EXT_EVENT00                      	0x01bc 0x0448 0x0000 6 0
-#define MX6UL_PAD_SD1_CMD__USB_OTG1_PWR                          	0x01bc 0x0448 0x0000 8 0
-#define MX6UL_PAD_SD1_CLK__USDHC1_CLK                            	0x01c0 0x044c 0x0000 0 0
-#define MX6UL_PAD_SD1_CLK__GPT2_COMPARE2                         	0x01c0 0x044c 0x0000 1 0
-#define MX6UL_PAD_SD1_CLK__SAI2_MCLK                             	0x01c0 0x044c 0x0000 2 0
-#define MX6UL_PAD_SD1_CLK__SPDIF_IN                              	0x01c0 0x044c 0x0618 3 3
-#define MX6UL_PAD_SD1_CLK__EIM_ADDR20                            	0x01c0 0x044c 0x0000 4 0
-#define MX6UL_PAD_SD1_CLK__GPIO2_IO17                            	0x01c0 0x044c 0x0000 5 0
-#define MX6UL_PAD_SD1_CLK__USB_OTG1_OC                           	0x01c0 0x044c 0x0000 8 0
-#define MX6UL_PAD_SD1_DATA0__USDHC1_DATA0                        	0x01c4 0x0450 0x0000 0 0
-#define MX6UL_PAD_SD1_DATA0__GPT2_COMPARE3                       	0x01c4 0x0450 0x0000 1 0
-#define MX6UL_PAD_SD1_DATA0__SAI2_TX_SYNC                        	0x01c4 0x0450 0x05fc 2 1
-#define MX6UL_PAD_SD1_DATA0__FLEXCAN1_TX                         	0x01c4 0x0450 0x0000 3 0
-#define MX6UL_PAD_SD1_DATA0__EIM_ADDR21                          	0x01c4 0x0450 0x0000 4 0
-#define MX6UL_PAD_SD1_DATA0__GPIO2_IO18                          	0x01c4 0x0450 0x0000 5 0
-#define MX6UL_PAD_SD1_DATA0__ANATOP_OTG1_ID                      	0x01c4 0x0450 0x0000 8 0
-#define MX6UL_PAD_SD1_DATA1__USDHC1_DATA1                        	0x01c8 0x0454 0x0000 0 0
-#define MX6UL_PAD_SD1_DATA1__GPT2_CLK                            	0x01c8 0x0454 0x05a0 1 1
-#define MX6UL_PAD_SD1_DATA1__SAI2_TX_BCLK                        	0x01c8 0x0454 0x05f8 2 1
-#define MX6UL_PAD_SD1_DATA1__FLEXCAN1_RX                         	0x01c8 0x0454 0x0584 3 3
-#define MX6UL_PAD_SD1_DATA1__EIM_ADDR22                          	0x01c8 0x0454 0x0000 4 0
-#define MX6UL_PAD_SD1_DATA1__GPIO2_IO19                          	0x01c8 0x0454 0x0000 5 0
-#define MX6UL_PAD_SD1_DATA1__USB_OTG2_PWR                        	0x01c8 0x0454 0x0000 8 0
-#define MX6UL_PAD_SD1_DATA2__USDHC1_DATA2                        	0x01cc 0x0458 0x0000 0 0
-#define MX6UL_PAD_SD1_DATA2__GPT2_CAPTURE1                       	0x01cc 0x0458 0x0598 1 1
-#define MX6UL_PAD_SD1_DATA2__SAI2_RX_DATA                        	0x01cc 0x0458 0x05f4 2 1
-#define MX6UL_PAD_SD1_DATA2__FLEXCAN2_TX                         	0x01cc 0x0458 0x0000 3 0
-#define MX6UL_PAD_SD1_DATA2__EIM_ADDR23                          	0x01cc 0x0458 0x0000 4 0
-#define MX6UL_PAD_SD1_DATA2__GPIO2_IO20                          	0x01cc 0x0458 0x0000 5 0
-#define MX6UL_PAD_SD1_DATA2__CCM_CLKO1                           	0x01cc 0x0458 0x0000 6 0
-#define MX6UL_PAD_SD1_DATA2__USB_OTG2_OC                         	0x01cc 0x0458 0x0000 8 0
-#define MX6UL_PAD_SD1_DATA3__USDHC1_DATA3                        	0x01d0 0x045c 0x0000 0 0
-#define MX6UL_PAD_SD1_DATA3__GPT2_CAPTURE2                       	0x01d0 0x045c 0x059c 1 1
-#define MX6UL_PAD_SD1_DATA3__SAI2_TX_DATA                        	0x01d0 0x045c 0x0000 2 0
-#define MX6UL_PAD_SD1_DATA3__FLEXCAN2_RX                         	0x01d0 0x045c 0x0588 3 3
-#define MX6UL_PAD_SD1_DATA3__EIM_ADDR24                          	0x01d0 0x045c 0x0000 4 0
-#define MX6UL_PAD_SD1_DATA3__GPIO2_IO21                          	0x01d0 0x045c 0x0000 5 0
-#define MX6UL_PAD_SD1_DATA3__CCM_CLKO2                           	0x01d0 0x045c 0x0000 6 0
-#define MX6UL_PAD_SD1_DATA3__ANATOP_OTG2_ID                      	0x01d0 0x045c 0x0000 8 0
-#define MX6UL_PAD_CSI_MCLK__CSI_MCLK                             	0x01d4 0x0460 0x0000 0 0
-#define MX6UL_PAD_CSI_MCLK__USDHC2_CD_B                          	0x01d4 0x0460 0x0674 1 0
-#define MX6UL_PAD_CSI_MCLK__RAWNAND_CE2_B                        	0x01d4 0x0460 0x0000 2 0
-#define MX6UL_PAD_CSI_MCLK__I2C1_SDA                             	0x01d4 0x0460 0x05a8 3 0
-#define MX6UL_PAD_CSI_MCLK__EIM_CS0_B                            	0x01d4 0x0460 0x0000 4 0
-#define MX6UL_PAD_CSI_MCLK__GPIO4_IO17                           	0x01d4 0x0460 0x0000 5 0
-#define MX6UL_PAD_CSI_MCLK__SNVS_HP_VIO_5_CTL                    	0x01d4 0x0460 0x0000 6 0
-#define MX6UL_PAD_CSI_MCLK__UART6_DCE_TX                             	0x01d4 0x0460 0x0000 8 0
-#define MX6UL_PAD_CSI_MCLK__UART6_DTE_RX                             	0x01d4 0x0460 0x064c 8 0
-#define MX6UL_PAD_CSI_PIXCLK__CSI_PIXCLK                         	0x01d8 0x0464 0x0528 0 1
-#define MX6UL_PAD_CSI_PIXCLK__USDHC2_WP                          	0x01d8 0x0464 0x069c 1 2
-#define MX6UL_PAD_CSI_PIXCLK__RAWNAND_CE3_B                      	0x01d8 0x0464 0x0000 2 0
-#define MX6UL_PAD_CSI_PIXCLK__I2C1_SCL                           	0x01d8 0x0464 0x05a4 3 2
-#define MX6UL_PAD_CSI_PIXCLK__EIM_OE                             	0x01d8 0x0464 0x0000 4 0
-#define MX6UL_PAD_CSI_PIXCLK__GPIO4_IO18                         	0x01d8 0x0464 0x0000 5 0
-#define MX6UL_PAD_CSI_PIXCLK__SNVS_HP_VIO_5                      	0x01d8 0x0464 0x0000 6 0
-#define MX6UL_PAD_CSI_PIXCLK__UART6_DCE_RX                           	0x01d8 0x0464 0x064c 8 3
-#define MX6UL_PAD_CSI_PIXCLK__UART6_DTE_TX                           	0x01d8 0x0464 0x0000 8 0
-#define MX6UL_PAD_CSI_VSYNC__CSI_VSYNC                           	0x01dc 0x0468 0x052c 0 0
-#define MX6UL_PAD_CSI_VSYNC__USDHC2_CLK                          	0x01dc 0x0468 0x0670 1 0
-#define MX6UL_PAD_CSI_VSYNC__SIM1_PORT1_CLK                      	0x01dc 0x0468 0x0000 2 0
-#define MX6UL_PAD_CSI_VSYNC__I2C2_SDA                            	0x01dc 0x0468 0x05b0 3 0
-#define MX6UL_PAD_CSI_VSYNC__EIM_RW                              	0x01dc 0x0468 0x0000 4 0
-#define MX6UL_PAD_CSI_VSYNC__GPIO4_IO19                          	0x01dc 0x0468 0x0000 5 0
-#define MX6UL_PAD_CSI_VSYNC__PWM7_OUT                            	0x01dc 0x0468 0x0000 6 0
-#define MX6UL_PAD_CSI_VSYNC__UART6_DCE_RTS                       	0x01dc 0x0468 0x0648 8 0
-#define MX6UL_PAD_CSI_VSYNC__UART6_DTE_CTS                       	0x01dc 0x0468 0x0000 8 0
-#define MX6UL_PAD_CSI_HSYNC__CSI_HSYNC                           	0x01e0 0x046c 0x0524 0 0
-#define MX6UL_PAD_CSI_HSYNC__USDHC2_CMD                          	0x01e0 0x046c 0x0678 1 0
-#define MX6UL_PAD_CSI_HSYNC__SIM1_PORT1_PD                       	0x01e0 0x046c 0x0000 2 0
-#define MX6UL_PAD_CSI_HSYNC__I2C2_SCL                            	0x01e0 0x046c 0x05ac 3 0
-#define MX6UL_PAD_CSI_HSYNC__EIM_LBA_B                           	0x01e0 0x046c 0x0000 4 0
-#define MX6UL_PAD_CSI_HSYNC__GPIO4_IO20                          	0x01e0 0x046c 0x0000 5 0
-#define MX6UL_PAD_CSI_HSYNC__PWM8_OUT                            	0x01e0 0x046c 0x0000 6 0
-#define MX6UL_PAD_CSI_HSYNC__UART6_DCE_CTS                       	0x01e0 0x046c 0x0000 8 0
-#define MX6UL_PAD_CSI_HSYNC__UART6_DTE_RTS                       	0x01e0 0x046c 0x0648 8 1
-#define MX6UL_PAD_CSI_DATA00__CSI_DATA02                         	0x01e4 0x0470 0x04c4 0 0
-#define MX6UL_PAD_CSI_DATA00__USDHC2_DATA0                       	0x01e4 0x0470 0x067c 1 0
-#define MX6UL_PAD_CSI_DATA00__SIM1_PORT1_RST_B                   	0x01e4 0x0470 0x0000 2 0
-#define MX6UL_PAD_CSI_DATA00__ECSPI2_SCLK                        	0x01e4 0x0470 0x0544 3 0
-#define MX6UL_PAD_CSI_DATA00__EIM_AD00                           	0x01e4 0x0470 0x0000 4 0
-#define MX6UL_PAD_CSI_DATA00__GPIO4_IO21                         	0x01e4 0x0470 0x0000 5 0
-#define MX6UL_PAD_CSI_DATA00__SRC_INT_BOOT                       	0x01e4 0x0470 0x0000 6 0
-#define MX6UL_PAD_CSI_DATA00__UART5_DCE_TX                           	0x01e4 0x0470 0x0000 8 0
-#define MX6UL_PAD_CSI_DATA00__UART5_DTE_RX                           	0x01e4 0x0470 0x0644 8 0
-#define MX6UL_PAD_CSI_DATA01__CSI_DATA03                         	0x01e8 0x0474 0x04c8 0 0
-#define MX6UL_PAD_CSI_DATA01__USDHC2_DATA1                       	0x01e8 0x0474 0x0680 1 0
-#define MX6UL_PAD_CSI_DATA01__SIM1_PORT1_SVEN                    	0x01e8 0x0474 0x0000 2 0
-#define MX6UL_PAD_CSI_DATA01__ECSPI2_SS0                         	0x01e8 0x0474 0x0000 3 0
-#define MX6UL_PAD_CSI_DATA01__EIM_AD01                           	0x01e8 0x0474 0x0000 4 0
-#define MX6UL_PAD_CSI_DATA01__GPIO4_IO22                         	0x01e8 0x0474 0x0000 5 0
-#define MX6UL_PAD_CSI_DATA01__SAI1_MCLK                          	0x01e8 0x0474 0x0000 6 0
-#define MX6UL_PAD_CSI_DATA01__UART5_DCE_RX                           	0x01e8 0x0474 0x0644 8 1
-#define MX6UL_PAD_CSI_DATA01__UART5_DTE_TX                           	0x01e8 0x0474 0x0000 8 0
-#define MX6UL_PAD_CSI_DATA02__CSI_DATA04                         	0x01ec 0x0478 0x04d8 0 1
-#define MX6UL_PAD_CSI_DATA02__USDHC2_DATA2                       	0x01ec 0x0478 0x0684 1 2
-#define MX6UL_PAD_CSI_DATA02__SIM1_PORT1_TRXD                    	0x01ec 0x0478 0x0000 2 0
-#define MX6UL_PAD_CSI_DATA02__ECSPI2_MOSI                        	0x01ec 0x0478 0x054c 3 1
-#define MX6UL_PAD_CSI_DATA02__EIM_AD02                           	0x01ec 0x0478 0x0000 4 0
-#define MX6UL_PAD_CSI_DATA02__GPIO4_IO23                         	0x01ec 0x0478 0x0000 5 0
-#define MX6UL_PAD_CSI_DATA02__SAI1_RX_SYNC                       	0x01ec 0x0478 0x0000 6 0
-#define MX6UL_PAD_CSI_DATA02__UART5_DCE_RTS                      	0x01ec 0x0478 0x0640 8 5
-#define MX6UL_PAD_CSI_DATA02__UART5_DTE_CTS                      	0x01ec 0x0478 0x0000 8 0
-#define MX6UL_PAD_CSI_DATA03__CSI_DATA05                         	0x01f0 0x047c 0x04cc 0 0
-#define MX6UL_PAD_CSI_DATA03__USDHC2_DATA3                       	0x01f0 0x047c 0x0688 1 0
-#define MX6UL_PAD_CSI_DATA03__SIM2_PORT1_PD                      	0x01f0 0x047c 0x0000 2 0
-#define MX6UL_PAD_CSI_DATA03__ECSPI2_MISO                        	0x01f0 0x047c 0x0548 3 0
-#define MX6UL_PAD_CSI_DATA03__EIM_AD03                           	0x01f0 0x047c 0x0000 4 0
-#define MX6UL_PAD_CSI_DATA03__GPIO4_IO24                         	0x01f0 0x047c 0x0000 5 0
-#define MX6UL_PAD_CSI_DATA03__SAI1_RX_BCLK                       	0x01f0 0x047c 0x0000 6 0
-#define MX6UL_PAD_CSI_DATA03__UART5_DCE_CTS                      	0x01f0 0x047c 0x0000 8 0
-#define MX6UL_PAD_CSI_DATA03__UART5_DTE_RTS                      	0x01f0 0x047c 0x0640 8 0
-#define MX6UL_PAD_CSI_DATA04__CSI_DATA06                         	0x01f4 0x0480 0x04dc 0 1
-#define MX6UL_PAD_CSI_DATA04__USDHC2_DATA4                       	0x01f4 0x0480 0x068c 1 2
-#define MX6UL_PAD_CSI_DATA04__SIM2_PORT1_CLK                     	0x01f4 0x0480 0x0000 2 0
-#define MX6UL_PAD_CSI_DATA04__ECSPI1_SCLK                        	0x01f4 0x0480 0x0534 3 1
-#define MX6UL_PAD_CSI_DATA04__EIM_AD04                           	0x01f4 0x0480 0x0000 4 0
-#define MX6UL_PAD_CSI_DATA04__GPIO4_IO25                         	0x01f4 0x0480 0x0000 5 0
-#define MX6UL_PAD_CSI_DATA04__SAI1_TX_SYNC                       	0x01f4 0x0480 0x05ec 6 1
-#define MX6UL_PAD_CSI_DATA04__USDHC1_WP                          	0x01f4 0x0480 0x0000 8 0
-#define MX6UL_PAD_CSI_DATA05__CSI_DATA07                         	0x01f8 0x0484 0x04e0 0 1
-#define MX6UL_PAD_CSI_DATA05__USDHC2_DATA5                       	0x01f8 0x0484 0x0690 1 2
-#define MX6UL_PAD_CSI_DATA05__SIM2_PORT1_RST_B                   	0x01f8 0x0484 0x0000 2 0
-#define MX6UL_PAD_CSI_DATA05__ECSPI1_SS0                         	0x01f8 0x0484 0x0000 3 0
-#define MX6UL_PAD_CSI_DATA05__EIM_AD05                           	0x01f8 0x0484 0x0000 4 0
-#define MX6UL_PAD_CSI_DATA05__GPIO4_IO26                         	0x01f8 0x0484 0x0000 5 0
-#define MX6UL_PAD_CSI_DATA05__SAI1_TX_BCLK                       	0x01f8 0x0484 0x05e8 6 1
-#define MX6UL_PAD_CSI_DATA05__USDHC1_CD_B                        	0x01f8 0x0484 0x0000 8 0
-#define MX6UL_PAD_CSI_DATA06__CSI_DATA08                         	0x01fc 0x0488 0x04e4 0 1
-#define MX6UL_PAD_CSI_DATA06__USDHC2_DATA6                       	0x01fc 0x0488 0x0694 1 2
-#define MX6UL_PAD_CSI_DATA06__SIM2_PORT1_SVEN                    	0x01fc 0x0488 0x0000 2 0
-#define MX6UL_PAD_CSI_DATA06__ECSPI1_MOSI                        	0x01fc 0x0488 0x053c 3 1
-#define MX6UL_PAD_CSI_DATA06__EIM_AD06                           	0x01fc 0x0488 0x0000 4 0
-#define MX6UL_PAD_CSI_DATA06__GPIO4_IO27                         	0x01fc 0x0488 0x0000 5 0
-#define MX6UL_PAD_CSI_DATA06__SAI1_RX_DATA                       	0x01fc 0x0488 0x0000 6 0
-#define MX6UL_PAD_CSI_DATA06__USDHC1_RESET_B                     	0x01fc 0x0488 0x0000 8 0
-#define MX6UL_PAD_CSI_DATA07__CSI_DATA09                         	0x0200 0x048c 0x04e8 0 1
-#define MX6UL_PAD_CSI_DATA07__USDHC2_DATA7                       	0x0200 0x048c 0x0698 1 2
-#define MX6UL_PAD_CSI_DATA07__SIM2_PORT1_TRXD                    	0x0200 0x048c 0x0000 2 0
-#define MX6UL_PAD_CSI_DATA07__ECSPI1_MISO                        	0x0200 0x048c 0x0538 3 1
-#define MX6UL_PAD_CSI_DATA07__EIM_AD07                           	0x0200 0x048c 0x0000 4 0
-#define MX6UL_PAD_CSI_DATA07__GPIO4_IO28                         	0x0200 0x048c 0x0000 5 0
-#define MX6UL_PAD_CSI_DATA07__SAI1_TX_DATA                       	0x0200 0x048c 0x0000 6 0
-#define MX6UL_PAD_CSI_DATA07__USDHC1_VSELECT                     	0x0200 0x048c 0x0000 8 0
+#define MX6UL_PAD_JTAG_MOD__SJC_MOD			0x0044 0x02d0 0x0000 0 0
+#define MX6UL_PAD_JTAG_MOD__GPT2_CLK			0x0044 0x02d0 0x05a0 1 0
+#define MX6UL_PAD_JTAG_MOD__SPDIF_OUT			0x0044 0x02d0 0x0000 2 0
+#define MX6UL_PAD_JTAG_MOD__ENET1_REF_CLK_25M		0x0044 0x02d0 0x0000 3 0
+#define MX6UL_PAD_JTAG_MOD__CCM_PMIC_RDY		0x0044 0x02d0 0x04c0 4 0
+#define MX6UL_PAD_JTAG_MOD__GPIO1_IO10			0x0044 0x02d0 0x0000 5 0
+#define MX6UL_PAD_JTAG_MOD__SDMA_EXT_EVENT00		0x0044 0x02d0 0x0000 6 0
+#define MX6UL_PAD_JTAG_TMS__SJC_TMS			0x0048 0x02d4 0x0000 0 0
+#define MX6UL_PAD_JTAG_TMS__GPT2_CAPTURE1		0x0048 0x02d4 0x0598 1 0
+#define MX6UL_PAD_JTAG_TMS__SAI2_MCLK			0x0048 0x02d4 0x0000 2 0
+#define MX6UL_PAD_JTAG_TMS__CCM_CLKO1			0x0048 0x02d4 0x0000 3 0
+#define MX6UL_PAD_JTAG_TMS__CCM_WAIT			0x0048 0x02d4 0x0000 4 0
+#define MX6UL_PAD_JTAG_TMS__GPIO1_IO11			0x0048 0x02d4 0x0000 5 0
+#define MX6UL_PAD_JTAG_TMS__SDMA_EXT_EVENT01		0x0048 0x02d4 0x0000 6 0
+#define MX6UL_PAD_JTAG_TMS__EPIT1_OUT			0x0048 0x02d4 0x0000 8 0
+#define MX6UL_PAD_JTAG_TDO__SJC_TDO			0x004c 0x02d8 0x0000 0 0
+#define MX6UL_PAD_JTAG_TDO__GPT2_CAPTURE2		0x004c 0x02d8 0x059c 1 0
+#define MX6UL_PAD_JTAG_TDO__SAI2_TX_SYNC		0x004c 0x02d8 0x05fc 2 0
+#define MX6UL_PAD_JTAG_TDO__CCM_CLKO2			0x004c 0x02d8 0x0000 3 0
+#define MX6UL_PAD_JTAG_TDO__CCM_STOP			0x004c 0x02d8 0x0000 4 0
+#define MX6UL_PAD_JTAG_TDO__GPIO1_IO12			0x004c 0x02d8 0x0000 5 0
+#define MX6UL_PAD_JTAG_TDO__MQS_RIGHT			0x004c 0x02d8 0x0000 6 0
+#define MX6UL_PAD_JTAG_TDO__EPIT2_OUT			0x004c 0x02d8 0x0000 8 0
+#define MX6UL_PAD_JTAG_TDI__SJC_TDI			0x0050 0x02dc 0x0000 0 0
+#define MX6UL_PAD_JTAG_TDI__GPT2_COMPARE1		0x0050 0x02dc 0x0000 1 0
+#define MX6UL_PAD_JTAG_TDI__SAI2_TX_BCLK		0x0050 0x02dc 0x05f8 2 0
+#define MX6UL_PAD_JTAG_TDI__PWM6_OUT			0x0050 0x02dc 0x0000 4 0
+#define MX6UL_PAD_JTAG_TDI__GPIO1_IO13			0x0050 0x02dc 0x0000 5 0
+#define MX6UL_PAD_JTAG_TDI__MQS_LEFT			0x0050 0x02dc 0x0000 6 0
+#define MX6UL_PAD_JTAG_TDI__SIM1_POWER_FAIL		0x0050 0x02dc 0x0000 8 0
+#define MX6UL_PAD_JTAG_TCK__SJC_TCK			0x0054 0x02e0 0x0000 0 0
+#define MX6UL_PAD_JTAG_TCK__GPT2_COMPARE2		0x0054 0x02e0 0x0000 1 0
+#define MX6UL_PAD_JTAG_TCK__SAI2_RX_DATA		0x0054 0x02e0 0x05f4 2 0
+#define MX6UL_PAD_JTAG_TCK__PWM7_OUT			0x0054 0x02e0 0x0000 4 0
+#define MX6UL_PAD_JTAG_TCK__GPIO1_IO14			0x0054 0x02e0 0x0000 5 0
+#define MX6UL_PAD_JTAG_TCK__SIM2_POWER_FAIL		0x0054 0x02e0 0x0000 8 0
+#define MX6UL_PAD_JTAG_TRST_B__SJC_TRSTB		0x0058 0x02e4 0x0000 0 0
+#define MX6UL_PAD_JTAG_TRST_B__GPT2_COMPARE3		0x0058 0x02e4 0x0000 1 0
+#define MX6UL_PAD_JTAG_TRST_B__SAI2_TX_DATA		0x0058 0x02e4 0x0000 2 0
+#define MX6UL_PAD_JTAG_TRST_B__PWM8_OUT			0x0058 0x02e4 0x0000 4 0
+#define MX6UL_PAD_JTAG_TRST_B__GPIO1_IO15		0x0058 0x02e4 0x0000 5 0
+#define MX6UL_PAD_JTAG_TRST_B__CAAM_RNG_OSC_OBS		0x0058 0x02e4 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO00__I2C2_SCL			0x005c 0x02e8 0x05ac 0 1
+#define MX6UL_PAD_GPIO1_IO00__GPT1_CAPTURE1		0x005c 0x02e8 0x058c 1 0
+#define MX6UL_PAD_GPIO1_IO00__ANATOP_OTG1_ID		0x005c 0x02e8 0x04b8 2 0
+#define MX6UL_PAD_GPIO1_IO00__ENET1_REF_CLK1		0x005c 0x02e8 0x0574 3 0
+#define MX6UL_PAD_GPIO1_IO00__MQS_RIGHT			0x005c 0x02e8 0x0000 4 0
+#define MX6UL_PAD_GPIO1_IO00__GPIO1_IO00		0x005c 0x02e8 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO00__ENET1_1588_EVENT0_IN	0x005c 0x02e8 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO00__SRC_SYSTEM_RESET		0x005c 0x02e8 0x0000 7 0
+#define MX6UL_PAD_GPIO1_IO00__WDOG3_WDOG_B		0x005c 0x02e8 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO01__I2C2_SDA			0x0060 0x02ec 0x05b0 0 1
+#define MX6UL_PAD_GPIO1_IO01__GPT1_COMPARE1		0x0060 0x02ec 0x0000 1 0
+#define MX6UL_PAD_GPIO1_IO01__USB_OTG1_OC		0x0060 0x02ec 0x0664 2 0
+#define MX6UL_PAD_GPIO1_IO01__ENET2_REF_CLK2		0x0060 0x02ec 0x057c 3 0
+#define MX6UL_PAD_GPIO1_IO01__MQS_LEFT			0x0060 0x02ec 0x0000 4 0
+#define MX6UL_PAD_GPIO1_IO01__GPIO1_IO01		0x0060 0x02ec 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO01__ENET1_1588_EVENT0_OUT	0x0060 0x02ec 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO01__SRC_EARLY_RESET		0x0060 0x02ec 0x0000 7 0
+#define MX6UL_PAD_GPIO1_IO01__WDOG1_WDOG_B		0x0060 0x02ec 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO02__I2C1_SCL			0x0064 0x02f0 0x05a4 0 0
+#define MX6UL_PAD_GPIO1_IO02__GPT1_COMPARE2		0x0064 0x02f0 0x0000 1 0
+#define MX6UL_PAD_GPIO1_IO02__USB_OTG2_PWR		0x0064 0x02f0 0x0000 2 0
+#define MX6UL_PAD_GPIO1_IO02__ENET1_REF_CLK_25M		0x0064 0x02f0 0x0000 3 0
+#define MX6UL_PAD_GPIO1_IO02__USDHC1_WP			0x0064 0x02f0 0x066c 4 0
+#define MX6UL_PAD_GPIO1_IO02__GPIO1_IO02		0x0064 0x02f0 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO02__SDMA_EXT_EVENT00		0x0064 0x02f0 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO02__SRC_ANY_PU_RESET		0x0064 0x02f0 0x0000 7 0
+#define MX6UL_PAD_GPIO1_IO02__UART1_DCE_TX		0x0064 0x02f0 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO02__UART1_DTE_RX		0x0064 0x02f0 0x0624 8 0
+#define MX6UL_PAD_GPIO1_IO03__I2C1_SDA			0x0068 0x02f4 0x05a8 0 1
+#define MX6UL_PAD_GPIO1_IO03__GPT1_COMPARE3		0x0068 0x02f4 0x0000 1 0
+#define MX6UL_PAD_GPIO1_IO03__USB_OTG2_OC		0x0068 0x02f4 0x0660 2 0
+#define MX6UL_PAD_GPIO1_IO03__USDHC1_CD_B		0x0068 0x02f4 0x0668 4 0
+#define MX6UL_PAD_GPIO1_IO03__GPIO1_IO03		0x0068 0x02f4 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO03__CCM_DI0_eXT_CLK		0x0068 0x02f4 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO03__SRC_TESTER_ACK		0x0068 0x02f4 0x0000 7 0
+#define MX6UL_PAD_GPIO1_IO03__UART1_DTE_TX		0x0068 0x02f4 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO03__UART1_DCE_RX		0x0068 0x02f4 0x0624 8 1
+#define MX6UL_PAD_GPIO1_IO04__ENET1_REF_CLK1		0x006c 0x02f8 0x0574 0 1
+#define MX6UL_PAD_GPIO1_IO04__PWM3_OUT			0x006c 0x02f8 0x0000 1 0
+#define MX6UL_PAD_GPIO1_IO04__USB_OTG1_PWR		0x006c 0x02f8 0x0000 2 0
+#define MX6UL_PAD_GPIO1_IO04__USDHC1_RESET_B		0x006c 0x02f8 0x0000 4 0
+#define MX6UL_PAD_GPIO1_IO04__GPIO1_IO04		0x006c 0x02f8 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO04__ENET2_1588_EVENT0_IN	0x006c 0x02f8 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO04__UART5_DCE_TX		0x006c 0x02f8 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO04__UART5_DTE_RX		0x006c 0x02f8 0x0644 8 2
+#define MX6UL_PAD_GPIO1_IO05__ENET2_REF_CLK2		0x0070 0x02fc 0x057c 0 1
+#define MX6UL_PAD_GPIO1_IO05__PWM4_OUT			0x0070 0x02fc 0x0000 1 0
+#define MX6UL_PAD_GPIO1_IO05__ANATOP_OTG2_ID		0x0070 0x02fc 0x04bc 2 0
+#define MX6UL_PAD_GPIO1_IO05__CSI_FIELD			0x0070 0x02fc 0x0530 3 0
+#define MX6UL_PAD_GPIO1_IO05__USDHC1_VSELECT		0x0070 0x02fc 0x0000 4 0
+#define MX6UL_PAD_GPIO1_IO05__GPIO1_IO05		0x0070 0x02fc 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO05__ENET2_1588_EVENT0_OUT	0x0070 0x02fc 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO05__UART5_DCE_RX		0x0070 0x02fc 0x0644 8 3
+#define MX6UL_PAD_GPIO1_IO05__UART5_DTE_TX		0x0070 0x02fc 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO06__ENET1_MDIO		0x0074 0x0300 0x0578 0 0
+#define MX6UL_PAD_GPIO1_IO06__ENET2_MDIO		0x0074 0x0300 0x0580 1 0
+#define MX6UL_PAD_GPIO1_IO06__USB_OTG_PWR_WAKE		0x0074 0x0300 0x0000 2 0
+#define MX6UL_PAD_GPIO1_IO06__CSI_MCLK			0x0074 0x0300 0x0000 3 0
+#define MX6UL_PAD_GPIO1_IO06__USDHC2_WP			0x0074 0x0300 0x069c 4 0
+#define MX6UL_PAD_GPIO1_IO06__GPIO1_IO06		0x0074 0x0300 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO06__CCM_WAIT			0x0074 0x0300 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO06__CCM_REF_EN_B		0x0074 0x0300 0x0000 7 0
+#define MX6UL_PAD_GPIO1_IO06__UART1_DCE_CTS		0x0074 0x0300 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO06__UART1_DTE_RTS		0x0074 0x0300 0x0620 8 0
+#define MX6UL_PAD_GPIO1_IO07__ENET1_MDC			0x0078 0x0304 0x0000 0 0
+#define MX6UL_PAD_GPIO1_IO07__ENET2_MDC			0x0078 0x0304 0x0000 1 0
+#define MX6UL_PAD_GPIO1_IO07__USB_OTG_HOST_MODE		0x0078 0x0304 0x0000 2 0
+#define MX6UL_PAD_GPIO1_IO07__CSI_PIXCLK		0x0078 0x0304 0x0528 3 0
+#define MX6UL_PAD_GPIO1_IO07__USDHC2_CD_B		0x0078 0x0304 0x0674 4 1
+#define MX6UL_PAD_GPIO1_IO07__GPIO1_IO07		0x0078 0x0304 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO07__CCM_STOP			0x0078 0x0304 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO07__UART1_DCE_RTS		0x0078 0x0304 0x0620 8 1
+#define MX6UL_PAD_GPIO1_IO07__UART1_DTE_CTS		0x0078 0x0304 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO08__PWM1_OUT			0x007c 0x0308 0x0000 0 0
+#define MX6UL_PAD_GPIO1_IO08__WDOG1_WDOG_B		0x007c 0x0308 0x0000 1 0
+#define MX6UL_PAD_GPIO1_IO08__SPDIF_OUT			0x007c 0x0308 0x0000 2 0
+#define MX6UL_PAD_GPIO1_IO08__CSI_VSYNC			0x007c 0x0308 0x052c 3 1
+#define MX6UL_PAD_GPIO1_IO08__USDHC2_VSELECT		0x007c 0x0308 0x0000 4 0
+#define MX6UL_PAD_GPIO1_IO08__GPIO1_IO08		0x007c 0x0308 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO08__CCM_PMIC_RDY		0x007c 0x0308 0x04c0 6 1
+#define MX6UL_PAD_GPIO1_IO08__UART5_DCE_RTS		0x007c 0x0308 0x0640 8 1
+#define MX6UL_PAD_GPIO1_IO08__UART5_DTE_CTS		0x007c 0x0308 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO09__PWM2_OUT			0x0080 0x030c 0x0000 0 0
+#define MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY		0x0080 0x030c 0x0000 1 0
+#define MX6UL_PAD_GPIO1_IO09__SPDIF_IN			0x0080 0x030c 0x0618 2 0
+#define MX6UL_PAD_GPIO1_IO09__CSI_HSYNC			0x0080 0x030c 0x0524 3 1
+#define MX6UL_PAD_GPIO1_IO09__USDHC2_RESET_B		0x0080 0x030c 0x0000 4 0
+#define MX6UL_PAD_GPIO1_IO09__GPIO1_IO09		0x0080 0x030c 0x0000 5 0
+#define MX6UL_PAD_GPIO1_IO09__USDHC1_RESET_B		0x0080 0x030c 0x0000 6 0
+#define MX6UL_PAD_GPIO1_IO09__UART5_DCE_CTS		0x0080 0x030c 0x0000 8 0
+#define MX6UL_PAD_GPIO1_IO09__UART5_DTE_RTS		0x0080 0x030c 0x0640 8 2
+#define MX6UL_PAD_UART1_TX_DATA__UART1_DCE_TX		0x0084 0x0310 0x0000 0 0
+#define MX6UL_PAD_UART1_TX_DATA__UART1_DTE_RX		0x0084 0x0310 0x0624 0 2
+#define MX6UL_PAD_UART1_TX_DATA__ENET1_RDATA02		0x0084 0x0310 0x0000 1 0
+#define MX6UL_PAD_UART1_TX_DATA__I2C3_SCL		0x0084 0x0310 0x05b4 2 0
+#define MX6UL_PAD_UART1_TX_DATA__CSI_DATA02		0x0084 0x0310 0x04c4 3 1
+#define MX6UL_PAD_UART1_TX_DATA__GPT1_COMPARE1		0x0084 0x0310 0x0000 4 0
+#define MX6UL_PAD_UART1_TX_DATA__GPIO1_IO16		0x0084 0x0310 0x0000 5 0
+#define MX6UL_PAD_UART1_TX_DATA__SPDIF_OUT		0x0084 0x0310 0x0000 8 0
+#define MX6UL_PAD_UART1_RX_DATA__UART1_DCE_RX		0x0088 0x0314 0x0624 0 3
+#define MX6UL_PAD_UART1_RX_DATA__UART1_DTE_TX		0x0088 0x0314 0x0000 0 0
+#define MX6UL_PAD_UART1_RX_DATA__ENET1_RDATA03		0x0088 0x0314 0x0000 1 0
+#define MX6UL_PAD_UART1_RX_DATA__I2C3_SDA		0x0088 0x0314 0x05b8 2 0
+#define MX6UL_PAD_UART1_RX_DATA__CSI_DATA03		0x0088 0x0314 0x04c8 3 1
+#define MX6UL_PAD_UART1_RX_DATA__GPT1_CLK		0x0088 0x0314 0x0594 4 0
+#define MX6UL_PAD_UART1_RX_DATA__GPIO1_IO17		0x0088 0x0314 0x0000 5 0
+#define MX6UL_PAD_UART1_RX_DATA__SPDIF_IN		0x0088 0x0314 0x0618 8 1
+#define MX6UL_PAD_UART1_CTS_B__UART1_DCE_CTS		0x008c 0x0318 0x0000 0 0
+#define MX6UL_PAD_UART1_CTS_B__UART1_DTE_RTS		0x008c 0x0318 0x0620 0 2
+#define MX6UL_PAD_UART1_CTS_B__ENET1_RX_CLK		0x008c 0x0318 0x0000 1 0
+#define MX6UL_PAD_UART1_CTS_B__USDHC1_WP		0x008c 0x0318 0x066c 2 1
+#define MX6UL_PAD_UART1_CTS_B__CSI_DATA04		0x008c 0x0318 0x04d8 3 0
+#define MX6UL_PAD_UART1_CTS_B__ENET2_1588_EVENT1_IN	0x008c 0x0318 0x0000 4 0
+#define MX6UL_PAD_UART1_CTS_B__GPIO1_IO18		0x008c 0x0318 0x0000 5 0
+#define MX6UL_PAD_UART1_CTS_B__USDHC2_WP		0x008c 0x0318 0x069c 8 1
+#define MX6UL_PAD_UART1_RTS_B__UART1_DCE_RTS		0x0090 0x031c 0x0620 0 3
+#define MX6UL_PAD_UART1_RTS_B__UART1_DTE_CTS		0x0090 0x031c 0x0000 0 0
+#define MX6UL_PAD_UART1_RTS_B__ENET1_TX_ER		0x0090 0x031c 0x0000 1 0
+#define MX6UL_PAD_UART1_RTS_B__USDHC1_CD_B		0x0090 0x031c 0x0668 2 1
+#define MX6UL_PAD_UART1_RTS_B__CSI_DATA05		0x0090 0x031c 0x04cc 3 1
+#define MX6UL_PAD_UART1_RTS_B__ENET2_1588_EVENT1_OUT	0x0090 0x031c 0x0000 4 0
+#define MX6UL_PAD_UART1_RTS_B__GPIO1_IO19		0x0090 0x031c 0x0000 5 0
+#define MX6UL_PAD_UART1_RTS_B__USDHC2_CD_B		0x0090 0x031c 0x0674 8 2
+#define MX6UL_PAD_UART2_TX_DATA__UART2_DCE_TX		0x0094 0x0320 0x0000 0 0
+#define MX6UL_PAD_UART2_TX_DATA__UART2_DTE_RX		0x0094 0x0320 0x062c 0 0
+#define MX6UL_PAD_UART2_TX_DATA__ENET1_TDATA02		0x0094 0x0320 0x0000 1 0
+#define MX6UL_PAD_UART2_TX_DATA__I2C4_SCL		0x0094 0x0320 0x05bc 2 0
+#define MX6UL_PAD_UART2_TX_DATA__CSI_DATA06		0x0094 0x0320 0x04dc 3 0
+#define MX6UL_PAD_UART2_TX_DATA__GPT1_CAPTURE1		0x0094 0x0320 0x058c 4 1
+#define MX6UL_PAD_UART2_TX_DATA__GPIO1_IO20		0x0094 0x0320 0x0000 5 0
+#define MX6UL_PAD_UART2_TX_DATA__ECSPI3_SS0		0x0094 0x0320 0x0000 8 0
+#define MX6UL_PAD_UART2_RX_DATA__UART2_DCE_RX		0x0098 0x0324 0x062c 0 1
+#define MX6UL_PAD_UART2_RX_DATA__UART2_DTE_TX		0x0098 0x0324 0x0000 0 0
+#define MX6UL_PAD_UART2_RX_DATA__ENET1_TDATA03		0x0098 0x0324 0x0000 1 0
+#define MX6UL_PAD_UART2_RX_DATA__I2C4_SDA		0x0098 0x0324 0x05c0 2 0
+#define MX6UL_PAD_UART2_RX_DATA__CSI_DATA07		0x0098 0x0324 0x04e0 3 0
+#define MX6UL_PAD_UART2_RX_DATA__GPT1_CAPTURE2		0x0098 0x0324 0x0590 4 0
+#define MX6UL_PAD_UART2_RX_DATA__GPIO1_IO21		0x0098 0x0324 0x0000 5 0
+#define MX6UL_PAD_UART2_RX_DATA__SJC_DONE		0x0098 0x0324 0x0000 7 0
+#define MX6UL_PAD_UART2_RX_DATA__ECSPI3_SCLK		0x0098 0x0324 0x0554 8 0
+#define MX6UL_PAD_UART2_CTS_B__UART2_DCE_CTS		0x009c 0x0328 0x0000 0 0
+#define MX6UL_PAD_UART2_CTS_B__UART2_DTE_RTS		0x009c 0x0328 0x0628 0 0
+#define MX6UL_PAD_UART2_CTS_B__ENET1_CRS		0x009c 0x0328 0x0000 1 0
+#define MX6UL_PAD_UART2_CTS_B__FLEXCAN2_TX		0x009c 0x0328 0x0000 2 0
+#define MX6UL_PAD_UART2_CTS_B__CSI_DATA08		0x009c 0x0328 0x04e4 3 0
+#define MX6UL_PAD_UART2_CTS_B__GPT1_COMPARE2		0x009c 0x0328 0x0000 4 0
+#define MX6UL_PAD_UART2_CTS_B__GPIO1_IO22		0x009c 0x0328 0x0000 5 0
+#define MX6UL_PAD_UART2_CTS_B__SJC_DE_B			0x009c 0x0328 0x0000 7 0
+#define MX6UL_PAD_UART2_CTS_B__ECSPI3_MOSI		0x009c 0x0328 0x055c 8 0
+#define MX6UL_PAD_UART2_RTS_B__UART2_DCE_RTS		0x00a0 0x032c 0x0628 0 1
+#define MX6UL_PAD_UART2_RTS_B__UART2_DTE_CTS		0x00a0 0x032c 0x0000 0 0
+#define MX6UL_PAD_UART2_RTS_B__ENET1_COL		0x00a0 0x032c 0x0000 1 0
+#define MX6UL_PAD_UART2_RTS_B__FLEXCAN2_RX		0x00a0 0x032c 0x0588 2 0
+#define MX6UL_PAD_UART2_RTS_B__CSI_DATA09		0x00a0 0x032c 0x04e8 3 0
+#define MX6UL_PAD_UART2_RTS_B__GPT1_COMPARE3		0x00a0 0x032c 0x0000 4 0
+#define MX6UL_PAD_UART2_RTS_B__GPIO1_IO23		0x00a0 0x032c 0x0000 5 0
+#define MX6UL_PAD_UART2_RTS_B__SJC_FAIL			0x00a0 0x032c 0x0000 7 0
+#define MX6UL_PAD_UART2_RTS_B__ECSPI3_MISO		0x00a0 0x032c 0x0558 8 0
+#define MX6UL_PAD_UART3_TX_DATA__UART3_DCE_TX		0x00a4 0x0330 0x0000 0 0
+#define MX6UL_PAD_UART3_TX_DATA__UART3_DTE_RX		0x00a4 0x0330 0x0634 0 0
+#define MX6UL_PAD_UART3_TX_DATA__ENET2_RDATA02		0x00a4 0x0330 0x0000 1 0
+#define MX6UL_PAD_UART3_TX_DATA__SIM1_PORT0_PD		0x00a4 0x0330 0x0000 2 0
+#define MX6UL_PAD_UART3_TX_DATA__CSI_DATA01		0x00a4 0x0330 0x0000 3 0
+#define MX6UL_PAD_UART3_TX_DATA__UART2_DCE_CTS		0x00a4 0x0330 0x0000 4 0
+#define MX6UL_PAD_UART3_TX_DATA__UART2_DTE_RTS		0x00a4 0x0330 0x0628 4 2
+#define MX6UL_PAD_UART3_TX_DATA__GPIO1_IO24		0x00a4 0x0330 0x0000 5 0
+#define MX6UL_PAD_UART3_TX_DATA__SJC_JTAG_ACT		0x00a4 0x0330 0x0000 7 0
+#define MX6UL_PAD_UART3_TX_DATA__ANATOP_OTG1_ID		0x00a4 0x0330 0x04b8 8 1
+#define MX6UL_PAD_UART3_RX_DATA__UART3_DCE_RX		0x00a8 0x0334 0x0634 0 1
+#define MX6UL_PAD_UART3_RX_DATA__UART3_DTE_TX		0x00a8 0x0334 0x0000 0 0
+#define MX6UL_PAD_UART3_RX_DATA__ENET2_RDATA03		0x00a8 0x0334 0x0000 1 0
+#define MX6UL_PAD_UART3_RX_DATA__SIM2_PORT0_PD		0x00a8 0x0334 0x0000 2 0
+#define MX6UL_PAD_UART3_RX_DATA__CSI_DATA00		0x00a8 0x0334 0x0000 3 0
+#define MX6UL_PAD_UART3_RX_DATA__UART2_DCE_RTS		0x00a8 0x0334 0x0628 4 3
+#define MX6UL_PAD_UART3_RX_DATA__UART2_DTE_CTS		0x00a8 0x0334 0x0000 4 0
+#define MX6UL_PAD_UART3_RX_DATA__GPIO1_IO25		0x00a8 0x0334 0x0000 5 0
+#define MX6UL_PAD_UART3_RX_DATA__EPIT1_OUT		0x00a8 0x0334 0x0000 8 0
+#define MX6UL_PAD_UART3_CTS_B__UART3_DCE_CTS		0x00ac 0x0338 0x0000 0 0
+#define MX6UL_PAD_UART3_CTS_B__UART3_DTE_RTS		0x00ac 0x0338 0x0630 0 0
+#define MX6UL_PAD_UART3_CTS_B__ENET2_RX_CLK		0x00ac 0x0338 0x0000 1 0
+#define MX6UL_PAD_UART3_CTS_B__FLEXCAN1_TX		0x00ac 0x0338 0x0000 2 0
+#define MX6UL_PAD_UART3_CTS_B__CSI_DATA10		0x00ac 0x0338 0x0000 3 0
+#define MX6UL_PAD_UART3_CTS_B__ENET1_1588_EVENT1_IN	0x00ac 0x0338 0x0000 4 0
+#define MX6UL_PAD_UART3_CTS_B__GPIO1_IO26		0x00ac 0x0338 0x0000 5 0
+#define MX6UL_PAD_UART3_CTS_B__EPIT2_OUT		0x00ac 0x0338 0x0000 8 0
+#define MX6UL_PAD_UART3_RTS_B__UART3_DCE_RTS		0x00b0 0x033c 0x0630 0 1
+#define MX6UL_PAD_UART3_RTS_B__UART3_DTE_CTS		0x00b0 0x033c 0x0000 0 0
+#define MX6UL_PAD_UART3_RTS_B__ENET2_TX_ER		0x00b0 0x033c 0x0000 1 0
+#define MX6UL_PAD_UART3_RTS_B__FLEXCAN1_RX		0x00b0 0x033c 0x0584 2 0
+#define MX6UL_PAD_UART3_RTS_B__CSI_DATA11		0x00b0 0x033c 0x0000 3 0
+#define MX6UL_PAD_UART3_RTS_B__ENET1_1588_EVENT1_OUT	0x00b0 0x033c 0x0000 4 0
+#define MX6UL_PAD_UART3_RTS_B__GPIO1_IO27		0x00b0 0x033c 0x0000 5 0
+#define MX6UL_PAD_UART3_RTS_B__WDOG1_WDOG_B		0x00b0 0x033c 0x0000 8 0
+#define MX6UL_PAD_UART4_TX_DATA__UART4_DCE_TX		0x00b4 0x0340 0x0000 0 0
+#define MX6UL_PAD_UART4_TX_DATA__UART4_DTE_RX		0x00b4 0x0340 0x063c 0 0
+#define MX6UL_PAD_UART4_TX_DATA__ENET2_TDATA02		0x00b4 0x0340 0x0000 1 0
+#define MX6UL_PAD_UART4_TX_DATA__I2C1_SCL		0x00b4 0x0340 0x05a4 2 1
+#define MX6UL_PAD_UART4_TX_DATA__CSI_DATA12		0x00b4 0x0340 0x0000 3 0
+#define MX6UL_PAD_UART4_TX_DATA__CSU_CSU_ALARM_AUT02	0x00b4 0x0340 0x0000 4 0
+#define MX6UL_PAD_UART4_TX_DATA__GPIO1_IO28		0x00b4 0x0340 0x0000 5 0
+#define MX6UL_PAD_UART4_TX_DATA__ECSPI2_SCLK		0x00b4 0x0340 0x0544 8 1
+#define MX6UL_PAD_UART4_RX_DATA__UART4_DCE_RX		0x00b8 0x0344 0x063c 0 1
+#define MX6UL_PAD_UART4_RX_DATA__UART4_DTE_TX		0x00b8 0x0344 0x0000 0 0
+#define MX6UL_PAD_UART4_RX_DATA__ENET2_TDATA03		0x00b8 0x0344 0x0000 1 0
+#define MX6UL_PAD_UART4_RX_DATA__I2C1_SDA		0x00b8 0x0344 0x05a8 2 2
+#define MX6UL_PAD_UART4_RX_DATA__CSI_DATA13		0x00b8 0x0344 0x0000 3 0
+#define MX6UL_PAD_UART4_RX_DATA__CSU_CSU_ALARM_AUT01	0x00b8 0x0344 0x0000 4 0
+#define MX6UL_PAD_UART4_RX_DATA__GPIO1_IO29		0x00b8 0x0344 0x0000 5 0
+#define MX6UL_PAD_UART4_RX_DATA__ECSPI2_SS0		0x00b8 0x0344 0x0000 8 0
+#define MX6UL_PAD_UART5_TX_DATA__GPIO1_IO30		0x00bc 0x0348 0x0000 5 0
+#define MX6UL_PAD_UART5_TX_DATA__ECSPI2_MOSI		0x00bc 0x0348 0x054c 8 0
+#define MX6UL_PAD_UART5_TX_DATA__UART5_DCE_TX		0x00bc 0x0348 0x0000 0 0
+#define MX6UL_PAD_UART5_TX_DATA__UART5_DTE_RX		0x00bc 0x0348 0x0644 0 4
+#define MX6UL_PAD_UART5_TX_DATA__ENET2_CRS		0x00bc 0x0348 0x0000 1 0
+#define MX6UL_PAD_UART5_TX_DATA__I2C2_SCL		0x00bc 0x0348 0x05ac 2 2
+#define MX6UL_PAD_UART5_TX_DATA__CSI_DATA14		0x00bc 0x0348 0x0000 3 0
+#define MX6UL_PAD_UART5_TX_DATA__CSU_CSU_ALARM_AUT00	0x00bc 0x0348 0x0000 4 0
+#define MX6UL_PAD_UART5_RX_DATA__UART5_DCE_RX		0x00c0 0x034c 0x0644 0 5
+#define MX6UL_PAD_UART5_RX_DATA__UART5_DTE_TX		0x00c0 0x034c 0x0000 0 0
+#define MX6UL_PAD_UART5_RX_DATA__ENET2_COL		0x00c0 0x034c 0x0000 1 0
+#define MX6UL_PAD_UART5_RX_DATA__I2C2_SDA		0x00c0 0x034c 0x05b0 2 2
+#define MX6UL_PAD_UART5_RX_DATA__CSI_DATA15		0x00c0 0x034c 0x0000 3 0
+#define MX6UL_PAD_UART5_RX_DATA__CSU_CSU_INT_DEB	0x00c0 0x034c 0x0000 4 0
+#define MX6UL_PAD_UART5_RX_DATA__GPIO1_IO31		0x00c0 0x034c 0x0000 5 0
+#define MX6UL_PAD_UART5_RX_DATA__ECSPI2_MISO		0x00c0 0x034c 0x0548 8 1
+#define MX6UL_PAD_ENET1_RX_DATA0__ENET1_RDATA00		0x00c4 0x0350 0x0000 0 0
+#define MX6UL_PAD_ENET1_RX_DATA0__UART4_DCE_RTS		0x00c4 0x0350 0x0638 1 0
+#define MX6UL_PAD_ENET1_RX_DATA0__UART4_DTE_CTS		0x00c4 0x0350 0x0000 1 0
+#define MX6UL_PAD_ENET1_RX_DATA0__PWM1_OUT		0x00c4 0x0350 0x0000 2 0
+#define MX6UL_PAD_ENET1_RX_DATA0__CSI_DATA16		0x00c4 0x0350 0x0000 3 0
+#define MX6UL_PAD_ENET1_RX_DATA0__FLEXCAN1_TX		0x00c4 0x0350 0x0000 4 0
+#define MX6UL_PAD_ENET1_RX_DATA0__GPIO2_IO00		0x00c4 0x0350 0x0000 5 0
+#define MX6UL_PAD_ENET1_RX_DATA0__KPP_ROW00		0x00c4 0x0350 0x0000 6 0
+#define MX6UL_PAD_ENET1_RX_DATA0__USDHC1_LCTL		0x00c4 0x0350 0x0000 8 0
+#define MX6UL_PAD_ENET1_RX_DATA1__ENET1_RDATA01		0x00c8 0x0354 0x0000 0 0
+#define MX6UL_PAD_ENET1_RX_DATA1__UART4_DCE_CTS		0x00c8 0x0354 0x0000 1 0
+#define MX6UL_PAD_ENET1_RX_DATA1__UART4_DTE_RTS		0x00c8 0x0354 0x0638 1 1
+#define MX6UL_PAD_ENET1_RX_DATA1__PWM2_OUT		0x00c8 0x0354 0x0000 2 0
+#define MX6UL_PAD_ENET1_RX_DATA1__CSI_DATA17		0x00c8 0x0354 0x0000 3 0
+#define MX6UL_PAD_ENET1_RX_DATA1__FLEXCAN1_RX		0x00c8 0x0354 0x0584 4 1
+#define MX6UL_PAD_ENET1_RX_DATA1__GPIO2_IO01		0x00c8 0x0354 0x0000 5 0
+#define MX6UL_PAD_ENET1_RX_DATA1__KPP_COL00		0x00c8 0x0354 0x0000 6 0
+#define MX6UL_PAD_ENET1_RX_DATA1__USDHC2_LCTL		0x00c8 0x0354 0x0000 8 0
+#define MX6UL_PAD_ENET1_RX_EN__ENET1_RX_EN		0x00cc 0x0358 0x0000 0 0
+#define MX6UL_PAD_ENET1_RX_EN__UART5_DCE_RTS		0x00cc 0x0358 0x0640 1 3
+#define MX6UL_PAD_ENET1_RX_EN__UART5_DTE_CTS		0x00cc 0x0358 0x0000 1 0
+#define MX6UL_PAD_ENET1_RX_EN__CSI_DATA18		0x00cc 0x0358 0x0000 3 0
+#define MX6UL_PAD_ENET1_RX_EN__FLEXCAN2_TX		0x00cc 0x0358 0x0000 4 0
+#define MX6UL_PAD_ENET1_RX_EN__GPIO2_IO02		0x00cc 0x0358 0x0000 5 0
+#define MX6UL_PAD_ENET1_RX_EN__KPP_ROW01		0x00cc 0x0358 0x0000 6 0
+#define MX6UL_PAD_ENET1_RX_EN__USDHC1_VSELECT		0x00cc 0x0358 0x0000 8 0
+#define MX6UL_PAD_ENET1_TX_DATA0__ENET1_TDATA00		0x00d0 0x035c 0x0000 0 0
+#define MX6UL_PAD_ENET1_TX_DATA0__UART5_DCE_CTS		0x00d0 0x035c 0x0000 1 0
+#define MX6UL_PAD_ENET1_TX_DATA0__UART5_DTE_RTS		0x00d0 0x035c 0x0640 1 4
+#define MX6UL_PAD_ENET1_TX_DATA0__CSI_DATA19		0x00d0 0x035c 0x0000 3 0
+#define MX6UL_PAD_ENET1_TX_DATA0__FLEXCAN2_RX		0x00d0 0x035c 0x0588 4 1
+#define MX6UL_PAD_ENET1_TX_DATA0__GPIO2_IO03		0x00d0 0x035c 0x0000 5 0
+#define MX6UL_PAD_ENET1_TX_DATA0__KPP_COL01		0x00d0 0x035c 0x0000 6 0
+#define MX6UL_PAD_ENET1_TX_DATA0__USDHC2_VSELECT	0x00d0 0x035c 0x0000 8 0
+#define MX6UL_PAD_ENET1_TX_DATA1__ENET1_TDATA01		0x00d4 0x0360 0x0000 0 0
+#define MX6UL_PAD_ENET1_TX_DATA1__UART6_DCE_CTS		0x00d4 0x0360 0x0000 1 0
+#define MX6UL_PAD_ENET1_TX_DATA1__UART6_DTE_RTS		0x00d4 0x0360 0x0648 1 2
+#define MX6UL_PAD_ENET1_TX_DATA1__PWM5_OUT		0x00d4 0x0360 0x0000 2 0
+#define MX6UL_PAD_ENET1_TX_DATA1__CSI_DATA20		0x00d4 0x0360 0x0000 3 0
+#define MX6UL_PAD_ENET1_TX_DATA1__ENET2_MDIO		0x00d4 0x0360 0x0580 4 1
+#define MX6UL_PAD_ENET1_TX_DATA1__GPIO2_IO04		0x00d4 0x0360 0x0000 5 0
+#define MX6UL_PAD_ENET1_TX_DATA1__KPP_ROW02		0x00d4 0x0360 0x0000 6 0
+#define MX6UL_PAD_ENET1_TX_DATA1__WDOG1_WDOG_RST_B_DEB	0x00d4 0x0360 0x0000 8 0
+#define MX6UL_PAD_ENET1_TX_EN__ENET1_TX_EN		0x00d8 0x0364 0x0000 0 0
+#define MX6UL_PAD_ENET1_TX_EN__UART6_DCE_RTS		0x00d8 0x0364 0x0648 1 3
+#define MX6UL_PAD_ENET1_TX_EN__UART6_DTE_CTS		0x00d8 0x0364 0x0000 1 0
+#define MX6UL_PAD_ENET1_TX_EN__PWM6_OUT			0x00d8 0x0364 0x0000 2 0
+#define MX6UL_PAD_ENET1_TX_EN__CSI_DATA21		0x00d8 0x0364 0x0000 3 0
+#define MX6UL_PAD_ENET1_TX_EN__ENET2_MDC		0x00d8 0x0364 0x0000 4 0
+#define MX6UL_PAD_ENET1_TX_EN__GPIO2_IO05		0x00d8 0x0364 0x0000 5 0
+#define MX6UL_PAD_ENET1_TX_EN__KPP_COL02		0x00d8 0x0364 0x0000 6 0
+#define MX6UL_PAD_ENET1_TX_EN__WDOG2_WDOG_RST_B_DEB	0x00d8 0x0364 0x0000 8 0
+#define MX6UL_PAD_ENET1_TX_CLK__ENET1_TX_CLK		0x00dc 0x0368 0x0000 0 0
+#define MX6UL_PAD_ENET1_TX_CLK__UART7_DCE_CTS		0x00dc 0x0368 0x0000 1 0
+#define MX6UL_PAD_ENET1_TX_CLK__UART7_DTE_RTS		0x00dc 0x0368 0x0650 1 0
+#define MX6UL_PAD_ENET1_TX_CLK__PWM7_OUT		0x00dc 0x0368 0x0000 2 0
+#define MX6UL_PAD_ENET1_TX_CLK__CSI_DATA22		0x00dc 0x0368 0x0000 3 0
+#define MX6UL_PAD_ENET1_TX_CLK__ENET1_REF_CLK1		0x00dc 0x0368 0x0574 4 2
+#define MX6UL_PAD_ENET1_TX_CLK__GPIO2_IO06		0x00dc 0x0368 0x0000 5 0
+#define MX6UL_PAD_ENET1_TX_CLK__KPP_ROW03		0x00dc 0x0368 0x0000 6 0
+#define MX6UL_PAD_ENET1_TX_CLK__GPT1_CLK		0x00dc 0x0368 0x0594 8 1
+#define MX6UL_PAD_ENET1_RX_ER__ENET1_RX_ER		0x00e0 0x036c 0x0000 0 0
+#define MX6UL_PAD_ENET1_RX_ER__UART7_DCE_RTS		0x00e0 0x036c 0x0650 1 1
+#define MX6UL_PAD_ENET1_RX_ER__UART7_DTE_CTS		0x00e0 0x036c 0x0000 1 0
+#define MX6UL_PAD_ENET1_RX_ER__PWM8_OUT			0x00e0 0x036c 0x0000 2 0
+#define MX6UL_PAD_ENET1_RX_ER__CSI_DATA23		0x00e0 0x036c 0x0000 3 0
+#define MX6UL_PAD_ENET1_RX_ER__EIM_CRE			0x00e0 0x036c 0x0000 4 0
+#define MX6UL_PAD_ENET1_RX_ER__GPIO2_IO07		0x00e0 0x036c 0x0000 5 0
+#define MX6UL_PAD_ENET1_RX_ER__KPP_COL03		0x00e0 0x036c 0x0000 6 0
+#define MX6UL_PAD_ENET1_RX_ER__GPT1_CAPTURE2		0x00e0 0x036c 0x0590 8 1
+#define MX6UL_PAD_ENET2_RX_DATA0__ENET2_RDATA00		0x00e4 0x0370 0x0000 0 0
+#define MX6UL_PAD_ENET2_RX_DATA0__UART6_DCE_TX		0x00e4 0x0370 0x0000 1 0
+#define MX6UL_PAD_ENET2_RX_DATA0__UART6_DTE_RX		0x00e4 0x0370 0x064c 1 1
+#define MX6UL_PAD_ENET2_RX_DATA0__SIM1_PORT0_TRXD	0x00e4 0x0370 0x0000 2 0
+#define MX6UL_PAD_ENET2_RX_DATA0__I2C3_SCL		0x00e4 0x0370 0x05b4 3 1
+#define MX6UL_PAD_ENET2_RX_DATA0__ENET1_MDIO		0x00e4 0x0370 0x0578 4 1
+#define MX6UL_PAD_ENET2_RX_DATA0__GPIO2_IO08		0x00e4 0x0370 0x0000 5 0
+#define MX6UL_PAD_ENET2_RX_DATA0__KPP_ROW04		0x00e4 0x0370 0x0000 6 0
+#define MX6UL_PAD_ENET2_RX_DATA0__USB_OTG1_PWR		0x00e4 0x0370 0x0000 8 0
+#define MX6UL_PAD_ENET2_RX_DATA1__ENET2_RDATA01		0x00e8 0x0374 0x0000 0 0
+#define MX6UL_PAD_ENET2_RX_DATA1__UART6_DCE_RX		0x00e8 0x0374 0x064c 1 2
+#define MX6UL_PAD_ENET2_RX_DATA1__UART6_DTE_TX		0x00e8 0x0374 0x0000 1 0
+#define MX6UL_PAD_ENET2_RX_DATA1__SIM1_PORT0_cLK	0x00e8 0x0374 0x0000 2 0
+#define MX6UL_PAD_ENET2_RX_DATA1__I2C3_SDA		0x00e8 0x0374 0x05b8 3 1
+#define MX6UL_PAD_ENET2_RX_DATA1__ENET1_MDC		0x00e8 0x0374 0x0000 4 0
+#define MX6UL_PAD_ENET2_RX_DATA1__GPIO2_IO09		0x00e8 0x0374 0x0000 5 0
+#define MX6UL_PAD_ENET2_RX_DATA1__KPP_COL04		0x00e8 0x0374 0x0000 6 0
+#define MX6UL_PAD_ENET2_RX_DATA1__USB_OTG1_OC		0x00e8 0x0374 0x0664 8 1
+#define MX6UL_PAD_ENET2_RX_EN__ENET2_RX_EN		0x00ec 0x0378 0x0000 0 0
+#define MX6UL_PAD_ENET2_RX_EN__UART7_DCE_TX		0x00ec 0x0378 0x0000 1 0
+#define MX6UL_PAD_ENET2_RX_EN__UART7_DTE_RX		0x00ec 0x0378 0x0654 1 0
+#define MX6UL_PAD_ENET2_RX_EN__SIM1_PORT0_RST_B		0x00ec 0x0378 0x0000 2 0
+#define MX6UL_PAD_ENET2_RX_EN__I2C4_SCL			0x00ec 0x0378 0x05bc 3 1
+#define MX6UL_PAD_ENET2_RX_EN__EIM_ADDR26		0x00ec 0x0378 0x0000 4 0
+#define MX6UL_PAD_ENET2_RX_EN__GPIO2_IO10		0x00ec 0x0378 0x0000 5 0
+#define MX6UL_PAD_ENET2_RX_EN__KPP_ROW05		0x00ec 0x0378 0x0000 6 0
+#define MX6UL_PAD_ENET2_RX_EN__ENET1_REF_CLK_25M	0x00ec 0x0378 0x0000 8 0
+#define MX6UL_PAD_ENET2_TX_DATA0__ENET2_TDATA00		0x00f0 0x037c 0x0000 0 0
+#define MX6UL_PAD_ENET2_TX_DATA0__UART7_DCE_RX		0x00f0 0x037c 0x0654 1 1
+#define MX6UL_PAD_ENET2_TX_DATA0__UART7_DTE_TX		0x00f0 0x037c 0x0000 1 0
+#define MX6UL_PAD_ENET2_TX_DATA0__SIM1_PORT0_SVEN	0x00f0 0x037c 0x0000 2 0
+#define MX6UL_PAD_ENET2_TX_DATA0__I2C4_SDA		0x00f0 0x037c 0x05c0 3 1
+#define MX6UL_PAD_ENET2_TX_DATA0__EIM_EB_B02		0x00f0 0x037c 0x0000 4 0
+#define MX6UL_PAD_ENET2_TX_DATA0__GPIO2_IO11		0x00f0 0x037c 0x0000 5 0
+#define MX6UL_PAD_ENET2_TX_DATA0__KPP_COL05		0x00f0 0x037c 0x0000 6 0
+#define MX6UL_PAD_ENET2_TX_DATA1__ENET2_TDATA01		0x00f4 0x0380 0x0000 0 0
+#define MX6UL_PAD_ENET2_TX_DATA1__UART8_DCE_TX		0x00f4 0x0380 0x0000 1 0
+#define MX6UL_PAD_ENET2_TX_DATA1__UART8_DTE_RX		0x00f4 0x0380 0x065c 1 0
+#define MX6UL_PAD_ENET2_TX_DATA1__SIM2_PORT0_TRXD	0x00f4 0x0380 0x0000 2 0
+#define MX6UL_PAD_ENET2_TX_DATA1__ECSPI4_SCLK		0x00f4 0x0380 0x0564 3 0
+#define MX6UL_PAD_ENET2_TX_DATA1__EIM_EB_B03		0x00f4 0x0380 0x0000 4 0
+#define MX6UL_PAD_ENET2_TX_DATA1__GPIO2_IO12		0x00f4 0x0380 0x0000 5 0
+#define MX6UL_PAD_ENET2_TX_DATA1__KPP_ROW06		0x00f4 0x0380 0x0000 6 0
+#define MX6UL_PAD_ENET2_TX_DATA1__USB_OTG2_PWR		0x00f4 0x0380 0x0000 8 0
+#define MX6UL_PAD_ENET2_TX_EN__ENET2_TX_EN		0x00f8 0x0384 0x0000 0 0
+#define MX6UL_PAD_ENET2_TX_EN__UART8_DCE_RX		0x00f8 0x0384 0x065c 1 1
+#define MX6UL_PAD_ENET2_TX_EN__UART8_DTE_TX		0x00f8 0x0384 0x0000 1 0
+#define MX6UL_PAD_ENET2_TX_EN__SIM2_PORT0_cLK		0x00f8 0x0384 0x0000 2 0
+#define MX6UL_PAD_ENET2_TX_EN__ECSPI4_MOSI		0x00f8 0x0384 0x056c 3 0
+#define MX6UL_PAD_ENET2_TX_EN__EIM_ACLK_FREERUN		0x00f8 0x0384 0x0000 4 0
+#define MX6UL_PAD_ENET2_TX_EN__GPIO2_IO13		0x00f8 0x0384 0x0000 5 0
+#define MX6UL_PAD_ENET2_TX_EN__KPP_COL06		0x00f8 0x0384 0x0000 6 0
+#define MX6UL_PAD_ENET2_TX_EN__USB_OTG2_OC		0x00f8 0x0384 0x0660 8 1
+#define MX6UL_PAD_ENET2_TX_CLK__ENET2_TX_CLK		0x00fc 0x0388 0x0000 0 0
+#define MX6UL_PAD_ENET2_TX_CLK__UART8_DCE_CTS		0x00fc 0x0388 0x0000 1 0
+#define MX6UL_PAD_ENET2_TX_CLK__UART8_DTE_RTS		0x00fc 0x0388 0x0658 1 0
+#define MX6UL_PAD_ENET2_TX_CLK__SIM2_PORT0_RST_B	0x00fc 0x0388 0x0000 2 0
+#define MX6UL_PAD_ENET2_TX_CLK__ECSPI4_MISO		0x00fc 0x0388 0x0568 3 0
+#define MX6UL_PAD_ENET2_TX_CLK__ENET2_REF_CLK2		0x00fc 0x0388 0x057c 4 2
+#define MX6UL_PAD_ENET2_TX_CLK__GPIO2_IO14		0x00fc 0x0388 0x0000 5 0
+#define MX6UL_PAD_ENET2_TX_CLK__KPP_ROW07		0x00fc 0x0388 0x0000 6 0
+#define MX6UL_PAD_ENET2_TX_CLK__ANATOP_OTG2_ID		0x00fc 0x0388 0x04bc 8 1
+#define MX6UL_PAD_ENET2_RX_ER__ENET2_RX_ER		0x0100 0x038c 0x0000 0 0
+#define MX6UL_PAD_ENET2_RX_ER__UART8_DCE_RTS		0x0100 0x038c 0x0658 1 1
+#define MX6UL_PAD_ENET2_RX_ER__UART8_DTE_CTS		0x0100 0x038c 0x0000 1 0
+#define MX6UL_PAD_ENET2_RX_ER__SIM2_PORT0_SVEN		0x0100 0x038c 0x0000 2 0
+#define MX6UL_PAD_ENET2_RX_ER__ECSPI4_SS0		0x0100 0x038c 0x0000 3 0
+#define MX6UL_PAD_ENET2_RX_ER__EIM_ADDR25		0x0100 0x038c 0x0000 4 0
+#define MX6UL_PAD_ENET2_RX_ER__GPIO2_IO15		0x0100 0x038c 0x0000 5 0
+#define MX6UL_PAD_ENET2_RX_ER__KPP_COL07		0x0100 0x038c 0x0000 6 0
+#define MX6UL_PAD_ENET2_RX_ER__WDOG1_WDOG_ANY		0x0100 0x038c 0x0000 8 0
+#define MX6UL_PAD_LCD_CLK__LCDIF_CLK			0x0104 0x0390 0x0000 0 0
+#define MX6UL_PAD_LCD_CLK__LCDIF_WR_RWN			0x0104 0x0390 0x0000 1 0
+#define MX6UL_PAD_LCD_CLK__UART4_DCE_TX			0x0104 0x0390 0x0000 2 0
+#define MX6UL_PAD_LCD_CLK__UART4_DTE_RX			0x0104 0x0390 0x063c 2 2
+#define MX6UL_PAD_LCD_CLK__SAI3_MCLK			0x0104 0x0390 0x0000 3 0
+#define MX6UL_PAD_LCD_CLK__EIM_CS2_B			0x0104 0x0390 0x0000 4 0
+#define MX6UL_PAD_LCD_CLK__GPIO3_IO00			0x0104 0x0390 0x0000 5 0
+#define MX6UL_PAD_LCD_CLK__WDOG1_WDOG_RST_B_DEB		0x0104 0x0390 0x0000 8 0
+#define MX6UL_PAD_LCD_ENABLE__LCDIF_ENABLE		0x0108 0x0394 0x0000 0 0
+#define MX6UL_PAD_LCD_ENABLE__LCDIF_RD_E		0x0108 0x0394 0x0000 1 0
+#define MX6UL_PAD_LCD_ENABLE__UART4_DCE_RX		0x0108 0x0394 0x063c 2 3
+#define MX6UL_PAD_LCD_ENABLE__UART4_DTE_TX		0x0108 0x0394 0x0000 2 0
+#define MX6UL_PAD_LCD_ENABLE__SAI3_TX_SYNC		0x0108 0x0394 0x060c 3 0
+#define MX6UL_PAD_LCD_ENABLE__EIM_CS3_B			0x0108 0x0394 0x0000 4 0
+#define MX6UL_PAD_LCD_ENABLE__GPIO3_IO01		0x0108 0x0394 0x0000 5 0
+#define MX6UL_PAD_LCD_ENABLE__ECSPI2_RDY		0x0108 0x0394 0x0000 8 0
+#define MX6UL_PAD_LCD_HSYNC__LCDIF_HSYNC		0x010c 0x0398 0x05dc 0 0
+#define MX6UL_PAD_LCD_HSYNC__LCDIF_RS			0x010c 0x0398 0x0000 1 0
+#define MX6UL_PAD_LCD_HSYNC__UART4_DCE_CTS		0x010c 0x0398 0x0000 2 0
+#define MX6UL_PAD_LCD_HSYNC__UART4_DTE_RTS		0x010c 0x0398 0x0638 2 2
+#define MX6UL_PAD_LCD_HSYNC__SAI3_TX_BCLK		0x010c 0x0398 0x0608 3 0
+#define MX6UL_PAD_LCD_HSYNC__WDOG3_WDOG_RST_B_DEB	0x010c 0x0398 0x0000 4 0
+#define MX6UL_PAD_LCD_HSYNC__GPIO3_IO02			0x010c 0x0398 0x0000 5 0
+#define MX6UL_PAD_LCD_HSYNC__ECSPI2_SS1			0x010c 0x0398 0x0000 8 0
+#define MX6UL_PAD_LCD_VSYNC__LCDIF_VSYNC		0x0110 0x039c 0x0000 0 0
+#define MX6UL_PAD_LCD_VSYNC__LCDIF_BUSY			0x0110 0x039c 0x05dc 1 1
+#define MX6UL_PAD_LCD_VSYNC__UART4_DCE_RTS		0x0110 0x039c 0x0638 2 3
+#define MX6UL_PAD_LCD_VSYNC__UART4_DTE_CTS		0x0110 0x039c 0x0000 2 0
+#define MX6UL_PAD_LCD_VSYNC__SAI3_RX_DATA		0x0110 0x039c 0x0000 3 0
+#define MX6UL_PAD_LCD_VSYNC__WDOG2_WDOG_B		0x0110 0x039c 0x0000 4 0
+#define MX6UL_PAD_LCD_VSYNC__GPIO3_IO03			0x0110 0x039c 0x0000 5 0
+#define MX6UL_PAD_LCD_VSYNC__ECSPI2_SS2			0x0110 0x039c 0x0000 8 0
+#define MX6UL_PAD_LCD_RESET__LCDIF_RESET		0x0114 0x03a0 0x0000 0 0
+#define MX6UL_PAD_LCD_RESET__LCDIF_CS			0x0114 0x03a0 0x0000 1 0
+#define MX6UL_PAD_LCD_RESET__CA7_MX6UL_EVENTI		0x0114 0x03a0 0x0000 2 0
+#define MX6UL_PAD_LCD_RESET__SAI3_TX_DATA		0x0114 0x03a0 0x0000 3 0
+#define MX6UL_PAD_LCD_RESET__WDOG1_WDOG_ANY		0x0114 0x03a0 0x0000 4 0
+#define MX6UL_PAD_LCD_RESET__GPIO3_IO04			0x0114 0x03a0 0x0000 5 0
+#define MX6UL_PAD_LCD_RESET__ECSPI2_SS3			0x0114 0x03a0 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA00__LCDIF_DATA00		0x0118 0x03a4 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA00__PWM1_OUT			0x0118 0x03a4 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA00__ENET1_1588_EVENT2_IN	0x0118 0x03a4 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA00__I2C3_SDA			0x0118 0x03a4 0x05b8 4 2
+#define MX6UL_PAD_LCD_DATA00__GPIO3_IO05		0x0118 0x03a4 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA00__SRC_BT_CFG00		0x0118 0x03a4 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA00__SAI1_MCLK			0x0118 0x03a4 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA01__LCDIF_DATA01		0x011c 0x03a8 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA01__PWM2_OUT			0x011c 0x03a8 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA01__ENET1_1588_EVENT2_OUT	0x011c 0x03a8 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA01__I2C3_SCL			0x011c 0x03a8 0x05b4 4 2
+#define MX6UL_PAD_LCD_DATA01__GPIO3_IO06		0x011c 0x03a8 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA01__SRC_BT_CFG01		0x011c 0x03a8 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA01__SAI1_TX_SYNC		0x011c 0x03a8 0x05ec 8 0
+#define MX6UL_PAD_LCD_DATA02__LCDIF_DATA02		0x0120 0x03ac 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA02__PWM3_OUT			0x0120 0x03ac 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA02__ENET1_1588_EVENT3_IN	0x0120 0x03ac 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA02__I2C4_SDA			0x0120 0x03ac 0x05c0 4 2
+#define MX6UL_PAD_LCD_DATA02__GPIO3_IO07		0x0120 0x03ac 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA02__SRC_BT_CFG02		0x0120 0x03ac 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA02__SAI1_TX_BCLK		0x0120 0x03ac 0x05e8 8 0
+#define MX6UL_PAD_LCD_DATA03__LCDIF_DATA03		0x0124 0x03b0 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA03__PWM4_OUT			0x0124 0x03b0 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA03__ENET1_1588_EVENT3_OUT	0x0124 0x03b0 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA03__I2C4_SCL			0x0124 0x03b0 0x05bc 4 2
+#define MX6UL_PAD_LCD_DATA03__GPIO3_IO08		0x0124 0x03b0 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA03__SRC_BT_CFG03		0x0124 0x03b0 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA03__SAI1_RX_DATA		0x0124 0x03b0 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA04__LCDIF_DATA04		0x0128 0x03b4 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA04__UART8_DCE_CTS		0x0128 0x03b4 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA04__UART8_DTE_RTS		0x0128 0x03b4 0x0658 1 2
+#define MX6UL_PAD_LCD_DATA04__ENET2_1588_EVENT2_IN	0x0128 0x03b4 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA04__SPDIF_SR_CLK		0x0128 0x03b4 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA04__GPIO3_IO09		0x0128 0x03b4 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA04__SRC_BT_CFG04		0x0128 0x03b4 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA04__SAI1_TX_DATA		0x0128 0x03b4 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA05__LCDIF_DATA05		0x012c 0x03b8 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA05__UART8_DCE_RTS		0x012c 0x03b8 0x0658 1 3
+#define MX6UL_PAD_LCD_DATA05__UART8_DTE_CTS		0x012c 0x03b8 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA05__ENET2_1588_EVENT2_OUT	0x012c 0x03b8 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA05__SPDIF_OUT			0x012c 0x03b8 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA05__GPIO3_IO10		0x012c 0x03b8 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA05__SRC_BT_CFG05		0x012c 0x03b8 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA05__ECSPI1_SS1		0x012c 0x03b8 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA06__LCDIF_DATA06		0x0130 0x03bc 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA06__UART7_DCE_CTS		0x0130 0x03bc 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA06__UART7_DTE_RTS		0x0130 0x03bc 0x0650 1 2
+#define MX6UL_PAD_LCD_DATA06__ENET2_1588_EVENT3_IN	0x0130 0x03bc 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA06__SPDIF_LOCK		0x0130 0x03bc 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA06__GPIO3_IO11		0x0130 0x03bc 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA06__SRC_BT_CFG06		0x0130 0x03bc 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA06__ECSPI1_SS2		0x0130 0x03bc 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA07__LCDIF_DATA07		0x0134 0x03c0 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA07__UART7_DCE_RTS		0x0134 0x03c0 0x0650 1 3
+#define MX6UL_PAD_LCD_DATA07__UART7_DTE_CTS		0x0134 0x03c0 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA07__ENET2_1588_EVENT3_OUT	0x0134 0x03c0 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA07__SPDIF_EXT_CLK		0x0134 0x03c0 0x061c 4 0
+#define MX6UL_PAD_LCD_DATA07__GPIO3_IO12		0x0134 0x03c0 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA07__SRC_BT_CFG07		0x0134 0x03c0 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA07__ECSPI1_SS3		0x0134 0x03c0 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA08__LCDIF_DATA08		0x0138 0x03c4 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA08__SPDIF_IN			0x0138 0x03c4 0x0618 1 2
+#define MX6UL_PAD_LCD_DATA08__CSI_DATA16		0x0138 0x03c4 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA08__EIM_DATA00		0x0138 0x03c4 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA08__GPIO3_IO13		0x0138 0x03c4 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA08__SRC_BT_CFG08		0x0138 0x03c4 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA08__FLEXCAN1_TX		0x0138 0x03c4 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA09__LCDIF_DATA09		0x013c 0x03c8 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA09__SAI3_MCLK			0x013c 0x03c8 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA09__CSI_DATA17		0x013c 0x03c8 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA09__EIM_DATA01		0x013c 0x03c8 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA09__GPIO3_IO14		0x013c 0x03c8 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA09__SRC_BT_CFG09		0x013c 0x03c8 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA09__FLEXCAN1_RX		0x013c 0x03c8 0x0584 8 2
+#define MX6UL_PAD_LCD_DATA10__LCDIF_DATA10		0x0140 0x03cc 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA10__SAI3_RX_SYNC		0x0140 0x03cc 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA10__CSI_DATA18		0x0140 0x03cc 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA10__EIM_DATA02		0x0140 0x03cc 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA10__GPIO3_IO15		0x0140 0x03cc 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA10__SRC_BT_CFG10		0x0140 0x03cc 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA10__FLEXCAN2_TX		0x0140 0x03cc 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA11__LCDIF_DATA11		0x0144 0x03d0 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA11__SAI3_RX_BCLK		0x0144 0x03d0 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA11__CSI_DATA19		0x0144 0x03d0 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA11__EIM_DATA03		0x0144 0x03d0 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA11__GPIO3_IO16		0x0144 0x03d0 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA11__SRC_BT_CFG11		0x0144 0x03d0 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA11__FLEXCAN2_RX		0x0144 0x03d0 0x0588 8 2
+#define MX6UL_PAD_LCD_DATA12__LCDIF_DATA12		0x0148 0x03d4 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA12__SAI3_TX_SYNC		0x0148 0x03d4 0x060c 1 1
+#define MX6UL_PAD_LCD_DATA12__CSI_DATA20		0x0148 0x03d4 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA12__EIM_DATA04		0x0148 0x03d4 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA12__GPIO3_IO17		0x0148 0x03d4 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA12__SRC_BT_CFG12		0x0148 0x03d4 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA12__ECSPI1_RDY		0x0148 0x03d4 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA13__LCDIF_DATA13		0x014c 0x03d8 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA13__SAI3_TX_BCLK		0x014c 0x03d8 0x0608 1 1
+#define MX6UL_PAD_LCD_DATA13__CSI_DATA21		0x014c 0x03d8 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA13__EIM_DATA05		0x014c 0x03d8 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA13__GPIO3_IO18		0x014c 0x03d8 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA13__SRC_BT_CFG13		0x014c 0x03d8 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA13__USDHC2_RESET_B		0x014c 0x03d8 0x0000 8 0
+#define MX6UL_PAD_LCD_DATA14__LCDIF_DATA14		0x0150 0x03dc 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA14__SAI3_RX_DATA		0x0150 0x03dc 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA14__CSI_DATA22		0x0150 0x03dc 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA14__EIM_DATA06		0x0150 0x03dc 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA14__GPIO3_IO19		0x0150 0x03dc 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA14__SRC_BT_CFG14		0x0150 0x03dc 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA14__USDHC2_DATA4		0x0150 0x03dc 0x068c 8 0
+#define MX6UL_PAD_LCD_DATA15__LCDIF_DATA15		0x0154 0x03e0 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA15__SAI3_TX_DATA		0x0154 0x03e0 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA15__CSI_DATA23		0x0154 0x03e0 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA15__EIM_DATA07		0x0154 0x03e0 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA15__GPIO3_IO20		0x0154 0x03e0 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA15__SRC_BT_CFG15		0x0154 0x03e0 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA15__USDHC2_DATA5		0x0154 0x03e0 0x0690 8 0
+#define MX6UL_PAD_LCD_DATA16__LCDIF_DATA16		0x0158 0x03e4 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA16__UART7_DCE_TX		0x0158 0x03e4 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA16__UART7_DTE_RX		0x0158 0x03e4 0x0654 1 2
+#define MX6UL_PAD_LCD_DATA16__CSI_DATA01		0x0158 0x03e4 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA16__EIM_DATA08		0x0158 0x03e4 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA16__GPIO3_IO21		0x0158 0x03e4 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA16__SRC_BT_CFG24		0x0158 0x03e4 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA16__USDHC2_DATA6		0x0158 0x03e4 0x0694 8 0
+#define MX6UL_PAD_LCD_DATA17__LCDIF_DATA17		0x015c 0x03e8 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA17__UART7_DCE_RX		0x015c 0x03e8 0x0654 1 3
+#define MX6UL_PAD_LCD_DATA17__UART7_DTE_TX		0x015c 0x03e8 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA17__CSI_DATA00		0x015c 0x03e8 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA17__EIM_DATA09		0x015c 0x03e8 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA17__GPIO3_IO22		0x015c 0x03e8 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA17__SRC_BT_CFG25		0x015c 0x03e8 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA17__USDHC2_DATA7		0x015c 0x03e8 0x0698 8 0
+#define MX6UL_PAD_LCD_DATA18__LCDIF_DATA18		0x0160 0x03ec 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA18__PWM5_OUT			0x0160 0x03ec 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA18__CA7_MX6UL_EVENTO		0x0160 0x03ec 0x0000 2 0
+#define MX6UL_PAD_LCD_DATA18__CSI_DATA10		0x0160 0x03ec 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA18__EIM_DATA10		0x0160 0x03ec 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA18__GPIO3_IO23		0x0160 0x03ec 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA18__SRC_BT_CFG26		0x0160 0x03ec 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA18__USDHC2_CMD		0x0160 0x03ec 0x0678 8 1
+#define MX6UL_PAD_LCD_DATA19__EIM_DATA11		0x0164 0x03f0 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA19__GPIO3_IO24		0x0164 0x03f0 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA19__SRC_BT_CFG27		0x0164 0x03f0 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA19__USDHC2_CLK		0x0164 0x03f0 0x0670 8 1
+#define MX6UL_PAD_LCD_DATA19__LCDIF_DATA19		0x0164 0x03f0 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA19__PWM6_OUT			0x0164 0x03f0 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA19__WDOG1_WDOG_ANY		0x0164 0x03f0 0x0000 2 0
+#define MX6UL_PAD_LCD_DATA19__CSI_DATA11		0x0164 0x03f0 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA20__EIM_DATA12		0x0168 0x03f4 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA20__GPIO3_IO25		0x0168 0x03f4 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA20__SRC_BT_CFG28		0x0168 0x03f4 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA20__USDHC2_DATA0		0x0168 0x03f4 0x067c 8 1
+#define MX6UL_PAD_LCD_DATA20__LCDIF_DATA20		0x0168 0x03f4 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA20__UART8_DCE_TX		0x0168 0x03f4 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA20__UART8_DTE_RX		0x0168 0x03f4 0x065c 1 2
+#define MX6UL_PAD_LCD_DATA20__ECSPI1_SCLK		0x0168 0x03f4 0x0534 2 0
+#define MX6UL_PAD_LCD_DATA20__CSI_DATA12		0x0168 0x03f4 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA21__LCDIF_DATA21		0x016c 0x03f8 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA21__UART8_DCE_RX		0x016c 0x03f8 0x065c 1 3
+#define MX6UL_PAD_LCD_DATA21__UART8_DTE_TX		0x016c 0x03f8 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA21__ECSPI1_SS0		0x016c 0x03f8 0x0000 2 0
+#define MX6UL_PAD_LCD_DATA21__CSI_DATA13		0x016c 0x03f8 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA21__EIM_DATA13		0x016c 0x03f8 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA21__GPIO3_IO26		0x016c 0x03f8 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA21__SRC_BT_CFG29		0x016c 0x03f8 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA21__USDHC2_DATA1		0x016c 0x03f8 0x0680 8 1
+#define MX6UL_PAD_LCD_DATA22__LCDIF_DATA22		0x0170 0x03fc 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA22__MQS_RIGHT			0x0170 0x03fc 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA22__ECSPI1_MOSI		0x0170 0x03fc 0x053c 2 0
+#define MX6UL_PAD_LCD_DATA22__CSI_DATA14		0x0170 0x03fc 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA22__EIM_DATA14		0x0170 0x03fc 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA22__GPIO3_IO27		0x0170 0x03fc 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA22__SRC_BT_CFG30		0x0170 0x03fc 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA22__USDHC2_DATA2		0x0170 0x03fc 0x0684 8 0
+#define MX6UL_PAD_LCD_DATA23__LCDIF_DATA23		0x0174 0x0400 0x0000 0 0
+#define MX6UL_PAD_LCD_DATA23__MQS_LEFT			0x0174 0x0400 0x0000 1 0
+#define MX6UL_PAD_LCD_DATA23__ECSPI1_MISO		0x0174 0x0400 0x0538 2 0
+#define MX6UL_PAD_LCD_DATA23__CSI_DATA15		0x0174 0x0400 0x0000 3 0
+#define MX6UL_PAD_LCD_DATA23__EIM_DATA15		0x0174 0x0400 0x0000 4 0
+#define MX6UL_PAD_LCD_DATA23__GPIO3_IO28		0x0174 0x0400 0x0000 5 0
+#define MX6UL_PAD_LCD_DATA23__SRC_BT_CFG31		0x0174 0x0400 0x0000 6 0
+#define MX6UL_PAD_LCD_DATA23__USDHC2_DATA3		0x0174 0x0400 0x0688 8 1
+#define MX6UL_PAD_NAND_RE_B__RAWNAND_RE_B		0x0178 0x0404 0x0000 0 0
+#define MX6UL_PAD_NAND_RE_B__USDHC2_CLK			0x0178 0x0404 0x0670 1 2
+#define MX6UL_PAD_NAND_RE_B__QSPI_B_SCLK		0x0178 0x0404 0x0000 2 0
+#define MX6UL_PAD_NAND_RE_B__KPP_ROW00			0x0178 0x0404 0x0000 3 0
+#define MX6UL_PAD_NAND_RE_B__EIM_EB_B00			0x0178 0x0404 0x0000 4 0
+#define MX6UL_PAD_NAND_RE_B__GPIO4_IO00			0x0178 0x0404 0x0000 5 0
+#define MX6UL_PAD_NAND_RE_B__ECSPI3_SS2			0x0178 0x0404 0x0000 8 0
+#define MX6UL_PAD_NAND_WE_B__RAWNAND_WE_B		0x017c 0x0408 0x0000 0 0
+#define MX6UL_PAD_NAND_WE_B__USDHC2_CMD			0x017c 0x0408 0x0678 1 2
+#define MX6UL_PAD_NAND_WE_B__QSPI_B_SS0_B		0x017c 0x0408 0x0000 2 0
+#define MX6UL_PAD_NAND_WE_B__KPP_COL00			0x017c 0x0408 0x0000 3 0
+#define MX6UL_PAD_NAND_WE_B__EIM_EB_B01			0x017c 0x0408 0x0000 4 0
+#define MX6UL_PAD_NAND_WE_B__GPIO4_IO01			0x017c 0x0408 0x0000 5 0
+#define MX6UL_PAD_NAND_WE_B__ECSPI3_SS3			0x017c 0x0408 0x0000 8 0
+#define MX6UL_PAD_NAND_DATA00__RAWNAND_DATA00		0x0180 0x040c 0x0000 0 0
+#define MX6UL_PAD_NAND_DATA00__USDHC2_DATA0		0x0180 0x040c 0x067c 1 2
+#define MX6UL_PAD_NAND_DATA00__QSPI_B_SS1_B		0x0180 0x040c 0x0000 2 0
+#define MX6UL_PAD_NAND_DATA00__KPP_ROW01		0x0180 0x040c 0x0000 3 0
+#define MX6UL_PAD_NAND_DATA00__EIM_AD08			0x0180 0x040c 0x0000 4 0
+#define MX6UL_PAD_NAND_DATA00__GPIO4_IO02		0x0180 0x040c 0x0000 5 0
+#define MX6UL_PAD_NAND_DATA00__ECSPI4_RDY		0x0180 0x040c 0x0000 8 0
+#define MX6UL_PAD_NAND_DATA01__RAWNAND_DATA01		0x0184 0x0410 0x0000 0 0
+#define MX6UL_PAD_NAND_DATA01__USDHC2_DATA1		0x0184 0x0410 0x0680 1 2
+#define MX6UL_PAD_NAND_DATA01__QSPI_B_DQS		0x0184 0x0410 0x0000 2 0
+#define MX6UL_PAD_NAND_DATA01__KPP_COL01		0x0184 0x0410 0x0000 3 0
+#define MX6UL_PAD_NAND_DATA01__EIM_AD09			0x0184 0x0410 0x0000 4 0
+#define MX6UL_PAD_NAND_DATA01__GPIO4_IO03		0x0184 0x0410 0x0000 5 0
+#define MX6UL_PAD_NAND_DATA01__ECSPI4_SS1		0x0184 0x0410 0x0000 8 0
+#define MX6UL_PAD_NAND_DATA02__RAWNAND_DATA02		0x0188 0x0414 0x0000 0 0
+#define MX6UL_PAD_NAND_DATA02__USDHC2_DATA2		0x0188 0x0414 0x0684 1 1
+#define MX6UL_PAD_NAND_DATA02__QSPI_B_DATA00		0x0188 0x0414 0x0000 2 0
+#define MX6UL_PAD_NAND_DATA02__KPP_ROW02		0x0188 0x0414 0x0000 3 0
+#define MX6UL_PAD_NAND_DATA02__EIM_AD10			0x0188 0x0414 0x0000 4 0
+#define MX6UL_PAD_NAND_DATA02__GPIO4_IO04		0x0188 0x0414 0x0000 5 0
+#define MX6UL_PAD_NAND_DATA02__ECSPI4_SS2		0x0188 0x0414 0x0000 8 0
+#define MX6UL_PAD_NAND_DATA03__RAWNAND_DATA03		0x018c 0x0418 0x0000 0 0
+#define MX6UL_PAD_NAND_DATA03__USDHC2_DATA3		0x018c 0x0418 0x0688 1 2
+#define MX6UL_PAD_NAND_DATA03__QSPI_B_DATA01		0x018c 0x0418 0x0000 2 0
+#define MX6UL_PAD_NAND_DATA03__KPP_COL02		0x018c 0x0418 0x0000 3 0
+#define MX6UL_PAD_NAND_DATA03__EIM_AD11			0x018c 0x0418 0x0000 4 0
+#define MX6UL_PAD_NAND_DATA03__GPIO4_IO05		0x018c 0x0418 0x0000 5 0
+#define MX6UL_PAD_NAND_DATA03__ECSPI4_SS3		0x018c 0x0418 0x0000 8 0
+#define MX6UL_PAD_NAND_DATA04__RAWNAND_DATA04		0x0190 0x041c 0x0000 0 0
+#define MX6UL_PAD_NAND_DATA04__USDHC2_DATA4		0x0190 0x041c 0x068c 1 1
+#define MX6UL_PAD_NAND_DATA04__QSPI_B_DATA02		0x0190 0x041c 0x0000 2 0
+#define MX6UL_PAD_NAND_DATA04__ECSPI4_SCLK		0x0190 0x041c 0x0564 3 1
+#define MX6UL_PAD_NAND_DATA04__EIM_AD12			0x0190 0x041c 0x0000 4 0
+#define MX6UL_PAD_NAND_DATA04__GPIO4_IO06		0x0190 0x041c 0x0000 5 0
+#define MX6UL_PAD_NAND_DATA04__UART2_DCE_TX		0x0190 0x041c 0x0000 8 0
+#define MX6UL_PAD_NAND_DATA04__UART2_DTE_RX		0x0190 0x041c 0x062c 8 2
+#define MX6UL_PAD_NAND_DATA05__RAWNAND_DATA05		0x0194 0x0420 0x0000 0 0
+#define MX6UL_PAD_NAND_DATA05__USDHC2_DATA5		0x0194 0x0420 0x0690 1 1
+#define MX6UL_PAD_NAND_DATA05__QSPI_B_DATA03		0x0194 0x0420 0x0000 2 0
+#define MX6UL_PAD_NAND_DATA05__ECSPI4_MOSI		0x0194 0x0420 0x056c 3 1
+#define MX6UL_PAD_NAND_DATA05__EIM_AD13			0x0194 0x0420 0x0000 4 0
+#define MX6UL_PAD_NAND_DATA05__GPIO4_IO07		0x0194 0x0420 0x0000 5 0
+#define MX6UL_PAD_NAND_DATA05__UART2_DCE_RX		0x0194 0x0420 0x062c 8 3
+#define MX6UL_PAD_NAND_DATA05__UART2_DTE_TX		0x0194 0x0420 0x0000 8 0
+#define MX6UL_PAD_NAND_DATA06__RAWNAND_DATA06		0x0198 0x0424 0x0000 0 0
+#define MX6UL_PAD_NAND_DATA06__USDHC2_DATA6		0x0198 0x0424 0x0694 1 1
+#define MX6UL_PAD_NAND_DATA06__SAI2_RX_BCLK		0x0198 0x0424 0x0000 2 0
+#define MX6UL_PAD_NAND_DATA06__ECSPI4_MISO		0x0198 0x0424 0x0568 3 1
+#define MX6UL_PAD_NAND_DATA06__EIM_AD14			0x0198 0x0424 0x0000 4 0
+#define MX6UL_PAD_NAND_DATA06__GPIO4_IO08		0x0198 0x0424 0x0000 5 0
+#define MX6UL_PAD_NAND_DATA06__UART2_DCE_CTS		0x0198 0x0424 0x0000 8 0
+#define MX6UL_PAD_NAND_DATA06__UART2_DTE_RTS		0x0198 0x0424 0x0628 8 4
+#define MX6UL_PAD_NAND_DATA07__RAWNAND_DATA07		0x019c 0x0428 0x0000 0 0
+#define MX6UL_PAD_NAND_DATA07__USDHC2_DATA7		0x019c 0x0428 0x0698 1 1
+#define MX6UL_PAD_NAND_DATA07__QSPI_A_SS1_B		0x019c 0x0428 0x0000 2 0
+#define MX6UL_PAD_NAND_DATA07__ECSPI4_SS0		0x019c 0x0428 0x0000 3 0
+#define MX6UL_PAD_NAND_DATA07__EIM_AD15			0x019c 0x0428 0x0000 4 0
+#define MX6UL_PAD_NAND_DATA07__GPIO4_IO09		0x019c 0x0428 0x0000 5 0
+#define MX6UL_PAD_NAND_DATA07__UART2_DCE_RTS		0x019c 0x0428 0x0628 8 5
+#define MX6UL_PAD_NAND_DATA07__UART2_DTE_CTS		0x019c 0x0428 0x0000 8 0
+#define MX6UL_PAD_NAND_ALE__RAWNAND_ALE			0x01a0 0x042c 0x0000 0 0
+#define MX6UL_PAD_NAND_ALE__USDHC2_RESET_B		0x01a0 0x042c 0x0000 1 0
+#define MX6UL_PAD_NAND_ALE__QSPI_A_DQS			0x01a0 0x042c 0x0000 2 0
+#define MX6UL_PAD_NAND_ALE__PWM3_OUT			0x01a0 0x042c 0x0000 3 0
+#define MX6UL_PAD_NAND_ALE__EIM_ADDR17			0x01a0 0x042c 0x0000 4 0
+#define MX6UL_PAD_NAND_ALE__GPIO4_IO10			0x01a0 0x042c 0x0000 5 0
+#define MX6UL_PAD_NAND_ALE__ECSPI3_SS1			0x01a0 0x042c 0x0000 8 0
+#define MX6UL_PAD_NAND_WP_B__RAWNAND_WP_B		0x01a4 0x0430 0x0000 0 0
+#define MX6UL_PAD_NAND_WP_B__USDHC1_RESET_B		0x01a4 0x0430 0x0000 1 0
+#define MX6UL_PAD_NAND_WP_B__QSPI_A_SCLK		0x01a4 0x0430 0x0000 2 0
+#define MX6UL_PAD_NAND_WP_B__PWM4_OUT			0x01a4 0x0430 0x0000 3 0
+#define MX6UL_PAD_NAND_WP_B__EIM_BCLK			0x01a4 0x0430 0x0000 4 0
+#define MX6UL_PAD_NAND_WP_B__GPIO4_IO11			0x01a4 0x0430 0x0000 5 0
+#define MX6UL_PAD_NAND_WP_B__ECSPI3_RDY			0x01a4 0x0430 0x0000 8 0
+#define MX6UL_PAD_NAND_READY_B__RAWNAND_READY_B		0x01a8 0x0434 0x0000 0 0
+#define MX6UL_PAD_NAND_READY_B__USDHC1_DATA4		0x01a8 0x0434 0x0000 1 0
+#define MX6UL_PAD_NAND_READY_B__QSPI_A_DATA00		0x01a8 0x0434 0x0000 2 0
+#define MX6UL_PAD_NAND_READY_B__ECSPI3_SS0		0x01a8 0x0434 0x0000 3 0
+#define MX6UL_PAD_NAND_READY_B__EIM_CS1_B		0x01a8 0x0434 0x0000 4 0
+#define MX6UL_PAD_NAND_READY_B__GPIO4_IO12		0x01a8 0x0434 0x0000 5 0
+#define MX6UL_PAD_NAND_READY_B__UART3_DCE_TX		0x01a8 0x0434 0x0000 8 0
+#define MX6UL_PAD_NAND_READY_B__UART3_DTE_RX		0x01a8 0x0434 0x0634 8 2
+#define MX6UL_PAD_NAND_CE0_B__RAWNAND_CE0_B		0x01ac 0x0438 0x0000 0 0
+#define MX6UL_PAD_NAND_CE0_B__USDHC1_DATA5		0x01ac 0x0438 0x0000 1 0
+#define MX6UL_PAD_NAND_CE0_B__QSPI_A_DATA01		0x01ac 0x0438 0x0000 2 0
+#define MX6UL_PAD_NAND_CE0_B__ECSPI3_SCLK		0x01ac 0x0438 0x0554 3 1
+#define MX6UL_PAD_NAND_CE0_B__EIM_DTACK_B		0x01ac 0x0438 0x0000 4 0
+#define MX6UL_PAD_NAND_CE0_B__GPIO4_IO13		0x01ac 0x0438 0x0000 5 0
+#define MX6UL_PAD_NAND_CE0_B__UART3_DCE_RX		0x01ac 0x0438 0x0634 8 3
+#define MX6UL_PAD_NAND_CE0_B__UART3_DTE_TX		0x01ac 0x0438 0x0000 8 0
+#define MX6UL_PAD_NAND_CE1_B__RAWNAND_CE1_B		0x01b0 0x043c 0x0000 0 0
+#define MX6UL_PAD_NAND_CE1_B__USDHC1_DATA6		0x01b0 0x043c 0x0000 1 0
+#define MX6UL_PAD_NAND_CE1_B__QSPI_A_DATA02		0x01b0 0x043c 0x0000 2 0
+#define MX6UL_PAD_NAND_CE1_B__ECSPI3_MOSI		0x01b0 0x043c 0x055c 3 1
+#define MX6UL_PAD_NAND_CE1_B__EIM_ADDR18		0x01b0 0x043c 0x0000 4 0
+#define MX6UL_PAD_NAND_CE1_B__GPIO4_IO14		0x01b0 0x043c 0x0000 5 0
+#define MX6UL_PAD_NAND_CE1_B__UART3_DCE_CTS		0x01b0 0x043c 0x0000 8 0
+#define MX6UL_PAD_NAND_CE1_B__UART3_DTE_RTS		0x01b0 0x043c 0x0630 8 2
+#define MX6UL_PAD_NAND_CLE__RAWNAND_CLE			0x01b4 0x0440 0x0000 0 0
+#define MX6UL_PAD_NAND_CLE__USDHC1_DATA7		0x01b4 0x0440 0x0000 1 0
+#define MX6UL_PAD_NAND_CLE__QSPI_A_DATA03		0x01b4 0x0440 0x0000 2 0
+#define MX6UL_PAD_NAND_CLE__ECSPI3_MISO			0x01b4 0x0440 0x0558 3 1
+#define MX6UL_PAD_NAND_CLE__EIM_ADDR16			0x01b4 0x0440 0x0000 4 0
+#define MX6UL_PAD_NAND_CLE__GPIO4_IO15			0x01b4 0x0440 0x0000 5 0
+#define MX6UL_PAD_NAND_CLE__UART3_DCE_RTS		0x01b4 0x0440 0x0630 8 3
+#define MX6UL_PAD_NAND_CLE__UART3_DTE_CTS		0x01b4 0x0440 0x0000 8 0
+#define MX6UL_PAD_NAND_DQS__RAWNAND_DQS			0x01b8 0x0444 0x0000 0 0
+#define MX6UL_PAD_NAND_DQS__CSI_FIELD			0x01b8 0x0444 0x0530 1 1
+#define MX6UL_PAD_NAND_DQS__QSPI_A_SS0_B		0x01b8 0x0444 0x0000 2 0
+#define MX6UL_PAD_NAND_DQS__PWM5_OUT			0x01b8 0x0444 0x0000 3 0
+#define MX6UL_PAD_NAND_DQS__EIM_WAIT			0x01b8 0x0444 0x0000 4 0
+#define MX6UL_PAD_NAND_DQS__GPIO4_IO16			0x01b8 0x0444 0x0000 5 0
+#define MX6UL_PAD_NAND_DQS__SDMA_EXT_EVENT01		0x01b8 0x0444 0x0000 6 0
+#define MX6UL_PAD_NAND_DQS__SPDIF_EXT_CLK		0x01b8 0x0444 0x061c 8 1
+#define MX6UL_PAD_SD1_CMD__USDHC1_CMD			0x01bc 0x0448 0x0000 0 0
+#define MX6UL_PAD_SD1_CMD__GPT2_COMPARE1		0x01bc 0x0448 0x0000 1 0
+#define MX6UL_PAD_SD1_CMD__SAI2_RX_SYNC			0x01bc 0x0448 0x0000 2 0
+#define MX6UL_PAD_SD1_CMD__SPDIF_OUT			0x01bc 0x0448 0x0000 3 0
+#define MX6UL_PAD_SD1_CMD__EIM_ADDR19			0x01bc 0x0448 0x0000 4 0
+#define MX6UL_PAD_SD1_CMD__GPIO2_IO16			0x01bc 0x0448 0x0000 5 0
+#define MX6UL_PAD_SD1_CMD__SDMA_EXT_EVENT00		0x01bc 0x0448 0x0000 6 0
+#define MX6UL_PAD_SD1_CMD__USB_OTG1_PWR			0x01bc 0x0448 0x0000 8 0
+#define MX6UL_PAD_SD1_CLK__USDHC1_CLK			0x01c0 0x044c 0x0000 0 0
+#define MX6UL_PAD_SD1_CLK__GPT2_COMPARE2		0x01c0 0x044c 0x0000 1 0
+#define MX6UL_PAD_SD1_CLK__SAI2_MCLK			0x01c0 0x044c 0x0000 2 0
+#define MX6UL_PAD_SD1_CLK__SPDIF_IN			0x01c0 0x044c 0x0618 3 3
+#define MX6UL_PAD_SD1_CLK__EIM_ADDR20			0x01c0 0x044c 0x0000 4 0
+#define MX6UL_PAD_SD1_CLK__GPIO2_IO17			0x01c0 0x044c 0x0000 5 0
+#define MX6UL_PAD_SD1_CLK__USB_OTG1_OC			0x01c0 0x044c 0x0664 8 2
+#define MX6UL_PAD_SD1_DATA0__USDHC1_DATA0		0x01c4 0x0450 0x0000 0 0
+#define MX6UL_PAD_SD1_DATA0__GPT2_COMPARE3		0x01c4 0x0450 0x0000 1 0
+#define MX6UL_PAD_SD1_DATA0__SAI2_TX_SYNC		0x01c4 0x0450 0x05fc 2 1
+#define MX6UL_PAD_SD1_DATA0__FLEXCAN1_TX		0x01c4 0x0450 0x0000 3 0
+#define MX6UL_PAD_SD1_DATA0__EIM_ADDR21			0x01c4 0x0450 0x0000 4 0
+#define MX6UL_PAD_SD1_DATA0__GPIO2_IO18			0x01c4 0x0450 0x0000 5 0
+#define MX6UL_PAD_SD1_DATA0__ANATOP_OTG1_ID		0x01c4 0x0450 0x04b8 8 2
+#define MX6UL_PAD_SD1_DATA1__USDHC1_DATA1		0x01c8 0x0454 0x0000 0 0
+#define MX6UL_PAD_SD1_DATA1__GPT2_CLK			0x01c8 0x0454 0x05a0 1 1
+#define MX6UL_PAD_SD1_DATA1__SAI2_TX_BCLK		0x01c8 0x0454 0x05f8 2 1
+#define MX6UL_PAD_SD1_DATA1__FLEXCAN1_RX		0x01c8 0x0454 0x0584 3 3
+#define MX6UL_PAD_SD1_DATA1__EIM_ADDR22			0x01c8 0x0454 0x0000 4 0
+#define MX6UL_PAD_SD1_DATA1__GPIO2_IO19			0x01c8 0x0454 0x0000 5 0
+#define MX6UL_PAD_SD1_DATA1__USB_OTG2_PWR		0x01c8 0x0454 0x0000 8 0
+#define MX6UL_PAD_SD1_DATA2__USDHC1_DATA2		0x01cc 0x0458 0x0000 0 0
+#define MX6UL_PAD_SD1_DATA2__GPT2_CAPTURE1		0x01cc 0x0458 0x0598 1 1
+#define MX6UL_PAD_SD1_DATA2__SAI2_RX_DATA		0x01cc 0x0458 0x05f4 2 1
+#define MX6UL_PAD_SD1_DATA2__FLEXCAN2_TX		0x01cc 0x0458 0x0000 3 0
+#define MX6UL_PAD_SD1_DATA2__EIM_ADDR23			0x01cc 0x0458 0x0000 4 0
+#define MX6UL_PAD_SD1_DATA2__GPIO2_IO20			0x01cc 0x0458 0x0000 5 0
+#define MX6UL_PAD_SD1_DATA2__CCM_CLKO1			0x01cc 0x0458 0x0000 6 0
+#define MX6UL_PAD_SD1_DATA2__USB_OTG2_OC		0x01cc 0x0458 0x0660 8 2
+#define MX6UL_PAD_SD1_DATA3__USDHC1_DATA3		0x01d0 0x045c 0x0000 0 0
+#define MX6UL_PAD_SD1_DATA3__GPT2_CAPTURE2		0x01d0 0x045c 0x059c 1 1
+#define MX6UL_PAD_SD1_DATA3__SAI2_TX_DATA		0x01d0 0x045c 0x0000 2 0
+#define MX6UL_PAD_SD1_DATA3__FLEXCAN2_RX		0x01d0 0x045c 0x0588 3 3
+#define MX6UL_PAD_SD1_DATA3__EIM_ADDR24			0x01d0 0x045c 0x0000 4 0
+#define MX6UL_PAD_SD1_DATA3__GPIO2_IO21			0x01d0 0x045c 0x0000 5 0
+#define MX6UL_PAD_SD1_DATA3__CCM_CLKO2			0x01d0 0x045c 0x0000 6 0
+#define MX6UL_PAD_SD1_DATA3__ANATOP_OTG2_ID		0x01d0 0x045c 0x04bc 8 2
+#define MX6UL_PAD_CSI_MCLK__CSI_MCLK			0x01d4 0x0460 0x0000 0 0
+#define MX6UL_PAD_CSI_MCLK__USDHC2_CD_B			0x01d4 0x0460 0x0674 1 0
+#define MX6UL_PAD_CSI_MCLK__RAWNAND_CE2_B		0x01d4 0x0460 0x0000 2 0
+#define MX6UL_PAD_CSI_MCLK__I2C1_SDA			0x01d4 0x0460 0x05a8 3 0
+#define MX6UL_PAD_CSI_MCLK__EIM_CS0_B			0x01d4 0x0460 0x0000 4 0
+#define MX6UL_PAD_CSI_MCLK__GPIO4_IO17			0x01d4 0x0460 0x0000 5 0
+#define MX6UL_PAD_CSI_MCLK__SNVS_HP_VIO_5_CTL		0x01d4 0x0460 0x0000 6 0
+#define MX6UL_PAD_CSI_MCLK__UART6_DCE_TX		0x01d4 0x0460 0x0000 8 0
+#define MX6UL_PAD_CSI_MCLK__UART6_DTE_RX		0x01d4 0x0460 0x064c 8 0
+#define MX6UL_PAD_CSI_PIXCLK__CSI_PIXCLK		0x01d8 0x0464 0x0528 0 1
+#define MX6UL_PAD_CSI_PIXCLK__USDHC2_WP			0x01d8 0x0464 0x069c 1 2
+#define MX6UL_PAD_CSI_PIXCLK__RAWNAND_CE3_B		0x01d8 0x0464 0x0000 2 0
+#define MX6UL_PAD_CSI_PIXCLK__I2C1_SCL			0x01d8 0x0464 0x05a4 3 2
+#define MX6UL_PAD_CSI_PIXCLK__EIM_OE			0x01d8 0x0464 0x0000 4 0
+#define MX6UL_PAD_CSI_PIXCLK__GPIO4_IO18		0x01d8 0x0464 0x0000 5 0
+#define MX6UL_PAD_CSI_PIXCLK__SNVS_HP_VIO_5		0x01d8 0x0464 0x0000 6 0
+#define MX6UL_PAD_CSI_PIXCLK__UART6_DCE_RX		0x01d8 0x0464 0x064c 8 3
+#define MX6UL_PAD_CSI_PIXCLK__UART6_DTE_TX		0x01d8 0x0464 0x0000 8 0
+#define MX6UL_PAD_CSI_VSYNC__CSI_VSYNC			0x01dc 0x0468 0x052c 0 0
+#define MX6UL_PAD_CSI_VSYNC__USDHC2_CLK			0x01dc 0x0468 0x0670 1 0
+#define MX6UL_PAD_CSI_VSYNC__SIM1_PORT1_CLK		0x01dc 0x0468 0x0000 2 0
+#define MX6UL_PAD_CSI_VSYNC__I2C2_SDA			0x01dc 0x0468 0x05b0 3 0
+#define MX6UL_PAD_CSI_VSYNC__EIM_RW			0x01dc 0x0468 0x0000 4 0
+#define MX6UL_PAD_CSI_VSYNC__GPIO4_IO19			0x01dc 0x0468 0x0000 5 0
+#define MX6UL_PAD_CSI_VSYNC__PWM7_OUT			0x01dc 0x0468 0x0000 6 0
+#define MX6UL_PAD_CSI_VSYNC__UART6_DCE_RTS		0x01dc 0x0468 0x0648 8 0
+#define MX6UL_PAD_CSI_VSYNC__UART6_DTE_CTS		0x01dc 0x0468 0x0000 8 0
+#define MX6UL_PAD_CSI_HSYNC__CSI_HSYNC			0x01e0 0x046c 0x0524 0 0
+#define MX6UL_PAD_CSI_HSYNC__USDHC2_CMD			0x01e0 0x046c 0x0678 1 0
+#define MX6UL_PAD_CSI_HSYNC__SIM1_PORT1_PD		0x01e0 0x046c 0x0000 2 0
+#define MX6UL_PAD_CSI_HSYNC__I2C2_SCL			0x01e0 0x046c 0x05ac 3 0
+#define MX6UL_PAD_CSI_HSYNC__EIM_LBA_B			0x01e0 0x046c 0x0000 4 0
+#define MX6UL_PAD_CSI_HSYNC__GPIO4_IO20			0x01e0 0x046c 0x0000 5 0
+#define MX6UL_PAD_CSI_HSYNC__PWM8_OUT			0x01e0 0x046c 0x0000 6 0
+#define MX6UL_PAD_CSI_HSYNC__UART6_DCE_CTS		0x01e0 0x046c 0x0000 8 0
+#define MX6UL_PAD_CSI_HSYNC__UART6_DTE_RTS		0x01e0 0x046c 0x0648 8 1
+#define MX6UL_PAD_CSI_DATA00__CSI_DATA02		0x01e4 0x0470 0x04c4 0 0
+#define MX6UL_PAD_CSI_DATA00__USDHC2_DATA0		0x01e4 0x0470 0x067c 1 0
+#define MX6UL_PAD_CSI_DATA00__SIM1_PORT1_RST_B		0x01e4 0x0470 0x0000 2 0
+#define MX6UL_PAD_CSI_DATA00__ECSPI2_SCLK		0x01e4 0x0470 0x0544 3 0
+#define MX6UL_PAD_CSI_DATA00__EIM_AD00			0x01e4 0x0470 0x0000 4 0
+#define MX6UL_PAD_CSI_DATA00__GPIO4_IO21		0x01e4 0x0470 0x0000 5 0
+#define MX6UL_PAD_CSI_DATA00__SRC_INT_BOOT		0x01e4 0x0470 0x0000 6 0
+#define MX6UL_PAD_CSI_DATA00__UART5_DCE_TX		0x01e4 0x0470 0x0000 8 0
+#define MX6UL_PAD_CSI_DATA00__UART5_DTE_RX		0x01e4 0x0470 0x0644 8 0
+#define MX6UL_PAD_CSI_DATA01__CSI_DATA03		0x01e8 0x0474 0x04c8 0 0
+#define MX6UL_PAD_CSI_DATA01__USDHC2_DATA1		0x01e8 0x0474 0x0680 1 0
+#define MX6UL_PAD_CSI_DATA01__SIM1_PORT1_SVEN		0x01e8 0x0474 0x0000 2 0
+#define MX6UL_PAD_CSI_DATA01__ECSPI2_SS0		0x01e8 0x0474 0x0000 3 0
+#define MX6UL_PAD_CSI_DATA01__EIM_AD01			0x01e8 0x0474 0x0000 4 0
+#define MX6UL_PAD_CSI_DATA01__GPIO4_IO22		0x01e8 0x0474 0x0000 5 0
+#define MX6UL_PAD_CSI_DATA01__SAI1_MCLK			0x01e8 0x0474 0x0000 6 0
+#define MX6UL_PAD_CSI_DATA01__UART5_DCE_RX		0x01e8 0x0474 0x0644 8 1
+#define MX6UL_PAD_CSI_DATA01__UART5_DTE_TX		0x01e8 0x0474 0x0000 8 0
+#define MX6UL_PAD_CSI_DATA02__CSI_DATA04		0x01ec 0x0478 0x04d8 0 1
+#define MX6UL_PAD_CSI_DATA02__USDHC2_DATA2		0x01ec 0x0478 0x0684 1 2
+#define MX6UL_PAD_CSI_DATA02__SIM1_PORT1_TRXD		0x01ec 0x0478 0x0000 2 0
+#define MX6UL_PAD_CSI_DATA02__ECSPI2_MOSI		0x01ec 0x0478 0x054c 3 1
+#define MX6UL_PAD_CSI_DATA02__EIM_AD02			0x01ec 0x0478 0x0000 4 0
+#define MX6UL_PAD_CSI_DATA02__GPIO4_IO23		0x01ec 0x0478 0x0000 5 0
+#define MX6UL_PAD_CSI_DATA02__SAI1_RX_SYNC		0x01ec 0x0478 0x0000 6 0
+#define MX6UL_PAD_CSI_DATA02__UART5_DCE_RTS		0x01ec 0x0478 0x0640 8 5
+#define MX6UL_PAD_CSI_DATA02__UART5_DTE_CTS		0x01ec 0x0478 0x0000 8 0
+#define MX6UL_PAD_CSI_DATA03__CSI_DATA05		0x01f0 0x047c 0x04cc 0 0
+#define MX6UL_PAD_CSI_DATA03__USDHC2_DATA3		0x01f0 0x047c 0x0688 1 0
+#define MX6UL_PAD_CSI_DATA03__SIM2_PORT1_PD		0x01f0 0x047c 0x0000 2 0
+#define MX6UL_PAD_CSI_DATA03__ECSPI2_MISO		0x01f0 0x047c 0x0548 3 0
+#define MX6UL_PAD_CSI_DATA03__EIM_AD03			0x01f0 0x047c 0x0000 4 0
+#define MX6UL_PAD_CSI_DATA03__GPIO4_IO24		0x01f0 0x047c 0x0000 5 0
+#define MX6UL_PAD_CSI_DATA03__SAI1_RX_BCLK		0x01f0 0x047c 0x0000 6 0
+#define MX6UL_PAD_CSI_DATA03__UART5_DCE_CTS		0x01f0 0x047c 0x0000 8 0
+#define MX6UL_PAD_CSI_DATA03__UART5_DTE_RTS		0x01f0 0x047c 0x0640 8 0
+#define MX6UL_PAD_CSI_DATA04__CSI_DATA06		0x01f4 0x0480 0x04dc 0 1
+#define MX6UL_PAD_CSI_DATA04__USDHC2_DATA4		0x01f4 0x0480 0x068c 1 2
+#define MX6UL_PAD_CSI_DATA04__SIM2_PORT1_CLK		0x01f4 0x0480 0x0000 2 0
+#define MX6UL_PAD_CSI_DATA04__ECSPI1_SCLK		0x01f4 0x0480 0x0534 3 1
+#define MX6UL_PAD_CSI_DATA04__EIM_AD04			0x01f4 0x0480 0x0000 4 0
+#define MX6UL_PAD_CSI_DATA04__GPIO4_IO25		0x01f4 0x0480 0x0000 5 0
+#define MX6UL_PAD_CSI_DATA04__SAI1_TX_SYNC		0x01f4 0x0480 0x05ec 6 1
+#define MX6UL_PAD_CSI_DATA04__USDHC1_WP			0x01f4 0x0480 0x066c 8 2
+#define MX6UL_PAD_CSI_DATA05__CSI_DATA07		0x01f8 0x0484 0x04e0 0 1
+#define MX6UL_PAD_CSI_DATA05__USDHC2_DATA5		0x01f8 0x0484 0x0690 1 2
+#define MX6UL_PAD_CSI_DATA05__SIM2_PORT1_RST_B		0x01f8 0x0484 0x0000 2 0
+#define MX6UL_PAD_CSI_DATA05__ECSPI1_SS0		0x01f8 0x0484 0x0000 3 0
+#define MX6UL_PAD_CSI_DATA05__EIM_AD05			0x01f8 0x0484 0x0000 4 0
+#define MX6UL_PAD_CSI_DATA05__GPIO4_IO26		0x01f8 0x0484 0x0000 5 0
+#define MX6UL_PAD_CSI_DATA05__SAI1_TX_BCLK		0x01f8 0x0484 0x05e8 6 1
+#define MX6UL_PAD_CSI_DATA05__USDHC1_CD_B		0x01f8 0x0484 0x0668 8 2
+#define MX6UL_PAD_CSI_DATA06__CSI_DATA08		0x01fc 0x0488 0x04e4 0 1
+#define MX6UL_PAD_CSI_DATA06__USDHC2_DATA6		0x01fc 0x0488 0x0694 1 2
+#define MX6UL_PAD_CSI_DATA06__SIM2_PORT1_SVEN		0x01fc 0x0488 0x0000 2 0
+#define MX6UL_PAD_CSI_DATA06__ECSPI1_MOSI		0x01fc 0x0488 0x053c 3 1
+#define MX6UL_PAD_CSI_DATA06__EIM_AD06			0x01fc 0x0488 0x0000 4 0
+#define MX6UL_PAD_CSI_DATA06__GPIO4_IO27		0x01fc 0x0488 0x0000 5 0
+#define MX6UL_PAD_CSI_DATA06__SAI1_RX_DATA		0x01fc 0x0488 0x0000 6 0
+#define MX6UL_PAD_CSI_DATA06__USDHC1_RESET_B		0x01fc 0x0488 0x0000 8 0
+#define MX6UL_PAD_CSI_DATA07__CSI_DATA09		0x0200 0x048c 0x04e8 0 1
+#define MX6UL_PAD_CSI_DATA07__USDHC2_DATA7		0x0200 0x048c 0x0698 1 2
+#define MX6UL_PAD_CSI_DATA07__SIM2_PORT1_TRXD		0x0200 0x048c 0x0000 2 0
+#define MX6UL_PAD_CSI_DATA07__ECSPI1_MISO		0x0200 0x048c 0x0538 3 1
+#define MX6UL_PAD_CSI_DATA07__EIM_AD07			0x0200 0x048c 0x0000 4 0
+#define MX6UL_PAD_CSI_DATA07__GPIO4_IO28		0x0200 0x048c 0x0000 5 0
+#define MX6UL_PAD_CSI_DATA07__SAI1_TX_DATA		0x0200 0x048c 0x0000 6 0
+#define MX6UL_PAD_CSI_DATA07__USDHC1_VSELECT		0x0200 0x048c 0x0000 8 0
 
 #endif /* __DTS_IMX6UL_PINFUNC_H */

diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
index 99b6465..7177899 100644
--- a/arch/arm/boot/dts/imx6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul.dtsi

@@ -8,6 +8,7 @@
 
 #include <dt-bindings/clock/imx6ul-clock.h>
 #include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include "imx6ul-pinfunc.h"
 #include "skeleton.dtsi"
@@ -140,6 +141,39 @@
 			reg = <0x00900000 0x20000>;
 		};
 
+		dma_apbh: dma-apbh@01804000 {
+			compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
+			reg = <0x01804000 0x2000>;
+			interrupts = <0 13 IRQ_TYPE_LEVEL_HIGH>,
+				     <0 13 IRQ_TYPE_LEVEL_HIGH>,
+				     <0 13 IRQ_TYPE_LEVEL_HIGH>,
+				     <0 13 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "gpmi0", "gpmi1", "gpmi2", "gpmi3";
+			#dma-cells = <1>;
+			dma-channels = <4>;
+			clocks = <&clks IMX6UL_CLK_APBHDMA>;
+		};
+
+		gpmi: gpmi-nand@01806000         {
+			compatible = "fsl,imx6q-gpmi-nand";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x01806000 0x2000>, <0x01808000 0x2000>;
+			reg-names = "gpmi-nand", "bch";
+			interrupts = <0 15 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "bch";
+			clocks = <&clks IMX6UL_CLK_GPMI_IO>,
+				 <&clks IMX6UL_CLK_GPMI_APB>,
+				 <&clks IMX6UL_CLK_GPMI_BCH>,
+				 <&clks IMX6UL_CLK_GPMI_BCH_APB>,
+				 <&clks IMX6UL_CLK_PER_BCH>;
+			clock-names = "gpmi_io", "gpmi_apb", "gpmi_bch",
+				      "gpmi_bch_apb", "per1_bch";
+			dmas = <&dma_apbh 0>;
+			dma-names = "rx-tx";
+			status = "disabled";
+		};
+
 		aips1: aips-bus@02000000 {
 			compatible = "fsl,aips-bus", "simple-bus";
 			#address-cells = <1>;
@@ -234,6 +268,126 @@
 					clock-names = "ipg", "per";
 					status = "disabled";
 				};
+
+				sai1: sai@02028000 {
+					#sound-dai-cells = <0>;
+					compatible = "fsl,imx6ul-sai", "fsl,imx6sx-sai";
+					reg = <0x02028000 0x4000>;
+					interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+					clocks = <&clks IMX6UL_CLK_SAI1_IPG>,
+						 <&clks IMX6UL_CLK_SAI1>,
+						 <&clks IMX6UL_CLK_DUMMY>, <&clks IMX6UL_CLK_DUMMY>;
+					clock-names = "bus", "mclk1", "mclk2", "mclk3";
+					dmas = <&sdma 35 24 0>,
+					       <&sdma 36 24 0>;
+					dma-names = "rx", "tx";
+					status = "disabled";
+				};
+
+				sai2: sai@0202c000 {
+					#sound-dai-cells = <0>;
+					compatible = "fsl,imx6ul-sai", "fsl,imx6sx-sai";
+					reg = <0x0202c000 0x4000>;
+					interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+					clocks = <&clks IMX6UL_CLK_SAI2_IPG>,
+						 <&clks IMX6UL_CLK_SAI2>,
+						 <&clks IMX6UL_CLK_DUMMY>, <&clks IMX6UL_CLK_DUMMY>;
+					clock-names = "bus", "mclk1", "mclk2", "mclk3";
+					dmas = <&sdma 37 24 0>,
+					       <&sdma 38 24 0>;
+					dma-names = "rx", "tx";
+					status = "disabled";
+				};
+
+				sai3: sai@02030000 {
+					#sound-dai-cells = <0>;
+					compatible = "fsl,imx6ul-sai", "fsl,imx6sx-sai";
+					reg = <0x02030000 0x4000>;
+					interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
+					clocks = <&clks IMX6UL_CLK_SAI3_IPG>,
+						 <&clks IMX6UL_CLK_SAI3>,
+						 <&clks IMX6UL_CLK_DUMMY>, <&clks IMX6UL_CLK_DUMMY>;
+					clock-names = "bus", "mclk1", "mclk2", "mclk3";
+					dmas = <&sdma 39 24 0>,
+					       <&sdma 40 24 0>;
+					dma-names = "rx", "tx";
+					status = "disabled";
+				};
+			};
+
+			tsc: tsc@02040000 {
+				compatible = "fsl,imx6ul-tsc";
+				reg = <0x02040000 0x4000>, <0x0219c000 0x4000>;
+				interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_IPG>,
+					 <&clks IMX6UL_CLK_ADC2>;
+				clock-names = "tsc", "adc";
+				status = "disabled";
+			};
+
+			pwm1: pwm@02080000 {
+				compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
+				reg = <0x02080000 0x4000>;
+				interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_PWM1>,
+					 <&clks IMX6UL_CLK_PWM1>;
+				clock-names = "ipg", "per";
+				#pwm-cells = <2>;
+				status = "disabled";
+			};
+
+			pwm2: pwm@02084000 {
+				compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
+				reg = <0x02084000 0x4000>;
+				interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_PWM2>,
+					 <&clks IMX6UL_CLK_PWM2>;
+				clock-names = "ipg", "per";
+				#pwm-cells = <2>;
+				status = "disabled";
+			};
+
+			pwm3: pwm@02088000 {
+				compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
+				reg = <0x02088000 0x4000>;
+				interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_PWM3>,
+					 <&clks IMX6UL_CLK_PWM3>;
+				clock-names = "ipg", "per";
+				#pwm-cells = <2>;
+				status = "disabled";
+			};
+
+			pwm4: pwm@0208c000 {
+				compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
+				reg = <0x0208c000 0x4000>;
+				interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_PWM4>,
+					 <&clks IMX6UL_CLK_PWM4>;
+				clock-names = "ipg", "per";
+				#pwm-cells = <2>;
+				status = "disabled";
+			};
+
+			can1: flexcan@02090000 {
+				compatible = "fsl,imx6ul-flexcan", "fsl,imx6q-flexcan";
+				reg = <0x02090000 0x4000>;
+				interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_CAN1_IPG>,
+					 <&clks IMX6UL_CLK_CAN1_SERIAL>;
+				clock-names = "ipg", "per";
+				status = "disabled";
+			};
+
+			can2: flexcan@02094000 {
+				compatible = "fsl,imx6ul-flexcan", "fsl,imx6q-flexcan";
+				reg = <0x02094000 0x4000>;
+				interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_CAN2_IPG>,
+					 <&clks IMX6UL_CLK_CAN2_SERIAL>;
+				clock-names = "ipg", "per";
+				status = "disabled";
 			};
 
 			gpt1: gpt@02098000 {
@@ -317,6 +471,14 @@
 				status = "disabled";
 			};
 
+			kpp: kpp@020b8000 {
+				compatible = "fsl,imx6ul-kpp", "fsl,imx6q-kpp", "fsl,imx21-kpp";
+				reg = <0x020b8000 0x4000>;
+				interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_KPP>;
+				status = "disabled";
+			};
+
 			wdog1: wdog@020bc000 {
 				compatible = "fsl,imx6ul-wdt", "fsl,imx21-wdt";
 				reg = <0x020bc000 0x4000>;
@@ -487,49 +649,65 @@
 				compatible = "fsl,imx6ul-gpt", "fsl,imx6sx-gpt";
 				reg = <0x020e8000 0x4000>;
 				interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clks IMX6UL_CLK_DUMMY>,
-					 <&clks IMX6UL_CLK_DUMMY>;
+				clocks = <&clks IMX6UL_CLK_GPT2_BUS>,
+					 <&clks IMX6UL_CLK_GPT2_SERIAL>;
 				clock-names = "ipg", "per";
 			};
 
+			sdma: sdma@020ec000 {
+				compatible = "fsl,imx6ul-sdma", "fsl,imx6q-sdma",
+					     "fsl,imx35-sdma";
+				reg = <0x020ec000 0x4000>;
+				interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_SDMA>,
+					 <&clks IMX6UL_CLK_SDMA>;
+				clock-names = "ipg", "ahb";
+				#dma-cells = <3>;
+				fsl,sdma-ram-script-name = "imx/sdma/sdma-imx6q.bin";
+			};
+
 			pwm5: pwm@020f0000 {
 				compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
 				reg = <0x020f0000 0x4000>;
 				interrupts = <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clks IMX6UL_CLK_DUMMY>,
-					 <&clks IMX6UL_CLK_DUMMY>;
+				clocks = <&clks IMX6UL_CLK_PWM5>,
+					 <&clks IMX6UL_CLK_PWM5>;
 				clock-names = "ipg", "per";
 				#pwm-cells = <2>;
+				status = "disabled";
 			};
 
 			pwm6: pwm@020f4000 {
 				compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
 				reg = <0x020f4000 0x4000>;
 				interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clks IMX6UL_CLK_DUMMY>,
-					 <&clks IMX6UL_CLK_DUMMY>;
+				clocks = <&clks IMX6UL_CLK_PWM6>,
+					 <&clks IMX6UL_CLK_PWM6>;
 				clock-names = "ipg", "per";
 				#pwm-cells = <2>;
+				status = "disabled";
 			};
 
 			pwm7: pwm@020f8000 {
 				compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
 				reg = <0x020f8000 0x4000>;
 				interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clks IMX6UL_CLK_DUMMY>,
-					 <&clks IMX6UL_CLK_DUMMY>;
+				clocks = <&clks IMX6UL_CLK_PWM7>,
+					 <&clks IMX6UL_CLK_PWM7>;
 				clock-names = "ipg", "per";
 				#pwm-cells = <2>;
+				status = "disabled";
 			};
 
 			pwm8: pwm@020fc000 {
 				compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
 				reg = <0x020fc000 0x4000>;
 				interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clks IMX6UL_CLK_DUMMY>,
-					 <&clks IMX6UL_CLK_DUMMY>;
+				clocks = <&clks IMX6UL_CLK_PWM8>,
+					 <&clks IMX6UL_CLK_PWM8>;
 				clock-names = "ipg", "per";
 				#pwm-cells = <2>;
+				status = "disabled";
 			};
 		};
 
@@ -590,17 +768,6 @@
 				status = "disabled";
 			};
 
-			tsc: tsc@02040000 {
-				compatible = "fsl,imx6ul-tsc";
-				reg = <0x02040000 0x4000>, <0x0219c000 0x4000>;
-				interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
-					     <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clks IMX6UL_CLK_IPG>,
-					 <&clks IMX6UL_CLK_ADC2>;
-				clock-names = "tsc", "adc";
-				status = "disabled";
-			};
-
 			usdhc1: usdhc@02190000 {
 				compatible = "fsl,imx6ul-usdhc", "fsl,imx6sx-usdhc";
 				reg = <0x02190000 0x4000>;
@@ -672,6 +839,17 @@
 				reg = <0x021b0000 0x4000>;
 			};
 
+			lcdif: lcdif@021c8000 {
+				compatible = "fsl,imx6ul-lcdif", "fsl,imx28-lcdif";
+				reg = <0x021c8000 0x4000>;
+				interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks IMX6UL_CLK_LCDIF_PIX>,
+					 <&clks IMX6UL_CLK_LCDIF_APB>,
+					 <&clks IMX6UL_CLK_DUMMY>;
+				clock-names = "pix", "axi", "disp_axi";
+				status = "disabled";
+			};
+
 			qspi: qspi@021e0000 {
 				#address-cells = <1>;
 				#size-cells = <0>;

diff --git a/arch/arm/boot/dts/imx7d-sbc-imx7.dts b/arch/arm/boot/dts/imx7d-sbc-imx7.dts
index d63c597..f8a8685 100644
--- a/arch/arm/boot/dts/imx7d-sbc-imx7.dts
+++ b/arch/arm/boot/dts/imx7d-sbc-imx7.dts

@@ -22,7 +22,7 @@
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio5 1 GPIO_ACTIVE_HIGH>;
-	enable-sdio-wakeup;
+	wakeup-source;
 	status = "okay";
 };
 

diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts
index b2c4536..b267f79 100644
--- a/arch/arm/boot/dts/imx7d-sdb.dts
+++ b/arch/arm/boot/dts/imx7d-sdb.dts

@@ -296,7 +296,7 @@
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
 	wp-gpios = <&gpio5 1 GPIO_ACTIVE_HIGH>;
-	enable-sdio-wakeup;
+	wakeup-source;
 	keep-power-in-suspend;
 	status = "okay";
 };

diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi
index 25ad309..b5a50e0 100644
--- a/arch/arm/boot/dts/imx7d.dtsi
+++ b/arch/arm/boot/dts/imx7d.dtsi

@@ -119,6 +119,15 @@
 		clock-output-names = "osc";
 	};
 
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+		interrupt-parent = <&intc>;
+	};
+
 	etr@30086000 {
 		compatible = "arm,coresight-tmc", "arm,primecell";
 		reg = <0x30086000 0x1000>;

diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index ecf12dc..726372d 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi

@@ -572,5 +572,49 @@
 			dr_mode = "host";
 			snps,quirk-frame-length-adjustment = <0x20>;
 		};
+
+		pcie@3400000 {
+			compatible = "fsl,ls1021a-pcie", "snps,dw-pcie";
+			reg = <0x00 0x03400000 0x0 0x00010000   /* controller registers */
+			       0x40 0x00000000 0x0 0x00002000>; /* configuration space */
+			reg-names = "regs", "config";
+			interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>; /* controller interrupt */
+			fsl,pcie-scfg = <&scfg 0>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			num-lanes = <4>;
+			bus-range = <0x0 0xff>;
+			ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000   /* downstream I/O */
+				  0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0 0 0 7>;
+			interrupt-map = <0000 0 0 1 &gic GIC_SPI 91  IRQ_TYPE_LEVEL_HIGH>,
+					<0000 0 0 2 &gic GIC_SPI 188 IRQ_TYPE_LEVEL_HIGH>,
+					<0000 0 0 3 &gic GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>,
+					<0000 0 0 4 &gic GIC_SPI 192 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		pcie@3500000 {
+			compatible = "fsl,ls1021a-pcie", "snps,dw-pcie";
+			reg = <0x00 0x03500000 0x0 0x00010000   /* controller registers */
+			       0x48 0x00000000 0x0 0x00002000>; /* configuration space */
+			reg-names = "regs", "config";
+			interrupts = <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>;
+			fsl,pcie-scfg = <&scfg 1>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			num-lanes = <4>;
+			bus-range = <0x0 0xff>;
+			ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000   /* downstream I/O */
+				  0x82000000 0x0 0x40000000 0x48 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0 0 0 7>;
+			interrupt-map = <0000 0 0 1 &gic GIC_SPI 92  IRQ_TYPE_LEVEL_HIGH>,
+					<0000 0 0 2 &gic GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>,
+					<0000 0 0 3 &gic GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>,
+					<0000 0 0 4 &gic GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
+		};
 	};
 };

diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 0350f79..412809c 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi

@@ -416,7 +416,7 @@
 	status = "okay";
 
 	assigned-clocks = <&cru SCLK_USBPHY480M_SRC>;
-	assigned-clock-parents = <&cru SCLK_OTGPHY0>;
+	assigned-clock-parents = <&usbphy0>;
 	dr_mode = "host";
 };
 

diff --git a/arch/arm/boot/dts/uniphier-common32.dtsi b/arch/arm/boot/dts/uniphier-common32.dtsi
index ea9301a..61a0955 100644
--- a/arch/arm/boot/dts/uniphier-common32.dtsi
+++ b/arch/arm/boot/dts/uniphier-common32.dtsi

@@ -45,6 +45,13 @@
 /include/ "skeleton.dtsi"
 
 / {
+	clocks {
+		refclk: ref {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+		};
+	};
+
 	soc: soc {
 		compatible = "simple-bus";
 		#address-cells = <1>;
@@ -52,12 +59,6 @@
 		ranges;
 		interrupt-parent = <&intc>;
 
-		extbus: extbus {
-			compatible = "simple-bus";
-			#address-cells = <2>;
-			#size-cells = <1>;
-		};
-
 		serial0: serial@54006800 {
 			compatible = "socionext,uniphier-uart";
 			status = "disabled";
@@ -98,9 +99,17 @@
 			clocks = <&uart_clk>;
 		};
 
-		system-bus-controller@58c00000 {
-			compatible = "socionext,uniphier-system-bus-controller";
-			reg = <0x58c00000 0x400>, <0x59800000 0x2000>;
+		system_bus: system-bus@58c00000 {
+			compatible = "socionext,uniphier-system-bus";
+			status = "disabled";
+			reg = <0x58c00000 0x400>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+		};
+
+		smpctrl@59800000 {
+			compatible = "socionext,uniphier-smpctrl";
+			reg = <0x59801000 0x400>;
 		};
 
 		timer@60000200 {

diff --git a/arch/arm/boot/dts/uniphier-ph1-ld4-ref.dts b/arch/arm/boot/dts/uniphier-ph1-ld4-ref.dts
index f1e9d40..ec94b7a 100644
--- a/arch/arm/boot/dts/uniphier-ph1-ld4-ref.dts
+++ b/arch/arm/boot/dts/uniphier-ph1-ld4-ref.dts

@@ -72,14 +72,6 @@
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
 &ethsc {
 	interrupts = <0 49 4>;
 };

diff --git a/arch/arm/boot/dts/uniphier-ph1-ld4.dtsi b/arch/arm/boot/dts/uniphier-ph1-ld4.dtsi
index 34f0d8d..dadd860 100644
--- a/arch/arm/boot/dts/uniphier-ph1-ld4.dtsi
+++ b/arch/arm/boot/dts/uniphier-ph1-ld4.dtsi

@@ -173,6 +173,10 @@
 
 };
 
+&refclk {
+	clock-frequency = <24576000>;
+};
+
 &serial3 {
 	interrupts = <0 29 4>;
 };

diff --git a/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts b/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts
index 5baa9fc..b8134c6 100644
--- a/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts
+++ b/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts

@@ -74,14 +74,6 @@
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
 &ethsc {
 	interrupts = <0 52 4>;
 };

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts b/arch/arm/boot/dts/uniphier-ph1-pro4-ace.dts
similarity index 80%
copy from arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
copy to arch/arm/boot/dts/uniphier-ph1-pro4-ace.dts
index 3e53317..d343586 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
+++ b/arch/arm/boot/dts/uniphier-ph1-pro4-ace.dts

@@ -1,7 +1,7 @@
 /*
- * Device Tree Source for UniPhier PH1-LD10 Reference Board
+ * Device Tree Source for UniPhier PH1-Pro4 Ace Board
  *
- * Copyright (C) 2015 Masahiro Yamada <yamada.masahiro@socionext.com>
+ * Copyright (C) 2016 Masahiro Yamada <yamada.masahiro@socionext.com>
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -43,16 +43,15 @@
  */
 
 /dts-v1/;
-/include/ "uniphier-ph1-ld10.dtsi"
-/include/ "uniphier-support-card.dtsi"
+/include/ "uniphier-ph1-pro4.dtsi"
 
 / {
-	model = "UniPhier PH1-LD10 Reference Board";
-	compatible = "socionext,ph1-ld10-ref", "socionext,ph1-ld10";
+	model = "UniPhier PH1-Pro4 Ace Board";
+	compatible = "socionext,ph1-pro4-ace", "socionext,ph1-pro4";
 
 	memory {
 		device_type = "memory";
-		reg = <0 0x80000000 0 0xc0000000>;
+		reg = <0x80000000 0x40000000>;
 	};
 
 	chosen {
@@ -63,33 +62,52 @@
 		serial0 = &serial0;
 		serial1 = &serial1;
 		serial2 = &serial2;
-		serial3 = &serial3;
 		i2c0 = &i2c0;
 		i2c1 = &i2c1;
 		i2c2 = &i2c2;
 		i2c3 = &i2c3;
-		i2c4 = &i2c4;
 		i2c5 = &i2c5;
 		i2c6 = &i2c6;
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
-&ethsc {
-	interrupts = <0 48 4>;
-};
-
 &serial0 {
 	status = "okay";
 };
 
+&serial1 {
+	status = "okay";
+};
+
+&serial2 {
+	status = "okay";
+};
+
 &i2c0 {
 	status = "okay";
+
+	eeprom@54 {
+		compatible = "st,24c64";
+		reg = <0x54>;
+	};
+};
+
+&i2c1 {
+	status = "okay";
+};
+
+&i2c2 {
+	status = "okay";
+};
+
+&i2c3 {
+	status = "okay";
+};
+
+&usb2 {
+	status = "okay";
+};
+
+&usb3 {
+	status = "okay";
 };

diff --git a/arch/arm/boot/dts/uniphier-ph1-pro4-ref.dts b/arch/arm/boot/dts/uniphier-ph1-pro4-ref.dts
index 2462668..95f631a 100644
--- a/arch/arm/boot/dts/uniphier-ph1-pro4-ref.dts
+++ b/arch/arm/boot/dts/uniphier-ph1-pro4-ref.dts

@@ -74,14 +74,6 @@
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
 &ethsc {
 	interrupts = <0 50 4>;
 };

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts b/arch/arm/boot/dts/uniphier-ph1-pro4-sanji.dts
similarity index 79%
copy from arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
copy to arch/arm/boot/dts/uniphier-ph1-pro4-sanji.dts
index 3e53317..7c3a1fc 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
+++ b/arch/arm/boot/dts/uniphier-ph1-pro4-sanji.dts

@@ -1,7 +1,7 @@
 /*
- * Device Tree Source for UniPhier PH1-LD10 Reference Board
+ * Device Tree Source for UniPhier PH1-Pro4 Sanji Board
  *
- * Copyright (C) 2015 Masahiro Yamada <yamada.masahiro@socionext.com>
+ * Copyright (C) 2016 Masahiro Yamada <yamada.masahiro@socionext.com>
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -43,16 +43,15 @@
  */
 
 /dts-v1/;
-/include/ "uniphier-ph1-ld10.dtsi"
-/include/ "uniphier-support-card.dtsi"
+/include/ "uniphier-ph1-pro4.dtsi"
 
 / {
-	model = "UniPhier PH1-LD10 Reference Board";
-	compatible = "socionext,ph1-ld10-ref", "socionext,ph1-ld10";
+	model = "UniPhier PH1-Pro4 Sanji Board";
+	compatible = "socionext,ph1-pro4-sanji", "socionext,ph1-pro4";
 
 	memory {
 		device_type = "memory";
-		reg = <0 0x80000000 0 0xc0000000>;
+		reg = <0x80000000 0x80000000>;
 	};
 
 	chosen {
@@ -62,34 +61,48 @@
 	aliases {
 		serial0 = &serial0;
 		serial1 = &serial1;
-		serial2 = &serial2;
-		serial3 = &serial3;
 		i2c0 = &i2c0;
 		i2c1 = &i2c1;
 		i2c2 = &i2c2;
 		i2c3 = &i2c3;
-		i2c4 = &i2c4;
 		i2c5 = &i2c5;
 		i2c6 = &i2c6;
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
-&ethsc {
-	interrupts = <0 48 4>;
-};
-
 &serial0 {
 	status = "okay";
 };
 
+&serial1 {
+	status = "okay";
+};
+
 &i2c0 {
 	status = "okay";
+
+	eeprom@54 {
+		compatible = "st,24c64";
+		reg = <0x54>;
+	};
+};
+
+&i2c1 {
+	status = "okay";
+};
+
+&i2c2 {
+	status = "okay";
+};
+
+&i2c3 {
+	status = "okay";
+};
+
+&usb2 {
+	status = "okay";
+};
+
+&usb3 {
+	status = "okay";
 };

diff --git a/arch/arm/boot/dts/uniphier-ph1-pro4.dtsi b/arch/arm/boot/dts/uniphier-ph1-pro4.dtsi
index d78142f..20f3f2a 100644
--- a/arch/arm/boot/dts/uniphier-ph1-pro4.dtsi
+++ b/arch/arm/boot/dts/uniphier-ph1-pro4.dtsi

@@ -195,6 +195,10 @@
 	};
 };
 
+&refclk {
+	clock-frequency = <25000000>;
+};
+
 &pinctrl {
 	compatible = "socionext,ph1-pro4-pinctrl", "syscon";
 };

diff --git a/arch/arm/boot/dts/uniphier-ph1-pro5.dtsi b/arch/arm/boot/dts/uniphier-ph1-pro5.dtsi
index 2f389ea..24f6f66 100644
--- a/arch/arm/boot/dts/uniphier-ph1-pro5.dtsi
+++ b/arch/arm/boot/dts/uniphier-ph1-pro5.dtsi

@@ -189,6 +189,10 @@
 	};
 };
 
+&refclk {
+	clock-frequency = <20000000>;
+};
+
 &pinctrl {
 	compatible = "socionext,ph1-pro5-pinctrl", "syscon";
 };

diff --git a/arch/arm/boot/dts/uniphier-ph1-sld3-ref.dts b/arch/arm/boot/dts/uniphier-ph1-sld3-ref.dts
index b7a03215..acb4204 100644
--- a/arch/arm/boot/dts/uniphier-ph1-sld3-ref.dts
+++ b/arch/arm/boot/dts/uniphier-ph1-sld3-ref.dts

@@ -73,14 +73,6 @@
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
 &ethsc {
 	interrupts = <0 49 4>;
 };

diff --git a/arch/arm/boot/dts/uniphier-ph1-sld3.dtsi b/arch/arm/boot/dts/uniphier-ph1-sld3.dtsi
index 691a17d..03292f4 100644
--- a/arch/arm/boot/dts/uniphier-ph1-sld3.dtsi
+++ b/arch/arm/boot/dts/uniphier-ph1-sld3.dtsi

@@ -68,6 +68,12 @@
 	};
 
 	clocks {
+		refclk: ref {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <24576000>;
+		};
+
 		arm_timer_clk: arm_timer_clk {
 			#clock-cells = <0>;
 			compatible = "fixed-clock";
@@ -94,12 +100,6 @@
 		ranges;
 		interrupt-parent = <&intc>;
 
-		extbus: extbus {
-			compatible = "simple-bus";
-			#address-cells = <2>;
-			#size-cells = <1>;
-		};
-
 		timer@20000200 {
 			compatible = "arm,cortex-a9-global-timer";
 			reg = <0x20000200 0x20>;
@@ -216,9 +216,17 @@
 			clock-frequency = <400000>;
 		};
 
-		system-bus-controller@58c00000 {
-			compatible = "socionext,uniphier-system-bus-controller";
-			reg = <0x58c00000 0x400>, <0x59800000 0x2000>;
+		system_bus: system-bus@58c00000 {
+			compatible = "socionext,uniphier-system-bus";
+			status = "disabled";
+			reg = <0x58c00000 0x400>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+		};
+
+		smpctrl@59800000 {
+			compatible = "socionext,uniphier-smpctrl";
+			reg = <0x59801000 0x400>;
 		};
 
 		usb0: usb@5a800100 {

diff --git a/arch/arm/boot/dts/uniphier-ph1-sld8-ref.dts b/arch/arm/boot/dts/uniphier-ph1-sld8-ref.dts
index fc7250c..d594f40 100644
--- a/arch/arm/boot/dts/uniphier-ph1-sld8-ref.dts
+++ b/arch/arm/boot/dts/uniphier-ph1-sld8-ref.dts

@@ -72,14 +72,6 @@
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
 &ethsc {
 	interrupts = <0 48 4>;
 };

diff --git a/arch/arm/boot/dts/uniphier-ph1-sld8.dtsi b/arch/arm/boot/dts/uniphier-ph1-sld8.dtsi
index 7d06a1c..6bfd29a 100644
--- a/arch/arm/boot/dts/uniphier-ph1-sld8.dtsi
+++ b/arch/arm/boot/dts/uniphier-ph1-sld8.dtsi

@@ -172,6 +172,10 @@
 	};
 };
 
+&refclk {
+	clock-frequency = <25000000>;
+};
+
 &serial3 {
 	interrupts = <0 29 4>;
 };

diff --git a/arch/arm/boot/dts/uniphier-pinctrl.dtsi b/arch/arm/boot/dts/uniphier-pinctrl.dtsi
index f67445f..2459279 100644
--- a/arch/arm/boot/dts/uniphier-pinctrl.dtsi
+++ b/arch/arm/boot/dts/uniphier-pinctrl.dtsi

@@ -63,6 +63,11 @@
 		function = "i2c3";
 	};
 
+	pinctrl_i2c4: i2c4_grp {
+		groups = "i2c4";
+		function = "i2c4";
+	};
+
 	pinctrl_uart0: uart0_grp {
 		groups = "uart0";
 		function = "uart0";

diff --git a/arch/arm/boot/dts/uniphier-proxstream2-gentil.dts b/arch/arm/boot/dts/uniphier-proxstream2-gentil.dts
index 9d7ec5c..bf2619e 100644
--- a/arch/arm/boot/dts/uniphier-proxstream2-gentil.dts
+++ b/arch/arm/boot/dts/uniphier-proxstream2-gentil.dts

@@ -63,6 +63,7 @@
 		serial1 = &serial1;
 		serial2 = &serial2;
 		i2c0 = &i2c0;
+		i2c2 = &i2c2;
 		i2c4 = &i2c4;
 		i2c5 = &i2c5;
 		i2c6 = &i2c6;
@@ -75,4 +76,13 @@
 
 &i2c0 {
 	status = "okay";
+
+	eeprom@54 {
+		compatible = "st,24c64";
+		reg = <0x54>;
+	};
+};
+
+&i2c2 {
+	status = "okay";
 };

diff --git a/arch/arm/boot/dts/uniphier-proxstream2.dtsi b/arch/arm/boot/dts/uniphier-proxstream2.dtsi
index 6bd353f..4ac484c 100644
--- a/arch/arm/boot/dts/uniphier-proxstream2.dtsi
+++ b/arch/arm/boot/dts/uniphier-proxstream2.dtsi

@@ -200,6 +200,10 @@
 	};
 };
 
+&refclk {
+	clock-frequency = <25000000>;
+};
+
 &pinctrl {
 	compatible = "socionext,proxstream2-pinctrl", "syscon";
 };

diff --git a/arch/arm/boot/dts/uniphier-ref-daughter.dtsi b/arch/arm/boot/dts/uniphier-ref-daughter.dtsi
index 3d29d28..f7df088 100644
--- a/arch/arm/boot/dts/uniphier-ref-daughter.dtsi
+++ b/arch/arm/boot/dts/uniphier-ref-daughter.dtsi

@@ -43,7 +43,7 @@
  */
 
 &i2c0 {
-	eeprom {
+	eeprom@50 {
 		compatible = "microchip,24lc128";
 		reg = <0x50>;
 	};

diff --git a/arch/arm/boot/dts/uniphier-support-card.dtsi b/arch/arm/boot/dts/uniphier-support-card.dtsi
index da271e3..51ecc9b9 100644
--- a/arch/arm/boot/dts/uniphier-support-card.dtsi
+++ b/arch/arm/boot/dts/uniphier-support-card.dtsi

@@ -42,11 +42,15 @@
  *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
-&extbus {
+&system_bus {
+	status = "okay";
+	ranges = <1 0x00000000 0x42000000 0x02000000>;
+
 	support_card: support_card {
 		compatible = "simple-bus";
 		#address-cells = <1>;
 		#size-cells = <1>;
+		ranges = <0x00000000 1 0x01f00000 0x00100000>;
 
 		ethsc: ethernet@00000000 {
 			compatible = "smsc,lan9118", "smsc,lan9115";

diff --git a/arch/arm/boot/dts/vf-colibri-eval-v3.dtsi b/arch/arm/boot/dts/vf-colibri-eval-v3.dtsi
index ed65e0f..4d8b7f6 100644
--- a/arch/arm/boot/dts/vf-colibri-eval-v3.dtsi
+++ b/arch/arm/boot/dts/vf-colibri-eval-v3.dtsi

@@ -1,10 +1,42 @@
 /*
  * Copyright 2014 Toradex AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 / {
@@ -18,38 +50,36 @@
 		clock-frequency = <16000000>;
 	};
 
-	regulators {
-		compatible = "simple-bus";
-		#address-cells = <1>;
-		#size-cells = <0>;
+	reg_3v3: regulator-3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "3.3V";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
 
-		sys_5v0_reg: regulator@0 {
-			compatible = "regulator-fixed";
-			reg = <0>;
-			regulator-name = "5v0";
-			regulator-min-microvolt = <5000000>;
-			regulator-max-microvolt = <5000000>;
-			regulator-always-on;
-		};
+	reg_5v0: regulator-5v0 {
+		compatible = "regulator-fixed";
+		regulator-name = "5V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+	};
 
-		/* USBH_PEN */
-		usbh_vbus_reg: regulator@1 {
-			compatible = "regulator-fixed";
-			pinctrl-names = "default";
-			pinctrl-0 = <&pinctrl_usbh1_reg>;
-			reg = <1>;
-			regulator-name = "usbh_vbus";
-			regulator-min-microvolt = <5000000>;
-			regulator-max-microvolt = <5000000>;
-			gpio = <&gpio2 19 GPIO_ACTIVE_LOW>;
-			vin-supply = <&sys_5v0_reg>;
-		};
+	reg_usbh_vbus: regulator-usbh-vbus {
+		compatible = "regulator-fixed";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_usbh1_reg>;
+		regulator-name = "VCC_USB[1-4]";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio = <&gpio2 19 GPIO_ACTIVE_LOW>; /* USBH_PEN resp. USBH_P_EN */
+		vin-supply = <&reg_5v0>;
 	};
 };
 
 &bl {
 	brightness-levels = <0 4 8 16 32 64 128 255>;
 	default-brightness-level = <6>;
+	power-supply = <&reg_3v3>;
 	status  = "okay";
 };
 
@@ -100,6 +130,10 @@
 	status = "okay";
 };
 
+&reg_module_3v3 {
+	vin-supply = <&reg_3v3>;
+};
+
 &uart0 {
 	status = "okay";
 };
@@ -113,7 +147,7 @@
 };
 
 &usbh1 {
-	vbus-supply = <&usbh_vbus_reg>;
+	vbus-supply = <&reg_usbh_vbus>;
 };
 
 &iomuxc {

diff --git a/arch/arm/boot/dts/vf-colibri.dtsi b/arch/arm/boot/dts/vf-colibri.dtsi
index 6e556be..fda7f28 100644
--- a/arch/arm/boot/dts/vf-colibri.dtsi
+++ b/arch/arm/boot/dts/vf-colibri.dtsi

@@ -1,26 +1,77 @@
 /*
  * Copyright 2014 Toradex AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 / {
 	bl: backlight {
 		compatible = "pwm-backlight";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_gpio_bl_on>;
 		pwms = <&pwm0 0 5000000 0>;
+		enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
 		status = "disabled";
 	};
+
+	reg_module_3v3: regulator-module-3v3 {
+		compatible = "regulator-fixed";
+		regulator-name = "+V3.3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
+
+	reg_module_3v3_avdd: regulator-module-3v3-avdd {
+		compatible = "regulator-fixed";
+		regulator-name = "+V3.3_AVDD_AUDIO";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+	};
 };
 
 &adc0 {
 	status = "okay";
+	vref-supply = <&reg_module_3v3_avdd>;
 };
 
 &adc1 {
 	status = "okay";
+	vref-supply = <&reg_module_3v3_avdd>;
 };
 
 &can0 {
@@ -35,6 +86,13 @@
 	status = "disabled";
 };
 
+&clks {
+	assigned-clocks = <&clks VF610_CLK_ENET_SEL>,
+			  <&clks VF610_CLK_ENET_TS_SEL>;
+	assigned-clock-parents = <&clks VF610_CLK_ENET_50M>,
+				 <&clks VF610_CLK_ENET_50M>;
+};
+
 &dspi1 {
 	bus-num = <1>;
 	pinctrl-names = "default";
@@ -50,10 +108,12 @@
 	pinctrl-0 = <&pinctrl_esdhc1>;
 	bus-width = <4>;
 	cd-gpios = <&gpio1 10 GPIO_ACTIVE_LOW>;
+	disable-wp;
 };
 
 &fec1 {
 	phy-mode = "rmii";
+	phy-supply = <&reg_module_3v3>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_fec1>;
 };
@@ -195,6 +255,12 @@
 			>;
 		};
 
+		pinctrl_gpio_bl_on: gpio_bl_on {
+			fsl,pins = <
+				VF610_PAD_PTC0__GPIO_45		0x22ef
+			>;
+		};
+
 		pinctrl_i2c0: i2c0grp {
 			fsl,pins = <
 				VF610_PAD_PTB14__I2C0_SCL		0x37ff
@@ -239,6 +305,8 @@
 			fsl,pins = <
 				VF610_PAD_PTB10__UART0_TX		0x21a2
 				VF610_PAD_PTB11__UART0_RX		0x21a1
+				VF610_PAD_PTB12__UART0_RTS		0x21a2
+				VF610_PAD_PTB13__UART0_CTS		0x21a1
 			>;
 		};
 

diff --git a/arch/arm/boot/dts/vf500-colibri-eval-v3.dts b/arch/arm/boot/dts/vf500-colibri-eval-v3.dts
index c3173fc..b3aeab5 100644
--- a/arch/arm/boot/dts/vf500-colibri-eval-v3.dts
+++ b/arch/arm/boot/dts/vf500-colibri-eval-v3.dts

@@ -1,10 +1,42 @@
 /*
  * Copyright 2014 Toradex AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /dts-v1/;

diff --git a/arch/arm/boot/dts/vf500-colibri.dtsi b/arch/arm/boot/dts/vf500-colibri.dtsi
index 84f091d..3fe1f48 100644
--- a/arch/arm/boot/dts/vf500-colibri.dtsi
+++ b/arch/arm/boot/dts/vf500-colibri.dtsi

@@ -1,10 +1,42 @@
 /*
  * Copyright 2014 Toradex AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "vf500.dtsi"

diff --git a/arch/arm/boot/dts/vf500.dtsi b/arch/arm/boot/dts/vf500.dtsi
index e976d2f..9d37272 100644
--- a/arch/arm/boot/dts/vf500.dtsi
+++ b/arch/arm/boot/dts/vf500.dtsi

@@ -1,10 +1,42 @@
 /*
  * Copyright 2013 Freescale Semiconductor, Inc.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "skeleton.dtsi"
@@ -43,6 +75,15 @@
 				clocks = <&clks VF610_CLK_PLATFORM_BUS>;
 			};
 		};
+
+		aips-bus@40080000 {
+			pmu@40089000 {
+				compatible = "arm,cortex-a5-pmu";
+				interrupts = <7 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-affinity = <&a5_cpu>;
+			};
+		};
+
 	};
 };
 

diff --git a/arch/arm/boot/dts/vf610-colibri-eval-v3.dts b/arch/arm/boot/dts/vf610-colibri-eval-v3.dts
index 10ebe99..dbca4f8 100644
--- a/arch/arm/boot/dts/vf610-colibri-eval-v3.dts
+++ b/arch/arm/boot/dts/vf610-colibri-eval-v3.dts

@@ -1,10 +1,42 @@
 /*
  * Copyright 2014 Toradex AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /dts-v1/;
@@ -14,4 +46,4 @@
 / {
 	model = "Toradex Colibri VF61 on Colibri Evaluation Board";
 	compatible = "toradex,vf610-colibri_vf61-on-eval", "toradex,vf610-colibri_vf61", "fsl,vf610";
-};
\ No newline at end of file
+};

diff --git a/arch/arm/boot/dts/vf610-colibri.dtsi b/arch/arm/boot/dts/vf610-colibri.dtsi
index 2d7eab7..ab4a29f 100644
--- a/arch/arm/boot/dts/vf610-colibri.dtsi
+++ b/arch/arm/boot/dts/vf610-colibri.dtsi

@@ -1,10 +1,42 @@
 /*
  * Copyright 2014 Toradex AG
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "vf610.dtsi"

diff --git a/arch/arm/boot/dts/vf610-twr.dts b/arch/arm/boot/dts/vf610-twr.dts
index 5438ee4..cdc1007 100644
--- a/arch/arm/boot/dts/vf610-twr.dts
+++ b/arch/arm/boot/dts/vf610-twr.dts

@@ -1,10 +1,42 @@
 /*
  * Copyright 2013 Freescale Semiconductor, Inc.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /dts-v1/;
@@ -96,6 +128,10 @@
 &clks {
 	clocks = <&sxosc>, <&fxosc>, <&enet_ext>, <&audio_ext>;
 	clock-names = "sxosc", "fxosc", "enet_ext", "audio_ext";
+	assigned-clocks = <&clks VF610_CLK_ENET_SEL>,
+			  <&clks VF610_CLK_ENET_TS_SEL>;
+	assigned-clock-parents = <&clks VF610_CLK_ENET_EXT>,
+				 <&clks VF610_CLK_ENET_EXT>;
 };
 
 &dspi0 {

diff --git a/arch/arm/boot/dts/vf610.dtsi b/arch/arm/boot/dts/vf610.dtsi
index 58bc6e4..0cfc060 100644
--- a/arch/arm/boot/dts/vf610.dtsi
+++ b/arch/arm/boot/dts/vf610.dtsi

@@ -1,10 +1,42 @@
 /*
  * Copyright 2013 Freescale Semiconductor, Inc.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "vf500.dtsi"

diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi
index 4539f8d..5c09754 100644
--- a/arch/arm/boot/dts/vfxxx.dtsi
+++ b/arch/arm/boot/dts/vfxxx.dtsi

@@ -1,10 +1,42 @@
 /*
  * Copyright 2013 Freescale Semiconductor, Inc.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     version 2 as published by the Free Software Foundation.
+ *
+ *     This file is distributed in the hope that it will be useful
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED , WITHOUT WARRANTY OF ANY KIND
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "vf610-pinfunc.h"
@@ -16,6 +48,8 @@
 	aliases {
 		can0 = &can0;
 		can1 = &can1;
+		ethernet0 = &fec0;
+		ethernet1 = &fec1;
 		serial0 = &uart0;
 		serial1 = &uart1;
 		serial2 = &uart2;
@@ -174,6 +208,34 @@
 				status = "disabled";
 			};
 
+			sai0: sai@4002f000 {
+				compatible = "fsl,vf610-sai";
+				reg = <0x4002f000 0x1000>;
+				interrupts = <84 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks VF610_CLK_SAI0>,
+					<&clks VF610_CLK_SAI0_DIV>,
+					<&clks 0>, <&clks 0>;
+				clock-names = "bus", "mclk1", "mclk2", "mclk3";
+				dma-names = "tx", "rx";
+				dmas = <&edma0 0 17>,
+					<&edma0 0 16>;
+				status = "disabled";
+			};
+
+			sai1: sai@40030000 {
+				compatible = "fsl,vf610-sai";
+				reg = <0x40030000 0x1000>;
+				interrupts = <85 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks VF610_CLK_SAI1>,
+					<&clks VF610_CLK_SAI1_DIV>,
+					<&clks 0>, <&clks 0>;
+				clock-names = "bus", "mclk1", "mclk2", "mclk3";
+				dma-names = "tx", "rx";
+				dmas = <&edma0 0 19>,
+					<&edma0 0 18>;
+				status = "disabled";
+			};
+
 			sai2: sai@40031000 {
 				compatible = "fsl,vf610-sai";
 				reg = <0x40031000 0x1000>;
@@ -188,6 +250,20 @@
 				status = "disabled";
 			};
 
+			sai3: sai@40032000 {
+				compatible = "fsl,vf610-sai";
+				reg = <0x40032000 0x1000>;
+				interrupts = <87 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clks VF610_CLK_SAI3>,
+					<&clks VF610_CLK_SAI3_DIV>,
+					<&clks 0>, <&clks 0>;
+				clock-names = "bus", "mclk1", "mclk2", "mclk3";
+				dma-names = "tx", "rx";
+				dmas = <&edma0 1 9>,
+					<&edma0 1 8>;
+				status = "disabled";
+			};
+
 			pit: pit@40037000 {
 				compatible = "fsl,vf610-pit";
 				reg = <0x40037000 0x1000>;
@@ -558,6 +634,24 @@
 				status = "disabled";
 			};
 
+			dac0: dac@400cc000 {
+				compatible = "fsl,vf610-dac";
+				reg = <0x400cc000 1000>;
+				interrupts = <55 IRQ_TYPE_LEVEL_HIGH>;
+				clock-names = "dac";
+				clocks = <&clks VF610_CLK_DAC0>;
+				status = "disabled";
+			};
+
+			dac1: dac@400cd000 {
+				compatible = "fsl,vf610-dac";
+				reg = <0x400cd000 1000>;
+				interrupts = <56 IRQ_TYPE_LEVEL_HIGH>;
+				clock-names = "dac";
+				clocks = <&clks VF610_CLK_DAC1>;
+				status = "disabled";
+			};
+
 			fec0: ethernet@400d0000 {
 				compatible = "fsl,mvf600-fec";
 				reg = <0x400d0000 0x1000>;

diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h
index 5abaf5b..bf19912 100644
--- a/arch/arm/include/asm/exception.h
+++ b/arch/arm/include/asm/exception.h

@@ -7,7 +7,7 @@
 #ifndef __ASM_ARM_EXCEPTION_H
 #define __ASM_ARM_EXCEPTION_H
 
-#include <linux/ftrace.h>
+#include <linux/interrupt.h>
 
 #define __exception	__attribute__((section(".exception.text")))
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER

diff --git a/arch/arm/include/asm/page-nommu.h b/arch/arm/include/asm/page-nommu.h
index d1b162a..503f488 100644
--- a/arch/arm/include/asm/page-nommu.h
+++ b/arch/arm/include/asm/page-nommu.h

@@ -17,9 +17,6 @@
 #define KTHREAD_SIZE PAGE_SIZE
 #endif
  
-#define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)	free_page(addr)
-
 #define clear_page(page)	memset((page), 0, PAGE_SIZE)
 #define copy_page(to,from)	memcpy((to), (from), PAGE_SIZE)
 

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 1fab979..e2c6da0 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S

@@ -108,6 +108,7 @@
 			*(.exception.text)
 			__exception_text_end = .;
 			IRQENTRY_TEXT
+			SOFTIRQENTRY_TEXT
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3e0fb66..b538431 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c

@@ -373,7 +373,9 @@
 
 void force_vm_exit(const cpumask_t *mask)
 {
+	preempt_disable();
 	smp_call_function_many(mask, exit_vm_noop, NULL, true);
+	preempt_enable();
 }
 
 /**
@@ -1059,15 +1061,27 @@
 	kvm_arm_init_debug();
 }
 
+static void cpu_hyp_reinit(void)
+{
+	if (is_kernel_in_hyp_mode()) {
+		/*
+		 * cpu_init_stage2() is safe to call even if the PM
+		 * event was cancelled before the CPU was reset.
+		 */
+		cpu_init_stage2(NULL);
+	} else {
+		if (__hyp_get_vectors() == hyp_default_vectors)
+			cpu_init_hyp_mode(NULL);
+	}
+}
+
 static int hyp_init_cpu_notify(struct notifier_block *self,
 			       unsigned long action, void *cpu)
 {
 	switch (action) {
 	case CPU_STARTING:
 	case CPU_STARTING_FROZEN:
-		if (__hyp_get_vectors() == hyp_default_vectors)
-			cpu_init_hyp_mode(NULL);
-		break;
+		cpu_hyp_reinit();
 	}
 
 	return NOTIFY_OK;
@@ -1082,9 +1096,8 @@
 				    unsigned long cmd,
 				    void *v)
 {
-	if (cmd == CPU_PM_EXIT &&
-	    __hyp_get_vectors() == hyp_default_vectors) {
-		cpu_init_hyp_mode(NULL);
+	if (cmd == CPU_PM_EXIT) {
+		cpu_hyp_reinit();
 		return NOTIFY_OK;
 	}
 
@@ -1126,6 +1139,22 @@
 	int err;
 
 	/*
+	 * Register CPU Hotplug notifier
+	 */
+	cpu_notifier_register_begin();
+	err = __register_cpu_notifier(&hyp_init_cpu_nb);
+	cpu_notifier_register_done();
+	if (err) {
+		kvm_err("Cannot register KVM init CPU notifier (%d)\n", err);
+		return err;
+	}
+
+	/*
+	 * Register CPU lower-power notifier
+	 */
+	hyp_cpu_pm_init();
+
+	/*
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
@@ -1268,19 +1297,6 @@
 	free_boot_hyp_pgd();
 #endif
 
-	cpu_notifier_register_begin();
-
-	err = __register_cpu_notifier(&hyp_init_cpu_nb);
-
-	cpu_notifier_register_done();
-
-	if (err) {
-		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
-		goto out_err;
-	}
-
-	hyp_cpu_pm_init();
-
 	/* set size of VMID supported by CPU */
 	kvm_vmid_bits = kvm_get_vmid_bits();
 	kvm_info("%d-bit VMID\n", kvm_vmid_bits);

diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index b003e3a..348044e 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig

@@ -32,6 +32,7 @@
 	select CPU_PJ4B
 	select MACH_MVEBU_V7
 	select PINCTRL_ARMADA_370
+	select MVEBU_CLK_COREDIV
 	help
 	  Say 'Y' here if you want your kernel to support boards based
 	  on the Marvell Armada 370 SoC with device tree.
@@ -49,6 +50,7 @@
 	select HAVE_SMP
 	select MACH_MVEBU_V7
 	select PINCTRL_ARMADA_375
+	select MVEBU_CLK_COREDIV
 	help
 	  Say 'Y' here if you want your kernel to support boards based
 	  on the Marvell Armada 375 SoC with device tree.
@@ -66,6 +68,7 @@
 	select HAVE_SMP
 	select MACH_MVEBU_V7
 	select PINCTRL_ARMADA_38X
+	select MVEBU_CLK_COREDIV
 	help
 	  Say 'Y' here if you want your kernel to support boards based
 	  on the Marvell Armada 380/385 SoC with device tree.

diff --git a/arch/arm/mach-shmobile/pm-rmobile.c b/arch/arm/mach-shmobile/pm-rmobile.c
index 46d0a1d..c0b05e9 100644
--- a/arch/arm/mach-shmobile/pm-rmobile.c
+++ b/arch/arm/mach-shmobile/pm-rmobile.c

@@ -12,7 +12,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/console.h>
 #include <linux/delay.h>
 #include <linux/of.h>

diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c
index fab95d1..cf236db 100644
--- a/arch/arm/mach-shmobile/setup-r8a7778.c
+++ b/arch/arm/mach-shmobile/setup-r8a7778.c

@@ -15,7 +15,7 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/io.h>
 #include <linux/irqchip.h>
 

diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 1e572a9..0007ff5 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c

@@ -14,7 +14,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/clocksource.h>
 #include <linux/init.h>
 #include <linux/irq.h>

diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c
index 6d0ebdfb0..1c6fd11 100644
--- a/arch/arm/mach-shmobile/setup-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c

@@ -15,7 +15,7 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/clocksource.h>
 #include <linux/device.h>
 #include <linux/dma-contiguous.h>

diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index d0ba3551..3cced84 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c

@@ -235,7 +235,7 @@
 	 */
 	if (mapping && cache_is_vipt_aliasing())
 		flush_pfn_alias(page_to_pfn(page),
-				page->index << PAGE_CACHE_SHIFT);
+				page->index << PAGE_SHIFT);
 }
 
 static void __flush_dcache_aliases(struct address_space *mapping, struct page *page)
@@ -250,7 +250,7 @@
 	 *   data in the current VM view associated with this page.
 	 * - aliasing VIPT: we only need to find one mapping of this page.
 	 */
-	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	pgoff = page->index;
 
 	flush_dcache_mmap_lock(mapping);
 	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {

diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index b53d4ff..84baa16 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c

@@ -727,15 +727,6 @@
 			return -ENOMEM;
 	}
 
-	if (set->ecc_layout) {
-		ptr = kmemdup(set->ecc_layout,
-			      sizeof(struct nand_ecclayout), GFP_KERNEL);
-		set->ecc_layout = ptr;
-
-		if (!ptr)
-			return -ENOMEM;
-	}
-
 	return 0;
 }
 

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 9fbc3e6..efa77c1 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms

@@ -45,6 +45,7 @@
 
 config ARCH_HISI
 	bool "Hisilicon SoC Family"
+	select HISILICON_IRQ_MBIGEN
 	help
 	  This enables support for Hisilicon ARMv8 SoC family
 

diff --git a/arch/arm64/boot/dts/socionext/Makefile b/arch/arm64/boot/dts/socionext/Makefile
index 8d72771..299b67e 100644
--- a/arch/arm64/boot/dts/socionext/Makefile
+++ b/arch/arm64/boot/dts/socionext/Makefile

@@ -1,4 +1,4 @@
-dtb-$(CONFIG_ARCH_UNIPHIER) += uniphier-ph1-ld10-ref.dtb
+dtb-$(CONFIG_ARCH_UNIPHIER) += uniphier-ph1-ld20-ref.dtb
 
 always		:= $(dtb-y)
 clean-files	:= *.dtb

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20-ref.dts
similarity index 88%
rename from arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
rename to arch/arm64/boot/dts/socionext/uniphier-ph1-ld20-ref.dts
index 3e53317..727ae5f 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20-ref.dts

@@ -1,5 +1,5 @@
 /*
- * Device Tree Source for UniPhier PH1-LD10 Reference Board
+ * Device Tree Source for UniPhier PH1-LD20 Reference Board
  *
  * Copyright (C) 2015 Masahiro Yamada <yamada.masahiro@socionext.com>
  *
@@ -43,12 +43,12 @@
  */
 
 /dts-v1/;
-/include/ "uniphier-ph1-ld10.dtsi"
+/include/ "uniphier-ph1-ld20.dtsi"
 /include/ "uniphier-support-card.dtsi"
 
 / {
-	model = "UniPhier PH1-LD10 Reference Board";
-	compatible = "socionext,ph1-ld10-ref", "socionext,ph1-ld10";
+	model = "UniPhier PH1-LD20 Reference Board";
+	compatible = "socionext,ph1-ld20-ref", "socionext,ph1-ld20";
 
 	memory {
 		device_type = "memory";
@@ -74,14 +74,6 @@
 	};
 };
 
-&extbus {
-	ranges = <1 0x00000000 0x42000000 0x02000000>;
-};
-
-&support_card {
-	ranges = <0x00000000 1 0x01f00000 0x00100000>;
-};
-
 &ethsc {
 	interrupts = <0 48 4>;
 };

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi
similarity index 94%
rename from arch/arm64/boot/dts/socionext/uniphier-ph1-ld10.dtsi
rename to arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi
index 0296af9..e682a3f 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld10.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi

@@ -1,5 +1,5 @@
 /*
- * Device Tree Source for UniPhier PH1-LD10 SoC
+ * Device Tree Source for UniPhier PH1-LD20 SoC
  *
  * Copyright (C) 2015 Masahiro Yamada <yamada.masahiro@socionext.com>
  *
@@ -43,7 +43,7 @@
  */
 
 / {
-	compatible = "socionext,ph1-ld10";
+	compatible = "socionext,ph1-ld20";
 	#address-cells = <2>;
 	#size-cells = <2>;
 	interrupt-parent = <&gic>;
@@ -133,12 +133,6 @@
 		#size-cells = <1>;
 		ranges = <0 0 0 0xffffffff>;
 
-		extbus: extbus {
-			compatible = "simple-bus";
-			#address-cells = <2>;
-			#size-cells = <1>;
-		};
-
 		serial0: serial@54006800 {
 			compatible = "socionext,uniphier-uart";
 			status = "disabled";
@@ -261,8 +255,21 @@
 			clock-frequency = <400000>;
 		};
 
+		system_bus: system-bus@58c00000 {
+			compatible = "socionext,uniphier-system-bus";
+			status = "disabled";
+			reg = <0x58c00000 0x400>;
+			#address-cells = <2>;
+			#size-cells = <1>;
+		};
+
+		smpctrl@59800000 {
+			compatible = "socionext,uniphier-smpctrl";
+			reg = <0x59801000 0x400>;
+		};
+
 		pinctrl: pinctrl@5f801000 {
-			compatible = "socionext,ph1-ld10-pinctrl", "syscon";
+			compatible = "socionext,ph1-ld20-pinctrl", "syscon";
 			reg = <0x5f801000 0xe00>;
 		};
 

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f705051..a44ef99 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig

@@ -68,11 +68,13 @@
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
 CONFIG_XEN=y
-CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_CPUIDLE=y
+CONFIG_CPU_FREQ=y
+CONFIG_ARM_BIG_LITTLE_CPUFREQ=y
+CONFIG_ARM_SCPI_CPUFREQ=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -80,7 +82,6 @@
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 CONFIG_BPF_JIT=y
 # CONFIG_WIRELESS is not set
@@ -144,16 +145,18 @@
 CONFIG_SERIAL_MVEBU_UART=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
 CONFIG_I2C_MV64XXX=y
 CONFIG_I2C_QUP=y
+CONFIG_I2C_TEGRA=y
 CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
 CONFIG_SPI_QUP=y
 CONFIG_SPMI=y
+CONFIG_PINCTRL_SINGLE=y
 CONFIG_PINCTRL_MSM8916=y
 CONFIG_PINCTRL_QCOM_SPMI_PMIC=y
 CONFIG_GPIO_SYSFS=y
@@ -196,6 +199,7 @@
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
+CONFIG_USB_DWC2=y
 CONFIG_USB_CHIPIDEA=y
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CHIPIDEA_HOST=y
@@ -205,19 +209,20 @@
 CONFIG_USB_ULPI=y
 CONFIG_USB_GADGET=y
 CONFIG_MMC=y
-CONFIG_MMC_BLOCK_MINORS=16
+CONFIG_MMC_BLOCK_MINORS=32
 CONFIG_MMC_ARMMMCI=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SDHCI_MSM=y
 CONFIG_MMC_SPI=y
-CONFIG_MMC_SUNXI=y
 CONFIG_MMC_DW=y
 CONFIG_MMC_DW_EXYNOS=y
-CONFIG_MMC_BLOCK_MINORS=16
+CONFIG_MMC_DW_K3=y
+CONFIG_MMC_SUNXI=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
 CONFIG_LEDS_SYSCON=y
 CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=y
@@ -229,8 +234,8 @@
 CONFIG_RTC_DRV_SUN6I=y
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_QCOM_BAM_DMA=y
 CONFIG_TEGRA20_APB_DMA=y
+CONFIG_QCOM_BAM_DMA=y
 CONFIG_RCAR_DMAC=y
 CONFIG_VFIO=y
 CONFIG_VFIO_PCI=y
@@ -239,20 +244,26 @@
 CONFIG_VIRTIO_MMIO=y
 CONFIG_XEN_GNTDEV=y
 CONFIG_XEN_GRANT_DEV_ALLOC=y
+CONFIG_COMMON_CLK_SCPI=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_HWSPINLOCK_QCOM=y
+CONFIG_MAILBOX=y
+CONFIG_ARM_MHU=y
+CONFIG_HI6220_MBOX=y
 CONFIG_ARM_SMMU=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
 CONFIG_ARCH_TEGRA_132_SOC=y
 CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_HISILICON_IRQ_MBIGEN=y
 CONFIG_EXTCON_USB_GPIO=y
+CONFIG_COMMON_RESET_HI6220=y
 CONFIG_PHY_RCAR_GEN3_USB2=y
+CONFIG_PHY_HI6220_USB=y
 CONFIG_PHY_XGENE=y
+CONFIG_ARM_SCPI_PROTOCOL=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_FANOTIFY=y
@@ -264,6 +275,7 @@
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=y
 CONFIG_EFIVAR_FS=y
 CONFIG_SQUASHFS=y
 CONFIG_NFS_FS=y

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 22dda61..c64268d 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h

@@ -116,13 +116,6 @@
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-static inline void __local_flush_icache_all(void)
-{
-	asm("ic iallu");
-	dsb(nsh);
-	isb();
-}
-
 static inline void __flush_icache_all(void)
 {
 	asm("ic	ialluis");

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 6cb7e1a..0c2eec4 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h

@@ -18,7 +18,7 @@
 #ifndef __ASM_EXCEPTION_H
 #define __ASM_EXCEPTION_H
 
-#include <linux/ftrace.h>
+#include <linux/interrupt.h>
 
 #define __exception	__attribute__((section(".exception.text")))
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 0e391db..4150fd8 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h

@@ -124,7 +124,9 @@
 #define VTCR_EL2_SL0_LVL1	(1 << 6)
 #define VTCR_EL2_T0SZ_MASK	0x3f
 #define VTCR_EL2_T0SZ_40B	24
-#define VTCR_EL2_VS		19
+#define VTCR_EL2_VS_SHIFT	19
+#define VTCR_EL2_VS_8BIT	(0 << VTCR_EL2_VS_SHIFT)
+#define VTCR_EL2_VS_16BIT	(1 << VTCR_EL2_VS_SHIFT)
 
 /*
  * We configure the Stage-2 page tables to always restrict the IPA space to be

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 226f49d..eb7490d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h

@@ -26,7 +26,13 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
-#define kvm_ksym_ref(sym)		phys_to_virt((u64)&sym - kimage_voffset)
+#define kvm_ksym_ref(sym)						\
+	({								\
+		void *val = &sym;					\
+		if (!is_kernel_in_hyp_mode())				\
+			val = phys_to_virt((u64)&sym - kimage_voffset);	\
+		val;							\
+	 })
 
 #ifndef __ASSEMBLY__
 struct kvm;

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 227ed47..b7e82a7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h

@@ -27,7 +27,6 @@
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
-#include <asm/kvm_perf_event.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index a46b019..44eaff7 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h

@@ -21,7 +21,6 @@
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 #include <asm/kvm_mmu.h>
-#include <asm/kvm_perf_event.h>
 #include <asm/sysreg.h>
 
 #define __hyp_text __section(.hyp.text) notrace

diff --git a/arch/arm64/include/asm/kvm_perf_event.h b/arch/arm64/include/asm/kvm_perf_event.h
deleted file mode 100644
index c18fdeb..0000000
--- a/arch/arm64/include/asm/kvm_perf_event.h
+++ /dev/null

@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ASM_KVM_PERF_EVENT_H
-#define __ASM_KVM_PERF_EVENT_H
-
-#define	ARMV8_PMU_MAX_COUNTERS	32
-#define	ARMV8_PMU_COUNTER_MASK	(ARMV8_PMU_MAX_COUNTERS - 1)
-
-/*
- * Per-CPU PMCR: config reg
- */
-#define ARMV8_PMU_PMCR_E	(1 << 0) /* Enable all counters */
-#define ARMV8_PMU_PMCR_P	(1 << 1) /* Reset all counters */
-#define ARMV8_PMU_PMCR_C	(1 << 2) /* Cycle counter reset */
-#define ARMV8_PMU_PMCR_D	(1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV8_PMU_PMCR_X	(1 << 4) /* Export to ETM */
-#define ARMV8_PMU_PMCR_DP	(1 << 5) /* Disable CCNT if non-invasive debug*/
-/* Determines which bit of PMCCNTR_EL0 generates an overflow */
-#define ARMV8_PMU_PMCR_LC	(1 << 6)
-#define	ARMV8_PMU_PMCR_N_SHIFT	11	 /* Number of counters supported */
-#define	ARMV8_PMU_PMCR_N_MASK	0x1f
-#define	ARMV8_PMU_PMCR_MASK	0x7f	 /* Mask for writable bits */
-
-/*
- * PMOVSR: counters overflow flag status reg
- */
-#define	ARMV8_PMU_OVSR_MASK		0xffffffff	/* Mask for writable bits */
-#define	ARMV8_PMU_OVERFLOWED_MASK	ARMV8_PMU_OVSR_MASK
-
-/*
- * PMXEVTYPER: Event selection reg
- */
-#define	ARMV8_PMU_EVTYPE_MASK	0xc80003ff	/* Mask for writable bits */
-#define	ARMV8_PMU_EVTYPE_EVENT	0x3ff		/* Mask for EVENT bits */
-
-#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR	0	/* Software increment event */
-
-/*
- * Event filters for PMUv3
- */
-#define	ARMV8_PMU_EXCLUDE_EL1	(1 << 31)
-#define	ARMV8_PMU_EXCLUDE_EL0	(1 << 30)
-#define	ARMV8_PMU_INCLUDE_EL2	(1 << 27)
-
-/*
- * PMUSERENR: user enable reg
- */
-#define ARMV8_PMU_USERENR_MASK	0xf		/* Mask for writable bits */
-#define ARMV8_PMU_USERENR_EN	(1 << 0) /* PMU regs can be accessed at EL0 */
-#define ARMV8_PMU_USERENR_SW	(1 << 1) /* PMSWINC can be written at EL0 */
-#define ARMV8_PMU_USERENR_CR	(1 << 2) /* Cycle counter can be read at EL0 */
-#define ARMV8_PMU_USERENR_ER	(1 << 3) /* Event counter can be read at EL0 */
-
-#endif

diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
index 4e603ea..123f45d 100644
--- a/arch/arm64/include/asm/opcodes.h
+++ b/arch/arm64/include/asm/opcodes.h

@@ -1 +1,5 @@
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN
+#endif
+
 #include <../../arm/include/asm/opcodes.h>

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 7bd3cdb..2065f46 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h

@@ -17,6 +17,53 @@
 #ifndef __ASM_PERF_EVENT_H
 #define __ASM_PERF_EVENT_H
 
+#define	ARMV8_PMU_MAX_COUNTERS	32
+#define	ARMV8_PMU_COUNTER_MASK	(ARMV8_PMU_MAX_COUNTERS - 1)
+
+/*
+ * Per-CPU PMCR: config reg
+ */
+#define ARMV8_PMU_PMCR_E	(1 << 0) /* Enable all counters */
+#define ARMV8_PMU_PMCR_P	(1 << 1) /* Reset all counters */
+#define ARMV8_PMU_PMCR_C	(1 << 2) /* Cycle counter reset */
+#define ARMV8_PMU_PMCR_D	(1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMU_PMCR_X	(1 << 4) /* Export to ETM */
+#define ARMV8_PMU_PMCR_DP	(1 << 5) /* Disable CCNT if non-invasive debug*/
+#define ARMV8_PMU_PMCR_LC	(1 << 6) /* Overflow on 64 bit cycle counter */
+#define	ARMV8_PMU_PMCR_N_SHIFT	11	 /* Number of counters supported */
+#define	ARMV8_PMU_PMCR_N_MASK	0x1f
+#define	ARMV8_PMU_PMCR_MASK	0x7f	 /* Mask for writable bits */
+
+/*
+ * PMOVSR: counters overflow flag status reg
+ */
+#define	ARMV8_PMU_OVSR_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV8_PMU_OVERFLOWED_MASK	ARMV8_PMU_OVSR_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define	ARMV8_PMU_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
+#define	ARMV8_PMU_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */
+
+#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR	0	/* Software increment event */
+
+/*
+ * Event filters for PMUv3
+ */
+#define	ARMV8_PMU_EXCLUDE_EL1	(1 << 31)
+#define	ARMV8_PMU_EXCLUDE_EL0	(1 << 30)
+#define	ARMV8_PMU_INCLUDE_EL2	(1 << 27)
+
+/*
+ * PMUSERENR: user enable reg
+ */
+#define ARMV8_PMU_USERENR_MASK	0xf		/* Mask for writable bits */
+#define ARMV8_PMU_USERENR_EN	(1 << 0) /* PMU regs can be accessed at EL0 */
+#define ARMV8_PMU_USERENR_SW	(1 << 1) /* PMSWINC can be written at EL0 */
+#define ARMV8_PMU_USERENR_CR	(1 << 2) /* Cycle counter can be read at EL0 */
+#define ARMV8_PMU_USERENR_ER	(1 << 3) /* Event counter can be read at EL0 */
+
 #ifdef CONFIG_PERF_EVENTS
 struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 1a78d6e..1287416 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h

@@ -141,6 +141,9 @@
 #define ID_AA64MMFR1_VMIDBITS_SHIFT	4
 #define ID_AA64MMFR1_HADBS_SHIFT	0
 
+#define ID_AA64MMFR1_VMIDBITS_8		0
+#define ID_AA64MMFR1_VMIDBITS_16	2
+
 /* id_aa64mmfr2 */
 #define ID_AA64MMFR2_UAO_SHIFT		4
 

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1f7f5a2..12e8d2b 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S

@@ -277,7 +277,7 @@
  * Invalid mode handlers
  */
 	.macro	inv_entry, el, reason, regsize = 64
-	kernel_entry el, \regsize
+	kernel_entry \el, \regsize
 	mov	x0, sp
 	mov	x1, #\reason
 	mrs	x2, esr_el1

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 6ebd204..4203d5f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S

@@ -758,7 +758,7 @@
  */
 	.section	".idmap.text", "ax"
 __enable_mmu:
-	mrs	x18, sctlr_el1			// preserve old SCTLR_EL1 value
+	mrs	x22, sctlr_el1			// preserve old SCTLR_EL1 value
 	mrs	x1, ID_AA64MMFR0_EL1
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
@@ -786,14 +786,15 @@
 	 * to take into account by discarding the current kernel mapping and
 	 * creating a new one.
 	 */
-	msr	sctlr_el1, x18			// disable the MMU
+	msr	sctlr_el1, x22			// disable the MMU
 	isb
 	bl	__create_page_tables		// recreate kernel mapping
 
 	msr	sctlr_el1, x19			// re-enable the MMU
 	isb
-	ic	ialluis				// flush instructions fetched
-	isb					// via old mapping
+	ic	iallu				// flush instructions fetched
+	dsb	nsh				// via old mapping
+	isb
 	add	x27, x27, x23			// relocated __mmap_switched
 #endif
 	br	x27

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 767c4f6..f419a7c 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c

@@ -20,6 +20,7 @@
  */
 
 #include <asm/irq_regs.h>
+#include <asm/perf_event.h>
 #include <asm/virt.h>
 
 #include <linux/of.h>
@@ -384,9 +385,6 @@
 #define	ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
 	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
-#define	ARMV8_MAX_COUNTERS	32
-#define	ARMV8_COUNTER_MASK	(ARMV8_MAX_COUNTERS - 1)
-
 /*
  * ARMv8 low level PMU access
  */
@@ -395,40 +393,7 @@
  * Perf Event to low level counters mapping
  */
 #define	ARMV8_IDX_TO_COUNTER(x)	\
-	(((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK)
-
-/*
- * Per-CPU PMCR: config reg
- */
-#define ARMV8_PMCR_E		(1 << 0) /* Enable all counters */
-#define ARMV8_PMCR_P		(1 << 1) /* Reset all counters */
-#define ARMV8_PMCR_C		(1 << 2) /* Cycle counter reset */
-#define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */
-#define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
-#define ARMV8_PMCR_LC		(1 << 6) /* Overflow on 64 bit cycle counter */
-#define	ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */
-#define	ARMV8_PMCR_N_MASK	0x1f
-#define	ARMV8_PMCR_MASK		0x7f	 /* Mask for writable bits */
-
-/*
- * PMOVSR: counters overflow flag status reg
- */
-#define	ARMV8_OVSR_MASK		0xffffffff	/* Mask for writable bits */
-#define	ARMV8_OVERFLOWED_MASK	ARMV8_OVSR_MASK
-
-/*
- * PMXEVTYPER: Event selection reg
- */
-#define	ARMV8_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
-#define	ARMV8_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */
-
-/*
- * Event filters for PMUv3
- */
-#define	ARMV8_EXCLUDE_EL1	(1 << 31)
-#define	ARMV8_EXCLUDE_EL0	(1 << 30)
-#define	ARMV8_INCLUDE_EL2	(1 << 27)
+	(((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
 
 static inline u32 armv8pmu_pmcr_read(void)
 {
@@ -439,14 +404,14 @@
 
 static inline void armv8pmu_pmcr_write(u32 val)
 {
-	val &= ARMV8_PMCR_MASK;
+	val &= ARMV8_PMU_PMCR_MASK;
 	isb();
 	asm volatile("msr pmcr_el0, %0" :: "r" (val));
 }
 
 static inline int armv8pmu_has_overflowed(u32 pmovsr)
 {
-	return pmovsr & ARMV8_OVERFLOWED_MASK;
+	return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
 }
 
 static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
@@ -512,7 +477,7 @@
 static inline void armv8pmu_write_evtype(int idx, u32 val)
 {
 	if (armv8pmu_select_counter(idx) == idx) {
-		val &= ARMV8_EVTYPE_MASK;
+		val &= ARMV8_PMU_EVTYPE_MASK;
 		asm volatile("msr pmxevtyper_el0, %0" :: "r" (val));
 	}
 }
@@ -558,7 +523,7 @@
 	asm volatile("mrs %0, pmovsclr_el0" : "=r" (value));
 
 	/* Write to clear flags */
-	value &= ARMV8_OVSR_MASK;
+	value &= ARMV8_PMU_OVSR_MASK;
 	asm volatile("msr pmovsclr_el0, %0" :: "r" (value));
 
 	return value;
@@ -696,7 +661,7 @@
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
-	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E);
+	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
@@ -707,7 +672,7 @@
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
-	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E);
+	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
@@ -717,7 +682,7 @@
 	int idx;
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
-	unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT;
+	unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
 	if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
@@ -754,11 +719,11 @@
 	    attr->exclude_kernel != attr->exclude_hv)
 		return -EINVAL;
 	if (attr->exclude_user)
-		config_base |= ARMV8_EXCLUDE_EL0;
+		config_base |= ARMV8_PMU_EXCLUDE_EL0;
 	if (!is_kernel_in_hyp_mode() && attr->exclude_kernel)
-		config_base |= ARMV8_EXCLUDE_EL1;
+		config_base |= ARMV8_PMU_EXCLUDE_EL1;
 	if (!attr->exclude_hv)
-		config_base |= ARMV8_INCLUDE_EL2;
+		config_base |= ARMV8_PMU_INCLUDE_EL2;
 
 	/*
 	 * Install the filter into config_base as this is used to
@@ -784,35 +749,36 @@
 	 * Initialize & Reset PMNC. Request overflow interrupt for
 	 * 64 bit cycle counter but cheat in armv8pmu_write_counter().
 	 */
-	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C | ARMV8_PMCR_LC);
+	armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C |
+			    ARMV8_PMU_PMCR_LC);
 }
 
 static int armv8_pmuv3_map_event(struct perf_event *event)
 {
 	return armpmu_map_event(event, &armv8_pmuv3_perf_map,
 				&armv8_pmuv3_perf_cache_map,
-				ARMV8_EVTYPE_EVENT);
+				ARMV8_PMU_EVTYPE_EVENT);
 }
 
 static int armv8_a53_map_event(struct perf_event *event)
 {
 	return armpmu_map_event(event, &armv8_a53_perf_map,
 				&armv8_a53_perf_cache_map,
-				ARMV8_EVTYPE_EVENT);
+				ARMV8_PMU_EVTYPE_EVENT);
 }
 
 static int armv8_a57_map_event(struct perf_event *event)
 {
 	return armpmu_map_event(event, &armv8_a57_perf_map,
 				&armv8_a57_perf_cache_map,
-				ARMV8_EVTYPE_EVENT);
+				ARMV8_PMU_EVTYPE_EVENT);
 }
 
 static int armv8_thunder_map_event(struct perf_event *event)
 {
 	return armpmu_map_event(event, &armv8_thunder_perf_map,
 				&armv8_thunder_perf_cache_map,
-				ARMV8_EVTYPE_EVENT);
+				ARMV8_PMU_EVTYPE_EVENT);
 }
 
 static void armv8pmu_read_num_pmnc_events(void *info)
@@ -820,7 +786,7 @@
 	int *nb_cnt = info;
 
 	/* Read the nb of CNTx counters supported from PMNC */
-	*nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
+	*nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
 
 	/* Add the CPU cycles counter */
 	*nb_cnt += 1;

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 37f624df..5a1939a 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S

@@ -103,6 +103,7 @@
 			*(.exception.text)
 			__exception_text_end = .;
 			IRQENTRY_TEXT
+			SOFTIRQENTRY_TEXT
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT

diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index b6a8fc5..778d0ef 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile

@@ -16,3 +16,7 @@
 obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
 obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
 obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
+
+GCOV_PROFILE	:= n
+KASAN_SANITIZE	:= n
+UBSAN_SANITIZE	:= n

diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index bfc54fd..5a9f3bf 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c

@@ -36,8 +36,10 @@
 	 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
 	 * bit in VTCR_EL2.
 	 */
-	tmp = (read_sysreg(id_aa64mmfr1_el1) >> 4) & 0xf;
-	val |= (tmp == 2) ? VTCR_EL2_VS : 0;
+	tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf;
+	val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ?
+			VTCR_EL2_VS_16BIT :
+			VTCR_EL2_VS_8BIT;
 
 	write_sysreg(val, vtcr_el2);
 }

diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 60585bd..dbd12ea 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c

@@ -58,17 +58,13 @@
  * Copy user data from/to a page which is mapped into a different processes
  * address space.  Really, we want to allow our "user space" model to handle
  * this.
- *
- * Note that this code needs to run on the current CPU.
  */
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long uaddr, void *dst, const void *src,
 		       unsigned long len)
 {
-	preempt_disable();
 	memcpy(dst, src, len);
 	flush_ptrace_access(vma, page, uaddr, dst, len);
-	preempt_enable();
 }
 
 void __sync_icache_dcache(pte_t pte, unsigned long addr)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 61a38ea..ea989d8 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c

@@ -362,42 +362,38 @@
 #define MLG(b, t) b, t, ((t) - (b)) >> 30
 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
 
-	pr_notice("Virtual kernel memory layout:\n"
+	pr_notice("Virtual kernel memory layout:\n");
 #ifdef CONFIG_KASAN
-		  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+	pr_cont("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
+		MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
 #endif
-		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
-		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+	pr_cont("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+		MLM(MODULES_VADDR, MODULES_END));
+	pr_cont("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
+		MLG(VMALLOC_START, VMALLOC_END));
+	pr_cont("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		"    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		"      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		"      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		MLK_ROUNDUP(_text, __start_rodata),
+		MLK_ROUNDUP(__start_rodata, _etext),
+		MLK_ROUNDUP(__init_begin, __init_end),
+		MLK_ROUNDUP(_sdata, _edata));
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
-		  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"
+	pr_cont("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
+		"              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
+		MLG(VMEMMAP_START,
+		    VMEMMAP_START + VMEMMAP_SIZE),
+		MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
+		    (unsigned long)virt_to_page(high_memory)));
 #endif
-		  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"
-		  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
-#ifdef CONFIG_KASAN
-		  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
-#endif
-		  MLM(MODULES_VADDR, MODULES_END),
-		  MLG(VMALLOC_START, VMALLOC_END),
-		  MLK_ROUNDUP(_text, __start_rodata),
-		  MLK_ROUNDUP(__start_rodata, _etext),
-		  MLK_ROUNDUP(__init_begin, __init_end),
-		  MLK_ROUNDUP(_sdata, _edata),
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-		  MLG(VMEMMAP_START,
-		      VMEMMAP_START + VMEMMAP_SIZE),
-		  MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
-		      (unsigned long)virt_to_page(high_memory)),
-#endif
-		  MLK(FIXADDR_START, FIXADDR_TOP),
-		  MLM(PCI_IO_START, PCI_IO_END),
-		  MLM(__phys_to_virt(memblock_start_of_DRAM()),
-		      (unsigned long)high_memory));
+	pr_cont("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
+		MLK(FIXADDR_START, FIXADDR_TOP));
+	pr_cont("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+		MLM(PCI_IO_START, PCI_IO_END));
+	pr_cont("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+		MLM(__phys_to_virt(memblock_start_of_DRAM()),
+		    (unsigned long)high_memory));
 
 #undef MLK
 #undef MLM

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d2d8b8c..f3e5c74 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c

@@ -211,8 +211,7 @@
 		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
 		      block_mappings_allowed(pgtable_alloc)) {
 			pmd_t old_pmd =*pmd;
-			set_pmd(pmd, __pmd(phys |
-					   pgprot_val(mk_sect_prot(prot))));
+			pmd_set_huge(pmd, phys, prot);
 			/*
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
@@ -272,8 +271,7 @@
 		if (use_1G_block(addr, next, phys) &&
 		    block_mappings_allowed(pgtable_alloc)) {
 			pud_t old_pud = *pud;
-			set_pud(pud, __pud(phys |
-					   pgprot_val(mk_sect_prot(prot))));
+			pud_set_huge(pud, phys, prot);
 
 			/*
 			 * If we have an old value for a pud, it will

diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index c9eec84..d920b95 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S

@@ -35,6 +35,7 @@
 #endif
 		LOCK_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		KPROBES_TEXT
 #ifdef CONFIG_ROMKERNEL
 		__sinittext = .;

diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S
index 5a6e141..50bc10f 100644
--- a/arch/c6x/kernel/vmlinux.lds.S
+++ b/arch/c6x/kernel/vmlinux.lds.S

@@ -72,6 +72,7 @@
 		SCHED_TEXT
 		LOCK_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		KPROBES_TEXT
 		*(.fixup)
 		*(.gnu.warning)

diff --git a/arch/frv/include/asm/page.h b/arch/frv/include/asm/page.h
index 688d807..ec5eebc 100644
--- a/arch/frv/include/asm/page.h
+++ b/arch/frv/include/asm/page.h

@@ -8,9 +8,6 @@
 
 #ifndef __ASSEMBLY__
 
-#define get_user_page(vaddr)			__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)		free_page(addr)
-
 #define clear_page(pgaddr)			memset((pgaddr), 0, PAGE_SIZE)
 #define copy_page(to,from)			memcpy((to), (from), PAGE_SIZE)
 

diff --git a/arch/h8300/boot/dts/edosk2674.dts b/arch/h8300/boot/dts/edosk2674.dts
index 4ce9fa8..6ae884b 100644
--- a/arch/h8300/boot/dts/edosk2674.dts
+++ b/arch/h8300/boot/dts/edosk2674.dts

@@ -88,20 +88,20 @@
 		reg = <0xffff78 8>;
 		interrupts = <88 0>, <89 0>, <90 0>, <91 0>;
 		clocks = <&fclk>;
-		clock-names = "sci_ick";
+		clock-names = "fck";
 	};
 	sci1: serial@ffff80 {
 		compatible = "renesas,sci";
 		reg = <0xffff80 8>;
 		interrupts = <92 0>, <93 0>, <94 0>, <95 0>;
 		clocks = <&fclk>;
-		clock-names = "sci_ick";
+		clock-names = "fck";
 	};
 	sci2: serial@ffff88 {
 		compatible = "renesas,sci";
 		reg = <0xffff88 8>;
 		interrupts = <96 0>, <97 0>, <98 0>, <99 0>;
 		clocks = <&fclk>;
-		clock-names = "sci_ick";
+		clock-names = "fck";
 	};
 };

diff --git a/arch/h8300/boot/dts/h8300h_sim.dts b/arch/h8300/boot/dts/h8300h_sim.dts
index 545bfb5..9c733d9 100644
--- a/arch/h8300/boot/dts/h8300h_sim.dts
+++ b/arch/h8300/boot/dts/h8300h_sim.dts

@@ -83,7 +83,7 @@
 		reg = <0xffffb0 8>;
 		interrupts = <52 0>, <53 0>, <54 0>, <55 0>;
 		clocks = <&fclk>;
-		clock-names = "sci_ick";
+		clock-names = "fck";
 	};
 
 	sci1: serial@ffffb8 {
@@ -91,6 +91,6 @@
 		reg = <0xffffb8 8>;
 		interrupts = <56 0>, <57 0>, <58 0>, <59 0>;
 		clocks = <&fclk>;
-		clock-names = "sci_ick";
+		clock-names = "fck";
 	};
 };

diff --git a/arch/h8300/boot/dts/h8s_sim.dts b/arch/h8300/boot/dts/h8s_sim.dts
index bcedba5..97e1f4b 100644
--- a/arch/h8300/boot/dts/h8s_sim.dts
+++ b/arch/h8300/boot/dts/h8s_sim.dts

@@ -87,13 +87,13 @@
 		reg = <0xffff78 8>;
 		interrupts = <88 0>, <89 0>, <90 0>, <91 0>;
 		clocks = <&fclk>;
-		clock-names = "sci_ick";
+		clock-names = "fck";
 	};
 	sci1: serial@ffff80 {
 		compatible = "renesas,sci";
 		reg = <0xffff80 8>;
 		interrupts = <92 0>, <93 0>, <94 0>, <95 0>;
 		clocks = <&fclk>;
-		clock-names = "sci_ick";
+		clock-names = "fck";
 	};
 };

diff --git a/arch/h8300/configs/h8300h-sim_defconfig b/arch/h8300/configs/h8300h-sim_defconfig
index 067bfe9..80624f4 100644
--- a/arch/h8300/configs/h8300h-sim_defconfig
+++ b/arch/h8300/configs/h8300h-sim_defconfig

@@ -34,7 +34,7 @@
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_SH_SCI=y
-CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_SH_SCI_EARLYCON=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set

diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index e4985df..c8c25a4 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c

@@ -20,8 +20,6 @@
 #include <linux/bootmem.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
@@ -137,11 +135,6 @@
 	parse_early_param();
 
 	bootmem_init();
-#if defined(CONFIG_H8300H_SIM) || defined(CONFIG_H8S_SIM)
-	sim_console_register();
-#endif
-
-	early_platform_driver_probe("earlyprintk", 1, 0);
 	/*
 	 * get kmalloc into gear
 	 */

diff --git a/arch/h8300/kernel/sim-console.c b/arch/h8300/kernel/sim-console.c
index a15edf0..46138f5 100644
--- a/arch/h8300/kernel/sim-console.c
+++ b/arch/h8300/kernel/sim-console.c

@@ -1,79 +1,30 @@
 /*
- * arch/h8300/kernel/early_printk.c
+ * arch/h8300/kernel/sim-console.c
  *
- *  Copyright (C) 2009 Yoshinori Sato <ysato@users.sourceforge.jp>
+ *  Copyright (C) 2015 Yoshinori Sato <ysato@users.sourceforge.jp>
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
 #include <linux/console.h>
-#include <linux/tty.h>
 #include <linux/init.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
+#include <linux/serial_core.h>
 
-static void sim_write(struct console *co, const char *ptr,
-				 unsigned len)
+static void sim_write(struct console *con, const char *s, unsigned n)
 {
 	register const int fd __asm__("er0") = 1; /* stdout */
-	register const char *_ptr __asm__("er1") = ptr;
-	register const unsigned _len __asm__("er2") = len;
+	register const char *_ptr __asm__("er1") = s;
+	register const unsigned _len __asm__("er2") = n;
 
 	__asm__(".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */
 		: : "g"(fd), "g"(_ptr), "g"(_len));
 }
 
-static struct console sim_console = {
-	.name		= "sim_console",
-	.write		= sim_write,
-	.setup		= NULL,
-	.flags		= CON_PRINTBUFFER,
-	.index		= -1,
-};
-
-static char sim_console_buf[32];
-
-static int sim_probe(struct platform_device *pdev)
+static int __init sim_setup(struct earlycon_device *device, const char *opt)
 {
-	if (sim_console.data)
-		return -EEXIST;
-
-	if (!strstr(sim_console_buf, "keep"))
-		sim_console.flags |= CON_BOOT;
-
-	register_console(&sim_console);
+	device->con->write = sim_write;
 	return 0;
 }
 
-static int sim_remove(struct platform_device *pdev)
-{
-	return 0;
-}
-
-static struct platform_driver sim_driver = {
-	.probe		= sim_probe,
-	.remove		= sim_remove,
-	.driver		= {
-		.name	= "h8300-sim",
-		.owner	= THIS_MODULE,
-	},
-};
-
-early_platform_init_buffer("earlyprintk", &sim_driver,
-			   sim_console_buf, ARRAY_SIZE(sim_console_buf));
-
-static struct platform_device sim_console_device = {
-	.name		= "h8300-sim",
-	.id		= 0,
-};
-
-static struct platform_device *devices[] __initdata = {
-	&sim_console_device,
-};
-
-void __init sim_console_register(void)
-{
-	early_platform_add_devices(devices,
-				   ARRAY_SIZE(devices));
-}
+EARLYCON_DECLARE(h8sim, sim_setup);

diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
index 4f3fb6cc..2189d5d 100644
--- a/arch/ia64/include/asm/uaccess.h
+++ b/arch/ia64/include/asm/uaccess.h

@@ -341,13 +341,11 @@
 	__su_ret;						\
 })
 
-/* Generic code can't deal with the location-relative format that we use for compactness.  */
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
+#define ARCH_HAS_RELATIVE_EXTABLE
 
 struct exception_table_entry {
-	int addr;	/* location-relative address of insn this fixup is for */
-	int cont;	/* location-relative continuation addr.; if bit 2 is set, r9 is set to 0 */
+	int insn;	/* location-relative address of insn this fixup is for */
+	int fixup;	/* location-relative continuation addr.; if bit 2 is set, r9 is set to 0 */
 };
 
 extern void ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e);

diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 6a86850..8c85209 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h

@@ -11,7 +11,7 @@
 
 
 
-#define NR_syscalls			324 /* length of syscall table */
+#define NR_syscalls			326 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about

diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h
index 41369a1..ea5363d 100644
--- a/arch/ia64/include/uapi/asm/unistd.h
+++ b/arch/ia64/include/uapi/asm/unistd.h

@@ -337,5 +337,7 @@
 #define __NR_kcmp			1345
 #define __NR_mlock2			1346
 #define __NR_copy_file_range		1347
+#define __NR_preadv2			1348
+#define __NR_pwritev2			1349
 
 #endif /* _UAPI_ASM_IA64_UNISTD_H */

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 477c55e..cfaa7b2 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S

@@ -1773,5 +1773,7 @@
 	data8 sys_kcmp				// 1345
 	data8 sys_mlock2
 	data8 sys_copy_file_range
+	data8 sys_preadv2
+	data8 sys_pwritev2
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls

diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
index c99a41e..8f70bb2 100644
--- a/arch/ia64/mm/extable.c
+++ b/arch/ia64/mm/extable.c

@@ -5,107 +5,12 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
-#include <linux/sort.h>
-
 #include <asm/uaccess.h>
-#include <linux/module.h>
-
-static int cmp_ex(const void *a, const void *b)
-{
-	const struct exception_table_entry *l = a, *r = b;
-	u64 lip = (u64) &l->addr + l->addr;
-	u64 rip = (u64) &r->addr + r->addr;
-
-	/* avoid overflow */
-	if (lip > rip)
-		return 1;
-	if (lip < rip)
-		return -1;
-	return 0;
-}
-
-static void swap_ex(void *a, void *b, int size)
-{
-	struct exception_table_entry *l = a, *r = b, tmp;
-	u64 delta = (u64) r - (u64) l;
-
-	tmp = *l;
-	l->addr = r->addr + delta;
-	l->cont = r->cont + delta;
-	r->addr = tmp.addr - delta;
-	r->cont = tmp.cont - delta;
-}
-
-/*
- * Sort the exception table. It's usually already sorted, but there
- * may be unordered entries due to multiple text sections (such as the
- * .init text section). Note that the exception-table-entries contain
- * location-relative addresses, which requires a bit of care during
- * sorting to avoid overflows in the offset members (e.g., it would
- * not be safe to make a temporary copy of an exception-table entry on
- * the stack, because the stack may be more than 2GB away from the
- * exception-table).
- */
-void sort_extable (struct exception_table_entry *start,
-		   struct exception_table_entry *finish)
-{
-	sort(start, finish - start, sizeof(struct exception_table_entry),
-	     cmp_ex, swap_ex);
-}
-
-static inline unsigned long ex_to_addr(const struct exception_table_entry *x)
-{
-	return (unsigned long)&x->addr + x->addr;
-}
-
-#ifdef CONFIG_MODULES
-/*
- * Any entry referring to the module init will be at the beginning or
- * the end.
- */
-void trim_init_extable(struct module *m)
-{
-	/*trim the beginning*/
-	while (m->num_exentries &&
-	       within_module_init(ex_to_addr(&m->extable[0]), m)) {
-		m->extable++;
-		m->num_exentries--;
-	}
-	/*trim the end*/
-	while (m->num_exentries &&
-	       within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]),
-				  m))
-		m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */
-
-const struct exception_table_entry *
-search_extable (const struct exception_table_entry *first,
-		const struct exception_table_entry *last,
-		unsigned long ip)
-{
-	const struct exception_table_entry *mid;
-	unsigned long mid_ip;
-	long diff;
-
-        while (first <= last) {
-		mid = &first[(last - first)/2];
-		mid_ip = (u64) &mid->addr + mid->addr;
-		diff = mid_ip - ip;
-                if (diff == 0)
-                        return mid;
-                else if (diff < 0)
-                        first = mid + 1;
-                else
-                        last = mid - 1;
-        }
-        return NULL;
-}
 
 void
 ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e)
 {
-	long fix = (u64) &e->cont + e->cont;
+	long fix = (u64) &e->fixup + e->fixup;
 
 	regs->r8 = -EFAULT;
 	if (fix & 4)

diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 5029f73..e7a1946 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h

@@ -6,9 +6,6 @@
 #include <linux/compiler.h>
 #include <asm/module.h>
 
-#define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)	free_page(addr)
-
 /*
  * We don't need to check for alignment etc.
  */

diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index ef20916..fa7f32d 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h

@@ -6,9 +6,6 @@
 extern unsigned long memory_start;
 extern unsigned long memory_end;
 
-#define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)	free_page(addr)
-
 #define clear_page(page)	memset((page), 0, PAGE_SIZE)
 #define copy_page(to,from)	memcpy((to), (from), PAGE_SIZE)
 

diff --git a/arch/metag/kernel/vmlinux.lds.S b/arch/metag/kernel/vmlinux.lds.S
index e12055e..150ace9 100644
--- a/arch/metag/kernel/vmlinux.lds.S
+++ b/arch/metag/kernel/vmlinux.lds.S

@@ -24,6 +24,7 @@
 	LOCK_TEXT
 	KPROBES_TEXT
 	IRQENTRY_TEXT
+	SOFTIRQENTRY_TEXT
 	*(.text.*)
 	*(.gnu.warning)
 	}

diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index be9488d..0a47f04 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S

@@ -36,6 +36,7 @@
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		. = ALIGN (4) ;
 		_etext = . ;
 	}

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 7c4a4ce..2018c2b 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig

@@ -328,7 +328,6 @@
 	select ARCH_REQUIRE_GPIOLIB
 	select SWAP_IO_SPACE
 	select BOOT_RAW
-	select HAVE_MACH_CLKDEV
 	select CLKDEV_LOOKUP
 	select USE_OF
 	select PINCTRL
@@ -590,7 +589,6 @@
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_MIPS16
 	select SYS_HAS_EARLY_PRINTK
-	select HAVE_MACH_CLKDEV
 	select CLKDEV_LOOKUP
 	select ARCH_HAS_RESET_CONTROLLER
 	select RESET_CONTROLLER

diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c
index 745695d..f2f264b 100644
--- a/arch/mips/alchemy/common/dbdma.c
+++ b/arch/mips/alchemy/common/dbdma.c

@@ -261,7 +261,7 @@
 	au1x_dma_chan_t *cp;
 
 	/*
-	 * We do the intialization on the first channel allocation.
+	 * We do the initialization on the first channel allocation.
 	 * We have to wait because of the interrupt handler initialization
 	 * which can't be done successfully during board set up.
 	 */
@@ -964,7 +964,7 @@
 	dp->dscr_source1 = dscr->dscr_source1;
 	dp->dscr_cmd1 = dscr->dscr_cmd1;
 	nbytes = dscr->dscr_cmd1;
-	/* Allow the caller to specifiy if an interrupt is generated */
+	/* Allow the caller to specify if an interrupt is generated */
 	dp->dscr_cmd0 &= ~DSCR_CMD0_IE;
 	dp->dscr_cmd0 |= dscr->dscr_cmd0 | DSCR_CMD0_V;
 	ctp->chan_ptr->ddma_dbell = 0;

diff --git a/arch/mips/alchemy/devboards/db1000.c b/arch/mips/alchemy/devboards/db1000.c
index bdeed9d..433c4b9 100644
--- a/arch/mips/alchemy/devboards/db1000.c
+++ b/arch/mips/alchemy/devboards/db1000.c

@@ -503,15 +503,15 @@
 	if (board == BCSR_WHOAMI_DB1500) {
 		c0 = AU1500_GPIO2_INT;
 		c1 = AU1500_GPIO5_INT;
-		d0 = AU1500_GPIO0_INT;
-		d1 = AU1500_GPIO3_INT;
+		d0 = 0;	/* GPIO number, NOT irq! */
+		d1 = 3; /* GPIO number, NOT irq! */
 		s0 = AU1500_GPIO1_INT;
 		s1 = AU1500_GPIO4_INT;
 	} else if (board == BCSR_WHOAMI_DB1100) {
 		c0 = AU1100_GPIO2_INT;
 		c1 = AU1100_GPIO5_INT;
-		d0 = AU1100_GPIO0_INT;
-		d1 = AU1100_GPIO3_INT;
+		d0 = 0; /* GPIO number, NOT irq! */
+		d1 = 3; /* GPIO number, NOT irq! */
 		s0 = AU1100_GPIO1_INT;
 		s1 = AU1100_GPIO4_INT;
 
@@ -545,15 +545,15 @@
 	} else if (board == BCSR_WHOAMI_DB1000) {
 		c0 = AU1000_GPIO2_INT;
 		c1 = AU1000_GPIO5_INT;
-		d0 = AU1000_GPIO0_INT;
-		d1 = AU1000_GPIO3_INT;
+		d0 = 0; /* GPIO number, NOT irq! */
+		d1 = 3; /* GPIO number, NOT irq! */
 		s0 = AU1000_GPIO1_INT;
 		s1 = AU1000_GPIO4_INT;
 		platform_add_devices(db1000_devs, ARRAY_SIZE(db1000_devs));
 	} else if ((board == BCSR_WHOAMI_PB1500) ||
 		   (board == BCSR_WHOAMI_PB1500R2)) {
 		c0 = AU1500_GPIO203_INT;
-		d0 = AU1500_GPIO201_INT;
+		d0 = 1; /* GPIO number, NOT irq! */
 		s0 = AU1500_GPIO202_INT;
 		twosocks = 0;
 		flashsize = 64;
@@ -566,7 +566,7 @@
 		 */
 	} else if (board == BCSR_WHOAMI_PB1100) {
 		c0 = AU1100_GPIO11_INT;
-		d0 = AU1100_GPIO9_INT;
+		d0 = 9; /* GPIO number, NOT irq! */
 		s0 = AU1100_GPIO10_INT;
 		twosocks = 0;
 		flashsize = 64;
@@ -583,7 +583,6 @@
 	} else
 		return 0; /* unknown board, no further dev setup to do */
 
-	irq_set_irq_type(d0, IRQ_TYPE_EDGE_BOTH);
 	irq_set_irq_type(c0, IRQ_TYPE_LEVEL_LOW);
 	irq_set_irq_type(s0, IRQ_TYPE_LEVEL_LOW);
 
@@ -597,7 +596,6 @@
 		c0, d0, /*s0*/0, 0, 0);
 
 	if (twosocks) {
-		irq_set_irq_type(d1, IRQ_TYPE_EDGE_BOTH);
 		irq_set_irq_type(c1, IRQ_TYPE_LEVEL_LOW);
 		irq_set_irq_type(s1, IRQ_TYPE_LEVEL_LOW);
 

diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c
index b518f02..1c01d6e 100644
--- a/arch/mips/alchemy/devboards/db1550.c
+++ b/arch/mips/alchemy/devboards/db1550.c

@@ -514,7 +514,7 @@
 		AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x000400000 - 1,
 		AU1000_PCMCIA_IO_PHYS_ADDR,
 		AU1000_PCMCIA_IO_PHYS_ADDR   + 0x000010000 - 1,
-		AU1550_GPIO3_INT, AU1550_GPIO0_INT,
+		AU1550_GPIO3_INT, 0,
 		/*AU1550_GPIO21_INT*/0, 0, 0);
 
 	db1x_register_pcmcia_socket(
@@ -524,7 +524,7 @@
 		AU1000_PCMCIA_MEM_PHYS_ADDR  + 0x004400000 - 1,
 		AU1000_PCMCIA_IO_PHYS_ADDR   + 0x004000000,
 		AU1000_PCMCIA_IO_PHYS_ADDR   + 0x004010000 - 1,
-		AU1550_GPIO5_INT, AU1550_GPIO1_INT,
+		AU1550_GPIO5_INT, 1,
 		/*AU1550_GPIO22_INT*/0, 0, 1);
 
 	platform_device_register(&db1550_nand_dev);

diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c
index eb5117c..618dfd7 100644
--- a/arch/mips/ath79/clock.c
+++ b/arch/mips/ath79/clock.c

@@ -26,8 +26,7 @@
 #include "common.h"
 
 #define AR71XX_BASE_FREQ	40000000
-#define AR724X_BASE_FREQ	5000000
-#define AR913X_BASE_FREQ	5000000
+#define AR724X_BASE_FREQ	40000000
 
 static struct clk *clks[3];
 static struct clk_onecell_data clk_data = {
@@ -103,8 +102,8 @@
 	div = ((pll >> AR724X_PLL_FB_SHIFT) & AR724X_PLL_FB_MASK);
 	freq = div * ref_rate;
 
-	div = ((pll >> AR724X_PLL_REF_DIV_SHIFT) & AR724X_PLL_REF_DIV_MASK);
-	freq *= div;
+	div = ((pll >> AR724X_PLL_REF_DIV_SHIFT) & AR724X_PLL_REF_DIV_MASK) * 2;
+	freq /= div;
 
 	cpu_rate = freq;
 
@@ -123,39 +122,6 @@
 	clk_add_alias("uart", NULL, "ahb", NULL);
 }
 
-static void __init ar913x_clocks_init(void)
-{
-	unsigned long ref_rate;
-	unsigned long cpu_rate;
-	unsigned long ddr_rate;
-	unsigned long ahb_rate;
-	u32 pll;
-	u32 freq;
-	u32 div;
-
-	ref_rate = AR913X_BASE_FREQ;
-	pll = ath79_pll_rr(AR913X_PLL_REG_CPU_CONFIG);
-
-	div = ((pll >> AR913X_PLL_FB_SHIFT) & AR913X_PLL_FB_MASK);
-	freq = div * ref_rate;
-
-	cpu_rate = freq;
-
-	div = ((pll >> AR913X_DDR_DIV_SHIFT) & AR913X_DDR_DIV_MASK) + 1;
-	ddr_rate = freq / div;
-
-	div = (((pll >> AR913X_AHB_DIV_SHIFT) & AR913X_AHB_DIV_MASK) + 1) * 2;
-	ahb_rate = cpu_rate / div;
-
-	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate);
-
-	clk_add_alias("wdt", NULL, "ahb", NULL);
-	clk_add_alias("uart", NULL, "ahb", NULL);
-}
-
 static void __init ar933x_clocks_init(void)
 {
 	unsigned long ref_rate;
@@ -443,10 +409,8 @@
 {
 	if (soc_is_ar71xx())
 		ar71xx_clocks_init();
-	else if (soc_is_ar724x())
+	else if (soc_is_ar724x() || soc_is_ar913x())
 		ar724x_clocks_init();
-	else if (soc_is_ar913x())
-		ar913x_clocks_init();
 	else if (soc_is_ar933x())
 		ar933x_clocks_init();
 	else if (soc_is_ar934x())

diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c
index 959c145..ca7ad13 100644
--- a/arch/mips/bcm47xx/sprom.c
+++ b/arch/mips/bcm47xx/sprom.c

@@ -714,11 +714,11 @@
 {
 #if defined(CONFIG_BCM47XX_SSB)
 	if (ssb_arch_register_fallback_sprom(&bcm47xx_get_sprom_ssb))
-		pr_warn("Failed to registered ssb SPROM handler\n");
+		pr_warn("Failed to register ssb SPROM handler\n");
 #endif
 
 #if defined(CONFIG_BCM47XX_BCMA)
 	if (bcma_arch_register_fallback_sprom(&bcm47xx_get_sprom_bcma))
-		pr_warn("Failed to registered bcma SPROM handler\n");
+		pr_warn("Failed to register bcma SPROM handler\n");
 #endif
 }

diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 4eff1ef..309d2ad 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile

@@ -39,10 +39,11 @@
 vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY)		   += $(obj)/uart-alchemy.o
 endif
 
-vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
+vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o $(obj)/bswapsi.o
 
-$(obj)/ashldi3.o: KBUILD_CFLAGS += -I$(srctree)/arch/mips/lib
-$(obj)/ashldi3.c: $(srctree)/arch/mips/lib/ashldi3.c
+extra-y += ashldi3.c bswapsi.c
+$(obj)/ashldi3.o $(obj)/bswapsi.o: KBUILD_CFLAGS += -I$(srctree)/arch/mips/lib
+$(obj)/ashldi3.c $(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c
 	$(call cmd,shipped)
 
 targets := $(notdir $(vmlinuzobjs-y))

diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi
index adb33e3..56035e5 100644
--- a/arch/mips/boot/dts/brcm/bcm7435.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi

@@ -82,7 +82,7 @@
 		};
 
 		gisb-arb@400000 {
-			compatible = "brcm,bcm7400-gisb-arb";
+			compatible = "brcm,bcm7435-gisb-arb";
 			reg = <0x400000 0xdc>;
 			native-endian;
 			interrupt-parent = <&sun_l2_intc>;

diff --git a/arch/mips/boot/dts/qca/ar9132.dtsi b/arch/mips/boot/dts/qca/ar9132.dtsi
index 3ad4ba9..3c2ed9e 100644
--- a/arch/mips/boot/dts/qca/ar9132.dtsi
+++ b/arch/mips/boot/dts/qca/ar9132.dtsi

@@ -83,7 +83,7 @@
 			};
 
 			pll: pll-controller@18050000 {
-				compatible = "qca,ar9132-ppl",
+				compatible = "qca,ar9132-pll",
 						"qca,ar9130-pll";
 				reg = <0x18050000 0x20>;
 

diff --git a/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts b/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
index e535ee3..4f1540e5f 100644
--- a/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
+++ b/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts

@@ -18,7 +18,7 @@
 		reg = <0x0 0x2000000>;
 	};
 
-	extosc: oscillator {
+	extosc: ref {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
 		clock-frequency = <40000000>;

diff --git a/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c b/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
index e59d1b7..2f415d9 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c

@@ -68,7 +68,7 @@
 		gmx_rx_int_en.s.pause_drp = 1;
 		/* Skipping gmx_rx_int_en.s.reserved_16_18 */
 		/*gmx_rx_int_en.s.ifgerr = 1; */
-		/*gmx_rx_int_en.s.coldet = 1; // Collsion detect */
+		/*gmx_rx_int_en.s.coldet = 1; // Collision detect */
 		/*gmx_rx_int_en.s.falerr = 1; // False carrier error or extend error after slottime */
 		/*gmx_rx_int_en.s.rsverr = 1; // RGMII reserved opcodes */
 		/*gmx_rx_int_en.s.pcterr = 1; // Bad Preamble / Protocol */
@@ -89,7 +89,7 @@
 		/*gmx_rx_int_en.s.phy_spd = 1; */
 		/*gmx_rx_int_en.s.phy_link = 1; */
 		/*gmx_rx_int_en.s.ifgerr = 1; */
-		/*gmx_rx_int_en.s.coldet = 1; // Collsion detect */
+		/*gmx_rx_int_en.s.coldet = 1; // Collision detect */
 		/*gmx_rx_int_en.s.falerr = 1; // False carrier error or extend error after slottime */
 		/*gmx_rx_int_en.s.rsverr = 1; // RGMII reserved opcodes */
 		/*gmx_rx_int_en.s.pcterr = 1; // Bad Preamble / Protocol */
@@ -112,7 +112,7 @@
 		/*gmx_rx_int_en.s.phy_spd = 1; */
 		/*gmx_rx_int_en.s.phy_link = 1; */
 		/*gmx_rx_int_en.s.ifgerr = 1; */
-		/*gmx_rx_int_en.s.coldet = 1; // Collsion detect */
+		/*gmx_rx_int_en.s.coldet = 1; // Collision detect */
 		/*gmx_rx_int_en.s.falerr = 1; // False carrier error or extend error after slottime */
 		/*gmx_rx_int_en.s.rsverr = 1; // RGMII reserved opcodes */
 		/*gmx_rx_int_en.s.pcterr = 1; // Bad Preamble / Protocol */
@@ -134,7 +134,7 @@
 		/*gmx_rx_int_en.s.phy_spd = 1; */
 		/*gmx_rx_int_en.s.phy_link = 1; */
 		/*gmx_rx_int_en.s.ifgerr = 1; */
-		/*gmx_rx_int_en.s.coldet = 1; // Collsion detect */
+		/*gmx_rx_int_en.s.coldet = 1; // Collision detect */
 		/*gmx_rx_int_en.s.falerr = 1; // False carrier error or extend error after slottime */
 		/*gmx_rx_int_en.s.rsverr = 1; // RGMII reserved opcodes */
 		/*gmx_rx_int_en.s.pcterr = 1; // Bad Preamble / Protocol */
@@ -156,7 +156,7 @@
 		/*gmx_rx_int_en.s.phy_spd = 1; */
 		/*gmx_rx_int_en.s.phy_link = 1; */
 		/*gmx_rx_int_en.s.ifgerr = 1; */
-		/*gmx_rx_int_en.s.coldet = 1; // Collsion detect */
+		/*gmx_rx_int_en.s.coldet = 1; // Collision detect */
 		/*gmx_rx_int_en.s.falerr = 1; // False carrier error or extend error after slottime */
 		/*gmx_rx_int_en.s.rsverr = 1; // RGMII reserved opcodes */
 		/*gmx_rx_int_en.s.pcterr = 1; // Bad Preamble / Protocol */
@@ -179,7 +179,7 @@
 		/*gmx_rx_int_en.s.phy_spd = 1; */
 		/*gmx_rx_int_en.s.phy_link = 1; */
 		/*gmx_rx_int_en.s.ifgerr = 1; */
-		/*gmx_rx_int_en.s.coldet = 1; // Collsion detect */
+		/*gmx_rx_int_en.s.coldet = 1; // Collision detect */
 		/*gmx_rx_int_en.s.falerr = 1; // False carrier error or extend error after slottime */
 		/*gmx_rx_int_en.s.rsverr = 1; // RGMII reserved opcodes */
 		/*gmx_rx_int_en.s.pcterr = 1; // Bad Preamble / Protocol */
@@ -209,7 +209,7 @@
 		gmx_rx_int_en.s.pause_drp = 1;
 		/* Skipping gmx_rx_int_en.s.reserved_16_18 */
 		/*gmx_rx_int_en.s.ifgerr = 1; */
-		/*gmx_rx_int_en.s.coldet = 1; // Collsion detect */
+		/*gmx_rx_int_en.s.coldet = 1; // Collision detect */
 		/*gmx_rx_int_en.s.falerr = 1; // False carrier error or extend error after slottime */
 		/*gmx_rx_int_en.s.rsverr = 1; // RGMII reserved opcodes */
 		/*gmx_rx_int_en.s.pcterr = 1; // Bad Preamble / Protocol */

diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c
index 87be167..676fab5 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-pko.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c

@@ -189,7 +189,7 @@
 	/*
 	 * Set the size of the PKO command buffers to an odd number of
 	 * 64bit words. This allows the normal two word send to stay
-	 * aligned and never span a comamnd word buffer.
+	 * aligned and never span a command word buffer.
 	 */
 	config.u64 = 0;
 	config.s.pool = CVMX_FPA_OUTPUT_BUFFER_POOL;

diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index b7fa9ae..42412ba 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c

@@ -331,7 +331,7 @@
 	}
 
 	if (!(avail_coremask & (1 << coreid))) {
-		/* core not available, assume, that catched by simple-executive */
+		/* core not available, assume, that caught by simple-executive */
 		cvmx_write_csr(CVMX_CIU_PP_RST, 1 << coreid);
 		cvmx_write_csr(CVMX_CIU_PP_RST, 0);
 	}

diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig
index 4e36b6e..43e0ba2 100644
--- a/arch/mips/configs/ci20_defconfig
+++ b/arch/mips/configs/ci20_defconfig

@@ -17,13 +17,12 @@
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -52,6 +51,11 @@
 # CONFIG_ALLOW_DEV_COREDUMP is not set
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=32
+CONFIG_MTD=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_JZ4780=y
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_FASTMAP=y
 CONFIG_NETDEVICES=y
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_CADENCE is not set
@@ -103,7 +107,7 @@
 # CONFIG_PROC_PAGE_MONITOR is not set
 CONFIG_TMPFS=y
 CONFIG_CONFIGFS_FS=y
-# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_UBIFS_FS=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_NLS=y
 CONFIG_NLS_CODEPAGE_437=y

diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index 8c6f508..d7b9918 100644
--- a/arch/mips/dec/int-handler.S
+++ b/arch/mips/dec/int-handler.S

@@ -5,7 +5,7 @@
  * Written by Ralf Baechle and Andreas Busse, modified for DECstation
  * support by Paul Antoine and Harald Koerfgen.
  *
- * completly rewritten:
+ * completely rewritten:
  * Copyright (C) 1998 Harald Koerfgen
  *
  * Rewritten extensively for controller-driven IRQ support

diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c
index 5537b94..0d75b5a 100644
--- a/arch/mips/fw/arc/memory.c
+++ b/arch/mips/fw/arc/memory.c

@@ -9,7 +9,7 @@
  * PROM library functions for acquiring/using memory descriptors given to us
  * from the ARCS firmware.  This is only used when CONFIG_ARC_MEMORY is set
  * because on some machines like SGI IP27 the ARC memory configuration data
- * completly bogus and alternate easier to use mechanisms are available.
+ * completely bogus and alternate easier to use mechanisms are available.
  */
 #include <linux/init.h>
 #include <linux/kernel.h>

diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index e7dc785..af12c1f 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h

@@ -102,7 +102,7 @@
 extern void cpu_report(void);
 
 extern const char *__cpu_name[];
-#define cpu_name_string()	__cpu_name[smp_processor_id()]
+#define cpu_name_string()	__cpu_name[raw_smp_processor_id()]
 
 struct seq_file;
 struct notifier_block;

diff --git a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
index cf92fe7..c4873e8 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h

@@ -141,7 +141,7 @@
 .endm
 
 /*
- * Do SMP slave processor setup necessary before we can savely execute C code.
+ * Do SMP slave processor setup necessary before we can safely execute C code.
  */
 	.macro	smp_slave_setup
 	.endm

diff --git a/arch/mips/include/asm/mach-generic/kernel-entry-init.h b/arch/mips/include/asm/mach-generic/kernel-entry-init.h
index 13b0751..a229297 100644
--- a/arch/mips/include/asm/mach-generic/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-generic/kernel-entry-init.h

@@ -16,7 +16,7 @@
 	.endm
 
 /*
- * Do SMP slave processor setup necessary before we can savely execute C code.
+ * Do SMP slave processor setup necessary before we can safely execute C code.
  */
 	.macro	smp_slave_setup
 	.endm

diff --git a/arch/mips/include/asm/mach-ip27/irq.h b/arch/mips/include/asm/mach-ip27/irq.h
index cf4384b..b0b7261 100644
--- a/arch/mips/include/asm/mach-ip27/irq.h
+++ b/arch/mips/include/asm/mach-ip27/irq.h

@@ -11,7 +11,7 @@
 #define __ASM_MACH_IP27_IRQ_H
 
 /*
- * A hardwired interrupt number is completly stupid for this system - a
+ * A hardwired interrupt number is completely stupid for this system - a
  * large configuration might have thousands if not tenthousands of
  * interrupts.
  */

diff --git a/arch/mips/include/asm/mach-ip27/kernel-entry-init.h b/arch/mips/include/asm/mach-ip27/kernel-entry-init.h
index b087cb8..f992c1d 100644
--- a/arch/mips/include/asm/mach-ip27/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-ip27/kernel-entry-init.h

@@ -81,7 +81,7 @@
 	.endm
 
 /*
- * Do SMP slave processor setup necessary before we can savely execute C code.
+ * Do SMP slave processor setup necessary before we can safely execute C code.
  */
 	.macro	smp_slave_setup
 	GET_NASID_ASM	t1

diff --git a/arch/mips/include/asm/mach-jz4740/gpio.h b/arch/mips/include/asm/mach-jz4740/gpio.h
index bf8c3e1..7c7708a 100644
--- a/arch/mips/include/asm/mach-jz4740/gpio.h
+++ b/arch/mips/include/asm/mach-jz4740/gpio.h

@@ -27,7 +27,7 @@
 
 /*
  Usually a driver for a SoC component has to request several gpio pins and
- configure them as funcion pins.
+ configure them as function pins.
  jz_gpio_bulk_request can be used to ease this process.
  Usually one would do something like:
 

diff --git a/arch/mips/include/asm/mach-jz4740/jz4740_nand.h b/arch/mips/include/asm/mach-jz4740/jz4740_nand.h
index 79cff26..398733e 100644
--- a/arch/mips/include/asm/mach-jz4740/jz4740_nand.h
+++ b/arch/mips/include/asm/mach-jz4740/jz4740_nand.h

@@ -25,8 +25,6 @@
 	int			num_partitions;
 	struct mtd_partition	*partitions;
 
-	struct nand_ecclayout	*ecc_layout;
-
 	unsigned char banks[JZ_NAND_NUM_BANKS];
 
 	void (*ident_callback)(struct platform_device *, struct nand_chip *,

diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index b196825..d463539 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h

@@ -28,7 +28,7 @@
  * This function returns the physical base address of the Coherence Manager
  * global control block, or 0 if no Coherence Manager is present. It provides
  * a default implementation which reads the CMGCRBase register where available,
- * and may be overriden by platforms which determine this address in a
+ * and may be overridden by platforms which determine this address in a
  * different way by defining a function with the same prototype except for the
  * name mips_cm_phys_base (without underscores).
  */

diff --git a/arch/mips/include/asm/mips-r2-to-r6-emul.h b/arch/mips/include/asm/mips-r2-to-r6-emul.h
index 1f6ea83..20621e1 100644
--- a/arch/mips/include/asm/mips-r2-to-r6-emul.h
+++ b/arch/mips/include/asm/mips-r2-to-r6-emul.h

@@ -79,7 +79,7 @@
 };
 
 
-extern void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
+extern void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
 			  const char *str);
 
 #ifndef CONFIG_MIPSR2_TO_R6_EMULATOR

diff --git a/arch/mips/include/asm/octeon/cvmx-config.h b/arch/mips/include/asm/octeon/cvmx-config.h
index f7dd17d..f4f1996 100644
--- a/arch/mips/include/asm/octeon/cvmx-config.h
+++ b/arch/mips/include/asm/octeon/cvmx-config.h

@@ -33,7 +33,7 @@
 /* Packet buffers */
 #define CVMX_FPA_PACKET_POOL		    (0)
 #define CVMX_FPA_PACKET_POOL_SIZE	    CVMX_FPA_POOL_0_SIZE
-/* Work queue entrys */
+/* Work queue entries */
 #define CVMX_FPA_WQE_POOL		    (1)
 #define CVMX_FPA_WQE_POOL_SIZE		    CVMX_FPA_POOL_1_SIZE
 /* PKO queue command buffers */

diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 19e139c..3e982e0 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h

@@ -189,7 +189,7 @@
 static inline void *cvmx_phys_to_ptr(uint64_t physical_address)
 {
 	if (sizeof(void *) == 8) {
-		/* Just set the top bit, avoiding any TLB uglyness */
+		/* Just set the top bit, avoiding any TLB ugliness */
 		return CASTPTR(void,
 			       CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
 					    physical_address));

diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h
index 8d7a63b..3206245 100644
--- a/arch/mips/include/asm/pci/bridge.h
+++ b/arch/mips/include/asm/pci/bridge.h

@@ -269,16 +269,16 @@
 	union {
 		u32		cmd_word;
 		struct {
-			u32	didn:4,		/* Destination ID */
-				sidn:4,		/* Source ID	  */
-				pactyp:4,	/* Packet type	  */
-				tnum:5,		/* Trans Number	  */
-				coh:1,		/* Coh Transacti  */
-				ds:2,		/* Data size	  */
-				gbr:1,		/* GBR enable	  */
-				vbpm:1,		/* VBPM message	  */
+			u32	didn:4,		/* Destination ID  */
+				sidn:4,		/* Source ID	   */
+				pactyp:4,	/* Packet type	   */
+				tnum:5,		/* Trans Number	   */
+				coh:1,		/* Coh Transaction */
+				ds:2,		/* Data size	   */
+				gbr:1,		/* GBR enable	   */
+				vbpm:1,		/* VBPM message	   */
 				error:1,	/* Error occurred  */
-				barr:1,		/* Barrier op	  */
+				barr:1,		/* Barrier op	   */
 				rsvd:8;
 		} berr_st;
 	} berr_un;

diff --git a/arch/mips/include/asm/sgi/hpc3.h b/arch/mips/include/asm/sgi/hpc3.h
index 59920b3..4a9c990 100644
--- a/arch/mips/include/asm/sgi/hpc3.h
+++ b/arch/mips/include/asm/sgi/hpc3.h

@@ -147,7 +147,7 @@
 #define HPC3_EPCFG_P1	 0x000f /* Cycles to spend in P1 state for PIO */
 #define HPC3_EPCFG_P2	 0x00f0 /* Cycles to spend in P2 state for PIO */
 #define HPC3_EPCFG_P3	 0x0f00 /* Cycles to spend in P3 state for PIO */
-#define HPC3_EPCFG_TST	 0x1000 /* Diagnistic ram test feature bit */
+#define HPC3_EPCFG_TST	 0x1000 /* Diagnostic ram test feature bit */
 
 	u32 _unused2[0x1000/4 - 8];	/* padding */
 

diff --git a/arch/mips/include/asm/sgiarcs.h b/arch/mips/include/asm/sgiarcs.h
index 26ddfff..105a947 100644
--- a/arch/mips/include/asm/sgiarcs.h
+++ b/arch/mips/include/asm/sgiarcs.h

@@ -144,7 +144,7 @@
 struct linux_vdirent {
 	ULONG namelen;
 	unsigned char attr;
-	char fname[32]; /* XXX imperical, should be a define */
+	char fname[32]; /* XXX empirical, should be a define */
 };
 
 /* Other stuff for files. */
@@ -179,7 +179,7 @@
 	enum linux_devtypes   dtype;
 	unsigned long	      namelen;
 	unsigned char	      attr;
-	char		      name[32]; /* XXX imperical, should be define */
+	char		      name[32]; /* XXX empirical, should be define */
 };
 
 /* This describes the vector containing function pointers to the ARC

diff --git a/arch/mips/include/asm/sn/ioc3.h b/arch/mips/include/asm/sn/ioc3.h
index e33f036..feb3851 100644
--- a/arch/mips/include/asm/sn/ioc3.h
+++ b/arch/mips/include/asm/sn/ioc3.h

@@ -355,7 +355,7 @@
 #define SSCR_PAUSE_STATE 0x40000000	/* sets when PAUSE takes effect */
 #define SSCR_RESET	0x80000000	/* reset DMA channels */
 
-/* all producer/comsumer pointers are the same bitfield */
+/* all producer/consumer pointers are the same bitfield */
 #define PROD_CONS_PTR_4K 0x00000ff8	/* for 4K buffers */
 #define PROD_CONS_PTR_1K 0x000003f8	/* for 1K buffers */
 #define PROD_CONS_PTR_OFF 3

diff --git a/arch/mips/include/asm/sn/sn0/hubio.h b/arch/mips/include/asm/sn/sn0/hubio.h
index 5998b13..57ece90 100644
--- a/arch/mips/include/asm/sn/sn0/hubio.h
+++ b/arch/mips/include/asm/sn/sn0/hubio.h

@@ -628,7 +628,7 @@
 /*
  * Values for field imsgtype
  */
-#define IIO_ICRB_IMSGT_XTALK	0	/* Incoming Meessage from Xtalk */
+#define IIO_ICRB_IMSGT_XTALK	0	/* Incoming Message from Xtalk */
 #define IIO_ICRB_IMSGT_BTE	1	/* Incoming message from BTE	*/
 #define IIO_ICRB_IMSGT_SN0NET	2	/* Incoming message from SN0 net */
 #define IIO_ICRB_IMSGT_CRB	3	/* Incoming message from CRB ???  */

diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 095ecaf..7f109d4 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h

@@ -95,7 +95,7 @@
 }
 
 /*
- * Is a address valid? This does a straighforward calculation rather
+ * Is a address valid? This does a straightforward calculation rather
  * than tests.
  *
  * Address valid if:

diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index 3129795..24ad815 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h

@@ -381,16 +381,18 @@
 #define __NR_membarrier			(__NR_Linux + 358)
 #define __NR_mlock2			(__NR_Linux + 359)
 #define __NR_copy_file_range		(__NR_Linux + 360)
+#define __NR_preadv2			(__NR_Linux + 361)
+#define __NR_pwritev2			(__NR_Linux + 362)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		360
+#define __NR_Linux_syscalls		362
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		360
+#define __NR_O32_Linux_syscalls		362
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -719,16 +721,18 @@
 #define __NR_membarrier			(__NR_Linux + 318)
 #define __NR_mlock2			(__NR_Linux + 319)
 #define __NR_copy_file_range		(__NR_Linux + 320)
+#define __NR_preadv2			(__NR_Linux + 321)
+#define __NR_pwritev2			(__NR_Linux + 322)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		320
+#define __NR_Linux_syscalls		322
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		320
+#define __NR_64_Linux_syscalls		322
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1061,15 +1065,17 @@
 #define __NR_membarrier			(__NR_Linux + 322)
 #define __NR_mlock2			(__NR_Linux + 323)
 #define __NR_copy_file_range		(__NR_Linux + 324)
+#define __NR_preadv2			(__NR_Linux + 325)
+#define __NR_pwritev2			(__NR_Linux + 326)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		324
+#define __NR_Linux_syscalls		326
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		324
+#define __NR_N32_Linux_syscalls		326
 
 #endif /* _UAPI_ASM_UNISTD_H */

diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index 1448c1f..760217b 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c

@@ -24,7 +24,7 @@
 	"0x04", "cpc", "0x06", "0x07"
 };
 
-/* CM3 Tag ECC transation type */
+/* CM3 Tag ECC transaction type */
 static char *cm3_tr[16] = {
 	[0x0] = "ReqNoData",
 	[0x1] = "0x1",

diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 1f5aac7..3fff89a 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c

@@ -940,42 +940,42 @@
 		switch (rt) {
 		case tgei_op:
 			if ((long)regs->regs[rs] >= MIPSInst_SIMM(inst))
-				do_trap_or_bp(regs, 0, "TGEI");
+				do_trap_or_bp(regs, 0, 0, "TGEI");
 
 			MIPS_R2_STATS(traps);
 
 			break;
 		case tgeiu_op:
 			if (regs->regs[rs] >= MIPSInst_UIMM(inst))
-				do_trap_or_bp(regs, 0, "TGEIU");
+				do_trap_or_bp(regs, 0, 0, "TGEIU");
 
 			MIPS_R2_STATS(traps);
 
 			break;
 		case tlti_op:
 			if ((long)regs->regs[rs] < MIPSInst_SIMM(inst))
-				do_trap_or_bp(regs, 0, "TLTI");
+				do_trap_or_bp(regs, 0, 0, "TLTI");
 
 			MIPS_R2_STATS(traps);
 
 			break;
 		case tltiu_op:
 			if (regs->regs[rs] < MIPSInst_UIMM(inst))
-				do_trap_or_bp(regs, 0, "TLTIU");
+				do_trap_or_bp(regs, 0, 0, "TLTIU");
 
 			MIPS_R2_STATS(traps);
 
 			break;
 		case teqi_op:
 			if (regs->regs[rs] == MIPSInst_SIMM(inst))
-				do_trap_or_bp(regs, 0, "TEQI");
+				do_trap_or_bp(regs, 0, 0, "TEQI");
 
 			MIPS_R2_STATS(traps);
 
 			break;
 		case tnei_op:
 			if (regs->regs[rs] != MIPSInst_SIMM(inst))
-				do_trap_or_bp(regs, 0, "TNEI");
+				do_trap_or_bp(regs, 0, 0, "TNEI");
 
 			MIPS_R2_STATS(traps);
 

diff --git a/arch/mips/kernel/module-rela.c b/arch/mips/kernel/module-rela.c
index 2b70723..9083d63 100644
--- a/arch/mips/kernel/module-rela.c
+++ b/arch/mips/kernel/module-rela.c

@@ -109,9 +109,10 @@
 		       struct module *me)
 {
 	Elf_Mips_Rela *rel = (void *) sechdrs[relsec].sh_addr;
+	int (*handler)(struct module *me, u32 *location, Elf_Addr v);
 	Elf_Sym *sym;
 	u32 *location;
-	unsigned int i;
+	unsigned int i, type;
 	Elf_Addr v;
 	int res;
 
@@ -134,9 +135,21 @@
 			return -ENOENT;
 		}
 
-		v = sym->st_value + rel[i].r_addend;
+		type = ELF_MIPS_R_TYPE(rel[i]);
 
-		res = reloc_handlers_rela[ELF_MIPS_R_TYPE(rel[i])](me, location, v);
+		if (type < ARRAY_SIZE(reloc_handlers_rela))
+			handler = reloc_handlers_rela[type];
+		else
+			handler = NULL;
+
+		if (!handler) {
+			pr_err("%s: Unknown relocation type %u\n",
+			       me->name, type);
+			return -EINVAL;
+		}
+
+		v = sym->st_value + rel[i].r_addend;
+		res = handler(me, location, v);
 		if (res)
 			return res;
 	}

diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 1833f51..f9b2936 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c

@@ -197,9 +197,10 @@
 		   struct module *me)
 {
 	Elf_Mips_Rel *rel = (void *) sechdrs[relsec].sh_addr;
+	int (*handler)(struct module *me, u32 *location, Elf_Addr v);
 	Elf_Sym *sym;
 	u32 *location;
-	unsigned int i;
+	unsigned int i, type;
 	Elf_Addr v;
 	int res;
 
@@ -223,9 +224,21 @@
 			return -ENOENT;
 		}
 
-		v = sym->st_value;
+		type = ELF_MIPS_R_TYPE(rel[i]);
 
-		res = reloc_handlers_rel[ELF_MIPS_R_TYPE(rel[i])](me, location, v);
+		if (type < ARRAY_SIZE(reloc_handlers_rel))
+			handler = reloc_handlers_rel[type];
+		else
+			handler = NULL;
+
+		if (!handler) {
+			pr_err("%s: Unknown relocation type %u\n",
+			       me->name, type);
+			return -EINVAL;
+		}
+
+		v = sym->st_value;
+		res = handler(me, location, v);
 		if (res)
 			return res;
 	}

diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index d7b8dd4..9bc1191 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c

@@ -530,7 +530,7 @@
 
 /*
  * MIPS performance counters can be per-TC. The control registers can
- * not be directly accessed accross CPUs. Hence if we want to do global
+ * not be directly accessed across CPUs. Hence if we want to do global
  * control, we need cross CPU calls. on_each_cpu() can help us, but we
  * can not make sure this function is called with interrupts enabled. So
  * here we pause local counters and then grab a rwlock and leave the

diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index f63a289..fa3f9eb 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c

@@ -472,7 +472,7 @@
 	/*
 	 * Disable all but self interventions. The load from COHCTL is defined
 	 * by the interAptiv & proAptiv SUMs as ensuring that the operation
-	 * resulting from the preceeding store is complete.
+	 * resulting from the preceding store is complete.
 	 */
 	uasm_i_addiu(&p, t0, zero, 1 << cpu_data[cpu].core);
 	uasm_i_sw(&p, t0, 0, r_pcohctl);

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index eddd5fd..92880ce 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c

@@ -615,7 +615,7 @@
 	 * allows us to only worry about whether an FP mode switch is in
 	 * progress when FP is first used in a tasks time slice. Pretty much all
 	 * of the mode switch overhead can thus be confined to cases where mode
-	 * switches are actually occuring. That is, to here. However for the
+	 * switches are actually occurring. That is, to here. However for the
 	 * thread performing the mode switch it may take a while...
 	 */
 	if (num_online_cpus() > 1) {

diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index a563174..d01fe53 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S

@@ -596,3 +596,5 @@
 	PTR	sys_membarrier
 	PTR	sys_mlock2
 	PTR	sys_copy_file_range		/* 4360 */
+	PTR	sys_preadv2
+	PTR	sys_pwritev2

diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 2b2dc14..6b73ecc 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S

@@ -434,4 +434,6 @@
 	PTR	sys_membarrier
 	PTR	sys_mlock2
 	PTR	sys_copy_file_range		/* 5320 */
+	PTR	sys_preadv2
+	PTR	sys_pwritev2
 	.size	sys_call_table,.-sys_call_table

diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 2bf5c85..71f99d5 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S

@@ -424,4 +424,6 @@
 	PTR	sys_membarrier
 	PTR	sys_mlock2
 	PTR	sys_copy_file_range
+	PTR	compat_sys_preadv2		/* 6325 */
+	PTR	compat_sys_pwritev2
 	.size	sysn32_call_table,.-sysn32_call_table

diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index c5b759e..91b43ee 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S

@@ -579,4 +579,6 @@
 	PTR	sys_membarrier
 	PTR	sys_mlock2
 	PTR	sys_copy_file_range		/* 4360 */
+	PTR	compat_sys_preadv2
+	PTR	compat_sys_pwritev2
 	.size	sys32_call_table,.-sys32_call_table

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 37708d9..27cb638 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c

@@ -243,6 +243,18 @@
 	struct irq_domain *ipidomain;
 	struct device_node *node;
 
+	/*
+	 * In some cases like qemu-malta, it is desired to try SMP with
+	 * a single core. Qemu-malta has no GIC, so an attempt to set any IPIs
+	 * would cause a BUG_ON() to be triggered since there's no ipidomain.
+	 *
+	 * Since for a single core system IPIs aren't required really, skip the
+	 * initialisation which should generally keep any such configurations
+	 * happy and only fail hard when trying to truely run SMP.
+	 */
+	if (cpumask_weight(cpu_possible_mask) == 1)
+		return 0;
+
 	node = of_irq_find_parent(of_root);
 	ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);
 

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index bf14da9..ae0c89d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c

@@ -56,6 +56,7 @@
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
 #include <asm/sections.h>
+#include <asm/siginfo.h>
 #include <asm/tlbdebug.h>
 #include <asm/traps.h>
 #include <asm/uaccess.h>
@@ -871,7 +872,7 @@
 	exception_exit(prev_state);
 }
 
-void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
+void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
 	const char *str)
 {
 	siginfo_t info = { 0 };
@@ -928,7 +929,13 @@
 	default:
 		scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
 		die_if_kernel(b, regs);
-		force_sig(SIGTRAP, current);
+		if (si_code) {
+			info.si_signo = SIGTRAP;
+			info.si_code = si_code;
+			force_sig_info(SIGTRAP, &info, current);
+		} else {
+			force_sig(SIGTRAP, current);
+		}
 	}
 }
 
@@ -1012,7 +1019,7 @@
 		break;
 	}
 
-	do_trap_or_bp(regs, bcode, "Break");
+	do_trap_or_bp(regs, bcode, TRAP_BRKPT, "Break");
 
 out:
 	set_fs(seg);
@@ -1054,7 +1061,7 @@
 			tcode = (opcode >> 6) & ((1 << 10) - 1);
 	}
 
-	do_trap_or_bp(regs, tcode, "Trap");
+	do_trap_or_bp(regs, tcode, 0, "Trap");
 
 out:
 	set_fs(seg);
@@ -1115,19 +1122,7 @@
 	if (unlikely(compute_return_epc(regs) < 0))
 		goto out;
 
-	if (get_isa16_mode(regs->cp0_epc)) {
-		unsigned short mmop[2] = { 0 };
-
-		if (unlikely(get_user(mmop[0], (u16 __user *)epc + 0) < 0))
-			status = SIGSEGV;
-		if (unlikely(get_user(mmop[1], (u16 __user *)epc + 1) < 0))
-			status = SIGSEGV;
-		opcode = mmop[0];
-		opcode = (opcode << 16) | mmop[1];
-
-		if (status < 0)
-			status = simulate_rdhwr_mm(regs, opcode);
-	} else {
+	if (!get_isa16_mode(regs->cp0_epc)) {
 		if (unlikely(get_user(opcode, epc) < 0))
 			status = SIGSEGV;
 
@@ -1142,6 +1137,18 @@
 
 		if (status < 0)
 			status = simulate_fp(regs, opcode, old_epc, old31);
+	} else if (cpu_has_mmips) {
+		unsigned short mmop[2] = { 0 };
+
+		if (unlikely(get_user(mmop[0], (u16 __user *)epc + 0) < 0))
+			status = SIGSEGV;
+		if (unlikely(get_user(mmop[1], (u16 __user *)epc + 1) < 0))
+			status = SIGSEGV;
+		opcode = mmop[0];
+		opcode = (opcode << 16) | mmop[1];
+
+		if (status < 0)
+			status = simulate_rdhwr_mm(regs, opcode);
 	}
 
 	if (status < 0)
@@ -1492,6 +1499,7 @@
  */
 asmlinkage void do_watch(struct pt_regs *regs)
 {
+	siginfo_t info = { .si_signo = SIGTRAP, .si_code = TRAP_HWBKPT };
 	enum ctx_state prev_state;
 	u32 cause;
 
@@ -1512,7 +1520,7 @@
 	if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) {
 		mips_read_watch_registers();
 		local_irq_enable();
-		force_sig(SIGTRAP, current);
+		force_sig_info(SIGTRAP, &info, current);
 	} else {
 		mips_clear_watch_registers();
 		local_irq_enable();
@@ -2214,7 +2222,7 @@
 
 	/*
 	 * Copy the generic exception handlers to their final destination.
-	 * This will be overriden later as suitable for a particular
+	 * This will be overridden later as suitable for a particular
 	 * configuration.
 	 */
 	set_handler(0x180, &except_vec3_generic, 0x80);

diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 490cea5..5c62065 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c

@@ -885,7 +885,7 @@
 {
 	union mips_instruction insn;
 	unsigned long value;
-	unsigned int res;
+	unsigned int res, preempted;
 	unsigned long origpc;
 	unsigned long orig31;
 	void __user *fault_addr = NULL;
@@ -1226,27 +1226,36 @@
 			if (!access_ok(VERIFY_READ, addr, sizeof(*fpr)))
 				goto sigbus;
 
-			/*
-			 * Disable preemption to avoid a race between copying
-			 * state from userland, migrating to another CPU and
-			 * updating the hardware vector register below.
-			 */
-			preempt_disable();
+			do {
+				/*
+				 * If we have live MSA context keep track of
+				 * whether we get preempted in order to avoid
+				 * the register context we load being clobbered
+				 * by the live context as it's saved during
+				 * preemption. If we don't have live context
+				 * then it can't be saved to clobber the value
+				 * we load.
+				 */
+				preempted = test_thread_flag(TIF_USEDMSA);
 
-			res = __copy_from_user_inatomic(fpr, addr,
-							sizeof(*fpr));
-			if (res)
-				goto fault;
+				res = __copy_from_user_inatomic(fpr, addr,
+								sizeof(*fpr));
+				if (res)
+					goto fault;
 
-			/*
-			 * Update the hardware register if it is in use by the
-			 * task in this quantum, in order to avoid having to
-			 * save & restore the whole vector context.
-			 */
-			if (test_thread_flag(TIF_USEDMSA))
-				write_msa_wr(wd, fpr, df);
-
-			preempt_enable();
+				/*
+				 * Update the hardware register if it is in use
+				 * by the task in this quantum, in order to
+				 * avoid having to save & restore the whole
+				 * vector context.
+				 */
+				preempt_disable();
+				if (test_thread_flag(TIF_USEDMSA)) {
+					write_msa_wr(wd, fpr, df);
+					preempted = 0;
+				}
+				preempt_enable();
+			} while (preempted);
 			break;
 
 		case msa_st_op:

diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 0a93e83..54d653e 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S

@@ -58,6 +58,7 @@
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		*(.text.*)
 		*(.fixup)
 		*(.gnu.warning)

diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index a08c439..e0e1d0a 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c

@@ -632,7 +632,7 @@
 
 	kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
 
-	/* Alocate new kernel and user ASIDs if needed */
+	/* Allocate new kernel and user ASIDs if needed */
 
 	local_irq_save(flags);
 

diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index ad98800..c4038d2 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c

@@ -500,7 +500,7 @@
 	kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10));
 
 	/*
-	 * Setup IntCtl defaults, compatibilty mode for timer interrupts (HW5)
+	 * Setup IntCtl defaults, compatibility mode for timer interrupts (HW5)
 	 */
 	kvm_write_c0_guest_intctl(cop0, 0xFC000000);
 

diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c
index ad3c734..47d26c8 100644
--- a/arch/mips/math-emu/ieee754dp.c
+++ b/arch/mips/math-emu/ieee754dp.c

@@ -97,7 +97,7 @@
 {
 	assert(xm);		/* we don't gen exact zeros (probably should) */
 
-	assert((xm >> (DP_FBITS + 1 + 3)) == 0);	/* no execess */
+	assert((xm >> (DP_FBITS + 1 + 3)) == 0);	/* no excess */
 	assert(xm & (DP_HIDDEN_BIT << 3));
 
 	if (xe < DP_EMIN) {
@@ -165,7 +165,7 @@
 	/* strip grs bits */
 	xm >>= 3;
 
-	assert((xm >> (DP_FBITS + 1)) == 0);	/* no execess */
+	assert((xm >> (DP_FBITS + 1)) == 0);	/* no excess */
 	assert(xe >= DP_EMIN);
 
 	if (xe > DP_EMAX) {
@@ -198,7 +198,7 @@
 			ieee754_setcx(IEEE754_UNDERFLOW);
 		return builddp(sn, DP_EMIN - 1 + DP_EBIAS, xm);
 	} else {
-		assert((xm >> (DP_FBITS + 1)) == 0);	/* no execess */
+		assert((xm >> (DP_FBITS + 1)) == 0);	/* no excess */
 		assert(xm & DP_HIDDEN_BIT);
 
 		return builddp(sn, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);

diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c
index def00ff..e0b2c45 100644
--- a/arch/mips/math-emu/ieee754sp.c
+++ b/arch/mips/math-emu/ieee754sp.c

@@ -97,7 +97,7 @@
 {
 	assert(xm);		/* we don't gen exact zeros (probably should) */
 
-	assert((xm >> (SP_FBITS + 1 + 3)) == 0);	/* no execess */
+	assert((xm >> (SP_FBITS + 1 + 3)) == 0);	/* no excess */
 	assert(xm & (SP_HIDDEN_BIT << 3));
 
 	if (xe < SP_EMIN) {
@@ -163,7 +163,7 @@
 	/* strip grs bits */
 	xm >>= 3;
 
-	assert((xm >> (SP_FBITS + 1)) == 0);	/* no execess */
+	assert((xm >> (SP_FBITS + 1)) == 0);	/* no excess */
 	assert(xe >= SP_EMIN);
 
 	if (xe > SP_EMAX) {
@@ -196,7 +196,7 @@
 			ieee754_setcx(IEEE754_UNDERFLOW);
 		return buildsp(sn, SP_EMIN - 1 + SP_EBIAS, xm);
 	} else {
-		assert((xm >> (SP_FBITS + 1)) == 0);	/* no execess */
+		assert((xm >> (SP_FBITS + 1)) == 0);	/* no excess */
 		assert(xm & SP_HIDDEN_BIT);
 
 		return buildsp(sn, xe + SP_EBIAS, xm & ~SP_HIDDEN_BIT);

diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c
index dc7c5a5..026cb59 100644
--- a/arch/mips/mm/sc-ip22.c
+++ b/arch/mips/mm/sc-ip22.c

@@ -158,7 +158,7 @@
 	return 1;
 }
 
-/* XXX Check with wje if the Indy caches can differenciate between
+/* XXX Check with wje if the Indy caches can differentiate between
    writeback + invalidate and just invalidate.	*/
 static struct bcache_ops indy_sc_ops = {
 	.bc_enable = indy_sc_enable,

diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 5037d58..c17d762 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c

@@ -19,6 +19,7 @@
 #include <asm/cpu.h>
 #include <asm/cpu-type.h>
 #include <asm/bootinfo.h>
+#include <asm/hazards.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
@@ -486,6 +487,10 @@
 	 *     be set to fixed-size pages.
 	 */
 	write_c0_pagemask(PM_DEFAULT_MASK);
+	back_to_back_c0_hazard();
+	if (read_c0_pagemask() != PM_DEFAULT_MASK)
+		panic("MMU doesn't support PAGE_SIZE=0x%lx", PAGE_SIZE);
+
 	write_c0_wired(0);
 	if (current_cpu_type() == CPU_R10000 ||
 	    current_cpu_type() == CPU_R12000 ||

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 5a04b6f..84c6e3f 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c

@@ -12,7 +12,7 @@
  * Copyright (C) 2011  MIPS Technologies, Inc.
  *
  * ... and the days got worse and worse and now you see
- * I've gone completly out of my mind.
+ * I've gone completely out of my mind.
  *
  * They're coming to take me a away haha
  * they're coming to take me a away hoho hihi haha

diff --git a/arch/mips/pic32/Kconfig b/arch/mips/pic32/Kconfig
index fde56a8..1985971 100644
--- a/arch/mips/pic32/Kconfig
+++ b/arch/mips/pic32/Kconfig

@@ -15,7 +15,6 @@
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select ARCH_REQUIRE_GPIOLIB
-	select HAVE_MACH_CLKDEV
 	select COMMON_CLK
 	select CLKDEV_LOOKUP
 	select LIBFDT

diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 8d0eb26..f1f8829 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c

@@ -7,7 +7,7 @@
  * Copyright (C) 2000 by Silicon Graphics, Inc.
  * Copyright (C) 2004 by Christoph Hellwig
  *
- * On SGI IP27 the ARC memory configuration data is completly bogus but
+ * On SGI IP27 the ARC memory configuration data is completely bogus but
  * alternate easier to use mechanisms are available.
  */
 #include <linux/init.h>

diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c
index 718dd19..367c542 100644
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c

@@ -97,8 +97,7 @@
 		return 0;
 #endif
 
-	*addr64 = fdt_translate_address((const void *)initial_boot_params,
-		node);
+	*addr64 = of_flat_dt_translate_address(node);
 
 	return *addr64 == OF_BAD_ADDR ? 0 : 1;
 }

diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S
index 326fab4..e23e895 100644
--- a/arch/nios2/kernel/vmlinux.lds.S
+++ b/arch/nios2/kernel/vmlinux.lds.S

@@ -39,6 +39,7 @@
 		SCHED_TEXT
 		LOCK_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		KPROBES_TEXT
 	} =0
 	_etext = .;

diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h
index 108906f..e613d36 100644
--- a/arch/openrisc/include/asm/page.h
+++ b/arch/openrisc/include/asm/page.h

@@ -40,9 +40,6 @@
 
 #ifndef __ASSEMBLY__
 
-#define get_user_page(vaddr)            __get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)      free_page(addr)
-
 #define clear_page(page)	memset((page), 0, PAGE_SIZE)
 #define copy_page(to, from)	memcpy((to), (from), PAGE_SIZE)
 

diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S
index 2d69a85..d936de4 100644
--- a/arch/openrisc/kernel/vmlinux.lds.S
+++ b/arch/openrisc/kernel/vmlinux.lds.S

@@ -50,6 +50,7 @@
 	  LOCK_TEXT
 	  KPROBES_TEXT
 	  IRQENTRY_TEXT
+	  SOFTIRQENTRY_TEXT
 	  *(.fixup)
 	  *(.text.__*)
 	  _etext = .;

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 14f655c..bd3c873 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig

@@ -11,6 +11,7 @@
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select BUG
+	select BUILDTIME_EXTABLE_SORT
 	select HAVE_PERF_EVENTS
 	select GENERIC_ATOMIC64 if !64BIT
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
@@ -29,6 +30,7 @@
 	select TTY # Needed for pdc_cons.c
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_SECCOMP_FILTER
 	select ARCH_NO_COHERENT_DMA_MMAP
 
 	help

diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index b3069fd..60e6f07 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h

@@ -523,7 +523,7 @@
 	 */
 #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr)	\
 	.section __ex_table,"aw"			!	\
-	ASM_ULONG_INSN	fault_addr, except_addr		!	\
+	.word (fault_addr - .), (except_addr - .)	!	\
 	.previous
 
 

diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index 0448a2c..3387307 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h

@@ -183,6 +183,13 @@
 			int _band;      /* POLL_IN, POLL_OUT, POLL_MSG */
 			int _fd;
 		} _sigpoll;
+
+		/* SIGSYS */
+		struct {
+			compat_uptr_t _call_addr; /* calling user insn */
+			int _syscall;	/* triggering system call number */
+			compat_uint_t _arch;	/* AUDIT_ARCH_* of syscall */
+		} _sigsys;
 	} _sifields;
 } compat_siginfo_t;
 

diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h
index a5eba95..637ce8d 100644
--- a/arch/parisc/include/asm/syscall.h
+++ b/arch/parisc/include/asm/syscall.h

@@ -39,6 +39,19 @@
 	}
 }
 
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->gr[28] = error ? error : val;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* do nothing */
+}
+
 static inline int syscall_get_arch(void)
 {
 	int arch = AUDIT_ARCH_PARISC;

diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 0abdd4c..d4dd6e5 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h

@@ -60,14 +60,15 @@
  * use a 32bit (unsigned int) address here.
  */
 
+#define ARCH_HAS_RELATIVE_EXTABLE
 struct exception_table_entry {
-	unsigned long insn;	/* address of insn that is allowed to fault. */
-	unsigned long fixup;	/* fixup routine */
+	int insn;	/* relative address of insn that is allowed to fault. */
+	int fixup;	/* relative address of fixup routine */
 };
 
 #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
 	".section __ex_table,\"aw\"\n"			   \
-	ASM_WORD_INSN #fault_addr ", " #except_addr "\n\t" \
+	".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
 	".previous\n"
 
 /*

diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index b75039f..cc0ce92 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h

@@ -235,8 +235,8 @@
 #define __NR_io_getevents       (__NR_Linux + 217)
 #define __NR_io_submit          (__NR_Linux + 218)
 #define __NR_io_cancel          (__NR_Linux + 219)
-#define __NR_alloc_hugepages    (__NR_Linux + 220)
-#define __NR_free_hugepages     (__NR_Linux + 221)
+#define __NR_alloc_hugepages    (__NR_Linux + 220) /* not used */
+#define __NR_free_hugepages     (__NR_Linux + 221) /* not used */
 #define __NR_exit_group         (__NR_Linux + 222)
 #define __NR_lookup_dcookie     (__NR_Linux + 223)
 #define __NR_epoll_create       (__NR_Linux + 224)
@@ -362,8 +362,10 @@
 #define __NR_userfaultfd	(__NR_Linux + 344)
 #define __NR_mlock2		(__NR_Linux + 345)
 #define __NR_copy_file_range	(__NR_Linux + 346)
+#define __NR_preadv2		(__NR_Linux + 347)
+#define __NR_pwritev2		(__NR_Linux + 348)
 
-#define __NR_Linux_syscalls	(__NR_copy_file_range + 1)
+#define __NR_Linux_syscalls	(__NR_pwritev2 + 1)
 
 
 #define __IGNORE_select		/* newselect */

diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 91c2a39..6700127 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c

@@ -319,7 +319,7 @@
 	if (!mapping)
 		return;
 
-	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	pgoff = page->index;
 
 	/* We have carefully arranged in arch_get_unmapped_area() that
 	 * *any* mappings of a file are always congruently mapped (whether

diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index ce0b2b4..8fb81a3 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c

@@ -270,7 +270,8 @@
 long do_syscall_trace_enter(struct pt_regs *regs)
 {
 	/* Do the secure computing check first. */
-	secure_computing_strict(regs->gr[20]);
+	if (secure_computing() == -1)
+		return -1;
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
 	    tracehook_report_syscall_entry(regs)) {
@@ -296,7 +297,11 @@
 			regs->gr[23] & 0xffffffff);
 
 out:
-	return regs->gr[20];
+	/*
+	 * Sign extend the syscall number to 64bit since it may have been
+	 * modified by a compat ptrace call
+	 */
+	return (int) ((u32) regs->gr[20]);
 }
 
 void do_syscall_trace_exit(struct pt_regs *regs)

diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c
index 984abbe..c342b2e 100644
--- a/arch/parisc/kernel/signal32.c
+++ b/arch/parisc/kernel/signal32.c

@@ -371,6 +371,11 @@
 			val = (compat_int_t)from->si_int;
 			err |= __put_user(val, &to->si_int);
 			break;
+		case __SI_SYS >> 16:
+			err |= __put_user(ptr_to_compat(from->si_call_addr), &to->si_call_addr);
+			err |= __put_user(from->si_syscall, &to->si_syscall);
+			err |= __put_user(from->si_arch, &to->si_arch);
+			break;
 		}
 	}
 	return err;

diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 5aba01a..0a393a0 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c

@@ -368,16 +368,6 @@
                              ((u64)lenhi << 32) | lenlo);
 }
 
-asmlinkage unsigned long sys_alloc_hugepages(int key, unsigned long addr, unsigned long len, int prot, int flag)
-{
-	return -ENOMEM;
-}
-
-asmlinkage int sys_free_hugepages(unsigned long addr)
-{
-	return -EINVAL;
-}
-
 long parisc_personality(unsigned long personality)
 {
 	long err;

diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index fbafa0d..c976ebf 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S

@@ -329,6 +329,7 @@
 
 	ldo     -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1      /* get task ptr */
 	LDREG	TI_TASK(%r1), %r1
+	LDREG   TASK_PT_GR28(%r1), %r28		/* Restore return value */
 	LDREG   TASK_PT_GR26(%r1), %r26		/* Restore the users args */
 	LDREG   TASK_PT_GR25(%r1), %r25
 	LDREG   TASK_PT_GR24(%r1), %r24
@@ -342,6 +343,7 @@
 	stw     %r21, -56(%r30)                 /* 6th argument */
 #endif
 
+	cmpib,COND(=),n -1,%r20,tracesys_exit /* seccomp may have returned -1 */
 	comiclr,>>=	__NR_Linux_syscalls, %r20, %r0
 	b,n	.Ltracesys_nosys
 

diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 585d50f..3cfef1d 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S

@@ -315,8 +315,8 @@
 	ENTRY_COMP(io_getevents)
 	ENTRY_COMP(io_submit)
 	ENTRY_SAME(io_cancel)
-	ENTRY_SAME(alloc_hugepages)	/* 220 */
-	ENTRY_SAME(free_hugepages)
+	ENTRY_SAME(ni_syscall)		/* 220: was alloc_hugepages */
+	ENTRY_SAME(ni_syscall)		/* was free_hugepages */
 	ENTRY_SAME(exit_group)
 	ENTRY_COMP(lookup_dcookie)
 	ENTRY_SAME(epoll_create)
@@ -442,6 +442,8 @@
 	ENTRY_SAME(userfaultfd)
 	ENTRY_SAME(mlock2)		/* 345 */
 	ENTRY_SAME(copy_file_range)
+	ENTRY_COMP(preadv2)
+	ENTRY_COMP(pwritev2)
 
 
 .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))

diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 553b098..16e0735 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c

@@ -284,11 +284,8 @@
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 
-	if (panic_on_oops) {
-		printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
-		ssleep(5);
+	if (panic_on_oops)
 		panic("Fatal exception");
-	}
 
 	oops_exit();
 	do_exit(SIGSEGV);

diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 308f290..f3ead0b 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S

@@ -72,6 +72,7 @@
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		*(.text.do_softirq)
 		*(.text.sys_exit)
 		*(.text.do_sigaltstack)

diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index a762864..26fac9c 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c

@@ -140,12 +140,6 @@
 {
 	const struct exception_table_entry *fix;
 
-	/* If we only stored 32bit addresses in the exception table we can drop
-	 * out if we faulted on a 64bit address. */
-	if ((sizeof(regs->iaoq[0]) > sizeof(fix->insn))
-		&& (regs->iaoq[0] >> 32))
-			return 0;
-
 	fix = search_exception_tables(regs->iaoq[0]);
 	if (fix) {
 		struct exception_data *d;
@@ -154,7 +148,8 @@
 		d->fault_space = regs->isr;
 		d->fault_addr = regs->ior;
 
-		regs->iaoq[0] = ((fix->fixup) & ~3);
+		regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
+		regs->iaoq[0] &= ~3;
 		/*
 		 * NOTE: In some cases the faulting instruction
 		 * may be in the delay slot of a branch. We

diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 3c07d6b..6b3e7c6 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c

@@ -22,7 +22,7 @@
 #include <linux/swap.h>
 #include <linux/unistd.h>
 #include <linux/nodemask.h>	/* for node_online_map */
-#include <linux/pagemap.h>	/* for release_pages and page_cache_release */
+#include <linux/pagemap.h>	/* for release_pages */
 #include <linux/compat.h>
 
 #include <asm/pgalloc.h>

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8ab8a1a..009fab1 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h

@@ -246,7 +246,7 @@
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
 	/* VSR status */
-	int		used_vsr;	/* set if process has used altivec */
+	int		used_vsr;	/* set if process has used VSX */
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
 	unsigned long	evr[32];	/* upper 32-bits of SPE regs */

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 612df30..b8500b4 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c

@@ -983,7 +983,7 @@
 static inline void save_sprs(struct thread_struct *t)
 {
 #ifdef CONFIG_ALTIVEC
-	if (cpu_has_feature(cpu_has_feature(CPU_FTR_ALTIVEC)))
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		t->vrsave = mfspr(SPRN_VRSAVE);
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64

diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index d41fd0a..2dd91f7 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S

@@ -55,6 +55,7 @@
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 
 #ifdef CONFIG_PPC32
 		*(.got1)

diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 7f7b6d8..eba0bea 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile

@@ -8,7 +8,8 @@
 KVM := ../../../virt/kvm
 
 common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-		$(KVM)/eventfd.o $(KVM)/vfio.o
+		$(KVM)/eventfd.o
+common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
 
 CFLAGS_e500_mmu.o := -I.
 CFLAGS_e500_mmu_host.o := -I.

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 8297004..18cf6d1 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c

@@ -209,6 +209,32 @@
 	return ret;
 }
 
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+		      unsigned long ioba, unsigned long tce)
+{
+	struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+	long ret;
+
+	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
+	/* 	    liobn, ioba, tce); */
+
+	if (!stt)
+		return H_TOO_HARD;
+
+	ret = kvmppc_ioba_validate(stt, ioba, 1);
+	if (ret != H_SUCCESS)
+		return ret;
+
+	ret = kvmppc_tce_validate(stt, tce);
+	if (ret != H_SUCCESS)
+		return ret;
+
+	kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
+
 long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		unsigned long liobn, unsigned long ioba,
 		unsigned long tce_list, unsigned long npages)
@@ -264,3 +290,29 @@
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
+
+long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+		unsigned long liobn, unsigned long ioba,
+		unsigned long tce_value, unsigned long npages)
+{
+	struct kvmppc_spapr_tce_table *stt;
+	long i, ret;
+
+	stt = kvmppc_find_table(vcpu, liobn);
+	if (!stt)
+		return H_TOO_HARD;
+
+	ret = kvmppc_ioba_validate(stt, ioba, npages);
+	if (ret != H_SUCCESS)
+		return ret;
+
+	/* Check permission bits only to allow userspace poison TCE for debug */
+	if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
+		return H_PARAMETER;
+
+	for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
+		kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);

diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index f88b859..d461c44 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c

@@ -180,8 +180,8 @@
 EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
-		      unsigned long ioba, unsigned long tce)
+long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+		unsigned long ioba, unsigned long tce)
 {
 	struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
 	long ret;
@@ -204,7 +204,6 @@
 
 	return H_SUCCESS;
 }
-EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
 
 static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
 		unsigned long ua, unsigned long *phpa)
@@ -296,7 +295,7 @@
 	return ret;
 }
 
-long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 		unsigned long liobn, unsigned long ioba,
 		unsigned long tce_value, unsigned long npages)
 {
@@ -320,7 +319,6 @@
 
 	return H_SUCCESS;
 }
-EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
 
 long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 		      unsigned long ioba)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 62ea3c6..e571ad2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -1942,7 +1942,7 @@
 	.long	DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
-	.long	DOTSYM(kvmppc_h_put_tce) - hcall_real_table
+	.long	DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table
 	.long	0		/* 0x24 - H_SET_SPRG0 */
 	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
 	.long	0		/* 0x2c */
@@ -2020,7 +2020,7 @@
 	.long	0		/* 0x12c */
 	.long	0		/* 0x130 */
 	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
-	.long	DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
+	.long	DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table
 	.long	DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
 	.long	0		/* 0x140 */
 	.long	0		/* 0x144 */

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 19aa59b..6a68730 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c

@@ -96,6 +96,9 @@
 		 * so we don't miss a request because the requester sees
 		 * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
 		 * before next entering the guest (and thus doesn't IPI).
+		 * This also orders the write to mode from any reads
+		 * to the page tables done while the VCPU is running.
+		 * Please see the comment in kvm_flush_remote_tlbs.
 		 */
 		smp_mb();
 

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6dd272b..d991b9e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c

@@ -413,13 +413,13 @@
 {
 	struct hugepd_freelist **batchp;
 
-	batchp = this_cpu_ptr(&hugepd_freelist_cur);
+	batchp = &get_cpu_var(hugepd_freelist_cur);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpumask_equal(mm_cpumask(tlb->mm),
 			  cpumask_of(smp_processor_id()))) {
 		kmem_cache_free(hugepte_cache, hugepte);
-        put_cpu_var(hugepd_freelist_cur);
+		put_cpu_var(hugepd_freelist_cur);
 		return;
 	}
 

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index dfa8638..6ca5f05 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c

@@ -732,8 +732,8 @@
 		return -ENOMEM;
 
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = SPUFS_MAGIC;
 	sb->s_op = &s_ops;
 	sb->s_fs_info = info;

diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index c1cd369..f5bf38b9 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c

@@ -606,6 +606,12 @@
 		if (!port)
 			continue;
 
+		rc = rio_mport_initialize(port);
+		if (rc) {
+			kfree(port);
+			continue;
+		}
+
 		i = *port_index - 1;
 		port->index = (unsigned char)i;
 
@@ -682,12 +688,6 @@
 		dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n",
 				port->sys_size ? 65536 : 256);
 
-		if (rio_register_mport(port)) {
-			release_resource(&port->iores);
-			kfree(priv);
-			kfree(port);
-			continue;
-		}
 		if (port->host_deviceid >= 0)
 			out_be32(priv->regs_win + RIO_GCCSR, RIO_PORT_GEN_HOST |
 				RIO_PORT_GEN_MASTER | RIO_PORT_GEN_DISCOVERED);
@@ -726,7 +726,14 @@
 		fsl_rio_inbound_mem_init(priv);
 
 		dbell->mport[i] = port;
+		pw->mport[i] = port;
 
+		if (rio_register_mport(port)) {
+			release_resource(&port->iores);
+			kfree(priv);
+			kfree(port);
+			continue;
+		}
 		active_ports++;
 	}
 

diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h
index d53407a..12dd18f 100644
--- a/arch/powerpc/sysdev/fsl_rio.h
+++ b/arch/powerpc/sysdev/fsl_rio.h

@@ -97,6 +97,7 @@
 };
 
 struct fsl_rio_pw {
+	struct rio_mport *mport[MAX_PORT_NUM];
 	struct device *dev;
 	struct rio_pw_regs __iomem *pw_regs;
 	struct rio_port_write_msg port_write_msg;

diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index ffe0ee8..c1826de 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c

@@ -481,14 +481,14 @@
 static void fsl_pw_dpc(struct work_struct *work)
 {
 	struct fsl_rio_pw *pw = container_of(work, struct fsl_rio_pw, pw_work);
-	u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)];
+	union rio_pw_msg msg_buffer;
+	int i;
 
 	/*
 	 * Process port-write messages
 	 */
-	while (kfifo_out_spinlocked(&pw->pw_fifo, (unsigned char *)msg_buffer,
+	while (kfifo_out_spinlocked(&pw->pw_fifo, (unsigned char *)&msg_buffer,
 			 RIO_PW_MSG_SIZE, &pw->pw_fifo_lock)) {
-		/* Process one message */
 #ifdef DEBUG_PW
 		{
 		u32 i;
@@ -496,15 +496,19 @@
 		for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); i++) {
 			if ((i%4) == 0)
 				pr_debug("\n0x%02x: 0x%08x", i*4,
-					 msg_buffer[i]);
+					 msg_buffer.raw[i]);
 			else
-				pr_debug(" 0x%08x", msg_buffer[i]);
+				pr_debug(" 0x%08x", msg_buffer.raw[i]);
 		}
 		pr_debug("\n");
 		}
 #endif
 		/* Pass the port-write message to RIO core for processing */
-		rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer);
+		for (i = 0; i < MAX_PORT_NUM; i++) {
+			if (pw->mport[i])
+				rio_inb_pwrite_handler(pw->mport[i],
+						       &msg_buffer);
+		}
 	}
 }
 

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b9df8d1..aad23e3 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig

@@ -59,6 +59,9 @@
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
 
+config DEBUG_RODATA
+	def_bool y
+
 config S390
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE

diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index b8045b9..d750cc0 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c

@@ -669,11 +669,13 @@
 static struct miscdevice prng_sha512_dev = {
 	.name	= "prandom",
 	.minor	= MISC_DYNAMIC_MINOR,
+	.mode	= 0644,
 	.fops	= &prng_sha512_fops,
 };
 static struct miscdevice prng_tdes_dev = {
 	.name	= "prandom",
 	.minor	= MISC_DYNAMIC_MINOR,
+	.mode	= 0644,
 	.fops	= &prng_tdes_fops,
 };
 

diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 0f3da2c..255c7ee 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c

@@ -278,8 +278,8 @@
 	sbi->uid = current_uid();
 	sbi->gid = current_gid();
 	sb->s_fs_info = sbi;
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = HYPFS_MAGIC;
 	sb->s_op = &hypfs_s_ops;
 	if (hypfs_parse_options(data, sb))

diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
index 4d7ccac..22da3b3 100644
--- a/arch/s390/include/asm/cache.h
+++ b/arch/s390/include/asm/cache.h

@@ -15,4 +15,7 @@
 
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init __read_mostly
+
 #endif

diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 9dd4cc4..e0900dd 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h

@@ -79,18 +79,12 @@
 	int insn, fixup;
 };
 
-static inline unsigned long extable_insn(const struct exception_table_entry *x)
-{
-	return (unsigned long)&x->insn + x->insn;
-}
-
 static inline unsigned long extable_fixup(const struct exception_table_entry *x)
 {
 	return (unsigned long)&x->fixup + x->fixup;
 }
 
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
+#define ARCH_HAS_RELATIVE_EXTABLE
 
 /**
  * __copy_from_user: - Copy a block of data from user space, with less checking.

diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index ab3aa68..4384bc7 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h

@@ -311,7 +311,9 @@
 #define __NR_shutdown		373
 #define __NR_mlock2		374
 #define __NR_copy_file_range	375
-#define NR_syscalls 376
+#define __NR_preadv2		376
+#define __NR_pwritev2		377
+#define NR_syscalls 378
 
 /* 
  * There are some system calls that are not present on 64 bit, some

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 58bf457..62f066b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c

@@ -670,6 +670,7 @@
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
 		flags = PMC_INIT;
 		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
 		break;

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 1a43474..eaab9a7 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c

@@ -1521,7 +1521,7 @@
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
+	case CPU_DOWN_FAILED:
 		flags = PMC_INIT;
 		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
 		break;

diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 293d8b9..9b59e62 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S

@@ -384,3 +384,5 @@
 SYSCALL(sys_shutdown,sys_shutdown)
 SYSCALL(sys_mlock2,compat_sys_mlock2)
 SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
+SYSCALL(sys_preadv2,compat_sys_preadv2)
+SYSCALL(sys_pwritev2,compat_sys_pwritev2)

diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 445657f..0f41a82 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S

@@ -28,6 +28,7 @@
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
 	} :text = 0x0700

diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 2ae54ca..0aa0ad1 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile

@@ -3,7 +3,7 @@
 #
 
 obj-y		:= init.o fault.o extmem.o mmap.o vmem.o maccess.o
-obj-y		+= page-states.o gup.o extable.o pageattr.o mem_detect.o
+obj-y		+= page-states.o gup.o pageattr.o mem_detect.o
 obj-y		+= pgtable.o pgalloc.o
 
 obj-$(CONFIG_CMM)		+= cmm.o

diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
deleted file mode 100644
index 18c8b81..0000000
--- a/arch/s390/mm/extable.c
+++ /dev/null

@@ -1,85 +0,0 @@
-#include <linux/module.h>
-#include <linux/sort.h>
-#include <asm/uaccess.h>
-
-/*
- * Search one exception table for an entry corresponding to the
- * given instruction address, and return the address of the entry,
- * or NULL if none is found.
- * We use a binary search, and thus we assume that the table is
- * already sorted.
- */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
-	       unsigned long value)
-{
-	const struct exception_table_entry *mid;
-	unsigned long addr;
-
-	while (first <= last) {
-		mid = ((last - first) >> 1) + first;
-		addr = extable_insn(mid);
-		if (addr < value)
-			first = mid + 1;
-		else if (addr > value)
-			last = mid - 1;
-		else
-			return mid;
-	}
-	return NULL;
-}
-
-/*
- * The exception table needs to be sorted so that the binary
- * search that we use to find entries in it works properly.
- * This is used both for the kernel exception table and for
- * the exception tables of modules that get loaded.
- *
- */
-static int cmp_ex(const void *a, const void *b)
-{
-	const struct exception_table_entry *x = a, *y = b;
-
-	/* This compare is only valid after normalization. */
-	return x->insn - y->insn;
-}
-
-void sort_extable(struct exception_table_entry *start,
-		  struct exception_table_entry *finish)
-{
-	struct exception_table_entry *p;
-	int i;
-
-	/* Normalize entries to being relative to the start of the section */
-	for (p = start, i = 0; p < finish; p++, i += 8) {
-		p->insn += i;
-		p->fixup += i + 4;
-	}
-	sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
-	/* Denormalize all entries */
-	for (p = start, i = 0; p < finish; p++, i += 8) {
-		p->insn -= i;
-		p->fixup -= i + 4;
-	}
-}
-
-#ifdef CONFIG_MODULES
-/*
- * If the exception table is sorted, any referring to the module init
- * will be at the beginning or the end.
- */
-void trim_init_extable(struct module *m)
-{
-	/* Trim the beginning */
-	while (m->num_exentries &&
-	       within_module_init(extable_insn(&m->extable[0]), m)) {
-		m->extable++;
-		m->num_exentries--;
-	}
-	/* Trim the end */
-	while (m->num_exentries &&
-	       within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m))
-		m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 69247b4..cace818 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c

@@ -23,7 +23,7 @@
 /**
  * gmap_alloc - allocate a guest address space
  * @mm: pointer to the parent mm_struct
- * @limit: maximum size of the gmap address space
+ * @limit: maximum address of the gmap address space
  *
  * Returns a guest address space structure.
  */
@@ -292,7 +292,7 @@
 	if ((from | to | len) & (PMD_SIZE - 1))
 		return -EINVAL;
 	if (len == 0 || from + len < from || to + len < to ||
-	    from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
+	    from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
 		return -EINVAL;
 
 	flush = 0;

diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 49a1c84..a8a6765 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c

@@ -20,9 +20,9 @@
 static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
+	struct page *head, *page;
 	unsigned long mask;
 	pte_t *ptep, pte;
-	struct page *page;
 
 	mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
 
@@ -37,12 +37,14 @@
 			return 0;
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
-		if (!page_cache_get_speculative(page))
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
 			return 0;
 		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-			put_page(page);
+			put_page(head);
 			return 0;
 		}
+		VM_BUG_ON_PAGE(compound_head(page) != head, page);
 		pages[*nr] = page;
 		(*nr)++;
 

diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 73e2903..c7b0451 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c

@@ -108,6 +108,13 @@
 	free_area_init_nodes(max_zone_pfns);
 }
 
+void mark_rodata_ro(void)
+{
+	/* Text and rodata are already protected. Nothing to do here. */
+	pr_info("Write protecting the kernel read-only data: %luk\n",
+		((unsigned long)&_eshared - (unsigned long)&_stext) >> 10);
+}
+
 void __init mem_init(void)
 {
 	if (MACHINE_HAS_TLB_LC)
@@ -126,9 +133,6 @@
 	setup_zero_pages();	/* Setup zeroed pages. */
 
 	mem_init_print_info(NULL);
-	printk("Write protected kernel read-only data: %#lx - %#lx\n",
-	       (unsigned long)&_stext,
-	       PFN_ALIGN((unsigned long)&_eshared) - 1);
 }
 
 void free_initmem(void)

diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 21591dd..1a4512c 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c

@@ -176,8 +176,7 @@
 		rc = clp_store_query_pci_fn(zdev, &rrb->response);
 		if (rc)
 			goto out;
-		if (rrb->response.pfgid)
-			rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
+		rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
 	} else {
 		zpci_err("Q PCI FN:\n");
 		zpci_err_clp(rrb->response.hdr.rsp, rc);

diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index db88cbf..235a410 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S

@@ -39,6 +39,7 @@
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
 		_etext = .;		/* End of text section */

diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 830502fe..6f251c4 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h

@@ -307,4 +307,11 @@
 	return test_thread_flag(TIF_32BIT);
 }
 
+static inline bool in_compat_syscall(void)
+{
+	/* Vector 0x110 is LINUX_32BIT_SYSCALL_TRAP */
+	return pt_regs_trap_type(current_pt_regs()) == 0x110;
+}
+#define in_compat_syscall in_compat_syscall
+
 #endif /* _ASM_SPARC64_COMPAT_H */

diff --git a/arch/sparc/include/asm/compat_signal.h b/arch/sparc/include/asm/compat_signal.h
index 9ed1f12..4b027b1 100644
--- a/arch/sparc/include/asm/compat_signal.h
+++ b/arch/sparc/include/asm/compat_signal.h

@@ -6,17 +6,17 @@
 
 #ifdef CONFIG_COMPAT
 struct __new_sigaction32 {
-	unsigned		sa_handler;
+	unsigned int		sa_handler;
 	unsigned int    	sa_flags;
-	unsigned		sa_restorer;     /* not used by Linux/SPARC yet */
+	unsigned int		sa_restorer;     /* not used by Linux/SPARC yet */
 	compat_sigset_t 	sa_mask;
 };
 
 struct __old_sigaction32 {
-	unsigned		sa_handler;
+	unsigned int		sa_handler;
 	compat_old_sigset_t  	sa_mask;
 	unsigned int    	sa_flags;
-	unsigned		sa_restorer;     /* not used by Linux/SPARC yet */
+	unsigned int		sa_restorer;     /* not used by Linux/SPARC yet */
 };
 #endif
 

diff --git a/arch/sparc/include/asm/obio.h b/arch/sparc/include/asm/obio.h
index 910c1d9..426ad75 100644
--- a/arch/sparc/include/asm/obio.h
+++ b/arch/sparc/include/asm/obio.h

@@ -117,9 +117,9 @@
 			      "i" (ASI_M_CTL));
 }
 
-static inline unsigned bw_get_prof_limit(int cpu)
+static inline unsigned int bw_get_prof_limit(int cpu)
 {
-	unsigned limit;
+	unsigned int limit;
 	
 	__asm__ __volatile__ ("lda [%1] %2, %0" :
 			      "=r" (limit) :
@@ -128,7 +128,7 @@
 	return limit;
 }
 
-static inline void bw_set_prof_limit(int cpu, unsigned limit)
+static inline void bw_set_prof_limit(int cpu, unsigned int limit)
 {
 	__asm__ __volatile__ ("sta %0, [%1] %2" : :
 			      "r" (limit),
@@ -136,9 +136,9 @@
 			      "i" (ASI_M_CTL));
 }
 
-static inline unsigned bw_get_ctrl(int cpu)
+static inline unsigned int bw_get_ctrl(int cpu)
 {
-	unsigned ctrl;
+	unsigned int ctrl;
 	
 	__asm__ __volatile__ ("lda [%1] %2, %0" :
 			      "=r" (ctrl) :
@@ -147,7 +147,7 @@
 	return ctrl;
 }
 
-static inline void bw_set_ctrl(int cpu, unsigned ctrl)
+static inline void bw_set_ctrl(int cpu, unsigned int ctrl)
 {
 	__asm__ __volatile__ ("sta %0, [%1] %2" : :
 			      "r" (ctrl),
@@ -155,9 +155,9 @@
 			      "i" (ASI_M_CTL));
 }
 
-static inline unsigned cc_get_ipen(void)
+static inline unsigned int cc_get_ipen(void)
 {
-	unsigned pending;
+	unsigned int pending;
 	
 	__asm__ __volatile__ ("lduha [%1] %2, %0" :
 			      "=r" (pending) :
@@ -166,7 +166,7 @@
 	return pending;
 }
 
-static inline void cc_set_iclr(unsigned clear)
+static inline void cc_set_iclr(unsigned int clear)
 {
 	__asm__ __volatile__ ("stha %0, [%1] %2" : :
 			      "r" (clear),
@@ -174,9 +174,9 @@
 			      "i" (ASI_M_MXCC));
 }
 
-static inline unsigned cc_get_imsk(void)
+static inline unsigned int cc_get_imsk(void)
 {
-	unsigned mask;
+	unsigned int mask;
 	
 	__asm__ __volatile__ ("lduha [%1] %2, %0" :
 			      "=r" (mask) :
@@ -185,7 +185,7 @@
 	return mask;
 }
 
-static inline void cc_set_imsk(unsigned mask)
+static inline void cc_set_imsk(unsigned int mask)
 {
 	__asm__ __volatile__ ("stha %0, [%1] %2" : :
 			      "r" (mask),
@@ -193,9 +193,9 @@
 			      "i" (ASI_M_MXCC));
 }
 
-static inline unsigned cc_get_imsk_other(int cpuid)
+static inline unsigned int cc_get_imsk_other(int cpuid)
 {
-	unsigned mask;
+	unsigned int mask;
 	
 	__asm__ __volatile__ ("lduha [%1] %2, %0" :
 			      "=r" (mask) :
@@ -204,7 +204,7 @@
 	return mask;
 }
 
-static inline void cc_set_imsk_other(int cpuid, unsigned mask)
+static inline void cc_set_imsk_other(int cpuid, unsigned int mask)
 {
 	__asm__ __volatile__ ("stha %0, [%1] %2" : :
 			      "r" (mask),
@@ -212,7 +212,7 @@
 			      "i" (ASI_M_CTL));
 }
 
-static inline void cc_set_igen(unsigned gen)
+static inline void cc_set_igen(unsigned int gen)
 {
 	__asm__ __volatile__ ("sta %0, [%1] %2" : :
 			      "r" (gen),

diff --git a/arch/sparc/include/asm/openprom.h b/arch/sparc/include/asm/openprom.h
index 47eaafa..63374c4 100644
--- a/arch/sparc/include/asm/openprom.h
+++ b/arch/sparc/include/asm/openprom.h

@@ -29,12 +29,12 @@
 /* V2 and later prom device operations. */
 struct linux_dev_v2_funcs {
 	phandle (*v2_inst2pkg)(int d);	/* Convert ihandle to phandle */
-	char * (*v2_dumb_mem_alloc)(char *va, unsigned sz);
-	void (*v2_dumb_mem_free)(char *va, unsigned sz);
+	char * (*v2_dumb_mem_alloc)(char *va, unsigned int sz);
+	void (*v2_dumb_mem_free)(char *va, unsigned int sz);
 
 	/* To map devices into virtual I/O space. */
-	char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned paddr, unsigned sz);
-	void (*v2_dumb_munmap)(char *virta, unsigned size);
+	char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned int paddr, unsigned int sz);
+	void (*v2_dumb_munmap)(char *virta, unsigned int size);
 
 	int (*v2_dev_open)(char *devpath);
 	void (*v2_dev_close)(int d);
@@ -50,7 +50,7 @@
 struct linux_mlist_v0 {
 	struct linux_mlist_v0 *theres_more;
 	unsigned int start_adr;
-	unsigned num_bytes;
+	unsigned int num_bytes;
 };
 
 struct linux_mem_v0 {

diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 7a38d6a..f089cfa 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h

@@ -218,7 +218,7 @@
 extern pgprot_t PAGE_COPY;
 extern pgprot_t PAGE_SHARED;
 
-/* XXX This uglyness is for the atyfb driver's sparc mmap() support. XXX */
+/* XXX This ugliness is for the atyfb driver's sparc mmap() support. XXX */
 extern unsigned long _PAGE_IE;
 extern unsigned long _PAGE_E;
 extern unsigned long _PAGE_CACHE;

diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 6924bde..ce2595c 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h

@@ -201,7 +201,7 @@
 #define KSTK_ESP(tsk)  (task_pt_regs(tsk)->u_regs[UREG_FP])
 
 /* Please see the commentary in asm/backoff.h for a description of
- * what these instructions are doing and how they have been choosen.
+ * what these instructions are doing and how they have been chosen.
  * To make a long story short, we are trying to yield the current cpu
  * strand during busy loops.
  */

diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h
index fc2df1e..f4eb630 100644
--- a/arch/sparc/include/asm/sigcontext.h
+++ b/arch/sparc/include/asm/sigcontext.h

@@ -25,7 +25,7 @@
 	int sigc_oswins;       /* outstanding windows */
 
 	/* stack ptrs for each regwin buf */
-	unsigned sigc_spbuf[__SUNOS_MAXWIN];
+	unsigned int sigc_spbuf[__SUNOS_MAXWIN];
 
 	/* Windows to restore after signal */
 	struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN];

diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 49f71fd..1757cd6 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h

@@ -3,6 +3,7 @@
 
 #include <uapi/linux/audit.h>
 #include <linux/kernel.h>
+#include <linux/compat.h>
 #include <linux/sched.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
@@ -128,7 +129,13 @@
 
 static inline int syscall_get_arch(void)
 {
-	return is_32bit_task() ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64;
+#if defined(CONFIG_SPARC64) && defined(CONFIG_COMPAT)
+	return in_compat_syscall() ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64;
+#elif defined(CONFIG_SPARC64)
+	return AUDIT_ARCH_SPARC64;
+#else
+	return AUDIT_ARCH_SPARC;
+#endif
 }
 
 #endif /* __ASM_SPARC_SYSCALL_H */

diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index ecb49cf..c6a155c 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h

@@ -149,7 +149,7 @@
 	 * page size in question.  So for PMD mappings (which fall on
 	 * bit 23, for 8MB per PMD) we must propagate bit 22 for a
 	 * 4MB huge page.  For huge PUDs (which fall on bit 33, for
-	 * 8GB per PUD), we have to accomodate 256MB and 2GB huge
+	 * 8GB per PUD), we have to accommodate 256MB and 2GB huge
 	 * pages.  So for those we propagate bits 32 to 28.
 	 */
 #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)	\

diff --git a/arch/sparc/include/uapi/asm/stat.h b/arch/sparc/include/uapi/asm/stat.h
index a232e9e..2f0583a 100644
--- a/arch/sparc/include/uapi/asm/stat.h
+++ b/arch/sparc/include/uapi/asm/stat.h

@@ -6,13 +6,13 @@
 #if defined(__sparc__) && defined(__arch64__)
 /* 64 bit sparc */
 struct stat {
-	unsigned   st_dev;
+	unsigned int st_dev;
 	ino_t   st_ino;
 	mode_t  st_mode;
 	short   st_nlink;
 	uid_t   st_uid;
 	gid_t   st_gid;
-	unsigned   st_rdev;
+	unsigned int st_rdev;
 	off_t   st_size;
 	time_t  st_atime;
 	time_t  st_mtime;

diff --git a/arch/sparc/kernel/audit.c b/arch/sparc/kernel/audit.c
index 24361b4..2585c1e 100644
--- a/arch/sparc/kernel/audit.c
+++ b/arch/sparc/kernel/audit.c

@@ -5,27 +5,27 @@
 
 #include "kernel.h"
 
-static unsigned dir_class[] = {
+static unsigned int dir_class[] = {
 #include <asm-generic/audit_dir_write.h>
 ~0U
 };
 
-static unsigned read_class[] = {
+static unsigned int read_class[] = {
 #include <asm-generic/audit_read.h>
 ~0U
 };
 
-static unsigned write_class[] = {
+static unsigned int write_class[] = {
 #include <asm-generic/audit_write.h>
 ~0U
 };
 
-static unsigned chattr_class[] = {
+static unsigned int chattr_class[] = {
 #include <asm-generic/audit_change_attr.h>
 ~0U
 };
 
-static unsigned signal_class[] = {
+static unsigned int signal_class[] = {
 #include <asm-generic/audit_signal.h>
 ~0U
 };
@@ -39,7 +39,7 @@
 	return 0;
 }
 
-int audit_classify_syscall(int abi, unsigned syscall)
+int audit_classify_syscall(int abi, unsigned int syscall)
 {
 #ifdef CONFIG_COMPAT
 	if (abi == AUDIT_ARCH_SPARC)

diff --git a/arch/sparc/kernel/compat_audit.c b/arch/sparc/kernel/compat_audit.c
index 7062263..e5611cd 100644
--- a/arch/sparc/kernel/compat_audit.c
+++ b/arch/sparc/kernel/compat_audit.c

@@ -2,32 +2,32 @@
 #include <asm/unistd.h>
 #include "kernel.h"
 
-unsigned sparc32_dir_class[] = {
+unsigned int sparc32_dir_class[] = {
 #include <asm-generic/audit_dir_write.h>
 ~0U
 };
 
-unsigned sparc32_chattr_class[] = {
+unsigned int sparc32_chattr_class[] = {
 #include <asm-generic/audit_change_attr.h>
 ~0U
 };
 
-unsigned sparc32_write_class[] = {
+unsigned int sparc32_write_class[] = {
 #include <asm-generic/audit_write.h>
 ~0U
 };
 
-unsigned sparc32_read_class[] = {
+unsigned int sparc32_read_class[] = {
 #include <asm-generic/audit_read.h>
 ~0U
 };
 
-unsigned sparc32_signal_class[] = {
+unsigned int sparc32_signal_class[] = {
 #include <asm-generic/audit_signal.h>
 ~0U
 };
 
-int sparc32_classify_syscall(unsigned syscall)
+int sparc32_classify_syscall(unsigned int syscall)
 {
 	switch(syscall) {
 	case __NR_open:

diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index a83707c..51aa6e8 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S

@@ -1255,7 +1255,7 @@
 kuw_patch1_7win:	sll	%o3, 6, %o3
 
 	/* No matter how much overhead this routine has in the worst
-	 * case scenerio, it is several times better than taking the
+	 * case scenario, it is several times better than taking the
 	 * traps with the old method of just doing flush_user_windows().
 	 */
 kill_user_windows:

diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 28fed53..ffd5ff4 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c

@@ -131,7 +131,7 @@
 EXPORT_SYMBOL(ioremap);
 
 /*
- * Comlimentary to ioremap().
+ * Complementary to ioremap().
  */
 void iounmap(volatile void __iomem *virtual)
 {
@@ -233,7 +233,7 @@
 }
 
 /*
- * Comlimentary to _sparc_ioremap().
+ * Complementary to _sparc_ioremap().
  */
 static void _sparc_free_io(struct resource *res)
 {
@@ -532,7 +532,7 @@
 }
 
 /* Map a set of buffers described by scatterlist in streaming
- * mode for DMA.  This is the scather-gather version of the
+ * mode for DMA.  This is the scatter-gather version of the
  * above pci_map_single interface.  Here the scatter gather list
  * elements are each tagged with the appropriate dma address
  * and length.  They are obtained via sg_dma_{address,length}(SG).

diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index e7f652b..5057ec2 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h

@@ -54,12 +54,12 @@
 asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp);
 
 /* compat_audit.c */
-extern unsigned sparc32_dir_class[];
-extern unsigned sparc32_chattr_class[];
-extern unsigned sparc32_write_class[];
-extern unsigned sparc32_read_class[];
-extern unsigned sparc32_signal_class[];
-int sparc32_classify_syscall(unsigned syscall);
+extern unsigned int sparc32_dir_class[];
+extern unsigned int sparc32_chattr_class[];
+extern unsigned int sparc32_write_class[];
+extern unsigned int sparc32_read_class[];
+extern unsigned int sparc32_signal_class[];
+int sparc32_classify_syscall(unsigned int syscall);
 #endif
 
 #ifdef CONFIG_SPARC32

diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 42efcf8..33cd171 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c

@@ -203,7 +203,7 @@
 
 /*
  * Build a LEON IRQ for the edge triggered LEON IRQ controller:
- *  Edge (normal) IRQ           - handle_simple_irq, ack=DONT-CARE, never ack
+ *  Edge (normal) IRQ           - handle_simple_irq, ack=DON'T-CARE, never ack
  *  Level IRQ (PCI|Level-GPIO)  - handle_fasteoi_irq, ack=1, ack after ISR
  *  Per-CPU Edge                - handle_percpu_irq, ack=0
  */

diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 46a5964..c16ef1a 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c

@@ -103,7 +103,7 @@
 	mm_segment_t old_fs;
 	
 	__asm__ __volatile__ ("flushw");
-	rw = compat_ptr((unsigned)regs->u_regs[14]);
+	rw = compat_ptr((unsigned int)regs->u_regs[14]);
 	old_fs = get_fs();
 	set_fs (USER_DS);
 	if (copy_from_user (&r_w, rw, sizeof(r_w))) {

diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index baef495..69d75ff 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c

@@ -109,7 +109,7 @@
 unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */
 
 static void
-prom_console_write(struct console *con, const char *s, unsigned n)
+prom_console_write(struct console *con, const char *s, unsigned int n)
 {
 	prom_write(s, n);
 }

diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index f3185e2..26db95b 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c

@@ -77,7 +77,7 @@
 };
 
 static void
-prom_console_write(struct console *con, const char *s, unsigned n)
+prom_console_write(struct console *con, const char *s, unsigned int n)
 {
 	prom_write(s, n);
 }

diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 4eed773..3c25241 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c

@@ -144,7 +144,7 @@
 	compat_uptr_t fpu_save;
 	compat_uptr_t rwin_save;
 	unsigned int psr;
-	unsigned pc, npc;
+	unsigned int pc, npc;
 	sigset_t set;
 	compat_sigset_t seta;
 	int err, i;

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index b489e97..fe8b8ee 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c

@@ -337,10 +337,10 @@
 		switch (call) {
 		case SEMOP:
 			err = sys_semtimedop(first, ptr,
-					     (unsigned)second, NULL);
+					     (unsigned int)second, NULL);
 			goto out;
 		case SEMTIMEDOP:
-			err = sys_semtimedop(first, ptr, (unsigned)second,
+			err = sys_semtimedop(first, ptr, (unsigned int)second,
 				(const struct timespec __user *)
 					     (unsigned long) fifth);
 			goto out;

diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
index 7f41d40..fa8e21a 100644
--- a/arch/sparc/kernel/sysfs.c
+++ b/arch/sparc/kernel/sysfs.c

@@ -1,4 +1,4 @@
-/* sysfs.c: Toplogy sysfs support code for sparc64.
+/* sysfs.c: Topology sysfs support code for sparc64.
  *
  * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
  */

diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index d89e97b..9aacb91 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c

@@ -209,8 +209,8 @@
 	if (size == 16) {
 		size = 8;
 		zero = (((long)(reg_num ?
-		        (unsigned)fetch_reg(reg_num, regs) : 0)) << 32) |
-			(unsigned)fetch_reg(reg_num + 1, regs);
+		        (unsigned int)fetch_reg(reg_num, regs) : 0)) << 32) |
+			(unsigned int)fetch_reg(reg_num + 1, regs);
 	} else if (reg_num) {
 		src_val_p = fetch_reg_addr(reg_num, regs);
 	}

diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index f1a2f68..aadd321 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S

@@ -48,6 +48,7 @@
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		*(.gnu.warning)
 	} = 0
 	_etext = .;

diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index c399e7b..b6c559c 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c

@@ -303,10 +303,10 @@
 		fixup = search_extables_range(regs->pc, &g2);
 		/* Values below 10 are reserved for other things */
 		if (fixup > 10) {
-			extern const unsigned __memset_start[];
-			extern const unsigned __memset_end[];
-			extern const unsigned __csum_partial_copy_start[];
-			extern const unsigned __csum_partial_copy_end[];
+			extern const unsigned int __memset_start[];
+			extern const unsigned int __memset_end[];
+			extern const unsigned int __csum_partial_copy_start[];
+			extern const unsigned int __csum_partial_copy_end[];
 
 #ifdef DEBUG_EXCEPTIONS
 			printk("Exception: PC<%08lx> faddr<%08lx>\n",

diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 3e6e05a..a6d9204 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c

@@ -351,7 +351,7 @@
  *
  * Sometimes we need to emit a branch earlier in the code
  * sequence.  And in these situations we adjust "destination"
- * to accomodate this difference.  For example, if we needed
+ * to accommodate this difference.  For example, if we needed
  * to emit a branch (and it's delay slot) right before the
  * final instruction emitted for a BPF opcode, we'd use
  * "destination + 4" instead of just plain "destination" above.

diff --git a/arch/tile/include/hv/drv_mpipe_intf.h b/arch/tile/include/hv/drv_mpipe_intf.h
index c97e416..ff7f50f 100644
--- a/arch/tile/include/hv/drv_mpipe_intf.h
+++ b/arch/tile/include/hv/drv_mpipe_intf.h

@@ -211,7 +211,7 @@
  *  request shared data permission on the same link.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA,
- *  or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_DATA may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed.
  */
 #define GXIO_MPIPE_LINK_DATA               0x00000001UL
@@ -219,7 +219,7 @@
 /** Do not request data permission on the specified link.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA,
- *  or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_DATA may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed.
  */
 #define GXIO_MPIPE_LINK_NO_DATA            0x00000002UL
@@ -230,7 +230,7 @@
  *  data permission on it, this open will fail.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA,
- *  or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_DATA may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed.
  */
 #define GXIO_MPIPE_LINK_EXCL_DATA          0x00000004UL
@@ -241,7 +241,7 @@
  *  permission on the same link.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS,
- *  or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_STATS may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed.
  */
 #define GXIO_MPIPE_LINK_STATS              0x00000008UL
@@ -249,7 +249,7 @@
 /** Do not request stats permission on the specified link.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS,
- *  or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_STATS may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed.
  */
 #define GXIO_MPIPE_LINK_NO_STATS           0x00000010UL
@@ -267,7 +267,7 @@
  *  reset by other statistics programs.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS,
- *  or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_STATS may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed.
  */
 #define GXIO_MPIPE_LINK_EXCL_STATS         0x00000020UL
@@ -278,7 +278,7 @@
  *  permission on the same link.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL,
- *  or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_CTL may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed.
  */
 #define GXIO_MPIPE_LINK_CTL                0x00000040UL
@@ -286,7 +286,7 @@
 /** Do not request control permission on the specified link.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL,
- *  or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_CTL may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed.
  */
 #define GXIO_MPIPE_LINK_NO_CTL             0x00000080UL
@@ -301,7 +301,7 @@
  *  it prevents programs like mpipe-link from configuring the link.
  *
  *  No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL,
- *  or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open()
+ *  or ::GXIO_MPIPE_LINK_EXCL_CTL may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed.
  */
 #define GXIO_MPIPE_LINK_EXCL_CTL           0x00000100UL
@@ -311,7 +311,7 @@
  *  change the desired state of the link when it is closed or the process
  *  exits.  No more than one of ::GXIO_MPIPE_LINK_AUTO_UP,
  *  ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or
- *  ::GXIO_MPIPE_LINK_AUTO_NONE may be specifed in a gxio_mpipe_link_open()
+ *  ::GXIO_MPIPE_LINK_AUTO_NONE may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
  */
 #define GXIO_MPIPE_LINK_AUTO_UP            0x00000200UL
@@ -322,7 +322,7 @@
  *  open, set the desired state of the link to down.  No more than one of
  *  ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN,
  *  ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be
- *  specifed in a gxio_mpipe_link_open() call.  If none are specified,
+ *  specified in a gxio_mpipe_link_open() call.  If none are specified,
  *  ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
  */
 #define GXIO_MPIPE_LINK_AUTO_UPDOWN        0x00000400UL
@@ -332,7 +332,7 @@
  *  process has the link open, set the desired state of the link to down.
  *  No more than one of ::GXIO_MPIPE_LINK_AUTO_UP,
  *  ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or
- *  ::GXIO_MPIPE_LINK_AUTO_NONE may be specifed in a gxio_mpipe_link_open()
+ *  ::GXIO_MPIPE_LINK_AUTO_NONE may be specified in a gxio_mpipe_link_open()
  *  call.  If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
  */
 #define GXIO_MPIPE_LINK_AUTO_DOWN          0x00000800UL
@@ -342,7 +342,7 @@
  *  closed or the process exits.  No more than one of
  *  ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN,
  *  ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be
- *  specifed in a gxio_mpipe_link_open() call.  If none are specified,
+ *  specified in a gxio_mpipe_link_open() call.  If none are specified,
  *  ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
  */
 #define GXIO_MPIPE_LINK_AUTO_NONE          0x00001000UL

diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c
index a506c2c..9247d6b 100644
--- a/arch/tile/kernel/kgdb.c
+++ b/arch/tile/kernel/kgdb.c

@@ -126,15 +126,15 @@
 sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
 {
 	struct pt_regs *thread_regs;
+	const int NGPRS = TREG_LAST_GPR + 1;
 
 	if (task == NULL)
 		return;
 
-	/* Initialize to zero. */
-	memset(gdb_regs, 0, NUMREGBYTES);
-
 	thread_regs = task_pt_regs(task);
-	memcpy(gdb_regs, thread_regs, TREG_LAST_GPR * sizeof(unsigned long));
+	memcpy(gdb_regs, thread_regs, NGPRS * sizeof(unsigned long));
+	memset(&gdb_regs[NGPRS], 0,
+	       (TILEGX_PC_REGNUM - NGPRS) * sizeof(unsigned long));
 	gdb_regs[TILEGX_PC_REGNUM] = thread_regs->pc;
 	gdb_regs[TILEGX_FAULTNUM_REGNUM] = thread_regs->faultnum;
 }
@@ -433,9 +433,9 @@
 struct kgdb_arch arch_kgdb_ops;
 
 /*
- * kgdb_arch_init - Perform any architecture specific initalization.
+ * kgdb_arch_init - Perform any architecture specific initialization.
  *
- * This function will handle the initalization of any architecture
+ * This function will handle the initialization of any architecture
  * specific callbacks.
  */
 int kgdb_arch_init(void)
@@ -447,9 +447,9 @@
 }
 
 /*
- * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ * kgdb_arch_exit - Perform any architecture specific uninitialization.
  *
- * This function will handle the uninitalization of any architecture
+ * This function will handle the uninitialization of any architecture
  * specific callbacks, for dynamic registration and unregistration.
  */
 void kgdb_arch_exit(void)

diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 4c017d0..aa2b44c 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c

@@ -1326,7 +1326,7 @@
 
 
 /*
- * See tile_cfg_read() for relevent comments.
+ * See tile_cfg_read() for relevant comments.
  * Note that "val" is the value to write, not a pointer to that value.
  */
 static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset,

diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index 0e059a0..378f5d8 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S

@@ -45,6 +45,7 @@
     LOCK_TEXT
     KPROBES_TEXT
     IRQENTRY_TEXT
+    SOFTIRQENTRY_TEXT
     __fix_text_end = .;   /* tile-cpack won't rearrange before this */
     ALIGN_FUNCTION();
     *(.hottext*)

diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index b821b13..8a6b571 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c

@@ -133,7 +133,7 @@
 	ptr += strlen("proc");
 	ptr = skip_spaces(ptr);
 
-	file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
+	file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0);
 	if (IS_ERR(file)) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
 		printk(KERN_ERR "open /proc/%s: %ld\n", ptr, PTR_ERR(file));

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8b680a5..2dc18605 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -28,6 +28,7 @@
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_MMIO_FLUSH
 	select ARCH_HAS_SG_CHAIN
@@ -1209,6 +1210,15 @@
 	def_bool y
 	depends on MICROCODE
 
+config PERF_EVENTS_AMD_POWER
+	depends on PERF_EVENTS && CPU_SUP_AMD
+	tristate "AMD Processor Power Reporting Mechanism"
+	---help---
+	  Provide power reporting mechanism support for AMD processors.
+	  Currently, it leverages X86_FEATURE_ACC_POWER
+	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+	  average power consumption on Family 15h processors.
+
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 0bf6749..b1ef9e4 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile

@@ -12,6 +12,13 @@
 KASAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 
+# Kernel does not boot with kcov instrumentation here.
+# One of the problems observed was insertion of __sanitizer_cov_trace_pc()
+# callback into middle of per-cpu data enabling code. Thus the callback observed
+# inconsistent state and crashed. We are interested mostly in syscall coverage,
+# so boot code is not interesting anyway.
+KCOV_INSTRUMENT		:= n
+
 # If you want to preset the SVGA mode, uncomment the next line and
 # set SVGA_MODE to whatever number you want.
 # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 5e1d26e..6915ff2 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile

@@ -19,6 +19,9 @@
 KASAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT		:= n
+
 targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
 	vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
 

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index f9fb859..6874da5 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile

@@ -7,6 +7,9 @@
 UBSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT		:= n
+
 VDSO64-$(CONFIG_X86_64)		:= y
 VDSOX32-$(CONFIG_X86_X32_ABI)	:= y
 VDSO32-$(CONFIG_X86_32)		:= y

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 1a50e09..03c3eb7 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c

@@ -178,7 +178,7 @@
 
 	/*
 	 * GCC likes to generate cmov here, but this branch is extremely
-	 * predictable (it's just a funciton of time and the likely is
+	 * predictable (it's just a function of time and the likely is
 	 * very likely) and there's a data dependence, so force GCC
 	 * to generate a branch instead.  I don't barrier() because
 	 * we don't actually need a barrier, and if this function

diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index fdfea15..f59618a 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile

@@ -1,6 +1,7 @@
 obj-y			+= core.o
 
 obj-$(CONFIG_CPU_SUP_AMD)               += amd/core.o amd/uncore.o
+obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= amd/power.o
 obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
 ifdef CONFIG_AMD_IOMMU
 obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 049ada8d..86a9bec 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c

@@ -369,7 +369,7 @@
 
 	WARN_ON_ONCE(cpuc->amd_nb);
 
-	if (boot_cpu_data.x86_max_cores < 2)
+	if (!x86_pmu.amd_nb_constraints)
 		return NOTIFY_OK;
 
 	cpuc->amd_nb = amd_alloc_nb(cpu);
@@ -388,7 +388,7 @@
 
 	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
-	if (boot_cpu_data.x86_max_cores < 2)
+	if (!x86_pmu.amd_nb_constraints)
 		return;
 
 	nb_id = amd_get_nb_id(cpu);
@@ -414,7 +414,7 @@
 {
 	struct cpu_hw_events *cpuhw;
 
-	if (boot_cpu_data.x86_max_cores < 2)
+	if (!x86_pmu.amd_nb_constraints)
 		return;
 
 	cpuhw = &per_cpu(cpu_hw_events, cpu);
@@ -648,6 +648,8 @@
 	.cpu_prepare		= amd_pmu_cpu_prepare,
 	.cpu_starting		= amd_pmu_cpu_starting,
 	.cpu_dead		= amd_pmu_cpu_dead,
+
+	.amd_nb_constraints	= 1,
 };
 
 static int __init amd_core_pmu_init(void)
@@ -674,6 +676,11 @@
 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
 	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
+	/*
+	 * AMD Core perfctr has separate MSRs for the NB events, see
+	 * the amd/uncore.c driver.
+	 */
+	x86_pmu.amd_nb_constraints = 0;
 
 	pr_cont("core perfctr, ");
 	return 0;
@@ -693,6 +700,14 @@
 	if (ret)
 		return ret;
 
+	if (num_possible_cpus() == 1) {
+		/*
+		 * No point in allocating data structures to serialize
+		 * against other CPUs, when there is only the one CPU.
+		 */
+		x86_pmu.amd_nb_constraints = 0;
+	}
+
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 51087c2..feb90f6 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c

@@ -28,10 +28,46 @@
 #define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
 #define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
 
+
+/*
+ * IBS states:
+ *
+ * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
+ * and any further add()s must fail.
+ *
+ * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
+ * complicated by the fact that the IBS hardware can send late NMIs (ie. after
+ * we've cleared the EN bit).
+ *
+ * In order to consume these late NMIs we have the STOPPED state, any NMI that
+ * happens after we've cleared the EN state will clear this bit and report the
+ * NMI handled (this is fundamentally racy in the face or multiple NMI sources,
+ * someone else can consume our BIT and our NMI will go unhandled).
+ *
+ * And since we cannot set/clear this separate bit together with the EN bit,
+ * there are races; if we cleared STARTED early, an NMI could land in
+ * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
+ * could happen if the period is small enough), and consume our STOPPED bit
+ * and trigger streams of unhandled NMIs.
+ *
+ * If, however, we clear STARTED late, an NMI can hit between clearing the
+ * EN bit and clearing STARTED, still see STARTED set and process the event.
+ * If this event will have the VALID bit clear, we bail properly, but this
+ * is not a given. With VALID set we can end up calling pmu::stop() again
+ * (the throttle logic) and trigger the WARNs in there.
+ *
+ * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
+ * nesting, and clear STARTED late, so that we have a well defined state over
+ * the clearing of the EN bit.
+ *
+ * XXX: we could probably be using !atomic bitops for all this.
+ */
+
 enum ibs_states {
 	IBS_ENABLED	= 0,
 	IBS_STARTED	= 1,
 	IBS_STOPPING	= 2,
+	IBS_STOPPED	= 3,
 
 	IBS_MAX_STATES,
 };
@@ -376,7 +412,12 @@
 	hwc->state = 0;
 
 	perf_ibs_set_period(perf_ibs, hwc, &period);
-	set_bit(IBS_STARTED, pcpu->state);
+	/*
+	 * Set STARTED before enabling the hardware, such that a subsequent NMI
+	 * must observe it.
+	 */
+	set_bit(IBS_STARTED,    pcpu->state);
+	clear_bit(IBS_STOPPING, pcpu->state);
 	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
 
 	perf_event_update_userpage(event);
@@ -390,7 +431,10 @@
 	u64 config;
 	int stopping;
 
-	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+	if (test_and_set_bit(IBS_STOPPING, pcpu->state))
+		return;
+
+	stopping = test_bit(IBS_STARTED, pcpu->state);
 
 	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
 		return;
@@ -398,8 +442,24 @@
 	rdmsrl(hwc->config_base, config);
 
 	if (stopping) {
-		set_bit(IBS_STOPPING, pcpu->state);
+		/*
+		 * Set STOPPED before disabling the hardware, such that it
+		 * must be visible to NMIs the moment we clear the EN bit,
+		 * at which point we can generate an !VALID sample which
+		 * we need to consume.
+		 */
+		set_bit(IBS_STOPPED, pcpu->state);
 		perf_ibs_disable_event(perf_ibs, hwc, config);
+		/*
+		 * Clear STARTED after disabling the hardware; if it were
+		 * cleared before an NMI hitting after the clear but before
+		 * clearing the EN bit might think it a spurious NMI and not
+		 * handle it.
+		 *
+		 * Clearing it after, however, creates the problem of the NMI
+		 * handler seeing STARTED but not having a valid sample.
+		 */
+		clear_bit(IBS_STARTED, pcpu->state);
 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 		hwc->state |= PERF_HES_STOPPED;
 	}
@@ -527,20 +587,24 @@
 	u64 *buf, *config, period;
 
 	if (!test_bit(IBS_STARTED, pcpu->state)) {
+fail:
 		/*
 		 * Catch spurious interrupts after stopping IBS: After
 		 * disabling IBS there could be still incoming NMIs
 		 * with samples that even have the valid bit cleared.
 		 * Mark all this NMIs as handled.
 		 */
-		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+		if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
+			return 1;
+
+		return 0;
 	}
 
 	msr = hwc->config_base;
 	buf = ibs_data.regs;
 	rdmsrl(msr, *buf);
 	if (!(*buf++ & perf_ibs->valid_mask))
-		return 0;
+		goto fail;
 
 	config = &ibs_data.regs[0];
 	perf_ibs_event_update(perf_ibs, event, config);
@@ -599,7 +663,7 @@
 	throttle = perf_event_overflow(event, &data, &regs);
 out:
 	if (throttle)
-		perf_ibs_disable_event(perf_ibs, hwc, *config);
+		perf_ibs_stop(event, 0);
 	else
 		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
 
@@ -611,6 +675,7 @@
 static int
 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
+	u64 stamp = sched_clock();
 	int handled = 0;
 
 	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
@@ -619,6 +684,8 @@
 	if (handled)
 		inc_irq_stat(apic_perf_irqs);
 
+	perf_sample_event_took(sched_clock() - stamp);
+
 	return handled;
 }
 NOKPROBE_SYMBOL(perf_ibs_nmi_handler);

diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 635e5eb..40625ca 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c

@@ -118,6 +118,11 @@
 	AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
 	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
 	AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
+	AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h,    "csource=0x14"),
+	AMD_IOMMU_EVENT_DESC(vapic_int_non_guest,     "csource=0x15"),
+	AMD_IOMMU_EVENT_DESC(vapic_int_guest,         "csource=0x16"),
+	AMD_IOMMU_EVENT_DESC(smi_recv,                "csource=0x17"),
+	AMD_IOMMU_EVENT_DESC(smi_blk,                 "csource=0x18"),
 	{ /* end: all zeroes */ },
 };
 

diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
new file mode 100644
index 0000000..55a3529
--- /dev/null
+++ b/arch/x86/events/amd/power.c

@@ -0,0 +1,353 @@
+/*
+ * Performance events - AMD Processor Power Reporting Mechanism
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Huang Rui <ray.huang@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define MSR_F15H_CU_PWR_ACCUMULATOR     0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
+#define MSR_F15H_PTSC			0xc0010280
+
+/* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */
+#define AMD_POWER_EVENT_MASK		0xFFULL
+
+/*
+ * Accumulated power status counters.
+ */
+#define AMD_POWER_EVENTSEL_PKG		1
+
+/*
+ * The ratio of compute unit power accumulator sample period to the
+ * PTSC period.
+ */
+static unsigned int cpu_pwr_sample_ratio;
+
+/* Maximum accumulated power of a compute unit. */
+static u64 max_cu_acc_power;
+
+static struct pmu pmu_class;
+
+/*
+ * Accumulated power represents the sum of each compute unit's (CU) power
+ * consumption. On any core of each CU we read the total accumulated power from
+ * MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask represents CPU bit map of all cores
+ * which are picked to measure the power for the CUs they belong to.
+ */
+static cpumask_t cpu_mask;
+
+static void event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc;
+	u64 delta, tdelta;
+
+	prev_pwr_acc = hwc->pwr_acc;
+	prev_ptsc = hwc->ptsc;
+	rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc);
+	rdmsrl(MSR_F15H_PTSC, new_ptsc);
+
+	/*
+	 * Calculate the CU power consumption over a time period, the unit of
+	 * final value (delta) is micro-Watts. Then add it to the event count.
+	 */
+	if (new_pwr_acc < prev_pwr_acc) {
+		delta = max_cu_acc_power + new_pwr_acc;
+		delta -= prev_pwr_acc;
+	} else
+		delta = new_pwr_acc - prev_pwr_acc;
+
+	delta *= cpu_pwr_sample_ratio * 1000;
+	tdelta = new_ptsc - prev_ptsc;
+
+	do_div(delta, tdelta);
+	local64_add(delta, &event->count);
+}
+
+static void __pmu_event_start(struct perf_event *event)
+{
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	event->hw.state = 0;
+
+	rdmsrl(MSR_F15H_PTSC, event->hw.ptsc);
+	rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc);
+}
+
+static void pmu_event_start(struct perf_event *event, int mode)
+{
+	__pmu_event_start(event);
+}
+
+static void pmu_event_stop(struct perf_event *event, int mode)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Mark event as deactivated and stopped. */
+	if (!(hwc->state & PERF_HES_STOPPED))
+		hwc->state |= PERF_HES_STOPPED;
+
+	/* Check if software counter update is necessary. */
+	if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		/*
+		 * Drain the remaining delta count out of an event
+		 * that we are disabling:
+		 */
+		event_update(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int pmu_event_add(struct perf_event *event, int mode)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (mode & PERF_EF_START)
+		__pmu_event_start(event);
+
+	return 0;
+}
+
+static void pmu_event_del(struct perf_event *event, int flags)
+{
+	pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int pmu_event_init(struct perf_event *event)
+{
+	u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK;
+
+	/* Only look at AMD power events. */
+	if (event->attr.type != pmu_class.type)
+		return -ENOENT;
+
+	/* Unsupported modes and filters. */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    /* no sampling */
+	    event->attr.sample_period)
+		return -EINVAL;
+
+	if (cfg != AMD_POWER_EVENTSEL_PKG)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void pmu_event_read(struct perf_event *event)
+{
+	event_update(event);
+}
+
+static ssize_t
+get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL);
+
+static struct attribute *pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group pmu_attr_group = {
+	.attrs = pmu_attrs,
+};
+
+/*
+ * Currently it only supports to report the power of each
+ * processor/package.
+ */
+EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01");
+
+EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts");
+
+/* Convert the count from micro-Watts to milli-Watts. */
+EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3");
+
+static struct attribute *events_attr[] = {
+	EVENT_PTR(power_pkg),
+	EVENT_PTR(power_pkg_unit),
+	EVENT_PTR(power_pkg_scale),
+	NULL,
+};
+
+static struct attribute_group pmu_events_group = {
+	.name	= "events",
+	.attrs	= events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *formats_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group pmu_format_group = {
+	.name	= "format",
+	.attrs	= formats_attr,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&pmu_attr_group,
+	&pmu_format_group,
+	&pmu_events_group,
+	NULL,
+};
+
+static struct pmu pmu_class = {
+	.attr_groups	= attr_groups,
+	/* system-wide only */
+	.task_ctx_nr	= perf_invalid_context,
+	.event_init	= pmu_event_init,
+	.add		= pmu_event_add,
+	.del		= pmu_event_del,
+	.start		= pmu_event_start,
+	.stop		= pmu_event_stop,
+	.read		= pmu_event_read,
+};
+
+static void power_cpu_exit(int cpu)
+{
+	int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask))
+		return;
+
+	/*
+	 * Find a new CPU on the same compute unit, if was set in cpumask
+	 * and still some CPUs on compute unit. Then migrate event and
+	 * context to new CPU.
+	 */
+	target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+	if (target < nr_cpumask_bits) {
+		cpumask_set_cpu(target, &cpu_mask);
+		perf_pmu_migrate_context(&pmu_class, cpu, target);
+	}
+}
+
+static void power_cpu_init(int cpu)
+{
+	int target;
+
+	/*
+	 * 1) If any CPU is set at cpu_mask in the same compute unit, do
+	 * nothing.
+	 * 2) If no CPU is set at cpu_mask in the same compute unit,
+	 * set current STARTING CPU.
+	 *
+	 * Note: if there is a CPU aside of the new one already in the
+	 * sibling mask, then it is also in cpu_mask.
+	 */
+	target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+	if (target >= nr_cpumask_bits)
+		cpumask_set_cpu(cpu, &cpu_mask);
+}
+
+static int
+power_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_FAILED:
+	case CPU_STARTING:
+		power_cpu_init(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		power_cpu_exit(cpu);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block power_cpu_notifier_nb = {
+	.notifier_call = power_cpu_notifier,
+	.priority = CPU_PRI_PERF,
+};
+
+static const struct x86_cpu_id cpu_match[] = {
+	{ .vendor = X86_VENDOR_AMD, .family = 0x15 },
+	{},
+};
+
+static int __init amd_power_pmu_init(void)
+{
+	int cpu, target, ret;
+
+	if (!x86_match_cpu(cpu_match))
+		return 0;
+
+	if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
+		return -ENODEV;
+
+	cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);
+
+	if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) {
+		pr_err("Failed to read max compute unit power accumulator MSR\n");
+		return -ENODEV;
+	}
+
+	cpu_notifier_register_begin();
+
+	/* Choose one online core of each compute unit. */
+	for_each_online_cpu(cpu) {
+		target = cpumask_first(topology_sibling_cpumask(cpu));
+		if (!cpumask_test_cpu(target, &cpu_mask))
+			cpumask_set_cpu(target, &cpu_mask);
+	}
+
+	ret = perf_pmu_register(&pmu_class, "power", -1);
+	if (WARN_ON(ret)) {
+		pr_warn("AMD Power PMU registration failed\n");
+		goto out;
+	}
+
+	__register_cpu_notifier(&power_cpu_notifier_nb);
+
+	pr_info("AMD Power PMU detected\n");
+
+out:
+	cpu_notifier_register_done();
+
+	return ret;
+}
+module_init(amd_power_pmu_init);
+
+static void __exit amd_power_pmu_exit(void)
+{
+	cpu_notifier_register_begin();
+	__unregister_cpu_notifier(&power_cpu_notifier_nb);
+	cpu_notifier_register_done();
+
+	perf_pmu_unregister(&pmu_class);
+}
+module_exit(amd_power_pmu_exit);
+
+MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
+MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism");
+MODULE_LICENSE("GPL v2");

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 9b6ad08..041e442 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c

@@ -1602,8 +1602,7 @@
 	return new;
 }
 
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
-			  char *page)
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
 {
 	struct perf_pmu_events_attr *pmu_attr = \
 		container_of(attr, struct perf_pmu_events_attr, attr);
@@ -1615,6 +1614,7 @@
 
 	return x86_pmu.events_sysfs_show(page, config);
 }
+EXPORT_SYMBOL_GPL(events_sysfs_show);
 
 EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
 EVENT_ATTR(instructions,		INSTRUCTIONS		);

diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 93cb412..7b5fd81 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c

@@ -13,8 +13,16 @@
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
+#define MBM_CNTR_WIDTH		24
+/*
+ * Guaranteed time in ms as per SDM where MBM counters will not overflow.
+ */
+#define MBM_CTR_OVERFLOW_TIME	1000
+
 static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+static bool cqm_enabled, mbm_enabled;
+unsigned int mbm_socket_max;
 
 /**
  * struct intel_pqr_state - State cache for the PQR MSR
@@ -42,8 +50,37 @@
  * interrupts disabled, which is sufficient for the protection.
  */
 static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+static struct hrtimer *mbm_timers;
+/**
+ * struct sample - mbm event's (local or total) data
+ * @total_bytes    #bytes since we began monitoring
+ * @prev_msr       previous value of MSR
+ */
+struct sample {
+	u64	total_bytes;
+	u64	prev_msr;
+};
 
 /*
+ * samples profiled for total memory bandwidth type events
+ */
+static struct sample *mbm_total;
+/*
+ * samples profiled for local memory bandwidth type events
+ */
+static struct sample *mbm_local;
+
+#define pkg_id	topology_physical_package_id(smp_processor_id())
+/*
+ * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array.
+ * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of
+ * rmids per socket, an example is given below
+ * RMID1 of Socket0:  vrmid =  1
+ * RMID1 of Socket1:  vrmid =  1 * (cqm_max_rmid + 1) + 1
+ * RMID1 of Socket2:  vrmid =  2 * (cqm_max_rmid + 1) + 1
+ */
+#define rmid_2_index(rmid)  ((pkg_id * (cqm_max_rmid + 1)) + rmid)
+/*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
  * Also protects event->hw.cqm_rmid
  *
@@ -65,9 +102,13 @@
 #define RMID_VAL_ERROR		(1ULL << 63)
 #define RMID_VAL_UNAVAIL	(1ULL << 62)
 
-#define QOS_L3_OCCUP_EVENT_ID	(1 << 0)
-
-#define QOS_EVENT_MASK	QOS_L3_OCCUP_EVENT_ID
+/*
+ * Event IDs are used to program IA32_QM_EVTSEL before reading event
+ * counter from IA32_QM_CTR
+ */
+#define QOS_L3_OCCUP_EVENT_ID	0x01
+#define QOS_MBM_TOTAL_EVENT_ID	0x02
+#define QOS_MBM_LOCAL_EVENT_ID	0x03
 
 /*
  * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
@@ -211,6 +252,21 @@
 	list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
 }
 
+static void cqm_cleanup(void)
+{
+	int i;
+
+	if (!cqm_rmid_ptrs)
+		return;
+
+	for (i = 0; i < cqm_max_rmid; i++)
+		kfree(cqm_rmid_ptrs[i]);
+
+	kfree(cqm_rmid_ptrs);
+	cqm_rmid_ptrs = NULL;
+	cqm_enabled = false;
+}
+
 static int intel_cqm_setup_rmid_cache(void)
 {
 	struct cqm_rmid_entry *entry;
@@ -218,7 +274,7 @@
 	int r = 0;
 
 	nr_rmids = cqm_max_rmid + 1;
-	cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+	cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
 				nr_rmids, GFP_KERNEL);
 	if (!cqm_rmid_ptrs)
 		return -ENOMEM;
@@ -249,11 +305,9 @@
 	mutex_unlock(&cache_mutex);
 
 	return 0;
-fail:
-	while (r--)
-		kfree(cqm_rmid_ptrs[r]);
 
-	kfree(cqm_rmid_ptrs);
+fail:
+	cqm_cleanup();
 	return -ENOMEM;
 }
 
@@ -281,9 +335,13 @@
 
 	/*
 	 * Events that target same task are placed into the same cache group.
+	 * Mark it as a multi event group, so that we update ->count
+	 * for every event rather than just the group leader later.
 	 */
-	if (a->hw.target == b->hw.target)
+	if (a->hw.target == b->hw.target) {
+		b->hw.is_group_event = true;
 		return true;
+	}
 
 	/*
 	 * Are we an inherited event?
@@ -392,10 +450,26 @@
 
 struct rmid_read {
 	u32 rmid;
+	u32 evt_type;
 	atomic64_t value;
 };
 
 static void __intel_cqm_event_count(void *info);
+static void init_mbm_sample(u32 rmid, u32 evt_type);
+static void __intel_mbm_event_count(void *info);
+
+static bool is_mbm_event(int e)
+{
+	return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
+}
+
+static void cqm_mask_call(struct rmid_read *rr)
+{
+	if (is_mbm_event(rr->evt_type))
+		on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, rr, 1);
+	else
+		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1);
+}
 
 /*
  * Exchange the RMID of a group of events.
@@ -413,12 +487,12 @@
 	 */
 	if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
 		struct rmid_read rr = {
-			.value = ATOMIC64_INIT(0),
 			.rmid = old_rmid,
+			.evt_type = group->attr.config,
+			.value = ATOMIC64_INIT(0),
 		};
 
-		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
-				 &rr, 1);
+		cqm_mask_call(&rr);
 		local64_set(&group->count, atomic64_read(&rr.value));
 	}
 
@@ -430,6 +504,22 @@
 
 	raw_spin_unlock_irq(&cache_lock);
 
+	/*
+	 * If the allocation is for mbm, init the mbm stats.
+	 * Need to check if each event in the group is mbm event
+	 * because there could be multiple type of events in the same group.
+	 */
+	if (__rmid_valid(rmid)) {
+		event = group;
+		if (is_mbm_event(event->attr.config))
+			init_mbm_sample(rmid, event->attr.config);
+
+		list_for_each_entry(event, head, hw.cqm_group_entry) {
+			if (is_mbm_event(event->attr.config))
+				init_mbm_sample(rmid, event->attr.config);
+		}
+	}
+
 	return old_rmid;
 }
 
@@ -837,6 +927,72 @@
 	schedule_delayed_work(&intel_cqm_rmid_work, delay);
 }
 
+static u64 update_sample(unsigned int rmid, u32 evt_type, int first)
+{
+	struct sample *mbm_current;
+	u32 vrmid = rmid_2_index(rmid);
+	u64 val, bytes, shift;
+	u32 eventid;
+
+	if (evt_type == QOS_MBM_LOCAL_EVENT_ID) {
+		mbm_current = &mbm_local[vrmid];
+		eventid     = QOS_MBM_LOCAL_EVENT_ID;
+	} else {
+		mbm_current = &mbm_total[vrmid];
+		eventid     = QOS_MBM_TOTAL_EVENT_ID;
+	}
+
+	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+	rdmsrl(MSR_IA32_QM_CTR, val);
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return mbm_current->total_bytes;
+
+	if (first) {
+		mbm_current->prev_msr = val;
+		mbm_current->total_bytes = 0;
+		return mbm_current->total_bytes;
+	}
+
+	/*
+	 * The h/w guarantees that counters will not overflow
+	 * so long as we poll them at least once per second.
+	 */
+	shift = 64 - MBM_CNTR_WIDTH;
+	bytes = (val << shift) - (mbm_current->prev_msr << shift);
+	bytes >>= shift;
+
+	bytes *= cqm_l3_scale;
+
+	mbm_current->total_bytes += bytes;
+	mbm_current->prev_msr = val;
+
+	return mbm_current->total_bytes;
+}
+
+static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type)
+{
+	return update_sample(rmid, evt_type, 0);
+}
+
+static void __intel_mbm_event_init(void *info)
+{
+	struct rmid_read *rr = info;
+
+	update_sample(rr->rmid, rr->evt_type, 1);
+}
+
+static void init_mbm_sample(u32 rmid, u32 evt_type)
+{
+	struct rmid_read rr = {
+		.rmid = rmid,
+		.evt_type = evt_type,
+		.value = ATOMIC64_INIT(0),
+	};
+
+	/* on each socket, init sample */
+	on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
+}
+
 /*
  * Find a group and setup RMID.
  *
@@ -849,6 +1005,7 @@
 	bool conflict = false;
 	u32 rmid;
 
+	event->hw.is_group_event = false;
 	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
 		rmid = iter->hw.cqm_rmid;
 
@@ -856,6 +1013,8 @@
 			/* All tasks in a group share an RMID */
 			event->hw.cqm_rmid = rmid;
 			*group = iter;
+			if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
+				init_mbm_sample(rmid, event->attr.config);
 			return;
 		}
 
@@ -872,6 +1031,9 @@
 	else
 		rmid = __get_rmid();
 
+	if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
+		init_mbm_sample(rmid, event->attr.config);
+
 	event->hw.cqm_rmid = rmid;
 }
 
@@ -893,7 +1055,10 @@
 	if (!__rmid_valid(rmid))
 		goto out;
 
-	val = __rmid_read(rmid);
+	if (is_mbm_event(event->attr.config))
+		val = rmid_read_mbm(rmid, event->attr.config);
+	else
+		val = __rmid_read(rmid);
 
 	/*
 	 * Ignore this reading on error states and do not update the value.
@@ -924,10 +1089,100 @@
 	return !list_empty(&event->hw.cqm_groups_entry);
 }
 
+static void __intel_mbm_event_count(void *info)
+{
+	struct rmid_read *rr = info;
+	u64 val;
+
+	val = rmid_read_mbm(rr->rmid, rr->evt_type);
+	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+	atomic64_add(val, &rr->value);
+}
+
+static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
+{
+	struct perf_event *iter, *iter1;
+	int ret = HRTIMER_RESTART;
+	struct list_head *head;
+	unsigned long flags;
+	u32 grp_rmid;
+
+	/*
+	 * Need to cache_lock as the timer Event Select MSR reads
+	 * can race with the mbm/cqm count() and mbm_init() reads.
+	 */
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
+	if (list_empty(&cache_groups)) {
+		ret = HRTIMER_NORESTART;
+		goto out;
+	}
+
+	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+		grp_rmid = iter->hw.cqm_rmid;
+		if (!__rmid_valid(grp_rmid))
+			continue;
+		if (is_mbm_event(iter->attr.config))
+			update_sample(grp_rmid, iter->attr.config, 0);
+
+		head = &iter->hw.cqm_group_entry;
+		if (list_empty(head))
+			continue;
+		list_for_each_entry(iter1, head, hw.cqm_group_entry) {
+			if (!iter1->hw.is_group_event)
+				break;
+			if (is_mbm_event(iter1->attr.config))
+				update_sample(iter1->hw.cqm_rmid,
+					      iter1->attr.config, 0);
+		}
+	}
+
+	hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
+out:
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	return ret;
+}
+
+static void __mbm_start_timer(void *info)
+{
+	hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
+			     HRTIMER_MODE_REL_PINNED);
+}
+
+static void __mbm_stop_timer(void *info)
+{
+	hrtimer_cancel(&mbm_timers[pkg_id]);
+}
+
+static void mbm_start_timers(void)
+{
+	on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
+}
+
+static void mbm_stop_timers(void)
+{
+	on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
+}
+
+static void mbm_hrtimer_init(void)
+{
+	struct hrtimer *hr;
+	int i;
+
+	for (i = 0; i < mbm_socket_max; i++) {
+		hr = &mbm_timers[i];
+		hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hr->function = mbm_hrtimer_handle;
+	}
+}
+
 static u64 intel_cqm_event_count(struct perf_event *event)
 {
 	unsigned long flags;
 	struct rmid_read rr = {
+		.evt_type = event->attr.config,
 		.value = ATOMIC64_INIT(0),
 	};
 
@@ -940,7 +1195,9 @@
 		return __perf_event_count(event);
 
 	/*
-	 * Only the group leader gets to report values. This stops us
+	 * Only the group leader gets to report values except in case of
+	 * multiple events in the same group, we still need to read the
+	 * other events.This stops us
 	 * reporting duplicate values to userspace, and gives us a clear
 	 * rule for which task gets to report the values.
 	 *
@@ -948,7 +1205,7 @@
 	 * specific packages - we forfeit that ability when we create
 	 * task events.
 	 */
-	if (!cqm_group_leader(event))
+	if (!cqm_group_leader(event) && !event->hw.is_group_event)
 		return 0;
 
 	/*
@@ -975,7 +1232,7 @@
 	if (!__rmid_valid(rr.rmid))
 		goto out;
 
-	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+	cqm_mask_call(&rr);
 
 	raw_spin_lock_irqsave(&cache_lock, flags);
 	if (event->hw.cqm_rmid == rr.rmid)
@@ -1046,8 +1303,14 @@
 static void intel_cqm_event_destroy(struct perf_event *event)
 {
 	struct perf_event *group_other = NULL;
+	unsigned long flags;
 
 	mutex_lock(&cache_mutex);
+	/*
+	* Hold the cache_lock as mbm timer handlers could be
+	* scanning the list of events.
+	*/
+	raw_spin_lock_irqsave(&cache_lock, flags);
 
 	/*
 	 * If there's another event in this group...
@@ -1079,6 +1342,14 @@
 		}
 	}
 
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	/*
+	 * Stop the mbm overflow timers when the last event is destroyed.
+	*/
+	if (mbm_enabled && list_empty(&cache_groups))
+		mbm_stop_timers();
+
 	mutex_unlock(&cache_mutex);
 }
 
@@ -1086,11 +1357,13 @@
 {
 	struct perf_event *group = NULL;
 	bool rotate = false;
+	unsigned long flags;
 
 	if (event->attr.type != intel_cqm_pmu.type)
 		return -ENOENT;
 
-	if (event->attr.config & ~QOS_EVENT_MASK)
+	if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) ||
+	     (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
 		return -EINVAL;
 
 	/* unsupported modes and filters */
@@ -1110,9 +1383,21 @@
 
 	mutex_lock(&cache_mutex);
 
+	/*
+	 * Start the mbm overflow timers when the first event is created.
+	*/
+	if (mbm_enabled && list_empty(&cache_groups))
+		mbm_start_timers();
+
 	/* Will also set rmid */
 	intel_cqm_setup_event(event, &group);
 
+	/*
+	* Hold the cache_lock as mbm timer handlers be
+	* scanning the list of events.
+	*/
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
 	if (group) {
 		list_add_tail(&event->hw.cqm_group_entry,
 			      &group->hw.cqm_group_entry);
@@ -1131,6 +1416,7 @@
 			rotate = true;
 	}
 
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
 	mutex_unlock(&cache_mutex);
 
 	if (rotate)
@@ -1145,6 +1431,16 @@
 EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
 EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
 
+EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02");
+EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1");
+EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB");
+EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6");
+
+EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03");
+EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1");
+EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB");
+EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6");
+
 static struct attribute *intel_cqm_events_attr[] = {
 	EVENT_PTR(intel_cqm_llc),
 	EVENT_PTR(intel_cqm_llc_pkg),
@@ -1154,9 +1450,38 @@
 	NULL,
 };
 
+static struct attribute *intel_mbm_events_attr[] = {
+	EVENT_PTR(intel_cqm_total_bytes),
+	EVENT_PTR(intel_cqm_local_bytes),
+	EVENT_PTR(intel_cqm_total_bytes_pkg),
+	EVENT_PTR(intel_cqm_local_bytes_pkg),
+	EVENT_PTR(intel_cqm_total_bytes_unit),
+	EVENT_PTR(intel_cqm_local_bytes_unit),
+	EVENT_PTR(intel_cqm_total_bytes_scale),
+	EVENT_PTR(intel_cqm_local_bytes_scale),
+	NULL,
+};
+
+static struct attribute *intel_cmt_mbm_events_attr[] = {
+	EVENT_PTR(intel_cqm_llc),
+	EVENT_PTR(intel_cqm_total_bytes),
+	EVENT_PTR(intel_cqm_local_bytes),
+	EVENT_PTR(intel_cqm_llc_pkg),
+	EVENT_PTR(intel_cqm_total_bytes_pkg),
+	EVENT_PTR(intel_cqm_local_bytes_pkg),
+	EVENT_PTR(intel_cqm_llc_unit),
+	EVENT_PTR(intel_cqm_total_bytes_unit),
+	EVENT_PTR(intel_cqm_local_bytes_unit),
+	EVENT_PTR(intel_cqm_llc_scale),
+	EVENT_PTR(intel_cqm_total_bytes_scale),
+	EVENT_PTR(intel_cqm_local_bytes_scale),
+	EVENT_PTR(intel_cqm_llc_snapshot),
+	NULL,
+};
+
 static struct attribute_group intel_cqm_events_group = {
 	.name = "events",
-	.attrs = intel_cqm_events_attr,
+	.attrs = NULL,
 };
 
 PMU_FORMAT_ATTR(event, "config:0-7");
@@ -1303,12 +1628,70 @@
 	{}
 };
 
+static void mbm_cleanup(void)
+{
+	if (!mbm_enabled)
+		return;
+
+	kfree(mbm_local);
+	kfree(mbm_total);
+	mbm_enabled = false;
+}
+
+static const struct x86_cpu_id intel_mbm_local_match[] = {
+	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL },
+	{}
+};
+
+static const struct x86_cpu_id intel_mbm_total_match[] = {
+	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL },
+	{}
+};
+
+static int intel_mbm_init(void)
+{
+	int ret = 0, array_size, maxid = cqm_max_rmid + 1;
+
+	mbm_socket_max = topology_max_packages();
+	array_size = sizeof(struct sample) * maxid * mbm_socket_max;
+	mbm_local = kmalloc(array_size, GFP_KERNEL);
+	if (!mbm_local)
+		return -ENOMEM;
+
+	mbm_total = kmalloc(array_size, GFP_KERNEL);
+	if (!mbm_total) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	array_size = sizeof(struct hrtimer) * mbm_socket_max;
+	mbm_timers = kmalloc(array_size, GFP_KERNEL);
+	if (!mbm_timers) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	mbm_hrtimer_init();
+
+out:
+	if (ret)
+		mbm_cleanup();
+
+	return ret;
+}
+
 static int __init intel_cqm_init(void)
 {
-	char *str, scale[20];
+	char *str = NULL, scale[20];
 	int i, cpu, ret;
 
-	if (!x86_match_cpu(intel_cqm_match))
+	if (x86_match_cpu(intel_cqm_match))
+		cqm_enabled = true;
+
+	if (x86_match_cpu(intel_mbm_local_match) &&
+	     x86_match_cpu(intel_mbm_total_match))
+		mbm_enabled = true;
+
+	if (!cqm_enabled && !mbm_enabled)
 		return -ENODEV;
 
 	cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
@@ -1365,16 +1748,41 @@
 		cqm_pick_event_reader(i);
 	}
 
-	__perf_cpu_notifier(intel_cqm_cpu_notifier);
+	if (mbm_enabled)
+		ret = intel_mbm_init();
+	if (ret && !cqm_enabled)
+		goto out;
+
+	if (cqm_enabled && mbm_enabled)
+		intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
+	else if (!cqm_enabled && mbm_enabled)
+		intel_cqm_events_group.attrs = intel_mbm_events_attr;
+	else if (cqm_enabled && !mbm_enabled)
+		intel_cqm_events_group.attrs = intel_cqm_events_attr;
 
 	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
-	if (ret)
+	if (ret) {
 		pr_err("Intel CQM perf registration failed: %d\n", ret);
-	else
-		pr_info("Intel CQM monitoring enabled\n");
+		goto out;
+	}
 
+	if (cqm_enabled)
+		pr_info("Intel CQM monitoring enabled\n");
+	if (mbm_enabled)
+		pr_info("Intel MBM enabled\n");
+
+	/*
+	 * Register the hot cpu notifier once we are sure cqm
+	 * is enabled to avoid notifier leak.
+	 */
+	__perf_cpu_notifier(intel_cqm_cpu_notifier);
 out:
 	cpu_notifier_register_done();
+	if (ret) {
+		kfree(str);
+		cqm_cleanup();
+		mbm_cleanup();
+	}
 
 	return ret;
 }

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ce7211a..8584b90 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c

@@ -570,11 +570,12 @@
 	 * We will overwrite the from and to address before we output
 	 * the sample.
 	 */
+	rcu_read_lock();
 	perf_prepare_sample(&header, &data, event, &regs);
 
 	if (perf_output_begin(&handle, event, header.size *
 			      (top - base - skip)))
-		return 1;
+		goto unlock;
 
 	for (at = base; at < top; at++) {
 		/* Filter out any records that contain kernel addresses. */
@@ -593,6 +594,8 @@
 	/* There's new data available. */
 	event->hw.interrupts++;
 	event->pending_kill = POLL_IN;
+unlock:
+	rcu_read_unlock();
 	return 1;
 }
 

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 69dd118..6c3b7c1 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c

@@ -649,7 +649,7 @@
 
 /*
  * return the type of control flow change at address "from"
- * intruction is not necessarily a branch (in case of interrupt).
+ * instruction is not necessarily a branch (in case of interrupt).
  *
  * The branch type returned also includes the priv level of the
  * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index b834a3f..70c93f9 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c

@@ -711,6 +711,7 @@
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
 	case 63: /* Haswell-Server */
+	case 79: /* Broadwell-Server */
 		apply_quirk = true;
 		rapl_cntr_mask = RAPL_IDX_SRV;
 		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
@@ -718,6 +719,7 @@
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
 	case 61: /* Broadwell */
+	case 71: /* Broadwell-H */
 		rapl_cntr_mask = RAPL_IDX_HSW;
 		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
 		break;

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 93f6bd9..ab2bcaa 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c

@@ -46,7 +46,6 @@
 				(SNBEP_PMON_CTL_EV_SEL_MASK | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
 				 SNBEP_PMON_CTL_EDGE_DET | \
-				 SNBEP_PMON_CTL_EV_SEL_EXT | \
 				 SNBEP_PMON_CTL_INVERT | \
 				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
@@ -148,7 +147,6 @@
 /* IVBEP PCU */
 #define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK	\
 				(SNBEP_PMON_CTL_EV_SEL_MASK | \
-				 SNBEP_PMON_CTL_EV_SEL_EXT | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
 				 SNBEP_PMON_CTL_EDGE_DET | \
 				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
@@ -258,7 +256,6 @@
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
 				 SNBEP_PMON_CTL_EDGE_DET | \
 				 SNBEP_CBO_PMON_CTL_TID_EN | \
-				 SNBEP_PMON_CTL_EV_SEL_EXT | \
 				 SNBEP_PMON_CTL_INVERT | \
 				 KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
@@ -472,7 +469,7 @@
 };
 
 static struct attribute *snbep_uncore_pcu_formats_attr[] = {
-	&format_attr_event_ext.attr,
+	&format_attr_event.attr,
 	&format_attr_occ_sel.attr,
 	&format_attr_edge.attr,
 	&format_attr_inv.attr,
@@ -1313,7 +1310,7 @@
 };
 
 static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
-	&format_attr_event_ext.attr,
+	&format_attr_event.attr,
 	&format_attr_occ_sel.attr,
 	&format_attr_edge.attr,
 	&format_attr_thresh5.attr,

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 68155ca..ad4dc7f 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h

@@ -272,7 +272,7 @@
  * events to select for counter rescheduling.
  *
  * Care must be taken as the rescheduling algorithm is O(n!) which
- * will increase scheduling cycles for an over-commited system
+ * will increase scheduling cycles for an over-committed system
  * dramatically.  The number of such EVENT_CONSTRAINT_OVERLAP() macros
  * and its counter masks must be kept at a minimum.
  */
@@ -608,6 +608,11 @@
 	atomic_t	lbr_exclusive[x86_lbr_exclusive_max];
 
 	/*
+	 * AMD bits
+	 */
+	unsigned int	amd_nb_constraints : 1;
+
+	/*
 	 * Extra registers for events
 	 */
 	struct extra_reg *extra_regs;
@@ -795,6 +800,9 @@
 
 struct attribute **merge_attr(struct attribute **a, struct attribute **b);
 
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page);
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
@@ -925,9 +933,6 @@
 
 int knc_pmu_init(void);
 
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
-			  char *page);
-
 static inline int is_ht_workaround_enabled(void)
 {
 	return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 0899cfc..98f25bb 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h

@@ -643,8 +643,8 @@
 
 static inline void entering_ack_irq(void)
 {
-	ack_APIC_irq();
 	entering_irq();
+	ack_APIC_irq();
 }
 
 static inline void ipi_entering_ack_irq(void)

diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index acdee09..ebb102e 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h

@@ -316,9 +316,10 @@
 	return false;
 }
 
-static inline bool is_compat_task(void)
+static inline bool in_compat_syscall(void)
 {
 	return is_ia32_task() || is_x32_task();
 }
+#define in_compat_syscall in_compat_syscall	/* override the generic impl */
 
 #endif /* _ASM_X86_COMPAT_H */

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 3d1a843..8f9afef 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h

@@ -94,7 +94,7 @@
 #define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
 #define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
 #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_ACC_POWER	( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
 #define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
@@ -245,6 +245,8 @@
 
 /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
 #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 2493885..a4820d4 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h

@@ -52,13 +52,13 @@
  * this screws up the trace output when tracing a ia32 task.
  * Instead of reporting bogus syscalls, just do not trace them.
  *
- * If the user realy wants these, then they should use the
+ * If the user really wants these, then they should use the
  * raw syscall tracepoints with filtering.
  */
 #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
 static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
 {
-	if (is_compat_task())
+	if (in_compat_syscall())
 		return true;
 	return false;
 }

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1815b73..b90e105 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h

@@ -141,6 +141,7 @@
 struct irq_cfg {
 	unsigned int		dest_apicid;
 	u8			vector;
+	u8			old_vector;
 };
 
 extern struct irq_cfg *irq_cfg(unsigned int irq);
@@ -168,20 +169,6 @@
 
 extern void elcr_set_level_irq(unsigned int irq);
 
-/* SMP */
-extern __visible void smp_apic_timer_interrupt(struct pt_regs *);
-extern __visible void smp_spurious_interrupt(struct pt_regs *);
-extern __visible void smp_x86_platform_ipi(struct pt_regs *);
-extern __visible void smp_error_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_IO_APIC
-extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
-#endif
-#ifdef CONFIG_SMP
-extern __visible void smp_reschedule_interrupt(struct pt_regs *);
-extern __visible void smp_call_function_interrupt(struct pt_regs *);
-extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
-#endif
-
 extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
 #define trace_irq_entries_start irq_entries_start

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 01c8b50..b7e3944 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h

@@ -43,7 +43,7 @@
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
-#define KVM_HALT_POLL_NS_DEFAULT 500000
+#define KVM_HALT_POLL_NS_DEFAULT 400000
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 
@@ -84,7 +84,8 @@
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
-			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
+			  | X86_CR4_PKE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
@@ -187,12 +188,14 @@
 #define PFERR_USER_BIT 2
 #define PFERR_RSVD_BIT 3
 #define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5
 
 #define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
 #define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
 #define PFERR_USER_MASK (1U << PFERR_USER_BIT)
 #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
 #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
+#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
 
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC	0
@@ -335,6 +338,14 @@
 	 */
 	u8 permissions[16];
 
+	/*
+	* The pkru_mask indicates if protection key checks are needed.  It
+	* consists of 16 domains indexed by page fault error code bits [4:1],
+	* with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
+	* Each domain has 2 bits which are ANDed with AD and WD from PKRU.
+	*/
+	u32 pkru_mask;
+
 	u64 *pae_root;
 	u64 *lm_root;
 
@@ -874,6 +885,7 @@
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+	u32 (*get_pkru)(struct kvm_vcpu *vcpu);
 	void (*fpu_activate)(struct kvm_vcpu *vcpu);
 	void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 2da46ac..426e946 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h

@@ -190,6 +190,7 @@
 #define MSR_PP1_ENERGY_STATUS		0x00000641
 #define MSR_PP1_POLICY			0x00000642
 
+/* Config TDP MSRs */
 #define MSR_CONFIG_TDP_NOMINAL		0x00000648
 #define MSR_CONFIG_TDP_LEVEL_1		0x00000649
 #define MSR_CONFIG_TDP_LEVEL_2		0x0000064A
@@ -210,13 +211,6 @@
 #define MSR_GFX_PERF_LIMIT_REASONS	0x000006B0
 #define MSR_RING_PERF_LIMIT_REASONS	0x000006B1
 
-/* Config TDP MSRs */
-#define MSR_CONFIG_TDP_NOMINAL		0x00000648
-#define MSR_CONFIG_TDP_LEVEL1		0x00000649
-#define MSR_CONFIG_TDP_LEVEL2		0x0000064A
-#define MSR_CONFIG_TDP_CONTROL		0x0000064B
-#define MSR_TURBO_ACTIVATION_RATIO	0x0000064C
-
 /* Hardware P state interface */
 #define MSR_PPERF			0x0000064e
 #define MSR_PERF_LIMIT_REASONS		0x0000064f

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 93fb7c1..7a79ee2 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h

@@ -42,14 +42,6 @@
 	struct saved_msr *array;
 };
 
-static inline unsigned long long native_read_tscp(unsigned int *aux)
-{
-	unsigned long low, high;
-	asm volatile(".byte 0x0f,0x01,0xf9"
-		     : "=a" (low), "=d" (high), "=c" (*aux));
-	return low | ((u64)high << 32);
-}
-
 /*
  * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
  * constraint has different meanings. For i386, "A" means exactly

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1ff49ec..97f3242 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h

@@ -107,6 +107,12 @@
 	return 0;
 }
 
+static inline void write_pkru(u32 pkru)
+{
+	if (boot_cpu_has(X86_FEATURE_OSPKE))
+		__write_pkru(pkru);
+}
+
 static inline int pte_young(pte_t pte)
 {
 	return pte_flags(pte) & _PAGE_ACCESSED;

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index bf8b35d..fbc5e92 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h

@@ -47,6 +47,15 @@
 		BUG();
 }
 
+static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
+		size_t n)
+{
+	if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
+		return memcpy_mcsafe(dst, (void __force *) src, n);
+	memcpy(dst, (void __force *) src, n);
+	return 0;
+}
+
 /**
  * arch_wmb_pmem - synchronize writes to persistent memory
  *

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 983738a..9264476 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h

@@ -132,8 +132,6 @@
 	u16			logical_proc_id;
 	/* Core id: */
 	u16			cpu_core_id;
-	/* Compute unit id */
-	u8			compute_unit_id;
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 	u32			microcode;

diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index cad82c9..ceec86eb 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h

@@ -25,7 +25,7 @@
  * This should be totally fair - if anything is waiting, a process that wants a
  * lock will go to the back of the queue. When the currently active lock is
  * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consequtive readers at the
+ * that will be woken up; if there's a bunch of consecutive readers at the
  * front, then they'll all be woken up, but no other readers will be.
  */
 

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 20a3de5..66b0573 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h

@@ -155,6 +155,7 @@
 	wbinvd();
 	return 0;
 }
+#define smp_num_siblings	1
 #endif /* CONFIG_SMP */
 
 extern unsigned disabled_cpus;

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index aee6e76..d96d043 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h

@@ -113,11 +113,27 @@
 		     : "c" (ecx));
 	return pkru;
 }
+
+static inline void __write_pkru(u32 pkru)
+{
+	u32 ecx = 0, edx = 0;
+
+	/*
+	 * "wrpkru" instruction.  Loads contents in EAX to PKRU,
+	 * requires that ecx = edx = 0.
+	 */
+	asm volatile(".byte 0x0f,0x01,0xef\n\t"
+		     : : "a" (pkru), "c"(ecx), "d"(edx));
+}
 #else
 static inline u32 __read_pkru(void)
 {
 	return 0;
 }
+
+static inline void __write_pkru(u32 pkru)
+{
+}
 #endif
 
 static inline void native_wbinvd(void)

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index ca6ba36..90dbbd9 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h

@@ -87,9 +87,9 @@
  *
  * Low level memory copy function that catches machine checks
  *
- * Return true for success, false for fail
+ * Return 0 for success, -EFAULT for fail
  */
-bool memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
 
 #endif /* __KERNEL__ */
 

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8286669..ffae84d 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h

@@ -276,11 +276,9 @@
  */
 #define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
 
-#endif	/* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 extern void arch_release_task_struct(struct task_struct *tsk);
-#endif
+#endif	/* !__ASSEMBLY__ */
+
 #endif /* _ASM_X86_THREAD_INFO_H */

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index c24b422..1fde8d5 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h

@@ -319,12 +319,6 @@
 
 #endif	/* SMP */
 
-/* Not inlined due to inc_irq_stat not being defined yet */
-#define flush_tlb_local() {		\
-	inc_irq_stat(irq_tlb_count);	\
-	local_flush_tlb();		\
-}
-
 #ifndef CONFIG_PARAVIRT
 #define flush_tlb_others(mask, mm, start, end)	\
 	native_flush_tlb_others(mask, mm, start, end)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 88bff6d..a969ae6 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h

@@ -105,9 +105,8 @@
 struct exception_table_entry {
 	int insn, fixup, handler;
 };
-/* This is not the generic standard exception_table_entry format */
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
+
+#define ARCH_HAS_RELATIVE_EXTABLE
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);

diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 8b2d4be..39171b3 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h

@@ -62,4 +62,6 @@
 void xen_arch_unregister_cpu(int num);
 #endif
 
+extern void xen_set_iopl_mask(unsigned mask);
+
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d5fb087..616ebd2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile

@@ -19,12 +19,19 @@
 KASAN_SANITIZE_head$(BITS).o				:= n
 KASAN_SANITIZE_dumpstack.o				:= n
 KASAN_SANITIZE_dumpstack_$(BITS).o			:= n
+KASAN_SANITIZE_stacktrace.o := n
 
 OBJECT_FILES_NON_STANDARD_head_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
 OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
 
+# If instrumentation of this dir is enabled, boot hangs during first second.
+# Probably could be more selective here, but note that files related to irqs,
+# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
+# non-deterministic coverage.
+KCOV_INSTRUMENT		:= n
+
 CFLAGS_irq.o := -I$(src)/../include/asm/trace
 
 obj-y			:= process_$(BITS).o signal.o

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e759076..8c2f1ef 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c

@@ -956,7 +956,7 @@
 
 	/*
 	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
-	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
 	 */
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
@@ -984,7 +984,7 @@
 
 	/*
 	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
-	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
+	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
 	 */
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 29fa475..a147e67 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c

@@ -170,15 +170,13 @@
 {
 	struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
 	unsigned int mask;
-	int cuid;
 
 	if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 		return 0;
 
 	pci_read_config_dword(link, 0x1d4, &mask);
 
-	cuid = cpu_data(cpu).compute_unit_id;
-	return (mask >> (4 * cuid)) & 0xf;
+	return (mask >> (4 * cpu_data(cpu).cpu_core_id)) & 0xf;
 }
 
 int amd_set_subcaches(int cpu, unsigned long mask)
@@ -204,7 +202,7 @@
 		pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
 	}
 
-	cuid = cpu_data(cpu).compute_unit_id;
+	cuid = cpu_data(cpu).cpu_core_id;
 	mask <<= 4 * cuid;
 	mask |= (0xf ^ (1 << cuid)) << 26;
 

diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 222a570..cefacba 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c

@@ -221,7 +221,7 @@
 	unsigned long cpu = (unsigned long)hcpu;
 	struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
 
-	switch (action & 0xf) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DEAD:
 		dw_apb_clockevent_pause(adev->timer);
 		if (system_state == SYSTEM_RUNNING) {

diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 8bb12ddc..8e63ebd 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile

@@ -2,6 +2,10 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
+# Leads to non-deterministic coverage that is not a function of syscall inputs.
+# In particualr, smp_apic_timer_interrupt() is called in random places.
+KCOV_INSTRUMENT		:= n
+
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
 obj-y				+= hw_nmi.o
 

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 531b961..d356987 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c

@@ -1611,7 +1611,7 @@
 	legacy_pic->mask_all();
 	mask_ioapic_entries();
 
-	/* If irq_remapping_prepare() succeded, try to enable it */
+	/* If irq_remapping_prepare() succeeded, try to enable it */
 	if (ir_stat >= 0)
 		ir_stat = try_to_enable_IR();
 	/* ir_stat contains the remap mode or an error code */

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3b670df..ad59d70 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c

@@ -213,6 +213,7 @@
 	 */
 	cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
 	d->move_in_progress = !cpumask_empty(d->old_domain);
+	d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0;
 	d->cfg.vector = vector;
 	cpumask_copy(d->domain, vector_cpumask);
 success:
@@ -655,46 +656,97 @@
 }
 
 /*
- * Called with @desc->lock held and interrupts disabled.
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
  */
 void irq_force_complete_move(struct irq_desc *desc)
 {
 	struct irq_data *irqdata = irq_desc_get_irq_data(desc);
 	struct apic_chip_data *data = apic_chip_data(irqdata);
 	struct irq_cfg *cfg = data ? &data->cfg : NULL;
+	unsigned int cpu;
 
 	if (!cfg)
 		return;
 
-	__irq_complete_move(cfg, cfg->vector);
-
 	/*
 	 * This is tricky. If the cleanup of @data->old_domain has not been
 	 * done yet, then the following setaffinity call will fail with
 	 * -EBUSY. This can leave the interrupt in a stale state.
 	 *
-	 * The cleanup cannot make progress because we hold @desc->lock. So in
-	 * case @data->old_domain is not yet cleaned up, we need to drop the
-	 * lock and acquire it again. @desc cannot go away, because the
-	 * hotplug code holds the sparse irq lock.
+	 * All CPUs are stuck in stop machine with interrupts disabled so
+	 * calling __irq_complete_move() would be completely pointless.
 	 */
 	raw_spin_lock(&vector_lock);
-	/* Clean out all offline cpus (including ourself) first. */
+	/*
+	 * Clean out all offline cpus (including the outgoing one) from the
+	 * old_domain mask.
+	 */
 	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
-	while (!cpumask_empty(data->old_domain)) {
+
+	/*
+	 * If move_in_progress is cleared and the old_domain mask is empty,
+	 * then there is nothing to cleanup. fixup_irqs() will take care of
+	 * the stale vectors on the outgoing cpu.
+	 */
+	if (!data->move_in_progress && cpumask_empty(data->old_domain)) {
 		raw_spin_unlock(&vector_lock);
-		raw_spin_unlock(&desc->lock);
-		cpu_relax();
-		raw_spin_lock(&desc->lock);
-		/*
-		 * Reevaluate apic_chip_data. It might have been cleared after
-		 * we dropped @desc->lock.
-		 */
-		data = apic_chip_data(irqdata);
-		if (!data)
-			return;
-		raw_spin_lock(&vector_lock);
+		return;
 	}
+
+	/*
+	 * 1) The interrupt is in move_in_progress state. That means that we
+	 *    have not seen an interrupt since the io_apic was reprogrammed to
+	 *    the new vector.
+	 *
+	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+	 *    have not been processed yet.
+	 */
+	if (data->move_in_progress) {
+		/*
+		 * In theory there is a race:
+		 *
+		 * set_ioapic(new_vector) <-- Interrupt is raised before update
+		 *			      is effective, i.e. it's raised on
+		 *			      the old vector.
+		 *
+		 * So if the target cpu cannot handle that interrupt before
+		 * the old vector is cleaned up, we get a spurious interrupt
+		 * and in the worst case the ioapic irq line becomes stale.
+		 *
+		 * But in case of cpu hotplug this should be a non issue
+		 * because if the affinity update happens right before all
+		 * cpus rendevouz in stop machine, there is no way that the
+		 * interrupt can be blocked on the target cpu because all cpus
+		 * loops first with interrupts enabled in stop machine, so the
+		 * old vector is not yet cleaned up when the interrupt fires.
+		 *
+		 * So the only way to run into this issue is if the delivery
+		 * of the interrupt on the apic/system bus would be delayed
+		 * beyond the point where the target cpu disables interrupts
+		 * in stop machine. I doubt that it can happen, but at least
+		 * there is a theroretical chance. Virtualization might be
+		 * able to expose this, but AFAICT the IOAPIC emulation is not
+		 * as stupid as the real hardware.
+		 *
+		 * Anyway, there is nothing we can do about that at this point
+		 * w/o refactoring the whole fixup_irq() business completely.
+		 * We print at least the irq number and the old vector number,
+		 * so we have the necessary information when a problem in that
+		 * area arises.
+		 */
+		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+			irqdata->irq, cfg->old_vector);
+	}
+	/*
+	 * If old_domain is not empty, then other cpus still have the irq
+	 * descriptor set in their vector array. Clean it up.
+	 */
+	for_each_cpu(cpu, data->old_domain)
+		per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED;
+
+	/* Cleanup the left overs of the (half finished) move */
+	cpumask_clear(data->old_domain);
+	data->move_in_progress = 0;
 	raw_spin_unlock(&vector_lock);
 }
 #endif

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 624db005..8f4942e 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c

@@ -792,7 +792,8 @@
 {
 	long cpu = (long)hcpu;
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DOWN_FAILED:
 	case CPU_ONLINE:
 		uv_heartbeat_enable(cpu);
 		break;
@@ -860,7 +861,7 @@
  */
 void uv_cpu_init(void)
 {
-	/* CPU 0 initilization will be done via uv_system_init. */
+	/* CPU 0 initialization will be done via uv_system_init. */
 	if (!uv_blade_info)
 		return;
 

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 052c9c3..9307f18 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c

@@ -1088,7 +1088,7 @@
  *	@device: identity of device
  *	@enable: on/off
  *
- *	Activate or deactive power management on either a specific device
+ *	Activate or deactivate power management on either a specific device
  *	or the entire system (%APM_DEVICE_ALL).
  */
 

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 0d373d7..4a8697f 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile

@@ -8,6 +8,10 @@
 CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
+# If these files are instrumented, boot hangs during the first second.
+KCOV_INSTRUMENT_common.o := n
+KCOV_INSTRUMENT_perf_event.o := n
+
 # Make sure load_percpu_segment has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_common.o		:= $(nostackp)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5026a13..7b76eb6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c

@@ -85,7 +85,7 @@
 #ifdef CONFIG_X86_32
 /*
  * General Systems BIOSen alias the cpu frequency registers
- * of the Elan at 0x000df000. Unfortuantly, one of the Linux
+ * of the Elan at 0x000df000. Unfortunately, one of the Linux
  * drivers subsequently pokes it, and changes the CPU speed.
  * Workaround : Remove the unneeded alias.
  */
@@ -300,7 +300,6 @@
 #ifdef CONFIG_SMP
 static void amd_get_topology(struct cpuinfo_x86 *c)
 {
-	u32 cores_per_cu = 1;
 	u8 node_id;
 	int cpu = smp_processor_id();
 
@@ -309,37 +308,32 @@
 		u32 eax, ebx, ecx, edx;
 
 		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-		nodes_per_socket = ((ecx >> 8) & 7) + 1;
 		node_id = ecx & 7;
 
 		/* get compute unit information */
 		smp_num_siblings = ((ebx >> 8) & 3) + 1;
-		c->compute_unit_id = ebx & 0xff;
-		cores_per_cu += ((ebx >> 8) & 3);
+		c->x86_max_cores /= smp_num_siblings;
+		c->cpu_core_id = ebx & 0xff;
 	} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
 		u64 value;
 
 		rdmsrl(MSR_FAM10H_NODE_ID, value);
-		nodes_per_socket = ((value >> 3) & 7) + 1;
 		node_id = value & 7;
 	} else
 		return;
 
 	/* fixup multi-node processor information */
 	if (nodes_per_socket > 1) {
-		u32 cores_per_node;
 		u32 cus_per_node;
 
 		set_cpu_cap(c, X86_FEATURE_AMD_DCM);
-		cores_per_node = c->x86_max_cores / nodes_per_socket;
-		cus_per_node = cores_per_node / cores_per_cu;
+		cus_per_node = c->x86_max_cores / nodes_per_socket;
 
 		/* store NodeID, use llc_shared_map to store sibling info */
 		per_cpu(cpu_llc_id, cpu) = node_id;
 
 		/* core id has to be in the [0 .. cores_per_node - 1] range */
-		c->cpu_core_id %= cores_per_node;
-		c->compute_unit_id %= cus_per_node;
+		c->cpu_core_id %= cus_per_node;
 	}
 }
 #endif
@@ -522,6 +516,18 @@
 
 	if (cpu_has(c, X86_FEATURE_MWAITX))
 		use_mwaitx_delay();
+
+	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		u32 ecx;
+
+		ecx = cpuid_ecx(0x8000001e);
+		nodes_per_socket = ((ecx >> 8) & 7) + 1;
+	} else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
+		u64 value;
+
+		rdmsrl(MSR_FAM10H_NODE_ID, value);
+		nodes_per_socket = ((value >> 3) & 7) + 1;
+	}
 }
 
 static void early_init_amd(struct cpuinfo_x86 *c)
@@ -539,6 +545,10 @@
 			set_sched_clock_stable();
 	}
 
+	/* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
+	if (c->x86_power & BIT(12))
+		set_cpu_cap(c, X86_FEATURE_ACC_POWER);
+
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
 #else

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 06ad723..8394b3d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c

@@ -692,7 +692,9 @@
 			cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
 			c->x86_capability[CPUID_F_1_EDX] = edx;
 
-			if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+			if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
+			      ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
+			       (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
 				c->x86_cache_max_rmid = ecx;
 				c->x86_cache_occ_scale = ebx;
 			}
@@ -968,7 +970,7 @@
 	if (this_cpu->c_identify)
 		this_cpu->c_identify(c);
 
-	/* Clear/Set all flags overriden by options, after probe */
+	/* Clear/Set all flags overridden by options, after probe */
 	for (i = 0; i < NCAPINTS; i++) {
 		c->x86_capability[i] &= ~cpu_caps_cleared[i];
 		c->x86_capability[i] |= cpu_caps_set[i];
@@ -1028,7 +1030,7 @@
 	setup_pku(c);
 
 	/*
-	 * Clear/Set all flags overriden by options, need do it
+	 * Clear/Set all flags overridden by options, need do it
 	 * before following smp all cpus cap AND.
 	 */
 	for (i = 0; i < NCAPINTS; i++) {

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 0b445c2..ac780ca 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c

@@ -384,6 +384,9 @@
 {
 	__u64 msr_val;
 
+	if (static_cpu_has(X86_FEATURE_HWP))
+		wrmsrl_safe(MSR_HWP_STATUS, 0);
+
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
 	/* Check for violation of core thermal thresholds*/

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fcbcb2f..19f5736 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c

@@ -42,7 +42,7 @@
  * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
  * Opteron Processors" (26094 Rev. 3.30 February 2006), section
  * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
- * to 1 during BIOS initalization of the fixed MTRRs, then cleared to
+ * to 1 during BIOS initialization of the fixed MTRRs, then cleared to
  * 0 for operation."
  */
 static inline void k8_check_syscfg_dram_mod_en(void)

diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 31f0f33..1dd8294 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c

@@ -18,4 +18,6 @@
 	"",	/* tsc invariant mapped to constant_tsc */
 	"cpb",  /* core performance boost */
 	"eff_freq_ro", /* Readonly aperf/mperf */
+	"proc_feedback", /* processor feedback interface */
+	"acc_power", /* accumulated power mechanism */
 };

diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 21bf924..8a12199 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c

@@ -287,7 +287,7 @@
 	}
 
 	/*
-	 * Lastly, initalize the hardware
+	 * Lastly, initialize the hardware
 	 */
 	if (*s) {
 		if (strcmp(s, "nocfg") == 0)

diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 0bc3490..8bd1c00 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c

@@ -8,7 +8,7 @@
 /*
  * The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
  * as the "regset->n" for the xstate regset will be updated based on the feature
- * capabilites supported by the xsave.
+ * capabilities supported by the xsave.
  */
 int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 702547c..d036cfb 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c

@@ -1,5 +1,5 @@
 /*
- * Code for replacing ftrace calls with jumps.
+ * Dynamic function tracing support.
  *
  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
  *

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index be0ebbb..a1f0e4a 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c

@@ -717,7 +717,7 @@
 	struct hpet_work_struct work;
 	struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
 
-	switch (action & 0xf) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
 		INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
 		init_completion(&work.complete);

diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 37dae79..589b319 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c

@@ -96,9 +96,14 @@
 SYSCALL_DEFINE1(iopl, unsigned int, level)
 {
 	struct pt_regs *regs = current_pt_regs();
-	unsigned int old = (regs->flags >> 12) & 3;
 	struct thread_struct *t = &current->thread;
 
+	/*
+	 * Careful: the IOPL bits in regs->flags are undefined under Xen PV
+	 * and changing them has no effect.
+	 */
+	unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+
 	if (level > 3)
 		return -EINVAL;
 	/* Trying to gain more privileges? */
@@ -106,8 +111,9 @@
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 	}
-	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
-	t->iopl = level << 12;
+	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
+		(level << X86_EFLAGS_IOPL_BIT);
+	t->iopl = level << X86_EFLAGS_IOPL_BIT;
 	set_iopl_mask(t->iopl);
 
 	return 0;

diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 0f8a6bb..2af478e 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c

@@ -271,7 +271,7 @@
 	int ret = -ENOEXEC;
 	struct setup_header *header;
 
-	/* kernel should be atleast two sectors long */
+	/* kernel should be at least two sectors long */
 	if (len < 2 * 512) {
 		pr_err("File is too short to be a bzImage\n");
 		return ret;

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index ed15cd48..2da6ee9 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c

@@ -609,9 +609,9 @@
 };
 
 /**
- *	kgdb_arch_init - Perform any architecture specific initalization.
+ *	kgdb_arch_init - Perform any architecture specific initialization.
  *
- *	This function will handle the initalization of any architecture
+ *	This function will handle the initialization of any architecture
  *	specific callbacks.
  */
 int kgdb_arch_init(void)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 47190bd..8079508 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c

@@ -36,6 +36,7 @@
 #include <linux/kprobes.h>
 #include <linux/debugfs.h>
 #include <linux/nmi.h>
+#include <linux/swait.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -91,14 +92,14 @@
 
 struct kvm_task_sleep_node {
 	struct hlist_node link;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	u32 token;
 	int cpu;
 	bool halted;
 };
 
 static struct kvm_task_sleep_head {
-	spinlock_t lock;
+	raw_spinlock_t lock;
 	struct hlist_head list;
 } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
 
@@ -122,17 +123,17 @@
 	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
 	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
 	struct kvm_task_sleep_node n, *e;
-	DEFINE_WAIT(wait);
+	DECLARE_SWAITQUEUE(wait);
 
 	rcu_irq_enter();
 
-	spin_lock(&b->lock);
+	raw_spin_lock(&b->lock);
 	e = _find_apf_task(b, token);
 	if (e) {
 		/* dummy entry exist -> wake up was delivered ahead of PF */
 		hlist_del(&e->link);
 		kfree(e);
-		spin_unlock(&b->lock);
+		raw_spin_unlock(&b->lock);
 
 		rcu_irq_exit();
 		return;
@@ -141,13 +142,13 @@
 	n.token = token;
 	n.cpu = smp_processor_id();
 	n.halted = is_idle_task(current) || preempt_count() > 1;
-	init_waitqueue_head(&n.wq);
+	init_swait_queue_head(&n.wq);
 	hlist_add_head(&n.link, &b->list);
-	spin_unlock(&b->lock);
+	raw_spin_unlock(&b->lock);
 
 	for (;;) {
 		if (!n.halted)
-			prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+			prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
 		if (hlist_unhashed(&n.link))
 			break;
 
@@ -166,7 +167,7 @@
 		}
 	}
 	if (!n.halted)
-		finish_wait(&n.wq, &wait);
+		finish_swait(&n.wq, &wait);
 
 	rcu_irq_exit();
 	return;
@@ -178,8 +179,8 @@
 	hlist_del_init(&n->link);
 	if (n->halted)
 		smp_send_reschedule(n->cpu);
-	else if (waitqueue_active(&n->wq))
-		wake_up(&n->wq);
+	else if (swait_active(&n->wq))
+		swake_up(&n->wq);
 }
 
 static void apf_task_wake_all(void)
@@ -189,14 +190,14 @@
 	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
 		struct hlist_node *p, *next;
 		struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
-		spin_lock(&b->lock);
+		raw_spin_lock(&b->lock);
 		hlist_for_each_safe(p, next, &b->list) {
 			struct kvm_task_sleep_node *n =
 				hlist_entry(p, typeof(*n), link);
 			if (n->cpu == smp_processor_id())
 				apf_task_wake_one(n);
 		}
-		spin_unlock(&b->lock);
+		raw_spin_unlock(&b->lock);
 	}
 }
 
@@ -212,7 +213,7 @@
 	}
 
 again:
-	spin_lock(&b->lock);
+	raw_spin_lock(&b->lock);
 	n = _find_apf_task(b, token);
 	if (!n) {
 		/*
@@ -225,17 +226,17 @@
 			 * Allocation failed! Busy wait while other cpu
 			 * handles async PF.
 			 */
-			spin_unlock(&b->lock);
+			raw_spin_unlock(&b->lock);
 			cpu_relax();
 			goto again;
 		}
 		n->token = token;
 		n->cpu = smp_processor_id();
-		init_waitqueue_head(&n->wq);
+		init_swait_queue_head(&n->wq);
 		hlist_add_head(&n->link, &b->list);
 	} else
 		apf_task_wake_one(n);
-	spin_unlock(&b->lock);
+	raw_spin_unlock(&b->lock);
 	return;
 }
 EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
@@ -486,7 +487,7 @@
 	paravirt_ops_setup();
 	register_reboot_notifier(&kvm_pv_reboot_nb);
 	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
-		spin_lock_init(&async_pf_sleepers[i].lock);
+		raw_spin_lock_init(&async_pf_sleepers[i].lock);
 	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
 		x86_init.irqs.trap_init = kvm_apf_trap_init;
 

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 72cef58..1d39bfb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c

@@ -226,7 +226,7 @@
  * registered memory location. If the guest happens to shutdown, this memory
  * won't be valid. In cases like kexec, in which you install a new kernel, this
  * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writting anything
+ * kind of shutdown from our side, we unregister the clock by writing anything
  * that does not have the 'enable' bit set in the msr
  */
 #ifdef CONFIG_KEXEC_CORE

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 776229e..6cbab31 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c

@@ -48,6 +48,7 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
+#include <asm/xen/hypervisor.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -413,6 +414,17 @@
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+#ifdef CONFIG_XEN
+	/*
+	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
+	 * current_pt_regs()->flags may not match the current task's
+	 * intended IOPL.  We need to switch it manually.
+	 */
+	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
+		     prev->iopl != next->iopl))
+		xen_set_iopl_mask(next->iopl);
+#endif
+
 	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
 		/*
 		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
@@ -478,7 +490,7 @@
 		if (current->mm)
 			current->mm->context.ia32_compat = TIF_X32;
 		current->personality &= ~READ_IMPLIES_EXEC;
-		/* is_compat_task() uses the presence of the x32
+		/* in_compat_syscall() uses the presence of the x32
 		   syscall bit flag to determine compat status */
 		current_thread_info()->status &= ~TS_COMPAT;
 	} else {

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2367ae0..319b08a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c

@@ -146,31 +146,6 @@
 
 struct boot_params boot_params;
 
-/*
- * Machine setup..
- */
-static struct resource data_resource = {
-	.name	= "Kernel data",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
-};
-
-static struct resource code_resource = {
-	.name	= "Kernel code",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
-};
-
-static struct resource bss_resource = {
-	.name	= "Kernel bss",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
-};
-
-
 #ifdef CONFIG_X86_32
 /* cpu data as detected by the assembly code in head.S */
 struct cpuinfo_x86 new_cpu_data = {
@@ -949,13 +924,6 @@
 
 	mpx_mm_init(&init_mm);
 
-	code_resource.start = __pa_symbol(_text);
-	code_resource.end = __pa_symbol(_etext)-1;
-	data_resource.start = __pa_symbol(_etext);
-	data_resource.end = __pa_symbol(_edata)-1;
-	bss_resource.start = __pa_symbol(__bss_start);
-	bss_resource.end = __pa_symbol(__bss_stop)-1;
-
 #ifdef CONFIG_CMDLINE_BOOL
 #ifdef CONFIG_CMDLINE_OVERRIDE
 	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
@@ -1019,11 +987,6 @@
 
 	x86_init.resources.probe_roms();
 
-	/* after parse_early_param, so could debug it */
-	insert_resource(&iomem_resource, &code_resource);
-	insert_resource(&iomem_resource, &data_resource);
-	insert_resource(&iomem_resource, &bss_resource);
-
 	e820_add_kernel_range();
 	trim_bios_range();
 #ifdef CONFIG_X86_32

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 643dbdc..a2065d3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c

@@ -274,11 +274,6 @@
 	if (test_and_set_bit(pkg, physical_package_map))
 		goto found;
 
-	if (pkg < __max_logical_packages) {
-		set_bit(pkg, logical_package_map);
-		physical_to_logical_pkg[pkg] = pkg;
-		goto found;
-	}
 	new = find_first_zero_bit(logical_package_map, __max_logical_packages);
 	if (new >= __max_logical_packages) {
 		physical_to_logical_pkg[pkg] = -1;
@@ -317,9 +312,27 @@
 	/*
 	 * Today neither Intel nor AMD support heterogenous systems. That
 	 * might change in the future....
+	 *
+	 * While ideally we'd want '* smp_num_siblings' in the below @ncpus
+	 * computation, this won't actually work since some Intel BIOSes
+	 * report inconsistent HT data when they disable HT.
+	 *
+	 * In particular, they reduce the APIC-IDs to only include the cores,
+	 * but leave the CPUID topology to say there are (2) siblings.
+	 * This means we don't know how many threads there will be until
+	 * after the APIC enumeration.
+	 *
+	 * By not including this we'll sometimes over-estimate the number of
+	 * logical packages by the amount of !present siblings, but this is
+	 * still better than MAX_LOCAL_APIC.
+	 *
+	 * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited
+	 * on the command line leading to a similar issue as the HT disable
+	 * problem because the hyperthreads are usually enumerated after the
+	 * primary cores.
 	 */
-	ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
-	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+	ncpus = boot_cpu_data.x86_max_cores;
+	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
 
 	/*
 	 * Possibly larger than what we need as the number of apic ids per
@@ -409,7 +422,7 @@
 
 		if (c->phys_proc_id == o->phys_proc_id &&
 		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
-		    c->compute_unit_id == o->compute_unit_id)
+		    c->cpu_core_id == o->cpu_core_id)
 			return topology_sane(c, o, "smt");
 
 	} else if (c->phys_proc_id == o->phys_proc_id &&

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 5638044..c9c4c7c 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c

@@ -881,7 +881,7 @@
 	local_irq_save(flags);
 
 	/*
-	 * We're comming out of suspend, there's no concurrency yet; don't
+	 * We're coming out of suspend, there's no concurrency yet; don't
 	 * bother being nice about the RCU stuff, just write to both
 	 * data fields.
 	 */
@@ -1306,11 +1306,15 @@
 unsigned long calibrate_delay_is_known(void)
 {
 	int sibling, cpu = smp_processor_id();
+	struct cpumask *mask = topology_core_cpumask(cpu);
 
 	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
 		return 0;
 
-	sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+	if (!mask)
+		return 0;
+
+	sibling = cpumask_any_but(mask, cpu);
 	if (sibling < nr_cpu_ids)
 		return cpu_data(sibling).loops_per_jiffy;
 	return 0;

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index d239639..4c941f8 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S

@@ -101,6 +101,7 @@
 		KPROBES_TEXT
 		ENTRY_TEXT
 		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
 		/* End of text section */

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0029644..8efb839 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c

@@ -88,6 +88,16 @@
 			apic->lapic_timer.timer_mode_mask = 1 << 17;
 	}
 
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	if (best) {
+		/* Update OSPKE bit */
+		if (boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7) {
+			best->ecx &= ~F(OSPKE);
+			if (kvm_read_cr4_bits(vcpu, X86_CR4_PKE))
+				best->ecx |= F(OSPKE);
+		}
+	}
+
 	best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
 	if (!best) {
 		vcpu->arch.guest_supported_xcr0 = 0;
@@ -305,7 +315,7 @@
 	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
 
 	/* cpuid 1.edx */
-	const u32 kvm_supported_word0_x86_features =
+	const u32 kvm_cpuid_1_edx_x86_features =
 		F(FPU) | F(VME) | F(DE) | F(PSE) |
 		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
 		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
@@ -315,7 +325,7 @@
 		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
 		0 /* HTT, TM, Reserved, PBE */;
 	/* cpuid 0x80000001.edx */
-	const u32 kvm_supported_word1_x86_features =
+	const u32 kvm_cpuid_8000_0001_edx_x86_features =
 		F(FPU) | F(VME) | F(DE) | F(PSE) |
 		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
 		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
@@ -325,7 +335,7 @@
 		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
 		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
 	/* cpuid 1.ecx */
-	const u32 kvm_supported_word4_x86_features =
+	const u32 kvm_cpuid_1_ecx_x86_features =
 		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
 		 * but *not* advertised to guests via CPUID ! */
 		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
@@ -337,29 +347,32 @@
 		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
 		F(F16C) | F(RDRAND);
 	/* cpuid 0x80000001.ecx */
-	const u32 kvm_supported_word6_x86_features =
+	const u32 kvm_cpuid_8000_0001_ecx_x86_features =
 		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
 		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
 		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
 		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
 
 	/* cpuid 0xC0000001.edx */
-	const u32 kvm_supported_word5_x86_features =
+	const u32 kvm_cpuid_C000_0001_edx_x86_features =
 		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
 		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
 		F(PMM) | F(PMM_EN);
 
 	/* cpuid 7.0.ebx */
-	const u32 kvm_supported_word9_x86_features =
+	const u32 kvm_cpuid_7_0_ebx_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
 		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
 		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
 
 	/* cpuid 0xD.1.eax */
-	const u32 kvm_supported_word10_x86_features =
+	const u32 kvm_cpuid_D_1_eax_x86_features =
 		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
 
+	/* cpuid 7.0.ecx*/
+	const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
 
@@ -376,10 +389,10 @@
 		entry->eax = min(entry->eax, (u32)0xd);
 		break;
 	case 1:
-		entry->edx &= kvm_supported_word0_x86_features;
-		cpuid_mask(&entry->edx, 0);
-		entry->ecx &= kvm_supported_word4_x86_features;
-		cpuid_mask(&entry->ecx, 4);
+		entry->edx &= kvm_cpuid_1_edx_x86_features;
+		cpuid_mask(&entry->edx, CPUID_1_EDX);
+		entry->ecx &= kvm_cpuid_1_ecx_x86_features;
+		cpuid_mask(&entry->ecx, CPUID_1_ECX);
 		/* we support x2apic emulation even if host does not support
 		 * it since we emulate x2apic in software */
 		entry->ecx |= F(X2APIC);
@@ -433,14 +446,20 @@
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		/* Mask ebx against host capability word 9 */
 		if (index == 0) {
-			entry->ebx &= kvm_supported_word9_x86_features;
-			cpuid_mask(&entry->ebx, 9);
+			entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
+			cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
 			// TSC_ADJUST is emulated
 			entry->ebx |= F(TSC_ADJUST);
-		} else
+			entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
+			cpuid_mask(&entry->ecx, CPUID_7_ECX);
+			/* PKU is not yet implemented for shadow paging. */
+			if (!tdp_enabled)
+				entry->ecx &= ~F(PKU);
+		} else {
 			entry->ebx = 0;
+			entry->ecx = 0;
+		}
 		entry->eax = 0;
-		entry->ecx = 0;
 		entry->edx = 0;
 		break;
 	}
@@ -514,7 +533,7 @@
 
 			do_cpuid_1_ent(&entry[i], function, idx);
 			if (idx == 1) {
-				entry[i].eax &= kvm_supported_word10_x86_features;
+				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
 				entry[i].ebx = 0;
 				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
 					entry[i].ebx =
@@ -564,10 +583,10 @@
 		entry->eax = min(entry->eax, 0x8000001a);
 		break;
 	case 0x80000001:
-		entry->edx &= kvm_supported_word1_x86_features;
-		cpuid_mask(&entry->edx, 1);
-		entry->ecx &= kvm_supported_word6_x86_features;
-		cpuid_mask(&entry->ecx, 6);
+		entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
+		cpuid_mask(&entry->edx, CPUID_8000_0001_EDX);
+		entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
+		cpuid_mask(&entry->ecx, CPUID_8000_0001_ECX);
 		break;
 	case 0x80000007: /* Advanced power management */
 		/* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -600,8 +619,8 @@
 		entry->eax = min(entry->eax, 0xC0000004);
 		break;
 	case 0xC0000001:
-		entry->edx &= kvm_supported_word5_x86_features;
-		cpuid_mask(&entry->edx, 5);
+		entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
+		cpuid_mask(&entry->edx, CPUID_C000_0001_EDX);
 		break;
 	case 3: /* Processor serial number */
 	case 5: /* MONITOR/MWAIT */

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 66a6581..e17a74b 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h

@@ -80,6 +80,14 @@
 	return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
 }
 
+static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ecx & bit(X86_FEATURE_PKU));
+}
+
 static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 5ff3485..01bd7b7 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c

@@ -1116,6 +1116,11 @@
 		break;
 	case HVCALL_POST_MESSAGE:
 	case HVCALL_SIGNAL_EVENT:
+		/* don't bother userspace if it has no way to handle it */
+		if (!vcpu_to_synic(vcpu)->active) {
+			res = HV_STATUS_INVALID_HYPERCALL_CODE;
+			break;
+		}
 		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
 		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
 		vcpu->run->hyperv.u.hcall.input = param;

diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index e1e89ee..762cdf2 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h

@@ -84,6 +84,11 @@
 		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
 }
 
+static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->get_pkru(vcpu);
+}
+
 static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hflags |= HF_GUEST_MASK;

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 443d2a5..1a2da0e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c

@@ -1369,7 +1369,7 @@
 
 		hrtimer_start(&apic->lapic_timer.timer,
 			      ktime_add_ns(now, apic->lapic_timer.period),
-			      HRTIMER_MODE_ABS);
+			      HRTIMER_MODE_ABS_PINNED);
 
 		apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
 			   PRIx64 ", "
@@ -1402,7 +1402,7 @@
 			expire = ktime_add_ns(now, ns);
 			expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
 			hrtimer_start(&apic->lapic_timer.timer,
-				      expire, HRTIMER_MODE_ABS);
+				      expire, HRTIMER_MODE_ABS_PINNED);
 		} else
 			apic_timer_expired(apic);
 
@@ -1868,7 +1868,7 @@
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS);
+		     HRTIMER_MODE_ABS_PINNED);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 
 	/*
@@ -2003,7 +2003,7 @@
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
 }
 
 /*

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c512f09..1ff4dbb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c

@@ -479,7 +479,7 @@
 static bool spte_has_volatile_bits(u64 spte)
 {
 	/*
-	 * Always atomicly update spte if it can be updated
+	 * Always atomically update spte if it can be updated
 	 * out of mmu-lock, it can ensure dirty bit is not lost,
 	 * also, it can help us to get a stable is_writable_pte()
 	 * to ensure tlb flush is not missed.
@@ -550,15 +550,22 @@
 
 	/*
 	 * For the spte updated out of mmu-lock is safe, since
-	 * we always atomicly update it, see the comments in
+	 * we always atomically update it, see the comments in
 	 * spte_has_volatile_bits().
 	 */
 	if (spte_is_locklessly_modifiable(old_spte) &&
 	      !is_writable_pte(new_spte))
 		ret = true;
 
-	if (!shadow_accessed_mask)
+	if (!shadow_accessed_mask) {
+		/*
+		 * We don't set page dirty when dropping non-writable spte.
+		 * So do it now if the new spte is becoming non-writable.
+		 */
+		if (ret)
+			kvm_set_pfn_dirty(spte_to_pfn(old_spte));
 		return ret;
+	}
 
 	/*
 	 * Flush TLB when accessed/dirty bits are changed in the page tables,
@@ -605,7 +612,8 @@
 
 	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
-	if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
+	if (old_spte & (shadow_dirty_mask ? shadow_dirty_mask :
+					    PT_WRITABLE_MASK))
 		kvm_set_pfn_dirty(pfn);
 	return 1;
 }
@@ -632,12 +640,12 @@
 	 * kvm_flush_remote_tlbs() IPI to all active vcpus.
 	 */
 	local_irq_disable();
-	vcpu->mode = READING_SHADOW_PAGE_TABLES;
+
 	/*
 	 * Make sure a following spte read is not reordered ahead of the write
 	 * to vcpu->mode.
 	 */
-	smp_mb();
+	smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES);
 }
 
 static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
@@ -647,8 +655,7 @@
 	 * reads to sptes.  If it does, kvm_commit_zap_page() can see us
 	 * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
 	 */
-	smp_mb();
-	vcpu->mode = OUTSIDE_GUEST_MODE;
+	smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
 	local_irq_enable();
 }
 
@@ -2390,14 +2397,13 @@
 		return;
 
 	/*
-	 * wmb: make sure everyone sees our modifications to the page tables
-	 * rmb: make sure we see changes to vcpu->mode
-	 */
-	smp_mb();
-
-	/*
-	 * Wait for all vcpus to exit guest mode and/or lockless shadow
-	 * page table walks.
+	 * We need to make sure everyone sees our modifications to
+	 * the page tables and see changes to vcpu->mode here. The barrier
+	 * in the kvm_flush_remote_tlbs() achieves this. This pairs
+	 * with vcpu_enter_guest and walk_shadow_page_lockless_begin/end.
+	 *
+	 * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit
+	 * guest mode and/or lockless shadow page table walks.
 	 */
 	kvm_flush_remote_tlbs(kvm);
 
@@ -3923,6 +3929,81 @@
 	}
 }
 
+/*
+* PKU is an additional mechanism by which the paging controls access to
+* user-mode addresses based on the value in the PKRU register.  Protection
+* key violations are reported through a bit in the page fault error code.
+* Unlike other bits of the error code, the PK bit is not known at the
+* call site of e.g. gva_to_gpa; it must be computed directly in
+* permission_fault based on two bits of PKRU, on some machine state (CR4,
+* CR0, EFER, CPL), and on other bits of the error code and the page tables.
+*
+* In particular the following conditions come from the error code, the
+* page tables and the machine state:
+* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1
+* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch)
+* - PK is always zero if U=0 in the page tables
+* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access.
+*
+* The PKRU bitmask caches the result of these four conditions.  The error
+* code (minus the P bit) and the page table's U bit form an index into the
+* PKRU bitmask.  Two bits of the PKRU bitmask are then extracted and ANDed
+* with the two bits of the PKRU register corresponding to the protection key.
+* For the first three conditions above the bits will be 00, thus masking
+* away both AD and WD.  For all reads or if the last condition holds, WD
+* only will be masked away.
+*/
+static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+				bool ept)
+{
+	unsigned bit;
+	bool wp;
+
+	if (ept) {
+		mmu->pkru_mask = 0;
+		return;
+	}
+
+	/* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */
+	if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) {
+		mmu->pkru_mask = 0;
+		return;
+	}
+
+	wp = is_write_protection(vcpu);
+
+	for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) {
+		unsigned pfec, pkey_bits;
+		bool check_pkey, check_write, ff, uf, wf, pte_user;
+
+		pfec = bit << 1;
+		ff = pfec & PFERR_FETCH_MASK;
+		uf = pfec & PFERR_USER_MASK;
+		wf = pfec & PFERR_WRITE_MASK;
+
+		/* PFEC.RSVD is replaced by ACC_USER_MASK. */
+		pte_user = pfec & PFERR_RSVD_MASK;
+
+		/*
+		 * Only need to check the access which is not an
+		 * instruction fetch and is to a user page.
+		 */
+		check_pkey = (!ff && pte_user);
+		/*
+		 * write access is controlled by PKRU if it is a
+		 * user access or CR0.WP = 1.
+		 */
+		check_write = check_pkey && wf && (uf || wp);
+
+		/* PKRU.AD stops both read and write access. */
+		pkey_bits = !!check_pkey;
+		/* PKRU.WD stops write access. */
+		pkey_bits |= (!!check_write) << 1;
+
+		mmu->pkru_mask |= (pkey_bits & 3) << pfec;
+	}
+}
+
 static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 {
 	unsigned root_level = mmu->root_level;
@@ -3941,6 +4022,7 @@
 
 	reset_rsvds_bits_mask(vcpu, context);
 	update_permission_bitmask(vcpu, context, false);
+	update_pkru_bitmask(vcpu, context, false);
 	update_last_nonleaf_level(vcpu, context);
 
 	MMU_WARN_ON(!is_pae(vcpu));
@@ -3968,6 +4050,7 @@
 
 	reset_rsvds_bits_mask(vcpu, context);
 	update_permission_bitmask(vcpu, context, false);
+	update_pkru_bitmask(vcpu, context, false);
 	update_last_nonleaf_level(vcpu, context);
 
 	context->page_fault = paging32_page_fault;
@@ -4026,6 +4109,7 @@
 	}
 
 	update_permission_bitmask(vcpu, context, false);
+	update_pkru_bitmask(vcpu, context, false);
 	update_last_nonleaf_level(vcpu, context);
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
@@ -4078,6 +4162,7 @@
 	context->direct_map = false;
 
 	update_permission_bitmask(vcpu, context, true);
+	update_pkru_bitmask(vcpu, context, true);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
 	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
@@ -4132,6 +4217,7 @@
 	}
 
 	update_permission_bitmask(vcpu, g_context, false);
+	update_pkru_bitmask(vcpu, g_context, false);
 	update_last_nonleaf_level(vcpu, g_context);
 }
 

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 58fe98a..b70df72 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h

@@ -10,10 +10,11 @@
 #define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
 
 #define PT_WRITABLE_SHIFT 1
+#define PT_USER_SHIFT 2
 
 #define PT_PRESENT_MASK (1ULL << 0)
 #define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
+#define PT_USER_MASK (1ULL << PT_USER_SHIFT)
 #define PT_PWT_MASK (1ULL << 3)
 #define PT_PCD_MASK (1ULL << 4)
 #define PT_ACCESSED_SHIFT 5
@@ -141,11 +142,16 @@
 }
 
 /*
- * Will a fault with a given page-fault error code (pfec) cause a permission
- * fault with the given access (in ACC_* format)?
+ * Check if a given access (described through the I/D, W/R and U/S bits of a
+ * page fault error code pfec) causes a permission fault with the given PTE
+ * access rights (in ACC_* format).
+ *
+ * Return zero if the access does not fault; return the page fault error code
+ * if the access faults.
  */
-static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				    unsigned pte_access, unsigned pfec)
+static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+				  unsigned pte_access, unsigned pte_pkey,
+				  unsigned pfec)
 {
 	int cpl = kvm_x86_ops->get_cpl(vcpu);
 	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -166,10 +172,32 @@
 	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
 	int index = (pfec >> 1) +
 		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+	bool fault = (mmu->permissions[index] >> pte_access) & 1;
 
-	WARN_ON(pfec & PFERR_RSVD_MASK);
+	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
+	pfec |= PFERR_PRESENT_MASK;
 
-	return (mmu->permissions[index] >> pte_access) & 1;
+	if (unlikely(mmu->pkru_mask)) {
+		u32 pkru_bits, offset;
+
+		/*
+		* PKRU defines 32 bits, there are 16 domains and 2
+		* attribute bits per domain in pkru.  pte_pkey is the
+		* index of the protection domain, so pte_pkey * 2 is
+		* is the index of the first bit for the domain.
+		*/
+		pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+
+		/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
+		offset = pfec - 1 +
+			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
+
+		pkru_bits &= mmu->pkru_mask >> offset;
+		pfec |= -pkru_bits & PFERR_PK_MASK;
+		fault |= (pkru_bits != 0);
+	}
+
+	return -(uint32_t)fault & pfec;
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);

diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 11f7643..b431539 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c

@@ -142,12 +142,17 @@
 bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
 			      enum kvm_page_track_mode mode)
 {
-	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-	int index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
+	struct kvm_memory_slot *slot;
+	int index;
 
 	if (WARN_ON(!page_track_mode_is_valid(mode)))
 		return false;
 
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	if (!slot)
+		return false;
+
+	index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL);
 	return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
 }
 

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index e159a81..1d971c7 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h

@@ -257,6 +257,17 @@
 	return 0;
 }
 
+static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
+{
+	unsigned pkeys = 0;
+#if PTTYPE == 64
+	pte_t pte = {.pte = gpte};
+
+	pkeys = pte_flags_pkey(pte_flags(pte));
+#endif
+	return pkeys;
+}
+
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -268,7 +279,7 @@
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access, accessed_dirty;
+	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
 	gpa_t pte_gpa;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
@@ -359,10 +370,10 @@
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
-	if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
-		errcode |= PFERR_PRESENT_MASK;
+	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
+	errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
+	if (unlikely(errcode))
 		goto error;
-	}
 
 	gfn = gpte_to_gfn_lvl(pte, walker->level);
 	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
@@ -949,6 +960,12 @@
 			return 0;
 
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
+			/*
+			 * Update spte before increasing tlbs_dirty to make
+			 * sure no tlb flush is lost after spte is zapped; see
+			 * the comments in kvm_flush_remote_tlbs().
+			 */
+			smp_wmb();
 			vcpu->kvm->tlbs_dirty++;
 			continue;
 		}
@@ -964,6 +981,11 @@
 
 		if (gfn != sp->gfns[i]) {
 			drop_spte(vcpu->kvm, &sp->spt[i]);
+			/*
+			 * The same as above where we are doing
+			 * prefetch_invalid_gpte().
+			 */
+			smp_wmb();
 			vcpu->kvm->tlbs_dirty++;
 			continue;
 		}

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9507038..31346a3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c

@@ -1280,6 +1280,11 @@
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
+static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
 	switch (reg) {
@@ -4347,6 +4352,9 @@
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
+
+	.get_pkru = svm_get_pkru,
+
 	.fpu_activate = svm_fpu_activate,
 	.fpu_deactivate = svm_fpu_deactivate,
 

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 75173ef..ee1c8a9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c

@@ -598,6 +598,10 @@
 	struct page *pml_pg;
 
 	u64 current_tsc_ratio;
+
+	bool guest_pkru_valid;
+	u32 guest_pkru;
+	u32 host_pkru;
 };
 
 enum segment_cache_field {
@@ -2107,6 +2111,7 @@
 	} while (cmpxchg(&pi_desc->control, old.control,
 			new.control) != old.control);
 }
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2167,6 +2172,7 @@
 	}
 
 	vmx_vcpu_pi_load(vcpu, cpu);
+	vmx->host_pkru = read_pkru();
 }
 
 static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -2286,6 +2292,11 @@
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
 
+static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->guest_pkru;
+}
+
 static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -2712,8 +2723,15 @@
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
+	/*
+	 * Old versions of KVM use the single-context version without
+	 * checking for support, so declare that it is supported even
+	 * though it is treated as global context.  The alternative is
+	 * not failing the single-context invvpid, and it is worse.
+	 */
 	if (enable_vpid)
 		vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+				VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |
 				VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
 	else
 		vmx->nested.nested_vmx_vpid_caps = 0;
@@ -3886,13 +3904,17 @@
 
 	if (!enable_unrestricted_guest && !is_paging(vcpu))
 		/*
-		 * SMEP/SMAP is disabled if CPU is in non-paging mode in
-		 * hardware.  However KVM always uses paging mode without
-		 * unrestricted guest.
-		 * To emulate this behavior, SMEP/SMAP needs to be manually
-		 * disabled when guest switches to non-paging mode.
+		 * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
+		 * hardware.  To emulate this behavior, SMEP/SMAP/PKU needs
+		 * to be manually disabled when guest switches to non-paging
+		 * mode.
+		 *
+		 * If !enable_unrestricted_guest, the CPU is always running
+		 * with CR0.PG=1 and CR4 needs to be modified.
+		 * If enable_unrestricted_guest, the CPU automatically
+		 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
 		 */
-		hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
+		hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
 
 	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
@@ -5506,7 +5528,7 @@
 		return kvm_set_cr4(vcpu, val);
 }
 
-/* called to set cr0 as approriate for clts instruction exit. */
+/* called to set cr0 as appropriate for clts instruction exit. */
 static void handle_clts(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu)) {
@@ -7245,7 +7267,7 @@
 	/* The value to write might be 32 or 64 bits, depending on L1's long
 	 * mode, and eventually we need to write that into a field of several
 	 * possible lengths. The code below first zero-extends the value to 64
-	 * bit (field_value), and then copies only the approriate number of
+	 * bit (field_value), and then copies only the appropriate number of
 	 * bits into the vmcs12 field.
 	 */
 	u64 field_value = 0;
@@ -7399,6 +7421,7 @@
 	if (!(types & (1UL << type))) {
 		nested_vmx_failValid(vcpu,
 				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		skip_emulated_instruction(vcpu);
 		return 1;
 	}
 
@@ -7457,6 +7480,7 @@
 	if (!(types & (1UL << type))) {
 		nested_vmx_failValid(vcpu,
 			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		skip_emulated_instruction(vcpu);
 		return 1;
 	}
 
@@ -7473,12 +7497,17 @@
 	}
 
 	switch (type) {
+	case VMX_VPID_EXTENT_SINGLE_CONTEXT:
+		/*
+		 * Old versions of KVM use the single-context version so we
+		 * have to support it; just treat it the same as all-context.
+		 */
 	case VMX_VPID_EXTENT_ALL_CONTEXT:
 		__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
 		nested_vmx_succeed(vcpu);
 		break;
 	default:
-		/* Trap single context invalidation invvpid calls */
+		/* Trap individual address invalidation invvpid calls */
 		BUG_ON(1);
 		break;
 	}
@@ -8621,6 +8650,9 @@
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
+	if (vmx->guest_pkru_valid)
+		__write_pkru(vmx->guest_pkru);
+
 	atomic_switch_perf_msrs(vmx);
 	debugctlmsr = get_debugctlmsr();
 
@@ -8761,6 +8793,20 @@
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
 
 	/*
+	 * eager fpu is enabled if PKEY is supported and CR4 is switched
+	 * back on host, so it is safe to read guest PKRU from current
+	 * XSAVE.
+	 */
+	if (boot_cpu_has(X86_FEATURE_OSPKE)) {
+		vmx->guest_pkru = __read_pkru();
+		if (vmx->guest_pkru != vmx->host_pkru) {
+			vmx->guest_pkru_valid = true;
+			__write_pkru(vmx->host_pkru);
+		} else
+			vmx->guest_pkru_valid = false;
+	}
+
+	/*
 	 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
 	 * we did not inject a still-pending event to L1 now because of
 	 * nested_run_pending, we need to re-enable this bit.
@@ -10884,6 +10930,9 @@
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+
+	.get_pkru = vmx_get_pkru,
+
 	.fpu_activate = vmx_fpu_activate,
 	.fpu_deactivate = vmx_fpu_deactivate,
 

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7236bd3..0a2c70e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c

@@ -723,7 +723,7 @@
 {
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
 	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
-				   X86_CR4_SMEP | X86_CR4_SMAP;
+				   X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
 
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
@@ -740,6 +740,9 @@
 	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
 		return 1;
 
+	if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
+		return 1;
+
 	if (is_long_mode(vcpu)) {
 		if (!(cr4 & X86_CR4_PAE))
 			return 1;
@@ -765,7 +768,7 @@
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
-	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
+	if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
 		kvm_update_cpuid(vcpu);
 
 	return 0;
@@ -1559,7 +1562,7 @@
 
 	/*
 	 * GCC likes to generate cmov here, but this branch is extremely
-	 * predictable (it's just a funciton of time and the likely is
+	 * predictable (it's just a function of time and the likely is
 	 * very likely) and there's a data dependence, so force GCC
 	 * to generate a branch instead.  I don't barrier() because
 	 * we don't actually need a barrier, and if this function
@@ -4326,9 +4329,14 @@
 	u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
 		| (write ? PFERR_WRITE_MASK : 0);
 
+	/*
+	 * currently PKRU is only applied to ept enabled guest so
+	 * there is no pkey in EPT page table for L1 guest or EPT
+	 * shadow page table for L2 guest.
+	 */
 	if (vcpu_match_mmio_gva(vcpu, gva)
 	    && !permission_fault(vcpu, vcpu->arch.walk_mmu,
-				 vcpu->arch.access, access)) {
+				 vcpu->arch.access, 0, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 					(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
@@ -6087,12 +6095,10 @@
 	}
 
 	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending) {
-		if (kvm_x86_ops->nmi_allowed(vcpu)) {
-			--vcpu->arch.nmi_pending;
-			vcpu->arch.nmi_injected = true;
-			kvm_x86_ops->set_nmi(vcpu);
-		}
+	if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+		--vcpu->arch.nmi_pending;
+		vcpu->arch.nmi_injected = true;
+		kvm_x86_ops->set_nmi(vcpu);
 	} else if (kvm_cpu_has_injectable_intr(vcpu)) {
 		/*
 		 * Because interrupts can be injected asynchronously, we are
@@ -6561,10 +6567,12 @@
 		if (inject_pending_event(vcpu, req_int_win) != 0)
 			req_immediate_exit = true;
 		/* enable NMI/IRQ window open exits if needed */
-		else if (vcpu->arch.nmi_pending)
-			kvm_x86_ops->enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
-			kvm_x86_ops->enable_irq_window(vcpu);
+		else {
+			if (vcpu->arch.nmi_pending)
+				kvm_x86_ops->enable_nmi_window(vcpu);
+			if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
+				kvm_x86_ops->enable_irq_window(vcpu);
+		}
 
 		if (kvm_lapic_enabled(vcpu)) {
 			update_cr8_intercept(vcpu);
@@ -6588,8 +6596,12 @@
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
-	/* We should set ->mode before check ->requests,
-	 * see the comment in make_all_cpus_request.
+	/*
+	 * We should set ->mode before check ->requests,
+	 * Please see the comment in kvm_make_all_cpus_request.
+	 * This also orders the write to mode from any reads
+	 * to the page tables done while the VCPU is running.
+	 * Please see the comment in kvm_flush_remote_tlbs.
 	 */
 	smp_mb__after_srcu_read_unlock();
 
@@ -7123,7 +7135,7 @@
 
 	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
-	if (sregs->cr4 & X86_CR4_OSXSAVE)
+	if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
 		kvm_update_cpuid(vcpu);
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);

diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 007940f..7ce3634 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h

@@ -183,7 +183,8 @@
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
-				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512)
+				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
+				| XFEATURE_MASK_PKRU)
 extern u64 host_xcr0;
 
 extern u64 kvm_supported_xcr0(void);

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index a501fa2..72a5767 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile

@@ -2,6 +2,9 @@
 # Makefile for x86 specific library files.
 #
 
+# Produces uninteresting flaky coverage.
+KCOV_INSTRUMENT_delay.o	:= n
+
 inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
 inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
 quiet_cmd_inat_tables = GEN     $@

diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index cbb8ee5..2ec0b0abb 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S

@@ -1,6 +1,7 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
+#include <asm/errno.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
@@ -268,16 +269,16 @@
 	decl %ecx
 	jnz .L_copy_trailing_bytes
 
-	/* Copy successful. Return true */
+	/* Copy successful. Return zero */
 .L_done_memcpy_trap:
 	xorq %rax, %rax
 	ret
 ENDPROC(memcpy_mcsafe)
 
 	.section .fixup, "ax"
-	/* Return false for any failure */
+	/* Return -EFAULT for any failure */
 .L_memcpy_mcsafe_fail:
-	mov	$1, %rax
+	mov	$-EFAULT, %rax
 	ret
 
 	.previous

diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index c9c8122..e1229ec 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S

@@ -9,7 +9,7 @@
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
  * string to get better performance than the original function. The code is
- * simpler and shorter than the orignal function as well.
+ * simpler and shorter than the original function as well.
  *
  * rdi   destination
  * rsi   value (char)

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 67cf2e1..f989132 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile

@@ -1,3 +1,6 @@
+# Kernel does not boot with instrumentation of tlb.c.
+KCOV_INSTRUMENT_tlb.o	:= n
+
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
 	    pat.o pgtable.o physaddr.o gup.o setup_nx.o
 

diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 9dd7e4b..82447b3 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c

@@ -1,17 +1,10 @@
 #include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/sort.h>
 #include <asm/uaccess.h>
 
 typedef bool (*ex_handler_t)(const struct exception_table_entry *,
 			    struct pt_regs *, int);
 
 static inline unsigned long
-ex_insn_addr(const struct exception_table_entry *x)
-{
-	return (unsigned long)&x->insn + x->insn;
-}
-static inline unsigned long
 ex_fixup_addr(const struct exception_table_entry *x)
 {
 	return (unsigned long)&x->fixup + x->fixup;
@@ -110,104 +103,3 @@
 	*ip = new_ip;
 	return 1;
 }
-
-/*
- * Search one exception table for an entry corresponding to the
- * given instruction address, and return the address of the entry,
- * or NULL if none is found.
- * We use a binary search, and thus we assume that the table is
- * already sorted.
- */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
-	       unsigned long value)
-{
-	while (first <= last) {
-		const struct exception_table_entry *mid;
-		unsigned long addr;
-
-		mid = ((last - first) >> 1) + first;
-		addr = ex_insn_addr(mid);
-		if (addr < value)
-			first = mid + 1;
-		else if (addr > value)
-			last = mid - 1;
-		else
-			return mid;
-        }
-        return NULL;
-}
-
-/*
- * The exception table needs to be sorted so that the binary
- * search that we use to find entries in it works properly.
- * This is used both for the kernel exception table and for
- * the exception tables of modules that get loaded.
- *
- */
-static int cmp_ex(const void *a, const void *b)
-{
-	const struct exception_table_entry *x = a, *y = b;
-
-	/*
-	 * This value will always end up fittin in an int, because on
-	 * both i386 and x86-64 the kernel symbol-reachable address
-	 * space is < 2 GiB.
-	 *
-	 * This compare is only valid after normalization.
-	 */
-	return x->insn - y->insn;
-}
-
-void sort_extable(struct exception_table_entry *start,
-		  struct exception_table_entry *finish)
-{
-	struct exception_table_entry *p;
-	int i;
-
-	/* Convert all entries to being relative to the start of the section */
-	i = 0;
-	for (p = start; p < finish; p++) {
-		p->insn += i;
-		i += 4;
-		p->fixup += i;
-		i += 4;
-		p->handler += i;
-		i += 4;
-	}
-
-	sort(start, finish - start, sizeof(struct exception_table_entry),
-	     cmp_ex, NULL);
-
-	/* Denormalize all entries */
-	i = 0;
-	for (p = start; p < finish; p++) {
-		p->insn -= i;
-		i += 4;
-		p->fixup -= i;
-		i += 4;
-		p->handler -= i;
-		i += 4;
-	}
-}
-
-#ifdef CONFIG_MODULES
-/*
- * If the exception table is sorted, any referring to the module init
- * will be at the beginning or the end.
- */
-void trim_init_extable(struct module *m)
-{
-	/*trim the beginning*/
-	while (m->num_exentries &&
-	       within_module_init(ex_insn_addr(&m->extable[0]), m)) {
-		m->extable++;
-		m->num_exentries--;
-	}
-	/*trim the end*/
-	while (m->num_exentries &&
-	       within_module_init(ex_insn_addr(&m->extable[m->num_exentries-1]), m))
-		m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index a0a0b98..8047687 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c

@@ -728,14 +728,14 @@
 
 	/*
 	 * This covers 32-bit emulation as well as 32-bit kernels
-	 * running on 64-bit harware.
+	 * running on 64-bit hardware.
 	 */
 	if (!is_64bit_mm(mm))
 		return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
 
 	/*
 	 * 'x86_virt_bits' returns what the hardware is capable
-	 * of, and returns the full >32-bit adddress space when
+	 * of, and returns the full >32-bit address space when
 	 * running 32-bit kernels on 64-bit hardware.
 	 */
 	virt_space = (1ULL << boot_cpu_data.x86_virt_bits);

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 04e2e71..faec01e 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c

@@ -149,7 +149,7 @@
 	PAT_WT = 4,		/* Write Through */
 	PAT_WP = 5,		/* Write Protected */
 	PAT_WB = 6,		/* Write Back (default) */
-	PAT_UC_MINUS = 7,	/* UC, but can be overriden by MTRR */
+	PAT_UC_MINUS = 7,	/* UC, but can be overridden by MTRR */
 };
 
 #define CM(c) (_PAGE_CACHE_MODE_ ## c)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8f4cc3d..fe9b9f7 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c

@@ -104,10 +104,8 @@
 
 	inc_irq_stat(irq_tlb_count);
 
-	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
+	if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
 		return;
-	if (!f->flush_end)
-		f->flush_end = f->flush_start + PAGE_SIZE;
 
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
@@ -135,12 +133,20 @@
 				 unsigned long end)
 {
 	struct flush_tlb_info info;
+
+	if (end == 0)
+		end = start + PAGE_SIZE;
 	info.flush_mm = mm;
 	info.flush_start = start;
 	info.flush_end = end;
 
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
-	trace_tlb_flush(TLB_REMOTE_SEND_IPI, end - start);
+	if (end == TLB_FLUSH_ALL)
+		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
+	else
+		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
+				(end - start) >> PAGE_SHIFT);
+
 	if (is_uv_system()) {
 		unsigned int cpu;
 

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 1d2e639..0e07e09 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c

@@ -437,7 +437,8 @@
 				 void *data)
 {
 	int cpu = (unsigned long)data;
-	switch (action) {
+
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_FAILED:
 	case CPU_ONLINE:
 		smp_call_function_single(cpu, nmi_cpu_up, NULL, 0);

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
index 0ae7f2a..c26cf39 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c

@@ -1,5 +1,5 @@
 /*
- * platform_bma023.c: bma023 platform data initilization file
+ * platform_bma023.c: bma023 platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  *

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
index 69a7836..c259fb6 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c

@@ -1,5 +1,5 @@
 /*
- * platform_emc1403.c: emc1403 platform data initilization file
+ * platform_emc1403.c: emc1403 platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
index dccae6b..52534ec 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c

@@ -1,5 +1,5 @@
 /*
- * platform_gpio_keys.c: gpio_keys platform data initilization file
+ * platform_gpio_keys.c: gpio_keys platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
index 54226de..a35cf91 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c

@@ -1,5 +1,5 @@
 /*
- * platform_lis331.c:  lis331 platform data initilization file
+ * platform_lis331.c:  lis331 platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
index 2c8acbc..6e075af 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c

@@ -1,5 +1,5 @@
 /*
- * platform_max7315.c: max7315 platform data initilization file
+ * platform_max7315.c: max7315 platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
index cfe9a47..ee22864 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c

@@ -1,5 +1,5 @@
 /*
- * platform_mpu3050.c: mpu3050 platform data initilization file
+ * platform_mpu3050.c: mpu3050 platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.c b/arch/x86/platform/intel-mid/device_libs/platform_msic.c
index 9f4a775..e421106 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.c

@@ -1,5 +1,5 @@
 /*
- * platform_msic.c: MSIC platform data initilization file
+ * platform_msic.c: MSIC platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
index 2962939..cb3490ec 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c

@@ -1,5 +1,5 @@
 /*
- * platform_msic_audio.c: MSIC audio platform data initilization file
+ * platform_msic_audio.c: MSIC audio platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
index f446c33..4f72193 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c

@@ -1,5 +1,5 @@
 /*
- * platform_msic_battery.c: MSIC battery platform data initilization file
+ * platform_msic_battery.c: MSIC battery platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
index 2a4f7b1..70de5b5 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c

@@ -1,5 +1,5 @@
 /*
- * platform_msic_gpio.c: MSIC GPIO platform data initilization file
+ * platform_msic_gpio.c: MSIC GPIO platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
index 6497111..3d7c201 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c

@@ -1,5 +1,5 @@
 /*
- * platform_msic_ocd.c: MSIC OCD platform data initilization file
+ * platform_msic_ocd.c: MSIC OCD platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
index 83a3459..038f618 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c

@@ -1,5 +1,5 @@
 /*
- * platform_msic_power_btn.c: MSIC power btn platform data initilization file
+ * platform_msic_power_btn.c: MSIC power btn platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
index a351878..114a575 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c

@@ -1,5 +1,5 @@
 /*
- * platform_msic_thermal.c: msic_thermal platform data initilization file
+ * platform_msic_thermal.c: msic_thermal platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
index 65c2a9a..e30cb62 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c

@@ -1,5 +1,5 @@
 /*
- * platform_pmic_gpio.c: PMIC GPIO platform data initilization file
+ * platform_pmic_gpio.c: PMIC GPIO platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
index 740fc75..b1526b9 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c

@@ -1,5 +1,5 @@
 /*
- * platform_tc35876x.c: tc35876x platform data initilization file
+ * platform_tc35876x.c: tc35876x platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
index 33be0b3..4f41372 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c

@@ -1,5 +1,5 @@
 /*
- * platform_tca6416.c: tca6416 platform data initilization file
+ * platform_tca6416.c: tca6416 platform data initialization file
  *
  * (C) Copyright 2013 Intel Corporation
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>

diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
index 3cefba1..50a4147 100644
--- a/arch/x86/purgatory/stack.S
+++ b/arch/x86/purgatory/stack.S

@@ -8,7 +8,7 @@
  */
 
 	/* A stack for the loaded kernel.
-	 * Seperate and in the data section so it can be prepopulated.
+	 * Separate and in the data section so it can be prepopulated.
 	 */
 	.data
 	.balign 4096

diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 55d38cf..9e02dca 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c

@@ -20,6 +20,7 @@
 #include <linux/pci.h>
 
 #include <asm/mce.h>
+#include <asm/smp.h>
 #include <asm/amd_nb.h>
 #include <asm/irq_vectors.h>
 
@@ -206,7 +207,7 @@
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 	u32 cores_per_node;
 
-	cores_per_node = c->x86_max_cores / amd_get_nodes_per_socket();
+	cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
 
 	return cores_per_node * node_id;
 }

diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 053abe7b0..b959646 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile

@@ -9,6 +9,9 @@
 KASAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT		:= n
+
 always := realmode.bin realmode.relocs
 
 wakeup-objs	:= wakeup_asm.o wakemain.o video-mode.o

diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c
index d5644bb..9fd2484 100644
--- a/arch/x86/video/fbdev.c
+++ b/arch/x86/video/fbdev.c

@@ -14,26 +14,24 @@
 int fb_is_primary_device(struct fb_info *info)
 {
 	struct device *device = info->device;
-	struct pci_dev *pci_dev = NULL;
 	struct pci_dev *default_device = vga_default_device();
-	struct resource *res = NULL;
+	struct pci_dev *pci_dev;
+	struct resource *res;
 
-	if (device)
-		pci_dev = to_pci_dev(device);
-
-	if (!pci_dev)
+	if (!device || !dev_is_pci(device))
 		return 0;
 
+	pci_dev = to_pci_dev(device);
+
 	if (default_device) {
 		if (pci_dev == default_device)
 			return 1;
-		else
-			return 0;
+		return 0;
 	}
 
-	res = &pci_dev->resource[PCI_ROM_RESOURCE];
+	res = pci_dev->resource + PCI_ROM_RESOURCE;
 
-	if (res && res->flags & IORESOURCE_ROM_SHADOW)
+	if (res->flags & IORESOURCE_ROM_SHADOW)
 		return 1;
 
 	return 0;

diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index abf4901..db52a7f 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c

@@ -66,7 +66,7 @@
 
 	ret = HYPERVISOR_platform_op(&op);
 	if (ret)
-		return 0;
+		op.u.pcpu_info.apic_id = BAD_APICID;
 
 	return op.u.pcpu_info.apic_id << 24;
 }
@@ -142,6 +142,14 @@
 {
 }
 
+static int xen_cpu_present_to_apicid(int cpu)
+{
+	if (cpu_present(cpu))
+		return xen_get_apic_id(xen_apic_read(APIC_ID));
+	else
+		return BAD_APICID;
+}
+
 static struct apic xen_pv_apic = {
 	.name 				= "Xen PV",
 	.probe 				= xen_apic_probe_pv,
@@ -162,7 +170,7 @@
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map, /* Used on 32-bit */
 	.setup_apic_routing		= NULL,
-	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.cpu_present_to_apicid		= xen_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid, /* Used on 32-bit */
 	.check_phys_apicid_present	= default_check_phys_apicid_present, /* smp_sanity_check needs it */
 	.phys_pkg_id			= xen_phys_pkg_id, /* detect_ht */

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2379a5a..880862c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c

@@ -962,7 +962,7 @@
 	tss->x86_tss.sp0 = thread->sp0;
 }
 
-static void xen_set_iopl_mask(unsigned mask)
+void xen_set_iopl_mask(unsigned mask)
 {
 	struct physdev_set_iopl set_iopl;
 

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c913ca4..478a2de 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c

@@ -1256,7 +1256,7 @@
 	xen_cleanhighmap(addr, addr + size);
 	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
 #ifdef DEBUG
-	/* This is superflous and is not neccessary, but you know what
+	/* This is superfluous and is not necessary, but you know what
 	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
 	 * anything at this stage. */
 	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
@@ -1474,7 +1474,7 @@
 /*
  * At the start of the day - when Xen launches a guest, it has already
  * built pagetables for the guest. We diligently look over them
- * in xen_setup_kernel_pagetable and graft as appropiate them in the
+ * in xen_setup_kernel_pagetable and graft as appropriate them in the
  * init_level4_pgt and its friends. Then when we are happy we load
  * the new init_level4_pgt - and continue on.
  *
@@ -2792,7 +2792,7 @@
 	struct remap_data *rmd = data;
 	pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
 
-	/* If we have a contigious range, just update the mfn itself,
+	/* If we have a contiguous range, just update the mfn itself,
 	   else update pointer to be "next mfn". */
 	if (rmd->contiguous)
 		(*rmd->mfn)++;
@@ -2833,7 +2833,7 @@
 
 	rmd.mfn = gfn;
 	rmd.prot = prot;
-	/* We use the err_ptr to indicate if there we are doing a contigious
+	/* We use the err_ptr to indicate if there we are doing a contiguous
 	 * mapping or a discontigious mapping. */
 	rmd.contiguous = !err_ptr;
 

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3c6d17f..719cf29 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c

@@ -545,6 +545,8 @@
 	 * data back is to call:
 	 */
 	tick_nohz_idle_enter();
+
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 #else /* !CONFIG_HOTPLUG_CPU */

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index de93b20..7f8d8ab 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S

@@ -26,7 +26,7 @@
 		      (1 << XENFEAT_auto_translated_physmap) | \
 		      (1 << XENFEAT_supervisor_mode_kernel) | \
 		      (1 << XENFEAT_hvm_callback_vector))
-/* The XENFEAT_writable_page_tables is not stricly neccessary as we set that
+/* The XENFEAT_writable_page_tables is not stricly necessary as we set that
  * up regardless whether this CONFIG option is enabled or not, but it
  * clarifies what the right flags need to be.
  */

diff --git a/block/bio.c b/block/bio.c
index f124a0a..807d25e 100644
--- a/block/bio.c
+++ b/block/bio.c

@@ -1339,7 +1339,7 @@
 		 * release the pages we didn't map into the bio, if any
 		 */
 		while (j < page_limit)
-			page_cache_release(pages[j++]);
+			put_page(pages[j++]);
 	}
 
 	kfree(pages);
@@ -1365,7 +1365,7 @@
 	for (j = 0; j < nr_pages; j++) {
 		if (!pages[j])
 			break;
-		page_cache_release(pages[j]);
+		put_page(pages[j]);
 	}
  out:
 	kfree(pages);
@@ -1385,7 +1385,7 @@
 		if (bio_data_dir(bio) == READ)
 			set_page_dirty_lock(bvec->bv_page);
 
-		page_cache_release(bvec->bv_page);
+		put_page(bvec->bv_page);
 	}
 
 	bio_put(bio);
@@ -1615,8 +1615,8 @@
  * the BIO and the offending pages and re-dirty the pages in process context.
  *
  * It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on.  It will run one page_cache_release() against each page and will
- * run one bio_put() against the BIO.
+ * here on.  It will run one put_page() against each page and will run one
+ * bio_put() against the BIO.
  */
 
 static void bio_dirty_fn(struct work_struct *work);
@@ -1658,7 +1658,7 @@
 		struct page *page = bvec->bv_page;
 
 		if (PageDirty(page) || PageCompound(page)) {
-			page_cache_release(page);
+			put_page(page);
 			bvec->bv_page = NULL;
 		} else {
 			nr_clean_pages++;

diff --git a/block/blk-core.c b/block/blk-core.c
index 827f8ba..b60537b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c

@@ -706,7 +706,7 @@
 		goto fail_id;
 
 	q->backing_dev_info.ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+			(VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
 	q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK;
 	q->backing_dev_info.name = "block";
 	q->node = node_id;

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 431fdda..4ea4dd8 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c

@@ -416,12 +416,14 @@
 static void blk_mq_sysfs_init(struct request_queue *q)
 {
 	struct blk_mq_ctx *ctx;
-	int i;
+	int cpu;
 
 	kobject_init(&q->mq_kobj, &blk_mq_ktype);
 
-	queue_for_each_ctx(q, ctx, i)
+	for_each_possible_cpu(cpu) {
+		ctx = per_cpu_ptr(q->queue_ctx, cpu);
 		kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
+	}
 }
 
 int blk_mq_register_disk(struct gendisk *disk)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 050f7a1..1699baf 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c

@@ -1798,11 +1798,12 @@
 	/*
 	 * Map software to hardware queues
 	 */
-	queue_for_each_ctx(q, ctx, i) {
+	for_each_possible_cpu(i) {
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
 		if (!cpumask_test_cpu(i, online_mask))
 			continue;
 
+		ctx = per_cpu_ptr(q->queue_ctx, i);
 		hctx = q->mq_ops->map_queue(q, i);
 
 		cpumask_set_cpu(i, hctx->cpumask);

diff --git a/block/blk-settings.c b/block/blk-settings.c
index c7bb666..331e4ee 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c

@@ -239,8 +239,8 @@
 	struct queue_limits *limits = &q->limits;
 	unsigned int max_sectors;
 
-	if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
-		max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
+	if ((max_hw_sectors << 9) < PAGE_SIZE) {
+		max_hw_sectors = 1 << (PAGE_SHIFT - 9);
 		printk(KERN_INFO "%s: set to minimum %d\n",
 		       __func__, max_hw_sectors);
 	}
@@ -329,8 +329,8 @@
  **/
 void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
 {
-	if (max_size < PAGE_CACHE_SIZE) {
-		max_size = PAGE_CACHE_SIZE;
+	if (max_size < PAGE_SIZE) {
+		max_size = PAGE_SIZE;
 		printk(KERN_INFO "%s: set to minimum %d\n",
 		       __func__, max_size);
 	}
@@ -760,8 +760,8 @@
  **/
 void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
 {
-	if (mask < PAGE_CACHE_SIZE - 1) {
-		mask = PAGE_CACHE_SIZE - 1;
+	if (mask < PAGE_SIZE - 1) {
+		mask = PAGE_SIZE - 1;
 		printk(KERN_INFO "%s: set to minimum %lx\n",
 		       __func__, mask);
 	}

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index dd937630..995b58d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c

@@ -76,7 +76,7 @@
 static ssize_t queue_ra_show(struct request_queue *q, char *page)
 {
 	unsigned long ra_kb = q->backing_dev_info.ra_pages <<
-					(PAGE_CACHE_SHIFT - 10);
+					(PAGE_SHIFT - 10);
 
 	return queue_var_show(ra_kb, (page));
 }
@@ -90,7 +90,7 @@
 	if (ret < 0)
 		return ret;
 
-	q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
+	q->backing_dev_info.ra_pages = ra_kb >> (PAGE_SHIFT - 10);
 
 	return ret;
 }
@@ -117,7 +117,7 @@
 	if (blk_queue_cluster(q))
 		return queue_var_show(queue_max_segment_size(q), (page));
 
-	return queue_var_show(PAGE_CACHE_SIZE, (page));
+	return queue_var_show(PAGE_SIZE, (page));
 }
 
 static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
@@ -198,7 +198,7 @@
 {
 	unsigned long max_sectors_kb,
 		max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
-			page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
+			page_kb = 1 << (PAGE_SHIFT - 10);
 	ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
 
 	if (ret < 0)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e3c591d..4a34978 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c

@@ -4075,7 +4075,7 @@
 		 * idle timer unplug to continue working.
 		 */
 		if (cfq_cfqq_wait_request(cfqq)) {
-			if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
+			if (blk_rq_bytes(rq) > PAGE_SIZE ||
 			    cfqd->busy_queues > 1) {
 				cfq_del_timer(cfqd, cfqq);
 				cfq_clear_cfqq_wait_request(cfqq);

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f678c73..556826a 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c

@@ -710,7 +710,7 @@
 			return -EINVAL;
 		bdi = blk_get_backing_dev_info(bdev);
 		return compat_put_long(arg,
-				       (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
+				       (bdi->ra_pages * PAGE_SIZE) / 512);
 	case BLKROGET: /* compatible */
 		return compat_put_int(arg, bdev_read_only(bdev) != 0);
 	case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
@@ -729,7 +729,7 @@
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 		bdi = blk_get_backing_dev_info(bdev);
-		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+		bdi->ra_pages = (arg * 512) / PAGE_SIZE;
 		return 0;
 	case BLKGETSIZE:
 		size = i_size_read(bdev->bd_inode);

diff --git a/block/ioctl.c b/block/ioctl.c
index d8996bb..4ff1f92 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c

@@ -550,7 +550,7 @@
 		if (!arg)
 			return -EINVAL;
 		bdi = blk_get_backing_dev_info(bdev);
-		return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
+		return put_long(arg, (bdi->ra_pages * PAGE_SIZE) / 512);
 	case BLKROGET:
 		return put_int(arg, bdev_read_only(bdev) != 0);
 	case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
@@ -578,7 +578,7 @@
 		if(!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 		bdi = blk_get_backing_dev_info(bdev);
-		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+		bdi->ra_pages = (arg * 512) / PAGE_SIZE;
 		return 0;
 	case BLKBSZSET:
 		return blkdev_bszset(bdev, mode, argp);

diff --git a/block/partition-generic.c b/block/partition-generic.c
index 5d87019..2c6ae2a 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c

@@ -566,8 +566,8 @@
 {
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
 
-	return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-			NULL);
+	return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)),
+				 NULL);
 }
 
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
@@ -584,9 +584,9 @@
 		if (PageError(page))
 			goto fail;
 		p->v = page;
-		return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9);
+		return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
 fail:
-		page_cache_release(page);
+		put_page(page);
 	}
 	p->v = NULL;
 	return NULL;

diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c
index 3bbdcc7..7d7a39b4 100644
--- a/crypto/asymmetric_keys/pkcs7_trust.c
+++ b/crypto/asymmetric_keys/pkcs7_trust.c

@@ -178,6 +178,8 @@
 	int cached_ret = -ENOKEY;
 	int ret;
 
+	*_trusted = false;
+
 	for (p = pkcs7->certs; p; p = p->next)
 		p->seen = false;
 

diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index d0aad06..f245bf3 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c

@@ -145,6 +145,7 @@
 	{ "AMD0010", APD_ADDR(cz_i2c_desc) },
 	{ "AMDI0010", APD_ADDR(cz_i2c_desc) },
 	{ "AMD0020", APD_ADDR(cz_uart_desc) },
+	{ "AMDI0020", APD_ADDR(cz_uart_desc) },
 	{ "AMD0030", },
 #endif
 #ifdef CONFIG_ARM64

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index b5e54f2..0d92d0f 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c

@@ -491,6 +491,58 @@
 }
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+#ifdef CONFIG_X86
+static bool acpi_hwp_native_thermal_lvt_set;
+static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle,
+							  u32 lvl,
+							  void *context,
+							  void **rv)
+{
+	u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
+	u32 capbuf[2];
+	struct acpi_osc_context osc_context = {
+		.uuid_str = sb_uuid_str,
+		.rev = 1,
+		.cap.length = 8,
+		.cap.pointer = capbuf,
+	};
+
+	if (acpi_hwp_native_thermal_lvt_set)
+		return AE_CTRL_TERMINATE;
+
+	capbuf[0] = 0x0000;
+	capbuf[1] = 0x1000; /* set bit 12 */
+
+	if (ACPI_SUCCESS(acpi_run_osc(handle, &osc_context))) {
+		if (osc_context.ret.pointer && osc_context.ret.length > 1) {
+			u32 *capbuf_ret = osc_context.ret.pointer;
+
+			if (capbuf_ret[1] & 0x1000) {
+				acpi_handle_info(handle,
+					"_OSC native thermal LVT Acked\n");
+				acpi_hwp_native_thermal_lvt_set = true;
+			}
+		}
+		kfree(osc_context.ret.pointer);
+	}
+
+	return AE_OK;
+}
+
+void __init acpi_early_processor_osc(void)
+{
+	if (boot_cpu_has(X86_FEATURE_HWP)) {
+		acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+				    ACPI_UINT32_MAX,
+				    acpi_hwp_native_thermal_lvt_osc,
+				    NULL, NULL, NULL);
+		acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID,
+				 acpi_hwp_native_thermal_lvt_osc,
+				 NULL, NULL);
+	}
+}
+#endif
+
 /*
  * The following ACPI IDs are known to be suitable for representing as
  * processor devices.

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 0e85678..c068c82 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c

@@ -1019,6 +1019,9 @@
 		goto error1;
 	}
 
+	/* Set capability bits for _OSC under processor scope */
+	acpi_early_processor_osc();
+
 	/*
 	 * _OSC method may exist in module level code,
 	 * so it must be run after ACPI_FULL_INITIALIZATION

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index a37508e..7c18847 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h

@@ -145,6 +145,12 @@
 static inline void acpi_early_processor_set_pdc(void) {}
 #endif
 
+#ifdef CONFIG_X86
+void acpi_early_processor_osc(void);
+#else
+static inline void acpi_early_processor_osc(void) {}
+#endif
+
 /* --------------------------------------------------------------------------
                                   Embedded Controller
    -------------------------------------------------------------------------- */

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 2aee416..f2fd3fe 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c

@@ -816,6 +816,7 @@
 			next = adev->node.next;
 			if (next == head) {
 				child = NULL;
+				adev = ACPI_COMPANION(dev);
 				goto nondev;
 			}
 			adev = list_entry(next, struct acpi_device, node);

diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index d02fd53..56241eb 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c

@@ -27,8 +27,20 @@
 
 #ifdef CONFIG_X86
 #define valid_IRQ(i) (((i) != 0) && ((i) != 2))
+static inline bool acpi_iospace_resource_valid(struct resource *res)
+{
+	/* On X86 IO space is limited to the [0 - 64K] IO port range */
+	return res->end < 0x10003;
+}
 #else
 #define valid_IRQ(i) (true)
+/*
+ * ACPI IO descriptors on arches other than X86 contain MMIO CPU physical
+ * addresses mapping IO space in CPU physical address space, IO space
+ * resources can be placed anywhere in the 64-bit physical address space.
+ */
+static inline bool
+acpi_iospace_resource_valid(struct resource *res) { return true; }
 #endif
 
 static bool acpi_dev_resource_len_valid(u64 start, u64 end, u64 len, bool io)
@@ -127,7 +139,7 @@
 	if (!acpi_dev_resource_len_valid(res->start, res->end, len, true))
 		res->flags |= IORESOURCE_DISABLED | IORESOURCE_UNSET;
 
-	if (res->end >= 0x10003)
+	if (!acpi_iospace_resource_valid(res))
 		res->flags |= IORESOURCE_DISABLED | IORESOURCE_UNSET;
 
 	if (io_decode == ACPI_DECODE_16)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index fbfcce3..2a8b596 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c

@@ -748,6 +748,7 @@
 
 static void acpi_hibernation_leave(void)
 {
+	pm_set_resume_via_firmware();
 	/*
 	 * If ACPI is not enabled by the BIOS and the boot kernel, we need to
 	 * enable it here.

diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index f12a724..050673f 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c

@@ -692,7 +692,7 @@
 		mask = obj->integer.value;
 	else if (obj->type == ACPI_TYPE_BUFFER)
 		for (i = 0; i < obj->buffer.length && i < 8; i++)
-			mask |= (((u8)obj->buffer.pointer[i]) << (i * 8));
+			mask |= (((u64)obj->buffer.pointer[i]) << (i * 8));
 	ACPI_FREE(obj);
 
 	/*

diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index 87b8083..bdf28f7 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c

@@ -2,6 +2,7 @@
  * Coherent per-device memory handling.
  * Borrowed from i386
  */
+#include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -31,7 +32,10 @@
 	if (!size)
 		goto out;
 
-	mem_base = ioremap(phys_addr, size);
+	if (flags & DMA_MEMORY_MAP)
+		mem_base = memremap(phys_addr, size, MEMREMAP_WC);
+	else
+		mem_base = ioremap(phys_addr, size);
 	if (!mem_base)
 		goto out;
 
@@ -54,8 +58,12 @@
 
 out:
 	kfree(dma_mem);
-	if (mem_base)
-		iounmap(mem_base);
+	if (mem_base) {
+		if (flags & DMA_MEMORY_MAP)
+			memunmap(mem_base);
+		else
+			iounmap(mem_base);
+	}
 	return false;
 }
 
@@ -63,7 +71,11 @@
 {
 	if (!mem)
 		return;
-	iounmap(mem->virt_base);
+
+	if (mem->flags & DMA_MEMORY_MAP)
+		memunmap(mem->virt_base);
+	else
+		iounmap(mem->virt_base);
 	kfree(mem->bitmap);
 	kfree(mem);
 }
@@ -175,7 +187,10 @@
 	 */
 	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
 	*ret = mem->virt_base + (pageno << PAGE_SHIFT);
-	memset(*ret, 0, size);
+	if (mem->flags & DMA_MEMORY_MAP)
+		memset(*ret, 0, size);
+	else
+		memset_io(*ret, 0, size);
 	spin_unlock_irqrestore(&mem->spinlock, flags);
 
 	return 1;

diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 272a52e..0e64a1b 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c

@@ -137,6 +137,62 @@
 	return __pm_clk_add(dev, NULL, clk);
 }
 
+
+/**
+ * of_pm_clk_add_clks - Start using device clock(s) for power management.
+ * @dev: Device whose clock(s) is going to be used for power management.
+ *
+ * Add a series of clocks described in the 'clocks' device-tree node for
+ * a device to the list of clocks used for the power management of @dev.
+ * On success, returns the number of clocks added. Returns a negative
+ * error code if there are no clocks in the device node for the device
+ * or if adding a clock fails.
+ */
+int of_pm_clk_add_clks(struct device *dev)
+{
+	struct clk **clks;
+	unsigned int i, count;
+	int ret;
+
+	if (!dev || !dev->of_node)
+		return -EINVAL;
+
+	count = of_count_phandle_with_args(dev->of_node, "clocks",
+					   "#clock-cells");
+	if (count == 0)
+		return -ENODEV;
+
+	clks = kcalloc(count, sizeof(*clks), GFP_KERNEL);
+	if (!clks)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		clks[i] = of_clk_get(dev->of_node, i);
+		if (IS_ERR(clks[i])) {
+			ret = PTR_ERR(clks[i]);
+			goto error;
+		}
+
+		ret = pm_clk_add_clk(dev, clks[i]);
+		if (ret) {
+			clk_put(clks[i]);
+			goto error;
+		}
+	}
+
+	kfree(clks);
+
+	return i;
+
+error:
+	while (i--)
+		pm_clk_remove_clk(dev, clks[i]);
+
+	kfree(clks);
+
+	return ret;
+}
+
 /**
  * __pm_clk_remove - Destroy PM clock entry.
  * @ce: PM clock entry to destroy.
@@ -198,6 +254,39 @@
 }
 
 /**
+ * pm_clk_remove_clk - Stop using a device clock for power management.
+ * @dev: Device whose clock should not be used for PM any more.
+ * @clk: Clock pointer
+ *
+ * Remove the clock pointed to by @clk from the list of clocks used for
+ * the power management of @dev.
+ */
+void pm_clk_remove_clk(struct device *dev, struct clk *clk)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce;
+
+	if (!psd || !clk)
+		return;
+
+	spin_lock_irq(&psd->lock);
+
+	list_for_each_entry(ce, &psd->clock_list, node) {
+		if (clk == ce->clk)
+			goto remove;
+	}
+
+	spin_unlock_irq(&psd->lock);
+	return;
+
+ remove:
+	list_del(&ce->node);
+	spin_unlock_irq(&psd->lock);
+
+	__pm_clk_remove(ce);
+}
+
+/**
  * pm_clk_init - Initialize a device's list of power management clocks.
  * @dev: Device to initialize the list of PM clocks for.
  *

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index dd73e1f..ec9d861 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c

@@ -397,7 +397,7 @@
 	WARN_ON(d->flags & DEVFL_UP);
 	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
 	q->backing_dev_info.name = "aoe";
-	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
+	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_SIZE;
 	d->bufpool = mp;
 	d->blkq = gd->queue = q;
 	q->queuedata = d;

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index f7ecc28..51a071e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c

@@ -374,7 +374,7 @@
 		       struct page *page, int rw)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
-	int err = brd_do_bvec(brd, page, PAGE_CACHE_SIZE, 0, rw, sector);
+	int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, rw, sector);
 	page_endio(page, rw & WRITE, err);
 	return err;
 }

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index c227fd4..7a1cf7e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h

@@ -1327,8 +1327,8 @@
 #endif
 #endif
 
-/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE,
- * so for typical PAGE_CACHE_SIZE of 4k, that is (1<<20) Byte.
+/* BIO_MAX_SIZE is 256 * PAGE_SIZE,
+ * so for typical PAGE_SIZE of 4k, that is (1<<20) Byte.
  * Since we may live in a mixed-platform cluster,
  * we limit us to a platform agnostic constant here for now.
  * A followup commit may allow even bigger BIO sizes,

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 226eb0c..1fd1dcc 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c

@@ -1178,7 +1178,7 @@
 	blk_queue_max_hw_sectors(q, max_hw_sectors);
 	/* This is the workaround for "bio would need to, but cannot, be split" */
 	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
-	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
+	blk_queue_segment_boundary(q, PAGE_SIZE-1);
 
 	if (b) {
 		struct drbd_connection *connection = first_peer_device(device)->connection;

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index cc2e71d..25824c1 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c

@@ -2051,7 +2051,7 @@
 					 outbuf,
 					 taskout,
 					 DMA_TO_DEVICE);
-		if (outbuf_dma == 0) {
+		if (pci_dma_mapping_error(dd->pdev, outbuf_dma)) {
 			err = -ENOMEM;
 			goto abort;
 		}
@@ -2068,7 +2068,7 @@
 		inbuf_dma = pci_map_single(dd->pdev,
 					 inbuf,
 					 taskin, DMA_FROM_DEVICE);
-		if (inbuf_dma == 0) {
+		if (pci_dma_mapping_error(dd->pdev, inbuf_dma)) {
 			err = -ENOMEM;
 			goto abort;
 		}

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 64a7b59..cab9759 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c

@@ -742,10 +742,11 @@
 
 	add_disk(disk);
 
+done:
 	mutex_lock(&lock);
 	list_add_tail(&nullb->list, &nullb_list);
 	mutex_unlock(&lock);
-done:
+
 	return 0;
 
 out_cleanup_lightnvm:

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4a87678..94a1843 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c

@@ -1847,14 +1847,12 @@
 	if (osd_req->r_result < 0)
 		obj_request->result = osd_req->r_result;
 
-	rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP);
-
 	/*
 	 * We support a 64-bit length, but ultimately it has to be
 	 * passed to the block layer, which just supports a 32-bit
 	 * length field.
 	 */
-	obj_request->xferred = osd_req->r_reply_op_len[0];
+	obj_request->xferred = osd_req->r_ops[0].outdata_len;
 	rbd_assert(obj_request->xferred < (u64)UINT_MAX);
 
 	opcode = osd_req->r_ops[0].op;
@@ -1955,7 +1953,7 @@
 
 	osdc = &rbd_dev->rbd_client->client->osdc;
 	osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false,
-					  GFP_ATOMIC);
+					  GFP_NOIO);
 	if (!osd_req)
 		return NULL;	/* ENOMEM */
 
@@ -2004,7 +2002,7 @@
 	rbd_dev = img_request->rbd_dev;
 	osdc = &rbd_dev->rbd_client->client->osdc;
 	osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops,
-						false, GFP_ATOMIC);
+						false, GFP_NOIO);
 	if (!osd_req)
 		return NULL;	/* ENOMEM */
 
@@ -2506,7 +2504,7 @@
 					bio_chain_clone_range(&bio_list,
 								&bio_offset,
 								clone_size,
-								GFP_ATOMIC);
+								GFP_NOIO);
 			if (!obj_request->bio_list)
 				goto out_unwind;
 		} else if (type == OBJ_REQUEST_PAGES) {
@@ -5643,18 +5641,12 @@
 static int rbd_slab_init(void)
 {
 	rbd_assert(!rbd_img_request_cache);
-	rbd_img_request_cache = kmem_cache_create("rbd_img_request",
-					sizeof (struct rbd_img_request),
-					__alignof__(struct rbd_img_request),
-					0, NULL);
+	rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0);
 	if (!rbd_img_request_cache)
 		return -ENOMEM;
 
 	rbd_assert(!rbd_obj_request_cache);
-	rbd_obj_request_cache = kmem_cache_create("rbd_obj_request",
-					sizeof (struct rbd_obj_request),
-					__alignof__(struct rbd_obj_request),
-					0, NULL);
+	rbd_obj_request_cache = KMEM_CACHE(rbd_obj_request, 0);
 	if (!rbd_obj_request_cache)
 		goto out_err;
 

diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 096f0ce..4facc75 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c

@@ -1140,7 +1140,7 @@
 		   the timer.   So do so. */
 		pretimeout_since_last_heartbeat = 1;
 		if (atomic_inc_and_test(&preop_panic_excl))
-			panic(PFX "pre-timeout");
+			nmi_panic(regs, PFX "pre-timeout");
 	}
 
 	return NMI_HANDLED;

diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index d233688..f8a483c 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c

@@ -286,7 +286,7 @@
 	struct parport *port;
 	struct pardevice *pdev = NULL;
 	char *name;
-	struct pardev_cb ppdev_cb;
+	int fl;
 
 	name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
 	if (name == NULL)
@@ -299,11 +299,9 @@
 		return -ENXIO;
 	}
 
-	memset(&ppdev_cb, 0, sizeof(ppdev_cb));
-	ppdev_cb.irq_func = pp_irq;
-	ppdev_cb.flags = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
-	ppdev_cb.private = pp;
-	pdev = parport_register_dev_model(port, name, &ppdev_cb, minor);
+	fl = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
+	pdev = parport_register_device(port, name, NULL,
+				       NULL, pp_irq, fl, pp);
 	parport_put_port(port);
 
 	if (!pdev) {
@@ -801,23 +799,10 @@
 	device_destroy(ppdev_class, MKDEV(PP_MAJOR, port->number));
 }
 
-static int pp_probe(struct pardevice *par_dev)
-{
-	struct device_driver *drv = par_dev->dev.driver;
-	int len = strlen(drv->name);
-
-	if (strncmp(par_dev->name, drv->name, len))
-		return -ENODEV;
-
-	return 0;
-}
-
 static struct parport_driver pp_driver = {
 	.name		= CHRDEV,
-	.probe		= pp_probe,
-	.match_port	= pp_attach,
+	.attach		= pp_attach,
 	.detach		= pp_detach,
-	.devmodel	= true,
 };
 
 static int __init ppdev_init(void)

diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index de707b2..16f7d33 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig

@@ -6,9 +6,6 @@
 config HAVE_CLK_PREPARE
 	bool
 
-config HAVE_MACH_CLKDEV
-	bool
-
 config COMMON_CLK
 	bool
 	select HAVE_CLK_PREPARE
@@ -99,6 +96,14 @@
 	  This driver supports Silicon Labs 570/571/598/599 programmable
 	  clock generators.
 
+config COMMON_CLK_CDCE706
+	tristate "Clock driver for TI CDCE706 clock synthesizer"
+	depends on I2C
+	select REGMAP_I2C
+	select RATIONAL
+	---help---
+	  This driver supports TI CDCE706 programmable 3-PLL clock synthesizer.
+
 config COMMON_CLK_CDCE925
 	tristate "Clock driver for TI CDCE925 devices"
 	depends on I2C
@@ -190,15 +195,7 @@
 config COMMON_CLK_PXA
 	def_bool COMMON_CLK && ARCH_PXA
 	---help---
-	  Sypport for the Marvell PXA SoC.
-
-config COMMON_CLK_CDCE706
-	tristate "Clock driver for TI CDCE706 clock synthesizer"
-	depends on I2C
-	select REGMAP_I2C
-	select RATIONAL
-	---help---
-	  This driver supports TI CDCE706 programmable 3-PLL clock synthesizer.
+	  Support for the Marvell PXA SoC.
 
 source "drivers/clk/bcm/Kconfig"
 source "drivers/clk/hisilicon/Kconfig"
@@ -206,5 +203,6 @@
 source "drivers/clk/qcom/Kconfig"
 source "drivers/clk/samsung/Kconfig"
 source "drivers/clk/tegra/Kconfig"
+source "drivers/clk/ti/Kconfig"
 
 endmenu

diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index bae4be6..46869d6 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile

@@ -70,15 +70,14 @@
 obj-$(CONFIG_COMMON_CLK_QCOM)		+= qcom/
 obj-$(CONFIG_ARCH_ROCKCHIP)		+= rockchip/
 obj-$(CONFIG_COMMON_CLK_SAMSUNG)	+= samsung/
-obj-$(CONFIG_ARCH_SHMOBILE_MULTI)	+= shmobile/
-obj-$(CONFIG_ARCH_RENESAS)		+= shmobile/
+obj-$(CONFIG_ARCH_RENESAS)		+= renesas/
 obj-$(CONFIG_ARCH_SIRF)			+= sirf/
 obj-$(CONFIG_ARCH_SOCFPGA)		+= socfpga/
 obj-$(CONFIG_PLAT_SPEAR)		+= spear/
 obj-$(CONFIG_ARCH_STI)			+= st/
 obj-$(CONFIG_ARCH_SUNXI)		+= sunxi/
 obj-$(CONFIG_ARCH_TEGRA)		+= tegra/
-obj-$(CONFIG_ARCH_OMAP2PLUS)		+= ti/
+obj-y					+= ti/
 obj-$(CONFIG_ARCH_U8500)		+= ux500/
 obj-$(CONFIG_COMMON_CLK_VERSATILE)	+= versatile/
 obj-$(CONFIG_X86)			+= x86/

diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c
index 4ad3298..e1aa210 100644
--- a/drivers/clk/at91/clk-generated.c
+++ b/drivers/clk/at91/clk-generated.c

@@ -273,14 +273,14 @@
 	u32 id;
 	const char *name;
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[GENERATED_SOURCE_MAX];
 	struct device_node *gcknp;
 	struct clk_range range = CLK_RANGE(0, 0);
 	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > GENERATED_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > GENERATED_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);

diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c
index 4bfc94d..58b5bac 100644
--- a/drivers/clk/at91/clk-main.c
+++ b/drivers/clk/at91/clk-main.c

@@ -291,7 +291,7 @@
 	init.ops = &main_rc_osc_ops;
 	init.parent_names = NULL;
 	init.num_parents = 0;
-	init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED;
+	init.flags = CLK_IGNORE_UNUSED;
 
 	osc->hw.init = &init;
 	osc->regmap = regmap;
@@ -572,12 +572,12 @@
 {
 	struct clk *clk;
 	const char *parent_names[2];
-	int num_parents;
+	unsigned int num_parents;
 	const char *name = np->name;
 	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > 2)
+	if (num_parents == 0 || num_parents > 2)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);

diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c
index 7d4a186..d1021e1 100644
--- a/drivers/clk/at91/clk-master.c
+++ b/drivers/clk/at91/clk-master.c

@@ -199,14 +199,14 @@
 			 const struct clk_master_layout *layout)
 {
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[MASTER_SOURCE_MAX];
 	const char *name = np->name;
 	struct clk_master_characteristics *characteristics;
 	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > MASTER_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > MASTER_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);

diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c
index bc0be62..10f846c 100644
--- a/drivers/clk/at91/clk-programmable.c
+++ b/drivers/clk/at91/clk-programmable.c

@@ -230,14 +230,14 @@
 	int num;
 	u32 id;
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[PROG_SOURCE_MAX];
 	const char *name;
 	struct device_node *progclknp;
 	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > PROG_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > PROG_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);

diff --git a/drivers/clk/at91/clk-slow.c b/drivers/clk/at91/clk-slow.c
index 911e941..61090b1 100644
--- a/drivers/clk/at91/clk-slow.c
+++ b/drivers/clk/at91/clk-slow.c

@@ -245,7 +245,7 @@
 	init.ops = &slow_rc_osc_ops;
 	init.parent_names = NULL;
 	init.num_parents = 0;
-	init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED;
+	init.flags = CLK_IGNORE_UNUSED;
 
 	osc->hw.init = &init;
 	osc->sckcr = sckcr;
@@ -360,11 +360,11 @@
 {
 	struct clk *clk;
 	const char *parent_names[2];
-	int num_parents;
+	unsigned int num_parents;
 	const char *name = np->name;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > 2)
+	if (num_parents == 0 || num_parents > 2)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
@@ -433,7 +433,7 @@
 {
 	struct clk *clk;
 	const char *parent_names[2];
-	int num_parents;
+	unsigned int num_parents;
 	const char *name = np->name;
 	struct regmap *regmap;
 

diff --git a/drivers/clk/at91/clk-smd.c b/drivers/clk/at91/clk-smd.c
index e6948a5..3c04b06 100644
--- a/drivers/clk/at91/clk-smd.c
+++ b/drivers/clk/at91/clk-smd.c

@@ -142,13 +142,13 @@
 static void __init of_at91sam9x5_clk_smd_setup(struct device_node *np)
 {
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[SMD_SOURCE_MAX];
 	const char *name = np->name;
 	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > SMD_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > SMD_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);

diff --git a/drivers/clk/at91/clk-usb.c b/drivers/clk/at91/clk-usb.c
index 650ca45..d80bdb0 100644
--- a/drivers/clk/at91/clk-usb.c
+++ b/drivers/clk/at91/clk-usb.c

@@ -366,13 +366,13 @@
 static void __init of_at91sam9x5_clk_usb_setup(struct device_node *np)
 {
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[USB_SOURCE_MAX];
 	const char *name = np->name;
 	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > USB_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > USB_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);

diff --git a/drivers/clk/bcm/clk-bcm2835-aux.c b/drivers/clk/bcm/clk-bcm2835-aux.c
index e4f89e2..3a177ad 100644
--- a/drivers/clk/bcm/clk-bcm2835-aux.c
+++ b/drivers/clk/bcm/clk-bcm2835-aux.c

@@ -38,8 +38,8 @@
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	reg = devm_ioremap_resource(dev, res);
-	if (!reg)
-		return -ENODEV;
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
 
 	onecell = devm_kmalloc(dev, sizeof(*onecell), GFP_KERNEL);
 	if (!onecell)

diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 015e687..c74ed3f 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c

@@ -88,10 +88,23 @@
 #define CM_HSMDIV		0x08c
 #define CM_OTPCTL		0x090
 #define CM_OTPDIV		0x094
+#define CM_PCMCTL		0x098
+#define CM_PCMDIV		0x09c
 #define CM_PWMCTL		0x0a0
 #define CM_PWMDIV		0x0a4
+#define CM_SLIMCTL		0x0a8
+#define CM_SLIMDIV		0x0ac
 #define CM_SMICTL		0x0b0
 #define CM_SMIDIV		0x0b4
+/* no definition for 0x0b8  and 0x0bc */
+#define CM_TCNTCTL		0x0c0
+#define CM_TCNTDIV		0x0c4
+#define CM_TECCTL		0x0c8
+#define CM_TECDIV		0x0cc
+#define CM_TD0CTL		0x0d0
+#define CM_TD0DIV		0x0d4
+#define CM_TD1CTL		0x0d8
+#define CM_TD1DIV		0x0dc
 #define CM_TSENSCTL		0x0e0
 #define CM_TSENSDIV		0x0e4
 #define CM_TIMERCTL		0x0e8
@@ -311,21 +324,18 @@
 	struct clk *clk;
 	int ret;
 
-	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, CLK_IS_ROOT,
-					126000000);
+	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 126000000);
 	if (IS_ERR(clk))
 		pr_err("apb_pclk not registered\n");
 
-	clk = clk_register_fixed_rate(NULL, "uart0_pclk", NULL, CLK_IS_ROOT,
-					3000000);
+	clk = clk_register_fixed_rate(NULL, "uart0_pclk", NULL, 0, 3000000);
 	if (IS_ERR(clk))
 		pr_err("uart0_pclk not registered\n");
 	ret = clk_register_clkdev(clk, NULL, "20201000.uart");
 	if (ret)
 		pr_err("uart0_pclk alias not registered\n");
 
-	clk = clk_register_fixed_rate(NULL, "uart1_pclk", NULL, CLK_IS_ROOT,
-					125000000);
+	clk = clk_register_fixed_rate(NULL, "uart1_pclk", NULL, 0, 125000000);
 	if (IS_ERR(clk))
 		pr_err("uart1_pclk not registered\n");
 	ret = clk_register_clkdev(clk, NULL, "20215000.uart");
@@ -1060,16 +1070,7 @@
 static unsigned long bcm2835_pll_divider_get_rate(struct clk_hw *hw,
 						  unsigned long parent_rate)
 {
-	struct bcm2835_pll_divider *divider = bcm2835_pll_divider_from_hw(hw);
-	struct bcm2835_cprman *cprman = divider->cprman;
-	const struct bcm2835_pll_divider_data *data = divider->data;
-	u32 div = cprman_read(cprman, data->a2w_reg);
-
-	div &= (1 << A2W_PLL_DIV_BITS) - 1;
-	if (div == 0)
-		div = 256;
-
-	return parent_rate / div;
+	return clk_divider_ops.recalc_rate(hw, parent_rate);
 }
 
 static void bcm2835_pll_divider_off(struct clk_hw *hw)
@@ -1107,13 +1108,15 @@
 	struct bcm2835_pll_divider *divider = bcm2835_pll_divider_from_hw(hw);
 	struct bcm2835_cprman *cprman = divider->cprman;
 	const struct bcm2835_pll_divider_data *data = divider->data;
-	u32 cm;
-	int ret;
+	u32 cm, div, max_div = 1 << A2W_PLL_DIV_BITS;
 
-	ret = clk_divider_ops.set_rate(hw, rate, parent_rate);
-	if (ret)
-		return ret;
+	div = DIV_ROUND_UP_ULL(parent_rate, rate);
 
+	div = min(div, max_div);
+	if (div == max_div)
+		div = 0;
+
+	cprman_write(cprman, data->a2w_reg, div);
 	cm = cprman_read(cprman, data->cm_reg);
 	cprman_write(cprman, data->cm_reg, cm | data->load_mask);
 	cprman_write(cprman, data->cm_reg, cm & ~data->load_mask);
@@ -1428,7 +1431,7 @@
 	divider->div.reg = cprman->regs + data->a2w_reg;
 	divider->div.shift = A2W_PLL_DIV_SHIFT;
 	divider->div.width = A2W_PLL_DIV_BITS;
-	divider->div.flags = 0;
+	divider->div.flags = CLK_DIVIDER_MAX_AT_ZERO;
 	divider->div.lock = &cprman->regs_lock;
 	divider->div.hw.init = &init;
 	divider->div.table = NULL;

diff --git a/drivers/clk/bcm/clk-cygnus.c b/drivers/clk/bcm/clk-cygnus.c
index 3a228b6..464fdc4 100644
--- a/drivers/clk/bcm/clk-cygnus.c
+++ b/drivers/clk/bcm/clk-cygnus.c

@@ -268,3 +268,62 @@
 	iproc_asiu_setup(node, asiu_div, asiu_gate, ARRAY_SIZE(asiu_div));
 }
 CLK_OF_DECLARE(cygnus_asiu_clk, "brcm,cygnus-asiu-clk", cygnus_asiu_init);
+
+/*
+ * AUDIO PLL VCO frequency parameter table
+ *
+ * PLL output frequency = ((ndiv_int + ndiv_frac / 2^20) *
+ * (parent clock rate / pdiv)
+ *
+ * On Cygnus, parent is the 25MHz oscillator
+ */
+static const struct iproc_pll_vco_param audiopll_vco_params[] = {
+	/* rate (Hz) ndiv_int ndiv_frac pdiv */
+	{ 1354750204UL,  54,     199238,   1 },
+	{ 1769470191UL,  70,     816639,   1 },
+};
+
+static const struct iproc_pll_ctrl audiopll = {
+	.flags = IPROC_CLK_PLL_NEEDS_SW_CFG | IPROC_CLK_PLL_HAS_NDIV_FRAC |
+		IPROC_CLK_PLL_USER_MODE_ON | IPROC_CLK_PLL_RESET_ACTIVE_LOW,
+	.reset = RESET_VAL(0x5c, 0, 1),
+	.dig_filter = DF_VAL(0x48, 0, 3, 6, 4, 3, 3),
+	.sw_ctrl = SW_CTRL_VAL(0x4, 0),
+	.ndiv_int = REG_VAL(0x8, 0, 10),
+	.ndiv_frac = REG_VAL(0x8, 10, 20),
+	.pdiv = REG_VAL(0x44, 0, 4),
+	.vco_ctrl = VCO_CTRL_VAL(0x0c, 0x10),
+	.status = REG_VAL(0x54, 0, 1),
+	.macro_mode = REG_VAL(0x0, 0, 3),
+};
+
+static const struct iproc_clk_ctrl audiopll_clk[] = {
+	[BCM_CYGNUS_AUDIOPLL_CH0] = {
+		.channel = BCM_CYGNUS_AUDIOPLL_CH0,
+		.flags = IPROC_CLK_AON |
+				IPROC_CLK_MCLK_DIV_BY_2,
+		.enable = ENABLE_VAL(0x14, 8, 10, 9),
+		.mdiv = REG_VAL(0x14, 0, 8),
+	},
+	[BCM_CYGNUS_AUDIOPLL_CH1] = {
+		.channel = BCM_CYGNUS_AUDIOPLL_CH1,
+		.flags = IPROC_CLK_AON,
+		.enable = ENABLE_VAL(0x18, 8, 10, 9),
+		.mdiv = REG_VAL(0x18, 0, 8),
+	},
+	[BCM_CYGNUS_AUDIOPLL_CH2] = {
+		.channel = BCM_CYGNUS_AUDIOPLL_CH2,
+		.flags = IPROC_CLK_AON,
+		.enable = ENABLE_VAL(0x1c, 8, 10, 9),
+		.mdiv = REG_VAL(0x1c, 0, 8),
+	},
+};
+
+static void __init cygnus_audiopll_clk_init(struct device_node *node)
+{
+	iproc_pll_clk_setup(node, &audiopll, audiopll_vco_params,
+			    ARRAY_SIZE(audiopll_vco_params), audiopll_clk,
+			    ARRAY_SIZE(audiopll_clk));
+}
+CLK_OF_DECLARE(cygnus_audiopll, "brcm,cygnus-audiopll",
+			cygnus_audiopll_clk_init);

diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c
index afd5891..fd492a5 100644
--- a/drivers/clk/bcm/clk-iproc-pll.c
+++ b/drivers/clk/bcm/clk-iproc-pll.c

@@ -25,6 +25,12 @@
 #define PLL_VCO_HIGH_SHIFT 19
 #define PLL_VCO_LOW_SHIFT  30
 
+/*
+ * PLL MACRO_SELECT modes 0 to 5 choose pre-calculated PLL output frequencies
+ * from a look-up table. Mode 7 allows user to manipulate PLL clock dividers
+ */
+#define PLL_USER_MODE 7
+
 /* number of delay loops waiting for PLL to lock */
 #define LOCK_DELAY 100
 
@@ -215,7 +221,10 @@
 	const struct iproc_pll_reset_ctrl *reset = &ctrl->reset;
 
 	val = readl(pll->control_base + reset->offset);
-	val &= ~(1 << reset->reset_shift | 1 << reset->p_reset_shift);
+	if (ctrl->flags & IPROC_CLK_PLL_RESET_ACTIVE_LOW)
+		val |= BIT(reset->reset_shift) | BIT(reset->p_reset_shift);
+	else
+		val &= ~(BIT(reset->reset_shift) | BIT(reset->p_reset_shift));
 	iproc_pll_write(pll, pll->control_base, reset->offset, val);
 }
 
@@ -236,7 +245,10 @@
 	iproc_pll_write(pll, pll->control_base, dig_filter->offset, val);
 
 	val = readl(pll->control_base + reset->offset);
-	val |= 1 << reset->reset_shift | 1 << reset->p_reset_shift;
+	if (ctrl->flags & IPROC_CLK_PLL_RESET_ACTIVE_LOW)
+		val &= ~(BIT(reset->reset_shift) | BIT(reset->p_reset_shift));
+	else
+		val |= BIT(reset->reset_shift) | BIT(reset->p_reset_shift);
 	iproc_pll_write(pll, pll->control_base, reset->offset, val);
 }
 
@@ -292,6 +304,16 @@
 	/* put PLL in reset */
 	__pll_put_in_reset(pll);
 
+	/* set PLL in user mode before modifying PLL controls */
+	if (ctrl->flags & IPROC_CLK_PLL_USER_MODE_ON) {
+		val = readl(pll->control_base + ctrl->macro_mode.offset);
+		val &= ~(bit_mask(ctrl->macro_mode.width) <<
+			ctrl->macro_mode.shift);
+		val |= PLL_USER_MODE << ctrl->macro_mode.shift;
+		iproc_pll_write(pll, pll->control_base,
+			ctrl->macro_mode.offset, val);
+	}
+
 	iproc_pll_write(pll, pll->control_base, ctrl->vco_ctrl.u_offset, 0);
 
 	val = readl(pll->control_base + ctrl->vco_ctrl.l_offset);
@@ -505,7 +527,10 @@
 	if (mdiv == 0)
 		mdiv = 256;
 
-	clk->rate = parent_rate / mdiv;
+	if (ctrl->flags & IPROC_CLK_MCLK_DIV_BY_2)
+		clk->rate = parent_rate / (mdiv * 2);
+	else
+		clk->rate = parent_rate / mdiv;
 
 	return clk->rate;
 }
@@ -543,7 +568,10 @@
 	if (rate == 0 || parent_rate == 0)
 		return -EINVAL;
 
-	div = DIV_ROUND_UP(parent_rate, rate);
+	if (ctrl->flags & IPROC_CLK_MCLK_DIV_BY_2)
+		div = DIV_ROUND_UP(parent_rate, rate * 2);
+	else
+		div = DIV_ROUND_UP(parent_rate, rate);
 	if (div > 256)
 		return -EINVAL;
 
@@ -555,7 +583,10 @@
 		val |= div << ctrl->mdiv.shift;
 	}
 	iproc_pll_write(pll, pll->control_base, ctrl->mdiv.offset, val);
-	clk->rate = parent_rate / div;
+	if (ctrl->flags & IPROC_CLK_MCLK_DIV_BY_2)
+		clk->rate = parent_rate / (div * 2);
+	else
+		clk->rate = parent_rate / div;
 
 	return 0;
 }

diff --git a/drivers/clk/bcm/clk-iproc.h b/drivers/clk/bcm/clk-iproc.h
index 8988de7..2148b4e 100644
--- a/drivers/clk/bcm/clk-iproc.h
+++ b/drivers/clk/bcm/clk-iproc.h

@@ -61,6 +61,26 @@
 #define IPROC_CLK_PLL_SPLIT_STAT_CTRL BIT(6)
 
 /*
+ * Some PLLs have an additional divide by 2 in master clock calculation;
+ * MCLK = VCO_freq / (Mdiv * 2). Identify this to let the driver know
+ * of modified calculations
+ */
+#define IPROC_CLK_MCLK_DIV_BY_2 BIT(7)
+
+/*
+ * Some PLLs provide a look up table for the leaf clock frequencies and
+ * auto calculates VCO frequency parameters based on the provided leaf
+ * clock frequencies. They have a user mode that allows the divider
+ * controls to be determined by the user
+ */
+#define IPROC_CLK_PLL_USER_MODE_ON BIT(8)
+
+/*
+ * Some PLLs have an active low reset
+ */
+#define IPROC_CLK_PLL_RESET_ACTIVE_LOW BIT(9)
+
+/*
  * Parameters for VCO frequency configuration
  *
  * VCO frequency =
@@ -149,6 +169,7 @@
 	struct iproc_clk_reg_op pdiv;
 	struct iproc_pll_vco_ctrl vco_ctrl;
 	struct iproc_clk_reg_op status;
+	struct iproc_clk_reg_op macro_mode;
 };
 
 /*
@@ -183,16 +204,16 @@
 	unsigned int low_width;
 };
 
-void __init iproc_armpll_setup(struct device_node *node);
-void __init iproc_pll_clk_setup(struct device_node *node,
-				const struct iproc_pll_ctrl *pll_ctrl,
-				const struct iproc_pll_vco_param *vco,
-				unsigned int num_vco_entries,
-				const struct iproc_clk_ctrl *clk_ctrl,
-				unsigned int num_clks);
-void __init iproc_asiu_setup(struct device_node *node,
-			     const struct iproc_asiu_div *div,
-			     const struct iproc_asiu_gate *gate,
-			     unsigned int num_clks);
+void iproc_armpll_setup(struct device_node *node);
+void iproc_pll_clk_setup(struct device_node *node,
+			 const struct iproc_pll_ctrl *pll_ctrl,
+			 const struct iproc_pll_vco_param *vco,
+			 unsigned int num_vco_entries,
+			 const struct iproc_clk_ctrl *clk_ctrl,
+			 unsigned int num_clks);
+void iproc_asiu_setup(struct device_node *node,
+		      const struct iproc_asiu_div *div,
+		      const struct iproc_asiu_gate *gate,
+		      unsigned int num_clks);
 
 #endif /* _CLK_IPROC_H */

diff --git a/drivers/clk/clk-axi-clkgen.c b/drivers/clk/clk-axi-clkgen.c
index 3bcd42f..3294db3 100644
--- a/drivers/clk/clk-axi-clkgen.c
+++ b/drivers/clk/clk-axi-clkgen.c

@@ -16,19 +16,8 @@
 #include <linux/module.h>
 #include <linux/err.h>
 
-#define AXI_CLKGEN_V1_REG_UPDATE_ENABLE	0x04
-#define AXI_CLKGEN_V1_REG_CLK_OUT1	0x08
-#define AXI_CLKGEN_V1_REG_CLK_OUT2	0x0c
-#define AXI_CLKGEN_V1_REG_CLK_DIV	0x10
-#define AXI_CLKGEN_V1_REG_CLK_FB1	0x14
-#define AXI_CLKGEN_V1_REG_CLK_FB2	0x18
-#define AXI_CLKGEN_V1_REG_LOCK1		0x1c
-#define AXI_CLKGEN_V1_REG_LOCK2		0x20
-#define AXI_CLKGEN_V1_REG_LOCK3		0x24
-#define AXI_CLKGEN_V1_REG_FILTER1	0x28
-#define AXI_CLKGEN_V1_REG_FILTER2	0x2c
-
 #define AXI_CLKGEN_V2_REG_RESET		0x40
+#define AXI_CLKGEN_V2_REG_CLKSEL	0x44
 #define AXI_CLKGEN_V2_REG_DRP_CNTRL	0x70
 #define AXI_CLKGEN_V2_REG_DRP_STATUS	0x74
 
@@ -51,40 +40,11 @@
 #define MMCM_REG_FILTER1	0x4e
 #define MMCM_REG_FILTER2	0x4f
 
-struct axi_clkgen;
-
-struct axi_clkgen_mmcm_ops {
-	void (*enable)(struct axi_clkgen *axi_clkgen, bool enable);
-	int (*write)(struct axi_clkgen *axi_clkgen, unsigned int reg,
-		     unsigned int val, unsigned int mask);
-	int (*read)(struct axi_clkgen *axi_clkgen, unsigned int reg,
-		    unsigned int *val);
-};
-
 struct axi_clkgen {
 	void __iomem *base;
-	const struct axi_clkgen_mmcm_ops *mmcm_ops;
 	struct clk_hw clk_hw;
 };
 
-static void axi_clkgen_mmcm_enable(struct axi_clkgen *axi_clkgen,
-	bool enable)
-{
-	axi_clkgen->mmcm_ops->enable(axi_clkgen, enable);
-}
-
-static int axi_clkgen_mmcm_write(struct axi_clkgen *axi_clkgen,
-	unsigned int reg, unsigned int val, unsigned int mask)
-{
-	return axi_clkgen->mmcm_ops->write(axi_clkgen, reg, val, mask);
-}
-
-static int axi_clkgen_mmcm_read(struct axi_clkgen *axi_clkgen,
-	unsigned int reg, unsigned int *val)
-{
-	return axi_clkgen->mmcm_ops->read(axi_clkgen, reg, val);
-}
-
 static uint32_t axi_clkgen_lookup_filter(unsigned int m)
 {
 	switch (m) {
@@ -207,70 +167,6 @@
 	*val = readl(axi_clkgen->base + reg);
 }
 
-static unsigned int axi_clkgen_v1_map_mmcm_reg(unsigned int reg)
-{
-	switch (reg) {
-	case MMCM_REG_CLKOUT0_1:
-		return AXI_CLKGEN_V1_REG_CLK_OUT1;
-	case MMCM_REG_CLKOUT0_2:
-		return AXI_CLKGEN_V1_REG_CLK_OUT2;
-	case MMCM_REG_CLK_FB1:
-		return AXI_CLKGEN_V1_REG_CLK_FB1;
-	case MMCM_REG_CLK_FB2:
-		return AXI_CLKGEN_V1_REG_CLK_FB2;
-	case MMCM_REG_CLK_DIV:
-		return AXI_CLKGEN_V1_REG_CLK_DIV;
-	case MMCM_REG_LOCK1:
-		return AXI_CLKGEN_V1_REG_LOCK1;
-	case MMCM_REG_LOCK2:
-		return AXI_CLKGEN_V1_REG_LOCK2;
-	case MMCM_REG_LOCK3:
-		return AXI_CLKGEN_V1_REG_LOCK3;
-	case MMCM_REG_FILTER1:
-		return AXI_CLKGEN_V1_REG_FILTER1;
-	case MMCM_REG_FILTER2:
-		return AXI_CLKGEN_V1_REG_FILTER2;
-	default:
-		return 0;
-	}
-}
-
-static int axi_clkgen_v1_mmcm_write(struct axi_clkgen *axi_clkgen,
-	unsigned int reg, unsigned int val, unsigned int mask)
-{
-	reg = axi_clkgen_v1_map_mmcm_reg(reg);
-	if (reg == 0)
-		return -EINVAL;
-
-	axi_clkgen_write(axi_clkgen, reg, val);
-
-	return 0;
-}
-
-static int axi_clkgen_v1_mmcm_read(struct axi_clkgen *axi_clkgen,
-	unsigned int reg, unsigned int *val)
-{
-	reg = axi_clkgen_v1_map_mmcm_reg(reg);
-	if (reg == 0)
-		return -EINVAL;
-
-	axi_clkgen_read(axi_clkgen, reg, val);
-
-	return 0;
-}
-
-static void axi_clkgen_v1_mmcm_enable(struct axi_clkgen *axi_clkgen,
-	bool enable)
-{
-	axi_clkgen_write(axi_clkgen, AXI_CLKGEN_V1_REG_UPDATE_ENABLE, enable);
-}
-
-static const struct axi_clkgen_mmcm_ops axi_clkgen_v1_mmcm_ops = {
-	.write = axi_clkgen_v1_mmcm_write,
-	.read = axi_clkgen_v1_mmcm_read,
-	.enable = axi_clkgen_v1_mmcm_enable,
-};
-
 static int axi_clkgen_wait_non_busy(struct axi_clkgen *axi_clkgen)
 {
 	unsigned int timeout = 10000;
@@ -286,7 +182,7 @@
 	return val & 0xffff;
 }
 
-static int axi_clkgen_v2_mmcm_read(struct axi_clkgen *axi_clkgen,
+static int axi_clkgen_mmcm_read(struct axi_clkgen *axi_clkgen,
 	unsigned int reg, unsigned int *val)
 {
 	unsigned int reg_val;
@@ -310,7 +206,7 @@
 	return 0;
 }
 
-static int axi_clkgen_v2_mmcm_write(struct axi_clkgen *axi_clkgen,
+static int axi_clkgen_mmcm_write(struct axi_clkgen *axi_clkgen,
 	unsigned int reg, unsigned int val, unsigned int mask)
 {
 	unsigned int reg_val = 0;
@@ -321,7 +217,7 @@
 		return ret;
 
 	if (mask != 0xffff) {
-		axi_clkgen_v2_mmcm_read(axi_clkgen, reg, &reg_val);
+		axi_clkgen_mmcm_read(axi_clkgen, reg, &reg_val);
 		reg_val &= ~mask;
 	}
 
@@ -332,7 +228,7 @@
 	return 0;
 }
 
-static void axi_clkgen_v2_mmcm_enable(struct axi_clkgen *axi_clkgen,
+static void axi_clkgen_mmcm_enable(struct axi_clkgen *axi_clkgen,
 	bool enable)
 {
 	unsigned int val = AXI_CLKGEN_V2_RESET_ENABLE;
@@ -343,12 +239,6 @@
 	axi_clkgen_write(axi_clkgen, AXI_CLKGEN_V2_REG_RESET, val);
 }
 
-static const struct axi_clkgen_mmcm_ops axi_clkgen_v2_mmcm_ops = {
-	.write = axi_clkgen_v2_mmcm_write,
-	.read = axi_clkgen_v2_mmcm_read,
-	.enable = axi_clkgen_v2_mmcm_enable,
-};
-
 static struct axi_clkgen *clk_hw_to_axi_clkgen(struct clk_hw *clk_hw)
 {
 	return container_of(clk_hw, struct axi_clkgen, clk_hw);
@@ -438,10 +328,7 @@
 	tmp = (unsigned long long)(parent_rate / d) * m;
 	do_div(tmp, dout);
 
-	if (tmp > ULONG_MAX)
-		return ULONG_MAX;
-
-	return tmp;
+	return min_t(unsigned long long, tmp, ULONG_MAX);
 }
 
 static int axi_clkgen_enable(struct clk_hw *clk_hw)
@@ -460,21 +347,38 @@
 	axi_clkgen_mmcm_enable(axi_clkgen, false);
 }
 
+static int axi_clkgen_set_parent(struct clk_hw *clk_hw, u8 index)
+{
+	struct axi_clkgen *axi_clkgen = clk_hw_to_axi_clkgen(clk_hw);
+
+	axi_clkgen_write(axi_clkgen, AXI_CLKGEN_V2_REG_CLKSEL, index);
+
+	return 0;
+}
+
+static u8 axi_clkgen_get_parent(struct clk_hw *clk_hw)
+{
+	struct axi_clkgen *axi_clkgen = clk_hw_to_axi_clkgen(clk_hw);
+	unsigned int parent;
+
+	axi_clkgen_read(axi_clkgen, AXI_CLKGEN_V2_REG_CLKSEL, &parent);
+
+	return parent;
+}
+
 static const struct clk_ops axi_clkgen_ops = {
 	.recalc_rate = axi_clkgen_recalc_rate,
 	.round_rate = axi_clkgen_round_rate,
 	.set_rate = axi_clkgen_set_rate,
 	.enable = axi_clkgen_enable,
 	.disable = axi_clkgen_disable,
+	.set_parent = axi_clkgen_set_parent,
+	.get_parent = axi_clkgen_get_parent,
 };
 
 static const struct of_device_id axi_clkgen_ids[] = {
 	{
-		.compatible = "adi,axi-clkgen-1.00.a",
-		.data = &axi_clkgen_v1_mmcm_ops
-	}, {
 		.compatible = "adi,axi-clkgen-2.00.a",
-		.data = &axi_clkgen_v2_mmcm_ops,
 	},
 	{ },
 };
@@ -485,10 +389,11 @@
 	const struct of_device_id *id;
 	struct axi_clkgen *axi_clkgen;
 	struct clk_init_data init;
-	const char *parent_name;
+	const char *parent_names[2];
 	const char *clk_name;
 	struct resource *mem;
 	struct clk *clk;
+	unsigned int i;
 
 	if (!pdev->dev.of_node)
 		return -ENODEV;
@@ -501,26 +406,29 @@
 	if (!axi_clkgen)
 		return -ENOMEM;
 
-	axi_clkgen->mmcm_ops = id->data;
-
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	axi_clkgen->base = devm_ioremap_resource(&pdev->dev, mem);
 	if (IS_ERR(axi_clkgen->base))
 		return PTR_ERR(axi_clkgen->base);
 
-	parent_name = of_clk_get_parent_name(pdev->dev.of_node, 0);
-	if (!parent_name)
+	init.num_parents = of_clk_get_parent_count(pdev->dev.of_node);
+	if (init.num_parents < 1 || init.num_parents > 2)
 		return -EINVAL;
 
+	for (i = 0; i < init.num_parents; i++) {
+		parent_names[i] = of_clk_get_parent_name(pdev->dev.of_node, i);
+		if (!parent_names[i])
+			return -EINVAL;
+	}
+
 	clk_name = pdev->dev.of_node->name;
 	of_property_read_string(pdev->dev.of_node, "clock-output-names",
 		&clk_name);
 
 	init.name = clk_name;
 	init.ops = &axi_clkgen_ops;
-	init.flags = CLK_SET_RATE_GATE;
-	init.parent_names = &parent_name;
-	init.num_parents = 1;
+	init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE;
+	init.parent_names = parent_names;
 
 	axi_clkgen_mmcm_enable(axi_clkgen, false);
 

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 7d62dc3..00e035b 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c

@@ -303,9 +303,8 @@
 	 */
 	maxdiv = min(ULONG_MAX / rate, maxdiv);
 
-	for (i = 1; i <= maxdiv; i = _next_div(table, i, flags)) {
-		if (!_is_valid_div(table, i, flags))
-			continue;
+	for (i = _next_div(table, 0, flags); i <= maxdiv;
+					     i = _next_div(table, i, flags)) {
 		if (rate * i == parent_rate_saved) {
 			/*
 			 * It's the most ideal case if the requested rate can be

diff --git a/drivers/clk/clk-efm32gg.c b/drivers/clk/clk-efm32gg.c
index bac4553..22e4c65 100644
--- a/drivers/clk/clk-efm32gg.c
+++ b/drivers/clk/clk-efm32gg.c

@@ -36,7 +36,7 @@
 	}
 
 	clk[clk_HFXO] = clk_register_fixed_rate(NULL, "HFXO", NULL,
-			CLK_IS_ROOT, 48000000);
+			0, 48000000);
 
 	clk[clk_HFPERCLKUSART0] = clk_register_gate(NULL, "HFPERCLK.USART0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 0, 0, NULL);

diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c
index f0ddf37..053448e 100644
--- a/drivers/clk/clk-fixed-factor.c
+++ b/drivers/clk/clk-fixed-factor.c

@@ -100,6 +100,19 @@
 }
 EXPORT_SYMBOL_GPL(clk_register_fixed_factor);
 
+void clk_unregister_fixed_factor(struct clk *clk)
+{
+	struct clk_hw *hw;
+
+	hw = __clk_get_hw(clk);
+	if (!hw)
+		return;
+
+	clk_unregister(clk);
+	kfree(to_clk_fixed_factor(hw));
+}
+EXPORT_SYMBOL_GPL(clk_unregister_fixed_factor);
+
 #ifdef CONFIG_OF
 /**
  * of_fixed_factor_clk_setup() - Setup function for simple fixed factor clock

diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index e156beb..cd9dc92 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c

@@ -104,6 +104,19 @@
 }
 EXPORT_SYMBOL_GPL(clk_register_fixed_rate);
 
+void clk_unregister_fixed_rate(struct clk *clk)
+{
+	struct clk_hw *hw;
+
+	hw = __clk_get_hw(clk);
+	if (!hw)
+		return;
+
+	clk_unregister(clk);
+	kfree(to_clk_fixed_rate(hw));
+}
+EXPORT_SYMBOL_GPL(clk_unregister_fixed_rate);
+
 #ifdef CONFIG_OF
 /**
  * of_fixed_clk_setup() - Setup function for simple fixed rate clock
@@ -123,8 +136,7 @@
 	of_property_read_string(node, "clock-output-names", &clk_name);
 
 	clk = clk_register_fixed_rate_with_accuracy(NULL, clk_name, NULL,
-						    CLK_IS_ROOT, rate,
-						    accuracy);
+						    0, rate, accuracy);
 	if (!IS_ERR(clk))
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 }

diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c
index 4628ed0..08f65ac 100644
--- a/drivers/clk/clk-gpio.c
+++ b/drivers/clk/clk-gpio.c

@@ -20,6 +20,8 @@
 #include <linux/of_gpio.h>
 #include <linux/err.h>
 #include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
 
 /**
  * DOC: basic gpio gated clock which can be enabled and disabled
@@ -199,134 +201,69 @@
 }
 EXPORT_SYMBOL_GPL(clk_register_gpio_mux);
 
-#ifdef CONFIG_OF
-/**
- * clk_register_get() has to be delayed, because -EPROBE_DEFER
- * can not be handled properly at of_clk_init() call time.
- */
-
-struct clk_gpio_delayed_register_data {
-	const char *gpio_name;
-	int num_parents;
-	const char **parent_names;
-	struct device_node *node;
-	struct mutex lock;
-	struct clk *clk;
-	struct clk *(*clk_register_get)(const char *name,
-			const char * const *parent_names, u8 num_parents,
-			unsigned gpio, bool active_low);
-};
-
-static struct clk *of_clk_gpio_delayed_register_get(
-		struct of_phandle_args *clkspec, void *_data)
+static int gpio_clk_driver_probe(struct platform_device *pdev)
 {
-	struct clk_gpio_delayed_register_data *data = _data;
-	struct clk *clk;
+	struct device_node *node = pdev->dev.of_node;
+	const char **parent_names, *gpio_name;
+	unsigned int num_parents;
 	int gpio;
 	enum of_gpio_flags of_flags;
-
-	mutex_lock(&data->lock);
-
-	if (data->clk) {
-		mutex_unlock(&data->lock);
-		return data->clk;
-	}
-
-	gpio = of_get_named_gpio_flags(data->node, data->gpio_name, 0,
-			&of_flags);
-	if (gpio < 0) {
-		mutex_unlock(&data->lock);
-		if (gpio == -EPROBE_DEFER)
-			pr_debug("%s: %s: GPIOs not yet available, retry later\n",
-					data->node->name, __func__);
-		else
-			pr_err("%s: %s: Can't get '%s' DT property\n",
-					data->node->name, __func__,
-					data->gpio_name);
-		return ERR_PTR(gpio);
-	}
-
-	clk = data->clk_register_get(data->node->name, data->parent_names,
-			data->num_parents, gpio, of_flags & OF_GPIO_ACTIVE_LOW);
-	if (IS_ERR(clk))
-		goto out;
-
-	data->clk = clk;
-out:
-	mutex_unlock(&data->lock);
-
-	return clk;
-}
-
-static struct clk *of_clk_gpio_gate_delayed_register_get(const char *name,
-		const char * const *parent_names, u8 num_parents,
-		unsigned gpio, bool active_low)
-{
-	return clk_register_gpio_gate(NULL, name, parent_names ?
-			parent_names[0] : NULL, gpio, active_low, 0);
-}
-
-static struct clk *of_clk_gpio_mux_delayed_register_get(const char *name,
-		const char * const *parent_names, u8 num_parents, unsigned gpio,
-		bool active_low)
-{
-	return clk_register_gpio_mux(NULL, name, parent_names, num_parents,
-			gpio, active_low, 0);
-}
-
-static void __init of_gpio_clk_setup(struct device_node *node,
-		const char *gpio_name,
-		struct clk *(*clk_register_get)(const char *name,
-				const char * const *parent_names,
-				u8 num_parents,
-				unsigned gpio, bool active_low))
-{
-	struct clk_gpio_delayed_register_data *data;
-	const char **parent_names;
-	int i, num_parents;
+	struct clk *clk;
+	bool active_low, is_mux;
 
 	num_parents = of_clk_get_parent_count(node);
-	if (num_parents < 0)
-		num_parents = 0;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return;
-
 	if (num_parents) {
-		parent_names = kcalloc(num_parents, sizeof(char *), GFP_KERNEL);
-		if (!parent_names) {
-			kfree(data);
-			return;
-		}
+		parent_names = devm_kcalloc(&pdev->dev, num_parents,
+					    sizeof(char *), GFP_KERNEL);
+		if (!parent_names)
+			return -ENOMEM;
 
-		for (i = 0; i < num_parents; i++)
-			parent_names[i] = of_clk_get_parent_name(node, i);
+		of_clk_parent_fill(node, parent_names, num_parents);
 	} else {
 		parent_names = NULL;
 	}
 
-	data->num_parents = num_parents;
-	data->parent_names = parent_names;
-	data->node = node;
-	data->gpio_name = gpio_name;
-	data->clk_register_get = clk_register_get;
-	mutex_init(&data->lock);
+	is_mux = of_device_is_compatible(node, "gpio-mux-clock");
 
-	of_clk_add_provider(node, of_clk_gpio_delayed_register_get, data);
+	gpio_name = is_mux ? "select-gpios" : "enable-gpios";
+	gpio = of_get_named_gpio_flags(node, gpio_name, 0, &of_flags);
+	if (gpio < 0) {
+		if (gpio == -EPROBE_DEFER)
+			pr_debug("%s: %s: GPIOs not yet available, retry later\n",
+					node->name, __func__);
+		else
+			pr_err("%s: %s: Can't get '%s' DT property\n",
+					node->name, __func__,
+					gpio_name);
+		return gpio;
+	}
+
+	active_low = of_flags & OF_GPIO_ACTIVE_LOW;
+
+	if (is_mux)
+		clk = clk_register_gpio_mux(&pdev->dev, node->name,
+				parent_names, num_parents, gpio, active_low, 0);
+	else
+		clk = clk_register_gpio_gate(&pdev->dev, node->name,
+				parent_names ?  parent_names[0] : NULL, gpio,
+				active_low, 0);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	return of_clk_add_provider(node, of_clk_src_simple_get, clk);
 }
 
-static void __init of_gpio_gate_clk_setup(struct device_node *node)
-{
-	of_gpio_clk_setup(node, "enable-gpios",
-		of_clk_gpio_gate_delayed_register_get);
-}
-CLK_OF_DECLARE(gpio_gate_clk, "gpio-gate-clock", of_gpio_gate_clk_setup);
+static const struct of_device_id gpio_clk_match_table[] = {
+	{ .compatible = "gpio-mux-clock" },
+	{ .compatible = "gpio-gate-clock" },
+	{ }
+};
 
-void __init of_gpio_mux_clk_setup(struct device_node *node)
-{
-	of_gpio_clk_setup(node, "select-gpios",
-		of_clk_gpio_mux_delayed_register_get);
-}
-CLK_OF_DECLARE(gpio_mux_clk, "gpio-mux-clock", of_gpio_mux_clk_setup);
-#endif
+static struct platform_driver gpio_clk_driver = {
+	.probe		= gpio_clk_driver_probe,
+	.driver		= {
+		.name	= "gpio-clk",
+		.of_match_table = gpio_clk_match_table,
+	},
+};
+builtin_platform_driver(gpio_clk_driver);

diff --git a/drivers/clk/clk-max77686.c b/drivers/clk/clk-max77686.c
index 446c2fe..9b6f277 100644
--- a/drivers/clk/clk-max77686.c
+++ b/drivers/clk/clk-max77686.c

@@ -38,17 +38,14 @@
 	[MAX77686_CLK_AP] = {
 		.name = "32khz_ap",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[MAX77686_CLK_CP] = {
 		.name = "32khz_cp",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[MAX77686_CLK_PMIC] = {
 		.name = "32khz_pmic",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 };
 

diff --git a/drivers/clk/clk-max77802.c b/drivers/clk/clk-max77802.c
index 4a89f79..355dd2e 100644
--- a/drivers/clk/clk-max77802.c
+++ b/drivers/clk/clk-max77802.c

@@ -39,12 +39,10 @@
 	[MAX77802_CLK_32K_AP] = {
 		.name = "32khz_ap",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[MAX77802_CLK_32K_CP] = {
 		.name = "32khz_cp",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 };
 

diff --git a/drivers/clk/clk-mb86s7x.c b/drivers/clk/clk-mb86s7x.c
index f39c25a..e081775 100644
--- a/drivers/clk/clk-mb86s7x.c
+++ b/drivers/clk/clk-mb86s7x.c

@@ -217,7 +217,7 @@
 	init.name = clkp;
 	init.num_parents = 0;
 	init.ops = &crg_port_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	crgclk->hw.init = &init;
 	crgclk->cntrlr = cntrlr;
 	crgclk->domain = domain;
@@ -341,7 +341,7 @@
 
 	init.name = dev_name(cpu_dev);
 	init.ops = &clk_clc_ops;
-	init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE;
+	init.flags = CLK_GET_RATE_NOCACHE;
 	init.num_parents = 0;
 
 	return devm_clk_register(cpu_dev, &clc->hw);

diff --git a/drivers/clk/clk-palmas.c b/drivers/clk/clk-palmas.c
index 8e3039f..9c0b8e6 100644
--- a/drivers/clk/clk-palmas.c
+++ b/drivers/clk/clk-palmas.c

@@ -44,7 +44,7 @@
 	struct clk *clk;
 	struct clk_hw hw;
 	struct palmas *palmas;
-	struct palmas_clk32k_desc *clk_desc;
+	const struct palmas_clk32k_desc *clk_desc;
 	int ext_control_pin;
 };
 
@@ -125,10 +125,10 @@
 
 struct palmas_clks_of_match_data {
 	struct clk_init_data init;
-	struct palmas_clk32k_desc desc;
+	const struct palmas_clk32k_desc desc;
 };
 
-static struct palmas_clks_of_match_data palmas_of_clk32kg = {
+static const struct palmas_clks_of_match_data palmas_of_clk32kg = {
 	.init = {
 		.name = "clk32kg",
 		.ops = &palmas_clks_ops,
@@ -144,7 +144,7 @@
 	},
 };
 
-static struct palmas_clks_of_match_data palmas_of_clk32kgaudio = {
+static const struct palmas_clks_of_match_data palmas_of_clk32kgaudio = {
 	.init = {
 		.name = "clk32kgaudio",
 		.ops = &palmas_clks_ops,
@@ -240,14 +240,14 @@
 {
 	struct palmas *palmas = dev_get_drvdata(pdev->dev.parent);
 	struct device_node *node = pdev->dev.of_node;
-	struct palmas_clks_of_match_data *match_data;
-	const struct of_device_id *match;
+	const struct palmas_clks_of_match_data *match_data;
 	struct palmas_clock_info *cinfo;
 	struct clk *clk;
 	int ret;
 
-	match = of_match_device(palmas_clks_of_match, &pdev->dev);
-	match_data = (struct palmas_clks_of_match_data *)match->data;
+	match_data = of_device_get_match_data(&pdev->dev);
+	if (!match_data)
+		return 1;
 
 	cinfo = devm_kzalloc(&pdev->dev, sizeof(*cinfo), GFP_KERNEL);
 	if (!cinfo)

diff --git a/drivers/clk/clk-pwm.c b/drivers/clk/clk-pwm.c
index 328fcfc..8830458 100644
--- a/drivers/clk/clk-pwm.c
+++ b/drivers/clk/clk-pwm.c

@@ -95,7 +95,7 @@
 
 	init.name = clk_name;
 	init.ops = &clk_pwm_ops;
-	init.flags = CLK_IS_BASIC | CLK_IS_ROOT;
+	init.flags = CLK_IS_BASIC;
 	init.num_parents = 0;
 
 	clk_pwm->pwm = pwm;

diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c
index 371150a..f8c8397 100644
--- a/drivers/clk/clk-s2mps11.c
+++ b/drivers/clk/clk-s2mps11.c

@@ -99,17 +99,14 @@
 	[S2MPS11_CLK_AP] = {
 		.name = "s2mps11_ap",
 		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[S2MPS11_CLK_CP] = {
 		.name = "s2mps11_cp",
 		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[S2MPS11_CLK_BT] = {
 		.name = "s2mps11_bt",
 		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 };
 

diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c
index 89e9ca7..6962ee5 100644
--- a/drivers/clk/clk-scpi.c
+++ b/drivers/clk/clk-scpi.c

@@ -155,7 +155,7 @@
 	unsigned long min = 0, max = 0;
 
 	init.name = name;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.num_parents = 0;
 	init.ops = match->data;
 	sclk->hw.init = &init;

diff --git a/drivers/clk/clk-si514.c b/drivers/clk/clk-si514.c
index 6af7dce..ceef25b 100644
--- a/drivers/clk/clk-si514.c
+++ b/drivers/clk/clk-si514.c

@@ -313,7 +313,7 @@
 		return -ENOMEM;
 
 	init.ops = &si514_clk_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.num_parents = 0;
 	data->hw.init = &init;
 	data->i2c_client = client;

diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c
index 850316a..b1bc12c 100644
--- a/drivers/clk/clk-si5351.c
+++ b/drivers/clk/clk-si5351.c

@@ -1495,7 +1495,7 @@
 	if (drvdata->variant == SI5351_VARIANT_B) {
 		init.name = si5351_pll_names[2];
 		init.ops = &si5351_vxco_ops;
-		init.flags = CLK_IS_ROOT;
+		init.flags = 0;
 		init.parent_names = NULL;
 		init.num_parents = 0;
 	} else {

diff --git a/drivers/clk/clk-si570.c b/drivers/clk/clk-si570.c
index cf478aa..d566485 100644
--- a/drivers/clk/clk-si570.c
+++ b/drivers/clk/clk-si570.c

@@ -418,7 +418,7 @@
 		return -ENOMEM;
 
 	init.ops = &si570_clk_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.num_parents = 0;
 	data->hw.init = &init;
 	data->i2c_client = client;

diff --git a/drivers/clk/clk-vt8500.c b/drivers/clk/clk-vt8500.c
index 37e9288..b0f76a8 100644
--- a/drivers/clk/clk-vt8500.c
+++ b/drivers/clk/clk-vt8500.c

@@ -355,7 +355,7 @@
 #define WM8850_BITS_TO_VAL(m, d1, d2)					\
 		((((m / 2) - 1) << 16) | ((d1 - 1) << 8) | d2)
 
-static void vt8500_find_pll_bits(unsigned long rate, unsigned long parent_rate,
+static int vt8500_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				u32 *multiplier, u32 *prediv)
 {
 	unsigned long tclk;
@@ -365,7 +365,7 @@
 		pr_err("%s: requested rate out of range\n", __func__);
 		*multiplier = 0;
 		*prediv = 1;
-		return;
+		return -EINVAL;
 	}
 	if (rate <= parent_rate * 31)
 		/* use the prediv to double the resolution */
@@ -379,12 +379,15 @@
 	if (tclk != rate)
 		pr_warn("%s: requested rate %lu, found rate %lu\n", __func__,
 								rate, tclk);
+
+	return 0;
 }
 
-static void wm8650_find_pll_bits(unsigned long rate, unsigned long parent_rate,
+static int wm8650_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				u32 *multiplier, u32 *divisor1, u32 *divisor2)
 {
-	u32 mul, div1, div2;
+	u32 mul, div1;
+	int div2;
 	u32 best_mul, best_div1, best_div2;
 	unsigned long tclk, rate_err, best_err;
 
@@ -403,7 +406,7 @@
 					*multiplier = mul;
 					*divisor1 = div1;
 					*divisor2 = div2;
-					return;
+					return 0;
 				}
 
 				if (rate_err < best_err) {
@@ -414,12 +417,19 @@
 				}
 			}
 
+	if (best_err == (unsigned long)-1) {
+		pr_warn("%s: impossible rate %lu\n", __func__, rate);
+		return -EINVAL;
+	}
+
 	/* if we got here, it wasn't an exact match */
 	pr_warn("%s: requested rate %lu, found rate %lu\n", __func__, rate,
 							rate - best_err);
 	*multiplier = best_mul;
 	*divisor1 = best_div1;
 	*divisor2 = best_div2;
+
+	return 0;
 }
 
 static u32 wm8750_get_filter(u32 parent_rate, u32 divisor1)
@@ -449,10 +459,11 @@
 	return 0;
 }
 
-static void wm8750_find_pll_bits(unsigned long rate, unsigned long parent_rate,
+static int wm8750_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				u32 *filter, u32 *multiplier, u32 *divisor1, u32 *divisor2)
 {
-	u32 mul, div1, div2;
+	u32 mul;
+	int div1, div2;
 	u32 best_mul, best_div1, best_div2;
 	unsigned long tclk, rate_err, best_err;
 
@@ -472,7 +483,7 @@
 					*multiplier = mul;
 					*divisor1 = div1;
 					*divisor2 = div2;
-					return;
+					return 0;
 				}
 
 				if (rate_err < best_err) {
@@ -483,6 +494,11 @@
 				}
 			}
 
+	if (best_err == (unsigned long)-1) {
+		pr_warn("%s: impossible rate %lu\n", __func__, rate);
+		return -EINVAL;
+	}
+
 	/* if we got here, it wasn't an exact match */
 	pr_warn("%s: requested rate %lu, found rate %lu\n", __func__, rate,
 							rate - best_err);
@@ -491,12 +507,15 @@
 	*multiplier = best_mul;
 	*divisor1 = best_div1;
 	*divisor2 = best_div2;
+
+	return 0;
 }
 
-static void wm8850_find_pll_bits(unsigned long rate, unsigned long parent_rate,
+static int wm8850_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				u32 *multiplier, u32 *divisor1, u32 *divisor2)
 {
-	u32 mul, div1, div2;
+	u32 mul;
+	int div1, div2;
 	u32 best_mul, best_div1, best_div2;
 	unsigned long tclk, rate_err, best_err;
 
@@ -516,7 +535,7 @@
 					*multiplier = mul;
 					*divisor1 = div1;
 					*divisor2 = div2;
-					return;
+					return 0;
 				}
 
 				if (rate_err < best_err) {
@@ -527,6 +546,11 @@
 				}
 			}
 
+	if (best_err == (unsigned long)-1) {
+		pr_warn("%s: impossible rate %lu\n", __func__, rate);
+		return -EINVAL;
+	}
+
 	/* if we got here, it wasn't an exact match */
 	pr_warn("%s: requested rate %lu, found rate %lu\n", __func__, rate,
 							rate - best_err);
@@ -534,6 +558,8 @@
 	*multiplier = best_mul;
 	*divisor1 = best_div1;
 	*divisor2 = best_div2;
+
+	return 0;
 }
 
 static int vtwm_pll_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -543,31 +569,39 @@
 	u32 filter, mul, div1, div2;
 	u32 pll_val;
 	unsigned long flags = 0;
+	int ret;
 
 	/* sanity check */
 
 	switch (pll->type) {
 	case PLL_TYPE_VT8500:
-		vt8500_find_pll_bits(rate, parent_rate, &mul, &div1);
-		pll_val = VT8500_BITS_TO_VAL(mul, div1);
+		ret = vt8500_find_pll_bits(rate, parent_rate, &mul, &div1);
+		if (!ret)
+			pll_val = VT8500_BITS_TO_VAL(mul, div1);
 		break;
 	case PLL_TYPE_WM8650:
-		wm8650_find_pll_bits(rate, parent_rate, &mul, &div1, &div2);
-		pll_val = WM8650_BITS_TO_VAL(mul, div1, div2);
+		ret = wm8650_find_pll_bits(rate, parent_rate, &mul, &div1, &div2);
+		if (!ret)
+			pll_val = WM8650_BITS_TO_VAL(mul, div1, div2);
 		break;
 	case PLL_TYPE_WM8750:
-		wm8750_find_pll_bits(rate, parent_rate, &filter, &mul, &div1, &div2);
-		pll_val = WM8750_BITS_TO_VAL(filter, mul, div1, div2);
+		ret = wm8750_find_pll_bits(rate, parent_rate, &filter, &mul, &div1, &div2);
+		if (!ret)
+			pll_val = WM8750_BITS_TO_VAL(filter, mul, div1, div2);
 		break;
 	case PLL_TYPE_WM8850:
-		wm8850_find_pll_bits(rate, parent_rate, &mul, &div1, &div2);
-		pll_val = WM8850_BITS_TO_VAL(mul, div1, div2);
+		ret = wm8850_find_pll_bits(rate, parent_rate, &mul, &div1, &div2);
+		if (!ret)
+			pll_val = WM8850_BITS_TO_VAL(mul, div1, div2);
 		break;
 	default:
 		pr_err("%s: invalid pll type\n", __func__);
-		return 0;
+		ret = -EINVAL;
 	}
 
+	if (ret)
+		return ret;
+
 	spin_lock_irqsave(pll->lock, flags);
 
 	vt8500_pmc_wait_busy();
@@ -585,28 +619,36 @@
 	struct clk_pll *pll = to_clk_pll(hw);
 	u32 filter, mul, div1, div2;
 	long round_rate;
+	int ret;
 
 	switch (pll->type) {
 	case PLL_TYPE_VT8500:
-		vt8500_find_pll_bits(rate, *prate, &mul, &div1);
-		round_rate = VT8500_BITS_TO_FREQ(*prate, mul, div1);
+		ret = vt8500_find_pll_bits(rate, *prate, &mul, &div1);
+		if (!ret)
+			round_rate = VT8500_BITS_TO_FREQ(*prate, mul, div1);
 		break;
 	case PLL_TYPE_WM8650:
-		wm8650_find_pll_bits(rate, *prate, &mul, &div1, &div2);
-		round_rate = WM8650_BITS_TO_FREQ(*prate, mul, div1, div2);
+		ret = wm8650_find_pll_bits(rate, *prate, &mul, &div1, &div2);
+		if (!ret)
+			round_rate = WM8650_BITS_TO_FREQ(*prate, mul, div1, div2);
 		break;
 	case PLL_TYPE_WM8750:
-		wm8750_find_pll_bits(rate, *prate, &filter, &mul, &div1, &div2);
-		round_rate = WM8750_BITS_TO_FREQ(*prate, mul, div1, div2);
+		ret = wm8750_find_pll_bits(rate, *prate, &filter, &mul, &div1, &div2);
+		if (!ret)
+			round_rate = WM8750_BITS_TO_FREQ(*prate, mul, div1, div2);
 		break;
 	case PLL_TYPE_WM8850:
-		wm8850_find_pll_bits(rate, *prate, &mul, &div1, &div2);
-		round_rate = WM8850_BITS_TO_FREQ(*prate, mul, div1, div2);
+		ret = wm8850_find_pll_bits(rate, *prate, &mul, &div1, &div2);
+		if (!ret)
+			round_rate = WM8850_BITS_TO_FREQ(*prate, mul, div1, div2);
 		break;
 	default:
-		round_rate = 0;
+		ret = -EINVAL;
 	}
 
+	if (ret)
+		return ret;
+
 	return round_rate;
 }
 

diff --git a/drivers/clk/clk-xgene.c b/drivers/clk/clk-xgene.c
index bd7156b..d73450b 100644
--- a/drivers/clk/clk-xgene.c
+++ b/drivers/clk/clk-xgene.c

@@ -376,8 +376,8 @@
 		/* Set new divider */
 		data = xgene_clk_read(pclk->param.divider_reg +
 				pclk->param.reg_divider_offset);
-		data &= ~((1 << pclk->param.reg_divider_width) - 1)
-				<< pclk->param.reg_divider_shift;
+		data &= ~(((1 << pclk->param.reg_divider_width) - 1)
+				<< pclk->param.reg_divider_shift);
 		data |= divider;
 		xgene_clk_write(data, pclk->param.divider_reg +
 					pclk->param.reg_divider_offset);

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index b4db67a..fb74dc1 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c

@@ -350,13 +350,12 @@
 {
 	if (!core || index >= core->num_parents)
 		return NULL;
-	else if (!core->parents)
-		return clk_core_lookup(core->parent_names[index]);
-	else if (!core->parents[index])
-		return core->parents[index] =
-			clk_core_lookup(core->parent_names[index]);
-	else
-		return core->parents[index];
+
+	if (!core->parents[index])
+		core->parents[index] =
+				clk_core_lookup(core->parent_names[index]);
+
+	return core->parents[index];
 }
 
 struct clk_hw *
@@ -386,7 +385,7 @@
 
 	ret = core->rate;
 
-	if (core->flags & CLK_IS_ROOT)
+	if (!core->num_parents)
 		goto out;
 
 	if (!core->parent)
@@ -1067,31 +1066,13 @@
 {
 	int i;
 
-	if (!core->parents) {
-		core->parents = kcalloc(core->num_parents,
-					sizeof(struct clk *), GFP_KERNEL);
-		if (!core->parents)
-			return -ENOMEM;
-	}
+	if (!parent)
+		return -EINVAL;
 
-	/*
-	 * find index of new parent clock using cached parent ptrs,
-	 * or if not yet cached, use string name comparison and cache
-	 * them now to avoid future calls to clk_core_lookup.
-	 */
-	for (i = 0; i < core->num_parents; i++) {
-		if (core->parents[i] == parent)
+	for (i = 0; i < core->num_parents; i++)
+		if (clk_core_get_parent_by_index(core, i) == parent)
 			return i;
 
-		if (core->parents[i])
-			continue;
-
-		if (!strcmp(core->parent_names[i], parent->name)) {
-			core->parents[i] = clk_core_lookup(parent->name);
-			return i;
-		}
-	}
-
 	return -EINVAL;
 }
 
@@ -1677,56 +1658,14 @@
 }
 EXPORT_SYMBOL_GPL(clk_get_parent);
 
-/*
- * .get_parent is mandatory for clocks with multiple possible parents.  It is
- * optional for single-parent clocks.  Always call .get_parent if it is
- * available and WARN if it is missing for multi-parent clocks.
- *
- * For single-parent clocks without .get_parent, first check to see if the
- * .parents array exists, and if so use it to avoid an expensive tree
- * traversal.  If .parents does not exist then walk the tree.
- */
 static struct clk_core *__clk_init_parent(struct clk_core *core)
 {
-	struct clk_core *ret = NULL;
-	u8 index;
+	u8 index = 0;
 
-	/* handle the trivial cases */
+	if (core->num_parents > 1 && core->ops->get_parent)
+		index = core->ops->get_parent(core->hw);
 
-	if (!core->num_parents)
-		goto out;
-
-	if (core->num_parents == 1) {
-		if (IS_ERR_OR_NULL(core->parent))
-			core->parent = clk_core_lookup(core->parent_names[0]);
-		ret = core->parent;
-		goto out;
-	}
-
-	if (!core->ops->get_parent) {
-		WARN(!core->ops->get_parent,
-			"%s: multi-parent clocks must implement .get_parent\n",
-			__func__);
-		goto out;
-	}
-
-	/*
-	 * Do our best to cache parent clocks in core->parents.  This prevents
-	 * unnecessary and expensive lookups.  We don't set core->parent here;
-	 * that is done by the calling function.
-	 */
-
-	index = core->ops->get_parent(core->hw);
-
-	if (!core->parents)
-		core->parents =
-			kcalloc(core->num_parents, sizeof(struct clk *),
-					GFP_KERNEL);
-
-	ret = clk_core_get_parent_by_index(core, index);
-
-out:
-	return ret;
+	return clk_core_get_parent_by_index(core, index);
 }
 
 static void clk_core_reparent(struct clk_core *core,
@@ -1809,13 +1748,13 @@
 	/* try finding the new parent index */
 	if (parent) {
 		p_index = clk_fetch_parent_index(core, parent);
-		p_rate = parent->rate;
 		if (p_index < 0) {
 			pr_debug("%s: clk %s can not be parent of clk %s\n",
 					__func__, parent->name, core->name);
 			ret = p_index;
 			goto out;
 		}
+		p_rate = parent->rate;
 	}
 
 	/* propagate PRE_RATE_CHANGE notifications */
@@ -1902,6 +1841,10 @@
 
 	clk_prepare_lock();
 
+	/* bail early if nothing to do */
+	if (degrees == clk->core->phase)
+		goto out;
+
 	trace_clk_set_phase(clk->core, degrees);
 
 	if (clk->core->ops->set_phase)
@@ -1912,6 +1855,7 @@
 	if (!ret)
 		clk->core->phase = degrees;
 
+out:
 	clk_prepare_unlock();
 
 	return ret;
@@ -2218,7 +2162,7 @@
  *
  * Dynamically removes a clk and all its child nodes from the
  * debugfs clk directory if clk->dentry points to debugfs created by
- * clk_debug_register in __clk_init.
+ * clk_debug_register in __clk_core_init.
  */
 static void clk_debug_unregister(struct clk_core *core)
 {
@@ -2303,26 +2247,22 @@
 #endif
 
 /**
- * __clk_init - initialize the data structures in a struct clk
- * @dev:	device initializing this clk, placeholder for now
- * @clk:	clk being initialized
+ * __clk_core_init - initialize the data structures in a struct clk_core
+ * @core:	clk_core being initialized
  *
  * Initializes the lists in struct clk_core, queries the hardware for the
  * parent and rate and sets them both.
  */
-static int __clk_init(struct device *dev, struct clk *clk_user)
+static int __clk_core_init(struct clk_core *core)
 {
 	int i, ret = 0;
 	struct clk_core *orphan;
 	struct hlist_node *tmp2;
-	struct clk_core *core;
 	unsigned long rate;
 
-	if (!clk_user)
+	if (!core)
 		return -EINVAL;
 
-	core = clk_user->core;
-
 	clk_prepare_lock();
 
 	/* check to see if a clock with this name is already registered */
@@ -2337,22 +2277,29 @@
 	if (core->ops->set_rate &&
 	    !((core->ops->round_rate || core->ops->determine_rate) &&
 	      core->ops->recalc_rate)) {
-		pr_warning("%s: %s must implement .round_rate or .determine_rate in addition to .recalc_rate\n",
-				__func__, core->name);
+		pr_err("%s: %s must implement .round_rate or .determine_rate in addition to .recalc_rate\n",
+		       __func__, core->name);
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (core->ops->set_parent && !core->ops->get_parent) {
-		pr_warning("%s: %s must implement .get_parent & .set_parent\n",
-				__func__, core->name);
+		pr_err("%s: %s must implement .get_parent & .set_parent\n",
+		       __func__, core->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (core->num_parents > 1 && !core->ops->get_parent) {
+		pr_err("%s: %s must implement .get_parent as it has multi parents\n",
+		       __func__, core->name);
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (core->ops->set_rate_and_parent &&
 			!(core->ops->set_parent && core->ops->set_rate)) {
-		pr_warn("%s: %s must implement .set_parent & .set_rate\n",
+		pr_err("%s: %s must implement .set_parent & .set_rate\n",
 				__func__, core->name);
 		ret = -EINVAL;
 		goto out;
@@ -2364,37 +2311,12 @@
 				"%s: invalid NULL in %s's .parent_names\n",
 				__func__, core->name);
 
-	/*
-	 * Allocate an array of struct clk *'s to avoid unnecessary string
-	 * look-ups of clk's possible parents.  This can fail for clocks passed
-	 * in to clk_init during early boot; thus any access to core->parents[]
-	 * must always check for a NULL pointer and try to populate it if
-	 * necessary.
-	 *
-	 * If core->parents is not NULL we skip this entire block.  This allows
-	 * for clock drivers to statically initialize core->parents.
-	 */
-	if (core->num_parents > 1 && !core->parents) {
-		core->parents = kcalloc(core->num_parents, sizeof(struct clk *),
-					GFP_KERNEL);
-		/*
-		 * clk_core_lookup returns NULL for parents that have not been
-		 * clk_init'd; thus any access to clk->parents[] must check
-		 * for a NULL pointer.  We can always perform lazy lookups for
-		 * missing parents later on.
-		 */
-		if (core->parents)
-			for (i = 0; i < core->num_parents; i++)
-				core->parents[i] =
-					clk_core_lookup(core->parent_names[i]);
-	}
-
 	core->parent = __clk_init_parent(core);
 
 	/*
-	 * Populate core->parent if parent has already been __clk_init'd.  If
-	 * parent has not yet been __clk_init'd then place clk in the orphan
-	 * list.  If clk has set the CLK_IS_ROOT flag then place it in the root
+	 * Populate core->parent if parent has already been clk_core_init'd. If
+	 * parent has not yet been clk_core_init'd then place clk in the orphan
+	 * list.  If clk doesn't have any parents then place it in the root
 	 * clk list.
 	 *
 	 * Every time a new clk is clk_init'd then we walk the list of orphan
@@ -2405,7 +2327,7 @@
 		hlist_add_head(&core->child_node,
 				&core->parent->children);
 		core->orphan = core->parent->orphan;
-	} else if (core->flags & CLK_IS_ROOT) {
+	} else if (!core->num_parents) {
 		hlist_add_head(&core->child_node, &clk_root_list);
 		core->orphan = false;
 	} else {
@@ -2454,24 +2376,15 @@
 	core->rate = core->req_rate = rate;
 
 	/*
-	 * walk the list of orphan clocks and reparent any that are children of
-	 * this clock
+	 * walk the list of orphan clocks and reparent any that newly finds a
+	 * parent.
 	 */
 	hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
-		if (orphan->num_parents && orphan->ops->get_parent) {
-			i = orphan->ops->get_parent(orphan->hw);
-			if (i >= 0 && i < orphan->num_parents &&
-			    !strcmp(core->name, orphan->parent_names[i]))
-				clk_core_reparent(orphan, core);
-			continue;
-		}
+		struct clk_core *parent = __clk_init_parent(orphan);
 
-		for (i = 0; i < orphan->num_parents; i++)
-			if (!strcmp(core->name, orphan->parent_names[i])) {
-				clk_core_reparent(orphan, core);
-				break;
-			}
-	 }
+		if (parent)
+			clk_core_reparent(orphan, parent);
+	}
 
 	/*
 	 * optional platform-specific magic
@@ -2585,21 +2498,31 @@
 		}
 	}
 
+	/* avoid unnecessary string look-ups of clk_core's possible parents. */
+	core->parents = kcalloc(core->num_parents, sizeof(*core->parents),
+				GFP_KERNEL);
+	if (!core->parents) {
+		ret = -ENOMEM;
+		goto fail_parents;
+	};
+
 	INIT_HLIST_HEAD(&core->clks);
 
 	hw->clk = __clk_create_clk(hw, NULL, NULL);
 	if (IS_ERR(hw->clk)) {
 		ret = PTR_ERR(hw->clk);
-		goto fail_parent_names_copy;
+		goto fail_parents;
 	}
 
-	ret = __clk_init(dev, hw->clk);
+	ret = __clk_core_init(core);
 	if (!ret)
 		return hw->clk;
 
 	__clk_free_clk(hw->clk);
 	hw->clk = NULL;
 
+fail_parents:
+	kfree(core->parents);
 fail_parent_names_copy:
 	while (--i >= 0)
 		kfree_const(core->parent_names[i]);
@@ -2683,7 +2606,7 @@
 	if (clk->core->ops == &clk_nodrv_ops) {
 		pr_err("%s: unregistered clock: %s\n", __func__,
 		       clk->core->name);
-		return;
+		goto unlock;
 	}
 	/*
 	 * Assign empty clock ops for consumers that might still hold
@@ -2709,7 +2632,7 @@
 		pr_warn("%s: unregistering prepared clock: %s\n",
 					__func__, clk->core->name);
 	kref_put(&clk->core->ref, __clk_release);
-
+unlock:
 	clk_prepare_unlock();
 }
 EXPORT_SYMBOL_GPL(clk_unregister);
@@ -3061,10 +2984,23 @@
 {
 	return __of_clk_get_from_provider(clkspec, NULL, __func__);
 }
+EXPORT_SYMBOL_GPL(of_clk_get_from_provider);
 
-int of_clk_get_parent_count(struct device_node *np)
+/**
+ * of_clk_get_parent_count() - Count the number of clocks a device node has
+ * @np: device node to count
+ *
+ * Returns: The number of clocks that are possible parents of this node
+ */
+unsigned int of_clk_get_parent_count(struct device_node *np)
 {
-	return of_count_phandle_with_args(np, "clocks", "#clock-cells");
+	int count;
+
+	count = of_count_phandle_with_args(np, "clocks", "#clock-cells");
+	if (count < 0)
+		return 0;
+
+	return count;
 }
 EXPORT_SYMBOL_GPL(of_clk_get_parent_count);
 
@@ -3214,6 +3150,9 @@
 	for_each_matching_node_and_match(np, matches, &match) {
 		struct clock_provider *parent;
 
+		if (!of_device_is_available(np))
+			continue;
+
 		parent = kzalloc(sizeof(*parent), GFP_KERNEL);
 		if (!parent) {
 			list_for_each_entry_safe(clk_provider, next,

diff --git a/drivers/clk/h8300/clk-div.c b/drivers/clk/h8300/clk-div.c
index d71d011..4bf44a2 100644
--- a/drivers/clk/h8300/clk-div.c
+++ b/drivers/clk/h8300/clk-div.c

@@ -13,7 +13,7 @@
 
 static void __init h8300_div_clk_setup(struct device_node *node)
 {
-	int num_parents;
+	unsigned int num_parents;
 	struct clk *clk;
 	const char *clk_name = node->name;
 	const char *parent_name;
@@ -22,7 +22,7 @@
 	int offset;
 
 	num_parents = of_clk_get_parent_count(node);
-	if (num_parents < 1) {
+	if (!num_parents) {
 		pr_err("%s: no parent found", clk_name);
 		return;
 	}
@@ -34,7 +34,7 @@
 	}
 	offset = (unsigned long)divcr & 3;
 	offset = (3 - offset) * 8;
-	divcr = (void *)((unsigned long)divcr & ~3);
+	divcr = (void __iomem *)((unsigned long)divcr & ~3);
 
 	parent_name = of_clk_get_parent_name(node, 0);
 	of_property_read_u32(node, "renesas,width", &width);

diff --git a/drivers/clk/h8300/clk-h8s2678.c b/drivers/clk/h8300/clk-h8s2678.c
index 6cf38dc..c9c2fd5 100644
--- a/drivers/clk/h8300/clk-h8s2678.c
+++ b/drivers/clk/h8300/clk-h8s2678.c

@@ -83,7 +83,7 @@
 
 static void __init h8s2678_pll_clk_setup(struct device_node *node)
 {
-	int num_parents;
+	unsigned int num_parents;
 	struct clk *clk;
 	const char *clk_name = node->name;
 	const char *parent_name;
@@ -91,7 +91,7 @@
 	struct clk_init_data init;
 
 	num_parents = of_clk_get_parent_count(node);
-	if (num_parents < 1) {
+	if (!num_parents) {
 		pr_err("%s: no parent found", clk_name);
 		return;
 	}

diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c
index 7d03fe1..d04a104 100644
--- a/drivers/clk/hisilicon/clk-hi3620.c
+++ b/drivers/clk/hisilicon/clk-hi3620.c

@@ -78,15 +78,15 @@
 
 /* fixed rate clocks */
 static struct hisi_fixed_rate_clock hi3620_fixed_rate_clks[] __initdata = {
-	{ HI3620_OSC32K,   "osc32k",   NULL, CLK_IS_ROOT, 32768, },
-	{ HI3620_OSC26M,   "osc26m",   NULL, CLK_IS_ROOT, 26000000, },
-	{ HI3620_PCLK,     "pclk",     NULL, CLK_IS_ROOT, 26000000, },
-	{ HI3620_PLL_ARM0, "armpll0",  NULL, CLK_IS_ROOT, 1600000000, },
-	{ HI3620_PLL_ARM1, "armpll1",  NULL, CLK_IS_ROOT, 1600000000, },
-	{ HI3620_PLL_PERI, "armpll2",  NULL, CLK_IS_ROOT, 1440000000, },
-	{ HI3620_PLL_USB,  "armpll3",  NULL, CLK_IS_ROOT, 1440000000, },
-	{ HI3620_PLL_HDMI, "armpll4",  NULL, CLK_IS_ROOT, 1188000000, },
-	{ HI3620_PLL_GPU,  "armpll5",  NULL, CLK_IS_ROOT, 1300000000, },
+	{ HI3620_OSC32K,   "osc32k",   NULL, 0, 32768, },
+	{ HI3620_OSC26M,   "osc26m",   NULL, 0, 26000000, },
+	{ HI3620_PCLK,     "pclk",     NULL, 0, 26000000, },
+	{ HI3620_PLL_ARM0, "armpll0",  NULL, 0, 1600000000, },
+	{ HI3620_PLL_ARM1, "armpll1",  NULL, 0, 1600000000, },
+	{ HI3620_PLL_PERI, "armpll2",  NULL, 0, 1440000000, },
+	{ HI3620_PLL_USB,  "armpll3",  NULL, 0, 1440000000, },
+	{ HI3620_PLL_HDMI, "armpll4",  NULL, 0, 1188000000, },
+	{ HI3620_PLL_GPU,  "armpll5",  NULL, 0, 1300000000, },
 };
 
 /* fixed factor clocks */

diff --git a/drivers/clk/hisilicon/clk-hi6220-stub.c b/drivers/clk/hisilicon/clk-hi6220-stub.c
index 8afb40e..329a092 100644
--- a/drivers/clk/hisilicon/clk-hi6220-stub.c
+++ b/drivers/clk/hisilicon/clk-hi6220-stub.c

@@ -235,7 +235,7 @@
 	init.name = "acpu0";
 	init.ops = &hi6220_stub_clk_ops;
 	init.num_parents = 0;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 
 	clk = devm_clk_register(dev, &stub_clk->hw);
 	if (IS_ERR(clk))

diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c
index 4563343..f02cb41 100644
--- a/drivers/clk/hisilicon/clk-hi6220.c
+++ b/drivers/clk/hisilicon/clk-hi6220.c

@@ -26,19 +26,19 @@
 
 /* clocks in AO (always on) controller */
 static struct hisi_fixed_rate_clock hi6220_fixed_rate_clks[] __initdata = {
-	{ HI6220_REF32K,	"ref32k",	NULL, CLK_IS_ROOT, 32764,     },
-	{ HI6220_CLK_TCXO,	"clk_tcxo",	NULL, CLK_IS_ROOT, 19200000,  },
-	{ HI6220_MMC1_PAD,	"mmc1_pad",	NULL, CLK_IS_ROOT, 100000000, },
-	{ HI6220_MMC2_PAD,	"mmc2_pad",	NULL, CLK_IS_ROOT, 100000000, },
-	{ HI6220_MMC0_PAD,	"mmc0_pad",	NULL, CLK_IS_ROOT, 200000000, },
-	{ HI6220_PLL_BBP,	"bbppll0",	NULL, CLK_IS_ROOT, 245760000, },
-	{ HI6220_PLL_GPU,	"gpupll",	NULL, CLK_IS_ROOT, 1000000000,},
-	{ HI6220_PLL1_DDR,	"ddrpll1",	NULL, CLK_IS_ROOT, 1066000000,},
-	{ HI6220_PLL_SYS,	"syspll",	NULL, CLK_IS_ROOT, 1200000000,},
-	{ HI6220_PLL_SYS_MEDIA,	"media_syspll",	NULL, CLK_IS_ROOT, 1200000000,},
-	{ HI6220_DDR_SRC,	"ddr_sel_src",  NULL, CLK_IS_ROOT, 1200000000,},
-	{ HI6220_PLL_MEDIA,	"media_pll",    NULL, CLK_IS_ROOT, 1440000000,},
-	{ HI6220_PLL_DDR,	"ddrpll0",      NULL, CLK_IS_ROOT, 1600000000,},
+	{ HI6220_REF32K,	"ref32k",	NULL, 0, 32764,     },
+	{ HI6220_CLK_TCXO,	"clk_tcxo",	NULL, 0, 19200000,  },
+	{ HI6220_MMC1_PAD,	"mmc1_pad",	NULL, 0, 100000000, },
+	{ HI6220_MMC2_PAD,	"mmc2_pad",	NULL, 0, 100000000, },
+	{ HI6220_MMC0_PAD,	"mmc0_pad",	NULL, 0, 200000000, },
+	{ HI6220_PLL_BBP,	"bbppll0",	NULL, 0, 245760000, },
+	{ HI6220_PLL_GPU,	"gpupll",	NULL, 0, 1000000000,},
+	{ HI6220_PLL1_DDR,	"ddrpll1",	NULL, 0, 1066000000,},
+	{ HI6220_PLL_SYS,	"syspll",	NULL, 0, 1200000000,},
+	{ HI6220_PLL_SYS_MEDIA,	"media_syspll",	NULL, 0, 1200000000,},
+	{ HI6220_DDR_SRC,	"ddr_sel_src",  NULL, 0, 1200000000,},
+	{ HI6220_PLL_MEDIA,	"media_pll",    NULL, 0, 1440000000,},
+	{ HI6220_PLL_DDR,	"ddrpll0",      NULL, 0, 1600000000,},
 };
 
 static struct hisi_fixed_factor_clock hi6220_fixed_factor_clks[] __initdata = {

diff --git a/drivers/clk/hisilicon/clk-hip04.c b/drivers/clk/hisilicon/clk-hip04.c
index 8ca9673..b38e03d 100644
--- a/drivers/clk/hisilicon/clk-hip04.c
+++ b/drivers/clk/hisilicon/clk-hip04.c

@@ -36,9 +36,9 @@
 
 /* fixed rate clocks */
 static struct hisi_fixed_rate_clock hip04_fixed_rate_clks[] __initdata = {
-	{ HIP04_OSC50M,   "osc50m",   NULL, CLK_IS_ROOT, 50000000, },
-	{ HIP04_CLK_50M,  "clk50m",   NULL, CLK_IS_ROOT, 50000000, },
-	{ HIP04_CLK_168M, "clk168m",  NULL, CLK_IS_ROOT, 168750000, },
+	{ HIP04_OSC50M,   "osc50m",   NULL, 0, 50000000, },
+	{ HIP04_CLK_50M,  "clk50m",   NULL, 0, 50000000, },
+	{ HIP04_CLK_168M, "clk168m",  NULL, 0, 168750000, },
 };
 
 static void __init hip04_clk_init(struct device_node *np)

diff --git a/drivers/clk/hisilicon/clk-hix5hd2.c b/drivers/clk/hisilicon/clk-hix5hd2.c
index 0aaf29d..14b05ef 100644
--- a/drivers/clk/hisilicon/clk-hix5hd2.c
+++ b/drivers/clk/hisilicon/clk-hix5hd2.c

@@ -14,36 +14,36 @@
 #include "clk.h"
 
 static struct hisi_fixed_rate_clock hix5hd2_fixed_rate_clks[] __initdata = {
-	{ HIX5HD2_FIXED_1200M, "1200m", NULL, CLK_IS_ROOT, 1200000000, },
-	{ HIX5HD2_FIXED_400M, "400m", NULL, CLK_IS_ROOT, 400000000, },
-	{ HIX5HD2_FIXED_48M, "48m", NULL, CLK_IS_ROOT, 48000000, },
-	{ HIX5HD2_FIXED_24M, "24m", NULL, CLK_IS_ROOT, 24000000, },
-	{ HIX5HD2_FIXED_600M, "600m", NULL, CLK_IS_ROOT, 600000000, },
-	{ HIX5HD2_FIXED_300M, "300m", NULL, CLK_IS_ROOT, 300000000, },
-	{ HIX5HD2_FIXED_75M, "75m", NULL, CLK_IS_ROOT, 75000000, },
-	{ HIX5HD2_FIXED_200M, "200m", NULL, CLK_IS_ROOT, 200000000, },
-	{ HIX5HD2_FIXED_100M, "100m", NULL, CLK_IS_ROOT, 100000000, },
-	{ HIX5HD2_FIXED_40M, "40m", NULL, CLK_IS_ROOT, 40000000, },
-	{ HIX5HD2_FIXED_150M, "150m", NULL, CLK_IS_ROOT, 150000000, },
-	{ HIX5HD2_FIXED_1728M, "1728m", NULL, CLK_IS_ROOT, 1728000000, },
-	{ HIX5HD2_FIXED_28P8M, "28p8m", NULL, CLK_IS_ROOT, 28000000, },
-	{ HIX5HD2_FIXED_432M, "432m", NULL, CLK_IS_ROOT, 432000000, },
-	{ HIX5HD2_FIXED_345P6M, "345p6m", NULL, CLK_IS_ROOT, 345000000, },
-	{ HIX5HD2_FIXED_288M, "288m", NULL, CLK_IS_ROOT, 288000000, },
-	{ HIX5HD2_FIXED_60M,	"60m", NULL, CLK_IS_ROOT, 60000000, },
-	{ HIX5HD2_FIXED_750M, "750m", NULL, CLK_IS_ROOT, 750000000, },
-	{ HIX5HD2_FIXED_500M, "500m", NULL, CLK_IS_ROOT, 500000000, },
-	{ HIX5HD2_FIXED_54M,	"54m", NULL, CLK_IS_ROOT, 54000000, },
-	{ HIX5HD2_FIXED_27M, "27m", NULL, CLK_IS_ROOT, 27000000, },
-	{ HIX5HD2_FIXED_1500M, "1500m", NULL, CLK_IS_ROOT, 1500000000, },
-	{ HIX5HD2_FIXED_375M, "375m", NULL, CLK_IS_ROOT, 375000000, },
-	{ HIX5HD2_FIXED_187M, "187m", NULL, CLK_IS_ROOT, 187000000, },
-	{ HIX5HD2_FIXED_250M, "250m", NULL, CLK_IS_ROOT, 250000000, },
-	{ HIX5HD2_FIXED_125M, "125m", NULL, CLK_IS_ROOT, 125000000, },
-	{ HIX5HD2_FIXED_2P02M, "2m", NULL, CLK_IS_ROOT, 2000000, },
-	{ HIX5HD2_FIXED_50M, "50m", NULL, CLK_IS_ROOT, 50000000, },
-	{ HIX5HD2_FIXED_25M, "25m", NULL, CLK_IS_ROOT, 25000000, },
-	{ HIX5HD2_FIXED_83M, "83m", NULL, CLK_IS_ROOT, 83333333, },
+	{ HIX5HD2_FIXED_1200M, "1200m", NULL, 0, 1200000000, },
+	{ HIX5HD2_FIXED_400M, "400m", NULL, 0, 400000000, },
+	{ HIX5HD2_FIXED_48M, "48m", NULL, 0, 48000000, },
+	{ HIX5HD2_FIXED_24M, "24m", NULL, 0, 24000000, },
+	{ HIX5HD2_FIXED_600M, "600m", NULL, 0, 600000000, },
+	{ HIX5HD2_FIXED_300M, "300m", NULL, 0, 300000000, },
+	{ HIX5HD2_FIXED_75M, "75m", NULL, 0, 75000000, },
+	{ HIX5HD2_FIXED_200M, "200m", NULL, 0, 200000000, },
+	{ HIX5HD2_FIXED_100M, "100m", NULL, 0, 100000000, },
+	{ HIX5HD2_FIXED_40M, "40m", NULL, 0, 40000000, },
+	{ HIX5HD2_FIXED_150M, "150m", NULL, 0, 150000000, },
+	{ HIX5HD2_FIXED_1728M, "1728m", NULL, 0, 1728000000, },
+	{ HIX5HD2_FIXED_28P8M, "28p8m", NULL, 0, 28000000, },
+	{ HIX5HD2_FIXED_432M, "432m", NULL, 0, 432000000, },
+	{ HIX5HD2_FIXED_345P6M, "345p6m", NULL, 0, 345000000, },
+	{ HIX5HD2_FIXED_288M, "288m", NULL, 0, 288000000, },
+	{ HIX5HD2_FIXED_60M,	"60m", NULL, 0, 60000000, },
+	{ HIX5HD2_FIXED_750M, "750m", NULL, 0, 750000000, },
+	{ HIX5HD2_FIXED_500M, "500m", NULL, 0, 500000000, },
+	{ HIX5HD2_FIXED_54M,	"54m", NULL, 0, 54000000, },
+	{ HIX5HD2_FIXED_27M, "27m", NULL, 0, 27000000, },
+	{ HIX5HD2_FIXED_1500M, "1500m", NULL, 0, 1500000000, },
+	{ HIX5HD2_FIXED_375M, "375m", NULL, 0, 375000000, },
+	{ HIX5HD2_FIXED_187M, "187m", NULL, 0, 187000000, },
+	{ HIX5HD2_FIXED_250M, "250m", NULL, 0, 250000000, },
+	{ HIX5HD2_FIXED_125M, "125m", NULL, 0, 125000000, },
+	{ HIX5HD2_FIXED_2P02M, "2m", NULL, 0, 2000000, },
+	{ HIX5HD2_FIXED_50M, "50m", NULL, 0, 50000000, },
+	{ HIX5HD2_FIXED_25M, "25m", NULL, 0, 25000000, },
+	{ HIX5HD2_FIXED_83M, "83m", NULL, 0, 83333333, },
 };
 
 static const char *const sfc_mux_p[] __initconst = {

diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c
index f0efc6f..02e1818 100644
--- a/drivers/clk/imx/clk-imx6q.c
+++ b/drivers/clk/imx/clk-imx6q.c

@@ -34,7 +34,9 @@
 static const char *axi_sels[]		= { "periph", "pll2_pfd2_396m", "periph", "pll3_pfd1_540m", };
 static const char *audio_sels[]	= { "pll4_audio_div", "pll3_pfd2_508m", "pll3_pfd3_454m", "pll3_usb_otg", };
 static const char *gpu_axi_sels[]	= { "axi", "ahb", };
+static const char *pre_axi_sels[]	= { "axi", "ahb", };
 static const char *gpu2d_core_sels[]	= { "axi", "pll3_usb_otg", "pll2_pfd0_352m", "pll2_pfd2_396m", };
+static const char *gpu2d_core_sels_2[]	= { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll3_pfd0_720m",};
 static const char *gpu3d_core_sels[]	= { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll2_pfd2_396m", };
 static const char *gpu3d_shader_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll3_pfd0_720m", };
 static const char *ipu_sels[]		= { "mmdc_ch0_axi", "pll2_pfd2_396m", "pll3_120m", "pll3_pfd1_540m", };
@@ -44,15 +46,24 @@
 static const char *ipu1_di1_sels[]	= { "ipu1_di1_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", };
 static const char *ipu2_di0_sels[]	= { "ipu2_di0_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", };
 static const char *ipu2_di1_sels[]	= { "ipu2_di1_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", };
+static const char *ipu1_di0_sels_2[]	= { "ipu1_di0_pre", "dummy", "dummy", "ldb_di0_podf", "ldb_di1_podf", };
+static const char *ipu1_di1_sels_2[]	= { "ipu1_di1_pre", "dummy", "dummy", "ldb_di0_podf", "ldb_di1_podf", };
+static const char *ipu2_di0_sels_2[]	= { "ipu2_di0_pre", "dummy", "dummy", "ldb_di0_podf", "ldb_di1_podf", };
+static const char *ipu2_di1_sels_2[]	= { "ipu2_di1_pre", "dummy", "dummy", "ldb_di0_podf", "ldb_di1_podf", };
 static const char *hsi_tx_sels[]	= { "pll3_120m", "pll2_pfd2_396m", };
 static const char *pcie_axi_sels[]	= { "axi", "ahb", };
 static const char *ssi_sels[]		= { "pll3_pfd2_508m", "pll3_pfd3_454m", "pll4_audio_div", };
 static const char *usdhc_sels[]	= { "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *enfc_sels[]	= { "pll2_pfd0_352m", "pll2_bus", "pll3_usb_otg", "pll2_pfd2_396m", };
+static const char *enfc_sels_2[] = {"pll2_pfd0_352m", "pll2_bus", "pll3_usb_otg", "pll2_pfd2_396m", "pll3_pfd3_454m", "dummy", };
 static const char *eim_sels[]		= { "pll2_pfd2_396m", "pll3_usb_otg", "axi", "pll2_pfd0_352m", };
 static const char *eim_slow_sels[]      = { "axi", "pll3_usb_otg", "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *vdo_axi_sels[]	= { "axi", "ahb", };
 static const char *vpu_axi_sels[]	= { "axi", "pll2_pfd2_396m", "pll2_pfd0_352m", };
+static const char *uart_sels[] = { "pll3_80m", "osc", };
+static const char *ipg_per_sels[] = { "ipg", "osc", };
+static const char *ecspi_sels[] = { "pll3_60m", "osc", };
+static const char *can_sels[] = { "pll3_60m", "osc", "pll3_80m", };
 static const char *cko1_sels[]	= { "pll3_usb_otg", "pll2_bus", "pll1_sys", "pll5_video_div",
 				    "dummy", "axi", "enfc", "ipu1_di0", "ipu1_di1", "ipu2_di0",
 				    "ipu2_di1", "ahb", "ipg", "ipg_per", "ckil", "pll4_audio_div", };
@@ -121,12 +132,19 @@
 static unsigned int share_count_ssi3;
 static unsigned int share_count_mipi_core_cfg;
 static unsigned int share_count_spdif;
+static unsigned int share_count_prg0;
+static unsigned int share_count_prg1;
 
 static inline int clk_on_imx6q(void)
 {
 	return of_machine_is_compatible("fsl,imx6q");
 }
 
+static inline int clk_on_imx6qp(void)
+{
+	return of_machine_is_compatible("fsl,imx6qp");
+}
+
 static inline int clk_on_imx6dl(void)
 {
 	return of_machine_is_compatible("fsl,imx6dl");
@@ -265,7 +283,7 @@
 	clk[IMX6QDL_CLK_TWD]       = imx_clk_fixed_factor("twd",       "arm",            1, 2);
 	clk[IMX6QDL_CLK_GPT_3M]    = imx_clk_fixed_factor("gpt_3m",    "osc",            1, 8);
 	clk[IMX6QDL_CLK_VIDEO_27M] = imx_clk_fixed_factor("video_27m", "pll3_pfd1_540m", 1, 20);
-	if (clk_on_imx6dl()) {
+	if (clk_on_imx6dl() || clk_on_imx6qp()) {
 		clk[IMX6QDL_CLK_GPU2D_AXI] = imx_clk_fixed_factor("gpu2d_axi", "mmdc_ch0_axi_podf", 1, 1);
 		clk[IMX6QDL_CLK_GPU3D_AXI] = imx_clk_fixed_factor("gpu3d_axi", "mmdc_ch0_axi_podf", 1, 1);
 	}
@@ -294,7 +312,15 @@
 		clk[IMX6QDL_CLK_GPU2D_AXI]        = imx_clk_mux("gpu2d_axi",        base + 0x18, 0,  1, gpu_axi_sels,      ARRAY_SIZE(gpu_axi_sels));
 		clk[IMX6QDL_CLK_GPU3D_AXI]        = imx_clk_mux("gpu3d_axi",        base + 0x18, 1,  1, gpu_axi_sels,      ARRAY_SIZE(gpu_axi_sels));
 	}
-	clk[IMX6QDL_CLK_GPU2D_CORE_SEL]   = imx_clk_mux("gpu2d_core_sel",   base + 0x18, 16, 2, gpu2d_core_sels,   ARRAY_SIZE(gpu2d_core_sels));
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_CAN_SEL]   = imx_clk_mux("can_sel",	base + 0x20, 8,  2, can_sels, ARRAY_SIZE(can_sels));
+		clk[IMX6QDL_CLK_ECSPI_SEL] = imx_clk_mux("ecspi_sel",	base + 0x38, 18, 1, ecspi_sels,  ARRAY_SIZE(ecspi_sels));
+		clk[IMX6QDL_CLK_IPG_PER_SEL] = imx_clk_mux("ipg_per_sel", base + 0x1c, 6, 1, ipg_per_sels, ARRAY_SIZE(ipg_per_sels));
+		clk[IMX6QDL_CLK_UART_SEL] = imx_clk_mux("uart_sel", base + 0x24, 6, 1, uart_sels, ARRAY_SIZE(uart_sels));
+		clk[IMX6QDL_CLK_GPU2D_CORE_SEL] = imx_clk_mux("gpu2d_core_sel", base + 0x18, 16, 2, gpu2d_core_sels_2, ARRAY_SIZE(gpu2d_core_sels_2));
+	} else {
+		clk[IMX6QDL_CLK_GPU2D_CORE_SEL] = imx_clk_mux("gpu2d_core_sel",   base + 0x18, 16, 2, gpu2d_core_sels,   ARRAY_SIZE(gpu2d_core_sels));
+	}
 	clk[IMX6QDL_CLK_GPU3D_CORE_SEL]   = imx_clk_mux("gpu3d_core_sel",   base + 0x18, 4,  2, gpu3d_core_sels,   ARRAY_SIZE(gpu3d_core_sels));
 	clk[IMX6QDL_CLK_GPU3D_SHADER_SEL] = imx_clk_mux("gpu3d_shader_sel", base + 0x18, 8,  2, gpu3d_shader_sels, ARRAY_SIZE(gpu3d_shader_sels));
 	clk[IMX6QDL_CLK_IPU1_SEL]         = imx_clk_mux("ipu1_sel",         base + 0x3c, 9,  2, ipu_sels,          ARRAY_SIZE(ipu_sels));
@@ -305,22 +331,40 @@
 	clk[IMX6QDL_CLK_IPU1_DI1_PRE_SEL] = imx_clk_mux_flags("ipu1_di1_pre_sel", base + 0x34, 15, 3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
 	clk[IMX6QDL_CLK_IPU2_DI0_PRE_SEL] = imx_clk_mux_flags("ipu2_di0_pre_sel", base + 0x38, 6,  3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
 	clk[IMX6QDL_CLK_IPU2_DI1_PRE_SEL] = imx_clk_mux_flags("ipu2_di1_pre_sel", base + 0x38, 15, 3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
-	clk[IMX6QDL_CLK_IPU1_DI0_SEL]     = imx_clk_mux_flags("ipu1_di0_sel",     base + 0x34, 0,  3, ipu1_di0_sels,     ARRAY_SIZE(ipu1_di0_sels), CLK_SET_RATE_PARENT);
-	clk[IMX6QDL_CLK_IPU1_DI1_SEL]     = imx_clk_mux_flags("ipu1_di1_sel",     base + 0x34, 9,  3, ipu1_di1_sels,     ARRAY_SIZE(ipu1_di1_sels), CLK_SET_RATE_PARENT);
-	clk[IMX6QDL_CLK_IPU2_DI0_SEL]     = imx_clk_mux_flags("ipu2_di0_sel",     base + 0x38, 0,  3, ipu2_di0_sels,     ARRAY_SIZE(ipu2_di0_sels), CLK_SET_RATE_PARENT);
-	clk[IMX6QDL_CLK_IPU2_DI1_SEL]     = imx_clk_mux_flags("ipu2_di1_sel",     base + 0x38, 9,  3, ipu2_di1_sels,     ARRAY_SIZE(ipu2_di1_sels), CLK_SET_RATE_PARENT);
 	clk[IMX6QDL_CLK_HSI_TX_SEL]       = imx_clk_mux("hsi_tx_sel",       base + 0x30, 28, 1, hsi_tx_sels,       ARRAY_SIZE(hsi_tx_sels));
 	clk[IMX6QDL_CLK_PCIE_AXI_SEL]     = imx_clk_mux("pcie_axi_sel",     base + 0x18, 10, 1, pcie_axi_sels,     ARRAY_SIZE(pcie_axi_sels));
-	clk[IMX6QDL_CLK_SSI1_SEL]         = imx_clk_fixup_mux("ssi1_sel",   base + 0x1c, 10, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_SSI2_SEL]         = imx_clk_fixup_mux("ssi2_sel",   base + 0x1c, 12, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_SSI3_SEL]         = imx_clk_fixup_mux("ssi3_sel",   base + 0x1c, 14, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_USDHC1_SEL]       = imx_clk_fixup_mux("usdhc1_sel", base + 0x1c, 16, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_USDHC2_SEL]       = imx_clk_fixup_mux("usdhc2_sel", base + 0x1c, 17, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_USDHC3_SEL]       = imx_clk_fixup_mux("usdhc3_sel", base + 0x1c, 18, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_USDHC4_SEL]       = imx_clk_fixup_mux("usdhc4_sel", base + 0x1c, 19, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_ENFC_SEL]         = imx_clk_mux("enfc_sel",         base + 0x2c, 16, 2, enfc_sels,         ARRAY_SIZE(enfc_sels));
-	clk[IMX6QDL_CLK_EIM_SEL]          = imx_clk_fixup_mux("eim_sel",      base + 0x1c, 27, 2, eim_sels,        ARRAY_SIZE(eim_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_EIM_SLOW_SEL]     = imx_clk_fixup_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels,   ARRAY_SIZE(eim_slow_sels), imx_cscmr1_fixup);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_IPU1_DI0_SEL]     = imx_clk_mux_flags("ipu1_di0_sel",     base + 0x34, 0,  3, ipu1_di0_sels_2,     ARRAY_SIZE(ipu1_di0_sels_2), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU1_DI1_SEL]     = imx_clk_mux_flags("ipu1_di1_sel",     base + 0x34, 9,  3, ipu1_di1_sels_2,     ARRAY_SIZE(ipu1_di1_sels_2), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU2_DI0_SEL]     = imx_clk_mux_flags("ipu2_di0_sel",     base + 0x38, 0,  3, ipu2_di0_sels_2,     ARRAY_SIZE(ipu2_di0_sels_2), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU2_DI1_SEL]     = imx_clk_mux_flags("ipu2_di1_sel",     base + 0x38, 9,  3, ipu2_di1_sels_2,     ARRAY_SIZE(ipu2_di1_sels_2), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_SSI1_SEL]         = imx_clk_mux("ssi1_sel",   base + 0x1c, 10, 2, ssi_sels,          ARRAY_SIZE(ssi_sels));
+		clk[IMX6QDL_CLK_SSI2_SEL]         = imx_clk_mux("ssi2_sel",   base + 0x1c, 12, 2, ssi_sels,          ARRAY_SIZE(ssi_sels));
+		clk[IMX6QDL_CLK_SSI3_SEL]         = imx_clk_mux("ssi3_sel",   base + 0x1c, 14, 2, ssi_sels,          ARRAY_SIZE(ssi_sels));
+		clk[IMX6QDL_CLK_USDHC1_SEL]       = imx_clk_mux("usdhc1_sel", base + 0x1c, 16, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
+		clk[IMX6QDL_CLK_USDHC2_SEL]       = imx_clk_mux("usdhc2_sel", base + 0x1c, 17, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
+		clk[IMX6QDL_CLK_USDHC3_SEL]       = imx_clk_mux("usdhc3_sel", base + 0x1c, 18, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
+		clk[IMX6QDL_CLK_USDHC4_SEL]       = imx_clk_mux("usdhc4_sel", base + 0x1c, 19, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
+		clk[IMX6QDL_CLK_ENFC_SEL]         = imx_clk_mux("enfc_sel",         base + 0x2c, 15, 3, enfc_sels_2,         ARRAY_SIZE(enfc_sels_2));
+		clk[IMX6QDL_CLK_EIM_SEL]          = imx_clk_mux("eim_sel",      base + 0x1c, 27, 2, eim_sels,        ARRAY_SIZE(eim_sels));
+		clk[IMX6QDL_CLK_EIM_SLOW_SEL]     = imx_clk_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels,   ARRAY_SIZE(eim_slow_sels));
+		clk[IMX6QDL_CLK_PRE_AXI]	  = imx_clk_mux("pre_axi",	base + 0x18, 1,  1, pre_axi_sels,    ARRAY_SIZE(pre_axi_sels));
+	} else {
+		clk[IMX6QDL_CLK_IPU1_DI0_SEL]     = imx_clk_mux_flags("ipu1_di0_sel",     base + 0x34, 0,  3, ipu1_di0_sels,     ARRAY_SIZE(ipu1_di0_sels), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU1_DI1_SEL]     = imx_clk_mux_flags("ipu1_di1_sel",     base + 0x34, 9,  3, ipu1_di1_sels,     ARRAY_SIZE(ipu1_di1_sels), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU2_DI0_SEL]     = imx_clk_mux_flags("ipu2_di0_sel",     base + 0x38, 0,  3, ipu2_di0_sels,     ARRAY_SIZE(ipu2_di0_sels), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU2_DI1_SEL]     = imx_clk_mux_flags("ipu2_di1_sel",     base + 0x38, 9,  3, ipu2_di1_sels,     ARRAY_SIZE(ipu2_di1_sels), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_SSI1_SEL]         = imx_clk_fixup_mux("ssi1_sel",   base + 0x1c, 10, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_SSI2_SEL]         = imx_clk_fixup_mux("ssi2_sel",   base + 0x1c, 12, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_SSI3_SEL]         = imx_clk_fixup_mux("ssi3_sel",   base + 0x1c, 14, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_USDHC1_SEL]       = imx_clk_fixup_mux("usdhc1_sel", base + 0x1c, 16, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_USDHC2_SEL]       = imx_clk_fixup_mux("usdhc2_sel", base + 0x1c, 17, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_USDHC3_SEL]       = imx_clk_fixup_mux("usdhc3_sel", base + 0x1c, 18, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_USDHC4_SEL]       = imx_clk_fixup_mux("usdhc4_sel", base + 0x1c, 19, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_ENFC_SEL]         = imx_clk_mux("enfc_sel",         base + 0x2c, 16, 2, enfc_sels,         ARRAY_SIZE(enfc_sels));
+		clk[IMX6QDL_CLK_EIM_SEL]          = imx_clk_fixup_mux("eim_sel",      base + 0x1c, 27, 2, eim_sels,        ARRAY_SIZE(eim_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_EIM_SLOW_SEL]     = imx_clk_fixup_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels,   ARRAY_SIZE(eim_slow_sels), imx_cscmr1_fixup);
+	}
 	clk[IMX6QDL_CLK_VDO_AXI_SEL]      = imx_clk_mux("vdo_axi_sel",      base + 0x18, 11, 1, vdo_axi_sels,      ARRAY_SIZE(vdo_axi_sels));
 	clk[IMX6QDL_CLK_VPU_AXI_SEL]      = imx_clk_mux("vpu_axi_sel",      base + 0x18, 14, 2, vpu_axi_sels,      ARRAY_SIZE(vpu_axi_sels));
 	clk[IMX6QDL_CLK_CKO1_SEL]         = imx_clk_mux("cko1_sel",         base + 0x60, 0,  4, cko1_sels,         ARRAY_SIZE(cko1_sels));
@@ -335,23 +379,33 @@
 	clk[IMX6QDL_CLK_PERIPH_CLK2]      = imx_clk_divider("periph_clk2",      "periph_clk2_sel",   base + 0x14, 27, 3);
 	clk[IMX6QDL_CLK_PERIPH2_CLK2]     = imx_clk_divider("periph2_clk2",     "periph2_clk2_sel",  base + 0x14, 0,  3);
 	clk[IMX6QDL_CLK_IPG]              = imx_clk_divider("ipg",              "ahb",               base + 0x14, 8,  2);
-	clk[IMX6QDL_CLK_IPG_PER]          = imx_clk_fixup_divider("ipg_per",    "ipg",               base + 0x1c, 0,  6, imx_cscmr1_fixup);
 	clk[IMX6QDL_CLK_ESAI_PRED]        = imx_clk_divider("esai_pred",        "esai_sel",          base + 0x28, 9,  3);
 	clk[IMX6QDL_CLK_ESAI_PODF]        = imx_clk_divider("esai_podf",        "esai_pred",         base + 0x28, 25, 3);
 	clk[IMX6QDL_CLK_ASRC_PRED]        = imx_clk_divider("asrc_pred",        "asrc_sel",          base + 0x30, 12, 3);
 	clk[IMX6QDL_CLK_ASRC_PODF]        = imx_clk_divider("asrc_podf",        "asrc_pred",         base + 0x30, 9,  3);
 	clk[IMX6QDL_CLK_SPDIF_PRED]       = imx_clk_divider("spdif_pred",       "spdif_sel",         base + 0x30, 25, 3);
 	clk[IMX6QDL_CLK_SPDIF_PODF]       = imx_clk_divider("spdif_podf",       "spdif_pred",        base + 0x30, 22, 3);
-	clk[IMX6QDL_CLK_CAN_ROOT]         = imx_clk_divider("can_root",         "pll3_60m",          base + 0x20, 2,  6);
-	clk[IMX6QDL_CLK_ECSPI_ROOT]       = imx_clk_divider("ecspi_root",       "pll3_60m",          base + 0x38, 19, 6);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_IPG_PER] = imx_clk_divider("ipg_per", "ipg_per_sel", base + 0x1c, 0, 6);
+		clk[IMX6QDL_CLK_ECSPI_ROOT] = imx_clk_divider("ecspi_root", "ecspi_sel", base + 0x38, 19, 6);
+		clk[IMX6QDL_CLK_CAN_ROOT] = imx_clk_divider("can_root", "can_sel", base + 0x20, 2, 6);
+		clk[IMX6QDL_CLK_UART_SERIAL_PODF] = imx_clk_divider("uart_serial_podf", "uart_sel", base + 0x24, 0, 6);
+		clk[IMX6QDL_CLK_LDB_DI0_DIV_3_5] = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0", 2, 7);
+		clk[IMX6QDL_CLK_LDB_DI1_DIV_3_5] = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1", 2, 7);
+	} else {
+		clk[IMX6QDL_CLK_ECSPI_ROOT] = imx_clk_divider("ecspi_root", "pll3_60m", base + 0x38, 19, 6);
+		clk[IMX6QDL_CLK_CAN_ROOT] = imx_clk_divider("can_root", "pll3_60", base + 0x20, 2, 6);
+		clk[IMX6QDL_CLK_IPG_PER] = imx_clk_fixup_divider("ipg_per", "ipg", base + 0x1c, 0, 6, imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_UART_SERIAL_PODF] = imx_clk_divider("uart_serial_podf", "pll3_80m",          base + 0x24, 0,  6);
+		clk[IMX6QDL_CLK_LDB_DI0_DIV_3_5] = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0_sel", 2, 7);
+		clk[IMX6QDL_CLK_LDB_DI1_DIV_3_5] = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1_sel", 2, 7);
+	}
 	clk[IMX6QDL_CLK_GPU2D_CORE_PODF]  = imx_clk_divider("gpu2d_core_podf",  "gpu2d_core_sel",    base + 0x18, 23, 3);
 	clk[IMX6QDL_CLK_GPU3D_CORE_PODF]  = imx_clk_divider("gpu3d_core_podf",  "gpu3d_core_sel",    base + 0x18, 26, 3);
 	clk[IMX6QDL_CLK_GPU3D_SHADER]     = imx_clk_divider("gpu3d_shader",     "gpu3d_shader_sel",  base + 0x18, 29, 3);
 	clk[IMX6QDL_CLK_IPU1_PODF]        = imx_clk_divider("ipu1_podf",        "ipu1_sel",          base + 0x3c, 11, 3);
 	clk[IMX6QDL_CLK_IPU2_PODF]        = imx_clk_divider("ipu2_podf",        "ipu2_sel",          base + 0x3c, 16, 3);
-	clk[IMX6QDL_CLK_LDB_DI0_DIV_3_5]  = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0_sel", 2, 7);
 	clk[IMX6QDL_CLK_LDB_DI0_PODF]     = imx_clk_divider_flags("ldb_di0_podf", "ldb_di0_div_3_5", base + 0x20, 10, 1, 0);
-	clk[IMX6QDL_CLK_LDB_DI1_DIV_3_5]  = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1_sel", 2, 7);
 	clk[IMX6QDL_CLK_LDB_DI1_PODF]     = imx_clk_divider_flags("ldb_di1_podf", "ldb_di1_div_3_5", base + 0x20, 11, 1, 0);
 	clk[IMX6QDL_CLK_IPU1_DI0_PRE]     = imx_clk_divider("ipu1_di0_pre",     "ipu1_di0_pre_sel",  base + 0x34, 3,  3);
 	clk[IMX6QDL_CLK_IPU1_DI1_PRE]     = imx_clk_divider("ipu1_di1_pre",     "ipu1_di1_pre_sel",  base + 0x34, 12, 3);
@@ -364,15 +418,19 @@
 	clk[IMX6QDL_CLK_SSI2_PODF]        = imx_clk_divider("ssi2_podf",        "ssi2_pred",         base + 0x2c, 0,  6);
 	clk[IMX6QDL_CLK_SSI3_PRED]        = imx_clk_divider("ssi3_pred",        "ssi3_sel",          base + 0x28, 22, 3);
 	clk[IMX6QDL_CLK_SSI3_PODF]        = imx_clk_divider("ssi3_podf",        "ssi3_pred",         base + 0x28, 16, 6);
-	clk[IMX6QDL_CLK_UART_SERIAL_PODF] = imx_clk_divider("uart_serial_podf", "pll3_80m",          base + 0x24, 0,  6);
 	clk[IMX6QDL_CLK_USDHC1_PODF]      = imx_clk_divider("usdhc1_podf",      "usdhc1_sel",        base + 0x24, 11, 3);
 	clk[IMX6QDL_CLK_USDHC2_PODF]      = imx_clk_divider("usdhc2_podf",      "usdhc2_sel",        base + 0x24, 16, 3);
 	clk[IMX6QDL_CLK_USDHC3_PODF]      = imx_clk_divider("usdhc3_podf",      "usdhc3_sel",        base + 0x24, 19, 3);
 	clk[IMX6QDL_CLK_USDHC4_PODF]      = imx_clk_divider("usdhc4_podf",      "usdhc4_sel",        base + 0x24, 22, 3);
 	clk[IMX6QDL_CLK_ENFC_PRED]        = imx_clk_divider("enfc_pred",        "enfc_sel",          base + 0x2c, 18, 3);
 	clk[IMX6QDL_CLK_ENFC_PODF]        = imx_clk_divider("enfc_podf",        "enfc_pred",         base + 0x2c, 21, 6);
-	clk[IMX6QDL_CLK_EIM_PODF]         = imx_clk_fixup_divider("eim_podf",   "eim_sel",           base + 0x1c, 20, 3, imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_EIM_SLOW_PODF]    = imx_clk_fixup_divider("eim_slow_podf", "eim_slow_sel",   base + 0x1c, 23, 3, imx_cscmr1_fixup);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_EIM_PODF]         = imx_clk_divider("eim_podf",   "eim_sel",           base + 0x1c, 20, 3);
+		clk[IMX6QDL_CLK_EIM_SLOW_PODF]    = imx_clk_divider("eim_slow_podf", "eim_slow_sel",   base + 0x1c, 23, 3);
+	} else {
+		clk[IMX6QDL_CLK_EIM_PODF]         = imx_clk_fixup_divider("eim_podf",   "eim_sel",           base + 0x1c, 20, 3, imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_EIM_SLOW_PODF]    = imx_clk_fixup_divider("eim_slow_podf", "eim_slow_sel",   base + 0x1c, 23, 3, imx_cscmr1_fixup);
+	}
 	clk[IMX6QDL_CLK_VPU_AXI_PODF]     = imx_clk_divider("vpu_axi_podf",     "vpu_axi_sel",       base + 0x24, 25, 3);
 	clk[IMX6QDL_CLK_CKO1_PODF]        = imx_clk_divider("cko1_podf",        "cko1_sel",          base + 0x60, 4,  3);
 	clk[IMX6QDL_CLK_CKO2_PODF]        = imx_clk_divider("cko2_podf",        "cko2_sel",          base + 0x60, 21, 3);
@@ -380,7 +438,12 @@
 	/*                                                        name                 parent_name    reg        shift width busy: reg, shift */
 	clk[IMX6QDL_CLK_AXI]               = imx_clk_busy_divider("axi",               "axi_sel",     base + 0x14, 16,  3,   base + 0x48, 0);
 	clk[IMX6QDL_CLK_MMDC_CH0_AXI_PODF] = imx_clk_busy_divider("mmdc_ch0_axi_podf", "periph",      base + 0x14, 19,  3,   base + 0x48, 4);
-	clk[IMX6QDL_CLK_MMDC_CH1_AXI_PODF] = imx_clk_busy_divider("mmdc_ch1_axi_podf", "periph2",     base + 0x14, 3,   3,   base + 0x48, 2);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_MMDC_CH1_AXI_CG] = imx_clk_gate("mmdc_ch1_axi_cg", "periph2", base + 0x4, 18);
+		clk[IMX6QDL_CLK_MMDC_CH1_AXI_PODF] = imx_clk_busy_divider("mmdc_ch1_axi_podf", "mmdc_ch1_axi_cg", base + 0x14, 3, 3, base + 0x48, 2);
+	} else {
+		clk[IMX6QDL_CLK_MMDC_CH1_AXI_PODF] = imx_clk_busy_divider("mmdc_ch1_axi_podf", "periph2",     base + 0x14, 3,   3,   base + 0x48, 2);
+	}
 	clk[IMX6QDL_CLK_ARM]               = imx_clk_busy_divider("arm",               "pll1_sw",     base + 0x10, 0,   3,   base + 0x48, 16);
 	clk[IMX6QDL_CLK_AHB]               = imx_clk_busy_divider("ahb",               "periph",      base + 0x14, 10,  3,   base + 0x48, 1);
 
@@ -432,8 +495,13 @@
 	clk[IMX6QDL_CLK_IPU1_DI1]     = imx_clk_gate2("ipu1_di1",      "ipu1_di1_sel",      base + 0x74, 4);
 	clk[IMX6QDL_CLK_IPU2]         = imx_clk_gate2("ipu2",          "ipu2_podf",         base + 0x74, 6);
 	clk[IMX6QDL_CLK_IPU2_DI0]     = imx_clk_gate2("ipu2_di0",      "ipu2_di0_sel",      base + 0x74, 8);
-	clk[IMX6QDL_CLK_LDB_DI0]      = imx_clk_gate2("ldb_di0",       "ldb_di0_podf",      base + 0x74, 12);
-	clk[IMX6QDL_CLK_LDB_DI1]      = imx_clk_gate2("ldb_di1",       "ldb_di1_podf",      base + 0x74, 14);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_LDB_DI0]      = imx_clk_gate2("ldb_di0",       "ldb_di0_sel",      base + 0x74, 12);
+		clk[IMX6QDL_CLK_LDB_DI1]      = imx_clk_gate2("ldb_di1",       "ldb_di1_sel",      base + 0x74, 14);
+	} else {
+		clk[IMX6QDL_CLK_LDB_DI0]      = imx_clk_gate2("ldb_di0",       "ldb_di0_podf",      base + 0x74, 12);
+		clk[IMX6QDL_CLK_LDB_DI1]      = imx_clk_gate2("ldb_di1",       "ldb_di1_podf",      base + 0x74, 14);
+	}
 	clk[IMX6QDL_CLK_IPU2_DI1]     = imx_clk_gate2("ipu2_di1",      "ipu2_di1_sel",      base + 0x74, 10);
 	clk[IMX6QDL_CLK_HSI_TX]       = imx_clk_gate2_shared("hsi_tx", "hsi_tx_podf",       base + 0x74, 16, &share_count_mipi_core_cfg);
 	clk[IMX6QDL_CLK_MIPI_CORE_CFG] = imx_clk_gate2_shared("mipi_core_cfg", "video_27m", base + 0x74, 16, &share_count_mipi_core_cfg);
@@ -482,6 +550,16 @@
 	clk[IMX6QDL_CLK_EIM_SLOW]     = imx_clk_gate2("eim_slow",      "eim_slow_podf",     base + 0x80, 10);
 	clk[IMX6QDL_CLK_VDO_AXI]      = imx_clk_gate2("vdo_axi",       "vdo_axi_sel",       base + 0x80, 12);
 	clk[IMX6QDL_CLK_VPU_AXI]      = imx_clk_gate2("vpu_axi",       "vpu_axi_podf",      base + 0x80, 14);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_PRE0] = imx_clk_gate2("pre0",	       "pre_axi",	    base + 0x80, 16);
+		clk[IMX6QDL_CLK_PRE1] = imx_clk_gate2("pre1",	       "pre_axi",	    base + 0x80, 18);
+		clk[IMX6QDL_CLK_PRE2] = imx_clk_gate2("pre2",	       "pre_axi",         base + 0x80, 20);
+		clk[IMX6QDL_CLK_PRE3] = imx_clk_gate2("pre3",	       "pre_axi",	    base + 0x80, 22);
+		clk[IMX6QDL_CLK_PRG0_AXI] = imx_clk_gate2_shared("prg0_axi",  "ipu1_podf",  base + 0x80, 24, &share_count_prg0);
+		clk[IMX6QDL_CLK_PRG1_AXI] = imx_clk_gate2_shared("prg1_axi",  "ipu2_podf",  base + 0x80, 26, &share_count_prg1);
+		clk[IMX6QDL_CLK_PRG0_APB] = imx_clk_gate2_shared("prg0_apb",  "ipg",	    base + 0x80, 24, &share_count_prg0);
+		clk[IMX6QDL_CLK_PRG1_APB] = imx_clk_gate2_shared("prg1_apb",  "ipg",	    base + 0x80, 26, &share_count_prg1);
+	}
 	clk[IMX6QDL_CLK_CKO1]         = imx_clk_gate("cko1",           "cko1_podf",         base + 0x60, 7);
 	clk[IMX6QDL_CLK_CKO2]         = imx_clk_gate("cko2",           "cko2_podf",         base + 0x60, 24);
 

diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index 08692d7..0f1f17a 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c

@@ -157,9 +157,9 @@
 	clk_set_parent(clks[IMX6UL_PLL7_BYPASS], clks[IMX6UL_CLK_PLL7]);
 
 	clks[IMX6UL_CLK_PLL1_SYS]	= imx_clk_fixed_factor("pll1_sys",	"pll1_bypass", 1, 1);
-	clks[IMX6UL_CLK_PLL2_BUS]	= imx_clk_gate("pll2_bus", 	"pll2_bypass", base + 0x30, 13);
-	clks[IMX6UL_CLK_PLL3_USB_OTG]	= imx_clk_gate("pll3_usb_otg", 	"pll3_bypass", base + 0x10, 13);
-	clks[IMX6UL_CLK_PLL4_AUDIO]	= imx_clk_gate("pll4_audio", 	"pll4_bypass", base + 0x70, 13);
+	clks[IMX6UL_CLK_PLL2_BUS]	= imx_clk_gate("pll2_bus",	"pll2_bypass", base + 0x30, 13);
+	clks[IMX6UL_CLK_PLL3_USB_OTG]	= imx_clk_gate("pll3_usb_otg",	"pll3_bypass", base + 0x10, 13);
+	clks[IMX6UL_CLK_PLL4_AUDIO]	= imx_clk_gate("pll4_audio",	"pll4_bypass", base + 0x70, 13);
 	clks[IMX6UL_CLK_PLL5_VIDEO]	= imx_clk_gate("pll5_video",	"pll5_bypass", base + 0xa0, 13);
 	clks[IMX6UL_CLK_PLL6_ENET]	= imx_clk_gate("pll6_enet",	"pll6_bypass", base + 0xe0, 13);
 	clks[IMX6UL_CLK_PLL7_USB_HOST]	= imx_clk_gate("pll7_usb_host",	"pll7_bypass", base + 0x20, 13);
@@ -196,8 +196,8 @@
 			base + 0xe0, 2, 2, 0, clk_enet_ref_table, &imx_ccm_lock);
 
 	clks[IMX6UL_CLK_ENET2_REF_125M] = imx_clk_gate("enet_ref_125m", "enet2_ref", base + 0xe0, 20);
-	clks[IMX6UL_CLK_ENET_PTP_REF] 	= imx_clk_fixed_factor("enet_ptp_ref", "pll6_enet", 1, 20);
-	clks[IMX6UL_CLK_ENET_PTP] 	= imx_clk_gate("enet_ptp", "enet_ptp_ref", base + 0xe0, 21);
+	clks[IMX6UL_CLK_ENET_PTP_REF]	= imx_clk_fixed_factor("enet_ptp_ref", "pll6_enet", 1, 20);
+	clks[IMX6UL_CLK_ENET_PTP]	= imx_clk_gate("enet_ptp", "enet_ptp_ref", base + 0xe0, 21);
 
 	clks[IMX6UL_CLK_PLL4_POST_DIV]  = clk_register_divider_table(NULL, "pll4_post_div", "pll4_audio",
 		 CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0x70, 19, 2, 0, post_div_table, &imx_ccm_lock);
@@ -210,8 +210,8 @@
 
 	/*						   name		parent_name	 mult  div */
 	clks[IMX6UL_CLK_PLL2_198M] = imx_clk_fixed_factor("pll2_198m", "pll2_pfd2_396m", 1,	2);
-	clks[IMX6UL_CLK_PLL3_80M]  = imx_clk_fixed_factor("pll3_80m",  "pll3_usb_otg",   1, 	6);
-	clks[IMX6UL_CLK_PLL3_60M]  = imx_clk_fixed_factor("pll3_60m",  "pll3_usb_otg",   1, 	8);
+	clks[IMX6UL_CLK_PLL3_80M]  = imx_clk_fixed_factor("pll3_80m",  "pll3_usb_otg",   1,	6);
+	clks[IMX6UL_CLK_PLL3_60M]  = imx_clk_fixed_factor("pll3_60m",  "pll3_usb_otg",   1,	8);
 	clks[IMX6UL_CLK_GPT_3M]	   = imx_clk_fixed_factor("gpt_3m",	"osc",		 1,	8);
 
 	np = ccm_node;
@@ -219,34 +219,34 @@
 	WARN_ON(!base);
 
 	clks[IMX6UL_CA7_SECONDARY_SEL]	  = imx_clk_mux("ca7_secondary_sel", base + 0xc, 3, 1, ca7_secondary_sels, ARRAY_SIZE(ca7_secondary_sels));
-	clks[IMX6UL_CLK_STEP] 	 	  = imx_clk_mux("step", base + 0x0c, 8, 1, step_sels, ARRAY_SIZE(step_sels));
-	clks[IMX6UL_CLK_PLL1_SW] 	  = imx_clk_mux_flags("pll1_sw",   base + 0x0c, 2,  1, pll1_sw_sels, ARRAY_SIZE(pll1_sw_sels), 0);
+	clks[IMX6UL_CLK_STEP]		  = imx_clk_mux("step", base + 0x0c, 8, 1, step_sels, ARRAY_SIZE(step_sels));
+	clks[IMX6UL_CLK_PLL1_SW]	  = imx_clk_mux_flags("pll1_sw",   base + 0x0c, 2,  1, pll1_sw_sels, ARRAY_SIZE(pll1_sw_sels), 0);
 	clks[IMX6UL_CLK_AXI_ALT_SEL]	  = imx_clk_mux("axi_alt_sel",		base + 0x14, 7,  1, axi_alt_sels, ARRAY_SIZE(axi_alt_sels));
-	clks[IMX6UL_CLK_AXI_SEL] 	  = imx_clk_mux_flags("axi_sel", 	base + 0x14, 6,  1, axi_sels, ARRAY_SIZE(axi_sels), 0);
-	clks[IMX6UL_CLK_PERIPH_PRE] 	  = imx_clk_mux("periph_pre",       base + 0x18, 18, 2, periph_pre_sels, ARRAY_SIZE(periph_pre_sels));
-	clks[IMX6UL_CLK_PERIPH2_PRE] 	  = imx_clk_mux("periph2_pre",      base + 0x18, 21, 2, periph2_pre_sels, ARRAY_SIZE(periph2_pre_sels));
+	clks[IMX6UL_CLK_AXI_SEL]	  = imx_clk_mux_flags("axi_sel",	base + 0x14, 6,  1, axi_sels, ARRAY_SIZE(axi_sels), 0);
+	clks[IMX6UL_CLK_PERIPH_PRE]	  = imx_clk_mux("periph_pre",       base + 0x18, 18, 2, periph_pre_sels, ARRAY_SIZE(periph_pre_sels));
+	clks[IMX6UL_CLK_PERIPH2_PRE]	  = imx_clk_mux("periph2_pre",      base + 0x18, 21, 2, periph2_pre_sels, ARRAY_SIZE(periph2_pre_sels));
 	clks[IMX6UL_CLK_PERIPH_CLK2_SEL]  = imx_clk_mux("periph_clk2_sel",  base + 0x18, 12, 2, periph_clk2_sels, ARRAY_SIZE(periph_clk2_sels));
 	clks[IMX6UL_CLK_PERIPH2_CLK2_SEL] = imx_clk_mux("periph2_clk2_sel", base + 0x18, 20, 1, periph2_clk2_sels, ARRAY_SIZE(periph2_clk2_sels));
-	clks[IMX6UL_CLK_EIM_SLOW_SEL] 	  = imx_clk_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels, ARRAY_SIZE(eim_slow_sels));
+	clks[IMX6UL_CLK_EIM_SLOW_SEL]	  = imx_clk_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels, ARRAY_SIZE(eim_slow_sels));
 	clks[IMX6UL_CLK_GPMI_SEL]	  = imx_clk_mux("gpmi_sel",     base + 0x1c, 19, 1, gpmi_sels, ARRAY_SIZE(gpmi_sels));
-	clks[IMX6UL_CLK_BCH_SEL]      	  = imx_clk_mux("bch_sel", 	base + 0x1c, 18, 1, bch_sels, ARRAY_SIZE(bch_sels));
+	clks[IMX6UL_CLK_BCH_SEL]	  = imx_clk_mux("bch_sel",	base + 0x1c, 18, 1, bch_sels, ARRAY_SIZE(bch_sels));
 	clks[IMX6UL_CLK_USDHC2_SEL]	  = imx_clk_mux("usdhc2_sel",   base + 0x1c, 17, 1, usdhc_sels, ARRAY_SIZE(usdhc_sels));
 	clks[IMX6UL_CLK_USDHC1_SEL]	  = imx_clk_mux("usdhc1_sel",   base + 0x1c, 16, 1, usdhc_sels, ARRAY_SIZE(usdhc_sels));
-	clks[IMX6UL_CLK_SAI3_SEL]     	  = imx_clk_mux("sai3_sel",     base + 0x1c, 14, 2, sai_sels, ARRAY_SIZE(sai_sels));
+	clks[IMX6UL_CLK_SAI3_SEL]	  = imx_clk_mux("sai3_sel",     base + 0x1c, 14, 2, sai_sels, ARRAY_SIZE(sai_sels));
 	clks[IMX6UL_CLK_SAI2_SEL]         = imx_clk_mux("sai2_sel",     base + 0x1c, 12, 2, sai_sels, ARRAY_SIZE(sai_sels));
-	clks[IMX6UL_CLK_SAI1_SEL]    	  = imx_clk_mux("sai1_sel",     base + 0x1c, 10, 2, sai_sels, ARRAY_SIZE(sai_sels));
-	clks[IMX6UL_CLK_QSPI1_SEL] 	  = imx_clk_mux("qspi1_sel",    base + 0x1c, 7,  3, qspi1_sels, ARRAY_SIZE(qspi1_sels));
-	clks[IMX6UL_CLK_PERCLK_SEL] 	  = imx_clk_mux("perclk_sel",	base + 0x1c, 6,  1, perclk_sels, ARRAY_SIZE(perclk_sels));
-	clks[IMX6UL_CLK_CAN_SEL]      	  = imx_clk_mux("can_sel",	base + 0x20, 8,  2, can_sels, ARRAY_SIZE(can_sels));
+	clks[IMX6UL_CLK_SAI1_SEL]	  = imx_clk_mux("sai1_sel",     base + 0x1c, 10, 2, sai_sels, ARRAY_SIZE(sai_sels));
+	clks[IMX6UL_CLK_QSPI1_SEL]	  = imx_clk_mux("qspi1_sel",    base + 0x1c, 7,  3, qspi1_sels, ARRAY_SIZE(qspi1_sels));
+	clks[IMX6UL_CLK_PERCLK_SEL]	  = imx_clk_mux("perclk_sel",	base + 0x1c, 6,  1, perclk_sels, ARRAY_SIZE(perclk_sels));
+	clks[IMX6UL_CLK_CAN_SEL]	  = imx_clk_mux("can_sel",	base + 0x20, 8,  2, can_sels, ARRAY_SIZE(can_sels));
 	clks[IMX6UL_CLK_UART_SEL]	  = imx_clk_mux("uart_sel",	base + 0x24, 6,  1, uart_sels, ARRAY_SIZE(uart_sels));
 	clks[IMX6UL_CLK_ENFC_SEL]	  = imx_clk_mux("enfc_sel",	base + 0x2c, 15, 3, enfc_sels, ARRAY_SIZE(enfc_sels));
 	clks[IMX6UL_CLK_LDB_DI0_SEL]	  = imx_clk_mux("ldb_di0_sel",	base + 0x2c, 9,  3, ldb_di0_sels, ARRAY_SIZE(ldb_di0_sels));
 	clks[IMX6UL_CLK_SPDIF_SEL]	  = imx_clk_mux("spdif_sel",	base + 0x30, 20, 2, spdif_sels, ARRAY_SIZE(spdif_sels));
-	clks[IMX6UL_CLK_SIM_PRE_SEL] 	  = imx_clk_mux("sim_pre_sel",	base + 0x34, 15, 3, sim_pre_sels, ARRAY_SIZE(sim_pre_sels));
-	clks[IMX6UL_CLK_SIM_SEL]	  = imx_clk_mux("sim_sel", 	base + 0x34, 9, 3, sim_sels, ARRAY_SIZE(sim_sels));
+	clks[IMX6UL_CLK_SIM_PRE_SEL]	  = imx_clk_mux("sim_pre_sel",	base + 0x34, 15, 3, sim_pre_sels, ARRAY_SIZE(sim_pre_sels));
+	clks[IMX6UL_CLK_SIM_SEL]	  = imx_clk_mux("sim_sel",	base + 0x34, 9, 3, sim_sels, ARRAY_SIZE(sim_sels));
 	clks[IMX6UL_CLK_ECSPI_SEL]	  = imx_clk_mux("ecspi_sel",	base + 0x38, 18, 1, ecspi_sels, ARRAY_SIZE(ecspi_sels));
 	clks[IMX6UL_CLK_LCDIF_PRE_SEL]	  = imx_clk_mux("lcdif_pre_sel", base + 0x38, 15, 3, lcdif_pre_sels, ARRAY_SIZE(lcdif_pre_sels));
-	clks[IMX6UL_CLK_LCDIF_SEL]	  = imx_clk_mux("lcdif_sel", 	base + 0x38, 9, 3, lcdif_sels, ARRAY_SIZE(lcdif_sels));
+	clks[IMX6UL_CLK_LCDIF_SEL]	  = imx_clk_mux("lcdif_sel",	base + 0x38, 9, 3, lcdif_sels, ARRAY_SIZE(lcdif_sels));
 
 	clks[IMX6UL_CLK_LDB_DI0_DIV_SEL]  = imx_clk_mux("ldb_di0", base + 0x20, 10, 1, ldb_di0_div_sels, ARRAY_SIZE(ldb_di0_div_sels));
 	clks[IMX6UL_CLK_LDB_DI1_DIV_SEL]  = imx_clk_mux("ldb_di1", base + 0x20, 11, 1, ldb_di1_div_sels, ARRAY_SIZE(ldb_di1_div_sels));
@@ -259,11 +259,11 @@
 	clks[IMX6UL_CLK_PERIPH]  = imx_clk_busy_mux("periph",  base + 0x14, 25, 1, base + 0x48, 5, periph_sels, ARRAY_SIZE(periph_sels));
 	clks[IMX6UL_CLK_PERIPH2] = imx_clk_busy_mux("periph2", base + 0x14, 26, 1, base + 0x48, 3, periph2_sels, ARRAY_SIZE(periph2_sels));
 
-	clks[IMX6UL_CLK_PERIPH_CLK2]	= imx_clk_divider("periph_clk2",   "periph_clk2_sel",  	base + 0x14, 27, 3);
-	clks[IMX6UL_CLK_PERIPH2_CLK2]	= imx_clk_divider("periph2_clk2",  "periph2_clk2_sel", 	base + 0x14, 0,  3);
+	clks[IMX6UL_CLK_PERIPH_CLK2]	= imx_clk_divider("periph_clk2",   "periph_clk2_sel",	base + 0x14, 27, 3);
+	clks[IMX6UL_CLK_PERIPH2_CLK2]	= imx_clk_divider("periph2_clk2",  "periph2_clk2_sel",	base + 0x14, 0,  3);
 	clks[IMX6UL_CLK_IPG]		= imx_clk_divider("ipg",	   "ahb",		base + 0x14, 8,	 2);
 	clks[IMX6UL_CLK_LCDIF_PODF]	= imx_clk_divider("lcdif_podf",	   "lcdif_pred",	base + 0x18, 23, 3);
-	clks[IMX6UL_CLK_QSPI1_PDOF] 	= imx_clk_divider("qspi1_podf",	   "qspi1_sel",		base + 0x1c, 26, 3);
+	clks[IMX6UL_CLK_QSPI1_PDOF]	= imx_clk_divider("qspi1_podf",	   "qspi1_sel",		base + 0x1c, 26, 3);
 	clks[IMX6UL_CLK_EIM_SLOW_PODF]	= imx_clk_divider("eim_slow_podf", "eim_slow_sel",	base + 0x1c, 23, 3);
 	clks[IMX6UL_CLK_PERCLK]		= imx_clk_divider("perclk",	   "perclk_sel",	base + 0x1c, 0,  6);
 	clks[IMX6UL_CLK_CAN_PODF]	= imx_clk_divider("can_podf",	   "can_sel",		base + 0x20, 2,  6);
@@ -287,14 +287,14 @@
 	clks[IMX6UL_CLK_LCDIF_PRED]	= imx_clk_divider("lcdif_pred",	   "lcdif_pre_sel",	base + 0x38, 12, 3);
 	clks[IMX6UL_CLK_CSI_PODF]       = imx_clk_divider("csi_podf",      "csi_sel",           base + 0x3c, 11, 3);
 
-	clks[IMX6UL_CLK_ARM]		= imx_clk_busy_divider("arm", 	    "pll1_sw",	base +	0x10, 0,  3,  base + 0x48, 16);
+	clks[IMX6UL_CLK_ARM]		= imx_clk_busy_divider("arm",	    "pll1_sw",	base +	0x10, 0,  3,  base + 0x48, 16);
 	clks[IMX6UL_CLK_MMDC_PODF]	= imx_clk_busy_divider("mmdc_podf", "periph2",	base +  0x14, 3,  3,  base + 0x48, 2);
 	clks[IMX6UL_CLK_AXI_PODF]	= imx_clk_busy_divider("axi_podf",  "axi_sel",	base +  0x14, 16, 3,  base + 0x48, 0);
 	clks[IMX6UL_CLK_AHB]		= imx_clk_busy_divider("ahb",	    "periph",	base +  0x14, 10, 3,  base + 0x48, 1);
 
 	/* CCGR0 */
-	clks[IMX6UL_CLK_AIPSTZ1]	= imx_clk_gate2("aips_tz1", 	"ahb",		base + 0x68,	0);
-	clks[IMX6UL_CLK_AIPSTZ2]	= imx_clk_gate2("aips_tz2", 	"ahb",		base + 0x68,	2);
+	clks[IMX6UL_CLK_AIPSTZ1]	= imx_clk_gate2("aips_tz1",	"ahb",		base + 0x68,	0);
+	clks[IMX6UL_CLK_AIPSTZ2]	= imx_clk_gate2("aips_tz2",	"ahb",		base + 0x68,	2);
 	clks[IMX6UL_CLK_APBHDMA]	= imx_clk_gate2("apbh_dma",	"bch_podf",	base + 0x68,	4);
 	clks[IMX6UL_CLK_ASRC_IPG]	= imx_clk_gate2_shared("asrc_ipg",	"ahb",	base + 0x68,	6, &share_count_asrc);
 	clks[IMX6UL_CLK_ASRC_MEM]	= imx_clk_gate2_shared("asrc_mem",	"ahb",	base + 0x68,	6, &share_count_asrc);
@@ -302,7 +302,7 @@
 	clks[IMX6UL_CLK_CAAM_ACLK]	= imx_clk_gate2("caam_aclk",	"ahb",		base + 0x68,	10);
 	clks[IMX6UL_CLK_CAAM_IPG]	= imx_clk_gate2("caam_ipg",	"ipg",		base + 0x68,	12);
 	clks[IMX6UL_CLK_CAN1_IPG]	= imx_clk_gate2("can1_ipg",	"ipg",		base + 0x68,	14);
-	clks[IMX6UL_CLK_CAN1_SERIAL]	= imx_clk_gate2("can1_serial",	"can_podf",	base + 0x68, 	16);
+	clks[IMX6UL_CLK_CAN1_SERIAL]	= imx_clk_gate2("can1_serial",	"can_podf",	base + 0x68,	16);
 	clks[IMX6UL_CLK_CAN2_IPG]	= imx_clk_gate2("can2_ipg",	"ipg",		base + 0x68,	18);
 	clks[IMX6UL_CLK_CAN2_SERIAL]	= imx_clk_gate2("can2_serial",	"can_podf",	base + 0x68,	20);
 	clks[IMX6UL_CLK_GPT2_BUS]	= imx_clk_gate2("gpt_bus",	"perclk",	base + 0x68,	24);
@@ -331,7 +331,7 @@
 	clks[IMX6UL_CLK_CSI]		= imx_clk_gate2("csi",		"csi_podf",		base + 0x70,	2);
 	clks[IMX6UL_CLK_I2C1]		= imx_clk_gate2("i2c1",		"perclk",	base + 0x70,	6);
 	clks[IMX6UL_CLK_I2C2]		= imx_clk_gate2("i2c2",		"perclk",	base + 0x70,	8);
-	clks[IMX6UL_CLK_I2C3] 		= imx_clk_gate2("i2c3",		"perclk",	base + 0x70,	10);
+	clks[IMX6UL_CLK_I2C3]		= imx_clk_gate2("i2c3",		"perclk",	base + 0x70,	10);
 	clks[IMX6UL_CLK_OCOTP]		= imx_clk_gate2("ocotp",	"ipg",		base + 0x70,	12);
 	clks[IMX6UL_CLK_IOMUXC]		= imx_clk_gate2("iomuxc",	"lcdif_podf",	base + 0x70,	14);
 	clks[IMX6UL_CLK_LCDIF_APB]	= imx_clk_gate2("lcdif_apb",	"axi",		base + 0x70,	28);
@@ -365,6 +365,7 @@
 	/* CCGR5 */
 	clks[IMX6UL_CLK_ROM]		= imx_clk_gate2("rom",		"ahb",		base + 0x7c,	0);
 	clks[IMX6UL_CLK_SDMA]		= imx_clk_gate2("sdma",		"ahb",		base + 0x7c,	6);
+	clks[IMX6UL_CLK_KPP]		= imx_clk_gate2("kpp",		"ipg",		base + 0x7c,	8);
 	clks[IMX6UL_CLK_WDOG2]		= imx_clk_gate2("wdog2",	"ipg",		base + 0x7c,	10);
 	clks[IMX6UL_CLK_SPBA]		= imx_clk_gate2("spba",		"ipg",		base + 0x7c,	12);
 	clks[IMX6UL_CLK_SPDIF]		= imx_clk_gate2_shared("spdif",		"spdif_podf",	base + 0x7c,	14, &share_count_audio);
@@ -391,10 +392,10 @@
 	clks[IMX6UL_CLK_UART8_IPG]	= imx_clk_gate2("uart8_ipg",	"ipg",		 base + 0x80,	14);
 	clks[IMX6UL_CLK_UART8_SERIAL]	= imx_clk_gate2("uart8_serial", "uart_podf",	 base + 0x80,	14);
 	clks[IMX6UL_CLK_WDOG3]		= imx_clk_gate2("wdog3",	"ipg",		 base + 0x80,	20);
-	clks[IMX6UL_CLK_I2C4]		= imx_clk_gate2("i2c4",		"perclk", 	 base + 0x80,	24);
+	clks[IMX6UL_CLK_I2C4]		= imx_clk_gate2("i2c4",		"perclk",	 base + 0x80,	24);
 	clks[IMX6UL_CLK_PWM5]		= imx_clk_gate2("pwm5",		"perclk",	 base + 0x80,	26);
 	clks[IMX6UL_CLK_PWM6]		= imx_clk_gate2("pwm6",		"perclk",	 base +	0x80,	28);
-	clks[IMX6UL_CLK_PWM7]		= imx_clk_gate2("Pwm7",		"perclk",	 base + 0x80,	30);
+	clks[IMX6UL_CLK_PWM7]		= imx_clk_gate2("pwm7",		"perclk",	 base + 0x80,	30);
 
 	/* mask handshake of mmdc */
 	writel_relaxed(BM_CCM_CCDR_MMDC_CH0_MASK, base + CCDR);

diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h
index c94ac5c..d942f57 100644
--- a/drivers/clk/imx/clk.h
+++ b/drivers/clk/imx/clk.h

@@ -87,7 +87,7 @@
 
 static inline struct clk *imx_clk_fixed(const char *name, int rate)
 {
-	return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
+	return clk_register_fixed_rate(NULL, name, NULL, 0, rate);
 }
 
 static inline struct clk *imx_clk_divider(const char *name, const char *parent,

diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index 3528303..5ada644 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c

@@ -58,8 +58,8 @@
 	for (i = 0; i < num; i++) {
 		const struct mtk_fixed_clk *rc = &clks[i];
 
-		clk = clk_register_fixed_rate(NULL, rc->name, rc->parent,
-				rc->parent ? 0 : CLK_IS_ROOT, rc->rate);
+		clk = clk_register_fixed_rate(NULL, rc->name, rc->parent, 0,
+					      rc->rate);
 
 		if (IS_ERR(clk)) {
 			pr_err("Failed to register clk %s: %ld\n",

diff --git a/drivers/clk/mediatek/reset.c b/drivers/clk/mediatek/reset.c
index 9e9fe4b..309049d 100644
--- a/drivers/clk/mediatek/reset.c
+++ b/drivers/clk/mediatek/reset.c

@@ -57,7 +57,7 @@
 	return mtk_reset_deassert(rcdev, id);
 }
 
-static struct reset_control_ops mtk_reset_ops = {
+static const struct reset_control_ops mtk_reset_ops = {
 	.assert = mtk_reset_assert,
 	.deassert = mtk_reset_deassert,
 	.reset = mtk_reset,

diff --git a/drivers/clk/meson/clkc.c b/drivers/clk/meson/clkc.c
index c83ae13..d920d41 100644
--- a/drivers/clk/meson/clkc.c
+++ b/drivers/clk/meson/clkc.c

@@ -198,7 +198,7 @@
 }
 
 void __init meson_clk_register_clks(const struct clk_conf *clk_confs,
-				    size_t nr_confs,
+				    unsigned int nr_confs,
 				    void __iomem *clk_base)
 {
 	unsigned int i;

diff --git a/drivers/clk/mmp/reset.c b/drivers/clk/mmp/reset.c
index b54da1f..b4e4d6a 100644
--- a/drivers/clk/mmp/reset.c
+++ b/drivers/clk/mmp/reset.c

@@ -74,7 +74,7 @@
 	return 0;
 }
 
-static struct reset_control_ops mmp_clk_reset_ops = {
+static const struct reset_control_ops mmp_clk_reset_ops = {
 	.assert		= mmp_clk_reset_assert,
 	.deassert	= mmp_clk_reset_deassert,
 };

diff --git a/drivers/clk/mvebu/Kconfig b/drivers/clk/mvebu/Kconfig
index 2769625..eaee8f0 100644
--- a/drivers/clk/mvebu/Kconfig
+++ b/drivers/clk/mvebu/Kconfig

@@ -11,7 +11,6 @@
 	bool
 	select MVEBU_CLK_COMMON
 	select MVEBU_CLK_CPU
-	select MVEBU_CLK_COREDIV
 
 config ARMADA_375_CLK
 	bool
@@ -29,7 +28,6 @@
 	bool
 	select MVEBU_CLK_COMMON
 	select MVEBU_CLK_CPU
-	select MVEBU_CLK_COREDIV
 
 config DOVE_CLK
 	bool

diff --git a/drivers/clk/mvebu/common.c b/drivers/clk/mvebu/common.c
index daa6ebd..66be2e0 100644
--- a/drivers/clk/mvebu/common.c
+++ b/drivers/clk/mvebu/common.c

@@ -137,8 +137,8 @@
 	of_property_read_string_index(np, "clock-output-names", 0,
 				      &tclk_name);
 	rate = desc->get_tclk_freq(base);
-	clk_data.clks[0] = clk_register_fixed_rate(NULL, tclk_name, NULL,
-						   CLK_IS_ROOT, rate);
+	clk_data.clks[0] = clk_register_fixed_rate(NULL, tclk_name, NULL, 0,
+						   rate);
 	WARN_ON(IS_ERR(clk_data.clks[0]));
 
 	/* Register CPU clock */
@@ -150,8 +150,8 @@
 		&& desc->is_sscg_enabled(base))
 		rate = desc->fix_sscg_deviation(rate);
 
-	clk_data.clks[1] = clk_register_fixed_rate(NULL, cpuclk_name, NULL,
-						   CLK_IS_ROOT, rate);
+	clk_data.clks[1] = clk_register_fixed_rate(NULL, cpuclk_name, NULL, 0,
+						   rate);
 	WARN_ON(IS_ERR(clk_data.clks[1]));
 
 	/* Register fixed-factor clocks derived from CPU clock */
@@ -174,8 +174,7 @@
 					      2 + desc->num_ratios, &name);
 		rate = desc->get_refclk_freq(base);
 		clk_data.clks[2 + desc->num_ratios] =
-			clk_register_fixed_rate(NULL, name, NULL,
-						CLK_IS_ROOT, rate);
+			clk_register_fixed_rate(NULL, name, NULL, 0, rate);
 		WARN_ON(IS_ERR(clk_data.clks[2 + desc->num_ratios]));
 	}
 

diff --git a/drivers/clk/mvebu/dove-divider.c b/drivers/clk/mvebu/dove-divider.c
index 3e0b52d..4091f3c 100644
--- a/drivers/clk/mvebu/dove-divider.c
+++ b/drivers/clk/mvebu/dove-divider.c

@@ -225,8 +225,7 @@
 	 * Create the core PLL clock.  We treat this as a fixed rate
 	 * clock as we don't know any better, and documentation is sparse.
 	 */
-	clk = clk_register_fixed_rate(dev, core_pll[0], NULL, CLK_IS_ROOT,
-				      2000000000UL);
+	clk = clk_register_fixed_rate(dev, core_pll[0], NULL, 0, 2000000000UL);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 

diff --git a/drivers/clk/mxs/clk.h b/drivers/clk/mxs/clk.h
index a459095..5a264a4 100644
--- a/drivers/clk/mxs/clk.h
+++ b/drivers/clk/mxs/clk.h

@@ -38,7 +38,7 @@
 
 static inline struct clk *mxs_clk_fixed(const char *name, int rate)
 {
-	return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
+	return clk_register_fixed_rate(NULL, name, NULL, 0, rate);
 }
 
 static inline struct clk *mxs_clk_gate(const char *name,

diff --git a/drivers/clk/nxp/Makefile b/drivers/clk/nxp/Makefile
index 607bd48..d456ee6 100644
--- a/drivers/clk/nxp/Makefile
+++ b/drivers/clk/nxp/Makefile

@@ -1,3 +1,4 @@
 obj-$(CONFIG_ARCH_LPC18XX)	+= clk-lpc18xx-cgu.o
 obj-$(CONFIG_ARCH_LPC18XX)	+= clk-lpc18xx-ccu.o
+obj-$(CONFIG_ARCH_LPC18XX)	+= clk-lpc18xx-creg.o
 obj-$(CONFIG_ARCH_LPC32XX)	+= clk-lpc32xx.o

diff --git a/drivers/clk/nxp/clk-lpc18xx-cgu.c b/drivers/clk/nxp/clk-lpc18xx-cgu.c
index c924572..2531174 100644
--- a/drivers/clk/nxp/clk-lpc18xx-cgu.c
+++ b/drivers/clk/nxp/clk-lpc18xx-cgu.c

@@ -605,7 +605,7 @@
 
 	/* Register the internal 12 MHz RC oscillator (IRC) */
 	clk = clk_register_fixed_rate(NULL, clk_src_names[CLK_SRC_IRC],
-				      NULL, CLK_IS_ROOT, 12000000);
+				      NULL, 0, 12000000);
 	if (IS_ERR(clk))
 		pr_warn("%s: failed to register irc clk\n", __func__);
 

diff --git a/drivers/clk/nxp/clk-lpc18xx-creg.c b/drivers/clk/nxp/clk-lpc18xx-creg.c
new file mode 100644
index 0000000..d44b61a
--- /dev/null
+++ b/drivers/clk/nxp/clk-lpc18xx-creg.c

@@ -0,0 +1,226 @@
+/*
+ * Clk driver for NXP LPC18xx/43xx Configuration Registers (CREG)
+ *
+ * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#define LPC18XX_CREG_CREG0			0x004
+#define  LPC18XX_CREG_CREG0_EN1KHZ		BIT(0)
+#define  LPC18XX_CREG_CREG0_EN32KHZ		BIT(1)
+#define  LPC18XX_CREG_CREG0_RESET32KHZ		BIT(2)
+#define  LPC18XX_CREG_CREG0_PD32KHZ		BIT(3)
+
+#define to_clk_creg(_hw) container_of(_hw, struct clk_creg_data, hw)
+
+enum {
+	CREG_CLK_1KHZ,
+	CREG_CLK_32KHZ,
+	CREG_CLK_MAX,
+};
+
+struct clk_creg_data {
+	struct clk_hw hw;
+	const char *name;
+	struct regmap *reg;
+	unsigned int en_mask;
+	const struct clk_ops *ops;
+};
+
+#define CREG_CLK(_name, _emask, _ops)		\
+{						\
+	.name = _name,				\
+	.en_mask = LPC18XX_CREG_CREG0_##_emask,	\
+	.ops = &_ops,				\
+}
+
+static int clk_creg_32k_prepare(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+	int ret;
+
+	ret = regmap_update_bits(creg->reg, LPC18XX_CREG_CREG0,
+				 LPC18XX_CREG_CREG0_PD32KHZ |
+				 LPC18XX_CREG_CREG0_RESET32KHZ, 0);
+
+	/*
+	 * Powering up the 32k oscillator takes a long while
+	 * and sadly there aren't any status bit to poll.
+	 */
+	msleep(2500);
+
+	return ret;
+}
+
+static void clk_creg_32k_unprepare(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+
+	regmap_update_bits(creg->reg, LPC18XX_CREG_CREG0,
+			   LPC18XX_CREG_CREG0_PD32KHZ,
+			   LPC18XX_CREG_CREG0_PD32KHZ);
+}
+
+static int clk_creg_32k_is_prepared(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+	u32 reg;
+
+	regmap_read(creg->reg, LPC18XX_CREG_CREG0, &reg);
+
+	return !(reg & LPC18XX_CREG_CREG0_PD32KHZ) &&
+	       !(reg & LPC18XX_CREG_CREG0_RESET32KHZ);
+}
+
+static unsigned long clk_creg_1k_recalc_rate(struct clk_hw *hw,
+					     unsigned long parent_rate)
+{
+	return parent_rate / 32;
+}
+
+static int clk_creg_enable(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+
+	return regmap_update_bits(creg->reg, LPC18XX_CREG_CREG0,
+				  creg->en_mask, creg->en_mask);
+}
+
+static void clk_creg_disable(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+
+	regmap_update_bits(creg->reg, LPC18XX_CREG_CREG0,
+			   creg->en_mask, 0);
+}
+
+static int clk_creg_is_enabled(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+	u32 reg;
+
+	regmap_read(creg->reg, LPC18XX_CREG_CREG0, &reg);
+
+	return !!(reg & creg->en_mask);
+}
+
+static const struct clk_ops clk_creg_32k = {
+	.enable		= clk_creg_enable,
+	.disable	= clk_creg_disable,
+	.is_enabled	= clk_creg_is_enabled,
+	.prepare	= clk_creg_32k_prepare,
+	.unprepare	= clk_creg_32k_unprepare,
+	.is_prepared	= clk_creg_32k_is_prepared,
+};
+
+static const struct clk_ops clk_creg_1k = {
+	.enable		= clk_creg_enable,
+	.disable	= clk_creg_disable,
+	.is_enabled	= clk_creg_is_enabled,
+	.recalc_rate	= clk_creg_1k_recalc_rate,
+};
+
+static struct clk_creg_data clk_creg_clocks[] = {
+	[CREG_CLK_1KHZ]  = CREG_CLK("1khz_clk",  EN1KHZ,  clk_creg_1k),
+	[CREG_CLK_32KHZ] = CREG_CLK("32khz_clk", EN32KHZ, clk_creg_32k),
+};
+
+static struct clk *clk_register_creg_clk(struct device *dev,
+					 struct clk_creg_data *creg_clk,
+					 const char **parent_name,
+					 struct regmap *syscon)
+{
+	struct clk_init_data init;
+
+	init.ops = creg_clk->ops;
+	init.name = creg_clk->name;
+	init.parent_names = parent_name;
+	init.num_parents = 1;
+
+	creg_clk->reg = syscon;
+	creg_clk->hw.init = &init;
+
+	if (dev)
+		return devm_clk_register(dev, &creg_clk->hw);
+
+	return clk_register(NULL, &creg_clk->hw);
+}
+
+static struct clk *clk_creg_early[CREG_CLK_MAX];
+static struct clk_onecell_data clk_creg_early_data = {
+	.clks = clk_creg_early,
+	.clk_num = CREG_CLK_MAX,
+};
+
+static void __init lpc18xx_creg_clk_init(struct device_node *np)
+{
+	const char *clk_32khz_parent;
+	struct regmap *syscon;
+
+	syscon = syscon_node_to_regmap(np->parent);
+	if (IS_ERR(syscon)) {
+		pr_err("%s: syscon lookup failed\n", __func__);
+		return;
+	}
+
+	clk_32khz_parent = of_clk_get_parent_name(np, 0);
+
+	clk_creg_early[CREG_CLK_32KHZ] =
+		clk_register_creg_clk(NULL, &clk_creg_clocks[CREG_CLK_32KHZ],
+				      &clk_32khz_parent, syscon);
+	clk_creg_early[CREG_CLK_1KHZ] = ERR_PTR(-EPROBE_DEFER);
+
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_creg_early_data);
+}
+CLK_OF_DECLARE(lpc18xx_creg_clk, "nxp,lpc1850-creg-clk", lpc18xx_creg_clk_init);
+
+static struct clk *clk_creg[CREG_CLK_MAX];
+static struct clk_onecell_data clk_creg_data = {
+	.clks = clk_creg,
+	.clk_num = CREG_CLK_MAX,
+};
+
+static int lpc18xx_creg_clk_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct regmap *syscon;
+
+	syscon = syscon_node_to_regmap(np->parent);
+	if (IS_ERR(syscon)) {
+		dev_err(&pdev->dev, "syscon lookup failed\n");
+		return PTR_ERR(syscon);
+	}
+
+	clk_creg[CREG_CLK_32KHZ] = clk_creg_early[CREG_CLK_32KHZ];
+	clk_creg[CREG_CLK_1KHZ] =
+		clk_register_creg_clk(NULL, &clk_creg_clocks[CREG_CLK_1KHZ],
+				      &clk_creg_clocks[CREG_CLK_32KHZ].name,
+				      syscon);
+
+	return of_clk_add_provider(np, of_clk_src_onecell_get, &clk_creg_data);
+}
+
+static const struct of_device_id lpc18xx_creg_clk_of_match[] = {
+	{ .compatible = "nxp,lpc1850-creg-clk" },
+	{},
+};
+
+static struct platform_driver lpc18xx_creg_clk_driver = {
+	.probe = lpc18xx_creg_clk_probe,
+	.driver = {
+		.name = "lpc18xx-creg-clk",
+		.of_match_table = lpc18xx_creg_clk_of_match,
+	},
+};
+builtin_platform_driver(lpc18xx_creg_clk_driver);

diff --git a/drivers/clk/nxp/clk-lpc32xx.c b/drivers/clk/nxp/clk-lpc32xx.c
index 10dd0fd..481b264 100644
--- a/drivers/clk/nxp/clk-lpc32xx.c
+++ b/drivers/clk/nxp/clk-lpc32xx.c

@@ -87,7 +87,7 @@
 
 enum {
 	/* Start from the last defined clock in dt bindings */
-	LPC32XX_CLK_ADC_DIV = LPC32XX_CLK_ADC + 1,
+	LPC32XX_CLK_ADC_DIV = LPC32XX_CLK_HCLK_PLL + 1,
 	LPC32XX_CLK_ADC_RTC,
 	LPC32XX_CLK_TEST1,
 	LPC32XX_CLK_TEST2,
@@ -96,7 +96,6 @@
 	LPC32XX_CLK_OSC,
 	LPC32XX_CLK_SYS,
 	LPC32XX_CLK_PLL397X,
-	LPC32XX_CLK_HCLK_PLL,
 	LPC32XX_CLK_HCLK_DIV_PERIPH,
 	LPC32XX_CLK_HCLK_DIV,
 	LPC32XX_CLK_HCLK,
@@ -589,7 +588,8 @@
 				    unsigned long *parent_rate)
 {
 	struct lpc32xx_pll_clk *clk = to_lpc32xx_pll_clk(hw);
-	u64 m_i, m, n, p, o = rate, i = *parent_rate, d = (u64)rate << 6;
+	u64 m_i, o = rate, i = *parent_rate, d = (u64)rate << 6;
+	u64 m = 0, n = 0, p = 0;
 	int p_i, n_i;
 
 	pr_debug("%s: %lu/%lu\n", clk_hw_get_name(hw), *parent_rate, rate);
@@ -1429,6 +1429,8 @@
 			hw = &clk_hw->hw0.div.hw;
 		else if (clk_hw->type == CLK_GATE)
 			hw = &clk_hw->hw0.gate.hw;
+		else
+			return ERR_PTR(-EINVAL);
 
 		hw->init = &clk_init;
 		clk = clk_register(NULL, hw);
@@ -1515,7 +1517,7 @@
 		return;
 	}
 
-	for (i = 0; i < LPC32XX_CLK_MAX; i++) {
+	for (i = 1; i < LPC32XX_CLK_MAX; i++) {
 		clk[i] = lpc32xx_clk_register(i);
 		if (IS_ERR(clk[i])) {
 			pr_err("failed to register %s clock: %ld\n",
@@ -1526,9 +1528,6 @@
 
 	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
 
-	/* For 13MHz osc valid output range of PLL is from 156MHz to 266.5MHz */
-	clk_set_rate(clk[LPC32XX_CLK_HCLK_PLL], 208000000);
-
 	/* Set 48MHz rate of USB PLL clock */
 	clk_set_rate(clk[LPC32XX_CLK_USB_PLL], 48000000);
 
@@ -1555,7 +1554,7 @@
 		return;
 	}
 
-	for (i = 0; i < LPC32XX_USB_CLK_MAX; i++) {
+	for (i = 1; i < LPC32XX_USB_CLK_MAX; i++) {
 		usb_clk[i] = lpc32xx_clk_register(i + LPC32XX_CLK_USB_OFFSET);
 		if (IS_ERR(usb_clk[i])) {
 			pr_err("failed to register %s clock: %ld\n",

diff --git a/drivers/clk/pxa/clk-pxa25x.c b/drivers/clk/pxa/clk-pxa25x.c
index a9353cd..a98b98e 100644
--- a/drivers/clk/pxa/clk-pxa25x.c
+++ b/drivers/clk/pxa/clk-pxa25x.c

@@ -200,12 +200,10 @@
 static void __init pxa25x_register_plls(void)
 {
 	clk_register_fixed_rate(NULL, "osc_3_6864mhz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
-				3686400);
+				CLK_GET_RATE_NOCACHE, 3686400);
 	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
-				32768);
-	clk_register_fixed_rate(NULL, "clk_dummy", NULL, CLK_IS_ROOT, 0);
+				CLK_GET_RATE_NOCACHE, 32768);
+	clk_register_fixed_rate(NULL, "clk_dummy", NULL, 0, 0);
 	clk_register_fixed_factor(NULL, "ppll_95_85mhz", "osc_3_6864mhz",
 				  0, 26, 1);
 	clk_register_fixed_factor(NULL, "ppll_147_46mhz", "osc_3_6864mhz",

diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c
index fc2abf9..c40b180 100644
--- a/drivers/clk/pxa/clk-pxa27x.c
+++ b/drivers/clk/pxa/clk-pxa27x.c

@@ -208,12 +208,12 @@
 static void __init pxa27x_register_plls(void)
 {
 	clk_register_fixed_rate(NULL, "osc_13mhz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				13 * MHz);
 	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				32768 * KHz);
-	clk_register_fixed_rate(NULL, "clk_dummy", NULL, CLK_IS_ROOT, 0);
+	clk_register_fixed_rate(NULL, "clk_dummy", NULL, 0, 0);
 	clk_register_fixed_factor(NULL, "ppll_312mhz", "osc_13mhz", 0, 24, 1);
 }
 

diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c
index ea67971..42bdaa7 100644
--- a/drivers/clk/pxa/clk-pxa3xx.c
+++ b/drivers/clk/pxa/clk-pxa3xx.c

@@ -284,15 +284,15 @@
 static void __init pxa3xx_register_plls(void)
 {
 	clk_register_fixed_rate(NULL, "osc_13mhz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				13 * MHz);
 	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				32768);
 	clk_register_fixed_rate(NULL, "ring_osc_120mhz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				120 * MHz);
-	clk_register_fixed_rate(NULL, "clk_dummy", NULL, CLK_IS_ROOT, 0);
+	clk_register_fixed_rate(NULL, "clk_dummy", NULL, 0, 0);
 	clk_register_fixed_factor(NULL, "spll_624mhz", "osc_13mhz", 0, 48, 1);
 	clk_register_fixed_factor(NULL, "ring_osc_60mhz", "ring_osc_120mhz",
 				  0, 1, 2);

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index b552ece..95e3b3e 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig

@@ -28,6 +28,14 @@
 	  Say Y if you want to support multimedia devices such as display,
 	  graphics, video encode/decode, camera, etc.
 
+config IPQ_GCC_4019
+	tristate "IPQ4019 Global Clock Controller"
+	depends on COMMON_CLK_QCOM
+	help
+	  Support for the global clock controller on ipq4019 devices.
+	  Say Y if you want to use peripheral devices such as UART, SPI,
+	  i2c, USB, SD/eMMC, etc.
+
 config IPQ_GCC_806X
 	tristate "IPQ806x Global Clock Controller"
 	depends on COMMON_CLK_QCOM

diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index dc4280b..2a25f4e 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile

@@ -14,6 +14,7 @@
 
 obj-$(CONFIG_APQ_GCC_8084) += gcc-apq8084.o
 obj-$(CONFIG_APQ_MMCC_8084) += mmcc-apq8084.o
+obj-$(CONFIG_IPQ_GCC_4019) += gcc-ipq4019.o
 obj-$(CONFIG_IPQ_GCC_806X) += gcc-ipq806x.o
 obj-$(CONFIG_IPQ_LCC_806X) += lcc-ipq806x.o
 obj-$(CONFIG_MSM_GCC_8660) += gcc-msm8660.o

diff --git a/drivers/clk/qcom/clk-rcg.c b/drivers/clk/qcom/clk-rcg.c
index bfbb28f..67ce7c1 100644
--- a/drivers/clk/qcom/clk-rcg.c
+++ b/drivers/clk/qcom/clk-rcg.c

@@ -638,7 +638,6 @@
 		return ret;
 
 	src = ns_to_src(&rcg->s, ns);
-	f.pre_div = ns_to_pre_div(&rcg->p, ns) + 1;
 
 	for (i = 0; i < num_parents; i++) {
 		if (src == rcg->s.parent_map[i].cfg) {
@@ -647,6 +646,9 @@
 		}
 	}
 
+	/* bypass the pre divider */
+	f.pre_div = 1;
+
 	/* let us find appropriate m/n values for this */
 	for (; frac->num; frac++) {
 		request = (rate * frac->den) / frac->num;

diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c
index c112eba..f7c226a 100644
--- a/drivers/clk/qcom/common.c
+++ b/drivers/clk/qcom/common.c

@@ -119,7 +119,6 @@
 		fixed->hw.init = &init_data;
 
 		init_data.name = path;
-		init_data.flags = CLK_IS_ROOT;
 		init_data.ops = &clk_fixed_rate_ops;
 
 		clk = devm_clk_register(dev, &fixed->hw);
@@ -185,6 +184,7 @@
 	struct clk **clks;
 	struct qcom_reset_controller *reset;
 	struct qcom_cc *cc;
+	struct gdsc_desc *scd;
 	size_t num_clks = desc->num_clks;
 	struct clk_regmap **rclks = desc->clks;
 
@@ -213,7 +213,11 @@
 	if (ret)
 		return ret;
 
-	devm_add_action(dev, qcom_cc_del_clk_provider, pdev->dev.of_node);
+	ret = devm_add_action_or_reset(dev, qcom_cc_del_clk_provider,
+				       pdev->dev.of_node);
+
+	if (ret)
+		return ret;
 
 	reset = &cc->reset;
 	reset->rcdev.of_node = dev->of_node;
@@ -227,18 +231,28 @@
 	if (ret)
 		return ret;
 
-	devm_add_action(dev, qcom_cc_reset_unregister, &reset->rcdev);
+	ret = devm_add_action_or_reset(dev, qcom_cc_reset_unregister,
+				       &reset->rcdev);
+
+	if (ret)
+		return ret;
 
 	if (desc->gdscs && desc->num_gdscs) {
-		ret = gdsc_register(dev, desc->gdscs, desc->num_gdscs,
-				    &reset->rcdev, regmap);
+		scd = devm_kzalloc(dev, sizeof(*scd), GFP_KERNEL);
+		if (!scd)
+			return -ENOMEM;
+		scd->dev = dev;
+		scd->scs = desc->gdscs;
+		scd->num = desc->num_gdscs;
+		ret = gdsc_register(scd, &reset->rcdev, regmap);
+		if (ret)
+			return ret;
+		ret = devm_add_action_or_reset(dev, qcom_cc_gdsc_unregister,
+					       scd);
 		if (ret)
 			return ret;
 	}
 
-	devm_add_action(dev, qcom_cc_gdsc_unregister, dev);
-
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(qcom_cc_really_probe);

diff --git a/drivers/clk/qcom/gcc-ipq4019.c b/drivers/clk/qcom/gcc-ipq4019.c
new file mode 100644
index 0000000..3cd1af0
--- /dev/null
+++ b/drivers/clk/qcom/gcc-ipq4019.c

@@ -0,0 +1,1354 @@
+/*
+ * Copyright (c) 2015 The Linux Foundation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+
+#include <dt-bindings/clock/qcom,gcc-ipq4019.h>
+
+#include "common.h"
+#include "clk-regmap.h"
+#include "clk-rcg.h"
+#include "clk-branch.h"
+#include "reset.h"
+
+enum {
+	P_XO,
+	P_FEPLL200,
+	P_FEPLL500,
+	P_DDRPLL,
+	P_FEPLLWCSS2G,
+	P_FEPLLWCSS5G,
+	P_FEPLL125DLY,
+	P_DDRPLLAPSS,
+};
+
+static struct parent_map gcc_xo_200_500_map[] = {
+	{ P_XO, 0 },
+	{ P_FEPLL200, 1 },
+	{ P_FEPLL500, 2 },
+};
+
+static const char * const gcc_xo_200_500[] = {
+	"xo",
+	"fepll200",
+	"fepll500",
+};
+
+static struct parent_map gcc_xo_200_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLL200, 1 },
+};
+
+static const char * const gcc_xo_200[] = {
+	"xo",
+	"fepll200",
+};
+
+static struct parent_map gcc_xo_200_spi_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLL200, 2 },
+};
+
+static const char * const gcc_xo_200_spi[] = {
+	"xo",
+	"fepll200",
+};
+
+static struct parent_map gcc_xo_sdcc1_500_map[] = {
+	{  P_XO, 0 },
+	{  P_DDRPLL, 1 },
+	{  P_FEPLL500, 2 },
+};
+
+static const char * const gcc_xo_sdcc1_500[] = {
+	"xo",
+	"ddrpll",
+	"fepll500",
+};
+
+static struct parent_map gcc_xo_wcss2g_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLLWCSS2G, 1 },
+};
+
+static const char * const gcc_xo_wcss2g[] = {
+	"xo",
+	"fepllwcss2g",
+};
+
+static struct parent_map gcc_xo_wcss5g_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLLWCSS5G, 1 },
+};
+
+static const char * const gcc_xo_wcss5g[] = {
+	"xo",
+	"fepllwcss5g",
+};
+
+static struct parent_map gcc_xo_125_dly_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLL125DLY, 1 },
+};
+
+static const char * const gcc_xo_125_dly[] = {
+	"xo",
+	"fepll125dly",
+};
+
+static struct parent_map gcc_xo_ddr_500_200_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLL200, 3 },
+	{  P_FEPLL500, 2 },
+	{  P_DDRPLLAPSS, 1 },
+};
+
+static const char * const gcc_xo_ddr_500_200[] = {
+	"xo",
+	"fepll200",
+	"fepll500",
+	"ddrpllapss",
+};
+
+#define F(f, s, h, m, n) { (f), (s), (2 * (h) - 1), (m), (n) }
+
+static const struct freq_tbl ftbl_gcc_audio_pwm_clk[] = {
+	F(48000000, P_XO, 1, 0, 0),
+	F(200000000, P_FEPLL200, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 audio_clk_src = {
+	.cmd_rcgr = 0x1b000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_map,
+	.freq_tbl = ftbl_gcc_audio_pwm_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "audio_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+
+	},
+};
+
+static struct clk_branch gcc_audio_ahb_clk = {
+	.halt_reg = 0x1b010,
+	.clkr = {
+		.enable_reg = 0x1b010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_audio_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.flags = CLK_SET_RATE_PARENT,
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_audio_pwm_clk = {
+	.halt_reg = 0x1b00C,
+	.clkr = {
+		.enable_reg = 0x1b00C,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_audio_pwm_clk",
+			.parent_names = (const char *[]){
+				"audio_clk_src",
+			},
+			.flags = CLK_SET_RATE_PARENT,
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_blsp1_qup1_2_i2c_apps_clk[] = {
+	F(19200000, P_XO, 1, 2, 5),
+	F(24000000, P_XO, 1, 1, 2),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_qup1_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x200c,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_map,
+	.freq_tbl = ftbl_gcc_blsp1_qup1_2_i2c_apps_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_qup1_i2c_apps_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_qup1_i2c_apps_clk = {
+	.halt_reg = 0x2008,
+	.clkr = {
+		.enable_reg = 0x2008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_qup1_i2c_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_qup1_i2c_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg2 blsp1_qup2_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x3000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_map,
+	.freq_tbl = ftbl_gcc_blsp1_qup1_2_i2c_apps_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_qup2_i2c_apps_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_qup2_i2c_apps_clk = {
+	.halt_reg = 0x3010,
+	.clkr = {
+		.enable_reg = 0x3010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_qup2_i2c_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_qup2_i2c_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_blsp1_qup1_2_spi_apps_clk[] = {
+	F(960000, P_XO, 12, 1, 4),
+	F(4800000, P_XO, 1, 1, 10),
+	F(9600000, P_XO, 1, 1, 5),
+	F(15000000, P_XO, 1, 1, 3),
+	F(19200000, P_XO, 1, 2, 5),
+	F(24000000, P_XO, 1, 1, 2),
+	F(48000000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_qup1_spi_apps_clk_src = {
+	.cmd_rcgr = 0x2024,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_spi_map,
+	.freq_tbl = ftbl_gcc_blsp1_qup1_2_spi_apps_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_qup1_spi_apps_clk_src",
+		.parent_names = gcc_xo_200_spi,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_qup1_spi_apps_clk = {
+	.halt_reg = 0x2004,
+	.clkr = {
+		.enable_reg = 0x2004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_qup1_spi_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_qup1_spi_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg2 blsp1_qup2_spi_apps_clk_src = {
+	.cmd_rcgr = 0x3014,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_blsp1_qup1_2_spi_apps_clk,
+	.parent_map = gcc_xo_200_spi_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_qup2_spi_apps_clk_src",
+		.parent_names = gcc_xo_200_spi,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_qup2_spi_apps_clk = {
+	.halt_reg = 0x300c,
+	.clkr = {
+		.enable_reg = 0x300c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_qup2_spi_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_qup2_spi_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_blsp1_uart1_2_apps_clk[] = {
+	F(1843200, P_FEPLL200, 1, 144, 15625),
+	F(3686400, P_FEPLL200, 1, 288, 15625),
+	F(7372800, P_FEPLL200, 1, 576, 15625),
+	F(14745600, P_FEPLL200, 1, 1152, 15625),
+	F(16000000, P_FEPLL200, 1, 2, 25),
+	F(24000000, P_XO, 1, 1, 2),
+	F(32000000, P_FEPLL200, 1, 4, 25),
+	F(40000000, P_FEPLL200, 1, 1, 5),
+	F(46400000, P_FEPLL200, 1, 29, 125),
+	F(48000000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_uart1_apps_clk_src = {
+	.cmd_rcgr = 0x2044,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_blsp1_uart1_2_apps_clk,
+	.parent_map = gcc_xo_200_spi_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_uart1_apps_clk_src",
+		.parent_names = gcc_xo_200_spi,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_uart1_apps_clk = {
+	.halt_reg = 0x203c,
+	.clkr = {
+		.enable_reg = 0x203c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_uart1_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_uart1_apps_clk_src",
+			},
+			.flags = CLK_SET_RATE_PARENT,
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_rcg2 blsp1_uart2_apps_clk_src = {
+	.cmd_rcgr = 0x3034,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_blsp1_uart1_2_apps_clk,
+	.parent_map = gcc_xo_200_spi_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_uart2_apps_clk_src",
+		.parent_names = gcc_xo_200_spi,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_uart2_apps_clk = {
+	.halt_reg = 0x302c,
+	.clkr = {
+		.enable_reg = 0x302c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_uart2_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_uart2_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_gp_clk[] = {
+	F(1250000,  P_FEPLL200, 1, 16, 0),
+	F(2500000,  P_FEPLL200, 1,  8, 0),
+	F(5000000,  P_FEPLL200, 1,  4, 0),
+	{ }
+};
+
+static struct clk_rcg2 gp1_clk_src = {
+	.cmd_rcgr = 0x8004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_gp_clk,
+	.parent_map = gcc_xo_200_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gp1_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_gp1_clk = {
+	.halt_reg = 0x8000,
+	.clkr = {
+		.enable_reg = 0x8000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp1_clk",
+			.parent_names = (const char *[]){
+				"gp1_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg2 gp2_clk_src = {
+	.cmd_rcgr = 0x9004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_gp_clk,
+	.parent_map = gcc_xo_200_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gp2_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_gp2_clk = {
+	.halt_reg = 0x9000,
+	.clkr = {
+		.enable_reg = 0x9000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp2_clk",
+			.parent_names = (const char *[]){
+				"gp2_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg2 gp3_clk_src = {
+	.cmd_rcgr = 0xa004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_gp_clk,
+	.parent_map = gcc_xo_200_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gp3_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_gp3_clk = {
+	.halt_reg = 0xa000,
+	.clkr = {
+		.enable_reg = 0xa000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp3_clk",
+			.parent_names = (const char *[]){
+				"gp3_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc1_apps_clk[] = {
+	F(144000,    P_XO,			1,  3, 240),
+	F(400000,    P_XO,			1,  1, 0),
+	F(20000000,  P_FEPLL500,		1,  1, 25),
+	F(25000000,  P_FEPLL500,		1,  1, 20),
+	F(50000000,  P_FEPLL500,		1,  1, 10),
+	F(100000000, P_FEPLL500,		1,  1, 5),
+	F(193000000, P_DDRPLL,		1,  0, 0),
+	{ }
+};
+
+static struct clk_rcg2  sdcc1_apps_clk_src = {
+	.cmd_rcgr = 0x18004,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_sdcc1_apps_clk,
+	.parent_map = gcc_xo_sdcc1_500_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "sdcc1_apps_clk_src",
+		.parent_names = gcc_xo_sdcc1_500,
+		.num_parents = 3,
+		.ops = &clk_rcg2_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_apps_clk[] = {
+	F(48000000, P_XO,	   1, 0, 0),
+	F(200000000, P_FEPLL200,   1, 0, 0),
+	F(500000000, P_FEPLL500,   1, 0, 0),
+	F(626000000, P_DDRPLLAPSS, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 apps_clk_src = {
+	.cmd_rcgr = 0x1900c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_apps_clk,
+	.parent_map = gcc_xo_ddr_500_200_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "apps_clk_src",
+		.parent_names = gcc_xo_ddr_500_200,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_apps_ahb_clk[] = {
+	F(48000000, P_XO,	   1, 0, 0),
+	F(100000000, P_FEPLL200,   2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 apps_ahb_clk_src = {
+	.cmd_rcgr = 0x19014,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_500_map,
+	.freq_tbl = ftbl_gcc_apps_ahb_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "apps_ahb_clk_src",
+		.parent_names = gcc_xo_200_500,
+		.num_parents = 3,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_apss_ahb_clk = {
+	.halt_reg = 0x19004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(14),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_apss_ahb_clk",
+			.parent_names = (const char *[]){
+				"apps_ahb_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_blsp1_ahb_clk = {
+	.halt_reg = 0x1008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_dcd_xo_clk = {
+	.halt_reg = 0x2103c,
+	.clkr = {
+		.enable_reg = 0x2103c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_dcd_xo_clk",
+			.parent_names = (const char *[]){
+				"xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = {
+	.halt_reg = 0x1300c,
+	.clkr = {
+		.enable_reg = 0x1300c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_boot_rom_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_crypto_ahb_clk = {
+	.halt_reg = 0x16024,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_crypto_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_crypto_axi_clk = {
+	.halt_reg = 0x16020,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_crypto_axi_clk",
+			.parent_names = (const char *[]){
+				"fepll125",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_crypto_clk = {
+	.halt_reg = 0x1601c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_crypto_clk",
+			.parent_names = (const char *[]){
+				"fepll125",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ess_clk = {
+	.halt_reg = 0x12010,
+	.clkr = {
+		.enable_reg = 0x12010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ess_clk",
+			.parent_names = (const char *[]){
+				"fephy_125m_dly_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_imem_axi_clk = {
+	.halt_reg = 0xe004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(17),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_imem_axi_clk",
+			.parent_names = (const char *[]){
+				"fepll200",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_imem_cfg_ahb_clk = {
+	.halt_reg = 0xe008,
+	.clkr = {
+		.enable_reg = 0xe008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_imem_cfg_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pcie_ahb_clk = {
+	.halt_reg = 0x1d00c,
+	.clkr = {
+		.enable_reg = 0x1d00c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pcie_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pcie_axi_m_clk = {
+	.halt_reg = 0x1d004,
+	.clkr = {
+		.enable_reg = 0x1d004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pcie_axi_m_clk",
+			.parent_names = (const char *[]){
+				"fepll200",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pcie_axi_s_clk = {
+	.halt_reg = 0x1d008,
+	.clkr = {
+		.enable_reg = 0x1d008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pcie_axi_s_clk",
+			.parent_names = (const char *[]){
+				"fepll200",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_prng_ahb_clk = {
+	.halt_reg = 0x13004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_prng_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qpic_ahb_clk = {
+	.halt_reg = 0x1c008,
+	.clkr = {
+		.enable_reg = 0x1c008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qpic_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qpic_clk = {
+	.halt_reg = 0x1c004,
+	.clkr = {
+		.enable_reg = 0x1c004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qpic_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_ahb_clk = {
+	.halt_reg = 0x18010,
+	.clkr = {
+		.enable_reg = 0x18010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_apps_clk = {
+	.halt_reg = 0x1800c,
+	.clkr = {
+		.enable_reg = 0x1800c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_apps_clk",
+			.parent_names = (const char *[]){
+				"sdcc1_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_tlmm_ahb_clk = {
+	.halt_reg = 0x5004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(5),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_tlmm_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb2_master_clk = {
+	.halt_reg = 0x1e00c,
+	.clkr = {
+		.enable_reg = 0x1e00c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb2_master_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb2_sleep_clk = {
+	.halt_reg = 0x1e010,
+	.clkr = {
+		.enable_reg = 0x1e010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb2_sleep_clk",
+			.parent_names = (const char *[]){
+				"gcc_sleep_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb2_mock_utmi_clk = {
+	.halt_reg = 0x1e014,
+	.clkr = {
+		.enable_reg = 0x1e014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb2_mock_utmi_clk",
+			.parent_names = (const char *[]){
+				"usb30_mock_utmi_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_mock_utmi_clk[] = {
+	F(2000000, P_FEPLL200, 10, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 usb30_mock_utmi_clk_src = {
+	.cmd_rcgr = 0x1e000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_map,
+	.freq_tbl = ftbl_gcc_usb30_mock_utmi_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "usb30_mock_utmi_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_usb3_master_clk = {
+	.halt_reg = 0x1e028,
+	.clkr = {
+		.enable_reg = 0x1e028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_master_clk",
+			.parent_names = (const char *[]){
+				"fepll125",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_sleep_clk = {
+	.halt_reg = 0x1e02C,
+	.clkr = {
+		.enable_reg = 0x1e02C,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_sleep_clk",
+			.parent_names = (const char *[]){
+				"gcc_sleep_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_mock_utmi_clk = {
+	.halt_reg = 0x1e030,
+	.clkr = {
+		.enable_reg = 0x1e030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_mock_utmi_clk",
+			.parent_names = (const char *[]){
+				"usb30_mock_utmi_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_fephy_dly_clk[] = {
+	F(125000000, P_FEPLL125DLY, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 fephy_125m_dly_clk_src = {
+	.cmd_rcgr = 0x12000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_125_dly_map,
+	.freq_tbl = ftbl_gcc_fephy_dly_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "fephy_125m_dly_clk_src",
+		.parent_names = gcc_xo_125_dly,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+
+static const struct freq_tbl ftbl_gcc_wcss2g_clk[] = {
+	F(48000000, P_XO, 1, 0, 0),
+	F(250000000, P_FEPLLWCSS2G, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 wcss2g_clk_src = {
+	.cmd_rcgr = 0x1f000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_wcss2g_clk,
+	.parent_map = gcc_xo_wcss2g_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "wcss2g_clk_src",
+		.parent_names = gcc_xo_wcss2g,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_branch gcc_wcss2g_clk = {
+	.halt_reg = 0x1f00C,
+	.clkr = {
+		.enable_reg = 0x1f00C,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss2g_clk",
+			.parent_names = (const char *[]){
+				"wcss2g_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_wcss2g_ref_clk = {
+	.halt_reg = 0x1f00C,
+	.clkr = {
+		.enable_reg = 0x1f00C,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss2g_ref_clk",
+			.parent_names = (const char *[]){
+				"xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_wcss2g_rtc_clk = {
+	.halt_reg = 0x1f010,
+	.clkr = {
+		.enable_reg = 0x1f010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss2g_rtc_clk",
+			.parent_names = (const char *[]){
+				"gcc_sleep_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_wcss5g_clk[] = {
+	F(48000000, P_XO, 1, 0, 0),
+	F(250000000, P_FEPLLWCSS5G, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 wcss5g_clk_src = {
+	.cmd_rcgr = 0x20000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_wcss5g_map,
+	.freq_tbl = ftbl_gcc_wcss5g_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "wcss5g_clk_src",
+		.parent_names = gcc_xo_wcss5g,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_wcss5g_clk = {
+	.halt_reg = 0x2000c,
+	.clkr = {
+		.enable_reg = 0x2000c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss5g_clk",
+			.parent_names = (const char *[]){
+				"wcss5g_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_wcss5g_ref_clk = {
+	.halt_reg = 0x2000c,
+	.clkr = {
+		.enable_reg = 0x2000c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss5g_ref_clk",
+			.parent_names = (const char *[]){
+				"xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_wcss5g_rtc_clk = {
+	.halt_reg = 0x20010,
+	.clkr = {
+		.enable_reg = 0x20010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss5g_rtc_clk",
+			.parent_names = (const char *[]){
+				"gcc_sleep_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_regmap *gcc_ipq4019_clocks[] = {
+	[AUDIO_CLK_SRC] = &audio_clk_src.clkr,
+	[BLSP1_QUP1_I2C_APPS_CLK_SRC] = &blsp1_qup1_i2c_apps_clk_src.clkr,
+	[BLSP1_QUP1_SPI_APPS_CLK_SRC] = &blsp1_qup1_spi_apps_clk_src.clkr,
+	[BLSP1_QUP2_I2C_APPS_CLK_SRC] = &blsp1_qup2_i2c_apps_clk_src.clkr,
+	[BLSP1_QUP2_SPI_APPS_CLK_SRC] = &blsp1_qup2_spi_apps_clk_src.clkr,
+	[BLSP1_UART1_APPS_CLK_SRC] = &blsp1_uart1_apps_clk_src.clkr,
+	[BLSP1_UART2_APPS_CLK_SRC] = &blsp1_uart2_apps_clk_src.clkr,
+	[GCC_USB3_MOCK_UTMI_CLK_SRC] = &usb30_mock_utmi_clk_src.clkr,
+	[GCC_APPS_CLK_SRC] = &apps_clk_src.clkr,
+	[GCC_APPS_AHB_CLK_SRC] = &apps_ahb_clk_src.clkr,
+	[GP1_CLK_SRC] = &gp1_clk_src.clkr,
+	[GP2_CLK_SRC] = &gp2_clk_src.clkr,
+	[GP3_CLK_SRC] = &gp3_clk_src.clkr,
+	[SDCC1_APPS_CLK_SRC] = &sdcc1_apps_clk_src.clkr,
+	[FEPHY_125M_DLY_CLK_SRC] = &fephy_125m_dly_clk_src.clkr,
+	[WCSS2G_CLK_SRC] = &wcss2g_clk_src.clkr,
+	[WCSS5G_CLK_SRC] = &wcss5g_clk_src.clkr,
+	[GCC_APSS_AHB_CLK] = &gcc_apss_ahb_clk.clkr,
+	[GCC_AUDIO_AHB_CLK] = &gcc_audio_ahb_clk.clkr,
+	[GCC_AUDIO_PWM_CLK] = &gcc_audio_pwm_clk.clkr,
+	[GCC_BLSP1_AHB_CLK] = &gcc_blsp1_ahb_clk.clkr,
+	[GCC_BLSP1_QUP1_I2C_APPS_CLK] = &gcc_blsp1_qup1_i2c_apps_clk.clkr,
+	[GCC_BLSP1_QUP1_SPI_APPS_CLK] = &gcc_blsp1_qup1_spi_apps_clk.clkr,
+	[GCC_BLSP1_QUP2_I2C_APPS_CLK] = &gcc_blsp1_qup2_i2c_apps_clk.clkr,
+	[GCC_BLSP1_QUP2_SPI_APPS_CLK] = &gcc_blsp1_qup2_spi_apps_clk.clkr,
+	[GCC_BLSP1_UART1_APPS_CLK] = &gcc_blsp1_uart1_apps_clk.clkr,
+	[GCC_BLSP1_UART2_APPS_CLK] = &gcc_blsp1_uart2_apps_clk.clkr,
+	[GCC_DCD_XO_CLK] = &gcc_dcd_xo_clk.clkr,
+	[GCC_GP1_CLK] = &gcc_gp1_clk.clkr,
+	[GCC_GP2_CLK] = &gcc_gp2_clk.clkr,
+	[GCC_GP3_CLK] = &gcc_gp3_clk.clkr,
+	[GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+	[GCC_CRYPTO_AHB_CLK] = &gcc_crypto_ahb_clk.clkr,
+	[GCC_CRYPTO_AXI_CLK] = &gcc_crypto_axi_clk.clkr,
+	[GCC_CRYPTO_CLK] = &gcc_crypto_clk.clkr,
+	[GCC_ESS_CLK] = &gcc_ess_clk.clkr,
+	[GCC_IMEM_AXI_CLK] = &gcc_imem_axi_clk.clkr,
+	[GCC_IMEM_CFG_AHB_CLK] = &gcc_imem_cfg_ahb_clk.clkr,
+	[GCC_PCIE_AHB_CLK] = &gcc_pcie_ahb_clk.clkr,
+	[GCC_PCIE_AXI_M_CLK] = &gcc_pcie_axi_m_clk.clkr,
+	[GCC_PCIE_AXI_S_CLK] = &gcc_pcie_axi_s_clk.clkr,
+	[GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+	[GCC_QPIC_AHB_CLK] = &gcc_qpic_ahb_clk.clkr,
+	[GCC_QPIC_CLK] = &gcc_qpic_clk.clkr,
+	[GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr,
+	[GCC_SDCC1_APPS_CLK] = &gcc_sdcc1_apps_clk.clkr,
+	[GCC_TLMM_AHB_CLK] = &gcc_tlmm_ahb_clk.clkr,
+	[GCC_USB2_MASTER_CLK] = &gcc_usb2_master_clk.clkr,
+	[GCC_USB2_SLEEP_CLK] = &gcc_usb2_sleep_clk.clkr,
+	[GCC_USB2_MOCK_UTMI_CLK] = &gcc_usb2_mock_utmi_clk.clkr,
+	[GCC_USB3_MASTER_CLK] = &gcc_usb3_master_clk.clkr,
+	[GCC_USB3_SLEEP_CLK] = &gcc_usb3_sleep_clk.clkr,
+	[GCC_USB3_MOCK_UTMI_CLK] = &gcc_usb3_mock_utmi_clk.clkr,
+	[GCC_WCSS2G_CLK] = &gcc_wcss2g_clk.clkr,
+	[GCC_WCSS2G_REF_CLK] = &gcc_wcss2g_ref_clk.clkr,
+	[GCC_WCSS2G_RTC_CLK] = &gcc_wcss2g_rtc_clk.clkr,
+	[GCC_WCSS5G_CLK] = &gcc_wcss5g_clk.clkr,
+	[GCC_WCSS5G_REF_CLK] = &gcc_wcss5g_ref_clk.clkr,
+	[GCC_WCSS5G_RTC_CLK] = &gcc_wcss5g_rtc_clk.clkr,
+};
+
+static const struct qcom_reset_map gcc_ipq4019_resets[] = {
+	[WIFI0_CPU_INIT_RESET] = { 0x1f008, 5 },
+	[WIFI0_RADIO_SRIF_RESET] = { 0x1f008, 4 },
+	[WIFI0_RADIO_WARM_RESET] = { 0x1f008, 3 },
+	[WIFI0_RADIO_COLD_RESET] = { 0x1f008, 2 },
+	[WIFI0_CORE_WARM_RESET] = { 0x1f008, 1 },
+	[WIFI0_CORE_COLD_RESET] = { 0x1f008, 0 },
+	[WIFI1_CPU_INIT_RESET] = { 0x20008, 5 },
+	[WIFI1_RADIO_SRIF_RESET] = { 0x20008, 4 },
+	[WIFI1_RADIO_WARM_RESET] = { 0x20008, 3 },
+	[WIFI1_RADIO_COLD_RESET] = { 0x20008, 2 },
+	[WIFI1_CORE_WARM_RESET] = { 0x20008, 1 },
+	[WIFI1_CORE_COLD_RESET] = { 0x20008, 0 },
+	[USB3_UNIPHY_PHY_ARES] = { 0x1e038, 5 },
+	[USB3_HSPHY_POR_ARES] = { 0x1e038, 4 },
+	[USB3_HSPHY_S_ARES] = { 0x1e038, 2 },
+	[USB2_HSPHY_POR_ARES] = { 0x1e01c, 4 },
+	[USB2_HSPHY_S_ARES] = { 0x1e01c, 2 },
+	[PCIE_PHY_AHB_ARES] = { 0x1d010, 11 },
+	[PCIE_AHB_ARES] = { 0x1d010, 10 },
+	[PCIE_PWR_ARES] = { 0x1d010, 9 },
+	[PCIE_PIPE_STICKY_ARES] = { 0x1d010, 8 },
+	[PCIE_AXI_M_STICKY_ARES] = { 0x1d010, 7 },
+	[PCIE_PHY_ARES] = { 0x1d010, 6 },
+	[PCIE_PARF_XPU_ARES] = { 0x1d010, 5 },
+	[PCIE_AXI_S_XPU_ARES] = { 0x1d010, 4 },
+	[PCIE_AXI_M_VMIDMT_ARES] = { 0x1d010, 3 },
+	[PCIE_PIPE_ARES] = { 0x1d010, 2 },
+	[PCIE_AXI_S_ARES] = { 0x1d010, 1 },
+	[PCIE_AXI_M_ARES] = { 0x1d010, 0 },
+	[ESS_RESET] = { 0x12008, 0},
+	[GCC_BLSP1_BCR] = {0x01000, 0},
+	[GCC_BLSP1_QUP1_BCR] = {0x02000, 0},
+	[GCC_BLSP1_UART1_BCR] = {0x02038, 0},
+	[GCC_BLSP1_QUP2_BCR] = {0x03008, 0},
+	[GCC_BLSP1_UART2_BCR] = {0x03028, 0},
+	[GCC_BIMC_BCR] = {0x04000, 0},
+	[GCC_TLMM_BCR] = {0x05000, 0},
+	[GCC_IMEM_BCR] = {0x0E000, 0},
+	[GCC_ESS_BCR] = {0x12008, 0},
+	[GCC_PRNG_BCR] = {0x13000, 0},
+	[GCC_BOOT_ROM_BCR] = {0x13008, 0},
+	[GCC_CRYPTO_BCR] = {0x16000, 0},
+	[GCC_SDCC1_BCR] = {0x18000, 0},
+	[GCC_SEC_CTRL_BCR] = {0x1A000, 0},
+	[GCC_AUDIO_BCR] = {0x1B008, 0},
+	[GCC_QPIC_BCR] = {0x1C000, 0},
+	[GCC_PCIE_BCR] = {0x1D000, 0},
+	[GCC_USB2_BCR] = {0x1E008, 0},
+	[GCC_USB2_PHY_BCR] = {0x1E018, 0},
+	[GCC_USB3_BCR] = {0x1E024, 0},
+	[GCC_USB3_PHY_BCR] = {0x1E034, 0},
+	[GCC_SYSTEM_NOC_BCR] = {0x21000, 0},
+	[GCC_PCNOC_BCR] = {0x2102C, 0},
+	[GCC_DCD_BCR] = {0x21038, 0},
+	[GCC_SNOC_BUS_TIMEOUT0_BCR] = {0x21064, 0},
+	[GCC_SNOC_BUS_TIMEOUT1_BCR] = {0x2106C, 0},
+	[GCC_SNOC_BUS_TIMEOUT2_BCR] = {0x21074, 0},
+	[GCC_SNOC_BUS_TIMEOUT3_BCR] = {0x2107C, 0},
+	[GCC_PCNOC_BUS_TIMEOUT0_BCR] = {0x21084, 0},
+	[GCC_PCNOC_BUS_TIMEOUT1_BCR] = {0x2108C, 0},
+	[GCC_PCNOC_BUS_TIMEOUT2_BCR] = {0x21094, 0},
+	[GCC_PCNOC_BUS_TIMEOUT3_BCR] = {0x2109C, 0},
+	[GCC_PCNOC_BUS_TIMEOUT4_BCR] = {0x210A4, 0},
+	[GCC_PCNOC_BUS_TIMEOUT5_BCR] = {0x210AC, 0},
+	[GCC_PCNOC_BUS_TIMEOUT6_BCR] = {0x210B4, 0},
+	[GCC_PCNOC_BUS_TIMEOUT7_BCR] = {0x210BC, 0},
+	[GCC_PCNOC_BUS_TIMEOUT8_BCR] = {0x210C4, 0},
+	[GCC_PCNOC_BUS_TIMEOUT9_BCR] = {0x210CC, 0},
+	[GCC_TCSR_BCR] = {0x22000, 0},
+	[GCC_MPM_BCR] = {0x24000, 0},
+	[GCC_SPDM_BCR] = {0x25000, 0},
+};
+
+static const struct regmap_config gcc_ipq4019_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+	.max_register	= 0x2dfff,
+	.fast_io	= true,
+};
+
+static const struct qcom_cc_desc gcc_ipq4019_desc = {
+	.config = &gcc_ipq4019_regmap_config,
+	.clks = gcc_ipq4019_clocks,
+	.num_clks = ARRAY_SIZE(gcc_ipq4019_clocks),
+	.resets = gcc_ipq4019_resets,
+	.num_resets = ARRAY_SIZE(gcc_ipq4019_resets),
+};
+
+static const struct of_device_id gcc_ipq4019_match_table[] = {
+	{ .compatible = "qcom,gcc-ipq4019" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, gcc_ipq4019_match_table);
+
+static int gcc_ipq4019_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	clk_register_fixed_rate(dev, "fepll125", "xo", 0, 200000000);
+	clk_register_fixed_rate(dev, "fepll125dly", "xo", 0, 200000000);
+	clk_register_fixed_rate(dev, "fepllwcss2g", "xo", 0, 200000000);
+	clk_register_fixed_rate(dev, "fepllwcss5g", "xo", 0, 200000000);
+	clk_register_fixed_rate(dev, "fepll200", "xo", 0, 200000000);
+	clk_register_fixed_rate(dev, "fepll500", "xo", 0, 200000000);
+	clk_register_fixed_rate(dev, "ddrpllapss", "xo", 0, 666000000);
+
+	return qcom_cc_probe(pdev, &gcc_ipq4019_desc);
+}
+
+static struct platform_driver gcc_ipq4019_driver = {
+	.probe		= gcc_ipq4019_probe,
+	.driver		= {
+		.name	= "qcom,gcc-ipq4019",
+		.owner	= THIS_MODULE,
+		.of_match_table = gcc_ipq4019_match_table,
+	},
+};
+
+static int __init gcc_ipq4019_init(void)
+{
+	return platform_driver_register(&gcc_ipq4019_driver);
+}
+core_initcall(gcc_ipq4019_init);
+
+static void __exit gcc_ipq4019_exit(void)
+{
+	platform_driver_unregister(&gcc_ipq4019_driver);
+}
+module_exit(gcc_ipq4019_exit);
+
+MODULE_ALIAS("platform:gcc-ipq4019");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("QCOM GCC IPQ4019 driver");

diff --git a/drivers/clk/qcom/gcc-ipq806x.c b/drivers/clk/qcom/gcc-ipq806x.c
index dd5402b..52a7d39 100644
--- a/drivers/clk/qcom/gcc-ipq806x.c
+++ b/drivers/clk/qcom/gcc-ipq806x.c

@@ -890,7 +890,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -906,7 +905,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -922,7 +920,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -938,7 +935,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -954,7 +950,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi6_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -970,7 +965,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi7_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1144,7 +1138,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmem_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1308,7 +1301,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1324,7 +1316,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1394,7 +1385,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "tsif_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1410,7 +1400,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "dma_bam_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1425,7 +1414,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1442,7 +1430,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_pbus_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1457,7 +1444,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb0_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1472,7 +1458,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1487,7 +1472,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_ssbi2_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1504,7 +1488,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "rpm_msg_ram_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1563,7 +1546,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1577,7 +1559,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_aux_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1591,7 +1572,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1605,7 +1585,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_phy_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1659,7 +1638,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie1_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1673,7 +1651,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie1_aux_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1687,7 +1664,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1701,7 +1677,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie1_phy_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1755,7 +1730,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie2_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1769,7 +1743,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie2_aux_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1783,7 +1756,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1797,7 +1769,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie2_phy_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1887,7 +1858,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1901,7 +1871,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1915,7 +1884,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sfab_sata_s_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1929,7 +1897,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_phy_cfg_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2139,7 +2106,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2218,7 +2184,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2234,7 +2199,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "ebi2_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2248,7 +2212,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "ebi2_always_on_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };

diff --git a/drivers/clk/qcom/gcc-msm8660.c b/drivers/clk/qcom/gcc-msm8660.c
index ad41303..6dc5586 100644
--- a/drivers/clk/qcom/gcc-msm8660.c
+++ b/drivers/clk/qcom/gcc-msm8660.c

@@ -1479,7 +1479,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmem_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2027,7 +2026,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2041,7 +2039,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2055,7 +2052,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2069,7 +2065,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2083,7 +2078,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2097,7 +2091,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi6_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2111,7 +2104,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi7_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2125,7 +2117,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi8_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2139,7 +2130,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi9_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2153,7 +2143,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi10_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2167,7 +2156,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi11_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2181,7 +2169,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi12_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2195,7 +2182,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "tsif_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2209,7 +2195,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2223,7 +2208,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2237,7 +2221,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2251,7 +2234,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2265,7 +2247,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2279,7 +2260,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2293,7 +2273,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2307,7 +2286,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2322,7 +2300,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2337,7 +2314,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_pbus_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2352,7 +2328,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "adm1_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2367,7 +2342,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "adm1_pbus_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2382,7 +2356,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "modem_ahb1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2397,7 +2370,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "modem_ahb2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2412,7 +2384,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb0_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2427,7 +2398,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2442,7 +2412,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_ssbi2_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2459,7 +2428,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "rpm_msg_ram_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };

diff --git a/drivers/clk/qcom/gcc-msm8916.c b/drivers/clk/qcom/gcc-msm8916.c
index 8cc9b28..9c29080 100644
--- a/drivers/clk/qcom/gcc-msm8916.c
+++ b/drivers/clk/qcom/gcc-msm8916.c

@@ -2590,6 +2590,23 @@
 	},
 };
 
+static struct clk_branch gcc_mss_q6_bimc_axi_clk = {
+	.halt_reg = 0x49004,
+	.clkr = {
+		.enable_reg = 0x49004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_mss_q6_bimc_axi_clk",
+			.parent_names = (const char *[]){
+				"bimc_ddr_clk_src",
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
 static struct clk_branch gcc_oxili_ahb_clk = {
 	.halt_reg = 0x59028,
 	.clkr = {
@@ -3227,6 +3244,7 @@
 	[GCC_ULTAUDIO_LPAIF_SEC_I2S_CLK] = &gcc_ultaudio_lpaif_sec_i2s_clk.clkr,
 	[GCC_ULTAUDIO_LPAIF_AUX_I2S_CLK] = &gcc_ultaudio_lpaif_aux_i2s_clk.clkr,
 	[GCC_CODEC_DIGCODEC_CLK] = &gcc_codec_digcodec_clk.clkr,
+	[GCC_MSS_Q6_BIMC_AXI_CLK] = &gcc_mss_q6_bimc_axi_clk.clkr,
 };
 
 static struct gdsc *gcc_msm8916_gdscs[] = {

diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c
index 983dd7d..eb551c7 100644
--- a/drivers/clk/qcom/gcc-msm8960.c
+++ b/drivers/clk/qcom/gcc-msm8960.c

@@ -1546,7 +1546,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmem_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2143,7 +2142,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hsic_hsio_cal_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2293,7 +2291,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "ce1_core_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2307,7 +2304,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "ce1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2323,7 +2319,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "dma_bam_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2339,7 +2334,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2355,7 +2349,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2371,7 +2364,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2387,7 +2379,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2403,7 +2394,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2419,7 +2409,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi6_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2435,7 +2424,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi7_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2451,7 +2439,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi8_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2467,7 +2454,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi9_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2483,7 +2469,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi10_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2499,7 +2484,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi11_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2515,7 +2499,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi12_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2531,7 +2514,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "tsif_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2545,7 +2527,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2559,7 +2540,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2575,7 +2555,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2589,7 +2568,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2603,7 +2581,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2617,7 +2594,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hsic_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2633,7 +2609,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2649,7 +2624,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2665,7 +2639,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2681,7 +2654,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2697,7 +2669,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2712,7 +2683,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2729,7 +2699,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_pbus_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2753,7 +2722,7 @@
 	},
 	.freq_tbl = clk_tbl_ce3,
 	.clkr = {
-		.enable_reg = 0x2c08,
+		.enable_reg = 0x36c0,
 		.enable_mask = BIT(7),
 		.hw.init = &(struct clk_init_data){
 			.name = "ce3_src",
@@ -2769,7 +2738,7 @@
 	.halt_reg = 0x2fdc,
 	.halt_bit = 5,
 	.clkr = {
-		.enable_reg = 0x36c4,
+		.enable_reg = 0x36cc,
 		.enable_mask = BIT(4),
 		.hw.init = &(struct clk_init_data){
 			.name = "ce3_core_clk",
@@ -2883,7 +2852,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2897,7 +2865,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2911,7 +2878,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sfab_sata_s_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2925,7 +2891,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_phy_cfg_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2939,7 +2904,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_phy_ref_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2953,7 +2917,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2967,7 +2930,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2982,7 +2944,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb0_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2997,7 +2958,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -3012,7 +2972,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_ssbi2_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -3029,7 +2988,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "rpm_msg_ram_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };

diff --git a/drivers/clk/qcom/gcc-msm8974.c b/drivers/clk/qcom/gcc-msm8974.c
index 335952d..0091520 100644
--- a/drivers/clk/qcom/gcc-msm8974.c
+++ b/drivers/clk/qcom/gcc-msm8974.c

@@ -1965,7 +1965,6 @@
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_mss_q6_bimc_axi_clk",
-			.flags = CLK_IS_ROOT,
 			.ops = &clk_branch2_ops,
 		},
 	},

diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c
index 16d7c32..c9b96f3 100644
--- a/drivers/clk/qcom/gcc-msm8996.c
+++ b/drivers/clk/qcom/gcc-msm8996.c

@@ -30,6 +30,7 @@
 #include "clk-rcg.h"
 #include "clk-branch.h"
 #include "reset.h"
+#include "gdsc.h"
 
 #define F(f, s, h, m, n) { (f), (s), (2 * (h) - 1), (m), (n) }
 
@@ -1320,7 +1321,7 @@
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_mmss_bimc_gfx_clk",
-			.flags = CLK_SET_RATE_PARENT | CLK_IS_ROOT,
+			.flags = CLK_SET_RATE_PARENT,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -2314,7 +2315,7 @@
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_bimc_gfx_clk",
-			.flags = CLK_SET_RATE_PARENT | CLK_IS_ROOT,
+			.flags = CLK_SET_RATE_PARENT,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -2814,7 +2815,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_ufs_sys_clk_core_clk",
 			.ops = &clk_branch2_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2827,7 +2827,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_ufs_tx_symbol_clk_core_clk",
 			.ops = &clk_branch2_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -3059,6 +3058,83 @@
 	&ufs_ice_core_postdiv_clk_src.hw,
 };
 
+static struct gdsc aggre0_noc_gdsc = {
+	.gdscr = 0x81004,
+	.gds_hw_ctrl = 0x81028,
+	.pd = {
+		.name = "aggre0_noc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_aggre0_noc_gdsc = {
+	.gdscr = 0x7d024,
+	.pd = {
+		.name = "hlos1_vote_aggre0_noc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_lpass_adsp_gdsc = {
+	.gdscr = 0x7d034,
+	.pd = {
+		.name = "hlos1_vote_lpass_adsp",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_lpass_core_gdsc = {
+	.gdscr = 0x7d038,
+	.pd = {
+		.name = "hlos1_vote_lpass_core",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc usb30_gdsc = {
+	.gdscr = 0xf004,
+	.pd = {
+		.name = "usb30",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc pcie0_gdsc = {
+	.gdscr = 0x6b004,
+	.pd = {
+		.name = "pcie0",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc pcie1_gdsc = {
+	.gdscr = 0x6d004,
+	.pd = {
+		.name = "pcie1",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc pcie2_gdsc = {
+	.gdscr = 0x6e004,
+	.pd = {
+		.name = "pcie2",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc ufs_gdsc = {
+	.gdscr = 0x75004,
+	.pd = {
+		.name = "ufs",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
 static struct clk_regmap *gcc_msm8996_clocks[] = {
 	[GPLL0_EARLY] = &gpll0_early.clkr,
 	[GPLL0] = &gpll0.clkr,
@@ -3245,6 +3321,18 @@
 	[GCC_RX1_USB2_CLKREF_CLK] = &gcc_rx1_usb2_clkref_clk.clkr,
 };
 
+static struct gdsc *gcc_msm8996_gdscs[] = {
+	[AGGRE0_NOC_GDSC] = &aggre0_noc_gdsc,
+	[HLOS1_VOTE_AGGRE0_NOC_GDSC] = &hlos1_vote_aggre0_noc_gdsc,
+	[HLOS1_VOTE_LPASS_ADSP_GDSC] = &hlos1_vote_lpass_adsp_gdsc,
+	[HLOS1_VOTE_LPASS_CORE_GDSC] = &hlos1_vote_lpass_core_gdsc,
+	[USB30_GDSC] = &usb30_gdsc,
+	[PCIE0_GDSC] = &pcie0_gdsc,
+	[PCIE1_GDSC] = &pcie1_gdsc,
+	[PCIE2_GDSC] = &pcie2_gdsc,
+	[UFS_GDSC] = &ufs_gdsc,
+};
+
 static const struct qcom_reset_map gcc_msm8996_resets[] = {
 	[GCC_SYSTEM_NOC_BCR] = { 0x4000 },
 	[GCC_CONFIG_NOC_BCR] = { 0x5000 },
@@ -3363,6 +3451,8 @@
 	.num_clks = ARRAY_SIZE(gcc_msm8996_clocks),
 	.resets = gcc_msm8996_resets,
 	.num_resets = ARRAY_SIZE(gcc_msm8996_resets),
+	.gdscs = gcc_msm8996_gdscs,
+	.num_gdscs = ARRAY_SIZE(gcc_msm8996_gdscs),
 };
 
 static const struct of_device_id gcc_msm8996_match_table[] = {

diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index da9fad8..f12d7b2 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c

@@ -16,6 +16,7 @@
 #include <linux/err.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/pm_domain.h>
 #include <linux/regmap.h>
 #include <linux/reset-controller.h>
@@ -42,12 +43,12 @@
 
 #define domain_to_gdsc(domain) container_of(domain, struct gdsc, pd)
 
-static int gdsc_is_enabled(struct gdsc *sc)
+static int gdsc_is_enabled(struct gdsc *sc, unsigned int reg)
 {
 	u32 val;
 	int ret;
 
-	ret = regmap_read(sc->regmap, sc->gdscr, &val);
+	ret = regmap_read(sc->regmap, reg, &val);
 	if (ret)
 		return ret;
 
@@ -58,28 +59,46 @@
 {
 	int ret;
 	u32 val = en ? 0 : SW_COLLAPSE_MASK;
-	u32 check = en ? PWR_ON_MASK : 0;
-	unsigned long timeout;
+	ktime_t start;
+	unsigned int status_reg = sc->gdscr;
 
 	ret = regmap_update_bits(sc->regmap, sc->gdscr, SW_COLLAPSE_MASK, val);
 	if (ret)
 		return ret;
 
-	timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
+	/* If disabling votable gdscs, don't poll on status */
+	if ((sc->flags & VOTABLE) && !en) {
+		/*
+		 * Add a short delay here to ensure that an enable
+		 * right after it was disabled does not put it in an
+		 * unknown state
+		 */
+		udelay(TIMEOUT_US);
+		return 0;
+	}
+
+	if (sc->gds_hw_ctrl) {
+		status_reg = sc->gds_hw_ctrl;
+		/*
+		 * The gds hw controller asserts/de-asserts the status bit soon
+		 * after it receives a power on/off request from a master.
+		 * The controller then takes around 8 xo cycles to start its
+		 * internal state machine and update the status bit. During
+		 * this time, the status bit does not reflect the true status
+		 * of the core.
+		 * Add a delay of 1 us between writing to the SW_COLLAPSE bit
+		 * and polling the status bit.
+		 */
+		udelay(1);
+	}
+
+	start = ktime_get();
 	do {
-		ret = regmap_read(sc->regmap, sc->gdscr, &val);
-		if (ret)
-			return ret;
-
-		if ((val & PWR_ON_MASK) == check)
+		if (gdsc_is_enabled(sc, status_reg) == en)
 			return 0;
-	} while (time_before(jiffies, timeout));
+	} while (ktime_us_delta(ktime_get(), start) < TIMEOUT_US);
 
-	ret = regmap_read(sc->regmap, sc->gdscr, &val);
-	if (ret)
-		return ret;
-
-	if ((val & PWR_ON_MASK) == check)
+	if (gdsc_is_enabled(sc, status_reg) == en)
 		return 0;
 
 	return -ETIMEDOUT;
@@ -165,6 +184,7 @@
 {
 	u32 mask, val;
 	int on, ret;
+	unsigned int reg;
 
 	/*
 	 * Disable HW trigger: collapse/restore occur based on registers writes.
@@ -185,10 +205,18 @@
 			return ret;
 	}
 
-	on = gdsc_is_enabled(sc);
+	reg = sc->gds_hw_ctrl ? sc->gds_hw_ctrl : sc->gdscr;
+	on = gdsc_is_enabled(sc, reg);
 	if (on < 0)
 		return on;
 
+	/*
+	 * Votable GDSCs can be ON due to Vote from other masters.
+	 * If a Votable GDSC is ON, make sure we have a Vote.
+	 */
+	if ((sc->flags & VOTABLE) && on)
+		gdsc_enable(&sc->pd);
+
 	if (on || (sc->pwrsts & PWRSTS_RET))
 		gdsc_force_mem_on(sc);
 	else
@@ -201,11 +229,14 @@
 	return 0;
 }
 
-int gdsc_register(struct device *dev, struct gdsc **scs, size_t num,
+int gdsc_register(struct gdsc_desc *desc,
 		  struct reset_controller_dev *rcdev, struct regmap *regmap)
 {
 	int i, ret;
 	struct genpd_onecell_data *data;
+	struct device *dev = desc->dev;
+	struct gdsc **scs = desc->scs;
+	size_t num = desc->num;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -228,10 +259,30 @@
 		data->domains[i] = &scs[i]->pd;
 	}
 
+	/* Add subdomains */
+	for (i = 0; i < num; i++) {
+		if (!scs[i])
+			continue;
+		if (scs[i]->parent)
+			pm_genpd_add_subdomain(scs[i]->parent, &scs[i]->pd);
+	}
+
 	return of_genpd_add_provider_onecell(dev->of_node, data);
 }
 
-void gdsc_unregister(struct device *dev)
+void gdsc_unregister(struct gdsc_desc *desc)
 {
+	int i;
+	struct device *dev = desc->dev;
+	struct gdsc **scs = desc->scs;
+	size_t num = desc->num;
+
+	/* Remove subdomains */
+	for (i = 0; i < num; i++) {
+		if (!scs[i])
+			continue;
+		if (scs[i]->parent)
+			pm_genpd_remove_subdomain(scs[i]->parent, &scs[i]->pd);
+	}
 	of_genpd_del_provider(dev->of_node);
 }

diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h
index 5ded268..3bf497c 100644
--- a/drivers/clk/qcom/gdsc.h
+++ b/drivers/clk/qcom/gdsc.h

@@ -20,18 +20,12 @@
 struct regmap;
 struct reset_controller_dev;
 
-/* Powerdomain allowable state bitfields */
-#define PWRSTS_OFF		BIT(0)
-#define PWRSTS_RET		BIT(1)
-#define PWRSTS_ON		BIT(2)
-#define PWRSTS_OFF_ON		(PWRSTS_OFF | PWRSTS_ON)
-#define PWRSTS_RET_ON		(PWRSTS_RET | PWRSTS_ON)
-
 /**
  * struct gdsc - Globally Distributed Switch Controller
  * @pd: generic power domain
  * @regmap: regmap for MMIO accesses
  * @gdscr: gsdc control register
+ * @gds_hw_ctrl: gds_hw_ctrl register
  * @cxcs: offsets of branch registers to toggle mem/periph bits in
  * @cxc_count: number of @cxcs
  * @pwrsts: Possible powerdomain power states
@@ -41,28 +35,44 @@
  */
 struct gdsc {
 	struct generic_pm_domain	pd;
+	struct generic_pm_domain	*parent;
 	struct regmap			*regmap;
 	unsigned int			gdscr;
+	unsigned int			gds_hw_ctrl;
 	unsigned int			*cxcs;
 	unsigned int			cxc_count;
 	const u8			pwrsts;
+/* Powerdomain allowable state bitfields */
+#define PWRSTS_OFF		BIT(0)
+#define PWRSTS_RET		BIT(1)
+#define PWRSTS_ON		BIT(2)
+#define PWRSTS_OFF_ON		(PWRSTS_OFF | PWRSTS_ON)
+#define PWRSTS_RET_ON		(PWRSTS_RET | PWRSTS_ON)
+	const u8			flags;
+#define VOTABLE		BIT(0)
 	struct reset_controller_dev	*rcdev;
 	unsigned int			*resets;
 	unsigned int			reset_count;
 };
 
+struct gdsc_desc {
+	struct device *dev;
+	struct gdsc **scs;
+	size_t num;
+};
+
 #ifdef CONFIG_QCOM_GDSC
-int gdsc_register(struct device *, struct gdsc **, size_t n,
-		  struct reset_controller_dev *, struct regmap *);
-void gdsc_unregister(struct device *);
+int gdsc_register(struct gdsc_desc *desc, struct reset_controller_dev *,
+		  struct regmap *);
+void gdsc_unregister(struct gdsc_desc *desc);
 #else
-static inline int gdsc_register(struct device *d, struct gdsc **g, size_t n,
+static inline int gdsc_register(struct gdsc_desc *desc,
 				struct reset_controller_dev *rcdev,
 				struct regmap *r)
 {
 	return -ENOSYS;
 }
 
-static inline void gdsc_unregister(struct device *d) {};
+static inline void gdsc_unregister(struct gdsc_desc *desc) {};
 #endif /* CONFIG_QCOM_GDSC */
 #endif /* __QCOM_GDSC_H__ */

diff --git a/drivers/clk/qcom/mmcc-msm8960.c b/drivers/clk/qcom/mmcc-msm8960.c
index 00e3619..7f21421 100644
--- a/drivers/clk/qcom/mmcc-msm8960.c
+++ b/drivers/clk/qcom/mmcc-msm8960.c

@@ -1789,7 +1789,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gmem_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1805,7 +1804,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "ijpeg_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1821,7 +1819,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "mmss_imem_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1835,7 +1832,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "jpegd_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1851,7 +1847,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vcodec_axi_b_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1867,7 +1862,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vcodec_axi_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1883,7 +1877,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vcodec_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1897,7 +1890,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vfe_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1913,7 +1905,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "mdp_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1929,7 +1920,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "rot_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1945,7 +1935,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vcap_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1961,7 +1950,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vpe_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1977,7 +1965,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gfx3d_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1991,7 +1978,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "amp_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2005,7 +1991,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "csi_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2019,7 +2004,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "dsi_m_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2035,7 +2019,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "dsi_s_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2049,7 +2032,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "dsi2_m_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2065,7 +2047,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "dsi2_s_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2425,7 +2406,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gfx2d0_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2441,7 +2421,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gfx2d1_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2457,7 +2436,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "gfx3d_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2473,7 +2451,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "hdmi_m_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2489,7 +2466,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "hdmi_s_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2503,7 +2479,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "ijpeg_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2519,7 +2494,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "mmss_imem_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2533,7 +2507,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "jpegd_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2547,7 +2520,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "mdp_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2561,7 +2533,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "rot_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2577,7 +2548,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "smmu_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2591,7 +2561,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "tv_enc_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2605,7 +2574,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vcap_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2621,7 +2589,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vcodec_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2635,7 +2602,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vfe_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2649,7 +2615,6 @@
 		.hw.init = &(struct clk_init_data){
 			.name = "vpe_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };

diff --git a/drivers/clk/qcom/mmcc-msm8974.c b/drivers/clk/qcom/mmcc-msm8974.c
index 9d790bc..715e7cd 100644
--- a/drivers/clk/qcom/mmcc-msm8974.c
+++ b/drivers/clk/qcom/mmcc-msm8974.c

@@ -2400,6 +2400,7 @@
 	.pd = {
 		.name = "oxilicx",
 	},
+	.parent = &oxili_gdsc.pd,
 	.pwrsts = PWRSTS_OFF_ON,
 };
 
@@ -2615,7 +2616,6 @@
 static int mmcc_msm8974_probe(struct platform_device *pdev)
 {
 	struct regmap *regmap;
-	int ret;
 
 	regmap = qcom_cc_map(pdev, &mmcc_msm8974_desc);
 	if (IS_ERR(regmap))
@@ -2624,22 +2624,11 @@
 	clk_pll_configure_sr_hpm_lp(&mmpll1, regmap, &mmpll1_config, true);
 	clk_pll_configure_sr_hpm_lp(&mmpll3, regmap, &mmpll3_config, false);
 
-	ret = qcom_cc_really_probe(pdev, &mmcc_msm8974_desc, regmap);
-	if (ret)
-		return ret;
-
-	return pm_genpd_add_subdomain(&oxili_gdsc.pd, &oxilicx_gdsc.pd);
-}
-
-static int mmcc_msm8974_remove(struct platform_device *pdev)
-{
-	pm_genpd_remove_subdomain(&oxili_gdsc.pd, &oxilicx_gdsc.pd);
-	return 0;
+	return qcom_cc_really_probe(pdev, &mmcc_msm8974_desc, regmap);
 }
 
 static struct platform_driver mmcc_msm8974_driver = {
 	.probe		= mmcc_msm8974_probe,
-	.remove		= mmcc_msm8974_remove,
 	.driver		= {
 		.name	= "mmcc-msm8974",
 		.of_match_table = mmcc_msm8974_match_table,

diff --git a/drivers/clk/qcom/mmcc-msm8996.c b/drivers/clk/qcom/mmcc-msm8996.c
index 064f3ea..6df7ff3 100644
--- a/drivers/clk/qcom/mmcc-msm8996.c
+++ b/drivers/clk/qcom/mmcc-msm8996.c

@@ -32,6 +32,7 @@
 #include "clk-rcg.h"
 #include "clk-branch.h"
 #include "reset.h"
+#include "gdsc.h"
 
 #define F(f, s, h, m, n) { (f), (s), (2 * (h) - 1), (m), (n) }
 
@@ -2917,6 +2918,144 @@
 	&gpll0_div.hw,
 };
 
+static struct gdsc mmagic_video_gdsc = {
+	.gdscr = 0x119c,
+	.gds_hw_ctrl = 0x120c,
+	.pd = {
+		.name = "mmagic_video",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc mmagic_mdss_gdsc = {
+	.gdscr = 0x247c,
+	.gds_hw_ctrl = 0x2480,
+	.pd = {
+		.name = "mmagic_mdss",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc mmagic_camss_gdsc = {
+	.gdscr = 0x3c4c,
+	.gds_hw_ctrl = 0x3c50,
+	.pd = {
+		.name = "mmagic_camss",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc venus_gdsc = {
+	.gdscr = 0x1024,
+	.cxcs = (unsigned int []){ 0x1028, 0x1034, 0x1038 },
+	.cxc_count = 3,
+	.pd = {
+		.name = "venus",
+	},
+	.parent = &mmagic_video_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc venus_core0_gdsc = {
+	.gdscr = 0x1040,
+	.cxcs = (unsigned int []){ 0x1048 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "venus_core0",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc venus_core1_gdsc = {
+	.gdscr = 0x1044,
+	.cxcs = (unsigned int []){ 0x104c },
+	.cxc_count = 1,
+	.pd = {
+		.name = "venus_core1",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc camss_gdsc = {
+	.gdscr = 0x34a0,
+	.cxcs = (unsigned int []){ 0x36bc, 0x36c4 },
+	.cxc_count = 2,
+	.pd = {
+		.name = "camss",
+	},
+	.parent = &mmagic_camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc vfe0_gdsc = {
+	.gdscr = 0x3664,
+	.cxcs = (unsigned int []){ 0x36a8 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "vfe0",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc vfe1_gdsc = {
+	.gdscr = 0x3674,
+	.cxcs = (unsigned int []){ 0x36ac },
+	.cxc_count = 1,
+	.pd = {
+		.name = "vfe0",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc jpeg_gdsc = {
+	.gdscr = 0x35a4,
+	.cxcs = (unsigned int []){ 0x35a8, 0x35b0, 0x35c0, 0x35b8 },
+	.cxc_count = 4,
+	.pd = {
+		.name = "jpeg",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc cpp_gdsc = {
+	.gdscr = 0x36d4,
+	.cxcs = (unsigned int []){ 0x36b0 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "cpp",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc fd_gdsc = {
+	.gdscr = 0x3b64,
+	.cxcs = (unsigned int []){ 0x3b68, 0x3b6c },
+	.cxc_count = 2,
+	.pd = {
+		.name = "fd",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc mdss_gdsc = {
+	.gdscr = 0x2304,
+	.cxcs = (unsigned int []){ 0x2310, 0x231c },
+	.cxc_count = 2,
+	.pd = {
+		.name = "mdss",
+	},
+	.parent = &mmagic_mdss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
 static struct clk_regmap *mmcc_msm8996_clocks[] = {
 	[MMPLL0_EARLY] = &mmpll0_early.clkr,
 	[MMPLL0_PLL] = &mmpll0.clkr,
@@ -3093,6 +3232,22 @@
 	[FD_AHB_CLK] = &fd_ahb_clk.clkr,
 };
 
+static struct gdsc *mmcc_msm8996_gdscs[] = {
+	[MMAGIC_VIDEO_GDSC] = &mmagic_video_gdsc,
+	[MMAGIC_MDSS_GDSC] = &mmagic_mdss_gdsc,
+	[MMAGIC_CAMSS_GDSC] = &mmagic_camss_gdsc,
+	[VENUS_GDSC] = &venus_gdsc,
+	[VENUS_CORE0_GDSC] = &venus_core0_gdsc,
+	[VENUS_CORE1_GDSC] = &venus_core1_gdsc,
+	[CAMSS_GDSC] = &camss_gdsc,
+	[VFE0_GDSC] = &vfe0_gdsc,
+	[VFE1_GDSC] = &vfe1_gdsc,
+	[JPEG_GDSC] = &jpeg_gdsc,
+	[CPP_GDSC] = &cpp_gdsc,
+	[FD_GDSC] = &fd_gdsc,
+	[MDSS_GDSC] = &mdss_gdsc,
+};
+
 static const struct qcom_reset_map mmcc_msm8996_resets[] = {
 	[MMAGICAHB_BCR] = { 0x5020 },
 	[MMAGIC_CFG_BCR] = { 0x5050 },
@@ -3170,6 +3325,8 @@
 	.num_clks = ARRAY_SIZE(mmcc_msm8996_clocks),
 	.resets = mmcc_msm8996_resets,
 	.num_resets = ARRAY_SIZE(mmcc_msm8996_resets),
+	.gdscs = mmcc_msm8996_gdscs,
+	.num_gdscs = ARRAY_SIZE(mmcc_msm8996_gdscs),
 };
 
 static const struct of_device_id mmcc_msm8996_match_table[] = {

diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c
index 6c977d3..0324d8d 100644
--- a/drivers/clk/qcom/reset.c
+++ b/drivers/clk/qcom/reset.c

@@ -55,7 +55,7 @@
 	return regmap_update_bits(rst->regmap, map->reg, mask, 0);
 }
 
-struct reset_control_ops qcom_reset_ops = {
+const struct reset_control_ops qcom_reset_ops = {
 	.reset = qcom_reset,
 	.assert = qcom_reset_assert,
 	.deassert = qcom_reset_deassert,

diff --git a/drivers/clk/qcom/reset.h b/drivers/clk/qcom/reset.h
index 0e11e21..cda8779 100644
--- a/drivers/clk/qcom/reset.h
+++ b/drivers/clk/qcom/reset.h

@@ -32,6 +32,6 @@
 #define to_qcom_reset_controller(r) \
 	container_of(r, struct qcom_reset_controller, rcdev);
 
-extern struct reset_control_ops qcom_reset_ops;
+extern const struct reset_control_ops qcom_reset_ops;
 
 #endif

diff --git a/drivers/clk/shmobile/Makefile b/drivers/clk/renesas/Makefile
similarity index 100%
rename from drivers/clk/shmobile/Makefile
rename to drivers/clk/renesas/Makefile


diff --git a/drivers/clk/shmobile/clk-div6.c b/drivers/clk/renesas/clk-div6.c
similarity index 98%
rename from drivers/clk/shmobile/clk-div6.c
rename to drivers/clk/renesas/clk-div6.c
index 9999947..0627860 100644
--- a/drivers/clk/shmobile/clk-div6.c
+++ b/drivers/clk/renesas/clk-div6.c

@@ -82,9 +82,8 @@
 						unsigned long parent_rate)
 {
 	struct div6_clock *clock = to_div6_clock(hw);
-	unsigned int div = (clk_readl(clock->reg) & CPG_DIV6_DIV_MASK) + 1;
 
-	return parent_rate / div;
+	return parent_rate / clock->div;
 }
 
 static unsigned int cpg_div6_clock_calc_div(unsigned long rate,

diff --git a/drivers/clk/shmobile/clk-div6.h b/drivers/clk/renesas/clk-div6.h
similarity index 68%
rename from drivers/clk/shmobile/clk-div6.h
rename to drivers/clk/renesas/clk-div6.h
index 9a85a95..567b31d 100644
--- a/drivers/clk/shmobile/clk-div6.h
+++ b/drivers/clk/renesas/clk-div6.h

@@ -1,5 +1,5 @@
-#ifndef __SHMOBILE_CLK_DIV6_H__
-#define __SHMOBILE_CLK_DIV6_H__
+#ifndef __RENESAS_CLK_DIV6_H__
+#define __RENESAS_CLK_DIV6_H__
 
 struct clk *cpg_div6_register(const char *name, unsigned int num_parents,
 			      const char **parent_names, void __iomem *reg);

diff --git a/drivers/clk/shmobile/clk-emev2.c b/drivers/clk/renesas/clk-emev2.c
similarity index 100%
rename from drivers/clk/shmobile/clk-emev2.c
rename to drivers/clk/renesas/clk-emev2.c


diff --git a/drivers/clk/shmobile/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c
similarity index 99%
rename from drivers/clk/shmobile/clk-mstp.c
rename to drivers/clk/renesas/clk-mstp.c
index 3b09716..3d44e18 100644
--- a/drivers/clk/shmobile/clk-mstp.c
+++ b/drivers/clk/renesas/clk-mstp.c

@@ -14,7 +14,7 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/of.h>

diff --git a/drivers/clk/shmobile/clk-r8a73a4.c b/drivers/clk/renesas/clk-r8a73a4.c
similarity index 99%
rename from drivers/clk/shmobile/clk-r8a73a4.c
rename to drivers/clk/renesas/clk-r8a73a4.c
index 9326204..28d204b 100644
--- a/drivers/clk/shmobile/clk-r8a73a4.c
+++ b/drivers/clk/renesas/clk-r8a73a4.c

@@ -9,7 +9,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>

diff --git a/drivers/clk/shmobile/clk-r8a7740.c b/drivers/clk/renesas/clk-r8a7740.c
similarity index 99%
rename from drivers/clk/shmobile/clk-r8a7740.c
rename to drivers/clk/renesas/clk-r8a7740.c
index 1e6b1da..2f7ce66 100644
--- a/drivers/clk/shmobile/clk-r8a7740.c
+++ b/drivers/clk/renesas/clk-r8a7740.c

@@ -9,7 +9,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>

diff --git a/drivers/clk/shmobile/clk-r8a7778.c b/drivers/clk/renesas/clk-r8a7778.c
similarity index 98%
rename from drivers/clk/shmobile/clk-r8a7778.c
rename to drivers/clk/renesas/clk-r8a7778.c
index b174155..40e3a50 100644
--- a/drivers/clk/shmobile/clk-r8a7778.c
+++ b/drivers/clk/renesas/clk-r8a7778.c

@@ -9,7 +9,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
 

diff --git a/drivers/clk/shmobile/clk-r8a7779.c b/drivers/clk/renesas/clk-r8a7779.c
similarity index 98%
rename from drivers/clk/shmobile/clk-r8a7779.c
rename to drivers/clk/renesas/clk-r8a7779.c
index 92275c5f..cf2a37d 100644
--- a/drivers/clk/shmobile/clk-r8a7779.c
+++ b/drivers/clk/renesas/clk-r8a7779.c

@@ -11,7 +11,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of.h>

diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c
similarity index 99%
rename from drivers/clk/shmobile/clk-rcar-gen2.c
rename to drivers/clk/renesas/clk-rcar-gen2.c
index 8419772..00e6aba 100644
--- a/drivers/clk/shmobile/clk-rcar-gen2.c
+++ b/drivers/clk/renesas/clk-rcar-gen2.c

@@ -11,7 +11,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/math64.h>

diff --git a/drivers/clk/shmobile/clk-rz.c b/drivers/clk/renesas/clk-rz.c
similarity index 98%
rename from drivers/clk/shmobile/clk-rz.c
rename to drivers/clk/renesas/clk-rz.c
index 9766e3c..f6312c6 100644
--- a/drivers/clk/shmobile/clk-rz.c
+++ b/drivers/clk/renesas/clk-rz.c

@@ -10,7 +10,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of.h>

diff --git a/drivers/clk/shmobile/clk-sh73a0.c b/drivers/clk/renesas/clk-sh73a0.c
similarity index 99%
rename from drivers/clk/shmobile/clk-sh73a0.c
rename to drivers/clk/renesas/clk-sh73a0.c
index 8966f8b..eea38f6 100644
--- a/drivers/clk/shmobile/clk-sh73a0.c
+++ b/drivers/clk/renesas/clk-sh73a0.c

@@ -9,7 +9,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of.h>

diff --git a/drivers/clk/shmobile/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
similarity index 61%
rename from drivers/clk/shmobile/r8a7795-cpg-mssr.c
rename to drivers/clk/renesas/r8a7795-cpg-mssr.c
index 13e9947..b2198aef 100644
--- a/drivers/clk/shmobile/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c

@@ -20,6 +20,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
+#include <linux/slab.h>
 
 #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
 
@@ -61,6 +62,7 @@
 	CLK_TYPE_GEN3_PLL2,
 	CLK_TYPE_GEN3_PLL3,
 	CLK_TYPE_GEN3_PLL4,
+	CLK_TYPE_GEN3_SD,
 };
 
 static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
@@ -99,11 +101,18 @@
 	DEF_FIXED("s3d1",       R8A7795_CLK_S3D1,  CLK_S3,         1, 1),
 	DEF_FIXED("s3d2",       R8A7795_CLK_S3D2,  CLK_S3,         2, 1),
 	DEF_FIXED("s3d4",       R8A7795_CLK_S3D4,  CLK_S3,         4, 1),
+
+	DEF_SD("sd0",           R8A7795_CLK_SD0,   CLK_PLL1_DIV2, 0x0074),
+	DEF_SD("sd1",           R8A7795_CLK_SD1,   CLK_PLL1_DIV2, 0x0078),
+	DEF_SD("sd2",           R8A7795_CLK_SD2,   CLK_PLL1_DIV2, 0x0268),
+	DEF_SD("sd3",           R8A7795_CLK_SD3,   CLK_PLL1_DIV2, 0x026c),
+
 	DEF_FIXED("cl",         R8A7795_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
 	DEF_FIXED("cp",         R8A7795_CLK_CP,    CLK_EXTAL,      2, 1),
 
 	DEF_DIV6P1("mso",       R8A7795_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
 	DEF_DIV6P1("hdmi",      R8A7795_CLK_HDMI,  CLK_PLL1_DIV2, 0x250),
+	DEF_DIV6P1("canfd",     R8A7795_CLK_CANFD, CLK_PLL1_DIV4, 0x244),
 };
 
 static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
@@ -120,8 +129,17 @@
 	DEF_MOD("sys-dmac1",		 218,	R8A7795_CLK_S3D1),
 	DEF_MOD("sys-dmac0",		 219,	R8A7795_CLK_S3D1),
 	DEF_MOD("scif2",		 310,	R8A7795_CLK_S3D4),
+	DEF_MOD("sdif3",		 311,	R8A7795_CLK_SD3),
+	DEF_MOD("sdif2",		 312,	R8A7795_CLK_SD2),
+	DEF_MOD("sdif1",		 313,	R8A7795_CLK_SD1),
+	DEF_MOD("sdif0",		 314,	R8A7795_CLK_SD0),
 	DEF_MOD("pcie1",		 318,	R8A7795_CLK_S3D1),
 	DEF_MOD("pcie0",		 319,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb3-if1",		 327,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb3-if0",		 328,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb-dmac0",		 330,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb-dmac1",		 331,	R8A7795_CLK_S3D1),
+	DEF_MOD("intc-ex",		 407,	R8A7795_CLK_CP),
 	DEF_MOD("intc-ap",		 408,	R8A7795_CLK_S3D1),
 	DEF_MOD("audmac0",		 502,	R8A7795_CLK_S3D4),
 	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S3D4),
@@ -130,6 +148,21 @@
 	DEF_MOD("hscif2",		 518,	R8A7795_CLK_S3D1),
 	DEF_MOD("hscif1",		 519,	R8A7795_CLK_S3D1),
 	DEF_MOD("hscif0",		 520,	R8A7795_CLK_S3D1),
+	DEF_MOD("fcpvd3",		 600,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvd2",		 601,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvd1",		 602,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvd0",		 603,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvb1",		 606,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvb0",		 607,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvi2",		 609,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvi1",		 610,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvi0",		 611,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpf2",		 613,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpf1",		 614,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpf0",		 615,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpci1",		 616,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpci0",		 617,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpcs",		 619,	R8A7795_CLK_S2D1),
 	DEF_MOD("vspd3",		 620,	R8A7795_CLK_S2D1),
 	DEF_MOD("vspd2",		 621,	R8A7795_CLK_S2D1),
 	DEF_MOD("vspd1",		 622,	R8A7795_CLK_S2D1),
@@ -147,6 +180,7 @@
 	DEF_MOD("du2",			 722,	R8A7795_CLK_S2D1),
 	DEF_MOD("du1",			 723,	R8A7795_CLK_S2D1),
 	DEF_MOD("du0",			 724,	R8A7795_CLK_S2D1),
+	DEF_MOD("lvds",			 727,	R8A7795_CLK_S2D1),
 	DEF_MOD("hdmi1",		 728,	R8A7795_CLK_HDMI),
 	DEF_MOD("hdmi0",		 729,	R8A7795_CLK_HDMI),
 	DEF_MOD("etheravb",		 812,	R8A7795_CLK_S3D2),
@@ -159,6 +193,9 @@
 	DEF_MOD("gpio2",		 910,	R8A7795_CLK_CP),
 	DEF_MOD("gpio1",		 911,	R8A7795_CLK_CP),
 	DEF_MOD("gpio0",		 912,	R8A7795_CLK_CP),
+	DEF_MOD("can-fd",		 914,	R8A7795_CLK_S3D2),
+	DEF_MOD("can-if1",		 915,	R8A7795_CLK_S3D4),
+	DEF_MOD("can-if0",		 916,	R8A7795_CLK_S3D4),
 	DEF_MOD("i2c6",			 918,	R8A7795_CLK_S3D2),
 	DEF_MOD("i2c5",			 919,	R8A7795_CLK_S3D2),
 	DEF_MOD("i2c4",			 927,	R8A7795_CLK_S3D2),
@@ -198,6 +235,221 @@
 	MOD_CLK_ID(408),	/* INTC-AP (GIC) */
 };
 
+/* -----------------------------------------------------------------------------
+ * SDn Clock
+ *
+ */
+#define CPG_SD_STP_HCK		BIT(9)
+#define CPG_SD_STP_CK		BIT(8)
+
+#define CPG_SD_STP_MASK		(CPG_SD_STP_HCK | CPG_SD_STP_CK)
+#define CPG_SD_FC_MASK		(0x7 << 2 | 0x3 << 0)
+
+#define CPG_SD_DIV_TABLE_DATA(stp_hck, stp_ck, sd_srcfc, sd_fc, sd_div) \
+{ \
+	.val = ((stp_hck) ? CPG_SD_STP_HCK : 0) | \
+	       ((stp_ck) ? CPG_SD_STP_CK : 0) | \
+	       ((sd_srcfc) << 2) | \
+	       ((sd_fc) << 0), \
+	.div = (sd_div), \
+}
+
+struct sd_div_table {
+	u32 val;
+	unsigned int div;
+};
+
+struct sd_clock {
+	struct clk_hw hw;
+	void __iomem *reg;
+	const struct sd_div_table *div_table;
+	unsigned int div_num;
+	unsigned int div_min;
+	unsigned int div_max;
+};
+
+/* SDn divider
+ *                     sd_srcfc   sd_fc   div
+ * stp_hck   stp_ck    (div)      (div)     = sd_srcfc x sd_fc
+ *-------------------------------------------------------------------
+ *  0         0         0 (1)      1 (4)      4
+ *  0         0         1 (2)      1 (4)      8
+ *  1         0         2 (4)      1 (4)     16
+ *  1         0         3 (8)      1 (4)     32
+ *  1         0         4 (16)     1 (4)     64
+ *  0         0         0 (1)      0 (2)      2
+ *  0         0         1 (2)      0 (2)      4
+ *  1         0         2 (4)      0 (2)      8
+ *  1         0         3 (8)      0 (2)     16
+ *  1         0         4 (16)     0 (2)     32
+ */
+static const struct sd_div_table cpg_sd_div_table[] = {
+/*	CPG_SD_DIV_TABLE_DATA(stp_hck,  stp_ck,   sd_srcfc,   sd_fc,  sd_div) */
+	CPG_SD_DIV_TABLE_DATA(0,        0,        0,          1,        4),
+	CPG_SD_DIV_TABLE_DATA(0,        0,        1,          1,        8),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        2,          1,       16),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        3,          1,       32),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        4,          1,       64),
+	CPG_SD_DIV_TABLE_DATA(0,        0,        0,          0,        2),
+	CPG_SD_DIV_TABLE_DATA(0,        0,        1,          0,        4),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        2,          0,        8),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        3,          0,       16),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        4,          0,       32),
+};
+
+#define to_sd_clock(_hw) container_of(_hw, struct sd_clock, hw)
+
+static int cpg_sd_clock_enable(struct clk_hw *hw)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+	u32 val, sd_fc;
+	unsigned int i;
+
+	val = clk_readl(clock->reg);
+
+	sd_fc = val & CPG_SD_FC_MASK;
+	for (i = 0; i < clock->div_num; i++)
+		if (sd_fc == (clock->div_table[i].val & CPG_SD_FC_MASK))
+			break;
+
+	if (i >= clock->div_num)
+		return -EINVAL;
+
+	val &= ~(CPG_SD_STP_MASK);
+	val |= clock->div_table[i].val & CPG_SD_STP_MASK;
+
+	clk_writel(val, clock->reg);
+
+	return 0;
+}
+
+static void cpg_sd_clock_disable(struct clk_hw *hw)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+
+	clk_writel(clk_readl(clock->reg) | CPG_SD_STP_MASK, clock->reg);
+}
+
+static int cpg_sd_clock_is_enabled(struct clk_hw *hw)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+
+	return !(clk_readl(clock->reg) & CPG_SD_STP_MASK);
+}
+
+static unsigned long cpg_sd_clock_recalc_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+	unsigned long rate = parent_rate;
+	u32 val, sd_fc;
+	unsigned int i;
+
+	val = clk_readl(clock->reg);
+
+	sd_fc = val & CPG_SD_FC_MASK;
+	for (i = 0; i < clock->div_num; i++)
+		if (sd_fc == (clock->div_table[i].val & CPG_SD_FC_MASK))
+			break;
+
+	if (i >= clock->div_num)
+		return -EINVAL;
+
+	return DIV_ROUND_CLOSEST(rate, clock->div_table[i].div);
+}
+
+static unsigned int cpg_sd_clock_calc_div(struct sd_clock *clock,
+					  unsigned long rate,
+					  unsigned long parent_rate)
+{
+	unsigned int div;
+
+	if (!rate)
+		rate = 1;
+
+	div = DIV_ROUND_CLOSEST(parent_rate, rate);
+
+	return clamp_t(unsigned int, div, clock->div_min, clock->div_max);
+}
+
+static long cpg_sd_clock_round_rate(struct clk_hw *hw, unsigned long rate,
+				      unsigned long *parent_rate)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+	unsigned int div = cpg_sd_clock_calc_div(clock, rate, *parent_rate);
+
+	return DIV_ROUND_CLOSEST(*parent_rate, div);
+}
+
+static int cpg_sd_clock_set_rate(struct clk_hw *hw, unsigned long rate,
+				   unsigned long parent_rate)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+	unsigned int div = cpg_sd_clock_calc_div(clock, rate, parent_rate);
+	u32 val;
+	unsigned int i;
+
+	for (i = 0; i < clock->div_num; i++)
+		if (div == clock->div_table[i].div)
+			break;
+
+	if (i >= clock->div_num)
+		return -EINVAL;
+
+	val = clk_readl(clock->reg);
+	val &= ~(CPG_SD_STP_MASK | CPG_SD_FC_MASK);
+	val |= clock->div_table[i].val & (CPG_SD_STP_MASK | CPG_SD_FC_MASK);
+	clk_writel(val, clock->reg);
+
+	return 0;
+}
+
+static const struct clk_ops cpg_sd_clock_ops = {
+	.enable = cpg_sd_clock_enable,
+	.disable = cpg_sd_clock_disable,
+	.is_enabled = cpg_sd_clock_is_enabled,
+	.recalc_rate = cpg_sd_clock_recalc_rate,
+	.round_rate = cpg_sd_clock_round_rate,
+	.set_rate = cpg_sd_clock_set_rate,
+};
+
+static struct clk * __init cpg_sd_clk_register(const struct cpg_core_clk *core,
+					       void __iomem *base,
+					       const char *parent_name)
+{
+	struct clk_init_data init;
+	struct sd_clock *clock;
+	struct clk *clk;
+	unsigned int i;
+
+	clock = kzalloc(sizeof(*clock), GFP_KERNEL);
+	if (!clock)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = core->name;
+	init.ops = &cpg_sd_clock_ops;
+	init.flags = CLK_IS_BASIC | CLK_SET_RATE_PARENT;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	clock->reg = base + core->offset;
+	clock->hw.init = &init;
+	clock->div_table = cpg_sd_div_table;
+	clock->div_num = ARRAY_SIZE(cpg_sd_div_table);
+
+	clock->div_max = clock->div_table[0].div;
+	clock->div_min = clock->div_max;
+	for (i = 1; i < clock->div_num; i++) {
+		clock->div_max = max(clock->div_max, clock->div_table[i].div);
+		clock->div_min = min(clock->div_min, clock->div_table[i].div);
+	}
+
+	clk = clk_register(NULL, &clock->hw);
+	if (IS_ERR(clk))
+		kfree(clock);
+
+	return clk;
+}
 
 #define CPG_PLL0CR	0x00d8
 #define CPG_PLL2CR	0x002c
@@ -323,6 +575,9 @@
 		mult = (((value >> 24) & 0x7f) + 1) * 2;
 		break;
 
+	case CLK_TYPE_GEN3_SD:
+		return cpg_sd_clk_register(core, base, __clk_get_name(parent));
+
 	default:
 		return ERR_PTR(-EINVAL);
 	}

diff --git a/drivers/clk/shmobile/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c
similarity index 98%
rename from drivers/clk/shmobile/renesas-cpg-mssr.c
rename to drivers/clk/renesas/renesas-cpg-mssr.c
index 9a4d888..58e24b3 100644
--- a/drivers/clk/shmobile/renesas-cpg-mssr.c
+++ b/drivers/clk/renesas/renesas-cpg-mssr.c

@@ -348,6 +348,7 @@
 #else
 			dev_dbg(dev, "Ignoring MSTP %s to prevent disabling\n",
 				mod->name);
+			kfree(clock);
 			return;
 #endif
 		}
@@ -568,7 +569,11 @@
 	if (error)
 		return error;
 
-	devm_add_action(dev, cpg_mssr_del_clk_provider, np);
+	error = devm_add_action_or_reset(dev,
+					 cpg_mssr_del_clk_provider,
+					 np);
+	if (error)
+		return error;
 
 	error = cpg_mssr_add_clk_domain(dev, info->core_pm_clks,
 					info->num_core_pm_clks);

diff --git a/drivers/clk/shmobile/renesas-cpg-mssr.h b/drivers/clk/renesas/renesas-cpg-mssr.h
similarity index 96%
rename from drivers/clk/shmobile/renesas-cpg-mssr.h
rename to drivers/clk/renesas/renesas-cpg-mssr.h
index e09f03c..952b695 100644
--- a/drivers/clk/shmobile/renesas-cpg-mssr.h
+++ b/drivers/clk/renesas/renesas-cpg-mssr.h

@@ -53,6 +53,8 @@
 	DEF_BASE(_name, _id, CLK_TYPE_FF, _parent, .div = _div, .mult = _mult)
 #define DEF_DIV6P1(_name, _id, _parent, _offset)	\
 	DEF_BASE(_name, _id, CLK_TYPE_DIV6P1, _parent, .offset = _offset)
+#define DEF_SD(_name, _id, _parent, _offset)	\
+	DEF_BASE(_name, _id, CLK_TYPE_GEN3_SD, _parent, .offset = _offset)
 
 
     /*

diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c
index d07374f..4e73ed5 100644
--- a/drivers/clk/rockchip/clk-cpu.c
+++ b/drivers/clk/rockchip/clk-cpu.c

@@ -116,7 +116,7 @@
 
 		pr_debug("%s: setting reg 0x%x to 0x%x\n",
 			 __func__, clksel->reg, clksel->val);
-		writel(clksel->val , cpuclk->reg_base + clksel->reg);
+		writel(clksel->val, cpuclk->reg_base + clksel->reg);
 	}
 }
 
@@ -290,14 +290,14 @@
 		pr_err("%s: could not lookup parent clock %s\n",
 		       __func__, parent_names[0]);
 		ret = -EINVAL;
-		goto free_cpuclk;
+		goto free_alt_parent;
 	}
 
 	ret = clk_notifier_register(clk, &cpuclk->clk_nb);
 	if (ret) {
 		pr_err("%s: failed to register clock notifier for %s\n",
 				__func__, name);
-		goto free_cpuclk;
+		goto free_alt_parent;
 	}
 
 	if (nrates > 0) {
@@ -326,6 +326,8 @@
 	kfree(cpuclk->rate_table);
 unregister_notifier:
 	clk_notifier_unregister(clk, &cpuclk->clk_nb);
+free_alt_parent:
+	clk_disable_unprepare(cpuclk->alt_parent);
 free_cpuclk:
 	kfree(cpuclk);
 	return ERR_PTR(ret);

diff --git a/drivers/clk/rockchip/clk-inverter.c b/drivers/clk/rockchip/clk-inverter.c
index 7cbf43b..dcb6e37 100644
--- a/drivers/clk/rockchip/clk-inverter.c
+++ b/drivers/clk/rockchip/clk-inverter.c

@@ -90,7 +90,7 @@
 
 	inv_clock = kmalloc(sizeof(*inv_clock), GFP_KERNEL);
 	if (!inv_clock)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
 	init.num_parents = num_parents;
@@ -106,11 +106,7 @@
 
 	clk = clk_register(NULL, &inv_clock->hw);
 	if (IS_ERR(clk))
-		goto err_free;
+		kfree(inv_clock);
 
 	return clk;
-
-err_free:
-	kfree(inv_clock);
-	return NULL;
 }

diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
index 2685644..e0dc7e8 100644
--- a/drivers/clk/rockchip/clk-mmc-phase.c
+++ b/drivers/clk/rockchip/clk-mmc-phase.c

@@ -150,7 +150,7 @@
 
 	mmc_clock = kmalloc(sizeof(*mmc_clock), GFP_KERNEL);
 	if (!mmc_clock)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
 	init.num_parents = num_parents;
@@ -172,11 +172,7 @@
 
 	clk = clk_register(NULL, &mmc_clock->hw);
 	if (IS_ERR(clk))
-		goto err_free;
+		kfree(mmc_clock);
 
 	return clk;
-
-err_free:
-	kfree(mmc_clock);
-	return NULL;
 }

diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c
index b7e66c9..5de797e 100644
--- a/drivers/clk/rockchip/clk-pll.c
+++ b/drivers/clk/rockchip/clk-pll.c

@@ -94,6 +94,11 @@
 	unsigned int val;
 	int delay = 24000000, ret;
 
+	if (IS_ERR(grf)) {
+		pr_err("%s: grf regmap not available\n", __func__);
+		return PTR_ERR(grf);
+	}
+
 	while (delay > 0) {
 		ret = regmap_read(grf, pll->lock_offset, &val);
 		if (ret) {

diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c
index 53e9c39..7cdb2d6 100644
--- a/drivers/clk/rockchip/clk-rk3036.c
+++ b/drivers/clk/rockchip/clk-rk3036.c

@@ -177,6 +177,8 @@
 	GATE(0, "gpll_armclk", "gpll", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(0), 6, GFLAGS),
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	/*
 	 * Clock-Architecture Diagram 2
 	 */
@@ -187,6 +189,7 @@
 			RK2928_CLKGATE_CON(0), 8, GFLAGS),
 	COMPOSITE_NOGATE(0, "ddrphy2x", mux_ddrphy_p, CLK_IGNORE_UNUSED,
 			RK2928_CLKSEL_CON(26), 8, 1, MFLAGS, 0, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO),
+	FACTOR(0, "ddrphy", "ddrphy2x", 0, 1, 2),
 
 	COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED,
 			RK2928_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
@@ -263,6 +266,8 @@
 	COMPOSITE(0, "aclk_vcodec", mux_pll_src_3plls_p, 0,
 			RK2928_CLKSEL_CON(32), 14, 2, MFLAGS, 8, 5, DFLAGS,
 			RK2928_CLKGATE_CON(3), 11, GFLAGS),
+	FACTOR_GATE(HCLK_VCODEC, "hclk_vcodec", "aclk_vcodec", 0, 1, 4,
+			RK2928_CLKGATE_CON(3), 12, GFLAGS),
 
 	COMPOSITE(0, "aclk_hvec", mux_pll_src_3plls_p, 0,
 			RK2928_CLKSEL_CON(20), 0, 2, MFLAGS, 2, 5, DFLAGS,
@@ -351,6 +356,7 @@
 	COMPOSITE_NOMUX(SCLK_MAC, "mac_clk", "mac_clk_ref", 0,
 			RK2928_CLKSEL_CON(21), 4, 5, DFLAGS,
 			RK2928_CLKGATE_CON(2), 6, GFLAGS),
+	FACTOR(0, "sclk_macref_out", "hclk_peri_src", 0, 1, 2),
 
 	MUX(SCLK_HDMI, "dclk_hdmi", mux_dclk_p, 0,
 			RK2928_CLKSEL_CON(31), 0, 1, MFLAGS),
@@ -376,11 +382,9 @@
 	GATE(ACLK_VIO, "aclk_vio", "aclk_disp1_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 13, GFLAGS),
 	GATE(ACLK_LCDC, "aclk_lcdc", "aclk_disp1_pre", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS),
 
-	GATE(HCLK_VIO_BUS, "hclk_vio_bus", "hclk_disp_pre", 0, RK2928_CLKGATE_CON(6), 12, GFLAGS),
+	GATE(HCLK_VIO_BUS, "hclk_vio_bus", "hclk_disp_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 12, GFLAGS),
 	GATE(HCLK_LCDC, "hclk_lcdc", "hclk_disp_pre", 0, RK2928_CLKGATE_CON(9), 5, GFLAGS),
 
-	/* hclk_video gates */
-	GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_disp_pre", 0, RK2928_CLKGATE_CON(3), 12, GFLAGS),
 
 	/* xin24m gates */
 	GATE(SCLK_PVTM_CORE, "sclk_pvtm_core", "xin24m", 0, RK2928_CLKGATE_CON(10), 0, GFLAGS),
@@ -444,34 +448,11 @@
 
 	rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 
-	/* xin12m is created by an cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	clk = clk_register_fixed_factor(NULL, "usb480m", "xin24m", 0, 20, 1);
 	if (IS_ERR(clk))
 		pr_warn("%s: could not register clock usb480m: %ld\n",
 			__func__, PTR_ERR(clk));
 
-	clk = clk_register_fixed_factor(NULL, "ddrphy", "ddrphy2x", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock ddrphy: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_vcodec_pre",
-					"aclk_vcodec", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_vcodec_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "sclk_macref_out",
-					"hclk_peri_src", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock sclk_macref_out: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	rockchip_clk_register_plls(rk3036_pll_clks,
 				   ARRAY_SIZE(rk3036_pll_clks),
 				   RK3036_GRF_SOC_STATUS0);

diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c
index 7f7444cb..40bab39 100644
--- a/drivers/clk/rockchip/clk-rk3188.c
+++ b/drivers/clk/rockchip/clk-rk3188.c

@@ -339,13 +339,15 @@
 	INVERTER(0, "pclk_cif0", "pclkin_cif0",
 			RK2928_CLKSEL_CON(30), 8, IFLAGS),
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	/*
 	 * the 480m are generated inside the usb block from these clocks,
 	 * but they are also a source for the hsicphy clock.
 	 */
-	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(1), 5, GFLAGS),
-	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(1), 6, GFLAGS),
 
 	COMPOSITE(0, "mac_src", mux_mac_p, 0,
@@ -605,7 +607,7 @@
 	GATE(SCLK_TIMER2, "timer2", "xin24m", 0,
 			RK2928_CLKGATE_CON(3), 2, GFLAGS),
 
-	COMPOSITE_NOMUX(0, "sclk_tsadc", "xin24m", 0,
+	COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0,
 			RK2928_CLKSEL_CON(34), 0, 16, DFLAGS,
 			RK2928_CLKGATE_CON(2), 15, GFLAGS),
 
@@ -662,11 +664,11 @@
 	{ /* sentinel */ },
 };
 
-PNAME(mux_hsicphy_p)		= { "sclk_otgphy0", "sclk_otgphy1",
+PNAME(mux_hsicphy_p)		= { "sclk_otgphy0_480m", "sclk_otgphy1_480m",
 				    "gpll", "cpll" };
 
 static struct rockchip_clk_branch rk3188_i2s0_fracmux __initdata =
-	MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, 0,
+	MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, CLK_SET_RATE_PARENT,
 			RK2928_CLKSEL_CON(3), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch rk3188_clk_branches[] __initdata = {
@@ -722,7 +724,7 @@
 	COMPOSITE_NOMUX(0, "i2s0_pre", "i2s_src", 0,
 			RK2928_CLKSEL_CON(3), 0, 7, DFLAGS,
 			RK2928_CLKGATE_CON(0), 9, GFLAGS),
-	COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", 0,
+	COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", CLK_SET_RATE_PARENT,
 			RK2928_CLKSEL_CON(7), 0,
 			RK2928_CLKGATE_CON(0), 10, GFLAGS,
 			&rk3188_i2s0_fracmux),
@@ -748,12 +750,12 @@
 	"hclk_peri",
 	"pclk_cpu",
 	"pclk_peri",
+	"hclk_cpubus"
 };
 
 static void __init rk3188_common_clk_init(struct device_node *np)
 {
 	void __iomem *reg_base;
-	struct clk *clk;
 
 	reg_base = of_iomap(np, 0);
 	if (!reg_base) {
@@ -763,17 +765,6 @@
 
 	rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 
-	/* xin12m is created by an cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "usb480m", "xin24m", 0, 20, 1);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock usb480m: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	rockchip_clk_register_branches(common_clk_branches,
 				  ARRAY_SIZE(common_clk_branches));
 

diff --git a/drivers/clk/rockchip/clk-rk3228.c b/drivers/clk/rockchip/clk-rk3228.c
index 981a502..7702d28 100644
--- a/drivers/clk/rockchip/clk-rk3228.c
+++ b/drivers/clk/rockchip/clk-rk3228.c

@@ -187,7 +187,7 @@
 			RK2928_CLKGATE_CON(7), 1, GFLAGS),
 	GATE(0, "ddrc", "ddrphy_pre", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(8), 5, GFLAGS),
-	GATE(0, "ddrphy", "ddrphy_pre", CLK_IGNORE_UNUSED,
+	FACTOR_GATE(0, "ddrphy", "ddrphy4x", CLK_IGNORE_UNUSED, 1, 4,
 			RK2928_CLKGATE_CON(7), 0, GFLAGS),
 
 	/* PD_CORE */
@@ -240,13 +240,13 @@
 	COMPOSITE(0, "aclk_vpu_pre", mux_pll_src_4plls_p, 0,
 			RK2928_CLKSEL_CON(32), 5, 2, MFLAGS, 0, 5, DFLAGS,
 			RK2928_CLKGATE_CON(3), 11, GFLAGS),
-	GATE(0, "hclk_vpu_src", "aclk_vpu_pre", 0,
+	FACTOR_GATE(0, "hclk_vpu_pre", "aclk_vpu_pre", 0, 1, 4,
 			RK2928_CLKGATE_CON(4), 4, GFLAGS),
 
 	COMPOSITE(0, "aclk_rkvdec_pre", mux_pll_src_4plls_p, 0,
 			RK2928_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK2928_CLKGATE_CON(3), 2, GFLAGS),
-	GATE(0, "hclk_rkvdec_src", "aclk_rkvdec_pre", 0,
+	FACTOR_GATE(0, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, 1, 4,
 			RK2928_CLKGATE_CON(4), 5, GFLAGS),
 
 	COMPOSITE(0, "sclk_vdec_cabac", mux_pll_src_4plls_p, 0,
@@ -285,7 +285,7 @@
 			RK2928_CLKSEL_CON(23), 14, 2, MFLAGS, 8, 6, DFLAGS,
 			RK2928_CLKGATE_CON(3), 5, GFLAGS),
 
-	GATE(0, "sclk_hdmi_hdcp", "xin24m", 0,
+	GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0,
 			RK2928_CLKGATE_CON(3), 7, GFLAGS),
 
 	COMPOSITE(0, "sclk_hdmi_cec", mux_sclk_hdmi_cec_p, 0,
@@ -364,13 +364,15 @@
 			RK2928_CLKGATE_CON(3), 1, GFLAGS),
 	MUX(0, "sclk_vop_src", mux_sclk_vop_src_p, 0,
 			RK2928_CLKSEL_CON(27), 0, 1, MFLAGS),
-	DIV(0, "dclk_hdmiphy", "sclk_vop_src", 0,
+	DIV(DCLK_HDMI_PHY, "dclk_hdmiphy", "sclk_vop_src", 0,
 			RK2928_CLKSEL_CON(29), 0, 3, DFLAGS),
 	DIV(0, "sclk_vop_pre", "sclk_vop_src", 0,
 			RK2928_CLKSEL_CON(27), 8, 8, DFLAGS),
-	MUX(0, "dclk_vop", mux_dclk_vop_p, 0,
+	MUX(DCLK_VOP, "dclk_vop", mux_dclk_vop_p, 0,
 			RK2928_CLKSEL_CON(27), 1, 1, MFLAGS),
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	COMPOSITE(0, "i2s0_src", mux_pll_src_2plls_p, 0,
 			RK2928_CLKSEL_CON(9), 15, 1, MFLAGS, 0, 7, DFLAGS,
 			RK2928_CLKGATE_CON(0), 3, GFLAGS),
@@ -422,7 +424,7 @@
 	GATE(0, "sclk_otgphy1", "xin24m", 0,
 			RK2928_CLKGATE_CON(1), 6, GFLAGS),
 
-	COMPOSITE_NOMUX(0, "sclk_tsadc", "xin24m", 0,
+	COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0,
 			RK2928_CLKSEL_CON(24), 6, 10, DFLAGS,
 			RK2928_CLKGATE_CON(2), 8, GFLAGS),
 
@@ -503,7 +505,7 @@
 	GATE(0, "aclk_iep", "aclk_iep_pre", 0, RK2928_CLKGATE_CON(13), 2, GFLAGS),
 	GATE(0, "aclk_iep_noc", "aclk_iep_pre", 0, RK2928_CLKGATE_CON(13), 9, GFLAGS),
 
-	GATE(0, "aclk_vop", "aclk_vop_pre", 0, RK2928_CLKGATE_CON(13), 5, GFLAGS),
+	GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, RK2928_CLKGATE_CON(13), 5, GFLAGS),
 	GATE(0, "aclk_vop_noc", "aclk_vop_pre", 0, RK2928_CLKGATE_CON(13), 12, GFLAGS),
 
 	GATE(0, "aclk_hdcp", "aclk_hdcp_pre", 0, RK2928_CLKGATE_CON(14), 10, GFLAGS),
@@ -511,13 +513,13 @@
 
 	GATE(0, "hclk_rga", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 1, GFLAGS),
 	GATE(0, "hclk_iep", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 3, GFLAGS),
-	GATE(0, "hclk_vop", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 6, GFLAGS),
+	GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 6, GFLAGS),
 	GATE(0, "hclk_vio_ahb_arbi", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 7, GFLAGS),
 	GATE(0, "hclk_vio_noc", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 8, GFLAGS),
 	GATE(0, "hclk_vop_noc", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 13, GFLAGS),
 	GATE(0, "hclk_vio_h2p", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 7, GFLAGS),
 	GATE(0, "hclk_hdcp_mmu", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 12, GFLAGS),
-	GATE(0, "pclk_hdmi_ctrl", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 6, GFLAGS),
+	GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 6, GFLAGS),
 	GATE(0, "pclk_vio_h2p", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 8, GFLAGS),
 	GATE(0, "pclk_hdcp", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 11, GFLAGS),
 
@@ -582,7 +584,7 @@
 	GATE(PCLK_UART0, "pclk_uart0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 12, GFLAGS),
 	GATE(PCLK_UART1, "pclk_uart1", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 13, GFLAGS),
 	GATE(PCLK_UART2, "pclk_uart2", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 14, GFLAGS),
-	GATE(0, "pclk_tsadc", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 15, GFLAGS),
+	GATE(PCLK_TSADC, "pclk_tsadc", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 15, GFLAGS),
 	GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 0, GFLAGS),
 	GATE(0, "pclk_cru", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 1, GFLAGS),
 	GATE(0, "pclk_sgrf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 2, GFLAGS),
@@ -590,7 +592,7 @@
 
 	GATE(0, "pclk_ddrphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 3, GFLAGS),
 	GATE(0, "pclk_acodecphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 5, GFLAGS),
-	GATE(0, "pclk_hdmiphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 7, GFLAGS),
+	GATE(PCLK_HDMI_PHY, "pclk_hdmiphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 7, GFLAGS),
 	GATE(0, "pclk_vdacphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 8, GFLAGS),
 	GATE(0, "pclk_phy_noc", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 9, GFLAGS),
 
@@ -605,13 +607,13 @@
 
 	/* PD_MMC */
 	MMC(SCLK_SDMMC_DRV,    "sdmmc_drv",    "sclk_sdmmc", RK3228_SDMMC_CON0, 1),
-	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3228_SDMMC_CON1, 1),
+	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3228_SDMMC_CON1, 0),
 
 	MMC(SCLK_SDIO_DRV,     "sdio_drv",     "sclk_sdio",  RK3228_SDIO_CON0,  1),
-	MMC(SCLK_SDIO_SAMPLE,  "sdio_sample",  "sclk_sdio",  RK3228_SDIO_CON1,  1),
+	MMC(SCLK_SDIO_SAMPLE,  "sdio_sample",  "sclk_sdio",  RK3228_SDIO_CON1,  0),
 
 	MMC(SCLK_EMMC_DRV,     "emmc_drv",     "sclk_emmc",  RK3228_EMMC_CON0,  1),
-	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RK3228_EMMC_CON1,  1),
+	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RK3228_EMMC_CON1,  0),
 };
 
 static const char *const rk3228_critical_clocks[] __initconst = {
@@ -624,7 +626,6 @@
 static void __init rk3228_clk_init(struct device_node *np)
 {
 	void __iomem *reg_base;
-	struct clk *clk;
 
 	reg_base = of_iomap(np, 0);
 	if (!reg_base) {
@@ -634,29 +635,6 @@
 
 	rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 
-	/* xin12m is created by an cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-				__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "ddrphy_pre", "ddrphy4x", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock ddrphy_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_vpu_pre",
-					"hclk_vpu_src", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_vpu_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_rkvdec_pre",
-					"hclk_rkvdec_src", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_rkvdec_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	rockchip_clk_register_plls(rk3228_pll_clks,
 				   ARRAY_SIZE(rk3228_pll_clks),
 				   RK3228_GRF_SOC_STATUS0);

diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c
index 984fc18..3cb7216 100644
--- a/drivers/clk/rockchip/clk-rk3288.c
+++ b/drivers/clk/rockchip/clk-rk3288.c

@@ -195,8 +195,8 @@
 PNAME(mux_edp_24m_p)	= { "ext_edp_24m", "xin24m" };
 PNAME(mux_tspout_p)	= { "cpll", "gpll", "npll", "xin27m" };
 
-PNAME(mux_usbphy480m_p)		= { "sclk_otgphy1", "sclk_otgphy2",
-				    "sclk_otgphy0" };
+PNAME(mux_usbphy480m_p)		= { "sclk_otgphy1_480m", "sclk_otgphy2_480m",
+				    "sclk_otgphy0_480m" };
 PNAME(mux_hsicphy480m_p)	= { "cpll", "gpll", "usbphy480m_src" };
 PNAME(mux_hsicphy12m_p)		= { "hsicphy12m_xin12m", "hsicphy12m_usbphy" };
 
@@ -333,6 +333,8 @@
 	GATE(0, "aclk_bus_2pmu", "aclk_cpu_pre", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 7, GFLAGS),
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	COMPOSITE(0, "i2s_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3288_CLKSEL_CON(4), 15, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3288_CLKGATE_CON(4), 1, GFLAGS),
@@ -399,12 +401,10 @@
 	 */
 	GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vdpu", 0,
 		RK3288_CLKGATE_CON(9), 0, GFLAGS),
-	/*
-	 * We introduce a virtul node of hclk_vodec_pre_v to split one clock
-	 * struct with a gate and a fix divider into two node in software.
-	 */
-	GATE(0, "hclk_vcodec_pre_v", "aclk_vdpu", 0,
+
+	FACTOR_GATE(0, "hclk_vcodec_pre", "aclk_vdpu", 0, 1, 4,
 		RK3288_CLKGATE_CON(3), 10, GFLAGS),
+
 	GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_vcodec_pre", 0,
 		RK3288_CLKGATE_CON(9), 1, GFLAGS),
 
@@ -537,11 +537,11 @@
 			RK3288_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3288_CLKGATE_CON(4), 10, GFLAGS),
 
-	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 4, GFLAGS),
-	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 5, GFLAGS),
-	GATE(SCLK_OTGPHY2, "sclk_otgphy2", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY2, "sclk_otgphy2", "xin24m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 6, GFLAGS),
 	GATE(SCLK_OTG_ADP, "sclk_otg_adp", "xin32k", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 7, GFLAGS),
@@ -888,24 +888,6 @@
 
 	rockchip_clk_init(np, rk3288_cru_base, CLK_NR_CLKS);
 
-	/* xin12m is created by an cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-
-	clk = clk_register_fixed_factor(NULL, "usb480m", "xin24m", 0, 20, 1);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock usb480m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_vcodec_pre",
-					"hclk_vcodec_pre_v", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_vcodec_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	/* Watchdog pclk is controlled by RK3288_SGRF_SOC_CON0[1]. */
 	clk = clk_register_fixed_factor(NULL, "pclk_wdt", "pclk_pd_alive", 0, 1, 1);
 	if (IS_ERR(clk))

diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index 21f3ea9..a2bb122 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c

@@ -121,7 +121,7 @@
 				    "dummy", "xin12m" };
 PNAME(mux_spdif_8ch_p)		= { "spdif_8ch_pre", "spdif_8ch_frac",
 				    "ext_i2s", "xin12m" };
-PNAME(mux_edp_24m_p)		= { "dummy", "xin24m" };
+PNAME(mux_edp_24m_p)		= { "xin24m", "dummy" };
 PNAME(mux_vip_out_p)		= { "vip_src", "xin24m" };
 PNAME(mux_usbphy480m_p)		= { "usbotg_out", "xin24m" };
 PNAME(mux_hsic_usbphy480m_p)	= { "usbotg_out", "dummy" };
@@ -165,7 +165,7 @@
 	.core_reg = RK3368_CLKSEL_CON(0),
 	.div_core_shift = 0,
 	.div_core_mask = 0x1f,
-	.mux_core_shift = 15,
+	.mux_core_shift = 7,
 };
 
 static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = {
@@ -218,36 +218,66 @@
 	}
 
 static struct rockchip_cpuclk_rate_table rk3368_cpuclkb_rates[] __initdata = {
-	RK3368_CPUCLKB_RATE(1512000000, 2, 6, 6),
-	RK3368_CPUCLKB_RATE(1488000000, 2, 5, 5),
-	RK3368_CPUCLKB_RATE(1416000000, 2, 5, 5),
-	RK3368_CPUCLKB_RATE(1200000000, 2, 4, 4),
-	RK3368_CPUCLKB_RATE(1008000000, 2, 4, 4),
-	RK3368_CPUCLKB_RATE( 816000000, 2, 3, 3),
-	RK3368_CPUCLKB_RATE( 696000000, 2, 3, 3),
-	RK3368_CPUCLKB_RATE( 600000000, 2, 2, 2),
-	RK3368_CPUCLKB_RATE( 408000000, 2, 2, 2),
-	RK3368_CPUCLKB_RATE( 312000000, 2, 2, 2),
+	RK3368_CPUCLKB_RATE(1512000000, 1, 5, 5),
+	RK3368_CPUCLKB_RATE(1488000000, 1, 4, 4),
+	RK3368_CPUCLKB_RATE(1416000000, 1, 4, 4),
+	RK3368_CPUCLKB_RATE(1200000000, 1, 3, 3),
+	RK3368_CPUCLKB_RATE(1008000000, 1, 3, 3),
+	RK3368_CPUCLKB_RATE( 816000000, 1, 2, 2),
+	RK3368_CPUCLKB_RATE( 696000000, 1, 2, 2),
+	RK3368_CPUCLKB_RATE( 600000000, 1, 1, 1),
+	RK3368_CPUCLKB_RATE( 408000000, 1, 1, 1),
+	RK3368_CPUCLKB_RATE( 312000000, 1, 1, 1),
 };
 
 static struct rockchip_cpuclk_rate_table rk3368_cpuclkl_rates[] __initdata = {
-	RK3368_CPUCLKL_RATE(1512000000, 2, 7, 7),
-	RK3368_CPUCLKL_RATE(1488000000, 2, 6, 6),
-	RK3368_CPUCLKL_RATE(1416000000, 2, 6, 6),
-	RK3368_CPUCLKL_RATE(1200000000, 2, 5, 5),
-	RK3368_CPUCLKL_RATE(1008000000, 2, 5, 5),
-	RK3368_CPUCLKL_RATE( 816000000, 2, 4, 4),
-	RK3368_CPUCLKL_RATE( 696000000, 2, 3, 3),
-	RK3368_CPUCLKL_RATE( 600000000, 2, 3, 3),
-	RK3368_CPUCLKL_RATE( 408000000, 2, 2, 2),
-	RK3368_CPUCLKL_RATE( 312000000, 2, 2, 2),
+	RK3368_CPUCLKL_RATE(1512000000, 1, 6, 6),
+	RK3368_CPUCLKL_RATE(1488000000, 1, 5, 5),
+	RK3368_CPUCLKL_RATE(1416000000, 1, 5, 5),
+	RK3368_CPUCLKL_RATE(1200000000, 1, 4, 4),
+	RK3368_CPUCLKL_RATE(1008000000, 1, 4, 4),
+	RK3368_CPUCLKL_RATE( 816000000, 1, 3, 3),
+	RK3368_CPUCLKL_RATE( 696000000, 1, 2, 2),
+	RK3368_CPUCLKL_RATE( 600000000, 1, 2, 2),
+	RK3368_CPUCLKL_RATE( 408000000, 1, 1, 1),
+	RK3368_CPUCLKL_RATE( 312000000, 1, 1, 1),
 };
 
+static struct rockchip_clk_branch rk3368_i2s_8ch_fracmux __initdata =
+	MUX(0, "i2s_8ch_pre", mux_i2s_8ch_pre_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(27), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_spdif_8ch_fracmux __initdata =
+	MUX(0, "spdif_8ch_pre", mux_spdif_8ch_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(31), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_i2s_2ch_fracmux __initdata =
+	MUX(0, "i2s_2ch_pre", mux_i2s_2ch_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(53), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_uart0_fracmux __initdata =
+	MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(33), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_uart1_fracmux __initdata =
+	MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(35), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_uart3_fracmux __initdata =
+	MUX(SCLK_UART3, "sclk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(39), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_uart4_fracmux __initdata =
+	MUX(SCLK_UART4, "sclk_uart4", mux_uart4_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(41), 8, 2, MFLAGS);
+
 static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	/*
 	 * Clock-Architecture Diagram 2
 	 */
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	MUX(SCLK_USBPHY480M, "usbphy_480m", mux_usbphy480m_p, CLK_SET_RATE_PARENT,
 			RK3368_CLKSEL_CON(13), 8, 1, MFLAGS),
 
@@ -299,7 +329,7 @@
 	COMPOSITE_NOGATE_DIVTBL(0, "ddrphy_src", mux_ddrphy_p, CLK_IGNORE_UNUSED,
 			RK3368_CLKSEL_CON(13), 4, 1, MFLAGS, 0, 2, DFLAGS, div_ddrphy_t),
 
-	GATE(0, "sclk_ddr", "ddrphy_div4", CLK_IGNORE_UNUSED,
+	FACTOR_GATE(0, "sclk_ddr", "ddrphy_src", CLK_IGNORE_UNUSED, 1, 4,
 			RK3368_CLKGATE_CON(6), 14, GFLAGS),
 	GATE(0, "sclk_ddr4x", "ddrphy_src", CLK_IGNORE_UNUSED,
 			RK3368_CLKGATE_CON(6), 15, GFLAGS),
@@ -337,11 +367,10 @@
 	COMPOSITE(0, "i2s_8ch_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(27), 12, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(6), 1, GFLAGS),
-	COMPOSITE_FRAC(0, "i2s_8ch_frac", "i2s_8ch_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(28), 0,
-			RK3368_CLKGATE_CON(6), 2, GFLAGS),
-	MUX(0, "i2s_8ch_pre", mux_i2s_8ch_pre_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(27), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "i2s_8ch_frac", "i2s_8ch_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(28), 0,
+			  RK3368_CLKGATE_CON(6), 2, GFLAGS,
+			  &rk3368_i2s_8ch_fracmux),
 	COMPOSITE_NODIV(SCLK_I2S_8CH_OUT, "i2s_8ch_clkout", mux_i2s_8ch_clkout_p, 0,
 			RK3368_CLKSEL_CON(27), 15, 1, MFLAGS,
 			RK3368_CLKGATE_CON(6), 0, GFLAGS),
@@ -350,21 +379,21 @@
 	COMPOSITE(0, "spdif_8ch_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(31), 12, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(6), 4, GFLAGS),
-	COMPOSITE_FRAC(0, "spdif_8ch_frac", "spdif_8ch_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(32), 0,
-			RK3368_CLKGATE_CON(6), 5, GFLAGS),
-	COMPOSITE_NODIV(SCLK_SPDIF_8CH, "sclk_spdif_8ch", mux_spdif_8ch_p, 0,
-			RK3368_CLKSEL_CON(31), 8, 2, MFLAGS,
-			RK3368_CLKGATE_CON(6), 6, GFLAGS),
+	COMPOSITE_FRACMUX(0, "spdif_8ch_frac", "spdif_8ch_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(32), 0,
+			  RK3368_CLKGATE_CON(6), 5, GFLAGS,
+			  &rk3368_spdif_8ch_fracmux),
+	GATE(SCLK_SPDIF_8CH, "sclk_spdif_8ch", "spdif_8ch_pre", CLK_SET_RATE_PARENT,
+	     RK3368_CLKGATE_CON(6), 6, GFLAGS),
 	COMPOSITE(0, "i2s_2ch_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(53), 12, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(5), 13, GFLAGS),
-	COMPOSITE_FRAC(0, "i2s_2ch_frac", "i2s_2ch_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(54), 0,
-			RK3368_CLKGATE_CON(5), 14, GFLAGS),
-	COMPOSITE_NODIV(SCLK_I2S_2CH, "sclk_i2s_2ch", mux_i2s_2ch_p, 0,
-			RK3368_CLKSEL_CON(53), 8, 2, MFLAGS,
-			RK3368_CLKGATE_CON(5), 15, GFLAGS),
+	COMPOSITE_FRACMUX(0, "i2s_2ch_frac", "i2s_2ch_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(54), 0,
+			  RK3368_CLKGATE_CON(5), 14, GFLAGS,
+			  &rk3368_i2s_2ch_fracmux),
+	GATE(SCLK_I2S_2CH, "sclk_i2s_2ch", "i2s_2ch_pre", CLK_SET_RATE_PARENT,
+	     RK3368_CLKGATE_CON(5), 15, GFLAGS),
 
 	COMPOSITE(0, "sclk_tsp", mux_pll_src_cpll_gpll_npll_p, 0,
 			RK3368_CLKSEL_CON(46), 6, 2, MFLAGS, 0, 5, DFLAGS,
@@ -384,18 +413,18 @@
 	 * Clock-Architecture Diagram 3
 	 */
 
-	COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_usb_p, 0,
+	COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_npll_usb_p, 0,
 			RK3368_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3368_CLKGATE_CON(4), 6, GFLAGS),
-	COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_usb_p, 0,
+	COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_npll_usb_p, 0,
 			RK3368_CLKSEL_CON(15), 14, 2, MFLAGS, 8, 5, DFLAGS,
 			RK3368_CLKGATE_CON(4), 7, GFLAGS),
 
 	/*
-	 * We introduce a virtual node of hclk_vodec_pre_v to split one clock
-	 * struct with a gate and a fix divider into two node in software.
+	 * We use aclk_vdpu by default ---GRF_SOC_CON0[7] setting in system,
+	 * so we ignore the mux and make clocks nodes as following,
 	 */
-	GATE(0, "hclk_video_pre_v", "aclk_vdpu", 0,
+	FACTOR_GATE(0, "hclk_video_pre", "aclk_vdpu", 0, 1, 4,
 		RK3368_CLKGATE_CON(4), 8, GFLAGS),
 
 	COMPOSITE(0, "sclk_hevc_cabac_src", mux_pll_src_cpll_gpll_npll_usb_p, 0,
@@ -442,7 +471,7 @@
 	GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0,
 			RK3368_CLKGATE_CON(4), 13, GFLAGS),
 	GATE(SCLK_HDMI_CEC, "sclk_hdmi_cec", "xin32k", 0,
-			RK3368_CLKGATE_CON(5), 12, GFLAGS),
+			RK3368_CLKGATE_CON(4), 12, GFLAGS),
 
 	COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(21), 15, 1, MFLAGS,
@@ -560,38 +589,34 @@
 	COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gpll_usb_usb_p, 0,
 			RK3368_CLKSEL_CON(33), 12, 2, MFLAGS, 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(2), 0, GFLAGS),
-	COMPOSITE_FRAC(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(34), 0,
-			RK3368_CLKGATE_CON(2), 1, GFLAGS),
-	MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(33), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(34), 0,
+			  RK3368_CLKGATE_CON(2), 1, GFLAGS,
+			  &rk3368_uart0_fracmux),
 
 	COMPOSITE_NOMUX(0, "uart1_src", "uart_src", 0,
 			RK3368_CLKSEL_CON(35), 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(2), 2, GFLAGS),
-	COMPOSITE_FRAC(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(36), 0,
-			RK3368_CLKGATE_CON(2), 3, GFLAGS),
-	MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(35), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(36), 0,
+			  RK3368_CLKGATE_CON(2), 3, GFLAGS,
+			  &rk3368_uart1_fracmux),
 
 	COMPOSITE_NOMUX(0, "uart3_src", "uart_src", 0,
 			RK3368_CLKSEL_CON(39), 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(2), 6, GFLAGS),
-	COMPOSITE_FRAC(0, "uart3_frac", "uart3_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(40), 0,
-			RK3368_CLKGATE_CON(2), 7, GFLAGS),
-	MUX(SCLK_UART3, "sclk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(39), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(40), 0,
+			  RK3368_CLKGATE_CON(2), 7, GFLAGS,
+			  &rk3368_uart3_fracmux),
 
 	COMPOSITE_NOMUX(0, "uart4_src", "uart_src", 0,
 			RK3368_CLKSEL_CON(41), 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(2), 8, GFLAGS),
-	COMPOSITE_FRAC(0, "uart4_frac", "uart4_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(42), 0,
-			RK3368_CLKGATE_CON(2), 9, GFLAGS),
-	MUX(SCLK_UART4, "sclk_uart4", mux_uart4_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(41), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "uart4_frac", "uart4_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(42), 0,
+			  RK3368_CLKGATE_CON(2), 9, GFLAGS,
+			  &rk3368_uart4_fracmux),
 
 	COMPOSITE(0, "mac_pll_src", mux_pll_src_npll_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS,
@@ -842,24 +867,6 @@
 
 	rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 
-	/* xin12m is created by a cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	/* ddrphy_div4 is created by a cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "ddrphy_div4", "ddrphy_src", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_video_pre",
-					"hclk_video_pre_v", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_vcodec_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	/* Watchdog pclk is controlled by sgrf_soc_con3[7]. */
 	clk = clk_register_fixed_factor(NULL, "pclk_wdt", "pclk_pd_alive", 0, 1, 1);
 	if (IS_ERR(clk))

diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index f7e8693..ec06350 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c

@@ -70,7 +70,7 @@
 	if (gate_offset >= 0) {
 		gate = kzalloc(sizeof(*gate), GFP_KERNEL);
 		if (!gate)
-			return ERR_PTR(-ENOMEM);
+			goto err_gate;
 
 		gate->flags = gate_flags;
 		gate->reg = base + gate_offset;
@@ -82,7 +82,7 @@
 	if (div_width > 0) {
 		div = kzalloc(sizeof(*div), GFP_KERNEL);
 		if (!div)
-			return ERR_PTR(-ENOMEM);
+			goto err_div;
 
 		div->flags = div_flags;
 		div->reg = base + muxdiv_offset;
@@ -102,6 +102,11 @@
 				     flags);
 
 	return clk;
+err_div:
+	kfree(gate);
+err_gate:
+	kfree(mux);
+	return ERR_PTR(-ENOMEM);
 }
 
 struct rockchip_clk_frac {
@@ -262,6 +267,53 @@
 	return clk;
 }
 
+static struct clk *rockchip_clk_register_factor_branch(const char *name,
+		const char *const *parent_names, u8 num_parents,
+		void __iomem *base, unsigned int mult, unsigned int div,
+		int gate_offset, u8 gate_shift, u8 gate_flags,
+		unsigned long flags, spinlock_t *lock)
+{
+	struct clk *clk;
+	struct clk_gate *gate = NULL;
+	struct clk_fixed_factor *fix = NULL;
+
+	/* without gate, register a simple factor clock */
+	if (gate_offset == 0) {
+		return clk_register_fixed_factor(NULL, name,
+				parent_names[0], flags, mult,
+				div);
+	}
+
+	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+
+	gate->flags = gate_flags;
+	gate->reg = base + gate_offset;
+	gate->bit_idx = gate_shift;
+	gate->lock = lock;
+
+	fix = kzalloc(sizeof(*fix), GFP_KERNEL);
+	if (!fix) {
+		kfree(gate);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fix->mult = mult;
+	fix->div = div;
+
+	clk = clk_register_composite(NULL, name, parent_names, num_parents,
+				     NULL, NULL,
+				     &fix->hw, &clk_fixed_factor_ops,
+				     &gate->hw, &clk_gate_ops, flags);
+	if (IS_ERR(clk)) {
+		kfree(fix);
+		kfree(gate);
+	}
+
+	return clk;
+}
+
 static DEFINE_SPINLOCK(clk_lock);
 static struct clk **clk_table;
 static void __iomem *reg_base;
@@ -397,6 +449,14 @@
 				reg_base + list->muxdiv_offset,
 				list->div_shift, list->div_flags, &clk_lock);
 			break;
+		case branch_factor:
+			clk = rockchip_clk_register_factor_branch(
+				list->name, list->parent_names,
+				list->num_parents, reg_base,
+				list->div_shift, list->div_width,
+				list->gate_offset, list->gate_shift,
+				list->gate_flags, flags, &clk_lock);
+			break;
 		}
 
 		/* none of the cases above matched */

diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index ff8bd23..39c198b 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h

@@ -254,6 +254,7 @@
 	branch_gate,
 	branch_mmc,
 	branch_inverter,
+	branch_factor,
 };
 
 struct rockchip_clk_branch {
@@ -508,6 +509,33 @@
 		.div_flags	= if,				\
 	}
 
+#define FACTOR(_id, cname, pname,  f, fm, fd)			\
+	{							\
+		.id		= _id,				\
+		.branch_type	= branch_factor,		\
+		.name		= cname,			\
+		.parent_names	= (const char *[]){ pname },	\
+		.num_parents	= 1,				\
+		.flags		= f,				\
+		.div_shift	= fm,				\
+		.div_width	= fd,				\
+	}
+
+#define FACTOR_GATE(_id, cname, pname,  f, fm, fd, go, gb, gf)	\
+	{							\
+		.id		= _id,				\
+		.branch_type	= branch_factor,		\
+		.name		= cname,			\
+		.parent_names	= (const char *[]){ pname },	\
+		.num_parents	= 1,				\
+		.flags		= f,				\
+		.div_shift	= fm,				\
+		.div_width	= fd,				\
+		.gate_offset	= go,				\
+		.gate_shift	= gb,				\
+		.gate_flags	= gf,				\
+	}
+
 void rockchip_clk_init(struct device_node *np, void __iomem *base,
 		       unsigned long nr_clks);
 struct regmap *rockchip_clk_get_grf(void);

diff --git a/drivers/clk/rockchip/softrst.c b/drivers/clk/rockchip/softrst.c
index 552f7bb..21218987 100644
--- a/drivers/clk/rockchip/softrst.c
+++ b/drivers/clk/rockchip/softrst.c

@@ -81,7 +81,7 @@
 	return 0;
 }
 
-static struct reset_control_ops rockchip_softrst_ops = {
+static const struct reset_control_ops rockchip_softrst_ops = {
 	.assert		= rockchip_softrst_assert,
 	.deassert	= rockchip_softrst_deassert,
 };

diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c
index ac03e4f..7b3d0f9 100644
--- a/drivers/clk/samsung/clk-exynos4.c
+++ b/drivers/clk/samsung/clk-exynos4.c

@@ -500,19 +500,19 @@
 
 /* fixed rate clocks generated outside the soc */
 static struct samsung_fixed_rate_clock exynos4_fixed_rate_ext_clks[] __initdata = {
-	FRATE(CLK_XXTI, "xxti", NULL, CLK_IS_ROOT, 0),
-	FRATE(CLK_XUSBXTI, "xusbxti", NULL, CLK_IS_ROOT, 0),
+	FRATE(CLK_XXTI, "xxti", NULL, 0, 0),
+	FRATE(CLK_XUSBXTI, "xusbxti", NULL, 0, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock exynos4_fixed_rate_clks[] __initdata = {
-	FRATE(0, "sclk_hdmi24m", NULL, CLK_IS_ROOT, 24000000),
+	FRATE(0, "sclk_hdmi24m", NULL, 0, 24000000),
 	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", "hdmi", 0, 27000000),
-	FRATE(0, "sclk_usbphy0", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(0, "sclk_usbphy0", NULL, 0, 48000000),
 };
 
 static struct samsung_fixed_rate_clock exynos4210_fixed_rate_clks[] __initdata = {
-	FRATE(0, "sclk_usbphy1", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(0, "sclk_usbphy1", NULL, 0, 48000000),
 };
 
 static struct samsung_fixed_factor_clock exynos4_fixed_factor_clks[] __initdata = {
@@ -1251,7 +1251,7 @@
 	fclk.id = CLK_FIN_PLL;
 	fclk.name = "fin_pll";
 	fclk.parent_name = NULL;
-	fclk.flags = CLK_IS_ROOT;
+	fclk.flags = 0;
 	fclk.fixed_rate = finpll_f;
 	samsung_clk_register_fixed_rate(ctx, &fclk, 1);
 

diff --git a/drivers/clk/samsung/clk-exynos4415.c b/drivers/clk/samsung/clk-exynos4415.c
index 92c39f6e..86ee06b 100644
--- a/drivers/clk/samsung/clk-exynos4415.c
+++ b/drivers/clk/samsung/clk-exynos4415.c

@@ -274,7 +274,7 @@
 };
 
 static struct samsung_fixed_rate_clock exynos4415_fixed_rate_clks[] __initdata = {
-	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 27000000),
+	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 27000000),
 };
 
 static struct samsung_mux_clock exynos4415_mux_clks[] __initdata = {

diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c
index 5bebf8c..837197d 100644
--- a/drivers/clk/samsung/clk-exynos5250.c
+++ b/drivers/clk/samsung/clk-exynos5250.c

@@ -262,15 +262,15 @@
 
 /* fixed rate clocks generated outside the soc */
 static struct samsung_fixed_rate_clock exynos5250_fixed_rate_ext_clks[] __initdata = {
-	FRATE(CLK_FIN_PLL, "fin_pll", NULL, CLK_IS_ROOT, 0),
+	FRATE(CLK_FIN_PLL, "fin_pll", NULL, 0, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock exynos5250_fixed_rate_clks[] __initdata = {
-	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(0, "sclk_hdmi27m", NULL, CLK_IS_ROOT, 27000000),
-	FRATE(0, "sclk_dptxphy", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(0, "sclk_uhostphy", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 24000000),
+	FRATE(0, "sclk_hdmi27m", NULL, 0, 27000000),
+	FRATE(0, "sclk_dptxphy", NULL, 0, 24000000),
+	FRATE(0, "sclk_uhostphy", NULL, 0, 48000000),
 };
 
 static struct samsung_fixed_factor_clock exynos5250_fixed_factor_clks[] __initdata = {

diff --git a/drivers/clk/samsung/clk-exynos5260.c b/drivers/clk/samsung/clk-exynos5260.c
index d1a29f6..7a7ed07 100644
--- a/drivers/clk/samsung/clk-exynos5260.c
+++ b/drivers/clk/samsung/clk-exynos5260.c

@@ -1432,42 +1432,38 @@
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock fixed_rate_clks[] __initdata = {
 	FRATE(PHYCLK_DPTX_PHY_CH3_TXD_CLK, "phyclk_dptx_phy_ch3_txd_clk", NULL,
-			CLK_IS_ROOT, 270000000),
+			0, 270000000),
 	FRATE(PHYCLK_DPTX_PHY_CH2_TXD_CLK, "phyclk_dptx_phy_ch2_txd_clk", NULL,
-			CLK_IS_ROOT, 270000000),
+			0, 270000000),
 	FRATE(PHYCLK_DPTX_PHY_CH1_TXD_CLK, "phyclk_dptx_phy_ch1_txd_clk", NULL,
-			CLK_IS_ROOT, 270000000),
+			0, 270000000),
 	FRATE(PHYCLK_DPTX_PHY_CH0_TXD_CLK, "phyclk_dptx_phy_ch0_txd_clk", NULL,
-			CLK_IS_ROOT, 270000000),
+			0, 270000000),
 	FRATE(phyclk_hdmi_phy_tmds_clko, "phyclk_hdmi_phy_tmds_clko", NULL,
-			CLK_IS_ROOT, 250000000),
+			0, 250000000),
 	FRATE(PHYCLK_HDMI_PHY_PIXEL_CLKO, "phyclk_hdmi_phy_pixel_clko", NULL,
-			CLK_IS_ROOT, 1660000000),
+			0, 1660000000),
 	FRATE(PHYCLK_HDMI_LINK_O_TMDS_CLKHI, "phyclk_hdmi_link_o_tmds_clkhi",
-			NULL, CLK_IS_ROOT, 125000000),
+			NULL, 0, 125000000),
 	FRATE(PHYCLK_MIPI_DPHY_4L_M_TXBYTECLKHS,
 			"phyclk_mipi_dphy_4l_m_txbyte_clkhs" , NULL,
-			CLK_IS_ROOT, 187500000),
+			0, 187500000),
 	FRATE(PHYCLK_DPTX_PHY_O_REF_CLK_24M, "phyclk_dptx_phy_o_ref_clk_24m",
-			NULL, CLK_IS_ROOT, 24000000),
+			NULL, 0, 24000000),
 	FRATE(PHYCLK_DPTX_PHY_CLK_DIV2, "phyclk_dptx_phy_clk_div2", NULL,
-			CLK_IS_ROOT, 135000000),
+			0, 135000000),
 	FRATE(PHYCLK_MIPI_DPHY_4L_M_RXCLKESC0,
-			"phyclk_mipi_dphy_4l_m_rxclkesc0", NULL,
-			CLK_IS_ROOT, 20000000),
+			"phyclk_mipi_dphy_4l_m_rxclkesc0", NULL, 0, 20000000),
 	FRATE(PHYCLK_USBHOST20_PHY_PHYCLOCK, "phyclk_usbhost20_phy_phyclock",
-			NULL, CLK_IS_ROOT, 60000000),
+			NULL, 0, 60000000),
 	FRATE(PHYCLK_USBHOST20_PHY_FREECLK, "phyclk_usbhost20_phy_freeclk",
-			NULL, CLK_IS_ROOT, 60000000),
+			NULL, 0, 60000000),
 	FRATE(PHYCLK_USBHOST20_PHY_CLK48MOHCI,
-			"phyclk_usbhost20_phy_clk48mohci",
-			NULL, CLK_IS_ROOT, 48000000),
+			"phyclk_usbhost20_phy_clk48mohci", NULL, 0, 48000000),
 	FRATE(PHYCLK_USBDRD30_UDRD30_PIPE_PCLK,
-			"phyclk_usbdrd30_udrd30_pipe_pclk", NULL,
-			CLK_IS_ROOT, 125000000),
+			"phyclk_usbdrd30_udrd30_pipe_pclk", NULL, 0, 125000000),
 	FRATE(PHYCLK_USBDRD30_UDRD30_PHYCLOCK,
-			"phyclk_usbdrd30_udrd30_phyclock", NULL,
-			CLK_IS_ROOT, 60000000),
+			"phyclk_usbdrd30_udrd30_phyclock", NULL, 0, 60000000),
 };
 
 PNAME(mout_memtop_pll_user_p) = {"fin_pll", "dout_mem_pll"};

diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index d048ded..be03ed0 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c

@@ -480,16 +480,16 @@
 /* fixed rate clocks generated outside the soc */
 static struct samsung_fixed_rate_clock
 		exynos5x_fixed_rate_ext_clks[] __initdata = {
-	FRATE(CLK_FIN_PLL, "fin_pll", NULL, CLK_IS_ROOT, 0),
+	FRATE(CLK_FIN_PLL, "fin_pll", NULL, 0, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock exynos5x_fixed_rate_clks[] __initdata = {
-	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(0, "sclk_pwi", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(0, "sclk_usbh20", NULL, CLK_IS_ROOT, 48000000),
-	FRATE(0, "mphy_refclk_ixtal24", NULL, CLK_IS_ROOT, 48000000),
-	FRATE(0, "sclk_usbh20_scan_clk", NULL, CLK_IS_ROOT, 480000000),
+	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 24000000),
+	FRATE(0, "sclk_pwi", NULL, 0, 24000000),
+	FRATE(0, "sclk_usbh20", NULL, 0, 48000000),
+	FRATE(0, "mphy_refclk_ixtal24", NULL, 0, 48000000),
+	FRATE(0, "sclk_usbh20_scan_clk", NULL, 0, 480000000),
 };
 
 static struct samsung_fixed_factor_clock

diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c
index cee062c..128527b 100644
--- a/drivers/clk/samsung/clk-exynos5433.c
+++ b/drivers/clk/samsung/clk-exynos5433.c

@@ -142,17 +142,6 @@
 	MUX_ENABLE_TOP_FSYS1,
 	MUX_ENABLE_TOP_PERIC0,
 	MUX_ENABLE_TOP_PERIC1,
-	MUX_STAT_TOP0,
-	MUX_STAT_TOP1,
-	MUX_STAT_TOP2,
-	MUX_STAT_TOP3,
-	MUX_STAT_TOP4,
-	MUX_STAT_TOP_MSCL,
-	MUX_STAT_TOP_CAM1,
-	MUX_STAT_TOP_FSYS0,
-	MUX_STAT_TOP_FSYS1,
-	MUX_STAT_TOP_PERIC0,
-	MUX_STAT_TOP_PERIC1,
 	DIV_TOP0,
 	DIV_TOP1,
 	DIV_TOP2,
@@ -170,22 +159,6 @@
 	DIV_TOP_PERIC3,
 	DIV_TOP_PERIC4,
 	DIV_TOP_PLL_FREQ_DET,
-	DIV_STAT_TOP0,
-	DIV_STAT_TOP1,
-	DIV_STAT_TOP2,
-	DIV_STAT_TOP3,
-	DIV_STAT_TOP4,
-	DIV_STAT_TOP_MSCL,
-	DIV_STAT_TOP_CAM10,
-	DIV_STAT_TOP_CAM11,
-	DIV_STAT_TOP_FSYS0,
-	DIV_STAT_TOP_FSYS1,
-	DIV_STAT_TOP_FSYS2,
-	DIV_STAT_TOP_PERIC0,
-	DIV_STAT_TOP_PERIC1,
-	DIV_STAT_TOP_PERIC2,
-	DIV_STAT_TOP_PERIC3,
-	DIV_STAT_TOP_PLL_FREQ_DET,
 	ENABLE_ACLK_TOP,
 	ENABLE_SCLK_TOP,
 	ENABLE_SCLK_TOP_MSCL,
@@ -251,18 +224,18 @@
 
 static struct samsung_fixed_rate_clock top_fixed_clks[] __initdata = {
 	/* Xi2s{0|1}CDCLK input clock for I2S/PCM */
-	FRATE(0, "ioclk_audiocdclk1", NULL, CLK_IS_ROOT, 100000000),
-	FRATE(0, "ioclk_audiocdclk0", NULL, CLK_IS_ROOT, 100000000),
+	FRATE(0, "ioclk_audiocdclk1", NULL, 0, 100000000),
+	FRATE(0, "ioclk_audiocdclk0", NULL, 0, 100000000),
 	/* Xi2s1SDI input clock for SPDIF */
-	FRATE(0, "ioclk_spdif_extclk", NULL, CLK_IS_ROOT, 100000000),
+	FRATE(0, "ioclk_spdif_extclk", NULL, 0, 100000000),
 	/* XspiCLK[4:0] input clock for SPI */
-	FRATE(0, "ioclk_spi4_clk_in", NULL, CLK_IS_ROOT, 50000000),
-	FRATE(0, "ioclk_spi3_clk_in", NULL, CLK_IS_ROOT, 50000000),
-	FRATE(0, "ioclk_spi2_clk_in", NULL, CLK_IS_ROOT, 50000000),
-	FRATE(0, "ioclk_spi1_clk_in", NULL, CLK_IS_ROOT, 50000000),
-	FRATE(0, "ioclk_spi0_clk_in", NULL, CLK_IS_ROOT, 50000000),
+	FRATE(0, "ioclk_spi4_clk_in", NULL, 0, 50000000),
+	FRATE(0, "ioclk_spi3_clk_in", NULL, 0, 50000000),
+	FRATE(0, "ioclk_spi2_clk_in", NULL, 0, 50000000),
+	FRATE(0, "ioclk_spi1_clk_in", NULL, 0, 50000000),
+	FRATE(0, "ioclk_spi0_clk_in", NULL, 0, 50000000),
 	/* Xi2s1SCLK input clock for I2S1_BCLK */
-	FRATE(0, "ioclk_i2s1_bclk_in", NULL, CLK_IS_ROOT, 12288000),
+	FRATE(0, "ioclk_i2s1_bclk_in", NULL, 0, 12288000),
 };
 
 static struct samsung_mux_clock top_mux_clks[] __initdata = {
@@ -490,9 +463,9 @@
 	DIV(CLK_DIV_SCLK_ISP_SENSOR1_A, "div_sclk_isp_sensor1_a",
 			"mout_sclk_isp_sensor1", DIV_TOP_CAM11, 8, 4),
 	DIV(CLK_DIV_SCLK_ISP_SENSOR0_B, "div_sclk_isp_sensor0_b",
-			"div_sclk_isp_sensor0_a", DIV_TOP_CAM11, 12, 4),
+			"div_sclk_isp_sensor0_a", DIV_TOP_CAM11, 4, 4),
 	DIV(CLK_DIV_SCLK_ISP_SENSOR0_A, "div_sclk_isp_sensor0_a",
-			"mout_sclk_isp_sensor0", DIV_TOP_CAM11, 8, 4),
+			"mout_sclk_isp_sensor0", DIV_TOP_CAM11, 0, 4),
 
 	/* DIV_TOP_FSYS0 */
 	DIV(CLK_DIV_SCLK_MMC1_B, "div_sclk_mmc1_b", "div_sclk_mmc1_a",
@@ -999,26 +972,12 @@
 	MUX_ENABLE_MIF5,
 	MUX_ENABLE_MIF6,
 	MUX_ENABLE_MIF7,
-	MUX_STAT_MIF0,
-	MUX_STAT_MIF1,
-	MUX_STAT_MIF2,
-	MUX_STAT_MIF3,
-	MUX_STAT_MIF4,
-	MUX_STAT_MIF5,
-	MUX_STAT_MIF6,
-	MUX_STAT_MIF7,
 	DIV_MIF1,
 	DIV_MIF2,
 	DIV_MIF3,
 	DIV_MIF4,
 	DIV_MIF5,
 	DIV_MIF_PLL_FREQ_DET,
-	DIV_STAT_MIF1,
-	DIV_STAT_MIF2,
-	DIV_STAT_MIF3,
-	DIV_STAT_MIF4,
-	DIV_STAT_MIF5,
-	DIV_STAT_MIF_PLL_FREQ_DET,
 	ENABLE_ACLK_MIF0,
 	ENABLE_ACLK_MIF1,
 	ENABLE_ACLK_MIF2,
@@ -1565,7 +1524,6 @@
 
 static unsigned long peric_clk_regs[] __initdata = {
 	DIV_PERIC,
-	DIV_STAT_PERIC,
 	ENABLE_ACLK_PERIC,
 	ENABLE_PCLK_PERIC0,
 	ENABLE_PCLK_PERIC1,
@@ -2012,11 +1970,6 @@
 	MUX_ENABLE_FSYS2,
 	MUX_ENABLE_FSYS3,
 	MUX_ENABLE_FSYS4,
-	MUX_STAT_FSYS0,
-	MUX_STAT_FSYS1,
-	MUX_STAT_FSYS2,
-	MUX_STAT_FSYS3,
-	MUX_STAT_FSYS4,
 	MUX_IGNORE_FSYS2,
 	MUX_IGNORE_FSYS3,
 	ENABLE_ACLK_FSYS0,
@@ -2031,42 +1984,40 @@
 	/* PHY clocks from USBDRD30_PHY */
 	FRATE(CLK_PHYCLK_USBDRD30_UDRD30_PHYCLOCK_PHY,
 			"phyclk_usbdrd30_udrd30_phyclock_phy", NULL,
-			CLK_IS_ROOT, 60000000),
+			0, 60000000),
 	FRATE(CLK_PHYCLK_USBDRD30_UDRD30_PIPE_PCLK_PHY,
 			"phyclk_usbdrd30_udrd30_pipe_pclk_phy", NULL,
-			CLK_IS_ROOT, 125000000),
+			0, 125000000),
 	/* PHY clocks from USBHOST30_PHY */
 	FRATE(CLK_PHYCLK_USBHOST30_UHOST30_PHYCLOCK_PHY,
 			"phyclk_usbhost30_uhost30_phyclock_phy", NULL,
-			CLK_IS_ROOT, 60000000),
+			0, 60000000),
 	FRATE(CLK_PHYCLK_USBHOST30_UHOST30_PIPE_PCLK_PHY,
 			"phyclk_usbhost30_uhost30_pipe_pclk_phy", NULL,
-			CLK_IS_ROOT, 125000000),
+			0, 125000000),
 	/* PHY clocks from USBHOST20_PHY */
 	FRATE(CLK_PHYCLK_USBHOST20_PHY_FREECLK_PHY,
-			"phyclk_usbhost20_phy_freeclk_phy", NULL, CLK_IS_ROOT,
-			60000000),
+			"phyclk_usbhost20_phy_freeclk_phy", NULL, 0, 60000000),
 	FRATE(CLK_PHYCLK_USBHOST20_PHY_PHYCLOCK_PHY,
-			"phyclk_usbhost20_phy_phyclock_phy", NULL, CLK_IS_ROOT,
-			60000000),
+			"phyclk_usbhost20_phy_phyclock_phy", NULL, 0, 60000000),
 	FRATE(CLK_PHYCLK_USBHOST20_PHY_CLK48MOHCI_PHY,
 			"phyclk_usbhost20_phy_clk48mohci_phy", NULL,
-			CLK_IS_ROOT, 48000000),
+			0, 48000000),
 	FRATE(CLK_PHYCLK_USBHOST20_PHY_HSIC1_PHY,
-			"phyclk_usbhost20_phy_hsic1_phy", NULL, CLK_IS_ROOT,
+			"phyclk_usbhost20_phy_hsic1_phy", NULL, 0,
 			60000000),
 	/* PHY clocks from UFS_PHY */
 	FRATE(CLK_PHYCLK_UFS_TX0_SYMBOL_PHY, "phyclk_ufs_tx0_symbol_phy",
-			NULL, CLK_IS_ROOT, 300000000),
+			NULL, 0, 300000000),
 	FRATE(CLK_PHYCLK_UFS_RX0_SYMBOL_PHY, "phyclk_ufs_rx0_symbol_phy",
-			NULL, CLK_IS_ROOT, 300000000),
+			NULL, 0, 300000000),
 	FRATE(CLK_PHYCLK_UFS_TX1_SYMBOL_PHY, "phyclk_ufs_tx1_symbol_phy",
-			NULL, CLK_IS_ROOT, 300000000),
+			NULL, 0, 300000000),
 	FRATE(CLK_PHYCLK_UFS_RX1_SYMBOL_PHY, "phyclk_ufs_rx1_symbol_phy",
-			NULL, CLK_IS_ROOT, 300000000),
+			NULL, 0, 300000000),
 	/* PHY clocks from LLI_PHY */
 	FRATE(CLK_PHYCLK_LLI_MPHY_TO_UFS_PHY, "phyclk_lli_mphy_to_ufs_phy",
-			NULL, CLK_IS_ROOT, 26000000),
+			NULL, 0, 26000000),
 };
 
 static struct samsung_mux_clock fsys_mux_clks[] __initdata = {
@@ -2362,9 +2313,7 @@
 static unsigned long g2d_clk_regs[] __initdata = {
 	MUX_SEL_G2D0,
 	MUX_SEL_ENABLE_G2D0,
-	MUX_SEL_STAT_G2D0,
 	DIV_G2D,
-	DIV_STAT_G2D,
 	DIV_ENABLE_ACLK_G2D,
 	DIV_ENABLE_ACLK_G2D_SECURE_SMMU_G2D,
 	DIV_ENABLE_PCLK_G2D,
@@ -2520,16 +2469,9 @@
 	MUX_ENABLE_DISP2,
 	MUX_ENABLE_DISP3,
 	MUX_ENABLE_DISP4,
-	MUX_STAT_DISP0,
-	MUX_STAT_DISP1,
-	MUX_STAT_DISP2,
-	MUX_STAT_DISP3,
-	MUX_STAT_DISP4,
 	MUX_IGNORE_DISP2,
 	DIV_DISP,
 	DIV_DISP_PLL_FREQ_DET,
-	DIV_STAT_DISP,
-	DIV_STAT_DISP_PLL_FREQ_DET,
 	ENABLE_ACLK_DISP0,
 	ENABLE_ACLK_DISP1,
 	ENABLE_PCLK_DISP,
@@ -2604,18 +2546,16 @@
 
 static struct samsung_fixed_rate_clock disp_fixed_clks[] __initdata = {
 	/* PHY clocks from MIPI_DPHY1 */
-	FRATE(0, "phyclk_mipidphy1_bitclkdiv8_phy", NULL, CLK_IS_ROOT,
-			188000000),
-	FRATE(0, "phyclk_mipidphy1_rxclkesc0_phy", NULL, CLK_IS_ROOT,
-			100000000),
+	FRATE(0, "phyclk_mipidphy1_bitclkdiv8_phy", NULL, 0, 188000000),
+	FRATE(0, "phyclk_mipidphy1_rxclkesc0_phy", NULL, 0, 100000000),
 	/* PHY clocks from MIPI_DPHY0 */
-	FRATE(0, "phyclk_mipidphy0_bitclkdiv8_phy", NULL, CLK_IS_ROOT,
-			188000000),
-	FRATE(0, "phyclk_mipidphy0_rxclkesc0_phy", NULL, CLK_IS_ROOT,
-			100000000),
+	FRATE(0, "phyclk_mipidphy0_bitclkdiv8_phy", NULL, 0, 188000000),
+	FRATE(0, "phyclk_mipidphy0_rxclkesc0_phy", NULL, 0, 100000000),
 	/* PHY clocks from HDMI_PHY */
-	FRATE(0, "phyclk_hdmiphy_tmds_clko_phy", NULL, CLK_IS_ROOT, 300000000),
-	FRATE(0, "phyclk_hdmiphy_pixel_clko_phy", NULL, CLK_IS_ROOT, 166000000),
+	FRATE(CLK_PHYCLK_HDMIPHY_TMDS_CLKO_PHY, "phyclk_hdmiphy_tmds_clko_phy",
+			NULL, 0, 300000000),
+	FRATE(CLK_PHYCLK_HDMIPHY_PIXEL_CLKO_PHY, "phyclk_hdmiphy_pixel_clko_phy",
+			NULL, 0, 166000000),
 };
 
 static struct samsung_mux_clock disp_mux_clks[] __initdata = {
@@ -2820,6 +2760,8 @@
 			ENABLE_PCLK_DISP, 2, 0, 0),
 	GATE(CLK_PCLK_DECON_TV, "pclk_decon_tv", "div_pclk_disp",
 			ENABLE_PCLK_DISP, 1, 0, 0),
+	GATE(CLK_PCLK_DECON, "pclk_decon", "div_pclk_disp",
+			ENABLE_PCLK_DISP, 0, 0, 0),
 
 	/* ENABLE_SCLK_DISP */
 	GATE(CLK_PHYCLK_MIPIDPHY1_BITCLKDIV8, "phyclk_mipidphy1_bitclkdiv8",
@@ -2919,11 +2861,8 @@
 	MUX_SEL_AUD1,
 	MUX_ENABLE_AUD0,
 	MUX_ENABLE_AUD1,
-	MUX_STAT_AUD0,
 	DIV_AUD0,
 	DIV_AUD1,
-	DIV_STAT_AUD0,
-	DIV_STAT_AUD1,
 	ENABLE_ACLK_AUD,
 	ENABLE_PCLK_AUD,
 	ENABLE_SCLK_AUD0,
@@ -2937,9 +2876,9 @@
 PNAME(mout_sclk_aud_pcm_p)	= { "mout_aud_pll_user", "ioclk_audiocdclk0",};
 
 static struct samsung_fixed_rate_clock aud_fixed_clks[] __initdata = {
-	FRATE(0, "ioclk_jtag_tclk", NULL, CLK_IS_ROOT, 33000000),
-	FRATE(0, "ioclk_slimbus_clk", NULL, CLK_IS_ROOT, 25000000),
-	FRATE(0, "ioclk_i2s_bclk", NULL, CLK_IS_ROOT, 50000000),
+	FRATE(0, "ioclk_jtag_tclk", NULL, 0, 33000000),
+	FRATE(0, "ioclk_slimbus_clk", NULL, 0, 25000000),
+	FRATE(0, "ioclk_i2s_bclk", NULL, 0, 50000000),
 };
 
 static struct samsung_mux_clock aud_mux_clks[] __initdata = {
@@ -3087,7 +3026,6 @@
 
 #define CMU_BUS_COMMON_CLK_REGS	\
 	DIV_BUS,		\
-	DIV_STAT_BUS,		\
 	ENABLE_ACLK_BUS,	\
 	ENABLE_PCLK_BUS,	\
 	ENABLE_IP_BUS0,		\
@@ -3100,7 +3038,6 @@
 static unsigned long bus2_clk_regs[] __initdata = {
 	MUX_SEL_BUS2,
 	MUX_ENABLE_BUS2,
-	MUX_STAT_BUS2,
 	CMU_BUS_COMMON_CLK_REGS,
 };
 
@@ -3259,11 +3196,8 @@
 	G3D_PLL_FREQ_DET,
 	MUX_SEL_G3D,
 	MUX_ENABLE_G3D,
-	MUX_STAT_G3D,
 	DIV_G3D,
 	DIV_G3D_PLL_FREQ_DET,
-	DIV_STAT_G3D,
-	DIV_STAT_G3D_PLL_FREQ_DET,
 	ENABLE_ACLK_G3D,
 	ENABLE_PCLK_G3D,
 	ENABLE_SCLK_G3D,
@@ -3379,7 +3313,6 @@
 static unsigned long gscl_clk_regs[] __initdata = {
 	MUX_SEL_GSCL,
 	MUX_ENABLE_GSCL,
-	MUX_STAT_GSCL,
 	ENABLE_ACLK_GSCL,
 	ENABLE_ACLK_GSCL_SECURE_SMMU_GSCL0,
 	ENABLE_ACLK_GSCL_SECURE_SMMU_GSCL1,
@@ -3472,11 +3405,11 @@
 
 	/* ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL1 */
 	GATE(CLK_PCLK_SMMU_GSCL1, "pclk_smmu_gscl1", "mout_aclk_gscl_111_user",
-		ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL0, 0, 0, 0),
+		ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL1, 0, 0, 0),
 
 	/* ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL2 */
 	GATE(CLK_PCLK_SMMU_GSCL2, "pclk_smmu_gscl2", "mout_aclk_gscl_111_user",
-		ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL0, 0, 0, 0),
+		ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL2, 0, 0, 0),
 };
 
 static struct samsung_cmu_info gscl_cmu_info __initdata = {
@@ -3543,15 +3476,9 @@
 	MUX_ENABLE_APOLLO0,
 	MUX_ENABLE_APOLLO1,
 	MUX_ENABLE_APOLLO2,
-	MUX_STAT_APOLLO0,
-	MUX_STAT_APOLLO1,
-	MUX_STAT_APOLLO2,
 	DIV_APOLLO0,
 	DIV_APOLLO1,
 	DIV_APOLLO_PLL_FREQ_DET,
-	DIV_STAT_APOLLO0,
-	DIV_STAT_APOLLO1,
-	DIV_STAT_APOLLO_PLL_FREQ_DET,
 	ENABLE_ACLK_APOLLO,
 	ENABLE_PCLK_APOLLO,
 	ENABLE_SCLK_APOLLO,
@@ -3735,15 +3662,9 @@
 	MUX_ENABLE_ATLAS0,
 	MUX_ENABLE_ATLAS1,
 	MUX_ENABLE_ATLAS2,
-	MUX_STAT_ATLAS0,
-	MUX_STAT_ATLAS1,
-	MUX_STAT_ATLAS2,
 	DIV_ATLAS0,
 	DIV_ATLAS1,
 	DIV_ATLAS_PLL_FREQ_DET,
-	DIV_STAT_ATLAS0,
-	DIV_STAT_ATLAS1,
-	DIV_STAT_ATLAS_PLL_FREQ_DET,
 	ENABLE_ACLK_ATLAS,
 	ENABLE_PCLK_ATLAS,
 	ENABLE_SCLK_ATLAS,
@@ -3937,10 +3858,7 @@
 	MUX_SEL_MSCL1,
 	MUX_ENABLE_MSCL0,
 	MUX_ENABLE_MSCL1,
-	MUX_STAT_MSCL0,
-	MUX_STAT_MSCL1,
 	DIV_MSCL,
-	DIV_STAT_MSCL,
 	ENABLE_ACLK_MSCL,
 	ENABLE_ACLK_MSCL_SECURE_SMMU_M2MSCALER0,
 	ENABLE_ACLK_MSCL_SECURE_SMMU_M2MSCALER1,
@@ -4097,9 +4015,7 @@
 static unsigned long mfc_clk_regs[] __initdata = {
 	MUX_SEL_MFC,
 	MUX_ENABLE_MFC,
-	MUX_STAT_MFC,
 	DIV_MFC,
-	DIV_STAT_MFC,
 	ENABLE_ACLK_MFC,
 	ENABLE_ACLK_MFC_SECURE_SMMU_MFC,
 	ENABLE_PCLK_MFC,
@@ -4207,9 +4123,7 @@
 static unsigned long hevc_clk_regs[] __initdata = {
 	MUX_SEL_HEVC,
 	MUX_ENABLE_HEVC,
-	MUX_STAT_HEVC,
 	DIV_HEVC,
-	DIV_STAT_HEVC,
 	ENABLE_ACLK_HEVC,
 	ENABLE_ACLK_HEVC_SECURE_SMMU_HEVC,
 	ENABLE_PCLK_HEVC,
@@ -4321,9 +4235,7 @@
 static unsigned long isp_clk_regs[] __initdata = {
 	MUX_SEL_ISP,
 	MUX_ENABLE_ISP,
-	MUX_STAT_ISP,
 	DIV_ISP,
-	DIV_STAT_ISP,
 	ENABLE_ACLK_ISP0,
 	ENABLE_ACLK_ISP1,
 	ENABLE_ACLK_ISP2,
@@ -4603,20 +4515,11 @@
 	MUX_ENABLE_CAM02,
 	MUX_ENABLE_CAM03,
 	MUX_ENABLE_CAM04,
-	MUX_STAT_CAM00,
-	MUX_STAT_CAM01,
-	MUX_STAT_CAM02,
-	MUX_STAT_CAM03,
-	MUX_STAT_CAM04,
 	MUX_IGNORE_CAM01,
 	DIV_CAM00,
 	DIV_CAM01,
 	DIV_CAM02,
 	DIV_CAM03,
-	DIV_STAT_CAM00,
-	DIV_STAT_CAM01,
-	DIV_STAT_CAM02,
-	DIV_STAT_CAM03,
 	ENABLE_ACLK_CAM00,
 	ENABLE_ACLK_CAM01,
 	ENABLE_ACLK_CAM02,
@@ -4687,9 +4590,9 @@
 
 static struct samsung_fixed_rate_clock cam0_fixed_clks[] __initdata = {
 	FRATE(CLK_PHYCLK_RXBYTEECLKHS0_S4_PHY, "phyclk_rxbyteclkhs0_s4_phy",
-			NULL, CLK_IS_ROOT, 100000000),
+			NULL, 0, 100000000),
 	FRATE(CLK_PHYCLK_RXBYTEECLKHS0_S2A_PHY, "phyclk_rxbyteclkhs0_s2a_phy",
-			NULL, CLK_IS_ROOT, 100000000),
+			NULL, 0, 100000000),
 };
 
 static struct samsung_mux_clock cam0_mux_clks[] __initdata = {
@@ -4749,21 +4652,21 @@
 	MUX(CLK_MOUT_SCLK_LITE_FREECNT_C, "mout_sclk_lite_freecnt_c",
 			mout_sclk_lite_freecnt_c_p, MUX_SEL_CAM04, 24, 1),
 	MUX(CLK_MOUT_SCLK_LITE_FREECNT_B, "mout_sclk_lite_freecnt_b",
-			mout_sclk_lite_freecnt_b_p, MUX_SEL_CAM04, 24, 1),
+			mout_sclk_lite_freecnt_b_p, MUX_SEL_CAM04, 20, 1),
 	MUX(CLK_MOUT_SCLK_LITE_FREECNT_A, "mout_sclk_lite_freecnt_a",
-			mout_sclk_lite_freecnt_a_p, MUX_SEL_CAM04, 24, 1),
+			mout_sclk_lite_freecnt_a_p, MUX_SEL_CAM04, 16, 1),
 	MUX(CLK_MOUT_SCLK_PIXELASYNC_LITE_C_B, "mout_sclk_pixelasync_lite_c_b",
-			mout_sclk_pixelasync_lite_c_b_p, MUX_SEL_CAM04, 24, 1),
+			mout_sclk_pixelasync_lite_c_b_p, MUX_SEL_CAM04, 12, 1),
 	MUX(CLK_MOUT_SCLK_PIXELASYNC_LITE_C_A, "mout_sclk_pixelasync_lite_c_a",
-			mout_sclk_pixelasync_lite_c_a_p, MUX_SEL_CAM04, 24, 1),
+			mout_sclk_pixelasync_lite_c_a_p, MUX_SEL_CAM04, 8, 1),
 	MUX(CLK_MOUT_SCLK_PIXELASYNC_LITE_C_INIT_B,
 			"mout_sclk_pixelasync_lite_c_init_b",
 			mout_sclk_pixelasync_lite_c_init_b_p,
-			MUX_SEL_CAM04, 24, 1),
+			MUX_SEL_CAM04, 4, 1),
 	MUX(CLK_MOUT_SCLK_PIXELASYNC_LITE_C_INIT_A,
 			"mout_sclk_pixelasync_lite_c_init_a",
 			mout_sclk_pixelasync_lite_c_init_a_p,
-			MUX_SEL_CAM04, 24, 1),
+			MUX_SEL_CAM04, 0, 1),
 };
 
 static struct samsung_div_clock cam0_div_clks[] __initdata = {
@@ -5074,14 +4977,9 @@
 	MUX_ENABLE_CAM10,
 	MUX_ENABLE_CAM11,
 	MUX_ENABLE_CAM12,
-	MUX_STAT_CAM10,
-	MUX_STAT_CAM11,
-	MUX_STAT_CAM12,
 	MUX_IGNORE_CAM11,
 	DIV_CAM10,
 	DIV_CAM11,
-	DIV_STAT_CAM10,
-	DIV_STAT_CAM11,
 	ENABLE_ACLK_CAM10,
 	ENABLE_ACLK_CAM11,
 	ENABLE_ACLK_CAM12,
@@ -5120,7 +5018,7 @@
 
 static struct samsung_fixed_rate_clock cam1_fixed_clks[] __initdata = {
 	FRATE(CLK_PHYCLK_RXBYTEECLKHS0_S2B, "phyclk_rxbyteclkhs0_s2b_phy", NULL,
-			CLK_IS_ROOT, 100000000),
+			0, 100000000),
 };
 
 static struct samsung_mux_clock cam1_mux_clks[] __initdata = {
@@ -5134,9 +5032,9 @@
 	MUX(CLK_MOUT_ACLK_CAM1_333_USER, "mout_aclk_cam1_333_user",
 			mout_aclk_cam1_333_user_p, MUX_SEL_CAM10, 8, 1),
 	MUX(CLK_MOUT_ACLK_CAM1_400_USER, "mout_aclk_cam1_400_user",
-			mout_aclk_cam1_400_user_p, MUX_SEL_CAM01, 4, 1),
+			mout_aclk_cam1_400_user_p, MUX_SEL_CAM10, 4, 1),
 	MUX(CLK_MOUT_ACLK_CAM1_552_USER, "mout_aclk_cam1_552_user",
-			mout_aclk_cam1_552_user_p, MUX_SEL_CAM01, 0, 1),
+			mout_aclk_cam1_552_user_p, MUX_SEL_CAM10, 0, 1),
 
 	/* MUX_SEL_CAM11 */
 	MUX(CLK_MOUT_PHYCLK_RXBYTECLKHS0_S2B_USER,
@@ -5161,7 +5059,7 @@
 
 static struct samsung_div_clock cam1_div_clks[] __initdata = {
 	/* DIV_CAM10 */
-	DIV(CLK_DIV_SCLK_ISP_WPWM, "div_sclk_isp_wpwm",
+	DIV(CLK_DIV_SCLK_ISP_MPWM, "div_sclk_isp_mpwm",
 			"div_pclk_cam1_83", DIV_CAM10, 16, 2),
 	DIV(CLK_DIV_PCLK_CAM1_83, "div_pclk_cam1_83",
 			"mout_aclk_cam1_333_user", DIV_CAM10, 12, 2),
@@ -5355,7 +5253,7 @@
 			ENABLE_PCLK_CAM1, 5, CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_PCLK_ISP_I2C0, "pclk_isp_i2c0", "div_pclk_cam1_83",
 			ENABLE_PCLK_CAM1, 4, CLK_IGNORE_UNUSED, 0),
-	GATE(CLK_PCLK_ISP_MPWM, "pclk_isp_wpwm", "div_pclk_cam1_83",
+	GATE(CLK_PCLK_ISP_MPWM, "pclk_isp_mpwm", "div_pclk_cam1_83",
 			ENABLE_PCLK_CAM1, 3, CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_PCLK_FD, "pclk_fd", "div_pclk_fd",
 			ENABLE_PCLK_CAM1, 3, CLK_IGNORE_UNUSED, 0),
@@ -5388,7 +5286,7 @@
 			ENABLE_SCLK_CAM1, 5, 0, 0),
 	GATE(CLK_SCLK_ISP_SPI0, "sclk_isp_spi0", "mout_sclk_isp_spi0_user",
 			ENABLE_SCLK_CAM1, 4, 0, 0),
-	GATE(CLK_SCLK_ISP_MPWM, "sclk_isp_wpwm", "div_sclk_isp_wpwm",
+	GATE(CLK_SCLK_ISP_MPWM, "sclk_isp_mpwm", "div_sclk_isp_mpwm",
 			ENABLE_SCLK_CAM1, 3, 0, 0),
 	GATE(CLK_PCLK_DBG_ISP, "sclk_dbg_isp", "div_pclk_dbg_cam1",
 			ENABLE_SCLK_CAM1, 2, 0, 0),

diff --git a/drivers/clk/samsung/clk-exynos5440.c b/drivers/clk/samsung/clk-exynos5440.c
index 5908138..c57cff1 100644
--- a/drivers/clk/samsung/clk-exynos5440.c
+++ b/drivers/clk/samsung/clk-exynos5440.c

@@ -31,16 +31,16 @@
 
 /* fixed rate clocks generated outside the soc */
 static struct samsung_fixed_rate_clock exynos5440_fixed_rate_ext_clks[] __initdata = {
-	FRATE(0, "xtal", NULL, CLK_IS_ROOT, 0),
+	FRATE(0, "xtal", NULL, 0, 0),
 };
 
 /* fixed rate clocks */
 static struct samsung_fixed_rate_clock exynos5440_fixed_rate_clks[] __initdata = {
-	FRATE(0, "ppll", NULL, CLK_IS_ROOT, 1000000000),
-	FRATE(0, "usb_phy0", NULL, CLK_IS_ROOT, 60000000),
-	FRATE(0, "usb_phy1", NULL, CLK_IS_ROOT, 60000000),
-	FRATE(0, "usb_ohci12", NULL, CLK_IS_ROOT, 12000000),
-	FRATE(0, "usb_ohci48", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(0, "ppll", NULL, 0, 1000000000),
+	FRATE(0, "usb_phy0", NULL, 0, 60000000),
+	FRATE(0, "usb_phy1", NULL, 0, 60000000),
+	FRATE(0, "usb_ohci12", NULL, 0, 12000000),
+	FRATE(0, "usb_ohci48", NULL, 0, 48000000),
 };
 
 /* fixed factor clocks */

diff --git a/drivers/clk/samsung/clk-exynos7.c b/drivers/clk/samsung/clk-exynos7.c
index 55f8e2e..ad68d46 100644
--- a/drivers/clk/samsung/clk-exynos7.c
+++ b/drivers/clk/samsung/clk-exynos7.c

@@ -894,10 +894,8 @@
 
 /* fixed rate clocks used in the FSYS0 block */
 static struct samsung_fixed_rate_clock fixed_rate_clks_fsys0[] __initdata = {
-	FRATE(0, "phyclk_usbdrd300_udrd30_phyclock", NULL,
-		CLK_IS_ROOT, 60000000),
-	FRATE(0, "phyclk_usbdrd300_udrd30_pipe_pclk", NULL,
-		CLK_IS_ROOT, 125000000),
+	FRATE(0, "phyclk_usbdrd300_udrd30_phyclock", NULL, 0, 60000000),
+	FRATE(0, "phyclk_usbdrd300_udrd30_pipe_pclk", NULL, 0, 125000000),
 };
 
 static unsigned long fsys0_clk_regs[] __initdata = {
@@ -1009,11 +1007,11 @@
 /* fixed rate clocks used in the FSYS1 block */
 static struct samsung_fixed_rate_clock fixed_rate_clks_fsys1[] __initdata = {
 	FRATE(PHYCLK_UFS20_TX0_SYMBOL, "phyclk_ufs20_tx0_symbol", NULL,
-			CLK_IS_ROOT, 300000000),
+			0, 300000000),
 	FRATE(PHYCLK_UFS20_RX0_SYMBOL, "phyclk_ufs20_rx0_symbol", NULL,
-			CLK_IS_ROOT, 300000000),
+			0, 300000000),
 	FRATE(PHYCLK_UFS20_RX1_SYMBOL, "phyclk_ufs20_rx1_symbol", NULL,
-			CLK_IS_ROOT, 300000000),
+			0, 300000000),
 };
 
 static unsigned long fsys1_clk_regs[] __initdata = {

diff --git a/drivers/clk/samsung/clk-s3c2410.c b/drivers/clk/samsung/clk-s3c2410.c
index 0945a88..d7b011c 100644
--- a/drivers/clk/samsung/clk-s3c2410.c
+++ b/drivers/clk/samsung/clk-s3c2410.c

@@ -344,7 +344,7 @@
  */
 #define XTI	1
 struct samsung_fixed_rate_clock s3c2410_common_frate_clks[] __initdata = {
-	FRATE(XTI, "xti", NULL, CLK_IS_ROOT, 0),
+	FRATE(XTI, "xti", NULL, 0, 0),
 };
 
 static void __init s3c2410_common_clk_register_fixed_ext(

diff --git a/drivers/clk/samsung/clk-s3c2412.c b/drivers/clk/samsung/clk-s3c2412.c
index 44d6a9f..effe373 100644
--- a/drivers/clk/samsung/clk-s3c2412.c
+++ b/drivers/clk/samsung/clk-s3c2412.c

@@ -232,8 +232,8 @@
  */
 #define XTI	1
 struct samsung_fixed_rate_clock s3c2412_common_frate_clks[] __initdata = {
-	FRATE(XTI, "xti", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "ext", NULL, CLK_IS_ROOT, 0),
+	FRATE(XTI, "xti", NULL, 0, 0),
+	FRATE(0, "ext", NULL, 0, 0),
 };
 
 static void __init s3c2412_common_clk_register_fixed_ext(

diff --git a/drivers/clk/samsung/clk-s3c2443.c b/drivers/clk/samsung/clk-s3c2443.c
index 2c0a1ea..3756278 100644
--- a/drivers/clk/samsung/clk-s3c2443.c
+++ b/drivers/clk/samsung/clk-s3c2443.c

@@ -371,10 +371,10 @@
  * Only necessary until the devicetree-move is complete
  */
 struct samsung_fixed_rate_clock s3c2443_common_frate_clks[] __initdata = {
-	FRATE(0, "xti", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "ext", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "ext_i2s", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "ext_uart", NULL, CLK_IS_ROOT, 0),
+	FRATE(0, "xti", NULL, 0, 0),
+	FRATE(0, "ext", NULL, 0, 0),
+	FRATE(0, "ext_i2s", NULL, 0, 0),
+	FRATE(0, "ext_uart", NULL, 0, 0),
 };
 
 static void __init s3c2443_common_clk_register_fixed_ext(

diff --git a/drivers/clk/samsung/clk-s3c64xx.c b/drivers/clk/samsung/clk-s3c64xx.c
index d325ed1..60aa775 100644
--- a/drivers/clk/samsung/clk-s3c64xx.c
+++ b/drivers/clk/samsung/clk-s3c64xx.c

@@ -176,14 +176,14 @@
 
 /* Fixed rate clocks generated outside the SoC. */
 FIXED_RATE_CLOCKS(s3c64xx_fixed_rate_ext_clks) __initdata = {
-	FRATE(0, "fin_pll", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "xusbxti", NULL, CLK_IS_ROOT, 0),
+	FRATE(0, "fin_pll", NULL, 0, 0),
+	FRATE(0, "xusbxti", NULL, 0, 0),
 };
 
 /* Fixed rate clocks generated inside the SoC. */
 FIXED_RATE_CLOCKS(s3c64xx_fixed_rate_clks) __initdata = {
-	FRATE(CLK27M, "clk27m", NULL, CLK_IS_ROOT, 27000000),
-	FRATE(CLK48M, "clk48m", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(CLK27M, "clk27m", NULL, 0, 27000000),
+	FRATE(CLK48M, "clk48m", NULL, 0, 48000000),
 };
 
 /* List of clock muxes present on all S3C64xx SoCs. */

diff --git a/drivers/clk/samsung/clk-s5pv210.c b/drivers/clk/samsung/clk-s5pv210.c
index 759aaf3..5230226 100644
--- a/drivers/clk/samsung/clk-s5pv210.c
+++ b/drivers/clk/samsung/clk-s5pv210.c

@@ -503,15 +503,15 @@
 
 /* S5PV210-specific fixed rate clocks generated inside the SoC. */
 static const struct samsung_fixed_rate_clock s5pv210_frate_clks[] __initconst = {
-	FRATE(SCLK_HDMI27M, "sclk_hdmi27m", NULL, CLK_IS_ROOT, 27000000),
-	FRATE(SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 27000000),
-	FRATE(SCLK_USBPHY0, "sclk_usbphy0", NULL, CLK_IS_ROOT, 48000000),
-	FRATE(SCLK_USBPHY1, "sclk_usbphy1", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(SCLK_HDMI27M, "sclk_hdmi27m", NULL, 0, 27000000),
+	FRATE(SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 27000000),
+	FRATE(SCLK_USBPHY0, "sclk_usbphy0", NULL, 0, 48000000),
+	FRATE(SCLK_USBPHY1, "sclk_usbphy1", NULL, 0, 48000000),
 };
 
 /* S5P6442-specific fixed rate clocks generated inside the SoC. */
 static const struct samsung_fixed_rate_clock s5p6442_frate_clks[] __initconst = {
-	FRATE(SCLK_USBPHY0, "sclk_usbphy0", NULL, CLK_IS_ROOT, 30000000),
+	FRATE(SCLK_USBPHY0, "sclk_usbphy0", NULL, 0, 30000000),
 };
 
 /* Common clock dividers. */

diff --git a/drivers/clk/sirf/clk-atlas7.c b/drivers/clk/sirf/clk-atlas7.c
index 957aae6..d0c6c9a 100644
--- a/drivers/clk/sirf/clk-atlas7.c
+++ b/drivers/clk/sirf/clk-atlas7.c

@@ -1423,7 +1423,7 @@
 	return 0;
 }
 
-static struct reset_control_ops atlas7_rst_ops = {
+static const struct reset_control_ops atlas7_rst_ops = {
 	.reset = atlas7_reset_module,
 };
 

diff --git a/drivers/clk/socfpga/clk-gate-a10.c b/drivers/clk/socfpga/clk-gate-a10.c
index 1cebf25..c2d5727 100644
--- a/drivers/clk/socfpga/clk-gate-a10.c
+++ b/drivers/clk/socfpga/clk-gate-a10.c

@@ -115,7 +115,6 @@
 	const char *parent_name[SOCFPGA_MAX_PARENTS];
 	struct clk_init_data init;
 	int rc;
-	int i = 0;
 
 	socfpga_clk = kzalloc(sizeof(*socfpga_clk), GFP_KERNEL);
 	if (WARN_ON(!socfpga_clk))
@@ -167,12 +166,9 @@
 	init.name = clk_name;
 	init.ops = ops;
 	init.flags = 0;
-	while (i < SOCFPGA_MAX_PARENTS && (parent_name[i] =
-			of_clk_get_parent_name(node, i)) != NULL)
-		i++;
 
+	init.num_parents = of_clk_parent_fill(node, parent_name, SOCFPGA_MAX_PARENTS);
 	init.parent_names = parent_name;
-	init.num_parents = i;
 	socfpga_clk->hw.hw.init = &init;
 
 	clk = clk_register(NULL, &socfpga_clk->hw.hw);

diff --git a/drivers/clk/socfpga/clk-periph-a10.c b/drivers/clk/socfpga/clk-periph-a10.c
index 1f397cb..70993f1 100644
--- a/drivers/clk/socfpga/clk-periph-a10.c
+++ b/drivers/clk/socfpga/clk-periph-a10.c

@@ -74,7 +74,7 @@
 	struct clk *clk;
 	struct socfpga_periph_clk *periph_clk;
 	const char *clk_name = node->name;
-	const char *parent_name;
+	const char *parent_name[SOCFPGA_MAX_PARENTS];
 	struct clk_init_data init;
 	int rc;
 	u32 fixed_div;
@@ -109,9 +109,8 @@
 	init.ops = ops;
 	init.flags = 0;
 
-	parent_name = of_clk_get_parent_name(node, 0);
-	init.num_parents = 1;
-	init.parent_names = &parent_name;
+	init.num_parents = of_clk_parent_fill(node, parent_name, SOCFPGA_MAX_PARENTS);
+	init.parent_names = parent_name;
 
 	periph_clk->hw.hw.init = &init;
 

diff --git a/drivers/clk/socfpga/clk-pll-a10.c b/drivers/clk/socfpga/clk-pll-a10.c
index 402d630..35fabe1 100644
--- a/drivers/clk/socfpga/clk-pll-a10.c
+++ b/drivers/clk/socfpga/clk-pll-a10.c

@@ -74,7 +74,7 @@
 	.get_parent = clk_pll_get_parent,
 };
 
-static struct __init clk * __socfpga_pll_init(struct device_node *node,
+static struct clk * __init __socfpga_pll_init(struct device_node *node,
 	const struct clk_ops *ops)
 {
 	u32 reg;

diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c
index 009bd14..2f86e3f 100644
--- a/drivers/clk/spear/spear1310_clock.c
+++ b/drivers/clk/spear/spear1310_clock.c

@@ -386,24 +386,20 @@
 {
 	struct clk *clk, *clk1;
 
-	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, CLK_IS_ROOT,
-			32000);
+	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, 0, 32000);
 	clk_register_clkdev(clk, "osc_32k_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, CLK_IS_ROOT,
-			24000000);
+	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, 0, 24000000);
 	clk_register_clkdev(clk, "osc_24m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_25m_clk", NULL, CLK_IS_ROOT,
-			25000000);
+	clk = clk_register_fixed_rate(NULL, "osc_25m_clk", NULL, 0, 25000000);
 	clk_register_clkdev(clk, "osc_25m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT,
-			125000000);
+	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, 0, 125000000);
 	clk_register_clkdev(clk, "gmii_pad_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL,
-			CLK_IS_ROOT, 12288000);
+	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL, 0,
+				      12288000);
 	clk_register_clkdev(clk, "i2s_src_pad_clk", NULL);
 
 	/* clock derived from 32 KHz osc clk */
@@ -897,11 +893,10 @@
 			&_lock);
 	clk_register_clkdev(clk, "ras_apb_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "ras_plclk0_clk", NULL, CLK_IS_ROOT,
+	clk = clk_register_fixed_rate(NULL, "ras_plclk0_clk", NULL, 0,
 			50000000);
 
-	clk = clk_register_fixed_rate(NULL, "ras_tx50_clk", NULL, CLK_IS_ROOT,
-			50000000);
+	clk = clk_register_fixed_rate(NULL, "ras_tx50_clk", NULL, 0, 50000000);
 
 	clk = clk_register_gate(NULL, "can0_clk", "apb_clk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_CAN0_CLK_ENB, 0,

diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c
index 9c7abfd..cbb19a9 100644
--- a/drivers/clk/spear/spear1340_clock.c
+++ b/drivers/clk/spear/spear1340_clock.c

@@ -443,24 +443,20 @@
 {
 	struct clk *clk, *clk1;
 
-	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, CLK_IS_ROOT,
-			32000);
+	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, 0, 32000);
 	clk_register_clkdev(clk, "osc_32k_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, CLK_IS_ROOT,
-			24000000);
+	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, 0, 24000000);
 	clk_register_clkdev(clk, "osc_24m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_25m_clk", NULL, CLK_IS_ROOT,
-			25000000);
+	clk = clk_register_fixed_rate(NULL, "osc_25m_clk", NULL, 0, 25000000);
 	clk_register_clkdev(clk, "osc_25m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT,
-			125000000);
+	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, 0, 125000000);
 	clk_register_clkdev(clk, "gmii_pad_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL,
-			CLK_IS_ROOT, 12288000);
+	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL, 0,
+				      12288000);
 	clk_register_clkdev(clk, "i2s_src_pad_clk", NULL);
 
 	/* clock derived from 32 KHz osc clk */

diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c
index 404a55e..c403c66 100644
--- a/drivers/clk/spear/spear3xx_clock.c
+++ b/drivers/clk/spear/spear3xx_clock.c

@@ -251,7 +251,7 @@
 	struct clk *clk;
 
 	clk = clk_register_fixed_rate(NULL, "smii_125m_pad_clk", NULL,
-			CLK_IS_ROOT, 125000000);
+			0, 125000000);
 	clk_register_clkdev(clk, "smii_125m_pad", NULL);
 
 	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0,
@@ -391,12 +391,10 @@
 {
 	struct clk *clk, *clk1, *ras_apb_clk;
 
-	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, CLK_IS_ROOT,
-			32000);
+	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, 0, 32000);
 	clk_register_clkdev(clk, "osc_32k_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, CLK_IS_ROOT,
-			24000000);
+	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, 0, 24000000);
 	clk_register_clkdev(clk, "osc_24m_clk", NULL);
 
 	/* clock derived from 32 KHz osc clk */

diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c
index e24f85c..7c9383c 100644
--- a/drivers/clk/spear/spear6xx_clock.c
+++ b/drivers/clk/spear/spear6xx_clock.c

@@ -117,12 +117,10 @@
 {
 	struct clk *clk, *clk1;
 
-	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, CLK_IS_ROOT,
-			32000);
+	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, 0, 32000);
 	clk_register_clkdev(clk, "osc_32k_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_30m_clk", NULL, CLK_IS_ROOT,
-			30000000);
+	clk = clk_register_fixed_rate(NULL, "osc_30m_clk", NULL, 0, 30000000);
 	clk_register_clkdev(clk, "osc_30m_clk", NULL);
 
 	/* clock derived from 32 KHz osc clk */

diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c
index 24d9959..627267c 100644
--- a/drivers/clk/st/clk-flexgen.c
+++ b/drivers/clk/st/clk-flexgen.c

@@ -244,10 +244,10 @@
 						       int *num_parents)
 {
 	const char **parents;
-	int nparents;
+	unsigned int nparents;
 
 	nparents = of_clk_get_parent_count(np);
-	if (WARN_ON(nparents <= 0))
+	if (WARN_ON(!nparents))
 		return NULL;
 
 	parents = kcalloc(nparents, sizeof(const char *), GFP_KERNEL);

diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c
index ccb324d..dec4eaa 100644
--- a/drivers/clk/st/clkgen-fsyn.c
+++ b/drivers/clk/st/clkgen-fsyn.c

@@ -574,12 +574,16 @@
 	struct stm_fs params;
 	long hwrate = 0;
 	unsigned long flags = 0;
+	int ret;
 
 	if (!rate || !parent_rate)
 		return -EINVAL;
 
-	if (!clk_fs660c32_vco_get_params(parent_rate, rate, &params))
-		clk_fs660c32_vco_get_rate(parent_rate, &params, &hwrate);
+	ret = clk_fs660c32_vco_get_params(parent_rate, rate, &params);
+	if (ret)
+		return ret;
+
+	clk_fs660c32_vco_get_rate(parent_rate, &params, &hwrate);
 
 	pr_debug("%s: %s new rate %ld [ndiv=0x%x]\n",
 		 __func__, clk_hw_get_name(hw),

diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c
index 0d9a74b..b1e10ff 100644
--- a/drivers/clk/st/clkgen-mux.c
+++ b/drivers/clk/st/clkgen-mux.c

@@ -26,10 +26,10 @@
 						       int *num_parents)
 {
 	const char **parents;
-	int nparents;
+	unsigned int nparents;
 
 	nparents = of_clk_get_parent_count(np);
-	if (WARN_ON(nparents <= 0))
+	if (WARN_ON(!nparents))
 		return ERR_PTR(-EINVAL);
 
 	parents = kcalloc(nparents, sizeof(const char *), GFP_KERNEL);

diff --git a/drivers/clk/sunxi/clk-a10-hosc.c b/drivers/clk/sunxi/clk-a10-hosc.c
index 0481d5d..6b598c6 100644
--- a/drivers/clk/sunxi/clk-a10-hosc.c
+++ b/drivers/clk/sunxi/clk-a10-hosc.c

@@ -15,9 +15,9 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/slab.h>
 
 #define SUNXI_OSC24M_GATE	0
 
@@ -61,7 +61,6 @@
 		goto err_free_gate;
 
 	of_clk_add_provider(node, of_clk_src_simple_get, clk);
-	clk_register_clkdev(clk, clk_name, NULL);
 
 	return;
 

diff --git a/drivers/clk/sunxi/clk-a10-ve.c b/drivers/clk/sunxi/clk-a10-ve.c
index 044c171..d9ea22e 100644
--- a/drivers/clk/sunxi/clk-a10-ve.c
+++ b/drivers/clk/sunxi/clk-a10-ve.c

@@ -85,7 +85,7 @@
 	return 0;
 }
 
-static struct reset_control_ops sunxi_ve_reset_ops = {
+static const struct reset_control_ops sunxi_ve_reset_ops = {
 	.assert		= sunxi_ve_reset_assert,
 	.deassert	= sunxi_ve_reset_deassert,
 };

diff --git a/drivers/clk/sunxi/clk-a20-gmac.c b/drivers/clk/sunxi/clk-a20-gmac.c
index 1611b03..3437f73 100644
--- a/drivers/clk/sunxi/clk-a20-gmac.c
+++ b/drivers/clk/sunxi/clk-a20-gmac.c

@@ -17,7 +17,6 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
@@ -107,7 +106,6 @@
 		goto iounmap_reg;
 
 	of_clk_add_provider(node, of_clk_src_simple_get, clk);
-	clk_register_clkdev(clk, clk_name, NULL);
 
 	return;
 

diff --git a/drivers/clk/sunxi/clk-factors.c b/drivers/clk/sunxi/clk-factors.c
index 59428db..ddefe96 100644
--- a/drivers/clk/sunxi/clk-factors.c
+++ b/drivers/clk/sunxi/clk-factors.c

@@ -48,7 +48,7 @@
 	u32 reg;
 	unsigned long rate;
 	struct clk_factors *factors = to_clk_factors(hw);
-	struct clk_factors_config *config = factors->config;
+	const struct clk_factors_config *config = factors->config;
 
 	/* Fetch the register value */
 	reg = readl(factors->reg);
@@ -63,25 +63,36 @@
 	if (config->pwidth != SUNXI_FACTORS_NOT_APPLICABLE)
 		p = FACTOR_GET(config->pshift, config->pwidth, reg);
 
+	if (factors->recalc) {
+		struct factors_request factors_req = {
+			.parent_rate = parent_rate,
+			.n = n,
+			.k = k,
+			.m = m,
+			.p = p,
+		};
+
+		/* get mux details from mux clk structure */
+		if (factors->mux)
+			factors_req.parent_index =
+				(reg >> factors->mux->shift) &
+				factors->mux->mask;
+
+		factors->recalc(&factors_req);
+
+		return factors_req.rate;
+	}
+
 	/* Calculate the rate */
 	rate = (parent_rate * (n + config->n_start) * (k + 1) >> p) / (m + 1);
 
 	return rate;
 }
 
-static long clk_factors_round_rate(struct clk_hw *hw, unsigned long rate,
-				   unsigned long *parent_rate)
-{
-	struct clk_factors *factors = to_clk_factors(hw);
-	factors->get_factors((u32 *)&rate, (u32)*parent_rate,
-			     NULL, NULL, NULL, NULL);
-
-	return rate;
-}
-
 static int clk_factors_determine_rate(struct clk_hw *hw,
 				      struct clk_rate_request *req)
 {
+	struct clk_factors *factors = to_clk_factors(hw);
 	struct clk_hw *parent, *best_parent = NULL;
 	int i, num_parents;
 	unsigned long parent_rate, best = 0, child_rate, best_child_rate = 0;
@@ -89,6 +100,10 @@
 	/* find the parent that can help provide the fastest rate <= rate */
 	num_parents = clk_hw_get_num_parents(hw);
 	for (i = 0; i < num_parents; i++) {
+		struct factors_request factors_req = {
+			.rate = req->rate,
+			.parent_index = i,
+		};
 		parent = clk_hw_get_parent_by_index(hw, i);
 		if (!parent)
 			continue;
@@ -97,8 +112,9 @@
 		else
 			parent_rate = clk_hw_get_rate(parent);
 
-		child_rate = clk_factors_round_rate(hw, req->rate,
-						    &parent_rate);
+		factors_req.parent_rate = parent_rate;
+		factors->get_factors(&factors_req);
+		child_rate = factors_req.rate;
 
 		if (child_rate <= req->rate && child_rate > best_child_rate) {
 			best_parent = parent;
@@ -120,13 +136,16 @@
 static int clk_factors_set_rate(struct clk_hw *hw, unsigned long rate,
 				unsigned long parent_rate)
 {
-	u8 n = 0, k = 0, m = 0, p = 0;
+	struct factors_request req = {
+		.rate = rate,
+		.parent_rate = parent_rate,
+	};
 	u32 reg;
 	struct clk_factors *factors = to_clk_factors(hw);
-	struct clk_factors_config *config = factors->config;
+	const struct clk_factors_config *config = factors->config;
 	unsigned long flags = 0;
 
-	factors->get_factors((u32 *)&rate, (u32)parent_rate, &n, &k, &m, &p);
+	factors->get_factors(&req);
 
 	if (factors->lock)
 		spin_lock_irqsave(factors->lock, flags);
@@ -135,10 +154,10 @@
 	reg = readl(factors->reg);
 
 	/* Set up the new factors - macros do not do anything if width is 0 */
-	reg = FACTOR_SET(config->nshift, config->nwidth, reg, n);
-	reg = FACTOR_SET(config->kshift, config->kwidth, reg, k);
-	reg = FACTOR_SET(config->mshift, config->mwidth, reg, m);
-	reg = FACTOR_SET(config->pshift, config->pwidth, reg, p);
+	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req.n);
+	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req.k);
+	reg = FACTOR_SET(config->mshift, config->mwidth, reg, req.m);
+	reg = FACTOR_SET(config->pshift, config->pwidth, reg, req.p);
 
 	/* Apply them now */
 	writel(reg, factors->reg);
@@ -155,7 +174,6 @@
 static const struct clk_ops clk_factors_ops = {
 	.determine_rate = clk_factors_determine_rate,
 	.recalc_rate = clk_factors_recalc_rate,
-	.round_rate = clk_factors_round_rate,
 	.set_rate = clk_factors_set_rate,
 };
 
@@ -172,7 +190,7 @@
 	struct clk_hw *mux_hw = NULL;
 	const char *clk_name = node->name;
 	const char *parents[FACTORS_MAX_PARENTS];
-	int i = 0;
+	int ret, i = 0;
 
 	/* if we have a mux, we will have >1 parents */
 	i = of_clk_parent_fill(node, parents, FACTORS_MAX_PARENTS);
@@ -188,21 +206,22 @@
 
 	factors = kzalloc(sizeof(struct clk_factors), GFP_KERNEL);
 	if (!factors)
-		return NULL;
+		goto err_factors;
 
 	/* set up factors properties */
 	factors->reg = reg;
 	factors->config = data->table;
 	factors->get_factors = data->getter;
+	factors->recalc = data->recalc;
 	factors->lock = lock;
 
 	/* Add a gate if this factor clock can be gated */
 	if (data->enable) {
 		gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL);
-		if (!gate) {
-			kfree(factors);
-			return NULL;
-		}
+		if (!gate)
+			goto err_gate;
+
+		factors->gate = gate;
 
 		/* set up gate properties */
 		gate->reg = reg;
@@ -214,11 +233,10 @@
 	/* Add a mux if this factor clock can be muxed */
 	if (data->mux) {
 		mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL);
-		if (!mux) {
-			kfree(factors);
-			kfree(gate);
-			return NULL;
-		}
+		if (!mux)
+			goto err_mux;
+
+		factors->mux = mux;
 
 		/* set up gate properties */
 		mux->reg = reg;
@@ -233,11 +251,44 @@
 			mux_hw, &clk_mux_ops,
 			&factors->hw, &clk_factors_ops,
 			gate_hw, &clk_gate_ops, 0);
+	if (IS_ERR(clk))
+		goto err_register;
 
-	if (!IS_ERR(clk)) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-		clk_register_clkdev(clk, clk_name, NULL);
-	}
+	ret = of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (ret)
+		goto err_provider;
 
 	return clk;
+
+err_provider:
+	/* TODO: The composite clock stuff will leak a bit here. */
+	clk_unregister(clk);
+err_register:
+	kfree(mux);
+err_mux:
+	kfree(gate);
+err_gate:
+	kfree(factors);
+err_factors:
+	return NULL;
+}
+
+void sunxi_factors_unregister(struct device_node *node, struct clk *clk)
+{
+	struct clk_hw *hw = __clk_get_hw(clk);
+	struct clk_factors *factors;
+	const char *name;
+
+	if (!hw)
+		return;
+
+	factors = to_clk_factors(hw);
+	name = clk_hw_get_name(hw);
+
+	of_clk_del_provider(node);
+	/* TODO: The composite clock stuff will leak a bit here. */
+	clk_unregister(clk);
+	kfree(factors->mux);
+	kfree(factors->gate);
+	kfree(factors);
 }

diff --git a/drivers/clk/sunxi/clk-factors.h b/drivers/clk/sunxi/clk-factors.h
index 171085a..1e63c5b 100644
--- a/drivers/clk/sunxi/clk-factors.h
+++ b/drivers/clk/sunxi/clk-factors.h

@@ -2,7 +2,6 @@
 #define __MACH_SUNXI_CLK_FACTORS_H
 
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/spinlock.h>
 
 #define SUNXI_FACTORS_NOT_APPLICABLE	(0)
@@ -19,21 +18,36 @@
 	u8 n_start;
 };
 
+struct factors_request {
+	unsigned long rate;
+	unsigned long parent_rate;
+	u8 parent_index;
+	u8 n;
+	u8 k;
+	u8 m;
+	u8 p;
+};
+
 struct factors_data {
 	int enable;
 	int mux;
 	int muxmask;
-	struct clk_factors_config *table;
-	void (*getter) (u32 *rate, u32 parent_rate, u8 *n, u8 *k, u8 *m, u8 *p);
+	const struct clk_factors_config *table;
+	void (*getter)(struct factors_request *req);
+	void (*recalc)(struct factors_request *req);
 	const char *name;
 };
 
 struct clk_factors {
 	struct clk_hw hw;
 	void __iomem *reg;
-	struct clk_factors_config *config;
-	void (*get_factors) (u32 *rate, u32 parent, u8 *n, u8 *k, u8 *m, u8 *p);
+	const struct clk_factors_config *config;
+	void (*get_factors)(struct factors_request *req);
+	void (*recalc)(struct factors_request *req);
 	spinlock_t *lock;
+	/* for cleanup */
+	struct clk_mux *mux;
+	struct clk_gate *gate;
 };
 
 struct clk *sunxi_factors_register(struct device_node *node,
@@ -41,4 +55,6 @@
 				   spinlock_t *lock,
 				   void __iomem *reg);
 
+void sunxi_factors_unregister(struct device_node *node, struct clk *clk);
+
 #endif

diff --git a/drivers/clk/sunxi/clk-mod0.c b/drivers/clk/sunxi/clk-mod0.c
index d167e1e..b38d71c 100644
--- a/drivers/clk/sunxi/clk-mod0.c
+++ b/drivers/clk/sunxi/clk-mod0.c

@@ -15,6 +15,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 #include <linux/clk-provider.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
@@ -28,17 +29,16 @@
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sun4i_a10_get_mod0_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun4i_a10_get_mod0_factors(struct factors_request *req)
 {
 	u8 div, calcm, calcp;
 
 	/* These clocks can only divide, so we will never be able to achieve
 	 * frequencies higher than the parent frequency */
-	if (*freq > parent_rate)
-		*freq = parent_rate;
+	if (req->rate > req->parent_rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	if (div < 16)
 		calcp = 0;
@@ -51,18 +51,13 @@
 
 	calcm = DIV_ROUND_UP(div, 1 << calcp);
 
-	*freq = (parent_rate >> calcp) / calcm;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*m = calcm - 1;
-	*p = calcp;
+	req->rate = (req->parent_rate >> calcp) / calcm;
+	req->m = calcm - 1;
+	req->p = calcp;
 }
 
 /* user manual says "n" but it's really "p" */
-static struct clk_factors_config sun4i_a10_mod0_config = {
+static const struct clk_factors_config sun4i_a10_mod0_config = {
 	.mshift = 0,
 	.mwidth = 4,
 	.pshift = 16,

diff --git a/drivers/clk/sunxi/clk-simple-gates.c b/drivers/clk/sunxi/clk-simple-gates.c
index f4da52b..a085c3b 100644
--- a/drivers/clk/sunxi/clk-simple-gates.c
+++ b/drivers/clk/sunxi/clk-simple-gates.c

@@ -98,6 +98,8 @@
 	sunxi_simple_gates_setup(node, NULL, 0);
 }
 
+CLK_OF_DECLARE(sun4i_a10_gates, "allwinner,sun4i-a10-gates-clk",
+	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun4i_a10_apb0, "allwinner,sun4i-a10-apb0-gates-clk",
 	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun4i_a10_apb1, "allwinner,sun4i-a10-apb1-gates-clk",
@@ -130,6 +132,8 @@
 	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun8i_a33_ahb1, "allwinner,sun8i-a33-ahb1-gates-clk",
 	       sunxi_simple_gates_init);
+CLK_OF_DECLARE(sun8i_a83t_apb0, "allwinner,sun8i-a83t-apb0-gates-clk",
+	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun9i_a80_ahb0, "allwinner,sun9i-a80-ahb0-gates-clk",
 	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun9i_a80_ahb1, "allwinner,sun9i-a80-ahb1-gates-clk",

diff --git a/drivers/clk/sunxi/clk-sun6i-apb0-gates.c b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c
index 23d042a..68021fa 100644
--- a/drivers/clk/sunxi/clk-sun6i-apb0-gates.c
+++ b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c

@@ -9,7 +9,6 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -87,7 +86,6 @@
 						      clk_parent, 0, reg, i,
 						      0, NULL);
 		WARN_ON(IS_ERR(clk_data->clks[i]));
-		clk_register_clkdev(clk_data->clks[i], clk_name, NULL);
 
 		j++;
 	}

diff --git a/drivers/clk/sunxi/clk-sun6i-ar100.c b/drivers/clk/sunxi/clk-sun6i-ar100.c
index 2088768..84a187e 100644
--- a/drivers/clk/sunxi/clk-sun6i-ar100.c
+++ b/drivers/clk/sunxi/clk-sun6i-ar100.c

@@ -8,211 +8,97 @@
  *
  */
 
+#include <linux/bitops.h>
 #include <linux/clk-provider.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/spinlock.h>
 
-#define SUN6I_AR100_MAX_PARENTS		4
-#define SUN6I_AR100_SHIFT_MASK		0x3
-#define SUN6I_AR100_SHIFT_MAX		SUN6I_AR100_SHIFT_MASK
-#define SUN6I_AR100_SHIFT_SHIFT		4
-#define SUN6I_AR100_DIV_MASK		0x1f
-#define SUN6I_AR100_DIV_MAX		(SUN6I_AR100_DIV_MASK + 1)
-#define SUN6I_AR100_DIV_SHIFT		8
-#define SUN6I_AR100_MUX_MASK		0x3
-#define SUN6I_AR100_MUX_SHIFT		16
+#include "clk-factors.h"
 
-struct ar100_clk {
-	struct clk_hw hw;
-	void __iomem *reg;
-};
-
-static inline struct ar100_clk *to_ar100_clk(struct clk_hw *hw)
+/**
+ * sun6i_get_ar100_factors - Calculates factors p, m for AR100
+ *
+ * AR100 rate is calculated as follows
+ * rate = (parent_rate >> p) / (m + 1);
+ */
+static void sun6i_get_ar100_factors(struct factors_request *req)
 {
-	return container_of(hw, struct ar100_clk, hw);
-}
-
-static unsigned long ar100_recalc_rate(struct clk_hw *hw,
-				       unsigned long parent_rate)
-{
-	struct ar100_clk *clk = to_ar100_clk(hw);
-	u32 val = readl(clk->reg);
-	int shift = (val >> SUN6I_AR100_SHIFT_SHIFT) & SUN6I_AR100_SHIFT_MASK;
-	int div = (val >> SUN6I_AR100_DIV_SHIFT) & SUN6I_AR100_DIV_MASK;
-
-	return (parent_rate >> shift) / (div + 1);
-}
-
-static int ar100_determine_rate(struct clk_hw *hw,
-				struct clk_rate_request *req)
-{
-	int nparents = clk_hw_get_num_parents(hw);
-	long best_rate = -EINVAL;
-	int i;
-
-	req->best_parent_hw = NULL;
-
-	for (i = 0; i < nparents; i++) {
-		unsigned long parent_rate;
-		unsigned long tmp_rate;
-		struct clk_hw *parent;
-		unsigned long div;
-		int shift;
-
-		parent = clk_hw_get_parent_by_index(hw, i);
-		parent_rate = clk_hw_get_rate(parent);
-		div = DIV_ROUND_UP(parent_rate, req->rate);
-
-		/*
-		 * The AR100 clk contains 2 divisors:
-		 * - one power of 2 divisor
-		 * - one regular divisor
-		 *
-		 * First check if we can safely shift (or divide by a power
-		 * of 2) without losing precision on the requested rate.
-		 */
-		shift = ffs(div) - 1;
-		if (shift > SUN6I_AR100_SHIFT_MAX)
-			shift = SUN6I_AR100_SHIFT_MAX;
-
-		div >>= shift;
-
-		/*
-		 * Then if the divisor is still bigger than what the HW
-		 * actually supports, use a bigger shift (or power of 2
-		 * divider) value and accept to lose some precision.
-		 */
-		while (div > SUN6I_AR100_DIV_MAX) {
-			shift++;
-			div >>= 1;
-			if (shift > SUN6I_AR100_SHIFT_MAX)
-				break;
-		}
-
-		/*
-		 * If the shift value (or power of 2 divider) is bigger
-		 * than what the HW actually support, skip this parent.
-		 */
-		if (shift > SUN6I_AR100_SHIFT_MAX)
-			continue;
-
-		tmp_rate = (parent_rate >> shift) / div;
-		if (!req->best_parent_hw || tmp_rate > best_rate) {
-			req->best_parent_hw = parent;
-			req->best_parent_rate = parent_rate;
-			best_rate = tmp_rate;
-		}
-	}
-
-	if (best_rate < 0)
-		return best_rate;
-
-	req->rate = best_rate;
-
-	return 0;
-}
-
-static int ar100_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct ar100_clk *clk = to_ar100_clk(hw);
-	u32 val = readl(clk->reg);
-
-	if (index >= SUN6I_AR100_MAX_PARENTS)
-		return -EINVAL;
-
-	val &= ~(SUN6I_AR100_MUX_MASK << SUN6I_AR100_MUX_SHIFT);
-	val |= (index << SUN6I_AR100_MUX_SHIFT);
-	writel(val, clk->reg);
-
-	return 0;
-}
-
-static u8 ar100_get_parent(struct clk_hw *hw)
-{
-	struct ar100_clk *clk = to_ar100_clk(hw);
-	return (readl(clk->reg) >> SUN6I_AR100_MUX_SHIFT) &
-	       SUN6I_AR100_MUX_MASK;
-}
-
-static int ar100_set_rate(struct clk_hw *hw, unsigned long rate,
-			  unsigned long parent_rate)
-{
-	unsigned long div = parent_rate / rate;
-	struct ar100_clk *clk = to_ar100_clk(hw);
-	u32 val = readl(clk->reg);
+	unsigned long div;
 	int shift;
 
-	if (parent_rate % rate)
-		return -EINVAL;
+	/* clock only divides */
+	if (req->rate > req->parent_rate)
+		req->rate = req->parent_rate;
 
-	shift = ffs(div) - 1;
-	if (shift > SUN6I_AR100_SHIFT_MAX)
-		shift = SUN6I_AR100_SHIFT_MAX;
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
+
+	if (div < 32)
+		shift = 0;
+	else if (div >> 1 < 32)
+		shift = 1;
+	else if (div >> 2 < 32)
+		shift = 2;
+	else
+		shift = 3;
 
 	div >>= shift;
 
-	if (div > SUN6I_AR100_DIV_MAX)
-		return -EINVAL;
+	if (div > 32)
+		div = 32;
 
-	val &= ~((SUN6I_AR100_SHIFT_MASK << SUN6I_AR100_SHIFT_SHIFT) |
-		 (SUN6I_AR100_DIV_MASK << SUN6I_AR100_DIV_SHIFT));
-	val |= (shift << SUN6I_AR100_SHIFT_SHIFT) |
-	       (div << SUN6I_AR100_DIV_SHIFT);
-	writel(val, clk->reg);
+	req->rate = (req->parent_rate >> shift) / div;
+	req->m = div - 1;
+	req->p = shift;
+}
+
+static const struct clk_factors_config sun6i_ar100_config = {
+	.mwidth = 5,
+	.mshift = 8,
+	.pwidth = 2,
+	.pshift = 4,
+};
+
+static const struct factors_data sun6i_ar100_data = {
+	.mux = 16,
+	.muxmask = GENMASK(1, 0),
+	.table = &sun6i_ar100_config,
+	.getter = sun6i_get_ar100_factors,
+};
+
+static DEFINE_SPINLOCK(sun6i_ar100_lock);
+
+static int sun6i_a31_ar100_clk_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct resource *r;
+	void __iomem *reg;
+	struct clk *clk;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	clk = sunxi_factors_register(np, &sun6i_ar100_data, &sun6i_ar100_lock,
+				     reg);
+	if (!clk)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, clk);
 
 	return 0;
 }
 
-static struct clk_ops ar100_ops = {
-	.recalc_rate = ar100_recalc_rate,
-	.determine_rate = ar100_determine_rate,
-	.set_parent = ar100_set_parent,
-	.get_parent = ar100_get_parent,
-	.set_rate = ar100_set_rate,
-};
-
-static int sun6i_a31_ar100_clk_probe(struct platform_device *pdev)
+static int sun6i_a31_ar100_clk_remove(struct platform_device *pdev)
 {
-	const char *parents[SUN6I_AR100_MAX_PARENTS];
 	struct device_node *np = pdev->dev.of_node;
-	const char *clk_name = np->name;
-	struct clk_init_data init;
-	struct ar100_clk *ar100;
-	struct resource *r;
-	struct clk *clk;
-	int nparents;
+	struct clk *clk = platform_get_drvdata(pdev);
 
-	ar100 = devm_kzalloc(&pdev->dev, sizeof(*ar100), GFP_KERNEL);
-	if (!ar100)
-		return -ENOMEM;
+	sunxi_factors_unregister(np, clk);
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ar100->reg = devm_ioremap_resource(&pdev->dev, r);
-	if (IS_ERR(ar100->reg))
-		return PTR_ERR(ar100->reg);
-
-	nparents = of_clk_get_parent_count(np);
-	if (nparents > SUN6I_AR100_MAX_PARENTS)
-		nparents = SUN6I_AR100_MAX_PARENTS;
-
-	of_clk_parent_fill(np, parents, nparents);
-
-	of_property_read_string(np, "clock-output-names", &clk_name);
-
-	init.name = clk_name;
-	init.ops = &ar100_ops;
-	init.parent_names = parents;
-	init.num_parents = nparents;
-	init.flags = 0;
-
-	ar100->hw.init = &init;
-
-	clk = clk_register(&pdev->dev, &ar100->hw);
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
-
-	return of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	return 0;
 }
 
 static const struct of_device_id sun6i_a31_ar100_clk_dt_ids[] = {
@@ -227,6 +113,7 @@
 		.of_match_table = sun6i_a31_ar100_clk_dt_ids,
 	},
 	.probe = sun6i_a31_ar100_clk_probe,
+	.remove = sun6i_a31_ar100_clk_remove,
 };
 module_platform_driver(sun6i_a31_ar100_clk_driver);
 

diff --git a/drivers/clk/sunxi/clk-sun8i-apb0.c b/drivers/clk/sunxi/clk-sun8i-apb0.c
index 7ba6110..2ea61de 100644
--- a/drivers/clk/sunxi/clk-sun8i-apb0.c
+++ b/drivers/clk/sunxi/clk-sun8i-apb0.c

@@ -36,7 +36,7 @@
 
 	/* The A23 APB0 clock is a standard 2 bit wide divider clock */
 	clk = clk_register_divider(NULL, clk_name, clk_parent, 0, reg,
-				   0, 2, CLK_DIVIDER_POWER_OF_TWO, NULL);
+				   0, 2, 0, NULL);
 	if (IS_ERR(clk))
 		return clk;
 

diff --git a/drivers/clk/sunxi/clk-sun8i-bus-gates.c b/drivers/clk/sunxi/clk-sun8i-bus-gates.c
index e32d18b..63fdb79 100644
--- a/drivers/clk/sunxi/clk-sun8i-bus-gates.c
+++ b/drivers/clk/sunxi/clk-sun8i-bus-gates.c

@@ -17,7 +17,6 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -110,3 +109,5 @@
 
 CLK_OF_DECLARE(sun8i_h3_bus_gates, "allwinner,sun8i-h3-bus-gates-clk",
 	       sun8i_h3_bus_gates_init);
+CLK_OF_DECLARE(sun8i_a83t_bus_gates, "allwinner,sun8i-a83t-bus-gates-clk",
+	       sun8i_h3_bus_gates_init);

diff --git a/drivers/clk/sunxi/clk-sun8i-mbus.c b/drivers/clk/sunxi/clk-sun8i-mbus.c
index bf117a6..411d303 100644
--- a/drivers/clk/sunxi/clk-sun8i-mbus.c
+++ b/drivers/clk/sunxi/clk-sun8i-mbus.c

@@ -15,74 +15,106 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 #include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/of_address.h>
 
-#include "clk-factors.h"
-
-/**
- * sun8i_a23_get_mbus_factors() - calculates m factor for MBUS clocks
- * MBUS rate is calculated as follows
- * rate = parent_rate / (m + 1);
- */
-
-static void sun8i_a23_get_mbus_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
-{
-	u8 div;
-
-	/*
-	 * These clocks can only divide, so we will never be able to
-	 * achieve frequencies higher than the parent frequency
-	 */
-	if (*freq > parent_rate)
-		*freq = parent_rate;
-
-	div = DIV_ROUND_UP(parent_rate, *freq);
-
-	if (div > 8)
-		div = 8;
-
-	*freq = parent_rate / div;
-
-	/* we were called to round the frequency, we can now return */
-	if (m == NULL)
-		return;
-
-	*m = div - 1;
-}
-
-static struct clk_factors_config sun8i_a23_mbus_config = {
-	.mshift = 0,
-	.mwidth = 3,
-};
-
-static const struct factors_data sun8i_a23_mbus_data __initconst = {
-	.enable = 31,
-	.mux = 24,
-	.muxmask = BIT(1) | BIT(0),
-	.table = &sun8i_a23_mbus_config,
-	.getter = sun8i_a23_get_mbus_factors,
-};
+#define SUN8I_MBUS_ENABLE	31
+#define SUN8I_MBUS_MUX_SHIFT	24
+#define SUN8I_MBUS_MUX_MASK	0x3
+#define SUN8I_MBUS_DIV_SHIFT	0
+#define SUN8I_MBUS_DIV_WIDTH	3
+#define SUN8I_MBUS_MAX_PARENTS	4
 
 static DEFINE_SPINLOCK(sun8i_a23_mbus_lock);
 
 static void __init sun8i_a23_mbus_setup(struct device_node *node)
 {
-	struct clk *mbus;
+	int num_parents = of_clk_get_parent_count(node);
+	const char **parents;
+	const char *clk_name = node->name;
+	struct resource res;
+	struct clk_divider *div;
+	struct clk_gate *gate;
+	struct clk_mux *mux;
+	struct clk *clk;
 	void __iomem *reg;
+	int err;
 
-	reg = of_iomap(node, 0);
-	if (!reg) {
-		pr_err("Could not get registers for a23-mbus-clk\n");
+	parents = kcalloc(num_parents, sizeof(*parents), GFP_KERNEL);
+	if (!parents)
 		return;
+
+	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+	if (!reg) {
+		pr_err("Could not get registers for sun8i-mbus-clk\n");
+		goto err_free_parents;
 	}
 
-	mbus = sunxi_factors_register(node, &sun8i_a23_mbus_data,
-				      &sun8i_a23_mbus_lock, reg);
+	div = kzalloc(sizeof(*div), GFP_KERNEL);
+	if (!div)
+		goto err_unmap;
 
+	mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+	if (!mux)
+		goto err_free_div;
+
+	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		goto err_free_mux;
+
+	of_property_read_string(node, "clock-output-names", &clk_name);
+	of_clk_parent_fill(node, parents, num_parents);
+
+	gate->reg = reg;
+	gate->bit_idx = SUN8I_MBUS_ENABLE;
+	gate->lock = &sun8i_a23_mbus_lock;
+
+	div->reg = reg;
+	div->shift = SUN8I_MBUS_DIV_SHIFT;
+	div->width = SUN8I_MBUS_DIV_WIDTH;
+	div->lock = &sun8i_a23_mbus_lock;
+
+	mux->reg = reg;
+	mux->shift = SUN8I_MBUS_MUX_SHIFT;
+	mux->mask = SUN8I_MBUS_MUX_MASK;
+	mux->lock = &sun8i_a23_mbus_lock;
+
+	clk = clk_register_composite(NULL, clk_name, parents, num_parents,
+				     &mux->hw, &clk_mux_ops,
+				     &div->hw, &clk_divider_ops,
+				     &gate->hw, &clk_gate_ops,
+				     0);
+	if (IS_ERR(clk))
+		goto err_free_gate;
+
+	err = of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (err)
+		goto err_unregister_clk;
+
+	kfree(parents); /* parents is deep copied */
 	/* The MBUS clocks needs to be always enabled */
-	__clk_get(mbus);
-	clk_prepare_enable(mbus);
+	__clk_get(clk);
+	clk_prepare_enable(clk);
+
+	return;
+
+err_unregister_clk:
+	/* TODO: The composite clock stuff will leak a bit here. */
+	clk_unregister(clk);
+err_free_gate:
+	kfree(gate);
+err_free_mux:
+	kfree(mux);
+err_free_div:
+	kfree(div);
+err_unmap:
+	iounmap(reg);
+	of_address_to_resource(node, 0, &res);
+	release_mem_region(res.start, resource_size(&res));
+err_free_parents:
+	kfree(parents);
 }
 CLK_OF_DECLARE(sun8i_a23_mbus, "allwinner,sun8i-a23-mbus-clk", sun8i_a23_mbus_setup);

diff --git a/drivers/clk/sunxi/clk-sun9i-core.c b/drivers/clk/sunxi/clk-sun9i-core.c
index 6c4c983..43f014f 100644
--- a/drivers/clk/sunxi/clk-sun9i-core.c
+++ b/drivers/clk/sunxi/clk-sun9i-core.c

@@ -15,6 +15,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -32,15 +33,14 @@
  * p and m are named div1 and div2 in Allwinner's SDK
  */
 
-static void sun9i_a80_get_pll4_factors(u32 *freq, u32 parent_rate,
-				       u8 *n_ret, u8 *k, u8 *m_ret, u8 *p_ret)
+static void sun9i_a80_get_pll4_factors(struct factors_request *req)
 {
 	int n;
 	int m = 1;
 	int p = 1;
 
 	/* Normalize value to a 6 MHz multiple (24 MHz / 4) */
-	n = DIV_ROUND_UP(*freq, 6000000);
+	n = DIV_ROUND_UP(req->rate, 6000000);
 
 	/* If n is too large switch to steps of 12 MHz */
 	if (n > 255) {
@@ -60,18 +60,13 @@
 	else if (n < 12)
 		n = 12;
 
-	*freq = ((24000000 * n) >> p) / (m + 1);
-
-	/* we were called to round the frequency, we can now return */
-	if (n_ret == NULL)
-		return;
-
-	*n_ret = n;
-	*m_ret = m;
-	*p_ret = p;
+	req->rate = ((24000000 * n) >> p) / (m + 1);
+	req->n = n;
+	req->m = m;
+	req->p = p;
 }
 
-static struct clk_factors_config sun9i_a80_pll4_config = {
+static const struct clk_factors_config sun9i_a80_pll4_config = {
 	.mshift = 18,
 	.mwidth = 1,
 	.nshift = 8,
@@ -111,30 +106,24 @@
  * rate = parent_rate / (m + 1);
  */
 
-static void sun9i_a80_get_gt_factors(u32 *freq, u32 parent_rate,
-				     u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun9i_a80_get_gt_factors(struct factors_request *req)
 {
 	u32 div;
 
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	/* maximum divider is 4 */
 	if (div > 4)
 		div = 4;
 
-	*freq = parent_rate / div;
-
-	/* we were called to round the frequency, we can now return */
-	if (!m)
-		return;
-
-	*m = div;
+	req->rate = req->parent_rate / div;
+	req->m = div;
 }
 
-static struct clk_factors_config sun9i_a80_gt_config = {
+static const struct clk_factors_config sun9i_a80_gt_config = {
 	.mshift = 0,
 	.mwidth = 2,
 };
@@ -176,30 +165,24 @@
  * rate = parent_rate >> p;
  */
 
-static void sun9i_a80_get_ahb_factors(u32 *freq, u32 parent_rate,
-				      u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun9i_a80_get_ahb_factors(struct factors_request *req)
 {
 	u32 _p;
 
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
-	_p = order_base_2(DIV_ROUND_UP(parent_rate, *freq));
+	_p = order_base_2(DIV_ROUND_UP(req->parent_rate, req->rate));
 
 	/* maximum p is 3 */
 	if (_p > 3)
 		_p = 3;
 
-	*freq = parent_rate >> _p;
-
-	/* we were called to round the frequency, we can now return */
-	if (!p)
-		return;
-
-	*p = _p;
+	req->rate = req->parent_rate >> _p;
+	req->p = _p;
 }
 
-static struct clk_factors_config sun9i_a80_ahb_config = {
+static const struct clk_factors_config sun9i_a80_ahb_config = {
 	.pshift = 0,
 	.pwidth = 2,
 };
@@ -262,34 +245,25 @@
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sun9i_a80_get_apb1_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun9i_a80_get_apb1_factors(struct factors_request *req)
 {
 	u32 div;
-	u8 calcm, calcp;
 
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	/* Highest possible divider is 256 (p = 3, m = 31) */
 	if (div > 256)
 		div = 256;
 
-	calcp = order_base_2(div);
-	calcm = (parent_rate >> calcp) - 1;
-	*freq = (parent_rate >> calcp) / (calcm + 1);
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*m = calcm;
-	*p = calcp;
+	req->p = order_base_2(div);
+	req->m = (req->parent_rate >> req->p) - 1;
+	req->rate = (req->parent_rate >> req->p) / (req->m + 1);
 }
 
-static struct clk_factors_config sun9i_a80_apb1_config = {
+static const struct clk_factors_config sun9i_a80_apb1_config = {
 	.mshift = 0,
 	.mwidth = 5,
 	.pshift = 16,

diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c
index a9b1761..028dd83 100644
--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
+++ b/drivers/clk/sunxi/clk-sun9i-mmc.c

@@ -83,7 +83,7 @@
 	return 0;
 }
 
-static struct reset_control_ops sun9i_mmc_reset_ops = {
+static const struct reset_control_ops sun9i_mmc_reset_ops = {
 	.assert		= sun9i_mmc_reset_assert,
 	.deassert	= sun9i_mmc_reset_deassert,
 };

diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index 5ba2188..91de0a0 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c

@@ -28,60 +28,272 @@
 
 static DEFINE_SPINLOCK(clk_lock);
 
+/* Maximum number of parents our clocks have */
+#define SUNXI_MAX_PARENTS	5
+
 /**
- * sun6i_a31_ahb1_clk_setup() - Setup function for a31 ahb1 composite clk
+ * sun4i_get_pll1_factors() - calculates n, k, m, p factors for PLL1
+ * PLL1 rate is calculated as follows
+ * rate = (parent_rate * n * (k + 1) >> p) / (m + 1);
+ * parent_rate is always 24Mhz
  */
 
-#define SUN6I_AHB1_MAX_PARENTS		4
-#define SUN6I_AHB1_MUX_PARENT_PLL6	3
-#define SUN6I_AHB1_MUX_SHIFT		12
-/* un-shifted mask is what mux_clk expects */
-#define SUN6I_AHB1_MUX_MASK		0x3
-#define SUN6I_AHB1_MUX_GET_PARENT(reg)	((reg >> SUN6I_AHB1_MUX_SHIFT) & \
-					 SUN6I_AHB1_MUX_MASK)
-
-#define SUN6I_AHB1_DIV_SHIFT		4
-#define SUN6I_AHB1_DIV_MASK		(0x3 << SUN6I_AHB1_DIV_SHIFT)
-#define SUN6I_AHB1_DIV_GET(reg)		((reg & SUN6I_AHB1_DIV_MASK) >> \
-						SUN6I_AHB1_DIV_SHIFT)
-#define SUN6I_AHB1_DIV_SET(reg, div)	((reg & ~SUN6I_AHB1_DIV_MASK) | \
-						(div << SUN6I_AHB1_DIV_SHIFT))
-#define SUN6I_AHB1_PLL6_DIV_SHIFT	6
-#define SUN6I_AHB1_PLL6_DIV_MASK	(0x3 << SUN6I_AHB1_PLL6_DIV_SHIFT)
-#define SUN6I_AHB1_PLL6_DIV_GET(reg)	((reg & SUN6I_AHB1_PLL6_DIV_MASK) >> \
-						SUN6I_AHB1_PLL6_DIV_SHIFT)
-#define SUN6I_AHB1_PLL6_DIV_SET(reg, div) ((reg & ~SUN6I_AHB1_PLL6_DIV_MASK) | \
-						(div << SUN6I_AHB1_PLL6_DIV_SHIFT))
-
-struct sun6i_ahb1_clk {
-	struct clk_hw hw;
-	void __iomem *reg;
-};
-
-#define to_sun6i_ahb1_clk(_hw) container_of(_hw, struct sun6i_ahb1_clk, hw)
-
-static unsigned long sun6i_ahb1_clk_recalc_rate(struct clk_hw *hw,
-						unsigned long parent_rate)
+static void sun4i_get_pll1_factors(struct factors_request *req)
 {
-	struct sun6i_ahb1_clk *ahb1 = to_sun6i_ahb1_clk(hw);
-	unsigned long rate;
-	u32 reg;
+	u8 div;
 
-	/* Fetch the register value */
-	reg = readl(ahb1->reg);
+	/* Normalize value to a 6M multiple */
+	div = req->rate / 6000000;
+	req->rate = 6000000 * div;
 
-	/* apply pre-divider first if parent is pll6 */
-	if (SUN6I_AHB1_MUX_GET_PARENT(reg) == SUN6I_AHB1_MUX_PARENT_PLL6)
-		parent_rate /= SUN6I_AHB1_PLL6_DIV_GET(reg) + 1;
+	/* m is always zero for pll1 */
+	req->m = 0;
 
-	/* clk divider */
-	rate = parent_rate >> SUN6I_AHB1_DIV_GET(reg);
+	/* k is 1 only on these cases */
+	if (req->rate >= 768000000 || req->rate == 42000000 ||
+			req->rate == 54000000)
+		req->k = 1;
+	else
+		req->k = 0;
 
-	return rate;
+	/* p will be 3 for divs under 10 */
+	if (div < 10)
+		req->p = 3;
+
+	/* p will be 2 for divs between 10 - 20 and odd divs under 32 */
+	else if (div < 20 || (div < 32 && (div & 1)))
+		req->p = 2;
+
+	/* p will be 1 for even divs under 32, divs under 40 and odd pairs
+	 * of divs between 40-62 */
+	else if (div < 40 || (div < 64 && (div & 2)))
+		req->p = 1;
+
+	/* any other entries have p = 0 */
+	else
+		req->p = 0;
+
+	/* calculate a suitable n based on k and p */
+	div <<= req->p;
+	div /= (req->k + 1);
+	req->n = div / 4;
 }
 
-static long sun6i_ahb1_clk_round(unsigned long rate, u8 *divp, u8 *pre_divp,
-				 u8 parent, unsigned long parent_rate)
+/**
+ * sun6i_a31_get_pll1_factors() - calculates n, k and m factors for PLL1
+ * PLL1 rate is calculated as follows
+ * rate = parent_rate * (n + 1) * (k + 1) / (m + 1);
+ * parent_rate should always be 24MHz
+ */
+static void sun6i_a31_get_pll1_factors(struct factors_request *req)
+{
+	/*
+	 * We can operate only on MHz, this will make our life easier
+	 * later.
+	 */
+	u32 freq_mhz = req->rate / 1000000;
+	u32 parent_freq_mhz = req->parent_rate / 1000000;
+
+	/*
+	 * Round down the frequency to the closest multiple of either
+	 * 6 or 16
+	 */
+	u32 round_freq_6 = round_down(freq_mhz, 6);
+	u32 round_freq_16 = round_down(freq_mhz, 16);
+
+	if (round_freq_6 > round_freq_16)
+		freq_mhz = round_freq_6;
+	else
+		freq_mhz = round_freq_16;
+
+	req->rate = freq_mhz * 1000000;
+
+	/* If the frequency is a multiple of 32 MHz, k is always 3 */
+	if (!(freq_mhz % 32))
+		req->k = 3;
+	/* If the frequency is a multiple of 9 MHz, k is always 2 */
+	else if (!(freq_mhz % 9))
+		req->k = 2;
+	/* If the frequency is a multiple of 8 MHz, k is always 1 */
+	else if (!(freq_mhz % 8))
+		req->k = 1;
+	/* Otherwise, we don't use the k factor */
+	else
+		req->k = 0;
+
+	/*
+	 * If the frequency is a multiple of 2 but not a multiple of
+	 * 3, m is 3. This is the first time we use 6 here, yet we
+	 * will use it on several other places.
+	 * We use this number because it's the lowest frequency we can
+	 * generate (with n = 0, k = 0, m = 3), so every other frequency
+	 * somehow relates to this frequency.
+	 */
+	if ((freq_mhz % 6) == 2 || (freq_mhz % 6) == 4)
+		req->m = 2;
+	/*
+	 * If the frequency is a multiple of 6MHz, but the factor is
+	 * odd, m will be 3
+	 */
+	else if ((freq_mhz / 6) & 1)
+		req->m = 3;
+	/* Otherwise, we end up with m = 1 */
+	else
+		req->m = 1;
+
+	/* Calculate n thanks to the above factors we already got */
+	req->n = freq_mhz * (req->m + 1) / ((req->k + 1) * parent_freq_mhz)
+		 - 1;
+
+	/*
+	 * If n end up being outbound, and that we can still decrease
+	 * m, do it.
+	 */
+	if ((req->n + 1) > 31 && (req->m + 1) > 1) {
+		req->n = (req->n + 1) / 2 - 1;
+		req->m = (req->m + 1) / 2 - 1;
+	}
+}
+
+/**
+ * sun8i_a23_get_pll1_factors() - calculates n, k, m, p factors for PLL1
+ * PLL1 rate is calculated as follows
+ * rate = (parent_rate * (n + 1) * (k + 1) >> p) / (m + 1);
+ * parent_rate is always 24Mhz
+ */
+
+static void sun8i_a23_get_pll1_factors(struct factors_request *req)
+{
+	u8 div;
+
+	/* Normalize value to a 6M multiple */
+	div = req->rate / 6000000;
+	req->rate = 6000000 * div;
+
+	/* m is always zero for pll1 */
+	req->m = 0;
+
+	/* k is 1 only on these cases */
+	if (req->rate >= 768000000 || req->rate == 42000000 ||
+			req->rate == 54000000)
+		req->k = 1;
+	else
+		req->k = 0;
+
+	/* p will be 2 for divs under 20 and odd divs under 32 */
+	if (div < 20 || (div < 32 && (div & 1)))
+		req->p = 2;
+
+	/* p will be 1 for even divs under 32, divs under 40 and odd pairs
+	 * of divs between 40-62 */
+	else if (div < 40 || (div < 64 && (div & 2)))
+		req->p = 1;
+
+	/* any other entries have p = 0 */
+	else
+		req->p = 0;
+
+	/* calculate a suitable n based on k and p */
+	div <<= req->p;
+	div /= (req->k + 1);
+	req->n = div / 4 - 1;
+}
+
+/**
+ * sun4i_get_pll5_factors() - calculates n, k factors for PLL5
+ * PLL5 rate is calculated as follows
+ * rate = parent_rate * n * (k + 1)
+ * parent_rate is always 24Mhz
+ */
+
+static void sun4i_get_pll5_factors(struct factors_request *req)
+{
+	u8 div;
+
+	/* Normalize value to a parent_rate multiple (24M) */
+	div = req->rate / req->parent_rate;
+	req->rate = req->parent_rate * div;
+
+	if (div < 31)
+		req->k = 0;
+	else if (div / 2 < 31)
+		req->k = 1;
+	else if (div / 3 < 31)
+		req->k = 2;
+	else
+		req->k = 3;
+
+	req->n = DIV_ROUND_UP(div, (req->k + 1));
+}
+
+/**
+ * sun6i_a31_get_pll6_factors() - calculates n, k factors for A31 PLL6x2
+ * PLL6x2 rate is calculated as follows
+ * rate = parent_rate * (n + 1) * (k + 1)
+ * parent_rate is always 24Mhz
+ */
+
+static void sun6i_a31_get_pll6_factors(struct factors_request *req)
+{
+	u8 div;
+
+	/* Normalize value to a parent_rate multiple (24M) */
+	div = req->rate / req->parent_rate;
+	req->rate = req->parent_rate * div;
+
+	req->k = div / 32;
+	if (req->k > 3)
+		req->k = 3;
+
+	req->n = DIV_ROUND_UP(div, (req->k + 1)) - 1;
+}
+
+/**
+ * sun5i_a13_get_ahb_factors() - calculates m, p factors for AHB
+ * AHB rate is calculated as follows
+ * rate = parent_rate >> p
+ */
+
+static void sun5i_a13_get_ahb_factors(struct factors_request *req)
+{
+	u32 div;
+
+	/* divide only */
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
+
+	/*
+	 * user manual says valid speed is 8k ~ 276M, but tests show it
+	 * can work at speeds up to 300M, just after reparenting to pll6
+	 */
+	if (req->rate < 8000)
+		req->rate = 8000;
+	if (req->rate > 300000000)
+		req->rate = 300000000;
+
+	div = order_base_2(DIV_ROUND_UP(req->parent_rate, req->rate));
+
+	/* p = 0 ~ 3 */
+	if (div > 3)
+		div = 3;
+
+	req->rate = req->parent_rate >> div;
+
+	req->p = div;
+}
+
+#define SUN6I_AHB1_PARENT_PLL6	3
+
+/**
+ * sun6i_a31_get_ahb_factors() - calculates m, p factors for AHB
+ * AHB rate is calculated as follows
+ * rate = parent_rate >> p
+ *
+ * if parent is pll6, then
+ * parent_rate = pll6 rate / (m + 1)
+ */
+
+static void sun6i_get_ahb1_factors(struct factors_request *req)
 {
 	u8 div, calcp, calcm = 1;
 
@@ -89,13 +301,13 @@
 	 * clock can only divide, so we will never be able to achieve
 	 * frequencies higher than the parent frequency
 	 */
-	if (parent_rate && rate > parent_rate)
-		rate = parent_rate;
+	if (req->parent_rate && req->rate > req->parent_rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, rate);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	/* calculate pre-divider if parent is pll6 */
-	if (parent == SUN6I_AHB1_MUX_PARENT_PLL6) {
+	if (req->parent_index == SUN6I_AHB1_PARENT_PLL6) {
 		if (div < 4)
 			calcp = 0;
 		else if (div / 2 < 4)
@@ -111,414 +323,25 @@
 		calcp = calcp > 3 ? 3 : calcp;
 	}
 
-	/* we were asked to pass back divider values */
-	if (divp) {
-		*divp = calcp;
-		*pre_divp = calcm - 1;
-	}
-
-	return (parent_rate / calcm) >> calcp;
-}
-
-static int sun6i_ahb1_clk_determine_rate(struct clk_hw *hw,
-					 struct clk_rate_request *req)
-{
-	struct clk_hw *parent, *best_parent = NULL;
-	int i, num_parents;
-	unsigned long parent_rate, best = 0, child_rate, best_child_rate = 0;
-
-	/* find the parent that can help provide the fastest rate <= rate */
-	num_parents = clk_hw_get_num_parents(hw);
-	for (i = 0; i < num_parents; i++) {
-		parent = clk_hw_get_parent_by_index(hw, i);
-		if (!parent)
-			continue;
-		if (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)
-			parent_rate = clk_hw_round_rate(parent, req->rate);
-		else
-			parent_rate = clk_hw_get_rate(parent);
-
-		child_rate = sun6i_ahb1_clk_round(req->rate, NULL, NULL, i,
-						  parent_rate);
-
-		if (child_rate <= req->rate && child_rate > best_child_rate) {
-			best_parent = parent;
-			best = parent_rate;
-			best_child_rate = child_rate;
-		}
-	}
-
-	if (!best_parent)
-		return -EINVAL;
-
-	req->best_parent_hw = best_parent;
-	req->best_parent_rate = best;
-	req->rate = best_child_rate;
-
-	return 0;
-}
-
-static int sun6i_ahb1_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-				   unsigned long parent_rate)
-{
-	struct sun6i_ahb1_clk *ahb1 = to_sun6i_ahb1_clk(hw);
-	unsigned long flags;
-	u8 div, pre_div, parent;
-	u32 reg;
-
-	spin_lock_irqsave(&clk_lock, flags);
-
-	reg = readl(ahb1->reg);
-
-	/* need to know which parent is used to apply pre-divider */
-	parent = SUN6I_AHB1_MUX_GET_PARENT(reg);
-	sun6i_ahb1_clk_round(rate, &div, &pre_div, parent, parent_rate);
-
-	reg = SUN6I_AHB1_DIV_SET(reg, div);
-	reg = SUN6I_AHB1_PLL6_DIV_SET(reg, pre_div);
-	writel(reg, ahb1->reg);
-
-	spin_unlock_irqrestore(&clk_lock, flags);
-
-	return 0;
-}
-
-static const struct clk_ops sun6i_ahb1_clk_ops = {
-	.determine_rate	= sun6i_ahb1_clk_determine_rate,
-	.recalc_rate	= sun6i_ahb1_clk_recalc_rate,
-	.set_rate	= sun6i_ahb1_clk_set_rate,
-};
-
-static void __init sun6i_ahb1_clk_setup(struct device_node *node)
-{
-	struct clk *clk;
-	struct sun6i_ahb1_clk *ahb1;
-	struct clk_mux *mux;
-	const char *clk_name = node->name;
-	const char *parents[SUN6I_AHB1_MAX_PARENTS];
-	void __iomem *reg;
-	int i;
-
-	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
-	if (IS_ERR(reg))
-		return;
-
-	/* we have a mux, we will have >1 parents */
-	i = of_clk_parent_fill(node, parents, SUN6I_AHB1_MAX_PARENTS);
-	of_property_read_string(node, "clock-output-names", &clk_name);
-
-	ahb1 = kzalloc(sizeof(struct sun6i_ahb1_clk), GFP_KERNEL);
-	if (!ahb1)
-		return;
-
-	mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL);
-	if (!mux) {
-		kfree(ahb1);
-		return;
-	}
-
-	/* set up clock properties */
-	mux->reg = reg;
-	mux->shift = SUN6I_AHB1_MUX_SHIFT;
-	mux->mask = SUN6I_AHB1_MUX_MASK;
-	mux->lock = &clk_lock;
-	ahb1->reg = reg;
-
-	clk = clk_register_composite(NULL, clk_name, parents, i,
-				     &mux->hw, &clk_mux_ops,
-				     &ahb1->hw, &sun6i_ahb1_clk_ops,
-				     NULL, NULL, 0);
-
-	if (!IS_ERR(clk)) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-		clk_register_clkdev(clk, clk_name, NULL);
-	}
-}
-CLK_OF_DECLARE(sun6i_a31_ahb1, "allwinner,sun6i-a31-ahb1-clk", sun6i_ahb1_clk_setup);
-
-/* Maximum number of parents our clocks have */
-#define SUNXI_MAX_PARENTS	5
-
-/**
- * sun4i_get_pll1_factors() - calculates n, k, m, p factors for PLL1
- * PLL1 rate is calculated as follows
- * rate = (parent_rate * n * (k + 1) >> p) / (m + 1);
- * parent_rate is always 24Mhz
- */
-
-static void sun4i_get_pll1_factors(u32 *freq, u32 parent_rate,
-				   u8 *n, u8 *k, u8 *m, u8 *p)
-{
-	u8 div;
-
-	/* Normalize value to a 6M multiple */
-	div = *freq / 6000000;
-	*freq = 6000000 * div;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	/* m is always zero for pll1 */
-	*m = 0;
-
-	/* k is 1 only on these cases */
-	if (*freq >= 768000000 || *freq == 42000000 || *freq == 54000000)
-		*k = 1;
-	else
-		*k = 0;
-
-	/* p will be 3 for divs under 10 */
-	if (div < 10)
-		*p = 3;
-
-	/* p will be 2 for divs between 10 - 20 and odd divs under 32 */
-	else if (div < 20 || (div < 32 && (div & 1)))
-		*p = 2;
-
-	/* p will be 1 for even divs under 32, divs under 40 and odd pairs
-	 * of divs between 40-62 */
-	else if (div < 40 || (div < 64 && (div & 2)))
-		*p = 1;
-
-	/* any other entries have p = 0 */
-	else
-		*p = 0;
-
-	/* calculate a suitable n based on k and p */
-	div <<= *p;
-	div /= (*k + 1);
-	*n = div / 4;
+	req->rate = (req->parent_rate / calcm) >> calcp;
+	req->p = calcp;
+	req->m = calcm - 1;
 }
 
 /**
- * sun6i_a31_get_pll1_factors() - calculates n, k and m factors for PLL1
- * PLL1 rate is calculated as follows
- * rate = parent_rate * (n + 1) * (k + 1) / (m + 1);
- * parent_rate should always be 24MHz
+ * sun6i_ahb1_recalc() - calculates AHB clock rate from m, p factors and
+ *			 parent index
  */
-static void sun6i_a31_get_pll1_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun6i_ahb1_recalc(struct factors_request *req)
 {
-	/*
-	 * We can operate only on MHz, this will make our life easier
-	 * later.
-	 */
-	u32 freq_mhz = *freq / 1000000;
-	u32 parent_freq_mhz = parent_rate / 1000000;
+	req->rate = req->parent_rate;
 
-	/*
-	 * Round down the frequency to the closest multiple of either
-	 * 6 or 16
-	 */
-	u32 round_freq_6 = round_down(freq_mhz, 6);
-	u32 round_freq_16 = round_down(freq_mhz, 16);
+	/* apply pre-divider first if parent is pll6 */
+	if (req->parent_index == SUN6I_AHB1_PARENT_PLL6)
+		req->rate /= req->m + 1;
 
-	if (round_freq_6 > round_freq_16)
-		freq_mhz = round_freq_6;
-	else
-		freq_mhz = round_freq_16;
-
-	*freq = freq_mhz * 1000000;
-
-	/*
-	 * If the factors pointer are null, we were just called to
-	 * round down the frequency.
-	 * Exit.
-	 */
-	if (n == NULL)
-		return;
-
-	/* If the frequency is a multiple of 32 MHz, k is always 3 */
-	if (!(freq_mhz % 32))
-		*k = 3;
-	/* If the frequency is a multiple of 9 MHz, k is always 2 */
-	else if (!(freq_mhz % 9))
-		*k = 2;
-	/* If the frequency is a multiple of 8 MHz, k is always 1 */
-	else if (!(freq_mhz % 8))
-		*k = 1;
-	/* Otherwise, we don't use the k factor */
-	else
-		*k = 0;
-
-	/*
-	 * If the frequency is a multiple of 2 but not a multiple of
-	 * 3, m is 3. This is the first time we use 6 here, yet we
-	 * will use it on several other places.
-	 * We use this number because it's the lowest frequency we can
-	 * generate (with n = 0, k = 0, m = 3), so every other frequency
-	 * somehow relates to this frequency.
-	 */
-	if ((freq_mhz % 6) == 2 || (freq_mhz % 6) == 4)
-		*m = 2;
-	/*
-	 * If the frequency is a multiple of 6MHz, but the factor is
-	 * odd, m will be 3
-	 */
-	else if ((freq_mhz / 6) & 1)
-		*m = 3;
-	/* Otherwise, we end up with m = 1 */
-	else
-		*m = 1;
-
-	/* Calculate n thanks to the above factors we already got */
-	*n = freq_mhz * (*m + 1) / ((*k + 1) * parent_freq_mhz) - 1;
-
-	/*
-	 * If n end up being outbound, and that we can still decrease
-	 * m, do it.
-	 */
-	if ((*n + 1) > 31 && (*m + 1) > 1) {
-		*n = (*n + 1) / 2 - 1;
-		*m = (*m + 1) / 2 - 1;
-	}
-}
-
-/**
- * sun8i_a23_get_pll1_factors() - calculates n, k, m, p factors for PLL1
- * PLL1 rate is calculated as follows
- * rate = (parent_rate * (n + 1) * (k + 1) >> p) / (m + 1);
- * parent_rate is always 24Mhz
- */
-
-static void sun8i_a23_get_pll1_factors(u32 *freq, u32 parent_rate,
-				   u8 *n, u8 *k, u8 *m, u8 *p)
-{
-	u8 div;
-
-	/* Normalize value to a 6M multiple */
-	div = *freq / 6000000;
-	*freq = 6000000 * div;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	/* m is always zero for pll1 */
-	*m = 0;
-
-	/* k is 1 only on these cases */
-	if (*freq >= 768000000 || *freq == 42000000 || *freq == 54000000)
-		*k = 1;
-	else
-		*k = 0;
-
-	/* p will be 2 for divs under 20 and odd divs under 32 */
-	if (div < 20 || (div < 32 && (div & 1)))
-		*p = 2;
-
-	/* p will be 1 for even divs under 32, divs under 40 and odd pairs
-	 * of divs between 40-62 */
-	else if (div < 40 || (div < 64 && (div & 2)))
-		*p = 1;
-
-	/* any other entries have p = 0 */
-	else
-		*p = 0;
-
-	/* calculate a suitable n based on k and p */
-	div <<= *p;
-	div /= (*k + 1);
-	*n = div / 4 - 1;
-}
-
-/**
- * sun4i_get_pll5_factors() - calculates n, k factors for PLL5
- * PLL5 rate is calculated as follows
- * rate = parent_rate * n * (k + 1)
- * parent_rate is always 24Mhz
- */
-
-static void sun4i_get_pll5_factors(u32 *freq, u32 parent_rate,
-				   u8 *n, u8 *k, u8 *m, u8 *p)
-{
-	u8 div;
-
-	/* Normalize value to a parent_rate multiple (24M) */
-	div = *freq / parent_rate;
-	*freq = parent_rate * div;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	if (div < 31)
-		*k = 0;
-	else if (div / 2 < 31)
-		*k = 1;
-	else if (div / 3 < 31)
-		*k = 2;
-	else
-		*k = 3;
-
-	*n = DIV_ROUND_UP(div, (*k+1));
-}
-
-/**
- * sun6i_a31_get_pll6_factors() - calculates n, k factors for A31 PLL6x2
- * PLL6x2 rate is calculated as follows
- * rate = parent_rate * (n + 1) * (k + 1)
- * parent_rate is always 24Mhz
- */
-
-static void sun6i_a31_get_pll6_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
-{
-	u8 div;
-
-	/* Normalize value to a parent_rate multiple (24M) */
-	div = *freq / parent_rate;
-	*freq = parent_rate * div;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*k = div / 32;
-	if (*k > 3)
-		*k = 3;
-
-	*n = DIV_ROUND_UP(div, (*k+1)) - 1;
-}
-
-/**
- * sun5i_a13_get_ahb_factors() - calculates m, p factors for AHB
- * AHB rate is calculated as follows
- * rate = parent_rate >> p
- */
-
-static void sun5i_a13_get_ahb_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
-{
-	u32 div;
-
-	/* divide only */
-	if (parent_rate < *freq)
-		*freq = parent_rate;
-
-	/*
-	 * user manual says valid speed is 8k ~ 276M, but tests show it
-	 * can work at speeds up to 300M, just after reparenting to pll6
-	 */
-	if (*freq < 8000)
-		*freq = 8000;
-	if (*freq > 300000000)
-		*freq = 300000000;
-
-	div = order_base_2(DIV_ROUND_UP(parent_rate, *freq));
-
-	/* p = 0 ~ 3 */
-	if (div > 3)
-		div = 3;
-
-	*freq = parent_rate >> div;
-
-	/* we were called to round the frequency, we can now return */
-	if (p == NULL)
-		return;
-
-	*p = div;
+	/* clk divider */
+	req->rate >>= req->p;
 }
 
 /**
@@ -527,39 +350,34 @@
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sun4i_get_apb1_factors(u32 *freq, u32 parent_rate,
-				   u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun4i_get_apb1_factors(struct factors_request *req)
 {
 	u8 calcm, calcp;
+	int div;
 
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
-	parent_rate = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	/* Invalid rate! */
-	if (parent_rate > 32)
+	if (div > 32)
 		return;
 
-	if (parent_rate <= 4)
+	if (div <= 4)
 		calcp = 0;
-	else if (parent_rate <= 8)
+	else if (div <= 8)
 		calcp = 1;
-	else if (parent_rate <= 16)
+	else if (div <= 16)
 		calcp = 2;
 	else
 		calcp = 3;
 
-	calcm = (parent_rate >> calcp) - 1;
+	calcm = (req->parent_rate >> calcp) - 1;
 
-	*freq = (parent_rate >> calcp) / (calcm + 1);
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*m = calcm;
-	*p = calcp;
+	req->rate = (req->parent_rate >> calcp) / (calcm + 1);
+	req->m = calcm;
+	req->p = calcp;
 }
 
 
@@ -571,17 +389,16 @@
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sun7i_a20_get_out_factors(u32 *freq, u32 parent_rate,
-				      u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun7i_a20_get_out_factors(struct factors_request *req)
 {
 	u8 div, calcm, calcp;
 
 	/* These clocks can only divide, so we will never be able to achieve
 	 * frequencies higher than the parent frequency */
-	if (*freq > parent_rate)
-		*freq = parent_rate;
+	if (req->rate > req->parent_rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	if (div < 32)
 		calcp = 0;
@@ -594,21 +411,16 @@
 
 	calcm = DIV_ROUND_UP(div, 1 << calcp);
 
-	*freq = (parent_rate >> calcp) / calcm;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*m = calcm - 1;
-	*p = calcp;
+	req->rate = (req->parent_rate >> calcp) / calcm;
+	req->m = calcm - 1;
+	req->p = calcp;
 }
 
 /**
  * sunxi_factors_clk_setup() - Setup function for factor clocks
  */
 
-static struct clk_factors_config sun4i_pll1_config = {
+static const struct clk_factors_config sun4i_pll1_config = {
 	.nshift = 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -619,7 +431,7 @@
 	.pwidth = 2,
 };
 
-static struct clk_factors_config sun6i_a31_pll1_config = {
+static const struct clk_factors_config sun6i_a31_pll1_config = {
 	.nshift	= 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -629,7 +441,7 @@
 	.n_start = 1,
 };
 
-static struct clk_factors_config sun8i_a23_pll1_config = {
+static const struct clk_factors_config sun8i_a23_pll1_config = {
 	.nshift = 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -641,14 +453,14 @@
 	.n_start = 1,
 };
 
-static struct clk_factors_config sun4i_pll5_config = {
+static const struct clk_factors_config sun4i_pll5_config = {
 	.nshift = 8,
 	.nwidth = 5,
 	.kshift = 4,
 	.kwidth = 2,
 };
 
-static struct clk_factors_config sun6i_a31_pll6_config = {
+static const struct clk_factors_config sun6i_a31_pll6_config = {
 	.nshift	= 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -656,12 +468,19 @@
 	.n_start = 1,
 };
 
-static struct clk_factors_config sun5i_a13_ahb_config = {
+static const struct clk_factors_config sun5i_a13_ahb_config = {
 	.pshift = 4,
 	.pwidth = 2,
 };
 
-static struct clk_factors_config sun4i_apb1_config = {
+static const struct clk_factors_config sun6i_ahb1_config = {
+	.mshift = 6,
+	.mwidth = 2,
+	.pshift = 4,
+	.pwidth = 2,
+};
+
+static const struct clk_factors_config sun4i_apb1_config = {
 	.mshift = 0,
 	.mwidth = 5,
 	.pshift = 16,
@@ -669,7 +488,7 @@
 };
 
 /* user manual says "n" but it's really "p" */
-static struct clk_factors_config sun7i_a20_out_config = {
+static const struct clk_factors_config sun7i_a20_out_config = {
 	.mshift = 8,
 	.mwidth = 5,
 	.pshift = 20,
@@ -728,6 +547,14 @@
 	.getter = sun5i_a13_get_ahb_factors,
 };
 
+static const struct factors_data sun6i_ahb1_data __initconst = {
+	.mux = 12,
+	.muxmask = BIT(1) | BIT(0),
+	.table = &sun6i_ahb1_config,
+	.getter = sun6i_get_ahb1_factors,
+	.recalc = sun6i_ahb1_recalc,
+};
+
 static const struct factors_data sun4i_apb1_data __initconst = {
 	.mux = 24,
 	.muxmask = BIT(1) | BIT(0),
@@ -758,6 +585,61 @@
 	return sunxi_factors_register(node, data, &clk_lock, reg);
 }
 
+static void __init sun4i_pll1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun4i_pll1_data);
+}
+CLK_OF_DECLARE(sun4i_pll1, "allwinner,sun4i-a10-pll1-clk",
+	       sun4i_pll1_clk_setup);
+
+static void __init sun6i_pll1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun6i_a31_pll1_data);
+}
+CLK_OF_DECLARE(sun6i_pll1, "allwinner,sun6i-a31-pll1-clk",
+	       sun6i_pll1_clk_setup);
+
+static void __init sun8i_pll1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun8i_a23_pll1_data);
+}
+CLK_OF_DECLARE(sun8i_pll1, "allwinner,sun8i-a23-pll1-clk",
+	       sun8i_pll1_clk_setup);
+
+static void __init sun7i_pll4_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun7i_a20_pll4_data);
+}
+CLK_OF_DECLARE(sun7i_pll4, "allwinner,sun7i-a20-pll4-clk",
+	       sun7i_pll4_clk_setup);
+
+static void __init sun5i_ahb_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun5i_a13_ahb_data);
+}
+CLK_OF_DECLARE(sun5i_ahb, "allwinner,sun5i-a13-ahb-clk",
+	       sun5i_ahb_clk_setup);
+
+static void __init sun6i_ahb1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun6i_ahb1_data);
+}
+CLK_OF_DECLARE(sun6i_a31_ahb1, "allwinner,sun6i-a31-ahb1-clk",
+	       sun6i_ahb1_clk_setup);
+
+static void __init sun4i_apb1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun4i_apb1_data);
+}
+CLK_OF_DECLARE(sun4i_apb1, "allwinner,sun4i-a10-apb1-clk",
+	       sun4i_apb1_clk_setup);
+
+static void __init sun7i_out_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun7i_a20_out_data);
+}
+CLK_OF_DECLARE(sun7i_out, "allwinner,sun7i-a20-out-clk",
+	       sun7i_out_clk_setup);
 
 
 /**
@@ -782,8 +664,8 @@
 	.shift = 0,
 };
 
-static void __init sunxi_mux_clk_setup(struct device_node *node,
-				       struct mux_data *data)
+static struct clk * __init sunxi_mux_clk_setup(struct device_node *node,
+					       const struct mux_data *data)
 {
 	struct clk *clk;
 	const char *clk_name = node->name;
@@ -792,21 +674,71 @@
 	int i;
 
 	reg = of_iomap(node, 0);
+	if (!reg) {
+		pr_err("Could not map registers for mux-clk: %s\n",
+		       of_node_full_name(node));
+		return NULL;
+	}
 
 	i = of_clk_parent_fill(node, parents, SUNXI_MAX_PARENTS);
-	of_property_read_string(node, "clock-output-names", &clk_name);
+	if (of_property_read_string(node, "clock-output-names", &clk_name)) {
+		pr_err("%s: could not read clock-output-names from \"%s\"\n",
+		       __func__, of_node_full_name(node));
+		goto out_unmap;
+	}
 
 	clk = clk_register_mux(NULL, clk_name, parents, i,
 			       CLK_SET_RATE_PARENT, reg,
 			       data->shift, SUNXI_MUX_GATE_WIDTH,
 			       0, &clk_lock);
 
-	if (clk) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-		clk_register_clkdev(clk, clk_name, NULL);
+	if (IS_ERR(clk)) {
+		pr_err("%s: failed to register mux clock %s: %ld\n", __func__,
+		       clk_name, PTR_ERR(clk));
+		goto out_unmap;
 	}
+
+	if (of_clk_add_provider(node, of_clk_src_simple_get, clk)) {
+		pr_err("%s: failed to add clock provider for %s\n",
+		       __func__, clk_name);
+		clk_unregister_divider(clk);
+		goto out_unmap;
+	}
+
+	return clk;
+out_unmap:
+	iounmap(reg);
+	return NULL;
 }
 
+static void __init sun4i_cpu_clk_setup(struct device_node *node)
+{
+	struct clk *clk;
+
+	clk = sunxi_mux_clk_setup(node, &sun4i_cpu_mux_data);
+	if (!clk)
+		return;
+
+	/* Protect CPU clock */
+	__clk_get(clk);
+	clk_prepare_enable(clk);
+}
+CLK_OF_DECLARE(sun4i_cpu, "allwinner,sun4i-a10-cpu-clk",
+	       sun4i_cpu_clk_setup);
+
+static void __init sun6i_ahb1_mux_clk_setup(struct device_node *node)
+{
+	sunxi_mux_clk_setup(node, &sun6i_a31_ahb1_mux_data);
+}
+CLK_OF_DECLARE(sun6i_ahb1_mux, "allwinner,sun6i-a31-ahb1-mux-clk",
+	       sun6i_ahb1_mux_clk_setup);
+
+static void __init sun8i_ahb2_clk_setup(struct device_node *node)
+{
+	sunxi_mux_clk_setup(node, &sun8i_h3_ahb2_mux_data);
+}
+CLK_OF_DECLARE(sun8i_ahb2, "allwinner,sun8i-h3-ahb2-clk",
+	       sun8i_ahb2_clk_setup);
 
 
 /**
@@ -865,7 +797,7 @@
 };
 
 static void __init sunxi_divider_clk_setup(struct device_node *node,
-					   struct div_data *data)
+					   const struct div_data *data)
 {
 	struct clk *clk;
 	const char *clk_name = node->name;
@@ -873,21 +805,77 @@
 	void __iomem *reg;
 
 	reg = of_iomap(node, 0);
+	if (!reg) {
+		pr_err("Could not map registers for mux-clk: %s\n",
+		       of_node_full_name(node));
+		return;
+	}
 
 	clk_parent = of_clk_get_parent_name(node, 0);
 
-	of_property_read_string(node, "clock-output-names", &clk_name);
+	if (of_property_read_string(node, "clock-output-names", &clk_name)) {
+		pr_err("%s: could not read clock-output-names from \"%s\"\n",
+		       __func__, of_node_full_name(node));
+		goto out_unmap;
+	}
 
 	clk = clk_register_divider_table(NULL, clk_name, clk_parent, 0,
 					 reg, data->shift, data->width,
 					 data->pow ? CLK_DIVIDER_POWER_OF_TWO : 0,
 					 data->table, &clk_lock);
-	if (clk) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-		clk_register_clkdev(clk, clk_name, NULL);
+	if (IS_ERR(clk)) {
+		pr_err("%s: failed to register divider clock %s: %ld\n",
+		       __func__, clk_name, PTR_ERR(clk));
+		goto out_unmap;
 	}
+
+	if (of_clk_add_provider(node, of_clk_src_simple_get, clk)) {
+		pr_err("%s: failed to add clock provider for %s\n",
+		       __func__, clk_name);
+		goto out_unregister;
+	}
+
+	if (clk_register_clkdev(clk, clk_name, NULL)) {
+		of_clk_del_provider(node);
+		goto out_unregister;
+	}
+
+	return;
+out_unregister:
+	clk_unregister_divider(clk);
+
+out_unmap:
+	iounmap(reg);
 }
 
+static void __init sun4i_ahb_clk_setup(struct device_node *node)
+{
+	sunxi_divider_clk_setup(node, &sun4i_ahb_data);
+}
+CLK_OF_DECLARE(sun4i_ahb, "allwinner,sun4i-a10-ahb-clk",
+	       sun4i_ahb_clk_setup);
+
+static void __init sun4i_apb0_clk_setup(struct device_node *node)
+{
+	sunxi_divider_clk_setup(node, &sun4i_apb0_data);
+}
+CLK_OF_DECLARE(sun4i_apb0, "allwinner,sun4i-a10-apb0-clk",
+	       sun4i_apb0_clk_setup);
+
+static void __init sun4i_axi_clk_setup(struct device_node *node)
+{
+	sunxi_divider_clk_setup(node, &sun4i_axi_data);
+}
+CLK_OF_DECLARE(sun4i_axi, "allwinner,sun4i-a10-axi-clk",
+	       sun4i_axi_clk_setup);
+
+static void __init sun8i_axi_clk_setup(struct device_node *node)
+{
+	sunxi_divider_clk_setup(node, &sun8i_a23_axi_data);
+}
+CLK_OF_DECLARE(sun8i_axi, "allwinner,sun8i-a23-axi-clk",
+	       sun8i_axi_clk_setup);
+
 
 
 /**
@@ -975,8 +963,8 @@
  *           |________________________|
  */
 
-static void __init sunxi_divs_clk_setup(struct device_node *node,
-					struct divs_data *data)
+static struct clk ** __init sunxi_divs_clk_setup(struct device_node *node,
+						 const struct divs_data *data)
 {
 	struct clk_onecell_data *clk_data;
 	const char *parent;
@@ -997,13 +985,20 @@
 
 	/* Set up factor clock that we will be dividing */
 	pclk = sunxi_factors_clk_setup(node, data->factors);
+	if (!pclk)
+		return NULL;
 	parent = __clk_get_name(pclk);
 
 	reg = of_iomap(node, 0);
+	if (!reg) {
+		pr_err("Could not map registers for divs-clk: %s\n",
+		       of_node_full_name(node));
+		return NULL;
+	}
 
 	clk_data = kmalloc(sizeof(struct clk_onecell_data), GFP_KERNEL);
 	if (!clk_data)
-		return;
+		goto out_unmap;
 
 	clks = kcalloc(ndivs, sizeof(*clks), GFP_KERNEL);
 	if (!clks)
@@ -1081,146 +1076,54 @@
 						 clkflags);
 
 		WARN_ON(IS_ERR(clk_data->clks[i]));
-		clk_register_clkdev(clks[i], clk_name, NULL);
 	}
 
 	/* Adjust to the real max */
 	clk_data->clk_num = i;
 
-	of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (of_clk_add_provider(node, of_clk_src_onecell_get, clk_data)) {
+		pr_err("%s: failed to add clock provider for %s\n",
+		       __func__, clk_name);
+		goto free_gate;
+	}
 
-	return;
-
+	return clks;
 free_gate:
 	kfree(gate);
 free_clks:
 	kfree(clks);
 free_clkdata:
 	kfree(clk_data);
+out_unmap:
+	iounmap(reg);
+	return NULL;
 }
 
-
-
-/* Matches for factors clocks */
-static const struct of_device_id clk_factors_match[] __initconst = {
-	{.compatible = "allwinner,sun4i-a10-pll1-clk", .data = &sun4i_pll1_data,},
-	{.compatible = "allwinner,sun6i-a31-pll1-clk", .data = &sun6i_a31_pll1_data,},
-	{.compatible = "allwinner,sun8i-a23-pll1-clk", .data = &sun8i_a23_pll1_data,},
-	{.compatible = "allwinner,sun7i-a20-pll4-clk", .data = &sun7i_a20_pll4_data,},
-	{.compatible = "allwinner,sun5i-a13-ahb-clk", .data = &sun5i_a13_ahb_data,},
-	{.compatible = "allwinner,sun4i-a10-apb1-clk", .data = &sun4i_apb1_data,},
-	{.compatible = "allwinner,sun7i-a20-out-clk", .data = &sun7i_a20_out_data,},
-	{}
-};
-
-/* Matches for divider clocks */
-static const struct of_device_id clk_div_match[] __initconst = {
-	{.compatible = "allwinner,sun4i-a10-axi-clk", .data = &sun4i_axi_data,},
-	{.compatible = "allwinner,sun8i-a23-axi-clk", .data = &sun8i_a23_axi_data,},
-	{.compatible = "allwinner,sun4i-a10-ahb-clk", .data = &sun4i_ahb_data,},
-	{.compatible = "allwinner,sun4i-a10-apb0-clk", .data = &sun4i_apb0_data,},
-	{}
-};
-
-/* Matches for divided outputs */
-static const struct of_device_id clk_divs_match[] __initconst = {
-	{.compatible = "allwinner,sun4i-a10-pll5-clk", .data = &pll5_divs_data,},
-	{.compatible = "allwinner,sun4i-a10-pll6-clk", .data = &pll6_divs_data,},
-	{.compatible = "allwinner,sun6i-a31-pll6-clk", .data = &sun6i_a31_pll6_divs_data,},
-	{}
-};
-
-/* Matches for mux clocks */
-static const struct of_device_id clk_mux_match[] __initconst = {
-	{.compatible = "allwinner,sun4i-a10-cpu-clk", .data = &sun4i_cpu_mux_data,},
-	{.compatible = "allwinner,sun6i-a31-ahb1-mux-clk", .data = &sun6i_a31_ahb1_mux_data,},
-	{.compatible = "allwinner,sun8i-h3-ahb2-clk", .data = &sun8i_h3_ahb2_mux_data,},
-	{}
-};
-
-
-static void __init of_sunxi_table_clock_setup(const struct of_device_id *clk_match,
-					      void *function)
+static void __init sun4i_pll5_clk_setup(struct device_node *node)
 {
-	struct device_node *np;
-	const struct div_data *data;
-	const struct of_device_id *match;
-	void (*setup_function)(struct device_node *, const void *) = function;
+	struct clk **clks;
 
-	for_each_matching_node_and_match(np, clk_match, &match) {
-		data = match->data;
-		setup_function(np, data);
-	}
+	clks = sunxi_divs_clk_setup(node, &pll5_divs_data);
+	if (!clks)
+		return;
+
+	/* Protect PLL5_DDR */
+	__clk_get(clks[0]);
+	clk_prepare_enable(clks[0]);
 }
+CLK_OF_DECLARE(sun4i_pll5, "allwinner,sun4i-a10-pll5-clk",
+	       sun4i_pll5_clk_setup);
 
-static void __init sunxi_init_clocks(const char *clocks[], int nclocks)
+static void __init sun4i_pll6_clk_setup(struct device_node *node)
 {
-	unsigned int i;
-
-	/* Register divided output clocks */
-	of_sunxi_table_clock_setup(clk_divs_match, sunxi_divs_clk_setup);
-
-	/* Register factor clocks */
-	of_sunxi_table_clock_setup(clk_factors_match, sunxi_factors_clk_setup);
-
-	/* Register divider clocks */
-	of_sunxi_table_clock_setup(clk_div_match, sunxi_divider_clk_setup);
-
-	/* Register mux clocks */
-	of_sunxi_table_clock_setup(clk_mux_match, sunxi_mux_clk_setup);
-
-	/* Protect the clocks that needs to stay on */
-	for (i = 0; i < nclocks; i++) {
-		struct clk *clk = clk_get(NULL, clocks[i]);
-
-		if (!IS_ERR(clk))
-			clk_prepare_enable(clk);
-	}
+	sunxi_divs_clk_setup(node, &pll6_divs_data);
 }
+CLK_OF_DECLARE(sun4i_pll6, "allwinner,sun4i-a10-pll6-clk",
+	       sun4i_pll6_clk_setup);
 
-static const char *sun4i_a10_critical_clocks[] __initdata = {
-	"pll5_ddr",
-};
-
-static void __init sun4i_a10_init_clocks(struct device_node *node)
+static void __init sun6i_pll6_clk_setup(struct device_node *node)
 {
-	sunxi_init_clocks(sun4i_a10_critical_clocks,
-			  ARRAY_SIZE(sun4i_a10_critical_clocks));
+	sunxi_divs_clk_setup(node, &sun6i_a31_pll6_divs_data);
 }
-CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sun4i_a10_init_clocks);
-
-static const char *sun5i_critical_clocks[] __initdata = {
-	"cpu",
-	"pll5_ddr",
-};
-
-static void __init sun5i_init_clocks(struct device_node *node)
-{
-	sunxi_init_clocks(sun5i_critical_clocks,
-			  ARRAY_SIZE(sun5i_critical_clocks));
-}
-CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sun5i_init_clocks);
-CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sun5i_init_clocks);
-CLK_OF_DECLARE(sun5i_r8_clk_init, "allwinner,sun5i-r8", sun5i_init_clocks);
-CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sun5i_init_clocks);
-
-static const char *sun6i_critical_clocks[] __initdata = {
-	"cpu",
-};
-
-static void __init sun6i_init_clocks(struct device_node *node)
-{
-	sunxi_init_clocks(sun6i_critical_clocks,
-			  ARRAY_SIZE(sun6i_critical_clocks));
-}
-CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks);
-CLK_OF_DECLARE(sun6i_a31s_clk_init, "allwinner,sun6i-a31s", sun6i_init_clocks);
-CLK_OF_DECLARE(sun8i_a23_clk_init, "allwinner,sun8i-a23", sun6i_init_clocks);
-CLK_OF_DECLARE(sun8i_a33_clk_init, "allwinner,sun8i-a33", sun6i_init_clocks);
-CLK_OF_DECLARE(sun8i_h3_clk_init, "allwinner,sun8i-h3", sun6i_init_clocks);
-
-static void __init sun9i_init_clocks(struct device_node *node)
-{
-	sunxi_init_clocks(NULL, 0);
-}
-CLK_OF_DECLARE(sun9i_a80_clk_init, "allwinner,sun9i-a80", sun9i_init_clocks);
+CLK_OF_DECLARE(sun6i_pll6, "allwinner,sun6i-a31-pll6-clk",
+	       sun6i_pll6_clk_setup);

diff --git a/drivers/clk/sunxi/clk-usb.c b/drivers/clk/sunxi/clk-usb.c
index 67b8e38..fe0c3d1 100644
--- a/drivers/clk/sunxi/clk-usb.c
+++ b/drivers/clk/sunxi/clk-usb.c

@@ -76,7 +76,7 @@
 	return 0;
 }
 
-static struct reset_control_ops sunxi_usb_reset_ops = {
+static const struct reset_control_ops sunxi_usb_reset_ops = {
 	.assert		= sunxi_usb_reset_assert,
 	.deassert	= sunxi_usb_reset_deassert,
 };
@@ -216,6 +216,18 @@
 }
 CLK_OF_DECLARE(sun8i_a23_usb, "allwinner,sun8i-a23-usb-clk", sun8i_a23_usb_setup);
 
+static const struct usb_clk_data sun8i_h3_usb_clk_data __initconst = {
+	.clk_mask =  BIT(19) | BIT(18) | BIT(17) | BIT(16) |
+		     BIT(11) | BIT(10) | BIT(9) | BIT(8),
+	.reset_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0),
+};
+
+static void __init sun8i_h3_usb_setup(struct device_node *node)
+{
+	sunxi_usb_clk_setup(node, &sun8i_h3_usb_clk_data, &sun4i_a10_usb_lock);
+}
+CLK_OF_DECLARE(sun8i_h3_usb, "allwinner,sun8i-h3-usb-clk", sun8i_h3_usb_setup);
+
 static const struct usb_clk_data sun9i_a80_usb_mod_data __initconst = {
 	.clk_mask = BIT(6) | BIT(5) | BIT(4) | BIT(3) | BIT(2) | BIT(1),
 	.reset_mask = BIT(19) | BIT(18) | BIT(17),
@@ -243,15 +255,3 @@
 	sunxi_usb_clk_setup(node, &sun9i_a80_usb_phy_data, &a80_usb_phy_lock);
 }
 CLK_OF_DECLARE(sun9i_a80_usb_phy, "allwinner,sun9i-a80-usb-phy-clk", sun9i_a80_usb_phy_setup);
-
-static const struct usb_clk_data sun8i_h3_usb_clk_data __initconst = {
-	.clk_mask =  BIT(19) | BIT(18) | BIT(17) | BIT(16) |
-		     BIT(11) | BIT(10) | BIT(9) | BIT(8),
-	.reset_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0),
-};
-
-static void __init sun8i_h3_usb_setup(struct device_node *node)
-{
-	sunxi_usb_clk_setup(node, &sun8i_h3_usb_clk_data, &sun4i_a10_usb_lock);
-}
-CLK_OF_DECLARE(sun8i_h3_usb, "allwinner,sun8i-h3-usb-clk", sun8i_h3_usb_setup);

diff --git a/drivers/clk/tegra/clk-audio-sync.c b/drivers/clk/tegra/clk-audio-sync.c
index c0f7843..92d04ce 100644
--- a/drivers/clk/tegra/clk-audio-sync.c
+++ b/drivers/clk/tegra/clk-audio-sync.c

@@ -72,7 +72,7 @@
 
 	init.ops = &tegra_clk_sync_source_ops;
 	init.name = name;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.parent_names = NULL;
 	init.num_parents = 0;
 

diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c
index 86a307b..19bfa07 100644
--- a/drivers/clk/tegra/clk-dfll.c
+++ b/drivers/clk/tegra/clk-dfll.c

@@ -995,7 +995,6 @@
 };
 
 static struct clk_init_data dfll_clk_init_data = {
-	.flags		= CLK_IS_ROOT,
 	.ops		= &dfll_clk_ops,
 	.num_parents	= 0,
 };

diff --git a/drivers/clk/tegra/clk-tegra-fixed.c b/drivers/clk/tegra/clk-tegra-fixed.c
index da0b594..d64ec7a 100644
--- a/drivers/clk/tegra/clk-tegra-fixed.c
+++ b/drivers/clk/tegra/clk-tegra-fixed.c

@@ -52,8 +52,7 @@
 		return -EINVAL;
 	}
 
-	osc = clk_register_fixed_rate(NULL, "osc", NULL, CLK_IS_ROOT,
-				      *osc_freq);
+	osc = clk_register_fixed_rate(NULL, "osc", NULL, 0, *osc_freq);
 
 	dt_clk = tegra_lookup_dt_id(tegra_clk_clk_m, clks);
 	if (!dt_clk)
@@ -88,8 +87,7 @@
 	/* clk_32k */
 	dt_clk = tegra_lookup_dt_id(tegra_clk_clk_32k, tegra_clks);
 	if (dt_clk) {
-		clk = clk_register_fixed_rate(NULL, "clk_32k", NULL,
-					CLK_IS_ROOT, 32768);
+		clk = clk_register_fixed_rate(NULL, "clk_32k", NULL, 0, 32768);
 		*dt_clk = clk;
 	}
 

diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index 4a24aa4..df47ec3 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c

@@ -972,8 +972,7 @@
 	struct clk *clk;
 
 	/* clk_32k */
-	clk = clk_register_fixed_rate(NULL, "clk_32k", NULL, CLK_IS_ROOT,
-				      32768);
+	clk = clk_register_fixed_rate(NULL, "clk_32k", NULL, 0, 32768);
 	clks[TEGRA114_CLK_CLK_32K] = clk;
 
 	/* clk_m_div2 */

diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index 7a48e98..7ad6383 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c

@@ -837,15 +837,13 @@
 	clks[TEGRA20_CLK_PEX] = clk;
 
 	/* cdev1 */
-	clk = clk_register_fixed_rate(NULL, "cdev1_fixed", NULL, CLK_IS_ROOT,
-				      26000000);
+	clk = clk_register_fixed_rate(NULL, "cdev1_fixed", NULL, 0, 26000000);
 	clk = tegra_clk_register_periph_gate("cdev1", "cdev1_fixed", 0,
 				    clk_base, 0, 94, periph_clk_enb_refcnt);
 	clks[TEGRA20_CLK_CDEV1] = clk;
 
 	/* cdev2 */
-	clk = clk_register_fixed_rate(NULL, "cdev2_fixed", NULL, CLK_IS_ROOT,
-				      26000000);
+	clk = clk_register_fixed_rate(NULL, "cdev2_fixed", NULL, 0, 26000000);
 	clk = tegra_clk_register_periph_gate("cdev2", "cdev2_fixed", 0,
 				    clk_base, 0, 93, periph_clk_enb_refcnt);
 	clks[TEGRA20_CLK_CDEV2] = clk;
@@ -879,8 +877,8 @@
 	input_freq = tegra20_clk_measure_input_freq();
 
 	/* clk_m */
-	clk = clk_register_fixed_rate(NULL, "clk_m", NULL, CLK_IS_ROOT |
-				      CLK_IGNORE_UNUSED, input_freq);
+	clk = clk_register_fixed_rate(NULL, "clk_m", NULL, CLK_IGNORE_UNUSED,
+				      input_freq);
 	clks[TEGRA20_CLK_CLK_M] = clk;
 
 	/* pll_ref */

diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c
index 2a3a4fe..f60fe2e 100644
--- a/drivers/clk/tegra/clk.c
+++ b/drivers/clk/tegra/clk.c

@@ -271,7 +271,7 @@
 	}
 }
 
-static struct reset_control_ops rst_ops = {
+static const struct reset_control_ops rst_ops = {
 	.assert = tegra_clk_rst_assert,
 	.deassert = tegra_clk_rst_deassert,
 };

diff --git a/drivers/clk/ti/Kconfig b/drivers/clk/ti/Kconfig
new file mode 100644
index 0000000..2713417
--- /dev/null
+++ b/drivers/clk/ti/Kconfig

@@ -0,0 +1,6 @@
+config COMMON_CLK_TI_ADPLL
+	tristate "Clock driver for dm814x ADPLL"
+	depends on ARCH_OMAP2PLUS || COMPILE_TEST
+	default y if SOC_TI81XX
+	---help---
+	  ADPLL clock driver for the dm814x SoC using common clock framework.

diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile
index d4ac960..0deac98 100644
--- a/drivers/clk/ti/Makefile
+++ b/drivers/clk/ti/Makefile

@@ -1,3 +1,5 @@
+ifeq ($(CONFIG_ARCH_OMAP2PLUS), y)
+
 obj-y					+= clk.o autoidle.o clockdomain.o
 clk-common				= dpll.o composite.o divider.o gate.o \
 					  fixed-factor.o mux.o apll.o \
@@ -18,3 +20,7 @@
 ifdef CONFIG_ATAGS
 obj-$(CONFIG_ARCH_OMAP3)                += clk-3xxx-legacy.o
 endif
+
+endif	# CONFIG_ARCH_OMAP2PLUS
+
+obj-$(CONFIG_COMMON_CLK_TI_ADPLL)	+= adpll.o

diff --git a/drivers/clk/ti/adpll.c b/drivers/clk/ti/adpll.c
new file mode 100644
index 0000000..255cafb
--- /dev/null
+++ b/drivers/clk/ti/adpll.c

@@ -0,0 +1,983 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/string.h>
+
+#define ADPLL_PLLSS_MMR_LOCK_OFFSET	0x00	/* Managed by MPPULL */
+#define ADPLL_PLLSS_MMR_LOCK_ENABLED	0x1f125B64
+#define ADPLL_PLLSS_MMR_UNLOCK_MAGIC	0x1eda4c3d
+
+#define ADPLL_PWRCTRL_OFFSET		0x00
+#define ADPLL_PWRCTRL_PONIN		5
+#define ADPLL_PWRCTRL_PGOODIN		4
+#define ADPLL_PWRCTRL_RET		3
+#define ADPLL_PWRCTRL_ISORET		2
+#define ADPLL_PWRCTRL_ISOSCAN		1
+#define ADPLL_PWRCTRL_OFFMODE		0
+
+#define ADPLL_CLKCTRL_OFFSET		0x04
+#define ADPLL_CLKCTRL_CLKDCOLDOEN	29
+#define ADPLL_CLKCTRL_IDLE		23
+#define ADPLL_CLKCTRL_CLKOUTEN		20
+#define ADPLL_CLKINPHIFSEL_ADPLL_S	19	/* REVISIT: which bit? */
+#define ADPLL_CLKCTRL_CLKOUTLDOEN_ADPLL_LJ 19
+#define ADPLL_CLKCTRL_ULOWCLKEN		18
+#define ADPLL_CLKCTRL_CLKDCOLDOPWDNZ	17
+#define ADPLL_CLKCTRL_M2PWDNZ		16
+#define ADPLL_CLKCTRL_M3PWDNZ_ADPLL_S	15
+#define ADPLL_CLKCTRL_LOWCURRSTDBY_ADPLL_S 13
+#define ADPLL_CLKCTRL_LPMODE_ADPLL_S	12
+#define ADPLL_CLKCTRL_REGM4XEN_ADPLL_S	10
+#define ADPLL_CLKCTRL_SELFREQDCO_ADPLL_LJ 10
+#define ADPLL_CLKCTRL_TINITZ		0
+
+#define ADPLL_TENABLE_OFFSET		0x08
+#define ADPLL_TENABLEDIV_OFFSET		0x8c
+
+#define ADPLL_M2NDIV_OFFSET		0x10
+#define ADPLL_M2NDIV_M2			16
+#define ADPLL_M2NDIV_M2_ADPLL_S_WIDTH	5
+#define ADPLL_M2NDIV_M2_ADPLL_LJ_WIDTH	7
+
+#define ADPLL_MN2DIV_OFFSET		0x14
+#define ADPLL_MN2DIV_N2			16
+
+#define ADPLL_FRACDIV_OFFSET		0x18
+#define ADPLL_FRACDIV_REGSD		24
+#define ADPLL_FRACDIV_FRACTIONALM	0
+#define ADPLL_FRACDIV_FRACTIONALM_MASK	0x3ffff
+
+#define ADPLL_BWCTRL_OFFSET		0x1c
+#define ADPLL_BWCTRL_BWCONTROL		1
+#define ADPLL_BWCTRL_BW_INCR_DECRZ	0
+
+#define ADPLL_RESERVED_OFFSET		0x20
+
+#define ADPLL_STATUS_OFFSET		0x24
+#define ADPLL_STATUS_PONOUT		31
+#define ADPLL_STATUS_PGOODOUT		30
+#define ADPLL_STATUS_LDOPWDN		29
+#define ADPLL_STATUS_RECAL_BSTATUS3	28
+#define ADPLL_STATUS_RECAL_OPPIN	27
+#define ADPLL_STATUS_PHASELOCK		10
+#define ADPLL_STATUS_FREQLOCK		9
+#define ADPLL_STATUS_BYPASSACK		8
+#define ADPLL_STATUS_LOSSREF		6
+#define ADPLL_STATUS_CLKOUTENACK	5
+#define ADPLL_STATUS_LOCK2		4
+#define ADPLL_STATUS_M2CHANGEACK	3
+#define ADPLL_STATUS_HIGHJITTER		1
+#define ADPLL_STATUS_BYPASS		0
+#define ADPLL_STATUS_PREPARED_MASK	(BIT(ADPLL_STATUS_PHASELOCK) | \
+					 BIT(ADPLL_STATUS_FREQLOCK))
+
+#define ADPLL_M3DIV_OFFSET		0x28	/* Only on MPUPLL */
+#define ADPLL_M3DIV_M3			0
+#define ADPLL_M3DIV_M3_WIDTH		5
+#define ADPLL_M3DIV_M3_MASK		0x1f
+
+#define ADPLL_RAMPCTRL_OFFSET		0x2c	/* Only on MPUPLL */
+#define ADPLL_RAMPCTRL_CLKRAMPLEVEL	19
+#define ADPLL_RAMPCTRL_CLKRAMPRATE	16
+#define ADPLL_RAMPCTRL_RELOCK_RAMP_EN	0
+
+#define MAX_ADPLL_INPUTS		3
+#define MAX_ADPLL_OUTPUTS		4
+#define ADPLL_MAX_RETRIES		5
+
+#define to_dco(_hw)	container_of(_hw, struct ti_adpll_dco_data, hw)
+#define to_adpll(_hw)	container_of(_hw, struct ti_adpll_data, dco)
+#define to_clkout(_hw)	container_of(_hw, struct ti_adpll_clkout_data, hw)
+
+enum ti_adpll_clocks {
+	TI_ADPLL_DCO,
+	TI_ADPLL_DCO_GATE,
+	TI_ADPLL_N2,
+	TI_ADPLL_M2,
+	TI_ADPLL_M2_GATE,
+	TI_ADPLL_BYPASS,
+	TI_ADPLL_HIF,
+	TI_ADPLL_DIV2,
+	TI_ADPLL_CLKOUT,
+	TI_ADPLL_CLKOUT2,
+	TI_ADPLL_M3,
+};
+
+#define TI_ADPLL_NR_CLOCKS	(TI_ADPLL_M3 + 1)
+
+enum ti_adpll_inputs {
+	TI_ADPLL_CLKINP,
+	TI_ADPLL_CLKINPULOW,
+	TI_ADPLL_CLKINPHIF,
+};
+
+enum ti_adpll_s_outputs {
+	TI_ADPLL_S_DCOCLKLDO,
+	TI_ADPLL_S_CLKOUT,
+	TI_ADPLL_S_CLKOUTX2,
+	TI_ADPLL_S_CLKOUTHIF,
+};
+
+enum ti_adpll_lj_outputs {
+	TI_ADPLL_LJ_CLKDCOLDO,
+	TI_ADPLL_LJ_CLKOUT,
+	TI_ADPLL_LJ_CLKOUTLDO,
+};
+
+struct ti_adpll_platform_data {
+	const bool is_type_s;
+	const int nr_max_inputs;
+	const int nr_max_outputs;
+	const int output_index;
+};
+
+struct ti_adpll_clock {
+	struct clk *clk;
+	struct clk_lookup *cl;
+	void (*unregister)(struct clk *clk);
+};
+
+struct ti_adpll_dco_data {
+	struct clk_hw hw;
+};
+
+struct ti_adpll_clkout_data {
+	struct ti_adpll_data *adpll;
+	struct clk_gate gate;
+	struct clk_hw hw;
+};
+
+struct ti_adpll_data {
+	struct device *dev;
+	const struct ti_adpll_platform_data *c;
+	struct device_node *np;
+	unsigned long pa;
+	void __iomem *iobase;
+	void __iomem *regs;
+	spinlock_t lock;	/* For ADPLL shared register access */
+	const char *parent_names[MAX_ADPLL_INPUTS];
+	struct clk *parent_clocks[MAX_ADPLL_INPUTS];
+	struct ti_adpll_clock *clocks;
+	struct clk_onecell_data outputs;
+	struct ti_adpll_dco_data dco;
+};
+
+static const char *ti_adpll_clk_get_name(struct ti_adpll_data *d,
+					 int output_index,
+					 const char *postfix)
+{
+	const char *name;
+	int err;
+
+	if (output_index >= 0) {
+		err = of_property_read_string_index(d->np,
+						    "clock-output-names",
+						    output_index,
+						    &name);
+		if (err)
+			return NULL;
+	} else {
+		const char *base_name = "adpll";
+		char *buf;
+
+		buf = devm_kzalloc(d->dev, 8 + 1 + strlen(base_name) + 1 +
+				    strlen(postfix), GFP_KERNEL);
+		if (!buf)
+			return NULL;
+		sprintf(buf, "%08lx.%s.%s", d->pa, base_name, postfix);
+		name = buf;
+	}
+
+	return name;
+}
+
+#define ADPLL_MAX_CON_ID	16	/* See MAX_CON_ID */
+
+static int ti_adpll_setup_clock(struct ti_adpll_data *d, struct clk *clock,
+				int index, int output_index, const char *name,
+				void (*unregister)(struct clk *clk))
+{
+	struct clk_lookup *cl;
+	const char *postfix = NULL;
+	char con_id[ADPLL_MAX_CON_ID];
+
+	d->clocks[index].clk = clock;
+	d->clocks[index].unregister = unregister;
+
+	/* Separate con_id in format "pll040dcoclkldo" to fit MAX_CON_ID */
+	postfix = strrchr(name, '.');
+	if (strlen(postfix) > 1) {
+		if (strlen(postfix) > ADPLL_MAX_CON_ID)
+			dev_warn(d->dev, "clock %s con_id lookup may fail\n",
+				 name);
+		snprintf(con_id, 16, "pll%03lx%s", d->pa & 0xfff, postfix + 1);
+		cl = clkdev_create(clock, con_id, NULL);
+		if (!cl)
+			return -ENOMEM;
+		d->clocks[index].cl = cl;
+	} else {
+		dev_warn(d->dev, "no con_id for clock %s\n", name);
+	}
+
+	if (output_index < 0)
+		return 0;
+
+	d->outputs.clks[output_index] = clock;
+	d->outputs.clk_num++;
+
+	return 0;
+}
+
+static int ti_adpll_init_divider(struct ti_adpll_data *d,
+				 enum ti_adpll_clocks index,
+				 int output_index, char *name,
+				 struct clk *parent_clock,
+				 void __iomem *reg,
+				 u8 shift, u8 width,
+				 u8 clk_divider_flags)
+{
+	const char *child_name;
+	const char *parent_name;
+	struct clk *clock;
+
+	child_name = ti_adpll_clk_get_name(d, output_index, name);
+	if (!child_name)
+		return -EINVAL;
+
+	parent_name = __clk_get_name(parent_clock);
+	clock = clk_register_divider(d->dev, child_name, parent_name, 0,
+				     reg, shift, width, clk_divider_flags,
+				     &d->lock);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "failed to register divider %s: %li\n",
+			name, PTR_ERR(clock));
+		return PTR_ERR(clock);
+	}
+
+	return ti_adpll_setup_clock(d, clock, index, output_index, child_name,
+				    clk_unregister_divider);
+}
+
+static int ti_adpll_init_mux(struct ti_adpll_data *d,
+			     enum ti_adpll_clocks index,
+			     char *name, struct clk *clk0,
+			     struct clk *clk1,
+			     void __iomem *reg,
+			     u8 shift)
+{
+	const char *child_name;
+	const char *parents[2];
+	struct clk *clock;
+
+	child_name = ti_adpll_clk_get_name(d, -ENODEV, name);
+	if (!child_name)
+		return -ENOMEM;
+	parents[0] = __clk_get_name(clk0);
+	parents[1] = __clk_get_name(clk1);
+	clock = clk_register_mux(d->dev, child_name, parents, 2, 0,
+				 reg, shift, 1, 0, &d->lock);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "failed to register mux %s: %li\n",
+			name, PTR_ERR(clock));
+		return PTR_ERR(clock);
+	}
+
+	return ti_adpll_setup_clock(d, clock, index, -ENODEV, child_name,
+				    clk_unregister_mux);
+}
+
+static int ti_adpll_init_gate(struct ti_adpll_data *d,
+			      enum ti_adpll_clocks index,
+			      int output_index, char *name,
+			      struct clk *parent_clock,
+			      void __iomem *reg,
+			      u8 bit_idx,
+			      u8 clk_gate_flags)
+{
+	const char *child_name;
+	const char *parent_name;
+	struct clk *clock;
+
+	child_name = ti_adpll_clk_get_name(d, output_index, name);
+	if (!child_name)
+		return -EINVAL;
+
+	parent_name = __clk_get_name(parent_clock);
+	clock = clk_register_gate(d->dev, child_name, parent_name, 0,
+				  reg, bit_idx, clk_gate_flags,
+				  &d->lock);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "failed to register gate %s: %li\n",
+			name, PTR_ERR(clock));
+		return PTR_ERR(clock);
+	}
+
+	return ti_adpll_setup_clock(d, clock, index, output_index, child_name,
+				    clk_unregister_gate);
+}
+
+static int ti_adpll_init_fixed_factor(struct ti_adpll_data *d,
+				      enum ti_adpll_clocks index,
+				      char *name,
+				      struct clk *parent_clock,
+				      unsigned int mult,
+				      unsigned int div)
+{
+	const char *child_name;
+	const char *parent_name;
+	struct clk *clock;
+
+	child_name = ti_adpll_clk_get_name(d, -ENODEV, name);
+	if (!child_name)
+		return -ENOMEM;
+
+	parent_name = __clk_get_name(parent_clock);
+	clock = clk_register_fixed_factor(d->dev, child_name, parent_name,
+					  0, mult, div);
+	if (IS_ERR(clock))
+		return PTR_ERR(clock);
+
+	return ti_adpll_setup_clock(d, clock, index, -ENODEV, child_name,
+				    clk_unregister);
+}
+
+static void ti_adpll_set_idle_bypass(struct ti_adpll_data *d)
+{
+	unsigned long flags;
+	u32 v;
+
+	spin_lock_irqsave(&d->lock, flags);
+	v = readl_relaxed(d->regs + ADPLL_CLKCTRL_OFFSET);
+	v |= BIT(ADPLL_CLKCTRL_IDLE);
+	writel_relaxed(v, d->regs + ADPLL_CLKCTRL_OFFSET);
+	spin_unlock_irqrestore(&d->lock, flags);
+}
+
+static void ti_adpll_clear_idle_bypass(struct ti_adpll_data *d)
+{
+	unsigned long flags;
+	u32 v;
+
+	spin_lock_irqsave(&d->lock, flags);
+	v = readl_relaxed(d->regs + ADPLL_CLKCTRL_OFFSET);
+	v &= ~BIT(ADPLL_CLKCTRL_IDLE);
+	writel_relaxed(v, d->regs + ADPLL_CLKCTRL_OFFSET);
+	spin_unlock_irqrestore(&d->lock, flags);
+}
+
+static bool ti_adpll_clock_is_bypass(struct ti_adpll_data *d)
+{
+	u32 v;
+
+	v = readl_relaxed(d->regs + ADPLL_STATUS_OFFSET);
+
+	return v & BIT(ADPLL_STATUS_BYPASS);
+}
+
+/*
+ * Locked and bypass are not actually mutually exclusive:  if you only care
+ * about the DCO clock and not CLKOUT you can clear M2PWDNZ before enabling
+ * the PLL, resulting in status (FREQLOCK | PHASELOCK | BYPASS) after lock.
+ */
+static bool ti_adpll_is_locked(struct ti_adpll_data *d)
+{
+	u32 v = readl_relaxed(d->regs + ADPLL_STATUS_OFFSET);
+
+	return (v & ADPLL_STATUS_PREPARED_MASK) == ADPLL_STATUS_PREPARED_MASK;
+}
+
+static int ti_adpll_wait_lock(struct ti_adpll_data *d)
+{
+	int retries = ADPLL_MAX_RETRIES;
+
+	do {
+		if (ti_adpll_is_locked(d))
+			return 0;
+		usleep_range(200, 300);
+	} while (retries--);
+
+	dev_err(d->dev, "pll failed to lock\n");
+	return -ETIMEDOUT;
+}
+
+static int ti_adpll_prepare(struct clk_hw *hw)
+{
+	struct ti_adpll_dco_data *dco = to_dco(hw);
+	struct ti_adpll_data *d = to_adpll(dco);
+
+	ti_adpll_clear_idle_bypass(d);
+	ti_adpll_wait_lock(d);
+
+	return 0;
+}
+
+static void ti_adpll_unprepare(struct clk_hw *hw)
+{
+	struct ti_adpll_dco_data *dco = to_dco(hw);
+	struct ti_adpll_data *d = to_adpll(dco);
+
+	ti_adpll_set_idle_bypass(d);
+}
+
+static int ti_adpll_is_prepared(struct clk_hw *hw)
+{
+	struct ti_adpll_dco_data *dco = to_dco(hw);
+	struct ti_adpll_data *d = to_adpll(dco);
+
+	return ti_adpll_is_locked(d);
+}
+
+/*
+ * Note that the DCO clock is never subject to bypass: if the PLL is off,
+ * dcoclk is low.
+ */
+static unsigned long ti_adpll_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	struct ti_adpll_dco_data *dco = to_dco(hw);
+	struct ti_adpll_data *d = to_adpll(dco);
+	u32 frac_m, divider, v;
+	u64 rate;
+	unsigned long flags;
+
+	if (ti_adpll_clock_is_bypass(d))
+		return 0;
+
+	spin_lock_irqsave(&d->lock, flags);
+	frac_m = readl_relaxed(d->regs + ADPLL_FRACDIV_OFFSET);
+	frac_m &= ADPLL_FRACDIV_FRACTIONALM_MASK;
+	rate = (u64)readw_relaxed(d->regs + ADPLL_MN2DIV_OFFSET) << 18;
+	rate += frac_m;
+	rate *= parent_rate;
+	divider = (readw_relaxed(d->regs + ADPLL_M2NDIV_OFFSET) + 1) << 18;
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	do_div(rate, divider);
+
+	if (d->c->is_type_s) {
+		v = readl_relaxed(d->regs + ADPLL_CLKCTRL_OFFSET);
+		if (v & BIT(ADPLL_CLKCTRL_REGM4XEN_ADPLL_S))
+			rate *= 4;
+		rate *= 2;
+	}
+
+	return rate;
+}
+
+/* PLL parent is always clkinp, bypass only affects the children */
+static u8 ti_adpll_get_parent(struct clk_hw *hw)
+{
+	return 0;
+}
+
+static struct clk_ops ti_adpll_ops = {
+	.prepare = ti_adpll_prepare,
+	.unprepare = ti_adpll_unprepare,
+	.is_prepared = ti_adpll_is_prepared,
+	.recalc_rate = ti_adpll_recalc_rate,
+	.get_parent = ti_adpll_get_parent,
+};
+
+static int ti_adpll_init_dco(struct ti_adpll_data *d)
+{
+	struct clk_init_data init;
+	struct clk *clock;
+	const char *postfix;
+	int width, err;
+
+	d->outputs.clks = devm_kzalloc(d->dev, sizeof(struct clk *) *
+				       MAX_ADPLL_OUTPUTS,
+				       GFP_KERNEL);
+	if (!d->outputs.clks)
+		return -ENOMEM;
+
+	if (d->c->output_index < 0)
+		postfix = "dco";
+	else
+		postfix = NULL;
+
+	init.name = ti_adpll_clk_get_name(d, d->c->output_index, postfix);
+	if (!init.name)
+		return -EINVAL;
+
+	init.parent_names = d->parent_names;
+	init.num_parents = d->c->nr_max_inputs;
+	init.ops = &ti_adpll_ops;
+	init.flags = CLK_GET_RATE_NOCACHE;
+	d->dco.hw.init = &init;
+
+	if (d->c->is_type_s)
+		width = 5;
+	else
+		width = 4;
+
+	/* Internal input clock divider N2 */
+	err = ti_adpll_init_divider(d, TI_ADPLL_N2, -ENODEV, "n2",
+				    d->parent_clocks[TI_ADPLL_CLKINP],
+				    d->regs + ADPLL_MN2DIV_OFFSET,
+				    ADPLL_MN2DIV_N2, width, 0);
+	if (err)
+		return err;
+
+	clock = devm_clk_register(d->dev, &d->dco.hw);
+	if (IS_ERR(clock))
+		return PTR_ERR(clock);
+
+	return ti_adpll_setup_clock(d, clock, TI_ADPLL_DCO, d->c->output_index,
+				    init.name, NULL);
+}
+
+static int ti_adpll_clkout_enable(struct clk_hw *hw)
+{
+	struct ti_adpll_clkout_data *co = to_clkout(hw);
+	struct clk_hw *gate_hw = &co->gate.hw;
+
+	__clk_hw_set_clk(gate_hw, hw);
+
+	return clk_gate_ops.enable(gate_hw);
+}
+
+static void ti_adpll_clkout_disable(struct clk_hw *hw)
+{
+	struct ti_adpll_clkout_data *co = to_clkout(hw);
+	struct clk_hw *gate_hw = &co->gate.hw;
+
+	__clk_hw_set_clk(gate_hw, hw);
+	clk_gate_ops.disable(gate_hw);
+}
+
+static int ti_adpll_clkout_is_enabled(struct clk_hw *hw)
+{
+	struct ti_adpll_clkout_data *co = to_clkout(hw);
+	struct clk_hw *gate_hw = &co->gate.hw;
+
+	__clk_hw_set_clk(gate_hw, hw);
+
+	return clk_gate_ops.is_enabled(gate_hw);
+}
+
+/* Setting PLL bypass puts clkout and clkoutx2 into bypass */
+static u8 ti_adpll_clkout_get_parent(struct clk_hw *hw)
+{
+	struct ti_adpll_clkout_data *co = to_clkout(hw);
+	struct ti_adpll_data *d = co->adpll;
+
+	return ti_adpll_clock_is_bypass(d);
+}
+
+static int ti_adpll_init_clkout(struct ti_adpll_data *d,
+				enum ti_adpll_clocks index,
+				int output_index, int gate_bit,
+				char *name, struct clk *clk0,
+				struct clk *clk1)
+{
+	struct ti_adpll_clkout_data *co;
+	struct clk_init_data init;
+	struct clk_ops *ops;
+	const char *parent_names[2];
+	const char *child_name;
+	struct clk *clock;
+	int err;
+
+	co = devm_kzalloc(d->dev, sizeof(*co), GFP_KERNEL);
+	if (!co)
+		return -ENOMEM;
+	co->adpll = d;
+
+	err = of_property_read_string_index(d->np,
+					    "clock-output-names",
+					    output_index,
+					    &child_name);
+	if (err)
+		return err;
+
+	ops = devm_kzalloc(d->dev, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
+	init.name = child_name;
+	init.ops = ops;
+	init.flags = CLK_IS_BASIC;
+	co->hw.init = &init;
+	parent_names[0] = __clk_get_name(clk0);
+	parent_names[1] = __clk_get_name(clk1);
+	init.parent_names = parent_names;
+	init.num_parents = 2;
+
+	ops->get_parent = ti_adpll_clkout_get_parent;
+	ops->determine_rate = __clk_mux_determine_rate;
+	if (gate_bit) {
+		co->gate.lock = &d->lock;
+		co->gate.reg = d->regs + ADPLL_CLKCTRL_OFFSET;
+		co->gate.bit_idx = gate_bit;
+		ops->enable = ti_adpll_clkout_enable;
+		ops->disable = ti_adpll_clkout_disable;
+		ops->is_enabled = ti_adpll_clkout_is_enabled;
+	}
+
+	clock = devm_clk_register(d->dev, &co->hw);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "failed to register output %s: %li\n",
+			name, PTR_ERR(clock));
+		return PTR_ERR(clock);
+	}
+
+	return ti_adpll_setup_clock(d, clock, index, output_index, child_name,
+				    NULL);
+}
+
+static int ti_adpll_init_children_adpll_s(struct ti_adpll_data *d)
+{
+	int err;
+
+	if (!d->c->is_type_s)
+		return 0;
+
+	/* Internal mux, sources from divider N2 or clkinpulow */
+	err = ti_adpll_init_mux(d, TI_ADPLL_BYPASS, "bypass",
+				d->clocks[TI_ADPLL_N2].clk,
+				d->parent_clocks[TI_ADPLL_CLKINPULOW],
+				d->regs + ADPLL_CLKCTRL_OFFSET,
+				ADPLL_CLKCTRL_ULOWCLKEN);
+	if (err)
+		return err;
+
+	/* Internal divider M2, sources DCO */
+	err = ti_adpll_init_divider(d, TI_ADPLL_M2, -ENODEV, "m2",
+				    d->clocks[TI_ADPLL_DCO].clk,
+				    d->regs + ADPLL_M2NDIV_OFFSET,
+				    ADPLL_M2NDIV_M2,
+				    ADPLL_M2NDIV_M2_ADPLL_S_WIDTH,
+				    CLK_DIVIDER_ONE_BASED);
+	if (err)
+		return err;
+
+	/* Internal fixed divider, after M2 before clkout */
+	err = ti_adpll_init_fixed_factor(d, TI_ADPLL_DIV2, "div2",
+					 d->clocks[TI_ADPLL_M2].clk,
+					 1, 2);
+	if (err)
+		return err;
+
+	/* Output clkout with a mux and gate, sources from div2 or bypass */
+	err = ti_adpll_init_clkout(d, TI_ADPLL_CLKOUT, TI_ADPLL_S_CLKOUT,
+				   ADPLL_CLKCTRL_CLKOUTEN, "clkout",
+				   d->clocks[TI_ADPLL_DIV2].clk,
+				   d->clocks[TI_ADPLL_BYPASS].clk);
+	if (err)
+		return err;
+
+	/* Output clkoutx2 with a mux and gate, sources from M2 or bypass */
+	err = ti_adpll_init_clkout(d, TI_ADPLL_CLKOUT2, TI_ADPLL_S_CLKOUTX2, 0,
+				   "clkout2", d->clocks[TI_ADPLL_M2].clk,
+				   d->clocks[TI_ADPLL_BYPASS].clk);
+	if (err)
+		return err;
+
+	/* Internal mux, sources from DCO and clkinphif */
+	if (d->parent_clocks[TI_ADPLL_CLKINPHIF]) {
+		err = ti_adpll_init_mux(d, TI_ADPLL_HIF, "hif",
+					d->clocks[TI_ADPLL_DCO].clk,
+					d->parent_clocks[TI_ADPLL_CLKINPHIF],
+					d->regs + ADPLL_CLKCTRL_OFFSET,
+					ADPLL_CLKINPHIFSEL_ADPLL_S);
+		if (err)
+			return err;
+	}
+
+	/* Output clkouthif with a divider M3, sources from hif */
+	err = ti_adpll_init_divider(d, TI_ADPLL_M3, TI_ADPLL_S_CLKOUTHIF, "m3",
+				    d->clocks[TI_ADPLL_HIF].clk,
+				    d->regs + ADPLL_M3DIV_OFFSET,
+				    ADPLL_M3DIV_M3,
+				    ADPLL_M3DIV_M3_WIDTH,
+				    CLK_DIVIDER_ONE_BASED);
+	if (err)
+		return err;
+
+	/* Output clock dcoclkldo is the DCO */
+
+	return 0;
+}
+
+static int ti_adpll_init_children_adpll_lj(struct ti_adpll_data *d)
+{
+	int err;
+
+	if (d->c->is_type_s)
+		return 0;
+
+	/* Output clkdcoldo, gated output of DCO */
+	err = ti_adpll_init_gate(d, TI_ADPLL_DCO_GATE, TI_ADPLL_LJ_CLKDCOLDO,
+				 "clkdcoldo", d->clocks[TI_ADPLL_DCO].clk,
+				 d->regs + ADPLL_CLKCTRL_OFFSET,
+				 ADPLL_CLKCTRL_CLKDCOLDOEN, 0);
+	if (err)
+		return err;
+
+	/* Internal divider M2, sources from DCO */
+	err = ti_adpll_init_divider(d, TI_ADPLL_M2, -ENODEV,
+				    "m2", d->clocks[TI_ADPLL_DCO].clk,
+				    d->regs + ADPLL_M2NDIV_OFFSET,
+				    ADPLL_M2NDIV_M2,
+				    ADPLL_M2NDIV_M2_ADPLL_LJ_WIDTH,
+				    CLK_DIVIDER_ONE_BASED);
+	if (err)
+		return err;
+
+	/* Output clkoutldo, gated output of M2 */
+	err = ti_adpll_init_gate(d, TI_ADPLL_M2_GATE, TI_ADPLL_LJ_CLKOUTLDO,
+				 "clkoutldo", d->clocks[TI_ADPLL_M2].clk,
+				 d->regs + ADPLL_CLKCTRL_OFFSET,
+				 ADPLL_CLKCTRL_CLKOUTLDOEN_ADPLL_LJ,
+				 0);
+	if (err)
+		return err;
+
+	/* Internal mux, sources from divider N2 or clkinpulow */
+	err = ti_adpll_init_mux(d, TI_ADPLL_BYPASS, "bypass",
+				d->clocks[TI_ADPLL_N2].clk,
+				d->parent_clocks[TI_ADPLL_CLKINPULOW],
+				d->regs + ADPLL_CLKCTRL_OFFSET,
+				ADPLL_CLKCTRL_ULOWCLKEN);
+	if (err)
+		return err;
+
+	/* Output clkout, sources M2 or bypass */
+	err = ti_adpll_init_clkout(d, TI_ADPLL_CLKOUT, TI_ADPLL_S_CLKOUT,
+				   ADPLL_CLKCTRL_CLKOUTEN, "clkout",
+				   d->clocks[TI_ADPLL_M2].clk,
+				   d->clocks[TI_ADPLL_BYPASS].clk);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void ti_adpll_free_resources(struct ti_adpll_data *d)
+{
+	int i;
+
+	for (i = TI_ADPLL_M3; i >= 0; i--) {
+		struct ti_adpll_clock *ac = &d->clocks[i];
+
+		if (!ac || IS_ERR_OR_NULL(ac->clk))
+			continue;
+		if (ac->cl)
+			clkdev_drop(ac->cl);
+		if (ac->unregister)
+			ac->unregister(ac->clk);
+	}
+}
+
+/* MPU PLL manages the lock register for all PLLs */
+static void ti_adpll_unlock_all(void __iomem *reg)
+{
+	u32 v;
+
+	v = readl_relaxed(reg);
+	if (v == ADPLL_PLLSS_MMR_LOCK_ENABLED)
+		writel_relaxed(ADPLL_PLLSS_MMR_UNLOCK_MAGIC, reg);
+}
+
+static int ti_adpll_init_registers(struct ti_adpll_data *d)
+{
+	int register_offset = 0;
+
+	if (d->c->is_type_s) {
+		register_offset = 8;
+		ti_adpll_unlock_all(d->iobase + ADPLL_PLLSS_MMR_LOCK_OFFSET);
+	}
+
+	d->regs = d->iobase + register_offset + ADPLL_PWRCTRL_OFFSET;
+
+	return 0;
+}
+
+static int ti_adpll_init_inputs(struct ti_adpll_data *d)
+{
+	const char *error = "need at least %i inputs";
+	struct clk *clock;
+	int nr_inputs;
+
+	nr_inputs = of_clk_get_parent_count(d->np);
+	if (nr_inputs < d->c->nr_max_inputs) {
+		dev_err(d->dev, error, nr_inputs);
+		return -EINVAL;
+	}
+	of_clk_parent_fill(d->np, d->parent_names, nr_inputs);
+
+	clock = devm_clk_get(d->dev, d->parent_names[0]);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "could not get clkinp\n");
+		return PTR_ERR(clock);
+	}
+	d->parent_clocks[TI_ADPLL_CLKINP] = clock;
+
+	clock = devm_clk_get(d->dev, d->parent_names[1]);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "could not get clkinpulow clock\n");
+		return PTR_ERR(clock);
+	}
+	d->parent_clocks[TI_ADPLL_CLKINPULOW] = clock;
+
+	if (d->c->is_type_s) {
+		clock =  devm_clk_get(d->dev, d->parent_names[2]);
+		if (IS_ERR(clock)) {
+			dev_err(d->dev, "could not get clkinphif clock\n");
+			return PTR_ERR(clock);
+		}
+		d->parent_clocks[TI_ADPLL_CLKINPHIF] = clock;
+	}
+
+	return 0;
+}
+
+static const struct ti_adpll_platform_data ti_adpll_type_s = {
+	.is_type_s = true,
+	.nr_max_inputs = MAX_ADPLL_INPUTS,
+	.nr_max_outputs = MAX_ADPLL_OUTPUTS,
+	.output_index = TI_ADPLL_S_DCOCLKLDO,
+};
+
+static const struct ti_adpll_platform_data ti_adpll_type_lj = {
+	.is_type_s = false,
+	.nr_max_inputs = MAX_ADPLL_INPUTS - 1,
+	.nr_max_outputs = MAX_ADPLL_OUTPUTS - 1,
+	.output_index = -EINVAL,
+};
+
+static const struct of_device_id ti_adpll_match[] = {
+	{ .compatible = "ti,dm814-adpll-s-clock", &ti_adpll_type_s },
+	{ .compatible = "ti,dm814-adpll-lj-clock", &ti_adpll_type_lj },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ti_adpll_match);
+
+static int ti_adpll_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *match;
+	const struct ti_adpll_platform_data *pdata;
+	struct ti_adpll_data *d;
+	struct resource *res;
+	int err;
+
+	match = of_match_device(ti_adpll_match, dev);
+	if (match)
+		pdata = match->data;
+	else
+		return -ENODEV;
+
+	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+	d->dev = dev;
+	d->np = node;
+	d->c = pdata;
+	dev_set_drvdata(d->dev, d);
+	spin_lock_init(&d->lock);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+	d->pa = res->start;
+
+	d->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(d->iobase)) {
+		dev_err(dev, "could not get IO base: %li\n",
+			PTR_ERR(d->iobase));
+		return PTR_ERR(d->iobase);
+	}
+
+	err = ti_adpll_init_registers(d);
+	if (err)
+		return err;
+
+	err = ti_adpll_init_inputs(d);
+	if (err)
+		return err;
+
+	d->clocks = devm_kzalloc(d->dev, sizeof(struct ti_adpll_clock) *
+				 TI_ADPLL_NR_CLOCKS,
+				 GFP_KERNEL);
+	if (!d->clocks)
+		return -ENOMEM;
+
+	err = ti_adpll_init_dco(d);
+	if (err) {
+		dev_err(dev, "could not register dco: %i\n", err);
+		goto free;
+	}
+
+	err = ti_adpll_init_children_adpll_s(d);
+	if (err)
+		goto free;
+	err = ti_adpll_init_children_adpll_lj(d);
+	if (err)
+		goto free;
+
+	err = of_clk_add_provider(d->np, of_clk_src_onecell_get, &d->outputs);
+	if (err)
+		goto free;
+
+	return 0;
+
+free:
+	WARN_ON(1);
+	ti_adpll_free_resources(d);
+
+	return err;
+}
+
+static int ti_adpll_remove(struct platform_device *pdev)
+{
+	struct ti_adpll_data *d = dev_get_drvdata(&pdev->dev);
+
+	ti_adpll_free_resources(d);
+
+	return 0;
+}
+
+static struct platform_driver ti_adpll_driver = {
+	.driver = {
+		.name = "ti-adpll",
+		.of_match_table = ti_adpll_match,
+	},
+	.probe = ti_adpll_probe,
+	.remove = ti_adpll_remove,
+};
+
+static int __init ti_adpll_init(void)
+{
+	return platform_driver_register(&ti_adpll_driver);
+}
+core_initcall(ti_adpll_init);
+
+static void __exit ti_adpll_exit(void)
+{
+	platform_driver_unregister(&ti_adpll_driver);
+}
+module_exit(ti_adpll_exit);
+
+MODULE_DESCRIPTION("Clock driver for dm814x ADPLL");
+MODULE_ALIAS("platform:dm814-adpll-clock");
+MODULE_AUTHOR("Tony LIndgren <tony@atomide.com>");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index b336a8c..6411e13 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c

@@ -140,11 +140,9 @@
 	struct dpll_data *ad = clk_hw->dpll_data;
 	struct clk *clk;
 
-	ad->clk_ref = of_clk_get(node, 0);
-	ad->clk_bypass = of_clk_get(node, 1);
-
-	if (IS_ERR(ad->clk_ref) || IS_ERR(ad->clk_bypass)) {
-		pr_debug("clk-ref or clk-bypass for %s not ready, retry\n",
+	clk = of_clk_get(node, 0);
+	if (IS_ERR(clk)) {
+		pr_debug("clk-ref for %s not ready, retry\n",
 			 node->name);
 		if (!ti_clk_retry_init(node, hw, omap_clk_register_apll))
 			return;
@@ -152,6 +150,20 @@
 		goto cleanup;
 	}
 
+	ad->clk_ref = __clk_get_hw(clk);
+
+	clk = of_clk_get(node, 1);
+	if (IS_ERR(clk)) {
+		pr_debug("clk-bypass for %s not ready, retry\n",
+			 node->name);
+		if (!ti_clk_retry_init(node, hw, omap_clk_register_apll))
+			return;
+
+		goto cleanup;
+	}
+
+	ad->clk_bypass = __clk_get_hw(clk);
+
 	clk = clk_register(NULL, &clk_hw->hw);
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);

diff --git a/drivers/clk/ti/clk-814x.c b/drivers/clk/ti/clk-814x.c
index 9e85fcc..52c6efc 100644
--- a/drivers/clk/ti/clk-814x.c
+++ b/drivers/clk/ti/clk-814x.c

@@ -5,8 +5,10 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clk/ti.h>
+#include <linux/of_platform.h>
 
 #include "clock.h"
 
@@ -27,11 +29,62 @@
 	{ .node_name = NULL },
 };
 
+static bool timer_clocks_initialized;
+
+static int __init dm814x_adpll_early_init(void)
+{
+	struct device_node *np;
+
+	if (!timer_clocks_initialized)
+		return -ENODEV;
+
+	np = of_find_node_by_name(NULL, "pllss");
+	if (!np) {
+		pr_err("Could not find node for plls\n");
+		return -ENODEV;
+	}
+
+	of_platform_populate(np, NULL, NULL, NULL);
+
+	return 0;
+}
+core_initcall(dm814x_adpll_early_init);
+
+static const char * const init_clocks[] = {
+	"pll040clkout",		/* MPU 481c5040.adpll.clkout */
+	"pll290clkout",		/* DDR 481c5290.adpll.clkout */
+};
+
+static int __init dm814x_adpll_enable_init_clocks(void)
+{
+	int i, err;
+
+	if (!timer_clocks_initialized)
+		return -ENODEV;
+
+	for (i = 0; i < ARRAY_SIZE(init_clocks); i++) {
+		struct clk *clock;
+
+		clock = clk_get(NULL, init_clocks[i]);
+		if (WARN(IS_ERR(clock), "could not find init clock %s\n",
+			 init_clocks[i]))
+			continue;
+		err = clk_prepare_enable(clock);
+		if (WARN(err, "could not enable init clock %s\n",
+			 init_clocks[i]))
+			continue;
+	}
+
+	return 0;
+}
+postcore_initcall(dm814x_adpll_enable_init_clocks);
+
 int __init dm814x_dt_clk_init(void)
 {
 	ti_dt_clocks_register(dm814_clks);
 	omap2_clk_disable_autoidle_all();
 	omap2_clk_enable_init_clocks(NULL, 0);
+	timer_clocks_initialized = true;
 
 	return 0;
 }

diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index b5bcd77..5fcf247 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c

@@ -305,8 +305,8 @@
 	case TI_CLK_FIXED:
 		fixed = setup->data;
 
-		clk = clk_register_fixed_rate(NULL, setup->name, NULL,
-					      CLK_IS_ROOT, fixed->frequency);
+		clk = clk_register_fixed_rate(NULL, setup->name, NULL, 0,
+					      fixed->frequency);
 		break;
 	case TI_CLK_MUX:
 		clk = ti_clk_register_mux(setup);

diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c
index b5cc6f6..032c658 100644
--- a/drivers/clk/ti/clkt_dpll.c
+++ b/drivers/clk/ti/clkt_dpll.c

@@ -254,7 +254,7 @@
 	v >>= __ffs(dd->enable_mask);
 
 	if (_omap2_dpll_is_in_bypass(v))
-		return clk_get_rate(dd->clk_bypass);
+		return clk_hw_get_rate(dd->clk_bypass);
 
 	v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg);
 	dpll_mult = v & dd->mult_mask;
@@ -262,7 +262,7 @@
 	dpll_div = v & dd->div1_mask;
 	dpll_div >>= __ffs(dd->div1_mask);
 
-	dpll_clk = (u64)clk_get_rate(dd->clk_ref) * dpll_mult;
+	dpll_clk = (u64)clk_hw_get_rate(dd->clk_ref) * dpll_mult;
 	do_div(dpll_clk, dpll_div + 1);
 
 	return dpll_clk;
@@ -301,7 +301,7 @@
 
 	dd = clk->dpll_data;
 
-	ref_rate = clk_get_rate(dd->clk_ref);
+	ref_rate = clk_hw_get_rate(dd->clk_ref);
 	clk_name = clk_hw_get_name(hw);
 	pr_debug("clock: %s: starting DPLL round_rate, target rate %lu\n",
 		 clk_name, target_rate);

diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c
index b9bc3b8..6cf9dd1 100644
--- a/drivers/clk/ti/clockdomain.c
+++ b/drivers/clk/ti/clockdomain.c

@@ -109,7 +109,7 @@
 	struct clk_hw *clk_hw;
 	const char *clkdm_name = node->name;
 	int i;
-	int num_clks;
+	unsigned int num_clks;
 
 	num_clks = of_clk_get_parent_count(node);
 

diff --git a/drivers/clk/ti/composite.c b/drivers/clk/ti/composite.c
index 43345c4..1cf70f4 100644
--- a/drivers/clk/ti/composite.c
+++ b/drivers/clk/ti/composite.c

@@ -234,14 +234,14 @@
 
 static void __init of_ti_composite_clk_setup(struct device_node *node)
 {
-	int num_clks;
+	unsigned int num_clks;
 	int i;
 	struct clk_hw_omap_comp *cclk;
 
 	/* Number of component clocks to be put inside this clock */
 	num_clks = of_clk_get_parent_count(node);
 
-	if (num_clks < 1) {
+	if (!num_clks) {
 		pr_err("composite clk %s must have component(s)\n", node->name);
 		return;
 	}
@@ -271,13 +271,13 @@
 int __init ti_clk_add_component(struct device_node *node, struct clk_hw *hw,
 				int type)
 {
-	int num_parents;
+	unsigned int num_parents;
 	const char **parent_names;
 	struct component_clk *clk;
 
 	num_parents = of_clk_get_parent_count(node);
 
-	if (num_parents < 1) {
+	if (!num_parents) {
 		pr_err("component-clock %s must have parent(s)\n", node->name);
 		return -EINVAL;
 	}

diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index 5519b38..3bc9959 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c

@@ -147,11 +147,9 @@
 	struct dpll_data *dd = clk_hw->dpll_data;
 	struct clk *clk;
 
-	dd->clk_ref = of_clk_get(node, 0);
-	dd->clk_bypass = of_clk_get(node, 1);
-
-	if (IS_ERR(dd->clk_ref) || IS_ERR(dd->clk_bypass)) {
-		pr_debug("clk-ref or clk-bypass missing for %s, retry later\n",
+	clk = of_clk_get(node, 0);
+	if (IS_ERR(clk)) {
+		pr_debug("clk-ref missing for %s, retry later\n",
 			 node->name);
 		if (!ti_clk_retry_init(node, hw, _register_dpll))
 			return;
@@ -159,6 +157,21 @@
 		goto cleanup;
 	}
 
+	dd->clk_ref = __clk_get_hw(clk);
+
+	clk = of_clk_get(node, 1);
+
+	if (IS_ERR(clk)) {
+		pr_debug("clk-bypass missing for %s, retry later\n",
+			 node->name);
+		if (!ti_clk_retry_init(node, hw, _register_dpll))
+			return;
+
+		goto cleanup;
+	}
+
+	dd->clk_bypass = __clk_get_hw(clk);
+
 	/* register the clock */
 	clk = clk_register(NULL, &clk_hw->hw);
 
@@ -251,8 +264,8 @@
 	dd->recal_en_bit = dpll->recal_en_bit;
 	dd->recal_st_bit = dpll->recal_st_bit;
 
-	dd->clk_ref = clk_ref;
-	dd->clk_bypass = clk_bypass;
+	dd->clk_ref = __clk_get_hw(clk_ref);
+	dd->clk_bypass = __clk_get_hw(clk_bypass);
 
 	if (dpll->flags & CLKF_CORE)
 		ops = &omap3_dpll_core_ck_ops;
@@ -361,7 +374,7 @@
 	init->ops = ops;
 
 	init->num_parents = of_clk_get_parent_count(node);
-	if (init->num_parents < 1) {
+	if (!init->num_parents) {
 		pr_err("%s must have parent(s)\n", node->name);
 		goto cleanup;
 	}

diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c
index cc73929..88f2ce8 100644
--- a/drivers/clk/ti/dpll3xxx.c
+++ b/drivers/clk/ti/dpll3xxx.c

@@ -98,7 +98,7 @@
 	unsigned long fint;
 	u16 f = 0;
 
-	fint = clk_get_rate(clk->dpll_data->clk_ref) / n;
+	fint = clk_hw_get_rate(clk->dpll_data->clk_ref) / n;
 
 	pr_debug("clock: fint is %lu\n", fint);
 
@@ -460,12 +460,11 @@
 
 	parent = clk_hw_get_parent(hw);
 
-	if (clk_hw_get_rate(hw) ==
-	    clk_hw_get_rate(__clk_get_hw(dd->clk_bypass))) {
-		WARN_ON(parent != __clk_get_hw(dd->clk_bypass));
+	if (clk_hw_get_rate(hw) == clk_hw_get_rate(dd->clk_bypass)) {
+		WARN_ON(parent != dd->clk_bypass);
 		r = _omap3_noncore_dpll_bypass(clk);
 	} else {
-		WARN_ON(parent != __clk_get_hw(dd->clk_ref));
+		WARN_ON(parent != dd->clk_ref);
 		r = _omap3_noncore_dpll_lock(clk);
 	}
 
@@ -513,13 +512,13 @@
 	if (!dd)
 		return -EINVAL;
 
-	if (clk_get_rate(dd->clk_bypass) == req->rate &&
+	if (clk_hw_get_rate(dd->clk_bypass) == req->rate &&
 	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		req->best_parent_hw = __clk_get_hw(dd->clk_bypass);
+		req->best_parent_hw = dd->clk_bypass;
 	} else {
 		req->rate = omap2_dpll_round_rate(hw, req->rate,
 					  &req->best_parent_rate);
-		req->best_parent_hw = __clk_get_hw(dd->clk_ref);
+		req->best_parent_hw = dd->clk_ref;
 	}
 
 	req->best_parent_rate = req->rate;
@@ -577,7 +576,7 @@
 	if (!dd)
 		return -EINVAL;
 
-	if (clk_hw_get_parent(hw) != __clk_get_hw(dd->clk_ref))
+	if (clk_hw_get_parent(hw) != dd->clk_ref)
 		return -EINVAL;
 
 	if (dd->last_rounded_rate == 0)

diff --git a/drivers/clk/ti/dpll44xx.c b/drivers/clk/ti/dpll44xx.c
index 660d743..82c05b5 100644
--- a/drivers/clk/ti/dpll44xx.c
+++ b/drivers/clk/ti/dpll44xx.c

@@ -94,7 +94,7 @@
 {
 	long fint, fout;
 
-	fint = clk_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1);
+	fint = clk_hw_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1);
 	fout = fint * dd->last_rounded_m;
 
 	if ((fint < OMAP4_DPLL_LP_FINT_MAX) && (fout < OMAP4_DPLL_LP_FOUT_MAX))
@@ -212,13 +212,13 @@
 	if (!dd)
 		return -EINVAL;
 
-	if (clk_get_rate(dd->clk_bypass) == req->rate &&
+	if (clk_hw_get_rate(dd->clk_bypass) == req->rate &&
 	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		req->best_parent_hw = __clk_get_hw(dd->clk_bypass);
+		req->best_parent_hw = dd->clk_bypass;
 	} else {
 		req->rate = omap4_dpll_regm4xen_round_rate(hw, req->rate,
 						&req->best_parent_rate);
-		req->best_parent_hw = __clk_get_hw(dd->clk_ref);
+		req->best_parent_hw = dd->clk_ref;
 	}
 
 	req->best_parent_rate = req->rate;

diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index 618ded9..44777ab 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c

@@ -178,7 +178,7 @@
 {
 	struct clk *clk;
 	void __iomem *reg;
-	int num_parents;
+	unsigned int num_parents;
 	const char **parent_names;
 	u8 clk_mux_flags = 0;
 	u32 mask = 0;
@@ -261,7 +261,7 @@
 static void __init of_ti_composite_mux_clk_setup(struct device_node *node)
 {
 	struct clk_mux *mux;
-	int num_parents;
+	unsigned int num_parents;
 	u32 val;
 
 	mux = kzalloc(sizeof(*mux), GFP_KERNEL);

diff --git a/drivers/clk/ux500/abx500-clk.c b/drivers/clk/ux500/abx500-clk.c
index 222425d..a07c31e6 100644
--- a/drivers/clk/ux500/abx500-clk.c
+++ b/drivers/clk/ux500/abx500-clk.c

@@ -40,8 +40,7 @@
 		return ret;
 
 	/* ab8500_sysclk */
-	clk = clk_reg_prcmu_gate("ab8500_sysclk", NULL, PRCMU_SYSCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("ab8500_sysclk", NULL, PRCMU_SYSCLK, 0);
 	clk_register_clkdev(clk, "sysclk", "ab8500-usb.0");
 	clk_register_clkdev(clk, "sysclk", "ab-iddet.0");
 	clk_register_clkdev(clk, "sysclk", "snd-soc-mop500.0");
@@ -68,7 +67,7 @@
 	clk = clk_reg_sysctrl_gate_fixed_rate(dev, "ulpclk", NULL,
 		AB8500_SYSULPCLKCTRL1, AB8500_SYSULPCLKCTRL1_ULPCLKREQ,
 		AB8500_SYSULPCLKCTRL1_ULPCLKREQ,
-		38400000, 9000, CLK_IS_ROOT);
+		38400000, 9000, 0);
 	clk_register_clkdev(clk, "ulpclk", "snd-soc-mop500.0");
 
 	/* ab8500_intclk */

diff --git a/drivers/clk/ux500/u8500_of_clk.c b/drivers/clk/ux500/u8500_of_clk.c
index 271c096..9a736d9 100644
--- a/drivers/clk/ux500/u8500_of_clk.c
+++ b/drivers/clk/ux500/u8500_of_clk.c

@@ -91,21 +91,21 @@
 
 	/* Clock sources */
 	clk = clk_reg_prcmu_gate("soc0_pll", NULL, PRCMU_PLLSOC0,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	prcmu_clk[PRCMU_PLLSOC0] = clk;
 
 	clk = clk_reg_prcmu_gate("soc1_pll", NULL, PRCMU_PLLSOC1,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	prcmu_clk[PRCMU_PLLSOC1] = clk;
 
 	clk = clk_reg_prcmu_gate("ddr_pll", NULL, PRCMU_PLLDDR,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	prcmu_clk[PRCMU_PLLDDR] = clk;
 
 	/* FIXME: Add sys, ulp and int clocks here. */
 
 	rtc_clk = clk_register_fixed_rate(NULL, "rtc32k", "NULL",
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED,
+				CLK_IGNORE_UNUSED,
 				32768);
 
 	/* PRCMU clocks */
@@ -126,105 +126,101 @@
 		clk = clk_reg_prcmu_gate("sgclk", sgaclk_parent,
 					PRCMU_SGACLK, 0);
 	else
-		clk = clk_reg_prcmu_gate("sgclk", NULL,
-					PRCMU_SGACLK, CLK_IS_ROOT);
+		clk = clk_reg_prcmu_gate("sgclk", NULL, PRCMU_SGACLK, 0);
 	prcmu_clk[PRCMU_SGACLK] = clk;
 
-	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, 0);
 	prcmu_clk[PRCMU_UARTCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("msp02clk", NULL, PRCMU_MSP02CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("msp02clk", NULL, PRCMU_MSP02CLK, 0);
 	prcmu_clk[PRCMU_MSP02CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, 0);
 	prcmu_clk[PRCMU_MSP1CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, 0);
 	prcmu_clk[PRCMU_I2CCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, 0);
 	prcmu_clk[PRCMU_SLIMCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, 0);
 	prcmu_clk[PRCMU_PER1CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, 0);
 	prcmu_clk[PRCMU_PER2CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, 0);
 	prcmu_clk[PRCMU_PER3CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, 0);
 	prcmu_clk[PRCMU_PER5CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, 0);
 	prcmu_clk[PRCMU_PER6CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, 0);
 	prcmu_clk[PRCMU_PER7CLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("lcdclk", NULL, PRCMU_LCDCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_LCDCLK] = clk;
 
-	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK, 0);
 	prcmu_clk[PRCMU_BMLCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("hsitxclk", NULL, PRCMU_HSITXCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_HSITXCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("hsirxclk", NULL, PRCMU_HSIRXCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_HSIRXCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("hdmiclk", NULL, PRCMU_HDMICLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_HDMICLK] = clk;
 
-	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, 0);
 	prcmu_clk[PRCMU_APEATCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("apetraceclk", NULL, PRCMU_APETRACECLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_APETRACECLK] = clk;
 
-	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, 0);
 	prcmu_clk[PRCMU_MCDECLK] = clk;
 
-	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK, 0);
 	prcmu_clk[PRCMU_IPI2CCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK, 0);
 	prcmu_clk[PRCMU_DSIALTCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, 0);
 	prcmu_clk[PRCMU_DMACLK] = clk;
 
-	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, 0);
 	prcmu_clk[PRCMU_B2R2CLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("tvclk", NULL, PRCMU_TVCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_TVCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, 0);
 	prcmu_clk[PRCMU_SSPCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, 0);
 	prcmu_clk[PRCMU_RNGCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, 0);
 	prcmu_clk[PRCMU_UICCCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, 0);
 	prcmu_clk[PRCMU_TIMCLK] = clk;
 
 	clk = clk_reg_prcmu_opp_volt_scalable("sdmmcclk", NULL, PRCMU_SDMMCCLK,
-					100000000,
-					CLK_IS_ROOT|CLK_SET_RATE_GATE);
+					100000000, CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_SDMMCCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("dsi_pll", "hdmiclk",
@@ -252,7 +248,7 @@
 	prcmu_clk[PRCMU_DSI2ESCCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable_rate("armss", NULL,
-				PRCMU_ARMSS, 0, CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				PRCMU_ARMSS, 0, CLK_IGNORE_UNUSED);
 	prcmu_clk[PRCMU_ARMSS] = clk;
 
 	twd_clk = clk_register_fixed_factor(NULL, "smp_twd", "armss",

diff --git a/drivers/clk/ux500/u8540_clk.c b/drivers/clk/ux500/u8540_clk.c
index d7bcb7a..86549e5 100644
--- a/drivers/clk/ux500/u8540_clk.c
+++ b/drivers/clk/ux500/u8540_clk.c

@@ -56,28 +56,28 @@
 	/* Clock sources. */
 	/* Fixed ClockGen */
 	clk = clk_reg_prcmu_gate("soc0_pll", NULL, PRCMU_PLLSOC0,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	clk_register_clkdev(clk, "soc0_pll", NULL);
 
 	clk = clk_reg_prcmu_gate("soc1_pll", NULL, PRCMU_PLLSOC1,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	clk_register_clkdev(clk, "soc1_pll", NULL);
 
 	clk = clk_reg_prcmu_gate("ddr_pll", NULL, PRCMU_PLLDDR,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	clk_register_clkdev(clk, "ddr_pll", NULL);
 
 	clk = clk_register_fixed_rate(NULL, "rtc32k", NULL,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED,
+				CLK_IGNORE_UNUSED,
 				32768);
 	clk_register_clkdev(clk, "clk32k", NULL);
 	clk_register_clkdev(clk, "apb_pclk", "rtc-pl031");
 
 	clk = clk_register_fixed_rate(NULL, "ulp38m4", NULL,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED,
+				CLK_IGNORE_UNUSED,
 				38400000);
 
-	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, 0);
 	clk_register_clkdev(clk, NULL, "UART");
 
 	/* msp02clk needs a abx500 clk as parent. Handle by abx500 clk driver */
@@ -85,120 +85,116 @@
 			PRCMU_MSP02CLK, 0);
 	clk_register_clkdev(clk, NULL, "MSP02");
 
-	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, 0);
 	clk_register_clkdev(clk, NULL, "MSP1");
 
-	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, 0);
 	clk_register_clkdev(clk, NULL, "I2C");
 
-	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, 0);
 	clk_register_clkdev(clk, NULL, "slim");
 
-	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH1");
 
-	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH2");
 
-	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH3");
 
-	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH5");
 
-	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH6");
 
-	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH7");
 
 	clk = clk_reg_prcmu_scalable("lcdclk", NULL, PRCMU_LCDCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "lcd");
 	clk_register_clkdev(clk, "lcd", "mcde");
 
-	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK, 0);
 	clk_register_clkdev(clk, NULL, "bml");
 
 	clk = clk_reg_prcmu_scalable("hsitxclk", NULL, PRCMU_HSITXCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 
 	clk = clk_reg_prcmu_scalable("hsirxclk", NULL, PRCMU_HSIRXCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 
 	clk = clk_reg_prcmu_scalable("hdmiclk", NULL, PRCMU_HDMICLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "hdmi");
 	clk_register_clkdev(clk, "hdmi", "mcde");
 
-	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, 0);
 	clk_register_clkdev(clk, NULL, "apeat");
 
-	clk = clk_reg_prcmu_gate("apetraceclk", NULL, PRCMU_APETRACECLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("apetraceclk", NULL, PRCMU_APETRACECLK, 0);
 	clk_register_clkdev(clk, NULL, "apetrace");
 
-	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, 0);
 	clk_register_clkdev(clk, NULL, "mcde");
 	clk_register_clkdev(clk, "mcde", "mcde");
 	clk_register_clkdev(clk, NULL, "dsilink.0");
 	clk_register_clkdev(clk, NULL, "dsilink.1");
 	clk_register_clkdev(clk, NULL, "dsilink.2");
 
-	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK, 0);
 	clk_register_clkdev(clk, NULL, "ipi2");
 
-	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK, 0);
 	clk_register_clkdev(clk, NULL, "dsialt");
 
-	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, 0);
 	clk_register_clkdev(clk, NULL, "dma40.0");
 
-	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, 0);
 	clk_register_clkdev(clk, NULL, "b2r2");
 	clk_register_clkdev(clk, NULL, "b2r2_core");
 	clk_register_clkdev(clk, NULL, "U8500-B2R2.0");
 	clk_register_clkdev(clk, NULL, "b2r2_1_core");
 
 	clk = clk_reg_prcmu_scalable("tvclk", NULL, PRCMU_TVCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "tv");
 	clk_register_clkdev(clk, "tv", "mcde");
 
-	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, 0);
 	clk_register_clkdev(clk, NULL, "SSP");
 
-	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, 0);
 	clk_register_clkdev(clk, NULL, "rngclk");
 
-	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, 0);
 	clk_register_clkdev(clk, NULL, "uicc");
 
-	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, 0);
 	clk_register_clkdev(clk, NULL, "mtu0");
 	clk_register_clkdev(clk, NULL, "mtu1");
 
 	clk = clk_reg_prcmu_opp_volt_scalable("sdmmcclk", NULL,
 					PRCMU_SDMMCCLK, 100000000,
-					CLK_IS_ROOT|CLK_SET_RATE_GATE);
+					CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "sdmmc");
 
 	clk = clk_reg_prcmu_opp_volt_scalable("sdmmchclk", NULL,
 					PRCMU_SDMMCHCLK, 400000000,
-					CLK_IS_ROOT|CLK_SET_RATE_GATE);
+					CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "sdmmchclk");
 
-	clk = clk_reg_prcmu_gate("hvaclk", NULL, PRCMU_HVACLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("hvaclk", NULL, PRCMU_HVACLK, 0);
 	clk_register_clkdev(clk, NULL, "hva");
 
-	clk = clk_reg_prcmu_gate("g1clk", NULL, PRCMU_G1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("g1clk", NULL, PRCMU_G1CLK, 0);
 	clk_register_clkdev(clk, NULL, "g1");
 
 	clk = clk_reg_prcmu_scalable("spare1clk", NULL, PRCMU_SPARE1CLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, "dsilcd", "mcde");
 
 	clk = clk_reg_prcmu_scalable("dsi_pll", "hdmiclk",
@@ -244,7 +240,7 @@
 	clk_register_clkdev(clk, "dsilp2", "mcde");
 
 	clk = clk_reg_prcmu_scalable_rate("armss", NULL,
-				PRCMU_ARMSS, 0, CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				PRCMU_ARMSS, 0, CLK_IGNORE_UNUSED);
 	clk_register_clkdev(clk, "armss", NULL);
 
 	clk = clk_register_fixed_factor(NULL, "smp_twd", "armss",

diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c
index 3bca438..5e9b652 100644
--- a/drivers/clk/versatile/clk-icst.c
+++ b/drivers/clk/versatile/clk-icst.c

@@ -170,7 +170,7 @@
 
 	init.name = name;
 	init.ops = &icst_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.parent_names = (parent_name ? &parent_name : NULL);
 	init.num_parents = (parent_name ? 1 : 0);
 	icst->map = map;

diff --git a/drivers/clk/versatile/clk-impd1.c b/drivers/clk/versatile/clk-impd1.c
index 65c842a..74c3216 100644
--- a/drivers/clk/versatile/clk-impd1.c
+++ b/drivers/clk/versatile/clk-impd1.c

@@ -98,8 +98,7 @@
 
 	/* Register the fixed rate PCLK */
 	imc->pclkname = kasprintf(GFP_KERNEL, "lm%x-pclk", id);
-	pclk = clk_register_fixed_rate(NULL, imc->pclkname, NULL,
-				      CLK_IS_ROOT, 0);
+	pclk = clk_register_fixed_rate(NULL, imc->pclkname, NULL, 0, 0);
 	imc->pclk = pclk;
 
 	imc->vco1name = kasprintf(GFP_KERNEL, "lm%x-vco1", id);

diff --git a/drivers/clk/versatile/clk-realview.c b/drivers/clk/versatile/clk-realview.c
index bd4dd24..c56efc7 100644
--- a/drivers/clk/versatile/clk-realview.c
+++ b/drivers/clk/versatile/clk-realview.c

@@ -56,12 +56,11 @@
 	struct clk *clk;
 
 	/* APB clock dummy */
-	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, CLK_IS_ROOT, 0);
+	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 0);
 	clk_register_clkdev(clk, "apb_pclk", NULL);
 
 	/* 24 MHz clock */
-	clk = clk_register_fixed_rate(NULL, "clk24mhz", NULL, CLK_IS_ROOT,
-				24000000);
+	clk = clk_register_fixed_rate(NULL, "clk24mhz", NULL, 0, 24000000);
 	clk_register_clkdev(clk, NULL, "dev:uart0");
 	clk_register_clkdev(clk, NULL, "dev:uart1");
 	clk_register_clkdev(clk, NULL, "dev:uart2");
@@ -81,8 +80,7 @@
 
 
 	/* 1 MHz clock */
-	clk = clk_register_fixed_rate(NULL, "clk1mhz", NULL, CLK_IS_ROOT,
-				      1000000);
+	clk = clk_register_fixed_rate(NULL, "clk1mhz", NULL, 0, 1000000);
 	clk_register_clkdev(clk, NULL, "sp804");
 
 	/* ICST VCO clock */

diff --git a/drivers/clk/versatile/clk-sp810.c b/drivers/clk/versatile/clk-sp810.c
index e78755e..1fe1e8d 100644
--- a/drivers/clk/versatile/clk-sp810.c
+++ b/drivers/clk/versatile/clk-sp810.c

@@ -92,6 +92,7 @@
 	int num = ARRAY_SIZE(parent_names);
 	char name[12];
 	struct clk_init_data init;
+	static int instance;
 	int i;
 	bool deprecated;
 
@@ -117,7 +118,7 @@
 	deprecated = !of_find_property(node, "assigned-clock-parents", NULL);
 
 	for (i = 0; i < ARRAY_SIZE(sp810->timerclken); i++) {
-		snprintf(name, ARRAY_SIZE(name), "timerclken%d", i);
+		snprintf(name, sizeof(name), "sp810_%d_%d", instance, i);
 
 		sp810->timerclken[i].sp810 = sp810;
 		sp810->timerclken[i].channel = i;
@@ -138,5 +139,6 @@
 	}
 
 	of_clk_add_provider(node, clk_sp810_timerclken_of_get, sp810);
+	instance++;
 }
 CLK_OF_DECLARE(sp810, "arm,sp810", clk_sp810_of_setup);

diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c
index 89c0609..7e5add7 100644
--- a/drivers/clk/versatile/clk-vexpress-osc.c
+++ b/drivers/clk/versatile/clk-vexpress-osc.c

@@ -94,7 +94,7 @@
 		init.name = dev_name(&pdev->dev);
 
 	init.ops = &vexpress_osc_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.num_parents = 0;
 
 	osc->hw.init = &init;

diff --git a/drivers/clk/x86/clk-lpt.c b/drivers/clk/x86/clk-lpt.c
index f827083..6b40eb8 100644
--- a/drivers/clk/x86/clk-lpt.c
+++ b/drivers/clk/x86/clk-lpt.c

@@ -10,8 +10,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/clk.h>
-#include <linux/clkdev.h>
 #include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/module.h>
@@ -30,7 +28,7 @@
 	/* LPSS free running clock */
 	drvdata->name = "lpss_clk";
 	clk = clk_register_fixed_rate(&pdev->dev, drvdata->name, NULL,
-				      CLK_IS_ROOT, 100000000);
+				      0, 100000000);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 

diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c
index 38a65c3..88a2cab 100644
--- a/drivers/clk/zynq/clkc.c
+++ b/drivers/clk/zynq/clkc.c

@@ -265,8 +265,7 @@
 		pr_warn("ps_clk frequency not specified, using 33 MHz.\n");
 		tmp = 33333333;
 	}
-	ps_clk = clk_register_fixed_rate(NULL, "ps_clk", NULL, CLK_IS_ROOT,
-			tmp);
+	ps_clk = clk_register_fixed_rate(NULL, "ps_clk", NULL, 0, tmp);
 
 	/* PLLs */
 	clk = clk_register_zynq_pll("armpll_int", "ps_clk", SLCR_ARMPLL_CTRL,

diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c
index 3269d9e..376e59b 100644
--- a/drivers/clocksource/time-pistachio.c
+++ b/drivers/clocksource/time-pistachio.c

@@ -163,7 +163,7 @@
 
 	periph_regs = syscon_regmap_lookup_by_phandle(node, "img,cr-periph");
 	if (IS_ERR(periph_regs)) {
-		pr_err("cannot get peripheral regmap (%lu)\n",
+		pr_err("cannot get peripheral regmap (%ld)\n",
 		       PTR_ERR(periph_regs));
 		return;
 	}
@@ -176,7 +176,7 @@
 
 	sys_clk = of_clk_get_by_name(node, "sys");
 	if (IS_ERR(sys_clk)) {
-		pr_err("clock get failed (%lu)\n", PTR_ERR(sys_clk));
+		pr_err("clock get failed (%ld)\n", PTR_ERR(sys_clk));
 		return;
 	}
 

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 59a7b38..fb57121 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c

@@ -245,7 +245,7 @@
 	}
 }
 
-u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
+static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
 {
 	u32 val, dummy;
 
@@ -253,7 +253,7 @@
 	return val;
 }
 
-void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
+static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
 {
 	u32 lo, hi;
 
@@ -262,7 +262,7 @@
 	wrmsr(MSR_IA32_PERF_CTL, lo, hi);
 }
 
-u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
+static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
 {
 	u32 val, dummy;
 
@@ -270,12 +270,12 @@
 	return val;
 }
 
-void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
+static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
 {
 	wrmsr(MSR_AMD_PERF_CTL, val, 0);
 }
 
-u32 cpu_freq_read_io(struct acpi_pct_register *reg)
+static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
 {
 	u32 val;
 
@@ -283,7 +283,7 @@
 	return val;
 }
 
-void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
+static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
 {
 	acpi_os_write_port(reg->address, val, reg->bit_width);
 }
@@ -514,8 +514,10 @@
 	 */
 
 	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
 		break;
 

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 4c78258..b87596b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c

@@ -76,6 +76,7 @@
 /* internal prototypes */
 static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
+static int cpufreq_start_governor(struct cpufreq_policy *policy);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -964,10 +965,7 @@
 	cpumask_set_cpu(cpu, policy->cpus);
 
 	if (has_target()) {
-		ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-		if (!ret)
-			ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
+		ret = cpufreq_start_governor(policy);
 		if (ret)
 			pr_err("%s: Failed to start governor\n", __func__);
 	}
@@ -1308,10 +1306,7 @@
 	/* Start governor again for active policy */
 	if (!policy_is_inactive(policy)) {
 		if (has_target()) {
-			ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-			if (!ret)
-				ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
+			ret = cpufreq_start_governor(policy);
 			if (ret)
 				pr_err("%s: Failed to start governor\n", __func__);
 		}
@@ -1401,9 +1396,17 @@
 {
 	struct cpufreq_policy *policy;
 	unsigned int ret_freq = 0;
+	unsigned long flags;
 
-	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
-		return cpufreq_driver->get(cpu);
+	read_lock_irqsave(&cpufreq_driver_lock, flags);
+
+	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) {
+		ret_freq = cpufreq_driver->get(cpu);
+		read_unlock_irqrestore(&cpufreq_driver_lock, flags);
+		return ret_freq;
+	}
+
+	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
 	policy = cpufreq_cpu_get(cpu);
 	if (policy) {
@@ -1484,6 +1487,24 @@
 }
 EXPORT_SYMBOL(cpufreq_get);
 
+static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy)
+{
+	unsigned int new_freq;
+
+	new_freq = cpufreq_driver->get(policy->cpu);
+	if (!new_freq)
+		return 0;
+
+	if (!policy->cur) {
+		pr_debug("cpufreq: Driver did not initialize current freq\n");
+		policy->cur = new_freq;
+	} else if (policy->cur != new_freq && has_target()) {
+		cpufreq_out_of_sync(policy, new_freq);
+	}
+
+	return new_freq;
+}
+
 static struct subsys_interface cpufreq_interface = {
 	.name		= "cpufreq",
 	.subsys		= &cpu_subsys,
@@ -1583,9 +1604,7 @@
 				policy);
 		} else {
 			down_write(&policy->rwsem);
-			ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-			if (!ret)
-				cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+			ret = cpufreq_start_governor(policy);
 			up_write(&policy->rwsem);
 
 			if (ret)
@@ -1593,17 +1612,6 @@
 				       __func__, policy);
 		}
 	}
-
-	/*
-	 * schedule call cpufreq_update_policy() for first-online CPU, as that
-	 * wouldn't be hotplugged-out on suspend. It will verify that the
-	 * current freq is in sync with what we believe it to be.
-	 */
-	policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
-	if (WARN_ON(!policy))
-		return;
-
-	schedule_work(&policy->update);
 }
 
 /**
@@ -1927,6 +1935,17 @@
 	return ret;
 }
 
+static int cpufreq_start_governor(struct cpufreq_policy *policy)
+{
+	int ret;
+
+	if (cpufreq_driver->get && !cpufreq_driver->setpolicy)
+		cpufreq_update_current_freq(policy);
+
+	ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
+	return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+}
+
 int cpufreq_register_governor(struct cpufreq_governor *governor)
 {
 	int err;
@@ -2063,8 +2082,10 @@
 		return cpufreq_driver->setpolicy(new_policy);
 	}
 
-	if (new_policy->governor == policy->governor)
-		goto out;
+	if (new_policy->governor == policy->governor) {
+		pr_debug("cpufreq: governor limits update\n");
+		return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+	}
 
 	pr_debug("governor switch\n");
 
@@ -2092,10 +2113,11 @@
 	policy->governor = new_policy->governor;
 	ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
 	if (!ret) {
-		ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-		if (!ret)
-			goto out;
-
+		ret = cpufreq_start_governor(policy);
+		if (!ret) {
+			pr_debug("cpufreq: governor change\n");
+			return 0;
+		}
 		cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
 	}
 
@@ -2106,14 +2128,10 @@
 		if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
 			policy->governor = NULL;
 		else
-			cpufreq_governor(policy, CPUFREQ_GOV_START);
+			cpufreq_start_governor(policy);
 	}
 
 	return ret;
-
- out:
-	pr_debug("governor: change or update limits\n");
-	return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 }
 
 /**
@@ -2144,19 +2162,11 @@
 	 * -> ask driver for current freq and notify governors about a change
 	 */
 	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
-		new_policy.cur = cpufreq_driver->get(cpu);
+		new_policy.cur = cpufreq_update_current_freq(policy);
 		if (WARN_ON(!new_policy.cur)) {
 			ret = -EIO;
 			goto unlock;
 		}
-
-		if (!policy->cur) {
-			pr_debug("Driver did not initialize current freq\n");
-			policy->cur = new_policy.cur;
-		} else {
-			if (policy->cur != new_policy.cur && has_target())
-				cpufreq_out_of_sync(policy, new_policy.cur);
-		}
 	}
 
 	ret = cpufreq_set_policy(policy, &new_policy);

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 1c25ef4..10a5cfe 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c

@@ -329,7 +329,7 @@
 	struct policy_dbs_info *policy_dbs;
 
 	policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
-	schedule_work(&policy_dbs->work);
+	schedule_work_on(smp_processor_id(), &policy_dbs->work);
 }
 
 static void dbs_update_util_handler(struct update_util_data *data, u64 time,

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cb560749..4b64452 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c

@@ -134,7 +134,7 @@
 	int (*get_min)(void);
 	int (*get_turbo)(void);
 	int (*get_scaling)(void);
-	void (*set)(struct cpudata*, int pstate);
+	u64 (*get_val)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
 	int32_t (*get_target_pstate)(struct cpudata *);
 };
@@ -565,7 +565,7 @@
 	return value & 0x7F;
 }
 
-static void atom_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 atom_get_val(struct cpudata *cpudata, int pstate)
 {
 	u64 val;
 	int32_t vid_fp;
@@ -585,9 +585,7 @@
 	if (pstate > cpudata->pstate.max_pstate)
 		vid = cpudata->vid.turbo;
 
-	val |= vid;
-
-	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
+	return val | vid;
 }
 
 static int silvermont_get_scaling(void)
@@ -711,7 +709,7 @@
 	return 100000;
 }
 
-static void core_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 core_get_val(struct cpudata *cpudata, int pstate)
 {
 	u64 val;
 
@@ -719,7 +717,7 @@
 	if (limits->no_turbo && !limits->turbo_disabled)
 		val |= (u64)1 << 32;
 
-	wrmsrl(MSR_IA32_PERF_CTL, val);
+	return val;
 }
 
 static int knl_get_turbo_pstate(void)
@@ -750,7 +748,7 @@
 		.get_min = core_get_min_pstate,
 		.get_turbo = core_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
-		.set = core_set_pstate,
+		.get_val = core_get_val,
 		.get_target_pstate = get_target_pstate_use_performance,
 	},
 };
@@ -769,7 +767,7 @@
 		.get_max_physical = atom_get_max_pstate,
 		.get_min = atom_get_min_pstate,
 		.get_turbo = atom_get_turbo_pstate,
-		.set = atom_set_pstate,
+		.get_val = atom_get_val,
 		.get_scaling = silvermont_get_scaling,
 		.get_vid = atom_get_vid,
 		.get_target_pstate = get_target_pstate_use_cpu_load,
@@ -790,7 +788,7 @@
 		.get_max_physical = atom_get_max_pstate,
 		.get_min = atom_get_min_pstate,
 		.get_turbo = atom_get_turbo_pstate,
-		.set = atom_set_pstate,
+		.get_val = atom_get_val,
 		.get_scaling = airmont_get_scaling,
 		.get_vid = atom_get_vid,
 		.get_target_pstate = get_target_pstate_use_cpu_load,
@@ -812,7 +810,7 @@
 		.get_min = core_get_min_pstate,
 		.get_turbo = knl_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
-		.set = core_set_pstate,
+		.get_val = core_get_val,
 		.get_target_pstate = get_target_pstate_use_performance,
 	},
 };
@@ -839,25 +837,24 @@
 	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
-static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
+static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
 {
-	int max_perf, min_perf;
-
-	if (force) {
-		update_turbo_state();
-
-		intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
-
-		pstate = clamp_t(int, pstate, min_perf, max_perf);
-
-		if (pstate == cpu->pstate.current_pstate)
-			return;
-	}
 	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
-
 	cpu->pstate.current_pstate = pstate;
+}
 
-	pstate_funcs.set(cpu, pstate);
+static void intel_pstate_set_min_pstate(struct cpudata *cpu)
+{
+	int pstate = cpu->pstate.min_pstate;
+
+	intel_pstate_record_pstate(cpu, pstate);
+	/*
+	 * Generally, there is no guarantee that this code will always run on
+	 * the CPU being updated, so force the register update to run on the
+	 * right CPU.
+	 */
+	wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+		      pstate_funcs.get_val(cpu, pstate));
 }
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
@@ -870,7 +867,8 @@
 
 	if (pstate_funcs.get_vid)
 		pstate_funcs.get_vid(cpu);
-	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+
+	intel_pstate_set_min_pstate(cpu);
 }
 
 static inline void intel_pstate_calc_busy(struct cpudata *cpu)
@@ -997,6 +995,21 @@
 	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
 }
 
+static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+{
+	int max_perf, min_perf;
+
+	update_turbo_state();
+
+	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
+	pstate = clamp_t(int, pstate, min_perf, max_perf);
+	if (pstate == cpu->pstate.current_pstate)
+		return;
+
+	intel_pstate_record_pstate(cpu, pstate);
+	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
+}
+
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 {
 	int from, target_pstate;
@@ -1006,7 +1019,7 @@
 
 	target_pstate = pstate_funcs.get_target_pstate(cpu);
 
-	intel_pstate_set_pstate(cpu, target_pstate, true);
+	intel_pstate_update_pstate(cpu, target_pstate);
 
 	sample = &cpu->sample;
 	trace_pstate_sample(fp_toint(sample->core_pct_busy),
@@ -1180,7 +1193,7 @@
 	if (hwp_active)
 		return;
 
-	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+	intel_pstate_set_min_pstate(cpu);
 }
 
 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
@@ -1255,7 +1268,7 @@
 	pstate_funcs.get_min   = funcs->get_min;
 	pstate_funcs.get_turbo = funcs->get_turbo;
 	pstate_funcs.get_scaling = funcs->get_scaling;
-	pstate_funcs.set       = funcs->set;
+	pstate_funcs.get_val   = funcs->get_val;
 	pstate_funcs.get_vid   = funcs->get_vid;
 	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
 

diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 50bf120..39ac78c 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c

@@ -44,7 +44,6 @@
 
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
 static bool rebooting, throttled, occ_reset;
-static unsigned int *core_to_chip_map;
 
 static const char * const throttle_reason[] = {
 	"No throttling",
@@ -55,6 +54,16 @@
 	"OCC Reset"
 };
 
+enum throttle_reason_type {
+	NO_THROTTLE = 0,
+	POWERCAP,
+	CPU_OVERTEMP,
+	POWER_SUPPLY_FAILURE,
+	OVERCURRENT,
+	OCC_RESET_THROTTLE,
+	OCC_MAX_REASON
+};
+
 static struct chip {
 	unsigned int id;
 	bool throttled;
@@ -62,9 +71,13 @@
 	u8 throttle_reason;
 	cpumask_t mask;
 	struct work_struct throttle;
+	int throttle_turbo;
+	int throttle_sub_turbo;
+	int reason[OCC_MAX_REASON];
 } *chips;
 
 static int nr_chips;
+static DEFINE_PER_CPU(struct chip *, chip_info);
 
 /*
  * Note: The set of pstates consists of contiguous integers, the
@@ -196,6 +209,42 @@
 	NULL,
 };
 
+#define throttle_attr(name, member)					\
+static ssize_t name##_show(struct cpufreq_policy *policy, char *buf)	\
+{									\
+	struct chip *chip = per_cpu(chip_info, policy->cpu);		\
+									\
+	return sprintf(buf, "%u\n", chip->member);			\
+}									\
+									\
+static struct freq_attr throttle_attr_##name = __ATTR_RO(name)		\
+
+throttle_attr(unthrottle, reason[NO_THROTTLE]);
+throttle_attr(powercap, reason[POWERCAP]);
+throttle_attr(overtemp, reason[CPU_OVERTEMP]);
+throttle_attr(supply_fault, reason[POWER_SUPPLY_FAILURE]);
+throttle_attr(overcurrent, reason[OVERCURRENT]);
+throttle_attr(occ_reset, reason[OCC_RESET_THROTTLE]);
+throttle_attr(turbo_stat, throttle_turbo);
+throttle_attr(sub_turbo_stat, throttle_sub_turbo);
+
+static struct attribute *throttle_attrs[] = {
+	&throttle_attr_unthrottle.attr,
+	&throttle_attr_powercap.attr,
+	&throttle_attr_overtemp.attr,
+	&throttle_attr_supply_fault.attr,
+	&throttle_attr_overcurrent.attr,
+	&throttle_attr_occ_reset.attr,
+	&throttle_attr_turbo_stat.attr,
+	&throttle_attr_sub_turbo_stat.attr,
+	NULL,
+};
+
+static const struct attribute_group throttle_attr_grp = {
+	.name	= "throttle_stats",
+	.attrs	= throttle_attrs,
+};
+
 /* Helper routines */
 
 /* Access helpers to power mgt SPR */
@@ -324,34 +373,35 @@
 
 static void powernv_cpufreq_throttle_check(void *data)
 {
+	struct chip *chip;
 	unsigned int cpu = smp_processor_id();
-	unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)];
 	unsigned long pmsr;
-	int pmsr_pmax, i;
+	int pmsr_pmax;
 
 	pmsr = get_pmspr(SPRN_PMSR);
-
-	for (i = 0; i < nr_chips; i++)
-		if (chips[i].id == chip_id)
-			break;
+	chip = this_cpu_read(chip_info);
 
 	/* Check for Pmax Capping */
 	pmsr_pmax = (s8)PMSR_MAX(pmsr);
 	if (pmsr_pmax != powernv_pstate_info.max) {
-		if (chips[i].throttled)
+		if (chip->throttled)
 			goto next;
-		chips[i].throttled = true;
-		if (pmsr_pmax < powernv_pstate_info.nominal)
+		chip->throttled = true;
+		if (pmsr_pmax < powernv_pstate_info.nominal) {
 			pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
-				     cpu, chips[i].id, pmsr_pmax,
+				     cpu, chip->id, pmsr_pmax,
 				     powernv_pstate_info.nominal);
-		trace_powernv_throttle(chips[i].id,
-				      throttle_reason[chips[i].throttle_reason],
+			chip->throttle_sub_turbo++;
+		} else {
+			chip->throttle_turbo++;
+		}
+		trace_powernv_throttle(chip->id,
+				      throttle_reason[chip->throttle_reason],
 				      pmsr_pmax);
-	} else if (chips[i].throttled) {
-		chips[i].throttled = false;
-		trace_powernv_throttle(chips[i].id,
-				      throttle_reason[chips[i].throttle_reason],
+	} else if (chip->throttled) {
+		chip->throttled = false;
+		trace_powernv_throttle(chip->id,
+				      throttle_reason[chip->throttle_reason],
 				      pmsr_pmax);
 	}
 
@@ -411,6 +461,21 @@
 	for (i = 0; i < threads_per_core; i++)
 		cpumask_set_cpu(base + i, policy->cpus);
 
+	if (!policy->driver_data) {
+		int ret;
+
+		ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp);
+		if (ret) {
+			pr_info("Failed to create throttle stats directory for cpu %d\n",
+				policy->cpu);
+			return ret;
+		}
+		/*
+		 * policy->driver_data is used as a flag for one-time
+		 * creation of throttle sysfs files.
+		 */
+		policy->driver_data = policy;
+	}
 	return cpufreq_table_validate_and_show(policy, powernv_freqs);
 }
 
@@ -517,8 +582,10 @@
 				break;
 
 		if (omsg.throttle_status >= 0 &&
-		    omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS)
+		    omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) {
 			chips[i].throttle_reason = omsg.throttle_status;
+			chips[i].reason[omsg.throttle_status]++;
+		}
 
 		if (!omsg.throttle_status)
 			chips[i].restore = true;
@@ -558,47 +625,34 @@
 	unsigned int chip[256];
 	unsigned int cpu, i;
 	unsigned int prev_chip_id = UINT_MAX;
-	cpumask_t cpu_mask;
-	int ret = -ENOMEM;
 
-	core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int),
-				   GFP_KERNEL);
-	if (!core_to_chip_map)
-		goto out;
-
-	cpumask_copy(&cpu_mask, cpu_possible_mask);
-	for_each_cpu(cpu, &cpu_mask) {
+	for_each_possible_cpu(cpu) {
 		unsigned int id = cpu_to_chip_id(cpu);
 
 		if (prev_chip_id != id) {
 			prev_chip_id = id;
 			chip[nr_chips++] = id;
 		}
-		core_to_chip_map[cpu_core_index_of_thread(cpu)] = id;
-		cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu));
 	}
 
 	chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
 	if (!chips)
-		goto free_chip_map;
+		return -ENOMEM;
 
 	for (i = 0; i < nr_chips; i++) {
 		chips[i].id = chip[i];
 		cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
 		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
+		for_each_cpu(cpu, &chips[i].mask)
+			per_cpu(chip_info, cpu) =  &chips[i];
 	}
 
 	return 0;
-free_chip_map:
-	kfree(core_to_chip_map);
-out:
-	return ret;
 }
 
 static inline void clean_chip_info(void)
 {
 	kfree(chips);
-	kfree(core_to_chip_map);
 }
 
 static inline void unregister_all_notifiers(void)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 27fc733..03d38c2 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c

@@ -196,7 +196,7 @@
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static void get_typical_interval(struct menu_device *data)
+static unsigned int get_typical_interval(struct menu_device *data)
 {
 	int i, divisor;
 	unsigned int max, thresh, avg;
@@ -253,9 +253,7 @@
 	if (likely(variance <= U64_MAX/36)) {
 		if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
 							|| variance <= 400) {
-			if (data->next_timer_us > avg)
-				data->predicted_us = avg;
-			return;
+			return avg;
 		}
 	}
 
@@ -269,7 +267,7 @@
 	 * with sporadic activity with a bunch of short pauses.
 	 */
 	if ((divisor * 4) <= INTERVALS * 3)
-		return;
+		return UINT_MAX;
 
 	thresh = max - 1;
 	goto again;
@@ -286,6 +284,7 @@
 	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	int i;
 	unsigned int interactivity_req;
+	unsigned int expected_interval;
 	unsigned long nr_iowaiters, cpu_load;
 
 	if (data->needs_update) {
@@ -312,31 +311,42 @@
 					 data->correction_factor[data->bucket],
 					 RESOLUTION * DECAY);
 
-	get_typical_interval(data);
+	expected_interval = get_typical_interval(data);
+	expected_interval = min(expected_interval, data->next_timer_us);
+
+	if (CPUIDLE_DRIVER_STATE_START > 0) {
+		struct cpuidle_state *s = &drv->states[CPUIDLE_DRIVER_STATE_START];
+		unsigned int polling_threshold;
+
+		/*
+		 * We want to default to C1 (hlt), not to busy polling
+		 * unless the timer is happening really really soon, or
+		 * C1's exit latency exceeds the user configured limit.
+		 */
+		polling_threshold = max_t(unsigned int, 20, s->target_residency);
+		if (data->next_timer_us > polling_threshold &&
+		    latency_req > s->exit_latency && !s->disabled &&
+		    !dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable)
+			data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+		else
+			data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
+	} else {
+		data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+	}
 
 	/*
-	 * Performance multiplier defines a minimum predicted idle
-	 * duration / latency ratio. Adjust the latency limit if
-	 * necessary.
+	 * Use the lowest expected idle interval to pick the idle state.
+	 */
+	data->predicted_us = min(data->predicted_us, expected_interval);
+
+	/*
+	 * Use the performance multiplier and the user-configurable
+	 * latency_req to determine the maximum exit latency.
 	 */
 	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
 	if (latency_req > interactivity_req)
 		latency_req = interactivity_req;
 
-	if (CPUIDLE_DRIVER_STATE_START > 0) {
-		data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
-		/*
-		 * We want to default to C1 (hlt), not to busy polling
-		 * unless the timer is happening really really soon.
-		 */
-		if (interactivity_req > 20 &&
-		    !drv->states[CPUIDLE_DRIVER_STATE_START].disabled &&
-			dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0)
-			data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
-	} else {
-		data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
-	}
-
 	/*
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.

diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 336e5b7..4dbc187 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c

@@ -53,7 +53,7 @@
 static LIST_HEAD(ccp_units);
 
 /* Round-robin counter */
-static DEFINE_RWLOCK(ccp_rr_lock);
+static DEFINE_SPINLOCK(ccp_rr_lock);
 static struct ccp_device *ccp_rr;
 
 /* Ever-increasing value to produce unique unit numbers */
@@ -128,14 +128,14 @@
 	 */
 	read_lock_irqsave(&ccp_unit_lock, flags);
 	if (!list_empty(&ccp_units)) {
-		write_lock_irqsave(&ccp_rr_lock, flags);
+		spin_lock(&ccp_rr_lock);
 		dp = ccp_rr;
 		if (list_is_last(&ccp_rr->entry, &ccp_units))
 			ccp_rr = list_first_entry(&ccp_units, struct ccp_device,
 						  entry);
 		else
 			ccp_rr = list_next_entry(ccp_rr, entry);
-		write_unlock_irqrestore(&ccp_rr_lock, flags);
+		spin_unlock(&ccp_rr_lock);
 	}
 	read_unlock_irqrestore(&ccp_unit_lock, flags);
 

diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index c0656e7..80239ae 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c

@@ -420,7 +420,7 @@
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
 	cesa->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(cesa->regs))
-		return -ENOMEM;
+		return PTR_ERR(cesa->regs);
 
 	ret = mv_cesa_dev_dma_init(cesa);
 	if (ret)

diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index bd985e7..74071e4 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h

@@ -588,6 +588,7 @@
 	struct mv_cesa_tdma_req base;
 	u8 *padding;
 	dma_addr_t padding_dma;
+	u8 *cache;
 	dma_addr_t cache_dma;
 };
 
@@ -609,7 +610,7 @@
 		struct mv_cesa_ahash_std_req std;
 	} req;
 	struct mv_cesa_op_ctx op_tmpl;
-	u8 *cache;
+	u8 cache[CESA_MAX_HASH_BLOCK_SIZE];
 	unsigned int cache_ptr;
 	u64 len;
 	int src_nents;

diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 683cca9..7ca2e0f 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c

@@ -45,69 +45,25 @@
 	return mv_cesa_req_dma_iter_next_op(&iter->base);
 }
 
-static inline int mv_cesa_ahash_dma_alloc_cache(struct mv_cesa_ahash_req *creq,
-						gfp_t flags)
+static inline int
+mv_cesa_ahash_dma_alloc_cache(struct mv_cesa_ahash_dma_req *req, gfp_t flags)
 {
-	struct mv_cesa_ahash_dma_req *dreq = &creq->req.dma;
-
-	creq->cache = dma_pool_alloc(cesa_dev->dma->cache_pool, flags,
-				     &dreq->cache_dma);
-	if (!creq->cache)
+	req->cache = dma_pool_alloc(cesa_dev->dma->cache_pool, flags,
+				    &req->cache_dma);
+	if (!req->cache)
 		return -ENOMEM;
 
 	return 0;
 }
 
-static inline int mv_cesa_ahash_std_alloc_cache(struct mv_cesa_ahash_req *creq,
-						gfp_t flags)
+static inline void
+mv_cesa_ahash_dma_free_cache(struct mv_cesa_ahash_dma_req *req)
 {
-	creq->cache = kzalloc(CESA_MAX_HASH_BLOCK_SIZE, flags);
-	if (!creq->cache)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static int mv_cesa_ahash_alloc_cache(struct ahash_request *req)
-{
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
-		      GFP_KERNEL : GFP_ATOMIC;
-	int ret;
-
-	if (creq->cache)
-		return 0;
-
-	if (creq->req.base.type == CESA_DMA_REQ)
-		ret = mv_cesa_ahash_dma_alloc_cache(creq, flags);
-	else
-		ret = mv_cesa_ahash_std_alloc_cache(creq, flags);
-
-	return ret;
-}
-
-static inline void mv_cesa_ahash_dma_free_cache(struct mv_cesa_ahash_req *creq)
-{
-	dma_pool_free(cesa_dev->dma->cache_pool, creq->cache,
-		      creq->req.dma.cache_dma);
-}
-
-static inline void mv_cesa_ahash_std_free_cache(struct mv_cesa_ahash_req *creq)
-{
-	kfree(creq->cache);
-}
-
-static void mv_cesa_ahash_free_cache(struct mv_cesa_ahash_req *creq)
-{
-	if (!creq->cache)
+	if (!req->cache)
 		return;
 
-	if (creq->req.base.type == CESA_DMA_REQ)
-		mv_cesa_ahash_dma_free_cache(creq);
-	else
-		mv_cesa_ahash_std_free_cache(creq);
-
-	creq->cache = NULL;
+	dma_pool_free(cesa_dev->dma->cache_pool, req->cache,
+		      req->cache_dma);
 }
 
 static int mv_cesa_ahash_dma_alloc_padding(struct mv_cesa_ahash_dma_req *req,
@@ -146,6 +102,7 @@
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 
 	dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE);
+	mv_cesa_ahash_dma_free_cache(&creq->req.dma);
 	mv_cesa_dma_cleanup(&creq->req.dma.base);
 }
 
@@ -161,8 +118,6 @@
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 
-	mv_cesa_ahash_free_cache(creq);
-
 	if (creq->req.base.type == CESA_DMA_REQ)
 		mv_cesa_ahash_dma_last_cleanup(req);
 }
@@ -445,14 +400,6 @@
 static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	int ret;
-
-	if (((creq->cache_ptr + req->nbytes) & CESA_HASH_BLOCK_SIZE_MSK) &&
-	    !creq->last_req) {
-		ret = mv_cesa_ahash_alloc_cache(req);
-		if (ret)
-			return ret;
-	}
 
 	if (creq->cache_ptr + req->nbytes < 64 && !creq->last_req) {
 		*cached = true;
@@ -505,10 +452,17 @@
 			    gfp_t flags)
 {
 	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
+	int ret;
 
 	if (!creq->cache_ptr)
 		return 0;
 
+	ret = mv_cesa_ahash_dma_alloc_cache(ahashdreq, flags);
+	if (ret)
+		return ret;
+
+	memcpy(ahashdreq->cache, creq->cache, creq->cache_ptr);
+
 	return mv_cesa_dma_add_data_transfer(chain,
 					     CESA_SA_DATA_SRAM_OFFSET,
 					     ahashdreq->cache_dma,
@@ -848,10 +802,6 @@
 	if (!cache_ptr)
 		return 0;
 
-	ret = mv_cesa_ahash_alloc_cache(req);
-	if (ret)
-		return ret;
-
 	memcpy(creq->cache, cache, cache_ptr);
 	creq->cache_ptr = cache_ptr;
 
@@ -860,9 +810,14 @@
 
 static int mv_cesa_md5_init(struct ahash_request *req)
 {
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
+	creq->state[0] = MD5_H0;
+	creq->state[1] = MD5_H1;
+	creq->state[2] = MD5_H2;
+	creq->state[3] = MD5_H3;
 
 	mv_cesa_ahash_init(req, &tmpl, true);
 
@@ -923,9 +878,15 @@
 
 static int mv_cesa_sha1_init(struct ahash_request *req)
 {
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1);
+	creq->state[0] = SHA1_H0;
+	creq->state[1] = SHA1_H1;
+	creq->state[2] = SHA1_H2;
+	creq->state[3] = SHA1_H3;
+	creq->state[4] = SHA1_H4;
 
 	mv_cesa_ahash_init(req, &tmpl, false);
 
@@ -986,9 +947,18 @@
 
 static int mv_cesa_sha256_init(struct ahash_request *req)
 {
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256);
+	creq->state[0] = SHA256_H0;
+	creq->state[1] = SHA256_H1;
+	creq->state[2] = SHA256_H2;
+	creq->state[3] = SHA256_H3;
+	creq->state[4] = SHA256_H4;
+	creq->state[5] = SHA256_H5;
+	creq->state[6] = SHA256_H6;
+	creq->state[7] = SHA256_H7;
 
 	mv_cesa_ahash_init(req, &tmpl, false);
 

diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 64281bb..4de78c5 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig

@@ -61,7 +61,7 @@
 	  Sets the frequency at the user specified one.
 	  This governor returns the user configured frequency if there
 	  has been an input to /sys/devices/.../power/devfreq_set_freq.
-	  Otherwise, the governor does not change the frequnecy
+	  Otherwise, the governor does not change the frequency
 	  given at the initialization.
 
 comment "DEVFREQ Drivers"

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 9810d1d..4a2c07e 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c

@@ -259,6 +259,7 @@
 	struct dma_buf *dmabuf;
 	struct dma_buf_sync sync;
 	enum dma_data_direction direction;
+	int ret;
 
 	dmabuf = file->private_data;
 
@@ -285,11 +286,11 @@
 		}
 
 		if (sync.flags & DMA_BUF_SYNC_END)
-			dma_buf_end_cpu_access(dmabuf, direction);
+			ret = dma_buf_end_cpu_access(dmabuf, direction);
 		else
-			dma_buf_begin_cpu_access(dmabuf, direction);
+			ret = dma_buf_begin_cpu_access(dmabuf, direction);
 
-		return 0;
+		return ret;
 	default:
 		return -ENOTTY;
 	}
@@ -611,15 +612,19 @@
  * @dmabuf:	[in]	buffer to complete cpu access for.
  * @direction:	[in]	length of range for cpu access.
  *
- * This call must always succeed.
+ * Can return negative error values, returns 0 on success.
  */
-void dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-			    enum dma_data_direction direction)
+int dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+			   enum dma_data_direction direction)
 {
+	int ret = 0;
+
 	WARN_ON(!dmabuf);
 
 	if (dmabuf->ops->end_cpu_access)
-		dmabuf->ops->end_cpu_access(dmabuf, direction);
+		ret = dmabuf->ops->end_cpu_access(dmabuf, direction);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
 

diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h
index dc68744..6b81687 100644
--- a/drivers/dma/idma64.h
+++ b/drivers/dma/idma64.h

@@ -16,7 +16,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
-#include <asm-generic/io-64-nonatomic-lo-hi.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 
 #include "virt-dma.h"
 

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 36a7c2d..aee149b 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c

@@ -221,7 +221,7 @@
 #ifdef CONFIG_COMPAT
 static void __user *u64_to_uptr(u64 value)
 {
-	if (is_compat_task())
+	if (in_compat_syscall())
 		return compat_ptr(value);
 	else
 		return (void __user *)(unsigned long)value;
@@ -229,7 +229,7 @@
 
 static u64 uptr_to_u64(void __user *ptr)
 {
-	if (is_compat_task())
+	if (in_compat_syscall())
 		return ptr_to_compat(ptr);
 	else
 		return (u64)(unsigned long)ptr;

diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c
index 8a46077..631c977 100644
--- a/drivers/firewire/nosy.c
+++ b/drivers/firewire/nosy.c

@@ -446,14 +446,16 @@
 bus_reset_irq_handler(struct pcilynx *lynx)
 {
 	struct client *client;
-	struct timeval tv;
+	struct timespec64 ts64;
+	u32    timestamp;
 
-	do_gettimeofday(&tv);
+	ktime_get_real_ts64(&ts64);
+	timestamp = ts64.tv_nsec / NSEC_PER_USEC;
 
 	spin_lock(&lynx->client_list_lock);
 
 	list_for_each_entry(client, &lynx->client_list, link)
-		packet_buffer_put(&client->buffer, &tv.tv_usec, 4);
+		packet_buffer_put(&client->buffer, &timestamp, 4);
 
 	spin_unlock(&lynx->client_list_lock);
 }

diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index b23a271..096adcb 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c

@@ -231,7 +231,7 @@
 
 static inline bool is_compat(void)
 {
-	if (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())
+	if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall())
 		return true;
 
 	return false;

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index a15841e..da99bbb 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile

@@ -25,6 +25,9 @@
 UBSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT			:= n
+
 lib-y				:= efi-stub-helper.o
 
 # include the stub's generic dependencies from lib/ when building for ARM/arm64

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index fedbff5..815c4a5 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c

@@ -77,12 +77,28 @@
 static inline void fw_cfg_read_blob(u16 key,
 				    void *buf, loff_t pos, size_t count)
 {
+	u32 glk;
+	acpi_status status;
+
+	/* If we have ACPI, ensure mutual exclusion against any potential
+	 * device access by the firmware, e.g. via AML methods:
+	 */
+	status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+	if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+		/* Should never get here */
+		WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
+		memset(buf, 0, count);
+		return;
+	}
+
 	mutex_lock(&fw_cfg_dev_lock);
 	iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
 	while (pos-- > 0)
 		ioread8(fw_cfg_reg_data);
 	ioread8_rep(fw_cfg_reg_data, buf, count);
 	mutex_unlock(&fw_cfg_dev_lock);
+
+	acpi_release_global_lock(glk);
 }
 
 /* clean up fw_cfg device i/o */
@@ -727,12 +743,18 @@
 
 static int __init fw_cfg_sysfs_init(void)
 {
+	int ret;
+
 	/* create /sys/firmware/qemu_fw_cfg/ top level directory */
 	fw_cfg_top_ko = kobject_create_and_add("qemu_fw_cfg", firmware_kobj);
 	if (!fw_cfg_top_ko)
 		return -ENOMEM;
 
-	return platform_driver_register(&fw_cfg_sysfs_driver);
+	ret = platform_driver_register(&fw_cfg_sysfs_driver);
+	if (ret)
+		fw_cfg_kobj_cleanup(fw_cfg_top_ko);
+
+	return ret;
 }
 
 static void __exit fw_cfg_sysfs_exit(void)

diff --git a/drivers/gpio/gpio-menz127.c b/drivers/gpio/gpio-menz127.c
index a68e199..c5c9599 100644
--- a/drivers/gpio/gpio-menz127.c
+++ b/drivers/gpio/gpio-menz127.c

@@ -37,7 +37,6 @@
 	void __iomem *reg_base;
 	struct mcb_device *mdev;
 	struct resource *mem;
-	spinlock_t lock;
 };
 
 static int men_z127_debounce(struct gpio_chip *gc, unsigned gpio,
@@ -69,7 +68,7 @@
 		debounce /= 50;
 	}
 
-	spin_lock(&priv->lock);
+	spin_lock(&gc->bgpio_lock);
 
 	db_en = readl(priv->reg_base + MEN_Z127_DBER);
 
@@ -84,7 +83,7 @@
 	writel(db_en, priv->reg_base + MEN_Z127_DBER);
 	writel(db_cnt, priv->reg_base + GPIO_TO_DBCNT_REG(gpio));
 
-	spin_unlock(&priv->lock);
+	spin_unlock(&gc->bgpio_lock);
 
 	return 0;
 }
@@ -97,7 +96,7 @@
 	if (gpio_pin >= gc->ngpio)
 		return -EINVAL;
 
-	spin_lock(&priv->lock);
+	spin_lock(&gc->bgpio_lock);
 	od_en = readl(priv->reg_base + MEN_Z127_ODER);
 
 	if (gpiochip_line_is_open_drain(gc, gpio_pin))
@@ -106,7 +105,7 @@
 		od_en &= ~BIT(gpio_pin);
 
 	writel(od_en, priv->reg_base + MEN_Z127_ODER);
-	spin_unlock(&priv->lock);
+	spin_unlock(&gc->bgpio_lock);
 
 	return 0;
 }

diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c
index c0aa387..0dc9161 100644
--- a/drivers/gpio/gpio-xgene.c
+++ b/drivers/gpio/gpio-xgene.c

@@ -173,6 +173,11 @@
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		err = -EINVAL;
+		goto err;
+	}
+
 	gpio->base = devm_ioremap_nocache(&pdev->dev, res->start,
 							resource_size(res));
 	if (!gpio->base) {

diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
index 0f734ee..ca77ec1 100644
--- a/drivers/gpu/drm/amd/acp/Kconfig
+++ b/drivers/gpu/drm/amd/acp/Kconfig

@@ -1,10 +1,14 @@
-menu "ACP Configuration"
+menu "ACP (Audio CoProcessor) Configuration"
 
 config DRM_AMD_ACP
-       bool "Enable ACP IP support"
+       bool "Enable AMD Audio CoProcessor IP support"
        select MFD_CORE
        select PM_GENERIC_DOMAINS if PM
        help
 	Choose this option to enable ACP IP support for AMD SOCs.
+	This adds the ACP (Audio CoProcessor) IP driver and wires
+	it up into the amdgpu driver.  The ACP block provides the DMA
+	engine for the i2s-based ALSA driver. It is required for audio
+	on APUs which utilize an i2s codec.
 
 endmenu

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c4a21c6..62a7780 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -1591,6 +1591,7 @@
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;
 	uint64_t		gpu_addr;
+	void			*saved_bo;
 	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES];
 	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];
 	struct delayed_work	idle_work;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 7a4b101..6043dc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

@@ -816,10 +816,13 @@
 	struct drm_device *ddev = adev->ddev;
 	struct drm_crtc *crtc;
 	uint32_t line_time_us, vblank_lines;
+	struct cgs_mode_info *mode_info;
 
 	if (info == NULL)
 		return -EINVAL;
 
+	mode_info = info->mode_info;
+
 	if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
 		list_for_each_entry(crtc,
 				&ddev->mode_config.crtc_list, head) {
@@ -828,7 +831,7 @@
 				info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
 				info->display_count++;
 			}
-			if (info->mode_info != NULL &&
+			if (mode_info != NULL &&
 				crtc->enabled && amdgpu_crtc->enabled &&
 				amdgpu_crtc->hw_mode.clock) {
 				line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
@@ -836,10 +839,10 @@
 				vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
 							amdgpu_crtc->hw_mode.crtc_vdisplay +
 							(amdgpu_crtc->v_border * 2);
-				info->mode_info->vblank_time_us = vblank_lines * line_time_us;
-				info->mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
-				info->mode_info->ref_clock = adev->clock.spll.reference_freq;
-				info->mode_info++;
+				mode_info->vblank_time_us = vblank_lines * line_time_us;
+				mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
+				mode_info->ref_clock = adev->clock.spll.reference_freq;
+				mode_info = NULL;
 			}
 		}
 	}
@@ -847,6 +850,16 @@
 	return 0;
 }
 
+
+static int amdgpu_cgs_notify_dpm_enabled(void *cgs_device, bool enabled)
+{
+	CGS_FUNC_ADEV;
+
+	adev->pm.dpm_enabled = enabled;
+
+	return 0;
+}
+
 /** \brief evaluate acpi namespace object, handle or pathname must be valid
  *  \param cgs_device
  *  \param info input/output arguments for the control method
@@ -1097,6 +1110,7 @@
 	amdgpu_cgs_set_powergating_state,
 	amdgpu_cgs_set_clockgating_state,
 	amdgpu_cgs_get_active_displays_info,
+	amdgpu_cgs_notify_dpm_enabled,
 	amdgpu_cgs_call_acpi_method,
 	amdgpu_cgs_query_system_info,
 };

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index f0ed974..3fb405b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c

@@ -57,7 +57,7 @@
 	if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
 		return true;
 
-	fence_put(*f);
+	fence_put(fence);
 	return false;
 }
 

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 4303b44..d81f1f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

@@ -121,7 +121,7 @@
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
-	struct fence **ptr;
+	struct fence *old, **ptr;
 	uint32_t seq;
 
 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
@@ -141,7 +141,11 @@
 	/* This function can't be called concurrently anyway, otherwise
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
-	BUG_ON(rcu_dereference_protected(*ptr, 1));
+	old = rcu_dereference_protected(*ptr, 1);
+	if (old && !fence_is_signaled(old)) {
+		DRM_INFO("rcu slot is busy\n");
+		fence_wait(old, false);
+	}
 
 	rcu_assign_pointer(*ptr, fence_get(&fence->base));
 

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index f594cfa..762cfdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c

@@ -219,6 +219,8 @@
 	if (r) {
 		return r;
 	}
+	adev->ddev->vblank_disable_allowed = true;
+
 	/* enable msi */
 	adev->irq.msi_enabled = false;
 

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 7805a87..598eb0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

@@ -382,6 +382,7 @@
 		struct drm_amdgpu_info_vram_gtt vram_gtt;
 
 		vram_gtt.vram_size = adev->mc.real_vram_size;
+		vram_gtt.vram_size -= adev->vram_pin_size;
 		vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size;
 		vram_gtt.vram_cpu_accessible_size -= adev->vram_pin_size;
 		vram_gtt.gtt_size  = adev->mc.gtt_size;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index d7ec9bd..9f4a45c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

@@ -48,7 +48,8 @@
 	/* protected by adev->mn_lock */
 	struct hlist_node	node;
 
-	/* objects protected by mm->mmap_sem */
+	/* objects protected by lock */
+	struct mutex		lock;
 	struct rb_root		objects;
 };
 
@@ -72,7 +73,7 @@
 	struct amdgpu_bo *bo, *next_bo;
 
 	mutex_lock(&adev->mn_lock);
-	down_write(&rmn->mm->mmap_sem);
+	mutex_lock(&rmn->lock);
 	hash_del(&rmn->node);
 	rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
 					     it.rb) {
@@ -82,7 +83,7 @@
 		}
 		kfree(node);
 	}
-	up_write(&rmn->mm->mmap_sem);
+	mutex_unlock(&rmn->lock);
 	mutex_unlock(&adev->mn_lock);
 	mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
 	kfree(rmn);
@@ -105,6 +106,76 @@
 }
 
 /**
+ * amdgpu_mn_invalidate_node - unmap all BOs of a node
+ *
+ * @node: the node with the BOs to unmap
+ *
+ * We block for all BOs and unmap them by move them
+ * into system domain again.
+ */
+static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
+				      unsigned long start,
+				      unsigned long end)
+{
+	struct amdgpu_bo *bo;
+	long r;
+
+	list_for_each_entry(bo, &node->bos, mn_list) {
+
+		if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
+			continue;
+
+		r = amdgpu_bo_reserve(bo, true);
+		if (r) {
+			DRM_ERROR("(%ld) failed to reserve user bo\n", r);
+			continue;
+		}
+
+		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+			true, false, MAX_SCHEDULE_TIMEOUT);
+		if (r <= 0)
+			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (r)
+			DRM_ERROR("(%ld) failed to validate user bo\n", r);
+
+		amdgpu_bo_unreserve(bo);
+	}
+}
+
+/**
+ * amdgpu_mn_invalidate_page - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ * @address: address of invalidate page
+ *
+ * Invalidation of a single page. Blocks for all BOs mapping it
+ * and unmap them by move them into system domain again.
+ */
+static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
+				      struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+	struct interval_tree_node *it;
+
+	mutex_lock(&rmn->lock);
+
+	it = interval_tree_iter_first(&rmn->objects, address, address);
+	if (it) {
+		struct amdgpu_mn_node *node;
+
+		node = container_of(it, struct amdgpu_mn_node, it);
+		amdgpu_mn_invalidate_node(node, address, address);
+	}
+
+	mutex_unlock(&rmn->lock);
+}
+
+/**
  * amdgpu_mn_invalidate_range_start - callback to notify about mm change
  *
  * @mn: our notifier
@@ -126,44 +197,24 @@
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
+	mutex_lock(&rmn->lock);
+
 	it = interval_tree_iter_first(&rmn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
-		struct amdgpu_bo *bo;
-		long r;
 
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
-		list_for_each_entry(bo, &node->bos, mn_list) {
-
-			if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
-							  end))
-				continue;
-
-			r = amdgpu_bo_reserve(bo, true);
-			if (r) {
-				DRM_ERROR("(%ld) failed to reserve user bo\n", r);
-				continue;
-			}
-
-			r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
-				true, false, MAX_SCHEDULE_TIMEOUT);
-			if (r <= 0)
-				DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-			amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
-			r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
-			if (r)
-				DRM_ERROR("(%ld) failed to validate user bo\n", r);
-
-			amdgpu_bo_unreserve(bo);
-		}
+		amdgpu_mn_invalidate_node(node, start, end);
 	}
+
+	mutex_unlock(&rmn->lock);
 }
 
 static const struct mmu_notifier_ops amdgpu_mn_ops = {
 	.release = amdgpu_mn_release,
+	.invalidate_page = amdgpu_mn_invalidate_page,
 	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
 };
 
@@ -196,6 +247,7 @@
 	rmn->adev = adev;
 	rmn->mm = mm;
 	rmn->mn.ops = &amdgpu_mn_ops;
+	mutex_init(&rmn->lock);
 	rmn->objects = RB_ROOT;
 
 	r = __mmu_notifier_register(&rmn->mn, mm);
@@ -242,7 +294,7 @@
 
 	INIT_LIST_HEAD(&bos);
 
-	down_write(&rmn->mm->mmap_sem);
+	mutex_lock(&rmn->lock);
 
 	while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
 		kfree(node);
@@ -256,7 +308,7 @@
 	if (!node) {
 		node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
 		if (!node) {
-			up_write(&rmn->mm->mmap_sem);
+			mutex_unlock(&rmn->lock);
 			return -ENOMEM;
 		}
 	}
@@ -271,7 +323,7 @@
 
 	interval_tree_insert(&node->it, &rmn->objects);
 
-	up_write(&rmn->mm->mmap_sem);
+	mutex_unlock(&rmn->lock);
 
 	return 0;
 }
@@ -297,7 +349,7 @@
 		return;
 	}
 
-	down_write(&rmn->mm->mmap_sem);
+	mutex_lock(&rmn->lock);
 
 	/* save the next list entry for later */
 	head = bo->mn_list.next;
@@ -312,6 +364,6 @@
 		kfree(node);
 	}
 
-	up_write(&rmn->mm->mmap_sem);
+	mutex_unlock(&rmn->lock);
 	mutex_unlock(&adev->mn_lock);
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 151a2d4..5b6639f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

@@ -476,6 +476,17 @@
 	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
 }
 
+static const char *amdgpu_vram_names[] = {
+	"UNKNOWN",
+	"GDDR1",
+	"DDR2",
+	"GDDR3",
+	"GDDR4",
+	"GDDR5",
+	"HBM",
+	"DDR3"
+};
+
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
 	/* Add an MTRR for the VRAM */
@@ -484,8 +495,8 @@
 	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
 		adev->mc.mc_vram_size >> 20,
 		(unsigned long long)adev->mc.aper_size >> 20);
-	DRM_INFO("RAM width %dbits DDR\n",
-			adev->mc.vram_width);
+	DRM_INFO("RAM width %dbits %s\n",
+		 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
 	return amdgpu_ttm_init(adev);
 }
 
@@ -608,6 +619,10 @@
 	if ((offset + size) <= adev->mc.visible_vram_size)
 		return 0;
 
+	/* Can't move a pinned BO to visible VRAM */
+	if (abo->pin_count > 0)
+		return -EINVAL;
+
 	/* hurrah the memory is not visible ! */
 	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
 	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 3cb6d6c..e9c6ae6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c

@@ -143,7 +143,7 @@
 					adev->powerplay.pp_handle);
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled) {
+	if (adev->pp_enabled && adev->pm.dpm_enabled) {
 		amdgpu_pm_sysfs_init(adev);
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL);
 	}
@@ -161,12 +161,8 @@
 					adev->powerplay.pp_handle);
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled) {
-		if (amdgpu_dpm == 0)
-			adev->pm.dpm_enabled = false;
-		else
-			adev->pm.dpm_enabled = true;
-	}
+	if (adev->pp_enabled)
+		adev->pm.dpm_enabled = true;
 #endif
 
 	return ret;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ab34190..6f3369d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

@@ -384,9 +384,15 @@
 			struct ttm_mem_reg *new_mem)
 {
 	struct amdgpu_device *adev;
+	struct amdgpu_bo *abo;
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int r;
 
+	/* Can't move a pinned BO */
+	abo = container_of(bo, struct amdgpu_bo, tbo);
+	if (WARN_ON_ONCE(abo->pin_count > 0))
+		return -EINVAL;
+
 	adev = amdgpu_get_adev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		amdgpu_move_null(bo, new_mem);
@@ -616,7 +622,7 @@
 			set_page_dirty(page);
 
 		mark_page_accessed(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	sg_free_table(ttm->sg);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index c1a5810..338da80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c

@@ -241,32 +241,28 @@
 
 int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring = &adev->uvd.ring;
-	int i, r;
+	unsigned size;
+	void *ptr;
+	int i;
 
 	if (adev->uvd.vcpu_bo == NULL)
 		return 0;
 
-	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
-		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
-		if (handle != 0) {
-			struct fence *fence;
+	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
+		if (atomic_read(&adev->uvd.handles[i]))
+			break;
 
-			amdgpu_uvd_note_usage(adev);
+	if (i == AMDGPU_MAX_UVD_HANDLES)
+		return 0;
 
-			r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence);
-			if (r) {
-				DRM_ERROR("Error destroying UVD (%d)!\n", r);
-				continue;
-			}
+	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
+	ptr = adev->uvd.cpu_addr;
 
-			fence_wait(fence, false);
-			fence_put(fence);
+	adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
+	if (!adev->uvd.saved_bo)
+		return -ENOMEM;
 
-			adev->uvd.filp[i] = NULL;
-			atomic_set(&adev->uvd.handles[i], 0);
-		}
-	}
+	memcpy(adev->uvd.saved_bo, ptr, size);
 
 	return 0;
 }
@@ -275,23 +271,29 @@
 {
 	unsigned size;
 	void *ptr;
-	const struct common_firmware_header *hdr;
-	unsigned offset;
 
 	if (adev->uvd.vcpu_bo == NULL)
 		return -EINVAL;
 
-	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
-	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
-	memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
-		(adev->uvd.fw->size) - offset);
-
 	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
-	size -= le32_to_cpu(hdr->ucode_size_bytes);
 	ptr = adev->uvd.cpu_addr;
-	ptr += le32_to_cpu(hdr->ucode_size_bytes);
 
-	memset(ptr, 0, size);
+	if (adev->uvd.saved_bo != NULL) {
+		memcpy(ptr, adev->uvd.saved_bo, size);
+		kfree(adev->uvd.saved_bo);
+		adev->uvd.saved_bo = NULL;
+	} else {
+		const struct common_firmware_header *hdr;
+		unsigned offset;
+
+		hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+		memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
+			(adev->uvd.fw->size) - offset);
+		size -= le32_to_cpu(hdr->ucode_size_bytes);
+		ptr += le32_to_cpu(hdr->ucode_size_bytes);
+		memset(ptr, 0, size);
+	}
 
 	return 0;
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 82ce7d9..05b0353 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

@@ -903,14 +903,6 @@
 	gmc_v7_0_set_gart_funcs(adev);
 	gmc_v7_0_set_irq_funcs(adev);
 
-	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-	} else {
-		u32 tmp = RREG32(mmMC_SEQ_MISC0);
-		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
-	}
-
 	return 0;
 }
 
@@ -927,6 +919,14 @@
 	int dma_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->flags & AMD_IS_APU) {
+		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+	} else {
+		u32 tmp = RREG32(mmMC_SEQ_MISC0);
+		tmp &= MC_SEQ_MISC0__MT__MASK;
+		adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
+	}
+
 	r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 29bd7b5..02deb32 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

@@ -863,14 +863,6 @@
 	gmc_v8_0_set_gart_funcs(adev);
 	gmc_v8_0_set_irq_funcs(adev);
 
-	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-	} else {
-		u32 tmp = RREG32(mmMC_SEQ_MISC0);
-		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
-	}
-
 	return 0;
 }
 
@@ -881,12 +873,27 @@
 	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
 }
 
+#define mmMC_SEQ_MISC0_FIJI 0xA71
+
 static int gmc_v8_0_sw_init(void *handle)
 {
 	int r;
 	int dma_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->flags & AMD_IS_APU) {
+		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+	} else {
+		u32 tmp;
+
+		if (adev->asic_type == CHIP_FIJI)
+			tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
+		else
+			tmp = RREG32(mmMC_SEQ_MISC0);
+		tmp &= MC_SEQ_MISC0__MT__MASK;
+		adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
+	}
+
 	r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c606ccb..cb46375 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c

@@ -224,11 +224,11 @@
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = amdgpu_uvd_suspend(adev);
+	r = uvd_v4_2_hw_fini(adev);
 	if (r)
 		return r;
 
-	r = uvd_v4_2_hw_fini(adev);
+	r = amdgpu_uvd_suspend(adev);
 	if (r)
 		return r;
 

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index e3c852d..16476d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c

@@ -220,11 +220,11 @@
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = amdgpu_uvd_suspend(adev);
+	r = uvd_v5_0_hw_fini(adev);
 	if (r)
 		return r;
 
-	r = uvd_v5_0_hw_fini(adev);
+	r = amdgpu_uvd_suspend(adev);
 	if (r)
 		return r;
 

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 3375e614..d493791 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c

@@ -214,15 +214,16 @@
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	r = uvd_v6_0_hw_fini(adev);
+	if (r)
+		return r;
+
 	/* Skip this for APU for now */
 	if (!(adev->flags & AMD_IS_APU)) {
 		r = amdgpu_uvd_suspend(adev);
 		if (r)
 			return r;
 	}
-	r = uvd_v6_0_hw_fini(adev);
-	if (r)
-		return r;
 
 	return r;
 }

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index d2b49c0..07ac724 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

@@ -107,7 +107,7 @@
 	if (iminor(inode) != 0)
 		return -ENODEV;
 
-	is_32bit_user_mode = is_compat_task();
+	is_32bit_user_mode = in_compat_syscall();
 
 	if (is_32bit_user_mode == true) {
 		dev_warn(kfd_device,

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a902ae0..ac00579 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c

@@ -311,7 +311,7 @@
 		goto err_process_pqm_init;
 
 	/* init process apertures*/
-	process->is_32bit_user_mode = is_compat_task();
+	process->is_32bit_user_mode = in_compat_syscall();
 	if (kfd_init_apertures(process) != 0)
 		goto err_init_apretures;
 

diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index aec38fc..ab84d49 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h

@@ -589,6 +589,8 @@
 					void *cgs_device,
 					struct cgs_display_info *info);
 
+typedef int (*cgs_notify_dpm_enabled)(void *cgs_device, bool enabled);
+
 typedef int (*cgs_call_acpi_method)(void *cgs_device,
 					uint32_t acpi_method,
 					uint32_t acpi_function,
@@ -644,6 +646,8 @@
 	cgs_set_clockgating_state set_clockgating_state;
 	/* display manager */
 	cgs_get_active_displays_info get_active_displays_info;
+	/* notify dpm enabled */
+	cgs_notify_dpm_enabled notify_dpm_enabled;
 	/* ACPI */
 	cgs_call_acpi_method call_acpi_method;
 	/* get system info */
@@ -734,8 +738,12 @@
 	CGS_CALL(set_powergating_state, dev, block_type, state)
 #define cgs_set_clockgating_state(dev, block_type, state)	\
 	CGS_CALL(set_clockgating_state, dev, block_type, state)
+#define cgs_notify_dpm_enabled(dev, enabled)	\
+	CGS_CALL(notify_dpm_enabled, dev, enabled)
+
 #define cgs_get_active_displays_info(dev, info)	\
 	CGS_CALL(get_active_displays_info, dev, info)
+
 #define cgs_call_acpi_method(dev, acpi_method, acpi_function, pintput, poutput, output_count, input_size, output_size)	\
 	CGS_CALL(call_acpi_method, dev, acpi_method, acpi_function, pintput, poutput, output_count, input_size, output_size)
 #define cgs_query_system_info(dev, sys_info)	\

diff --git a/drivers/gpu/drm/amd/powerplay/Makefile b/drivers/gpu/drm/amd/powerplay/Makefile
index e195bf5..043e6eb 100644
--- a/drivers/gpu/drm/amd/powerplay/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/Makefile

@@ -1,17 +1,17 @@
 
 subdir-ccflags-y += -Iinclude/drm  \
-		-Idrivers/gpu/drm/amd/powerplay/inc/  \
-		-Idrivers/gpu/drm/amd/include/asic_reg  \
-		-Idrivers/gpu/drm/amd/include  \
-		-Idrivers/gpu/drm/amd/powerplay/smumgr\
-		-Idrivers/gpu/drm/amd/powerplay/hwmgr \
-		-Idrivers/gpu/drm/amd/powerplay/eventmgr
+		-I$(FULL_AMD_PATH)/powerplay/inc/  \
+		-I$(FULL_AMD_PATH)/include/asic_reg  \
+		-I$(FULL_AMD_PATH)/include  \
+		-I$(FULL_AMD_PATH)/powerplay/smumgr\
+		-I$(FULL_AMD_PATH)/powerplay/hwmgr \
+		-I$(FULL_AMD_PATH)/powerplay/eventmgr
 
 AMD_PP_PATH = ../powerplay
 
 PP_LIBS = smumgr hwmgr eventmgr
 
-AMD_POWERPLAY = $(addsuffix /Makefile,$(addprefix drivers/gpu/drm/amd/powerplay/,$(PP_LIBS)))
+AMD_POWERPLAY = $(addsuffix /Makefile,$(addprefix $(FULL_AMD_PATH)/powerplay/,$(PP_LIBS)))
 
 include $(AMD_POWERPLAY)
 

diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
index 6b52c78..56856a2 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c

@@ -137,14 +137,14 @@
 	reset_display_configCounter_tasks,
 	update_dal_configuration_tasks,
 	vari_bright_resume_tasks,
-	block_adjust_power_state_tasks,
 	setup_asic_tasks,
 	enable_stutter_mode_tasks, /*must do this in boot state and before SMC is started */
 	enable_dynamic_state_management_tasks,
 	enable_clock_power_gatings_tasks,
 	enable_disable_bapm_tasks,
 	initialize_thermal_controller_tasks,
-	reset_boot_state_tasks,
+	get_2d_performance_state_tasks,
+	set_performance_state_tasks,
 	adjust_power_state_tasks,
 	enable_disable_fps_tasks,
 	notify_hw_power_source_tasks,

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 51dedf8..89f31bc 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c

@@ -2389,6 +2389,7 @@
 
 	for(count = 0; count < table->VceLevelCount; count++) {
 		table->VceLevel[count].Frequency = mm_table->entries[count].eclk;
+		table->VceLevel[count].MinVoltage = 0;
 		table->VceLevel[count].MinVoltage |=
 				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
 		table->VceLevel[count].MinVoltage |=
@@ -2465,6 +2466,7 @@
 
 	for (count = 0; count < table->SamuLevelCount; count++) {
 		/* not sure whether we need evclk or not */
+		table->SamuLevel[count].MinVoltage = 0;
 		table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
 		table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
@@ -2562,6 +2564,7 @@
 	table->UvdBootLevel = 0;
 
 	for (count = 0; count < table->UvdLevelCount; count++) {
+		table->UvdLevel[count].MinVoltage = 0;
 		table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk;
 		table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
 		table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
@@ -2900,6 +2903,8 @@
 	if(FIJI_VOLTAGE_CONTROL_NONE != data->voltage_control)
 		fiji_populate_smc_voltage_tables(hwmgr, table);
 
+	table->SystemFlags = 0;
+
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_AutomaticDCTransition))
 		table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC;
@@ -2997,6 +3002,7 @@
 	table->MemoryThermThrottleEnable = 1;
 	table->PCIeBootLinkLevel = 0;      /* 0:Gen1 1:Gen2 2:Gen3*/
 	table->PCIeGenInterval = 1;
+	table->VRConfig = 0;
 
 	result = fiji_populate_vr_config(hwmgr, table);
 	PP_ASSERT_WITH_CODE(0 == result,
@@ -5195,6 +5201,67 @@
 	return size;
 }
 
+static inline bool fiji_are_power_levels_equal(const struct fiji_performance_level *pl1,
+							   const struct fiji_performance_level *pl2)
+{
+	return ((pl1->memory_clock == pl2->memory_clock) &&
+		  (pl1->engine_clock == pl2->engine_clock) &&
+		  (pl1->pcie_gen == pl2->pcie_gen) &&
+		  (pl1->pcie_lane == pl2->pcie_lane));
+}
+
+int fiji_check_states_equal(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *pstate1, const struct pp_hw_power_state *pstate2, bool *equal)
+{
+	const struct fiji_power_state *psa = cast_const_phw_fiji_power_state(pstate1);
+	const struct fiji_power_state *psb = cast_const_phw_fiji_power_state(pstate2);
+	int i;
+
+	if (equal == NULL || psa == NULL || psb == NULL)
+		return -EINVAL;
+
+	/* If the two states don't even have the same number of performance levels they cannot be the same state. */
+	if (psa->performance_level_count != psb->performance_level_count) {
+		*equal = false;
+		return 0;
+	}
+
+	for (i = 0; i < psa->performance_level_count; i++) {
+		if (!fiji_are_power_levels_equal(&(psa->performance_levels[i]), &(psb->performance_levels[i]))) {
+			/* If we have found even one performance level pair that is different the states are different. */
+			*equal = false;
+			return 0;
+		}
+	}
+
+	/* If all performance levels are the same try to use the UVD clocks to break the tie.*/
+	*equal = ((psa->uvd_clks.vclk == psb->uvd_clks.vclk) && (psa->uvd_clks.dclk == psb->uvd_clks.dclk));
+	*equal &= ((psa->vce_clks.evclk == psb->vce_clks.evclk) && (psa->vce_clks.ecclk == psb->vce_clks.ecclk));
+	*equal &= (psa->sclk_threshold == psb->sclk_threshold);
+	*equal &= (psa->acp_clk == psb->acp_clk);
+
+	return 0;
+}
+
+bool fiji_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	bool is_update_required = false;
+	struct cgs_display_info info = {0,0,NULL};
+
+	cgs_get_active_displays_info(hwmgr->device, &info);
+
+	if (data->display_timing.num_existing_displays != info.display_count)
+		is_update_required = true;
+/* TO DO NEED TO GET DEEP SLEEP CLOCK FROM DAL
+	if (phm_cap_enabled(hwmgr->hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) {
+		cgs_get_min_clock_settings(hwmgr->device, &min_clocks);
+		if(min_clocks.engineClockInSR != data->display_timing.minClockInSR)
+			is_update_required = true;
+*/
+	return is_update_required;
+}
+
+
 static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.backend_init = &fiji_hwmgr_backend_init,
 	.backend_fini = &tonga_hwmgr_backend_fini,
@@ -5230,6 +5297,8 @@
 	.register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt,
 	.set_fan_control_mode = fiji_set_fan_control_mode,
 	.get_fan_control_mode = fiji_get_fan_control_mode,
+	.check_states_equal = fiji_check_states_equal,
+	.check_smc_update_required_for_display_configuration = fiji_check_smc_update_required_for_display_configuration,
 	.get_pp_table = fiji_get_pp_table,
 	.set_pp_table = fiji_set_pp_table,
 	.force_clock_level = fiji_force_clock_level,

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index be31bed..fa208ad 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c

@@ -58,6 +58,9 @@
 
 	phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_VpuRecoveryInProgress);
 
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_UVDDPM);
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_VCEDPM);
+
 	if (acpi_atcs_functions_supported(hwmgr->device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST) &&
 		acpi_atcs_functions_supported(hwmgr->device, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION))
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PCIEPerformanceRequest);
@@ -130,18 +133,25 @@
 
 int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 {
+	int ret = 1;
+	bool enabled;
 	PHM_FUNC_CHECK(hwmgr);
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 		PHM_PlatformCaps_TablelessHardwareInterface)) {
 		if (NULL != hwmgr->hwmgr_func->dynamic_state_management_enable)
-			return hwmgr->hwmgr_func->dynamic_state_management_enable(hwmgr);
+			ret = hwmgr->hwmgr_func->dynamic_state_management_enable(hwmgr);
 	} else {
-		return phm_dispatch_table(hwmgr,
+		ret = phm_dispatch_table(hwmgr,
 				&(hwmgr->enable_dynamic_state_management),
 				NULL, NULL);
 	}
-	return 0;
+
+	enabled = ret == 0 ? true : false;
+
+	cgs_notify_dpm_enabled(hwmgr->device, enabled);
+
+	return ret;
 }
 
 int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
index 34f4bef..b156481 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c

@@ -512,8 +512,10 @@
 
 	hwmgr->dyn_state.cac_dtp_table = kzalloc(table_size, GFP_KERNEL);
 
-	if (NULL == hwmgr->dyn_state.cac_dtp_table)
+	if (NULL == hwmgr->dyn_state.cac_dtp_table) {
+		kfree(tdp_table);
 		return -ENOMEM;
+	}
 
 	memset(hwmgr->dyn_state.cac_dtp_table, 0x00, table_size);
 

diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 56b829f..3ac1ae4 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c

@@ -57,14 +57,13 @@
 		DRM_ERROR("failed to map control registers area\n");
 		ret = PTR_ERR(hdlcd->mmio);
 		hdlcd->mmio = NULL;
-		goto fail;
+		return ret;
 	}
 
 	version = hdlcd_read(hdlcd, HDLCD_REG_VERSION);
 	if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) {
 		DRM_ERROR("unknown product id: 0x%x\n", version);
-		ret = -EINVAL;
-		goto fail;
+		return -EINVAL;
 	}
 	DRM_INFO("found ARM HDLCD version r%dp%d\n",
 		(version & HDLCD_VERSION_MAJOR_MASK) >> 8,
@@ -73,7 +72,7 @@
 	/* Get the optional framebuffer memory resource */
 	ret = of_reserved_mem_device_init(drm->dev);
 	if (ret && ret != -ENODEV)
-		goto fail;
+		return ret;
 
 	ret = dma_set_mask_and_coherent(drm->dev, DMA_BIT_MASK(32));
 	if (ret)
@@ -101,8 +100,6 @@
 	drm_crtc_cleanup(&hdlcd->crtc);
 setup_fail:
 	of_reserved_mem_device_release(drm->dev);
-fail:
-	devm_clk_put(drm->dev, hdlcd->clk);
 
 	return ret;
 }
@@ -412,7 +409,6 @@
 	pm_runtime_put_sync(drm->dev);
 	pm_runtime_disable(drm->dev);
 	of_reserved_mem_device_release(drm->dev);
-	devm_clk_put(dev, hdlcd->clk);
 err_free:
 	drm_dev_unref(drm);
 
@@ -436,10 +432,6 @@
 	pm_runtime_put_sync(drm->dev);
 	pm_runtime_disable(drm->dev);
 	of_reserved_mem_device_release(drm->dev);
-	if (!IS_ERR(hdlcd->clk)) {
-		devm_clk_put(drm->dev, hdlcd->clk);
-		hdlcd->clk = NULL;
-	}
 	drm_mode_config_cleanup(drm);
 	drm_dev_unregister(drm);
 	drm_dev_unref(drm);

diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 6e731db..aca7f9c 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c

@@ -481,7 +481,7 @@
 
  release:
 	for_each_sg(sgt->sgl, sg, num, i)
-		page_cache_release(sg_page(sg));
+		put_page(sg_page(sg));
  free_table:
 	sg_free_table(sgt);
  free_sgt:
@@ -502,7 +502,7 @@
 	if (dobj->obj.filp) {
 		struct scatterlist *sg;
 		for_each_sg(sgt->sgl, sg, sgt->nents, i)
-			page_cache_release(sg_page(sg));
+			put_page(sg_page(sg));
 	}
 
 	sg_free_table(sgt);

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index 1ffe9c3..d65dcae 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c

@@ -558,7 +558,7 @@
 	if (!state->base.crtc || !fb)
 		return 0;
 
-	crtc_state = s->state->crtc_states[drm_crtc_index(s->crtc)];
+	crtc_state = drm_atomic_get_existing_crtc_state(s->state, s->crtc);
 	mode = &crtc_state->adjusted_mode;
 
 	state->src_x = s->src_x;

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index a2596eb..8ee1db8 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c

@@ -380,7 +380,6 @@
  * drm_atomic_replace_property_blob - replace a blob property
  * @blob: a pointer to the member blob to be replaced
  * @new_blob: the new blob to replace with
- * @expected_size: the expected size of the new blob
  * @replaced: whether the blob has been replaced
  *
  * RETURNS:

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 2bb90fa..4befe25 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c

@@ -67,7 +67,8 @@
 	struct drm_crtc_state *crtc_state;
 
 	if (plane->state->crtc) {
-		crtc_state = state->crtc_states[drm_crtc_index(plane->state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								plane->state->crtc);
 
 		if (WARN_ON(!crtc_state))
 			return;
@@ -76,8 +77,8 @@
 	}
 
 	if (plane_state->crtc) {
-		crtc_state =
-			state->crtc_states[drm_crtc_index(plane_state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								plane_state->crtc);
 
 		if (WARN_ON(!crtc_state))
 			return;
@@ -374,8 +375,8 @@
 		if (!conn_state->crtc || !conn_state->best_encoder)
 			continue;
 
-		crtc_state =
-			state->crtc_states[drm_crtc_index(conn_state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								conn_state->crtc);
 
 		/*
 		 * Each encoder has at most one connector (since we always steal
@@ -679,7 +680,8 @@
 		if (!old_conn_state->crtc)
 			continue;
 
-		old_crtc_state = old_state->crtc_states[drm_crtc_index(old_conn_state->crtc)];
+		old_crtc_state = drm_atomic_get_existing_crtc_state(old_state,
+								    old_conn_state->crtc);
 
 		if (!old_crtc_state->active ||
 		    !drm_atomic_crtc_needs_modeset(old_conn_state->crtc->state))

diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 7d58f59..df64ed1 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c

@@ -179,7 +179,7 @@
 {
 	struct drm_dp_aux_msg msg;
 	unsigned int retry;
-	int err;
+	int err = 0;
 
 	memset(&msg, 0, sizeof(msg));
 	msg.address = offset;
@@ -187,6 +187,8 @@
 	msg.buffer = buffer;
 	msg.size = size;
 
+	mutex_lock(&aux->hw_mutex);
+
 	/*
 	 * The specification doesn't give any recommendation on how often to
 	 * retry native transactions. We used to retry 7 times like for
@@ -195,25 +197,24 @@
 	 */
 	for (retry = 0; retry < 32; retry++) {
 
-		mutex_lock(&aux->hw_mutex);
 		err = aux->transfer(aux, &msg);
-		mutex_unlock(&aux->hw_mutex);
 		if (err < 0) {
 			if (err == -EBUSY)
 				continue;
 
-			return err;
+			goto unlock;
 		}
 
 
 		switch (msg.reply & DP_AUX_NATIVE_REPLY_MASK) {
 		case DP_AUX_NATIVE_REPLY_ACK:
 			if (err < size)
-				return -EPROTO;
-			return err;
+				err = -EPROTO;
+			goto unlock;
 
 		case DP_AUX_NATIVE_REPLY_NACK:
-			return -EIO;
+			err = -EIO;
+			goto unlock;
 
 		case DP_AUX_NATIVE_REPLY_DEFER:
 			usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
@@ -222,7 +223,11 @@
 	}
 
 	DRM_DEBUG_KMS("too many retries, giving up\n");
-	return -EIO;
+	err = -EIO;
+
+unlock:
+	mutex_unlock(&aux->hw_mutex);
+	return err;
 }
 
 /**
@@ -544,9 +549,7 @@
 	int max_retries = max(7, drm_dp_i2c_retry_count(msg, dp_aux_i2c_speed_khz));
 
 	for (retry = 0, defer_i2c = 0; retry < (max_retries + defer_i2c); retry++) {
-		mutex_lock(&aux->hw_mutex);
 		ret = aux->transfer(aux, msg);
-		mutex_unlock(&aux->hw_mutex);
 		if (ret < 0) {
 			if (ret == -EBUSY)
 				continue;
@@ -685,6 +688,8 @@
 
 	memset(&msg, 0, sizeof(msg));
 
+	mutex_lock(&aux->hw_mutex);
+
 	for (i = 0; i < num; i++) {
 		msg.address = msgs[i].addr;
 		drm_dp_i2c_msg_set_request(&msg, &msgs[i]);
@@ -739,6 +744,8 @@
 	msg.size = 0;
 	(void)drm_dp_i2c_do_msg(aux, &msg);
 
+	mutex_unlock(&aux->hw_mutex);
+
 	return err;
 }
 

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 2e8c77e..da0c532 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c

@@ -534,7 +534,7 @@
 
 fail:
 	while (i--)
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 
 	drm_free_large(pages);
 	return ERR_CAST(p);
@@ -569,7 +569,7 @@
 			mark_page_accessed(pages[i]);
 
 		/* Undo the reference we took when populating the table */
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 	}
 
 	drm_free_large(pages);

diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 7bb1f1a..c52f9ad 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c

@@ -220,7 +220,7 @@
  * FIXME: This is the old dp aux helper, gma500 is the last driver that needs to
  * be ported over to the new helper code in drm_dp_helper.c like i915 or radeon.
  */
-static int __deprecated
+static int
 i2c_dp_aux_add_bus(struct i2c_adapter *adapter)
 {
 	int error;

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3d31d3a..dabc089 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c

@@ -177,7 +177,7 @@
 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
 		kunmap_atomic(src);
 
-		page_cache_release(page);
+		put_page(page);
 		vaddr += PAGE_SIZE;
 	}
 
@@ -243,7 +243,7 @@
 			set_page_dirty(page);
 			if (obj->madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
-			page_cache_release(page);
+			put_page(page);
 			vaddr += PAGE_SIZE;
 		}
 		obj->dirty = 0;
@@ -2206,7 +2206,7 @@
 		if (obj->madv == I915_MADV_WILLNEED)
 			mark_page_accessed(page);
 
-		page_cache_release(page);
+		put_page(page);
 	}
 	obj->dirty = 0;
 
@@ -2346,7 +2346,7 @@
 err_pages:
 	sg_mark_end(sg);
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
-		page_cache_release(sg_page_iter_page(&sg_iter));
+		put_page(sg_page_iter_page(&sg_iter));
 	sg_free_table(st);
 	kfree(st);
 

diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 1f3eef6..0506016 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c

@@ -228,25 +228,20 @@
 	return ret;
 }
 
-static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	bool was_interruptible;
 	int ret;
 
-	mutex_lock(&dev->struct_mutex);
-	was_interruptible = dev_priv->mm.interruptible;
-	dev_priv->mm.interruptible = false;
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
-
-	dev_priv->mm.interruptible = was_interruptible;
 	mutex_unlock(&dev->struct_mutex);
 
-	if (unlikely(ret))
-		DRM_ERROR("unable to flush buffer following CPU access; rendering may be corrupt\n");
+	return ret;
 }
 
 static const struct dma_buf_ops i915_dmabuf_ops =  {

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 6be40f3..18ba813 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c

@@ -683,7 +683,7 @@
 			set_page_dirty(page);
 
 		mark_page_accessed(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	obj->dirty = 0;
 

diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
index 2a95d10..a24631fd 100644
--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
+++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c

@@ -225,8 +225,6 @@
 	if (!iores)
 		return -ENXIO;
 
-	platform_set_drvdata(pdev, hdmi);
-
 	encoder->possible_crtcs = drm_of_find_possible_crtcs(drm, dev->of_node);
 	/*
 	 * If we failed to find the CRTC(s) which this encoder is
@@ -245,7 +243,16 @@
 	drm_encoder_init(drm, encoder, &dw_hdmi_imx_encoder_funcs,
 			 DRM_MODE_ENCODER_TMDS, NULL);
 
-	return dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
+	ret = dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
+
+	/*
+	 * If dw_hdmi_bind() fails we'll never call dw_hdmi_unbind(),
+	 * which would have called the encoder cleanup.  Do it manually.
+	 */
+	if (ret)
+		drm_encoder_cleanup(encoder);
+
+	return ret;
 }
 
 static void dw_hdmi_imx_unbind(struct device *dev, struct device *master,

diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 9876e0f..e26dcde 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c

@@ -326,7 +326,6 @@
 {
 	struct imx_drm_device *imxdrm = drm->dev_private;
 	struct imx_drm_crtc *imx_drm_crtc;
-	int ret;
 
 	/*
 	 * The vblank arrays are dimensioned by MAX_CRTC - we can't
@@ -351,10 +350,6 @@
 
 	*new_crtc = imx_drm_crtc;
 
-	ret = drm_mode_crtc_set_gamma_size(imx_drm_crtc->crtc, 256);
-	if (ret)
-		goto err_register;
-
 	drm_crtc_helper_add(crtc,
 			imx_drm_crtc->imx_drm_helper_funcs.crtc_helper_funcs);
 
@@ -362,11 +357,6 @@
 			imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs, NULL);
 
 	return 0;
-
-err_register:
-	imxdrm->crtc[--imxdrm->pipes] = NULL;
-	kfree(imx_drm_crtc);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(imx_drm_add_crtc);
 

diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 5888278..681ec6e 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c

@@ -72,22 +72,101 @@
 int ipu_plane_set_base(struct ipu_plane *ipu_plane, struct drm_framebuffer *fb,
 		       int x, int y)
 {
-	struct drm_gem_cma_object *cma_obj;
-	unsigned long eba;
-	int active;
+	struct drm_gem_cma_object *cma_obj[3];
+	unsigned long eba, ubo, vbo;
+	int active, i;
 
-	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-	if (!cma_obj) {
-		DRM_DEBUG_KMS("entry is null.\n");
-		return -EFAULT;
+	for (i = 0; i < drm_format_num_planes(fb->pixel_format); i++) {
+		cma_obj[i] = drm_fb_cma_get_gem_obj(fb, i);
+		if (!cma_obj[i]) {
+			DRM_DEBUG_KMS("plane %d entry is null.\n", i);
+			return -EFAULT;
+		}
 	}
 
-	dev_dbg(ipu_plane->base.dev->dev, "phys = %pad, x = %d, y = %d",
-		&cma_obj->paddr, x, y);
-
-	eba = cma_obj->paddr + fb->offsets[0] +
+	eba = cma_obj[0]->paddr + fb->offsets[0] +
 	      fb->pitches[0] * y + (fb->bits_per_pixel >> 3) * x;
 
+	if (eba & 0x7) {
+		DRM_DEBUG_KMS("base address must be a multiple of 8.\n");
+		return -EINVAL;
+	}
+
+	if (fb->pitches[0] < 1 || fb->pitches[0] > 16384) {
+		DRM_DEBUG_KMS("pitches out of range.\n");
+		return -EINVAL;
+	}
+
+	if (ipu_plane->enabled && fb->pitches[0] != ipu_plane->stride[0]) {
+		DRM_DEBUG_KMS("pitches must not change while plane is enabled.\n");
+		return -EINVAL;
+	}
+
+	ipu_plane->stride[0] = fb->pitches[0];
+
+	switch (fb->pixel_format) {
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		/*
+		 * Multiplanar formats have to meet the following restrictions:
+		 * - The (up to) three plane addresses are EBA, EBA+UBO, EBA+VBO
+		 * - EBA, UBO and VBO are a multiple of 8
+		 * - UBO and VBO are unsigned and not larger than 0xfffff8
+		 * - Only EBA may be changed while scanout is active
+		 * - The strides of U and V planes must be identical.
+		 */
+		ubo = cma_obj[1]->paddr + fb->offsets[1] +
+		      fb->pitches[1] * y / 2 + x / 2 - eba;
+		vbo = cma_obj[2]->paddr + fb->offsets[2] +
+		      fb->pitches[2] * y / 2 + x / 2 - eba;
+
+		if ((ubo & 0x7) || (vbo & 0x7)) {
+			DRM_DEBUG_KMS("U/V buffer offsets must be a multiple of 8.\n");
+			return -EINVAL;
+		}
+
+		if ((ubo > 0xfffff8) || (vbo > 0xfffff8)) {
+			DRM_DEBUG_KMS("U/V buffer offsets must be positive and not larger than 0xfffff8.\n");
+			return -EINVAL;
+		}
+
+		if (ipu_plane->enabled && ((ipu_plane->u_offset != ubo) ||
+					   (ipu_plane->v_offset != vbo))) {
+			DRM_DEBUG_KMS("U/V buffer offsets must not change while plane is enabled.\n");
+			return -EINVAL;
+		}
+
+		if (fb->pitches[1] != fb->pitches[2]) {
+			DRM_DEBUG_KMS("U/V pitches must be identical.\n");
+			return -EINVAL;
+		}
+
+		if (fb->pitches[1] < 1 || fb->pitches[1] > 16384) {
+			DRM_DEBUG_KMS("U/V pitches out of range.\n");
+			return -EINVAL;
+		}
+
+		if (ipu_plane->enabled &&
+		    (ipu_plane->stride[1] != fb->pitches[1])) {
+			DRM_DEBUG_KMS("U/V pitches must not change while plane is enabled.\n");
+			return -EINVAL;
+		}
+
+		ipu_plane->u_offset = ubo;
+		ipu_plane->v_offset = vbo;
+		ipu_plane->stride[1] = fb->pitches[1];
+
+		dev_dbg(ipu_plane->base.dev->dev,
+			"phys = %pad %pad %pad, x = %d, y = %d",
+			&cma_obj[0]->paddr, &cma_obj[1]->paddr,
+			&cma_obj[2]->paddr, x, y);
+		break;
+	default:
+		dev_dbg(ipu_plane->base.dev->dev, "phys = %pad, x = %d, y = %d",
+			&cma_obj[0]->paddr, x, y);
+		break;
+	}
+
 	if (ipu_plane->enabled) {
 		active = ipu_idmac_get_current_buffer(ipu_plane->ipu_ch);
 		ipu_cpmem_set_buffer(ipu_plane->ipu_ch, !active, eba);
@@ -201,12 +280,6 @@
 		}
 	}
 
-	ret = ipu_dmfc_init_channel(ipu_plane->dmfc, crtc_w);
-	if (ret) {
-		dev_err(dev, "initializing dmfc channel failed with %d\n", ret);
-		return ret;
-	}
-
 	ret = ipu_dmfc_alloc_bandwidth(ipu_plane->dmfc,
 			calc_bandwidth(crtc_w, crtc_h,
 				       calc_vref(mode)), 64);
@@ -215,6 +288,8 @@
 		return ret;
 	}
 
+	ipu_dmfc_config_wait4eot(ipu_plane->dmfc, crtc_w);
+
 	ipu_cpmem_zero(ipu_plane->ipu_ch);
 	ipu_cpmem_set_resolution(ipu_plane->ipu_ch, src_w, src_h);
 	ret = ipu_cpmem_set_fmt(ipu_plane->ipu_ch, fb->pixel_format);
@@ -233,6 +308,18 @@
 	if (interlaced)
 		ipu_cpmem_interlaced_scan(ipu_plane->ipu_ch, fb->pitches[0]);
 
+	if (fb->pixel_format == DRM_FORMAT_YUV420) {
+		ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
+					      ipu_plane->stride[1],
+					      ipu_plane->u_offset,
+					      ipu_plane->v_offset);
+	} else if (fb->pixel_format == DRM_FORMAT_YVU420) {
+		ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
+					      ipu_plane->stride[1],
+					      ipu_plane->v_offset,
+					      ipu_plane->u_offset);
+	}
+
 	ipu_plane->w = src_w;
 	ipu_plane->h = src_h;
 

diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h
index 3a443b4..4448fd4 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.h
+++ b/drivers/gpu/drm/imx/ipuv3-plane.h

@@ -29,6 +29,10 @@
 	int			w;
 	int			h;
 
+	unsigned int		u_offset;
+	unsigned int		v_offset;
+	unsigned int		stride[2];
+
 	bool			enabled;
 };
 

diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index b04a646..65428cf 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h

@@ -196,7 +196,7 @@
 int msm_hdmi_pll_8960_init(struct platform_device *pdev);
 int msm_hdmi_pll_8996_init(struct platform_device *pdev);
 #else
-static inline int msm_hdmi_pll_8960_init(struct platform_device *pdev);
+static inline int msm_hdmi_pll_8960_init(struct platform_device *pdev)
 {
 	return -ENODEV;
 }

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index d52910e..c03b967 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c

@@ -467,9 +467,6 @@
 	struct msm_file_private *ctx = file->driver_priv;
 	struct msm_kms *kms = priv->kms;
 
-	if (kms)
-		kms->funcs->preclose(kms, file);
-
 	mutex_lock(&dev->struct_mutex);
 	if (ctx == priv->lastctx)
 		priv->lastctx = NULL;

diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index 9bcabaa..e32222c 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h

@@ -55,7 +55,6 @@
 			struct drm_encoder *slave_encoder,
 			bool is_cmd_mode);
 	/* cleanup: */
-	void (*preclose)(struct msm_kms *kms, struct drm_file *file);
 	void (*destroy)(struct msm_kms *kms);
 };
 

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
index 16641ce..b5370cb 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h

@@ -11,6 +11,7 @@
 
 	struct reset_control *rst;
 	struct clk *clk;
+	struct clk *clk_ref;
 	struct clk *clk_pwr;
 
 	struct regulator *vdd;
@@ -36,6 +37,10 @@
 	 * bypassed). A value of 0 means an IOMMU is never used.
 	 */
 	u8 iommu_bit;
+	/*
+	 * Whether the chip requires a reference clock
+	 */
+	bool require_ref_clk;
 };
 
 int nvkm_device_tegra_new(const struct nvkm_device_tegra_func *,

diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 2dfe58a..4c4cc22 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c

@@ -55,6 +55,11 @@
 	.iommu_bit = 34,
 };
 
+static const struct nvkm_device_tegra_func gm20b_platform_data = {
+	.iommu_bit = 34,
+	.require_ref_clk = true,
+};
+
 static const struct of_device_id nouveau_platform_match[] = {
 	{
 		.compatible = "nvidia,gk20a",
@@ -62,7 +67,7 @@
 	},
 	{
 		.compatible = "nvidia,gm20b",
-		.data = &gk20a_platform_data,
+		.data = &gm20b_platform_data,
 	},
 	{ }
 };

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 9afa5f3..ec12efb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c

@@ -35,6 +35,11 @@
 	ret = clk_prepare_enable(tdev->clk);
 	if (ret)
 		goto err_clk;
+	if (tdev->clk_ref) {
+		ret = clk_prepare_enable(tdev->clk_ref);
+		if (ret)
+			goto err_clk_ref;
+	}
 	ret = clk_prepare_enable(tdev->clk_pwr);
 	if (ret)
 		goto err_clk_pwr;
@@ -57,6 +62,9 @@
 err_clamp:
 	clk_disable_unprepare(tdev->clk_pwr);
 err_clk_pwr:
+	if (tdev->clk_ref)
+		clk_disable_unprepare(tdev->clk_ref);
+err_clk_ref:
 	clk_disable_unprepare(tdev->clk);
 err_clk:
 	regulator_disable(tdev->vdd);
@@ -71,6 +79,8 @@
 	udelay(10);
 
 	clk_disable_unprepare(tdev->clk_pwr);
+	if (tdev->clk_ref)
+		clk_disable_unprepare(tdev->clk_ref);
 	clk_disable_unprepare(tdev->clk);
 	udelay(10);
 
@@ -274,6 +284,13 @@
 		goto free;
 	}
 
+	if (func->require_ref_clk)
+		tdev->clk_ref = devm_clk_get(&pdev->dev, "ref");
+	if (IS_ERR(tdev->clk_ref)) {
+		ret = PTR_ERR(tdev->clk_ref);
+		goto free;
+	}
+
 	tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr");
 	if (IS_ERR(tdev->clk_pwr)) {
 		ret = PTR_ERR(tdev->clk_pwr);

diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index 3cf8aab..af267c3 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c

@@ -97,11 +97,12 @@
 	return omap_gem_get_pages(obj, &pages, true);
 }
 
-static void omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer,
-		enum dma_data_direction dir)
+static int omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer,
+					  enum dma_data_direction dir)
 {
 	struct drm_gem_object *obj = buffer->priv;
 	omap_gem_put_pages(obj);
+	return 0;
 }
 
 

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index cf61e08..b80b08f 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c

@@ -275,13 +275,15 @@
 		if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
 			atombios_enable_crtc_memreq(crtc, ATOM_ENABLE);
 		atombios_blank_crtc(crtc, ATOM_DISABLE);
-		drm_vblank_on(dev, radeon_crtc->crtc_id);
+		if (dev->num_crtcs > radeon_crtc->crtc_id)
+			drm_vblank_on(dev, radeon_crtc->crtc_id);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_off(dev, radeon_crtc->crtc_id);
+		if (dev->num_crtcs > radeon_crtc->crtc_id)
+			drm_vblank_off(dev, radeon_crtc->crtc_id);
 		if (radeon_crtc->enabled)
 			atombios_blank_crtc(crtc, ATOM_ENABLE);
 		if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))

diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c
index df7a171..43cffb5 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_mst.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c

@@ -510,6 +510,7 @@
 {
 	struct radeon_encoder_mst *mst_enc;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_connector_atom_dig *dig_connector;
 	int bpp = 24;
 
 	mst_enc = radeon_encoder->enc_priv;
@@ -523,22 +524,11 @@
 
 
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
-	{
-	  struct radeon_connector_atom_dig *dig_connector;
-	  int ret;
-
-	  dig_connector = mst_enc->connector->con_priv;
-	  ret = radeon_dp_get_dp_link_config(&mst_enc->connector->base,
-					     dig_connector->dpcd, adjusted_mode->clock,
-					     &dig_connector->dp_lane_count,
-					     &dig_connector->dp_clock);
-	  if (ret) {
-		  dig_connector->dp_lane_count = 0;
-		  dig_connector->dp_clock = 0;
-	  }
-	  DRM_DEBUG_KMS("dig clock %p %d %d\n", dig_connector,
-			dig_connector->dp_lane_count, dig_connector->dp_clock);
-	}
+	dig_connector = mst_enc->connector->con_priv;
+	dig_connector->dp_lane_count = drm_dp_max_lane_count(dig_connector->dpcd);
+	dig_connector->dp_clock = drm_dp_max_link_rate(dig_connector->dpcd);
+	DRM_DEBUG_KMS("dig clock %p %d %d\n", dig_connector,
+		      dig_connector->dp_lane_count, dig_connector->dp_clock);
 	return true;
 }
 

diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 979f3bf..1e9304d 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c

@@ -291,6 +291,8 @@
 	if (r) {
 		return r;
 	}
+	rdev->ddev->vblank_disable_allowed = true;
+
 	/* enable msi */
 	rdev->msi_enabled = 0;
 

diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 24152df..478d409 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c

@@ -331,13 +331,15 @@
 									 RADEON_CRTC_DISP_REQ_EN_B));
 			WREG32_P(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl, ~(mask | crtc_ext_cntl));
 		}
-		drm_vblank_on(dev, radeon_crtc->crtc_id);
+		if (dev->num_crtcs > radeon_crtc->crtc_id)
+			drm_vblank_on(dev, radeon_crtc->crtc_id);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_off(dev, radeon_crtc->crtc_id);
+		if (dev->num_crtcs > radeon_crtc->crtc_id)
+			drm_vblank_off(dev, radeon_crtc->crtc_id);
 		if (radeon_crtc->crtc_id)
 			WREG32_P(RADEON_CRTC2_GEN_CNTL, mask, ~(RADEON_CRTC2_EN | mask));
 		else {

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index dd46c38..2d901bf 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c

@@ -799,6 +799,10 @@
 	if ((offset + size) <= rdev->mc.visible_vram_size)
 		return 0;
 
+	/* Can't move a pinned BO to visible VRAM */
+	if (rbo->pin_count > 0)
+		return -EINVAL;
+
 	/* hurrah the memory is not visible ! */
 	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
 	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 6d8c323..7dddfdc 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c

@@ -397,9 +397,15 @@
 			struct ttm_mem_reg *new_mem)
 {
 	struct radeon_device *rdev;
+	struct radeon_bo *rbo;
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int r;
 
+	/* Can't move a pinned BO */
+	rbo = container_of(bo, struct radeon_bo, tbo);
+	if (WARN_ON_ONCE(rbo->pin_count > 0))
+		return -EINVAL;
+
 	rdev = radeon_get_rdev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		radeon_move_null(bo, new_mem);
@@ -609,7 +615,7 @@
 			set_page_dirty(page);
 
 		mark_page_accessed(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	sg_free_table(ttm->sg);

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index cb75ab7..af4df81 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c

@@ -2926,9 +2926,11 @@
 	/* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 },
 	{ 0, 0, 0, 0 },
 };
 
@@ -3008,6 +3010,10 @@
 		}
 		++p;
 	}
+	/* limit mclk on all R7 370 parts for stability */
+	if (rdev->pdev->device == 0x6811 &&
+	    rdev->pdev->revision == 0x81)
+		max_mclk = 120000;
 
 	if (rps->vce_active) {
 		rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;

diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 3d3cf2f..d5cfef7 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c

@@ -271,8 +271,6 @@
 	if (!iores)
 		return -ENXIO;
 
-	platform_set_drvdata(pdev, hdmi);
-
 	encoder->possible_crtcs = drm_of_find_possible_crtcs(drm, dev->of_node);
 	/*
 	 * If we failed to find the CRTC(s) which this encoder is
@@ -293,7 +291,16 @@
 	drm_encoder_init(drm, encoder, &dw_hdmi_rockchip_encoder_funcs,
 			 DRM_MODE_ENCODER_TMDS, NULL);
 
-	return dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
+	ret = dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
+
+	/*
+	 * If dw_hdmi_bind() fails we'll never call dw_hdmi_unbind(),
+	 * which would have called the encoder cleanup.  Do it manually.
+	 */
+	if (ret)
+		drm_encoder_cleanup(encoder);
+
+	return ret;
 }
 
 static void dw_hdmi_rockchip_unbind(struct device *dev, struct device *master,

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 896da09..f556a8f 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c

@@ -251,6 +251,27 @@
 	return 0;
 }
 
+static void rockchip_drm_crtc_cancel_pending_vblank(struct drm_crtc *crtc,
+						    struct drm_file *file_priv)
+{
+	struct rockchip_drm_private *priv = crtc->dev->dev_private;
+	int pipe = drm_crtc_index(crtc);
+
+	if (pipe < ROCKCHIP_MAX_CRTC &&
+	    priv->crtc_funcs[pipe] &&
+	    priv->crtc_funcs[pipe]->cancel_pending_vblank)
+		priv->crtc_funcs[pipe]->cancel_pending_vblank(crtc, file_priv);
+}
+
+static void rockchip_drm_preclose(struct drm_device *dev,
+				  struct drm_file *file_priv)
+{
+	struct drm_crtc *crtc;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+		rockchip_drm_crtc_cancel_pending_vblank(crtc, file_priv);
+}
+
 void rockchip_drm_lastclose(struct drm_device *dev)
 {
 	struct rockchip_drm_private *priv = dev->dev_private;
@@ -281,6 +302,7 @@
 				  DRIVER_PRIME | DRIVER_ATOMIC,
 	.load			= rockchip_drm_load,
 	.unload			= rockchip_drm_unload,
+	.preclose		= rockchip_drm_preclose,
 	.lastclose		= rockchip_drm_lastclose,
 	.get_vblank_counter	= drm_vblank_no_hw_counter,
 	.enable_vblank		= rockchip_drm_crtc_enable_vblank,

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index 3529f69..00d17d7 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h

@@ -40,6 +40,7 @@
 	int (*enable_vblank)(struct drm_crtc *crtc);
 	void (*disable_vblank)(struct drm_crtc *crtc);
 	void (*wait_for_update)(struct drm_crtc *crtc);
+	void (*cancel_pending_vblank)(struct drm_crtc *crtc, struct drm_file *file_priv);
 };
 
 struct rockchip_atomic_commit {

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index fd37054..a619f12 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c

@@ -499,10 +499,25 @@
 static void vop_crtc_disable(struct drm_crtc *crtc)
 {
 	struct vop *vop = to_vop(crtc);
+	int i;
 
 	if (!vop->is_enabled)
 		return;
 
+	/*
+	 * We need to make sure that all windows are disabled before we
+	 * disable that crtc. Otherwise we might try to scan from a destroyed
+	 * buffer later.
+	 */
+	for (i = 0; i < vop->data->win_size; i++) {
+		struct vop_win *vop_win = &vop->win[i];
+		const struct vop_win_data *win = vop_win->data;
+
+		spin_lock(&vop->reg_lock);
+		VOP_WIN_SET(vop, win, enable, 0);
+		spin_unlock(&vop->reg_lock);
+	}
+
 	drm_crtc_vblank_off(crtc);
 
 	/*
@@ -549,6 +564,7 @@
 			   struct drm_plane_state *state)
 {
 	struct drm_crtc *crtc = state->crtc;
+	struct drm_crtc_state *crtc_state;
 	struct drm_framebuffer *fb = state->fb;
 	struct vop_win *vop_win = to_vop_win(plane);
 	struct vop_plane_state *vop_plane_state = to_vop_plane_state(state);
@@ -563,12 +579,13 @@
 	int max_scale = win->phy->scl ? FRAC_16_16(8, 1) :
 					DRM_PLANE_HELPER_NO_SCALING;
 
-	crtc = crtc ? crtc : plane->state->crtc;
-	/*
-	 * Both crtc or plane->state->crtc can be null.
-	 */
 	if (!crtc || !fb)
 		goto out_disable;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(state->state, crtc);
+	if (WARN_ON(!crtc_state))
+		return -EINVAL;
+
 	src->x1 = state->src_x;
 	src->y1 = state->src_y;
 	src->x2 = state->src_x + state->src_w;
@@ -580,8 +597,8 @@
 
 	clip.x1 = 0;
 	clip.y1 = 0;
-	clip.x2 = crtc->mode.hdisplay;
-	clip.y2 = crtc->mode.vdisplay;
+	clip.x2 = crtc_state->adjusted_mode.hdisplay;
+	clip.y2 = crtc_state->adjusted_mode.vdisplay;
 
 	ret = drm_plane_helper_check_update(plane, crtc, state->fb,
 					    src, dest, &clip,
@@ -873,10 +890,30 @@
 	WARN_ON(!wait_for_completion_timeout(&vop->wait_update_complete, 100));
 }
 
+static void vop_crtc_cancel_pending_vblank(struct drm_crtc *crtc,
+					   struct drm_file *file_priv)
+{
+	struct drm_device *drm = crtc->dev;
+	struct vop *vop = to_vop(crtc);
+	struct drm_pending_vblank_event *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&drm->event_lock, flags);
+	e = vop->event;
+	if (e && e->base.file_priv == file_priv) {
+		vop->event = NULL;
+
+		e->base.destroy(&e->base);
+		file_priv->event_space += sizeof(e->event);
+	}
+	spin_unlock_irqrestore(&drm->event_lock, flags);
+}
+
 static const struct rockchip_crtc_funcs private_crtc_funcs = {
 	.enable_vblank = vop_crtc_enable_vblank,
 	.disable_vblank = vop_crtc_disable_vblank,
 	.wait_for_update = vop_crtc_wait_for_update,
+	.cancel_pending_vblank = vop_crtc_cancel_pending_vblank,
 };
 
 static bool vop_crtc_mode_fixup(struct drm_crtc *crtc,
@@ -885,9 +922,6 @@
 {
 	struct vop *vop = to_vop(crtc);
 
-	if (adjusted_mode->htotal == 0 || adjusted_mode->vtotal == 0)
-		return false;
-
 	adjusted_mode->clock =
 		clk_round_rate(vop->dclk, mode->clock * 1000) / 1000;
 
@@ -1108,7 +1142,7 @@
 	const struct vop_data *vop_data = vop->data;
 	struct device *dev = vop->dev;
 	struct drm_device *drm_dev = vop->drm_dev;
-	struct drm_plane *primary = NULL, *cursor = NULL, *plane;
+	struct drm_plane *primary = NULL, *cursor = NULL, *plane, *tmp;
 	struct drm_crtc *crtc = &vop->crtc;
 	struct device_node *port;
 	int ret;
@@ -1148,7 +1182,7 @@
 	ret = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor,
 					&vop_crtc_funcs, NULL);
 	if (ret)
-		return ret;
+		goto err_cleanup_planes;
 
 	drm_crtc_helper_add(crtc, &vop_crtc_helper_funcs);
 
@@ -1181,6 +1215,7 @@
 	if (!port) {
 		DRM_ERROR("no port node found in %s\n",
 			  dev->of_node->full_name);
+		ret = -ENOENT;
 		goto err_cleanup_crtc;
 	}
 
@@ -1194,7 +1229,8 @@
 err_cleanup_crtc:
 	drm_crtc_cleanup(crtc);
 err_cleanup_planes:
-	list_for_each_entry(plane, &drm_dev->mode_config.plane_list, head)
+	list_for_each_entry_safe(plane, tmp, &drm_dev->mode_config.plane_list,
+				 head)
 		drm_plane_cleanup(plane);
 	return ret;
 }
@@ -1202,9 +1238,28 @@
 static void vop_destroy_crtc(struct vop *vop)
 {
 	struct drm_crtc *crtc = &vop->crtc;
+	struct drm_device *drm_dev = vop->drm_dev;
+	struct drm_plane *plane, *tmp;
 
 	rockchip_unregister_crtc_funcs(crtc);
 	of_node_put(crtc->port);
+
+	/*
+	 * We need to cleanup the planes now.  Why?
+	 *
+	 * The planes are "&vop->win[i].base".  That means the memory is
+	 * all part of the big "struct vop" chunk of memory.  That memory
+	 * was devm allocated and associated with this component.  We need to
+	 * free it ourselves before vop_unbind() finishes.
+	 */
+	list_for_each_entry_safe(plane, tmp, &drm_dev->mode_config.plane_list,
+				 head)
+		vop_plane_destroy(plane);
+
+	/*
+	 * Destroy CRTC after vop_plane_destroy() since vop_disable_plane()
+	 * references the CRTC.
+	 */
 	drm_crtc_cleanup(crtc);
 }
 

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 4e19d0f..077ae9b 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c

@@ -311,7 +311,7 @@
 			goto out_err;
 
 		copy_highpage(to_page, from_page);
-		page_cache_release(from_page);
+		put_page(from_page);
 	}
 
 	if (!(ttm->page_flags & TTM_PAGE_FLAG_PERSISTENT_SWAP))
@@ -361,7 +361,7 @@
 		copy_highpage(to_page, from_page);
 		set_page_dirty(to_page);
 		mark_page_accessed(to_page);
-		page_cache_release(to_page);
+		put_page(to_page);
 	}
 
 	ttm_tt_unpopulate(ttm);

diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index c427499..fd1eb9d 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c

@@ -423,8 +423,8 @@
 	}
 
 	if (ufb->obj->base.import_attach) {
-		dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf,
-				       DMA_FROM_DEVICE);
+		ret = dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf,
+					     DMA_FROM_DEVICE);
 	}
 
  unlock:
@@ -536,7 +536,7 @@
 out_destroy_fbi:
 	drm_fb_helper_release_fbi(helper);
 out_gfree:
-	drm_gem_object_unreference(&ufbdev->ufb.obj->base);
+	drm_gem_object_unreference_unlocked(&ufbdev->ufb.obj->base);
 out:
 	return ret;
 }

diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index 2a0a784..d7528e0 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c

@@ -52,7 +52,7 @@
 		return ret;
 	}
 
-	drm_gem_object_unreference(&obj->base);
+	drm_gem_object_unreference_unlocked(&obj->base);
 	*handle_p = handle;
 	return 0;
 }

diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index e797dfc..7e2a12c 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c

@@ -188,7 +188,7 @@
 			if (NULL != (page = vsg->pages[i])) {
 				if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction))
 					SetPageDirty(page);
-				page_cache_release(page);
+				put_page(page);
 			}
 		}
 	case dr_via_pages_alloc:

diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index 883a314..6494a4d 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c

@@ -395,60 +395,48 @@
 EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_interleaved);
 
 void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch,
-				   u32 pixel_format, int stride,
-				   int u_offset, int v_offset)
+				   unsigned int uv_stride,
+				   unsigned int u_offset, unsigned int v_offset)
 {
-	switch (pixel_format) {
-	case V4L2_PIX_FMT_YUV420:
-	case V4L2_PIX_FMT_YUV422P:
-		ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, (stride / 2) - 1);
-		ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8);
-		ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_offset / 8);
-		break;
-	case V4L2_PIX_FMT_YVU420:
-		ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, (stride / 2) - 1);
-		ipu_ch_param_write_field(ch, IPU_FIELD_UBO, v_offset / 8);
-		ipu_ch_param_write_field(ch, IPU_FIELD_VBO, u_offset / 8);
-		break;
-	case V4L2_PIX_FMT_NV12:
-	case V4L2_PIX_FMT_NV16:
-		ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, stride - 1);
-		ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8);
-		ipu_ch_param_write_field(ch, IPU_FIELD_VBO, u_offset / 8);
-		break;
-	}
+	ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, uv_stride - 1);
+	ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8);
+	ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_offset / 8);
 }
 EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_planar_full);
 
 void ipu_cpmem_set_yuv_planar(struct ipuv3_channel *ch,
 			      u32 pixel_format, int stride, int height)
 {
-	int u_offset, v_offset;
+	int fourcc, u_offset, v_offset;
 	int uv_stride = 0;
 
-	switch (pixel_format) {
-	case V4L2_PIX_FMT_YUV420:
-	case V4L2_PIX_FMT_YVU420:
+	fourcc = v4l2_pix_fmt_to_drm_fourcc(pixel_format);
+	switch (fourcc) {
+	case DRM_FORMAT_YUV420:
 		uv_stride = stride / 2;
 		u_offset = stride * height;
 		v_offset = u_offset + (uv_stride * height / 2);
-		ipu_cpmem_set_yuv_planar_full(ch, pixel_format, stride,
-					      u_offset, v_offset);
 		break;
-	case V4L2_PIX_FMT_YUV422P:
+	case DRM_FORMAT_YVU420:
+		uv_stride = stride / 2;
+		v_offset = stride * height;
+		u_offset = v_offset + (uv_stride * height / 2);
+		break;
+	case DRM_FORMAT_YUV422:
 		uv_stride = stride / 2;
 		u_offset = stride * height;
 		v_offset = u_offset + (uv_stride * height);
-		ipu_cpmem_set_yuv_planar_full(ch, pixel_format, stride,
-					      u_offset, v_offset);
 		break;
-	case V4L2_PIX_FMT_NV12:
-	case V4L2_PIX_FMT_NV16:
+	case DRM_FORMAT_NV12:
+	case DRM_FORMAT_NV16:
+		uv_stride = stride;
 		u_offset = stride * height;
-		ipu_cpmem_set_yuv_planar_full(ch, pixel_format, stride,
-					      u_offset, 0);
+		v_offset = 0;
 		break;
+	default:
+		return;
 	}
+	ipu_cpmem_set_yuv_planar_full(ch, uv_stride, u_offset, v_offset);
 }
 EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_planar);
 
@@ -684,6 +672,15 @@
 
 	switch (pix->pixelformat) {
 	case V4L2_PIX_FMT_YUV420:
+		offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
+		u_offset = U_OFFSET(pix, image->rect.left,
+				    image->rect.top) - offset;
+		v_offset = V_OFFSET(pix, image->rect.left,
+				    image->rect.top) - offset;
+
+		ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
+					      u_offset, v_offset);
+		break;
 	case V4L2_PIX_FMT_YVU420:
 		offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
 		u_offset = U_OFFSET(pix, image->rect.left,
@@ -691,9 +688,8 @@
 		v_offset = V_OFFSET(pix, image->rect.left,
 				    image->rect.top) - offset;
 
-		ipu_cpmem_set_yuv_planar_full(ch, pix->pixelformat,
-					      pix->bytesperline,
-					      u_offset, v_offset);
+		ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
+					      v_offset, u_offset);
 		break;
 	case V4L2_PIX_FMT_YUV422P:
 		offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
@@ -702,8 +698,7 @@
 		v_offset = V2_OFFSET(pix, image->rect.left,
 				     image->rect.top) - offset;
 
-		ipu_cpmem_set_yuv_planar_full(ch, pix->pixelformat,
-					      pix->bytesperline,
+		ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2,
 					      u_offset, v_offset);
 		break;
 	case V4L2_PIX_FMT_NV12:
@@ -712,8 +707,7 @@
 				     image->rect.top) - offset;
 		v_offset = 0;
 
-		ipu_cpmem_set_yuv_planar_full(ch, pix->pixelformat,
-					      pix->bytesperline,
+		ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline,
 					      u_offset, v_offset);
 		break;
 	case V4L2_PIX_FMT_NV16:
@@ -722,8 +716,7 @@
 				      image->rect.top) - offset;
 		v_offset = 0;
 
-		ipu_cpmem_set_yuv_planar_full(ch, pix->pixelformat,
-					      pix->bytesperline,
+		ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline,
 					      u_offset, v_offset);
 		break;
 	case V4L2_PIX_FMT_UYVY:

diff --git a/drivers/gpu/ipu-v3/ipu-dmfc.c b/drivers/gpu/ipu-v3/ipu-dmfc.c
index 042c395..837b1ec2 100644
--- a/drivers/gpu/ipu-v3/ipu-dmfc.c
+++ b/drivers/gpu/ipu-v3/ipu-dmfc.c

@@ -350,11 +350,13 @@
 }
 EXPORT_SYMBOL_GPL(ipu_dmfc_alloc_bandwidth);
 
-int ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width)
+void ipu_dmfc_config_wait4eot(struct dmfc_channel *dmfc, int width)
 {
 	struct ipu_dmfc_priv *priv = dmfc->priv;
 	u32 dmfc_gen1;
 
+	mutex_lock(&priv->mutex);
+
 	dmfc_gen1 = readl(priv->base + DMFC_GENERAL1);
 
 	if ((dmfc->slots * 64 * 4) / width > dmfc->data->max_fifo_lines)
@@ -364,9 +366,9 @@
 
 	writel(dmfc_gen1, priv->base + DMFC_GENERAL1);
 
-	return 0;
+	mutex_unlock(&priv->mutex);
 }
-EXPORT_SYMBOL_GPL(ipu_dmfc_init_channel);
+EXPORT_SYMBOL_GPL(ipu_dmfc_config_wait4eot);
 
 struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipu_channel)
 {

diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index e094c57..16b6f11 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c

@@ -384,7 +384,7 @@
 static int uhid_event_from_user(const char __user *buffer, size_t len,
 				struct uhid_event *event)
 {
-	if (is_compat_task()) {
+	if (in_compat_syscall()) {
 		u32 type;
 
 		if (get_user(type, buffer))

diff --git a/drivers/hwmon/max1111.c b/drivers/hwmon/max1111.c
index 36544c4..303d0c9 100644
--- a/drivers/hwmon/max1111.c
+++ b/drivers/hwmon/max1111.c

@@ -85,6 +85,9 @@
 
 int max1111_read_channel(int channel)
 {
+	if (!the_max1111 || !the_max1111->spi)
+		return -ENODEV;
+
 	return max1111_read(&the_max1111->spi->dev, channel);
 }
 EXPORT_SYMBOL(max1111_read_channel);
@@ -258,6 +261,9 @@
 {
 	struct max1111_data *data = spi_get_drvdata(spi);
 
+#ifdef CONFIG_SHARPSL_PM
+	the_max1111 = NULL;
+#endif
 	hwmon_device_unregister(data->hwmon_dev);
 	sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group);
 	sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group);

diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 9f0a48e..80e933b 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c

@@ -451,7 +451,7 @@
 	return ret;
 }
 
-static const struct ide_port_info icside_v6_port_info __initconst = {
+static const struct ide_port_info icside_v6_port_info = {
 	.init_dma		= icside_dma_off_init,
 	.port_ops		= &icside_v6_no_dma_port_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,

diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 8012e43..46427ea 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c

@@ -325,6 +325,8 @@
 
 	clk_enable(clk);
 	rate = clk_get_rate(clk);
+	if (!rate)
+		return -EINVAL;
 
 	/* NOTE:  round *down* to meet minimum timings; we count in clocks */
 	ideclk_period = 1000000000UL / rate;

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cd4510a..ba947df 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c

@@ -65,7 +65,7 @@
 #include <asm/mwait.h>
 #include <asm/msr.h>
 
-#define INTEL_IDLE_VERSION "0.4"
+#define INTEL_IDLE_VERSION "0.4.1"
 #define PREFIX "intel_idle: "
 
 static struct cpuidle_driver intel_idle_driver = {
@@ -716,6 +716,26 @@
 	{
 		.enter = NULL }
 };
+static struct cpuidle_state knl_cstates[] = {
+	{
+		.name = "C1-KNL",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 1,
+		.target_residency = 2,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze },
+	{
+		.name = "C6-KNL",
+		.desc = "MWAIT 0x10",
+		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 120,
+		.target_residency = 500,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze },
+	{
+		.enter = NULL }
+};
 
 /**
  * intel_idle
@@ -890,6 +910,10 @@
 	.disable_promotion_to_c1e = true,
 };
 
+static const struct idle_cpu idle_cpu_knl = {
+	.state_table = knl_cstates,
+};
+
 #define ICPU(model, cpu) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
 
@@ -921,6 +945,7 @@
 	ICPU(0x56, idle_cpu_bdw),
 	ICPU(0x4e, idle_cpu_skl),
 	ICPU(0x5e, idle_cpu_skl),
+	ICPU(0x57, idle_cpu_knl),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -994,36 +1019,92 @@
 }
 
 /*
+ * ivt_idle_state_table_update(void)
+ *
+ * Tune IVT multi-socket targets
+ * Assumption: num_sockets == (max_package_num + 1)
+ */
+static void ivt_idle_state_table_update(void)
+{
+	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
+	int cpu, package_num, num_sockets = 1;
+
+	for_each_online_cpu(cpu) {
+		package_num = topology_physical_package_id(cpu);
+		if (package_num + 1 > num_sockets) {
+			num_sockets = package_num + 1;
+
+			if (num_sockets > 4) {
+				cpuidle_state_table = ivt_cstates_8s;
+				return;
+			}
+		}
+	}
+
+	if (num_sockets > 2)
+		cpuidle_state_table = ivt_cstates_4s;
+
+	/* else, 1 and 2 socket systems use default ivt_cstates */
+}
+/*
+ * sklh_idle_state_table_update(void)
+ *
+ * On SKL-H (model 0x5e) disable C8 and C9 if:
+ * C10 is enabled and SGX disabled
+ */
+static void sklh_idle_state_table_update(void)
+{
+	unsigned long long msr;
+	unsigned int eax, ebx, ecx, edx;
+
+
+	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
+	if (max_cstate <= 7)
+		return;
+
+	/* if PC10 not present in CPUID.MWAIT.EDX */
+	if ((mwait_substates & (0xF << 28)) == 0)
+		return;
+
+	rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
+
+	/* PC10 is not enabled in PKG C-state limit */
+	if ((msr & 0xF) != 8)
+		return;
+
+	ecx = 0;
+	cpuid(7, &eax, &ebx, &ecx, &edx);
+
+	/* if SGX is present */
+	if (ebx & (1 << 2)) {
+
+		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+
+		/* if SGX is enabled */
+		if (msr & (1 << 18))
+			return;
+	}
+
+	skl_cstates[5].disabled = 1;	/* C8-SKL */
+	skl_cstates[6].disabled = 1;	/* C9-SKL */
+}
+/*
  * intel_idle_state_table_update()
  *
  * Update the default state_table for this CPU-id
- *
- * Currently used to access tuned IVT multi-socket targets
- * Assumption: num_sockets == (max_package_num + 1)
  */
-void intel_idle_state_table_update(void)
+
+static void intel_idle_state_table_update(void)
 {
-	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
-	if (boot_cpu_data.x86_model == 0x3e) { /* IVT */
-		int cpu, package_num, num_sockets = 1;
+	switch (boot_cpu_data.x86_model) {
 
-		for_each_online_cpu(cpu) {
-			package_num = topology_physical_package_id(cpu);
-			if (package_num + 1 > num_sockets) {
-				num_sockets = package_num + 1;
-
-				if (num_sockets > 4) {
-					cpuidle_state_table = ivt_cstates_8s;
-					return;
-				}
-			}
-		}
-
-		if (num_sockets > 2)
-			cpuidle_state_table = ivt_cstates_4s;
-		/* else, 1 and 2 socket systems use default ivt_cstates */
+	case 0x3e: /* IVT */
+		ivt_idle_state_table_update();
+		break;
+	case 0x5e: /* SKL-H */
+		sklh_idle_state_table_update();
+		break;
 	}
-	return;
 }
 
 /*
@@ -1063,6 +1144,14 @@
 		if (num_substates == 0)
 			continue;
 
+		/* if state marked as disabled, skip it */
+		if (cpuidle_state_table[cstate].disabled != 0) {
+			pr_debug(PREFIX "state %s is disabled",
+				cpuidle_state_table[cstate].name);
+			continue;
+		}
+
+
 		if (((mwait_cstate + 1) > 2) &&
 			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			mark_tsc_unstable("TSC halts in idle"

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 8a8440c..6425c0e 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig

@@ -68,6 +68,7 @@
 source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
 source "drivers/infiniband/hw/cxgb4/Kconfig"
+source "drivers/infiniband/hw/i40iw/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
 source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/nes/Kconfig"
@@ -82,4 +83,6 @@
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/sw/rdmavt/Kconfig"
+
 endif # INFINIBAND

diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index dc21836..fad0b44 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile

@@ -1,3 +1,4 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND)		+= hw/
 obj-$(CONFIG_INFINIBAND)		+= ulp/
+obj-$(CONFIG_INFINIBAND)		+= sw/

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 270c7ff..1097984 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c

@@ -650,10 +650,23 @@
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
+	union ib_gid gid;
+	int err;
+
 	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
 		return -EINVAL;
 
-	return device->query_port(device, port_num, port_attr);
+	memset(port_attr, 0, sizeof(*port_attr));
+	err = device->query_port(device, port_num, port_attr);
+	if (err || port_attr->subnet_prefix)
+		return err;
+
+	err = ib_query_gid(device, port_num, 0, &gid, NULL);
+	if (err)
+		return err;
+
+	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+	return 0;
 }
 EXPORT_SYMBOL(ib_query_port);
 

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index b5656a2..8a09c0f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c

@@ -885,6 +885,11 @@
 	ah_attr.dlid     = port_attr.sm_lid;
 	ah_attr.sl       = port_attr.sm_sl;
 	ah_attr.port_num = port->port_num;
+	if (port_attr.grh_required) {
+		ah_attr.ah_flags = IB_AH_GRH;
+		ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
+		ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
+	}
 
 	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
 	if (IS_ERR(new_ah->ah)) {

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3638c78..6fdc7ec 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c

@@ -402,7 +402,7 @@
 	resp->hw_ver		= attr->hw_ver;
 	resp->max_qp		= attr->max_qp;
 	resp->max_qp_wr		= attr->max_qp_wr;
-	resp->device_cap_flags	= attr->device_cap_flags;
+	resp->device_cap_flags	= lower_32_bits(attr->device_cap_flags);
 	resp->max_sge		= attr->max_sge;
 	resp->max_sge_rd	= attr->max_sge_rd;
 	resp->max_cq		= attr->max_cq;
@@ -3600,9 +3600,9 @@
 			      struct ib_udata *ucore,
 			      struct ib_udata *uhw)
 {
-	struct ib_uverbs_ex_query_device_resp resp;
+	struct ib_uverbs_ex_query_device_resp resp = { {0} };
 	struct ib_uverbs_ex_query_device  cmd;
-	struct ib_device_attr attr;
+	struct ib_device_attr attr = {0};
 	int err;
 
 	if (ucore->inlen < sizeof(cmd))
@@ -3623,14 +3623,11 @@
 	if (ucore->outlen < resp.response_length)
 		return -ENOSPC;
 
-	memset(&attr, 0, sizeof(attr));
-
 	err = ib_dev->query_device(ib_dev, &attr, uhw);
 	if (err)
 		return err;
 
 	copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
-	resp.comp_mask = 0;
 
 	if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
 		goto end;
@@ -3643,9 +3640,6 @@
 		attr.odp_caps.per_transport_caps.uc_odp_caps;
 	resp.odp_caps.per_transport_caps.ud_odp_caps =
 		attr.odp_caps.per_transport_caps.ud_odp_caps;
-	resp.odp_caps.reserved = 0;
-#else
-	memset(&resp.odp_caps, 0, sizeof(resp.odp_caps));
 #endif
 	resp.response_length += sizeof(resp.odp_caps);
 
@@ -3663,8 +3657,5 @@
 
 end:
 	err = ib_copy_to_udata(ucore, &resp, resp.response_length);
-	if (err)
-		return err;
-
-	return 0;
+	return err;
 }

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5cd1e39..15b8adb 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c

@@ -1551,6 +1551,46 @@
 }
 EXPORT_SYMBOL(ib_check_mr_status);
 
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+			 int state)
+{
+	if (!device->set_vf_link_state)
+		return -ENOSYS;
+
+	return device->set_vf_link_state(device, vf, port, state);
+}
+EXPORT_SYMBOL(ib_set_vf_link_state);
+
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+		     struct ifla_vf_info *info)
+{
+	if (!device->get_vf_config)
+		return -ENOSYS;
+
+	return device->get_vf_config(device, vf, port, info);
+}
+EXPORT_SYMBOL(ib_get_vf_config);
+
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+		    struct ifla_vf_stats *stats)
+{
+	if (!device->get_vf_stats)
+		return -ENOSYS;
+
+	return device->get_vf_stats(device, vf, port, stats);
+}
+EXPORT_SYMBOL(ib_get_vf_stats);
+
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+		   int type)
+{
+	if (!device->set_vf_guid)
+		return -ENOSYS;
+
+	return device->set_vf_guid(device, vf, port, guid, type);
+}
+EXPORT_SYMBOL(ib_set_vf_guid);
+
 /**
  * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
  *     and set it the memory region.

diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index aded2a5..c7ad0a4 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile

@@ -2,6 +2,7 @@
 obj-$(CONFIG_INFINIBAND_QIB)		+= qib/
 obj-$(CONFIG_INFINIBAND_CXGB3)		+= cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4)		+= cxgb4/
+obj-$(CONFIG_INFINIBAND_I40IW)		+= i40iw/
 obj-$(CONFIG_MLX4_INFINIBAND)		+= mlx4/
 obj-$(CONFIG_MLX5_INFINIBAND)		+= mlx5/
 obj-$(CONFIG_INFINIBAND_NES)		+= nes/

diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 343e8daf..1e26669 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h

@@ -753,103 +753,4 @@
 #define FW_RI_WR_P2PTYPE_G(x)	\
 	(((x) >> FW_RI_WR_P2PTYPE_S) & FW_RI_WR_P2PTYPE_M)
 
-struct tcp_options {
-	__be16 mss;
-	__u8 wsf;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u8:4;
-	__u8 unknown:1;
-	__u8:1;
-	__u8 sack:1;
-	__u8 tstamp:1;
-#else
-	__u8 tstamp:1;
-	__u8 sack:1;
-	__u8:1;
-	__u8 unknown:1;
-	__u8:4;
-#endif
-};
-
-struct cpl_pass_accept_req {
-	union opcode_tid ot;
-	__be16 rsvd;
-	__be16 len;
-	__be32 hdr_len;
-	__be16 vlan;
-	__be16 l2info;
-	__be32 tos_stid;
-	struct tcp_options tcpopt;
-};
-
-/* cpl_pass_accept_req.hdr_len fields */
-#define SYN_RX_CHAN_S    0
-#define SYN_RX_CHAN_M    0xF
-#define SYN_RX_CHAN_V(x) ((x) << SYN_RX_CHAN_S)
-#define SYN_RX_CHAN_G(x) (((x) >> SYN_RX_CHAN_S) & SYN_RX_CHAN_M)
-
-#define TCP_HDR_LEN_S    10
-#define TCP_HDR_LEN_M    0x3F
-#define TCP_HDR_LEN_V(x) ((x) << TCP_HDR_LEN_S)
-#define TCP_HDR_LEN_G(x) (((x) >> TCP_HDR_LEN_S) & TCP_HDR_LEN_M)
-
-#define IP_HDR_LEN_S    16
-#define IP_HDR_LEN_M    0x3FF
-#define IP_HDR_LEN_V(x) ((x) << IP_HDR_LEN_S)
-#define IP_HDR_LEN_G(x) (((x) >> IP_HDR_LEN_S) & IP_HDR_LEN_M)
-
-#define ETH_HDR_LEN_S    26
-#define ETH_HDR_LEN_M    0x1F
-#define ETH_HDR_LEN_V(x) ((x) << ETH_HDR_LEN_S)
-#define ETH_HDR_LEN_G(x) (((x) >> ETH_HDR_LEN_S) & ETH_HDR_LEN_M)
-
-/* cpl_pass_accept_req.l2info fields */
-#define SYN_MAC_IDX_S    0
-#define SYN_MAC_IDX_M    0x1FF
-#define SYN_MAC_IDX_V(x) ((x) << SYN_MAC_IDX_S)
-#define SYN_MAC_IDX_G(x) (((x) >> SYN_MAC_IDX_S) & SYN_MAC_IDX_M)
-
-#define SYN_XACT_MATCH_S    9
-#define SYN_XACT_MATCH_V(x) ((x) << SYN_XACT_MATCH_S)
-#define SYN_XACT_MATCH_F    SYN_XACT_MATCH_V(1U)
-
-#define SYN_INTF_S    12
-#define SYN_INTF_M    0xF
-#define SYN_INTF_V(x) ((x) << SYN_INTF_S)
-#define SYN_INTF_G(x) (((x) >> SYN_INTF_S) & SYN_INTF_M)
-
-struct ulptx_idata {
-	__be32 cmd_more;
-	__be32 len;
-};
-
-#define ULPTX_NSGE_S    0
-#define ULPTX_NSGE_M    0xFFFF
-#define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S)
-
-#define RX_DACK_MODE_S    29
-#define RX_DACK_MODE_M    0x3
-#define RX_DACK_MODE_V(x) ((x) << RX_DACK_MODE_S)
-#define RX_DACK_MODE_G(x) (((x) >> RX_DACK_MODE_S) & RX_DACK_MODE_M)
-
-#define RX_DACK_CHANGE_S    31
-#define RX_DACK_CHANGE_V(x) ((x) << RX_DACK_CHANGE_S)
-#define RX_DACK_CHANGE_F    RX_DACK_CHANGE_V(1U)
-
-enum {                     /* TCP congestion control algorithms */
-	CONG_ALG_RENO,
-	CONG_ALG_TAHOE,
-	CONG_ALG_NEWRENO,
-	CONG_ALG_HIGHSPEED
-};
-
-#define CONG_CNTRL_S    14
-#define CONG_CNTRL_M    0x3
-#define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S)
-#define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M)
-
-#define T5_ISS_S    18
-#define T5_ISS_V(x) ((x) << T5_ISS_S)
-#define T5_ISS_F    T5_ISS_V(1U)
-
 #endif /* _T4FW_RI_API_H_ */

diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
new file mode 100644
index 0000000..6e7d27a
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/Kconfig

@@ -0,0 +1,7 @@
+config INFINIBAND_I40IW
+	tristate "Intel(R) Ethernet X722 iWARP Driver"
+	depends on INET && I40E
+	select GENERIC_ALLOCATOR
+	---help---
+	Intel(R) Ethernet X722 iWARP Driver
+	INET && I40IW && INFINIBAND && I40E

diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
new file mode 100644
index 0000000..90068c0
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/Makefile

@@ -0,0 +1,9 @@
+ccflags-y :=  -Idrivers/net/ethernet/intel/i40e
+
+obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
+
+i40iw-objs :=\
+               i40iw_cm.o i40iw_ctrl.o \
+               i40iw_hmc.o i40iw_hw.o i40iw_main.o  \
+               i40iw_pble.o i40iw_puda.o i40iw_uk.o i40iw_utils.o \
+               i40iw_verbs.o i40iw_virtchnl.o i40iw_vf.o

diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
new file mode 100644
index 0000000..8197676
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw.h

@@ -0,0 +1,570 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_IW_H
+#define I40IW_IW_H
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/crc32c.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/iw_cm.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
+#include <crypto/hash.h>
+
+#include "i40iw_status.h"
+#include "i40iw_osdep.h"
+#include "i40iw_d.h"
+#include "i40iw_hmc.h"
+
+#include <i40e_client.h>
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_ucontext.h"
+#include "i40iw_pble.h"
+#include "i40iw_verbs.h"
+#include "i40iw_cm.h"
+#include "i40iw_user.h"
+#include "i40iw_puda.h"
+
+#define I40IW_FW_VERSION  2
+#define I40IW_HW_VERSION  2
+
+#define I40IW_ARP_ADD     1
+#define I40IW_ARP_DELETE  2
+#define I40IW_ARP_RESOLVE 3
+
+#define I40IW_MACIP_ADD     1
+#define I40IW_MACIP_DELETE  2
+
+#define IW_CCQ_SIZE         (I40IW_CQP_SW_SQSIZE_2048 + 1)
+#define IW_CEQ_SIZE         2048
+#define IW_AEQ_SIZE         2048
+
+#define RX_BUF_SIZE            (1536 + 8)
+#define IW_REG0_SIZE           (4 * 1024)
+#define IW_TX_TIMEOUT          (6 * HZ)
+#define IW_FIRST_QPN           1
+#define IW_SW_CONTEXT_ALIGN    1024
+
+#define MAX_DPC_ITERATIONS		128
+
+#define I40IW_EVENT_TIMEOUT		100000
+#define I40IW_VCHNL_EVENT_TIMEOUT	100000
+
+#define	I40IW_NO_VLAN			0xffff
+#define	I40IW_NO_QSET			0xffff
+
+/* access to mcast filter list */
+#define IW_ADD_MCAST false
+#define IW_DEL_MCAST true
+
+#define I40IW_DRV_OPT_ENABLE_MPA_VER_0     0x00000001
+#define I40IW_DRV_OPT_DISABLE_MPA_CRC      0x00000002
+#define I40IW_DRV_OPT_DISABLE_FIRST_WRITE  0x00000004
+#define I40IW_DRV_OPT_DISABLE_INTF         0x00000008
+#define I40IW_DRV_OPT_ENABLE_MSI           0x00000010
+#define I40IW_DRV_OPT_DUAL_LOGICAL_PORT    0x00000020
+#define I40IW_DRV_OPT_NO_INLINE_DATA       0x00000080
+#define I40IW_DRV_OPT_DISABLE_INT_MOD      0x00000100
+#define I40IW_DRV_OPT_DISABLE_VIRT_WQ      0x00000200
+#define I40IW_DRV_OPT_ENABLE_PAU           0x00000400
+#define I40IW_DRV_OPT_MCAST_LOGPORT_MAP    0x00000800
+
+#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
+#define IW_CFG_FPM_QP_COUNT		32768
+
+#define I40IW_MTU_TO_MSS		40
+#define I40IW_DEFAULT_MSS		1460
+
+struct i40iw_cqp_compl_info {
+	u32 op_ret_val;
+	u16 maj_err_code;
+	u16 min_err_code;
+	bool error;
+	u8 op_code;
+};
+
+#define i40iw_pr_err(fmt, args ...) pr_err("%s: "fmt, __func__, ## args)
+
+#define i40iw_pr_info(fmt, args ...) pr_info("%s: " fmt, __func__, ## args)
+
+#define i40iw_pr_warn(fmt, args ...) pr_warn("%s: " fmt, __func__, ## args)
+
+struct i40iw_cqp_request {
+	struct cqp_commands_info info;
+	wait_queue_head_t waitq;
+	struct list_head list;
+	atomic_t refcount;
+	void (*callback_fcn)(struct i40iw_cqp_request*, u32);
+	void *param;
+	struct i40iw_cqp_compl_info compl_info;
+	bool waiting;
+	bool request_done;
+	bool dynamic;
+};
+
+struct i40iw_cqp {
+	struct i40iw_sc_cqp sc_cqp;
+	spinlock_t req_lock; /*cqp request list */
+	wait_queue_head_t waitq;
+	struct i40iw_dma_mem sq;
+	struct i40iw_dma_mem host_ctx;
+	u64 *scratch_array;
+	struct i40iw_cqp_request *cqp_requests;
+	struct list_head cqp_avail_reqs;
+	struct list_head cqp_pending_reqs;
+};
+
+struct i40iw_device;
+
+struct i40iw_ccq {
+	struct i40iw_sc_cq sc_cq;
+	spinlock_t lock; /* ccq control */
+	wait_queue_head_t waitq;
+	struct i40iw_dma_mem mem_cq;
+	struct i40iw_dma_mem shadow_area;
+};
+
+struct i40iw_ceq {
+	struct i40iw_sc_ceq sc_ceq;
+	struct i40iw_dma_mem mem;
+	u32 irq;
+	u32 msix_idx;
+	struct i40iw_device *iwdev;
+	struct tasklet_struct dpc_tasklet;
+};
+
+struct i40iw_aeq {
+	struct i40iw_sc_aeq sc_aeq;
+	struct i40iw_dma_mem mem;
+};
+
+struct i40iw_arp_entry {
+	u32 ip_addr[4];
+	u8 mac_addr[ETH_ALEN];
+};
+
+enum init_completion_state {
+	INVALID_STATE = 0,
+	INITIAL_STATE,
+	CQP_CREATED,
+	HMC_OBJS_CREATED,
+	PBLE_CHUNK_MEM,
+	CCQ_CREATED,
+	AEQ_CREATED,
+	CEQ_CREATED,
+	ILQ_CREATED,
+	IEQ_CREATED,
+	INET_NOTIFIER,
+	IP_ADDR_REGISTERED,
+	RDMA_DEV_REGISTERED
+};
+
+struct i40iw_msix_vector {
+	u32 idx;
+	u32 irq;
+	u32 cpu_affinity;
+	u32 ceq_id;
+};
+
+#define I40IW_MSIX_TABLE_SIZE   65
+
+struct virtchnl_work {
+	struct work_struct work;
+	union {
+		struct i40iw_cqp_request *cqp_request;
+		struct i40iw_virtchnl_work_info work_info;
+	};
+};
+
+struct i40e_qvlist_info;
+
+struct i40iw_device {
+	struct i40iw_ib_device *iwibdev;
+	struct net_device *netdev;
+	wait_queue_head_t vchnl_waitq;
+	struct i40iw_sc_dev sc_dev;
+	struct i40iw_handler *hdl;
+	struct i40e_info *ldev;
+	struct i40e_client *client;
+	struct i40iw_hw hw;
+	struct i40iw_cm_core cm_core;
+	unsigned long *mem_resources;
+	unsigned long *allocated_qps;
+	unsigned long *allocated_cqs;
+	unsigned long *allocated_mrs;
+	unsigned long *allocated_pds;
+	unsigned long *allocated_arps;
+	struct i40iw_qp **qp_table;
+	bool msix_shared;
+	u32 msix_count;
+	struct i40iw_msix_vector *iw_msixtbl;
+	struct i40e_qvlist_info *iw_qvlist;
+
+	struct i40iw_hmc_pble_rsrc *pble_rsrc;
+	struct i40iw_arp_entry *arp_table;
+	struct i40iw_cqp cqp;
+	struct i40iw_ccq ccq;
+	u32 ceqs_count;
+	struct i40iw_ceq *ceqlist;
+	struct i40iw_aeq aeq;
+	u32 arp_table_size;
+	u32 next_arp_index;
+	spinlock_t resource_lock; /* hw resource access */
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 of_device_registered;
+
+	u32 device_cap_flags;
+	unsigned long db_start;
+	u8 resource_profile;
+	u8 max_rdma_vfs;
+	u8 max_enabled_vfs;
+	u8 max_sge;
+	u8 iw_status;
+	u8 send_term_ok;
+	bool push_mode;		/* Initialized from parameter passed to driver */
+
+	/* x710 specific */
+	struct mutex pbl_mutex;
+	struct tasklet_struct dpc_tasklet;
+	struct workqueue_struct *virtchnl_wq;
+	struct virtchnl_work virtchnl_w[I40IW_MAX_PE_ENABLED_VF_COUNT];
+	struct i40iw_dma_mem obj_mem;
+	struct i40iw_dma_mem obj_next;
+	u8 *hmc_info_mem;
+	u32 sd_type;
+	struct workqueue_struct *param_wq;
+	atomic_t params_busy;
+	u32 mss;
+	enum init_completion_state init_state;
+	u16 mac_ip_table_idx;
+	atomic_t vchnl_msgs;
+	u32 max_mr;
+	u32 max_qp;
+	u32 max_cq;
+	u32 max_pd;
+	u32 next_qp;
+	u32 next_cq;
+	u32 next_pd;
+	u32 max_mr_size;
+	u32 max_qp_wr;
+	u32 max_cqe;
+	u32 mr_stagmask;
+	u32 mpa_version;
+	bool dcb;
+};
+
+struct i40iw_ib_device {
+	struct ib_device ibdev;
+	struct i40iw_device *iwdev;
+};
+
+struct i40iw_handler {
+	struct list_head list;
+	struct i40e_client *client;
+	struct i40iw_device device;
+	struct i40e_info ldev;
+};
+
+/**
+ * to_iwdev - get device
+ * @ibdev: ib device
+ **/
+static inline struct i40iw_device *to_iwdev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct i40iw_ib_device, ibdev)->iwdev;
+}
+
+/**
+ * to_ucontext - get user context
+ * @ibucontext: ib user context
+ **/
+static inline struct i40iw_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct i40iw_ucontext, ibucontext);
+}
+
+/**
+ * to_iwpd - get protection domain
+ * @ibpd: ib pd
+ **/
+static inline struct i40iw_pd *to_iwpd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct i40iw_pd, ibpd);
+}
+
+/**
+ * to_iwmr - get device memory region
+ * @ibdev: ib memory region
+ **/
+static inline struct i40iw_mr *to_iwmr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct i40iw_mr, ibmr);
+}
+
+/**
+ * to_iwmr_from_ibfmr - get device memory region
+ * @ibfmr: ib fmr
+ **/
+static inline struct i40iw_mr *to_iwmr_from_ibfmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct i40iw_mr, ibfmr);
+}
+
+/**
+ * to_iwmw - get device memory window
+ * @ibmw: ib memory window
+ **/
+static inline struct i40iw_mr *to_iwmw(struct ib_mw *ibmw)
+{
+	return container_of(ibmw, struct i40iw_mr, ibmw);
+}
+
+/**
+ * to_iwcq - get completion queue
+ * @ibcq: ib cqdevice
+ **/
+static inline struct i40iw_cq *to_iwcq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct i40iw_cq, ibcq);
+}
+
+/**
+ * to_iwqp - get device qp
+ * @ibqp: ib qp
+ **/
+static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct i40iw_qp, ibqp);
+}
+
+/* i40iw.c */
+void i40iw_add_ref(struct ib_qp *);
+void i40iw_rem_ref(struct ib_qp *);
+struct ib_qp *i40iw_get_qp(struct ib_device *, int);
+
+void i40iw_flush_wqes(struct i40iw_device *iwdev,
+		      struct i40iw_qp *qp);
+
+void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
+			    unsigned char *mac_addr,
+			    __be32 *ip_addr,
+			    bool ipv4,
+			    u32 action);
+
+int i40iw_manage_apbvt(struct i40iw_device *iwdev,
+		       u16 accel_local_port,
+		       bool add_port);
+
+struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait);
+void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
+void i40iw_put_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
+
+/**
+ * i40iw_alloc_resource - allocate a resource
+ * @iwdev: device pointer
+ * @resource_array: resource bit array:
+ * @max_resources: maximum resource number
+ * @req_resources_num: Allocated resource number
+ * @next: next free id
+ **/
+static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
+				       unsigned long *resource_array,
+				       u32 max_resources,
+				       u32 *req_resource_num,
+				       u32 *next)
+{
+	u32 resource_num;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwdev->resource_lock, flags);
+	resource_num = find_next_zero_bit(resource_array, max_resources, *next);
+	if (resource_num >= max_resources) {
+		resource_num = find_first_zero_bit(resource_array, max_resources);
+		if (resource_num >= max_resources) {
+			spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+			return -EOVERFLOW;
+		}
+	}
+	set_bit(resource_num, resource_array);
+	*next = resource_num + 1;
+	if (*next == max_resources)
+		*next = 0;
+	spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+	*req_resource_num = resource_num;
+
+	return 0;
+}
+
+/**
+ * i40iw_is_resource_allocated - detrmine if resource is
+ * allocated
+ * @iwdev: device pointer
+ * @resource_array: resource array for the resource_num
+ * @resource_num: resource number to check
+ **/
+static inline bool i40iw_is_resource_allocated(struct i40iw_device *iwdev,
+					       unsigned long *resource_array,
+					       u32 resource_num)
+{
+	bool bit_is_set;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwdev->resource_lock, flags);
+
+	bit_is_set = test_bit(resource_num, resource_array);
+	spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+
+	return bit_is_set;
+}
+
+/**
+ * i40iw_free_resource - free a resource
+ * @iwdev: device pointer
+ * @resource_array: resource array for the resource_num
+ * @resource_num: resource number to free
+ **/
+static inline void i40iw_free_resource(struct i40iw_device *iwdev,
+				       unsigned long *resource_array,
+				       u32 resource_num)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwdev->resource_lock, flags);
+	clear_bit(resource_num, resource_array);
+	spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+}
+
+/**
+ * to_iwhdl - Get the handler from the device pointer
+ * @iwdev: device pointer
+ **/
+static inline struct i40iw_handler *to_iwhdl(struct i40iw_device *iw_dev)
+{
+	return container_of(iw_dev, struct i40iw_handler, device);
+}
+
+struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev);
+
+/**
+ * iw_init_resources -
+ */
+u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev);
+
+int i40iw_register_rdma_device(struct i40iw_device *iwdev);
+void i40iw_port_ibevent(struct i40iw_device *iwdev);
+int i40iw_cm_disconn(struct i40iw_qp *);
+void i40iw_cm_disconn_worker(void *);
+int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *,
+		     struct sk_buff *);
+
+enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
+					   struct i40iw_cqp_request *cqp_request);
+enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
+					  u8 *mac_addr, u8 *mac_index);
+int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
+
+void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
+void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
+void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
+			struct i40iw_modify_qp_info *info, bool wait);
+
+enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
+					  struct i40iw_cm_info *cminfo,
+					  enum i40iw_quad_entry_type etype,
+					  enum i40iw_quad_hash_manage_type mtype,
+					  void *cmnode,
+					  bool wait);
+void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf);
+void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp);
+void i40iw_free_qp_resources(struct i40iw_device *iwdev,
+			     struct i40iw_qp *iwqp,
+			     u32 qp_num);
+enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
+					     struct i40iw_dma_mem *memptr,
+					     u32 size, u32 mask);
+
+void i40iw_request_reset(struct i40iw_device *iwdev);
+void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev);
+void i40iw_setup_cm_core(struct i40iw_device *iwdev);
+void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core);
+void i40iw_process_ceq(struct i40iw_device *, struct i40iw_ceq *iwceq);
+void i40iw_process_aeq(struct i40iw_device *);
+void i40iw_next_iw_state(struct i40iw_qp *iwqp,
+			 u8 state, u8 del_hash,
+			 u8 term, u8 term_len);
+int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack);
+struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
+				      u16 rem_port,
+				      u32 *rem_addr,
+				      u16 loc_port,
+				      u32 *loc_addr,
+				      bool add_refcnt);
+
+enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
+					   struct i40iw_sc_qp *qp,
+					   struct i40iw_qp_flush_info *info,
+					   bool wait);
+
+void i40iw_copy_ip_ntohl(u32 *dst, u32 *src);
+struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
+				u64 addr,
+				u64 size,
+				int acc,
+				u64 *iova_start);
+
+int i40iw_inetaddr_event(struct notifier_block *notifier,
+			 unsigned long event,
+			 void *ptr);
+int i40iw_inet6addr_event(struct notifier_block *notifier,
+			  unsigned long event,
+			  void *ptr);
+int i40iw_net_event(struct notifier_block *notifier,
+		    unsigned long event,
+		    void *ptr);
+
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
new file mode 100644
index 0000000..38f917a
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c

@@ -0,0 +1,4137 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/atomic.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/notifier.h>
+#include <linux/net.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/random.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+#include <linux/highmem.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/ip_fib.h>
+#include <net/tcp.h>
+#include <asm/checksum.h>
+
+#include "i40iw.h"
+
+static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *);
+static void i40iw_cm_post_event(struct i40iw_cm_event *event);
+static void i40iw_disconnect_worker(struct work_struct *work);
+
+/**
+ * i40iw_free_sqbuf - put back puda buffer if refcount = 0
+ * @dev: FPK device
+ * @buf: puda buffer to free
+ */
+void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp)
+{
+	struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp;
+	struct i40iw_puda_rsrc *ilq = dev->ilq;
+
+	if (!atomic_dec_return(&buf->refcount))
+		i40iw_puda_ret_bufpool(ilq, buf);
+}
+
+/**
+ * i40iw_derive_hw_ird_setting - Calculate IRD
+ *
+ * @cm_ird: IRD of connection's node
+ *
+ * The ird from the connection is rounded to a supported HW
+ * setting (2,8,32,64) and then encoded for ird_size field of
+ * qp_ctx
+ */
+static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
+{
+	u8 encoded_ird_size;
+	u8 pof2_cm_ird = 1;
+
+	/* round-off to next powerof2 */
+	while (pof2_cm_ird < cm_ird)
+		pof2_cm_ird *= 2;
+
+	/* ird_size field is encoded in qp_ctx */
+	switch (pof2_cm_ird) {
+	case I40IW_HW_IRD_SETTING_64:
+		encoded_ird_size = 3;
+		break;
+	case I40IW_HW_IRD_SETTING_32:
+	case I40IW_HW_IRD_SETTING_16:
+		encoded_ird_size = 2;
+		break;
+	case I40IW_HW_IRD_SETTING_8:
+	case I40IW_HW_IRD_SETTING_4:
+		encoded_ird_size = 1;
+		break;
+	case I40IW_HW_IRD_SETTING_2:
+	default:
+		encoded_ird_size = 0;
+		break;
+	}
+	return encoded_ird_size;
+}
+
+/**
+ * i40iw_record_ird_ord - Record IRD/ORD passed in
+ * @cm_node: connection's node
+ * @conn_ird: connection IRD
+ * @conn_ord: connection ORD
+ */
+static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u16 conn_ird, u16 conn_ord)
+{
+	if (conn_ird > I40IW_MAX_IRD_SIZE)
+		conn_ird = I40IW_MAX_IRD_SIZE;
+
+	if (conn_ord > I40IW_MAX_ORD_SIZE)
+		conn_ord = I40IW_MAX_ORD_SIZE;
+
+	cm_node->ird_size = conn_ird;
+	cm_node->ord_size = conn_ord;
+}
+
+/**
+ * i40iw_copy_ip_ntohl - change network to host ip
+ * @dst: host ip
+ * @src: big endian
+ */
+void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src)
+{
+	*dst++ = ntohl(*src++);
+	*dst++ = ntohl(*src++);
+	*dst++ = ntohl(*src++);
+	*dst = ntohl(*src);
+}
+
+/**
+ * i40iw_copy_ip_htonl - change host addr to network ip
+ * @dst: host ip
+ * @src: little endian
+ */
+static inline void i40iw_copy_ip_htonl(__be32 *dst, u32 *src)
+{
+	*dst++ = htonl(*src++);
+	*dst++ = htonl(*src++);
+	*dst++ = htonl(*src++);
+	*dst = htonl(*src);
+}
+
+/**
+ * i40iw_fill_sockaddr4 - get addr info for passive connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_fill_sockaddr4(struct i40iw_cm_node *cm_node,
+					struct iw_cm_event *event)
+{
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
+
+	laddr->sin_family = AF_INET;
+	raddr->sin_family = AF_INET;
+
+	laddr->sin_port = htons(cm_node->loc_port);
+	raddr->sin_port = htons(cm_node->rem_port);
+
+	laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
+	raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
+}
+
+/**
+ * i40iw_fill_sockaddr6 - get ipv6 addr info for passive side
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_fill_sockaddr6(struct i40iw_cm_node *cm_node,
+					struct iw_cm_event *event)
+{
+	struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
+	struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr;
+
+	laddr6->sin6_family = AF_INET6;
+	raddr6->sin6_family = AF_INET6;
+
+	laddr6->sin6_port = htons(cm_node->loc_port);
+	raddr6->sin6_port = htons(cm_node->rem_port);
+
+	i40iw_copy_ip_htonl(laddr6->sin6_addr.in6_u.u6_addr32,
+			    cm_node->loc_addr);
+	i40iw_copy_ip_htonl(raddr6->sin6_addr.in6_u.u6_addr32,
+			    cm_node->rem_addr);
+}
+
+/**
+ * i40iw_get_addr_info
+ * @cm_node: contains ip/tcp info
+ * @cm_info: to get a copy of the cm_node ip/tcp info
+*/
+static void i40iw_get_addr_info(struct i40iw_cm_node *cm_node,
+				struct i40iw_cm_info *cm_info)
+{
+	cm_info->ipv4 = cm_node->ipv4;
+	cm_info->vlan_id = cm_node->vlan_id;
+	memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr));
+	memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
+	cm_info->loc_port = cm_node->loc_port;
+	cm_info->rem_port = cm_node->rem_port;
+}
+
+/**
+ * i40iw_get_cmevent_info - for cm event upcall
+ * @cm_node: connection's node
+ * @cm_id: upper layers cm struct for the event
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_get_cmevent_info(struct i40iw_cm_node *cm_node,
+					  struct iw_cm_id *cm_id,
+					  struct iw_cm_event *event)
+{
+	memcpy(&event->local_addr, &cm_id->m_local_addr,
+	       sizeof(event->local_addr));
+	memcpy(&event->remote_addr, &cm_id->m_remote_addr,
+	       sizeof(event->remote_addr));
+	if (cm_node) {
+		event->private_data = (void *)cm_node->pdata_buf;
+		event->private_data_len = (u8)cm_node->pdata.size;
+		event->ird = cm_node->ird_size;
+		event->ord = cm_node->ord_size;
+	}
+}
+
+/**
+ * i40iw_send_cm_event - upcall cm's event handler
+ * @cm_node: connection's node
+ * @cm_id: upper layer's cm info struct
+ * @type: Event type to indicate
+ * @status: status for the event type
+ */
+static int i40iw_send_cm_event(struct i40iw_cm_node *cm_node,
+			       struct iw_cm_id *cm_id,
+			       enum iw_cm_event_type type,
+			       int status)
+{
+	struct iw_cm_event event;
+
+	memset(&event, 0, sizeof(event));
+	event.event = type;
+	event.status = status;
+	switch (type) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		if (cm_node->ipv4)
+			i40iw_fill_sockaddr4(cm_node, &event);
+		else
+			i40iw_fill_sockaddr6(cm_node, &event);
+		event.provider_data = (void *)cm_node;
+		event.private_data = (void *)cm_node->pdata_buf;
+		event.private_data_len = (u8)cm_node->pdata.size;
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		i40iw_get_cmevent_info(cm_node, cm_id, &event);
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		event.ird = cm_node->ird_size;
+		event.ord = cm_node->ord_size;
+		break;
+	case IW_CM_EVENT_DISCONNECT:
+		break;
+	case IW_CM_EVENT_CLOSE:
+		break;
+	default:
+		i40iw_pr_err("event type received type = %d\n", type);
+		return -1;
+	}
+	return cm_id->event_handler(cm_id, &event);
+}
+
+/**
+ * i40iw_create_event - create cm event
+ * @cm_node: connection's node
+ * @type: Event type to generate
+ */
+static struct i40iw_cm_event *i40iw_create_event(struct i40iw_cm_node *cm_node,
+						 enum i40iw_cm_event_type type)
+{
+	struct i40iw_cm_event *event;
+
+	if (!cm_node->cm_id)
+		return NULL;
+
+	event = kzalloc(sizeof(*event), GFP_ATOMIC);
+
+	if (!event)
+		return NULL;
+
+	event->type = type;
+	event->cm_node = cm_node;
+	memcpy(event->cm_info.rem_addr, cm_node->rem_addr, sizeof(event->cm_info.rem_addr));
+	memcpy(event->cm_info.loc_addr, cm_node->loc_addr, sizeof(event->cm_info.loc_addr));
+	event->cm_info.rem_port = cm_node->rem_port;
+	event->cm_info.loc_port = cm_node->loc_port;
+	event->cm_info.cm_id = cm_node->cm_id;
+
+	i40iw_debug(cm_node->dev,
+		    I40IW_DEBUG_CM,
+		    "node=%p event=%p type=%u dst=%pI4 src=%pI4\n",
+		    cm_node,
+		    event,
+		    type,
+		    event->cm_info.loc_addr,
+		    event->cm_info.rem_addr);
+
+	i40iw_cm_post_event(event);
+	return event;
+}
+
+/**
+ * i40iw_free_retrans_entry - free send entry
+ * @cm_node: connection's node
+ */
+static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_sc_dev *dev = cm_node->dev;
+	struct i40iw_timer_entry *send_entry;
+
+	send_entry = cm_node->send_entry;
+	if (send_entry) {
+		cm_node->send_entry = NULL;
+		i40iw_free_sqbuf(dev, (void *)send_entry->sqbuf);
+		kfree(send_entry);
+		atomic_dec(&cm_node->ref_count);
+	}
+}
+
+/**
+ * i40iw_cleanup_retrans_entry - free send entry with lock
+ * @cm_node: connection's node
+ */
+static void i40iw_cleanup_retrans_entry(struct i40iw_cm_node *cm_node)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+	i40iw_free_retrans_entry(cm_node);
+	spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+}
+
+static bool is_remote_ne020_or_chelsio(struct i40iw_cm_node *cm_node)
+{
+	if ((cm_node->rem_mac[0] == 0x0) &&
+	    (((cm_node->rem_mac[1] == 0x12) && (cm_node->rem_mac[2] == 0x55)) ||
+	     ((cm_node->rem_mac[1] == 0x07 && (cm_node->rem_mac[2] == 0x43)))))
+		return true;
+	return false;
+}
+
+/**
+ * i40iw_form_cm_frame - get a free packet and build frame
+ * @cm_node: connection's node ionfo to use in frame
+ * @options: pointer to options info
+ * @hdr: pointer mpa header
+ * @pdata: pointer to private data
+ * @flags:  indicates FIN or ACK
+ */
+static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
+						  struct i40iw_kmem_info *options,
+						  struct i40iw_kmem_info *hdr,
+						  struct i40iw_kmem_info *pdata,
+						  u8 flags)
+{
+	struct i40iw_puda_buf *sqbuf;
+	struct i40iw_sc_dev *dev = cm_node->dev;
+	u8 *buf;
+
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	struct ipv6hdr *ip6h;
+	struct ethhdr *ethh;
+	u16 packetsize;
+	u16 eth_hlen = ETH_HLEN;
+	u32 opts_len = 0;
+	u32 pd_len = 0;
+	u32 hdr_len = 0;
+
+	sqbuf = i40iw_puda_get_bufpool(dev->ilq);
+	if (!sqbuf)
+		return NULL;
+	buf = sqbuf->mem.va;
+
+	if (options)
+		opts_len = (u32)options->size;
+
+	if (hdr)
+		hdr_len = hdr->size;
+
+	if (pdata) {
+		pd_len = pdata->size;
+		if (!is_remote_ne020_or_chelsio(cm_node))
+			pd_len += MPA_ZERO_PAD_LEN;
+	}
+
+	if (cm_node->vlan_id < VLAN_TAG_PRESENT)
+		eth_hlen += 4;
+
+	if (cm_node->ipv4)
+		packetsize = sizeof(*iph) + sizeof(*tcph);
+	else
+		packetsize = sizeof(*ip6h) + sizeof(*tcph);
+	packetsize += opts_len + hdr_len + pd_len;
+
+	memset(buf, 0x00, eth_hlen + packetsize);
+
+	sqbuf->totallen = packetsize + eth_hlen;
+	sqbuf->maclen = eth_hlen;
+	sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+	sqbuf->scratch = (void *)cm_node;
+
+	ethh = (struct ethhdr *)buf;
+	buf += eth_hlen;
+
+	if (cm_node->ipv4) {
+		sqbuf->ipv4 = true;
+
+		iph = (struct iphdr *)buf;
+		buf += sizeof(*iph);
+		tcph = (struct tcphdr *)buf;
+		buf += sizeof(*tcph);
+
+		ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+		ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+		if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+			((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
+			((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+
+			((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP);
+		} else {
+			ethh->h_proto = htons(ETH_P_IP);
+		}
+
+		iph->version = IPVERSION;
+		iph->ihl = 5;	/* 5 * 4Byte words, IP headr len */
+		iph->tos = 0;
+		iph->tot_len = htons(packetsize);
+		iph->id = htons(++cm_node->tcp_cntxt.loc_id);
+
+		iph->frag_off = htons(0x4000);
+		iph->ttl = 0x40;
+		iph->protocol = IPPROTO_TCP;
+		iph->saddr = htonl(cm_node->loc_addr[0]);
+		iph->daddr = htonl(cm_node->rem_addr[0]);
+	} else {
+		sqbuf->ipv4 = false;
+		ip6h = (struct ipv6hdr *)buf;
+		buf += sizeof(*ip6h);
+		tcph = (struct tcphdr *)buf;
+		buf += sizeof(*tcph);
+
+		ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+		ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+		if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+			((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
+			((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+			((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6);
+		} else {
+			ethh->h_proto = htons(ETH_P_IPV6);
+		}
+		ip6h->version = 6;
+		ip6h->flow_lbl[0] = 0;
+		ip6h->flow_lbl[1] = 0;
+		ip6h->flow_lbl[2] = 0;
+		ip6h->payload_len = htons(packetsize - sizeof(*ip6h));
+		ip6h->nexthdr = 6;
+		ip6h->hop_limit = 128;
+		i40iw_copy_ip_htonl(ip6h->saddr.in6_u.u6_addr32,
+				    cm_node->loc_addr);
+		i40iw_copy_ip_htonl(ip6h->daddr.in6_u.u6_addr32,
+				    cm_node->rem_addr);
+	}
+
+	tcph->source = htons(cm_node->loc_port);
+	tcph->dest = htons(cm_node->rem_port);
+
+	tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+	if (flags & SET_ACK) {
+		cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+		tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+		tcph->ack = 1;
+	} else {
+		tcph->ack_seq = 0;
+	}
+
+	if (flags & SET_SYN) {
+		cm_node->tcp_cntxt.loc_seq_num++;
+		tcph->syn = 1;
+	} else {
+		cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+	}
+
+	if (flags & SET_FIN) {
+		cm_node->tcp_cntxt.loc_seq_num++;
+		tcph->fin = 1;
+	}
+
+	if (flags & SET_RST)
+		tcph->rst = 1;
+
+	tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+	sqbuf->tcphlen = tcph->doff << 2;
+	tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+	tcph->urg_ptr = 0;
+
+	if (opts_len) {
+		memcpy(buf, options->addr, opts_len);
+		buf += opts_len;
+	}
+
+	if (hdr_len) {
+		memcpy(buf, hdr->addr, hdr_len);
+		buf += hdr_len;
+	}
+
+	if (pd_len)
+		memcpy(buf, pdata->addr, pd_len);
+
+	atomic_set(&sqbuf->refcount, 1);
+
+	return sqbuf;
+}
+
+/**
+ * i40iw_send_reset - Send RST packet
+ * @cm_node: connection's node
+ */
+static int i40iw_send_reset(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_puda_buf *sqbuf;
+	int flags = SET_RST | SET_ACK;
+
+	sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, flags);
+	if (!sqbuf) {
+		i40iw_pr_err("no sqbuf\n");
+		return -1;
+	}
+
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 0, 1);
+}
+
+/**
+ * i40iw_active_open_err - send event for active side cm error
+ * @cm_node: connection's node
+ * @reset: Flag to send reset or not
+ */
+static void i40iw_active_open_err(struct i40iw_cm_node *cm_node, bool reset)
+{
+	i40iw_cleanup_retrans_entry(cm_node);
+	cm_node->cm_core->stats_connect_errs++;
+	if (reset) {
+		i40iw_debug(cm_node->dev,
+			    I40IW_DEBUG_CM,
+			    "%s cm_node=%p state=%d\n",
+			    __func__,
+			    cm_node,
+			    cm_node->state);
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+	}
+
+	cm_node->state = I40IW_CM_STATE_CLOSED;
+	i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+}
+
+/**
+ * i40iw_passive_open_err - handle passive side cm error
+ * @cm_node: connection's node
+ * @reset: send reset or just free cm_node
+ */
+static void i40iw_passive_open_err(struct i40iw_cm_node *cm_node, bool reset)
+{
+	i40iw_cleanup_retrans_entry(cm_node);
+	cm_node->cm_core->stats_passive_errs++;
+	cm_node->state = I40IW_CM_STATE_CLOSED;
+	i40iw_debug(cm_node->dev,
+		    I40IW_DEBUG_CM,
+		    "%s cm_node=%p state =%d\n",
+		    __func__,
+		    cm_node,
+		    cm_node->state);
+	if (reset)
+		i40iw_send_reset(cm_node);
+	else
+		i40iw_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * i40iw_event_connect_error - to create connect error event
+ * @event: cm information for connect event
+ */
+static void i40iw_event_connect_error(struct i40iw_cm_event *event)
+{
+	struct i40iw_qp *iwqp;
+	struct iw_cm_id *cm_id;
+
+	cm_id = event->cm_node->cm_id;
+	if (!cm_id)
+		return;
+
+	iwqp = cm_id->provider_data;
+
+	if (!iwqp || !iwqp->iwdev)
+		return;
+
+	iwqp->cm_id = NULL;
+	cm_id->provider_data = NULL;
+	i40iw_send_cm_event(event->cm_node, cm_id,
+			    IW_CM_EVENT_CONNECT_REPLY,
+			    -ECONNRESET);
+	cm_id->rem_ref(cm_id);
+	i40iw_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * i40iw_process_options
+ * @cm_node: connection's node
+ * @optionsloc: point to start of options
+ * @optionsize: size of all options
+ * @syn_packet: flag if syn packet
+ */
+static int i40iw_process_options(struct i40iw_cm_node *cm_node,
+				 u8 *optionsloc,
+				 u32 optionsize,
+				 u32 syn_packet)
+{
+	u32 tmp;
+	u32 offset = 0;
+	union all_known_options *all_options;
+	char got_mss_option = 0;
+
+	while (offset < optionsize) {
+		all_options = (union all_known_options *)(optionsloc + offset);
+		switch (all_options->as_base.optionnum) {
+		case OPTION_NUMBER_END:
+			offset = optionsize;
+			break;
+		case OPTION_NUMBER_NONE:
+			offset += 1;
+			continue;
+		case OPTION_NUMBER_MSS:
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "%s: MSS Length: %d Offset: %d Size: %d\n",
+				    __func__,
+				    all_options->as_mss.length,
+				    offset,
+				    optionsize);
+			got_mss_option = 1;
+			if (all_options->as_mss.length != 4)
+				return -1;
+			tmp = ntohs(all_options->as_mss.mss);
+			if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss)
+				cm_node->tcp_cntxt.mss = tmp;
+			break;
+		case OPTION_NUMBER_WINDOW_SCALE:
+			cm_node->tcp_cntxt.snd_wscale =
+			    all_options->as_windowscale.shiftcount;
+			break;
+		default:
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "TCP Option not understood: %x\n",
+				    all_options->as_base.optionnum);
+			break;
+		}
+		offset += all_options->as_base.length;
+	}
+	if (!got_mss_option && syn_packet)
+		cm_node->tcp_cntxt.mss = I40IW_CM_DEFAULT_MSS;
+	return 0;
+}
+
+/**
+ * i40iw_handle_tcp_options -
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ * @optionsize: size of options rcvd
+ * @passive: active or passive flag
+ */
+static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node,
+				    struct tcphdr *tcph,
+				    int optionsize,
+				    int passive)
+{
+	u8 *optionsloc = (u8 *)&tcph[1];
+
+	if (optionsize) {
+		if (i40iw_process_options(cm_node,
+					  optionsloc,
+					  optionsize,
+					  (u32)tcph->syn)) {
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "%s: Node %p, Sending RESET\n",
+				    __func__,
+				    cm_node);
+			if (passive)
+				i40iw_passive_open_err(cm_node, true);
+			else
+				i40iw_active_open_err(cm_node, true);
+			return -1;
+		}
+	}
+
+	cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
+	    cm_node->tcp_cntxt.snd_wscale;
+
+	if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
+		cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+	return 0;
+}
+
+/**
+ * i40iw_build_mpa_v1 - build a MPA V1 frame
+ * @cm_node: connection's node
+ * @mpa_key: to do read0 or write0
+ */
+static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node,
+			       void *start_addr,
+			       u8 mpa_key)
+{
+	struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
+
+	switch (mpa_key) {
+	case MPA_KEY_REQUEST:
+		memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+		break;
+	case MPA_KEY_REPLY:
+		memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+		break;
+	default:
+		break;
+	}
+	mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+	mpa_frame->rev = cm_node->mpa_frame_rev;
+	mpa_frame->priv_data_len = htons(cm_node->pdata.size);
+}
+
+/**
+ * i40iw_build_mpa_v2 - build a MPA V2 frame
+ * @cm_node: connection's node
+ * @start_addr: buffer start address
+ * @mpa_key: to do read0 or write0
+ */
+static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
+			       void *start_addr,
+			       u8 mpa_key)
+{
+	struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
+	struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+
+	/* initialize the upper 5 bytes of the frame */
+	i40iw_build_mpa_v1(cm_node, start_addr, mpa_key);
+	mpa_frame->flags |= IETF_MPA_V2_FLAG;
+	mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
+
+	/* initialize RTR msg */
+	if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+		rtr_msg->ctrl_ird = IETF_NO_IRD_ORD;
+		rtr_msg->ctrl_ord = IETF_NO_IRD_ORD;
+	} else {
+		rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+			IETF_NO_IRD_ORD : cm_node->ird_size;
+		rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+			IETF_NO_IRD_ORD : cm_node->ord_size;
+	}
+
+	rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER;
+	rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN;
+
+	switch (mpa_key) {
+	case MPA_KEY_REQUEST:
+		rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+		rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+		break;
+	case MPA_KEY_REPLY:
+		switch (cm_node->send_rdma0_op) {
+		case SEND_RDMA_WRITE_ZERO:
+			rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+			break;
+		case SEND_RDMA_READ_ZERO:
+			rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+	rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird);
+	rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord);
+}
+
+/**
+ * i40iw_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
+ * @cm_node: connection's node
+ * @mpa: mpa: data buffer
+ * @mpa_key: to do read0 or write0
+ */
+static int i40iw_cm_build_mpa_frame(struct i40iw_cm_node *cm_node,
+				    struct i40iw_kmem_info *mpa,
+				    u8 mpa_key)
+{
+	int hdr_len = 0;
+
+	switch (cm_node->mpa_frame_rev) {
+	case IETF_MPA_V1:
+		hdr_len = sizeof(struct ietf_mpa_v1);
+		i40iw_build_mpa_v1(cm_node, mpa->addr, mpa_key);
+		break;
+	case IETF_MPA_V2:
+		hdr_len = sizeof(struct ietf_mpa_v2);
+		i40iw_build_mpa_v2(cm_node, mpa->addr, mpa_key);
+		break;
+	default:
+		break;
+	}
+
+	return hdr_len;
+}
+
+/**
+ * i40iw_send_mpa_request - active node send mpa request to passive node
+ * @cm_node: connection's node
+ */
+static int i40iw_send_mpa_request(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_puda_buf *sqbuf;
+
+	if (!cm_node) {
+		i40iw_pr_err("cm_node == NULL\n");
+		return -1;
+	}
+
+	cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
+	cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
+							 &cm_node->mpa_hdr,
+							 MPA_KEY_REQUEST);
+	if (!cm_node->mpa_hdr.size) {
+		i40iw_pr_err("mpa size = %d\n", cm_node->mpa_hdr.size);
+		return -1;
+	}
+
+	sqbuf = i40iw_form_cm_frame(cm_node,
+				    NULL,
+				    &cm_node->mpa_hdr,
+				    &cm_node->pdata,
+				    SET_ACK);
+	if (!sqbuf) {
+		i40iw_pr_err("sq_buf == NULL\n");
+		return -1;
+	}
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_send_mpa_reject -
+ * @cm_node: connection's node
+ * @pdata: reject data for connection
+ * @plen: length of reject data
+ */
+static int i40iw_send_mpa_reject(struct i40iw_cm_node *cm_node,
+				 const void *pdata,
+				 u8 plen)
+{
+	struct i40iw_puda_buf *sqbuf;
+	struct i40iw_kmem_info priv_info;
+
+	cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
+	cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
+							 &cm_node->mpa_hdr,
+							 MPA_KEY_REPLY);
+
+	cm_node->mpa_frame.flags |= IETF_MPA_FLAGS_REJECT;
+	priv_info.addr = (void *)pdata;
+	priv_info.size = plen;
+
+	sqbuf = i40iw_form_cm_frame(cm_node,
+				    NULL,
+				    &cm_node->mpa_hdr,
+				    &priv_info,
+				    SET_ACK | SET_FIN);
+	if (!sqbuf) {
+		i40iw_pr_err("no sqbuf\n");
+		return -ENOMEM;
+	}
+	cm_node->state = I40IW_CM_STATE_FIN_WAIT1;
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * recv_mpa - process an IETF MPA frame
+ * @cm_node: connection's node
+ * @buffer: Data pointer
+ * @type: to return accept or reject
+ * @len: Len of mpa buffer
+ */
+static int i40iw_parse_mpa(struct i40iw_cm_node *cm_node, u8 *buffer, u32 *type, u32 len)
+{
+	struct ietf_mpa_v1 *mpa_frame;
+	struct ietf_mpa_v2 *mpa_v2_frame;
+	struct ietf_rtr_msg *rtr_msg;
+	int mpa_hdr_len;
+	int priv_data_len;
+
+	*type = I40IW_MPA_REQUEST_ACCEPT;
+
+	if (len < sizeof(struct ietf_mpa_v1)) {
+		i40iw_pr_err("ietf buffer small (%x)\n", len);
+		return -1;
+	}
+
+	mpa_frame = (struct ietf_mpa_v1 *)buffer;
+	mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+	priv_data_len = ntohs(mpa_frame->priv_data_len);
+
+	if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
+		i40iw_pr_err("large pri_data %d\n", priv_data_len);
+		return -1;
+	}
+	if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
+		i40iw_pr_err("unsupported mpa rev = %d\n", mpa_frame->rev);
+		return -1;
+	}
+	if (mpa_frame->rev > cm_node->mpa_frame_rev) {
+		i40iw_pr_err("rev %d\n", mpa_frame->rev);
+		return -1;
+	}
+	cm_node->mpa_frame_rev = mpa_frame->rev;
+
+	if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
+		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
+			i40iw_pr_err("Unexpected MPA Key received\n");
+			return -1;
+		}
+	} else {
+		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
+			i40iw_pr_err("Unexpected MPA Key received\n");
+			return -1;
+		}
+	}
+
+	if (priv_data_len + mpa_hdr_len > len) {
+		i40iw_pr_err("ietf buffer len(%x + %x != %x)\n",
+			     priv_data_len, mpa_hdr_len, len);
+		return -1;
+	}
+	if (len > MAX_CM_BUFFER) {
+		i40iw_pr_err("ietf buffer large len = %d\n", len);
+		return -1;
+	}
+
+	switch (mpa_frame->rev) {
+	case IETF_MPA_V2:{
+			u16 ird_size;
+			u16 ord_size;
+			u16 ctrl_ord;
+			u16 ctrl_ird;
+
+			mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
+			mpa_hdr_len += IETF_RTR_MSG_SIZE;
+			rtr_msg = &mpa_v2_frame->rtr_msg;
+
+			/* parse rtr message */
+			ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+			ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+			ird_size = ctrl_ird & IETF_NO_IRD_ORD;
+			ord_size = ctrl_ord & IETF_NO_IRD_ORD;
+
+			if (!(ctrl_ird & IETF_PEER_TO_PEER))
+				return -1;
+
+			if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
+				cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
+				goto negotiate_done;
+			}
+
+			if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
+				/* responder */
+				if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
+					cm_node->ird_size = 1;
+				if (cm_node->ord_size > ird_size)
+					cm_node->ord_size = ird_size;
+			} else {
+				/* initiator */
+				if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
+					return -1;
+				if (cm_node->ord_size > ird_size)
+					cm_node->ord_size = ird_size;
+
+				if (cm_node->ird_size < ord_size)
+					/* no resources available */
+					return -1;
+			}
+
+negotiate_done:
+			if (ctrl_ord & IETF_RDMA0_READ)
+				cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+			else if (ctrl_ord & IETF_RDMA0_WRITE)
+				cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+			else	/* Not supported RDMA0 operation */
+				return -1;
+			i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
+				    "MPAV2: Negotiated ORD: %d, IRD: %d\n",
+				    cm_node->ord_size, cm_node->ird_size);
+			break;
+		}
+		break;
+	case IETF_MPA_V1:
+	default:
+		break;
+	}
+
+	memcpy(cm_node->pdata_buf, buffer + mpa_hdr_len, priv_data_len);
+	cm_node->pdata.size = priv_data_len;
+
+	if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
+		*type = I40IW_MPA_REQUEST_REJECT;
+
+	if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
+		cm_node->snd_mark_en = true;
+
+	return 0;
+}
+
+/**
+ * i40iw_schedule_cm_timer
+ * @@cm_node: connection's node
+ * @sqbuf: buffer to send
+ * @type: if it es send ot close
+ * @send_retrans: if rexmits to be done
+ * @close_when_complete: is cm_node to be removed
+ *
+ * note - cm_node needs to be protected before calling this. Encase in:
+ *		i40iw_rem_ref_cm_node(cm_core, cm_node);
+ *		i40iw_schedule_cm_timer(...)
+ *		atomic_inc(&cm_node->ref_count);
+ */
+int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
+			    struct i40iw_puda_buf *sqbuf,
+			    enum i40iw_timer_type type,
+			    int send_retrans,
+			    int close_when_complete)
+{
+	struct i40iw_sc_dev *dev = cm_node->dev;
+	struct i40iw_cm_core *cm_core = cm_node->cm_core;
+	struct i40iw_timer_entry *new_send;
+	int ret = 0;
+	u32 was_timer_set;
+	unsigned long flags;
+
+	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
+	if (!new_send) {
+		i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+		return -ENOMEM;
+	}
+	new_send->retrycount = I40IW_DEFAULT_RETRYS;
+	new_send->retranscount = I40IW_DEFAULT_RETRANS;
+	new_send->sqbuf = sqbuf;
+	new_send->timetosend = jiffies;
+	new_send->type = type;
+	new_send->send_retrans = send_retrans;
+	new_send->close_when_complete = close_when_complete;
+
+	if (type == I40IW_TIMER_TYPE_CLOSE) {
+		new_send->timetosend += (HZ / 10);
+		if (cm_node->close_entry) {
+			kfree(new_send);
+			i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+			i40iw_pr_err("already close entry\n");
+			return -EINVAL;
+		}
+		cm_node->close_entry = new_send;
+	}
+
+	if (type == I40IW_TIMER_TYPE_SEND) {
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+		cm_node->send_entry = new_send;
+		atomic_inc(&cm_node->ref_count);
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+		new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT;
+
+		atomic_inc(&sqbuf->refcount);
+		i40iw_puda_send_buf(dev->ilq, sqbuf);
+		if (!send_retrans) {
+			i40iw_cleanup_retrans_entry(cm_node);
+			if (close_when_complete)
+				i40iw_rem_ref_cm_node(cm_node);
+			return ret;
+		}
+	}
+
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	was_timer_set = timer_pending(&cm_core->tcp_timer);
+
+	if (!was_timer_set) {
+		cm_core->tcp_timer.expires = new_send->timetosend;
+		add_timer(&cm_core->tcp_timer);
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	return ret;
+}
+
+/**
+ * i40iw_retrans_expired - Could not rexmit the packet
+ * @cm_node: connection's node
+ */
+static void i40iw_retrans_expired(struct i40iw_cm_node *cm_node)
+{
+	struct iw_cm_id *cm_id = cm_node->cm_id;
+	enum i40iw_cm_node_state state = cm_node->state;
+
+	cm_node->state = I40IW_CM_STATE_CLOSED;
+	switch (state) {
+	case I40IW_CM_STATE_SYN_RCVD:
+	case I40IW_CM_STATE_CLOSING:
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_LAST_ACK:
+		if (cm_node->cm_id)
+			cm_id->rem_ref(cm_id);
+		i40iw_send_reset(cm_node);
+		break;
+	default:
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_close_entry - for handling retry/timeouts
+ * @cm_node: connection's node
+ * @rem_node: flag for remove cm_node
+ */
+static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node)
+{
+	struct i40iw_timer_entry *close_entry = cm_node->close_entry;
+	struct iw_cm_id *cm_id = cm_node->cm_id;
+	struct i40iw_qp *iwqp;
+	unsigned long flags;
+
+	if (!close_entry)
+		return;
+	iwqp = (struct i40iw_qp *)close_entry->sqbuf;
+	if (iwqp) {
+		spin_lock_irqsave(&iwqp->lock, flags);
+		if (iwqp->cm_id) {
+			iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
+			iwqp->hw_iwarp_state = I40IW_QP_STATE_ERROR;
+			iwqp->last_aeq = I40IW_AE_RESET_SENT;
+			iwqp->ibqp_state = IB_QPS_ERR;
+			spin_unlock_irqrestore(&iwqp->lock, flags);
+			i40iw_cm_disconn(iwqp);
+		} else {
+			spin_unlock_irqrestore(&iwqp->lock, flags);
+		}
+	} else if (rem_node) {
+		/* TIME_WAIT state */
+		i40iw_rem_ref_cm_node(cm_node);
+	}
+	if (cm_id)
+		cm_id->rem_ref(cm_id);
+	kfree(close_entry);
+	cm_node->close_entry = NULL;
+}
+
+/**
+ * i40iw_cm_timer_tick - system's timer expired callback
+ * @pass: Pointing to cm_core
+ */
+static void i40iw_cm_timer_tick(unsigned long pass)
+{
+	unsigned long nexttimeout = jiffies + I40IW_LONG_TIME;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_timer_entry *send_entry, *close_entry;
+	struct list_head *list_core_temp;
+	struct list_head *list_node;
+	struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass;
+	u32 settimer = 0;
+	unsigned long timetosend;
+	struct i40iw_sc_dev *dev;
+	unsigned long flags;
+
+	struct list_head timer_list;
+
+	INIT_LIST_HEAD(&timer_list);
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+	list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+		cm_node = container_of(list_node, struct i40iw_cm_node, list);
+		if (cm_node->close_entry || cm_node->send_entry) {
+			atomic_inc(&cm_node->ref_count);
+			list_add(&cm_node->timer_entry, &timer_list);
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	list_for_each_safe(list_node, list_core_temp, &timer_list) {
+		cm_node = container_of(list_node,
+				       struct i40iw_cm_node,
+				       timer_entry);
+		close_entry = cm_node->close_entry;
+
+		if (close_entry) {
+			if (time_after(close_entry->timetosend, jiffies)) {
+				if (nexttimeout > close_entry->timetosend ||
+				    !settimer) {
+					nexttimeout = close_entry->timetosend;
+					settimer = 1;
+				}
+			} else {
+				i40iw_handle_close_entry(cm_node, 1);
+			}
+		}
+
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+
+		send_entry = cm_node->send_entry;
+		if (!send_entry)
+			goto done;
+		if (time_after(send_entry->timetosend, jiffies)) {
+			if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
+				if ((nexttimeout > send_entry->timetosend) ||
+				    !settimer) {
+					nexttimeout = send_entry->timetosend;
+					settimer = 1;
+				}
+			} else {
+				i40iw_free_retrans_entry(cm_node);
+			}
+			goto done;
+		}
+
+		if ((cm_node->state == I40IW_CM_STATE_OFFLOADED) ||
+		    (cm_node->state == I40IW_CM_STATE_CLOSED)) {
+			i40iw_free_retrans_entry(cm_node);
+			goto done;
+		}
+
+		if (!send_entry->retranscount || !send_entry->retrycount) {
+			i40iw_free_retrans_entry(cm_node);
+
+			spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+			i40iw_retrans_expired(cm_node);
+			cm_node->state = I40IW_CM_STATE_CLOSED;
+			spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+			goto done;
+		}
+		cm_node->cm_core->stats_pkt_retrans++;
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+
+		dev = cm_node->dev;
+		atomic_inc(&send_entry->sqbuf->refcount);
+		i40iw_puda_send_buf(dev->ilq, send_entry->sqbuf);
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+		if (send_entry->send_retrans) {
+			send_entry->retranscount--;
+			timetosend = (I40IW_RETRY_TIMEOUT <<
+				      (I40IW_DEFAULT_RETRANS -
+				       send_entry->retranscount));
+
+			send_entry->timetosend = jiffies +
+			    min(timetosend, I40IW_MAX_TIMEOUT);
+			if (nexttimeout > send_entry->timetosend || !settimer) {
+				nexttimeout = send_entry->timetosend;
+				settimer = 1;
+			}
+		} else {
+			int close_when_complete;
+
+			close_when_complete = send_entry->close_when_complete;
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "cm_node=%p state=%d\n",
+				    cm_node,
+				    cm_node->state);
+			i40iw_free_retrans_entry(cm_node);
+			if (close_when_complete)
+				i40iw_rem_ref_cm_node(cm_node);
+		}
+done:
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+		i40iw_rem_ref_cm_node(cm_node);
+	}
+
+	if (settimer) {
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		if (!timer_pending(&cm_core->tcp_timer)) {
+			cm_core->tcp_timer.expires = nexttimeout;
+			add_timer(&cm_core->tcp_timer);
+		}
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	}
+}
+
+/**
+ * i40iw_send_syn - send SYN packet
+ * @cm_node: connection's node
+ * @sendack: flag to set ACK bit or not
+ */
+int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack)
+{
+	struct i40iw_puda_buf *sqbuf;
+	int flags = SET_SYN;
+	char optionsbuffer[sizeof(struct option_mss) +
+			   sizeof(struct option_windowscale) +
+			   sizeof(struct option_base) + TCP_OPTIONS_PADDING];
+	struct i40iw_kmem_info opts;
+
+	int optionssize = 0;
+	/* Sending MSS option */
+	union all_known_options *options;
+
+	opts.addr = optionsbuffer;
+	if (!cm_node) {
+		i40iw_pr_err("no cm_node\n");
+		return -EINVAL;
+	}
+
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_mss.optionnum = OPTION_NUMBER_MSS;
+	options->as_mss.length = sizeof(struct option_mss);
+	options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
+	optionssize += sizeof(struct option_mss);
+
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
+	options->as_windowscale.length = sizeof(struct option_windowscale);
+	options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
+	optionssize += sizeof(struct option_windowscale);
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_end = OPTION_NUMBER_END;
+	optionssize += 1;
+
+	if (sendack)
+		flags |= SET_ACK;
+
+	opts.size = optionssize;
+
+	sqbuf = i40iw_form_cm_frame(cm_node, &opts, NULL, NULL, flags);
+	if (!sqbuf) {
+		i40iw_pr_err("no sqbuf\n");
+		return -1;
+	}
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_send_ack - Send ACK packet
+ * @cm_node: connection's node
+ */
+static void i40iw_send_ack(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_puda_buf *sqbuf;
+
+	sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK);
+	if (sqbuf)
+		i40iw_puda_send_buf(cm_node->dev->ilq, sqbuf);
+	else
+		i40iw_pr_err("no sqbuf\n");
+}
+
+/**
+ * i40iw_send_fin - Send FIN pkt
+ * @cm_node: connection's node
+ */
+static int i40iw_send_fin(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_puda_buf *sqbuf;
+
+	sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK | SET_FIN);
+	if (!sqbuf) {
+		i40iw_pr_err("no sqbuf\n");
+		return -1;
+	}
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_find_node - find a cm node that matches the reference cm node
+ * @cm_core: cm's core
+ * @rem_port: remote tcp port num
+ * @rem_addr: remote ip addr
+ * @loc_port: local tcp port num
+ * @loc_addr: loc ip addr
+ * @add_refcnt: flag to increment refcount of cm_node
+ */
+struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
+				      u16 rem_port,
+				      u32 *rem_addr,
+				      u16 loc_port,
+				      u32 *loc_addr,
+				      bool add_refcnt)
+{
+	struct list_head *hte;
+	struct i40iw_cm_node *cm_node;
+	unsigned long flags;
+
+	hte = &cm_core->connected_nodes;
+
+	/* walk list and find cm_node associated with this session ID */
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	list_for_each_entry(cm_node, hte, list) {
+		if (!memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) &&
+		    (cm_node->loc_port == loc_port) &&
+		    !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr)) &&
+		    (cm_node->rem_port == rem_port)) {
+			if (add_refcnt)
+				atomic_inc(&cm_node->ref_count);
+			spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+			return cm_node;
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	/* no owner node */
+	return NULL;
+}
+
+/**
+ * i40iw_find_listener - find a cm node listening on this addr-port pair
+ * @cm_core: cm's core
+ * @dst_port: listener tcp port num
+ * @dst_addr: listener ip addr
+ * @listener_state: state to match with listen node's
+ */
+static struct i40iw_cm_listener *i40iw_find_listener(
+						     struct i40iw_cm_core *cm_core,
+						     u32 *dst_addr,
+						     u16 dst_port,
+						     u16 vlan_id,
+						     enum i40iw_cm_listener_state
+						     listener_state)
+{
+	struct i40iw_cm_listener *listen_node;
+	static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+	u32 listen_addr[4];
+	u16 listen_port;
+	unsigned long flags;
+
+	/* walk list and find cm_node associated with this session ID */
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+		memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
+		listen_port = listen_node->loc_port;
+		/* compare node pair, return node handle if a match */
+		if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
+		     !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
+		     (listen_port == dst_port) &&
+		     (listener_state & listen_node->listener_state)) {
+			atomic_inc(&listen_node->ref_count);
+			spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+			return listen_node;
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+	return NULL;
+}
+
+/**
+ * i40iw_add_hte_node - add a cm node to the hash table
+ * @cm_core: cm's core
+ * @cm_node: connection's node
+ */
+static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
+			       struct i40iw_cm_node *cm_node)
+{
+	struct list_head *hte;
+	unsigned long flags;
+
+	if (!cm_node || !cm_core) {
+		i40iw_pr_err("cm_node or cm_core == NULL\n");
+		return;
+	}
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+	/* get a handle on the hash table element (list head for this slot) */
+	hte = &cm_core->connected_nodes;
+	list_add_tail(&cm_node->list, hte);
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+}
+
+/**
+ * listen_port_in_use - determine if port is in use
+ * @port: Listen port number
+ */
+static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
+{
+	struct i40iw_cm_listener *listen_node;
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+		if (listen_node->loc_port == port) {
+			ret = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+	return ret;
+}
+
+/**
+ * i40iw_del_multiple_qhash - Remove qhash and child listens
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ */
+static enum i40iw_status_code i40iw_del_multiple_qhash(
+						       struct i40iw_device *iwdev,
+						       struct i40iw_cm_info *cm_info,
+						       struct i40iw_cm_listener *cm_parent_listen_node)
+{
+	struct i40iw_cm_listener *child_listen_node;
+	enum i40iw_status_code ret = I40IW_ERR_CONFIG;
+	struct list_head *pos, *tpos;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+	list_for_each_safe(pos, tpos, &cm_parent_listen_node->child_listen_list) {
+		child_listen_node = list_entry(pos, struct i40iw_cm_listener, child_listen_list);
+		if (child_listen_node->ipv4)
+			i40iw_debug(&iwdev->sc_dev,
+				    I40IW_DEBUG_CM,
+				    "removing child listen for IP=%pI4, port=%d, vlan=%d\n",
+				    child_listen_node->loc_addr,
+				    child_listen_node->loc_port,
+				    child_listen_node->vlan_id);
+		else
+			i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+				    "removing child listen for IP=%pI6, port=%d, vlan=%d\n",
+				    child_listen_node->loc_addr,
+				    child_listen_node->loc_port,
+				    child_listen_node->vlan_id);
+		list_del(pos);
+		memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+		       sizeof(cm_info->loc_addr));
+		cm_info->vlan_id = child_listen_node->vlan_id;
+		ret = i40iw_manage_qhash(iwdev, cm_info,
+					 I40IW_QHASH_TYPE_TCP_SYN,
+					 I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
+		kfree(child_listen_node);
+		cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
+		i40iw_debug(&iwdev->sc_dev,
+			    I40IW_DEBUG_CM,
+			    "freed pointer = %p\n",
+			    child_listen_node);
+	}
+	spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+
+	return ret;
+}
+
+/**
+ * i40iw_netdev_vlan_ipv6 - Gets the netdev and mac
+ * @addr: local IPv6 address
+ * @vlan_id: vlan id for the given IPv6 address
+ * @mac: mac address for the given IPv6 address
+ *
+ * Returns the net_device of the IPv6 address and also sets the
+ * vlan id and mac for that address.
+ */
+static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
+{
+	struct net_device *ip_dev = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct in6_addr laddr6;
+
+	i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
+	if (vlan_id)
+		*vlan_id = I40IW_NO_VLAN;
+	if (mac)
+		eth_zero_addr(mac);
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, ip_dev) {
+		if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
+			if (vlan_id)
+				*vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+			if (ip_dev->dev_addr && mac)
+				ether_addr_copy(mac, ip_dev->dev_addr);
+			break;
+		}
+	}
+	rcu_read_unlock();
+#endif
+	return ip_dev;
+}
+
+/**
+ * i40iw_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
+ * @addr: local IPv4 address
+ */
+static u16 i40iw_get_vlan_ipv4(u32 *addr)
+{
+	struct net_device *netdev;
+	u16 vlan_id = I40IW_NO_VLAN;
+
+	netdev = ip_dev_find(&init_net, htonl(addr[0]));
+	if (netdev) {
+		vlan_id = rdma_vlan_dev_vlan_id(netdev);
+		dev_put(netdev);
+	}
+	return vlan_id;
+}
+
+/**
+ * i40iw_add_mqh_6 - Adds multiple qhashes for IPv6
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv6 address
+ * on the adapter and adds the associated qhash filter
+ */
+static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
+					      struct i40iw_cm_info *cm_info,
+					      struct i40iw_cm_listener *cm_parent_listen_node)
+{
+	struct net_device *ip_dev;
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifp;
+	enum i40iw_status_code ret = 0;
+	struct i40iw_cm_listener *child_listen_node;
+	unsigned long flags;
+
+	rtnl_lock();
+	for_each_netdev_rcu(&init_net, ip_dev) {
+		if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
+		      (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
+		     (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
+			idev = __in6_dev_get(ip_dev);
+			if (!idev) {
+				i40iw_pr_err("idev == NULL\n");
+				break;
+			}
+			list_for_each_entry(ifp, &idev->addr_list, if_list) {
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "IP=%pI6, vlan_id=%d, MAC=%pM\n",
+					    &ifp->addr,
+					    rdma_vlan_dev_vlan_id(ip_dev),
+					    ip_dev->dev_addr);
+				child_listen_node =
+					kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "Allocating child listener %p\n",
+					    child_listen_node);
+				if (!child_listen_node) {
+					i40iw_pr_err("listener memory allocation\n");
+					ret = I40IW_ERR_NO_MEMORY;
+					goto exit;
+				}
+				cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+				cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+
+				memcpy(child_listen_node, cm_parent_listen_node,
+				       sizeof(*child_listen_node));
+
+				i40iw_copy_ip_ntohl(child_listen_node->loc_addr,
+						    ifp->addr.in6_u.u6_addr32);
+				memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+				       sizeof(cm_info->loc_addr));
+
+				ret = i40iw_manage_qhash(iwdev, cm_info,
+							 I40IW_QHASH_TYPE_TCP_SYN,
+							 I40IW_QHASH_MANAGE_TYPE_ADD,
+							 NULL, true);
+				if (!ret) {
+					spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+					list_add(&child_listen_node->child_listen_list,
+						 &cm_parent_listen_node->child_listen_list);
+					spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+					cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+				} else {
+					kfree(child_listen_node);
+				}
+			}
+		}
+	}
+exit:
+	rtnl_unlock();
+	return ret;
+}
+
+/**
+ * i40iw_add_mqh_4 - Adds multiple qhashes for IPv4
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv4 address
+ * on the adapter and adds the associated qhash filter
+ */
+static enum i40iw_status_code i40iw_add_mqh_4(
+				struct i40iw_device *iwdev,
+				struct i40iw_cm_info *cm_info,
+				struct i40iw_cm_listener *cm_parent_listen_node)
+{
+	struct net_device *dev;
+	struct in_device *idev;
+	struct i40iw_cm_listener *child_listen_node;
+	enum i40iw_status_code ret = 0;
+	unsigned long flags;
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		if ((((rdma_vlan_dev_vlan_id(dev) < I40IW_NO_VLAN) &&
+		      (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
+		    (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
+			idev = in_dev_get(dev);
+			for_ifa(idev) {
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
+					    &ifa->ifa_address,
+					    rdma_vlan_dev_vlan_id(dev),
+					    dev->dev_addr);
+				child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+				cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "Allocating child listener %p\n",
+					    child_listen_node);
+				if (!child_listen_node) {
+					i40iw_pr_err("listener memory allocation\n");
+					in_dev_put(idev);
+					ret = I40IW_ERR_NO_MEMORY;
+					goto exit;
+				}
+				cm_info->vlan_id = rdma_vlan_dev_vlan_id(dev);
+				cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+				memcpy(child_listen_node,
+				       cm_parent_listen_node,
+				       sizeof(*child_listen_node));
+
+				child_listen_node->loc_addr[0] = ntohl(ifa->ifa_address);
+				memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+				       sizeof(cm_info->loc_addr));
+
+				ret = i40iw_manage_qhash(iwdev,
+							 cm_info,
+							 I40IW_QHASH_TYPE_TCP_SYN,
+							 I40IW_QHASH_MANAGE_TYPE_ADD,
+							 NULL,
+							 true);
+				if (!ret) {
+					spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+					list_add(&child_listen_node->child_listen_list,
+						 &cm_parent_listen_node->child_listen_list);
+					spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+				} else {
+					kfree(child_listen_node);
+					cm_parent_listen_node->cm_core->stats_listen_nodes_created--;
+				}
+			}
+			endfor_ifa(idev);
+			in_dev_put(idev);
+		}
+	}
+exit:
+	rtnl_unlock();
+	return ret;
+}
+
+/**
+ * i40iw_dec_refcnt_listen - delete listener and associated cm nodes
+ * @cm_core: cm's core
+ * @free_hanging_nodes: to free associated cm_nodes
+ * @apbvt_del: flag to delete the apbvt
+ */
+static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
+				   struct i40iw_cm_listener *listener,
+				   int free_hanging_nodes, bool apbvt_del)
+{
+	int ret = -EINVAL;
+	int err = 0;
+	struct list_head *list_pos;
+	struct list_head *list_temp;
+	struct i40iw_cm_node *cm_node;
+	struct list_head reset_list;
+	struct i40iw_cm_info nfo;
+	struct i40iw_cm_node *loopback;
+	enum i40iw_cm_node_state old_state;
+	unsigned long flags;
+
+	/* free non-accelerated child nodes for this listener */
+	INIT_LIST_HEAD(&reset_list);
+	if (free_hanging_nodes) {
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		list_for_each_safe(list_pos, list_temp, &cm_core->connected_nodes) {
+			cm_node = container_of(list_pos, struct i40iw_cm_node, list);
+			if ((cm_node->listener == listener) && !cm_node->accelerated) {
+				atomic_inc(&cm_node->ref_count);
+				list_add(&cm_node->reset_entry, &reset_list);
+			}
+		}
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	}
+
+	list_for_each_safe(list_pos, list_temp, &reset_list) {
+		cm_node = container_of(list_pos, struct i40iw_cm_node, reset_entry);
+		loopback = cm_node->loopbackpartner;
+		if (cm_node->state >= I40IW_CM_STATE_FIN_WAIT1) {
+			i40iw_rem_ref_cm_node(cm_node);
+		} else {
+			if (!loopback) {
+				i40iw_cleanup_retrans_entry(cm_node);
+				err = i40iw_send_reset(cm_node);
+				if (err) {
+					cm_node->state = I40IW_CM_STATE_CLOSED;
+					i40iw_pr_err("send reset\n");
+				} else {
+					old_state = cm_node->state;
+					cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
+					if (old_state != I40IW_CM_STATE_MPAREQ_RCVD)
+						i40iw_rem_ref_cm_node(cm_node);
+				}
+			} else {
+				struct i40iw_cm_event event;
+
+				event.cm_node = loopback;
+				memcpy(event.cm_info.rem_addr,
+				       loopback->rem_addr, sizeof(event.cm_info.rem_addr));
+				memcpy(event.cm_info.loc_addr,
+				       loopback->loc_addr, sizeof(event.cm_info.loc_addr));
+				event.cm_info.rem_port = loopback->rem_port;
+				event.cm_info.loc_port = loopback->loc_port;
+				event.cm_info.cm_id = loopback->cm_id;
+				event.cm_info.ipv4 = loopback->ipv4;
+				atomic_inc(&loopback->ref_count);
+				loopback->state = I40IW_CM_STATE_CLOSED;
+				i40iw_event_connect_error(&event);
+				cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
+				i40iw_rem_ref_cm_node(cm_node);
+			}
+		}
+	}
+
+	if (!atomic_dec_return(&listener->ref_count)) {
+		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+		list_del(&listener->list);
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+		if (listener->iwdev) {
+			if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port))
+				i40iw_manage_apbvt(listener->iwdev,
+						   listener->loc_port,
+						   I40IW_MANAGE_APBVT_DEL);
+
+			memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr));
+			nfo.loc_port = listener->loc_port;
+			nfo.ipv4 = listener->ipv4;
+			nfo.vlan_id = listener->vlan_id;
+
+			if (!list_empty(&listener->child_listen_list)) {
+				i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener);
+			} else {
+				if (listener->qhash_set)
+					i40iw_manage_qhash(listener->iwdev,
+							   &nfo,
+							   I40IW_QHASH_TYPE_TCP_SYN,
+							   I40IW_QHASH_MANAGE_TYPE_DELETE,
+							   NULL,
+							   false);
+			}
+		}
+
+		cm_core->stats_listen_destroyed++;
+		kfree(listener);
+		cm_core->stats_listen_nodes_destroyed++;
+		listener = NULL;
+		ret = 0;
+	}
+
+	if (listener) {
+		if (atomic_read(&listener->pend_accepts_cnt) > 0)
+			i40iw_debug(cm_core->dev,
+				    I40IW_DEBUG_CM,
+				    "%s: listener (%p) pending accepts=%u\n",
+				    __func__,
+				    listener,
+				    atomic_read(&listener->pend_accepts_cnt));
+	}
+
+	return ret;
+}
+
+/**
+ * i40iw_cm_del_listen - delete a linstener
+ * @cm_core: cm's core
+  * @listener: passive connection's listener
+ * @apbvt_del: flag to delete apbvt
+ */
+static int i40iw_cm_del_listen(struct i40iw_cm_core *cm_core,
+			       struct i40iw_cm_listener *listener,
+			       bool apbvt_del)
+{
+	listener->listener_state = I40IW_CM_LISTENER_PASSIVE_STATE;
+	listener->cm_id = NULL;	/* going to be destroyed pretty soon */
+	return i40iw_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
+}
+
+/**
+ * i40iw_addr_resolve_neigh - resolve neighbor address
+ * @iwdev: iwarp device structure
+ * @src_ip: local ip address
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
+				    u32 src_ip,
+				    u32 dst_ip,
+				    int arpindex)
+{
+	struct rtable *rt;
+	struct neighbour *neigh;
+	int rc = arpindex;
+	struct net_device *netdev = iwdev->netdev;
+	__be32 dst_ipaddr = htonl(dst_ip);
+	__be32 src_ipaddr = htonl(src_ip);
+
+	rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
+	if (IS_ERR(rt)) {
+		i40iw_pr_err("ip_route_output\n");
+		return rc;
+	}
+
+	if (netif_is_bond_slave(netdev))
+		netdev = netdev_master_upper_dev_get(netdev);
+
+	neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
+
+	rcu_read_lock();
+	if (neigh) {
+		if (neigh->nud_state & NUD_VALID) {
+			if (arpindex >= 0) {
+				if (ether_addr_equal(iwdev->arp_table[arpindex].mac_addr,
+						     neigh->ha))
+					/* Mac address same as arp table */
+					goto resolve_neigh_exit;
+				i40iw_manage_arp_cache(iwdev,
+						       iwdev->arp_table[arpindex].mac_addr,
+						       &dst_ip,
+						       true,
+						       I40IW_ARP_DELETE);
+			}
+
+			i40iw_manage_arp_cache(iwdev, neigh->ha, &dst_ip, true, I40IW_ARP_ADD);
+			rc = i40iw_arp_table(iwdev, &dst_ip, true, NULL, I40IW_ARP_RESOLVE);
+		} else {
+			neigh_event_send(neigh, NULL);
+		}
+	}
+ resolve_neigh_exit:
+
+	rcu_read_unlock();
+	if (neigh)
+		neigh_release(neigh);
+
+	ip_rt_put(rt);
+	return rc;
+}
+
+/**
+ * i40iw_get_dst_ipv6
+ */
+static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr,
+					    struct sockaddr_in6 *dst_addr)
+{
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.daddr = dst_addr->sin6_addr;
+	fl6.saddr = src_addr->sin6_addr;
+	if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+		fl6.flowi6_oif = dst_addr->sin6_scope_id;
+
+	dst = ip6_route_output(&init_net, NULL, &fl6);
+	return dst;
+}
+
+/**
+ * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
+ * @iwdev: iwarp device structure
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
+					 u32 *src,
+					 u32 *dest,
+					 int arpindex)
+{
+	struct neighbour *neigh;
+	int rc = arpindex;
+	struct net_device *netdev = iwdev->netdev;
+	struct dst_entry *dst;
+	struct sockaddr_in6 dst_addr;
+	struct sockaddr_in6 src_addr;
+
+	memset(&dst_addr, 0, sizeof(dst_addr));
+	dst_addr.sin6_family = AF_INET6;
+	i40iw_copy_ip_htonl(dst_addr.sin6_addr.in6_u.u6_addr32, dest);
+	memset(&src_addr, 0, sizeof(src_addr));
+	src_addr.sin6_family = AF_INET6;
+	i40iw_copy_ip_htonl(src_addr.sin6_addr.in6_u.u6_addr32, src);
+	dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr);
+	if (!dst || dst->error) {
+		if (dst) {
+			dst_release(dst);
+			i40iw_pr_err("ip6_route_output returned dst->error = %d\n",
+				     dst->error);
+		}
+		return rc;
+	}
+
+	if (netif_is_bond_slave(netdev))
+		netdev = netdev_master_upper_dev_get(netdev);
+
+	neigh = dst_neigh_lookup(dst, &dst_addr);
+
+	rcu_read_lock();
+	if (neigh) {
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "dst_neigh_lookup MAC=%pM\n", neigh->ha);
+		if (neigh->nud_state & NUD_VALID) {
+			if (arpindex >= 0) {
+				if (ether_addr_equal
+				    (iwdev->arp_table[arpindex].mac_addr,
+				     neigh->ha)) {
+					/* Mac address same as in arp table */
+					goto resolve_neigh_exit6;
+				}
+				i40iw_manage_arp_cache(iwdev,
+						       iwdev->arp_table[arpindex].mac_addr,
+						       dest,
+						       false,
+						       I40IW_ARP_DELETE);
+			}
+			i40iw_manage_arp_cache(iwdev,
+					       neigh->ha,
+					       dest,
+					       false,
+					       I40IW_ARP_ADD);
+			rc = i40iw_arp_table(iwdev,
+					     dest,
+					     false,
+					     NULL,
+					     I40IW_ARP_RESOLVE);
+		} else {
+			neigh_event_send(neigh, NULL);
+		}
+	}
+
+ resolve_neigh_exit6:
+	rcu_read_unlock();
+	if (neigh)
+		neigh_release(neigh);
+	dst_release(dst);
+	return rc;
+}
+
+/**
+ * i40iw_ipv4_is_loopback - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+static bool i40iw_ipv4_is_loopback(u32 loc_addr, u32 rem_addr)
+{
+	return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr);
+}
+
+/**
+ * i40iw_ipv6_is_loopback - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+static bool i40iw_ipv6_is_loopback(u32 *loc_addr, u32 *rem_addr)
+{
+	struct in6_addr raddr6;
+
+	i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
+	return (!memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6));
+}
+
+/**
+ * i40iw_make_cm_node - create a new instance of a cm node
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ * @listener: passive connection's listener
+ */
+static struct i40iw_cm_node *i40iw_make_cm_node(
+				   struct i40iw_cm_core *cm_core,
+				   struct i40iw_device *iwdev,
+				   struct i40iw_cm_info *cm_info,
+				   struct i40iw_cm_listener *listener)
+{
+	struct i40iw_cm_node *cm_node;
+	struct timespec ts;
+	int oldarpindex;
+	int arpindex;
+	struct net_device *netdev = iwdev->netdev;
+
+	/* create an hte and cm_node for this instance */
+	cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
+	if (!cm_node)
+		return NULL;
+
+	/* set our node specific transport info */
+	cm_node->ipv4 = cm_info->ipv4;
+	cm_node->vlan_id = cm_info->vlan_id;
+	memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
+	memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
+	cm_node->loc_port = cm_info->loc_port;
+	cm_node->rem_port = cm_info->rem_port;
+
+	cm_node->mpa_frame_rev = iwdev->mpa_version;
+	cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+	cm_node->ird_size = I40IW_MAX_IRD_SIZE;
+	cm_node->ord_size = I40IW_MAX_ORD_SIZE;
+
+	cm_node->listener = listener;
+	cm_node->cm_id = cm_info->cm_id;
+	ether_addr_copy(cm_node->loc_mac, netdev->dev_addr);
+	spin_lock_init(&cm_node->retrans_list_lock);
+
+	atomic_set(&cm_node->ref_count, 1);
+	/* associate our parent CM core */
+	cm_node->cm_core = cm_core;
+	cm_node->tcp_cntxt.loc_id = I40IW_CM_DEF_LOCAL_ID;
+	cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
+	cm_node->tcp_cntxt.rcv_wnd =
+			I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
+	ts = current_kernel_time();
+	cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+	cm_node->tcp_cntxt.mss = iwdev->mss;
+
+	cm_node->iwdev = iwdev;
+	cm_node->dev = &iwdev->sc_dev;
+
+	if ((cm_node->ipv4 &&
+	     i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+	     (!cm_node->ipv4 && i40iw_ipv6_is_loopback(cm_node->loc_addr,
+						       cm_node->rem_addr))) {
+		arpindex = i40iw_arp_table(iwdev,
+					   cm_node->rem_addr,
+					   false,
+					   NULL,
+					   I40IW_ARP_RESOLVE);
+	} else {
+		oldarpindex = i40iw_arp_table(iwdev,
+					      cm_node->rem_addr,
+					      false,
+					      NULL,
+					      I40IW_ARP_RESOLVE);
+		if (cm_node->ipv4)
+			arpindex = i40iw_addr_resolve_neigh(iwdev,
+							    cm_info->loc_addr[0],
+							    cm_info->rem_addr[0],
+							    oldarpindex);
+		else if (IS_ENABLED(CONFIG_IPV6))
+			arpindex = i40iw_addr_resolve_neigh_ipv6(iwdev,
+								 cm_info->loc_addr,
+								 cm_info->rem_addr,
+								 oldarpindex);
+		else
+			arpindex = -EINVAL;
+	}
+	if (arpindex < 0) {
+		i40iw_pr_err("cm_node arpindex\n");
+		kfree(cm_node);
+		return NULL;
+	}
+	ether_addr_copy(cm_node->rem_mac, iwdev->arp_table[arpindex].mac_addr);
+	i40iw_add_hte_node(cm_core, cm_node);
+	cm_core->stats_nodes_created++;
+	return cm_node;
+}
+
+/**
+ * i40iw_rem_ref_cm_node - destroy an instance of a cm node
+ * @cm_node: connection's node
+ */
+static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_cm_core *cm_core = cm_node->cm_core;
+	struct i40iw_qp *iwqp;
+	struct i40iw_cm_info nfo;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
+	if (atomic_dec_return(&cm_node->ref_count)) {
+		spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+		return;
+	}
+	list_del(&cm_node->list);
+	spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+
+	/* if the node is destroyed before connection was accelerated */
+	if (!cm_node->accelerated && cm_node->accept_pend) {
+		pr_err("node destroyed before established\n");
+		atomic_dec(&cm_node->listener->pend_accepts_cnt);
+	}
+	if (cm_node->close_entry)
+		i40iw_handle_close_entry(cm_node, 0);
+	if (cm_node->listener) {
+		i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
+	} else {
+		if (!i40iw_listen_port_in_use(cm_core, htons(cm_node->loc_port)) &&
+		    cm_node->apbvt_set && cm_node->iwdev) {
+			i40iw_manage_apbvt(cm_node->iwdev,
+					   cm_node->loc_port,
+					   I40IW_MANAGE_APBVT_DEL);
+			i40iw_get_addr_info(cm_node, &nfo);
+			if (cm_node->qhash_set) {
+				i40iw_manage_qhash(cm_node->iwdev,
+						   &nfo,
+						   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+						   I40IW_QHASH_MANAGE_TYPE_DELETE,
+						   NULL,
+						   false);
+				cm_node->qhash_set = 0;
+			}
+		}
+	}
+
+	iwqp = cm_node->iwqp;
+	if (iwqp) {
+		iwqp->cm_node = NULL;
+		i40iw_rem_ref(&iwqp->ibqp);
+		cm_node->iwqp = NULL;
+	} else if (cm_node->qhash_set) {
+		i40iw_get_addr_info(cm_node, &nfo);
+		i40iw_manage_qhash(cm_node->iwdev,
+				   &nfo,
+				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				   I40IW_QHASH_MANAGE_TYPE_DELETE,
+				   NULL,
+				   false);
+		cm_node->qhash_set = 0;
+	}
+
+	cm_node->cm_core->stats_nodes_destroyed++;
+	kfree(cm_node);
+}
+
+/**
+ * i40iw_handle_fin_pkt - FIN packet received
+ * @cm_node: connection's node
+ */
+static void i40iw_handle_fin_pkt(struct i40iw_cm_node *cm_node)
+{
+	u32 ret;
+
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_RCVD:
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_MPAREJ_RCVD:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_LAST_ACK;
+		i40iw_send_fin(cm_node);
+		break;
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_FIN_WAIT1:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSING;
+		i40iw_send_ack(cm_node);
+		/*
+		 * Wait for ACK as this is simultaneous close.
+		 * After we receive ACK, do not send anything.
+		 * Just rm the node.
+		 */
+		break;
+	case I40IW_CM_STATE_FIN_WAIT2:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_TIME_WAIT;
+		i40iw_send_ack(cm_node);
+		ret =
+		    i40iw_schedule_cm_timer(cm_node, NULL, I40IW_TIMER_TYPE_CLOSE, 1, 0);
+		if (ret)
+			i40iw_pr_err("node %p state = %d\n", cm_node, cm_node->state);
+		break;
+	case I40IW_CM_STATE_TIME_WAIT:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+	default:
+		i40iw_pr_err("bad state node %p state = %d\n", cm_node, cm_node->state);
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_rst_pkt - process received RST packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node,
+				 struct i40iw_puda_buf *rbuf)
+{
+	i40iw_cleanup_retrans_entry(cm_node);
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		switch (cm_node->mpa_frame_rev) {
+		case IETF_MPA_V2:
+			cm_node->mpa_frame_rev = IETF_MPA_V1;
+			/* send a syn and goto syn sent state */
+			cm_node->state = I40IW_CM_STATE_SYN_SENT;
+			if (i40iw_send_syn(cm_node, 0))
+				i40iw_active_open_err(cm_node, false);
+			break;
+		case IETF_MPA_V1:
+		default:
+			i40iw_active_open_err(cm_node, false);
+			break;
+		}
+		break;
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+		atomic_add_return(1, &cm_node->passive_state);
+		break;
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_SYN_RCVD:
+	case I40IW_CM_STATE_LISTENING:
+		i40iw_pr_err("Bad state state = %d\n", cm_node->state);
+		i40iw_passive_open_err(cm_node, false);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+		i40iw_active_open_err(cm_node, false);
+		break;
+	case I40IW_CM_STATE_CLOSED:
+		break;
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_LAST_ACK:
+		cm_node->cm_id->rem_ref(cm_node->cm_id);
+	case I40IW_CM_STATE_TIME_WAIT:
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_rcv_mpa - Process a recv'd mpa buffer
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node,
+				 struct i40iw_puda_buf *rbuf)
+{
+	int ret;
+	int datasize = rbuf->datalen;
+	u8 *dataloc = rbuf->data;
+
+	enum i40iw_cm_event_type type = I40IW_CM_EVENT_UNKNOWN;
+	u32 res_type;
+
+	ret = i40iw_parse_mpa(cm_node, dataloc, &res_type, datasize);
+	if (ret) {
+		if (cm_node->state == I40IW_CM_STATE_MPAREQ_SENT)
+			i40iw_active_open_err(cm_node, true);
+		else
+			i40iw_passive_open_err(cm_node, true);
+		return;
+	}
+
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_ESTABLISHED:
+		if (res_type == I40IW_MPA_REQUEST_REJECT)
+			i40iw_pr_err("state for reject\n");
+		cm_node->state = I40IW_CM_STATE_MPAREQ_RCVD;
+		type = I40IW_CM_EVENT_MPA_REQ;
+		i40iw_send_ack(cm_node);	/* ACK received MPA request */
+		atomic_set(&cm_node->passive_state,
+			   I40IW_PASSIVE_STATE_INDICATED);
+		break;
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		i40iw_cleanup_retrans_entry(cm_node);
+		if (res_type == I40IW_MPA_REQUEST_REJECT) {
+			type = I40IW_CM_EVENT_MPA_REJECT;
+			cm_node->state = I40IW_CM_STATE_MPAREJ_RCVD;
+		} else {
+			type = I40IW_CM_EVENT_CONNECTED;
+			cm_node->state = I40IW_CM_STATE_OFFLOADED;
+			i40iw_send_ack(cm_node);
+		}
+		break;
+	default:
+		pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state);
+		break;
+	}
+	i40iw_create_event(cm_node, type);
+}
+
+/**
+ * i40iw_indicate_pkt_err - Send up err event to cm
+ * @cm_node: connection's node
+ */
+static void i40iw_indicate_pkt_err(struct i40iw_cm_node *cm_node)
+{
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		i40iw_active_open_err(cm_node, true);
+		break;
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_SYN_RCVD:
+		i40iw_passive_open_err(cm_node, true);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_check_syn - Check for error on received syn ack
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int i40iw_check_syn(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
+{
+	int err = 0;
+
+	if (ntohl(tcph->ack_seq) != cm_node->tcp_cntxt.loc_seq_num) {
+		err = 1;
+		i40iw_active_open_err(cm_node, true);
+	}
+	return err;
+}
+
+/**
+ * i40iw_check_seq - check seq numbers if OK
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int i40iw_check_seq(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
+{
+	int err = 0;
+	u32 seq;
+	u32 ack_seq;
+	u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
+	u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+	u32 rcv_wnd;
+
+	seq = ntohl(tcph->seq);
+	ack_seq = ntohl(tcph->ack_seq);
+	rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
+	if (ack_seq != loc_seq_num)
+		err = -1;
+	else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
+		err = -1;
+	if (err) {
+		i40iw_pr_err("seq number\n");
+		i40iw_indicate_pkt_err(cm_node);
+	}
+	return err;
+}
+
+/**
+ * i40iw_handle_syn_pkt - is for Passive node
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_syn_pkt(struct i40iw_cm_node *cm_node,
+				 struct i40iw_puda_buf *rbuf)
+{
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	int ret;
+	u32 inc_sequence;
+	int optionsize;
+	struct i40iw_cm_info nfo;
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+	inc_sequence = ntohl(tcph->seq);
+
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		/* Rcvd syn on active open connection */
+		i40iw_active_open_err(cm_node, 1);
+		break;
+	case I40IW_CM_STATE_LISTENING:
+		/* Passive OPEN */
+		if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+		    cm_node->listener->backlog) {
+			cm_node->cm_core->stats_backlog_drops++;
+			i40iw_passive_open_err(cm_node, false);
+			break;
+		}
+		ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
+		if (ret) {
+			i40iw_passive_open_err(cm_node, false);
+			/* drop pkt */
+			break;
+		}
+		cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+		cm_node->accept_pend = 1;
+		atomic_inc(&cm_node->listener->pend_accepts_cnt);
+
+		cm_node->state = I40IW_CM_STATE_SYN_RCVD;
+		i40iw_get_addr_info(cm_node, &nfo);
+		ret = i40iw_manage_qhash(cm_node->iwdev,
+					 &nfo,
+					 I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+					 I40IW_QHASH_MANAGE_TYPE_ADD,
+					 (void *)cm_node,
+					 false);
+		cm_node->qhash_set = true;
+		break;
+	case I40IW_CM_STATE_CLOSED:
+		i40iw_cleanup_retrans_entry(cm_node);
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+	case I40IW_CM_STATE_LAST_ACK:
+	case I40IW_CM_STATE_CLOSING:
+	case I40IW_CM_STATE_UNKNOWN:
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_synack_pkt - Process SYN+ACK packet (active side)
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_synack_pkt(struct i40iw_cm_node *cm_node,
+				    struct i40iw_puda_buf *rbuf)
+{
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	int ret;
+	u32 inc_sequence;
+	int optionsize;
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+	inc_sequence = ntohl(tcph->seq);
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_SENT:
+		i40iw_cleanup_retrans_entry(cm_node);
+		/* active open */
+		if (i40iw_check_syn(cm_node, tcph)) {
+			i40iw_pr_err("check syn fail\n");
+			return;
+		}
+		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+		/* setup options */
+		ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 0);
+		if (ret) {
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "cm_node=%p tcp_options failed\n",
+				    cm_node);
+			break;
+		}
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+		i40iw_send_ack(cm_node);	/* ACK  for the syn_ack */
+		ret = i40iw_send_mpa_request(cm_node);
+		if (ret) {
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "cm_node=%p i40iw_send_mpa_request failed\n",
+				    cm_node);
+			break;
+		}
+		cm_node->state = I40IW_CM_STATE_MPAREQ_SENT;
+		break;
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+		i40iw_passive_open_err(cm_node, true);
+		break;
+	case I40IW_CM_STATE_LISTENING:
+		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_CLOSED:
+		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+		i40iw_cleanup_retrans_entry(cm_node);
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_LAST_ACK:
+	case I40IW_CM_STATE_OFFLOADED:
+	case I40IW_CM_STATE_CLOSING:
+	case I40IW_CM_STATE_UNKNOWN:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_ack_pkt - process packet with ACK
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static int i40iw_handle_ack_pkt(struct i40iw_cm_node *cm_node,
+				struct i40iw_puda_buf *rbuf)
+{
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	u32 inc_sequence;
+	int ret = 0;
+	int optionsize;
+	u32 datasize = rbuf->datalen;
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+
+	if (i40iw_check_seq(cm_node, tcph))
+		return -EINVAL;
+
+	inc_sequence = ntohl(tcph->seq);
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_RCVD:
+		i40iw_cleanup_retrans_entry(cm_node);
+		ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
+		if (ret)
+			break;
+		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+		cm_node->state = I40IW_CM_STATE_ESTABLISHED;
+		if (datasize) {
+			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+			i40iw_handle_rcv_mpa(cm_node, rbuf);
+		}
+		break;
+	case I40IW_CM_STATE_ESTABLISHED:
+		i40iw_cleanup_retrans_entry(cm_node);
+		if (datasize) {
+			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+			i40iw_handle_rcv_mpa(cm_node, rbuf);
+		}
+		break;
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+		if (datasize) {
+			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+			i40iw_handle_rcv_mpa(cm_node, rbuf);
+		}
+		break;
+	case I40IW_CM_STATE_LISTENING:
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_CLOSED:
+		i40iw_cleanup_retrans_entry(cm_node);
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_LAST_ACK:
+	case I40IW_CM_STATE_CLOSING:
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		if (!cm_node->accept_pend)
+			cm_node->cm_id->rem_ref(cm_node->cm_id);
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	case I40IW_CM_STATE_FIN_WAIT1:
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_FIN_WAIT2;
+		break;
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_OFFLOADED:
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+	case I40IW_CM_STATE_UNKNOWN:
+	default:
+		i40iw_cleanup_retrans_entry(cm_node);
+		break;
+	}
+	return ret;
+}
+
+/**
+ * i40iw_process_packet - process cm packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_process_packet(struct i40iw_cm_node *cm_node,
+				 struct i40iw_puda_buf *rbuf)
+{
+	enum i40iw_tcpip_pkt_type pkt_type = I40IW_PKT_TYPE_UNKNOWN;
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	u32 fin_set = 0;
+	int ret;
+
+	if (tcph->rst) {
+		pkt_type = I40IW_PKT_TYPE_RST;
+	} else if (tcph->syn) {
+		pkt_type = I40IW_PKT_TYPE_SYN;
+		if (tcph->ack)
+			pkt_type = I40IW_PKT_TYPE_SYNACK;
+	} else if (tcph->ack) {
+		pkt_type = I40IW_PKT_TYPE_ACK;
+	}
+	if (tcph->fin)
+		fin_set = 1;
+
+	switch (pkt_type) {
+	case I40IW_PKT_TYPE_SYN:
+		i40iw_handle_syn_pkt(cm_node, rbuf);
+		break;
+	case I40IW_PKT_TYPE_SYNACK:
+		i40iw_handle_synack_pkt(cm_node, rbuf);
+		break;
+	case I40IW_PKT_TYPE_ACK:
+		ret = i40iw_handle_ack_pkt(cm_node, rbuf);
+		if (fin_set && !ret)
+			i40iw_handle_fin_pkt(cm_node);
+		break;
+	case I40IW_PKT_TYPE_RST:
+		i40iw_handle_rst_pkt(cm_node, rbuf);
+		break;
+	default:
+		if (fin_set &&
+		    (!i40iw_check_seq(cm_node, (struct tcphdr *)rbuf->tcph)))
+			i40iw_handle_fin_pkt(cm_node);
+		break;
+	}
+}
+
+/**
+ * i40iw_make_listen_node - create a listen node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ */
+static struct i40iw_cm_listener *i40iw_make_listen_node(
+					struct i40iw_cm_core *cm_core,
+					struct i40iw_device *iwdev,
+					struct i40iw_cm_info *cm_info)
+{
+	struct i40iw_cm_listener *listener;
+	unsigned long flags;
+
+	/* cannot have multiple matching listeners */
+	listener = i40iw_find_listener(cm_core, cm_info->loc_addr,
+				       cm_info->loc_port,
+				       cm_info->vlan_id,
+				       I40IW_CM_LISTENER_EITHER_STATE);
+	if (listener &&
+	    (listener->listener_state == I40IW_CM_LISTENER_ACTIVE_STATE)) {
+		atomic_dec(&listener->ref_count);
+		i40iw_debug(cm_core->dev,
+			    I40IW_DEBUG_CM,
+			    "Not creating listener since it already exists\n");
+		return NULL;
+	}
+
+	if (!listener) {
+		/* create a CM listen node (1/2 node to compare incoming traffic to) */
+		listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
+		if (!listener)
+			return NULL;
+		cm_core->stats_listen_nodes_created++;
+		memcpy(listener->loc_addr, cm_info->loc_addr, sizeof(listener->loc_addr));
+		listener->loc_port = cm_info->loc_port;
+
+		INIT_LIST_HEAD(&listener->child_listen_list);
+
+		atomic_set(&listener->ref_count, 1);
+	} else {
+		listener->reused_node = 1;
+	}
+
+	listener->cm_id = cm_info->cm_id;
+	listener->ipv4 = cm_info->ipv4;
+	listener->vlan_id = cm_info->vlan_id;
+	atomic_set(&listener->pend_accepts_cnt, 0);
+	listener->cm_core = cm_core;
+	listener->iwdev = iwdev;
+
+	listener->backlog = cm_info->backlog;
+	listener->listener_state = I40IW_CM_LISTENER_ACTIVE_STATE;
+
+	if (!listener->reused_node) {
+		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+		list_add(&listener->list, &cm_core->listen_nodes);
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+	}
+
+	return listener;
+}
+
+/**
+ * i40iw_create_cm_node - make a connection node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @private_data_len: len to provate data for mpa request
+ * @private_data: pointer to private data for connection
+ * @cm_info: quad info for connection
+ */
+static struct i40iw_cm_node *i40iw_create_cm_node(
+					struct i40iw_cm_core *cm_core,
+					struct i40iw_device *iwdev,
+					u16 private_data_len,
+					void *private_data,
+					struct i40iw_cm_info *cm_info)
+{
+	int ret;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_cm_listener *loopback_remotelistener;
+	struct i40iw_cm_node *loopback_remotenode;
+	struct i40iw_cm_info loopback_cm_info;
+
+	/* create a CM connection node */
+	cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
+	if (!cm_node)
+		return NULL;
+	/* set our node side to client (active) side */
+	cm_node->tcp_cntxt.client = 1;
+	cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
+
+	if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) {
+		loopback_remotelistener = i40iw_find_listener(
+						cm_core,
+						cm_info->rem_addr,
+						cm_node->rem_port,
+						cm_node->vlan_id,
+						I40IW_CM_LISTENER_ACTIVE_STATE);
+		if (!loopback_remotelistener) {
+			i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+		} else {
+			loopback_cm_info = *cm_info;
+			loopback_cm_info.loc_port = cm_info->rem_port;
+			loopback_cm_info.rem_port = cm_info->loc_port;
+			loopback_cm_info.cm_id = loopback_remotelistener->cm_id;
+			loopback_cm_info.ipv4 = cm_info->ipv4;
+			loopback_remotenode = i40iw_make_cm_node(cm_core,
+								 iwdev,
+								 &loopback_cm_info,
+								 loopback_remotelistener);
+			if (!loopback_remotenode) {
+				i40iw_rem_ref_cm_node(cm_node);
+				return NULL;
+			}
+			cm_core->stats_loopbacks++;
+			loopback_remotenode->loopbackpartner = cm_node;
+			loopback_remotenode->tcp_cntxt.rcv_wscale =
+				I40IW_CM_DEFAULT_RCV_WND_SCALE;
+			cm_node->loopbackpartner = loopback_remotenode;
+			memcpy(loopback_remotenode->pdata_buf, private_data,
+			       private_data_len);
+			loopback_remotenode->pdata.size = private_data_len;
+
+			cm_node->state = I40IW_CM_STATE_OFFLOADED;
+			cm_node->tcp_cntxt.rcv_nxt =
+				loopback_remotenode->tcp_cntxt.loc_seq_num;
+			loopback_remotenode->tcp_cntxt.rcv_nxt =
+				cm_node->tcp_cntxt.loc_seq_num;
+			cm_node->tcp_cntxt.max_snd_wnd =
+				loopback_remotenode->tcp_cntxt.rcv_wnd;
+			loopback_remotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
+			cm_node->tcp_cntxt.snd_wnd = loopback_remotenode->tcp_cntxt.rcv_wnd;
+			loopback_remotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
+			cm_node->tcp_cntxt.snd_wscale = loopback_remotenode->tcp_cntxt.rcv_wscale;
+			loopback_remotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
+			loopback_remotenode->state = I40IW_CM_STATE_MPAREQ_RCVD;
+			i40iw_create_event(loopback_remotenode, I40IW_CM_EVENT_MPA_REQ);
+		}
+		return cm_node;
+	}
+
+	cm_node->pdata.size = private_data_len;
+	cm_node->pdata.addr = cm_node->pdata_buf;
+
+	memcpy(cm_node->pdata_buf, private_data, private_data_len);
+
+	cm_node->state = I40IW_CM_STATE_SYN_SENT;
+	ret = i40iw_send_syn(cm_node, 0);
+
+	if (ret) {
+		if (cm_node->ipv4)
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "Api - connect() FAILED: dest addr=%pI4",
+				    cm_node->rem_addr);
+		else
+			i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
+				    "Api - connect() FAILED: dest addr=%pI6",
+				    cm_node->rem_addr);
+		i40iw_rem_ref_cm_node(cm_node);
+		cm_node = NULL;
+	}
+
+	if (cm_node)
+		i40iw_debug(cm_node->dev,
+			    I40IW_DEBUG_CM,
+			    "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
+			    cm_node->rem_port,
+			    cm_node,
+			    cm_node->cm_id);
+
+	return cm_node;
+}
+
+/**
+ * i40iw_cm_reject - reject and teardown a connection
+ * @cm_node: connection's node
+ * @pdate: ptr to private data for reject
+ * @plen: size of private data
+ */
+static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen)
+{
+	int ret = 0;
+	int err;
+	int passive_state;
+	struct iw_cm_id *cm_id = cm_node->cm_id;
+	struct i40iw_cm_node *loopback = cm_node->loopbackpartner;
+
+	if (cm_node->tcp_cntxt.client)
+		return ret;
+	i40iw_cleanup_retrans_entry(cm_node);
+
+	if (!loopback) {
+		passive_state = atomic_add_return(1, &cm_node->passive_state);
+		if (passive_state == I40IW_SEND_RESET_EVENT) {
+			cm_node->state = I40IW_CM_STATE_CLOSED;
+			i40iw_rem_ref_cm_node(cm_node);
+		} else {
+			if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+				i40iw_rem_ref_cm_node(cm_node);
+			} else {
+				ret = i40iw_send_mpa_reject(cm_node, pdata, plen);
+				if (ret) {
+					cm_node->state = I40IW_CM_STATE_CLOSED;
+					err = i40iw_send_reset(cm_node);
+					if (err)
+						i40iw_pr_err("send reset failed\n");
+				} else {
+					cm_id->add_ref(cm_id);
+				}
+			}
+		}
+	} else {
+		cm_node->cm_id = NULL;
+		if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+			i40iw_rem_ref_cm_node(cm_node);
+			i40iw_rem_ref_cm_node(loopback);
+		} else {
+			ret = i40iw_send_cm_event(loopback,
+						  loopback->cm_id,
+						  IW_CM_EVENT_CONNECT_REPLY,
+						  -ECONNREFUSED);
+			i40iw_rem_ref_cm_node(cm_node);
+			loopback->state = I40IW_CM_STATE_CLOSING;
+
+			cm_id = loopback->cm_id;
+			i40iw_rem_ref_cm_node(loopback);
+			cm_id->rem_ref(cm_id);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * i40iw_cm_close - close of cm connection
+ * @cm_node: connection's node
+ */
+static int i40iw_cm_close(struct i40iw_cm_node *cm_node)
+{
+	int ret = 0;
+
+	if (!cm_node)
+		return -EINVAL;
+
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_RCVD:
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_ONE_SIDE_ESTABLISHED:
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_ACCEPTING:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+		i40iw_cleanup_retrans_entry(cm_node);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_CLOSE_WAIT:
+		cm_node->state = I40IW_CM_STATE_LAST_ACK;
+		i40iw_send_fin(cm_node);
+		break;
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_LAST_ACK:
+	case I40IW_CM_STATE_TIME_WAIT:
+	case I40IW_CM_STATE_CLOSING:
+		ret = -1;
+		break;
+	case I40IW_CM_STATE_LISTENING:
+		i40iw_cleanup_retrans_entry(cm_node);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_MPAREJ_RCVD:
+	case I40IW_CM_STATE_UNKNOWN:
+	case I40IW_CM_STATE_INITED:
+	case I40IW_CM_STATE_CLOSED:
+	case I40IW_CM_STATE_LISTENER_DESTROYED:
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+		if (cm_node->send_entry)
+			i40iw_pr_err("send_entry\n");
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	}
+	return ret;
+}
+
+/**
+ * i40iw_receive_ilq - recv an ETHERNET packet, and process it
+ * through CM
+ * @dev: FPK dev struct
+ * @rbuf: receive buffer
+ */
+void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
+{
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_cm_listener *listener;
+	struct iphdr *iph;
+	struct ipv6hdr *ip6h;
+	struct tcphdr *tcph;
+	struct i40iw_cm_info cm_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+	struct vlan_ethhdr *ethh;
+
+	/* if vlan, then maclen = 18 else 14 */
+	iph = (struct iphdr *)rbuf->iph;
+	memset(&cm_info, 0, sizeof(cm_info));
+
+	i40iw_debug_buf(dev,
+			I40IW_DEBUG_ILQ,
+			"RECEIVE ILQ BUFFER",
+			rbuf->mem.va,
+			rbuf->totallen);
+	ethh = (struct vlan_ethhdr *)rbuf->mem.va;
+
+	if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
+		cm_info.vlan_id = ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK;
+		i40iw_debug(cm_core->dev,
+			    I40IW_DEBUG_CM,
+			    "%s vlan_id=%d\n",
+			    __func__,
+			    cm_info.vlan_id);
+	} else {
+		cm_info.vlan_id = I40IW_NO_VLAN;
+	}
+	tcph = (struct tcphdr *)rbuf->tcph;
+
+	if (rbuf->ipv4) {
+		cm_info.loc_addr[0] = ntohl(iph->daddr);
+		cm_info.rem_addr[0] = ntohl(iph->saddr);
+		cm_info.ipv4 = true;
+	} else {
+		ip6h = (struct ipv6hdr *)rbuf->iph;
+		i40iw_copy_ip_ntohl(cm_info.loc_addr,
+				    ip6h->daddr.in6_u.u6_addr32);
+		i40iw_copy_ip_ntohl(cm_info.rem_addr,
+				    ip6h->saddr.in6_u.u6_addr32);
+		cm_info.ipv4 = false;
+	}
+	cm_info.loc_port = ntohs(tcph->dest);
+	cm_info.rem_port = ntohs(tcph->source);
+	cm_node = i40iw_find_node(cm_core,
+				  cm_info.rem_port,
+				  cm_info.rem_addr,
+				  cm_info.loc_port,
+				  cm_info.loc_addr,
+				  true);
+
+	if (!cm_node) {
+		/* Only type of packet accepted are for */
+		/* the PASSIVE open (syn only) */
+		if (!tcph->syn || tcph->ack)
+			return;
+		listener =
+		    i40iw_find_listener(cm_core,
+					cm_info.loc_addr,
+					cm_info.loc_port,
+					cm_info.vlan_id,
+					I40IW_CM_LISTENER_ACTIVE_STATE);
+		if (!listener) {
+			cm_info.cm_id = NULL;
+			i40iw_debug(cm_core->dev,
+				    I40IW_DEBUG_CM,
+				    "%s no listener found\n",
+				    __func__);
+			return;
+		}
+		cm_info.cm_id = listener->cm_id;
+		cm_node = i40iw_make_cm_node(cm_core, iwdev, &cm_info, listener);
+		if (!cm_node) {
+			i40iw_debug(cm_core->dev,
+				    I40IW_DEBUG_CM,
+				    "%s allocate node failed\n",
+				    __func__);
+			atomic_dec(&listener->ref_count);
+			return;
+		}
+		if (!tcph->rst && !tcph->fin) {
+			cm_node->state = I40IW_CM_STATE_LISTENING;
+		} else {
+			i40iw_rem_ref_cm_node(cm_node);
+			return;
+		}
+		atomic_inc(&cm_node->ref_count);
+	} else if (cm_node->state == I40IW_CM_STATE_OFFLOADED) {
+		i40iw_rem_ref_cm_node(cm_node);
+		return;
+	}
+	i40iw_process_packet(cm_node, rbuf);
+	i40iw_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * i40iw_setup_cm_core - allocate a top level instance of a cm
+ * core
+ * @iwdev: iwarp device structure
+ */
+void i40iw_setup_cm_core(struct i40iw_device *iwdev)
+{
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+
+	cm_core->iwdev = iwdev;
+	cm_core->dev = &iwdev->sc_dev;
+
+	INIT_LIST_HEAD(&cm_core->connected_nodes);
+	INIT_LIST_HEAD(&cm_core->listen_nodes);
+
+	init_timer(&cm_core->tcp_timer);
+	cm_core->tcp_timer.function = i40iw_cm_timer_tick;
+	cm_core->tcp_timer.data = (unsigned long)cm_core;
+
+	spin_lock_init(&cm_core->ht_lock);
+	spin_lock_init(&cm_core->listen_list_lock);
+
+	cm_core->event_wq = create_singlethread_workqueue("iwewq");
+	cm_core->disconn_wq = create_singlethread_workqueue("iwdwq");
+}
+
+/**
+ * i40iw_cleanup_cm_core - deallocate a top level instance of a
+ * cm core
+ * @cm_core: cm's core
+ */
+void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core)
+{
+	unsigned long flags;
+
+	if (!cm_core)
+		return;
+
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	if (timer_pending(&cm_core->tcp_timer))
+		del_timer_sync(&cm_core->tcp_timer);
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	destroy_workqueue(cm_core->event_wq);
+	destroy_workqueue(cm_core->disconn_wq);
+}
+
+/**
+ * i40iw_init_tcp_ctx - setup qp context
+ * @cm_node: connection's node
+ * @tcp_info: offload info for tcp
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
+			       struct i40iw_tcp_offload_info *tcp_info,
+			       struct i40iw_qp *iwqp)
+{
+	tcp_info->ipv4 = cm_node->ipv4;
+	tcp_info->drop_ooo_seg = true;
+	tcp_info->wscale = true;
+	tcp_info->ignore_tcp_opt = true;
+	tcp_info->ignore_tcp_uns_opt = true;
+	tcp_info->no_nagle = false;
+
+	tcp_info->ttl = I40IW_DEFAULT_TTL;
+	tcp_info->rtt_var = cpu_to_le32(I40IW_DEFAULT_RTT_VAR);
+	tcp_info->ss_thresh = cpu_to_le32(I40IW_DEFAULT_SS_THRESH);
+	tcp_info->rexmit_thresh = I40IW_DEFAULT_REXMIT_THRESH;
+
+	tcp_info->tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+	tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
+	tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
+
+	tcp_info->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	tcp_info->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
+	tcp_info->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+	tcp_info->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+
+	tcp_info->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	tcp_info->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
+	tcp_info->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+	tcp_info->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	tcp_info->max_snd_window = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
+	tcp_info->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
+					cm_node->tcp_cntxt.rcv_wscale);
+
+	tcp_info->flow_label = 0;
+	tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
+	if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+		tcp_info->insert_vlan_tag = true;
+		tcp_info->vlan_tag = cpu_to_le16(cm_node->vlan_id);
+	}
+	if (cm_node->ipv4) {
+		tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
+		tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
+
+		tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]);
+		tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]);
+		tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(iwqp->iwdev,
+								&tcp_info->dest_ip_addr3,
+								true,
+								NULL,
+								I40IW_ARP_RESOLVE));
+	} else {
+		tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
+		tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
+		tcp_info->dest_ip_addr0 = cpu_to_le32(cm_node->rem_addr[0]);
+		tcp_info->dest_ip_addr1 = cpu_to_le32(cm_node->rem_addr[1]);
+		tcp_info->dest_ip_addr2 = cpu_to_le32(cm_node->rem_addr[2]);
+		tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[3]);
+		tcp_info->local_ipaddr0 = cpu_to_le32(cm_node->loc_addr[0]);
+		tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]);
+		tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]);
+		tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]);
+		tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(
+							iwqp->iwdev,
+							&tcp_info->dest_ip_addr0,
+							false,
+							NULL,
+							I40IW_ARP_RESOLVE));
+	}
+}
+
+/**
+ * i40iw_cm_init_tsa_conn - setup qp for RTS
+ * @iwqp: associate qp for the connection
+ * @cm_node: connection's node
+ */
+static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp,
+				   struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_tcp_offload_info tcp_info;
+	struct i40iwarp_offload_info *iwarp_info;
+	struct i40iw_qp_host_ctx_info *ctx_info;
+	struct i40iw_device *iwdev = iwqp->iwdev;
+	struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
+
+	memset(&tcp_info, 0x00, sizeof(struct i40iw_tcp_offload_info));
+	iwarp_info = &iwqp->iwarp_info;
+	ctx_info = &iwqp->ctx_info;
+
+	ctx_info->tcp_info = &tcp_info;
+	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+
+	iwarp_info->ord_size = cm_node->ord_size;
+	iwarp_info->ird_size = i40iw_derive_hw_ird_setting(cm_node->ird_size);
+
+	if (iwarp_info->ord_size == 1)
+		iwarp_info->ord_size = 2;
+
+	iwarp_info->rd_enable = true;
+	iwarp_info->rdmap_ver = 1;
+	iwarp_info->ddp_ver = 1;
+
+	iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
+
+	ctx_info->tcp_info_valid = true;
+	ctx_info->iwarp_info_valid = true;
+
+	i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp);
+	if (cm_node->snd_mark_en) {
+		iwarp_info->snd_mark_en = true;
+		iwarp_info->snd_mark_offset = (tcp_info.snd_nxt &
+				SNDMARKER_SEQNMASK) + cm_node->lsmm_size;
+	}
+
+	cm_node->state = I40IW_CM_STATE_OFFLOADED;
+	tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+	tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx;
+
+	dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info);
+
+	/* once tcp_info is set, no need to do it again */
+	ctx_info->tcp_info_valid = false;
+	ctx_info->iwarp_info_valid = false;
+}
+
+/**
+ * i40iw_cm_disconn - when a connection is being closed
+ * @iwqp: associate qp for the connection
+ */
+int i40iw_cm_disconn(struct i40iw_qp *iwqp)
+{
+	struct disconn_work *work;
+	struct i40iw_device *iwdev = iwqp->iwdev;
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return -ENOMEM;	/* Timer will clean up */
+
+	i40iw_add_ref(&iwqp->ibqp);
+	work->iwqp = iwqp;
+	INIT_WORK(&work->work, i40iw_disconnect_worker);
+	queue_work(cm_core->disconn_wq, &work->work);
+	return 0;
+}
+
+/**
+ * i40iw_loopback_nop - Send a nop
+ * @qp: associated hw qp
+ */
+static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = qp->qp_uk.sq_base->elem;
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8, 0);
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+	    LS_64(0, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+	set_64bit_val(wqe, 24, header);
+}
+
+/**
+ * i40iw_qp_disconnect - free qp and close cm
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_qp_disconnect(struct i40iw_qp *iwqp)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_ib_device *iwibdev;
+
+	iwdev = to_iwdev(iwqp->ibqp.device);
+	if (!iwdev) {
+		i40iw_pr_err("iwdev == NULL\n");
+		return;
+	}
+
+	iwibdev = iwdev->iwibdev;
+
+	if (iwqp->active_conn) {
+		/* indicate this connection is NOT active */
+		iwqp->active_conn = 0;
+	} else {
+		/* Need to free the Last Streaming Mode Message */
+		if (iwqp->ietf_mem.va) {
+			if (iwqp->lsmm_mr)
+				iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr);
+			i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem);
+		}
+	}
+
+	/* close the CM node down if it is still active */
+	if (iwqp->cm_node) {
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "%s Call close API\n", __func__);
+		i40iw_cm_close(iwqp->cm_node);
+	}
+}
+
+/**
+ * i40iw_cm_disconn_true - called by worker thread to disconnect qp
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
+{
+	struct iw_cm_id *cm_id;
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+	u16 last_ae;
+	u8 original_hw_tcp_state;
+	u8 original_ibqp_state;
+	int disconn_status = 0;
+	int issue_disconn = 0;
+	int issue_close = 0;
+	int issue_flush = 0;
+	struct ib_event ibevent;
+	unsigned long flags;
+	int ret;
+
+	if (!iwqp) {
+		i40iw_pr_err("iwqp == NULL\n");
+		return;
+	}
+
+	spin_lock_irqsave(&iwqp->lock, flags);
+	cm_id = iwqp->cm_id;
+	/* make sure we havent already closed this connection */
+	if (!cm_id) {
+		spin_unlock_irqrestore(&iwqp->lock, flags);
+		return;
+	}
+
+	iwdev = to_iwdev(iwqp->ibqp.device);
+
+	original_hw_tcp_state = iwqp->hw_tcp_state;
+	original_ibqp_state = iwqp->ibqp_state;
+	last_ae = iwqp->last_aeq;
+
+	if (qp->term_flags) {
+		issue_disconn = 1;
+		issue_close = 1;
+		iwqp->cm_id = NULL;
+		/*When term timer expires after cm_timer, don't want
+		 *terminate-handler to issue cm_disconn which can re-free
+		 *a QP even after its refcnt=0.
+		 */
+		del_timer(&iwqp->terminate_timer);
+		if (!iwqp->flush_issued) {
+			iwqp->flush_issued = 1;
+			issue_flush = 1;
+		}
+	} else if ((original_hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) ||
+		   ((original_ibqp_state == IB_QPS_RTS) &&
+		    (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
+		issue_disconn = 1;
+		if (last_ae == I40IW_AE_LLP_CONNECTION_RESET)
+			disconn_status = -ECONNRESET;
+	}
+
+	if (((original_hw_tcp_state == I40IW_TCP_STATE_CLOSED) ||
+	     (original_hw_tcp_state == I40IW_TCP_STATE_TIME_WAIT) ||
+	     (last_ae == I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE) ||
+	     (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
+		issue_close = 1;
+		iwqp->cm_id = NULL;
+		if (!iwqp->flush_issued) {
+			iwqp->flush_issued = 1;
+			issue_flush = 1;
+		}
+	}
+
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	if (issue_flush && !iwqp->destroyed) {
+		/* Flush the queues */
+		i40iw_flush_wqes(iwdev, iwqp);
+
+		if (qp->term_flags) {
+			ibevent.device = iwqp->ibqp.device;
+			ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ?
+					IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
+			ibevent.element.qp = &iwqp->ibqp;
+			iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
+		}
+	}
+
+	if (cm_id && cm_id->event_handler) {
+		if (issue_disconn) {
+			ret = i40iw_send_cm_event(NULL,
+						  cm_id,
+						  IW_CM_EVENT_DISCONNECT,
+						  disconn_status);
+
+			if (ret)
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "disconnect event failed %s: - cm_id = %p\n",
+					    __func__, cm_id);
+		}
+		if (issue_close) {
+			i40iw_qp_disconnect(iwqp);
+			cm_id->provider_data = iwqp;
+			ret = i40iw_send_cm_event(NULL, cm_id, IW_CM_EVENT_CLOSE, 0);
+			if (ret)
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "close event failed %s: - cm_id = %p\n",
+					    __func__, cm_id);
+			cm_id->rem_ref(cm_id);
+		}
+	}
+}
+
+/**
+ * i40iw_disconnect_worker - worker for connection close
+ * @work: points or disconn structure
+ */
+static void i40iw_disconnect_worker(struct work_struct *work)
+{
+	struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+	struct i40iw_qp *iwqp = dwork->iwqp;
+
+	kfree(dwork);
+	i40iw_cm_disconn_true(iwqp);
+	i40iw_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * i40iw_accept - registered call for connection to be accepted
+ * @cm_id: cm information for passive connection
+ * @conn_param: accpet parameters
+ */
+int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	struct ib_qp *ibqp;
+	struct i40iw_qp *iwqp;
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_cm_node *cm_node;
+	struct ib_qp_attr attr;
+	int passive_state;
+	struct i40iw_ib_device *iwibdev;
+	struct ib_mr *ibmr;
+	struct i40iw_pd *iwpd;
+	u16 buf_len = 0;
+	struct i40iw_kmem_info accept;
+	enum i40iw_status_code status;
+	u64 tagged_offset;
+
+	memset(&attr, 0, sizeof(attr));
+	ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+
+	iwqp = to_iwqp(ibqp);
+	iwdev = iwqp->iwdev;
+	dev = &iwdev->sc_dev;
+	cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
+
+	if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
+		cm_node->ipv4 = true;
+		cm_node->vlan_id = i40iw_get_vlan_ipv4(cm_node->loc_addr);
+	} else {
+		cm_node->ipv4 = false;
+		i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, NULL);
+	}
+	i40iw_debug(cm_node->dev,
+		    I40IW_DEBUG_CM,
+		    "Accept vlan_id=%d\n",
+		    cm_node->vlan_id);
+	if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+		if (cm_node->loopbackpartner)
+			i40iw_rem_ref_cm_node(cm_node->loopbackpartner);
+		i40iw_rem_ref_cm_node(cm_node);
+		return -EINVAL;
+	}
+
+	passive_state = atomic_add_return(1, &cm_node->passive_state);
+	if (passive_state == I40IW_SEND_RESET_EVENT) {
+		i40iw_rem_ref_cm_node(cm_node);
+		return -ECONNRESET;
+	}
+
+	cm_node->cm_core->stats_accepts++;
+	iwqp->cm_node = (void *)cm_node;
+	cm_node->iwqp = iwqp;
+
+	buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE + MPA_ZERO_PAD_LEN;
+
+	status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
+
+	if (status)
+		return -ENOMEM;
+	cm_node->pdata.size = conn_param->private_data_len;
+	accept.addr = iwqp->ietf_mem.va;
+	accept.size = i40iw_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
+	memcpy(accept.addr + accept.size, conn_param->private_data,
+	       conn_param->private_data_len);
+
+	/* setup our first outgoing iWarp send WQE (the IETF frame response) */
+	if ((cm_node->ipv4 &&
+	     !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+	    (!cm_node->ipv4 &&
+	     !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) {
+		iwibdev = iwdev->iwibdev;
+		iwpd = iwqp->iwpd;
+		tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
+		ibmr = i40iw_reg_phys_mr(&iwpd->ibpd,
+					 iwqp->ietf_mem.pa,
+					 buf_len,
+					 IB_ACCESS_LOCAL_WRITE,
+					 &tagged_offset);
+		if (IS_ERR(ibmr)) {
+			i40iw_free_dma_mem(dev->hw, &iwqp->ietf_mem);
+			return -ENOMEM;
+		}
+
+		ibmr->pd = &iwpd->ibpd;
+		ibmr->device = iwpd->ibpd.device;
+		iwqp->lsmm_mr = ibmr;
+		if (iwqp->page)
+			iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+		if (is_remote_ne020_or_chelsio(cm_node))
+			dev->iw_priv_qp_ops->qp_send_lsmm(
+							&iwqp->sc_qp,
+							iwqp->ietf_mem.va,
+							(accept.size + conn_param->private_data_len),
+							ibmr->lkey);
+		else
+			dev->iw_priv_qp_ops->qp_send_lsmm(
+							&iwqp->sc_qp,
+							iwqp->ietf_mem.va,
+							(accept.size + conn_param->private_data_len + MPA_ZERO_PAD_LEN),
+							ibmr->lkey);
+
+	} else {
+		if (iwqp->page)
+			iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+		i40iw_loopback_nop(&iwqp->sc_qp);
+	}
+
+	if (iwqp->page)
+		kunmap(iwqp->page);
+
+	iwqp->cm_id = cm_id;
+	cm_node->cm_id = cm_id;
+
+	cm_id->provider_data = (void *)iwqp;
+	iwqp->active_conn = 0;
+
+	cm_node->lsmm_size = accept.size + conn_param->private_data_len;
+	i40iw_cm_init_tsa_conn(iwqp, cm_node);
+	cm_id->add_ref(cm_id);
+	i40iw_add_ref(&iwqp->ibqp);
+
+	i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
+
+	attr.qp_state = IB_QPS_RTS;
+	cm_node->qhash_set = false;
+	i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+	if (cm_node->loopbackpartner) {
+		cm_node->loopbackpartner->pdata.size = conn_param->private_data_len;
+
+		/* copy entire MPA frame to our cm_node's frame */
+		memcpy(cm_node->loopbackpartner->pdata_buf,
+		       conn_param->private_data,
+		       conn_param->private_data_len);
+		i40iw_create_event(cm_node->loopbackpartner, I40IW_CM_EVENT_CONNECTED);
+	}
+
+	cm_node->accelerated = 1;
+	if (cm_node->accept_pend) {
+		if (!cm_node->listener)
+			i40iw_pr_err("cm_node->listener NULL for passive node\n");
+		atomic_dec(&cm_node->listener->pend_accepts_cnt);
+		cm_node->accept_pend = 0;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_reject - registered call for connection to be rejected
+ * @cm_id: cm information for passive connection
+ * @pdata: private data to be sent
+ * @pdata_len: private data length
+ */
+int i40iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_cm_node *loopback;
+
+	cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
+	loopback = cm_node->loopbackpartner;
+	cm_node->cm_id = cm_id;
+	cm_node->pdata.size = pdata_len;
+
+	iwdev = to_iwdev(cm_id->device);
+	if (!iwdev)
+		return -EINVAL;
+	cm_node->cm_core->stats_rejects++;
+
+	if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
+		return -EINVAL;
+
+	if (loopback) {
+		memcpy(&loopback->pdata_buf, pdata, pdata_len);
+		loopback->pdata.size = pdata_len;
+	}
+
+	return i40iw_cm_reject(cm_node, pdata, pdata_len);
+}
+
+/**
+ * i40iw_connect - registered call for connection to be established
+ * @cm_id: cm information for passive connection
+ * @conn_param: Information about the connection
+ */
+int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	struct ib_qp *ibqp;
+	struct i40iw_qp *iwqp;
+	struct i40iw_device *iwdev;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_cm_info cm_info;
+	struct sockaddr_in *laddr;
+	struct sockaddr_in *raddr;
+	struct sockaddr_in6 *laddr6;
+	struct sockaddr_in6 *raddr6;
+	int apbvt_set = 0;
+	enum i40iw_status_code status;
+
+	ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	iwqp = to_iwqp(ibqp);
+	if (!iwqp)
+		return -EINVAL;
+	iwdev = to_iwdev(iwqp->ibqp.device);
+	if (!iwdev)
+		return -EINVAL;
+
+	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+	raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+	if (!(laddr->sin_port) || !(raddr->sin_port))
+		return -EINVAL;
+
+	iwqp->active_conn = 1;
+	iwqp->cm_id = NULL;
+	cm_id->provider_data = iwqp;
+
+	/* set up the connection params for the node */
+	if (cm_id->remote_addr.ss_family == AF_INET) {
+		cm_info.ipv4 = true;
+		memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
+		memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
+		cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+		cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
+		cm_info.loc_port = ntohs(laddr->sin_port);
+		cm_info.rem_port = ntohs(raddr->sin_port);
+		cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
+	} else {
+		cm_info.ipv4 = false;
+		i40iw_copy_ip_ntohl(cm_info.loc_addr,
+				    laddr6->sin6_addr.in6_u.u6_addr32);
+		i40iw_copy_ip_ntohl(cm_info.rem_addr,
+				    raddr6->sin6_addr.in6_u.u6_addr32);
+		cm_info.loc_port = ntohs(laddr6->sin6_port);
+		cm_info.rem_port = ntohs(raddr6->sin6_port);
+		i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
+	}
+	cm_info.cm_id = cm_id;
+	if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
+	    (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
+				     raddr6->sin6_addr.in6_u.u6_addr32,
+				     sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
+		status = i40iw_manage_qhash(iwdev,
+					    &cm_info,
+					    I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+					    I40IW_QHASH_MANAGE_TYPE_ADD,
+					    NULL,
+					    true);
+		if (status)
+			return -EINVAL;
+	}
+	status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
+	if (status) {
+		i40iw_manage_qhash(iwdev,
+				   &cm_info,
+				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				   I40IW_QHASH_MANAGE_TYPE_DELETE,
+				   NULL,
+				   false);
+		return -EINVAL;
+	}
+
+	apbvt_set = 1;
+	cm_id->add_ref(cm_id);
+	cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
+				       conn_param->private_data_len,
+				       (void *)conn_param->private_data,
+				       &cm_info);
+	if (!cm_node) {
+		i40iw_manage_qhash(iwdev,
+				   &cm_info,
+				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				   I40IW_QHASH_MANAGE_TYPE_DELETE,
+				   NULL,
+				   false);
+
+		if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
+							   cm_info.loc_port))
+			i40iw_manage_apbvt(iwdev,
+					   cm_info.loc_port,
+					   I40IW_MANAGE_APBVT_DEL);
+		cm_id->rem_ref(cm_id);
+		iwdev->cm_core.stats_connect_errs++;
+		return -ENOMEM;
+	}
+
+	i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+	if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
+	    !cm_node->ord_size)
+		cm_node->ord_size = 1;
+
+	cm_node->apbvt_set = apbvt_set;
+	cm_node->qhash_set = true;
+	iwqp->cm_node = cm_node;
+	cm_node->iwqp = iwqp;
+	iwqp->cm_id = cm_id;
+	i40iw_add_ref(&iwqp->ibqp);
+	return 0;
+}
+
+/**
+ * i40iw_create_listen - registered call creating listener
+ * @cm_id: cm information for passive connection
+ * @backlog: to max accept pending count
+ */
+int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_cm_listener *cm_listen_node;
+	struct i40iw_cm_info cm_info;
+	enum i40iw_status_code ret;
+	struct sockaddr_in *laddr;
+	struct sockaddr_in6 *laddr6;
+	bool wildcard = false;
+
+	iwdev = to_iwdev(cm_id->device);
+	if (!iwdev)
+		return -EINVAL;
+
+	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+	memset(&cm_info, 0, sizeof(cm_info));
+	if (laddr->sin_family == AF_INET) {
+		cm_info.ipv4 = true;
+		cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+		cm_info.loc_port = ntohs(laddr->sin_port);
+
+		if (laddr->sin_addr.s_addr != INADDR_ANY)
+			cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
+		else
+			wildcard = true;
+
+	} else {
+		cm_info.ipv4 = false;
+		i40iw_copy_ip_ntohl(cm_info.loc_addr,
+				    laddr6->sin6_addr.in6_u.u6_addr32);
+		cm_info.loc_port = ntohs(laddr6->sin6_port);
+		if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY)
+			i40iw_netdev_vlan_ipv6(cm_info.loc_addr,
+					       &cm_info.vlan_id,
+					       NULL);
+		else
+			wildcard = true;
+	}
+	cm_info.backlog = backlog;
+	cm_info.cm_id = cm_id;
+
+	cm_listen_node = i40iw_make_listen_node(&iwdev->cm_core, iwdev, &cm_info);
+	if (!cm_listen_node) {
+		i40iw_pr_err("cm_listen_node == NULL\n");
+		return -ENOMEM;
+	}
+
+	cm_id->provider_data = cm_listen_node;
+
+	if (!cm_listen_node->reused_node) {
+		if (wildcard) {
+			if (cm_info.ipv4)
+				ret = i40iw_add_mqh_4(iwdev,
+						      &cm_info,
+						      cm_listen_node);
+			else
+				ret = i40iw_add_mqh_6(iwdev,
+						      &cm_info,
+						      cm_listen_node);
+			if (ret)
+				goto error;
+
+			ret = i40iw_manage_apbvt(iwdev,
+						 cm_info.loc_port,
+						 I40IW_MANAGE_APBVT_ADD);
+
+			if (ret)
+				goto error;
+		} else {
+			ret = i40iw_manage_qhash(iwdev,
+						 &cm_info,
+						 I40IW_QHASH_TYPE_TCP_SYN,
+						 I40IW_QHASH_MANAGE_TYPE_ADD,
+						 NULL,
+						 true);
+			if (ret)
+				goto error;
+			cm_listen_node->qhash_set = true;
+			ret = i40iw_manage_apbvt(iwdev,
+						 cm_info.loc_port,
+						 I40IW_MANAGE_APBVT_ADD);
+			if (ret)
+				goto error;
+		}
+	}
+	cm_id->add_ref(cm_id);
+	cm_listen_node->cm_core->stats_listen_created++;
+	return 0;
+ error:
+	i40iw_cm_del_listen(&iwdev->cm_core, (void *)cm_listen_node, false);
+	return -EINVAL;
+}
+
+/**
+ * i40iw_destroy_listen - registered call to destroy listener
+ * @cm_id: cm information for passive connection
+ */
+int i40iw_destroy_listen(struct iw_cm_id *cm_id)
+{
+	struct i40iw_device *iwdev;
+
+	iwdev = to_iwdev(cm_id->device);
+	if (cm_id->provider_data)
+		i40iw_cm_del_listen(&iwdev->cm_core, cm_id->provider_data, true);
+	else
+		i40iw_pr_err("cm_id->provider_data was NULL\n");
+
+	cm_id->rem_ref(cm_id);
+
+	return 0;
+}
+
+/**
+ * i40iw_cm_event_connected - handle connected active node
+ * @event: the info for cm_node of connection
+ */
+static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
+{
+	struct i40iw_qp *iwqp;
+	struct i40iw_device *iwdev;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_sc_dev *dev;
+	struct ib_qp_attr attr;
+	struct iw_cm_id *cm_id;
+	int status;
+	bool read0;
+
+	cm_node = event->cm_node;
+	cm_id = cm_node->cm_id;
+	iwqp = (struct i40iw_qp *)cm_id->provider_data;
+	iwdev = to_iwdev(iwqp->ibqp.device);
+	dev = &iwdev->sc_dev;
+
+	if (iwqp->destroyed) {
+		status = -ETIMEDOUT;
+		goto error;
+	}
+	i40iw_cm_init_tsa_conn(iwqp, cm_node);
+	read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
+	if (iwqp->page)
+		iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+	dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0);
+	if (iwqp->page)
+		kunmap(iwqp->page);
+	status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
+	if (status)
+		i40iw_pr_err("send cm event\n");
+
+	memset(&attr, 0, sizeof(attr));
+	attr.qp_state = IB_QPS_RTS;
+	cm_node->qhash_set = false;
+	i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+
+	cm_node->accelerated = 1;
+	if (cm_node->accept_pend) {
+		if (!cm_node->listener)
+			i40iw_pr_err("listener is null for passive node\n");
+		atomic_dec(&cm_node->listener->pend_accepts_cnt);
+		cm_node->accept_pend = 0;
+	}
+	return;
+
+error:
+	iwqp->cm_id = NULL;
+	cm_id->provider_data = NULL;
+	i40iw_send_cm_event(event->cm_node,
+			    cm_id,
+			    IW_CM_EVENT_CONNECT_REPLY,
+			    status);
+	cm_id->rem_ref(cm_id);
+	i40iw_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * i40iw_cm_event_reset - handle reset
+ * @event: the info for cm_node of connection
+ */
+static void i40iw_cm_event_reset(struct i40iw_cm_event *event)
+{
+	struct i40iw_cm_node *cm_node = event->cm_node;
+	struct iw_cm_id   *cm_id = cm_node->cm_id;
+	struct i40iw_qp *iwqp;
+
+	if (!cm_id)
+		return;
+
+	iwqp = cm_id->provider_data;
+	if (!iwqp)
+		return;
+
+	i40iw_debug(cm_node->dev,
+		    I40IW_DEBUG_CM,
+		    "reset event %p - cm_id = %p\n",
+		     event->cm_node, cm_id);
+	iwqp->cm_id = NULL;
+
+	i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT, -ECONNRESET);
+	i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0);
+}
+
+/**
+ * i40iw_cm_event_handler - worker thread callback to send event to cm upper layer
+ * @work: pointer of cm event info.
+ */
+static void i40iw_cm_event_handler(struct work_struct *work)
+{
+	struct i40iw_cm_event *event = container_of(work,
+						    struct i40iw_cm_event,
+						    event_work);
+	struct i40iw_cm_node *cm_node;
+
+	if (!event || !event->cm_node || !event->cm_node->cm_core)
+		return;
+
+	cm_node = event->cm_node;
+
+	switch (event->type) {
+	case I40IW_CM_EVENT_MPA_REQ:
+		i40iw_send_cm_event(cm_node,
+				    cm_node->cm_id,
+				    IW_CM_EVENT_CONNECT_REQUEST,
+				    0);
+		break;
+	case I40IW_CM_EVENT_RESET:
+		i40iw_cm_event_reset(event);
+		break;
+	case I40IW_CM_EVENT_CONNECTED:
+		if (!event->cm_node->cm_id ||
+		    (event->cm_node->state != I40IW_CM_STATE_OFFLOADED))
+			break;
+		i40iw_cm_event_connected(event);
+		break;
+	case I40IW_CM_EVENT_MPA_REJECT:
+		if (!event->cm_node->cm_id ||
+		    (cm_node->state == I40IW_CM_STATE_OFFLOADED))
+			break;
+		i40iw_send_cm_event(cm_node,
+				    cm_node->cm_id,
+				    IW_CM_EVENT_CONNECT_REPLY,
+				    -ECONNREFUSED);
+		break;
+	case I40IW_CM_EVENT_ABORTED:
+		if (!event->cm_node->cm_id ||
+		    (event->cm_node->state == I40IW_CM_STATE_OFFLOADED))
+			break;
+		i40iw_event_connect_error(event);
+		break;
+	default:
+		i40iw_pr_err("event type = %d\n", event->type);
+		break;
+	}
+
+	event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
+	i40iw_rem_ref_cm_node(event->cm_node);
+	kfree(event);
+}
+
+/**
+ * i40iw_cm_post_event - queue event request for worker thread
+ * @event: cm node's info for up event call
+ */
+static void i40iw_cm_post_event(struct i40iw_cm_event *event)
+{
+	atomic_inc(&event->cm_node->ref_count);
+	event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
+	INIT_WORK(&event->event_work, i40iw_cm_event_handler);
+
+	queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
new file mode 100644
index 0000000..5f8ceb4
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h

@@ -0,0 +1,456 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_CM_H
+#define I40IW_CM_H
+
+#define QUEUE_EVENTS
+
+#define I40IW_MANAGE_APBVT_DEL 0
+#define I40IW_MANAGE_APBVT_ADD 1
+
+#define I40IW_MPA_REQUEST_ACCEPT  1
+#define I40IW_MPA_REQUEST_REJECT  2
+
+/* IETF MPA -- defines, enums, structs */
+#define IEFT_MPA_KEY_REQ  "MPA ID Req Frame"
+#define IEFT_MPA_KEY_REP  "MPA ID Rep Frame"
+#define IETF_MPA_KEY_SIZE 16
+#define IETF_MPA_VERSION  1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE    20
+#define IETF_RTR_MSG_SIZE      4
+#define IETF_MPA_V2_FLAG       0x10
+#define SNDMARKER_SEQNMASK     0x000001FF
+
+#define I40IW_MAX_IETF_SIZE      32
+
+#define MPA_ZERO_PAD_LEN	4
+
+/* IETF RTR MSG Fields               */
+#define IETF_PEER_TO_PEER       0x8000
+#define IETF_FLPDU_ZERO_LEN     0x4000
+#define IETF_RDMA0_WRITE        0x8000
+#define IETF_RDMA0_READ         0x4000
+#define IETF_NO_IRD_ORD         0x3FFF
+
+/* HW-supported IRD sizes*/
+#define	I40IW_HW_IRD_SETTING_2	2
+#define	I40IW_HW_IRD_SETTING_4	4
+#define	I40IW_HW_IRD_SETTING_8	8
+#define	I40IW_HW_IRD_SETTING_16	16
+#define	I40IW_HW_IRD_SETTING_32	32
+#define	I40IW_HW_IRD_SETTING_64	64
+
+enum ietf_mpa_flags {
+	IETF_MPA_FLAGS_MARKERS = 0x80,	/* receive Markers */
+	IETF_MPA_FLAGS_CRC = 0x40,	/* receive Markers */
+	IETF_MPA_FLAGS_REJECT = 0x20,	/* Reject */
+};
+
+struct ietf_mpa_v1 {
+	u8 key[IETF_MPA_KEY_SIZE];
+	u8 flags;
+	u8 rev;
+	__be16 priv_data_len;
+	u8 priv_data[0];
+};
+
+#define ietf_mpa_req_resp_frame ietf_mpa_frame
+
+struct ietf_rtr_msg {
+	__be16 ctrl_ird;
+	__be16 ctrl_ord;
+};
+
+struct ietf_mpa_v2 {
+	u8 key[IETF_MPA_KEY_SIZE];
+	u8 flags;
+	u8 rev;
+	__be16 priv_data_len;
+	struct ietf_rtr_msg rtr_msg;
+	u8 priv_data[0];
+};
+
+struct i40iw_cm_node;
+enum i40iw_timer_type {
+	I40IW_TIMER_TYPE_SEND,
+	I40IW_TIMER_TYPE_RECV,
+	I40IW_TIMER_NODE_CLEANUP,
+	I40IW_TIMER_TYPE_CLOSE,
+};
+
+#define I40IW_PASSIVE_STATE_INDICATED    0
+#define I40IW_DO_NOT_SEND_RESET_EVENT    1
+#define I40IW_SEND_RESET_EVENT           2
+
+#define MAX_I40IW_IFS 4
+
+#define SET_ACK 0x1
+#define SET_SYN 0x2
+#define SET_FIN 0x4
+#define SET_RST 0x8
+
+#define TCP_OPTIONS_PADDING     3
+
+struct option_base {
+	u8 optionnum;
+	u8 length;
+};
+
+enum option_numbers {
+	OPTION_NUMBER_END,
+	OPTION_NUMBER_NONE,
+	OPTION_NUMBER_MSS,
+	OPTION_NUMBER_WINDOW_SCALE,
+	OPTION_NUMBER_SACK_PERM,
+	OPTION_NUMBER_SACK,
+	OPTION_NUMBER_WRITE0 = 0xbc
+};
+
+struct option_mss {
+	u8 optionnum;
+	u8 length;
+	__be16 mss;
+};
+
+struct option_windowscale {
+	u8 optionnum;
+	u8 length;
+	u8 shiftcount;
+};
+
+union all_known_options {
+	char as_end;
+	struct option_base as_base;
+	struct option_mss as_mss;
+	struct option_windowscale as_windowscale;
+};
+
+struct i40iw_timer_entry {
+	struct list_head list;
+	unsigned long timetosend;	/* jiffies */
+	struct i40iw_puda_buf *sqbuf;
+	u32 type;
+	u32 retrycount;
+	u32 retranscount;
+	u32 context;
+	u32 send_retrans;
+	int close_when_complete;
+};
+
+#define I40IW_DEFAULT_RETRYS	64
+#define I40IW_DEFAULT_RETRANS	8
+#define I40IW_DEFAULT_TTL	0x40
+#define I40IW_DEFAULT_RTT_VAR	0x6
+#define I40IW_DEFAULT_SS_THRESH 0x3FFFFFFF
+#define I40IW_DEFAULT_REXMIT_THRESH 8
+
+#define I40IW_RETRY_TIMEOUT   HZ
+#define I40IW_SHORT_TIME      10
+#define I40IW_LONG_TIME       (2 * HZ)
+#define I40IW_MAX_TIMEOUT     ((unsigned long)(12 * HZ))
+
+#define I40IW_CM_HASHTABLE_SIZE         1024
+#define I40IW_CM_TCP_TIMER_INTERVAL     3000
+#define I40IW_CM_DEFAULT_MTU            1540
+#define I40IW_CM_DEFAULT_FRAME_CNT      10
+#define I40IW_CM_THREAD_STACK_SIZE      256
+#define I40IW_CM_DEFAULT_RCV_WND        64240
+#define I40IW_CM_DEFAULT_RCV_WND_SCALED 0x3fffc
+#define I40IW_CM_DEFAULT_RCV_WND_SCALE  2
+#define I40IW_CM_DEFAULT_FREE_PKTS      0x000A
+#define I40IW_CM_FREE_PKT_LO_WATERMARK  2
+
+#define I40IW_CM_DEFAULT_MSS   536
+
+#define I40IW_CM_DEF_SEQ       0x159bf75f
+#define I40IW_CM_DEF_LOCAL_ID  0x3b47
+
+#define I40IW_CM_DEF_SEQ2      0x18ed5740
+#define I40IW_CM_DEF_LOCAL_ID2 0xb807
+#define MAX_CM_BUFFER   (I40IW_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
+
+typedef u32 i40iw_addr_t;
+
+#define i40iw_cm_tsa_context i40iw_qp_context
+
+struct i40iw_qp;
+
+/* cm node transition states */
+enum i40iw_cm_node_state {
+	I40IW_CM_STATE_UNKNOWN,
+	I40IW_CM_STATE_INITED,
+	I40IW_CM_STATE_LISTENING,
+	I40IW_CM_STATE_SYN_RCVD,
+	I40IW_CM_STATE_SYN_SENT,
+	I40IW_CM_STATE_ONE_SIDE_ESTABLISHED,
+	I40IW_CM_STATE_ESTABLISHED,
+	I40IW_CM_STATE_ACCEPTING,
+	I40IW_CM_STATE_MPAREQ_SENT,
+	I40IW_CM_STATE_MPAREQ_RCVD,
+	I40IW_CM_STATE_MPAREJ_RCVD,
+	I40IW_CM_STATE_OFFLOADED,
+	I40IW_CM_STATE_FIN_WAIT1,
+	I40IW_CM_STATE_FIN_WAIT2,
+	I40IW_CM_STATE_CLOSE_WAIT,
+	I40IW_CM_STATE_TIME_WAIT,
+	I40IW_CM_STATE_LAST_ACK,
+	I40IW_CM_STATE_CLOSING,
+	I40IW_CM_STATE_LISTENER_DESTROYED,
+	I40IW_CM_STATE_CLOSED
+};
+
+enum mpa_frame_version {
+	IETF_MPA_V1 = 1,
+	IETF_MPA_V2 = 2
+};
+
+enum mpa_frame_key {
+	MPA_KEY_REQUEST,
+	MPA_KEY_REPLY
+};
+
+enum send_rdma0 {
+	SEND_RDMA_READ_ZERO = 1,
+	SEND_RDMA_WRITE_ZERO = 2
+};
+
+enum i40iw_tcpip_pkt_type {
+	I40IW_PKT_TYPE_UNKNOWN,
+	I40IW_PKT_TYPE_SYN,
+	I40IW_PKT_TYPE_SYNACK,
+	I40IW_PKT_TYPE_ACK,
+	I40IW_PKT_TYPE_FIN,
+	I40IW_PKT_TYPE_RST
+};
+
+/* CM context params */
+struct i40iw_cm_tcp_context {
+	u8 client;
+
+	u32 loc_seq_num;
+	u32 loc_ack_num;
+	u32 rem_ack_num;
+	u32 rcv_nxt;
+
+	u32 loc_id;
+	u32 rem_id;
+
+	u32 snd_wnd;
+	u32 max_snd_wnd;
+
+	u32 rcv_wnd;
+	u32 mss;
+	u8 snd_wscale;
+	u8 rcv_wscale;
+
+	struct timeval sent_ts;
+};
+
+enum i40iw_cm_listener_state {
+	I40IW_CM_LISTENER_PASSIVE_STATE = 1,
+	I40IW_CM_LISTENER_ACTIVE_STATE = 2,
+	I40IW_CM_LISTENER_EITHER_STATE = 3
+};
+
+struct i40iw_cm_listener {
+	struct list_head list;
+	struct i40iw_cm_core *cm_core;
+	u8 loc_mac[ETH_ALEN];
+	u32 loc_addr[4];
+	u16 loc_port;
+	u32 map_loc_addr[4];
+	u16 map_loc_port;
+	struct iw_cm_id *cm_id;
+	atomic_t ref_count;
+	struct i40iw_device *iwdev;
+	atomic_t pend_accepts_cnt;
+	int backlog;
+	enum i40iw_cm_listener_state listener_state;
+	u32 reused_node;
+	u8 user_pri;
+	u16 vlan_id;
+	bool qhash_set;
+	bool ipv4;
+	struct list_head child_listen_list;
+
+};
+
+struct i40iw_kmem_info {
+	void *addr;
+	u32 size;
+};
+
+/* per connection node and node state information */
+struct i40iw_cm_node {
+	u32 loc_addr[4], rem_addr[4];
+	u16 loc_port, rem_port;
+	u32 map_loc_addr[4], map_rem_addr[4];
+	u16 map_loc_port, map_rem_port;
+	u16 vlan_id;
+	enum i40iw_cm_node_state state;
+	u8 loc_mac[ETH_ALEN];
+	u8 rem_mac[ETH_ALEN];
+	atomic_t ref_count;
+	struct i40iw_qp *iwqp;
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_cm_tcp_context tcp_cntxt;
+	struct i40iw_cm_core *cm_core;
+	struct i40iw_cm_node *loopbackpartner;
+	struct i40iw_timer_entry *send_entry;
+	struct i40iw_timer_entry *close_entry;
+	spinlock_t retrans_list_lock; /* cm transmit packet */
+	enum send_rdma0 send_rdma0_op;
+	u16 ird_size;
+	u16 ord_size;
+	u16     mpav2_ird_ord;
+	struct iw_cm_id *cm_id;
+	struct list_head list;
+	int accelerated;
+	struct i40iw_cm_listener *listener;
+	int apbvt_set;
+	int accept_pend;
+	struct list_head timer_entry;
+	struct list_head reset_entry;
+	atomic_t passive_state;
+	bool qhash_set;
+	u8 user_pri;
+	bool ipv4;
+	bool snd_mark_en;
+	u16 lsmm_size;
+	enum mpa_frame_version mpa_frame_rev;
+	struct i40iw_kmem_info pdata;
+	union {
+		struct ietf_mpa_v1 mpa_frame;
+		struct ietf_mpa_v2 mpa_v2_frame;
+	};
+
+	u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN];
+	struct i40iw_kmem_info mpa_hdr;
+};
+
+/* structure for client or CM to fill when making CM api calls. */
+/*	- only need to set relevant data, based on op. */
+struct i40iw_cm_info {
+	struct iw_cm_id *cm_id;
+	u16 loc_port;
+	u16 rem_port;
+	u32 loc_addr[4];
+	u32 rem_addr[4];
+	u16 map_loc_port;
+	u16 map_rem_port;
+	u32 map_loc_addr[4];
+	u32 map_rem_addr[4];
+	u16 vlan_id;
+	int backlog;
+	u16 user_pri;
+	bool ipv4;
+};
+
+/* CM event codes */
+enum i40iw_cm_event_type {
+	I40IW_CM_EVENT_UNKNOWN,
+	I40IW_CM_EVENT_ESTABLISHED,
+	I40IW_CM_EVENT_MPA_REQ,
+	I40IW_CM_EVENT_MPA_CONNECT,
+	I40IW_CM_EVENT_MPA_ACCEPT,
+	I40IW_CM_EVENT_MPA_REJECT,
+	I40IW_CM_EVENT_MPA_ESTABLISHED,
+	I40IW_CM_EVENT_CONNECTED,
+	I40IW_CM_EVENT_RESET,
+	I40IW_CM_EVENT_ABORTED
+};
+
+/* event to post to CM event handler */
+struct i40iw_cm_event {
+	enum i40iw_cm_event_type type;
+	struct i40iw_cm_info cm_info;
+	struct work_struct event_work;
+	struct i40iw_cm_node *cm_node;
+};
+
+struct i40iw_cm_core {
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_dev *dev;
+
+	struct list_head listen_nodes;
+	struct list_head connected_nodes;
+
+	struct timer_list tcp_timer;
+
+	struct workqueue_struct *event_wq;
+	struct workqueue_struct *disconn_wq;
+
+	spinlock_t ht_lock; /* manage hash table */
+	spinlock_t listen_list_lock; /* listen list */
+
+	u64	stats_nodes_created;
+	u64	stats_nodes_destroyed;
+	u64	stats_listen_created;
+	u64	stats_listen_destroyed;
+	u64	stats_listen_nodes_created;
+	u64	stats_listen_nodes_destroyed;
+	u64	stats_loopbacks;
+	u64	stats_accepts;
+	u64	stats_rejects;
+	u64	stats_connect_errs;
+	u64	stats_passive_errs;
+	u64	stats_pkt_retrans;
+	u64	stats_backlog_drops;
+};
+
+int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
+			    struct i40iw_puda_buf *sqbuf,
+			    enum i40iw_timer_type type,
+			    int send_retrans,
+			    int close_when_complete);
+
+int i40iw_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
+int i40iw_reject(struct iw_cm_id *, const void *, u8);
+int i40iw_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
+int i40iw_create_listen(struct iw_cm_id *, int);
+int i40iw_destroy_listen(struct iw_cm_id *);
+
+int i40iw_cm_start(struct i40iw_device *);
+int i40iw_cm_stop(struct i40iw_device *);
+
+int i40iw_arp_table(struct i40iw_device *iwdev,
+		    u32 *ip_addr,
+		    bool ipv4,
+		    u8 *mac_addr,
+		    u32 action);
+
+#endif /* I40IW_CM_H */

diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
new file mode 100644
index 0000000..f05802b
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c

@@ -0,0 +1,4743 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * i40iw_insert_wqe_hdr - write wqe header
+ * @wqe: cqp wqe for header
+ * @header: header for the cqp wqe
+ */
+static inline void i40iw_insert_wqe_hdr(u64 *wqe, u64 header)
+{
+	wmb();            /* make sure WQE is populated before polarity is set */
+	set_64bit_val(wqe, 24, header);
+}
+
+/**
+ * i40iw_get_cqp_reg_info - get head and tail for cqp using registers
+ * @cqp: struct for cqp hw
+ * @val: cqp tail register value
+ * @tail:wqtail register value
+ * @error: cqp processing err
+ */
+static inline void i40iw_get_cqp_reg_info(struct i40iw_sc_cqp *cqp,
+					  u32 *val,
+					  u32 *tail,
+					  u32 *error)
+{
+	if (cqp->dev->is_pf) {
+		*val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPTAIL);
+		*tail = RS_32(*val, I40E_PFPE_CQPTAIL_WQTAIL);
+		*error = RS_32(*val, I40E_PFPE_CQPTAIL_CQP_OP_ERR);
+	} else {
+		*val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPTAIL1);
+		*tail = RS_32(*val, I40E_VFPE_CQPTAIL_WQTAIL);
+		*error = RS_32(*val, I40E_VFPE_CQPTAIL_CQP_OP_ERR);
+	}
+}
+
+/**
+ * i40iw_cqp_poll_registers - poll cqp registers
+ * @cqp: struct for cqp hw
+ * @tail:wqtail register value
+ * @count: how many times to try for completion
+ */
+static enum i40iw_status_code i40iw_cqp_poll_registers(
+						struct i40iw_sc_cqp *cqp,
+						u32 tail,
+						u32 count)
+{
+	u32 i = 0;
+	u32 newtail, error, val;
+
+	while (i < count) {
+		i++;
+		i40iw_get_cqp_reg_info(cqp, &val, &newtail, &error);
+		if (error) {
+			error = (cqp->dev->is_pf) ?
+				 i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES) :
+				 i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
+			return I40IW_ERR_CQP_COMPL_ERROR;
+		}
+		if (newtail != tail) {
+			/* SUCCESS */
+			I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+			return 0;
+		}
+		udelay(I40IW_SLEEP_COUNT);
+	}
+	return I40IW_ERR_TIMEOUT;
+}
+
+/**
+ * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer
+ * @buf: ptr to fpm commit buffer
+ * @info: ptr to i40iw_hmc_obj_info struct
+ *
+ * parses fpm commit info and copy base value
+ * of hmc objects in hmc_info
+ */
+static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
+				u64 *buf,
+				struct i40iw_hmc_obj_info *info)
+{
+	u64 temp;
+	u32 i, j;
+	u32 low;
+
+	/* copy base values in obj_info */
+	for (i = I40IW_HMC_IW_QP, j = 0;
+			i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
+		get_64bit_val(buf, j, &temp);
+		info[i].base = RS_64_1(temp, 32) * 512;
+		low = (u32)(temp);
+		if (low)
+			info[i].cnt = low;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer
+ * @buf: ptr to fpm query buffer
+ * @info: ptr to i40iw_hmc_obj_info struct
+ * @hmc_fpm_misc: ptr to fpm data
+ *
+ * parses fpm query buffer and copy max_cnt and
+ * size value of hmc objects in hmc_info
+ */
+static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf(
+				u64 *buf,
+				struct i40iw_hmc_info *hmc_info,
+				struct i40iw_hmc_fpm_misc *hmc_fpm_misc)
+{
+	u64 temp;
+	struct i40iw_hmc_obj_info *obj_info;
+	u32 i, j, size;
+	u16 max_pe_sds;
+
+	obj_info = hmc_info->hmc_obj;
+
+	get_64bit_val(buf, 0, &temp);
+	hmc_info->first_sd_index = (u16)RS_64(temp, I40IW_QUERY_FPM_FIRST_PE_SD_INDEX);
+	max_pe_sds = (u16)RS_64(temp, I40IW_QUERY_FPM_MAX_PE_SDS);
+
+	/* Reduce SD count for VFs by 1 to account for PBLE backing page rounding */
+	if (hmc_info->hmc_fn_id >= I40IW_FIRST_VF_FPM_ID)
+		max_pe_sds--;
+	hmc_fpm_misc->max_sds = max_pe_sds;
+	hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index;
+
+	for (i = I40IW_HMC_IW_QP, j = 8;
+	     i <= I40IW_HMC_IW_ARP; i++, j += 8) {
+		get_64bit_val(buf, j, &temp);
+		if (i == I40IW_HMC_IW_QP)
+			obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS);
+		else if (i == I40IW_HMC_IW_CQ)
+			obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS);
+		else
+			obj_info[i].max_cnt = (u32)temp;
+
+		size = (u32)RS_64_1(temp, 32);
+		obj_info[i].size = ((u64)1 << size);
+	}
+	for (i = I40IW_HMC_IW_MR, j = 48;
+			i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
+		get_64bit_val(buf, j, &temp);
+		obj_info[i].max_cnt = (u32)temp;
+		size = (u32)RS_64_1(temp, 32);
+		obj_info[i].size = LS_64_1(1, size);
+	}
+
+	get_64bit_val(buf, 120, &temp);
+	hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS);
+	get_64bit_val(buf, 120, &temp);
+	hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER);
+	get_64bit_val(buf, 120, &temp);
+	hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET);
+	get_64bit_val(buf, 64, &temp);
+	hmc_fpm_misc->xf_block_size = RS_64(temp, I40IW_QUERY_FPM_XFBLOCKSIZE);
+	if (!hmc_fpm_misc->xf_block_size)
+		return I40IW_ERR_INVALID_SIZE;
+	get_64bit_val(buf, 80, &temp);
+	hmc_fpm_misc->q1_block_size = RS_64(temp, I40IW_QUERY_FPM_Q1BLOCKSIZE);
+	if (!hmc_fpm_misc->q1_block_size)
+		return I40IW_ERR_INVALID_SIZE;
+	return 0;
+}
+
+/**
+ * i40iw_sc_pd_init - initialize sc pd struct
+ * @dev: sc device struct
+ * @pd: sc pd ptr
+ * @pd_id: pd_id for allocated pd
+ */
+static void i40iw_sc_pd_init(struct i40iw_sc_dev *dev,
+			     struct i40iw_sc_pd *pd,
+			     u16 pd_id)
+{
+	pd->size = sizeof(*pd);
+	pd->pd_id = pd_id;
+	pd->dev = dev;
+}
+
+/**
+ * i40iw_get_encoded_wqe_size - given wq size, returns hardware encoded size
+ * @wqsize: size of the wq (sq, rq, srq) to encoded_size
+ * @cqpsq: encoded size for sq for cqp as its encoded size is 1+ other wq's
+ */
+u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq)
+{
+	u8 encoded_size = 0;
+
+	/* cqp sq's hw coded value starts from 1 for size of 4
+	 * while it starts from 0 for qp' wq's.
+	 */
+	if (cqpsq)
+		encoded_size = 1;
+	wqsize >>= 2;
+	while (wqsize >>= 1)
+		encoded_size++;
+	return encoded_size;
+}
+
+/**
+ * i40iw_sc_cqp_init - Initialize buffers for a control Queue Pair
+ * @cqp: IWARP control queue pair pointer
+ * @info: IWARP control queue pair init info pointer
+ *
+ * Initializes the object and context buffers for a control Queue Pair.
+ */
+static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp,
+						struct i40iw_cqp_init_info *info)
+{
+	u8 hw_sq_size;
+
+	if ((info->sq_size > I40IW_CQP_SW_SQSIZE_2048) ||
+	    (info->sq_size < I40IW_CQP_SW_SQSIZE_4) ||
+	    ((info->sq_size & (info->sq_size - 1))))
+		return I40IW_ERR_INVALID_SIZE;
+
+	hw_sq_size = i40iw_get_encoded_wqe_size(info->sq_size, true);
+	cqp->size = sizeof(*cqp);
+	cqp->sq_size = info->sq_size;
+	cqp->hw_sq_size = hw_sq_size;
+	cqp->sq_base = info->sq;
+	cqp->host_ctx = info->host_ctx;
+	cqp->sq_pa = info->sq_pa;
+	cqp->host_ctx_pa = info->host_ctx_pa;
+	cqp->dev = info->dev;
+	cqp->struct_ver = info->struct_ver;
+	cqp->scratch_array = info->scratch_array;
+	cqp->polarity = 0;
+	cqp->en_datacenter_tcp = info->en_datacenter_tcp;
+	cqp->enabled_vf_count = info->enabled_vf_count;
+	cqp->hmc_profile = info->hmc_profile;
+	info->dev->cqp = cqp;
+
+	I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size);
+	i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
+		    "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n",
+		    __func__, cqp->sq_size, cqp->hw_sq_size,
+		    cqp->sq_base, cqp->sq_pa, cqp, cqp->polarity);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cqp_create - create cqp during bringup
+ * @cqp: struct for cqp hw
+ * @disable_pfpdus: if pfpdu to be disabled
+ * @maj_err: If error, major err number
+ * @min_err: If error, minor err number
+ */
+static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
+						  bool disable_pfpdus,
+						  u16 *maj_err,
+						  u16 *min_err)
+{
+	u64 temp;
+	u32 cnt = 0, p1, p2, val = 0, err_code;
+	enum i40iw_status_code ret_code;
+
+	ret_code = i40iw_allocate_dma_mem(cqp->dev->hw,
+					  &cqp->sdbuf,
+					  128,
+					  I40IW_SD_BUF_ALIGNMENT);
+
+	if (ret_code)
+		goto exit;
+
+	temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) |
+	       LS_64(cqp->struct_ver, I40IW_CQPHC_SVER);
+
+	if (disable_pfpdus)
+		temp |= LS_64(1, I40IW_CQPHC_DISABLE_PFPDUS);
+
+	set_64bit_val(cqp->host_ctx, 0, temp);
+	set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
+	temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) |
+	       LS_64(cqp->hmc_profile, I40IW_CQPHC_HMC_PROFILE);
+	set_64bit_val(cqp->host_ctx, 16, temp);
+	set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
+	set_64bit_val(cqp->host_ctx, 32, 0);
+	set_64bit_val(cqp->host_ctx, 40, 0);
+	set_64bit_val(cqp->host_ctx, 48, 0);
+	set_64bit_val(cqp->host_ctx, 56, 0);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQP_HOST_CTX",
+			cqp->host_ctx, I40IW_CQP_CTX_SIZE * 8);
+
+	p1 = RS_32_1(cqp->host_ctx_pa, 32);
+	p2 = (u32)cqp->host_ctx_pa;
+
+	if (cqp->dev->is_pf) {
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, p1);
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, p2);
+	} else {
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, p1);
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, p2);
+	}
+	do {
+		if (cnt++ > I40IW_DONE_COUNT) {
+			i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
+			ret_code = I40IW_ERR_TIMEOUT;
+			/*
+			 * read PFPE_CQPERRORCODES register to get the minor
+			 * and major error code
+			 */
+			if (cqp->dev->is_pf)
+				err_code = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES);
+			else
+				err_code = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
+			*min_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE);
+			*maj_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE);
+			goto exit;
+		}
+		udelay(I40IW_SLEEP_COUNT);
+		if (cqp->dev->is_pf)
+			val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CCQPSTATUS);
+		else
+			val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CCQPSTATUS1);
+	} while (!val);
+
+exit:
+	if (!ret_code)
+		cqp->process_cqp_sds = i40iw_update_sds_noccq;
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_cqp_post_sq - post of cqp's sq
+ * @cqp: struct for cqp hw
+ */
+void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp)
+{
+	if (cqp->dev->is_pf)
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPDB, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
+	else
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CQPDB1, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
+
+	i40iw_debug(cqp->dev,
+		    I40IW_DEBUG_WQE,
+		    "%s: HEAD_TAIL[%04d,%04d,%04d]\n",
+		    __func__,
+		    cqp->sq_ring.head,
+		    cqp->sq_ring.tail,
+		    cqp->sq_ring.size);
+}
+
+/**
+ * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
+ * @cqp: struct for cqp hw
+ * @wqe_idx: we index of cqp ring
+ */
+u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
+{
+	u64 *wqe = NULL;
+	u32	wqe_idx;
+	enum i40iw_status_code ret_code;
+
+	if (I40IW_RING_FULL_ERR(cqp->sq_ring)) {
+		i40iw_debug(cqp->dev,
+			    I40IW_DEBUG_WQE,
+			    "%s: ring is full head %x tail %x size %x\n",
+			    __func__,
+			    cqp->sq_ring.head,
+			    cqp->sq_ring.tail,
+			    cqp->sq_ring.size);
+		return NULL;
+	}
+	I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code);
+	if (ret_code)
+		return NULL;
+	if (!wqe_idx)
+		cqp->polarity = !cqp->polarity;
+
+	wqe = cqp->sq_base[wqe_idx].elem;
+	cqp->scratch_array[wqe_idx] = scratch;
+	I40IW_CQP_INIT_WQE(wqe);
+
+	return wqe;
+}
+
+/**
+ * i40iw_sc_cqp_destroy - destroy cqp during close
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_cqp_destroy(struct i40iw_sc_cqp *cqp)
+{
+	u32 cnt = 0, val = 1;
+	enum i40iw_status_code ret_code = 0;
+	u32 cqpstat_addr;
+
+	if (cqp->dev->is_pf) {
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, 0);
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, 0);
+		cqpstat_addr = I40E_PFPE_CCQPSTATUS;
+	} else {
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, 0);
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, 0);
+		cqpstat_addr = I40E_VFPE_CCQPSTATUS1;
+	}
+	do {
+		if (cnt++ > I40IW_DONE_COUNT) {
+			ret_code = I40IW_ERR_TIMEOUT;
+			break;
+		}
+		udelay(I40IW_SLEEP_COUNT);
+		val = i40iw_rd32(cqp->dev->hw, cqpstat_addr);
+	} while (val);
+
+	i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_ccq_arm - enable intr for control cq
+ * @ccq: ccq sc struct
+ */
+static void i40iw_sc_ccq_arm(struct i40iw_sc_cq *ccq)
+{
+	u64 temp_val;
+	u16 sw_cq_sel;
+	u8 arm_next_se;
+	u8 arm_seq_num;
+
+	/* write to cq doorbell shadow area */
+	/* arm next se should always be zero */
+	get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val);
+
+	sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
+	arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
+
+	arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
+	arm_seq_num++;
+
+	temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
+		   LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
+		   LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
+		   LS_64(1, I40IW_CQ_DBSA_ARM_NEXT);
+
+	set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
+
+	wmb();       /* make sure shadow area is updated before arming */
+
+	if (ccq->dev->is_pf)
+		i40iw_wr32(ccq->dev->hw, I40E_PFPE_CQARM, ccq->cq_uk.cq_id);
+	else
+		i40iw_wr32(ccq->dev->hw, I40E_VFPE_CQARM1, ccq->cq_uk.cq_id);
+}
+
+/**
+ * i40iw_sc_ccq_get_cqe_info - get ccq's cq entry
+ * @ccq: ccq sc struct
+ * @info: completion q entry to return
+ */
+static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info(
+					struct i40iw_sc_cq *ccq,
+					struct i40iw_ccq_cqe_info *info)
+{
+	u64 qp_ctx, temp, temp1;
+	u64 *cqe;
+	struct i40iw_sc_cqp *cqp;
+	u32 wqe_idx;
+	u8 polarity;
+	enum i40iw_status_code ret_code = 0;
+
+	if (ccq->cq_uk.avoid_mem_cflct)
+		cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(&ccq->cq_uk);
+	else
+		cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&ccq->cq_uk);
+
+	get_64bit_val(cqe, 24, &temp);
+	polarity = (u8)RS_64(temp, I40IW_CQ_VALID);
+	if (polarity != ccq->cq_uk.polarity)
+		return I40IW_ERR_QUEUE_EMPTY;
+
+	get_64bit_val(cqe, 8, &qp_ctx);
+	cqp = (struct i40iw_sc_cqp *)(unsigned long)qp_ctx;
+	info->error = (bool)RS_64(temp, I40IW_CQ_ERROR);
+	info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
+	if (info->error) {
+		info->maj_err_code = (u16)RS_64(temp, I40IW_CQ_MAJERR);
+		info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
+	}
+	wqe_idx = (u32)RS_64(temp, I40IW_CQ_WQEIDX);
+	info->scratch = cqp->scratch_array[wqe_idx];
+
+	get_64bit_val(cqe, 16, &temp1);
+	info->op_ret_val = (u32)RS_64(temp1, I40IW_CCQ_OPRETVAL);
+	get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
+	info->op_code = (u8)RS_64(temp1, I40IW_CQPSQ_OPCODE);
+	info->cqp = cqp;
+
+	/*  move the head for cq */
+	I40IW_RING_MOVE_HEAD(ccq->cq_uk.cq_ring, ret_code);
+	if (I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring) == 0)
+		ccq->cq_uk.polarity ^= 1;
+
+	/* update cq tail in cq shadow memory also */
+	I40IW_RING_MOVE_TAIL(ccq->cq_uk.cq_ring);
+	set_64bit_val(ccq->cq_uk.shadow_area,
+		      0,
+		      I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring));
+	wmb(); /* write shadow area before tail */
+	I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ
+ * @cqp: struct for cqp hw
+ * @op_code: cqp opcode for completion
+ * @info: completion q entry to return
+ */
+static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done(
+					struct i40iw_sc_cqp *cqp,
+					u8 op_code,
+					struct i40iw_ccq_cqe_info *compl_info)
+{
+	struct i40iw_ccq_cqe_info info;
+	struct i40iw_sc_cq *ccq;
+	enum i40iw_status_code ret_code = 0;
+	u32 cnt = 0;
+
+	memset(&info, 0, sizeof(info));
+	ccq = cqp->dev->ccq;
+	while (1) {
+		if (cnt++ > I40IW_DONE_COUNT)
+			return I40IW_ERR_TIMEOUT;
+
+		if (i40iw_sc_ccq_get_cqe_info(ccq, &info)) {
+			udelay(I40IW_SLEEP_COUNT);
+			continue;
+		}
+
+		if (info.error) {
+			ret_code = I40IW_ERR_CQP_COMPL_ERROR;
+			break;
+		}
+		/* check if opcode is cq create */
+		if (op_code != info.op_code) {
+			i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
+				    "%s: opcode mismatch for my op code 0x%x, returned opcode %x\n",
+				    __func__, op_code, info.op_code);
+		}
+		/* success, exit out of the loop */
+		if (op_code == info.op_code)
+			break;
+	}
+
+	if (compl_info)
+		memcpy(compl_info, &info, sizeof(*compl_info));
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_manage_push_page - Handle push page
+ * @cqp: struct for cqp hw
+ * @info: push page info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_push_page(
+				struct i40iw_sc_cqp *cqp,
+				struct i40iw_cqp_manage_push_page_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT)
+		return I40IW_ERR_INVALID_PUSH_PAGE_INDEX;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, info->qs_handle);
+
+	header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) |
+		 LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+		 LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_manage_hmc_pm_func_table - manage of function table
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @vf_index: vf index for cqp
+ * @free_pm_fcn: function number
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table(
+				struct i40iw_sc_cqp *cqp,
+				u64 scratch,
+				u8 vf_index,
+				bool free_pm_fcn,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	if (vf_index >= I40IW_MAX_VF_PER_PF)
+		return I40IW_ERR_INVALID_VF_ID;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	header = LS_64(vf_index, I40IW_CQPSQ_MHMC_VFIDX) |
+		 LS_64(I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, I40IW_CQPSQ_OPCODE) |
+		 LS_64(free_pm_fcn, I40IW_CQPSQ_MHMC_FREEPMFN) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_set_hmc_resource_profile - cqp wqe for hmc profile
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_profile_type: type of profile to set
+ * @vf_num: vf number for profile
+ * @post_sq: flag for cqp db to ring
+ * @poll_registers: flag to poll register for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_set_hmc_resource_profile(
+				struct i40iw_sc_cqp *cqp,
+				u64 scratch,
+				u8 hmc_profile_type,
+				u8 vf_num, bool post_sq,
+				bool poll_registers)
+{
+	u64 *wqe;
+	u64 header;
+	u32 val, tail, error;
+	enum i40iw_status_code ret_code = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16,
+		      (LS_64(hmc_profile_type, I40IW_CQPSQ_SHMCRP_HMC_PROFILE) |
+				LS_64(vf_num, I40IW_CQPSQ_SHMCRP_VFNUM)));
+
+	header = LS_64(I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE, I40IW_CQPSQ_OPCODE) |
+		       LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		if (poll_registers)
+			ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000000);
+		else
+			ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
+								 I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
+								 NULL);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_manage_hmc_pm_func_table_done - wait for cqp wqe completion for function table
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table_done(struct i40iw_sc_cqp *cqp)
+{
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, NULL);
+}
+
+/**
+ * i40iw_sc_commit_fpm_values_done - wait for cqp eqe completion for fpm commit
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cqp *cqp)
+{
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_COMMIT_FPM_VALUES, NULL);
+}
+
+/**
+ * i40iw_sc_commit_fpm_values - cqp wqe for commit fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @commit_fpm_mem; Memory for fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_commit_fpm_values(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					u8 hmc_fn_id,
+					struct i40iw_dma_mem *commit_fpm_mem,
+					bool post_sq,
+					u8 wait_type)
+{
+	u64 *wqe;
+	u64 header;
+	u32 tail, val, error;
+	enum i40iw_status_code ret_code = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, hmc_fn_id);
+	set_64bit_val(wqe, 32, commit_fpm_mem->pa);
+
+	header = LS_64(I40IW_CQP_OP_COMMIT_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "COMMIT_FPM_VALUES WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+
+		if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
+			ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+		else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
+			ret_code = i40iw_sc_commit_fpm_values_done(cqp);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_query_fpm_values_done - poll for cqp wqe completion for query fpm
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_query_fpm_values_done(struct i40iw_sc_cqp *cqp)
+{
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_QUERY_FPM_VALUES, NULL);
+}
+
+/**
+ * i40iw_sc_query_fpm_values - cqp wqe query fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @query_fpm_mem: memory for return fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_query_fpm_values(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					u8 hmc_fn_id,
+					struct i40iw_dma_mem *query_fpm_mem,
+					bool post_sq,
+					u8 wait_type)
+{
+	u64 *wqe;
+	u64 header;
+	u32 tail, val, error;
+	enum i40iw_status_code ret_code = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, hmc_fn_id);
+	set_64bit_val(wqe, 32, query_fpm_mem->pa);
+
+	header = LS_64(I40IW_CQP_OP_QUERY_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_FPM WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	/* read the tail from CQP_TAIL register */
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
+			ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+		else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
+			ret_code = i40iw_sc_query_fpm_values_done(cqp);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_add_arp_cache_entry - cqp wqe add arp cache entry
+ * @cqp: struct for cqp hw
+ * @info: arp entry information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_add_arp_cache_entry(
+				struct i40iw_sc_cqp *cqp,
+				struct i40iw_add_arp_cache_entry_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 temp, header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 8, info->reach_max);
+
+	temp = info->mac_addr[5] |
+	       LS_64_1(info->mac_addr[4], 8) |
+	       LS_64_1(info->mac_addr[3], 16) |
+	       LS_64_1(info->mac_addr[2], 24) |
+	       LS_64_1(info->mac_addr[1], 32) |
+	       LS_64_1(info->mac_addr[0], 40);
+
+	set_64bit_val(wqe, 16, temp);
+
+	header = info->arp_index |
+		 LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+		 LS_64((info->permanent ? 1 : 0), I40IW_CQPSQ_MAT_PERMANENT) |
+		 LS_64(1, I40IW_CQPSQ_MAT_ENTRYVALID) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_ENTRY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_del_arp_cache_entry - dele arp cache entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @arp_index: arp index to delete arp entry
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_del_arp_cache_entry(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					u16 arp_index,
+					bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	header = arp_index |
+		 LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_DEL_ENTRY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_query_arp_cache_entry - cqp wqe to query arp and arp index
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @arp_index: arp index to delete arp entry
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_query_arp_cache_entry(
+				struct i40iw_sc_cqp *cqp,
+				u64 scratch,
+				u16 arp_index,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	header = arp_index |
+		 LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_MAT_QUERY) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_ARP_CACHE_ENTRY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_manage_apbvt_entry - for adding and deleting apbvt entries
+ * @cqp: struct for cqp hw
+ * @info: info for apbvt entry to add or delete
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_apbvt_entry(
+				struct i40iw_sc_cqp *cqp,
+				struct i40iw_apbvt_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, info->port);
+
+	header = LS_64(I40IW_CQP_OP_MANAGE_APBVT, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->add, I40IW_CQPSQ_MAPT_ADDPORT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_APBVT WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_manage_qhash_table_entry - manage quad hash entries
+ * @cqp: struct for cqp hw
+ * @info: info for quad hash to manage
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ *
+ * This is called before connection establishment is started. For passive connections, when
+ * listener is created, it will call with entry type of  I40IW_QHASH_TYPE_TCP_SYN with local
+ * ip address and tcp port. When SYN is received (passive connections) or
+ * sent (active connections), this routine is called with entry type of
+ * I40IW_QHASH_TYPE_TCP_ESTABLISHED and quad is passed in info.
+ *
+ * When iwarp connection is done and its state moves to RTS, the quad hash entry in
+ * the hardware will point to iwarp's qp number and requires no calls from the driver.
+ */
+static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry(
+					struct i40iw_sc_cqp *cqp,
+					struct i40iw_qhash_table_info *info,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 *wqe;
+	u64 qw1 = 0;
+	u64 qw2 = 0;
+	u64 temp;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	temp = info->mac_addr[5] |
+		LS_64_1(info->mac_addr[4], 8) |
+		LS_64_1(info->mac_addr[3], 16) |
+		LS_64_1(info->mac_addr[2], 24) |
+		LS_64_1(info->mac_addr[1], 32) |
+		LS_64_1(info->mac_addr[0], 40);
+
+	set_64bit_val(wqe, 0, temp);
+
+	qw1 = LS_64(info->qp_num, I40IW_CQPSQ_QHASH_QPN) |
+	      LS_64(info->dest_port, I40IW_CQPSQ_QHASH_DEST_PORT);
+	if (info->ipv4_valid) {
+		set_64bit_val(wqe,
+			      48,
+			      LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
+	} else {
+		set_64bit_val(wqe,
+			      56,
+			      LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
+			      LS_64(info->dest_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
+
+		set_64bit_val(wqe,
+			      48,
+			      LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
+			      LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
+	}
+	qw2 = LS_64(cqp->dev->qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
+	if (info->vlan_valid)
+		qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID);
+	set_64bit_val(wqe, 16, qw2);
+	if (info->entry_type == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
+		qw1 |= LS_64(info->src_port, I40IW_CQPSQ_QHASH_SRC_PORT);
+		if (!info->ipv4_valid) {
+			set_64bit_val(wqe,
+				      40,
+				      LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
+				      LS_64(info->src_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
+			set_64bit_val(wqe,
+				      32,
+				      LS_64(info->src_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
+				      LS_64(info->src_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
+		} else {
+			set_64bit_val(wqe,
+				      32,
+				      LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
+		}
+	}
+
+	set_64bit_val(wqe, 8, qw1);
+	temp = LS_64(cqp->polarity, I40IW_CQPSQ_QHASH_WQEVALID) |
+	       LS_64(I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY, I40IW_CQPSQ_QHASH_OPCODE) |
+	       LS_64(info->manage, I40IW_CQPSQ_QHASH_MANAGE) |
+	       LS_64(info->ipv4_valid, I40IW_CQPSQ_QHASH_IPV4VALID) |
+	       LS_64(info->vlan_valid, I40IW_CQPSQ_QHASH_VLANVALID) |
+	       LS_64(info->entry_type, I40IW_CQPSQ_QHASH_ENTRYTYPE);
+
+	i40iw_insert_wqe_hdr(wqe, temp);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_QHASH WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_alloc_local_mac_ipaddr_entry - cqp wqe for loc mac entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_alloc_local_mac_ipaddr_entry(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	header = LS_64(I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ALLOCATE_LOCAL_MAC_IPADDR WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_add_local_mac_ipaddr_entry - add mac enry
+ * @cqp: struct for cqp hw
+ * @info:mac addr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry(
+				struct i40iw_sc_cqp *cqp,
+				struct i40iw_local_mac_ipaddr_entry_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 temp, header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	temp = info->mac_addr[5] |
+		LS_64_1(info->mac_addr[4], 8) |
+		LS_64_1(info->mac_addr[3], 16) |
+		LS_64_1(info->mac_addr[2], 24) |
+		LS_64_1(info->mac_addr[1], 32) |
+		LS_64_1(info->mac_addr[0], 40);
+
+	set_64bit_val(wqe, 32, temp);
+
+	header = LS_64(info->entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
+		 LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ADD_LOCAL_MAC_IPADDR WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_del_local_mac_ipaddr_entry - cqp wqe to dele local mac
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @entry_idx: index of mac entry
+ * @ ignore_ref_count: to force mac adde delete
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry(
+				struct i40iw_sc_cqp *cqp,
+				u64 scratch,
+				u8 entry_idx,
+				u8 ignore_ref_count,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	header = LS_64(entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
+		 LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_MLIPA_FREEENTRY) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+		 LS_64(ignore_ref_count, I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "DEL_LOCAL_MAC_IPADDR WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cqp_nop - send a nop wqe
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cqp_nop(struct i40iw_sc_cqp *cqp,
+					       u64 scratch,
+					       bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	header = LS_64(I40IW_CQP_OP_NOP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "NOP WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_ceq_init - initialize ceq
+ * @ceq: ceq sc structure
+ * @info: ceq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_ceq_init(struct i40iw_sc_ceq *ceq,
+						struct i40iw_ceq_init_info *info)
+{
+	u32 pble_obj_cnt;
+
+	if ((info->elem_cnt < I40IW_MIN_CEQ_ENTRIES) ||
+	    (info->elem_cnt > I40IW_MAX_CEQ_ENTRIES))
+		return I40IW_ERR_INVALID_SIZE;
+
+	if (info->ceq_id >= I40IW_MAX_CEQID)
+		return I40IW_ERR_INVALID_CEQ_ID;
+
+	pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	ceq->size = sizeof(*ceq);
+	ceq->ceqe_base = (struct i40iw_ceqe *)info->ceqe_base;
+	ceq->ceq_id = info->ceq_id;
+	ceq->dev = info->dev;
+	ceq->elem_cnt = info->elem_cnt;
+	ceq->ceq_elem_pa = info->ceqe_pa;
+	ceq->virtual_map = info->virtual_map;
+
+	ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0);
+	ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0);
+	ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
+
+	ceq->tph_en = info->tph_en;
+	ceq->tph_val = info->tph_val;
+	ceq->polarity = 1;
+	I40IW_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
+	ceq->dev->ceq[info->ceq_id] = ceq;
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_ceq_create - create ceq wqe
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ceq_create(struct i40iw_sc_ceq *ceq,
+						  u64 scratch,
+						  bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+
+	cqp = ceq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, ceq->elem_cnt);
+	set_64bit_val(wqe, 32, (ceq->virtual_map ? 0 : ceq->ceq_elem_pa));
+	set_64bit_val(wqe, 48, (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
+	set_64bit_val(wqe, 56, LS_64(ceq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+	header = ceq->ceq_id |
+		 LS_64(I40IW_CQP_OP_CREATE_CEQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
+		 LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
+		 LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cceq_create_done - poll for control ceq wqe to complete
+ * @ceq: ceq sc structure
+ */
+static enum i40iw_status_code i40iw_sc_cceq_create_done(struct i40iw_sc_ceq *ceq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = ceq->dev->cqp;
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CEQ, NULL);
+}
+
+/**
+ * i40iw_sc_cceq_destroy_done - poll for destroy cceq to complete
+ * @ceq: ceq sc structure
+ */
+static enum i40iw_status_code i40iw_sc_cceq_destroy_done(struct i40iw_sc_ceq *ceq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = ceq->dev->cqp;
+	cqp->process_cqp_sds = i40iw_update_sds_noccq;
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_CEQ, NULL);
+}
+
+/**
+ * i40iw_sc_cceq_create - create cceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_cceq_create(struct i40iw_sc_ceq *ceq, u64 scratch)
+{
+	enum i40iw_status_code ret_code;
+
+	ret_code = i40iw_sc_ceq_create(ceq, scratch, true);
+	if (!ret_code)
+		ret_code = i40iw_sc_cceq_create_done(ceq);
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_ceq_destroy - destroy ceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ceq_destroy(struct i40iw_sc_ceq *ceq,
+						   u64 scratch,
+						   bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+
+	cqp = ceq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, ceq->elem_cnt);
+	set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
+	header = ceq->ceq_id |
+		 LS_64(I40IW_CQP_OP_DESTROY_CEQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
+		 LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
+		 LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_process_ceq - process ceq
+ * @dev: sc device struct
+ * @ceq: ceq sc structure
+ */
+static void *i40iw_sc_process_ceq(struct i40iw_sc_dev *dev, struct i40iw_sc_ceq *ceq)
+{
+	u64 temp;
+	u64 *ceqe;
+	struct i40iw_sc_cq *cq = NULL;
+	u8 polarity;
+
+	ceqe = (u64 *)I40IW_GET_CURRENT_CEQ_ELEMENT(ceq);
+	get_64bit_val(ceqe, 0, &temp);
+	polarity = (u8)RS_64(temp, I40IW_CEQE_VALID);
+	if (polarity != ceq->polarity)
+		return cq;
+
+	cq = (struct i40iw_sc_cq *)(unsigned long)LS_64_1(temp, 1);
+
+	I40IW_RING_MOVE_TAIL(ceq->ceq_ring);
+	if (I40IW_RING_GETCURRENT_TAIL(ceq->ceq_ring) == 0)
+		ceq->polarity ^= 1;
+
+	if (dev->is_pf)
+		i40iw_wr32(dev->hw, I40E_PFPE_CQACK, cq->cq_uk.cq_id);
+	else
+		i40iw_wr32(dev->hw, I40E_VFPE_CQACK1, cq->cq_uk.cq_id);
+
+	return cq;
+}
+
+/**
+ * i40iw_sc_aeq_init - initialize aeq
+ * @aeq: aeq structure ptr
+ * @info: aeq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_aeq_init(struct i40iw_sc_aeq *aeq,
+						struct i40iw_aeq_init_info *info)
+{
+	u32 pble_obj_cnt;
+
+	if ((info->elem_cnt < I40IW_MIN_AEQ_ENTRIES) ||
+	    (info->elem_cnt > I40IW_MAX_AEQ_ENTRIES))
+		return I40IW_ERR_INVALID_SIZE;
+	pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	aeq->size = sizeof(*aeq);
+	aeq->polarity = 1;
+	aeq->aeqe_base = (struct i40iw_sc_aeqe *)info->aeqe_base;
+	aeq->dev = info->dev;
+	aeq->elem_cnt = info->elem_cnt;
+
+	aeq->aeq_elem_pa = info->aeq_elem_pa;
+	I40IW_RING_INIT(aeq->aeq_ring, aeq->elem_cnt);
+	info->dev->aeq = aeq;
+
+	aeq->virtual_map = info->virtual_map;
+	aeq->pbl_list = (aeq->virtual_map ? info->pbl_list : NULL);
+	aeq->pbl_chunk_size = (aeq->virtual_map ? info->pbl_chunk_size : 0);
+	aeq->first_pm_pbl_idx = (aeq->virtual_map ? info->first_pm_pbl_idx : 0);
+	info->dev->aeq = aeq;
+	return 0;
+}
+
+/**
+ * i40iw_sc_aeq_create - create aeq
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_aeq_create(struct i40iw_sc_aeq *aeq,
+						  u64 scratch,
+						  bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = aeq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, aeq->elem_cnt);
+	set_64bit_val(wqe, 32,
+		      (aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
+	set_64bit_val(wqe, 48,
+		      (aeq->virtual_map ? aeq->first_pm_pbl_idx : 0));
+
+	header = LS_64(I40IW_CQP_OP_CREATE_AEQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
+		 LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_aeq_destroy - destroy aeq during close
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_aeq_destroy(struct i40iw_sc_aeq *aeq,
+						   u64 scratch,
+						   bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = aeq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, aeq->elem_cnt);
+	set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
+	header = LS_64(I40IW_CQP_OP_DESTROY_AEQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
+		 LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_get_next_aeqe - get next aeq entry
+ * @aeq: aeq structure ptr
+ * @info: aeqe info to be returned
+ */
+static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq,
+						     struct i40iw_aeqe_info *info)
+{
+	u64 temp, compl_ctx;
+	u64 *aeqe;
+	u16 wqe_idx;
+	u8 ae_src;
+	u8 polarity;
+
+	aeqe = (u64 *)I40IW_GET_CURRENT_AEQ_ELEMENT(aeq);
+	get_64bit_val(aeqe, 0, &compl_ctx);
+	get_64bit_val(aeqe, 8, &temp);
+	polarity = (u8)RS_64(temp, I40IW_AEQE_VALID);
+
+	if (aeq->polarity != polarity)
+		return I40IW_ERR_QUEUE_EMPTY;
+
+	i40iw_debug_buf(aeq->dev, I40IW_DEBUG_WQE, "AEQ_ENTRY", aeqe, 16);
+
+	ae_src = (u8)RS_64(temp, I40IW_AEQE_AESRC);
+	wqe_idx = (u16)RS_64(temp, I40IW_AEQE_WQDESCIDX);
+	info->qp_cq_id = (u32)RS_64(temp, I40IW_AEQE_QPCQID);
+	info->ae_id = (u16)RS_64(temp, I40IW_AEQE_AECODE);
+	info->tcp_state = (u8)RS_64(temp, I40IW_AEQE_TCPSTATE);
+	info->iwarp_state = (u8)RS_64(temp, I40IW_AEQE_IWSTATE);
+	info->q2_data_written = (u8)RS_64(temp, I40IW_AEQE_Q2DATA);
+	info->aeqe_overflow = (bool)RS_64(temp, I40IW_AEQE_OVERFLOW);
+	switch (ae_src) {
+	case I40IW_AE_SOURCE_RQ:
+	case I40IW_AE_SOURCE_RQ_0011:
+		info->qp = true;
+		info->wqe_idx = wqe_idx;
+		info->compl_ctx = compl_ctx;
+		break;
+	case I40IW_AE_SOURCE_CQ:
+	case I40IW_AE_SOURCE_CQ_0110:
+	case I40IW_AE_SOURCE_CQ_1010:
+	case I40IW_AE_SOURCE_CQ_1110:
+		info->cq = true;
+		info->compl_ctx = LS_64_1(compl_ctx, 1);
+		break;
+	case I40IW_AE_SOURCE_SQ:
+	case I40IW_AE_SOURCE_SQ_0111:
+		info->qp = true;
+		info->sq = true;
+		info->wqe_idx = wqe_idx;
+		info->compl_ctx = compl_ctx;
+		break;
+	case I40IW_AE_SOURCE_IN_RR_WR:
+	case I40IW_AE_SOURCE_IN_RR_WR_1011:
+		info->qp = true;
+		info->compl_ctx = compl_ctx;
+		info->in_rdrsp_wr = true;
+		break;
+	case I40IW_AE_SOURCE_OUT_RR:
+	case I40IW_AE_SOURCE_OUT_RR_1111:
+		info->qp = true;
+		info->compl_ctx = compl_ctx;
+		info->out_rdrsp = true;
+		break;
+	default:
+		break;
+	}
+	I40IW_RING_MOVE_TAIL(aeq->aeq_ring);
+	if (I40IW_RING_GETCURRENT_TAIL(aeq->aeq_ring) == 0)
+		aeq->polarity ^= 1;
+	return 0;
+}
+
+/**
+ * i40iw_sc_repost_aeq_entries - repost completed aeq entries
+ * @dev: sc device struct
+ * @count: allocate count
+ */
+static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev,
+							  u32 count)
+{
+	if (count > I40IW_MAX_AEQ_ALLOCATE_COUNT)
+		return I40IW_ERR_INVALID_SIZE;
+
+	if (dev->is_pf)
+		i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count);
+	else
+		i40iw_wr32(dev->hw, I40E_VFPE_AEQALLOC1, count);
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_aeq_create_done - create aeq
+ * @aeq: aeq structure ptr
+ */
+static enum i40iw_status_code i40iw_sc_aeq_create_done(struct i40iw_sc_aeq *aeq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = aeq->dev->cqp;
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_AEQ, NULL);
+}
+
+/**
+ * i40iw_sc_aeq_destroy_done - destroy of aeq during close
+ * @aeq: aeq structure ptr
+ */
+static enum i40iw_status_code i40iw_sc_aeq_destroy_done(struct i40iw_sc_aeq *aeq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = aeq->dev->cqp;
+	return  i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_AEQ, NULL);
+}
+
+/**
+ * i40iw_sc_ccq_init - initialize control cq
+ * @cq: sc's cq ctruct
+ * @info: info for control cq initialization
+ */
+static enum i40iw_status_code i40iw_sc_ccq_init(struct i40iw_sc_cq *cq,
+						struct i40iw_ccq_init_info *info)
+{
+	u32 pble_obj_cnt;
+
+	if (info->num_elem < I40IW_MIN_CQ_SIZE || info->num_elem > I40IW_MAX_CQ_SIZE)
+		return I40IW_ERR_INVALID_SIZE;
+
+	if (info->ceq_id > I40IW_MAX_CEQID)
+		return I40IW_ERR_INVALID_CEQ_ID;
+
+	pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	cq->cq_pa = info->cq_pa;
+	cq->cq_uk.cq_base = info->cq_base;
+	cq->shadow_area_pa = info->shadow_area_pa;
+	cq->cq_uk.shadow_area = info->shadow_area;
+	cq->shadow_read_threshold = info->shadow_read_threshold;
+	cq->dev = info->dev;
+	cq->ceq_id = info->ceq_id;
+	cq->cq_uk.cq_size = info->num_elem;
+	cq->cq_type = I40IW_CQ_TYPE_CQP;
+	cq->ceqe_mask = info->ceqe_mask;
+	I40IW_RING_INIT(cq->cq_uk.cq_ring, info->num_elem);
+
+	cq->cq_uk.cq_id = 0;    /* control cq is id 0 always */
+	cq->ceq_id_valid = info->ceq_id_valid;
+	cq->tph_en = info->tph_en;
+	cq->tph_val = info->tph_val;
+	cq->cq_uk.avoid_mem_cflct = info->avoid_mem_cflct;
+
+	cq->pbl_list = info->pbl_list;
+	cq->virtual_map = info->virtual_map;
+	cq->pbl_chunk_size = info->pbl_chunk_size;
+	cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+	cq->cq_uk.polarity = true;
+
+	/* following are only for iw cqs so initialize them to zero */
+	cq->cq_uk.cqe_alloc_reg = NULL;
+	info->dev->ccq = cq;
+	return 0;
+}
+
+/**
+ * i40iw_sc_ccq_create_done - poll cqp for ccq create
+ * @ccq: ccq sc struct
+ */
+static enum i40iw_status_code i40iw_sc_ccq_create_done(struct i40iw_sc_cq *ccq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = ccq->dev->cqp;
+	return	i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CQ, NULL);
+}
+
+/**
+ * i40iw_sc_ccq_create - create control cq
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: overlow flag for ccq
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ccq_create(struct i40iw_sc_cq *ccq,
+						  u64 scratch,
+						  bool check_overflow,
+						  bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	enum i40iw_status_code ret_code;
+
+	cqp = ccq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
+	set_64bit_val(wqe, 16,
+		      LS_64(ccq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+	set_64bit_val(wqe, 32, (ccq->virtual_map ? 0 : ccq->cq_pa));
+	set_64bit_val(wqe, 40, ccq->shadow_area_pa);
+	set_64bit_val(wqe, 48,
+		      (ccq->virtual_map ? ccq->first_pm_pbl_idx : 0));
+	set_64bit_val(wqe, 56,
+		      LS_64(ccq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+	header = ccq->cq_uk.cq_id |
+		 LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(ccq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+		 LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+		 LS_64(ccq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+		 LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		ret_code = i40iw_sc_ccq_create_done(ccq);
+		if (ret_code)
+			return ret_code;
+	}
+	cqp->process_cqp_sds = i40iw_cqp_sds_cmd;
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_ccq_destroy - destroy ccq during close
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ccq_destroy(struct i40iw_sc_cq *ccq,
+						   u64 scratch,
+						   bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+	enum i40iw_status_code ret_code = 0;
+	u32 tail, val, error;
+
+	cqp = ccq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
+	set_64bit_val(wqe, 40, ccq->shadow_area_pa);
+
+	header = ccq->cq_uk.cq_id |
+		 LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_cq_init - initialize completion q
+ * @cq: cq struct
+ * @info: cq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_cq_init(struct i40iw_sc_cq *cq,
+					       struct i40iw_cq_init_info *info)
+{
+	u32 __iomem *cqe_alloc_reg = NULL;
+	enum i40iw_status_code ret_code;
+	u32 pble_obj_cnt;
+	u32 arm_offset;
+
+	pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	cq->cq_pa = info->cq_base_pa;
+	cq->dev = info->dev;
+	cq->ceq_id = info->ceq_id;
+	arm_offset = (info->dev->is_pf) ? I40E_PFPE_CQARM : I40E_VFPE_CQARM1;
+	if (i40iw_get_hw_addr(cq->dev))
+		cqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(cq->dev) +
+					      arm_offset);
+	info->cq_uk_init_info.cqe_alloc_reg = cqe_alloc_reg;
+	ret_code = i40iw_cq_uk_init(&cq->cq_uk, &info->cq_uk_init_info);
+	if (ret_code)
+		return ret_code;
+	cq->virtual_map = info->virtual_map;
+	cq->pbl_chunk_size = info->pbl_chunk_size;
+	cq->ceqe_mask = info->ceqe_mask;
+	cq->cq_type = (info->type) ? info->type : I40IW_CQ_TYPE_IWARP;
+
+	cq->shadow_area_pa = info->shadow_area_pa;
+	cq->shadow_read_threshold = info->shadow_read_threshold;
+
+	cq->ceq_id_valid = info->ceq_id_valid;
+	cq->tph_en = info->tph_en;
+	cq->tph_val = info->tph_val;
+
+	cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_cq_create - create completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: flag for overflow check
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cq_create(struct i40iw_sc_cq *cq,
+						 u64 scratch,
+						 bool check_overflow,
+						 bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	if (cq->cq_uk.cq_id > I40IW_MAX_CQID)
+		return I40IW_ERR_INVALID_CQ_ID;
+
+	if (cq->ceq_id > I40IW_MAX_CEQID)
+		return I40IW_ERR_INVALID_CEQ_ID;
+
+	cqp = cq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(cq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+
+	set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
+
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+	set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+	set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+	header = cq->cq_uk.cq_id |
+		 LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+		 LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+		 LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+		 LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cq_destroy - destroy completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq,
+						  u64 scratch,
+						  bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+
+	cqp = cq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+	set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+
+	header = cq->cq_uk.cq_id |
+		 LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+		 LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+		 LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cq_modify - modify a Completion Queue
+ * @cq: cq struct
+ * @info: modification info struct
+ * @scratch:
+ * @post_sq: flag to post to sq
+ */
+static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq,
+						 struct i40iw_modify_cq_info *info,
+						 u64 scratch,
+						 bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+	u32 cq_size, ceq_id, first_pm_pbl_idx;
+	u8 pbl_chunk_size;
+	bool virtual_map, ceq_id_valid, check_overflow;
+	u32 pble_obj_cnt;
+
+	if (info->ceq_valid && (info->ceq_id > I40IW_MAX_CEQID))
+		return I40IW_ERR_INVALID_CEQ_ID;
+
+	pble_obj_cnt = cq->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->cq_resize && info->virtual_map &&
+	    (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	cqp = cq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	cq->pbl_list = info->pbl_list;
+	cq->cq_pa = info->cq_pa;
+	cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+
+	cq_size = info->cq_resize ? info->cq_size : cq->cq_uk.cq_size;
+	if (info->ceq_change) {
+		ceq_id_valid = true;
+		ceq_id = info->ceq_id;
+	} else {
+		ceq_id_valid = cq->ceq_id_valid;
+		ceq_id = ceq_id_valid ? cq->ceq_id : 0;
+	}
+	virtual_map = info->cq_resize ? info->virtual_map : cq->virtual_map;
+	first_pm_pbl_idx = (info->cq_resize ?
+			    (info->virtual_map ? info->first_pm_pbl_idx : 0) :
+			    (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+	pbl_chunk_size = (info->cq_resize ?
+			  (info->virtual_map ? info->pbl_chunk_size : 0) :
+			  (cq->virtual_map ? cq->pbl_chunk_size : 0));
+	check_overflow = info->check_overflow_change ? info->check_overflow :
+			 cq->check_overflow;
+	cq->cq_uk.cq_size = cq_size;
+	cq->ceq_id_valid = ceq_id_valid;
+	cq->ceq_id = ceq_id;
+	cq->virtual_map = virtual_map;
+	cq->first_pm_pbl_idx = first_pm_pbl_idx;
+	cq->pbl_chunk_size = pbl_chunk_size;
+	cq->check_overflow = check_overflow;
+
+	set_64bit_val(wqe, 0, cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe, 16,
+		      LS_64(info->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+	set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+	set_64bit_val(wqe, 48, (cq->virtual_map ? first_pm_pbl_idx : 0));
+	set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+	header = cq->cq_uk.cq_id |
+		 LS_64(ceq_id, I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_MODIFY_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->cq_resize, I40IW_CQPSQ_CQ_CQRESIZE) |
+		 LS_64(pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+		 LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+		 LS_64(virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+		 LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_MODIFY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_init - initialize qp
+ * @qp: sc qp
+ * @info: initialization qp info
+ */
+static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
+					       struct i40iw_qp_init_info *info)
+{
+	u32 __iomem *wqe_alloc_reg = NULL;
+	enum i40iw_status_code ret_code;
+	u32 pble_obj_cnt;
+	u8 wqe_size;
+	u32 offset;
+
+	qp->dev = info->pd->dev;
+	qp->sq_pa = info->sq_pa;
+	qp->rq_pa = info->rq_pa;
+	qp->hw_host_ctx_pa = info->host_ctx_pa;
+	qp->q2_pa = info->q2_pa;
+	qp->shadow_area_pa = info->shadow_area_pa;
+
+	qp->q2_buf = info->q2;
+	qp->pd = info->pd;
+	qp->hw_host_ctx = info->host_ctx;
+	offset = (qp->pd->dev->is_pf) ? I40E_PFPE_WQEALLOC : I40E_VFPE_WQEALLOC1;
+	if (i40iw_get_hw_addr(qp->pd->dev))
+		wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+					      offset);
+
+	info->qp_uk_init_info.wqe_alloc_reg = wqe_alloc_reg;
+	ret_code = i40iw_qp_uk_init(&qp->qp_uk, &info->qp_uk_init_info);
+	if (ret_code)
+		return ret_code;
+	qp->virtual_map = info->virtual_map;
+
+	pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if ((info->virtual_map && (info->sq_pa >= pble_obj_cnt)) ||
+	    (info->virtual_map && (info->rq_pa >= pble_obj_cnt)))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	qp->llp_stream_handle = (void *)(-1);
+	qp->qp_type = (info->type) ? info->type : I40IW_QP_TYPE_IWARP;
+
+	qp->hw_sq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.sq_ring.size,
+						    false);
+	i40iw_debug(qp->dev, I40IW_DEBUG_WQE, "%s: hw_sq_size[%04d] sq_ring.size[%04d]\n",
+		    __func__, qp->hw_sq_size, qp->qp_uk.sq_ring.size);
+	ret_code = i40iw_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
+					       &wqe_size);
+	if (ret_code)
+		return ret_code;
+	qp->hw_rq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.rq_size *
+				(wqe_size / I40IW_QP_WQE_MIN_SIZE), false);
+	i40iw_debug(qp->dev, I40IW_DEBUG_WQE,
+		    "%s: hw_rq_size[%04d] qp_uk.rq_size[%04d] wqe_size[%04d]\n",
+		    __func__, qp->hw_rq_size, qp->qp_uk.rq_size, wqe_size);
+	qp->sq_tph_val = info->sq_tph_val;
+	qp->rq_tph_val = info->rq_tph_val;
+	qp->sq_tph_en = info->sq_tph_en;
+	qp->rq_tph_en = info->rq_tph_en;
+	qp->rcv_tph_en = info->rcv_tph_en;
+	qp->xmit_tph_en = info->xmit_tph_en;
+	qp->qs_handle = qp->pd->dev->qs_handle;
+	qp->exception_lan_queue = qp->pd->dev->exception_lan_queue;
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_create - create qp
+ * @qp: sc qp
+ * @info: qp create info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_create(
+				struct i40iw_sc_qp *qp,
+				struct i40iw_create_qp_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+
+	if ((qp->qp_uk.qp_id < I40IW_MIN_IW_QP_ID) ||
+	    (qp->qp_uk.qp_id > I40IW_MAX_IW_QP_ID))
+		return I40IW_ERR_INVALID_QP_ID;
+
+	cqp = qp->pd->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+
+	set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64((info->ord_valid ? 1 : 0), I40IW_CQPSQ_QP_ORDVALID) |
+		 LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
+		 LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+		 LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
+		 LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
+		 LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
+		 LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
+		 LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_modify - modify qp cqp wqe
+ * @qp: sc qp
+ * @info: modify qp info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_modify(
+				struct i40iw_sc_qp *qp,
+				struct i40iw_modify_qp_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	u8 term_actions = 0;
+	u8 term_len = 0;
+
+	cqp = qp->pd->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	if (info->next_iwarp_state == I40IW_QP_STATE_TERMINATE) {
+		if (info->dont_send_fin)
+			term_actions += I40IWQP_TERM_SEND_TERM_ONLY;
+		if (info->dont_send_term)
+			term_actions += I40IWQP_TERM_SEND_FIN_ONLY;
+		if ((term_actions == I40IWQP_TERM_SEND_TERM_AND_FIN) ||
+		    (term_actions == I40IWQP_TERM_SEND_TERM_ONLY))
+			term_len = info->termlen;
+	}
+
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) |
+		      LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN));
+
+	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+	set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_MODIFY_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->ord_valid, I40IW_CQPSQ_QP_ORDVALID) |
+		 LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
+		 LS_64(info->cached_var_valid, I40IW_CQPSQ_QP_CACHEDVARVALID) |
+		 LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
+		 LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
+		 LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
+		 LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+		 LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) |
+		 LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
+		 LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
+		 LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
+		 LS_64(info->reset_tcp_conn, I40IW_CQPSQ_QP_RESETCON) |
+		 LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
+		 LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_MODIFY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_destroy - cqp destroy qp
+ * @qp: sc qp
+ * @scratch: u64 saved to be used during cqp completion
+ * @remove_hash_idx: flag if to remove hash idx
+ * @ignore_mw_bnd: memory window bind flag
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_destroy(
+					struct i40iw_sc_qp *qp,
+					u64 scratch,
+					bool remove_hash_idx,
+					bool ignore_mw_bnd,
+					bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = qp->pd->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+	set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_DESTROY_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+		 LS_64(ignore_mw_bnd, I40IW_CQPSQ_QP_IGNOREMWBOUND) |
+		 LS_64(remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_flush_wqes - flush qp's wqe
+ * @qp: sc qp
+ * @info: dlush information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_flush_wqes(
+				struct i40iw_sc_qp *qp,
+				struct i40iw_qp_flush_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 temp = 0;
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	bool flush_sq = false, flush_rq = false;
+
+	if (info->rq && !qp->flush_rq)
+		flush_rq = true;
+
+	if (info->sq && !qp->flush_sq)
+		flush_sq = true;
+
+	qp->flush_sq |= flush_sq;
+	qp->flush_rq |= flush_rq;
+	if (!flush_sq && !flush_rq) {
+		if (info->ae_code != I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR)
+			return 0;
+	}
+
+	cqp = qp->pd->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	if (info->userflushcode) {
+		if (flush_rq) {
+			temp |= LS_64(info->rq_minor_code, I40IW_CQPSQ_FWQE_RQMNERR) |
+				LS_64(info->rq_major_code, I40IW_CQPSQ_FWQE_RQMJERR);
+		}
+		if (flush_sq) {
+			temp |= LS_64(info->sq_minor_code, I40IW_CQPSQ_FWQE_SQMNERR) |
+				LS_64(info->sq_major_code, I40IW_CQPSQ_FWQE_SQMJERR);
+		}
+	}
+	set_64bit_val(wqe, 16, temp);
+
+	temp = (info->generate_ae) ?
+		info->ae_code | LS_64(info->ae_source, I40IW_CQPSQ_FWQE_AESOURCE) : 0;
+
+	set_64bit_val(wqe, 8, temp);
+
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_FLUSH_WQES, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->generate_ae, I40IW_CQPSQ_FWQE_GENERATE_AE) |
+		 LS_64(info->userflushcode, I40IW_CQPSQ_FWQE_USERFLCODE) |
+		 LS_64(flush_sq, I40IW_CQPSQ_FWQE_FLUSHSQ) |
+		 LS_64(flush_rq, I40IW_CQPSQ_FWQE_FLUSHRQ) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_FLUSH WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_upload_context - upload qp's context
+ * @dev: sc device struct
+ * @info: upload context info ptr for return
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_upload_context(
+					struct i40iw_sc_dev *dev,
+					struct i40iw_upload_context_info *info,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, info->buf_pa);
+
+	header = LS_64(info->qp_id, I40IW_CQPSQ_UCTX_QPID) |
+		 LS_64(I40IW_CQP_OP_UPLOAD_CONTEXT, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->qp_type, I40IW_CQPSQ_UCTX_QPTYPE) |
+		 LS_64(info->raw_format, I40IW_CQPSQ_UCTX_RAWFORMAT) |
+		 LS_64(info->freeze_qp, I40IW_CQPSQ_UCTX_FREEZEQP) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QP_UPLOAD_CTX WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_setctx - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static enum i40iw_status_code i40iw_sc_qp_setctx(
+				struct i40iw_sc_qp *qp,
+				u64 *qp_ctx,
+				struct i40iw_qp_host_ctx_info *info)
+{
+	struct i40iwarp_offload_info *iw;
+	struct i40iw_tcp_offload_info *tcp;
+	u64 qw0, qw3, qw7 = 0;
+
+	iw = info->iwarp_info;
+	tcp = info->tcp_info;
+	qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) |
+	      LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) |
+	      LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
+	      LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) |
+	      LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) |
+	      LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) |
+	      LS_64(info->push_idx, I40IWQPC_PPIDX) |
+	      LS_64(info->push_mode_en, I40IWQPC_PMENA);
+
+	set_64bit_val(qp_ctx, 8, qp->sq_pa);
+	set_64bit_val(qp_ctx, 16, qp->rq_pa);
+
+	qw3 = LS_64(qp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
+	      LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
+	      LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE);
+
+	set_64bit_val(qp_ctx,
+		      128,
+		      LS_64(info->err_rq_idx, I40IWQPC_ERR_RQ_IDX));
+
+	set_64bit_val(qp_ctx,
+		      136,
+		      LS_64(info->send_cq_num, I40IWQPC_TXCQNUM) |
+		      LS_64(info->rcv_cq_num, I40IWQPC_RXCQNUM));
+
+	set_64bit_val(qp_ctx,
+		      168,
+		      LS_64(info->qp_compl_ctx, I40IWQPC_QPCOMPCTX));
+	set_64bit_val(qp_ctx,
+		      176,
+		      LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
+		      LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
+		      LS_64(qp->qs_handle, I40IWQPC_QSHANDLE) |
+		      LS_64(qp->exception_lan_queue, I40IWQPC_EXCEPTION_LAN_QUEUE));
+
+	if (info->iwarp_info_valid) {
+		qw0 |= LS_64(iw->ddp_ver, I40IWQPC_DDP_VER) |
+		       LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER);
+
+		qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX);
+		set_64bit_val(qp_ctx, 144, qp->q2_pa);
+		set_64bit_val(qp_ctx,
+			      152,
+			      LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT));
+
+		/*
+		* Hard-code IRD_SIZE to hw-limit, 128, in qpctx, i.e matching an
+		*advertisable IRD of 64
+		*/
+		iw->ird_size = I40IW_QPCTX_ENCD_MAXIRD;
+		set_64bit_val(qp_ctx,
+			      160,
+			      LS_64(iw->ord_size, I40IWQPC_ORDSIZE) |
+			      LS_64(iw->ird_size, I40IWQPC_IRDSIZE) |
+			      LS_64(iw->wr_rdresp_en, I40IWQPC_WRRDRSPOK) |
+			      LS_64(iw->rd_enable, I40IWQPC_RDOK) |
+			      LS_64(iw->snd_mark_en, I40IWQPC_SNDMARKERS) |
+			      LS_64(iw->bind_en, I40IWQPC_BINDEN) |
+			      LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) |
+			      LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) |
+			      LS_64(1, I40IWQPC_IWARPMODE) |
+			      LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) |
+			      LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) |
+			      LS_64(iw->rcv_no_mpa_crc, I40IWQPC_RCVNOMPACRC) |
+			      LS_64(iw->rcv_mark_offset, I40IWQPC_RCVMARKOFFSET) |
+			      LS_64(iw->snd_mark_offset, I40IWQPC_SNDMARKOFFSET));
+	}
+	if (info->tcp_info_valid) {
+		qw0 |= LS_64(tcp->ipv4, I40IWQPC_IPV4) |
+		       LS_64(tcp->no_nagle, I40IWQPC_NONAGLE) |
+		       LS_64(tcp->insert_vlan_tag, I40IWQPC_INSERTVLANTAG) |
+		       LS_64(tcp->time_stamp, I40IWQPC_TIMESTAMP) |
+		       LS_64(tcp->cwnd_inc_limit, I40IWQPC_LIMIT) |
+		       LS_64(tcp->drop_ooo_seg, I40IWQPC_DROPOOOSEG) |
+		       LS_64(tcp->dup_ack_thresh, I40IWQPC_DUPACK_THRESH);
+
+		qw3 |= LS_64(tcp->ttl, I40IWQPC_TTL) |
+		       LS_64(tcp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
+		       LS_64(tcp->avoid_stretch_ack, I40IWQPC_AVOIDSTRETCHACK) |
+		       LS_64(tcp->tos, I40IWQPC_TOS) |
+		       LS_64(tcp->src_port, I40IWQPC_SRCPORTNUM) |
+		       LS_64(tcp->dst_port, I40IWQPC_DESTPORTNUM);
+
+		qp->src_mac_addr_idx = tcp->src_mac_addr_idx;
+		set_64bit_val(qp_ctx,
+			      32,
+			      LS_64(tcp->dest_ip_addr2, I40IWQPC_DESTIPADDR2) |
+			      LS_64(tcp->dest_ip_addr3, I40IWQPC_DESTIPADDR3));
+
+		set_64bit_val(qp_ctx,
+			      40,
+			      LS_64(tcp->dest_ip_addr0, I40IWQPC_DESTIPADDR0) |
+			      LS_64(tcp->dest_ip_addr1, I40IWQPC_DESTIPADDR1));
+
+		set_64bit_val(qp_ctx,
+			      48,
+			      LS_64(tcp->snd_mss, I40IWQPC_SNDMSS) |
+				LS_64(tcp->vlan_tag, I40IWQPC_VLANTAG) |
+				LS_64(tcp->arp_idx, I40IWQPC_ARPIDX));
+
+		qw7 |= LS_64(tcp->flow_label, I40IWQPC_FLOWLABEL) |
+		       LS_64(tcp->wscale, I40IWQPC_WSCALE) |
+		       LS_64(tcp->ignore_tcp_opt, I40IWQPC_IGNORE_TCP_OPT) |
+		       LS_64(tcp->ignore_tcp_uns_opt, I40IWQPC_IGNORE_TCP_UNS_OPT) |
+		       LS_64(tcp->tcp_state, I40IWQPC_TCPSTATE) |
+		       LS_64(tcp->rcv_wscale, I40IWQPC_RCVSCALE) |
+		       LS_64(tcp->snd_wscale, I40IWQPC_SNDSCALE);
+
+		set_64bit_val(qp_ctx,
+			      72,
+			      LS_64(tcp->time_stamp_recent, I40IWQPC_TIMESTAMP_RECENT) |
+			      LS_64(tcp->time_stamp_age, I40IWQPC_TIMESTAMP_AGE));
+		set_64bit_val(qp_ctx,
+			      80,
+			      LS_64(tcp->snd_nxt, I40IWQPC_SNDNXT) |
+			      LS_64(tcp->snd_wnd, I40IWQPC_SNDWND));
+
+		set_64bit_val(qp_ctx,
+			      88,
+			      LS_64(tcp->rcv_nxt, I40IWQPC_RCVNXT) |
+			      LS_64(tcp->rcv_wnd, I40IWQPC_RCVWND));
+		set_64bit_val(qp_ctx,
+			      96,
+			      LS_64(tcp->snd_max, I40IWQPC_SNDMAX) |
+			      LS_64(tcp->snd_una, I40IWQPC_SNDUNA));
+		set_64bit_val(qp_ctx,
+			      104,
+			      LS_64(tcp->srtt, I40IWQPC_SRTT) |
+			      LS_64(tcp->rtt_var, I40IWQPC_RTTVAR));
+		set_64bit_val(qp_ctx,
+			      112,
+			      LS_64(tcp->ss_thresh, I40IWQPC_SSTHRESH) |
+			      LS_64(tcp->cwnd, I40IWQPC_CWND));
+		set_64bit_val(qp_ctx,
+			      120,
+			      LS_64(tcp->snd_wl1, I40IWQPC_SNDWL1) |
+			      LS_64(tcp->snd_wl2, I40IWQPC_SNDWL2));
+		set_64bit_val(qp_ctx,
+			      128,
+			      LS_64(tcp->max_snd_window, I40IWQPC_MAXSNDWND) |
+			      LS_64(tcp->rexmit_thresh, I40IWQPC_REXMIT_THRESH));
+		set_64bit_val(qp_ctx,
+			      184,
+			      LS_64(tcp->local_ipaddr3, I40IWQPC_LOCAL_IPADDR3) |
+			      LS_64(tcp->local_ipaddr2, I40IWQPC_LOCAL_IPADDR2));
+		set_64bit_val(qp_ctx,
+			      192,
+			      LS_64(tcp->local_ipaddr1, I40IWQPC_LOCAL_IPADDR1) |
+			      LS_64(tcp->local_ipaddr0, I40IWQPC_LOCAL_IPADDR0));
+	}
+
+	set_64bit_val(qp_ctx, 0, qw0);
+	set_64bit_val(qp_ctx, 24, qw3);
+	set_64bit_val(qp_ctx, 56, qw7);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "QP_HOST)CTX WQE",
+			qp_ctx, I40IW_QP_CTX_SIZE);
+	return 0;
+}
+
+/**
+ * i40iw_sc_alloc_stag - mr stag alloc
+ * @dev: sc device struct
+ * @info: stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_alloc_stag(
+				struct i40iw_sc_dev *dev,
+				struct i40iw_allocate_stag_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID) |
+		      LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN));
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+	set_64bit_val(wqe,
+		      40,
+		      LS_64(info->hmc_fcn_index, I40IW_CQPSQ_STAG_HMCFNIDX));
+
+	header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_STAG_MR) |
+		 LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+		 LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
+		 LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+		 LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+		 LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
+		 LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "ALLOC_STAG WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_mr_reg_non_shared - non-shared mr registration
+ * @dev: sc device struct
+ * @info: mr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mr_reg_non_shared(
+				struct i40iw_sc_dev *dev,
+				struct i40iw_reg_ns_stag_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 temp;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	u32 pble_obj_cnt;
+	bool remote_access;
+	u8 addr_type;
+
+	if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
+				   I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
+		remote_access = true;
+	else
+		remote_access = false;
+
+	pble_obj_cnt = dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->chunk_size && (info->first_pm_pbl_index >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
+	set_64bit_val(wqe, 0, temp);
+
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN) |
+		      LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(info->stag_key, I40IW_CQPSQ_STAG_KEY) |
+		      LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+	if (!info->chunk_size) {
+		set_64bit_val(wqe, 32, info->reg_addr_pa);
+		set_64bit_val(wqe, 48, 0);
+	} else {
+		set_64bit_val(wqe, 32, 0);
+		set_64bit_val(wqe, 48, info->first_pm_pbl_index);
+	}
+	set_64bit_val(wqe, 40, info->hmc_fcn_index);
+	set_64bit_val(wqe, 56, 0);
+
+	addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
+	header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_STAG_MR) |
+		 LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
+		 LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+		 LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+		 LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+		 LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
+		 LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
+		 LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_NS WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_mr_reg_shared - registered shared memory region
+ * @dev: sc device struct
+ * @info: info for shared memory registeration
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mr_reg_shared(
+					struct i40iw_sc_dev *dev,
+					struct i40iw_register_shared_stag *info,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 temp, va64, fbo, header;
+	u32 va32;
+	bool remote_access;
+	u8 addr_type;
+
+	if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
+				   I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
+		remote_access = true;
+	else
+		remote_access = false;
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	va64 = (uintptr_t)(info->va);
+	va32 = (u32)(va64 & 0x00000000FFFFFFFF);
+	fbo = (u64)(va32 & (4096 - 1));
+
+	set_64bit_val(wqe,
+		      0,
+		      (info->addr_type == I40IW_ADDR_TYPE_VA_BASED ? (uintptr_t)info->va : fbo));
+
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+	temp = LS_64(info->new_stag_key, I40IW_CQPSQ_STAG_KEY) |
+	       LS_64(info->new_stag_idx, I40IW_CQPSQ_STAG_IDX) |
+	       LS_64(info->parent_stag_idx, I40IW_CQPSQ_STAG_PARENTSTAGIDX);
+	set_64bit_val(wqe, 16, temp);
+
+	addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
+	header = LS_64(I40IW_CQP_OP_REG_SMR, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_STAG_MR) |
+		 LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+		 LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+		 LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_SHARED WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_dealloc_stag - deallocate stag
+ * @dev: sc device struct
+ * @info: dealloc stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_dealloc_stag(
+					struct i40iw_sc_dev *dev,
+					struct i40iw_dealloc_stag_info *info,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 header;
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+
+	header = LS_64(I40IW_CQP_OP_DEALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->mr, I40IW_CQPSQ_STAG_MR) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "DEALLOC_STAG WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_query_stag - query hardware for stag
+ * @dev: sc device struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @stag_index: stag index for query
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_query_stag(struct i40iw_sc_dev *dev,
+						  u64 scratch,
+						  u32 stag_index,
+						  bool post_sq)
+{
+	u64 header;
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(stag_index, I40IW_CQPSQ_QUERYSTAG_IDX));
+
+	header = LS_64(I40IW_CQP_OP_QUERY_STAG, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QUERY_STAG WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_mw_alloc - mw allocate
+ * @dev: sc device struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @mw_stag_index:stag index
+ * @pd_id: pd is for this mw
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mw_alloc(
+					struct i40iw_sc_dev *dev,
+					u64 scratch,
+					u32 mw_stag_index,
+					u16 pd_id,
+					bool post_sq)
+{
+	u64 header;
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 8, LS_64(pd_id, I40IW_CQPSQ_STAG_PDID));
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(mw_stag_index, I40IW_CQPSQ_STAG_IDX));
+
+	header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MW_ALLOC WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_send_lsmm - send last streaming mode message
+ * @qp: sc qp struct
+ * @lsmm_buf: buffer with lsmm message
+ * @size: size of lsmm buffer
+ * @stag: stag of lsmm buffer
+ */
+static void i40iw_sc_send_lsmm(struct i40iw_sc_qp *qp,
+			       void *lsmm_buf,
+			       u32 size,
+			       i40iw_stag stag)
+{
+	u64 *wqe;
+	u64 header;
+	struct i40iw_qp_uk *qp_uk;
+
+	qp_uk = &qp->qp_uk;
+	wqe = qp_uk->sq_base->elem;
+
+	set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
+
+	set_64bit_val(wqe, 8, (size | LS_64(stag, I40IWQPSQ_FRAG_STAG)));
+
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+		 LS_64(1, I40IWQPSQ_STREAMMODE) |
+		 LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
+		 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_QP, "SEND_LSMM WQE",
+			wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_send_lsmm_nostag - for privilege qp
+ * @qp: sc qp struct
+ * @lsmm_buf: buffer with lsmm message
+ * @size: size of lsmm buffer
+ */
+static void i40iw_sc_send_lsmm_nostag(struct i40iw_sc_qp *qp,
+				      void *lsmm_buf,
+				      u32 size)
+{
+	u64 *wqe;
+	u64 header;
+	struct i40iw_qp_uk *qp_uk;
+
+	qp_uk = &qp->qp_uk;
+	wqe = qp_uk->sq_base->elem;
+
+	set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
+
+	set_64bit_val(wqe, 8, size);
+
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+		 LS_64(1, I40IWQPSQ_STREAMMODE) |
+		 LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
+		 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "SEND_LSMM_NOSTAG WQE",
+			wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_send_rtt - send last read0 or write0
+ * @qp: sc qp struct
+ * @read: Do read0 or write0
+ */
+static void i40iw_sc_send_rtt(struct i40iw_sc_qp *qp, bool read)
+{
+	u64 *wqe;
+	u64 header;
+	struct i40iw_qp_uk *qp_uk;
+
+	qp_uk = &qp->qp_uk;
+	wqe = qp_uk->sq_base->elem;
+
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8, 0);
+	set_64bit_val(wqe, 16, 0);
+	if (read) {
+		header = LS_64(0x1234, I40IWQPSQ_REMSTAG) |
+			 LS_64(I40IWQP_OP_RDMA_READ, I40IWQPSQ_OPCODE) |
+			 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+		set_64bit_val(wqe, 8, ((u64)0xabcd << 32));
+	} else {
+		header = LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+			 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+	}
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "RTR WQE",
+			wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_post_wqe0 - send wqe with opcode
+ * @qp: sc qp struct
+ * @opcode: opcode to use for wqe0
+ */
+static enum i40iw_status_code i40iw_sc_post_wqe0(struct i40iw_sc_qp *qp, u8 opcode)
+{
+	u64 *wqe;
+	u64 header;
+	struct i40iw_qp_uk *qp_uk;
+
+	qp_uk = &qp->qp_uk;
+	wqe = qp_uk->sq_base->elem;
+
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+	switch (opcode) {
+	case I40IWQP_OP_NOP:
+		set_64bit_val(wqe, 0, 0);
+		set_64bit_val(wqe, 8, 0);
+		set_64bit_val(wqe, 16, 0);
+		header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+			 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+		i40iw_insert_wqe_hdr(wqe, header);
+		break;
+	case I40IWQP_OP_RDMA_SEND:
+		set_64bit_val(wqe, 0, 0);
+		set_64bit_val(wqe, 8, 0);
+		set_64bit_val(wqe, 16, 0);
+		header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+			 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID) |
+			 LS_64(1, I40IWQPSQ_STREAMMODE) |
+			 LS_64(1, I40IWQPSQ_WAITFORRCVPDU);
+
+		i40iw_insert_wqe_hdr(wqe, header);
+		break;
+	default:
+		i40iw_debug(qp->dev, I40IW_DEBUG_QP, "%s: Invalid WQE zero opcode\n",
+			    __func__);
+		break;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_sc_init_iw_hmc() - queries fpm values using cqp and populates hmc_info
+ * @dev : ptr to i40iw_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
+{
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_dma_mem query_fpm_mem;
+	struct i40iw_virt_mem virt_mem;
+	struct i40iw_vfdev *vf_dev = NULL;
+	u32 mem_size;
+	enum i40iw_status_code ret_code = 0;
+	bool poll_registers = true;
+	u16 iw_vf_idx;
+	u8 wait_type;
+
+	if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
+	    (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
+		return I40IW_ERR_INVALID_HMCFN_ID;
+
+	i40iw_debug(dev, I40IW_DEBUG_HMC, "hmc_fn_id %u, dev->hmc_fn_id %u\n", hmc_fn_id,
+		    dev->hmc_fn_id);
+	if (hmc_fn_id == dev->hmc_fn_id) {
+		hmc_info = dev->hmc_info;
+		query_fpm_mem.pa = dev->fpm_query_buf_pa;
+		query_fpm_mem.va = dev->fpm_query_buf;
+	} else {
+		vf_dev = i40iw_vfdev_from_fpm(dev, hmc_fn_id);
+		if (!vf_dev)
+			return I40IW_ERR_INVALID_VF_ID;
+
+		hmc_info = &vf_dev->hmc_info;
+		iw_vf_idx = vf_dev->iw_vf_idx;
+		i40iw_debug(dev, I40IW_DEBUG_HMC, "vf_dev %p, hmc_info %p, hmc_obj %p\n", vf_dev,
+			    hmc_info, hmc_info->hmc_obj);
+		if (!vf_dev->fpm_query_buf) {
+			if (!dev->vf_fpm_query_buf[iw_vf_idx].va) {
+				ret_code = i40iw_alloc_query_fpm_buf(dev,
+								     &dev->vf_fpm_query_buf[iw_vf_idx]);
+				if (ret_code)
+					return ret_code;
+			}
+			vf_dev->fpm_query_buf = dev->vf_fpm_query_buf[iw_vf_idx].va;
+			vf_dev->fpm_query_buf_pa = dev->vf_fpm_query_buf[iw_vf_idx].pa;
+		}
+		query_fpm_mem.pa = vf_dev->fpm_query_buf_pa;
+		query_fpm_mem.va = vf_dev->fpm_query_buf;
+		/**
+		 * It is HARDWARE specific:
+		 * this call is done by PF for VF and
+		 * i40iw_sc_query_fpm_values needs ccq poll
+		 * because PF ccq is already created.
+		 */
+		poll_registers = false;
+	}
+
+	hmc_info->hmc_fn_id = hmc_fn_id;
+
+	if (hmc_fn_id != dev->hmc_fn_id) {
+		ret_code =
+			i40iw_cqp_query_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
+	} else {
+		wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
+			    (u8)I40IW_CQP_WAIT_POLL_CQ;
+
+		ret_code = i40iw_sc_query_fpm_values(
+					dev->cqp,
+					0,
+					hmc_info->hmc_fn_id,
+					&query_fpm_mem,
+					true,
+					wait_type);
+	}
+	if (ret_code)
+		return ret_code;
+
+	/* parse the fpm_query_buf and fill hmc obj info */
+	ret_code =
+		i40iw_sc_parse_fpm_query_buf((u64 *)query_fpm_mem.va,
+					     hmc_info,
+					     &dev->hmc_fpm_misc);
+	if (ret_code)
+		return ret_code;
+	i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "QUERY FPM BUFFER",
+			query_fpm_mem.va, I40IW_QUERY_FPM_BUF_SIZE);
+
+	if (hmc_fn_id != dev->hmc_fn_id) {
+		i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
+
+		/* parse the fpm_commit_buf and fill hmc obj info */
+		i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj);
+		mem_size = sizeof(struct i40iw_hmc_sd_entry) *
+			   (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index);
+		ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
+		if (ret_code)
+			return ret_code;
+		hmc_info->sd_table.sd_entry = virt_mem.va;
+	}
+
+	/* fill size of objects which are fixed */
+	hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].size = 4;
+	hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].size = 4;
+	hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size = 8;
+	hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].size = 8192;
+	hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1;
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_configure_iw_fpm() - commits hmc obj cnt values using cqp command and
+ * populates fpm base address in hmc_info
+ * @dev : ptr to i40iw_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev,
+							u8 hmc_fn_id)
+{
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_hmc_obj_info *obj_info;
+	u64 *buf;
+	struct i40iw_dma_mem commit_fpm_mem;
+	u32 i, j;
+	enum i40iw_status_code ret_code = 0;
+	bool poll_registers = true;
+	u8 wait_type;
+
+	if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
+	    (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
+		return I40IW_ERR_INVALID_HMCFN_ID;
+
+	if (hmc_fn_id == dev->hmc_fn_id) {
+		hmc_info = dev->hmc_info;
+	} else {
+		hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, hmc_fn_id);
+		poll_registers = false;
+	}
+	if (!hmc_info)
+		return I40IW_ERR_BAD_PTR;
+
+	obj_info = hmc_info->hmc_obj;
+	buf = dev->fpm_commit_buf;
+
+	/* copy cnt values in commit buf */
+	for (i = I40IW_HMC_IW_QP, j = 0; i <= I40IW_HMC_IW_PBLE;
+	     i++, j += 8)
+		set_64bit_val(buf, j, (u64)obj_info[i].cnt);
+
+	set_64bit_val(buf, 40, 0);   /* APBVT rsvd */
+
+	commit_fpm_mem.pa = dev->fpm_commit_buf_pa;
+	commit_fpm_mem.va = dev->fpm_commit_buf;
+	wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
+			(u8)I40IW_CQP_WAIT_POLL_CQ;
+	ret_code = i40iw_sc_commit_fpm_values(
+					dev->cqp,
+					0,
+					hmc_info->hmc_fn_id,
+					&commit_fpm_mem,
+					true,
+					wait_type);
+
+	/* parse the fpm_commit_buf and fill hmc obj info */
+	if (!ret_code)
+		ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, hmc_info->hmc_obj);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER",
+			commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE);
+
+	return ret_code;
+}
+
+/**
+ * cqp_sds_wqe_fill - fill cqp wqe doe sd
+ * @cqp: struct for cqp hw
+ * @info; sd info for wqe
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp,
+					       struct i40iw_update_sds_info *info,
+					       u64 scratch)
+{
+	u64 data;
+	u64 header;
+	u64 *wqe;
+	int mem_entries, wqe_entries;
+	struct i40iw_dma_mem *sdbuf = &cqp->sdbuf;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	I40IW_CQP_INIT_WQE(wqe);
+	wqe_entries = (info->cnt > 3) ? 3 : info->cnt;
+	mem_entries = info->cnt - wqe_entries;
+
+	header = LS_64(I40IW_CQP_OP_UPDATE_PE_SDS, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+		 LS_64(mem_entries, I40IW_CQPSQ_UPESD_ENTRY_COUNT);
+
+	if (mem_entries) {
+		memcpy(sdbuf->va, &info->entry[3], (mem_entries << 4));
+		data = sdbuf->pa;
+	} else {
+		data = 0;
+	}
+	data |= LS_64(info->hmc_fn_id, I40IW_CQPSQ_UPESD_HMCFNID);
+
+	set_64bit_val(wqe, 16, data);
+
+	switch (wqe_entries) {
+	case 3:
+		set_64bit_val(wqe, 48,
+			      (LS_64(info->entry[2].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
+					LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
+
+		set_64bit_val(wqe, 56, info->entry[2].data);
+		/* fallthrough */
+	case 2:
+		set_64bit_val(wqe, 32,
+			      (LS_64(info->entry[1].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
+					LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
+
+		set_64bit_val(wqe, 40, info->entry[1].data);
+		/* fallthrough */
+	case 1:
+		set_64bit_val(wqe, 0,
+			      LS_64(info->entry[0].cmd, I40IW_CQPSQ_UPESD_SDCMD));
+
+		set_64bit_val(wqe, 8, info->entry[0].data);
+		break;
+	default:
+		break;
+	}
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "UPDATE_PE_SDS WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	return 0;
+}
+
+/**
+ * i40iw_update_pe_sds - cqp wqe for sd
+ * @dev: ptr to i40iw_dev struct
+ * @info: sd info for sd's
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code i40iw_update_pe_sds(struct i40iw_sc_dev *dev,
+						  struct i40iw_update_sds_info *info,
+						  u64 scratch)
+{
+	struct i40iw_sc_cqp *cqp = dev->cqp;
+	enum i40iw_status_code ret_code;
+
+	ret_code = cqp_sds_wqe_fill(cqp, info, scratch);
+	if (!ret_code)
+		i40iw_sc_cqp_post_sq(cqp);
+
+	return ret_code;
+}
+
+/**
+ * i40iw_update_sds_noccq - update sd before ccq created
+ * @dev: sc device struct
+ * @info: sd info for sd's
+ */
+enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
+					      struct i40iw_update_sds_info *info)
+{
+	u32 error, val, tail;
+	struct i40iw_sc_cqp *cqp = dev->cqp;
+	enum i40iw_status_code ret_code;
+
+	ret_code = cqp_sds_wqe_fill(cqp, info, 0);
+	if (ret_code)
+		return ret_code;
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	i40iw_sc_cqp_post_sq(cqp);
+	ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_suspend_qp - suspend qp for param change
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
+					   struct i40iw_sc_qp *qp,
+					   u64 scratch)
+{
+	u64 header;
+	u64 *wqe;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_SUSPENDQP_QPID) |
+		 LS_64(I40IW_CQP_OP_SUSPEND_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SUSPEND_QP WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_resume_qp - resume qp after suspend
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
+					  struct i40iw_sc_qp *qp,
+					  u64 scratch)
+{
+	u64 header;
+	u64 *wqe;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      16,
+			LS_64(qp->qs_handle, I40IW_CQPSQ_RESUMEQP_QSHANDLE));
+
+	header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_RESUMEQP_QPID) |
+		 LS_64(I40IW_CQP_OP_RESUME_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "RESUME_QP WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_static_hmc_pages_allocated - cqp wqe to allocate hmc pages
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @post_sq: flag for cqp db to ring
+ * @poll_registers: flag to poll register for cqp completion
+ */
+enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					u8 hmc_fn_id,
+					bool post_sq,
+					bool poll_registers)
+{
+	u64 header;
+	u64 *wqe;
+	u32 tail, val, error;
+	enum i40iw_status_code ret_code = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(hmc_fn_id, I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID));
+
+	header = LS_64(I40IW_CQP_OP_SHMC_PAGES_ALLOCATED, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SHMC_PAGES_ALLOCATED WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error) {
+		ret_code = I40IW_ERR_CQP_COMPL_ERROR;
+		return ret_code;
+	}
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		if (poll_registers)
+			/* check for cqp sq tail update */
+			ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
+		else
+			ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
+								 I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
+								 NULL);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_ring_full - check if cqp ring is full
+ * @cqp: struct for cqp hw
+ */
+static bool i40iw_ring_full(struct i40iw_sc_cqp *cqp)
+{
+	return I40IW_RING_FULL_ERR(cqp->sq_ring);
+}
+
+/**
+ * i40iw_config_fpm_values - configure HMC objects
+ * @dev: sc device struct
+ * @qp_count: desired qp count
+ */
+enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count)
+{
+	struct i40iw_virt_mem virt_mem;
+	u32 i, mem_size;
+	u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
+	u32 powerof2;
+	u64 sd_needed, bytes_needed;
+	u32 loop_count = 0;
+
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_hmc_fpm_misc *hmc_fpm_misc;
+	enum i40iw_status_code ret_code = 0;
+
+	hmc_info = dev->hmc_info;
+	hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+	ret_code = i40iw_sc_init_iw_hmc(dev, dev->hmc_fn_id);
+	if (ret_code) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "i40iw_sc_init_iw_hmc returned error_code = %d\n",
+			    ret_code);
+		return ret_code;
+	}
+
+	bytes_needed = 0;
+	for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
+		hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+		bytes_needed +=
+		    (hmc_info->hmc_obj[i].max_cnt) * (hmc_info->hmc_obj[i].size);
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s i[%04d] max_cnt[0x%04X] size[0x%04llx]\n",
+			    __func__, i, hmc_info->hmc_obj[i].max_cnt,
+			    hmc_info->hmc_obj[i].size);
+	}
+	sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up */
+	i40iw_debug(dev, I40IW_DEBUG_HMC,
+		    "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n",
+		    __func__, sd_needed, hmc_info->first_sd_index);
+	i40iw_debug(dev, I40IW_DEBUG_HMC,
+		    "%s: bytes_needed=0x%llx sd count %d where max sd is %d\n",
+		    __func__, bytes_needed, hmc_info->sd_table.sd_cnt,
+		    hmc_fpm_misc->max_sds);
+
+	qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt);
+	qpwantedoriginal = qpwanted;
+	mrwanted = hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt;
+	pblewanted = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt;
+
+	i40iw_debug(dev, I40IW_DEBUG_HMC,
+		    "req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d\n",
+		    qp_count, hmc_fpm_misc->max_sds,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_CQ].max_cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt);
+
+	do {
+		++loop_count;
+		hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt = qpwanted;
+		hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt =
+			min(2 * qpwanted, hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt);
+		hmc_info->hmc_obj[I40IW_HMC_IW_SRQ].cnt = 0x00; /* Reserved */
+		hmc_info->hmc_obj[I40IW_HMC_IW_HTE].cnt =
+					qpwanted * hmc_fpm_misc->ht_multiplier;
+		hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt =
+			hmc_info->hmc_obj[I40IW_HMC_IW_ARP].max_cnt;
+		hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1;
+		hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted;
+
+		hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt = I40IW_MAX_WQ_ENTRIES * qpwanted;
+		hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt = 4 * I40IW_MAX_IRD_SIZE * qpwanted;
+		hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt =
+			hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
+		hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt =
+			hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+		hmc_info->hmc_obj[I40IW_HMC_IW_TIMER].cnt =
+			((qpwanted) / 512 + 1) * hmc_fpm_misc->timer_bucket;
+		hmc_info->hmc_obj[I40IW_HMC_IW_FSIMC].cnt = 0x00;
+		hmc_info->hmc_obj[I40IW_HMC_IW_FSIAV].cnt = 0x00;
+		hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted;
+
+		/* How much memory is needed for all the objects. */
+		bytes_needed = 0;
+		for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
+			bytes_needed +=
+			    (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
+		sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1;
+		if ((loop_count > 1000) ||
+		    ((!(loop_count % 10)) &&
+		    (qpwanted > qpwantedoriginal * 2 / 3))) {
+			if (qpwanted > FPM_MULTIPLIER) {
+				qpwanted -= FPM_MULTIPLIER;
+				powerof2 = 1;
+				while (powerof2 < qpwanted)
+					powerof2 *= 2;
+				powerof2 /= 2;
+				qpwanted = powerof2;
+			} else {
+				qpwanted /= 2;
+			}
+		}
+		if (mrwanted > FPM_MULTIPLIER * 10)
+			mrwanted -= FPM_MULTIPLIER * 10;
+		if (pblewanted > FPM_MULTIPLIER * 1000)
+			pblewanted -= FPM_MULTIPLIER * 1000;
+	} while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
+
+	bytes_needed = 0;
+	for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
+		bytes_needed += (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s i[%04d] cnt[0x%04x] size[0x%04llx]\n",
+			    __func__, i, hmc_info->hmc_obj[i].cnt,
+			    hmc_info->hmc_obj[i].size);
+	}
+	sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1;    /* round up not truncate. */
+
+	i40iw_debug(dev, I40IW_DEBUG_HMC,
+		    "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
+		    loop_count, sd_needed,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt);
+
+	ret_code = i40iw_sc_configure_iw_fpm(dev, dev->hmc_fn_id);
+	if (ret_code) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "configure_iw_fpm returned error_code[x%08X]\n",
+			    i40iw_rd32(dev->hw, dev->is_pf ? I40E_PFPE_CQPERRCODES : I40E_VFPE_CQPERRCODES1));
+		return ret_code;
+	}
+
+	hmc_info->sd_table.sd_cnt = (u32)sd_needed;
+
+	mem_size = sizeof(struct i40iw_hmc_sd_entry) *
+		   (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
+	ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
+	if (ret_code) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s: failed to allocate memory for sd_entry buffer\n",
+			    __func__);
+		return ret_code;
+	}
+	hmc_info->sd_table.sd_entry = virt_mem.va;
+
+	return ret_code;
+}
+
+/**
+ * i40iw_exec_cqp_cmd - execute cqp cmd when wqe are available
+ * @dev: rdma device
+ * @pcmdinfo: cqp command info
+ */
+static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev,
+						 struct cqp_commands_info *pcmdinfo)
+{
+	enum i40iw_status_code status;
+	struct i40iw_dma_mem values_mem;
+
+	dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++;
+	switch (pcmdinfo->cqp_cmd) {
+	case OP_DELETE_LOCAL_MAC_IPADDR_ENTRY:
+		status = i40iw_sc_del_local_mac_ipaddr_entry(
+				pcmdinfo->in.u.del_local_mac_ipaddr_entry.cqp,
+				pcmdinfo->in.u.del_local_mac_ipaddr_entry.scratch,
+				pcmdinfo->in.u.del_local_mac_ipaddr_entry.entry_idx,
+				pcmdinfo->in.u.del_local_mac_ipaddr_entry.ignore_ref_count,
+				pcmdinfo->post_sq);
+		break;
+	case OP_CEQ_DESTROY:
+		status = i40iw_sc_ceq_destroy(pcmdinfo->in.u.ceq_destroy.ceq,
+					      pcmdinfo->in.u.ceq_destroy.scratch,
+					      pcmdinfo->post_sq);
+		break;
+	case OP_AEQ_DESTROY:
+		status = i40iw_sc_aeq_destroy(pcmdinfo->in.u.aeq_destroy.aeq,
+					      pcmdinfo->in.u.aeq_destroy.scratch,
+					      pcmdinfo->post_sq);
+
+		break;
+	case OP_DELETE_ARP_CACHE_ENTRY:
+		status = i40iw_sc_del_arp_cache_entry(
+				pcmdinfo->in.u.del_arp_cache_entry.cqp,
+				pcmdinfo->in.u.del_arp_cache_entry.scratch,
+				pcmdinfo->in.u.del_arp_cache_entry.arp_index,
+				pcmdinfo->post_sq);
+		break;
+	case OP_MANAGE_APBVT_ENTRY:
+		status = i40iw_sc_manage_apbvt_entry(
+				pcmdinfo->in.u.manage_apbvt_entry.cqp,
+				&pcmdinfo->in.u.manage_apbvt_entry.info,
+				pcmdinfo->in.u.manage_apbvt_entry.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_CEQ_CREATE:
+		status = i40iw_sc_ceq_create(pcmdinfo->in.u.ceq_create.ceq,
+					     pcmdinfo->in.u.ceq_create.scratch,
+					     pcmdinfo->post_sq);
+		break;
+	case OP_AEQ_CREATE:
+		status = i40iw_sc_aeq_create(pcmdinfo->in.u.aeq_create.aeq,
+					     pcmdinfo->in.u.aeq_create.scratch,
+					     pcmdinfo->post_sq);
+		break;
+	case OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY:
+		status = i40iw_sc_alloc_local_mac_ipaddr_entry(
+				pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.cqp,
+				pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_ADD_LOCAL_MAC_IPADDR_ENTRY:
+		status = i40iw_sc_add_local_mac_ipaddr_entry(
+				pcmdinfo->in.u.add_local_mac_ipaddr_entry.cqp,
+				&pcmdinfo->in.u.add_local_mac_ipaddr_entry.info,
+				pcmdinfo->in.u.add_local_mac_ipaddr_entry.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_MANAGE_QHASH_TABLE_ENTRY:
+		status = i40iw_sc_manage_qhash_table_entry(
+				pcmdinfo->in.u.manage_qhash_table_entry.cqp,
+				&pcmdinfo->in.u.manage_qhash_table_entry.info,
+				pcmdinfo->in.u.manage_qhash_table_entry.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_QP_MODIFY:
+		status = i40iw_sc_qp_modify(
+				pcmdinfo->in.u.qp_modify.qp,
+				&pcmdinfo->in.u.qp_modify.info,
+				pcmdinfo->in.u.qp_modify.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_QP_UPLOAD_CONTEXT:
+		status = i40iw_sc_qp_upload_context(
+				pcmdinfo->in.u.qp_upload_context.dev,
+				&pcmdinfo->in.u.qp_upload_context.info,
+				pcmdinfo->in.u.qp_upload_context.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_CQ_CREATE:
+		status = i40iw_sc_cq_create(
+				pcmdinfo->in.u.cq_create.cq,
+				pcmdinfo->in.u.cq_create.scratch,
+				pcmdinfo->in.u.cq_create.check_overflow,
+				pcmdinfo->post_sq);
+		break;
+	case OP_CQ_DESTROY:
+		status = i40iw_sc_cq_destroy(
+				pcmdinfo->in.u.cq_destroy.cq,
+				pcmdinfo->in.u.cq_destroy.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_QP_CREATE:
+		status = i40iw_sc_qp_create(
+				pcmdinfo->in.u.qp_create.qp,
+				&pcmdinfo->in.u.qp_create.info,
+				pcmdinfo->in.u.qp_create.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_QP_DESTROY:
+		status = i40iw_sc_qp_destroy(
+				pcmdinfo->in.u.qp_destroy.qp,
+				pcmdinfo->in.u.qp_destroy.scratch,
+				pcmdinfo->in.u.qp_destroy.remove_hash_idx,
+				pcmdinfo->in.u.qp_destroy.
+				ignore_mw_bnd,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_ALLOC_STAG:
+		status = i40iw_sc_alloc_stag(
+				pcmdinfo->in.u.alloc_stag.dev,
+				&pcmdinfo->in.u.alloc_stag.info,
+				pcmdinfo->in.u.alloc_stag.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_MR_REG_NON_SHARED:
+		status = i40iw_sc_mr_reg_non_shared(
+				pcmdinfo->in.u.mr_reg_non_shared.dev,
+				&pcmdinfo->in.u.mr_reg_non_shared.info,
+				pcmdinfo->in.u.mr_reg_non_shared.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_DEALLOC_STAG:
+		status = i40iw_sc_dealloc_stag(
+				pcmdinfo->in.u.dealloc_stag.dev,
+				&pcmdinfo->in.u.dealloc_stag.info,
+				pcmdinfo->in.u.dealloc_stag.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_MW_ALLOC:
+		status = i40iw_sc_mw_alloc(
+				pcmdinfo->in.u.mw_alloc.dev,
+				pcmdinfo->in.u.mw_alloc.scratch,
+				pcmdinfo->in.u.mw_alloc.mw_stag_index,
+				pcmdinfo->in.u.mw_alloc.pd_id,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_QP_FLUSH_WQES:
+		status = i40iw_sc_qp_flush_wqes(
+				pcmdinfo->in.u.qp_flush_wqes.qp,
+				&pcmdinfo->in.u.qp_flush_wqes.info,
+				pcmdinfo->in.u.qp_flush_wqes.
+				scratch, pcmdinfo->post_sq);
+		break;
+	case OP_ADD_ARP_CACHE_ENTRY:
+		status = i40iw_sc_add_arp_cache_entry(
+				pcmdinfo->in.u.add_arp_cache_entry.cqp,
+				&pcmdinfo->in.u.add_arp_cache_entry.info,
+				pcmdinfo->in.u.add_arp_cache_entry.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_MANAGE_PUSH_PAGE:
+		status = i40iw_sc_manage_push_page(
+				pcmdinfo->in.u.manage_push_page.cqp,
+				&pcmdinfo->in.u.manage_push_page.info,
+				pcmdinfo->in.u.manage_push_page.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_UPDATE_PE_SDS:
+		/* case I40IW_CQP_OP_UPDATE_PE_SDS */
+		status = i40iw_update_pe_sds(
+				pcmdinfo->in.u.update_pe_sds.dev,
+				&pcmdinfo->in.u.update_pe_sds.info,
+				pcmdinfo->in.u.update_pe_sds.
+				scratch);
+
+		break;
+	case OP_MANAGE_HMC_PM_FUNC_TABLE:
+		status = i40iw_sc_manage_hmc_pm_func_table(
+				pcmdinfo->in.u.manage_hmc_pm.dev->cqp,
+				pcmdinfo->in.u.manage_hmc_pm.scratch,
+				(u8)pcmdinfo->in.u.manage_hmc_pm.info.vf_id,
+				pcmdinfo->in.u.manage_hmc_pm.info.free_fcn,
+				true);
+		break;
+	case OP_SUSPEND:
+		status = i40iw_sc_suspend_qp(
+				pcmdinfo->in.u.suspend_resume.cqp,
+				pcmdinfo->in.u.suspend_resume.qp,
+				pcmdinfo->in.u.suspend_resume.scratch);
+		break;
+	case OP_RESUME:
+		status = i40iw_sc_resume_qp(
+				pcmdinfo->in.u.suspend_resume.cqp,
+				pcmdinfo->in.u.suspend_resume.qp,
+				pcmdinfo->in.u.suspend_resume.scratch);
+		break;
+	case OP_MANAGE_VF_PBLE_BP:
+		status = i40iw_manage_vf_pble_bp(
+				pcmdinfo->in.u.manage_vf_pble_bp.cqp,
+				&pcmdinfo->in.u.manage_vf_pble_bp.info,
+				pcmdinfo->in.u.manage_vf_pble_bp.scratch, true);
+		break;
+	case OP_QUERY_FPM_VALUES:
+		values_mem.pa = pcmdinfo->in.u.query_fpm_values.fpm_values_pa;
+		values_mem.va = pcmdinfo->in.u.query_fpm_values.fpm_values_va;
+		status = i40iw_sc_query_fpm_values(
+				pcmdinfo->in.u.query_fpm_values.cqp,
+				pcmdinfo->in.u.query_fpm_values.scratch,
+				pcmdinfo->in.u.query_fpm_values.hmc_fn_id,
+				&values_mem, true, I40IW_CQP_WAIT_EVENT);
+		break;
+	case OP_COMMIT_FPM_VALUES:
+		values_mem.pa = pcmdinfo->in.u.commit_fpm_values.fpm_values_pa;
+		values_mem.va = pcmdinfo->in.u.commit_fpm_values.fpm_values_va;
+		status = i40iw_sc_commit_fpm_values(
+				pcmdinfo->in.u.commit_fpm_values.cqp,
+				pcmdinfo->in.u.commit_fpm_values.scratch,
+				pcmdinfo->in.u.commit_fpm_values.hmc_fn_id,
+				&values_mem,
+				true,
+				I40IW_CQP_WAIT_EVENT);
+		break;
+	default:
+		status = I40IW_NOT_SUPPORTED;
+		break;
+	}
+
+	return status;
+}
+
+/**
+ * i40iw_process_cqp_cmd - process all cqp commands
+ * @dev: sc device struct
+ * @pcmdinfo: cqp command info
+ */
+enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
+					     struct cqp_commands_info *pcmdinfo)
+{
+	enum i40iw_status_code status = 0;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&dev->cqp_lock, flags);
+	if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp))
+		status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
+	else
+		list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head);
+	spin_unlock_irqrestore(&dev->cqp_lock, flags);
+	return status;
+}
+
+/**
+ * i40iw_process_bh - called from tasklet for cqp list
+ * @dev: sc device struct
+ */
+enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev)
+{
+	enum i40iw_status_code status = 0;
+	struct cqp_commands_info *pcmdinfo;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&dev->cqp_lock, flags);
+	while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) {
+		pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head);
+
+		status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
+		if (status)
+			break;
+	}
+	spin_unlock_irqrestore(&dev->cqp_lock, flags);
+	return status;
+}
+
+/**
+ * i40iw_iwarp_opcode - determine if incoming is rdma layer
+ * @info: aeq info for the packet
+ * @pkt: packet for error
+ */
+static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt)
+{
+	u16 *mpa;
+	u32 opcode = 0xffffffff;
+
+	if (info->q2_data_written) {
+		mpa = (u16 *)pkt;
+		opcode = ntohs(mpa[1]) & 0xf;
+	}
+	return opcode;
+}
+
+/**
+ * i40iw_locate_mpa - return pointer to mpa in the pkt
+ * @pkt: packet with data
+ */
+static u8 *i40iw_locate_mpa(u8 *pkt)
+{
+	/* skip over ethernet header */
+	pkt += I40IW_MAC_HLEN;
+
+	/* Skip over IP and TCP headers */
+	pkt += 4 * (pkt[0] & 0x0f);
+	pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+	return pkt;
+}
+
+/**
+ * i40iw_setup_termhdr - termhdr for terminate pkt
+ * @qp: sc qp ptr for pkt
+ * @hdr: term hdr
+ * @opcode: flush opcode for termhdr
+ * @layer_etype: error layer + error type
+ * @err: error cod ein the header
+ */
+static void i40iw_setup_termhdr(struct i40iw_sc_qp *qp,
+				struct i40iw_terminate_hdr *hdr,
+				enum i40iw_flush_opcode opcode,
+				u8 layer_etype,
+				u8 err)
+{
+	qp->flush_code = opcode;
+	hdr->layer_etype = layer_etype;
+	hdr->error_code = err;
+}
+
+/**
+ * i40iw_bld_terminate_hdr - build terminate message header
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
+				   struct i40iw_aeqe_info *info)
+{
+	u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+	u16 ddp_seg_len;
+	int copy_len = 0;
+	u8 is_tagged = 0;
+	enum i40iw_flush_opcode flush_code = FLUSH_INVALID;
+	u32 opcode;
+	struct i40iw_terminate_hdr *termhdr;
+
+	termhdr = (struct i40iw_terminate_hdr *)qp->q2_buf;
+	memset(termhdr, 0, Q2_BAD_FRAME_OFFSET);
+
+	if (info->q2_data_written) {
+		/* Use data from offending packet to fill in ddp & rdma hdrs */
+		pkt = i40iw_locate_mpa(pkt);
+		ddp_seg_len = ntohs(*(u16 *)pkt);
+		if (ddp_seg_len) {
+			copy_len = 2;
+			termhdr->hdrct = DDP_LEN_FLAG;
+			if (pkt[2] & 0x80) {
+				is_tagged = 1;
+				if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+					copy_len += TERM_DDP_LEN_TAGGED;
+					termhdr->hdrct |= DDP_HDR_FLAG;
+				}
+			} else {
+				if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+					copy_len += TERM_DDP_LEN_UNTAGGED;
+					termhdr->hdrct |= DDP_HDR_FLAG;
+				}
+
+				if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
+					if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
+						copy_len += TERM_RDMA_LEN;
+						termhdr->hdrct |= RDMA_HDR_FLAG;
+					}
+				}
+			}
+		}
+	}
+
+	opcode = i40iw_iwarp_opcode(info, pkt);
+
+	switch (info->ae_id) {
+	case I40IW_AE_AMP_UNALLOCATED_STAG:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		if (opcode == I40IW_OP_TYPE_RDMA_WRITE)
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+					    (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_STAG);
+		else
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
+		break;
+	case I40IW_AE_AMP_BOUNDS_VIOLATION:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		if (info->q2_data_written)
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+					    (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_BOUNDS);
+		else
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_BOUNDS);
+		break;
+	case I40IW_AE_AMP_BAD_PD:
+		switch (opcode) {
+		case I40IW_OP_TYPE_RDMA_WRITE:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+					    (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_UNASSOC_STAG);
+			break;
+		case I40IW_OP_TYPE_SEND_INV:
+		case I40IW_OP_TYPE_SEND_SOL_INV:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_CANT_INV_STAG);
+			break;
+		default:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_UNASSOC_STAG);
+		}
+		break;
+	case I40IW_AE_AMP_INVALID_STAG:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
+		break;
+	case I40IW_AE_AMP_BAD_QP:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
+		break;
+	case I40IW_AE_AMP_BAD_STAG_KEY:
+	case I40IW_AE_AMP_BAD_STAG_INDEX:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		switch (opcode) {
+		case I40IW_OP_TYPE_SEND_INV:
+		case I40IW_OP_TYPE_SEND_SOL_INV:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_CANT_INV_STAG);
+			break;
+		default:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_STAG);
+		}
+		break;
+	case I40IW_AE_AMP_RIGHTS_VIOLATION:
+	case I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+	case I40IW_AE_PRIV_OPERATION_DENIED:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_ACCESS);
+		break;
+	case I40IW_AE_AMP_TO_WRAP:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_TO_WRAP);
+		break;
+	case I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+				    (LAYER_MPA << 4) | DDP_LLP, MPA_MARKER);
+		break;
+	case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_MPA << 4) | DDP_LLP, MPA_CRC);
+		break;
+	case I40IW_AE_LLP_SEGMENT_TOO_LARGE:
+	case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+				    (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
+		break;
+	case I40IW_AE_LCE_QP_CATASTROPHIC:
+	case I40IW_AE_DDP_NO_L_BIT:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
+				    (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
+		break;
+	case I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN:
+	case I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_RANGE);
+		break;
+	case I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_TOO_LONG);
+		break;
+	case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
+		if (is_tagged)
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+					    (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_DDP_VER);
+		else
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+					    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_DDP_VER);
+		break;
+	case I40IW_AE_DDP_UBE_INVALID_MO:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MO);
+		break;
+	case I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_NO_BUF);
+		break;
+	case I40IW_AE_DDP_UBE_INVALID_QN:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
+		break;
+	case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_RDMAP_VER);
+		break;
+	case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNEXPECTED_OP);
+		break;
+	default:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNSPECIFIED);
+		break;
+	}
+
+	if (copy_len)
+		memcpy(termhdr + 1, pkt, copy_len);
+
+	if (flush_code && !info->in_rdrsp_wr)
+		qp->sq_flush = (info->sq) ? true : false;
+
+	return sizeof(struct i40iw_terminate_hdr) + copy_len;
+}
+
+/**
+ * i40iw_terminate_send_fin() - Send fin for terminate message
+ * @qp: qp associated with received terminate AE
+ */
+void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp)
+{
+	/* Send the fin only */
+	i40iw_term_modify_qp(qp,
+			     I40IW_QP_STATE_TERMINATE,
+			     I40IWQP_TERM_SEND_FIN_ONLY,
+			     0);
+}
+
+/**
+ * i40iw_terminate_connection() - Bad AE and send terminate to remote QP
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
+{
+	u8 termlen = 0;
+
+	if (qp->term_flags & I40IW_TERM_SENT)
+		return;         /* Sanity check */
+
+	/* Eventtype can change from bld_terminate_hdr */
+	qp->eventtype = TERM_EVENT_QP_FATAL;
+	termlen = i40iw_bld_terminate_hdr(qp, info);
+	i40iw_terminate_start_timer(qp);
+	qp->term_flags |= I40IW_TERM_SENT;
+	i40iw_term_modify_qp(qp, I40IW_QP_STATE_TERMINATE,
+			     I40IWQP_TERM_SEND_TERM_ONLY, termlen);
+}
+
+/**
+ * i40iw_terminate_received - handle terminate received AE
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
+{
+	u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+	u32 *mpa;
+	u8 ddp_ctl;
+	u8 rdma_ctl;
+	u16 aeq_id = 0;
+	struct i40iw_terminate_hdr *termhdr;
+
+	mpa = (u32 *)i40iw_locate_mpa(pkt);
+	if (info->q2_data_written) {
+		/* did not validate the frame - do it now */
+		ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
+		rdma_ctl = ntohl(mpa[0]) & 0xff;
+		if ((ddp_ctl & 0xc0) != 0x40)
+			aeq_id = I40IW_AE_LCE_QP_CATASTROPHIC;
+		else if ((ddp_ctl & 0x03) != 1)
+			aeq_id = I40IW_AE_DDP_UBE_INVALID_DDP_VERSION;
+		else if (ntohl(mpa[2]) != 2)
+			aeq_id = I40IW_AE_DDP_UBE_INVALID_QN;
+		else if (ntohl(mpa[3]) != 1)
+			aeq_id = I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN;
+		else if (ntohl(mpa[4]) != 0)
+			aeq_id = I40IW_AE_DDP_UBE_INVALID_MO;
+		else if ((rdma_ctl & 0xc0) != 0x40)
+			aeq_id = I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+		info->ae_id = aeq_id;
+		if (info->ae_id) {
+			/* Bad terminate recvd - send back a terminate */
+			i40iw_terminate_connection(qp, info);
+			return;
+		}
+	}
+
+	qp->term_flags |= I40IW_TERM_RCVD;
+	qp->eventtype = TERM_EVENT_QP_FATAL;
+	termhdr = (struct i40iw_terminate_hdr *)&mpa[5];
+	if (termhdr->layer_etype == RDMAP_REMOTE_PROT ||
+	    termhdr->layer_etype == RDMAP_REMOTE_OP) {
+		i40iw_terminate_done(qp, 0);
+	} else {
+		i40iw_terminate_start_timer(qp);
+		i40iw_terminate_send_fin(qp);
+	}
+}
+
+/**
+ * i40iw_hw_stat_init - Initiliaze HW stats table
+ * @devstat: pestat struct
+ * @fcn_idx: PCI fn id
+ * @hw: PF i40iw_hw structure.
+ * @is_pf: Is it a PF?
+ *
+ * Populate the HW stat table with register offset addr for each
+ * stat. And start the perioidic stats timer.
+ */
+static void i40iw_hw_stat_init(struct i40iw_dev_pestat *devstat,
+			       u8 fcn_idx,
+			       struct i40iw_hw *hw, bool is_pf)
+{
+	u32 stat_reg_offset;
+	u32 stat_index;
+	struct i40iw_dev_hw_stat_offsets *stat_table =
+		&devstat->hw_stat_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+
+	devstat->hw = hw;
+
+	if (is_pf) {
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+				I40E_GLPES_PFIP4RXDISCARD(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+				I40E_GLPES_PFIP4RXTRUNC(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+				I40E_GLPES_PFIP4TXNOROUTE(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+				I40E_GLPES_PFIP6RXDISCARD(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+				I40E_GLPES_PFIP6RXTRUNC(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+				I40E_GLPES_PFIP6TXNOROUTE(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+				I40E_GLPES_PFTCPRTXSEG(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+				I40E_GLPES_PFTCPRXOPTERR(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+				I40E_GLPES_PFTCPRXPROTOERR(fcn_idx);
+
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+				I40E_GLPES_PFIP4RXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+				I40E_GLPES_PFIP4RXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+				I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+				I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+				I40E_GLPES_PFIP4TXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+				I40E_GLPES_PFIP4TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+				I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+				I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+				I40E_GLPES_PFIP6RXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+				I40E_GLPES_PFIP6RXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+				I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+				I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+				I40E_GLPES_PFIP6TXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+				I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+				I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+				I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+				I40E_GLPES_PFTCPRXSEGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+				I40E_GLPES_PFTCPTXSEGLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+				I40E_GLPES_PFRDMARXRDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+				I40E_GLPES_PFRDMARXSNDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+				I40E_GLPES_PFRDMARXWRSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+				I40E_GLPES_PFRDMATXRDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+				I40E_GLPES_PFRDMATXSNDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+				I40E_GLPES_PFRDMATXWRSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+				I40E_GLPES_PFRDMAVBNDLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+				I40E_GLPES_PFRDMAVINVLO(fcn_idx);
+	} else {
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+				I40E_GLPES_VFIP4RXDISCARD(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+				I40E_GLPES_VFIP4RXTRUNC(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+				I40E_GLPES_VFIP4TXNOROUTE(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+				I40E_GLPES_VFIP6RXDISCARD(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+				I40E_GLPES_VFIP6RXTRUNC(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+				I40E_GLPES_VFIP6TXNOROUTE(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+				I40E_GLPES_VFTCPRTXSEG(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+				I40E_GLPES_VFTCPRXOPTERR(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+				I40E_GLPES_VFTCPRXPROTOERR(fcn_idx);
+
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+				I40E_GLPES_VFIP4RXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+				I40E_GLPES_VFIP4RXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+				I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+				I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+				I40E_GLPES_VFIP4TXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+				I40E_GLPES_VFIP4TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+				I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+				I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+				I40E_GLPES_VFIP6RXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+				I40E_GLPES_VFIP6RXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+				I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+				I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+				I40E_GLPES_VFIP6TXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+				I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+				I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+				I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+				I40E_GLPES_VFTCPRXSEGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+				I40E_GLPES_VFTCPTXSEGLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+				I40E_GLPES_VFRDMARXRDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+				I40E_GLPES_VFRDMARXSNDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+				I40E_GLPES_VFRDMARXWRSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+				I40E_GLPES_VFRDMATXRDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+				I40E_GLPES_VFRDMATXSNDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+				I40E_GLPES_VFRDMATXWRSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+				I40E_GLPES_VFRDMAVBNDLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+				I40E_GLPES_VFRDMAVINVLO(fcn_idx);
+	}
+
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stat_index++) {
+		stat_reg_offset = stat_table->stat_offset_64[stat_index];
+		last_rd_stats->stat_value_64[stat_index] =
+			readq(devstat->hw->hw_addr + stat_reg_offset);
+	}
+
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stat_index++) {
+		stat_reg_offset = stat_table->stat_offset_32[stat_index];
+		last_rd_stats->stat_value_32[stat_index] =
+			i40iw_rd32(devstat->hw, stat_reg_offset);
+	}
+}
+
+/**
+ * i40iw_hw_stat_read_32 - Read 32-bit HW stat counters and accommodates for roll-overs.
+ * @devstat: pestat struct
+ * @index: index in HW stat table which contains offset reg-addr
+ * @value: hw stat value
+ */
+static void i40iw_hw_stat_read_32(struct i40iw_dev_pestat *devstat,
+				  enum i40iw_hw_stat_index_32b index,
+				  u64 *value)
+{
+	struct i40iw_dev_hw_stat_offsets *stat_table =
+		&devstat->hw_stat_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+	u64 new_stat_value = 0;
+	u32 stat_reg_offset = stat_table->stat_offset_32[index];
+
+	new_stat_value = i40iw_rd32(devstat->hw, stat_reg_offset);
+	/*roll-over case */
+	if (new_stat_value < last_rd_stats->stat_value_32[index])
+		hw_stats->stat_value_32[index] += new_stat_value;
+	else
+		hw_stats->stat_value_32[index] +=
+			new_stat_value - last_rd_stats->stat_value_32[index];
+	last_rd_stats->stat_value_32[index] = new_stat_value;
+	*value = hw_stats->stat_value_32[index];
+}
+
+/**
+ * i40iw_hw_stat_read_64 - Read HW stat counters (greater than 32-bit) and accommodates for roll-overs.
+ * @devstat: pestat struct
+ * @index: index in HW stat table which contains offset reg-addr
+ * @value: hw stat value
+ */
+static void i40iw_hw_stat_read_64(struct i40iw_dev_pestat *devstat,
+				  enum i40iw_hw_stat_index_64b index,
+				  u64 *value)
+{
+	struct i40iw_dev_hw_stat_offsets *stat_table =
+		&devstat->hw_stat_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+	u64 new_stat_value = 0;
+	u32 stat_reg_offset = stat_table->stat_offset_64[index];
+
+	new_stat_value = readq(devstat->hw->hw_addr + stat_reg_offset);
+	/*roll-over case */
+	if (new_stat_value < last_rd_stats->stat_value_64[index])
+		hw_stats->stat_value_64[index] += new_stat_value;
+	else
+		hw_stats->stat_value_64[index] +=
+			new_stat_value - last_rd_stats->stat_value_64[index];
+	last_rd_stats->stat_value_64[index] = new_stat_value;
+	*value = hw_stats->stat_value_64[index];
+}
+
+/**
+ * i40iw_hw_stat_read_all - read all HW stat counters
+ * @devstat: pestat struct
+ * @stat_values: hw stats structure
+ *
+ * Read all the HW stat counters and populates hw_stats structure
+ * of passed-in dev's pestat as well as copy created in stat_values.
+ */
+static void i40iw_hw_stat_read_all(struct i40iw_dev_pestat *devstat,
+				   struct i40iw_dev_hw_stats *stat_values)
+{
+	u32 stat_index;
+
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stat_index++)
+		i40iw_hw_stat_read_32(devstat, stat_index,
+				      &stat_values->stat_value_32[stat_index]);
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stat_index++)
+		i40iw_hw_stat_read_64(devstat, stat_index,
+				      &stat_values->stat_value_64[stat_index]);
+}
+
+/**
+ * i40iw_hw_stat_refresh_all - Update all HW stat structs
+ * @devstat: pestat struct
+ * @stat_values: hw stats structure
+ *
+ * Read all the HW stat counters to refresh values in hw_stats structure
+ * of passed-in dev's pestat
+ */
+static void i40iw_hw_stat_refresh_all(struct i40iw_dev_pestat *devstat)
+{
+	u64 stat_value;
+	u32 stat_index;
+
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stat_index++)
+		i40iw_hw_stat_read_32(devstat, stat_index, &stat_value);
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stat_index++)
+		i40iw_hw_stat_read_64(devstat, stat_index, &stat_value);
+}
+
+static struct i40iw_cqp_ops iw_cqp_ops = {
+	i40iw_sc_cqp_init,
+	i40iw_sc_cqp_create,
+	i40iw_sc_cqp_post_sq,
+	i40iw_sc_cqp_get_next_send_wqe,
+	i40iw_sc_cqp_destroy,
+	i40iw_sc_poll_for_cqp_op_done
+};
+
+static struct i40iw_ccq_ops iw_ccq_ops = {
+	i40iw_sc_ccq_init,
+	i40iw_sc_ccq_create,
+	i40iw_sc_ccq_destroy,
+	i40iw_sc_ccq_create_done,
+	i40iw_sc_ccq_get_cqe_info,
+	i40iw_sc_ccq_arm
+};
+
+static struct i40iw_ceq_ops iw_ceq_ops = {
+	i40iw_sc_ceq_init,
+	i40iw_sc_ceq_create,
+	i40iw_sc_cceq_create_done,
+	i40iw_sc_cceq_destroy_done,
+	i40iw_sc_cceq_create,
+	i40iw_sc_ceq_destroy,
+	i40iw_sc_process_ceq
+};
+
+static struct i40iw_aeq_ops iw_aeq_ops = {
+	i40iw_sc_aeq_init,
+	i40iw_sc_aeq_create,
+	i40iw_sc_aeq_destroy,
+	i40iw_sc_get_next_aeqe,
+	i40iw_sc_repost_aeq_entries,
+	i40iw_sc_aeq_create_done,
+	i40iw_sc_aeq_destroy_done
+};
+
+/* iwarp pd ops */
+static struct i40iw_pd_ops iw_pd_ops = {
+	i40iw_sc_pd_init,
+};
+
+static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
+	i40iw_sc_qp_init,
+	i40iw_sc_qp_create,
+	i40iw_sc_qp_modify,
+	i40iw_sc_qp_destroy,
+	i40iw_sc_qp_flush_wqes,
+	i40iw_sc_qp_upload_context,
+	i40iw_sc_qp_setctx,
+	i40iw_sc_send_lsmm,
+	i40iw_sc_send_lsmm_nostag,
+	i40iw_sc_send_rtt,
+	i40iw_sc_post_wqe0,
+};
+
+static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
+	i40iw_sc_cq_init,
+	i40iw_sc_cq_create,
+	i40iw_sc_cq_destroy,
+	i40iw_sc_cq_modify,
+};
+
+static struct i40iw_mr_ops iw_mr_ops = {
+	i40iw_sc_alloc_stag,
+	i40iw_sc_mr_reg_non_shared,
+	i40iw_sc_mr_reg_shared,
+	i40iw_sc_dealloc_stag,
+	i40iw_sc_query_stag,
+	i40iw_sc_mw_alloc
+};
+
+static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
+	i40iw_sc_manage_push_page,
+	i40iw_sc_manage_hmc_pm_func_table,
+	i40iw_sc_set_hmc_resource_profile,
+	i40iw_sc_commit_fpm_values,
+	i40iw_sc_query_fpm_values,
+	i40iw_sc_static_hmc_pages_allocated,
+	i40iw_sc_add_arp_cache_entry,
+	i40iw_sc_del_arp_cache_entry,
+	i40iw_sc_query_arp_cache_entry,
+	i40iw_sc_manage_apbvt_entry,
+	i40iw_sc_manage_qhash_table_entry,
+	i40iw_sc_alloc_local_mac_ipaddr_entry,
+	i40iw_sc_add_local_mac_ipaddr_entry,
+	i40iw_sc_del_local_mac_ipaddr_entry,
+	i40iw_sc_cqp_nop,
+	i40iw_sc_commit_fpm_values_done,
+	i40iw_sc_query_fpm_values_done,
+	i40iw_sc_manage_hmc_pm_func_table_done,
+	i40iw_sc_suspend_qp,
+	i40iw_sc_resume_qp
+};
+
+static struct i40iw_hmc_ops iw_hmc_ops = {
+	i40iw_sc_init_iw_hmc,
+	i40iw_sc_parse_fpm_query_buf,
+	i40iw_sc_configure_iw_fpm,
+	i40iw_sc_parse_fpm_commit_buf,
+	i40iw_sc_create_hmc_obj,
+	i40iw_sc_del_hmc_obj,
+	NULL,
+	NULL
+};
+
+static const struct i40iw_device_pestat_ops iw_device_pestat_ops = {
+	i40iw_hw_stat_init,
+	i40iw_hw_stat_read_32,
+	i40iw_hw_stat_read_64,
+	i40iw_hw_stat_read_all,
+	i40iw_hw_stat_refresh_all
+};
+
+/**
+ * i40iw_device_init_pestat - Initialize the pestat structure
+ * @dev: pestat struct
+ */
+enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *devstat)
+{
+	devstat->ops = iw_device_pestat_ops;
+	return 0;
+}
+
+/**
+ * i40iw_device_init - Initialize IWARP device
+ * @dev: IWARP device pointer
+ * @info: IWARP init info
+ */
+enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
+					 struct i40iw_device_init_info *info)
+{
+	u32 val;
+	u32 vchnl_ver = 0;
+	u16 hmc_fcn = 0;
+	enum i40iw_status_code ret_code = 0;
+	u8 db_size;
+
+	spin_lock_init(&dev->cqp_lock);
+	INIT_LIST_HEAD(&dev->cqp_cmd_head);             /* for the cqp commands backlog. */
+
+	i40iw_device_init_uk(&dev->dev_uk);
+
+	dev->debug_mask = info->debug_mask;
+
+	ret_code = i40iw_device_init_pestat(&dev->dev_pestat);
+	if (ret_code) {
+		i40iw_debug(dev, I40IW_DEBUG_DEV,
+			    "%s: i40iw_device_init_pestat failed\n", __func__);
+		return ret_code;
+	}
+	dev->hmc_fn_id = info->hmc_fn_id;
+	dev->qs_handle = info->qs_handle;
+	dev->exception_lan_queue = info->exception_lan_queue;
+	dev->is_pf = info->is_pf;
+
+	dev->fpm_query_buf_pa = info->fpm_query_buf_pa;
+	dev->fpm_query_buf = info->fpm_query_buf;
+
+	dev->fpm_commit_buf_pa = info->fpm_commit_buf_pa;
+	dev->fpm_commit_buf = info->fpm_commit_buf;
+
+	dev->hw = info->hw;
+	dev->hw->hw_addr = info->bar0;
+
+	val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
+	dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
+
+	if (dev->is_pf) {
+		dev->dev_pestat.ops.iw_hw_stat_init(&dev->dev_pestat,
+			dev->hmc_fn_id, dev->hw, true);
+		spin_lock_init(&dev->dev_pestat.stats_lock);
+		/*start the periodic stats_timer */
+		i40iw_hw_stats_start_timer(dev);
+		val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL);
+		db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE);
+		if ((db_size != I40IW_PE_DB_SIZE_4M) &&
+		    (db_size != I40IW_PE_DB_SIZE_8M)) {
+			i40iw_debug(dev, I40IW_DEBUG_DEV,
+				    "%s: PE doorbell is not enabled in CSR val 0x%x\n",
+				    __func__, val);
+			ret_code = I40IW_ERR_PE_DOORBELL_NOT_ENABLED;
+			return ret_code;
+		}
+		dev->db_addr = dev->hw->hw_addr + I40IW_DB_ADDR_OFFSET;
+		dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_pf;
+	} else {
+		dev->db_addr = dev->hw->hw_addr + I40IW_VF_DB_ADDR_OFFSET;
+	}
+
+	dev->cqp_ops = &iw_cqp_ops;
+	dev->ccq_ops = &iw_ccq_ops;
+	dev->ceq_ops = &iw_ceq_ops;
+	dev->aeq_ops = &iw_aeq_ops;
+	dev->cqp_misc_ops = &iw_cqp_misc_ops;
+	dev->iw_pd_ops = &iw_pd_ops;
+	dev->iw_priv_qp_ops = &iw_priv_qp_ops;
+	dev->iw_priv_cq_ops = &iw_priv_cq_ops;
+	dev->mr_ops = &iw_mr_ops;
+	dev->hmc_ops = &iw_hmc_ops;
+	dev->vchnl_if.vchnl_send = info->vchnl_send;
+	if (dev->vchnl_if.vchnl_send)
+		dev->vchnl_up = true;
+	else
+		dev->vchnl_up = false;
+	if (!dev->is_pf) {
+		dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_vf;
+		ret_code = i40iw_vchnl_vf_get_ver(dev, &vchnl_ver);
+		if (!ret_code) {
+			i40iw_debug(dev, I40IW_DEBUG_DEV,
+				    "%s: Get Channel version rc = 0x%0x, version is %u\n",
+				__func__, ret_code, vchnl_ver);
+			ret_code = i40iw_vchnl_vf_get_hmc_fcn(dev, &hmc_fcn);
+			if (!ret_code) {
+				i40iw_debug(dev, I40IW_DEBUG_DEV,
+					    "%s Get HMC function rc = 0x%0x, hmc fcn is %u\n",
+					    __func__, ret_code, hmc_fcn);
+				dev->hmc_fn_id = (u8)hmc_fcn;
+			}
+		}
+	}
+	dev->iw_vf_cqp_ops = &iw_vf_cqp_ops;
+
+	return ret_code;
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
new file mode 100644
index 0000000..aab88d6
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h

@@ -0,0 +1,1713 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_D_H
+#define I40IW_D_H
+
+#define I40IW_DB_ADDR_OFFSET    (4 * 1024 * 1024 - 64 * 1024)
+#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
+
+#define I40IW_PUSH_OFFSET       (4 * 1024 * 1024)
+#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16
+#define I40IW_VF_PUSH_OFFSET    ((8 + 64) * 1024)
+#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2
+
+#define I40IW_PE_DB_SIZE_4M     1
+#define I40IW_PE_DB_SIZE_8M     2
+
+#define I40IW_DDP_VER 1
+#define I40IW_RDMAP_VER 1
+
+#define I40IW_RDMA_MODE_RDMAC 0
+#define I40IW_RDMA_MODE_IETF  1
+
+#define I40IW_QP_STATE_INVALID 0
+#define I40IW_QP_STATE_IDLE 1
+#define I40IW_QP_STATE_RTS 2
+#define I40IW_QP_STATE_CLOSING 3
+#define I40IW_QP_STATE_RESERVED 4
+#define I40IW_QP_STATE_TERMINATE 5
+#define I40IW_QP_STATE_ERROR 6
+
+#define I40IW_STAG_STATE_INVALID 0
+#define I40IW_STAG_STATE_VALID 1
+
+#define I40IW_STAG_TYPE_SHARED 0
+#define I40IW_STAG_TYPE_NONSHARED 1
+
+#define I40IW_MAX_USER_PRIORITY 8
+
+#define LS_64_1(val, bits)      ((u64)(uintptr_t)val << bits)
+#define RS_64_1(val, bits)      ((u64)(uintptr_t)val >> bits)
+#define LS_32_1(val, bits)      (u32)(val << bits)
+#define RS_32_1(val, bits)      (u32)(val >> bits)
+#define I40E_HI_DWORD(x)        ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
+
+#define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK))
+
+#define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT)
+#define LS_32(val, field) ((val << field ## _SHIFT) & (field ## _MASK))
+#define RS_32(val, field) ((val & field ## _MASK) >> field ## _SHIFT)
+
+#define TERM_DDP_LEN_TAGGED     14
+#define TERM_DDP_LEN_UNTAGGED   18
+#define TERM_RDMA_LEN           28
+#define RDMA_OPCODE_MASK        0x0f
+#define RDMA_READ_REQ_OPCODE    1
+#define Q2_BAD_FRAME_OFFSET     72
+#define CQE_MAJOR_DRV           0x8000
+
+#define I40IW_TERM_SENT 0x01
+#define I40IW_TERM_RCVD 0x02
+#define I40IW_TERM_DONE 0x04
+#define I40IW_MAC_HLEN  14
+
+#define I40IW_INVALID_WQE_INDEX 0xffffffff
+
+#define I40IW_CQP_WAIT_POLL_REGS 1
+#define I40IW_CQP_WAIT_POLL_CQ 2
+#define I40IW_CQP_WAIT_EVENT 3
+
+#define I40IW_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
+
+#define I40IW_GET_CURRENT_CQ_ELEMENT(_cq) \
+	( \
+		&((_cq)->cq_base[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)])  \
+	)
+#define I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(_cq) \
+	( \
+		&(((struct i40iw_extended_cqe *)        \
+		   ((_cq)->cq_base))[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)]) \
+	)
+
+#define I40IW_GET_CURRENT_AEQ_ELEMENT(_aeq) \
+	( \
+		&_aeq->aeqe_base[I40IW_RING_GETCURRENT_TAIL(_aeq->aeq_ring)]   \
+	)
+
+#define I40IW_GET_CURRENT_CEQ_ELEMENT(_ceq) \
+	( \
+		&_ceq->ceqe_base[I40IW_RING_GETCURRENT_TAIL(_ceq->ceq_ring)]   \
+	)
+
+#define I40IW_AE_SOURCE_RQ              0x1
+#define I40IW_AE_SOURCE_RQ_0011         0x3
+
+#define I40IW_AE_SOURCE_CQ              0x2
+#define I40IW_AE_SOURCE_CQ_0110         0x6
+#define I40IW_AE_SOURCE_CQ_1010         0xA
+#define I40IW_AE_SOURCE_CQ_1110         0xE
+
+#define I40IW_AE_SOURCE_SQ              0x5
+#define I40IW_AE_SOURCE_SQ_0111         0x7
+
+#define I40IW_AE_SOURCE_IN_RR_WR        0x9
+#define I40IW_AE_SOURCE_IN_RR_WR_1011   0xB
+#define I40IW_AE_SOURCE_OUT_RR          0xD
+#define I40IW_AE_SOURCE_OUT_RR_1111     0xF
+
+#define I40IW_TCP_STATE_NON_EXISTENT 0
+#define I40IW_TCP_STATE_CLOSED 1
+#define I40IW_TCP_STATE_LISTEN 2
+#define I40IW_STATE_SYN_SEND 3
+#define I40IW_TCP_STATE_SYN_RECEIVED 4
+#define I40IW_TCP_STATE_ESTABLISHED 5
+#define I40IW_TCP_STATE_CLOSE_WAIT 6
+#define I40IW_TCP_STATE_FIN_WAIT_1 7
+#define I40IW_TCP_STATE_CLOSING  8
+#define I40IW_TCP_STATE_LAST_ACK 9
+#define I40IW_TCP_STATE_FIN_WAIT_2 10
+#define I40IW_TCP_STATE_TIME_WAIT 11
+#define I40IW_TCP_STATE_RESERVED_1 12
+#define I40IW_TCP_STATE_RESERVED_2 13
+#define I40IW_TCP_STATE_RESERVED_3 14
+#define I40IW_TCP_STATE_RESERVED_4 15
+
+/* ILQ CQP hash table fields */
+#define I40IW_CQPSQ_QHASH_VLANID_SHIFT 32
+#define I40IW_CQPSQ_QHASH_VLANID_MASK \
+	((u64)0xfff << I40IW_CQPSQ_QHASH_VLANID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_QPN_SHIFT 32
+#define I40IW_CQPSQ_QHASH_QPN_MASK \
+	((u64)0x3ffff << I40IW_CQPSQ_QHASH_QPN_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT 0
+#define I40IW_CQPSQ_QHASH_QS_HANDLE_MASK ((u64)0x3ff << I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT 16
+#define I40IW_CQPSQ_QHASH_SRC_PORT_MASK \
+	((u64)0xffff << I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT 0
+#define I40IW_CQPSQ_QHASH_DEST_PORT_MASK \
+	((u64)0xffff << I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR0_SHIFT 32
+#define I40IW_CQPSQ_QHASH_ADDR0_MASK \
+	((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR0_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR1_SHIFT 0
+#define I40IW_CQPSQ_QHASH_ADDR1_MASK \
+	((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR1_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR2_SHIFT 32
+#define I40IW_CQPSQ_QHASH_ADDR2_MASK \
+	((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR2_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR3_SHIFT 0
+#define I40IW_CQPSQ_QHASH_ADDR3_MASK \
+	((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR3_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_WQEVALID_SHIFT 63
+#define I40IW_CQPSQ_QHASH_WQEVALID_MASK \
+	((u64)0x1 << I40IW_CQPSQ_QHASH_WQEVALID_SHIFT)
+#define I40IW_CQPSQ_QHASH_OPCODE_SHIFT 32
+#define I40IW_CQPSQ_QHASH_OPCODE_MASK \
+	((u64)0x3f << I40IW_CQPSQ_QHASH_OPCODE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_MANAGE_SHIFT 61
+#define I40IW_CQPSQ_QHASH_MANAGE_MASK \
+	((u64)0x3 << I40IW_CQPSQ_QHASH_MANAGE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT 60
+#define I40IW_CQPSQ_QHASH_IPV4VALID_MASK \
+	((u64)0x1 << I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_VLANVALID_SHIFT 59
+#define I40IW_CQPSQ_QHASH_VLANVALID_MASK \
+	((u64)0x1 << I40IW_CQPSQ_QHASH_VLANVALID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT 42
+#define I40IW_CQPSQ_QHASH_ENTRYTYPE_MASK \
+	((u64)0x7 << I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT)
+/* CQP Host Context */
+#define I40IW_CQPHC_EN_DC_TCP_SHIFT 0
+#define I40IW_CQPHC_EN_DC_TCP_MASK (1UL << I40IW_CQPHC_EN_DC_TCP_SHIFT)
+
+#define I40IW_CQPHC_SQSIZE_SHIFT 8
+#define I40IW_CQPHC_SQSIZE_MASK (0xfUL << I40IW_CQPHC_SQSIZE_SHIFT)
+
+#define I40IW_CQPHC_DISABLE_PFPDUS_SHIFT 1
+#define I40IW_CQPHC_DISABLE_PFPDUS_MASK (0x1UL << I40IW_CQPHC_DISABLE_PFPDUS_SHIFT)
+
+#define I40IW_CQPHC_ENABLED_VFS_SHIFT 32
+#define I40IW_CQPHC_ENABLED_VFS_MASK (0x3fULL << I40IW_CQPHC_ENABLED_VFS_SHIFT)
+
+#define I40IW_CQPHC_HMC_PROFILE_SHIFT 0
+#define I40IW_CQPHC_HMC_PROFILE_MASK (0x7ULL << I40IW_CQPHC_HMC_PROFILE_SHIFT)
+
+#define I40IW_CQPHC_SVER_SHIFT 24
+#define I40IW_CQPHC_SVER_MASK (0xffUL << I40IW_CQPHC_SVER_SHIFT)
+
+#define I40IW_CQPHC_SQBASE_SHIFT 9
+#define I40IW_CQPHC_SQBASE_MASK \
+	(0xfffffffffffffeULL << I40IW_CQPHC_SQBASE_SHIFT)
+
+#define I40IW_CQPHC_QPCTX_SHIFT 0
+#define I40IW_CQPHC_QPCTX_MASK  \
+	(0xffffffffffffffffULL << I40IW_CQPHC_QPCTX_SHIFT)
+#define I40IW_CQPHC_SVER        1
+
+#define I40IW_CQP_SW_SQSIZE_4 4
+#define I40IW_CQP_SW_SQSIZE_2048 2048
+
+/* iWARP QP Doorbell shadow area */
+#define I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT 0
+#define I40IW_QP_DBSA_HW_SQ_TAIL_MASK \
+	(0x3fffUL << I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT)
+
+/* Completion Queue Doorbell shadow area */
+#define I40IW_CQ_DBSA_CQEIDX_SHIFT 0
+#define I40IW_CQ_DBSA_CQEIDX_MASK (0xfffffUL << I40IW_CQ_DBSA_CQEIDX_SHIFT)
+
+#define I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT 0
+#define I40IW_CQ_DBSA_SW_CQ_SELECT_MASK \
+	(0x3fffUL << I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_NEXT_SHIFT 14
+#define I40IW_CQ_DBSA_ARM_NEXT_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT 15
+#define I40IW_CQ_DBSA_ARM_NEXT_SE_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT 16
+#define I40IW_CQ_DBSA_ARM_SEQ_NUM_MASK \
+	(0x3UL << I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT)
+
+/* CQP and iWARP Completion Queue */
+#define I40IW_CQ_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQ_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CCQ_OPRETVAL_SHIFT 0
+#define I40IW_CCQ_OPRETVAL_MASK (0xffffffffUL << I40IW_CCQ_OPRETVAL_SHIFT)
+
+#define I40IW_CQ_MINERR_SHIFT 0
+#define I40IW_CQ_MINERR_MASK (0xffffUL << I40IW_CQ_MINERR_SHIFT)
+
+#define I40IW_CQ_MAJERR_SHIFT 16
+#define I40IW_CQ_MAJERR_MASK (0xffffUL << I40IW_CQ_MAJERR_SHIFT)
+
+#define I40IW_CQ_WQEIDX_SHIFT 32
+#define I40IW_CQ_WQEIDX_MASK (0x3fffULL << I40IW_CQ_WQEIDX_SHIFT)
+
+#define I40IW_CQ_ERROR_SHIFT 55
+#define I40IW_CQ_ERROR_MASK (1ULL << I40IW_CQ_ERROR_SHIFT)
+
+#define I40IW_CQ_SQ_SHIFT 62
+#define I40IW_CQ_SQ_MASK (1ULL << I40IW_CQ_SQ_SHIFT)
+
+#define I40IW_CQ_VALID_SHIFT 63
+#define I40IW_CQ_VALID_MASK (1ULL << I40IW_CQ_VALID_SHIFT)
+
+#define I40IWCQ_PAYLDLEN_SHIFT 0
+#define I40IWCQ_PAYLDLEN_MASK (0xffffffffUL << I40IWCQ_PAYLDLEN_SHIFT)
+
+#define I40IWCQ_TCPSEQNUM_SHIFT 32
+#define I40IWCQ_TCPSEQNUM_MASK (0xffffffffULL << I40IWCQ_TCPSEQNUM_SHIFT)
+
+#define I40IWCQ_INVSTAG_SHIFT 0
+#define I40IWCQ_INVSTAG_MASK (0xffffffffUL << I40IWCQ_INVSTAG_SHIFT)
+
+#define I40IWCQ_QPID_SHIFT 32
+#define I40IWCQ_QPID_MASK (0x3ffffULL << I40IWCQ_QPID_SHIFT)
+
+#define I40IWCQ_PSHDROP_SHIFT 51
+#define I40IWCQ_PSHDROP_MASK (1ULL << I40IWCQ_PSHDROP_SHIFT)
+
+#define I40IWCQ_SRQ_SHIFT 52
+#define I40IWCQ_SRQ_MASK (1ULL << I40IWCQ_SRQ_SHIFT)
+
+#define I40IWCQ_STAG_SHIFT 53
+#define I40IWCQ_STAG_MASK (1ULL << I40IWCQ_STAG_SHIFT)
+
+#define I40IWCQ_SOEVENT_SHIFT 54
+#define I40IWCQ_SOEVENT_MASK (1ULL << I40IWCQ_SOEVENT_SHIFT)
+
+#define I40IWCQ_OP_SHIFT 56
+#define I40IWCQ_OP_MASK (0x3fULL << I40IWCQ_OP_SHIFT)
+
+/* CEQE format */
+#define I40IW_CEQE_CQCTX_SHIFT 0
+#define I40IW_CEQE_CQCTX_MASK   \
+	(0x7fffffffffffffffULL << I40IW_CEQE_CQCTX_SHIFT)
+
+#define I40IW_CEQE_VALID_SHIFT 63
+#define I40IW_CEQE_VALID_MASK (1ULL << I40IW_CEQE_VALID_SHIFT)
+
+/* AEQE format */
+#define I40IW_AEQE_COMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_AEQE_COMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_AEQE_QPCQID_SHIFT 0
+#define I40IW_AEQE_QPCQID_MASK (0x3ffffUL << I40IW_AEQE_QPCQID_SHIFT)
+
+#define I40IW_AEQE_WQDESCIDX_SHIFT 18
+#define I40IW_AEQE_WQDESCIDX_MASK (0x3fffULL << I40IW_AEQE_WQDESCIDX_SHIFT)
+
+#define I40IW_AEQE_OVERFLOW_SHIFT 33
+#define I40IW_AEQE_OVERFLOW_MASK (1ULL << I40IW_AEQE_OVERFLOW_SHIFT)
+
+#define I40IW_AEQE_AECODE_SHIFT 34
+#define I40IW_AEQE_AECODE_MASK (0xffffULL << I40IW_AEQE_AECODE_SHIFT)
+
+#define I40IW_AEQE_AESRC_SHIFT 50
+#define I40IW_AEQE_AESRC_MASK (0xfULL << I40IW_AEQE_AESRC_SHIFT)
+
+#define I40IW_AEQE_IWSTATE_SHIFT 54
+#define I40IW_AEQE_IWSTATE_MASK (0x7ULL << I40IW_AEQE_IWSTATE_SHIFT)
+
+#define I40IW_AEQE_TCPSTATE_SHIFT 57
+#define I40IW_AEQE_TCPSTATE_MASK (0xfULL << I40IW_AEQE_TCPSTATE_SHIFT)
+
+#define I40IW_AEQE_Q2DATA_SHIFT 61
+#define I40IW_AEQE_Q2DATA_MASK (0x3ULL << I40IW_AEQE_Q2DATA_SHIFT)
+
+#define I40IW_AEQE_VALID_SHIFT 63
+#define I40IW_AEQE_VALID_MASK (1ULL << I40IW_AEQE_VALID_SHIFT)
+
+/* CQP SQ WQES */
+#define I40IW_QP_TYPE_IWARP     1
+#define I40IW_QP_TYPE_UDA       2
+#define I40IW_QP_TYPE_CQP       4
+
+#define I40IW_CQ_TYPE_IWARP     1
+#define I40IW_CQ_TYPE_ILQ       2
+#define I40IW_CQ_TYPE_IEQ       3
+#define I40IW_CQ_TYPE_CQP       4
+
+#define I40IWQP_TERM_SEND_TERM_AND_FIN          0
+#define I40IWQP_TERM_SEND_TERM_ONLY             1
+#define I40IWQP_TERM_SEND_FIN_ONLY              2
+#define I40IWQP_TERM_DONOT_SEND_TERM_OR_FIN     3
+
+#define I40IW_CQP_OP_CREATE_QP                  0
+#define I40IW_CQP_OP_MODIFY_QP                  0x1
+#define I40IW_CQP_OP_DESTROY_QP                 0x02
+#define I40IW_CQP_OP_CREATE_CQ                  0x03
+#define I40IW_CQP_OP_MODIFY_CQ                  0x04
+#define I40IW_CQP_OP_DESTROY_CQ                 0x05
+#define I40IW_CQP_OP_CREATE_SRQ                 0x06
+#define I40IW_CQP_OP_MODIFY_SRQ                 0x07
+#define I40IW_CQP_OP_DESTROY_SRQ                0x08
+#define I40IW_CQP_OP_ALLOC_STAG                 0x09
+#define I40IW_CQP_OP_REG_MR                     0x0a
+#define I40IW_CQP_OP_QUERY_STAG                 0x0b
+#define I40IW_CQP_OP_REG_SMR                    0x0c
+#define I40IW_CQP_OP_DEALLOC_STAG               0x0d
+#define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE    0x0e
+#define I40IW_CQP_OP_MANAGE_ARP                 0x0f
+#define I40IW_CQP_OP_MANAGE_VF_PBLE_BP          0x10
+#define I40IW_CQP_OP_MANAGE_PUSH_PAGES          0x11
+#define I40IW_CQP_OP_MANAGE_PE_TEAM             0x12
+#define I40IW_CQP_OP_UPLOAD_CONTEXT             0x13
+#define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14
+#define I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE   0x15
+#define I40IW_CQP_OP_CREATE_CEQ                 0x16
+#define I40IW_CQP_OP_DESTROY_CEQ                0x18
+#define I40IW_CQP_OP_CREATE_AEQ                 0x19
+#define I40IW_CQP_OP_DESTROY_AEQ                0x1b
+#define I40IW_CQP_OP_CREATE_ADDR_VECT           0x1c
+#define I40IW_CQP_OP_MODIFY_ADDR_VECT           0x1d
+#define I40IW_CQP_OP_DESTROY_ADDR_VECT          0x1e
+#define I40IW_CQP_OP_UPDATE_PE_SDS              0x1f
+#define I40IW_CQP_OP_QUERY_FPM_VALUES           0x20
+#define I40IW_CQP_OP_COMMIT_FPM_VALUES          0x21
+#define I40IW_CQP_OP_FLUSH_WQES                 0x22
+#define I40IW_CQP_OP_MANAGE_APBVT               0x23
+#define I40IW_CQP_OP_NOP                        0x24
+#define I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25
+#define I40IW_CQP_OP_CREATE_UDA_MCAST_GROUP     0x26
+#define I40IW_CQP_OP_MODIFY_UDA_MCAST_GROUP     0x27
+#define I40IW_CQP_OP_DESTROY_UDA_MCAST_GROUP    0x28
+#define I40IW_CQP_OP_SUSPEND_QP                 0x29
+#define I40IW_CQP_OP_RESUME_QP                  0x2a
+#define I40IW_CQP_OP_SHMC_PAGES_ALLOCATED       0x2b
+#define I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE   0x2d
+
+#define I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT 16
+#define I40IW_UDA_QPSQ_NEXT_HEADER_MASK ((u64)0xff << I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT)
+
+#define I40IW_UDA_QPSQ_OPCODE_SHIFT 32
+#define I40IW_UDA_QPSQ_OPCODE_MASK ((u64)0x3f << I40IW_UDA_QPSQ_OPCODE_SHIFT)
+
+#define I40IW_UDA_QPSQ_MACLEN_SHIFT 56
+#define I40IW_UDA_QPSQ_MACLEN_MASK \
+	((u64)0x7f << I40IW_UDA_QPSQ_MACLEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_IPLEN_SHIFT 48
+#define I40IW_UDA_QPSQ_IPLEN_MASK \
+	((u64)0x7f << I40IW_UDA_QPSQ_IPLEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_L4T_SHIFT 30
+#define I40IW_UDA_QPSQ_L4T_MASK \
+	((u64)0x3 << I40IW_UDA_QPSQ_L4T_SHIFT)
+
+#define I40IW_UDA_QPSQ_IIPT_SHIFT 28
+#define I40IW_UDA_QPSQ_IIPT_MASK \
+	((u64)0x3 << I40IW_UDA_QPSQ_IIPT_SHIFT)
+
+#define I40IW_UDA_QPSQ_L4LEN_SHIFT 24
+#define I40IW_UDA_QPSQ_L4LEN_MASK ((u64)0xf << I40IW_UDA_QPSQ_L4LEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_AVIDX_SHIFT 0
+#define I40IW_UDA_QPSQ_AVIDX_MASK ((u64)0xffff << I40IW_UDA_QPSQ_AVIDX_SHIFT)
+
+#define I40IW_UDA_QPSQ_VALID_SHIFT 63
+#define I40IW_UDA_QPSQ_VALID_MASK \
+	((u64)0x1 << I40IW_UDA_QPSQ_VALID_SHIFT)
+
+#define I40IW_UDA_QPSQ_SIGCOMPL_SHIFT 62
+#define I40IW_UDA_QPSQ_SIGCOMPL_MASK ((u64)0x1 << I40IW_UDA_QPSQ_SIGCOMPL_SHIFT)
+
+#define I40IW_UDA_PAYLOADLEN_SHIFT 0
+#define I40IW_UDA_PAYLOADLEN_MASK ((u64)0x3fff << I40IW_UDA_PAYLOADLEN_SHIFT)
+
+#define I40IW_UDA_HDRLEN_SHIFT 16
+#define I40IW_UDA_HDRLEN_MASK ((u64)0x1ff << I40IW_UDA_HDRLEN_SHIFT)
+
+#define I40IW_VLAN_TAG_VALID_SHIFT 50
+#define I40IW_VLAN_TAG_VALID_MASK ((u64)0x1 << I40IW_VLAN_TAG_VALID_SHIFT)
+
+#define I40IW_UDA_L3PROTO_SHIFT 0
+#define I40IW_UDA_L3PROTO_MASK ((u64)0x3 << I40IW_UDA_L3PROTO_SHIFT)
+
+#define I40IW_UDA_L4PROTO_SHIFT 16
+#define I40IW_UDA_L4PROTO_MASK ((u64)0x3 << I40IW_UDA_L4PROTO_SHIFT)
+
+#define I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT 44
+#define I40IW_UDA_QPSQ_DOLOOPBACK_MASK \
+	((u64)0x1 << I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT)
+
+/* CQP SQ WQE common fields */
+#define I40IW_CQPSQ_OPCODE_SHIFT 32
+#define I40IW_CQPSQ_OPCODE_MASK (0x3fULL << I40IW_CQPSQ_OPCODE_SHIFT)
+
+#define I40IW_CQPSQ_WQEVALID_SHIFT 63
+#define I40IW_CQPSQ_WQEVALID_MASK (1ULL << I40IW_CQPSQ_WQEVALID_SHIFT)
+
+#define I40IW_CQPSQ_TPHVAL_SHIFT 0
+#define I40IW_CQPSQ_TPHVAL_MASK (0xffUL << I40IW_CQPSQ_TPHVAL_SHIFT)
+
+#define I40IW_CQPSQ_TPHEN_SHIFT 60
+#define I40IW_CQPSQ_TPHEN_MASK (1ULL << I40IW_CQPSQ_TPHEN_SHIFT)
+
+#define I40IW_CQPSQ_PBUFADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_PBUFADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Create/Modify/Destroy QP */
+
+#define I40IW_CQPSQ_QP_NEWMSS_SHIFT 32
+#define I40IW_CQPSQ_QP_NEWMSS_MASK (0x3fffULL << I40IW_CQPSQ_QP_NEWMSS_SHIFT)
+
+#define I40IW_CQPSQ_QP_TERMLEN_SHIFT 48
+#define I40IW_CQPSQ_QP_TERMLEN_MASK (0xfULL << I40IW_CQPSQ_QP_TERMLEN_SHIFT)
+
+#define I40IW_CQPSQ_QP_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_QP_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_QP_QPID_SHIFT 0
+#define I40IW_CQPSQ_QP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+#define I40IW_CQPSQ_QP_OP_SHIFT 32
+#define I40IW_CQPSQ_QP_OP_MASK I40IWCQ_OP_MASK
+
+#define I40IW_CQPSQ_QP_ORDVALID_SHIFT 42
+#define I40IW_CQPSQ_QP_ORDVALID_MASK (1ULL << I40IW_CQPSQ_QP_ORDVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_TOECTXVALID_SHIFT 43
+#define I40IW_CQPSQ_QP_TOECTXVALID_MASK \
+	(1ULL << I40IW_CQPSQ_QP_TOECTXVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT 44
+#define I40IW_CQPSQ_QP_CACHEDVARVALID_MASK      \
+	(1ULL << I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_VQ_SHIFT 45
+#define I40IW_CQPSQ_QP_VQ_MASK (1ULL << I40IW_CQPSQ_QP_VQ_SHIFT)
+
+#define I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT 46
+#define I40IW_CQPSQ_QP_FORCELOOPBACK_MASK       \
+	(1ULL << I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT)
+
+#define I40IW_CQPSQ_QP_CQNUMVALID_SHIFT 47
+#define I40IW_CQPSQ_QP_CQNUMVALID_MASK  \
+	(1ULL << I40IW_CQPSQ_QP_CQNUMVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_QPTYPE_SHIFT 48
+#define I40IW_CQPSQ_QP_QPTYPE_MASK (0x3ULL << I40IW_CQPSQ_QP_QPTYPE_SHIFT)
+
+#define I40IW_CQPSQ_QP_MSSCHANGE_SHIFT 52
+#define I40IW_CQPSQ_QP_MSSCHANGE_MASK (1ULL << I40IW_CQPSQ_QP_MSSCHANGE_SHIFT)
+
+#define I40IW_CQPSQ_QP_STATRSRC_SHIFT 53
+#define I40IW_CQPSQ_QP_STATRSRC_MASK (1ULL << I40IW_CQPSQ_QP_STATRSRC_SHIFT)
+
+#define I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT 54
+#define I40IW_CQPSQ_QP_IGNOREMWBOUND_MASK       \
+	(1ULL << I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT)
+
+#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT 55
+#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_MASK     \
+	(1ULL << I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT)
+
+#define I40IW_CQPSQ_QP_TERMACT_SHIFT 56
+#define I40IW_CQPSQ_QP_TERMACT_MASK (0x3ULL << I40IW_CQPSQ_QP_TERMACT_SHIFT)
+
+#define I40IW_CQPSQ_QP_RESETCON_SHIFT 58
+#define I40IW_CQPSQ_QP_RESETCON_MASK (1ULL << I40IW_CQPSQ_QP_RESETCON_SHIFT)
+
+#define I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT 59
+#define I40IW_CQPSQ_QP_ARPTABIDXVALID_MASK      \
+	(1ULL << I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT 60
+#define I40IW_CQPSQ_QP_NEXTIWSTATE_MASK \
+	(0x7ULL << I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT)
+
+#define I40IW_CQPSQ_QP_DBSHADOWADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_QP_DBSHADOWADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Create/Modify/Destroy CQ */
+#define I40IW_CQPSQ_CQ_CQSIZE_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQSIZE_MASK (0x3ffffUL << I40IW_CQPSQ_CQ_CQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQCTX_MASK       \
+	(0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQCTX_MASK       \
+	(0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
+
+#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT 0
+#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_MASK       \
+	(0x3ffff << I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CEQID_SHIFT 24
+#define I40IW_CQPSQ_CQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CQ_CEQID_SHIFT)
+
+#define I40IW_CQPSQ_CQ_OP_SHIFT 32
+#define I40IW_CQPSQ_CQ_OP_MASK (0x3fULL << I40IW_CQPSQ_CQ_OP_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQRESIZE_SHIFT 43
+#define I40IW_CQPSQ_CQ_CQRESIZE_MASK (1ULL << I40IW_CQPSQ_CQ_CQRESIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT 44
+#define I40IW_CQPSQ_CQ_LPBLSIZE_MASK (3ULL << I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT 46
+#define I40IW_CQPSQ_CQ_CHKOVERFLOW_MASK         \
+	(1ULL << I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT)
+
+#define I40IW_CQPSQ_CQ_VIRTMAP_SHIFT 47
+#define I40IW_CQPSQ_CQ_VIRTMAP_MASK (1ULL << I40IW_CQPSQ_CQ_VIRTMAP_SHIFT)
+
+#define I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT 48
+#define I40IW_CQPSQ_CQ_ENCEQEMASK_MASK  \
+	(1ULL << I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT 49
+#define I40IW_CQPSQ_CQ_CEQIDVALID_MASK  \
+	(1ULL << I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT)
+
+#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT 61
+#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_MASK      \
+	(1ULL << I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT)
+
+/* Create/Modify/Destroy Shared Receive Queue */
+
+#define I40IW_CQPSQ_SRQ_RQSIZE_SHIFT 0
+#define I40IW_CQPSQ_SRQ_RQSIZE_MASK (0xfUL << I40IW_CQPSQ_SRQ_RQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT 4
+#define I40IW_CQPSQ_SRQ_RQWQESIZE_MASK \
+	(0x7UL << I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT 32
+#define I40IW_CQPSQ_SRQ_SRQLIMIT_MASK   \
+	(0xfffULL << I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_SRQ_SRQCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_SRQ_PDID_SHIFT 16
+#define I40IW_CQPSQ_SRQ_PDID_MASK       \
+	(0x7fffULL << I40IW_CQPSQ_SRQ_PDID_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQID_SHIFT 0
+#define I40IW_CQPSQ_SRQ_SRQID_MASK (0x7fffUL << I40IW_CQPSQ_SRQ_SRQID_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_SRQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_SRQ_VIRTMAP_SHIFT I40IW_CQPSQ_CQ_VIRTMAP_SHIFT
+#define I40IW_CQPSQ_SRQ_VIRTMAP_MASK I40IW_CQPSQ_CQ_VIRTMAP_MASK
+
+#define I40IW_CQPSQ_SRQ_TPHEN_SHIFT I40IW_CQPSQ_TPHEN_SHIFT
+#define I40IW_CQPSQ_SRQ_TPHEN_MASK I40IW_CQPSQ_TPHEN_MASK
+
+#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT 61
+#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_MASK      \
+	(1ULL << I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT 6
+#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_MASK       \
+	(0x3ffffffffffffffULL << I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_MASK      \
+	(0xfffffffUL << I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Allocate/Register/Register Shared/Deallocate Stag */
+#define I40IW_CQPSQ_STAG_VA_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_STAG_VA_FBO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_STAG_STAGLEN_SHIFT 0
+#define I40IW_CQPSQ_STAG_STAGLEN_MASK   \
+	(0x3fffffffffffULL << I40IW_CQPSQ_STAG_STAGLEN_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PDID_SHIFT 48
+#define I40IW_CQPSQ_STAG_PDID_MASK (0x7fffULL << I40IW_CQPSQ_STAG_PDID_SHIFT)
+
+#define I40IW_CQPSQ_STAG_KEY_SHIFT 0
+#define I40IW_CQPSQ_STAG_KEY_MASK (0xffUL << I40IW_CQPSQ_STAG_KEY_SHIFT)
+
+#define I40IW_CQPSQ_STAG_IDX_SHIFT 8
+#define I40IW_CQPSQ_STAG_IDX_MASK (0xffffffUL << I40IW_CQPSQ_STAG_IDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT 32
+#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_MASK     \
+	(0xffffffULL << I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_MR_SHIFT 43
+#define I40IW_CQPSQ_STAG_MR_MASK (1ULL << I40IW_CQPSQ_STAG_MR_SHIFT)
+
+#define I40IW_CQPSQ_STAG_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_STAG_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT 46
+#define I40IW_CQPSQ_STAG_HPAGESIZE_MASK \
+	(1ULL << I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT)
+
+#define I40IW_CQPSQ_STAG_ARIGHTS_SHIFT 48
+#define I40IW_CQPSQ_STAG_ARIGHTS_MASK   \
+	(0x1fULL << I40IW_CQPSQ_STAG_ARIGHTS_SHIFT)
+
+#define I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT 53
+#define I40IW_CQPSQ_STAG_REMACCENABLED_MASK     \
+	(1ULL << I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT)
+
+#define I40IW_CQPSQ_STAG_VABASEDTO_SHIFT 59
+#define I40IW_CQPSQ_STAG_VABASEDTO_MASK \
+	(1ULL << I40IW_CQPSQ_STAG_VABASEDTO_SHIFT)
+
+#define I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT 60
+#define I40IW_CQPSQ_STAG_USEHMCFNIDX_MASK       \
+	(1ULL << I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_USEPFRID_SHIFT 61
+#define I40IW_CQPSQ_STAG_USEPFRID_MASK  \
+	(1ULL << I40IW_CQPSQ_STAG_USEPFRID_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PBA_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_STAG_PBA_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT 0
+#define I40IW_CQPSQ_STAG_HMCFNIDX_MASK \
+	(0x3fUL << I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_MASK     \
+	(0xfffffffUL << I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT)
+
+/* Query stag */
+#define I40IW_CQPSQ_QUERYSTAG_IDX_SHIFT I40IW_CQPSQ_STAG_IDX_SHIFT
+#define I40IW_CQPSQ_QUERYSTAG_IDX_MASK I40IW_CQPSQ_STAG_IDX_MASK
+
+/* Allocate Local IP Address Entry */
+
+/* Manage Local IP Address Table - MLIPA */
+#define I40IW_CQPSQ_MLIPA_IPV6LO_SHIFT  I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_MLIPA_IPV6LO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_MLIPA_IPV6HI_SHIFT  I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_MLIPA_IPV6HI_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_MLIPA_IPV4_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_IPV4_MASK \
+	(0xffffffffUL << I40IW_CQPSQ_MLIPA_IPV4_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_MASK       \
+	(0x3fUL << I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT 42
+#define I40IW_CQPSQ_MLIPA_IPV4VALID_MASK        \
+	(1ULL << I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT 43
+#define I40IW_CQPSQ_MLIPA_IPV6VALID_MASK        \
+	(1ULL << I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT 62
+#define I40IW_CQPSQ_MLIPA_FREEENTRY_MASK        \
+	(1ULL << I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT 61
+#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_MASK   \
+	(1ULL << I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC0_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_MAC0_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC0_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC1_SHIFT 8
+#define I40IW_CQPSQ_MLIPA_MAC1_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC1_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC2_SHIFT 16
+#define I40IW_CQPSQ_MLIPA_MAC2_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC2_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC3_SHIFT 24
+#define I40IW_CQPSQ_MLIPA_MAC3_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC3_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC4_SHIFT 32
+#define I40IW_CQPSQ_MLIPA_MAC4_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC4_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC5_SHIFT 40
+#define I40IW_CQPSQ_MLIPA_MAC5_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC5_SHIFT)
+
+/* Manage ARP Table  - MAT */
+#define I40IW_CQPSQ_MAT_REACHMAX_SHIFT 0
+#define I40IW_CQPSQ_MAT_REACHMAX_MASK   \
+	(0xffffffffUL << I40IW_CQPSQ_MAT_REACHMAX_SHIFT)
+
+#define I40IW_CQPSQ_MAT_MACADDR_SHIFT 0
+#define I40IW_CQPSQ_MAT_MACADDR_MASK    \
+	(0xffffffffffffULL << I40IW_CQPSQ_MAT_MACADDR_SHIFT)
+
+#define I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT 0
+#define I40IW_CQPSQ_MAT_ARPENTRYIDX_MASK        \
+	(0xfffUL << I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT)
+
+#define I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT 42
+#define I40IW_CQPSQ_MAT_ENTRYVALID_MASK \
+	(1ULL << I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT)
+
+#define I40IW_CQPSQ_MAT_PERMANENT_SHIFT 43
+#define I40IW_CQPSQ_MAT_PERMANENT_MASK  \
+	(1ULL << I40IW_CQPSQ_MAT_PERMANENT_SHIFT)
+
+#define I40IW_CQPSQ_MAT_QUERY_SHIFT 44
+#define I40IW_CQPSQ_MAT_QUERY_MASK (1ULL << I40IW_CQPSQ_MAT_QUERY_SHIFT)
+
+/* Manage VF PBLE Backing Pages - MVPBP*/
+#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT 0
+#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_MASK \
+	(0x3ffULL << I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT 16
+#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_MASK \
+	(0x1ffULL << I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_SD_INX_SHIFT 32
+#define I40IW_CQPSQ_MVPBP_SD_INX_MASK \
+	(0xfffULL << I40IW_CQPSQ_MVPBP_SD_INX_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT 62
+#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_MASK \
+	(0x1ULL << I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT 3
+#define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \
+	(0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT)
+
+/* Manage Push Page - MPP */
+#define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff
+
+#define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0
+#define I40IW_CQPSQ_MPP_QS_HANDLE_MASK (0xffffUL << \
+					I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT)
+
+#define I40IW_CQPSQ_MPP_PPIDX_SHIFT 0
+#define I40IW_CQPSQ_MPP_PPIDX_MASK (0x3ffUL << I40IW_CQPSQ_MPP_PPIDX_SHIFT)
+
+#define I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT 62
+#define I40IW_CQPSQ_MPP_FREE_PAGE_MASK (1ULL << I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT)
+
+/* Upload Context - UCTX */
+#define I40IW_CQPSQ_UCTX_QPCTXADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_UCTX_QPCTXADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_UCTX_QPID_SHIFT 0
+#define I40IW_CQPSQ_UCTX_QPID_MASK (0x3ffffUL << I40IW_CQPSQ_UCTX_QPID_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_QPTYPE_SHIFT 48
+#define I40IW_CQPSQ_UCTX_QPTYPE_MASK (0xfULL << I40IW_CQPSQ_UCTX_QPTYPE_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT 61
+#define I40IW_CQPSQ_UCTX_RAWFORMAT_MASK \
+	(1ULL << I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT 62
+#define I40IW_CQPSQ_UCTX_FREEZEQP_MASK  \
+	(1ULL << I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT)
+
+/* Manage HMC PM Function Table - MHMC */
+#define I40IW_CQPSQ_MHMC_VFIDX_SHIFT 0
+#define I40IW_CQPSQ_MHMC_VFIDX_MASK (0x7fUL << I40IW_CQPSQ_MHMC_VFIDX_SHIFT)
+
+#define I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT 62
+#define I40IW_CQPSQ_MHMC_FREEPMFN_MASK  \
+	(1ULL << I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT)
+
+/* Set HMC Resource Profile - SHMCRP */
+#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT 0
+#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_MASK \
+	(0x7ULL << I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT)
+#define I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT 32
+#define I40IW_CQPSQ_SHMCRP_VFNUM_MASK (0x3fULL << I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT)
+
+/* Create/Destroy CEQ */
+#define I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT 0
+#define I40IW_CQPSQ_CEQ_CEQSIZE_MASK \
+	(0x1ffffUL << I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_CEQID_SHIFT 0
+#define I40IW_CQPSQ_CEQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CEQ_CEQID_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_CEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_CEQ_VMAP_SHIFT 47
+#define I40IW_CQPSQ_CEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_CEQ_VMAP_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_MASK      \
+	(0xfffffffUL << I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Create/Destroy AEQ */
+#define I40IW_CQPSQ_AEQ_AEQECNT_SHIFT 0
+#define I40IW_CQPSQ_AEQ_AEQECNT_MASK \
+	(0x7ffffUL << I40IW_CQPSQ_AEQ_AEQECNT_SHIFT)
+
+#define I40IW_CQPSQ_AEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_AEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_AEQ_VMAP_SHIFT 47
+#define I40IW_CQPSQ_AEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_AEQ_VMAP_SHIFT)
+
+#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_MASK      \
+	(0xfffffffUL << I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Commit FPM Values - CFPM */
+#define I40IW_CQPSQ_CFPM_HMCFNID_SHIFT 0
+#define I40IW_CQPSQ_CFPM_HMCFNID_MASK (0x3fUL << I40IW_CQPSQ_CFPM_HMCFNID_SHIFT)
+
+/* Flush WQEs - FWQE */
+#define I40IW_CQPSQ_FWQE_AECODE_SHIFT 0
+#define I40IW_CQPSQ_FWQE_AECODE_MASK (0xffffUL << I40IW_CQPSQ_FWQE_AECODE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_AESOURCE_SHIFT 16
+#define I40IW_CQPSQ_FWQE_AESOURCE_MASK \
+	(0xfUL << I40IW_CQPSQ_FWQE_AESOURCE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_RQMNERR_SHIFT 0
+#define I40IW_CQPSQ_FWQE_RQMNERR_MASK \
+	(0xffffUL << I40IW_CQPSQ_FWQE_RQMNERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_RQMJERR_SHIFT 16
+#define I40IW_CQPSQ_FWQE_RQMJERR_MASK \
+	(0xffffUL << I40IW_CQPSQ_FWQE_RQMJERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_SQMNERR_SHIFT 32
+#define I40IW_CQPSQ_FWQE_SQMNERR_MASK   \
+	(0xffffULL << I40IW_CQPSQ_FWQE_SQMNERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_SQMJERR_SHIFT 48
+#define I40IW_CQPSQ_FWQE_SQMJERR_MASK   \
+	(0xffffULL << I40IW_CQPSQ_FWQE_SQMJERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_QPID_SHIFT 0
+#define I40IW_CQPSQ_FWQE_QPID_MASK (0x3ffffULL << I40IW_CQPSQ_FWQE_QPID_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT 59
+#define I40IW_CQPSQ_FWQE_GENERATE_AE_MASK (1ULL <<      \
+					   I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT 60
+#define I40IW_CQPSQ_FWQE_USERFLCODE_MASK        \
+	(1ULL << I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT 61
+#define I40IW_CQPSQ_FWQE_FLUSHSQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT 62
+#define I40IW_CQPSQ_FWQE_FLUSHRQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT)
+
+/* Manage Accelerated Port Table - MAPT */
+#define I40IW_CQPSQ_MAPT_PORT_SHIFT 0
+#define I40IW_CQPSQ_MAPT_PORT_MASK (0xffffUL << I40IW_CQPSQ_MAPT_PORT_SHIFT)
+
+#define I40IW_CQPSQ_MAPT_ADDPORT_SHIFT 62
+#define I40IW_CQPSQ_MAPT_ADDPORT_MASK (1ULL << I40IW_CQPSQ_MAPT_ADDPORT_SHIFT)
+
+/* Update Protocol Engine SDs */
+#define I40IW_CQPSQ_UPESD_SDCMD_SHIFT 0
+#define I40IW_CQPSQ_UPESD_SDCMD_MASK (0xffffffffUL << I40IW_CQPSQ_UPESD_SDCMD_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT 0
+#define I40IW_CQPSQ_UPESD_SDDATALOW_MASK        \
+	(0xffffffffUL << I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT 32
+#define I40IW_CQPSQ_UPESD_SDDATAHI_MASK \
+	(0xffffffffULL << I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT)
+#define I40IW_CQPSQ_UPESD_HMCFNID_SHIFT 0
+#define I40IW_CQPSQ_UPESD_HMCFNID_MASK  \
+	(0x3fUL << I40IW_CQPSQ_UPESD_HMCFNID_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT 63
+#define I40IW_CQPSQ_UPESD_ENTRY_VALID_MASK      \
+	((u64)1 << I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT 0
+#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_MASK      \
+	(0xfUL << I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT 7
+#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_MASK       \
+	(0x1UL << I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT)
+
+/* Suspend QP */
+#define I40IW_CQPSQ_SUSPENDQP_QPID_SHIFT 0
+#define I40IW_CQPSQ_SUSPENDQP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+/* Resume QP */
+#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT 0
+#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_MASK      \
+	(0xffffffffUL << I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT)
+
+#define I40IW_CQPSQ_RESUMEQP_QPID_SHIFT 0
+#define I40IW_CQPSQ_RESUMEQP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+/* IW QP Context */
+#define I40IWQPC_DDP_VER_SHIFT 0
+#define I40IWQPC_DDP_VER_MASK (3UL << I40IWQPC_DDP_VER_SHIFT)
+
+#define I40IWQPC_SNAP_SHIFT 2
+#define I40IWQPC_SNAP_MASK (1UL << I40IWQPC_SNAP_SHIFT)
+
+#define I40IWQPC_IPV4_SHIFT 3
+#define I40IWQPC_IPV4_MASK (1UL << I40IWQPC_IPV4_SHIFT)
+
+#define I40IWQPC_NONAGLE_SHIFT 4
+#define I40IWQPC_NONAGLE_MASK (1UL << I40IWQPC_NONAGLE_SHIFT)
+
+#define I40IWQPC_INSERTVLANTAG_SHIFT 5
+#define I40IWQPC_INSERTVLANTAG_MASK (1 << I40IWQPC_INSERTVLANTAG_SHIFT)
+
+#define I40IWQPC_USESRQ_SHIFT 6
+#define I40IWQPC_USESRQ_MASK (1UL << I40IWQPC_USESRQ_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_SHIFT 7
+#define I40IWQPC_TIMESTAMP_MASK (1UL << I40IWQPC_TIMESTAMP_SHIFT)
+
+#define I40IWQPC_RQWQESIZE_SHIFT 8
+#define I40IWQPC_RQWQESIZE_MASK (3UL << I40IWQPC_RQWQESIZE_SHIFT)
+
+#define I40IWQPC_INSERTL2TAG2_SHIFT 11
+#define I40IWQPC_INSERTL2TAG2_MASK (1UL << I40IWQPC_INSERTL2TAG2_SHIFT)
+
+#define I40IWQPC_LIMIT_SHIFT 12
+#define I40IWQPC_LIMIT_MASK (3UL << I40IWQPC_LIMIT_SHIFT)
+
+#define I40IWQPC_DROPOOOSEG_SHIFT 15
+#define I40IWQPC_DROPOOOSEG_MASK (1UL << I40IWQPC_DROPOOOSEG_SHIFT)
+
+#define I40IWQPC_DUPACK_THRESH_SHIFT 16
+#define I40IWQPC_DUPACK_THRESH_MASK (7UL << I40IWQPC_DUPACK_THRESH_SHIFT)
+
+#define I40IWQPC_ERR_RQ_IDX_VALID_SHIFT 19
+#define I40IWQPC_ERR_RQ_IDX_VALID_MASK  (1UL << I40IWQPC_ERR_RQ_IDX_VALID_SHIFT)
+
+#define I40IWQPC_DIS_VLAN_CHECKS_SHIFT 19
+#define I40IWQPC_DIS_VLAN_CHECKS_MASK (7UL << I40IWQPC_DIS_VLAN_CHECKS_SHIFT)
+
+#define I40IWQPC_RCVTPHEN_SHIFT 28
+#define I40IWQPC_RCVTPHEN_MASK (1UL << I40IWQPC_RCVTPHEN_SHIFT)
+
+#define I40IWQPC_XMITTPHEN_SHIFT 29
+#define I40IWQPC_XMITTPHEN_MASK (1ULL << I40IWQPC_XMITTPHEN_SHIFT)
+
+#define I40IWQPC_RQTPHEN_SHIFT 30
+#define I40IWQPC_RQTPHEN_MASK (1UL << I40IWQPC_RQTPHEN_SHIFT)
+
+#define I40IWQPC_SQTPHEN_SHIFT 31
+#define I40IWQPC_SQTPHEN_MASK (1ULL << I40IWQPC_SQTPHEN_SHIFT)
+
+#define I40IWQPC_PPIDX_SHIFT 32
+#define I40IWQPC_PPIDX_MASK (0x3ffULL << I40IWQPC_PPIDX_SHIFT)
+
+#define I40IWQPC_PMENA_SHIFT 47
+#define I40IWQPC_PMENA_MASK (1ULL << I40IWQPC_PMENA_SHIFT)
+
+#define I40IWQPC_RDMAP_VER_SHIFT 62
+#define I40IWQPC_RDMAP_VER_MASK (3ULL << I40IWQPC_RDMAP_VER_SHIFT)
+
+#define I40IWQPC_SQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_SQADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_RQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_RQADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_TTL_SHIFT 0
+#define I40IWQPC_TTL_MASK (0xffUL << I40IWQPC_TTL_SHIFT)
+
+#define I40IWQPC_RQSIZE_SHIFT 8
+#define I40IWQPC_RQSIZE_MASK (0xfUL << I40IWQPC_RQSIZE_SHIFT)
+
+#define I40IWQPC_SQSIZE_SHIFT 12
+#define I40IWQPC_SQSIZE_MASK (0xfUL << I40IWQPC_SQSIZE_SHIFT)
+
+#define I40IWQPC_SRCMACADDRIDX_SHIFT 16
+#define I40IWQPC_SRCMACADDRIDX_MASK (0x3fUL << I40IWQPC_SRCMACADDRIDX_SHIFT)
+
+#define I40IWQPC_AVOIDSTRETCHACK_SHIFT 23
+#define I40IWQPC_AVOIDSTRETCHACK_MASK (1UL << I40IWQPC_AVOIDSTRETCHACK_SHIFT)
+
+#define I40IWQPC_TOS_SHIFT 24
+#define I40IWQPC_TOS_MASK (0xffUL << I40IWQPC_TOS_SHIFT)
+
+#define I40IWQPC_SRCPORTNUM_SHIFT 32
+#define I40IWQPC_SRCPORTNUM_MASK (0xffffULL << I40IWQPC_SRCPORTNUM_SHIFT)
+
+#define I40IWQPC_DESTPORTNUM_SHIFT 48
+#define I40IWQPC_DESTPORTNUM_MASK (0xffffULL << I40IWQPC_DESTPORTNUM_SHIFT)
+
+#define I40IWQPC_DESTIPADDR0_SHIFT 32
+#define I40IWQPC_DESTIPADDR0_MASK       \
+	(0xffffffffULL << I40IWQPC_DESTIPADDR0_SHIFT)
+
+#define I40IWQPC_DESTIPADDR1_SHIFT 0
+#define I40IWQPC_DESTIPADDR1_MASK       \
+	(0xffffffffULL << I40IWQPC_DESTIPADDR1_SHIFT)
+
+#define I40IWQPC_DESTIPADDR2_SHIFT 32
+#define I40IWQPC_DESTIPADDR2_MASK       \
+	(0xffffffffULL << I40IWQPC_DESTIPADDR2_SHIFT)
+
+#define I40IWQPC_DESTIPADDR3_SHIFT 0
+#define I40IWQPC_DESTIPADDR3_MASK       \
+	(0xffffffffULL << I40IWQPC_DESTIPADDR3_SHIFT)
+
+#define I40IWQPC_SNDMSS_SHIFT 16
+#define I40IWQPC_SNDMSS_MASK (0x3fffUL << I40IWQPC_SNDMSS_SHIFT)
+
+#define I40IWQPC_VLANTAG_SHIFT 32
+#define I40IWQPC_VLANTAG_MASK (0xffffULL << I40IWQPC_VLANTAG_SHIFT)
+
+#define I40IWQPC_ARPIDX_SHIFT 48
+#define I40IWQPC_ARPIDX_MASK (0xfffULL << I40IWQPC_ARPIDX_SHIFT)
+
+#define I40IWQPC_FLOWLABEL_SHIFT 0
+#define I40IWQPC_FLOWLABEL_MASK (0xfffffUL << I40IWQPC_FLOWLABEL_SHIFT)
+
+#define I40IWQPC_WSCALE_SHIFT 20
+#define I40IWQPC_WSCALE_MASK (1UL << I40IWQPC_WSCALE_SHIFT)
+
+#define I40IWQPC_KEEPALIVE_SHIFT 21
+#define I40IWQPC_KEEPALIVE_MASK (1UL << I40IWQPC_KEEPALIVE_SHIFT)
+
+#define I40IWQPC_IGNORE_TCP_OPT_SHIFT 22
+#define I40IWQPC_IGNORE_TCP_OPT_MASK (1UL << I40IWQPC_IGNORE_TCP_OPT_SHIFT)
+
+#define I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT 23
+#define I40IWQPC_IGNORE_TCP_UNS_OPT_MASK        \
+	(1UL << I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT)
+
+#define I40IWQPC_TCPSTATE_SHIFT 28
+#define I40IWQPC_TCPSTATE_MASK (0xfUL << I40IWQPC_TCPSTATE_SHIFT)
+
+#define I40IWQPC_RCVSCALE_SHIFT 32
+#define I40IWQPC_RCVSCALE_MASK (0xfULL << I40IWQPC_RCVSCALE_SHIFT)
+
+#define I40IWQPC_SNDSCALE_SHIFT 40
+#define I40IWQPC_SNDSCALE_MASK (0xfULL << I40IWQPC_SNDSCALE_SHIFT)
+
+#define I40IWQPC_PDIDX_SHIFT 48
+#define I40IWQPC_PDIDX_MASK (0x7fffULL << I40IWQPC_PDIDX_SHIFT)
+
+#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT 16
+#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_MASK   \
+	(0xffUL << I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT)
+
+#define I40IWQPC_KEEPALIVE_INTERVAL_SHIFT 24
+#define I40IWQPC_KEEPALIVE_INTERVAL_MASK        \
+	(0xffUL << I40IWQPC_KEEPALIVE_INTERVAL_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_RECENT_SHIFT 0
+#define I40IWQPC_TIMESTAMP_RECENT_MASK  \
+	(0xffffffffUL << I40IWQPC_TIMESTAMP_RECENT_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_AGE_SHIFT 32
+#define I40IWQPC_TIMESTAMP_AGE_MASK     \
+	(0xffffffffULL << I40IWQPC_TIMESTAMP_AGE_SHIFT)
+
+#define I40IWQPC_SNDNXT_SHIFT 0
+#define I40IWQPC_SNDNXT_MASK (0xffffffffUL << I40IWQPC_SNDNXT_SHIFT)
+
+#define I40IWQPC_SNDWND_SHIFT 32
+#define I40IWQPC_SNDWND_MASK (0xffffffffULL << I40IWQPC_SNDWND_SHIFT)
+
+#define I40IWQPC_RCVNXT_SHIFT 0
+#define I40IWQPC_RCVNXT_MASK (0xffffffffUL << I40IWQPC_RCVNXT_SHIFT)
+
+#define I40IWQPC_RCVWND_SHIFT 32
+#define I40IWQPC_RCVWND_MASK (0xffffffffULL << I40IWQPC_RCVWND_SHIFT)
+
+#define I40IWQPC_SNDMAX_SHIFT 0
+#define I40IWQPC_SNDMAX_MASK (0xffffffffUL << I40IWQPC_SNDMAX_SHIFT)
+
+#define I40IWQPC_SNDUNA_SHIFT 32
+#define I40IWQPC_SNDUNA_MASK (0xffffffffULL << I40IWQPC_SNDUNA_SHIFT)
+
+#define I40IWQPC_SRTT_SHIFT 0
+#define I40IWQPC_SRTT_MASK (0xffffffffUL << I40IWQPC_SRTT_SHIFT)
+
+#define I40IWQPC_RTTVAR_SHIFT 32
+#define I40IWQPC_RTTVAR_MASK (0xffffffffULL << I40IWQPC_RTTVAR_SHIFT)
+
+#define I40IWQPC_SSTHRESH_SHIFT 0
+#define I40IWQPC_SSTHRESH_MASK (0xffffffffUL << I40IWQPC_SSTHRESH_SHIFT)
+
+#define I40IWQPC_CWND_SHIFT 32
+#define I40IWQPC_CWND_MASK (0xffffffffULL << I40IWQPC_CWND_SHIFT)
+
+#define I40IWQPC_SNDWL1_SHIFT 0
+#define I40IWQPC_SNDWL1_MASK (0xffffffffUL << I40IWQPC_SNDWL1_SHIFT)
+
+#define I40IWQPC_SNDWL2_SHIFT 32
+#define I40IWQPC_SNDWL2_MASK (0xffffffffULL << I40IWQPC_SNDWL2_SHIFT)
+
+#define I40IWQPC_ERR_RQ_IDX_SHIFT 32
+#define I40IWQPC_ERR_RQ_IDX_MASK  (0x3fffULL << I40IWQPC_ERR_RQ_IDX_SHIFT)
+
+#define I40IWQPC_MAXSNDWND_SHIFT 0
+#define I40IWQPC_MAXSNDWND_MASK (0xffffffffUL << I40IWQPC_MAXSNDWND_SHIFT)
+
+#define I40IWQPC_REXMIT_THRESH_SHIFT 48
+#define I40IWQPC_REXMIT_THRESH_MASK (0x3fULL << I40IWQPC_REXMIT_THRESH_SHIFT)
+
+#define I40IWQPC_TXCQNUM_SHIFT 0
+#define I40IWQPC_TXCQNUM_MASK (0x1ffffUL << I40IWQPC_TXCQNUM_SHIFT)
+
+#define I40IWQPC_RXCQNUM_SHIFT 32
+#define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT)
+
+#define I40IWQPC_Q2ADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_Q2ADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_LASTBYTESENT_SHIFT 0
+#define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT)
+
+#define I40IWQPC_SRQID_SHIFT 32
+#define I40IWQPC_SRQID_MASK (0xffULL << I40IWQPC_SRQID_SHIFT)
+
+#define I40IWQPC_ORDSIZE_SHIFT 0
+#define I40IWQPC_ORDSIZE_MASK (0x7fUL << I40IWQPC_ORDSIZE_SHIFT)
+
+#define I40IWQPC_IRDSIZE_SHIFT 16
+#define I40IWQPC_IRDSIZE_MASK (0x3UL << I40IWQPC_IRDSIZE_SHIFT)
+
+#define I40IWQPC_WRRDRSPOK_SHIFT 20
+#define I40IWQPC_WRRDRSPOK_MASK (1UL << I40IWQPC_WRRDRSPOK_SHIFT)
+
+#define I40IWQPC_RDOK_SHIFT 21
+#define I40IWQPC_RDOK_MASK (1UL << I40IWQPC_RDOK_SHIFT)
+
+#define I40IWQPC_SNDMARKERS_SHIFT 22
+#define I40IWQPC_SNDMARKERS_MASK (1UL << I40IWQPC_SNDMARKERS_SHIFT)
+
+#define I40IWQPC_BINDEN_SHIFT 23
+#define I40IWQPC_BINDEN_MASK (1UL << I40IWQPC_BINDEN_SHIFT)
+
+#define I40IWQPC_FASTREGEN_SHIFT 24
+#define I40IWQPC_FASTREGEN_MASK (1UL << I40IWQPC_FASTREGEN_SHIFT)
+
+#define I40IWQPC_PRIVEN_SHIFT 25
+#define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT)
+
+#define I40IWQPC_LSMMPRESENT_SHIFT 26
+#define I40IWQPC_LSMMPRESENT_MASK (1UL << I40IWQPC_LSMMPRESENT_SHIFT)
+
+#define I40IWQPC_ADJUSTFORLSMM_SHIFT 27
+#define I40IWQPC_ADJUSTFORLSMM_MASK (1UL << I40IWQPC_ADJUSTFORLSMM_SHIFT)
+
+#define I40IWQPC_IWARPMODE_SHIFT 28
+#define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT)
+
+#define I40IWQPC_RCVMARKERS_SHIFT 29
+#define I40IWQPC_RCVMARKERS_MASK (1UL << I40IWQPC_RCVMARKERS_SHIFT)
+
+#define I40IWQPC_ALIGNHDRS_SHIFT 30
+#define I40IWQPC_ALIGNHDRS_MASK (1UL << I40IWQPC_ALIGNHDRS_SHIFT)
+
+#define I40IWQPC_RCVNOMPACRC_SHIFT 31
+#define I40IWQPC_RCVNOMPACRC_MASK (1UL << I40IWQPC_RCVNOMPACRC_SHIFT)
+
+#define I40IWQPC_RCVMARKOFFSET_SHIFT 33
+#define I40IWQPC_RCVMARKOFFSET_MASK (0x1ffULL << I40IWQPC_RCVMARKOFFSET_SHIFT)
+
+#define I40IWQPC_SNDMARKOFFSET_SHIFT 48
+#define I40IWQPC_SNDMARKOFFSET_MASK (0x1ffULL << I40IWQPC_SNDMARKOFFSET_SHIFT)
+
+#define I40IWQPC_QPCOMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_QPCOMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_SQTPHVAL_SHIFT 0
+#define I40IWQPC_SQTPHVAL_MASK (0xffUL << I40IWQPC_SQTPHVAL_SHIFT)
+
+#define I40IWQPC_RQTPHVAL_SHIFT 8
+#define I40IWQPC_RQTPHVAL_MASK (0xffUL << I40IWQPC_RQTPHVAL_SHIFT)
+
+#define I40IWQPC_QSHANDLE_SHIFT 16
+#define I40IWQPC_QSHANDLE_MASK (0x3ffUL << I40IWQPC_QSHANDLE_SHIFT)
+
+#define I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT 32
+#define I40IWQPC_EXCEPTION_LAN_QUEUE_MASK (0xfffULL <<  \
+					   I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR3_SHIFT 0
+#define I40IWQPC_LOCAL_IPADDR3_MASK \
+	(0xffffffffUL << I40IWQPC_LOCAL_IPADDR3_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR2_SHIFT 32
+#define I40IWQPC_LOCAL_IPADDR2_MASK     \
+	(0xffffffffULL << I40IWQPC_LOCAL_IPADDR2_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR1_SHIFT 0
+#define I40IWQPC_LOCAL_IPADDR1_MASK     \
+	(0xffffffffUL << I40IWQPC_LOCAL_IPADDR1_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR0_SHIFT 32
+#define I40IWQPC_LOCAL_IPADDR0_MASK     \
+	(0xffffffffULL << I40IWQPC_LOCAL_IPADDR0_SHIFT)
+
+/* wqe size considering 32 bytes per wqe*/
+#define I40IWQP_SW_MIN_WQSIZE 4		/* 128 bytes */
+#define I40IWQP_SW_MAX_WQSIZE 16384	/* 524288 bytes */
+
+#define I40IWQP_OP_RDMA_WRITE 0
+#define I40IWQP_OP_RDMA_READ 1
+#define I40IWQP_OP_RDMA_SEND 3
+#define I40IWQP_OP_RDMA_SEND_INV 4
+#define I40IWQP_OP_RDMA_SEND_SOL_EVENT 5
+#define I40IWQP_OP_RDMA_SEND_SOL_EVENT_INV 6
+#define I40IWQP_OP_BIND_MW 8
+#define I40IWQP_OP_FAST_REGISTER 9
+#define I40IWQP_OP_LOCAL_INVALIDATE 10
+#define I40IWQP_OP_RDMA_READ_LOC_INV 11
+#define I40IWQP_OP_NOP 12
+
+#define I40IW_RSVD_SHIFT        41
+#define I40IW_RSVD_MASK (0x7fffULL << I40IW_RSVD_SHIFT)
+
+/* iwarp QP SQ WQE common fields */
+#define I40IWQPSQ_OPCODE_SHIFT 32
+#define I40IWQPSQ_OPCODE_MASK (0x3fULL << I40IWQPSQ_OPCODE_SHIFT)
+
+#define I40IWQPSQ_ADDFRAGCNT_SHIFT 38
+#define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT)
+
+#define I40IWQPSQ_PUSHWQE_SHIFT 56
+#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT)
+
+#define I40IWQPSQ_STREAMMODE_SHIFT 58
+#define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT)
+
+#define I40IWQPSQ_WAITFORRCVPDU_SHIFT 59
+#define I40IWQPSQ_WAITFORRCVPDU_MASK (1ULL << I40IWQPSQ_WAITFORRCVPDU_SHIFT)
+
+#define I40IWQPSQ_READFENCE_SHIFT 60
+#define I40IWQPSQ_READFENCE_MASK (1ULL << I40IWQPSQ_READFENCE_SHIFT)
+
+#define I40IWQPSQ_LOCALFENCE_SHIFT 61
+#define I40IWQPSQ_LOCALFENCE_MASK (1ULL << I40IWQPSQ_LOCALFENCE_SHIFT)
+
+#define I40IWQPSQ_SIGCOMPL_SHIFT 62
+#define I40IWQPSQ_SIGCOMPL_MASK (1ULL << I40IWQPSQ_SIGCOMPL_SHIFT)
+
+#define I40IWQPSQ_VALID_SHIFT 63
+#define I40IWQPSQ_VALID_MASK (1ULL << I40IWQPSQ_VALID_SHIFT)
+
+#define I40IWQPSQ_FRAG_TO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_FRAG_TO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPSQ_FRAG_LEN_SHIFT 0
+#define I40IWQPSQ_FRAG_LEN_MASK (0xffffffffUL << I40IWQPSQ_FRAG_LEN_SHIFT)
+
+#define I40IWQPSQ_FRAG_STAG_SHIFT 32
+#define I40IWQPSQ_FRAG_STAG_MASK (0xffffffffULL << I40IWQPSQ_FRAG_STAG_SHIFT)
+
+#define I40IWQPSQ_REMSTAGINV_SHIFT 0
+#define I40IWQPSQ_REMSTAGINV_MASK (0xffffffffUL << I40IWQPSQ_REMSTAGINV_SHIFT)
+
+#define I40IWQPSQ_INLINEDATAFLAG_SHIFT 57
+#define I40IWQPSQ_INLINEDATAFLAG_MASK (1ULL << I40IWQPSQ_INLINEDATAFLAG_SHIFT)
+
+#define I40IWQPSQ_INLINEDATALEN_SHIFT 48
+#define I40IWQPSQ_INLINEDATALEN_MASK    \
+	(0x7fULL << I40IWQPSQ_INLINEDATALEN_SHIFT)
+
+/* iwarp send with push mode */
+#define I40IWQPSQ_WQDESCIDX_SHIFT 0
+#define I40IWQPSQ_WQDESCIDX_MASK (0x3fffUL << I40IWQPSQ_WQDESCIDX_SHIFT)
+
+/* rdma write */
+#define I40IWQPSQ_REMSTAG_SHIFT 0
+#define I40IWQPSQ_REMSTAG_MASK (0xffffffffUL << I40IWQPSQ_REMSTAG_SHIFT)
+
+#define I40IWQPSQ_REMTO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_REMTO_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* memory window */
+#define I40IWQPSQ_STAGRIGHTS_SHIFT 48
+#define I40IWQPSQ_STAGRIGHTS_MASK (0x1fULL << I40IWQPSQ_STAGRIGHTS_SHIFT)
+
+#define I40IWQPSQ_VABASEDTO_SHIFT 53
+#define I40IWQPSQ_VABASEDTO_MASK (1ULL << I40IWQPSQ_VABASEDTO_SHIFT)
+
+#define I40IWQPSQ_MWLEN_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_MWLEN_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPSQ_PARENTMRSTAG_SHIFT 0
+#define I40IWQPSQ_PARENTMRSTAG_MASK \
+	(0xffffffffUL << I40IWQPSQ_PARENTMRSTAG_SHIFT)
+
+#define I40IWQPSQ_MWSTAG_SHIFT 32
+#define I40IWQPSQ_MWSTAG_MASK (0xffffffffULL << I40IWQPSQ_MWSTAG_SHIFT)
+
+#define I40IWQPSQ_BASEVA_TO_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_BASEVA_TO_FBO_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Local Invalidate */
+#define I40IWQPSQ_LOCSTAG_SHIFT 32
+#define I40IWQPSQ_LOCSTAG_MASK (0xffffffffULL << I40IWQPSQ_LOCSTAG_SHIFT)
+
+/* Fast Register */
+#define I40IWQPSQ_STAGKEY_SHIFT 0
+#define I40IWQPSQ_STAGKEY_MASK (0xffUL << I40IWQPSQ_STAGKEY_SHIFT)
+
+#define I40IWQPSQ_STAGINDEX_SHIFT 8
+#define I40IWQPSQ_STAGINDEX_MASK (0xffffffUL << I40IWQPSQ_STAGINDEX_SHIFT)
+
+#define I40IWQPSQ_COPYHOSTPBLS_SHIFT 43
+#define I40IWQPSQ_COPYHOSTPBLS_MASK (1ULL << I40IWQPSQ_COPYHOSTPBLS_SHIFT)
+
+#define I40IWQPSQ_LPBLSIZE_SHIFT 44
+#define I40IWQPSQ_LPBLSIZE_MASK (3ULL << I40IWQPSQ_LPBLSIZE_SHIFT)
+
+#define I40IWQPSQ_HPAGESIZE_SHIFT 46
+#define I40IWQPSQ_HPAGESIZE_MASK (3ULL << I40IWQPSQ_HPAGESIZE_SHIFT)
+
+#define I40IWQPSQ_STAGLEN_SHIFT 0
+#define I40IWQPSQ_STAGLEN_MASK (0x1ffffffffffULL << I40IWQPSQ_STAGLEN_SHIFT)
+
+#define I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT 48
+#define I40IWQPSQ_FIRSTPMPBLIDXLO_MASK  \
+	(0xffffULL << I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT)
+
+#define I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT 0
+#define I40IWQPSQ_FIRSTPMPBLIDXHI_MASK  \
+	(0xfffUL << I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT)
+
+#define I40IWQPSQ_PBLADDR_SHIFT 12
+#define I40IWQPSQ_PBLADDR_MASK (0xfffffffffffffULL << I40IWQPSQ_PBLADDR_SHIFT)
+
+/*  iwarp QP RQ WQE common fields */
+#define I40IWQPRQ_ADDFRAGCNT_SHIFT I40IWQPSQ_ADDFRAGCNT_SHIFT
+#define I40IWQPRQ_ADDFRAGCNT_MASK I40IWQPSQ_ADDFRAGCNT_MASK
+
+#define I40IWQPRQ_VALID_SHIFT I40IWQPSQ_VALID_SHIFT
+#define I40IWQPRQ_VALID_MASK I40IWQPSQ_VALID_MASK
+
+#define I40IWQPRQ_COMPLCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPRQ_COMPLCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPRQ_FRAG_LEN_SHIFT I40IWQPSQ_FRAG_LEN_SHIFT
+#define I40IWQPRQ_FRAG_LEN_MASK I40IWQPSQ_FRAG_LEN_MASK
+
+#define I40IWQPRQ_STAG_SHIFT I40IWQPSQ_FRAG_STAG_SHIFT
+#define I40IWQPRQ_STAG_MASK I40IWQPSQ_FRAG_STAG_MASK
+
+#define I40IWQPRQ_TO_SHIFT I40IWQPSQ_FRAG_TO_SHIFT
+#define I40IWQPRQ_TO_MASK I40IWQPSQ_FRAG_TO_MASK
+
+/* Query FPM CQP buf */
+#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_QPS_MASK               \
+	(0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CQS_MASK               \
+	(0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
+
+#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT 0
+#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_MASK  \
+	(0x3fffUL << I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT 32
+#define I40IW_QUERY_FPM_MAX_PE_SDS_MASK \
+	(0x3fffULL << I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_QPS_MASK    \
+	(0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CQS_MASK    \
+	(0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CEQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CEQS_MASK   \
+	(0xffUL << I40IW_QUERY_FPM_MAX_CEQS_SHIFT)
+
+#define I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT 32
+#define I40IW_QUERY_FPM_XFBLOCKSIZE_MASK        \
+	(0xffffffffULL << I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT)
+
+#define I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT 32
+#define I40IW_QUERY_FPM_Q1BLOCKSIZE_MASK        \
+	(0xffffffffULL << I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT)
+
+#define I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT 16
+#define I40IW_QUERY_FPM_HTMULTIPLIER_MASK       \
+	(0xfUL << I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT)
+
+#define I40IW_QUERY_FPM_TIMERBUCKET_SHIFT 32
+#define I40IW_QUERY_FPM_TIMERBUCKET_MASK        \
+	(0xffFFULL << I40IW_QUERY_FPM_TIMERBUCKET_SHIFT)
+
+/* Static HMC pages allocated buf */
+#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT 0
+#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_MASK        \
+	(0x3fUL << I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT)
+
+#define I40IW_HW_PAGE_SIZE	4096
+#define I40IW_DONE_COUNT	1000
+#define I40IW_SLEEP_COUNT	10
+
+enum {
+	I40IW_QUEUES_ALIGNMENT_MASK =		(128 - 1),
+	I40IW_AEQ_ALIGNMENT_MASK =		(256 - 1),
+	I40IW_Q2_ALIGNMENT_MASK =		(256 - 1),
+	I40IW_CEQ_ALIGNMENT_MASK =		(256 - 1),
+	I40IW_CQ0_ALIGNMENT_MASK =		(256 - 1),
+	I40IW_HOST_CTX_ALIGNMENT_MASK =		(4 - 1),
+	I40IW_SHADOWAREA_MASK =			(128 - 1),
+	I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK =	0,
+	I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK =	0
+};
+
+enum i40iw_alignment {
+	I40IW_CQP_ALIGNMENT =		0x200,
+	I40IW_AEQ_ALIGNMENT =		0x100,
+	I40IW_CEQ_ALIGNMENT =		0x100,
+	I40IW_CQ0_ALIGNMENT =		0x100,
+	I40IW_SD_BUF_ALIGNMENT =	0x100
+};
+
+#define I40IW_QP_WQE_MIN_SIZE	32
+#define I40IW_QP_WQE_MAX_SIZE	128
+
+#define I40IW_CQE_QTYPE_RQ 0
+#define I40IW_CQE_QTYPE_SQ 1
+
+#define I40IW_RING_INIT(_ring, _size) \
+	{ \
+		(_ring).head = 0; \
+		(_ring).tail = 0; \
+		(_ring).size = (_size); \
+	}
+#define I40IW_RING_GETSIZE(_ring) ((_ring).size)
+#define I40IW_RING_GETCURRENT_HEAD(_ring) ((_ring).head)
+#define I40IW_RING_GETCURRENT_TAIL(_ring) ((_ring).tail)
+
+#define I40IW_RING_MOVE_HEAD(_ring, _retcode) \
+	{ \
+		register u32 size; \
+		size = (_ring).size;  \
+		if (!I40IW_RING_FULL_ERR(_ring)) { \
+			(_ring).head = ((_ring).head + 1) % size; \
+			(_retcode) = 0; \
+		} else { \
+			(_retcode) = I40IW_ERR_RING_FULL; \
+		} \
+	}
+
+#define I40IW_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+	{ \
+		register u32 size; \
+		size = (_ring).size; \
+		if ((I40IW_RING_WORK_AVAILABLE(_ring) + (_count)) < size) { \
+			(_ring).head = ((_ring).head + (_count)) % size; \
+			(_retcode) = 0; \
+		} else { \
+			(_retcode) = I40IW_ERR_RING_FULL; \
+		} \
+	}
+
+#define I40IW_RING_MOVE_TAIL(_ring) \
+	(_ring).tail = ((_ring).tail + 1) % (_ring).size
+
+#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
+	(_ring).tail = ((_ring).tail + (_count)) % (_ring).size
+
+#define I40IW_RING_SET_TAIL(_ring, _pos) \
+	(_ring).tail = (_pos) % (_ring).size
+
+#define I40IW_RING_FULL_ERR(_ring) \
+	( \
+		(I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 1))  \
+	)
+
+#define I40IW_ERR_RING_FULL2(_ring) \
+	( \
+		(I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 2))  \
+	)
+
+#define I40IW_ERR_RING_FULL3(_ring) \
+	( \
+		(I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 3))  \
+	)
+
+#define I40IW_RING_MORE_WORK(_ring) \
+	( \
+		(I40IW_RING_WORK_AVAILABLE(_ring) != 0) \
+	)
+
+#define I40IW_RING_WORK_AVAILABLE(_ring) \
+	( \
+		(((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \
+	)
+
+#define I40IW_RING_GET_WQES_AVAILABLE(_ring) \
+	( \
+		((_ring).size - I40IW_RING_WORK_AVAILABLE(_ring) - 1) \
+	)
+
+#define I40IW_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
+	{ \
+		index = I40IW_RING_GETCURRENT_HEAD(_ring); \
+		I40IW_RING_MOVE_HEAD(_ring, _retcode); \
+	}
+
+/* Async Events codes */
+#define I40IW_AE_AMP_UNALLOCATED_STAG                                   0x0102
+#define I40IW_AE_AMP_INVALID_STAG                                       0x0103
+#define I40IW_AE_AMP_BAD_QP                                             0x0104
+#define I40IW_AE_AMP_BAD_PD                                             0x0105
+#define I40IW_AE_AMP_BAD_STAG_KEY                                       0x0106
+#define I40IW_AE_AMP_BAD_STAG_INDEX                                     0x0107
+#define I40IW_AE_AMP_BOUNDS_VIOLATION                                   0x0108
+#define I40IW_AE_AMP_RIGHTS_VIOLATION                                   0x0109
+#define I40IW_AE_AMP_TO_WRAP                                            0x010a
+#define I40IW_AE_AMP_FASTREG_SHARED                                     0x010b
+#define I40IW_AE_AMP_FASTREG_VALID_STAG                                 0x010c
+#define I40IW_AE_AMP_FASTREG_MW_STAG                                    0x010d
+#define I40IW_AE_AMP_FASTREG_INVALID_RIGHTS                             0x010e
+#define I40IW_AE_AMP_FASTREG_PBL_TABLE_OVERFLOW                         0x010f
+#define I40IW_AE_AMP_FASTREG_INVALID_LENGTH                             0x0110
+#define I40IW_AE_AMP_INVALIDATE_SHARED                                  0x0111
+#define I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS                 0x0112
+#define I40IW_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS                   0x0113
+#define I40IW_AE_AMP_MWBIND_VALID_STAG                                  0x0114
+#define I40IW_AE_AMP_MWBIND_OF_MR_STAG                                  0x0115
+#define I40IW_AE_AMP_MWBIND_TO_ZERO_BASED_STAG                          0x0116
+#define I40IW_AE_AMP_MWBIND_TO_MW_STAG                                  0x0117
+#define I40IW_AE_AMP_MWBIND_INVALID_RIGHTS                              0x0118
+#define I40IW_AE_AMP_MWBIND_INVALID_BOUNDS                              0x0119
+#define I40IW_AE_AMP_MWBIND_TO_INVALID_PARENT                           0x011a
+#define I40IW_AE_AMP_MWBIND_BIND_DISABLED                               0x011b
+#define I40IW_AE_AMP_WQE_INVALID_PARAMETER                              0x0130
+#define I40IW_AE_BAD_CLOSE                                              0x0201
+#define I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE                                0x0202
+#define I40IW_AE_CQ_OPERATION_ERROR                                     0x0203
+#define I40IW_AE_PRIV_OPERATION_DENIED                                  0x011c
+#define I40IW_AE_RDMA_READ_WHILE_ORD_ZERO                               0x0205
+#define I40IW_AE_STAG_ZERO_INVALID                                      0x0206
+#define I40IW_AE_IB_RREQ_AND_Q1_FULL                                    0x0207
+#define I40IW_AE_SRQ_LIMIT                                              0x0209
+#define I40IW_AE_WQE_UNEXPECTED_OPCODE                                  0x020a
+#define I40IW_AE_WQE_INVALID_PARAMETER                                  0x020b
+#define I40IW_AE_WQE_LSMM_TOO_LONG                                      0x0220
+#define I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN                             0x0301
+#define I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID                     0x0302
+#define I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER      0x0303
+#define I40IW_AE_DDP_UBE_INVALID_DDP_VERSION                            0x0304
+#define I40IW_AE_DDP_UBE_INVALID_MO                                     0x0305
+#define I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE                0x0306
+#define I40IW_AE_DDP_UBE_INVALID_QN                                     0x0307
+#define I40IW_AE_DDP_NO_L_BIT                                           0x0308
+#define I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION                        0x0311
+#define I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE                            0x0312
+#define I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST                          0x0313
+#define I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP                    0x0314
+#define I40IW_AE_INVALID_ARP_ENTRY                                      0x0401
+#define I40IW_AE_INVALID_TCP_OPTION_RCVD                                0x0402
+#define I40IW_AE_STALE_ARP_ENTRY                                        0x0403
+#define I40IW_AE_INVALID_WQE_LENGTH                                     0x0404
+#define I40IW_AE_INVALID_MAC_ENTRY                                      0x0405
+#define I40IW_AE_LLP_CLOSE_COMPLETE                                     0x0501
+#define I40IW_AE_LLP_CONNECTION_RESET                                   0x0502
+#define I40IW_AE_LLP_FIN_RECEIVED                                       0x0503
+#define I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH       0x0504
+#define I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR                             0x0505
+#define I40IW_AE_LLP_SEGMENT_TOO_LARGE                                  0x0506
+#define I40IW_AE_LLP_SEGMENT_TOO_SMALL                                  0x0507
+#define I40IW_AE_LLP_SYN_RECEIVED                                       0x0508
+#define I40IW_AE_LLP_TERMINATE_RECEIVED                                 0x0509
+#define I40IW_AE_LLP_TOO_MANY_RETRIES                                   0x050a
+#define I40IW_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES                         0x050b
+#define I40IW_AE_LLP_DOUBT_REACHABILITY                                 0x050c
+#define I40IW_AE_LLP_RX_VLAN_MISMATCH                                   0x050d
+#define I40IW_AE_RESOURCE_EXHAUSTION                                    0x0520
+#define I40IW_AE_RESET_SENT                                             0x0601
+#define I40IW_AE_TERMINATE_SENT                                         0x0602
+#define I40IW_AE_RESET_NOT_SENT                                         0x0603
+#define I40IW_AE_LCE_QP_CATASTROPHIC                                    0x0700
+#define I40IW_AE_LCE_FUNCTION_CATASTROPHIC                              0x0701
+#define I40IW_AE_LCE_CQ_CATASTROPHIC                                    0x0702
+#define I40IW_AE_UDA_XMIT_FRAG_SEQ                                      0x0800
+#define I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG                                0x0801
+#define I40IW_AE_UDA_XMIT_IPADDR_MISMATCH                               0x0802
+#define I40IW_AE_QP_SUSPEND_COMPLETE                                    0x0900
+
+#define OP_DELETE_LOCAL_MAC_IPADDR_ENTRY        1
+#define OP_CEQ_DESTROY                          2
+#define OP_AEQ_DESTROY                          3
+#define OP_DELETE_ARP_CACHE_ENTRY               4
+#define OP_MANAGE_APBVT_ENTRY                   5
+#define OP_CEQ_CREATE                           6
+#define OP_AEQ_CREATE                           7
+#define OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY         8
+#define OP_ADD_LOCAL_MAC_IPADDR_ENTRY           9
+#define OP_MANAGE_QHASH_TABLE_ENTRY             10
+#define OP_QP_MODIFY                            11
+#define OP_QP_UPLOAD_CONTEXT                    12
+#define OP_CQ_CREATE                            13
+#define OP_CQ_DESTROY                           14
+#define OP_QP_CREATE                            15
+#define OP_QP_DESTROY                           16
+#define OP_ALLOC_STAG                           17
+#define OP_MR_REG_NON_SHARED                    18
+#define OP_DEALLOC_STAG                         19
+#define OP_MW_ALLOC                             20
+#define OP_QP_FLUSH_WQES                        21
+#define OP_ADD_ARP_CACHE_ENTRY                  22
+#define OP_MANAGE_PUSH_PAGE                     23
+#define OP_UPDATE_PE_SDS                        24
+#define OP_MANAGE_HMC_PM_FUNC_TABLE             25
+#define OP_SUSPEND                              26
+#define OP_RESUME                               27
+#define OP_MANAGE_VF_PBLE_BP                    28
+#define OP_QUERY_FPM_VALUES                     29
+#define OP_COMMIT_FPM_VALUES                    30
+#define OP_SIZE_CQP_STAT_ARRAY                  31
+
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
new file mode 100644
index 0000000..5484cbf
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.c

@@ -0,0 +1,821 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * i40iw_find_sd_index_limit - finds segment descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @type: type of HMC resources we're searching
+ * @index: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @sd_idx: pointer to return index of the segment descriptor in question
+ * @sd_limit: pointer to return the maximum number of segment descriptors
+ *
+ * This function calculates the segment descriptor index and index limit
+ * for the resource defined by i40iw_hmc_rsrc_type.
+ */
+
+static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info,
+					     u32 type,
+					     u32 idx,
+					     u32 cnt,
+					     u32 *sd_idx,
+					     u32 *sd_limit)
+{
+	u64 fpm_addr, fpm_limit;
+
+	fpm_addr = hmc_info->hmc_obj[(type)].base +
+			hmc_info->hmc_obj[type].size * idx;
+	fpm_limit = fpm_addr + hmc_info->hmc_obj[type].size * cnt;
+	*sd_idx = (u32)(fpm_addr / I40IW_HMC_DIRECT_BP_SIZE);
+	*sd_limit = (u32)((fpm_limit - 1) / I40IW_HMC_DIRECT_BP_SIZE);
+	*sd_limit += 1;
+}
+
+/**
+ * i40iw_find_pd_index_limit - finds page descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @type: HMC resource type we're examining
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @pd_index: pointer to return page descriptor index
+ * @pd_limit: pointer to return page descriptor index limit
+ *
+ * Calculates the page descriptor index and index limit for the resource
+ * defined by i40iw_hmc_rsrc_type.
+ */
+
+static inline void i40iw_find_pd_index_limit(struct i40iw_hmc_info *hmc_info,
+					     u32 type,
+					     u32 idx,
+					     u32 cnt,
+					     u32 *pd_idx,
+					     u32 *pd_limit)
+{
+	u64 fpm_adr, fpm_limit;
+
+	fpm_adr = hmc_info->hmc_obj[type].base +
+			hmc_info->hmc_obj[type].size * idx;
+	fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt);
+	*(pd_idx) = (u32)(fpm_adr / I40IW_HMC_PAGED_BP_SIZE);
+	*(pd_limit) = (u32)((fpm_limit - 1) / I40IW_HMC_PAGED_BP_SIZE);
+	*(pd_limit) += 1;
+}
+
+/**
+ * i40iw_set_sd_entry - setup entry for sd programming
+ * @pa: physical addr
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static inline void i40iw_set_sd_entry(u64 pa,
+				      u32 idx,
+				      enum i40iw_sd_entry_type type,
+				      struct update_sd_entry *entry)
+{
+	entry->data = pa | (I40IW_HMC_MAX_BP_COUNT << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
+			(((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
+				I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) |
+			(1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT);
+	entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
+}
+
+/**
+ * i40iw_clr_sd_entry - setup entry for sd clear
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static inline void i40iw_clr_sd_entry(u32 idx, enum i40iw_sd_entry_type type,
+				      struct update_sd_entry *entry)
+{
+	entry->data = (I40IW_HMC_MAX_BP_COUNT <<
+			I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
+			(((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
+				I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT);
+	entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
+}
+
+/**
+ * i40iw_hmc_sd_one - setup 1 sd entry for cqp
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ * @pa: physical addr
+ * @sd_idx: sd index
+ * @type: paged or direct sd
+ * @setsd: flag to set or clear sd
+ */
+enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev,
+					u8 hmc_fn_id,
+					u64 pa, u32 sd_idx,
+					enum i40iw_sd_entry_type type,
+					bool setsd)
+{
+	struct i40iw_update_sds_info sdinfo;
+
+	sdinfo.cnt = 1;
+	sdinfo.hmc_fn_id = hmc_fn_id;
+	if (setsd)
+		i40iw_set_sd_entry(pa, sd_idx, type, sdinfo.entry);
+	else
+		i40iw_clr_sd_entry(sd_idx, type, sdinfo.entry);
+
+	return dev->cqp->process_cqp_sds(dev, &sdinfo);
+}
+
+/**
+ * i40iw_hmc_sd_grp - setup group od sd entries for cqp
+ * @dev: pointer to the device structure
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: sd index
+ * @sd_cnt: number of sd entries
+ * @setsd: flag to set or clear sd
+ */
+static enum i40iw_status_code i40iw_hmc_sd_grp(struct i40iw_sc_dev *dev,
+					       struct i40iw_hmc_info *hmc_info,
+					       u32 sd_index,
+					       u32 sd_cnt,
+					       bool setsd)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+	struct i40iw_update_sds_info sdinfo;
+	u64 pa;
+	u32 i;
+	enum i40iw_status_code ret_code = 0;
+
+	memset(&sdinfo, 0, sizeof(sdinfo));
+	sdinfo.hmc_fn_id = hmc_info->hmc_fn_id;
+	for (i = sd_index; i < sd_index + sd_cnt; i++) {
+		sd_entry = &hmc_info->sd_table.sd_entry[i];
+		if (!sd_entry ||
+		    (!sd_entry->valid && setsd) ||
+		    (sd_entry->valid && !setsd))
+			continue;
+		if (setsd) {
+			pa = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
+			    sd_entry->u.pd_table.pd_page_addr.pa :
+			    sd_entry->u.bp.addr.pa;
+			i40iw_set_sd_entry(pa, i, sd_entry->entry_type,
+					   &sdinfo.entry[sdinfo.cnt]);
+		} else {
+			i40iw_clr_sd_entry(i, sd_entry->entry_type,
+					   &sdinfo.entry[sdinfo.cnt]);
+		}
+		sdinfo.cnt++;
+		if (sdinfo.cnt == I40IW_MAX_SD_ENTRIES) {
+			ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+			if (ret_code) {
+				i40iw_debug(dev, I40IW_DEBUG_HMC,
+					    "i40iw_hmc_sd_grp: sd_programming failed err=%d\n",
+					    ret_code);
+				return ret_code;
+			}
+			sdinfo.cnt = 0;
+		}
+	}
+	if (sdinfo.cnt)
+		ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+
+	return ret_code;
+}
+
+/**
+ * i40iw_vfdev_from_fpm - return vf dev ptr for hmc function id
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ */
+struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
+{
+	struct i40iw_vfdev *vf_dev = NULL;
+	u16 idx;
+
+	for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
+		if (dev->vf_dev[idx] &&
+		    ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
+			vf_dev = dev->vf_dev[idx];
+			break;
+		}
+	}
+	return vf_dev;
+}
+
+/**
+ * i40iw_vf_hmcinfo_from_fpm - get ptr to hmc for func_id
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ */
+struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
+						 u8 hmc_fn_id)
+{
+	struct i40iw_hmc_info *hmc_info = NULL;
+	u16 idx;
+
+	for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
+		if (dev->vf_dev[idx] &&
+		    ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
+			hmc_info = &dev->vf_dev[idx]->hmc_info;
+			break;
+		}
+	}
+	return hmc_info;
+}
+
+/**
+ * i40iw_hmc_finish_add_sd_reg - program sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: create obj info
+ */
+static enum i40iw_status_code i40iw_hmc_finish_add_sd_reg(struct i40iw_sc_dev *dev,
+							  struct i40iw_hmc_create_obj_info *info)
+{
+	if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+		return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+
+	if ((info->start_idx + info->count) >
+			info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+		return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+
+	if (!info->add_sd_cnt)
+		return 0;
+
+	return i40iw_hmc_sd_grp(dev, info->hmc_info,
+				info->hmc_info->sd_indexes[0],
+				info->add_sd_cnt, true);
+}
+
+/**
+ * i40iw_create_iw_hmc_obj - allocate backing store for hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to i40iw_hmc_iw_create_obj_info struct
+ *
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ */
+enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
+					       struct i40iw_hmc_create_obj_info *info)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+	u32 sd_idx, sd_lmt;
+	u32 pd_idx = 0, pd_lmt = 0;
+	u32 pd_idx1 = 0, pd_lmt1 = 0;
+	u32 i, j;
+	bool pd_error = false;
+	enum i40iw_status_code ret_code = 0;
+
+	if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+		return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+
+	if ((info->start_idx + info->count) >
+	    info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s: error type %u, start = %u, req cnt %u, cnt = %u\n",
+			    __func__, info->rsrc_type, info->start_idx, info->count,
+			    info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+		return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+	}
+
+	if (!dev->is_pf)
+		return i40iw_vchnl_vf_add_hmc_objs(dev, info->rsrc_type, 0, info->count);
+
+	i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+				  info->start_idx, info->count,
+				  &sd_idx, &sd_lmt);
+	if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+	    sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+		return I40IW_ERR_INVALID_SD_INDEX;
+	}
+	i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+				  info->start_idx, info->count, &pd_idx, &pd_lmt);
+
+	for (j = sd_idx; j < sd_lmt; j++) {
+		ret_code = i40iw_add_sd_table_entry(dev->hw, info->hmc_info,
+						    j,
+						    info->entry_type,
+						    I40IW_HMC_DIRECT_BP_SIZE);
+		if (ret_code)
+			goto exit_sd_error;
+		sd_entry = &info->hmc_info->sd_table.sd_entry[j];
+
+		if ((sd_entry->entry_type == I40IW_SD_TYPE_PAGED) &&
+		    ((dev->hmc_info == info->hmc_info) &&
+		     (info->rsrc_type != I40IW_HMC_IW_PBLE))) {
+			pd_idx1 = max(pd_idx, (j * I40IW_HMC_MAX_BP_COUNT));
+			pd_lmt1 = min(pd_lmt,
+				      (j + 1) * I40IW_HMC_MAX_BP_COUNT);
+			for (i = pd_idx1; i < pd_lmt1; i++) {
+				/* update the pd table entry */
+				ret_code = i40iw_add_pd_table_entry(dev->hw, info->hmc_info,
+								    i, NULL);
+				if (ret_code) {
+					pd_error = true;
+					break;
+				}
+			}
+			if (pd_error) {
+				while (i && (i > pd_idx1)) {
+					i40iw_remove_pd_bp(dev->hw, info->hmc_info, (i - 1),
+							   info->is_pf);
+					i--;
+				}
+			}
+		}
+		if (sd_entry->valid)
+			continue;
+
+		info->hmc_info->sd_indexes[info->add_sd_cnt] = (u16)j;
+		info->add_sd_cnt++;
+		sd_entry->valid = true;
+	}
+	return i40iw_hmc_finish_add_sd_reg(dev, info);
+
+exit_sd_error:
+	while (j && (j > sd_idx)) {
+		sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
+		switch (sd_entry->entry_type) {
+		case I40IW_SD_TYPE_PAGED:
+			pd_idx1 = max(pd_idx,
+				      (j - 1) * I40IW_HMC_MAX_BP_COUNT);
+			pd_lmt1 = min(pd_lmt, (j * I40IW_HMC_MAX_BP_COUNT));
+			for (i = pd_idx1; i < pd_lmt1; i++)
+				i40iw_prep_remove_pd_page(info->hmc_info, i);
+			break;
+		case I40IW_SD_TYPE_DIRECT:
+			i40iw_prep_remove_pd_page(info->hmc_info, (j - 1));
+			break;
+		default:
+			ret_code = I40IW_ERR_INVALID_SD_TYPE;
+			break;
+		}
+		j--;
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_finish_del_sd_reg - delete sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: dele obj info
+ * @reset: true if called before reset
+ */
+static enum i40iw_status_code i40iw_finish_del_sd_reg(struct i40iw_sc_dev *dev,
+						      struct i40iw_hmc_del_obj_info *info,
+						      bool reset)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+	enum i40iw_status_code ret_code = 0;
+	u32 i, sd_idx;
+	struct i40iw_dma_mem *mem;
+
+	if (dev->is_pf && !reset)
+		ret_code = i40iw_hmc_sd_grp(dev, info->hmc_info,
+					    info->hmc_info->sd_indexes[0],
+					    info->del_sd_cnt, false);
+
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd sd_grp\n", __func__);
+
+	for (i = 0; i < info->del_sd_cnt; i++) {
+		sd_idx = info->hmc_info->sd_indexes[i];
+		sd_entry = &info->hmc_info->sd_table.sd_entry[sd_idx];
+		if (!sd_entry)
+			continue;
+		mem = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
+			&sd_entry->u.pd_table.pd_page_addr :
+			&sd_entry->u.bp.addr;
+
+		if (!mem || !mem->va)
+			i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd mem\n", __func__);
+		else
+			i40iw_free_dma_mem(dev->hw, mem);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_del_iw_hmc_obj - remove pe hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to i40iw_hmc_del_obj_info struct
+ * @reset: true if called before reset
+ *
+ * This will de-populate the SDs and PDs.  It frees
+ * the memory for PDS and backing storage.  After this function is returned,
+ * caller should deallocate memory allocated previously for
+ * book-keeping information about PDs and backing storage.
+ */
+enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_del_obj_info *info,
+					    bool reset)
+{
+	struct i40iw_hmc_pd_table *pd_table;
+	u32 sd_idx, sd_lmt;
+	u32 pd_idx, pd_lmt, rel_pd_idx;
+	u32 i, j;
+	enum i40iw_status_code ret_code = 0;
+
+	if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s: error start_idx[%04d]  >= [type %04d].cnt[%04d]\n",
+			    __func__, info->start_idx, info->rsrc_type,
+			    info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+		return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+	}
+
+	if ((info->start_idx + info->count) >
+	    info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s: error start_idx[%04d] + count %04d  >= [type %04d].cnt[%04d]\n",
+			    __func__, info->start_idx, info->count,
+			    info->rsrc_type,
+			    info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+		return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+	}
+	if (!dev->is_pf) {
+		ret_code = i40iw_vchnl_vf_del_hmc_obj(dev, info->rsrc_type, 0,
+						      info->count);
+		if (info->rsrc_type != I40IW_HMC_IW_PBLE)
+			return ret_code;
+	}
+
+	i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+				  info->start_idx, info->count, &pd_idx, &pd_lmt);
+
+	for (j = pd_idx; j < pd_lmt; j++) {
+		sd_idx = j / I40IW_HMC_PD_CNT_IN_SD;
+
+		if (info->hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
+		    I40IW_SD_TYPE_PAGED)
+			continue;
+
+		rel_pd_idx = j % I40IW_HMC_PD_CNT_IN_SD;
+		pd_table = &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+		if (pd_table->pd_entry[rel_pd_idx].valid) {
+			ret_code = i40iw_remove_pd_bp(dev->hw, info->hmc_info, j,
+						      info->is_pf);
+			if (ret_code) {
+				i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error\n", __func__);
+				return ret_code;
+			}
+		}
+	}
+
+	i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+				  info->start_idx, info->count, &sd_idx, &sd_lmt);
+	if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+	    sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error invalid sd_idx\n", __func__);
+		return I40IW_ERR_INVALID_SD_INDEX;
+	}
+
+	for (i = sd_idx; i < sd_lmt; i++) {
+		if (!info->hmc_info->sd_table.sd_entry[i].valid)
+			continue;
+		switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
+		case I40IW_SD_TYPE_DIRECT:
+			ret_code = i40iw_prep_remove_sd_bp(info->hmc_info, i);
+			if (!ret_code) {
+				info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
+				info->del_sd_cnt++;
+			}
+			break;
+		case I40IW_SD_TYPE_PAGED:
+			ret_code = i40iw_prep_remove_pd_page(info->hmc_info, i);
+			if (!ret_code) {
+				info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
+				info->del_sd_cnt++;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	return i40iw_finish_del_sd_reg(dev, info, reset);
+}
+
+/**
+ * i40iw_add_sd_table_entry - Adds a segment descriptor to the table
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: segment descriptor index to manipulate
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ */
+enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
+						struct i40iw_hmc_info *hmc_info,
+						u32 sd_index,
+						enum i40iw_sd_entry_type type,
+						u64 direct_mode_sz)
+{
+	enum i40iw_status_code ret_code = 0;
+	struct i40iw_hmc_sd_entry *sd_entry;
+	bool dma_mem_alloc_done = false;
+	struct i40iw_dma_mem mem;
+	u64 alloc_len;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+	if (!sd_entry->valid) {
+		if (type == I40IW_SD_TYPE_PAGED)
+			alloc_len = I40IW_HMC_PAGED_BP_SIZE;
+		else
+			alloc_len = direct_mode_sz;
+
+		/* allocate a 4K pd page or 2M backing page */
+		ret_code = i40iw_allocate_dma_mem(hw, &mem, alloc_len,
+						  I40IW_HMC_PD_BP_BUF_ALIGNMENT);
+		if (ret_code)
+			goto exit;
+		dma_mem_alloc_done = true;
+		if (type == I40IW_SD_TYPE_PAGED) {
+			ret_code = i40iw_allocate_virt_mem(hw,
+							   &sd_entry->u.pd_table.pd_entry_virt_mem,
+							   sizeof(struct i40iw_hmc_pd_entry) * 512);
+			if (ret_code)
+				goto exit;
+			sd_entry->u.pd_table.pd_entry = (struct i40iw_hmc_pd_entry *)
+							 sd_entry->u.pd_table.pd_entry_virt_mem.va;
+
+			memcpy(&sd_entry->u.pd_table.pd_page_addr, &mem, sizeof(struct i40iw_dma_mem));
+		} else {
+			memcpy(&sd_entry->u.bp.addr, &mem, sizeof(struct i40iw_dma_mem));
+			sd_entry->u.bp.sd_pd_index = sd_index;
+		}
+
+		hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
+
+		I40IW_INC_SD_REFCNT(&hmc_info->sd_table);
+	}
+	if (sd_entry->entry_type == I40IW_SD_TYPE_DIRECT)
+		I40IW_INC_BP_REFCNT(&sd_entry->u.bp);
+exit:
+	if (ret_code)
+		if (dma_mem_alloc_done)
+			i40iw_free_dma_mem(hw, &mem);
+
+	return ret_code;
+}
+
+/**
+ * i40iw_add_pd_table_entry - Adds page descriptor to the specified table
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @pd_index: which page descriptor index to manipulate
+ * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one.
+ *
+ * This function:
+ *	1. Initializes the pd entry
+ *	2. Adds pd_entry in the pd_table
+ *	3. Mark the entry valid in i40iw_hmc_pd_entry structure
+ *	4. Initializes the pd_entry's ref count to 1
+ * assumptions:
+ *	1. The memory for pd should be pinned down, physically contiguous and
+ *	   aligned on 4K boundary and zeroed memory.
+ *	2. It should be 4K in size.
+ */
+enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
+						struct i40iw_hmc_info *hmc_info,
+						u32 pd_index,
+						struct i40iw_dma_mem *rsrc_pg)
+{
+	enum i40iw_status_code ret_code = 0;
+	struct i40iw_hmc_pd_table *pd_table;
+	struct i40iw_hmc_pd_entry *pd_entry;
+	struct i40iw_dma_mem mem;
+	struct i40iw_dma_mem *page = &mem;
+	u32 sd_idx, rel_pd_idx;
+	u64 *pd_addr;
+	u64 page_desc;
+
+	if (pd_index / I40IW_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt)
+		return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+
+	sd_idx = (pd_index / I40IW_HMC_PD_CNT_IN_SD);
+	if (hmc_info->sd_table.sd_entry[sd_idx].entry_type != I40IW_SD_TYPE_PAGED)
+		return 0;
+
+	rel_pd_idx = (pd_index % I40IW_HMC_PD_CNT_IN_SD);
+	pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+	pd_entry = &pd_table->pd_entry[rel_pd_idx];
+	if (!pd_entry->valid) {
+		if (rsrc_pg) {
+			pd_entry->rsrc_pg = true;
+			page = rsrc_pg;
+		} else {
+			ret_code = i40iw_allocate_dma_mem(hw, page,
+							  I40IW_HMC_PAGED_BP_SIZE,
+							  I40IW_HMC_PD_BP_BUF_ALIGNMENT);
+			if (ret_code)
+				return ret_code;
+			pd_entry->rsrc_pg = false;
+		}
+
+		memcpy(&pd_entry->bp.addr, page, sizeof(struct i40iw_dma_mem));
+		pd_entry->bp.sd_pd_index = pd_index;
+		pd_entry->bp.entry_type = I40IW_SD_TYPE_PAGED;
+		page_desc = page->pa | 0x1;
+
+		pd_addr = (u64 *)pd_table->pd_page_addr.va;
+		pd_addr += rel_pd_idx;
+
+		memcpy(pd_addr, &page_desc, sizeof(*pd_addr));
+
+		pd_entry->sd_index = sd_idx;
+		pd_entry->valid = true;
+		I40IW_INC_PD_REFCNT(pd_table);
+		if (hmc_info->hmc_fn_id < I40IW_FIRST_VF_FPM_ID)
+			I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, rel_pd_idx);
+		else if (hw->hmc.hmc_fn_id != hmc_info->hmc_fn_id)
+			I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, rel_pd_idx,
+						   hmc_info->hmc_fn_id);
+	}
+	I40IW_INC_BP_REFCNT(&pd_entry->bp);
+
+	return 0;
+}
+
+/**
+ * i40iw_remove_pd_bp - remove a backing page from a page descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ * @is_pf: distinguishes a VF from a PF
+ *
+ * This function:
+ *	1. Marks the entry in pd table (for paged address mode) or in sd table
+ *	   (for direct address mode) invalid.
+ *	2. Write to register PMPDINV to invalidate the backing page in FV cache
+ *	3. Decrement the ref count for the pd _entry
+ * assumptions:
+ *	1. Caller can deallocate the memory used by backing storage after this
+ *	   function returns.
+ */
+enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
+					  struct i40iw_hmc_info *hmc_info,
+					  u32 idx,
+					  bool is_pf)
+{
+	struct i40iw_hmc_pd_entry *pd_entry;
+	struct i40iw_hmc_pd_table *pd_table;
+	struct i40iw_hmc_sd_entry *sd_entry;
+	u32 sd_idx, rel_pd_idx;
+	struct i40iw_dma_mem *mem;
+	u64 *pd_addr;
+
+	sd_idx = idx / I40IW_HMC_PD_CNT_IN_SD;
+	rel_pd_idx = idx % I40IW_HMC_PD_CNT_IN_SD;
+	if (sd_idx >= hmc_info->sd_table.sd_cnt)
+		return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+	if (sd_entry->entry_type != I40IW_SD_TYPE_PAGED)
+		return I40IW_ERR_INVALID_SD_TYPE;
+
+	pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+	pd_entry = &pd_table->pd_entry[rel_pd_idx];
+	I40IW_DEC_BP_REFCNT(&pd_entry->bp);
+	if (pd_entry->bp.ref_cnt)
+		return 0;
+
+	pd_entry->valid = false;
+	I40IW_DEC_PD_REFCNT(pd_table);
+	pd_addr = (u64 *)pd_table->pd_page_addr.va;
+	pd_addr += rel_pd_idx;
+	memset(pd_addr, 0, sizeof(u64));
+	if (is_pf)
+		I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
+	else
+		I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, idx,
+					   hmc_info->hmc_fn_id);
+
+	if (!pd_entry->rsrc_pg) {
+		mem = &pd_entry->bp.addr;
+		if (!mem || !mem->va)
+			return I40IW_ERR_PARAM;
+		i40iw_free_dma_mem(hw, mem);
+	}
+	if (!pd_table->ref_cnt)
+		i40iw_free_virt_mem(hw, &pd_table->pd_entry_virt_mem);
+
+	return 0;
+}
+
+/**
+ * i40iw_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ */
+enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[idx];
+	I40IW_DEC_BP_REFCNT(&sd_entry->u.bp);
+	if (sd_entry->u.bp.ref_cnt)
+		return I40IW_ERR_NOT_READY;
+
+	I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
+	sd_entry->valid = false;
+
+	return 0;
+}
+
+/**
+ * i40iw_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ */
+enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info,
+						 u32 idx)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+	if (sd_entry->u.pd_table.ref_cnt)
+		return I40IW_ERR_NOT_READY;
+
+	sd_entry->valid = false;
+	I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
+
+	return 0;
+}
+
+/**
+ * i40iw_pf_init_vfhmc -
+ * @vf_cnt_array: array of cnt values of iwarp hmc objects
+ * @vf_hmc_fn_id: hmc function id ofr vf driver
+ * @dev: pointer to i40iw_dev struct
+ *
+ * Called by pf driver to initialize hmc_info for vf driver instance.
+ */
+enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev,
+					   u8 vf_hmc_fn_id,
+					   u32 *vf_cnt_array)
+{
+	struct i40iw_hmc_info *hmc_info;
+	enum i40iw_status_code ret_code = 0;
+	u32 i;
+
+	if ((vf_hmc_fn_id < I40IW_FIRST_VF_FPM_ID) ||
+	    (vf_hmc_fn_id >= I40IW_FIRST_VF_FPM_ID +
+	     I40IW_MAX_PE_ENABLED_VF_COUNT)) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: invalid vf_hmc_fn_id  0x%x\n",
+			    __func__, vf_hmc_fn_id);
+		return I40IW_ERR_INVALID_HMCFN_ID;
+	}
+
+	ret_code = i40iw_sc_init_iw_hmc(dev, vf_hmc_fn_id);
+	if (ret_code)
+		return ret_code;
+
+	hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, vf_hmc_fn_id);
+
+	for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
+		if (vf_cnt_array)
+			hmc_info->hmc_obj[i].cnt =
+			    vf_cnt_array[i - I40IW_HMC_IW_QP];
+		else
+			hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+
+	return 0;
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.h b/drivers/infiniband/hw/i40iw/i40iw_hmc.h
new file mode 100644
index 0000000..4c3fdd8
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.h

@@ -0,0 +1,241 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_HMC_H
+#define I40IW_HMC_H
+
+#include "i40iw_d.h"
+
+struct i40iw_hw;
+enum i40iw_status_code;
+
+#define I40IW_HMC_MAX_BP_COUNT 512
+#define I40IW_MAX_SD_ENTRIES 11
+#define I40IW_HW_DBG_HMC_INVALID_BP_MARK     0xCA
+
+#define I40IW_HMC_INFO_SIGNATURE	0x484D5347
+#define I40IW_HMC_PD_CNT_IN_SD		512
+#define I40IW_HMC_DIRECT_BP_SIZE	0x200000
+#define I40IW_HMC_MAX_SD_COUNT		4096
+#define I40IW_HMC_PAGED_BP_SIZE		4096
+#define I40IW_HMC_PD_BP_BUF_ALIGNMENT	4096
+#define I40IW_FIRST_VF_FPM_ID		16
+#define FPM_MULTIPLIER			1024
+
+#define I40IW_INC_SD_REFCNT(sd_table)   ((sd_table)->ref_cnt++)
+#define I40IW_INC_PD_REFCNT(pd_table)   ((pd_table)->ref_cnt++)
+#define I40IW_INC_BP_REFCNT(bp)         ((bp)->ref_cnt++)
+
+#define I40IW_DEC_SD_REFCNT(sd_table)   ((sd_table)->ref_cnt--)
+#define I40IW_DEC_PD_REFCNT(pd_table)   ((pd_table)->ref_cnt--)
+#define I40IW_DEC_BP_REFCNT(bp)         ((bp)->ref_cnt--)
+
+/**
+ * I40IW_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ */
+#define I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx)                  \
+	i40iw_wr32((hw), I40E_PFHMC_PDINV,                                    \
+		(((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |             \
+		(0x1 << I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT) | \
+		((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+/**
+ * I40IW_INVALIDATE_VF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ * @hmc_fn_id: VF's function id
+ */
+#define I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, pd_idx, hmc_fn_id)        \
+	i40iw_wr32(hw, I40E_GLHMC_VFPDINV(hmc_fn_id - I40IW_FIRST_VF_FPM_ID),  \
+	     ((sd_idx << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |              \
+	      (pd_idx << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+struct i40iw_hmc_obj_info {
+	u64 base;
+	u32 max_cnt;
+	u32 cnt;
+	u64 size;
+};
+
+enum i40iw_sd_entry_type {
+	I40IW_SD_TYPE_INVALID = 0,
+	I40IW_SD_TYPE_PAGED = 1,
+	I40IW_SD_TYPE_DIRECT = 2
+};
+
+struct i40iw_hmc_bp {
+	enum i40iw_sd_entry_type entry_type;
+	struct i40iw_dma_mem addr;
+	u32 sd_pd_index;
+	u32 ref_cnt;
+};
+
+struct i40iw_hmc_pd_entry {
+	struct i40iw_hmc_bp bp;
+	u32 sd_index;
+	bool rsrc_pg;
+	bool valid;
+};
+
+struct i40iw_hmc_pd_table {
+	struct i40iw_dma_mem pd_page_addr;
+	struct i40iw_hmc_pd_entry *pd_entry;
+	struct i40iw_virt_mem pd_entry_virt_mem;
+	u32 ref_cnt;
+	u32 sd_index;
+};
+
+struct i40iw_hmc_sd_entry {
+	enum i40iw_sd_entry_type entry_type;
+	bool valid;
+
+	union {
+		struct i40iw_hmc_pd_table pd_table;
+		struct i40iw_hmc_bp bp;
+	} u;
+};
+
+struct i40iw_hmc_sd_table {
+	struct i40iw_virt_mem addr;
+	u32 sd_cnt;
+	u32 ref_cnt;
+	struct i40iw_hmc_sd_entry *sd_entry;
+};
+
+struct i40iw_hmc_info {
+	u32 signature;
+	u8 hmc_fn_id;
+	u16 first_sd_index;
+
+	struct i40iw_hmc_obj_info *hmc_obj;
+	struct i40iw_virt_mem hmc_obj_virt_mem;
+	struct i40iw_hmc_sd_table sd_table;
+	u16 sd_indexes[I40IW_HMC_MAX_SD_COUNT];
+};
+
+struct update_sd_entry {
+	u64 cmd;
+	u64 data;
+};
+
+struct i40iw_update_sds_info {
+	u32 cnt;
+	u8 hmc_fn_id;
+	struct update_sd_entry entry[I40IW_MAX_SD_ENTRIES];
+};
+
+struct i40iw_ccq_cqe_info;
+struct i40iw_hmc_fcn_info {
+	void (*callback_fcn)(struct i40iw_sc_dev *, void *,
+			     struct i40iw_ccq_cqe_info *);
+	void *cqp_callback_param;
+	u32 vf_id;
+	u16 iw_vf_idx;
+	bool free_fcn;
+};
+
+enum i40iw_hmc_rsrc_type {
+	I40IW_HMC_IW_QP = 0,
+	I40IW_HMC_IW_CQ = 1,
+	I40IW_HMC_IW_SRQ = 2,
+	I40IW_HMC_IW_HTE = 3,
+	I40IW_HMC_IW_ARP = 4,
+	I40IW_HMC_IW_APBVT_ENTRY = 5,
+	I40IW_HMC_IW_MR = 6,
+	I40IW_HMC_IW_XF = 7,
+	I40IW_HMC_IW_XFFL = 8,
+	I40IW_HMC_IW_Q1 = 9,
+	I40IW_HMC_IW_Q1FL = 10,
+	I40IW_HMC_IW_TIMER = 11,
+	I40IW_HMC_IW_FSIMC = 12,
+	I40IW_HMC_IW_FSIAV = 13,
+	I40IW_HMC_IW_PBLE = 14,
+	I40IW_HMC_IW_MAX = 15,
+};
+
+struct i40iw_hmc_create_obj_info {
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_virt_mem add_sd_virt_mem;
+	u32 rsrc_type;
+	u32 start_idx;
+	u32 count;
+	u32 add_sd_cnt;
+	enum i40iw_sd_entry_type entry_type;
+	bool is_pf;
+};
+
+struct i40iw_hmc_del_obj_info {
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_virt_mem del_sd_virt_mem;
+	u32 rsrc_type;
+	u32 start_idx;
+	u32 count;
+	u32 del_sd_cnt;
+	bool is_pf;
+};
+
+enum i40iw_status_code i40iw_copy_dma_mem(struct i40iw_hw *hw, void *dest_buf,
+					  struct i40iw_dma_mem *src_mem, u64 src_offset, u64 size);
+enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
+					       struct i40iw_hmc_create_obj_info *info);
+enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_del_obj_info *info,
+					    bool reset);
+enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev, u8 hmc_fn_id,
+					u64 pa, u32 sd_idx, enum i40iw_sd_entry_type type,
+					bool setsd);
+enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
+					      struct i40iw_update_sds_info *info);
+struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id);
+struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
+						 u8 hmc_fn_id);
+enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
+						struct i40iw_hmc_info *hmc_info, u32 sd_index,
+						enum i40iw_sd_entry_type type, u64 direct_mode_sz);
+enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
+						struct i40iw_hmc_info *hmc_info, u32 pd_index,
+						struct i40iw_dma_mem *rsrc_pg);
+enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
+					  struct i40iw_hmc_info *hmc_info, u32 idx, bool is_pf);
+enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx);
+enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info, u32 idx);
+
+#define     ENTER_SHARED_FUNCTION()
+#define     EXIT_SHARED_FUNCTION()
+
+#endif				/* I40IW_HMC_H */

diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
new file mode 100644
index 0000000..9fd3024
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c

@@ -0,0 +1,730 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+
+#include "i40iw.h"
+
+/**
+ * i40iw_initialize_hw_resources - initialize hw resource during open
+ * @iwdev: iwarp device
+ */
+u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev)
+{
+	unsigned long num_pds;
+	u32 resources_size;
+	u32 max_mr;
+	u32 max_qp;
+	u32 max_cq;
+	u32 arp_table_size;
+	u32 mrdrvbits;
+	void *resource_ptr;
+
+	max_qp = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt;
+	max_cq = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+	max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt;
+	arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt;
+	iwdev->max_cqe = 0xFFFFF;
+	num_pds = max_qp * 4;
+	resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size;
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
+	resources_size += sizeof(struct i40iw_qp **) * max_qp;
+	iwdev->mem_resources = kzalloc(resources_size, GFP_KERNEL);
+
+	if (!iwdev->mem_resources)
+		return -ENOMEM;
+
+	iwdev->max_qp = max_qp;
+	iwdev->max_mr = max_mr;
+	iwdev->max_cq = max_cq;
+	iwdev->max_pd = num_pds;
+	iwdev->arp_table_size = arp_table_size;
+	iwdev->arp_table = (struct i40iw_arp_entry *)iwdev->mem_resources;
+	resource_ptr = iwdev->mem_resources + (sizeof(struct i40iw_arp_entry) * arp_table_size);
+
+	iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+	    IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+	iwdev->allocated_qps = resource_ptr;
+	iwdev->allocated_cqs = &iwdev->allocated_qps[BITS_TO_LONGS(max_qp)];
+	iwdev->allocated_mrs = &iwdev->allocated_cqs[BITS_TO_LONGS(max_cq)];
+	iwdev->allocated_pds = &iwdev->allocated_mrs[BITS_TO_LONGS(max_mr)];
+	iwdev->allocated_arps = &iwdev->allocated_pds[BITS_TO_LONGS(num_pds)];
+	iwdev->qp_table = (struct i40iw_qp **)(&iwdev->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
+	set_bit(0, iwdev->allocated_mrs);
+	set_bit(0, iwdev->allocated_qps);
+	set_bit(0, iwdev->allocated_cqs);
+	set_bit(0, iwdev->allocated_pds);
+	set_bit(0, iwdev->allocated_arps);
+
+	/* Following for ILQ/IEQ */
+	set_bit(1, iwdev->allocated_qps);
+	set_bit(1, iwdev->allocated_cqs);
+	set_bit(1, iwdev->allocated_pds);
+	set_bit(2, iwdev->allocated_cqs);
+	set_bit(2, iwdev->allocated_pds);
+
+	spin_lock_init(&iwdev->resource_lock);
+	mrdrvbits = 24 - get_count_order(iwdev->max_mr);
+	iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
+	return 0;
+}
+
+/**
+ * i40iw_cqp_ce_handler - handle cqp completions
+ * @iwdev: iwarp device
+ * @arm: flag to arm after completions
+ * @cq: cq for cqp completions
+ */
+static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq, bool arm)
+{
+	struct i40iw_cqp_request *cqp_request;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	u32 cqe_count = 0;
+	struct i40iw_ccq_cqe_info info;
+	int ret;
+
+	do {
+		memset(&info, 0, sizeof(info));
+		ret = dev->ccq_ops->ccq_get_cqe_info(cq, &info);
+		if (ret)
+			break;
+		cqp_request = (struct i40iw_cqp_request *)(unsigned long)info.scratch;
+		if (info.error)
+			i40iw_pr_err("opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n",
+				     info.op_code, info.maj_err_code, info.min_err_code);
+		if (cqp_request) {
+			cqp_request->compl_info.maj_err_code = info.maj_err_code;
+			cqp_request->compl_info.min_err_code = info.min_err_code;
+			cqp_request->compl_info.op_ret_val = info.op_ret_val;
+			cqp_request->compl_info.error = info.error;
+
+			if (cqp_request->waiting) {
+				cqp_request->request_done = true;
+				wake_up(&cqp_request->waitq);
+				i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+			} else {
+				if (cqp_request->callback_fcn)
+					cqp_request->callback_fcn(cqp_request, 1);
+				i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+			}
+		}
+
+		cqe_count++;
+	} while (1);
+
+	if (arm && cqe_count) {
+		i40iw_process_bh(dev);
+		dev->ccq_ops->ccq_arm(cq);
+	}
+}
+
+/**
+ * i40iw_iwarp_ce_handler - handle iwarp completions
+ * @iwdev: iwarp device
+ * @iwcp: iwarp cq receiving event
+ */
+static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev,
+				   struct i40iw_sc_cq *iwcq)
+{
+	struct i40iw_cq *i40iwcq = iwcq->back_cq;
+
+	if (i40iwcq->ibcq.comp_handler)
+		i40iwcq->ibcq.comp_handler(&i40iwcq->ibcq,
+					   i40iwcq->ibcq.cq_context);
+}
+
+/**
+ * i40iw_puda_ce_handler - handle puda completion events
+ * @iwdev: iwarp device
+ * @cq: puda completion q for event
+ */
+static void i40iw_puda_ce_handler(struct i40iw_device *iwdev,
+				  struct i40iw_sc_cq *cq)
+{
+	struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)&iwdev->sc_dev;
+	enum i40iw_status_code status;
+	u32 compl_error;
+
+	do {
+		status = i40iw_puda_poll_completion(dev, cq, &compl_error);
+		if (status == I40IW_ERR_QUEUE_EMPTY)
+			break;
+		if (status) {
+			i40iw_pr_err("puda  status = %d\n", status);
+			break;
+		}
+		if (compl_error) {
+			i40iw_pr_err("puda compl_err  =0x%x\n", compl_error);
+			break;
+		}
+	} while (1);
+
+	dev->ccq_ops->ccq_arm(cq);
+}
+
+/**
+ * i40iw_process_ceq - handle ceq for completions
+ * @iwdev: iwarp device
+ * @ceq: ceq having cq for completion
+ */
+void i40iw_process_ceq(struct i40iw_device *iwdev, struct i40iw_ceq *ceq)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_sc_ceq *sc_ceq;
+	struct i40iw_sc_cq *cq;
+	bool arm = true;
+
+	sc_ceq = &ceq->sc_ceq;
+	do {
+		cq = dev->ceq_ops->process_ceq(dev, sc_ceq);
+		if (!cq)
+			break;
+
+		if (cq->cq_type == I40IW_CQ_TYPE_CQP)
+			i40iw_cqp_ce_handler(iwdev, cq, arm);
+		else if (cq->cq_type == I40IW_CQ_TYPE_IWARP)
+			i40iw_iwarp_ce_handler(iwdev, cq);
+		else if ((cq->cq_type == I40IW_CQ_TYPE_ILQ) ||
+			 (cq->cq_type == I40IW_CQ_TYPE_IEQ))
+			i40iw_puda_ce_handler(iwdev, cq);
+	} while (1);
+}
+
+/**
+ * i40iw_next_iw_state - modify qp state
+ * @iwqp: iwarp qp to modify
+ * @state: next state for qp
+ * @del_hash: del hash
+ * @term: term message
+ * @termlen: length of term message
+ */
+void i40iw_next_iw_state(struct i40iw_qp *iwqp,
+			 u8 state,
+			 u8 del_hash,
+			 u8 term,
+			 u8 termlen)
+{
+	struct i40iw_modify_qp_info info;
+
+	memset(&info, 0, sizeof(info));
+	info.next_iwarp_state = state;
+	info.remove_hash_idx = del_hash;
+	info.cq_num_valid = true;
+	info.arp_cache_idx_valid = true;
+	info.dont_send_term = true;
+	info.dont_send_fin = true;
+	info.termlen = termlen;
+
+	if (term & I40IWQP_TERM_SEND_TERM_ONLY)
+		info.dont_send_term = false;
+	if (term & I40IWQP_TERM_SEND_FIN_ONLY)
+		info.dont_send_fin = false;
+	if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
+		info.reset_tcp_conn = true;
+	i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
+}
+
+/**
+ * i40iw_process_aeq - handle aeq events
+ * @iwdev: iwarp device
+ */
+void i40iw_process_aeq(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_aeq *aeq = &iwdev->aeq;
+	struct i40iw_sc_aeq *sc_aeq = &aeq->sc_aeq;
+	struct i40iw_aeqe_info aeinfo;
+	struct i40iw_aeqe_info *info = &aeinfo;
+	int ret;
+	struct i40iw_qp *iwqp = NULL;
+	struct i40iw_sc_cq *cq = NULL;
+	struct i40iw_cq *iwcq = NULL;
+	struct i40iw_sc_qp *qp = NULL;
+	struct i40iw_qp_host_ctx_info *ctx_info = NULL;
+	unsigned long flags;
+
+	u32 aeqcnt = 0;
+
+	if (!sc_aeq->size)
+		return;
+
+	do {
+		memset(info, 0, sizeof(*info));
+		ret = dev->aeq_ops->get_next_aeqe(sc_aeq, info);
+		if (ret)
+			break;
+
+		aeqcnt++;
+		i40iw_debug(dev, I40IW_DEBUG_AEQ,
+			    "%s ae_id = 0x%x bool qp=%d qp_id = %d\n",
+			    __func__, info->ae_id, info->qp, info->qp_cq_id);
+		if (info->qp) {
+			iwqp = iwdev->qp_table[info->qp_cq_id];
+			if (!iwqp) {
+				i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
+				continue;
+			}
+			qp = &iwqp->sc_qp;
+			spin_lock_irqsave(&iwqp->lock, flags);
+			iwqp->hw_tcp_state = info->tcp_state;
+			iwqp->hw_iwarp_state = info->iwarp_state;
+			iwqp->last_aeq = info->ae_id;
+			spin_unlock_irqrestore(&iwqp->lock, flags);
+			ctx_info = &iwqp->ctx_info;
+			ctx_info->err_rq_idx_valid = true;
+		} else {
+			if (info->ae_id != I40IW_AE_CQ_OPERATION_ERROR)
+				continue;
+		}
+
+		switch (info->ae_id) {
+		case I40IW_AE_LLP_FIN_RECEIVED:
+			if (qp->term_flags)
+				continue;
+			if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+				iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
+				if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
+				    (iwqp->ibqp_state == IB_QPS_RTS)) {
+					i40iw_next_iw_state(iwqp,
+							    I40IW_QP_STATE_CLOSING, 0, 0, 0);
+					i40iw_cm_disconn(iwqp);
+				}
+				iwqp->cm_id->add_ref(iwqp->cm_id);
+				i40iw_schedule_cm_timer(iwqp->cm_node,
+							(struct i40iw_puda_buf *)iwqp,
+							I40IW_TIMER_TYPE_CLOSE, 1, 0);
+			}
+			break;
+		case I40IW_AE_LLP_CLOSE_COMPLETE:
+			if (qp->term_flags)
+				i40iw_terminate_done(qp, 0);
+			else
+				i40iw_cm_disconn(iwqp);
+			break;
+		case I40IW_AE_RESET_SENT:
+			i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 1, 0, 0);
+			i40iw_cm_disconn(iwqp);
+			break;
+		case I40IW_AE_LLP_CONNECTION_RESET:
+			if (atomic_read(&iwqp->close_timer_started))
+				continue;
+			i40iw_cm_disconn(iwqp);
+			break;
+		case I40IW_AE_TERMINATE_SENT:
+			i40iw_terminate_send_fin(qp);
+			break;
+		case I40IW_AE_LLP_TERMINATE_RECEIVED:
+			i40iw_terminate_received(qp, info);
+			break;
+		case I40IW_AE_CQ_OPERATION_ERROR:
+			i40iw_pr_err("Processing an iWARP related AE for CQ misc = 0x%04X\n",
+				     info->ae_id);
+			cq = (struct i40iw_sc_cq *)(unsigned long)info->compl_ctx;
+			iwcq = (struct i40iw_cq *)cq->back_cq;
+
+			if (iwcq->ibcq.event_handler) {
+				struct ib_event ibevent;
+
+				ibevent.device = iwcq->ibcq.device;
+				ibevent.event = IB_EVENT_CQ_ERR;
+				ibevent.element.cq = &iwcq->ibcq;
+				iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context);
+			}
+			break;
+		case I40IW_AE_PRIV_OPERATION_DENIED:
+		case I40IW_AE_STAG_ZERO_INVALID:
+		case I40IW_AE_IB_RREQ_AND_Q1_FULL:
+		case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
+		case I40IW_AE_DDP_UBE_INVALID_MO:
+		case I40IW_AE_DDP_UBE_INVALID_QN:
+		case I40IW_AE_DDP_NO_L_BIT:
+		case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+		case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+		case I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST:
+		case I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+		case I40IW_AE_INVALID_ARP_ENTRY:
+		case I40IW_AE_INVALID_TCP_OPTION_RCVD:
+		case I40IW_AE_STALE_ARP_ENTRY:
+		case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+		case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
+		case I40IW_AE_LLP_SYN_RECEIVED:
+		case I40IW_AE_LLP_TOO_MANY_RETRIES:
+		case I40IW_AE_LLP_DOUBT_REACHABILITY:
+		case I40IW_AE_LCE_QP_CATASTROPHIC:
+		case I40IW_AE_LCE_FUNCTION_CATASTROPHIC:
+		case I40IW_AE_LCE_CQ_CATASTROPHIC:
+		case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
+		case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH:
+		case I40IW_AE_QP_SUSPEND_COMPLETE:
+			ctx_info->err_rq_idx_valid = false;
+		default:
+				if (!info->sq && ctx_info->err_rq_idx_valid) {
+					ctx_info->err_rq_idx = info->wqe_idx;
+					ctx_info->tcp_info_valid = false;
+					ctx_info->iwarp_info_valid = false;
+					ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+									     iwqp->host_ctx.va,
+									     ctx_info);
+				}
+				i40iw_terminate_connection(qp, info);
+				break;
+		}
+	} while (1);
+
+	if (aeqcnt)
+		dev->aeq_ops->repost_aeq_entries(dev, aeqcnt);
+}
+
+/**
+ * i40iw_manage_apbvt - add or delete tcp port
+ * @iwdev: iwarp device
+ * @accel_local_port: port for apbvt
+ * @add_port: add or delete port
+ */
+int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port)
+{
+	struct i40iw_apbvt_info *info;
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.manage_apbvt_entry.info;
+
+	memset(info, 0, sizeof(*info));
+	info->add = add_port;
+	info->port = cpu_to_le16(accel_local_port);
+
+	cqp_info->cqp_cmd = OP_MANAGE_APBVT_ENTRY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Manage APBVT entry fail");
+	return status;
+}
+
+/**
+ * i40iw_manage_arp_cache - manage hw arp cache
+ * @iwdev: iwarp device
+ * @mac_addr: mac address ptr
+ * @ip_addr: ip addr for arp cache
+ * @action: add, delete or modify
+ */
+void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
+			    unsigned char *mac_addr,
+			    __be32 *ip_addr,
+			    bool ipv4,
+			    u32 action)
+{
+	struct i40iw_add_arp_cache_entry_info *info;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	int arp_index;
+
+	arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action);
+	if (arp_index == -1)
+		return;
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return;
+
+	cqp_info = &cqp_request->info;
+	if (action == I40IW_ARP_ADD) {
+		cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY;
+		info = &cqp_info->in.u.add_arp_cache_entry.info;
+		memset(info, 0, sizeof(*info));
+		info->arp_index = cpu_to_le32(arp_index);
+		info->permanent = true;
+		ether_addr_copy(info->mac_addr, mac_addr);
+		cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+		cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+	} else {
+		cqp_info->cqp_cmd = OP_DELETE_ARP_CACHE_ENTRY;
+		cqp_info->in.u.del_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+		cqp_info->in.u.del_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+		cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index;
+	}
+
+	cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+	cqp_info->post_sq = 1;
+	if (i40iw_handle_cqp_op(iwdev, cqp_request))
+		i40iw_pr_err("CQP-OP Add/Del Arp Cache entry fail");
+}
+
+/**
+ * i40iw_send_syn_cqp_callback - do syn/ack after qhash
+ * @cqp_request: qhash cqp completion
+ * @send_ack: flag send ack
+ */
+static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u32 send_ack)
+{
+	i40iw_send_syn(cqp_request->param, send_ack);
+}
+
+/**
+ * i40iw_manage_qhash - add or modify qhash
+ * @iwdev: iwarp device
+ * @cminfo: cm info for qhash
+ * @etype: type (syn or quad)
+ * @mtype: type of qhash
+ * @cmnode: cmnode associated with connection
+ * @wait: wait for completion
+ * @user_pri:user pri of the connection
+ */
+enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
+					  struct i40iw_cm_info *cminfo,
+					  enum i40iw_quad_entry_type etype,
+					  enum i40iw_quad_hash_manage_type mtype,
+					  void *cmnode,
+					  bool wait)
+{
+	struct i40iw_qhash_table_info *info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, wait);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.manage_qhash_table_entry.info;
+	memset(info, 0, sizeof(*info));
+
+	info->manage = mtype;
+	info->entry_type = etype;
+	if (cminfo->vlan_id != 0xFFFF) {
+		info->vlan_valid = true;
+		info->vlan_id = cpu_to_le16(cminfo->vlan_id);
+	} else {
+		info->vlan_valid = false;
+	}
+
+	info->ipv4_valid = cminfo->ipv4;
+	ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
+	info->qp_num = cpu_to_le32(dev->ilq->qp_id);
+	info->dest_port = cpu_to_le16(cminfo->loc_port);
+	info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]);
+	info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]);
+	info->dest_ip[2] = cpu_to_le32(cminfo->loc_addr[2]);
+	info->dest_ip[3] = cpu_to_le32(cminfo->loc_addr[3]);
+	if (etype == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
+		info->src_port = cpu_to_le16(cminfo->rem_port);
+		info->src_ip[0] = cpu_to_le32(cminfo->rem_addr[0]);
+		info->src_ip[1] = cpu_to_le32(cminfo->rem_addr[1]);
+		info->src_ip[2] = cpu_to_le32(cminfo->rem_addr[2]);
+		info->src_ip[3] = cpu_to_le32(cminfo->rem_addr[3]);
+	}
+	if (cmnode) {
+		cqp_request->callback_fcn = i40iw_send_syn_cqp_callback;
+		cqp_request->param = (void *)cmnode;
+	}
+
+	if (info->ipv4_valid)
+		i40iw_debug(dev, I40IW_DEBUG_CM,
+			    "%s:%s IP=%pI4, port=%d, mac=%pM, vlan_id=%d\n",
+			    __func__, (!mtype) ? "DELETE" : "ADD",
+			    info->dest_ip,
+			    info->dest_port, info->mac_addr, cminfo->vlan_id);
+	else
+		i40iw_debug(dev, I40IW_DEBUG_CM,
+			    "%s:%s IP=%pI6, port=%d, mac=%pM, vlan_id=%d\n",
+			    __func__, (!mtype) ? "DELETE" : "ADD",
+			    info->dest_ip,
+			    info->dest_port, info->mac_addr, cminfo->vlan_id);
+	cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request;
+	cqp_info->cqp_cmd = OP_MANAGE_QHASH_TABLE_ENTRY;
+	cqp_info->post_sq = 1;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Manage Qhash Entry fail");
+	return status;
+}
+
+/**
+ * i40iw_hw_flush_wqes - flush qp's wqe
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ * @info: info for flush
+ * @wait: flag wait for completion
+ */
+enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
+					   struct i40iw_sc_qp *qp,
+					   struct i40iw_qp_flush_info *info,
+					   bool wait)
+{
+	enum i40iw_status_code status;
+	struct i40iw_qp_flush_info *hw_info;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+
+	cqp_info = &cqp_request->info;
+	hw_info = &cqp_request->info.in.u.qp_flush_wqes.info;
+	memcpy(hw_info, info, sizeof(*hw_info));
+
+	cqp_info->cqp_cmd = OP_QP_FLUSH_WQES;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_flush_wqes.qp = qp;
+	cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Flush WQE's fail");
+	return status;
+}
+
+/**
+ * i40iw_hw_manage_vf_pble_bp - manage vf pbles
+ * @iwdev: iwarp device
+ * @info: info for managing pble
+ * @wait: flag wait for completion
+ */
+enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
+						  struct i40iw_manage_vf_pble_info *info,
+						  bool wait)
+{
+	enum i40iw_status_code status;
+	struct i40iw_manage_vf_pble_info *hw_info;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	if ((iwdev->init_state < CCQ_CREATED) && wait)
+		wait = false;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+
+	cqp_info = &cqp_request->info;
+	hw_info = &cqp_request->info.in.u.manage_vf_pble_bp.info;
+	memcpy(hw_info, info, sizeof(*hw_info));
+
+	cqp_info->cqp_cmd = OP_MANAGE_VF_PBLE_BP;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.manage_vf_pble_bp.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_vf_pble_bp.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Manage VF pble_bp fail");
+	return status;
+}
+
+/**
+ * i40iw_get_ib_wc - return change flush code to IB's
+ * @opcode: iwarp flush code
+ */
+static enum ib_wc_status i40iw_get_ib_wc(enum i40iw_flush_opcode opcode)
+{
+	switch (opcode) {
+	case FLUSH_PROT_ERR:
+		return IB_WC_LOC_PROT_ERR;
+	case FLUSH_REM_ACCESS_ERR:
+		return IB_WC_REM_ACCESS_ERR;
+	case FLUSH_LOC_QP_OP_ERR:
+		return IB_WC_LOC_QP_OP_ERR;
+	case FLUSH_REM_OP_ERR:
+		return IB_WC_REM_OP_ERR;
+	case FLUSH_LOC_LEN_ERR:
+		return IB_WC_LOC_LEN_ERR;
+	case FLUSH_GENERAL_ERR:
+		return IB_WC_GENERAL_ERR;
+	case FLUSH_FATAL_ERR:
+	default:
+		return IB_WC_FATAL_ERR;
+	}
+}
+
+/**
+ * i40iw_set_flush_info - set flush info
+ * @pinfo: set flush info
+ * @min: minor err
+ * @maj: major err
+ * @opcode: flush error code
+ */
+static void i40iw_set_flush_info(struct i40iw_qp_flush_info *pinfo,
+				 u16 *min,
+				 u16 *maj,
+				 enum i40iw_flush_opcode opcode)
+{
+	*min = (u16)i40iw_get_ib_wc(opcode);
+	*maj = CQE_MAJOR_DRV;
+	pinfo->userflushcode = true;
+}
+
+/**
+ * i40iw_flush_wqes - flush wqe for qp
+ * @iwdev: iwarp device
+ * @iwqp: qp to flush wqes
+ */
+void i40iw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_qp *iwqp)
+{
+	struct i40iw_qp_flush_info info;
+	struct i40iw_qp_flush_info *pinfo = &info;
+
+	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+	memset(pinfo, 0, sizeof(*pinfo));
+	info.sq = true;
+	info.rq = true;
+	if (qp->term_flags) {
+		i40iw_set_flush_info(pinfo, &pinfo->sq_minor_code,
+				     &pinfo->sq_major_code, qp->flush_code);
+		i40iw_set_flush_info(pinfo, &pinfo->rq_minor_code,
+				     &pinfo->rq_major_code, qp->flush_code);
+	}
+	(void)i40iw_hw_flush_wqes(iwdev, &iwqp->sc_qp, &info, true);
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
new file mode 100644
index 0000000..90e5af2
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c

@@ -0,0 +1,1910 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <net/addrconf.h>
+
+#include "i40iw.h"
+#include "i40iw_register.h"
+#include <net/netevent.h>
+#define CLIENT_IW_INTERFACE_VERSION_MAJOR 0
+#define CLIENT_IW_INTERFACE_VERSION_MINOR 01
+#define CLIENT_IW_INTERFACE_VERSION_BUILD 00
+
+#define DRV_VERSION_MAJOR 0
+#define DRV_VERSION_MINOR 5
+#define DRV_VERSION_BUILD 123
+#define DRV_VERSION	__stringify(DRV_VERSION_MAJOR) "."		\
+	__stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD)
+
+static int push_mode;
+module_param(push_mode, int, 0644);
+MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)");
+
+static int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all");
+
+static int resource_profile;
+module_param(resource_profile, int, 0644);
+MODULE_PARM_DESC(resource_profile,
+		 "Resource Profile: 0=no VF RDMA support (default), 1=Weighted VF, 2=Even Distribution");
+
+static int max_rdma_vfs = 32;
+module_param(max_rdma_vfs, int, 0644);
+MODULE_PARM_DESC(max_rdma_vfs, "Maximum VF count: 0-32 32=default");
+static int mpa_version = 2;
+module_param(mpa_version, int, 0644);
+MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp 1 or 2");
+
+MODULE_AUTHOR("Intel Corporation, <e1000-rdma@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Intel(R) Ethernet Connection X722 iWARP RDMA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static struct i40e_client i40iw_client;
+static char i40iw_client_name[I40E_CLIENT_STR_LENGTH] = "i40iw";
+
+static LIST_HEAD(i40iw_handlers);
+static spinlock_t i40iw_handler_lock;
+
+static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
+						  u32 vf_id, u8 *msg, u16 len);
+
+static struct notifier_block i40iw_inetaddr_notifier = {
+	.notifier_call = i40iw_inetaddr_event
+};
+
+static struct notifier_block i40iw_inetaddr6_notifier = {
+	.notifier_call = i40iw_inet6addr_event
+};
+
+static struct notifier_block i40iw_net_notifier = {
+	.notifier_call = i40iw_net_event
+};
+
+static int i40iw_notifiers_registered;
+
+/**
+ * i40iw_find_i40e_handler - find a handler given a client info
+ * @ldev: pointer to a client info
+ */
+static struct i40iw_handler *i40iw_find_i40e_handler(struct i40e_info *ldev)
+{
+	struct i40iw_handler *hdl;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i40iw_handler_lock, flags);
+	list_for_each_entry(hdl, &i40iw_handlers, list) {
+		if (hdl->ldev.netdev == ldev->netdev) {
+			spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+			return hdl;
+		}
+	}
+	spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+	return NULL;
+}
+
+/**
+ * i40iw_find_netdev - find a handler given a netdev
+ * @netdev: pointer to net_device
+ */
+struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev)
+{
+	struct i40iw_handler *hdl;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i40iw_handler_lock, flags);
+	list_for_each_entry(hdl, &i40iw_handlers, list) {
+		if (hdl->ldev.netdev == netdev) {
+			spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+			return hdl;
+		}
+	}
+	spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+	return NULL;
+}
+
+/**
+ * i40iw_add_handler - add a handler to the list
+ * @hdl: handler to be added to the handler list
+ */
+static void i40iw_add_handler(struct i40iw_handler *hdl)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i40iw_handler_lock, flags);
+	list_add(&hdl->list, &i40iw_handlers);
+	spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+}
+
+/**
+ * i40iw_del_handler - delete a handler from the list
+ * @hdl: handler to be deleted from the handler list
+ */
+static int i40iw_del_handler(struct i40iw_handler *hdl)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i40iw_handler_lock, flags);
+	list_del(&hdl->list);
+	spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+	return 0;
+}
+
+/**
+ * i40iw_enable_intr - set up device interrupts
+ * @dev: hardware control device structure
+ * @msix_id: id of the interrupt to be enabled
+ */
+static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id)
+{
+	u32 val;
+
+	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+		I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+		(3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
+	if (dev->is_pf)
+		i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_id - 1), val);
+	else
+		i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_id - 1), val);
+}
+
+/**
+ * i40iw_dpc - tasklet for aeq and ceq 0
+ * @data: iwarp device
+ */
+static void i40iw_dpc(unsigned long data)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)data;
+
+	if (iwdev->msix_shared)
+		i40iw_process_ceq(iwdev, iwdev->ceqlist);
+	i40iw_process_aeq(iwdev);
+	i40iw_enable_intr(&iwdev->sc_dev, iwdev->iw_msixtbl[0].idx);
+}
+
+/**
+ * i40iw_ceq_dpc - dpc handler for CEQ
+ * @data: data points to CEQ
+ */
+static void i40iw_ceq_dpc(unsigned long data)
+{
+	struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+	struct i40iw_device *iwdev = iwceq->iwdev;
+
+	i40iw_process_ceq(iwdev, iwceq);
+	i40iw_enable_intr(&iwdev->sc_dev, iwceq->msix_idx);
+}
+
+/**
+ * i40iw_irq_handler - interrupt handler for aeq and ceq0
+ * @irq: Interrupt request number
+ * @data: iwarp device
+ */
+static irqreturn_t i40iw_irq_handler(int irq, void *data)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)data;
+
+	tasklet_schedule(&iwdev->dpc_tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
+ * i40iw_destroy_cqp  - destroy control qp
+ * @iwdev: iwarp device
+ * @create_done: 1 if cqp create poll was success
+ *
+ * Issue destroy cqp request and
+ * free the resources associated with the cqp
+ */
+static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
+{
+	enum i40iw_status_code status = 0;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_cqp *cqp = &iwdev->cqp;
+
+	if (free_hwcqp && dev->cqp_ops->cqp_destroy)
+		status = dev->cqp_ops->cqp_destroy(dev->cqp);
+	if (status)
+		i40iw_pr_err("destroy cqp failed");
+
+	i40iw_free_dma_mem(dev->hw, &cqp->sq);
+	kfree(cqp->scratch_array);
+	iwdev->cqp.scratch_array = NULL;
+
+	kfree(cqp->cqp_requests);
+	cqp->cqp_requests = NULL;
+}
+
+/**
+ * i40iw_disable_irqs - disable device interrupts
+ * @dev: hardware control device structure
+ * @msic_vec: msix vector to disable irq
+ * @dev_id: parameter to pass to free_irq (used during irq setup)
+ *
+ * The function is called when destroying aeq/ceq
+ */
+static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
+			      struct i40iw_msix_vector *msix_vec,
+			      void *dev_id)
+{
+	if (dev->is_pf)
+		i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
+	else
+		i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
+	synchronize_irq(msix_vec->irq);
+	free_irq(msix_vec->irq, dev_id);
+}
+
+/**
+ * i40iw_destroy_aeq - destroy aeq
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Issue a destroy aeq request and
+ * free the resources associated with the aeq
+ * The function is called during driver unload
+ */
+static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
+{
+	enum i40iw_status_code status = I40IW_ERR_NOT_READY;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_aeq *aeq = &iwdev->aeq;
+
+	if (!iwdev->msix_shared)
+		i40iw_disable_irq(dev, iwdev->iw_msixtbl, (void *)iwdev);
+	if (reset)
+		goto exit;
+
+	if (!dev->aeq_ops->aeq_destroy(&aeq->sc_aeq, 0, 1))
+		status = dev->aeq_ops->aeq_destroy_done(&aeq->sc_aeq);
+	if (status)
+		i40iw_pr_err("destroy aeq failed %d\n", status);
+
+exit:
+	i40iw_free_dma_mem(dev->hw, &aeq->mem);
+}
+
+/**
+ * i40iw_destroy_ceq - destroy ceq
+ * @iwdev: iwarp device
+ * @iwceq: ceq to be destroyed
+ * @reset: true if called before reset
+ *
+ * Issue a destroy ceq request and
+ * free the resources associated with the ceq
+ */
+static void i40iw_destroy_ceq(struct i40iw_device *iwdev,
+			      struct i40iw_ceq *iwceq,
+			      bool reset)
+{
+	enum i40iw_status_code status;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+	if (reset)
+		goto exit;
+
+	status = dev->ceq_ops->ceq_destroy(&iwceq->sc_ceq, 0, 1);
+	if (status) {
+		i40iw_pr_err("ceq destroy command failed %d\n", status);
+		goto exit;
+	}
+
+	status = dev->ceq_ops->cceq_destroy_done(&iwceq->sc_ceq);
+	if (status)
+		i40iw_pr_err("ceq destroy completion failed %d\n", status);
+exit:
+	i40iw_free_dma_mem(dev->hw, &iwceq->mem);
+}
+
+/**
+ * i40iw_dele_ceqs - destroy all ceq's
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Go through all of the device ceq's and for each ceq
+ * disable the ceq interrupt and destroy the ceq
+ */
+static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset)
+{
+	u32 i = 0;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_ceq *iwceq = iwdev->ceqlist;
+	struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
+
+	if (iwdev->msix_shared) {
+		i40iw_disable_irq(dev, msix_vec, (void *)iwdev);
+		i40iw_destroy_ceq(iwdev, iwceq, reset);
+		iwceq++;
+		i++;
+	}
+
+	for (msix_vec++; i < iwdev->ceqs_count; i++, msix_vec++, iwceq++) {
+		i40iw_disable_irq(dev, msix_vec, (void *)iwceq);
+		i40iw_destroy_ceq(iwdev, iwceq, reset);
+	}
+}
+
+/**
+ * i40iw_destroy_ccq - destroy control cq
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Issue destroy ccq request and
+ * free the resources associated with the ccq
+ */
+static void i40iw_destroy_ccq(struct i40iw_device *iwdev, bool reset)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_ccq *ccq = &iwdev->ccq;
+	enum i40iw_status_code status = 0;
+
+	if (!reset)
+		status = dev->ccq_ops->ccq_destroy(dev->ccq, 0, true);
+	if (status)
+		i40iw_pr_err("ccq destroy failed %d\n", status);
+	i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
+}
+
+/* types of hmc objects */
+static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = {
+	I40IW_HMC_IW_QP,
+	I40IW_HMC_IW_CQ,
+	I40IW_HMC_IW_HTE,
+	I40IW_HMC_IW_ARP,
+	I40IW_HMC_IW_APBVT_ENTRY,
+	I40IW_HMC_IW_MR,
+	I40IW_HMC_IW_XF,
+	I40IW_HMC_IW_XFFL,
+	I40IW_HMC_IW_Q1,
+	I40IW_HMC_IW_Q1FL,
+	I40IW_HMC_IW_TIMER,
+};
+
+/**
+ * i40iw_close_hmc_objects_type - delete hmc objects of a given type
+ * @iwdev: iwarp device
+ * @obj_type: the hmc object type to be deleted
+ * @is_pf: true if the function is PF otherwise false
+ * @reset: true if called before reset
+ */
+static void i40iw_close_hmc_objects_type(struct i40iw_sc_dev *dev,
+					 enum i40iw_hmc_rsrc_type obj_type,
+					 struct i40iw_hmc_info *hmc_info,
+					 bool is_pf,
+					 bool reset)
+{
+	struct i40iw_hmc_del_obj_info info;
+
+	memset(&info, 0, sizeof(info));
+	info.hmc_info = hmc_info;
+	info.rsrc_type = obj_type;
+	info.count = hmc_info->hmc_obj[obj_type].cnt;
+	info.is_pf = is_pf;
+	if (dev->hmc_ops->del_hmc_object(dev, &info, reset))
+		i40iw_pr_err("del obj of type %d failed\n", obj_type);
+}
+
+/**
+ * i40iw_del_hmc_objects - remove all device hmc objects
+ * @dev: iwarp device
+ * @hmc_info: hmc_info to free
+ * @is_pf: true if hmc_info belongs to PF, not vf nor allocated
+ *	   by PF on behalf of VF
+ * @reset: true if called before reset
+ */
+static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev,
+				  struct i40iw_hmc_info *hmc_info,
+				  bool is_pf,
+				  bool reset)
+{
+	unsigned int i;
+
+	for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++)
+		i40iw_close_hmc_objects_type(dev, iw_hmc_obj_types[i], hmc_info, is_pf, reset);
+}
+
+/**
+ * i40iw_ceq_handler - interrupt handler for ceq
+ * @data: ceq pointer
+ */
+static irqreturn_t i40iw_ceq_handler(int irq, void *data)
+{
+	struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+
+	if (iwceq->irq != irq)
+		i40iw_pr_err("expected irq = %d received irq = %d\n", iwceq->irq, irq);
+	tasklet_schedule(&iwceq->dpc_tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
+ * i40iw_create_hmc_obj_type - create hmc object of a given type
+ * @dev: hardware control device structure
+ * @info: information for the hmc object to create
+ */
+static enum i40iw_status_code i40iw_create_hmc_obj_type(struct i40iw_sc_dev *dev,
+							struct i40iw_hmc_create_obj_info *info)
+{
+	return dev->hmc_ops->create_hmc_object(dev, info);
+}
+
+/**
+ * i40iw_create_hmc_objs - create all hmc objects for the device
+ * @iwdev: iwarp device
+ * @is_pf: true if the function is PF otherwise false
+ *
+ * Create the device hmc objects and allocate hmc pages
+ * Return 0 if successful, otherwise clean up and return error
+ */
+static enum i40iw_status_code i40iw_create_hmc_objs(struct i40iw_device *iwdev,
+						    bool is_pf)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_hmc_create_obj_info info;
+	enum i40iw_status_code status;
+	int i;
+
+	memset(&info, 0, sizeof(info));
+	info.hmc_info = dev->hmc_info;
+	info.is_pf = is_pf;
+	info.entry_type = iwdev->sd_type;
+	for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
+		info.rsrc_type = iw_hmc_obj_types[i];
+		info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
+		status = i40iw_create_hmc_obj_type(dev, &info);
+		if (status) {
+			i40iw_pr_err("create obj type %d status = %d\n",
+				     iw_hmc_obj_types[i], status);
+			break;
+		}
+	}
+	if (!status)
+		return (dev->cqp_misc_ops->static_hmc_pages_allocated(dev->cqp, 0,
+								      dev->hmc_fn_id,
+								      true, true));
+
+	while (i) {
+		i--;
+		/* destroy the hmc objects of a given type */
+		i40iw_close_hmc_objects_type(dev,
+					     iw_hmc_obj_types[i],
+					     dev->hmc_info,
+					     is_pf,
+					     false);
+	}
+	return status;
+}
+
+/**
+ * i40iw_obj_aligned_mem - get aligned memory from device allocated memory
+ * @iwdev: iwarp device
+ * @memptr: points to the memory addresses
+ * @size: size of memory needed
+ * @mask: mask for the aligned memory
+ *
+ * Get aligned memory of the requested size and
+ * update the memptr to point to the new aligned memory
+ * Return 0 if successful, otherwise return no memory error
+ */
+enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
+					     struct i40iw_dma_mem *memptr,
+					     u32 size,
+					     u32 mask)
+{
+	unsigned long va, newva;
+	unsigned long extra;
+
+	va = (unsigned long)iwdev->obj_next.va;
+	newva = va;
+	if (mask)
+		newva = ALIGN(va, (mask + 1));
+	extra = newva - va;
+	memptr->va = (u8 *)va + extra;
+	memptr->pa = iwdev->obj_next.pa + extra;
+	memptr->size = size;
+	if ((memptr->va + size) > (iwdev->obj_mem.va + iwdev->obj_mem.size))
+		return I40IW_ERR_NO_MEMORY;
+
+	iwdev->obj_next.va = memptr->va + size;
+	iwdev->obj_next.pa = memptr->pa + size;
+	return 0;
+}
+
+/**
+ * i40iw_create_cqp - create control qp
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the cqp and all the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
+{
+	enum i40iw_status_code status;
+	u32 sqsize = I40IW_CQP_SW_SQSIZE_2048;
+	struct i40iw_dma_mem mem;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_cqp_init_info cqp_init_info;
+	struct i40iw_cqp *cqp = &iwdev->cqp;
+	u16 maj_err, min_err;
+	int i;
+
+	cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL);
+	if (!cqp->cqp_requests)
+		return I40IW_ERR_NO_MEMORY;
+	cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
+	if (!cqp->scratch_array) {
+		kfree(cqp->cqp_requests);
+		return I40IW_ERR_NO_MEMORY;
+	}
+	dev->cqp = &cqp->sc_cqp;
+	dev->cqp->dev = dev;
+	memset(&cqp_init_info, 0, sizeof(cqp_init_info));
+	status = i40iw_allocate_dma_mem(dev->hw, &cqp->sq,
+					(sizeof(struct i40iw_cqp_sq_wqe) * sqsize),
+					I40IW_CQP_ALIGNMENT);
+	if (status)
+		goto exit;
+	status = i40iw_obj_aligned_mem(iwdev, &mem, sizeof(struct i40iw_cqp_ctx),
+				       I40IW_HOST_CTX_ALIGNMENT_MASK);
+	if (status)
+		goto exit;
+	dev->cqp->host_ctx_pa = mem.pa;
+	dev->cqp->host_ctx = mem.va;
+	/* populate the cqp init info */
+	cqp_init_info.dev = dev;
+	cqp_init_info.sq_size = sqsize;
+	cqp_init_info.sq = cqp->sq.va;
+	cqp_init_info.sq_pa = cqp->sq.pa;
+	cqp_init_info.host_ctx_pa = mem.pa;
+	cqp_init_info.host_ctx = mem.va;
+	cqp_init_info.hmc_profile = iwdev->resource_profile;
+	cqp_init_info.enabled_vf_count = iwdev->max_rdma_vfs;
+	cqp_init_info.scratch_array = cqp->scratch_array;
+	status = dev->cqp_ops->cqp_init(dev->cqp, &cqp_init_info);
+	if (status) {
+		i40iw_pr_err("cqp init status %d maj_err %d min_err %d\n",
+			     status, maj_err, min_err);
+		goto exit;
+	}
+	status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err);
+	if (status) {
+		i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n",
+			     status, maj_err, min_err);
+		goto exit;
+	}
+	spin_lock_init(&cqp->req_lock);
+	INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
+	INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
+	/* init the waitq of the cqp_requests and add them to the list */
+	for (i = 0; i < I40IW_CQP_SW_SQSIZE_2048; i++) {
+		init_waitqueue_head(&cqp->cqp_requests[i].waitq);
+		list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
+	}
+	return 0;
+exit:
+	/* clean up the created resources */
+	i40iw_destroy_cqp(iwdev, false);
+	return status;
+}
+
+/**
+ * i40iw_create_ccq - create control cq
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the ccq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_ccq(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_dma_mem mem;
+	enum i40iw_status_code status;
+	struct i40iw_ccq_init_info info;
+	struct i40iw_ccq *ccq = &iwdev->ccq;
+
+	memset(&info, 0, sizeof(info));
+	dev->ccq = &ccq->sc_cq;
+	dev->ccq->dev = dev;
+	info.dev = dev;
+	ccq->shadow_area.size = sizeof(struct i40iw_cq_shadow_area);
+	ccq->mem_cq.size = sizeof(struct i40iw_cqe) * IW_CCQ_SIZE;
+	status = i40iw_allocate_dma_mem(dev->hw, &ccq->mem_cq,
+					ccq->mem_cq.size, I40IW_CQ0_ALIGNMENT);
+	if (status)
+		goto exit;
+	status = i40iw_obj_aligned_mem(iwdev, &mem, ccq->shadow_area.size,
+				       I40IW_SHADOWAREA_MASK);
+	if (status)
+		goto exit;
+	ccq->sc_cq.back_cq = (void *)ccq;
+	/* populate the ccq init info */
+	info.cq_base = ccq->mem_cq.va;
+	info.cq_pa = ccq->mem_cq.pa;
+	info.num_elem = IW_CCQ_SIZE;
+	info.shadow_area = mem.va;
+	info.shadow_area_pa = mem.pa;
+	info.ceqe_mask = false;
+	info.ceq_id_valid = true;
+	info.shadow_read_threshold = 16;
+	status = dev->ccq_ops->ccq_init(dev->ccq, &info);
+	if (!status)
+		status = dev->ccq_ops->ccq_create(dev->ccq, 0, true, true);
+exit:
+	if (status)
+		i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
+	return status;
+}
+
+/**
+ * i40iw_configure_ceq_vector - set up the msix interrupt vector for ceq
+ * @iwdev: iwarp device
+ * @msix_vec: interrupt vector information
+ * @iwceq: ceq associated with the vector
+ * @ceq_id: the id number of the iwceq
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iwdev,
+							 struct i40iw_ceq *iwceq,
+							 u32 ceq_id,
+							 struct i40iw_msix_vector *msix_vec)
+{
+	enum i40iw_status_code status;
+
+	if (iwdev->msix_shared && !ceq_id) {
+		tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+		status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev);
+	} else {
+		tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq);
+		status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
+	}
+
+	if (status) {
+		i40iw_pr_err("ceq irq config fail\n");
+		return I40IW_ERR_CONFIG;
+	}
+	msix_vec->ceq_id = ceq_id;
+	msix_vec->cpu_affinity = 0;
+
+	return 0;
+}
+
+/**
+ * i40iw_create_ceq - create completion event queue
+ * @iwdev: iwarp device
+ * @iwceq: pointer to the ceq resources to be created
+ * @ceq_id: the id number of the iwceq
+ *
+ * Return 0, if the ceq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_ceq(struct i40iw_device *iwdev,
+					       struct i40iw_ceq *iwceq,
+					       u32 ceq_id)
+{
+	enum i40iw_status_code status;
+	struct i40iw_ceq_init_info info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	u64 scratch;
+
+	memset(&info, 0, sizeof(info));
+	info.ceq_id = ceq_id;
+	iwceq->iwdev = iwdev;
+	iwceq->mem.size = sizeof(struct i40iw_ceqe) *
+		iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+	status = i40iw_allocate_dma_mem(dev->hw, &iwceq->mem, iwceq->mem.size,
+					I40IW_CEQ_ALIGNMENT);
+	if (status)
+		goto exit;
+	info.ceq_id = ceq_id;
+	info.ceqe_base = iwceq->mem.va;
+	info.ceqe_pa = iwceq->mem.pa;
+
+	info.elem_cnt = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+	iwceq->sc_ceq.ceq_id = ceq_id;
+	info.dev = dev;
+	scratch = (uintptr_t)&iwdev->cqp.sc_cqp;
+	status = dev->ceq_ops->ceq_init(&iwceq->sc_ceq, &info);
+	if (!status)
+		status = dev->ceq_ops->cceq_create(&iwceq->sc_ceq, scratch);
+
+exit:
+	if (status)
+		i40iw_free_dma_mem(dev->hw, &iwceq->mem);
+	return status;
+}
+
+void i40iw_request_reset(struct i40iw_device *iwdev)
+{
+	struct i40e_info *ldev = iwdev->ldev;
+
+	ldev->ops->request_reset(ldev, iwdev->client, 1);
+}
+
+/**
+ * i40iw_setup_ceqs - manage the device ceq's and their interrupt resources
+ * @iwdev: iwarp device
+ * @ldev: i40e lan device
+ *
+ * Allocate a list for all device completion event queues
+ * Create the ceq's and configure their msix interrupt vectors
+ * Return 0, if at least one ceq is successfully set up, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev,
+					       struct i40e_info *ldev)
+{
+	u32 i;
+	u32 ceq_id;
+	struct i40iw_ceq *iwceq;
+	struct i40iw_msix_vector *msix_vec;
+	enum i40iw_status_code status = 0;
+	u32 num_ceqs;
+
+	if (ldev && ldev->ops && ldev->ops->setup_qvlist) {
+		status = ldev->ops->setup_qvlist(ldev, &i40iw_client,
+						 iwdev->iw_qvlist);
+		if (status)
+			goto exit;
+	} else {
+		status = I40IW_ERR_BAD_PTR;
+		goto exit;
+	}
+
+	num_ceqs = min(iwdev->msix_count, iwdev->sc_dev.hmc_fpm_misc.max_ceqs);
+	iwdev->ceqlist = kcalloc(num_ceqs, sizeof(*iwdev->ceqlist), GFP_KERNEL);
+	if (!iwdev->ceqlist) {
+		status = I40IW_ERR_NO_MEMORY;
+		goto exit;
+	}
+	i = (iwdev->msix_shared) ? 0 : 1;
+	for (ceq_id = 0; i < num_ceqs; i++, ceq_id++) {
+		iwceq = &iwdev->ceqlist[ceq_id];
+		status = i40iw_create_ceq(iwdev, iwceq, ceq_id);
+		if (status) {
+			i40iw_pr_err("create ceq status = %d\n", status);
+			break;
+		}
+
+		msix_vec = &iwdev->iw_msixtbl[i];
+		iwceq->irq = msix_vec->irq;
+		iwceq->msix_idx = msix_vec->idx;
+		status = i40iw_configure_ceq_vector(iwdev, iwceq, ceq_id, msix_vec);
+		if (status) {
+			i40iw_destroy_ceq(iwdev, iwceq, false);
+			break;
+		}
+		i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx);
+		iwdev->ceqs_count++;
+	}
+
+exit:
+	if (status) {
+		if (!iwdev->ceqs_count) {
+			kfree(iwdev->ceqlist);
+			iwdev->ceqlist = NULL;
+		} else {
+			status = 0;
+		}
+	}
+	return status;
+}
+
+/**
+ * i40iw_configure_aeq_vector - set up the msix vector for aeq
+ * @iwdev: iwarp device
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iwdev)
+{
+	struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
+	u32 ret = 0;
+
+	if (!iwdev->msix_shared) {
+		tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+		ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev);
+	}
+	if (ret) {
+		i40iw_pr_err("aeq irq config fail\n");
+		return I40IW_ERR_CONFIG;
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_create_aeq - create async event queue
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the aeq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_aeq(struct i40iw_device *iwdev)
+{
+	enum i40iw_status_code status;
+	struct i40iw_aeq_init_info info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_aeq *aeq = &iwdev->aeq;
+	u64 scratch = 0;
+	u32 aeq_size;
+
+	aeq_size = 2 * iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt +
+		iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+	memset(&info, 0, sizeof(info));
+	aeq->mem.size = sizeof(struct i40iw_sc_aeqe) * aeq_size;
+	status = i40iw_allocate_dma_mem(dev->hw, &aeq->mem, aeq->mem.size,
+					I40IW_AEQ_ALIGNMENT);
+	if (status)
+		goto exit;
+
+	info.aeqe_base = aeq->mem.va;
+	info.aeq_elem_pa = aeq->mem.pa;
+	info.elem_cnt = aeq_size;
+	info.dev = dev;
+	status = dev->aeq_ops->aeq_init(&aeq->sc_aeq, &info);
+	if (status)
+		goto exit;
+	status = dev->aeq_ops->aeq_create(&aeq->sc_aeq, scratch, 1);
+	if (!status)
+		status = dev->aeq_ops->aeq_create_done(&aeq->sc_aeq);
+exit:
+	if (status)
+		i40iw_free_dma_mem(dev->hw, &aeq->mem);
+	return status;
+}
+
+/**
+ * i40iw_setup_aeq - set up the device aeq
+ * @iwdev: iwarp device
+ *
+ * Create the aeq and configure its msix interrupt vector
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_aeq(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+
+	status = i40iw_create_aeq(iwdev);
+	if (status)
+		return status;
+
+	status = i40iw_configure_aeq_vector(iwdev);
+	if (status) {
+		i40iw_destroy_aeq(iwdev, false);
+		return status;
+	}
+
+	if (!iwdev->msix_shared)
+		i40iw_enable_intr(dev, iwdev->iw_msixtbl[0].idx);
+	return 0;
+}
+
+/**
+ * i40iw_initialize_ilq - create iwarp local queue for cm
+ * @iwdev: iwarp device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev)
+{
+	struct i40iw_puda_rsrc_info info;
+	enum i40iw_status_code status;
+
+	info.type = I40IW_PUDA_RSRC_TYPE_ILQ;
+	info.cq_id = 1;
+	info.qp_id = 0;
+	info.count = 1;
+	info.pd_id = 1;
+	info.sq_size = 8192;
+	info.rq_size = 8192;
+	info.buf_size = 1024;
+	info.tx_buf_cnt = 16384;
+	info.mss = iwdev->mss;
+	info.receive = i40iw_receive_ilq;
+	info.xmit_complete = i40iw_free_sqbuf;
+	status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+	if (status)
+		i40iw_pr_err("ilq create fail\n");
+	return status;
+}
+
+/**
+ * i40iw_initialize_ieq - create iwarp exception queue
+ * @iwdev: iwarp device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev)
+{
+	struct i40iw_puda_rsrc_info info;
+	enum i40iw_status_code status;
+
+	info.type = I40IW_PUDA_RSRC_TYPE_IEQ;
+	info.cq_id = 2;
+	info.qp_id = iwdev->sc_dev.exception_lan_queue;
+	info.count = 1;
+	info.pd_id = 2;
+	info.sq_size = 8192;
+	info.rq_size = 8192;
+	info.buf_size = 2048;
+	info.mss = iwdev->mss;
+	info.tx_buf_cnt = 16384;
+	status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+	if (status)
+		i40iw_pr_err("ieq create fail\n");
+	return status;
+}
+
+/**
+ * i40iw_hmc_setup - create hmc objects for the device
+ * @iwdev: iwarp device
+ *
+ * Set up the device private memory space for the number and size of
+ * the hmc objects and create the objects
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_hmc_setup(struct i40iw_device *iwdev)
+{
+	enum i40iw_status_code status;
+
+	iwdev->sd_type = I40IW_SD_TYPE_DIRECT;
+	status = i40iw_config_fpm_values(&iwdev->sc_dev, IW_CFG_FPM_QP_COUNT);
+	if (status)
+		goto exit;
+	status = i40iw_create_hmc_objs(iwdev, true);
+	if (status)
+		goto exit;
+	iwdev->init_state = HMC_OBJS_CREATED;
+exit:
+	return status;
+}
+
+/**
+ * i40iw_del_init_mem - deallocate memory resources
+ * @iwdev: iwarp device
+ */
+static void i40iw_del_init_mem(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+	i40iw_free_dma_mem(&iwdev->hw, &iwdev->obj_mem);
+	kfree(dev->hmc_info->sd_table.sd_entry);
+	dev->hmc_info->sd_table.sd_entry = NULL;
+	kfree(iwdev->mem_resources);
+	iwdev->mem_resources = NULL;
+	kfree(iwdev->ceqlist);
+	iwdev->ceqlist = NULL;
+	kfree(iwdev->iw_msixtbl);
+	iwdev->iw_msixtbl = NULL;
+	kfree(iwdev->hmc_info_mem);
+	iwdev->hmc_info_mem = NULL;
+}
+
+/**
+ * i40iw_del_macip_entry - remove a mac ip address entry from the hw table
+ * @iwdev: iwarp device
+ * @idx: the index of the mac ip address to delete
+ */
+static void i40iw_del_macip_entry(struct i40iw_device *iwdev, u8 idx)
+{
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status = 0;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request) {
+		i40iw_pr_err("cqp_request memory failed\n");
+		return;
+	}
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_DELETE_LOCAL_MAC_IPADDR_ENTRY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.del_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+	cqp_info->in.u.del_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.del_local_mac_ipaddr_entry.entry_idx = idx;
+	cqp_info->in.u.del_local_mac_ipaddr_entry.ignore_ref_count = 0;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Del MAC Ip entry fail");
+}
+
+/**
+ * i40iw_add_mac_ipaddr_entry - add a mac ip address entry to the hw table
+ * @iwdev: iwarp device
+ * @mac_addr: pointer to mac address
+ * @idx: the index of the mac ip address to add
+ */
+static enum i40iw_status_code i40iw_add_mac_ipaddr_entry(struct i40iw_device *iwdev,
+							 u8 *mac_addr,
+							 u8 idx)
+{
+	struct i40iw_local_mac_ipaddr_entry_info *info;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status = 0;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request) {
+		i40iw_pr_err("cqp_request memory failed\n");
+		return I40IW_ERR_NO_MEMORY;
+	}
+
+	cqp_info = &cqp_request->info;
+
+	cqp_info->post_sq = 1;
+	info = &cqp_info->in.u.add_local_mac_ipaddr_entry.info;
+	ether_addr_copy(info->mac_addr, mac_addr);
+	info->entry_idx = idx;
+	cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+	cqp_info->cqp_cmd = OP_ADD_LOCAL_MAC_IPADDR_ENTRY;
+	cqp_info->in.u.add_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+	cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Add MAC Ip entry fail");
+	return status;
+}
+
+/**
+ * i40iw_alloc_local_mac_ipaddr_entry - allocate a mac ip address entry
+ * @iwdev: iwarp device
+ * @mac_ip_tbl_idx: the index of the new mac ip address
+ *
+ * Allocate a mac ip address entry and update the mac_ip_tbl_idx
+ * to hold the index of the newly created mac ip address
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_alloc_local_mac_ipaddr_entry(struct i40iw_device *iwdev,
+								 u16 *mac_ip_tbl_idx)
+{
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status = 0;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request) {
+		i40iw_pr_err("cqp_request memory failed\n");
+		return I40IW_ERR_NO_MEMORY;
+	}
+
+	/* increment refcount, because we need the cqp request ret value */
+	atomic_inc(&cqp_request->refcount);
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.alloc_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+	cqp_info->in.u.alloc_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (!status)
+		*mac_ip_tbl_idx = cqp_request->compl_info.op_ret_val;
+	else
+		i40iw_pr_err("CQP-OP Alloc MAC Ip entry fail");
+	/* decrement refcount and free the cqp request, if no longer used */
+	i40iw_put_cqp_request(iwcqp, cqp_request);
+	return status;
+}
+
+/**
+ * i40iw_alloc_set_mac_ipaddr - set up a mac ip address table entry
+ * @iwdev: iwarp device
+ * @macaddr: pointer to mac address
+ *
+ * Allocate a mac ip address entry and add it to the hw table
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_alloc_set_mac_ipaddr(struct i40iw_device *iwdev,
+							 u8 *macaddr)
+{
+	enum i40iw_status_code status;
+
+	status = i40iw_alloc_local_mac_ipaddr_entry(iwdev, &iwdev->mac_ip_table_idx);
+	if (!status) {
+		status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
+						    (u8)iwdev->mac_ip_table_idx);
+		if (!status)
+			status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
+							    (u8)iwdev->mac_ip_table_idx);
+		else
+			i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
+	}
+	return status;
+}
+
+/**
+ * i40iw_add_ipv6_addr - add ipv6 address to the hw arp table
+ * @iwdev: iwarp device
+ */
+static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
+{
+	struct net_device *ip_dev;
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifp;
+	__be32 local_ipaddr6[4];
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, ip_dev) {
+		if ((((rdma_vlan_dev_vlan_id(ip_dev) < 0xFFFF) &&
+		      (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
+		     (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
+			idev = __in6_dev_get(ip_dev);
+			if (!idev) {
+				i40iw_pr_err("ipv6 inet device not found\n");
+				break;
+			}
+			list_for_each_entry(ifp, &idev->addr_list, if_list) {
+				i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr,
+					      rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr);
+				i40iw_copy_ip_ntohl(local_ipaddr6,
+						    ifp->addr.in6_u.u6_addr32);
+				i40iw_manage_arp_cache(iwdev,
+						       ip_dev->dev_addr,
+						       local_ipaddr6,
+						       false,
+						       I40IW_ARP_ADD);
+			}
+		}
+	}
+	rcu_read_unlock();
+}
+
+/**
+ * i40iw_add_ipv4_addr - add ipv4 address to the hw arp table
+ * @iwdev: iwarp device
+ */
+static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
+{
+	struct net_device *dev;
+	struct in_device *idev;
+	bool got_lock = true;
+	u32 ip_addr;
+
+	if (!rtnl_trylock())
+		got_lock = false;
+
+	for_each_netdev(&init_net, dev) {
+		if ((((rdma_vlan_dev_vlan_id(dev) < 0xFFFF) &&
+		      (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
+		    (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
+			idev = in_dev_get(dev);
+			for_ifa(idev) {
+				i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+					    "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
+					     rdma_vlan_dev_vlan_id(dev), dev->dev_addr);
+
+				ip_addr = ntohl(ifa->ifa_address);
+				i40iw_manage_arp_cache(iwdev,
+						       dev->dev_addr,
+						       &ip_addr,
+						       true,
+						       I40IW_ARP_ADD);
+			}
+			endfor_ifa(idev);
+			in_dev_put(idev);
+		}
+	}
+	if (got_lock)
+		rtnl_unlock();
+}
+
+/**
+ * i40iw_add_mac_ip - add mac and ip addresses
+ * @iwdev: iwarp device
+ *
+ * Create and add a mac ip address entry to the hw table and
+ * ipv4/ipv6 addresses to the arp cache
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_add_mac_ip(struct i40iw_device *iwdev)
+{
+	struct net_device *netdev = iwdev->netdev;
+	enum i40iw_status_code status;
+
+	status = i40iw_alloc_set_mac_ipaddr(iwdev, (u8 *)netdev->dev_addr);
+	if (status)
+		return status;
+	i40iw_add_ipv4_addr(iwdev);
+	i40iw_add_ipv6_addr(iwdev);
+	return 0;
+}
+
+/**
+ * i40iw_wait_pe_ready - Check if firmware is ready
+ * @hw: provides access to registers
+ */
+static void i40iw_wait_pe_ready(struct i40iw_hw *hw)
+{
+	u32 statusfw;
+	u32 statuscpu0;
+	u32 statuscpu1;
+	u32 statuscpu2;
+	u32 retrycount = 0;
+
+	do {
+		statusfw = i40iw_rd32(hw, I40E_GLPE_FWLDSTATUS);
+		i40iw_pr_info("[%04d] fm load status[x%04X]\n", __LINE__, statusfw);
+		statuscpu0 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS0);
+		i40iw_pr_info("[%04d] CSR_CQP status[x%04X]\n", __LINE__, statuscpu0);
+		statuscpu1 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS1);
+		i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS1 status[x%04X]\n",
+			      __LINE__, statuscpu1);
+		statuscpu2 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS2);
+		i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS2 status[x%04X]\n",
+			      __LINE__, statuscpu2);
+		if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80))
+			break;	/* SUCCESS */
+		mdelay(1000);
+		retrycount++;
+	} while (retrycount < 14);
+	i40iw_wr32(hw, 0xb4040, 0x4C104C5);
+}
+
+/**
+ * i40iw_initialize_dev - initialize device
+ * @iwdev: iwarp device
+ * @ldev: lan device information
+ *
+ * Allocate memory for the hmc objects and initialize iwdev
+ * Return 0 if successful, otherwise clean up the resources
+ * and return error
+ */
+static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
+						   struct i40e_info *ldev)
+{
+	enum i40iw_status_code status;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_device_init_info info;
+	struct i40iw_dma_mem mem;
+	u32 size;
+
+	memset(&info, 0, sizeof(info));
+	size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) +
+				(sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX);
+	iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL);
+	if (!iwdev->hmc_info_mem) {
+		i40iw_pr_err("memory alloc fail\n");
+		return I40IW_ERR_NO_MEMORY;
+	}
+	iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem;
+	dev->hmc_info = &iwdev->hw.hmc;
+	dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1);
+	status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE,
+				       I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
+	if (status)
+		goto exit;
+	info.fpm_query_buf_pa = mem.pa;
+	info.fpm_query_buf = mem.va;
+	status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE,
+				       I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK);
+	if (status)
+		goto exit;
+	info.fpm_commit_buf_pa = mem.pa;
+	info.fpm_commit_buf = mem.va;
+	info.hmc_fn_id = ldev->fid;
+	info.is_pf = (ldev->ftype) ? false : true;
+	info.bar0 = ldev->hw_addr;
+	info.hw = &iwdev->hw;
+	info.debug_mask = debug;
+	info.qs_handle = ldev->params.qos.prio_qos[0].qs_handle;
+	info.exception_lan_queue = 1;
+	info.vchnl_send = i40iw_virtchnl_send;
+	status = i40iw_device_init(&iwdev->sc_dev, &info);
+exit:
+	if (status) {
+		kfree(iwdev->hmc_info_mem);
+		iwdev->hmc_info_mem = NULL;
+	}
+	return status;
+}
+
+/**
+ * i40iw_register_notifiers - register tcp ip notifiers
+ */
+static void i40iw_register_notifiers(void)
+{
+	if (!i40iw_notifiers_registered) {
+		register_inetaddr_notifier(&i40iw_inetaddr_notifier);
+		register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+		register_netevent_notifier(&i40iw_net_notifier);
+	}
+	i40iw_notifiers_registered++;
+}
+
+/**
+ * i40iw_save_msix_info - copy msix vector information to iwarp device
+ * @iwdev: iwarp device
+ * @ldev: lan device information
+ *
+ * Allocate iwdev msix table and copy the ldev msix info to the table
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
+						   struct i40e_info *ldev)
+{
+	struct i40e_qvlist_info *iw_qvlist;
+	struct i40e_qv_info *iw_qvinfo;
+	u32 ceq_idx;
+	u32 i;
+	u32 size;
+
+	iwdev->msix_count = ldev->msix_count;
+
+	size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
+	size += sizeof(struct i40e_qvlist_info);
+	size +=  sizeof(struct i40e_qv_info) * iwdev->msix_count - 1;
+	iwdev->iw_msixtbl = kzalloc(size, GFP_KERNEL);
+
+	if (!iwdev->iw_msixtbl)
+		return I40IW_ERR_NO_MEMORY;
+	iwdev->iw_qvlist = (struct i40e_qvlist_info *)(&iwdev->iw_msixtbl[iwdev->msix_count]);
+	iw_qvlist = iwdev->iw_qvlist;
+	iw_qvinfo = iw_qvlist->qv_info;
+	iw_qvlist->num_vectors = iwdev->msix_count;
+	if (iwdev->msix_count <= num_online_cpus())
+		iwdev->msix_shared = true;
+	for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) {
+		iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry;
+		iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector;
+		if (i == 0) {
+			iw_qvinfo->aeq_idx = 0;
+			if (iwdev->msix_shared)
+				iw_qvinfo->ceq_idx = ceq_idx++;
+			else
+				iw_qvinfo->ceq_idx = I40E_QUEUE_INVALID_IDX;
+		} else {
+			iw_qvinfo->aeq_idx = I40E_QUEUE_INVALID_IDX;
+			iw_qvinfo->ceq_idx = ceq_idx++;
+		}
+		iw_qvinfo->itr_idx = 3;
+		iw_qvinfo->v_idx = iwdev->iw_msixtbl[i].idx;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_deinit_device - clean up the device resources
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ * @del_hdl: true if delete hdl entry
+ *
+ * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
+ * destroy the device queues and free the pble and the hmc objects
+ */
+static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del_hdl)
+{
+	struct i40e_info *ldev = iwdev->ldev;
+
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+	i40iw_pr_info("state = %d\n", iwdev->init_state);
+
+	switch (iwdev->init_state) {
+	case RDMA_DEV_REGISTERED:
+		iwdev->iw_status = 0;
+		i40iw_port_ibevent(iwdev);
+		i40iw_destroy_rdma_device(iwdev->iwibdev);
+		/* fallthrough */
+	case IP_ADDR_REGISTERED:
+		if (!reset)
+			i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
+		/* fallthrough */
+	case INET_NOTIFIER:
+		if (i40iw_notifiers_registered > 0) {
+			i40iw_notifiers_registered--;
+			unregister_netevent_notifier(&i40iw_net_notifier);
+			unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
+			unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+		}
+		/* fallthrough */
+	case CEQ_CREATED:
+		i40iw_dele_ceqs(iwdev, reset);
+		/* fallthrough */
+	case AEQ_CREATED:
+		i40iw_destroy_aeq(iwdev, reset);
+		/* fallthrough */
+	case IEQ_CREATED:
+		i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
+		/* fallthrough */
+	case ILQ_CREATED:
+		i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
+		/* fallthrough */
+	case CCQ_CREATED:
+		i40iw_destroy_ccq(iwdev, reset);
+		/* fallthrough */
+	case PBLE_CHUNK_MEM:
+		i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
+		/* fallthrough */
+	case HMC_OBJS_CREATED:
+		i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
+		/* fallthrough */
+	case CQP_CREATED:
+		i40iw_destroy_cqp(iwdev, !reset);
+		/* fallthrough */
+	case INITIAL_STATE:
+		i40iw_cleanup_cm_core(&iwdev->cm_core);
+		if (dev->is_pf)
+			i40iw_hw_stats_del_timer(dev);
+
+		i40iw_del_init_mem(iwdev);
+		break;
+	case INVALID_STATE:
+		/* fallthrough */
+	default:
+		i40iw_pr_err("bad init_state = %d\n", iwdev->init_state);
+		break;
+	}
+
+	if (del_hdl)
+		i40iw_del_handler(i40iw_find_i40e_handler(ldev));
+	kfree(iwdev->hdl);
+}
+
+/**
+ * i40iw_setup_init_state - set up the initial device struct
+ * @hdl: handler for iwarp device - one per instance
+ * @ldev: lan device information
+ * @client: iwarp client information, provided during registration
+ *
+ * Initialize the iwarp device and its hdl information
+ * using the ldev and client information
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
+						     struct i40e_info *ldev,
+						     struct i40e_client *client)
+{
+	struct i40iw_device *iwdev = &hdl->device;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+
+	memcpy(&hdl->ldev, ldev, sizeof(*ldev));
+	if (resource_profile == 1)
+		resource_profile = 2;
+
+	iwdev->mpa_version = mpa_version;
+	iwdev->resource_profile = (resource_profile < I40IW_HMC_PROFILE_EQUAL) ?
+	    (u8)resource_profile + I40IW_HMC_PROFILE_DEFAULT :
+	    I40IW_HMC_PROFILE_DEFAULT;
+	iwdev->max_rdma_vfs =
+		(iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ?  max_rdma_vfs : 0;
+	iwdev->netdev = ldev->netdev;
+	hdl->client = client;
+	iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
+	if (!ldev->ftype)
+		iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET;
+	else
+		iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_VF_DB_ADDR_OFFSET;
+
+	status = i40iw_save_msix_info(iwdev, ldev);
+	if (status)
+		goto exit;
+	iwdev->hw.dev_context = (void *)ldev->pcidev;
+	iwdev->hw.hw_addr = ldev->hw_addr;
+	status = i40iw_allocate_dma_mem(&iwdev->hw,
+					&iwdev->obj_mem, 8192, 4096);
+	if (status)
+		goto exit;
+	iwdev->obj_next = iwdev->obj_mem;
+	iwdev->push_mode = push_mode;
+	init_waitqueue_head(&iwdev->vchnl_waitq);
+	status = i40iw_initialize_dev(iwdev, ldev);
+exit:
+	if (status) {
+		kfree(iwdev->iw_msixtbl);
+		i40iw_free_dma_mem(dev->hw, &iwdev->obj_mem);
+		iwdev->iw_msixtbl = NULL;
+	}
+	return status;
+}
+
+/**
+ * i40iw_open - client interface operation open for iwarp/uda device
+ * @ldev: lan device information
+ * @client: iwarp client information, provided during registration
+ *
+ * Called by the lan driver during the processing of client register
+ * Create device resources, set up queues, pble and hmc objects and
+ * register the device with the ib verbs interface
+ * Return 0 if successful, otherwise return error
+ */
+static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_dev *dev;
+	enum i40iw_status_code status;
+	struct i40iw_handler *hdl;
+
+	hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
+	if (!hdl)
+		return -ENOMEM;
+	iwdev = &hdl->device;
+	iwdev->hdl = hdl;
+	dev = &iwdev->sc_dev;
+	i40iw_setup_cm_core(iwdev);
+
+	dev->back_dev = (void *)iwdev;
+	iwdev->ldev = &hdl->ldev;
+	iwdev->client = client;
+	mutex_init(&iwdev->pbl_mutex);
+	i40iw_add_handler(hdl);
+
+	do {
+		status = i40iw_setup_init_state(hdl, ldev, client);
+		if (status)
+			break;
+		iwdev->init_state = INITIAL_STATE;
+		if (dev->is_pf)
+			i40iw_wait_pe_ready(dev->hw);
+		status = i40iw_create_cqp(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = CQP_CREATED;
+		status = i40iw_hmc_setup(iwdev);
+		if (status)
+			break;
+		status = i40iw_create_ccq(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = CCQ_CREATED;
+		status = i40iw_initialize_ilq(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = ILQ_CREATED;
+		status = i40iw_initialize_ieq(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = IEQ_CREATED;
+		status = i40iw_setup_aeq(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = AEQ_CREATED;
+		status = i40iw_setup_ceqs(iwdev, ldev);
+		if (status)
+			break;
+		iwdev->init_state = CEQ_CREATED;
+		status = i40iw_initialize_hw_resources(iwdev);
+		if (status)
+			break;
+		dev->ccq_ops->ccq_arm(dev->ccq);
+		status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
+		if (status)
+			break;
+		iwdev->virtchnl_wq = create_singlethread_workqueue("iwvch");
+		i40iw_register_notifiers();
+		iwdev->init_state = INET_NOTIFIER;
+		status = i40iw_add_mac_ip(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = IP_ADDR_REGISTERED;
+		if (i40iw_register_rdma_device(iwdev)) {
+			i40iw_pr_err("register rdma device fail\n");
+			break;
+		};
+
+		iwdev->init_state = RDMA_DEV_REGISTERED;
+		iwdev->iw_status = 1;
+		i40iw_port_ibevent(iwdev);
+		i40iw_pr_info("i40iw_open completed\n");
+		return 0;
+	} while (0);
+
+	i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
+	i40iw_deinit_device(iwdev, false, false);
+	return -ERESTART;
+}
+
+/**
+ * i40iw_l2param_change : handle qs handles for qos and mss change
+ * @ldev: lan device information
+ * @client: client for paramater change
+ * @params: new parameters from L2
+ */
+static void i40iw_l2param_change(struct i40e_info *ldev,
+				 struct i40e_client *client,
+				 struct i40e_params *params)
+{
+	struct i40iw_handler *hdl;
+	struct i40iw_device *iwdev;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return;
+
+	iwdev = &hdl->device;
+	if (params->mtu)
+		iwdev->mss = params->mtu - I40IW_MTU_TO_MSS;
+}
+
+/**
+ * i40iw_close - client interface operation close for iwarp/uda device
+ * @ldev: lan device information
+ * @client: client to close
+ *
+ * Called by the lan driver during the processing of client unregister
+ * Destroy and clean up the driver resources
+ */
+static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool reset)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_handler *hdl;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return;
+
+	iwdev = &hdl->device;
+	destroy_workqueue(iwdev->virtchnl_wq);
+	i40iw_deinit_device(iwdev, reset, true);
+}
+
+/**
+ * i40iw_vf_reset - process VF reset
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id
+ *
+ * Called when a VF is reset by the PF
+ * Destroy and clean up the VF resources
+ */
+static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u32 vf_id)
+{
+	struct i40iw_handler *hdl;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_hmc_fcn_info hmc_fcn_info;
+	struct i40iw_virt_mem vf_dev_mem;
+	struct i40iw_vfdev *tmp_vfdev;
+	unsigned int i;
+	unsigned long flags;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return;
+
+	dev = &hdl->device.sc_dev;
+
+	for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
+		if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
+			continue;
+
+		/* free all resources allocated on behalf of vf */
+		tmp_vfdev = dev->vf_dev[i];
+		spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+		dev->vf_dev[i] = NULL;
+		spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+		i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false);
+		/* remove vf hmc function */
+		memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info));
+		hmc_fcn_info.vf_id = vf_id;
+		hmc_fcn_info.iw_vf_idx = tmp_vfdev->iw_vf_idx;
+		hmc_fcn_info.free_fcn = true;
+		i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
+		/* free vf_dev */
+		vf_dev_mem.va = tmp_vfdev;
+		vf_dev_mem.size = sizeof(struct i40iw_vfdev) +
+					sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX;
+		i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
+		break;
+	}
+}
+
+/**
+ * i40iw_vf_enable - enable a number of VFs
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @num_vfs: number of VFs for the PF
+ *
+ * Called when the number of VFs changes
+ */
+static void i40iw_vf_enable(struct i40e_info *ldev,
+			    struct i40e_client *client,
+			    u32 num_vfs)
+{
+	struct i40iw_handler *hdl;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return;
+
+	if (num_vfs > I40IW_MAX_PE_ENABLED_VF_COUNT)
+		hdl->device.max_enabled_vfs = I40IW_MAX_PE_ENABLED_VF_COUNT;
+	else
+		hdl->device.max_enabled_vfs = num_vfs;
+}
+
+/**
+ * i40iw_vf_capable - check if VF capable
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id
+ *
+ * Return 1 if a VF slot is available or if VF is already RDMA enabled
+ * Return 0 otherwise
+ */
+static int i40iw_vf_capable(struct i40e_info *ldev,
+			    struct i40e_client *client,
+			    u32 vf_id)
+{
+	struct i40iw_handler *hdl;
+	struct i40iw_sc_dev *dev;
+	unsigned int i;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return 0;
+
+	dev = &hdl->device.sc_dev;
+
+	for (i = 0; i < hdl->device.max_enabled_vfs; i++) {
+		if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id == vf_id))
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_virtchnl_receive - receive a message through the virtual channel
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id associated with the message
+ * @msg: message buffer pointer
+ * @len: length of the message
+ *
+ * Invoke virtual channel receive operation for the given msg
+ * Return 0 if successful, otherwise return error
+ */
+static int i40iw_virtchnl_receive(struct i40e_info *ldev,
+				  struct i40e_client *client,
+				  u32 vf_id,
+				  u8 *msg,
+				  u16 len)
+{
+	struct i40iw_handler *hdl;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_device *iwdev;
+	int ret_code = I40IW_NOT_SUPPORTED;
+
+	if (!len || !msg)
+		return I40IW_ERR_PARAM;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return I40IW_ERR_PARAM;
+
+	dev = &hdl->device.sc_dev;
+	iwdev = dev->back_dev;
+
+	i40iw_debug(dev, I40IW_DEBUG_VIRT, "msg %p, message length %u\n", msg, len);
+
+	if (dev->vchnl_if.vchnl_recv) {
+		ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len);
+		if (!dev->is_pf) {
+			atomic_dec(&iwdev->vchnl_msgs);
+			wake_up(&iwdev->vchnl_waitq);
+		}
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_virtchnl_send - send a message through the virtual channel
+ * @dev: iwarp device
+ * @vf_id: virtual function id associated with the message
+ * @msg: virtual channel message buffer pointer
+ * @len: length of the message
+ *
+ * Invoke virtual channel send operation for the given msg
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
+						  u32 vf_id,
+						  u8 *msg,
+						  u16 len)
+{
+	struct i40iw_device *iwdev;
+	struct i40e_info *ldev;
+	enum i40iw_status_code ret_code = I40IW_ERR_BAD_PTR;
+
+	if (!dev || !dev->back_dev)
+		return ret_code;
+
+	iwdev = dev->back_dev;
+	ldev = iwdev->ldev;
+
+	if (ldev && ldev->ops && ldev->ops->virtchnl_send)
+		ret_code = ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
+
+	return ret_code;
+}
+
+/* client interface functions */
+static struct i40e_client_ops i40e_ops = {
+	.open = i40iw_open,
+	.close = i40iw_close,
+	.l2_param_change = i40iw_l2param_change,
+	.virtchnl_receive = i40iw_virtchnl_receive,
+	.vf_reset = i40iw_vf_reset,
+	.vf_enable = i40iw_vf_enable,
+	.vf_capable = i40iw_vf_capable
+};
+
+/**
+ * i40iw_init_module - driver initialization function
+ *
+ * First function to call when the driver is loaded
+ * Register the driver as i40e client and port mapper client
+ */
+static int __init i40iw_init_module(void)
+{
+	int ret;
+
+	memset(&i40iw_client, 0, sizeof(i40iw_client));
+	i40iw_client.version.major = CLIENT_IW_INTERFACE_VERSION_MAJOR;
+	i40iw_client.version.minor = CLIENT_IW_INTERFACE_VERSION_MINOR;
+	i40iw_client.version.build = CLIENT_IW_INTERFACE_VERSION_BUILD;
+	i40iw_client.ops = &i40e_ops;
+	memcpy(i40iw_client.name, i40iw_client_name, I40E_CLIENT_STR_LENGTH);
+	i40iw_client.type = I40E_CLIENT_IWARP;
+	spin_lock_init(&i40iw_handler_lock);
+	ret = i40e_register_client(&i40iw_client);
+	return ret;
+}
+
+/**
+ * i40iw_exit_module - driver exit clean up function
+ *
+ * The function is called just before the driver is unloaded
+ * Unregister the driver as i40e client and port mapper client
+ */
+static void __exit i40iw_exit_module(void)
+{
+	i40e_unregister_client(&i40iw_client);
+}
+
+module_init(i40iw_init_module);
+module_exit(i40iw_exit_module);

diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
new file mode 100644
index 0000000..7e20493
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h

@@ -0,0 +1,215 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_OSDEP_H
+#define I40IW_OSDEP_H
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <net/tcp.h>
+#include <crypto/hash.h>
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#define STATS_TIMER_DELAY 1000
+
+static inline void set_64bit_val(u64 *wqe_words, u32 byte_index, u64 value)
+{
+	wqe_words[byte_index >> 3] = value;
+}
+
+/**
+ * set_32bit_val - set 32 value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @value: value to write
+ **/
+static inline void set_32bit_val(u32 *wqe_words, u32 byte_index, u32 value)
+{
+	wqe_words[byte_index >> 2] = value;
+}
+
+/**
+ * get_64bit_val - read 64 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @value: read value
+ **/
+static inline void get_64bit_val(u64 *wqe_words, u32 byte_index, u64 *value)
+{
+	*value = wqe_words[byte_index >> 3];
+}
+
+/**
+ * get_32bit_val - read 32 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to reaad from
+ * @value: return 32 bit value
+ **/
+static inline void get_32bit_val(u32 *wqe_words, u32 byte_index, u32 *value)
+{
+	*value = wqe_words[byte_index >> 2];
+}
+
+struct i40iw_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	u32 size;
+} __packed;
+
+struct i40iw_virt_mem {
+	void *va;
+	u32 size;
+} __packed;
+
+#define i40iw_debug(h, m, s, ...)                               \
+do {                                                            \
+	if (((m) & (h)->debug_mask))                            \
+		pr_info("i40iw " s, ##__VA_ARGS__);             \
+} while (0)
+
+#define i40iw_flush(a)          readl((a)->hw_addr + I40E_GLGEN_STAT)
+
+#define I40E_GLHMC_VFSDCMD(_i)  (0x000C8000 + ((_i) * 4)) \
+				/* _i=0...31 */
+#define I40E_GLHMC_VFSDCMD_MAX_INDEX    31
+#define I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT  0
+#define I40E_GLHMC_VFSDCMD_PMSDIDX_MASK  (0xFFF \
+					  << I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PF_SHIFT       16
+#define I40E_GLHMC_VFSDCMD_PF_MASK        (0xF << I40E_GLHMC_VFSDCMD_PF_SHIFT)
+#define I40E_GLHMC_VFSDCMD_VF_SHIFT       20
+#define I40E_GLHMC_VFSDCMD_VF_MASK        (0x1FF << I40E_GLHMC_VFSDCMD_VF_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT 29
+#define I40E_GLHMC_VFSDCMD_PMF_TYPE_MASK  (0x3 \
+					   << I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT   31
+#define I40E_GLHMC_VFSDCMD_PMSDWR_MASK  (0x1 << I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT)
+
+#define I40E_GLHMC_VFSDDATAHIGH(_i)     (0x000C8200 + ((_i) * 4)) \
+				/* _i=0...31 */
+#define I40E_GLHMC_VFSDDATAHIGH_MAX_INDEX       31
+#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT 0
+#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_MASK  (0xFFFFFFFF \
+			<< I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT)
+
+#define I40E_GLHMC_VFSDDATALOW(_i)      (0x000C8100 + ((_i) * 4)) \
+				/* _i=0...31 */
+#define I40E_GLHMC_VFSDDATALOW_MAX_INDEX        31
+#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT   0
+#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_MASK  (0x1 \
+			<< I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT    1
+#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_MASK  (0x1 \
+			<< I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_MASK  (0x3FF \
+			<< I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT 12
+#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_MASK  (0xFFFFF \
+			<< I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT)
+
+#define I40E_GLPE_FWLDSTATUS                     0x0000D200
+#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT 0
+#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_MASK  (0x1 \
+			<< I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_DONE_SHIFT           1
+#define I40E_GLPE_FWLDSTATUS_DONE_MASK  (0x1 << I40E_GLPE_FWLDSTATUS_DONE_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT       2
+#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_MASK  (0x1 \
+			 << I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT       3
+#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_MASK  (0x1 \
+			 << I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT       4
+#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_MASK  (0x1 \
+			 << I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT)
+
+struct i40iw_sc_dev;
+struct i40iw_sc_qp;
+struct i40iw_puda_buf;
+struct i40iw_puda_completion_info;
+struct i40iw_update_sds_info;
+struct i40iw_hmc_fcn_info;
+struct i40iw_virtchnl_work_info;
+struct i40iw_manage_vf_pble_info;
+struct i40iw_device;
+struct i40iw_hmc_info;
+struct i40iw_hw;
+
+u8 __iomem *i40iw_get_hw_addr(void *dev);
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev);
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr,
+					      u32 length, u32 value);
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum);
+void i40iw_free_hash_desc(struct shash_desc *);
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **);
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+						 struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
+					 struct i40iw_update_sds_info *info);
+enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
+						    struct i40iw_hmc_fcn_info *hmcfcninfo);
+enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
+						      struct i40iw_dma_mem *values_mem,
+						      u8 hmc_fn_id);
+enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
+						       struct i40iw_dma_mem *values_mem,
+						       u8 hmc_fn_id);
+enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
+						 struct i40iw_dma_mem *mem);
+enum i40iw_status_code i40iw_cqp_manage_vf_pble_bp(struct i40iw_sc_dev *dev,
+						   struct i40iw_manage_vf_pble_info *info);
+void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
+			    struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
+void *i40iw_remove_head(struct list_head *list);
+
+void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
+void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
+void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp);
+void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp);
+
+enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
+						  struct i40iw_manage_vf_pble_info *info,
+						  bool wait);
+struct i40iw_dev_pestat;
+void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *);
+void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *);
+#define i40iw_mmiowb() mmiowb()
+void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value);
+u32  i40iw_rd32(struct i40iw_hw *hw, u32 reg);
+#endif				/* _I40IW_OSDEP_H_ */

diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h
new file mode 100644
index 0000000..a0b8ca1
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_p.h

@@ -0,0 +1,106 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_P_H
+#define I40IW_P_H
+
+#define PAUSE_TIMER_VALUE  0xFFFF
+#define REFRESH_THRESHOLD  0x7FFF
+#define HIGH_THRESHOLD     0x800
+#define LOW_THRESHOLD      0x200
+#define ALL_TC2PFC         0xFF
+
+void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask,
+		     char *desc, u64 *buf, u32 size);
+/* init operations */
+enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
+					 struct i40iw_device_init_info *info);
+
+enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *);
+
+void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp);
+
+u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp,
+						 struct i40iw_fast_reg_stag_info *info,
+						 bool post_sq);
+
+/* HMC/FPM functions */
+enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev,
+					    u8 hmc_fn_id);
+
+enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id,
+					   u32 *vf_cnt_array);
+
+/* cqp misc functions */
+
+void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp);
+
+void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
+
+void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
+
+enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
+					   struct i40iw_sc_qp *qp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
+					  struct i40iw_sc_qp *qp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(struct i40iw_sc_cqp *cqp,
+							   u64 scratch, u8 hmc_fn_id,
+							   bool post_sq,
+							   bool poll_registers);
+
+enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count);
+
+void free_sd_mem(struct i40iw_sc_dev *dev);
+
+enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
+					     struct cqp_commands_info *pcmdinfo);
+
+enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev);
+
+/* prototype for functions used for dynamic memory allocation */
+enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
+					      struct i40iw_dma_mem *mem, u64 size,
+					      u32 alignment);
+void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem);
+enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
+					       struct i40iw_virt_mem *mem, u32 size);
+enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
+					   struct i40iw_virt_mem *mem);
+u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq);
+
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
new file mode 100644
index 0000000..ded853d
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c

@@ -0,0 +1,618 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_status.h"
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+
+#include <linux/pci.h>
+#include <linux/genalloc.h>
+#include <linux/vmalloc.h>
+#include "i40iw_pble.h"
+#include "i40iw.h"
+
+struct i40iw_device;
+static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_pble_rsrc *pble_rsrc);
+static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk);
+
+/**
+ * i40iw_destroy_pble_pool - destroy pool during module unload
+ * @pble_rsrc:	pble resources
+ */
+void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+	struct list_head *clist;
+	struct list_head *tlist;
+	struct i40iw_chunk *chunk;
+	struct i40iw_pble_pool *pinfo = &pble_rsrc->pinfo;
+
+	if (pinfo->pool) {
+		list_for_each_safe(clist, tlist, &pinfo->clist) {
+			chunk = list_entry(clist, struct i40iw_chunk, list);
+			if (chunk->type == I40IW_VMALLOC)
+				i40iw_free_vmalloc_mem(dev->hw, chunk);
+			kfree(chunk);
+		}
+		gen_pool_destroy(pinfo->pool);
+	}
+}
+
+/**
+ * i40iw_hmc_init_pble - Initialize pble resources during module load
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc:	pble resources
+ */
+enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
+					   struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+	struct i40iw_hmc_info *hmc_info;
+	u32 fpm_idx = 0;
+
+	hmc_info = dev->hmc_info;
+	pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].base;
+	/* Now start the pble' on 4k boundary */
+	if (pble_rsrc->fpm_base_addr & 0xfff)
+		fpm_idx = (PAGE_SIZE - (pble_rsrc->fpm_base_addr & 0xfff)) >> 3;
+
+	pble_rsrc->unallocated_pble =
+	    hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt - fpm_idx;
+	pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3);
+
+	pble_rsrc->pinfo.pool_shift = POOL_SHIFT;
+	pble_rsrc->pinfo.pool = gen_pool_create(pble_rsrc->pinfo.pool_shift, -1);
+	INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
+	if (!pble_rsrc->pinfo.pool)
+		goto error;
+
+	if (add_pble_pool(dev, pble_rsrc))
+		goto error;
+
+	return 0;
+
+ error:i40iw_destroy_pble_pool(dev, pble_rsrc);
+	return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * get_sd_pd_idx -  Returns sd index, pd index and rel_pd_idx from fpm address
+ * @ pble_rsrc:	structure containing fpm address
+ * @ idx: where to return indexes
+ */
+static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+				 struct sd_pd_idx *idx)
+{
+	idx->sd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_DIRECT_BP_SIZE;
+	idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_PAGED_BP_SIZE;
+	idx->rel_pd_idx = (idx->pd_idx % I40IW_HMC_PD_CNT_IN_SD);
+}
+
+/**
+ * add_sd_direct - add sd direct for pble
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource ptr
+ * @info: page info for sd
+ */
+static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_pble_rsrc *pble_rsrc,
+					    struct i40iw_add_page_info *info)
+{
+	enum i40iw_status_code ret_code = 0;
+	struct sd_pd_idx *idx = &info->idx;
+	struct i40iw_chunk *chunk = info->chunk;
+	struct i40iw_hmc_info *hmc_info = info->hmc_info;
+	struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
+	u32 offset = 0;
+
+	if (!sd_entry->valid) {
+		if (dev->is_pf) {
+			ret_code = i40iw_add_sd_table_entry(dev->hw, hmc_info,
+							    info->idx.sd_idx,
+							    I40IW_SD_TYPE_DIRECT,
+							    I40IW_HMC_DIRECT_BP_SIZE);
+			if (ret_code)
+				return ret_code;
+			chunk->type = I40IW_DMA_COHERENT;
+		}
+	}
+	offset = idx->rel_pd_idx << I40IW_HMC_PAGED_BP_SHIFT;
+	chunk->size = info->pages << I40IW_HMC_PAGED_BP_SHIFT;
+	chunk->vaddr = ((u8 *)sd_entry->u.bp.addr.va + offset);
+	chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+	i40iw_debug(dev, I40IW_DEBUG_PBLE, "chunk_size[%d] = 0x%x vaddr=%p fpm_addr = %llx\n",
+		    chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
+	return 0;
+}
+
+/**
+ * i40iw_free_vmalloc_mem - free vmalloc during close
+ * @hw: hw struct
+ * @chunk: chunk information for vmalloc
+ */
+static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+	int i;
+
+	if (!chunk->pg_cnt)
+		goto done;
+	for (i = 0; i < chunk->pg_cnt; i++)
+		dma_unmap_page(&pcidev->dev, chunk->dmaaddrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ done:
+	kfree(chunk->dmaaddrs);
+	chunk->dmaaddrs = NULL;
+	vfree(chunk->vaddr);
+	chunk->vaddr = NULL;
+	chunk->type = 0;
+}
+
+/**
+ * i40iw_get_vmalloc_mem - get 2M page for sd
+ * @hw: hardware address
+ * @chunk: chunk to adf
+ * @pg_cnt: #of 4 K pages
+ */
+static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw,
+						    struct i40iw_chunk *chunk,
+						    int pg_cnt)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+	struct page *page;
+	u8 *addr;
+	u32 size;
+	int i;
+
+	chunk->dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
+	if (!chunk->dmaaddrs)
+		return I40IW_ERR_NO_MEMORY;
+	size = PAGE_SIZE * pg_cnt;
+	chunk->vaddr = vmalloc(size);
+	if (!chunk->vaddr) {
+		kfree(chunk->dmaaddrs);
+		chunk->dmaaddrs = NULL;
+		return I40IW_ERR_NO_MEMORY;
+	}
+	chunk->size = size;
+	addr = (u8 *)chunk->vaddr;
+	for (i = 0; i < pg_cnt; i++) {
+		page = vmalloc_to_page((void *)addr);
+		if (!page)
+			break;
+		chunk->dmaaddrs[i] = dma_map_page(&pcidev->dev, page, 0,
+						  PAGE_SIZE, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&pcidev->dev, chunk->dmaaddrs[i]))
+			break;
+		addr += PAGE_SIZE;
+	}
+
+	chunk->pg_cnt = i;
+	chunk->type = I40IW_VMALLOC;
+	if (i == pg_cnt)
+		return 0;
+
+	i40iw_free_vmalloc_mem(hw, chunk);
+	return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * fpm_to_idx - given fpm address, get pble index
+ * @pble_rsrc: pble resource management
+ * @addr: fpm address for index
+ */
+static inline u32 fpm_to_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, u64 addr)
+{
+	return (addr - (pble_rsrc->fpm_base_addr)) >> 3;
+}
+
+/**
+ * add_bp_pages - add backing pages for sd
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ * @info: page info for sd
+ */
+static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev,
+					   struct i40iw_hmc_pble_rsrc *pble_rsrc,
+					   struct i40iw_add_page_info *info)
+{
+	u8 *addr;
+	struct i40iw_dma_mem mem;
+	struct i40iw_hmc_pd_entry *pd_entry;
+	struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
+	struct i40iw_hmc_info *hmc_info = info->hmc_info;
+	struct i40iw_chunk *chunk = info->chunk;
+	struct i40iw_manage_vf_pble_info vf_pble_info;
+	enum i40iw_status_code status = 0;
+	u32 rel_pd_idx = info->idx.rel_pd_idx;
+	u32 pd_idx = info->idx.pd_idx;
+	u32 i;
+
+	status = i40iw_get_vmalloc_mem(dev->hw, chunk, info->pages);
+	if (status)
+		return I40IW_ERR_NO_MEMORY;
+	status = i40iw_add_sd_table_entry(dev->hw, hmc_info,
+					  info->idx.sd_idx, I40IW_SD_TYPE_PAGED,
+					  I40IW_HMC_DIRECT_BP_SIZE);
+	if (status) {
+		i40iw_free_vmalloc_mem(dev->hw, chunk);
+		return status;
+	}
+	if (!dev->is_pf) {
+		status = i40iw_vchnl_vf_add_hmc_objs(dev, I40IW_HMC_IW_PBLE,
+						     fpm_to_idx(pble_rsrc,
+								pble_rsrc->next_fpm_addr),
+						     (info->pages << PBLE_512_SHIFT));
+		if (status) {
+			i40iw_pr_err("allocate PBLEs in the PF.  Error %i\n", status);
+			i40iw_free_vmalloc_mem(dev->hw, chunk);
+			return status;
+		}
+	}
+	addr = chunk->vaddr;
+	for (i = 0; i < info->pages; i++) {
+		mem.pa = chunk->dmaaddrs[i];
+		mem.size = PAGE_SIZE;
+		mem.va = (void *)(addr);
+		pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++];
+		if (!pd_entry->valid) {
+			status = i40iw_add_pd_table_entry(dev->hw, hmc_info, pd_idx++, &mem);
+			if (status)
+				goto error;
+			addr += PAGE_SIZE;
+		} else {
+			i40iw_pr_err("pd entry is valid expecting to be invalid\n");
+		}
+	}
+	if (!dev->is_pf) {
+		vf_pble_info.first_pd_index = info->idx.rel_pd_idx;
+		vf_pble_info.inv_pd_ent = false;
+		vf_pble_info.pd_entry_cnt = PBLE_PER_PAGE;
+		vf_pble_info.pd_pl_pba = sd_entry->u.pd_table.pd_page_addr.pa;
+		vf_pble_info.sd_index = info->idx.sd_idx;
+		status = i40iw_hw_manage_vf_pble_bp(dev->back_dev,
+						    &vf_pble_info, true);
+		if (status) {
+			i40iw_pr_err("CQP manage VF PBLE BP failed.  %i\n", status);
+			goto error;
+		}
+	}
+	chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+	return 0;
+error:
+	i40iw_free_vmalloc_mem(dev->hw, chunk);
+	return status;
+}
+
+/**
+ * add_pble_pool - add a sd entry for pble resoure
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ */
+static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_chunk *chunk;
+	struct i40iw_add_page_info info;
+	struct sd_pd_idx *idx = &info.idx;
+	enum i40iw_status_code ret_code = 0;
+	enum i40iw_sd_entry_type sd_entry_type;
+	u64 sd_reg_val = 0;
+	u32 pages;
+
+	if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
+		return I40IW_ERR_NO_MEMORY;
+	if (pble_rsrc->next_fpm_addr & 0xfff) {
+		i40iw_pr_err("next fpm_addr %llx\n", pble_rsrc->next_fpm_addr);
+		return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+	}
+	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+	if (!chunk)
+		return I40IW_ERR_NO_MEMORY;
+	hmc_info = dev->hmc_info;
+	chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+	get_sd_pd_idx(pble_rsrc, idx);
+	sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx];
+	pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD -
+			idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD;
+	pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
+	if (!pages) {
+		ret_code = I40IW_ERR_NO_PBLCHUNKS_AVAILABLE;
+		goto error;
+	}
+	info.chunk = chunk;
+	info.hmc_info = hmc_info;
+	info.pages = pages;
+	info.sd_entry = sd_entry;
+	if (!sd_entry->valid) {
+		sd_entry_type = (!idx->rel_pd_idx &&
+				 (pages == I40IW_HMC_PD_CNT_IN_SD) &&
+				 dev->is_pf) ? I40IW_SD_TYPE_DIRECT : I40IW_SD_TYPE_PAGED;
+	} else {
+		sd_entry_type = sd_entry->entry_type;
+	}
+	i40iw_debug(dev, I40IW_DEBUG_PBLE,
+		    "pages = %d, unallocated_pble[%u] current_fpm_addr = %llx\n",
+		    pages, pble_rsrc->unallocated_pble, pble_rsrc->next_fpm_addr);
+	i40iw_debug(dev, I40IW_DEBUG_PBLE, "sd_entry_type = %d sd_entry valid = %d\n",
+		    sd_entry_type, sd_entry->valid);
+
+	if (sd_entry_type == I40IW_SD_TYPE_DIRECT)
+		ret_code = add_sd_direct(dev, pble_rsrc, &info);
+	if (ret_code)
+		sd_entry_type = I40IW_SD_TYPE_PAGED;
+	else
+		pble_rsrc->stats_direct_sds++;
+
+	if (sd_entry_type == I40IW_SD_TYPE_PAGED) {
+		ret_code = add_bp_pages(dev, pble_rsrc, &info);
+		if (ret_code)
+			goto error;
+		else
+			pble_rsrc->stats_paged_sds++;
+	}
+
+	if (gen_pool_add_virt(pble_rsrc->pinfo.pool, (unsigned long)chunk->vaddr,
+			      (phys_addr_t)chunk->fpm_addr, chunk->size, -1)) {
+		i40iw_pr_err("could not allocate memory by gen_pool_addr_virt()\n");
+		ret_code = I40IW_ERR_NO_MEMORY;
+		goto error;
+	}
+	pble_rsrc->next_fpm_addr += chunk->size;
+	i40iw_debug(dev, I40IW_DEBUG_PBLE, "next_fpm_addr = %llx chunk_size[%u] = 0x%x\n",
+		    pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
+	pble_rsrc->unallocated_pble -= (chunk->size >> 3);
+	list_add(&chunk->list, &pble_rsrc->pinfo.clist);
+	sd_reg_val = (sd_entry_type == I40IW_SD_TYPE_PAGED) ?
+			sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa;
+	if (sd_entry->valid)
+		return 0;
+	if (dev->is_pf)
+		ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id,
+					    sd_reg_val, idx->sd_idx,
+					    sd_entry->entry_type, true);
+	if (ret_code) {
+		i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
+		goto error;
+	}
+
+	sd_entry->valid = true;
+	return 0;
+ error:
+	kfree(chunk);
+	return ret_code;
+}
+
+/**
+ * free_lvl2 - fee level 2 pble
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ */
+static void free_lvl2(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+		      struct i40iw_pble_alloc *palloc)
+{
+	u32 i;
+	struct gen_pool *pool;
+	struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+	struct i40iw_pble_info *root = &lvl2->root;
+	struct i40iw_pble_info *leaf = lvl2->leaf;
+
+	pool = pble_rsrc->pinfo.pool;
+
+	for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+		if (leaf->addr)
+			gen_pool_free(pool, leaf->addr, (leaf->cnt << 3));
+		else
+			break;
+	}
+
+	if (root->addr)
+		gen_pool_free(pool, root->addr, (root->cnt << 3));
+
+	kfree(lvl2->leaf);
+	lvl2->leaf = NULL;
+}
+
+/**
+ * get_lvl2_pble - get level 2 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ * @pool: pool pointer
+ */
+static enum i40iw_status_code get_lvl2_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+					    struct i40iw_pble_alloc *palloc,
+					    struct gen_pool *pool)
+{
+	u32 lf4k, lflast, total, i;
+	u32 pblcnt = PBLE_PER_PAGE;
+	u64 *addr;
+	struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+	struct i40iw_pble_info *root = &lvl2->root;
+	struct i40iw_pble_info *leaf;
+
+	/* number of full 512 (4K) leafs) */
+	lf4k = palloc->total_cnt >> 9;
+	lflast = palloc->total_cnt % PBLE_PER_PAGE;
+	total = (lflast == 0) ? lf4k : lf4k + 1;
+	lvl2->leaf_cnt = total;
+
+	leaf = kzalloc((sizeof(*leaf) * total), GFP_ATOMIC);
+	if (!leaf)
+		return I40IW_ERR_NO_MEMORY;
+	lvl2->leaf = leaf;
+	/* allocate pbles for the root */
+	root->addr = gen_pool_alloc(pool, (total << 3));
+	if (!root->addr) {
+		kfree(lvl2->leaf);
+		lvl2->leaf = NULL;
+		return I40IW_ERR_NO_MEMORY;
+	}
+	root->idx = fpm_to_idx(pble_rsrc,
+			       (u64)gen_pool_virt_to_phys(pool, root->addr));
+	root->cnt = total;
+	addr = (u64 *)root->addr;
+	for (i = 0; i < total; i++, leaf++) {
+		pblcnt = (lflast && ((i + 1) == total)) ? lflast : PBLE_PER_PAGE;
+		leaf->addr = gen_pool_alloc(pool, (pblcnt << 3));
+		if (!leaf->addr)
+			goto error;
+		leaf->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool, leaf->addr));
+
+		leaf->cnt = pblcnt;
+		*addr = (u64)leaf->idx;
+		addr++;
+	}
+	palloc->level = I40IW_LEVEL_2;
+	pble_rsrc->stats_lvl2++;
+	return 0;
+ error:
+	free_lvl2(pble_rsrc, palloc);
+	return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * get_lvl1_pble - get level 1 pble resource
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ * @palloc: level 1 pble allocation
+ */
+static enum i40iw_status_code get_lvl1_pble(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_pble_rsrc *pble_rsrc,
+					    struct i40iw_pble_alloc *palloc)
+{
+	u64 *addr;
+	struct gen_pool *pool;
+	struct i40iw_pble_info *lvl1 = &palloc->level1;
+
+	pool = pble_rsrc->pinfo.pool;
+	addr = (u64 *)gen_pool_alloc(pool, (palloc->total_cnt << 3));
+
+	if (!addr)
+		return I40IW_ERR_NO_MEMORY;
+
+	palloc->level = I40IW_LEVEL_1;
+	lvl1->addr = (unsigned long)addr;
+	lvl1->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool,
+			       (unsigned long)addr));
+	lvl1->cnt = palloc->total_cnt;
+	pble_rsrc->stats_lvl1++;
+	return 0;
+}
+
+/**
+ * get_lvl1_lvl2_pble - calls get_lvl1 and get_lvl2 pble routine
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc:	pble resources
+ * @palloc: contains all inforamtion regarding pble (idx + pble addr)
+ * @pool: pointer to general purpose special memory pool descriptor
+ */
+static inline enum i40iw_status_code get_lvl1_lvl2_pble(struct i40iw_sc_dev *dev,
+							struct i40iw_hmc_pble_rsrc *pble_rsrc,
+							struct i40iw_pble_alloc *palloc,
+							struct gen_pool *pool)
+{
+	enum i40iw_status_code status = 0;
+
+	status = get_lvl1_pble(dev, pble_rsrc, palloc);
+	if (status && (palloc->total_cnt > PBLE_PER_PAGE))
+		status = get_lvl2_pble(pble_rsrc, palloc, pool);
+	return status;
+}
+
+/**
+ * i40iw_get_pble - allocate pbles from the pool
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc:	pble resources
+ * @palloc: contains all inforamtion regarding pble (idx + pble addr)
+ * @pble_cnt: #of pbles requested
+ */
+enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
+				      struct i40iw_hmc_pble_rsrc *pble_rsrc,
+				      struct i40iw_pble_alloc *palloc,
+				      u32 pble_cnt)
+{
+	struct gen_pool *pool;
+	enum i40iw_status_code status = 0;
+	u32 max_sds = 0;
+	int i;
+
+	pool = pble_rsrc->pinfo.pool;
+	palloc->total_cnt = pble_cnt;
+	palloc->level = I40IW_LEVEL_0;
+	/*check first to see if we can get pble's without acquiring additional sd's */
+	status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
+	if (!status)
+		goto exit;
+	max_sds = (palloc->total_cnt >> 18) + 1;
+	for (i = 0; i < max_sds; i++) {
+		status = add_pble_pool(dev, pble_rsrc);
+		if (status)
+			break;
+		status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
+		if (!status)
+			break;
+	}
+exit:
+	if (!status)
+		pble_rsrc->stats_alloc_ok++;
+	else
+		pble_rsrc->stats_alloc_fail++;
+
+	return status;
+}
+
+/**
+ * i40iw_free_pble - put pbles back into pool
+ * @pble_rsrc:	pble resources
+ * @palloc: contains all inforamtion regarding pble resource being freed
+ */
+void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+		     struct i40iw_pble_alloc *palloc)
+{
+	struct gen_pool *pool;
+
+	pool = pble_rsrc->pinfo.pool;
+	if (palloc->level == I40IW_LEVEL_2)
+		free_lvl2(pble_rsrc, palloc);
+	else
+		gen_pool_free(pool, palloc->level1.addr,
+			      (palloc->level1.cnt << 3));
+	pble_rsrc->stats_alloc_freed++;
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.h b/drivers/infiniband/hw/i40iw/i40iw_pble.h
new file mode 100644
index 0000000..7b1851d
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.h

@@ -0,0 +1,131 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_PBLE_H
+#define I40IW_PBLE_H
+
+#define POOL_SHIFT      6
+#define PBLE_PER_PAGE   512
+#define I40IW_HMC_PAGED_BP_SHIFT 12
+#define PBLE_512_SHIFT  9
+
+enum i40iw_pble_level {
+	I40IW_LEVEL_0 = 0,
+	I40IW_LEVEL_1 = 1,
+	I40IW_LEVEL_2 = 2
+};
+
+enum i40iw_alloc_type {
+	I40IW_NO_ALLOC = 0,
+	I40IW_DMA_COHERENT = 1,
+	I40IW_VMALLOC = 2
+};
+
+struct i40iw_pble_info {
+	unsigned long addr;
+	u32 idx;
+	u32 cnt;
+};
+
+struct i40iw_pble_level2 {
+	struct i40iw_pble_info root;
+	struct i40iw_pble_info *leaf;
+	u32 leaf_cnt;
+};
+
+struct i40iw_pble_alloc {
+	u32 total_cnt;
+	enum i40iw_pble_level level;
+	union {
+		struct i40iw_pble_info level1;
+		struct i40iw_pble_level2 level2;
+	};
+};
+
+struct sd_pd_idx {
+	u32 sd_idx;
+	u32 pd_idx;
+	u32 rel_pd_idx;
+};
+
+struct i40iw_add_page_info {
+	struct i40iw_chunk *chunk;
+	struct i40iw_hmc_sd_entry *sd_entry;
+	struct i40iw_hmc_info *hmc_info;
+	struct sd_pd_idx idx;
+	u32 pages;
+};
+
+struct i40iw_chunk {
+	struct list_head list;
+	u32 size;
+	void *vaddr;
+	u64 fpm_addr;
+	u32 pg_cnt;
+	dma_addr_t *dmaaddrs;
+	enum i40iw_alloc_type type;
+};
+
+struct i40iw_pble_pool {
+	struct gen_pool *pool;
+	struct list_head clist;
+	u32 total_pble_alloc;
+	u32 free_pble_cnt;
+	u32 pool_shift;
+};
+
+struct i40iw_hmc_pble_rsrc {
+	u32 unallocated_pble;
+	u64 fpm_base_addr;
+	u64 next_fpm_addr;
+	struct i40iw_pble_pool pinfo;
+
+	u32 stats_direct_sds;
+	u32 stats_paged_sds;
+	u64 stats_alloc_ok;
+	u64 stats_alloc_fail;
+	u64 stats_alloc_freed;
+	u64 stats_lvl1;
+	u64 stats_lvl2;
+};
+
+void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc);
+enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
+					   struct i40iw_hmc_pble_rsrc *pble_rsrc);
+void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct i40iw_pble_alloc *palloc);
+enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
+				      struct i40iw_hmc_pble_rsrc *pble_rsrc,
+				      struct i40iw_pble_alloc *palloc,
+				      u32 pble_cnt);
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
new file mode 100644
index 0000000..8eb400d8
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c

@@ -0,0 +1,1436 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_puda.h"
+
+static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+			      struct i40iw_puda_buf *buf);
+static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid);
+static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
+static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
+						      *rsrc, bool initial);
+/**
+ * i40iw_puda_get_listbuf - get buffer from puda list
+ * @list: list to use for buffers (ILQ or IEQ)
+ */
+static struct i40iw_puda_buf *i40iw_puda_get_listbuf(struct list_head *list)
+{
+	struct i40iw_puda_buf *buf = NULL;
+
+	if (!list_empty(list)) {
+		buf = (struct i40iw_puda_buf *)list->next;
+		list_del((struct list_head *)&buf->list);
+	}
+	return buf;
+}
+
+/**
+ * i40iw_puda_get_bufpool - return buffer from resource
+ * @rsrc: resource to use for buffer
+ */
+struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_puda_buf *buf = NULL;
+	struct list_head *list = &rsrc->bufpool;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+	buf = i40iw_puda_get_listbuf(list);
+	if (buf)
+		rsrc->avail_buf_count--;
+	else
+		rsrc->stats_buf_alloc_fail++;
+	spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+	return buf;
+}
+
+/**
+ * i40iw_puda_ret_bufpool - return buffer to rsrc list
+ * @rsrc: resource to use for buffer
+ * @buf: buffe to return to resouce
+ */
+void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
+			    struct i40iw_puda_buf *buf)
+{
+	unsigned long	flags;
+
+	spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+	list_add(&buf->list, &rsrc->bufpool);
+	spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+	rsrc->avail_buf_count++;
+}
+
+/**
+ * i40iw_puda_post_recvbuf - set wqe for rcv buffer
+ * @rsrc: resource ptr
+ * @wqe_idx: wqe index to use
+ * @buf: puda buffer for rcv q
+ * @initial: flag if during init time
+ */
+static void i40iw_puda_post_recvbuf(struct i40iw_puda_rsrc *rsrc, u32 wqe_idx,
+				    struct i40iw_puda_buf *buf, bool initial)
+{
+	u64 *wqe;
+	struct i40iw_sc_qp *qp = &rsrc->qp;
+	u64 offset24 = 0;
+
+	qp->qp_uk.rq_wrid_array[wqe_idx] = (uintptr_t)buf;
+	wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+	i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+		    "%s: wqe_idx= %d buf = %p wqe = %p\n", __func__,
+		    wqe_idx, buf, wqe);
+	if (!initial)
+		get_64bit_val(wqe, 24, &offset24);
+
+	offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
+	set_64bit_val(wqe, 24, offset24);
+
+	set_64bit_val(wqe, 0, buf->mem.pa);
+	set_64bit_val(wqe, 8,
+		      LS_64(buf->mem.size, I40IWQPSQ_FRAG_LEN));
+	set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * i40iw_puda_replenish_rq - post rcv buffers
+ * @rsrc: resource to use for buffer
+ * @initial: flag if during init time
+ */
+static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc *rsrc,
+						      bool initial)
+{
+	u32 i;
+	u32 invalid_cnt = rsrc->rxq_invalid_cnt;
+	struct i40iw_puda_buf *buf = NULL;
+
+	for (i = 0; i < invalid_cnt; i++) {
+		buf = i40iw_puda_get_bufpool(rsrc);
+		if (!buf)
+			return I40IW_ERR_list_empty;
+		i40iw_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf,
+					initial);
+		rsrc->rx_wqe_idx =
+		    ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size);
+		rsrc->rxq_invalid_cnt--;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_puda_alloc_buf - allocate mem for buffer
+ * @dev: iwarp device
+ * @length: length of buffer
+ */
+static struct i40iw_puda_buf *i40iw_puda_alloc_buf(struct i40iw_sc_dev *dev,
+						   u32 length)
+{
+	struct i40iw_puda_buf *buf = NULL;
+	struct i40iw_virt_mem buf_mem;
+	enum i40iw_status_code ret;
+
+	ret = i40iw_allocate_virt_mem(dev->hw, &buf_mem,
+				      sizeof(struct i40iw_puda_buf));
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA,
+			    "%s: error mem for buf\n", __func__);
+		return NULL;
+	}
+	buf = (struct i40iw_puda_buf *)buf_mem.va;
+	ret = i40iw_allocate_dma_mem(dev->hw, &buf->mem, length, 1);
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA,
+			    "%s: error dma mem for buf\n", __func__);
+		i40iw_free_virt_mem(dev->hw, &buf_mem);
+		return NULL;
+	}
+	buf->buf_mem.va = buf_mem.va;
+	buf->buf_mem.size = buf_mem.size;
+	return buf;
+}
+
+/**
+ * i40iw_puda_dele_buf - delete buffer back to system
+ * @dev: iwarp device
+ * @buf: buffer to free
+ */
+static void i40iw_puda_dele_buf(struct i40iw_sc_dev *dev,
+				struct i40iw_puda_buf *buf)
+{
+	i40iw_free_dma_mem(dev->hw, &buf->mem);
+	i40iw_free_virt_mem(dev->hw, &buf->buf_mem);
+}
+
+/**
+ * i40iw_puda_get_next_send_wqe - return next wqe for processing
+ * @qp: puda qp for wqe
+ * @wqe_idx: wqe index for caller
+ */
+static u64 *i40iw_puda_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
+{
+	u64 *wqe = NULL;
+	enum i40iw_status_code ret_code = 0;
+
+	*wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+	if (!*wqe_idx)
+		qp->swqe_polarity = !qp->swqe_polarity;
+	I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+	if (ret_code)
+		return wqe;
+	wqe = qp->sq_base[*wqe_idx].elem;
+
+	return wqe;
+}
+
+/**
+ * i40iw_puda_poll_info - poll cq for completion
+ * @cq: cq for poll
+ * @info: info return for successful completion
+ */
+static enum i40iw_status_code i40iw_puda_poll_info(struct i40iw_sc_cq *cq,
+						   struct i40iw_puda_completion_info *info)
+{
+	u64 qword0, qword2, qword3;
+	u64 *cqe;
+	u64 comp_ctx;
+	bool valid_bit;
+	u32 major_err, minor_err;
+	bool error;
+
+	cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&cq->cq_uk);
+	get_64bit_val(cqe, 24, &qword3);
+	valid_bit = (bool)RS_64(qword3, I40IW_CQ_VALID);
+
+	if (valid_bit != cq->cq_uk.polarity)
+		return I40IW_ERR_QUEUE_EMPTY;
+
+	i40iw_debug_buf(cq->dev, I40IW_DEBUG_PUDA, "PUDA CQE", cqe, 32);
+	error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
+	if (error) {
+		i40iw_debug(cq->dev, I40IW_DEBUG_PUDA, "%s receive error\n", __func__);
+		major_err = (u32)(RS_64(qword3, I40IW_CQ_MAJERR));
+		minor_err = (u32)(RS_64(qword3, I40IW_CQ_MINERR));
+		info->compl_error = major_err << 16 | minor_err;
+		return I40IW_ERR_CQ_COMPL_ERROR;
+	}
+
+	get_64bit_val(cqe, 0, &qword0);
+	get_64bit_val(cqe, 16, &qword2);
+
+	info->q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
+	info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
+
+	get_64bit_val(cqe, 8, &comp_ctx);
+	info->qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
+	info->wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
+
+	if (info->q_type == I40IW_CQE_QTYPE_RQ) {
+		info->vlan_valid = (bool)RS_64(qword3, I40IW_VLAN_TAG_VALID);
+		info->l4proto = (u8)RS_64(qword2, I40IW_UDA_L4PROTO);
+		info->l3proto = (u8)RS_64(qword2, I40IW_UDA_L3PROTO);
+		info->payload_len = (u16)RS_64(qword0, I40IW_UDA_PAYLOADLEN);
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_puda_poll_completion - processes completion for cq
+ * @dev: iwarp device
+ * @cq: cq getting interrupt
+ * @compl_err: return any completion err
+ */
+enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
+						  struct i40iw_sc_cq *cq, u32 *compl_err)
+{
+	struct i40iw_qp_uk *qp;
+	struct i40iw_cq_uk *cq_uk = &cq->cq_uk;
+	struct i40iw_puda_completion_info info;
+	enum i40iw_status_code ret = 0;
+	struct i40iw_puda_buf *buf;
+	struct i40iw_puda_rsrc *rsrc;
+	void *sqwrid;
+	u8 cq_type = cq->cq_type;
+	unsigned long	flags;
+
+	if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) {
+		rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? dev->ilq : dev->ieq;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__);
+		return I40IW_ERR_BAD_PTR;
+	}
+	memset(&info, 0, sizeof(info));
+	ret = i40iw_puda_poll_info(cq, &info);
+	*compl_err = info.compl_error;
+	if (ret == I40IW_ERR_QUEUE_EMPTY)
+		return ret;
+	if (ret)
+		goto done;
+
+	qp = info.qp;
+	if (!qp || !rsrc) {
+		ret = I40IW_ERR_BAD_PTR;
+		goto done;
+	}
+
+	if (qp->qp_id != rsrc->qp_id) {
+		ret = I40IW_ERR_BAD_PTR;
+		goto done;
+	}
+
+	if (info.q_type == I40IW_CQE_QTYPE_RQ) {
+		buf = (struct i40iw_puda_buf *)(uintptr_t)qp->rq_wrid_array[info.wqe_idx];
+		/* Get all the tcpip information in the buf header */
+		ret = i40iw_puda_get_tcpip_info(&info, buf);
+		if (ret) {
+			rsrc->stats_rcvd_pkt_err++;
+			if (cq_type == I40IW_CQ_TYPE_ILQ) {
+				i40iw_ilq_putback_rcvbuf(&rsrc->qp,
+							 info.wqe_idx);
+			} else {
+				i40iw_puda_ret_bufpool(rsrc, buf);
+				i40iw_puda_replenish_rq(rsrc, false);
+			}
+			goto done;
+		}
+
+		rsrc->stats_pkt_rcvd++;
+		rsrc->compl_rxwqe_idx = info.wqe_idx;
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__);
+		rsrc->receive(rsrc->dev, buf);
+		if (cq_type == I40IW_CQ_TYPE_ILQ)
+			i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx);
+		else
+			i40iw_puda_replenish_rq(rsrc, false);
+
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__);
+		sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid;
+		I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
+		rsrc->xmit_complete(rsrc->dev, sqwrid);
+		spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+		rsrc->tx_wqe_avail_cnt++;
+		spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+		if (!list_empty(&dev->ilq->txpend))
+			i40iw_puda_send_buf(dev->ilq, NULL);
+	}
+
+done:
+	I40IW_RING_MOVE_HEAD(cq_uk->cq_ring, ret);
+	if (I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring) == 0)
+		cq_uk->polarity = !cq_uk->polarity;
+	/* update cq tail in cq shadow memory also */
+	I40IW_RING_MOVE_TAIL(cq_uk->cq_ring);
+	set_64bit_val(cq_uk->shadow_area, 0,
+		      I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring));
+	return 0;
+}
+
+/**
+ * i40iw_puda_send - complete send wqe for transmit
+ * @qp: puda qp for send
+ * @info: buffer information for transmit
+ */
+enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
+				       struct i40iw_puda_send_info *info)
+{
+	u64 *wqe;
+	u32 iplen, l4len;
+	u64 header[2];
+	u32 wqe_idx;
+	u8 iipt;
+
+	/* number of 32 bits DWORDS in header */
+	l4len = info->tcplen >> 2;
+	if (info->ipv4) {
+		iipt = 3;
+		iplen = 5;
+	} else {
+		iipt = 1;
+		iplen = 10;
+	}
+
+	wqe = i40iw_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+	qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch;
+	/* Third line of WQE descriptor */
+	/* maclen is in words */
+	header[0] = LS_64((info->maclen >> 1), I40IW_UDA_QPSQ_MACLEN) |
+		    LS_64(iplen, I40IW_UDA_QPSQ_IPLEN) | LS_64(1, I40IW_UDA_QPSQ_L4T) |
+		    LS_64(iipt, I40IW_UDA_QPSQ_IIPT) |
+		    LS_64(l4len, I40IW_UDA_QPSQ_L4LEN);
+	/* Forth line of WQE descriptor */
+	header[1] = LS_64(I40IW_OP_TYPE_SEND, I40IW_UDA_QPSQ_OPCODE) |
+		    LS_64(1, I40IW_UDA_QPSQ_SIGCOMPL) |
+		    LS_64(info->doloopback, I40IW_UDA_QPSQ_DOLOOPBACK) |
+		    LS_64(qp->qp_uk.swqe_polarity, I40IW_UDA_QPSQ_VALID);
+
+	set_64bit_val(wqe, 0, info->paddr);
+	set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN));
+	set_64bit_val(wqe, 16, header[0]);
+	set_64bit_val(wqe, 24, header[1]);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32);
+	i40iw_qp_post_wr(&qp->qp_uk);
+	return 0;
+}
+
+/**
+ * i40iw_puda_send_buf - transmit puda buffer
+ * @rsrc: resource to use for buffer
+ * @buf: puda buffer to transmit
+ */
+void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc, struct i40iw_puda_buf *buf)
+{
+	struct i40iw_puda_send_info info;
+	enum i40iw_status_code ret = 0;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+	/* if no wqe available or not from a completion and we have
+	 * pending buffers, we must queue new buffer
+	 */
+	if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) {
+		list_add_tail(&buf->list, &rsrc->txpend);
+		spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+		rsrc->stats_sent_pkt_q++;
+		if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
+			i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+				    "%s: adding to txpend\n", __func__);
+		return;
+	}
+	rsrc->tx_wqe_avail_cnt--;
+	/* if we are coming from a completion and have pending buffers
+	 * then Get one from pending list
+	 */
+	if (!buf) {
+		buf = i40iw_puda_get_listbuf(&rsrc->txpend);
+		if (!buf)
+			goto done;
+	}
+
+	info.scratch = (void *)buf;
+	info.paddr = buf->mem.pa;
+	info.len = buf->totallen;
+	info.tcplen = buf->tcphlen;
+	info.maclen = buf->maclen;
+	info.ipv4 = buf->ipv4;
+	info.doloopback = (rsrc->type == I40IW_PUDA_RSRC_TYPE_IEQ);
+
+	ret = i40iw_puda_send(&rsrc->qp, &info);
+	if (ret) {
+		rsrc->tx_wqe_avail_cnt++;
+		rsrc->stats_sent_pkt_q++;
+		list_add(&buf->list, &rsrc->txpend);
+		if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
+			i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+				    "%s: adding to puda_send\n", __func__);
+	} else {
+		rsrc->stats_pkt_sent++;
+	}
+done:
+	spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+}
+
+/**
+ * i40iw_puda_qp_setctx - during init, set qp's context
+ * @rsrc: qp's resource
+ */
+static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_qp *qp = &rsrc->qp;
+	u64 *qp_ctx = qp->hw_host_ctx;
+
+	set_64bit_val(qp_ctx, 8, qp->sq_pa);
+	set_64bit_val(qp_ctx, 16, qp->rq_pa);
+
+	set_64bit_val(qp_ctx, 24,
+		      LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
+		      LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE));
+
+	set_64bit_val(qp_ctx, 48, LS_64(1514, I40IWQPC_SNDMSS));
+	set_64bit_val(qp_ctx, 56, 0);
+	set_64bit_val(qp_ctx, 64, 1);
+
+	set_64bit_val(qp_ctx, 136,
+		      LS_64(rsrc->cq_id, I40IWQPC_TXCQNUM) |
+		      LS_64(rsrc->cq_id, I40IWQPC_RXCQNUM));
+
+	set_64bit_val(qp_ctx, 160, LS_64(1, I40IWQPC_PRIVEN));
+
+	set_64bit_val(qp_ctx, 168,
+		      LS_64((uintptr_t)qp, I40IWQPC_QPCOMPCTX));
+
+	set_64bit_val(qp_ctx, 176,
+		      LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
+		      LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
+		      LS_64(qp->qs_handle, I40IWQPC_QSHANDLE));
+
+	i40iw_debug_buf(rsrc->dev, I40IW_DEBUG_PUDA, "PUDA QP CONTEXT",
+			qp_ctx, I40IW_QP_CTX_SIZE);
+}
+
+/**
+ * i40iw_puda_qp_wqe - setup wqe for qp create
+ * @rsrc: resource for qp
+ */
+static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_qp *qp = &rsrc->qp;
+	struct i40iw_sc_dev *dev = rsrc->dev;
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+	struct i40iw_ccq_cqe_info compl_info;
+	enum i40iw_status_code status = 0;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+	set_64bit_val(wqe, 40, qp->shadow_area_pa);
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(I40IW_QP_TYPE_UDA, I40IW_CQPSQ_QP_QPTYPE) |
+		 LS_64(1, I40IW_CQPSQ_QP_CQNUMVALID) |
+		 LS_64(2, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	set_64bit_val(wqe, 24, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, 32);
+	i40iw_sc_cqp_post_sq(cqp);
+	status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+						    I40IW_CQP_OP_CREATE_QP,
+						    &compl_info);
+	return status;
+}
+
+/**
+ * i40iw_puda_qp_create - create qp for resource
+ * @rsrc: resource to use for buffer
+ */
+static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_qp *qp = &rsrc->qp;
+	struct i40iw_qp_uk *ukqp = &qp->qp_uk;
+	enum i40iw_status_code ret = 0;
+	u32 sq_size, rq_size, t_size;
+	struct i40iw_dma_mem *mem;
+
+	sq_size = rsrc->sq_size * I40IW_QP_WQE_MIN_SIZE;
+	rq_size = rsrc->rq_size * I40IW_QP_WQE_MIN_SIZE;
+	t_size = (sq_size + rq_size + (I40IW_SHADOW_AREA_SIZE << 3) +
+		  I40IW_QP_CTX_SIZE);
+	/* Get page aligned memory */
+	ret =
+	    i40iw_allocate_dma_mem(rsrc->dev->hw, &rsrc->qpmem, t_size,
+				   I40IW_HW_PAGE_SIZE);
+	if (ret) {
+		i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s: error dma mem\n", __func__);
+		return ret;
+	}
+
+	mem = &rsrc->qpmem;
+	memset(mem->va, 0, t_size);
+	qp->hw_sq_size = i40iw_get_encoded_wqe_size(rsrc->sq_size, false);
+	qp->hw_rq_size = i40iw_get_encoded_wqe_size(rsrc->rq_size, false);
+	qp->pd = &rsrc->sc_pd;
+	qp->qp_type = I40IW_QP_TYPE_UDA;
+	qp->dev = rsrc->dev;
+	qp->back_qp = (void *)rsrc;
+	qp->sq_pa = mem->pa;
+	qp->rq_pa = qp->sq_pa + sq_size;
+	ukqp->sq_base = mem->va;
+	ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
+	ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
+	qp->shadow_area_pa = qp->rq_pa + rq_size;
+	qp->hw_host_ctx = ukqp->shadow_area + I40IW_SHADOW_AREA_SIZE;
+	qp->hw_host_ctx_pa =
+		qp->shadow_area_pa + (I40IW_SHADOW_AREA_SIZE << 3);
+	ukqp->qp_id = rsrc->qp_id;
+	ukqp->sq_wrtrk_array = rsrc->sq_wrtrk_array;
+	ukqp->rq_wrid_array = rsrc->rq_wrid_array;
+
+	ukqp->qp_id = rsrc->qp_id;
+	ukqp->sq_size = rsrc->sq_size;
+	ukqp->rq_size = rsrc->rq_size;
+
+	I40IW_RING_INIT(ukqp->sq_ring, ukqp->sq_size);
+	I40IW_RING_INIT(ukqp->initial_ring, ukqp->sq_size);
+	I40IW_RING_INIT(ukqp->rq_ring, ukqp->rq_size);
+
+	if (qp->pd->dev->is_pf)
+		ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+						    I40E_PFPE_WQEALLOC);
+	else
+		ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+						    I40E_VFPE_WQEALLOC1);
+
+	qp->qs_handle = qp->dev->qs_handle;
+	i40iw_puda_qp_setctx(rsrc);
+	ret = i40iw_puda_qp_wqe(rsrc);
+	if (ret)
+		i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem);
+	return ret;
+}
+
+/**
+ * i40iw_puda_cq_create - create cq for resource
+ * @rsrc: resource for which cq to create
+ */
+static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_dev *dev = rsrc->dev;
+	struct i40iw_sc_cq *cq = &rsrc->cq;
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	enum i40iw_status_code ret = 0;
+	u32 tsize, cqsize;
+	u32 shadow_read_threshold = 128;
+	struct i40iw_dma_mem *mem;
+	struct i40iw_ccq_cqe_info compl_info;
+	struct i40iw_cq_init_info info;
+	struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info;
+
+	cq->back_cq = (void *)rsrc;
+	cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe));
+	tsize = cqsize + sizeof(struct i40iw_cq_shadow_area);
+	ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize,
+				     I40IW_CQ0_ALIGNMENT_MASK);
+	if (ret)
+		return ret;
+
+	mem = &rsrc->cqmem;
+	memset(&info, 0, sizeof(info));
+	info.dev = dev;
+	info.type = (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ) ?
+			 I40IW_CQ_TYPE_ILQ : I40IW_CQ_TYPE_IEQ;
+	info.shadow_read_threshold = rsrc->cq_size >> 2;
+	info.ceq_id_valid = true;
+	info.cq_base_pa = mem->pa;
+	info.shadow_area_pa = mem->pa + cqsize;
+	init_info->cq_base = mem->va;
+	init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize);
+	init_info->cq_size = rsrc->cq_size;
+	init_info->cq_id = rsrc->cq_id;
+	ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
+	if (ret)
+		goto error;
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+	if (!wqe) {
+		ret = I40IW_ERR_RING_FULL;
+		goto error;
+	}
+
+	set_64bit_val(wqe, 0, rsrc->cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe, 16, LS_64(shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+	set_64bit_val(wqe, 32, cq->cq_pa);
+
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+
+	header = rsrc->cq_id |
+	    LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+	    LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+	    LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+	    LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
+	    LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	set_64bit_val(wqe, 24, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_sc_cqp_post_sq(dev->cqp);
+	ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+						 I40IW_CQP_OP_CREATE_CQ,
+						 &compl_info);
+
+error:
+	if (ret)
+		i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
+	return ret;
+}
+
+/**
+ * i40iw_puda_dele_resources - delete all resources during close
+ * @dev: iwarp device
+ * @type: type of resource to dele
+ * @reset: true if reset chip
+ */
+void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+			       enum puda_resource_type type,
+			       bool reset)
+{
+	struct i40iw_ccq_cqe_info compl_info;
+	struct i40iw_puda_rsrc *rsrc;
+	struct i40iw_puda_buf *buf = NULL;
+	struct i40iw_puda_buf *nextbuf = NULL;
+	struct i40iw_virt_mem *vmem;
+	enum i40iw_status_code ret;
+
+	switch (type) {
+	case I40IW_PUDA_RSRC_TYPE_ILQ:
+		rsrc = dev->ilq;
+		vmem = &dev->ilq_mem;
+		break;
+	case I40IW_PUDA_RSRC_TYPE_IEQ:
+		rsrc = dev->ieq;
+		vmem = &dev->ieq_mem;
+		break;
+	default:
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n",
+			    __func__, type);
+		return;
+	}
+
+	switch (rsrc->completion) {
+	case PUDA_HASH_CRC_COMPLETE:
+		i40iw_free_hash_desc(rsrc->hash_desc);
+	case PUDA_QP_CREATED:
+		do {
+			if (reset)
+				break;
+			ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
+							      0, false, true, true);
+			if (ret)
+				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+					    "%s error ieq qp destroy\n",
+					    __func__);
+
+			ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+								 I40IW_CQP_OP_DESTROY_QP,
+								 &compl_info);
+			if (ret)
+				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+					    "%s error ieq qp destroy done\n",
+					    __func__);
+		} while (0);
+
+		i40iw_free_dma_mem(dev->hw, &rsrc->qpmem);
+		/* fallthrough */
+	case PUDA_CQ_CREATED:
+		do {
+			if (reset)
+				break;
+			ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
+			if (ret)
+				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+					    "%s error ieq cq destroy\n",
+					    __func__);
+
+			ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+								 I40IW_CQP_OP_DESTROY_CQ,
+								 &compl_info);
+			if (ret)
+				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+					    "%s error ieq qp destroy done\n",
+					    __func__);
+		} while (0);
+
+		i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
+		break;
+	default:
+		i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s error no resources\n", __func__);
+		break;
+	}
+	/* Free all allocated puda buffers for both tx and rx */
+	buf = rsrc->alloclist;
+	while (buf) {
+		nextbuf = buf->next;
+		i40iw_puda_dele_buf(dev, buf);
+		buf = nextbuf;
+		rsrc->alloc_buf_count--;
+	}
+	i40iw_free_virt_mem(dev->hw, vmem);
+}
+
+/**
+ * i40iw_puda_allocbufs - allocate buffers for resource
+ * @rsrc: resource for buffer allocation
+ * @count: number of buffers to create
+ */
+static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc,
+						   u32 count)
+{
+	u32 i;
+	struct i40iw_puda_buf *buf;
+	struct i40iw_puda_buf *nextbuf;
+
+	for (i = 0; i < count; i++) {
+		buf = i40iw_puda_alloc_buf(rsrc->dev, rsrc->buf_size);
+		if (!buf) {
+			rsrc->stats_buf_alloc_fail++;
+			return I40IW_ERR_NO_MEMORY;
+		}
+		i40iw_puda_ret_bufpool(rsrc, buf);
+		rsrc->alloc_buf_count++;
+		if (!rsrc->alloclist) {
+			rsrc->alloclist = buf;
+		} else {
+			nextbuf = rsrc->alloclist;
+			rsrc->alloclist = buf;
+			buf->next = nextbuf;
+		}
+	}
+	rsrc->avail_buf_count = rsrc->alloc_buf_count;
+	return 0;
+}
+
+/**
+ * i40iw_puda_create_rsrc - create resouce (ilq or ieq)
+ * @dev: iwarp device
+ * @info: resource information
+ */
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+					      struct i40iw_puda_rsrc_info *info)
+{
+	enum i40iw_status_code ret = 0;
+	struct i40iw_puda_rsrc *rsrc;
+	u32 pudasize;
+	u32 sqwridsize, rqwridsize;
+	struct i40iw_virt_mem *vmem;
+
+	info->count = 1;
+	pudasize = sizeof(struct i40iw_puda_rsrc);
+	sqwridsize = info->sq_size * sizeof(struct i40iw_sq_uk_wr_trk_info);
+	rqwridsize = info->rq_size * 8;
+	switch (info->type) {
+	case I40IW_PUDA_RSRC_TYPE_ILQ:
+		vmem = &dev->ilq_mem;
+		break;
+	case I40IW_PUDA_RSRC_TYPE_IEQ:
+		vmem = &dev->ieq_mem;
+		break;
+	default:
+		return I40IW_NOT_SUPPORTED;
+	}
+	ret =
+	    i40iw_allocate_virt_mem(dev->hw, vmem,
+				    pudasize + sqwridsize + rqwridsize);
+	if (ret)
+		return ret;
+	rsrc = (struct i40iw_puda_rsrc *)vmem->va;
+	spin_lock_init(&rsrc->bufpool_lock);
+	if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) {
+		dev->ilq = (struct i40iw_puda_rsrc *)vmem->va;
+		dev->ilq_count = info->count;
+		rsrc->receive = info->receive;
+		rsrc->xmit_complete = info->xmit_complete;
+	} else {
+		vmem = &dev->ieq_mem;
+		dev->ieq_count = info->count;
+		dev->ieq = (struct i40iw_puda_rsrc *)vmem->va;
+		rsrc->receive = i40iw_ieq_receive;
+		rsrc->xmit_complete = i40iw_ieq_tx_compl;
+	}
+
+	rsrc->type = info->type;
+	rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
+	rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
+	rsrc->mss = info->mss;
+	/* Initialize all ieq lists */
+	INIT_LIST_HEAD(&rsrc->bufpool);
+	INIT_LIST_HEAD(&rsrc->txpend);
+
+	rsrc->tx_wqe_avail_cnt = info->sq_size - 1;
+	dev->iw_pd_ops->pd_init(dev, &rsrc->sc_pd, info->pd_id);
+	rsrc->qp_id = info->qp_id;
+	rsrc->cq_id = info->cq_id;
+	rsrc->sq_size = info->sq_size;
+	rsrc->rq_size = info->rq_size;
+	rsrc->cq_size = info->rq_size + info->sq_size;
+	rsrc->buf_size = info->buf_size;
+	rsrc->dev = dev;
+
+	ret = i40iw_puda_cq_create(rsrc);
+	if (!ret) {
+		rsrc->completion = PUDA_CQ_CREATED;
+		ret = i40iw_puda_qp_create(rsrc);
+	}
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n", __func__);
+		goto error;
+	}
+	rsrc->completion = PUDA_QP_CREATED;
+
+	ret = i40iw_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size);
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error allloc_buf\n", __func__);
+		goto error;
+	}
+
+	rsrc->rxq_invalid_cnt = info->rq_size;
+	ret = i40iw_puda_replenish_rq(rsrc, true);
+	if (ret)
+		goto error;
+
+	if (info->type == I40IW_PUDA_RSRC_TYPE_IEQ) {
+		if (!i40iw_init_hash_desc(&rsrc->hash_desc)) {
+			rsrc->check_crc = true;
+			rsrc->completion = PUDA_HASH_CRC_COMPLETE;
+			ret = 0;
+		}
+	}
+
+	dev->ccq_ops->ccq_arm(&rsrc->cq);
+	return ret;
+ error:
+	i40iw_puda_dele_resources(dev, info->type, false);
+
+	return ret;
+}
+
+/**
+ * i40iw_ilq_putback_rcvbuf - ilq buffer to put back on rq
+ * @qp: ilq's qp resource
+ * @wqe_idx:  wqe index of completed rcvbuf
+ */
+static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx)
+{
+	u64 *wqe;
+	u64 offset24;
+
+	wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+	get_64bit_val(wqe, 24, &offset24);
+	offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
+	set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * i40iw_ieq_get_fpdu - given length return fpdu length
+ * @length: length if fpdu
+ */
+static u16 i40iw_ieq_get_fpdu_length(u16 length)
+{
+	u16 fpdu_len;
+
+	fpdu_len = length + I40IW_IEQ_MPA_FRAMING;
+	fpdu_len = (fpdu_len + 3) & 0xfffffffc;
+	return fpdu_len;
+}
+
+/**
+ * i40iw_ieq_copy_to_txbuf - copydata from rcv buf to tx buf
+ * @buf: rcv buffer with partial
+ * @txbuf: tx buffer for sendign back
+ * @buf_offset: rcv buffer offset to copy from
+ * @txbuf_offset: at offset in tx buf to copy
+ * @length: length of data to copy
+ */
+static void i40iw_ieq_copy_to_txbuf(struct i40iw_puda_buf *buf,
+				    struct i40iw_puda_buf *txbuf,
+				    u16 buf_offset, u32 txbuf_offset,
+				    u32 length)
+{
+	void *mem1 = (u8 *)buf->mem.va + buf_offset;
+	void *mem2 = (u8 *)txbuf->mem.va + txbuf_offset;
+
+	memcpy(mem2, mem1, length);
+}
+
+/**
+ * i40iw_ieq_setup_tx_buf - setup tx buffer for partial handling
+ * @buf: reeive buffer with partial
+ * @txbuf: buffer to prepare
+ */
+static void i40iw_ieq_setup_tx_buf(struct i40iw_puda_buf *buf,
+				   struct i40iw_puda_buf *txbuf)
+{
+	txbuf->maclen = buf->maclen;
+	txbuf->tcphlen = buf->tcphlen;
+	txbuf->ipv4 = buf->ipv4;
+	txbuf->hdrlen = buf->hdrlen;
+	i40iw_ieq_copy_to_txbuf(buf, txbuf, 0, 0, buf->hdrlen);
+}
+
+/**
+ * i40iw_ieq_check_first_buf - check if rcv buffer's seq is in range
+ * @buf: receive exception buffer
+ * @fps: first partial sequence number
+ */
+static void i40iw_ieq_check_first_buf(struct i40iw_puda_buf *buf, u32 fps)
+{
+	u32 offset;
+
+	if (buf->seqnum < fps) {
+		offset = fps - buf->seqnum;
+		if (offset > buf->datalen)
+			return;
+		buf->data += offset;
+		buf->datalen -= (u16)offset;
+		buf->seqnum = fps;
+	}
+}
+
+/**
+ * i40iw_ieq_compl_pfpdu - write txbuf with full fpdu
+ * @ieq: ieq resource
+ * @rxlist: ieq's received buffer list
+ * @pbufl: temporary list for buffers for fpddu
+ * @txbuf: tx buffer for fpdu
+ * @fpdu_len: total length of fpdu
+ */
+static void  i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
+				   struct list_head *rxlist,
+				   struct list_head *pbufl,
+				   struct i40iw_puda_buf *txbuf,
+				   u16 fpdu_len)
+{
+	struct i40iw_puda_buf *buf;
+	u32 nextseqnum;
+	u16 txoffset, bufoffset;
+
+	buf = i40iw_puda_get_listbuf(pbufl);
+	nextseqnum = buf->seqnum + fpdu_len;
+	txbuf->totallen = buf->hdrlen + fpdu_len;
+	txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
+	i40iw_ieq_setup_tx_buf(buf, txbuf);
+
+	txoffset = buf->hdrlen;
+	bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+
+	do {
+		if (buf->datalen >= fpdu_len) {
+			/* copied full fpdu */
+			i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, fpdu_len);
+			buf->datalen -= fpdu_len;
+			buf->data += fpdu_len;
+			buf->seqnum = nextseqnum;
+			break;
+		}
+		/* copy partial fpdu */
+		i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, buf->datalen);
+		txoffset += buf->datalen;
+		fpdu_len -= buf->datalen;
+		i40iw_puda_ret_bufpool(ieq, buf);
+		buf = i40iw_puda_get_listbuf(pbufl);
+		bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+	} while (1);
+
+	/* last buffer on the list*/
+	if (buf->datalen)
+		list_add(&buf->list, rxlist);
+	else
+		i40iw_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * i40iw_ieq_create_pbufl - create buffer list for single fpdu
+ * @rxlist: resource list for receive ieq buffes
+ * @pbufl: temp. list for buffers for fpddu
+ * @buf: first receive buffer
+ * @fpdu_len: total length of fpdu
+ */
+static enum i40iw_status_code i40iw_ieq_create_pbufl(
+						     struct i40iw_pfpdu *pfpdu,
+						     struct list_head *rxlist,
+						     struct list_head *pbufl,
+						     struct i40iw_puda_buf *buf,
+						     u16 fpdu_len)
+{
+	enum i40iw_status_code status = 0;
+	struct i40iw_puda_buf *nextbuf;
+	u32	nextseqnum;
+	u16 plen = fpdu_len - buf->datalen;
+	bool done = false;
+
+	nextseqnum = buf->seqnum + buf->datalen;
+	do {
+		nextbuf = i40iw_puda_get_listbuf(rxlist);
+		if (!nextbuf) {
+			status = I40IW_ERR_list_empty;
+			break;
+		}
+		list_add_tail(&nextbuf->list, pbufl);
+		if (nextbuf->seqnum != nextseqnum) {
+			pfpdu->bad_seq_num++;
+			status = I40IW_ERR_SEQ_NUM;
+			break;
+		}
+		if (nextbuf->datalen >= plen) {
+			done = true;
+		} else {
+			plen -= nextbuf->datalen;
+			nextseqnum = nextbuf->seqnum + nextbuf->datalen;
+		}
+
+	} while (!done);
+
+	return status;
+}
+
+/**
+ * i40iw_ieq_handle_partial - process partial fpdu buffer
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ * @fpdu_len: fpdu len in the buffer
+ */
+static enum i40iw_status_code i40iw_ieq_handle_partial(struct i40iw_puda_rsrc *ieq,
+						       struct i40iw_pfpdu *pfpdu,
+						       struct i40iw_puda_buf *buf,
+						       u16 fpdu_len)
+{
+	enum i40iw_status_code status = 0;
+	u8 *crcptr;
+	u32 mpacrc;
+	u32 seqnum = buf->seqnum;
+	struct list_head pbufl;	/* partial buffer list */
+	struct i40iw_puda_buf *txbuf = NULL;
+	struct list_head *rxlist = &pfpdu->rxlist;
+
+	INIT_LIST_HEAD(&pbufl);
+	list_add(&buf->list, &pbufl);
+
+	status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
+	if (!status)
+		goto error;
+
+	txbuf = i40iw_puda_get_bufpool(ieq);
+	if (!txbuf) {
+		pfpdu->no_tx_bufs++;
+		status = I40IW_ERR_NO_TXBUFS;
+		goto error;
+	}
+
+	i40iw_ieq_compl_pfpdu(ieq, rxlist, &pbufl, txbuf, fpdu_len);
+	i40iw_ieq_update_tcpip_info(txbuf, fpdu_len, seqnum);
+	crcptr = txbuf->data + fpdu_len - 4;
+	mpacrc = *(u32 *)crcptr;
+	if (ieq->check_crc) {
+		status = i40iw_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
+						(fpdu_len - 4), mpacrc);
+		if (status) {
+			i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+				    "%s: error bad crc\n", __func__);
+			goto error;
+		}
+	}
+
+	i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "IEQ TX BUFFER",
+			txbuf->mem.va, txbuf->totallen);
+	i40iw_puda_send_buf(ieq, txbuf);
+	pfpdu->rcv_nxt = seqnum + fpdu_len;
+	return status;
+ error:
+	while (!list_empty(&pbufl)) {
+		buf = (struct i40iw_puda_buf *)(pbufl.prev);
+		list_del(&buf->list);
+		list_add(&buf->list, rxlist);
+	}
+	if (txbuf)
+		i40iw_puda_ret_bufpool(ieq, txbuf);
+	return status;
+}
+
+/**
+ * i40iw_ieq_process_buf - process buffer rcvd for ieq
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ */
+static enum i40iw_status_code i40iw_ieq_process_buf(struct i40iw_puda_rsrc *ieq,
+						    struct i40iw_pfpdu *pfpdu,
+						    struct i40iw_puda_buf *buf)
+{
+	u16 fpdu_len = 0;
+	u16 datalen = buf->datalen;
+	u8 *datap = buf->data;
+	u8 *crcptr;
+	u16 ioffset = 0;
+	u32 mpacrc;
+	u32 seqnum = buf->seqnum;
+	u16 length = 0;
+	u16 full = 0;
+	bool partial = false;
+	struct i40iw_puda_buf *txbuf;
+	struct list_head *rxlist = &pfpdu->rxlist;
+	enum i40iw_status_code ret = 0;
+	enum i40iw_status_code status = 0;
+
+	ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
+	while (datalen) {
+		fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(u16 *)datap));
+		if (fpdu_len > pfpdu->max_fpdu_data) {
+			i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+				    "%s: error bad fpdu_len\n", __func__);
+			status = I40IW_ERR_MPA_CRC;
+			list_add(&buf->list, rxlist);
+			return status;
+		}
+
+		if (datalen < fpdu_len) {
+			partial = true;
+			break;
+		}
+		crcptr = datap + fpdu_len - 4;
+		mpacrc = *(u32 *)crcptr;
+		if (ieq->check_crc)
+			ret = i40iw_ieq_check_mpacrc(ieq->hash_desc,
+						     datap, fpdu_len - 4, mpacrc);
+		if (ret) {
+			status = I40IW_ERR_MPA_CRC;
+			list_add(&buf->list, rxlist);
+			return status;
+		}
+		full++;
+		pfpdu->fpdu_processed++;
+		datap += fpdu_len;
+		length += fpdu_len;
+		datalen -= fpdu_len;
+	}
+	if (full) {
+		/* copy full pdu's in the txbuf and send them out */
+		txbuf = i40iw_puda_get_bufpool(ieq);
+		if (!txbuf) {
+			pfpdu->no_tx_bufs++;
+			status = I40IW_ERR_NO_TXBUFS;
+			list_add(&buf->list, rxlist);
+			return status;
+		}
+		/* modify txbuf's buffer header */
+		i40iw_ieq_setup_tx_buf(buf, txbuf);
+		/* copy full fpdu's to new buffer */
+		i40iw_ieq_copy_to_txbuf(buf, txbuf, ioffset, buf->hdrlen,
+					length);
+		txbuf->totallen = buf->hdrlen + length;
+
+		i40iw_ieq_update_tcpip_info(txbuf, length, buf->seqnum);
+		i40iw_puda_send_buf(ieq, txbuf);
+
+		if (!datalen) {
+			pfpdu->rcv_nxt = buf->seqnum + length;
+			i40iw_puda_ret_bufpool(ieq, buf);
+			return status;
+		}
+		buf->data = datap;
+		buf->seqnum = seqnum + length;
+		buf->datalen = datalen;
+		pfpdu->rcv_nxt = buf->seqnum;
+	}
+	if (partial)
+		status = i40iw_ieq_handle_partial(ieq, pfpdu, buf, fpdu_len);
+
+	return status;
+}
+
+/**
+ * i40iw_ieq_process_fpdus - process fpdu's buffers on its list
+ * @qp: qp for which partial fpdus
+ * @ieq: ieq resource
+ */
+static void i40iw_ieq_process_fpdus(struct i40iw_sc_qp *qp,
+				    struct i40iw_puda_rsrc *ieq)
+{
+	struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+	struct list_head *rxlist = &pfpdu->rxlist;
+	struct i40iw_puda_buf *buf;
+	enum i40iw_status_code status;
+
+	do {
+		if (list_empty(rxlist))
+			break;
+		buf = i40iw_puda_get_listbuf(rxlist);
+		if (!buf) {
+			i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+				    "%s: error no buf\n", __func__);
+			break;
+		}
+		if (buf->seqnum != pfpdu->rcv_nxt) {
+			/* This could be out of order or missing packet */
+			pfpdu->out_of_order++;
+			list_add(&buf->list, rxlist);
+			break;
+		}
+		/* keep processing buffers from the head of the list */
+		status = i40iw_ieq_process_buf(ieq, pfpdu, buf);
+		if (status == I40IW_ERR_MPA_CRC) {
+			pfpdu->mpa_crc_err = true;
+			while (!list_empty(rxlist)) {
+				buf = i40iw_puda_get_listbuf(rxlist);
+				i40iw_puda_ret_bufpool(ieq, buf);
+				pfpdu->crc_err++;
+			}
+			/* create CQP for AE */
+			i40iw_ieq_mpa_crc_ae(ieq->dev, qp);
+		}
+	} while (!status);
+}
+
+/**
+ * i40iw_ieq_handle_exception - handle qp's exception
+ * @ieq: ieq resource
+ * @qp: qp receiving excpetion
+ * @buf: receive buffer
+ */
+static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
+				       struct i40iw_sc_qp *qp,
+				       struct i40iw_puda_buf *buf)
+{
+	struct i40iw_puda_buf *tmpbuf = NULL;
+	struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+	u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
+	u32 rcv_wnd = hw_host_ctx[23];
+	/* first partial seq # in q2 */
+	u32 fps = qp->q2_buf[16];
+	struct list_head *rxlist = &pfpdu->rxlist;
+	struct list_head *plist;
+
+	pfpdu->total_ieq_bufs++;
+
+	if (pfpdu->mpa_crc_err) {
+		pfpdu->crc_err++;
+		goto error;
+	}
+	if (pfpdu->mode && (fps != pfpdu->fps)) {
+		/* clean up qp as it is new partial sequence */
+		i40iw_ieq_cleanup_qp(ieq->dev, qp);
+		i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+			    "%s: restarting new partial\n", __func__);
+		pfpdu->mode = false;
+	}
+
+	if (!pfpdu->mode) {
+		i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "Q2 BUFFER", (u64 *)qp->q2_buf, 128);
+		/* First_Partial_Sequence_Number check */
+		pfpdu->rcv_nxt = fps;
+		pfpdu->fps = fps;
+		pfpdu->mode = true;
+		pfpdu->max_fpdu_data = ieq->mss;
+		pfpdu->pmode_count++;
+		INIT_LIST_HEAD(rxlist);
+		i40iw_ieq_check_first_buf(buf, fps);
+	}
+
+	if (!(rcv_wnd >= (buf->seqnum - pfpdu->rcv_nxt))) {
+		pfpdu->bad_seq_num++;
+		goto error;
+	}
+
+	if (!list_empty(rxlist)) {
+		tmpbuf = (struct i40iw_puda_buf *)rxlist->next;
+		plist = &tmpbuf->list;
+		while ((struct list_head *)tmpbuf != rxlist) {
+			if ((int)(buf->seqnum - tmpbuf->seqnum) < 0)
+				break;
+			tmpbuf = (struct i40iw_puda_buf *)plist->next;
+		}
+		/* Insert buf before tmpbuf */
+		list_add_tail(&buf->list, &tmpbuf->list);
+	} else {
+		list_add_tail(&buf->list, rxlist);
+	}
+	i40iw_ieq_process_fpdus(qp, ieq);
+	return;
+ error:
+	i40iw_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * i40iw_ieq_receive - received exception buffer
+ * @dev: iwarp device
+ * @buf: exception buffer received
+ */
+static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+			      struct i40iw_puda_buf *buf)
+{
+	struct i40iw_puda_rsrc *ieq = dev->ieq;
+	struct i40iw_sc_qp *qp = NULL;
+	u32 wqe_idx = ieq->compl_rxwqe_idx;
+
+	qp = i40iw_ieq_get_qp(dev, buf);
+	if (!qp) {
+		ieq->stats_bad_qp_id++;
+		i40iw_puda_ret_bufpool(ieq, buf);
+	} else {
+		i40iw_ieq_handle_exception(ieq, qp, buf);
+	}
+	/*
+	 * ieq->rx_wqe_idx is used by i40iw_puda_replenish_rq()
+	 * on which wqe_idx to start replenish rq
+	 */
+	if (!ieq->rxq_invalid_cnt)
+		ieq->rx_wqe_idx = wqe_idx;
+	ieq->rxq_invalid_cnt++;
+}
+
+/**
+ * i40iw_ieq_tx_compl - put back after sending completed exception buffer
+ * @dev: iwarp device
+ * @sqwrid: pointer to puda buffer
+ */
+static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid)
+{
+	struct i40iw_puda_rsrc *ieq = dev->ieq;
+	struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
+
+	i40iw_puda_ret_bufpool(ieq, buf);
+	if (!list_empty(&ieq->txpend)) {
+		buf = i40iw_puda_get_listbuf(&ieq->txpend);
+		i40iw_puda_send_buf(ieq, buf);
+	}
+}
+
+/**
+ * i40iw_ieq_cleanup_qp - qp is being destroyed
+ * @dev: iwarp device
+ * @qp: all pending fpdu buffers
+ */
+void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_puda_buf *buf;
+	struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+	struct list_head *rxlist = &pfpdu->rxlist;
+	struct i40iw_puda_rsrc *ieq = dev->ieq;
+
+	if (!pfpdu->mode)
+		return;
+	while (!list_empty(rxlist)) {
+		buf = i40iw_puda_get_listbuf(rxlist);
+		i40iw_puda_ret_bufpool(ieq, buf);
+	}
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
new file mode 100644
index 0000000..52bf782
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h

@@ -0,0 +1,183 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_PUDA_H
+#define I40IW_PUDA_H
+
+#define I40IW_IEQ_MPA_FRAMING 6
+
+struct i40iw_sc_dev;
+struct i40iw_sc_qp;
+struct i40iw_sc_cq;
+
+enum puda_resource_type {
+	I40IW_PUDA_RSRC_TYPE_ILQ = 1,
+	I40IW_PUDA_RSRC_TYPE_IEQ
+};
+
+enum puda_rsrc_complete {
+	PUDA_CQ_CREATED = 1,
+	PUDA_QP_CREATED,
+	PUDA_TX_COMPLETE,
+	PUDA_RX_COMPLETE,
+	PUDA_HASH_CRC_COMPLETE
+};
+
+struct i40iw_puda_completion_info {
+	struct i40iw_qp_uk *qp;
+	u8 q_type;
+	u8 vlan_valid;
+	u8 l3proto;
+	u8 l4proto;
+	u16 payload_len;
+	u32 compl_error;	/* No_err=0, else major and minor err code */
+	u32 qp_id;
+	u32 wqe_idx;
+};
+
+struct i40iw_puda_send_info {
+	u64 paddr;		/* Physical address */
+	u32 len;
+	u8 tcplen;
+	u8 maclen;
+	bool ipv4;
+	bool doloopback;
+	void *scratch;
+};
+
+struct i40iw_puda_buf {
+	struct list_head list;	/* MUST be first entry */
+	struct i40iw_dma_mem mem;	/* DMA memory for the buffer */
+	struct i40iw_puda_buf *next;	/* for alloclist in rsrc struct */
+	struct i40iw_virt_mem buf_mem;	/* Buffer memory for this buffer */
+	void *scratch;
+	u8 *iph;
+	u8 *tcph;
+	u8 *data;
+	u16 datalen;
+	u16 vlan_id;
+	u8 tcphlen;		/* tcp length in bytes */
+	u8 maclen;		/* mac length in bytes */
+	u32 totallen;		/* machlen+iphlen+tcphlen+datalen */
+	atomic_t refcount;
+	u8 hdrlen;
+	bool ipv4;
+	u32 seqnum;
+};
+
+struct i40iw_puda_rsrc_info {
+	enum puda_resource_type type;	/* ILQ or IEQ */
+	u32 count;
+	u16 pd_id;
+	u32 cq_id;
+	u32 qp_id;
+	u32 sq_size;
+	u32 rq_size;
+	u16 buf_size;
+	u16 mss;
+	u32 tx_buf_cnt;		/* total bufs allocated will be rq_size + tx_buf_cnt */
+	void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
+	void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+};
+
+struct i40iw_puda_rsrc {
+	struct i40iw_sc_cq cq;
+	struct i40iw_sc_qp qp;
+	struct i40iw_sc_pd sc_pd;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_dma_mem cqmem;
+	struct i40iw_dma_mem qpmem;
+	struct i40iw_virt_mem ilq_mem;
+	enum puda_rsrc_complete completion;
+	enum puda_resource_type type;
+	u16 buf_size;		/*buffer must be max datalen + tcpip hdr + mac */
+	u16 mss;
+	u32 cq_id;
+	u32 qp_id;
+	u32 sq_size;
+	u32 rq_size;
+	u32 cq_size;
+	struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+	u64 *rq_wrid_array;
+	u32 compl_rxwqe_idx;
+	u32 rx_wqe_idx;
+	u32 rxq_invalid_cnt;
+	u32 tx_wqe_avail_cnt;
+	bool check_crc;
+	struct shash_desc *hash_desc;
+	struct list_head txpend;
+	struct list_head bufpool;	/* free buffers pool list for recv and xmit */
+	u32 alloc_buf_count;
+	u32 avail_buf_count;		/* snapshot of currently available buffers */
+	spinlock_t bufpool_lock;
+	struct i40iw_puda_buf *alloclist;
+	void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
+	void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+	/* puda stats */
+	u64 stats_buf_alloc_fail;
+	u64 stats_pkt_rcvd;
+	u64 stats_pkt_sent;
+	u64 stats_rcvd_pkt_err;
+	u64 stats_sent_pkt_q;
+	u64 stats_bad_qp_id;
+};
+
+struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc);
+void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
+			    struct i40iw_puda_buf *buf);
+void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc,
+			 struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
+				       struct i40iw_puda_send_info *info);
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+					      struct i40iw_puda_rsrc_info *info);
+void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+			       enum puda_resource_type type,
+			       bool reset);
+enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
+						  struct i40iw_sc_cq *cq, u32 *compl_err);
+void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
+				     struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+						 struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
+					      void *addr, u32 length, u32 value);
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc);
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+void i40iw_free_hash_desc(struct shash_desc *desc);
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length,
+				 u32 seqnum);
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_register.h b/drivers/infiniband/hw/i40iw/i40iw_register.h
new file mode 100644
index 0000000..5776818
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_register.h

@@ -0,0 +1,1030 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_REGISTER_H
+#define I40IW_REGISTER_H
+
+#define I40E_GLGEN_STAT               0x000B612C /* Reset: POR */
+
+#define I40E_PFHMC_PDINV               0x000C0300 /* Reset: PFR */
+#define I40E_PFHMC_PDINV_PMSDIDX_SHIFT 0
+#define I40E_PFHMC_PDINV_PMSDIDX_MASK  (0xFFF <<  I40E_PFHMC_PDINV_PMSDIDX_SHIFT)
+#define I40E_PFHMC_PDINV_PMPDIDX_SHIFT 16
+#define I40E_PFHMC_PDINV_PMPDIDX_MASK  (0x1FF <<  I40E_PFHMC_PDINV_PMPDIDX_SHIFT)
+#define I40E_PFHMC_SDCMD_PMSDWR_SHIFT  31
+#define I40E_PFHMC_SDCMD_PMSDWR_MASK   (0x1 <<  I40E_PFHMC_SDCMD_PMSDWR_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT   0
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_MASK    (0x1 <<  I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT    1
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_MASK     (0x1 <<  I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_MASK  (0x3FF <<  I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT)
+
+#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */	/* Reset: PFR */
+#define I40E_PFINT_DYN_CTLN_INTENA_SHIFT          0
+#define I40E_PFINT_DYN_CTLN_INTENA_MASK           (0x1 <<  I40E_PFINT_DYN_CTLN_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT        1
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_MASK         (0x1 <<  I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT        3
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK         (0x3 <<  I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
+
+#define I40E_VFINT_DYN_CTLN1(_INTVF)               (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
+#define I40E_GLHMC_VFPDINV(_i)               (0x000C8300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+
+#define I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT 15
+#define I40E_PFHMC_PDINV_PMSDPARTSEL_MASK  (0x1 <<  I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT)
+#define I40E_GLPCI_LBARCTRL                    0x000BE484 /* Reset: POR */
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT    4
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_MASK     (0x3 <<  I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT)
+#define I40E_GLPCI_DREVID			0x0009C480 /* Reset: PCIR */
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT 0
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_MASK 0xFF
+
+#define I40E_PFPE_AEQALLOC               0x00131180 /* Reset: PFR */
+#define I40E_PFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_PFPE_AEQALLOC_AECOUNT_MASK  (0xFFFFFFFF <<  I40E_PFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_PFPE_CCQPHIGH                  0x00008200 /* Reset: PFR */
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_MASK  (0xFFFFFFFF <<  I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_PFPE_CCQPLOW                 0x00008180 /* Reset: PFR */
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_MASK  (0xFFFFFFFF <<  I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_PFPE_CCQPSTATUS                   0x00008100 /* Reset: PFR */
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT   0
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_MASK    (0x1 <<  I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
+#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_MASK  (0x7 <<  I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
+#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_MASK  (0x3F <<  I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT    31
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_MASK     (0x1 <<  I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_PFPE_CQACK              0x00131100 /* Reset: PFR */
+#define I40E_PFPE_CQACK_PECQID_SHIFT 0
+#define I40E_PFPE_CQACK_PECQID_MASK  (0x1FFFF <<  I40E_PFPE_CQACK_PECQID_SHIFT)
+#define I40E_PFPE_CQARM              0x00131080 /* Reset: PFR */
+#define I40E_PFPE_CQARM_PECQID_SHIFT 0
+#define I40E_PFPE_CQARM_PECQID_MASK  (0x1FFFF <<  I40E_PFPE_CQARM_PECQID_SHIFT)
+#define I40E_PFPE_CQPDB              0x00008000 /* Reset: PFR */
+#define I40E_PFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_PFPE_CQPDB_WQHEAD_MASK  (0x7FF <<  I40E_PFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_PFPE_CQPERRCODES                      0x00008880 /* Reset: PFR */
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_MASK  (0xFFFF <<  I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK  (0xFFFF <<  I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_PFPE_CQPTAIL                  0x00008080 /* Reset: PFR */
+#define I40E_PFPE_CQPTAIL_WQTAIL_SHIFT     0
+#define I40E_PFPE_CQPTAIL_WQTAIL_MASK      (0x7FF <<  I40E_PFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_MASK  (0x1 <<  I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_PFPE_FLMQ1ALLOCERR                   0x00008980 /* Reset: PFR */
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_MASK  (0xFFFF <<  I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_FLMXMITALLOCERR                   0x00008900 /* Reset: PFR */
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_MASK  (0xFFFF <<  I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_IPCONFIG0                        0x00008280 /* Reset: PFR */
+#define I40E_PFPE_IPCONFIG0_PEIPID_SHIFT           0
+#define I40E_PFPE_IPCONFIG0_PEIPID_MASK            (0xFFFF <<  I40E_PFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_MASK  (0x1 <<  I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_PFPE_MRTEIDXMASK                       0x00008600 /* Reset: PFR */
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK  (0x1F <<  I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_PFPE_RCVUNEXPECTEDERROR                        0x00008680 /* Reset: PFR */
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK  (0xFFFFFF <<  I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_PFPE_TCPNOWTIMER               0x00008580 /* Reset: PFR */
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_MASK  (0xFFFFFFFF <<  I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+
+#define I40E_PFPE_WQEALLOC                      0x00138C00 /* Reset: PFR */
+#define I40E_PFPE_WQEALLOC_PEQPID_SHIFT         0
+#define I40E_PFPE_WQEALLOC_PEQPID_MASK          (0x3FFFF <<  I40E_PFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_MASK  (0xFFF <<  I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+
+#define I40E_VFPE_AEQALLOC(_VF)          (0x00130C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC_MAX_INDEX     127
+#define I40E_VFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC_AECOUNT_MASK  (0xFFFFFFFF <<  I40E_VFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH(_VF)             (0x00001000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH_MAX_INDEX        127
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_MASK  (0xFFFFFFFF <<  I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW(_VF)            (0x00000C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW_MAX_INDEX       127
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_MASK  (0xFFFFFFFF <<  I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS(_VF)              (0x00000800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS_MAX_INDEX         127
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT   0
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_MASK    (0x1 <<  I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
+#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_MASK  (0x7 <<  I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
+#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_MASK  (0x3F <<  I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT    31
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_MASK     (0x1 <<  I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK(_VF)         (0x00130800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQACK_MAX_INDEX    127
+#define I40E_VFPE_CQACK_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK_PECQID_MASK  (0x1FFFF <<  I40E_VFPE_CQACK_PECQID_SHIFT)
+#define I40E_VFPE_CQARM(_VF)         (0x00130400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQARM_MAX_INDEX    127
+#define I40E_VFPE_CQARM_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM_PECQID_MASK  (0x1FFFF <<  I40E_VFPE_CQARM_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB(_VF)         (0x00000000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPDB_MAX_INDEX    127
+#define I40E_VFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB_WQHEAD_MASK  (0x7FF <<  I40E_VFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES(_VF)                 (0x00001800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES_MAX_INDEX            127
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_MASK  (0xFFFF <<  I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK  (0xFFFF <<  I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL(_VF)             (0x00000400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPTAIL_MAX_INDEX        127
+#define I40E_VFPE_CQPTAIL_WQTAIL_SHIFT     0
+#define I40E_VFPE_CQPTAIL_WQTAIL_MASK      (0x7FF <<  I40E_VFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_MASK  (0x1 <<  I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG0(_VF)                   (0x00001400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_IPCONFIG0_MAX_INDEX              127
+#define I40E_VFPE_IPCONFIG0_PEIPID_SHIFT           0
+#define I40E_VFPE_IPCONFIG0_PEIPID_MASK            (0xFFFF <<  I40E_VFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_MASK  (0x1 <<  I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK(_VF)                  (0x00003000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_MRTEIDXMASK_MAX_INDEX             127
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK  (0x1F <<  I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR(_VF)                   (0x00003400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_RCVUNEXPECTEDERROR_MAX_INDEX              127
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK  (0xFFFFFF <<  I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER(_VF)          (0x00002C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_TCPNOWTIMER_MAX_INDEX     127
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_MASK  (0xFFFFFFFF <<  I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC(_VF)                 (0x00138000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC_MAX_INDEX            127
+#define I40E_VFPE_WQEALLOC_PEQPID_SHIFT         0
+#define I40E_VFPE_WQEALLOC_PEQPID_MASK          (0x3FFFF <<  I40E_VFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_MASK  (0xFFF <<  I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+
+#define I40E_GLPE_CPUSTATUS0                    0x0000D040 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT 0
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_MASK  (0xFFFFFFFF <<  I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT)
+#define I40E_GLPE_CPUSTATUS1                    0x0000D044 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT 0
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_MASK  (0xFFFFFFFF <<  I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT)
+#define I40E_GLPE_CPUSTATUS2                    0x0000D048 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT 0
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_MASK  (0xFFFFFFFF <<  I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT)
+#define I40E_GLPE_CPUTRIG0                   0x0000D060 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT  0
+#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_MASK   (0xFFFF <<  I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT)
+#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT 17
+#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_MASK  (0x1 <<  I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT)
+#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT 18
+#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_MASK  (0x1 <<  I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT)
+#define I40E_GLPE_DUAL40_RUPM                     0x0000DA04 /* Reset: PE_CORER */
+#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT 0
+#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_MASK  (0x1 <<  I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT)
+#define I40E_GLPE_PFAEQEDROPCNT(_i)               (0x00131440 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFAEQEDROPCNT_MAX_INDEX         15
+#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
+#define I40E_GLPE_PFCEQEDROPCNT(_i)               (0x001313C0 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFCEQEDROPCNT_MAX_INDEX         15
+#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
+#define I40E_GLPE_PFCQEDROPCNT(_i)              (0x00131340 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFCQEDROPCNT_MAX_INDEX        15
+#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT)
+#define I40E_GLPE_RUPM_CQPPOOL                0x0000DACC /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_MASK  (0xFF <<  I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT)
+#define I40E_GLPE_RUPM_FLRPOOL                0x0000DAC4 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_MASK  (0xFF <<  I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL                   0x0000DA00 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT    0
+#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_MASK     (0xFF <<  I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT 26
+#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_MASK  (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT 27
+#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_MASK  (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT 28
+#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_MASK  (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT 29
+#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_MASK  (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT    30
+#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_MASK     (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT   31
+#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_MASK    (0x1 <<  I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT)
+#define I40E_GLPE_RUPM_PTXPOOL                0x0000DAC8 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_MASK  (0xFF <<  I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT)
+#define I40E_GLPE_RUPM_PUSHPOOL                 0x0000DAC0 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_MASK  (0xFF <<  I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT)
+#define I40E_GLPE_RUPM_TXHOST_EN                 0x0000DA08 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT 0
+#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_MASK  (0x1 <<  I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT)
+#define I40E_GLPE_VFAEQEDROPCNT(_i)               (0x00132540 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFAEQEDROPCNT_MAX_INDEX         31
+#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFCEQEDROPCNT(_i)               (0x00132440 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFCEQEDROPCNT_MAX_INDEX         31
+#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFCQEDROPCNT(_i)              (0x00132340 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFCQEDROPCNT_MAX_INDEX        31
+#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL(_i)                  (0x0000D400 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMOBJCTRL_MAX_INDEX            31
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT 0
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_MASK  (0x7 <<  I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT   8
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_MASK    (0x7 <<  I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMQ1ALLOCERR(_i)               (0x0000C700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMQ1ALLOCERR_MAX_INDEX         31
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_MASK  (0xFFFF <<  I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFFLMXMITALLOCERR(_i)               (0x0000C600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMXMITALLOCERR_MAX_INDEX         31
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_MASK  (0xFFFF <<  I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFUDACTRL(_i)                    (0x0000C000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFUDACTRL_MAX_INDEX              31
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT  0
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_MASK   (0x1 <<  I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT  1
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_MASK   (0x1 <<  I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT  2
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_MASK   (0x1 <<  I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT  3
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_MASK   (0x1 <<  I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_MASK  (0x1 <<  I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN(_i)         (0x0000C100 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFUDAUCFBQPN_MAX_INDEX   31
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT   0
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_MASK    (0x3FFFF <<  I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT 31
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_MASK  (0x1 <<  I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT)
+
+#define I40E_GLPES_PFIP4RXDISCARD(_i)                (0x00010600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXDISCARD_MAX_INDEX          15
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSHI(_i)                (0x00010804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSHI_MAX_INDEX          15
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSLO(_i)                (0x00010800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSLO_MAX_INDEX          15
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSHI(_i)                 (0x00010A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSLO(_i)                 (0x00010A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSHI(_i)                 (0x00010C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSLO(_i)                 (0x00010C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSHI(_i)               (0x00010204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSLO(_i)               (0x00010200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSHI(_i)               (0x00010404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSLO(_i)               (0x00010400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXTRUNC(_i)              (0x00010700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXTRUNC_MAX_INDEX        15
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSHI(_i)                (0x00011E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSHI_MAX_INDEX          15
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSLO(_i)                (0x00011E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSLO_MAX_INDEX          15
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSHI(_i)                 (0x00012004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSLO(_i)                 (0x00012000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSHI(_i)                 (0x00012204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSLO(_i)                 (0x00012200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXNOROUTE(_i)                (0x00012E00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXNOROUTE_MAX_INDEX          15
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_MASK  (0xFFFFFF <<  I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSHI(_i)               (0x00011A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSLO(_i)               (0x00011A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSHI(_i)               (0x00011C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSLO(_i)               (0x00011C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXDISCARD(_i)                (0x00011200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXDISCARD_MAX_INDEX          15
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSHI(_i)                (0x00011404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSHI_MAX_INDEX          15
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSLO(_i)                (0x00011400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSLO_MAX_INDEX          15
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSHI(_i)                 (0x00011604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSLO(_i)                 (0x00011600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSHI(_i)                 (0x00011804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSLO(_i)                 (0x00011800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSHI(_i)               (0x00010E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSLO(_i)               (0x00010E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSHI(_i)               (0x00011004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSLO(_i)               (0x00011000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXTRUNC(_i)              (0x00011300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXTRUNC_MAX_INDEX        15
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSHI(_i)                (0x00012804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSHI_MAX_INDEX          15
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSLO(_i)                (0x00012800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSLO_MAX_INDEX          15
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSHI(_i)                 (0x00012A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSLO(_i)                 (0x00012A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSHI(_i)                 (0x00012C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSLO(_i)                 (0x00012C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXNOROUTE(_i)                (0x00012F00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXNOROUTE_MAX_INDEX          15
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_MASK  (0xFFFFFF <<  I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSHI(_i)               (0x00012404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSLO(_i)               (0x00012400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSHI(_i)               (0x00012604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSLO(_i)               (0x00012600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSHI(_i)               (0x00013E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSHI_MAX_INDEX         15
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSLO(_i)               (0x00013E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSLO_MAX_INDEX         15
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSHI(_i)                (0x00014004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSHI_MAX_INDEX          15
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSLO(_i)                (0x00014000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSLO_MAX_INDEX          15
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSHI(_i)               (0x00013C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSHI_MAX_INDEX         15
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSLO(_i)               (0x00013C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSLO_MAX_INDEX         15
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSHI(_i)               (0x00014404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSHI_MAX_INDEX         15
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSLO(_i)               (0x00014400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSLO_MAX_INDEX         15
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSHI(_i)                (0x00014604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSHI_MAX_INDEX          15
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSLO(_i)                (0x00014600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSLO_MAX_INDEX          15
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSHI(_i)               (0x00014204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSHI_MAX_INDEX         15
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSLO(_i)               (0x00014200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSLO_MAX_INDEX         15
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDHI(_i)              (0x00014804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVBNDHI_MAX_INDEX        15
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDLO(_i)              (0x00014800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVBNDLO_MAX_INDEX        15
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_PFRDMAVINVHI(_i)              (0x00014A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVHI_MAX_INDEX        15
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_PFRDMAVINVLO(_i)              (0x00014A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVLO_MAX_INDEX        15
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_PFRXVLANERR(_i)             (0x00010000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRXVLANERR_MAX_INDEX       15
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_MASK  (0xFFFFFF <<  I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_PFTCPRTXSEG(_i)             (0x00013600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRTXSEG_MAX_INDEX       15
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_PFTCPRXOPTERR(_i)               (0x00013200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXOPTERR_MAX_INDEX         15
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_MASK  (0xFFFFFF <<  I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_PFTCPRXPROTOERR(_i)                 (0x00013300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXPROTOERR_MAX_INDEX           15
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_MASK  (0xFFFFFF <<  I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSHI(_i)               (0x00013004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSHI_MAX_INDEX         15
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSLO(_i)               (0x00013000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSLO_MAX_INDEX         15
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGHI(_i)              (0x00013404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGHI_MAX_INDEX        15
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_MASK  (0xFFFF <<  I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGLO(_i)              (0x00013400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGLO_MAX_INDEX        15
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSHI(_i)               (0x00013804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSLO(_i)               (0x00013800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSHI(_i)               (0x00013A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSLO(_i)               (0x00013A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSHI                         0x0001E014 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_MASK  (0xFFFFFF <<  I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSLO                         0x0001E010 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPHI                      0x0001E01C /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_MASK  (0xFFFFFF <<  I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPLO                      0x0001E018 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT)
+#define I40E_GLPES_RDMARXOOONOMARK                     0x0001E004 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT 0
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_MASK  (0xFFFFFFFF <<  I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT)
+#define I40E_GLPES_RDMARXUNALIGN                     0x0001E000 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT 0
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_MASK  (0xFFFFFFFF <<  I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLEHI                       0x0001E044 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLELO                       0x0001E040 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLEHI                      0x0001E02C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLELO                      0x0001E028 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKHI                       0x0001E024 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKSLO                      0x0001E020 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLEHI                        0x0001E03C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLELO                        0x0001E038 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLEHI                      0x0001E034 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLELO                      0x0001E030 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTHI                          0x0001E04C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTLO                          0x0001E048 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTHI                        0x0001E054 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTLO                        0x0001E050 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSHI                    0x0001E05C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSLO                    0x0001E058 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXDISCARD(_i)                (0x00018600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXDISCARD_MAX_INDEX          31
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSHI(_i)                (0x00018804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXFRAGSHI_MAX_INDEX          31
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSLO(_i)                (0x00018800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXFRAGSLO_MAX_INDEX          31
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSHI(_i)                 (0x00018A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCOCTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSLO(_i)                 (0x00018A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCOCTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSHI(_i)                 (0x00018C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCPKTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSLO(_i)                 (0x00018C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCPKTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSHI(_i)               (0x00018204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXOCTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSLO(_i)               (0x00018200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXOCTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSHI(_i)               (0x00018404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSLO(_i)               (0x00018400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXTRUNC(_i)              (0x00018700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXTRUNC_MAX_INDEX        31
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSHI(_i)                (0x00019E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXFRAGSHI_MAX_INDEX          31
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSLO(_i)                (0x00019E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXFRAGSLO_MAX_INDEX          31
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSHI(_i)                 (0x0001A004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCOCTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSLO(_i)                 (0x0001A000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCOCTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSHI(_i)                 (0x0001A204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCPKTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSLO(_i)                 (0x0001A200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCPKTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXNOROUTE(_i)                (0x0001AE00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXNOROUTE_MAX_INDEX          31
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_MASK  (0xFFFFFF <<  I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSHI(_i)               (0x00019A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXOCTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSLO(_i)               (0x00019A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXOCTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSHI(_i)               (0x00019C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSLO(_i)               (0x00019C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXDISCARD(_i)                (0x00019200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXDISCARD_MAX_INDEX          31
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSHI(_i)                (0x00019404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXFRAGSHI_MAX_INDEX          31
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSLO(_i)                (0x00019400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXFRAGSLO_MAX_INDEX          31
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSHI(_i)                 (0x00019604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCOCTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSLO(_i)                 (0x00019600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCOCTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSHI(_i)                 (0x00019804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCPKTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSLO(_i)                 (0x00019800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCPKTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSHI(_i)               (0x00018E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXOCTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSLO(_i)               (0x00018E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXOCTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSHI(_i)               (0x00019004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSLO(_i)               (0x00019000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXTRUNC(_i)              (0x00019300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXTRUNC_MAX_INDEX        31
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSHI(_i)                (0x0001A804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXFRAGSHI_MAX_INDEX          31
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSLO(_i)                (0x0001A800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXFRAGSLO_MAX_INDEX          31
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSHI(_i)                 (0x0001AA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCOCTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSLO(_i)                 (0x0001AA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCOCTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSHI(_i)                 (0x0001AC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCPKTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSLO(_i)                 (0x0001AC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCPKTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXNOROUTE(_i)                (0x0001AF00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXNOROUTE_MAX_INDEX          31
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_MASK  (0xFFFFFF <<  I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSHI(_i)               (0x0001A404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXOCTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSLO(_i)               (0x0001A400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXOCTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSHI(_i)               (0x0001A604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSLO(_i)               (0x0001A600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSHI(_i)               (0x0001BE04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXRDSHI_MAX_INDEX         31
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSLO(_i)               (0x0001BE00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXRDSLO_MAX_INDEX         31
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSHI(_i)                (0x0001C004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXSNDSHI_MAX_INDEX          31
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSLO(_i)                (0x0001C000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXSNDSLO_MAX_INDEX          31
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSHI(_i)               (0x0001BC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXWRSHI_MAX_INDEX         31
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSLO(_i)               (0x0001BC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXWRSLO_MAX_INDEX         31
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSHI(_i)               (0x0001C404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXRDSHI_MAX_INDEX         31
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSLO(_i)               (0x0001C400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXRDSLO_MAX_INDEX         31
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSHI(_i)                (0x0001C604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXSNDSHI_MAX_INDEX          31
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSLO(_i)                (0x0001C600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXSNDSLO_MAX_INDEX          31
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSHI(_i)               (0x0001C204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXWRSHI_MAX_INDEX         31
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSLO(_i)               (0x0001C200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXWRSLO_MAX_INDEX         31
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDHI(_i)              (0x0001C804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVBNDHI_MAX_INDEX        31
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDLO(_i)              (0x0001C800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVBNDLO_MAX_INDEX        31
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_VFRDMAVINVHI(_i)              (0x0001CA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVINVHI_MAX_INDEX        31
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_VFRDMAVINVLO(_i)              (0x0001CA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVINVLO_MAX_INDEX        31
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_VFRXVLANERR(_i)             (0x00018000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRXVLANERR_MAX_INDEX       31
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_MASK  (0xFFFFFF <<  I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_VFTCPRTXSEG(_i)             (0x0001B600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRTXSEG_MAX_INDEX       31
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_VFTCPRXOPTERR(_i)               (0x0001B200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXOPTERR_MAX_INDEX         31
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_MASK  (0xFFFFFF <<  I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_VFTCPRXPROTOERR(_i)                 (0x0001B300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXPROTOERR_MAX_INDEX           31
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_MASK  (0xFFFFFF <<  I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSHI(_i)               (0x0001B004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXSEGSHI_MAX_INDEX         31
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSLO(_i)               (0x0001B000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXSEGSLO_MAX_INDEX         31
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGHI(_i)              (0x0001B404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPTXSEGHI_MAX_INDEX        31
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_MASK  (0xFFFF <<  I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGLO(_i)              (0x0001B400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPTXSEGLO_MAX_INDEX        31
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSHI(_i)               (0x0001B804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPRXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSLO(_i)               (0x0001B800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPRXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSHI(_i)               (0x0001BA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPTXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSLO(_i)               (0x0001BA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPTXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+
+#define I40E_VFPE_AEQALLOC1               0x0000A400 /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC1_AECOUNT_MASK  (0xFFFFFFFF <<  I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH1                  0x00009800 /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_MASK  (0xFFFFFFFF <<  I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW1                 0x0000AC00 /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_MASK  (0xFFFFFFFF <<  I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1                   0x0000B800 /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT   0
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_MASK    (0x1 <<  I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT 4
+#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_MASK  (0x7 <<  I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT 16
+#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_MASK  (0x3F <<  I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT    31
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_MASK     (0x1 <<  I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK1              0x0000B000 /* Reset: VFR */
+#define I40E_VFPE_CQACK1_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK1_PECQID_MASK  (0x1FFFF <<  I40E_VFPE_CQACK1_PECQID_SHIFT)
+#define I40E_VFPE_CQARM1              0x0000B400 /* Reset: VFR */
+#define I40E_VFPE_CQARM1_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM1_PECQID_MASK  (0x1FFFF <<  I40E_VFPE_CQARM1_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB1              0x0000BC00 /* Reset: VFR */
+#define I40E_VFPE_CQPDB1_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB1_WQHEAD_MASK  (0x7FF <<  I40E_VFPE_CQPDB1_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES1                      0x00009C00 /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_MASK  (0xFFFF <<  I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_MASK  (0xFFFF <<  I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL1                  0x0000A000 /* Reset: VFR */
+#define I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT     0
+#define I40E_VFPE_CQPTAIL1_WQTAIL_MASK      (0x7FF <<  I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_MASK  (0x1 <<  I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG01                        0x00008C00 /* Reset: VFR */
+#define I40E_VFPE_IPCONFIG01_PEIPID_SHIFT           0
+#define I40E_VFPE_IPCONFIG01_PEIPID_MASK            (0xFFFF <<  I40E_VFPE_IPCONFIG01_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_MASK  (0x1 <<  I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK1                       0x00009000 /* Reset: VFR */
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_MASK  (0x1F <<  I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR1                        0x00009400 /* Reset: VFR */
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_MASK  (0xFFFFFF <<  I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER1               0x0000A800 /* Reset: VFR */
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_MASK  (0xFFFFFFFF <<  I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC1                      0x0000C000 /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC1_PEQPID_SHIFT         0
+#define I40E_VFPE_WQEALLOC1_PEQPID_MASK          (0x3FFFF <<  I40E_VFPE_WQEALLOC1_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK  (0xFFF <<  I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
+#endif /* I40IW_REGISTER_H */

diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h
new file mode 100644
index 0000000..b0110c1
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_status.h

@@ -0,0 +1,100 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_STATUS_H
+#define I40IW_STATUS_H
+
+/* Error Codes */
+enum i40iw_status_code {
+	I40IW_SUCCESS = 0,
+	I40IW_ERR_NVM = -1,
+	I40IW_ERR_NVM_CHECKSUM = -2,
+	I40IW_ERR_CONFIG = -4,
+	I40IW_ERR_PARAM = -5,
+	I40IW_ERR_DEVICE_NOT_SUPPORTED = -6,
+	I40IW_ERR_RESET_FAILED = -7,
+	I40IW_ERR_SWFW_SYNC = -8,
+	I40IW_ERR_NO_MEMORY = -9,
+	I40IW_ERR_BAD_PTR = -10,
+	I40IW_ERR_INVALID_PD_ID = -11,
+	I40IW_ERR_INVALID_QP_ID = -12,
+	I40IW_ERR_INVALID_CQ_ID = -13,
+	I40IW_ERR_INVALID_CEQ_ID = -14,
+	I40IW_ERR_INVALID_AEQ_ID = -15,
+	I40IW_ERR_INVALID_SIZE = -16,
+	I40IW_ERR_INVALID_ARP_INDEX = -17,
+	I40IW_ERR_INVALID_FPM_FUNC_ID = -18,
+	I40IW_ERR_QP_INVALID_MSG_SIZE = -19,
+	I40IW_ERR_QP_TOOMANY_WRS_POSTED = -20,
+	I40IW_ERR_INVALID_FRAG_COUNT = -21,
+	I40IW_ERR_QUEUE_EMPTY = -22,
+	I40IW_ERR_INVALID_ALIGNMENT = -23,
+	I40IW_ERR_FLUSHED_QUEUE = -24,
+	I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25,
+	I40IW_ERR_INVALID_IMM_DATA_SIZE = -26,
+	I40IW_ERR_TIMEOUT = -27,
+	I40IW_ERR_OPCODE_MISMATCH = -28,
+	I40IW_ERR_CQP_COMPL_ERROR = -29,
+	I40IW_ERR_INVALID_VF_ID = -30,
+	I40IW_ERR_INVALID_HMCFN_ID = -31,
+	I40IW_ERR_BACKING_PAGE_ERROR = -32,
+	I40IW_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
+	I40IW_ERR_INVALID_PBLE_INDEX = -34,
+	I40IW_ERR_INVALID_SD_INDEX = -35,
+	I40IW_ERR_INVALID_PAGE_DESC_INDEX = -36,
+	I40IW_ERR_INVALID_SD_TYPE = -37,
+	I40IW_ERR_MEMCPY_FAILED = -38,
+	I40IW_ERR_INVALID_HMC_OBJ_INDEX = -39,
+	I40IW_ERR_INVALID_HMC_OBJ_COUNT = -40,
+	I40IW_ERR_INVALID_SRQ_ARM_LIMIT = -41,
+	I40IW_ERR_SRQ_ENABLED = -42,
+	I40IW_ERR_BUF_TOO_SHORT = -43,
+	I40IW_ERR_BAD_IWARP_CQE = -44,
+	I40IW_ERR_NVM_BLANK_MODE = -45,
+	I40IW_ERR_NOT_IMPLEMENTED = -46,
+	I40IW_ERR_PE_DOORBELL_NOT_ENABLED = -47,
+	I40IW_ERR_NOT_READY = -48,
+	I40IW_NOT_SUPPORTED = -49,
+	I40IW_ERR_FIRMWARE_API_VERSION = -50,
+	I40IW_ERR_RING_FULL = -51,
+	I40IW_ERR_MPA_CRC = -61,
+	I40IW_ERR_NO_TXBUFS = -62,
+	I40IW_ERR_SEQ_NUM = -63,
+	I40IW_ERR_list_empty = -64,
+	I40IW_ERR_INVALID_MAC_ADDR = -65,
+	I40IW_ERR_BAD_STAG      = -66,
+	I40IW_ERR_CQ_COMPL_ERROR = -67,
+
+};
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
new file mode 100644
index 0000000..edb3a8c
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h

@@ -0,0 +1,1312 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_TYPE_H
+#define I40IW_TYPE_H
+#include "i40iw_user.h"
+#include "i40iw_hmc.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+struct i40iw_cqp_sq_wqe {
+	u64 buf[I40IW_CQP_WQE_SIZE];
+};
+
+struct i40iw_sc_aeqe {
+	u64 buf[I40IW_AEQE_SIZE];
+};
+
+struct i40iw_ceqe {
+	u64 buf[I40IW_CEQE_SIZE];
+};
+
+struct i40iw_cqp_ctx {
+	u64 buf[I40IW_CQP_CTX_SIZE];
+};
+
+struct i40iw_cq_shadow_area {
+	u64 buf[I40IW_SHADOW_AREA_SIZE];
+};
+
+struct i40iw_sc_dev;
+struct i40iw_hmc_info;
+struct i40iw_dev_pestat;
+
+struct i40iw_cqp_ops;
+struct i40iw_ccq_ops;
+struct i40iw_ceq_ops;
+struct i40iw_aeq_ops;
+struct i40iw_mr_ops;
+struct i40iw_cqp_misc_ops;
+struct i40iw_pd_ops;
+struct i40iw_priv_qp_ops;
+struct i40iw_priv_cq_ops;
+struct i40iw_hmc_ops;
+
+enum i40iw_resource_indicator_type {
+	I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0,
+	I40IW_RSRC_INDICATOR_TYPE_CQ,
+	I40IW_RSRC_INDICATOR_TYPE_QP,
+	I40IW_RSRC_INDICATOR_TYPE_SRQ
+};
+
+enum i40iw_hdrct_flags {
+	DDP_LEN_FLAG = 0x80,
+	DDP_HDR_FLAG = 0x40,
+	RDMA_HDR_FLAG = 0x20
+};
+
+enum i40iw_term_layers {
+	LAYER_RDMA = 0,
+	LAYER_DDP = 1,
+	LAYER_MPA = 2
+};
+
+enum i40iw_term_error_types {
+	RDMAP_REMOTE_PROT = 1,
+	RDMAP_REMOTE_OP = 2,
+	DDP_CATASTROPHIC = 0,
+	DDP_TAGGED_BUFFER = 1,
+	DDP_UNTAGGED_BUFFER = 2,
+	DDP_LLP = 3
+};
+
+enum i40iw_term_rdma_errors {
+	RDMAP_INV_STAG = 0x00,
+	RDMAP_INV_BOUNDS = 0x01,
+	RDMAP_ACCESS = 0x02,
+	RDMAP_UNASSOC_STAG = 0x03,
+	RDMAP_TO_WRAP = 0x04,
+	RDMAP_INV_RDMAP_VER = 0x05,
+	RDMAP_UNEXPECTED_OP = 0x06,
+	RDMAP_CATASTROPHIC_LOCAL = 0x07,
+	RDMAP_CATASTROPHIC_GLOBAL = 0x08,
+	RDMAP_CANT_INV_STAG = 0x09,
+	RDMAP_UNSPECIFIED = 0xff
+};
+
+enum i40iw_term_ddp_errors {
+	DDP_CATASTROPHIC_LOCAL = 0x00,
+	DDP_TAGGED_INV_STAG = 0x00,
+	DDP_TAGGED_BOUNDS = 0x01,
+	DDP_TAGGED_UNASSOC_STAG = 0x02,
+	DDP_TAGGED_TO_WRAP = 0x03,
+	DDP_TAGGED_INV_DDP_VER = 0x04,
+	DDP_UNTAGGED_INV_QN = 0x01,
+	DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
+	DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
+	DDP_UNTAGGED_INV_MO = 0x04,
+	DDP_UNTAGGED_INV_TOO_LONG = 0x05,
+	DDP_UNTAGGED_INV_DDP_VER = 0x06
+};
+
+enum i40iw_term_mpa_errors {
+	MPA_CLOSED = 0x01,
+	MPA_CRC = 0x02,
+	MPA_MARKER = 0x03,
+	MPA_REQ_RSP = 0x04,
+};
+
+enum i40iw_flush_opcode {
+	FLUSH_INVALID = 0,
+	FLUSH_PROT_ERR,
+	FLUSH_REM_ACCESS_ERR,
+	FLUSH_LOC_QP_OP_ERR,
+	FLUSH_REM_OP_ERR,
+	FLUSH_LOC_LEN_ERR,
+	FLUSH_GENERAL_ERR,
+	FLUSH_FATAL_ERR
+};
+
+enum i40iw_term_eventtypes {
+	TERM_EVENT_QP_FATAL,
+	TERM_EVENT_QP_ACCESS_ERR
+};
+
+struct i40iw_terminate_hdr {
+	u8 layer_etype;
+	u8 error_code;
+	u8 hdrct;
+	u8 rsvd;
+};
+
+enum i40iw_debug_flag {
+	I40IW_DEBUG_NONE	= 0x00000000,
+	I40IW_DEBUG_ERR		= 0x00000001,
+	I40IW_DEBUG_INIT	= 0x00000002,
+	I40IW_DEBUG_DEV		= 0x00000004,
+	I40IW_DEBUG_CM		= 0x00000008,
+	I40IW_DEBUG_VERBS	= 0x00000010,
+	I40IW_DEBUG_PUDA	= 0x00000020,
+	I40IW_DEBUG_ILQ		= 0x00000040,
+	I40IW_DEBUG_IEQ		= 0x00000080,
+	I40IW_DEBUG_QP		= 0x00000100,
+	I40IW_DEBUG_CQ		= 0x00000200,
+	I40IW_DEBUG_MR		= 0x00000400,
+	I40IW_DEBUG_PBLE	= 0x00000800,
+	I40IW_DEBUG_WQE		= 0x00001000,
+	I40IW_DEBUG_AEQ		= 0x00002000,
+	I40IW_DEBUG_CQP		= 0x00004000,
+	I40IW_DEBUG_HMC		= 0x00008000,
+	I40IW_DEBUG_USER	= 0x00010000,
+	I40IW_DEBUG_VIRT	= 0x00020000,
+	I40IW_DEBUG_DCB		= 0x00040000,
+	I40IW_DEBUG_CQE		= 0x00800000,
+	I40IW_DEBUG_ALL		= 0xFFFFFFFF
+};
+
+enum i40iw_hw_stat_index_32b {
+	I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0,
+	I40IW_HW_STAT_INDEX_IP4RXTRUNC,
+	I40IW_HW_STAT_INDEX_IP4TXNOROUTE,
+	I40IW_HW_STAT_INDEX_IP6RXDISCARD,
+	I40IW_HW_STAT_INDEX_IP6RXTRUNC,
+	I40IW_HW_STAT_INDEX_IP6TXNOROUTE,
+	I40IW_HW_STAT_INDEX_TCPRTXSEG,
+	I40IW_HW_STAT_INDEX_TCPRXOPTERR,
+	I40IW_HW_STAT_INDEX_TCPRXPROTOERR,
+	I40IW_HW_STAT_INDEX_MAX_32
+};
+
+enum i40iw_hw_stat_index_64b {
+	I40IW_HW_STAT_INDEX_IP4RXOCTS = 0,
+	I40IW_HW_STAT_INDEX_IP4RXPKTS,
+	I40IW_HW_STAT_INDEX_IP4RXFRAGS,
+	I40IW_HW_STAT_INDEX_IP4RXMCPKTS,
+	I40IW_HW_STAT_INDEX_IP4TXOCTS,
+	I40IW_HW_STAT_INDEX_IP4TXPKTS,
+	I40IW_HW_STAT_INDEX_IP4TXFRAGS,
+	I40IW_HW_STAT_INDEX_IP4TXMCPKTS,
+	I40IW_HW_STAT_INDEX_IP6RXOCTS,
+	I40IW_HW_STAT_INDEX_IP6RXPKTS,
+	I40IW_HW_STAT_INDEX_IP6RXFRAGS,
+	I40IW_HW_STAT_INDEX_IP6RXMCPKTS,
+	I40IW_HW_STAT_INDEX_IP6TXOCTS,
+	I40IW_HW_STAT_INDEX_IP6TXPKTS,
+	I40IW_HW_STAT_INDEX_IP6TXFRAGS,
+	I40IW_HW_STAT_INDEX_IP6TXMCPKTS,
+	I40IW_HW_STAT_INDEX_TCPRXSEGS,
+	I40IW_HW_STAT_INDEX_TCPTXSEG,
+	I40IW_HW_STAT_INDEX_RDMARXRDS,
+	I40IW_HW_STAT_INDEX_RDMARXSNDS,
+	I40IW_HW_STAT_INDEX_RDMARXWRS,
+	I40IW_HW_STAT_INDEX_RDMATXRDS,
+	I40IW_HW_STAT_INDEX_RDMATXSNDS,
+	I40IW_HW_STAT_INDEX_RDMATXWRS,
+	I40IW_HW_STAT_INDEX_RDMAVBND,
+	I40IW_HW_STAT_INDEX_RDMAVINV,
+	I40IW_HW_STAT_INDEX_MAX_64
+};
+
+struct i40iw_dev_hw_stat_offsets {
+	u32 stat_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
+	u32 stat_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
+};
+
+struct i40iw_dev_hw_stats {
+	u64 stat_value_32[I40IW_HW_STAT_INDEX_MAX_32];
+	u64 stat_value_64[I40IW_HW_STAT_INDEX_MAX_64];
+};
+
+struct i40iw_device_pestat_ops {
+	void (*iw_hw_stat_init)(struct i40iw_dev_pestat *, u8, struct i40iw_hw *, bool);
+	void (*iw_hw_stat_read_32)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_32b, u64 *);
+	void (*iw_hw_stat_read_64)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_64b, u64 *);
+	void (*iw_hw_stat_read_all)(struct i40iw_dev_pestat *, struct i40iw_dev_hw_stats *);
+	void (*iw_hw_stat_refresh_all)(struct i40iw_dev_pestat *);
+};
+
+struct i40iw_dev_pestat {
+	struct i40iw_hw *hw;
+	struct i40iw_device_pestat_ops ops;
+	struct i40iw_dev_hw_stats hw_stats;
+	struct i40iw_dev_hw_stats last_read_hw_stats;
+	struct i40iw_dev_hw_stat_offsets hw_stat_offsets;
+	struct timer_list stats_timer;
+	spinlock_t stats_lock; /* rdma stats lock */
+};
+
+struct i40iw_hw {
+	u8 __iomem *hw_addr;
+	void *dev_context;
+	struct i40iw_hmc_info hmc;
+};
+
+struct i40iw_pfpdu {
+	struct list_head rxlist;
+	u32 rcv_nxt;
+	u32 fps;
+	u32 max_fpdu_data;
+	bool mode;
+	bool mpa_crc_err;
+	u64 total_ieq_bufs;
+	u64 fpdu_processed;
+	u64 bad_seq_num;
+	u64 crc_err;
+	u64 no_tx_bufs;
+	u64 tx_err;
+	u64 out_of_order;
+	u64 pmode_count;
+};
+
+struct i40iw_sc_pd {
+	u32 size;
+	struct i40iw_sc_dev *dev;
+	u16 pd_id;
+};
+
+struct i40iw_cqp_quanta {
+	u64 elem[I40IW_CQP_WQE_SIZE];
+};
+
+struct i40iw_sc_cqp {
+	u32 size;
+	u64 sq_pa;
+	u64 host_ctx_pa;
+	void *back_cqp;
+	struct i40iw_sc_dev *dev;
+	enum i40iw_status_code (*process_cqp_sds)(struct i40iw_sc_dev *,
+						  struct i40iw_update_sds_info *);
+	struct i40iw_dma_mem sdbuf;
+	struct i40iw_ring sq_ring;
+	struct i40iw_cqp_quanta *sq_base;
+	u64 *host_ctx;
+	u64 *scratch_array;
+	u32 cqp_id;
+	u32 sq_size;
+	u32 hw_sq_size;
+	u8 struct_ver;
+	u8 polarity;
+	bool en_datacenter_tcp;
+	u8 hmc_profile;
+	u8 enabled_vf_count;
+	u8 timeout_count;
+};
+
+struct i40iw_sc_aeq {
+	u32 size;
+	u64 aeq_elem_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_sc_aeqe *aeqe_base;
+	void *pbl_list;
+	u32 elem_cnt;
+	struct i40iw_ring aeq_ring;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	u32 first_pm_pbl_idx;
+	u8 polarity;
+};
+
+struct i40iw_sc_ceq {
+	u32 size;
+	u64 ceq_elem_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_ceqe *ceqe_base;
+	void *pbl_list;
+	u32 ceq_id;
+	u32 elem_cnt;
+	struct i40iw_ring ceq_ring;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	bool tph_en;
+	u8 tph_val;
+	u32 first_pm_pbl_idx;
+	u8 polarity;
+};
+
+struct i40iw_sc_cq {
+	struct i40iw_cq_uk cq_uk;
+	u64 cq_pa;
+	u64 shadow_area_pa;
+	struct i40iw_sc_dev *dev;
+	void *pbl_list;
+	void *back_cq;
+	u32 ceq_id;
+	u32 shadow_read_threshold;
+	bool ceqe_mask;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	u8 cq_type;
+	bool ceq_id_valid;
+	bool tph_en;
+	u8 tph_val;
+	u32 first_pm_pbl_idx;
+	bool check_overflow;
+};
+
+struct i40iw_sc_qp {
+	struct i40iw_qp_uk qp_uk;
+	u64 sq_pa;
+	u64 rq_pa;
+	u64 hw_host_ctx_pa;
+	u64 shadow_area_pa;
+	u64 q2_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_sc_pd *pd;
+	u64 *hw_host_ctx;
+	void *llp_stream_handle;
+	void *back_qp;
+	struct i40iw_pfpdu pfpdu;
+	u8 *q2_buf;
+	u64 qp_compl_ctx;
+	u16 qs_handle;
+	u16 exception_lan_queue;
+	u16 push_idx;
+	u8 sq_tph_val;
+	u8 rq_tph_val;
+	u8 qp_state;
+	u8 qp_type;
+	u8 hw_sq_size;
+	u8 hw_rq_size;
+	u8 src_mac_addr_idx;
+	bool sq_tph_en;
+	bool rq_tph_en;
+	bool rcv_tph_en;
+	bool xmit_tph_en;
+	bool virtual_map;
+	bool flush_sq;
+	bool flush_rq;
+	bool sq_flush;
+	enum i40iw_flush_opcode flush_code;
+	enum i40iw_term_eventtypes eventtype;
+	u8 term_flags;
+};
+
+struct i40iw_hmc_fpm_misc {
+	u32 max_ceqs;
+	u32 max_sds;
+	u32 xf_block_size;
+	u32 q1_block_size;
+	u32 ht_multiplier;
+	u32 timer_bucket;
+};
+
+struct i40iw_vchnl_if {
+	enum i40iw_status_code (*vchnl_recv)(struct i40iw_sc_dev *, u32, u8 *, u16);
+	enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *dev, u32, u8 *, u16);
+};
+
+#define I40IW_VCHNL_MAX_VF_MSG_SIZE 512
+
+struct i40iw_vchnl_vf_msg_buffer {
+	struct i40iw_virtchnl_op_buf vchnl_msg;
+	char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1];
+};
+
+struct i40iw_vfdev {
+	struct i40iw_sc_dev *pf_dev;
+	u8 *hmc_info_mem;
+	struct i40iw_dev_pestat dev_pestat;
+	struct i40iw_hmc_pble_info *pble_info;
+	struct i40iw_hmc_info hmc_info;
+	struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer;
+	u64 fpm_query_buf_pa;
+	u64 *fpm_query_buf;
+	u32 vf_id;
+	u32 msg_count;
+	bool pf_hmc_initialized;
+	u16 pmf_index;
+	u16 iw_vf_idx;		/* VF Device table index */
+	bool stats_initialized;
+};
+
+struct i40iw_sc_dev {
+	struct list_head cqp_cmd_head;	/* head of the CQP command list */
+	spinlock_t cqp_lock; /* cqp list sync */
+	struct i40iw_dev_uk dev_uk;
+	struct i40iw_dev_pestat dev_pestat;
+	struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT];
+	u64 fpm_query_buf_pa;
+	u64 fpm_commit_buf_pa;
+	u64 *fpm_query_buf;
+	u64 *fpm_commit_buf;
+	void *back_dev;
+	struct i40iw_hw *hw;
+	u8 __iomem *db_addr;
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_hmc_pble_info *pble_info;
+	struct i40iw_vfdev *vf_dev[I40IW_MAX_PE_ENABLED_VF_COUNT];
+	struct i40iw_sc_cqp *cqp;
+	struct i40iw_sc_aeq *aeq;
+	struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT];
+	struct i40iw_sc_cq *ccq;
+	struct i40iw_cqp_ops *cqp_ops;
+	struct i40iw_ccq_ops *ccq_ops;
+	struct i40iw_ceq_ops *ceq_ops;
+	struct i40iw_aeq_ops *aeq_ops;
+	struct i40iw_pd_ops *iw_pd_ops;
+	struct i40iw_priv_qp_ops *iw_priv_qp_ops;
+	struct i40iw_priv_cq_ops *iw_priv_cq_ops;
+	struct i40iw_mr_ops *mr_ops;
+	struct i40iw_cqp_misc_ops *cqp_misc_ops;
+	struct i40iw_hmc_ops *hmc_ops;
+	struct i40iw_vchnl_if vchnl_if;
+	u32 ilq_count;
+	struct i40iw_virt_mem ilq_mem;
+	struct i40iw_puda_rsrc *ilq;
+	u32 ieq_count;
+	struct i40iw_virt_mem ieq_mem;
+	struct i40iw_puda_rsrc *ieq;
+
+	struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
+
+	struct i40iw_hmc_fpm_misc hmc_fpm_misc;
+	u16 qs_handle;
+	u32	debug_mask;
+	u16 exception_lan_queue;
+	u8 hmc_fn_id;
+	bool is_pf;
+	bool vchnl_up;
+	u8 vf_id;
+	u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
+	struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf;
+	u8 hw_rev;
+};
+
+struct i40iw_modify_cq_info {
+	u64 cq_pa;
+	struct i40iw_cqe *cq_base;
+	void *pbl_list;
+	u32 ceq_id;
+	u32 cq_size;
+	u32 shadow_read_threshold;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	bool check_overflow;
+	bool cq_resize;
+	bool ceq_change;
+	bool check_overflow_change;
+	u32 first_pm_pbl_idx;
+	bool ceq_valid;
+};
+
+struct i40iw_create_qp_info {
+	u8 next_iwarp_state;
+	bool ord_valid;
+	bool tcp_ctx_valid;
+	bool cq_num_valid;
+	bool static_rsrc;
+	bool arp_cache_idx_valid;
+};
+
+struct i40iw_modify_qp_info {
+	u64 rx_win0;
+	u64 rx_win1;
+	u16 new_mss;
+	u8 next_iwarp_state;
+	u8 termlen;
+	bool ord_valid;
+	bool tcp_ctx_valid;
+	bool cq_num_valid;
+	bool static_rsrc;
+	bool arp_cache_idx_valid;
+	bool reset_tcp_conn;
+	bool remove_hash_idx;
+	bool dont_send_term;
+	bool dont_send_fin;
+	bool cached_var_valid;
+	bool mss_change;
+	bool force_loopback;
+};
+
+struct i40iw_ccq_cqe_info {
+	struct i40iw_sc_cqp *cqp;
+	u64 scratch;
+	u32 op_ret_val;
+	u16 maj_err_code;
+	u16 min_err_code;
+	u8 op_code;
+	bool error;
+};
+
+struct i40iw_l2params {
+	u16 qs_handle_list[I40IW_MAX_USER_PRIORITY];
+	u16 mss;
+};
+
+struct i40iw_device_init_info {
+	u64 fpm_query_buf_pa;
+	u64 fpm_commit_buf_pa;
+	u64 *fpm_query_buf;
+	u64 *fpm_commit_buf;
+	struct i40iw_hw *hw;
+	void __iomem *bar0;
+	enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16);
+	u16 qs_handle;
+	u16 exception_lan_queue;
+	u8 hmc_fn_id;
+	bool is_pf;
+	u32 debug_mask;
+};
+
+enum i40iw_cqp_hmc_profile {
+	I40IW_HMC_PROFILE_DEFAULT = 1,
+	I40IW_HMC_PROFILE_FAVOR_VF = 2,
+	I40IW_HMC_PROFILE_EQUAL = 3,
+};
+
+struct i40iw_cqp_init_info {
+	u64 cqp_compl_ctx;
+	u64 host_ctx_pa;
+	u64 sq_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_cqp_quanta *sq;
+	u64 *host_ctx;
+	u64 *scratch_array;
+	u32 sq_size;
+	u8 struct_ver;
+	bool en_datacenter_tcp;
+	u8 hmc_profile;
+	u8 enabled_vf_count;
+};
+
+struct i40iw_ceq_init_info {
+	u64 ceqe_pa;
+	struct i40iw_sc_dev *dev;
+	u64 *ceqe_base;
+	void *pbl_list;
+	u32 elem_cnt;
+	u32 ceq_id;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	bool tph_en;
+	u8 tph_val;
+	u32 first_pm_pbl_idx;
+};
+
+struct i40iw_aeq_init_info {
+	u64 aeq_elem_pa;
+	struct i40iw_sc_dev *dev;
+	u32 *aeqe_base;
+	void *pbl_list;
+	u32 elem_cnt;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	u32 first_pm_pbl_idx;
+};
+
+struct i40iw_ccq_init_info {
+	u64 cq_pa;
+	u64 shadow_area_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_cqe *cq_base;
+	u64 *shadow_area;
+	void *pbl_list;
+	u32 num_elem;
+	u32 ceq_id;
+	u32 shadow_read_threshold;
+	bool ceqe_mask;
+	bool ceq_id_valid;
+	bool tph_en;
+	u8 tph_val;
+	bool avoid_mem_cflct;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	u32 first_pm_pbl_idx;
+};
+
+struct i40iwarp_offload_info {
+	u16 rcv_mark_offset;
+	u16 snd_mark_offset;
+	u16 pd_id;
+	u8 ddp_ver;
+	u8 rdmap_ver;
+	u8 ord_size;
+	u8 ird_size;
+	bool wr_rdresp_en;
+	bool rd_enable;
+	bool snd_mark_en;
+	bool rcv_mark_en;
+	bool bind_en;
+	bool fast_reg_en;
+	bool priv_mode_en;
+	bool lsmm_present;
+	u8 iwarp_mode;
+	bool align_hdrs;
+	bool rcv_no_mpa_crc;
+
+	u8 last_byte_sent;
+};
+
+struct i40iw_tcp_offload_info {
+	bool ipv4;
+	bool no_nagle;
+	bool insert_vlan_tag;
+	bool time_stamp;
+	u8 cwnd_inc_limit;
+	bool drop_ooo_seg;
+	bool dup_ack_thresh;
+	u8 ttl;
+	u8 src_mac_addr_idx;
+	bool avoid_stretch_ack;
+	u8 tos;
+	u16 src_port;
+	u16 dst_port;
+	u32 dest_ip_addr0;
+	u32 dest_ip_addr1;
+	u32 dest_ip_addr2;
+	u32 dest_ip_addr3;
+	u32 snd_mss;
+	u16 vlan_tag;
+	u16 arp_idx;
+	u32 flow_label;
+	bool wscale;
+	u8 tcp_state;
+	u8 snd_wscale;
+	u8 rcv_wscale;
+	u32 time_stamp_recent;
+	u32 time_stamp_age;
+	u32 snd_nxt;
+	u32 snd_wnd;
+	u32 rcv_nxt;
+	u32 rcv_wnd;
+	u32 snd_max;
+	u32 snd_una;
+	u32 srtt;
+	u32 rtt_var;
+	u32 ss_thresh;
+	u32 cwnd;
+	u32 snd_wl1;
+	u32 snd_wl2;
+	u32 max_snd_window;
+	u8 rexmit_thresh;
+	u32 local_ipaddr0;
+	u32 local_ipaddr1;
+	u32 local_ipaddr2;
+	u32 local_ipaddr3;
+	bool ignore_tcp_opt;
+	bool ignore_tcp_uns_opt;
+};
+
+struct i40iw_qp_host_ctx_info {
+	u64 qp_compl_ctx;
+	struct i40iw_tcp_offload_info *tcp_info;
+	struct i40iwarp_offload_info *iwarp_info;
+	u32 send_cq_num;
+	u32 rcv_cq_num;
+	u16 push_idx;
+	bool push_mode_en;
+	bool tcp_info_valid;
+	bool iwarp_info_valid;
+	bool err_rq_idx_valid;
+	u16 err_rq_idx;
+};
+
+struct i40iw_aeqe_info {
+	u64 compl_ctx;
+	u32 qp_cq_id;
+	u16 ae_id;
+	u16 wqe_idx;
+	u8 tcp_state;
+	u8 iwarp_state;
+	bool qp;
+	bool cq;
+	bool sq;
+	bool in_rdrsp_wr;
+	bool out_rdrsp;
+	u8 q2_data_written;
+	bool aeqe_overflow;
+};
+
+struct i40iw_allocate_stag_info {
+	u64 total_len;
+	u32 chunk_size;
+	u32 stag_idx;
+	u32 page_size;
+	u16 pd_id;
+	u16 access_rights;
+	bool remote_access;
+	bool use_hmc_fcn_index;
+	u8 hmc_fcn_index;
+	bool use_pf_rid;
+};
+
+struct i40iw_reg_ns_stag_info {
+	u64 reg_addr_pa;
+	u64 fbo;
+	void *va;
+	u64 total_len;
+	u32 page_size;
+	u32 chunk_size;
+	u32 first_pm_pbl_index;
+	enum i40iw_addressing_type addr_type;
+	i40iw_stag_index stag_idx;
+	u16 access_rights;
+	u16 pd_id;
+	i40iw_stag_key stag_key;
+	bool use_hmc_fcn_index;
+	u8 hmc_fcn_index;
+	bool use_pf_rid;
+};
+
+struct i40iw_fast_reg_stag_info {
+	u64 wr_id;
+	u64 reg_addr_pa;
+	u64 fbo;
+	void *va;
+	u64 total_len;
+	u32 page_size;
+	u32 chunk_size;
+	u32 first_pm_pbl_index;
+	enum i40iw_addressing_type addr_type;
+	i40iw_stag_index stag_idx;
+	u16 access_rights;
+	u16 pd_id;
+	i40iw_stag_key stag_key;
+	bool local_fence;
+	bool read_fence;
+	bool signaled;
+	bool use_hmc_fcn_index;
+	u8 hmc_fcn_index;
+	bool use_pf_rid;
+	bool defer_flag;
+};
+
+struct i40iw_dealloc_stag_info {
+	u32 stag_idx;
+	u16 pd_id;
+	bool mr;
+	bool dealloc_pbl;
+};
+
+struct i40iw_register_shared_stag {
+	void *va;
+	enum i40iw_addressing_type addr_type;
+	i40iw_stag_index new_stag_idx;
+	i40iw_stag_index parent_stag_idx;
+	u32 access_rights;
+	u16 pd_id;
+	i40iw_stag_key new_stag_key;
+};
+
+struct i40iw_qp_init_info {
+	struct i40iw_qp_uk_init_info qp_uk_init_info;
+	struct i40iw_sc_pd *pd;
+	u64 *host_ctx;
+	u8 *q2;
+	u64 sq_pa;
+	u64 rq_pa;
+	u64 host_ctx_pa;
+	u64 q2_pa;
+	u64 shadow_area_pa;
+	u8 sq_tph_val;
+	u8 rq_tph_val;
+	u8 type;
+	bool sq_tph_en;
+	bool rq_tph_en;
+	bool rcv_tph_en;
+	bool xmit_tph_en;
+	bool virtual_map;
+};
+
+struct i40iw_cq_init_info {
+	struct i40iw_sc_dev *dev;
+	u64 cq_base_pa;
+	u64 shadow_area_pa;
+	u32 ceq_id;
+	u32 shadow_read_threshold;
+	bool virtual_map;
+	bool ceqe_mask;
+	u8 pbl_chunk_size;
+	u32 first_pm_pbl_idx;
+	bool ceq_id_valid;
+	bool tph_en;
+	u8 tph_val;
+	u8 type;
+	struct i40iw_cq_uk_init_info cq_uk_init_info;
+};
+
+struct i40iw_upload_context_info {
+	u64 buf_pa;
+	bool freeze_qp;
+	bool raw_format;
+	u32 qp_id;
+	u8 qp_type;
+};
+
+struct i40iw_add_arp_cache_entry_info {
+	u8 mac_addr[6];
+	u32 reach_max;
+	u16 arp_index;
+	bool permanent;
+};
+
+struct i40iw_apbvt_info {
+	u16 port;
+	bool add;
+};
+
+enum i40iw_quad_entry_type {
+	I40IW_QHASH_TYPE_TCP_ESTABLISHED = 1,
+	I40IW_QHASH_TYPE_TCP_SYN,
+};
+
+enum i40iw_quad_hash_manage_type {
+	I40IW_QHASH_MANAGE_TYPE_DELETE = 0,
+	I40IW_QHASH_MANAGE_TYPE_ADD,
+	I40IW_QHASH_MANAGE_TYPE_MODIFY
+};
+
+struct i40iw_qhash_table_info {
+	enum i40iw_quad_hash_manage_type manage;
+	enum i40iw_quad_entry_type entry_type;
+	bool vlan_valid;
+	bool ipv4_valid;
+	u8 mac_addr[6];
+	u16 vlan_id;
+	u16 qs_handle;
+	u32 qp_num;
+	u32 dest_ip[4];
+	u32 src_ip[4];
+	u32 dest_port;
+	u32 src_port;
+};
+
+struct i40iw_local_mac_ipaddr_entry_info {
+	u8 mac_addr[6];
+	u8 entry_idx;
+};
+
+struct i40iw_cqp_manage_push_page_info {
+	u32 push_idx;
+	u16 qs_handle;
+	u8 free_page;
+};
+
+struct i40iw_qp_flush_info {
+	u16 sq_minor_code;
+	u16 sq_major_code;
+	u16 rq_minor_code;
+	u16 rq_major_code;
+	u16 ae_code;
+	u8 ae_source;
+	bool sq;
+	bool rq;
+	bool userflushcode;
+	bool generate_ae;
+};
+
+struct i40iw_cqp_commit_fpm_values {
+	u64 qp_base;
+	u64 cq_base;
+	u32 hte_base;
+	u32 arp_base;
+	u32 apbvt_inuse_base;
+	u32 mr_base;
+	u32 xf_base;
+	u32 xffl_base;
+	u32 q1_base;
+	u32 q1fl_base;
+	u32 fsimc_base;
+	u32 fsiav_base;
+	u32 pbl_base;
+
+	u32 qp_cnt;
+	u32 cq_cnt;
+	u32 hte_cnt;
+	u32 arp_cnt;
+	u32 mr_cnt;
+	u32 xf_cnt;
+	u32 xffl_cnt;
+	u32 q1_cnt;
+	u32 q1fl_cnt;
+	u32 fsimc_cnt;
+	u32 fsiav_cnt;
+	u32 pbl_cnt;
+};
+
+struct i40iw_cqp_query_fpm_values {
+	u16 first_pe_sd_index;
+	u32 qp_objsize;
+	u32 cq_objsize;
+	u32 hte_objsize;
+	u32 arp_objsize;
+	u32 mr_objsize;
+	u32 xf_objsize;
+	u32 q1_objsize;
+	u32 fsimc_objsize;
+	u32 fsiav_objsize;
+
+	u32 qp_max;
+	u32 cq_max;
+	u32 hte_max;
+	u32 arp_max;
+	u32 mr_max;
+	u32 xf_max;
+	u32 xffl_max;
+	u32 q1_max;
+	u32 q1fl_max;
+	u32 fsimc_max;
+	u32 fsiav_max;
+	u32 pbl_max;
+};
+
+struct i40iw_cqp_ops {
+	enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
+					   struct i40iw_cqp_init_info *);
+	enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, bool, u16 *, u16 *);
+	void (*cqp_post_sq)(struct i40iw_sc_cqp *);
+	u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch);
+	enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *);
+	enum i40iw_status_code (*poll_for_cqp_op_done)(struct i40iw_sc_cqp *, u8,
+						       struct i40iw_ccq_cqe_info *);
+};
+
+struct i40iw_ccq_ops {
+	enum i40iw_status_code (*ccq_init)(struct i40iw_sc_cq *,
+					   struct i40iw_ccq_init_info *);
+	enum i40iw_status_code (*ccq_create)(struct i40iw_sc_cq *, u64, bool, bool);
+	enum i40iw_status_code (*ccq_destroy)(struct i40iw_sc_cq *, u64, bool);
+	enum i40iw_status_code (*ccq_create_done)(struct i40iw_sc_cq *);
+	enum i40iw_status_code (*ccq_get_cqe_info)(struct i40iw_sc_cq *,
+						   struct i40iw_ccq_cqe_info *);
+	void (*ccq_arm)(struct i40iw_sc_cq *);
+};
+
+struct i40iw_ceq_ops {
+	enum i40iw_status_code (*ceq_init)(struct i40iw_sc_ceq *,
+					   struct i40iw_ceq_init_info *);
+	enum i40iw_status_code (*ceq_create)(struct i40iw_sc_ceq *, u64, bool);
+	enum i40iw_status_code (*cceq_create_done)(struct i40iw_sc_ceq *);
+	enum i40iw_status_code (*cceq_destroy_done)(struct i40iw_sc_ceq *);
+	enum i40iw_status_code (*cceq_create)(struct i40iw_sc_ceq *, u64);
+	enum i40iw_status_code (*ceq_destroy)(struct i40iw_sc_ceq *, u64, bool);
+	void *(*process_ceq)(struct i40iw_sc_dev *, struct i40iw_sc_ceq *);
+};
+
+struct i40iw_aeq_ops {
+	enum i40iw_status_code (*aeq_init)(struct i40iw_sc_aeq *,
+					   struct i40iw_aeq_init_info *);
+	enum i40iw_status_code (*aeq_create)(struct i40iw_sc_aeq *, u64, bool);
+	enum i40iw_status_code (*aeq_destroy)(struct i40iw_sc_aeq *, u64, bool);
+	enum i40iw_status_code (*get_next_aeqe)(struct i40iw_sc_aeq *,
+						struct i40iw_aeqe_info *);
+	enum i40iw_status_code (*repost_aeq_entries)(struct i40iw_sc_dev *, u32);
+	enum i40iw_status_code (*aeq_create_done)(struct i40iw_sc_aeq *);
+	enum i40iw_status_code (*aeq_destroy_done)(struct i40iw_sc_aeq *);
+};
+
+struct i40iw_pd_ops {
+	void (*pd_init)(struct i40iw_sc_dev *, struct i40iw_sc_pd *, u16);
+};
+
+struct i40iw_priv_qp_ops {
+	enum i40iw_status_code (*qp_init)(struct i40iw_sc_qp *, struct i40iw_qp_init_info *);
+	enum i40iw_status_code (*qp_create)(struct i40iw_sc_qp *,
+					    struct i40iw_create_qp_info *, u64, bool);
+	enum i40iw_status_code (*qp_modify)(struct i40iw_sc_qp *,
+					    struct i40iw_modify_qp_info *, u64, bool);
+	enum i40iw_status_code (*qp_destroy)(struct i40iw_sc_qp *, u64, bool, bool, bool);
+	enum i40iw_status_code (*qp_flush_wqes)(struct i40iw_sc_qp *,
+						struct i40iw_qp_flush_info *, u64, bool);
+	enum i40iw_status_code (*qp_upload_context)(struct i40iw_sc_dev *,
+						    struct i40iw_upload_context_info *,
+						    u64, bool);
+	enum i40iw_status_code (*qp_setctx)(struct i40iw_sc_qp *, u64 *,
+					    struct i40iw_qp_host_ctx_info *);
+
+	void (*qp_send_lsmm)(struct i40iw_sc_qp *, void *, u32, i40iw_stag);
+	void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32);
+	void (*qp_send_rtt)(struct i40iw_sc_qp *, bool);
+	enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8);
+};
+
+struct i40iw_priv_cq_ops {
+	enum i40iw_status_code (*cq_init)(struct i40iw_sc_cq *, struct i40iw_cq_init_info *);
+	enum i40iw_status_code (*cq_create)(struct i40iw_sc_cq *, u64, bool, bool);
+	enum i40iw_status_code (*cq_destroy)(struct i40iw_sc_cq *, u64, bool);
+	enum i40iw_status_code (*cq_modify)(struct i40iw_sc_cq *,
+					    struct i40iw_modify_cq_info *, u64, bool);
+};
+
+struct i40iw_mr_ops {
+	enum i40iw_status_code (*alloc_stag)(struct i40iw_sc_dev *,
+					     struct i40iw_allocate_stag_info *, u64, bool);
+	enum i40iw_status_code (*mr_reg_non_shared)(struct i40iw_sc_dev *,
+						    struct i40iw_reg_ns_stag_info *,
+						    u64, bool);
+	enum i40iw_status_code (*mr_reg_shared)(struct i40iw_sc_dev *,
+						struct i40iw_register_shared_stag *,
+						u64, bool);
+	enum i40iw_status_code (*dealloc_stag)(struct i40iw_sc_dev *,
+					       struct i40iw_dealloc_stag_info *,
+					       u64, bool);
+	enum i40iw_status_code (*query_stag)(struct i40iw_sc_dev *, u64, u32, bool);
+	enum i40iw_status_code (*mw_alloc)(struct i40iw_sc_dev *, u64, u32, u16, bool);
+};
+
+struct i40iw_cqp_misc_ops {
+	enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *,
+						   struct i40iw_cqp_manage_push_page_info *,
+						   u64, bool);
+	enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *,
+							   u64, u8, bool, bool);
+	enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *,
+							   u64, u8, u8, bool, bool);
+	enum i40iw_status_code (*commit_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
+						    struct i40iw_dma_mem *, bool, u8);
+	enum i40iw_status_code (*query_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
+						   struct i40iw_dma_mem *, bool, u8);
+	enum i40iw_status_code (*static_hmc_pages_allocated)(struct i40iw_sc_cqp *,
+							     u64, u8, bool, bool);
+	enum i40iw_status_code (*add_arp_cache_entry)(struct i40iw_sc_cqp *,
+						      struct i40iw_add_arp_cache_entry_info *,
+						      u64, bool);
+	enum i40iw_status_code (*del_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
+	enum i40iw_status_code (*query_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
+	enum i40iw_status_code (*manage_apbvt_entry)(struct i40iw_sc_cqp *,
+						     struct i40iw_apbvt_info *, u64, bool);
+	enum i40iw_status_code (*manage_qhash_table_entry)(struct i40iw_sc_cqp *,
+							   struct i40iw_qhash_table_info *, u64, bool);
+	enum i40iw_status_code (*alloc_local_mac_ipaddr_table_entry)(struct i40iw_sc_cqp *, u64, bool);
+	enum i40iw_status_code (*add_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *,
+							     struct i40iw_local_mac_ipaddr_entry_info *,
+							     u64, bool);
+	enum i40iw_status_code (*del_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *, u64, u8, u8, bool);
+	enum i40iw_status_code (*cqp_nop)(struct i40iw_sc_cqp *, u64, bool);
+	enum i40iw_status_code (*commit_fpm_values_done)(struct i40iw_sc_cqp
+							  *);
+	enum i40iw_status_code (*query_fpm_values_done)(struct i40iw_sc_cqp *);
+	enum i40iw_status_code (*manage_hmc_pm_func_table_done)(struct i40iw_sc_cqp *);
+	enum i40iw_status_code (*update_suspend_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
+	enum i40iw_status_code (*update_resume_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
+};
+
+struct i40iw_hmc_ops {
+	enum i40iw_status_code (*init_iw_hmc)(struct i40iw_sc_dev *, u8);
+	enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *,
+						      struct i40iw_hmc_fpm_misc *);
+	enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8);
+	enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *);
+	enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev,
+						    struct i40iw_hmc_create_obj_info *);
+	enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
+						 struct i40iw_hmc_del_obj_info *,
+						 bool reset);
+	enum i40iw_status_code (*pf_init_vfhmc)(struct i40iw_sc_dev *, u8, u32 *);
+	enum i40iw_status_code (*vf_configure_vffpm)(struct i40iw_sc_dev *, u32 *);
+};
+
+struct cqp_info {
+	union {
+		struct {
+			struct i40iw_sc_qp *qp;
+			struct i40iw_create_qp_info info;
+			u64 scratch;
+		} qp_create;
+
+		struct {
+			struct i40iw_sc_qp *qp;
+			struct i40iw_modify_qp_info info;
+			u64 scratch;
+		} qp_modify;
+
+		struct {
+			struct i40iw_sc_qp *qp;
+			u64 scratch;
+			bool remove_hash_idx;
+			bool ignore_mw_bnd;
+		} qp_destroy;
+
+		struct {
+			struct i40iw_sc_cq *cq;
+			u64 scratch;
+			bool check_overflow;
+		} cq_create;
+
+		struct {
+			struct i40iw_sc_cq *cq;
+			u64 scratch;
+		} cq_destroy;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_allocate_stag_info info;
+			u64 scratch;
+		} alloc_stag;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			u64 scratch;
+			u32 mw_stag_index;
+			u16 pd_id;
+		} mw_alloc;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_reg_ns_stag_info info;
+			u64 scratch;
+		} mr_reg_non_shared;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_dealloc_stag_info info;
+			u64 scratch;
+		} dealloc_stag;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_local_mac_ipaddr_entry_info info;
+			u64 scratch;
+		} add_local_mac_ipaddr_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_add_arp_cache_entry_info info;
+			u64 scratch;
+		} add_arp_cache_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			u64 scratch;
+			u8 entry_idx;
+			u8 ignore_ref_count;
+		} del_local_mac_ipaddr_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			u64 scratch;
+			u16 arp_index;
+		} del_arp_cache_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_manage_vf_pble_info info;
+			u64 scratch;
+		} manage_vf_pble_bp;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_cqp_manage_push_page_info info;
+			u64 scratch;
+		} manage_push_page;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_upload_context_info info;
+			u64 scratch;
+		} qp_upload_context;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			u64 scratch;
+		} alloc_local_mac_ipaddr_entry;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_hmc_fcn_info info;
+			u64 scratch;
+		} manage_hmc_pm;
+
+		struct {
+			struct i40iw_sc_ceq *ceq;
+			u64 scratch;
+		} ceq_create;
+
+		struct {
+			struct i40iw_sc_ceq *ceq;
+			u64 scratch;
+		} ceq_destroy;
+
+		struct {
+			struct i40iw_sc_aeq *aeq;
+			u64 scratch;
+		} aeq_create;
+
+		struct {
+			struct i40iw_sc_aeq *aeq;
+			u64 scratch;
+		} aeq_destroy;
+
+		struct {
+			struct i40iw_sc_qp *qp;
+			struct i40iw_qp_flush_info info;
+			u64 scratch;
+		} qp_flush_wqes;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			void *fpm_values_va;
+			u64 fpm_values_pa;
+			u8 hmc_fn_id;
+			u64 scratch;
+		} query_fpm_values;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			void *fpm_values_va;
+			u64 fpm_values_pa;
+			u8 hmc_fn_id;
+			u64 scratch;
+		} commit_fpm_values;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_apbvt_info info;
+			u64 scratch;
+		} manage_apbvt_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_qhash_table_info info;
+			u64 scratch;
+		} manage_qhash_table_entry;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_update_sds_info info;
+			u64 scratch;
+		} update_pe_sds;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_sc_qp *qp;
+			u64 scratch;
+		} suspend_resume;
+	} u;
+};
+
+struct cqp_commands_info {
+	struct list_head cqp_cmd_entry;
+	u8 cqp_cmd;
+	u8 post_sq;
+	struct cqp_info in;
+};
+
+struct i40iw_virtchnl_work_info {
+	void (*callback_fcn)(void *vf_dev);
+	void *worker_vf_dev;
+};
+
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
new file mode 100644
index 0000000..12acd68
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h

@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2006 - 2016 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef I40IW_USER_CONTEXT_H
+#define I40IW_USER_CONTEXT_H
+
+#include <linux/types.h>
+
+#define I40IW_ABI_USERSPACE_VER 4
+#define I40IW_ABI_KERNEL_VER    4
+struct i40iw_alloc_ucontext_req {
+	__u32 reserved32;
+	__u8 userspace_ver;
+	__u8 reserved8[3];
+};
+
+struct i40iw_alloc_ucontext_resp {
+	__u32 max_pds;		/* maximum pds allowed for this user process */
+	__u32 max_qps;		/* maximum qps allowed for this user process */
+	__u32 wq_size;		/* size of the WQs (sq+rq) allocated to the mmaped area */
+	__u8 kernel_ver;
+	__u8 reserved[3];
+};
+
+struct i40iw_alloc_pd_resp {
+	__u32 pd_id;
+	__u8 reserved[4];
+};
+
+struct i40iw_create_cq_req {
+	__u64 user_cq_buffer;
+	__u64 user_shadow_area;
+};
+
+struct i40iw_create_qp_req {
+	__u64 user_wqe_buffers;
+	__u64 user_compl_ctx;
+
+	/* UDA QP PHB */
+	__u64 user_sq_phb;	/* place for VA of the sq phb buff */
+	__u64 user_rq_phb;	/* place for VA of the rq phb buff */
+};
+
+enum i40iw_memreg_type {
+	IW_MEMREG_TYPE_MEM = 0x0000,
+	IW_MEMREG_TYPE_QP = 0x0001,
+	IW_MEMREG_TYPE_CQ = 0x0002,
+};
+
+struct i40iw_mem_reg_req {
+	__u16 reg_type;		/* Memory, QP or CQ */
+	__u16 cq_pages;
+	__u16 rq_pages;
+	__u16 sq_pages;
+};
+
+struct i40iw_create_cq_resp {
+	__u32 cq_id;
+	__u32 cq_size;
+	__u32 mmap_db_index;
+	__u32 reserved;
+};
+
+struct i40iw_create_qp_resp {
+	__u32 qp_id;
+	__u32 actual_sq_size;
+	__u32 actual_rq_size;
+	__u32 i40iw_drv_opt;
+	__u16 push_idx;
+	__u8  lsmm;
+	__u8  rsvd2;
+};
+
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
new file mode 100644
index 0000000..f78c3dc
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c

@@ -0,0 +1,1204 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_status.h"
+#include "i40iw_d.h"
+#include "i40iw_user.h"
+#include "i40iw_register.h"
+
+static u32 nop_signature = 0x55550000;
+
+/**
+ * i40iw_nop_1 - insert a nop wqe and move head. no post work
+ * @qp: hw qp ptr
+ */
+static enum i40iw_status_code i40iw_nop_1(struct i40iw_qp_uk *qp)
+{
+	u64 header, *wqe;
+	u64 *wqe_0 = NULL;
+	u32 wqe_idx, peek_head;
+	bool signaled = false;
+
+	if (!qp->sq_ring.head)
+		return I40IW_ERR_PARAM;
+
+	wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+	wqe = qp->sq_base[wqe_idx].elem;
+	peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size;
+	wqe_0 = qp->sq_base[peek_head].elem;
+	if (peek_head)
+		wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+	else
+		wqe_0[3] = LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8, 0);
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+	    LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID) | nop_signature++;
+
+	wmb();	/* Memory barrier to ensure data is written before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+	return 0;
+}
+
+/**
+ * i40iw_qp_post_wr - post wr to hrdware
+ * @qp: hw qp ptr
+ */
+void i40iw_qp_post_wr(struct i40iw_qp_uk *qp)
+{
+	u64 temp;
+	u32 hw_sq_tail;
+	u32 sw_sq_head;
+
+	mb(); /* valid bit is written and loads completed before reading shadow */
+
+	/* read the doorbell shadow area */
+	get_64bit_val(qp->shadow_area, 0, &temp);
+
+	hw_sq_tail = (u32)RS_64(temp, I40IW_QP_DBSA_HW_SQ_TAIL);
+	sw_sq_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+	if (sw_sq_head != hw_sq_tail) {
+		if (sw_sq_head > qp->initial_ring.head) {
+			if ((hw_sq_tail >= qp->initial_ring.head) &&
+			    (hw_sq_tail < sw_sq_head)) {
+				writel(qp->qp_id, qp->wqe_alloc_reg);
+			}
+		} else if (sw_sq_head != qp->initial_ring.head) {
+			if ((hw_sq_tail >= qp->initial_ring.head) ||
+			    (hw_sq_tail < sw_sq_head)) {
+				writel(qp->qp_id, qp->wqe_alloc_reg);
+			}
+		}
+	}
+
+	qp->initial_ring.head = qp->sq_ring.head;
+}
+
+/**
+ * i40iw_qp_ring_push_db -  ring qp doorbell
+ * @qp: hw qp ptr
+ * @wqe_idx: wqe index
+ */
+static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx)
+{
+	set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id);
+	qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+}
+
+/**
+ * i40iw_qp_get_next_send_wqe - return next wqe ptr
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ * @wqe_size: size of sq wqe
+ */
+u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
+				u32 *wqe_idx,
+				u8 wqe_size)
+{
+	u64 *wqe = NULL;
+	u64 wqe_ptr;
+	u32 peek_head = 0;
+	u16 offset;
+	enum i40iw_status_code ret_code = 0;
+	u8 nop_wqe_cnt = 0, i;
+	u64 *wqe_0 = NULL;
+
+	*wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+
+	if (!*wqe_idx)
+		qp->swqe_polarity = !qp->swqe_polarity;
+	wqe_ptr = (uintptr_t)qp->sq_base[*wqe_idx].elem;
+	offset = (u16)(wqe_ptr) & 0x7F;
+	if ((offset + wqe_size) > I40IW_QP_WQE_MAX_SIZE) {
+		nop_wqe_cnt = (u8)(I40IW_QP_WQE_MAX_SIZE - offset) / I40IW_QP_WQE_MIN_SIZE;
+		for (i = 0; i < nop_wqe_cnt; i++) {
+			i40iw_nop_1(qp);
+			I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+			if (ret_code)
+				return NULL;
+		}
+
+		*wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+		if (!*wqe_idx)
+			qp->swqe_polarity = !qp->swqe_polarity;
+	}
+	for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
+		I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+		if (ret_code)
+			return NULL;
+	}
+
+	wqe = qp->sq_base[*wqe_idx].elem;
+
+	peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+	wqe_0 = qp->sq_base[peek_head].elem;
+	if (peek_head & 0x3)
+		wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+	return wqe;
+}
+
+/**
+ * i40iw_set_fragment - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: offset value
+ * @sge: sge length and stag
+ */
+static void i40iw_set_fragment(u64 *wqe, u32 offset, struct i40iw_sge *sge)
+{
+	if (sge) {
+		set_64bit_val(wqe, offset, LS_64(sge->tag_off, I40IWQPSQ_FRAG_TO));
+		set_64bit_val(wqe, (offset + 8),
+			      (LS_64(sge->len, I40IWQPSQ_FRAG_LEN) |
+			       LS_64(sge->stag, I40IWQPSQ_FRAG_STAG)));
+	}
+}
+
+/**
+ * i40iw_qp_get_next_recv_wqe - get next qp's rcv wqe
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ */
+u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
+{
+	u64 *wqe = NULL;
+	enum i40iw_status_code ret_code;
+
+	if (I40IW_RING_FULL_ERR(qp->rq_ring))
+		return NULL;
+
+	I40IW_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
+	if (ret_code)
+		return NULL;
+	if (!*wqe_idx)
+		qp->rwqe_polarity = !qp->rwqe_polarity;
+	/* rq_wqe_size_multiplier is no of qwords in one rq wqe */
+	wqe = qp->rq_base[*wqe_idx * (qp->rq_wqe_size_multiplier >> 2)].elem;
+
+	return wqe;
+}
+
+/**
+ * i40iw_rdma_write - rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
+					       struct i40iw_post_sq_info *info,
+					       bool post_sq)
+{
+	u64 header;
+	u64 *wqe;
+	struct i40iw_rdma_write *op_info;
+	u32 i, wqe_idx;
+	u32 total_size = 0, byte_off;
+	enum i40iw_status_code ret_code;
+	bool read_fence = false;
+	u8 wqe_size;
+
+	op_info = &info->op.rdma_write;
+	if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+
+	for (i = 0; i < op_info->num_lo_sges; i++)
+		total_size += op_info->lo_sg_list[i].len;
+
+	if (total_size > I40IW_MAX_OUTBOUND_MESSAGE_SIZE)
+		return I40IW_ERR_QP_INVALID_MSG_SIZE;
+
+	read_fence |= info->read_fence;
+
+	ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_lo_sges, &wqe_size);
+	if (ret_code)
+		return ret_code;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
+	set_64bit_val(wqe, 16,
+		      LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+	if (!op_info->rem_addr.stag)
+		return I40IW_ERR_BAD_STAG;
+
+	header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+		 LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+		 LS_64((op_info->num_lo_sges > 1 ?  (op_info->num_lo_sges - 1) : 0), I40IWQPSQ_ADDFRAGCNT) |
+		 LS_64(read_fence, I40IWQPSQ_READFENCE) |
+		 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+		 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+		 LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
+
+	for (i = 1; i < op_info->num_lo_sges; i++) {
+		byte_off = 32 + (i - 1) * 16;
+		i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_rdma_read - rdma read command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @inv_stag: flag for inv_stag
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_rdma_read(struct i40iw_qp_uk *qp,
+					      struct i40iw_post_sq_info *info,
+					      bool inv_stag,
+					      bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_rdma_read *op_info;
+	u64 header;
+	u32 wqe_idx;
+	enum i40iw_status_code ret_code;
+	u8 wqe_size;
+	bool local_fence = false;
+
+	op_info = &info->op.rdma_read;
+	ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size);
+	if (ret_code)
+		return ret_code;
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->lo_addr.len;
+	local_fence |= info->local_fence;
+
+	set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+	header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+		 LS_64((inv_stag ? I40IWQP_OP_RDMA_READ_LOC_INV : I40IWQP_OP_RDMA_READ), I40IWQPSQ_OPCODE) |
+		 LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+		 LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+		 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+		 LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_set_fragment(wqe, 0, &op_info->lo_addr);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_send - rdma send command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @stag_to_inv: stag_to_inv value
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
+					 struct i40iw_post_sq_info *info,
+					 u32 stag_to_inv,
+					 bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_post_send *op_info;
+	u64 header;
+	u32 i, wqe_idx, total_size = 0, byte_off;
+	enum i40iw_status_code ret_code;
+	bool read_fence = false;
+	u8 wqe_size;
+
+	op_info = &info->op.send;
+	if (qp->max_sq_frag_cnt < op_info->num_sges)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+
+	for (i = 0; i < op_info->num_sges; i++)
+		total_size += op_info->sg_list[i].len;
+	ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_sges, &wqe_size);
+	if (ret_code)
+		return ret_code;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	read_fence |= info->read_fence;
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
+	set_64bit_val(wqe, 16, 0);
+	header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
+		 LS_64(info->op_type, I40IWQPSQ_OPCODE) |
+		 LS_64((op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0),
+		       I40IWQPSQ_ADDFRAGCNT) |
+		 LS_64(read_fence, I40IWQPSQ_READFENCE) |
+		 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+		 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+		 LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_set_fragment(wqe, 0, op_info->sg_list);
+
+	for (i = 1; i < op_info->num_sges; i++) {
+		byte_off = 32 + (i - 1) * 16;
+		i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_inline_rdma_write - inline rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
+						      struct i40iw_post_sq_info *info,
+						      bool post_sq)
+{
+	u64 *wqe;
+	u8 *dest, *src;
+	struct i40iw_inline_rdma_write *op_info;
+	u64 *push;
+	u64 header = 0;
+	u32 i, wqe_idx;
+	enum i40iw_status_code ret_code;
+	bool read_fence = false;
+	u8 wqe_size;
+
+	op_info = &info->op.inline_rdma_write;
+	if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
+		return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+	ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
+	if (ret_code)
+		return ret_code;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	read_fence |= info->read_fence;
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
+	set_64bit_val(wqe, 16,
+		      LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+
+	header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+		 LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+		 LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
+		 LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
+		 LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
+		 LS_64(read_fence, I40IWQPSQ_READFENCE) |
+		 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+		 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+		 LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	dest = (u8 *)wqe;
+	src = (u8 *)(op_info->data);
+
+	if (op_info->len <= 16) {
+		for (i = 0; i < op_info->len; i++, src++, dest++)
+			*dest = *src;
+	} else {
+		for (i = 0; i < 16; i++, src++, dest++)
+			*dest = *src;
+		dest = (u8 *)wqe + 32;
+		for (; i < op_info->len; i++, src++, dest++)
+			*dest = *src;
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (qp->push_db) {
+		push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
+		memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
+		i40iw_qp_ring_push_db(qp, wqe_idx);
+	} else {
+		if (post_sq)
+			i40iw_qp_post_wr(qp);
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_inline_send - inline send operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @stag_to_inv: remote stag
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
+						struct i40iw_post_sq_info *info,
+						u32 stag_to_inv,
+						bool post_sq)
+{
+	u64 *wqe;
+	u8 *dest, *src;
+	struct i40iw_post_inline_send *op_info;
+	u64 header;
+	u32 wqe_idx, i;
+	enum i40iw_status_code ret_code;
+	bool read_fence = false;
+	u8 wqe_size;
+	u64 *push;
+
+	op_info = &info->op.inline_send;
+	if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
+		return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+	ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
+	if (ret_code)
+		return ret_code;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	read_fence |= info->read_fence;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
+	header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
+	    LS_64(info->op_type, I40IWQPSQ_OPCODE) |
+	    LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
+	    LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
+	    LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
+	    LS_64(read_fence, I40IWQPSQ_READFENCE) |
+	    LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+	    LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	dest = (u8 *)wqe;
+	src = (u8 *)(op_info->data);
+
+	if (op_info->len <= 16) {
+		for (i = 0; i < op_info->len; i++, src++, dest++)
+			*dest = *src;
+	} else {
+		for (i = 0; i < 16; i++, src++, dest++)
+			*dest = *src;
+		dest = (u8 *)wqe + 32;
+		for (; i < op_info->len; i++, src++, dest++)
+			*dest = *src;
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (qp->push_db) {
+		push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
+		memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
+		i40iw_qp_ring_push_db(qp, wqe_idx);
+	} else {
+		if (post_sq)
+			i40iw_qp_post_wr(qp);
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_stag_local_invalidate - stag invalidate operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_stag_local_invalidate(struct i40iw_qp_uk *qp,
+							  struct i40iw_post_sq_info *info,
+							  bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_inv_local_stag *op_info;
+	u64 header;
+	u32 wqe_idx;
+	bool local_fence = false;
+
+	op_info = &info->op.inv_local_stag;
+	local_fence = info->local_fence;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8,
+		      LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG));
+	set_64bit_val(wqe, 16, 0);
+	header = LS_64(I40IW_OP_TYPE_INV_STAG, I40IWQPSQ_OPCODE) |
+	    LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+	    LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+	    LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_mw_bind - Memory Window bind operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_mw_bind(struct i40iw_qp_uk *qp,
+					    struct i40iw_post_sq_info *info,
+					    bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_bind_window *op_info;
+	u64 header;
+	u32 wqe_idx;
+	bool local_fence = false;
+
+	op_info = &info->op.bind_window;
+
+	local_fence |= info->local_fence;
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+	set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+	set_64bit_val(wqe, 8,
+		      LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) |
+		      LS_64(op_info->mw_stag, I40IWQPSQ_MWSTAG));
+	set_64bit_val(wqe, 16, op_info->bind_length);
+	header = LS_64(I40IW_OP_TYPE_BIND_MW, I40IWQPSQ_OPCODE) |
+	    LS_64(((op_info->enable_reads << 2) |
+		   (op_info->enable_writes << 3)),
+		  I40IWQPSQ_STAGRIGHTS) |
+	    LS_64((op_info->addressing_type == I40IW_ADDR_TYPE_VA_BASED ?  1 : 0),
+		  I40IWQPSQ_VABASEDTO) |
+	    LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+	    LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+	    LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_post_receive - post receive wqe
+ * @qp: hw qp ptr
+ * @info: post rq information
+ */
+static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp,
+						 struct i40iw_post_rq_info *info)
+{
+	u64 *wqe;
+	u64 header;
+	u32 total_size = 0, wqe_idx, i, byte_off;
+
+	if (qp->max_rq_frag_cnt < info->num_sges)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+	for (i = 0; i < info->num_sges; i++)
+		total_size += info->sg_list[i].len;
+	wqe = i40iw_qp_get_next_recv_wqe(qp, &wqe_idx);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->rq_wrid_array[wqe_idx] = info->wr_id;
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64((info->num_sges > 1 ? (info->num_sges - 1) : 0),
+		       I40IWQPSQ_ADDFRAGCNT) |
+	    LS_64(qp->rwqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_set_fragment(wqe, 0, info->sg_list);
+
+	for (i = 1; i < info->num_sges; i++) {
+		byte_off = 32 + (i - 1) * 16;
+		i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	return 0;
+}
+
+/**
+ * i40iw_cq_request_notification - cq notification request (door bell)
+ * @cq: hw cq
+ * @cq_notify: notification type
+ */
+static void i40iw_cq_request_notification(struct i40iw_cq_uk *cq,
+					  enum i40iw_completion_notify cq_notify)
+{
+	u64 temp_val;
+	u16 sw_cq_sel;
+	u8 arm_next_se = 0;
+	u8 arm_next = 0;
+	u8 arm_seq_num;
+
+	get_64bit_val(cq->shadow_area, 32, &temp_val);
+	arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
+	arm_seq_num++;
+
+	sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
+	arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
+	arm_next_se |= 1;
+	if (cq_notify == IW_CQ_COMPL_EVENT)
+		arm_next = 1;
+	temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
+	    LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
+	    LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
+	    LS_64(arm_next, I40IW_CQ_DBSA_ARM_NEXT);
+
+	set_64bit_val(cq->shadow_area, 32, temp_val);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	writel(cq->cq_id, cq->cqe_alloc_reg);
+}
+
+/**
+ * i40iw_cq_post_entries - update tail in shadow memory
+ * @cq: hw cq
+ * @count: # of entries processed
+ */
+static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
+						    u8 count)
+{
+	I40IW_RING_MOVE_TAIL_BY_COUNT(cq->cq_ring, count);
+	set_64bit_val(cq->shadow_area, 0,
+		      I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
+	return 0;
+}
+
+/**
+ * i40iw_cq_poll_completion - get cq completion info
+ * @cq: hw cq
+ * @info: cq poll information returned
+ * @post_cq: update cq tail
+ */
+static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
+						       struct i40iw_cq_poll_info *info,
+						       bool post_cq)
+{
+	u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
+	u64 *cqe, *sw_wqe;
+	struct i40iw_qp_uk *qp;
+	struct i40iw_ring *pring = NULL;
+	u32 wqe_idx, q_type, array_idx = 0;
+	enum i40iw_status_code ret_code = 0;
+	enum i40iw_status_code ret_code2 = 0;
+	bool move_cq_head = true;
+	u8 polarity;
+	u8 addl_frag_cnt, addl_wqes = 0;
+
+	if (cq->avoid_mem_cflct)
+		cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq);
+	else
+		cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(cq);
+
+	get_64bit_val(cqe, 24, &qword3);
+	polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
+
+	if (polarity != cq->polarity)
+		return I40IW_ERR_QUEUE_EMPTY;
+
+	q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
+	info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
+	info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP);
+	if (info->error) {
+		info->comp_status = I40IW_COMPL_STATUS_FLUSHED;
+		info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR);
+		info->minor_err = (bool)RS_64(qword3, I40IW_CQ_MINERR);
+	} else {
+		info->comp_status = I40IW_COMPL_STATUS_SUCCESS;
+	}
+
+	get_64bit_val(cqe, 0, &qword0);
+	get_64bit_val(cqe, 16, &qword2);
+
+	info->tcp_seq_num = (u8)RS_64(qword0, I40IWCQ_TCPSEQNUM);
+
+	info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
+
+	get_64bit_val(cqe, 8, &comp_ctx);
+
+	info->solicited_event = (bool)RS_64(qword3, I40IWCQ_SOEVENT);
+	info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ);
+
+	qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
+	wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
+	info->qp_handle = (i40iw_qp_handle)(unsigned long)qp;
+
+	if (q_type == I40IW_CQE_QTYPE_RQ) {
+		array_idx = (wqe_idx * 4) / qp->rq_wqe_size_multiplier;
+		if (info->comp_status == I40IW_COMPL_STATUS_FLUSHED) {
+			info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail];
+			array_idx = qp->rq_ring.tail;
+		} else {
+			info->wr_id = qp->rq_wrid_array[array_idx];
+		}
+
+		info->op_type = I40IW_OP_TYPE_REC;
+		if (qword3 & I40IWCQ_STAG_MASK) {
+			info->stag_invalid_set = true;
+			info->inv_stag = (u32)RS_64(qword2, I40IWCQ_INVSTAG);
+		} else {
+			info->stag_invalid_set = false;
+		}
+		info->bytes_xfered = (u32)RS_64(qword0, I40IWCQ_PAYLDLEN);
+		I40IW_RING_SET_TAIL(qp->rq_ring, array_idx + 1);
+		pring = &qp->rq_ring;
+	} else {
+		if (info->comp_status != I40IW_COMPL_STATUS_FLUSHED) {
+			info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+			info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
+
+			info->op_type = (u8)RS_64(qword3, I40IWCQ_OP);
+			sw_wqe = qp->sq_base[wqe_idx].elem;
+			get_64bit_val(sw_wqe, 24, &wqe_qword);
+			addl_frag_cnt =
+			    (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
+			i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
+
+			addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+			I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes));
+		} else {
+			do {
+				u8 op_type;
+				u32 tail;
+
+				tail = qp->sq_ring.tail;
+				sw_wqe = qp->sq_base[tail].elem;
+				get_64bit_val(sw_wqe, 24, &wqe_qword);
+				op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE);
+				info->op_type = op_type;
+				addl_frag_cnt = (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
+				i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
+				addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+				I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes));
+				if (op_type != I40IWQP_OP_NOP) {
+					info->wr_id = qp->sq_wrtrk_array[tail].wrid;
+					info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
+					break;
+				}
+			} while (1);
+		}
+		pring = &qp->sq_ring;
+	}
+
+	ret_code = 0;
+
+	if (!ret_code &&
+	    (info->comp_status == I40IW_COMPL_STATUS_FLUSHED))
+		if (pring && (I40IW_RING_MORE_WORK(*pring)))
+			move_cq_head = false;
+
+	if (move_cq_head) {
+		I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2);
+
+		if (ret_code2 && !ret_code)
+			ret_code = ret_code2;
+
+		if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
+			cq->polarity ^= 1;
+
+		if (post_cq) {
+			I40IW_RING_MOVE_TAIL(cq->cq_ring);
+			set_64bit_val(cq->shadow_area, 0,
+				      I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
+		}
+	} else {
+		if (info->is_srq)
+			return ret_code;
+		qword3 &= ~I40IW_CQ_WQEIDX_MASK;
+		qword3 |= LS_64(pring->tail, I40IW_CQ_WQEIDX);
+		set_64bit_val(cqe, 24, qword3);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_get_wqe_shift - get shift count for maximum wqe size
+ * @wqdepth: depth of wq required.
+ * @sge: Maximum Scatter Gather Elements wqe
+ * @shift: Returns the shift needed based on sge
+ *
+ * Shift can be used to left shift the wqe size based on sge.
+ * If sge, == 1, shift =0 (wqe_size of 32 bytes), for sge=2 and 3, shift =1
+ * (64 bytes wqes) and 2 otherwise (128 bytes wqe).
+ */
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift)
+{
+	u32 size;
+
+	*shift = 0;
+	if (sge > 1)
+		*shift = (sge < 4) ? 1 : 2;
+
+	/* check if wqdepth is multiple of 2 or not */
+
+	if ((wqdepth < I40IWQP_SW_MIN_WQSIZE) || (wqdepth & (wqdepth - 1)))
+		return I40IW_ERR_INVALID_SIZE;
+
+	size = wqdepth << *shift;	/* multiple of 32 bytes count */
+	if (size > I40IWQP_SW_MAX_WQSIZE)
+		return I40IW_ERR_INVALID_SIZE;
+	return 0;
+}
+
+static struct i40iw_qp_uk_ops iw_qp_uk_ops = {
+	i40iw_qp_post_wr,
+	i40iw_qp_ring_push_db,
+	i40iw_rdma_write,
+	i40iw_rdma_read,
+	i40iw_send,
+	i40iw_inline_rdma_write,
+	i40iw_inline_send,
+	i40iw_stag_local_invalidate,
+	i40iw_mw_bind,
+	i40iw_post_receive,
+	i40iw_nop
+};
+
+static struct i40iw_cq_ops iw_cq_ops = {
+	i40iw_cq_request_notification,
+	i40iw_cq_poll_completion,
+	i40iw_cq_post_entries,
+	i40iw_clean_cq
+};
+
+static struct i40iw_device_uk_ops iw_device_uk_ops = {
+	i40iw_cq_uk_init,
+	i40iw_qp_uk_init,
+};
+
+/**
+ * i40iw_qp_uk_init - initialize shared qp
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ *
+ * initializes the vars used in both user and kernel mode.
+ * size of the wqe depends on numbers of max. fragements
+ * allowed. Then size of wqe * the number of wqes should be the
+ * amount of memory allocated for sq and rq. If srq is used,
+ * then rq_base will point to one rq wqe only (not the whole
+ * array of wqes)
+ */
+enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
+					struct i40iw_qp_uk_init_info *info)
+{
+	enum i40iw_status_code ret_code = 0;
+	u32 sq_ring_size;
+	u8 sqshift, rqshift;
+
+	if (info->max_sq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+
+	if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+	ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, &sqshift);
+	if (ret_code)
+		return ret_code;
+
+	ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, &rqshift);
+	if (ret_code)
+		return ret_code;
+
+	qp->sq_base = info->sq;
+	qp->rq_base = info->rq;
+	qp->shadow_area = info->shadow_area;
+	qp->sq_wrtrk_array = info->sq_wrtrk_array;
+	qp->rq_wrid_array = info->rq_wrid_array;
+
+	qp->wqe_alloc_reg = info->wqe_alloc_reg;
+	qp->qp_id = info->qp_id;
+
+	qp->sq_size = info->sq_size;
+	qp->push_db = info->push_db;
+	qp->push_wqe = info->push_wqe;
+
+	qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
+	sq_ring_size = qp->sq_size << sqshift;
+
+	I40IW_RING_INIT(qp->sq_ring, sq_ring_size);
+	I40IW_RING_INIT(qp->initial_ring, sq_ring_size);
+	I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+	I40IW_RING_MOVE_TAIL(qp->sq_ring);
+	I40IW_RING_MOVE_HEAD(qp->initial_ring, ret_code);
+	qp->swqe_polarity = 1;
+	qp->swqe_polarity_deferred = 1;
+	qp->rwqe_polarity = 0;
+
+	if (!qp->use_srq) {
+		qp->rq_size = info->rq_size;
+		qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
+		qp->rq_wqe_size = rqshift;
+		I40IW_RING_INIT(qp->rq_ring, qp->rq_size);
+		qp->rq_wqe_size_multiplier = 4 << rqshift;
+	}
+	qp->ops = iw_qp_uk_ops;
+
+	return ret_code;
+}
+
+/**
+ * i40iw_cq_uk_init - initialize shared cq (user and kernel)
+ * @cq: hw cq
+ * @info: hw cq initialization info
+ */
+enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
+					struct i40iw_cq_uk_init_info *info)
+{
+	if ((info->cq_size < I40IW_MIN_CQ_SIZE) ||
+	    (info->cq_size > I40IW_MAX_CQ_SIZE))
+		return I40IW_ERR_INVALID_SIZE;
+	cq->cq_base = (struct i40iw_cqe *)info->cq_base;
+	cq->cq_id = info->cq_id;
+	cq->cq_size = info->cq_size;
+	cq->cqe_alloc_reg = info->cqe_alloc_reg;
+	cq->shadow_area = info->shadow_area;
+	cq->avoid_mem_cflct = info->avoid_mem_cflct;
+
+	I40IW_RING_INIT(cq->cq_ring, cq->cq_size);
+	cq->polarity = 1;
+	cq->ops = iw_cq_ops;
+
+	return 0;
+}
+
+/**
+ * i40iw_device_init_uk - setup routines for iwarp shared device
+ * @dev: iwarp shared (user and kernel)
+ */
+void i40iw_device_init_uk(struct i40iw_dev_uk *dev)
+{
+	dev->ops_uk = iw_device_uk_ops;
+}
+
+/**
+ * i40iw_clean_cq - clean cq entries
+ * @ queue completion context
+ * @cq: cq to clean
+ */
+void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq)
+{
+	u64 *cqe;
+	u64 qword3, comp_ctx;
+	u32 cq_head;
+	u8 polarity, temp;
+
+	cq_head = cq->cq_ring.head;
+	temp = cq->polarity;
+	do {
+		if (cq->avoid_mem_cflct)
+			cqe = (u64 *)&(((struct i40iw_extended_cqe *)cq->cq_base)[cq_head]);
+		else
+			cqe = (u64 *)&cq->cq_base[cq_head];
+		get_64bit_val(cqe, 24, &qword3);
+		polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
+
+		if (polarity != temp)
+			break;
+
+		get_64bit_val(cqe, 8, &comp_ctx);
+		if ((void *)(unsigned long)comp_ctx == queue)
+			set_64bit_val(cqe, 8, 0);
+
+		cq_head = (cq_head + 1) % cq->cq_ring.size;
+		if (!cq_head)
+			temp ^= 1;
+	} while (true);
+}
+
+/**
+ * i40iw_nop - send a nop
+ * @qp: hw qp ptr
+ * @wr_id: work request id
+ * @signaled: flag if signaled for completion
+ * @post_sq: flag to post sq
+ */
+enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
+				 u64 wr_id,
+				 bool signaled,
+				 bool post_sq)
+{
+	u64 header, *wqe;
+	u32 wqe_idx;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8, 0);
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+	    LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_fragcnt_to_wqesize_sq - calculate wqe size based on fragment count for SQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size of sq wqe returned
+ */
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size)
+{
+	switch (frag_cnt) {
+	case 0:
+	case 1:
+		*wqe_size = I40IW_QP_WQE_MIN_SIZE;
+		break;
+	case 2:
+	case 3:
+		*wqe_size = 64;
+		break;
+	case 4:
+	case 5:
+		*wqe_size = 96;
+		break;
+	case 6:
+	case 7:
+		*wqe_size = 128;
+		break;
+	default:
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size of rq wqe returned
+ */
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size)
+{
+	switch (frag_cnt) {
+	case 0:
+	case 1:
+		*wqe_size = 32;
+		break;
+	case 2:
+	case 3:
+		*wqe_size = 64;
+		break;
+	case 4:
+	case 5:
+	case 6:
+	case 7:
+		*wqe_size = 128;
+		break;
+	default:
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_inline_data_size_to_wqesize - based on inline data, wqe size
+ * @data_size: data size for inline
+ * @wqe_size: size of sq wqe returned
+ */
+enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
+							 u8 *wqe_size)
+{
+	if (data_size > I40IW_MAX_INLINE_DATA_SIZE)
+		return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+	if (data_size <= 16)
+		*wqe_size = I40IW_QP_WQE_MIN_SIZE;
+	else if (data_size <= 48)
+		*wqe_size = 64;
+	else if (data_size <= 80)
+		*wqe_size = 96;
+	else
+		*wqe_size = 128;
+
+	return 0;
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
new file mode 100644
index 0000000..5cd971b
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h

@@ -0,0 +1,442 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_USER_H
+#define I40IW_USER_H
+
+enum i40iw_device_capabilities_const {
+	I40IW_WQE_SIZE =			4,
+	I40IW_CQP_WQE_SIZE =			8,
+	I40IW_CQE_SIZE =			4,
+	I40IW_EXTENDED_CQE_SIZE =		8,
+	I40IW_AEQE_SIZE =			2,
+	I40IW_CEQE_SIZE =			1,
+	I40IW_CQP_CTX_SIZE =			8,
+	I40IW_SHADOW_AREA_SIZE =		8,
+	I40IW_CEQ_MAX_COUNT =			256,
+	I40IW_QUERY_FPM_BUF_SIZE =		128,
+	I40IW_COMMIT_FPM_BUF_SIZE =		128,
+	I40IW_MIN_IW_QP_ID =			1,
+	I40IW_MAX_IW_QP_ID =			262143,
+	I40IW_MIN_CEQID =			0,
+	I40IW_MAX_CEQID =			256,
+	I40IW_MIN_CQID =			0,
+	I40IW_MAX_CQID =			131071,
+	I40IW_MIN_AEQ_ENTRIES =			1,
+	I40IW_MAX_AEQ_ENTRIES =			524287,
+	I40IW_MIN_CEQ_ENTRIES =			1,
+	I40IW_MAX_CEQ_ENTRIES =			131071,
+	I40IW_MIN_CQ_SIZE =			1,
+	I40IW_MAX_CQ_SIZE =			1048575,
+	I40IW_MAX_AEQ_ALLOCATE_COUNT =		255,
+	I40IW_DB_ID_ZERO =			0,
+	I40IW_MAX_WQ_FRAGMENT_COUNT =		6,
+	I40IW_MAX_SGE_RD =			1,
+	I40IW_MAX_OUTBOUND_MESSAGE_SIZE =	2147483647,
+	I40IW_MAX_INBOUND_MESSAGE_SIZE =	2147483647,
+	I40IW_MAX_PUSH_PAGE_COUNT =		4096,
+	I40IW_MAX_PE_ENABLED_VF_COUNT =		32,
+	I40IW_MAX_VF_FPM_ID =			47,
+	I40IW_MAX_VF_PER_PF =			127,
+	I40IW_MAX_SQ_PAYLOAD_SIZE =		2145386496,
+	I40IW_MAX_INLINE_DATA_SIZE =		112,
+	I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE =	112,
+	I40IW_MAX_IRD_SIZE =			32,
+	I40IW_QPCTX_ENCD_MAXIRD =		3,
+	I40IW_MAX_WQ_ENTRIES =			2048,
+	I40IW_MAX_ORD_SIZE =			32,
+	I40IW_Q2_BUFFER_SIZE =			(248 + 100),
+	I40IW_QP_CTX_SIZE =			248
+};
+
+#define i40iw_handle void *
+#define i40iw_adapter_handle i40iw_handle
+#define i40iw_qp_handle i40iw_handle
+#define i40iw_cq_handle i40iw_handle
+#define i40iw_srq_handle i40iw_handle
+#define i40iw_pd_id i40iw_handle
+#define i40iw_stag_handle i40iw_handle
+#define i40iw_stag_index u32
+#define i40iw_stag u32
+#define i40iw_stag_key u8
+
+#define i40iw_tagged_offset u64
+#define i40iw_access_privileges u32
+#define i40iw_physical_fragment u64
+#define i40iw_address_list u64 *
+
+#define I40IW_CREATE_STAG(index, key)       (((index) << 8) + (key))
+
+#define I40IW_STAG_KEY_FROM_STAG(stag)      ((stag) && 0x000000FF)
+
+#define I40IW_STAG_INDEX_FROM_STAG(stag)    (((stag) && 0xFFFFFF00) >> 8)
+
+struct i40iw_qp_uk;
+struct i40iw_cq_uk;
+struct i40iw_srq_uk;
+struct i40iw_qp_uk_init_info;
+struct i40iw_cq_uk_init_info;
+struct i40iw_srq_uk_init_info;
+
+struct i40iw_sge {
+	i40iw_tagged_offset tag_off;
+	u32 len;
+	i40iw_stag stag;
+};
+
+#define i40iw_sgl struct i40iw_sge *
+
+struct i40iw_ring {
+	u32 head;
+	u32 tail;
+	u32 size;
+};
+
+struct i40iw_cqe {
+	u64 buf[I40IW_CQE_SIZE];
+};
+
+struct i40iw_extended_cqe {
+	u64 buf[I40IW_EXTENDED_CQE_SIZE];
+};
+
+struct i40iw_wqe {
+	u64 buf[I40IW_WQE_SIZE];
+};
+
+struct i40iw_qp_uk_ops;
+
+enum i40iw_addressing_type {
+	I40IW_ADDR_TYPE_ZERO_BASED = 0,
+	I40IW_ADDR_TYPE_VA_BASED = 1,
+};
+
+#define I40IW_ACCESS_FLAGS_LOCALREAD		0x01
+#define I40IW_ACCESS_FLAGS_LOCALWRITE		0x02
+#define I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY	0x04
+#define I40IW_ACCESS_FLAGS_REMOTEREAD		0x05
+#define I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY	0x08
+#define I40IW_ACCESS_FLAGS_REMOTEWRITE		0x0a
+#define I40IW_ACCESS_FLAGS_BIND_WINDOW		0x10
+#define I40IW_ACCESS_FLAGS_ALL			0x1F
+
+#define I40IW_OP_TYPE_RDMA_WRITE	0
+#define I40IW_OP_TYPE_RDMA_READ		1
+#define I40IW_OP_TYPE_SEND		3
+#define I40IW_OP_TYPE_SEND_INV		4
+#define I40IW_OP_TYPE_SEND_SOL		5
+#define I40IW_OP_TYPE_SEND_SOL_INV	6
+#define I40IW_OP_TYPE_REC		7
+#define I40IW_OP_TYPE_BIND_MW		8
+#define I40IW_OP_TYPE_FAST_REG_NSMR	9
+#define I40IW_OP_TYPE_INV_STAG		10
+#define I40IW_OP_TYPE_RDMA_READ_INV_STAG 11
+#define I40IW_OP_TYPE_NOP		12
+
+enum i40iw_completion_status {
+	I40IW_COMPL_STATUS_SUCCESS = 0,
+	I40IW_COMPL_STATUS_FLUSHED,
+	I40IW_COMPL_STATUS_INVALID_WQE,
+	I40IW_COMPL_STATUS_QP_CATASTROPHIC,
+	I40IW_COMPL_STATUS_REMOTE_TERMINATION,
+	I40IW_COMPL_STATUS_INVALID_STAG,
+	I40IW_COMPL_STATUS_BASE_BOUND_VIOLATION,
+	I40IW_COMPL_STATUS_ACCESS_VIOLATION,
+	I40IW_COMPL_STATUS_INVALID_PD_ID,
+	I40IW_COMPL_STATUS_WRAP_ERROR,
+	I40IW_COMPL_STATUS_STAG_INVALID_PDID,
+	I40IW_COMPL_STATUS_RDMA_READ_ZERO_ORD,
+	I40IW_COMPL_STATUS_QP_NOT_PRIVLEDGED,
+	I40IW_COMPL_STATUS_STAG_NOT_INVALID,
+	I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_SIZE,
+	I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_ENTRY,
+	I40IW_COMPL_STATUS_INVALID_FBO,
+	I40IW_COMPL_STATUS_INVALID_LENGTH,
+	I40IW_COMPL_STATUS_INVALID_ACCESS,
+	I40IW_COMPL_STATUS_PHYS_BUFFER_LIST_TOO_LONG,
+	I40IW_COMPL_STATUS_INVALID_VIRT_ADDRESS,
+	I40IW_COMPL_STATUS_INVALID_REGION,
+	I40IW_COMPL_STATUS_INVALID_WINDOW,
+	I40IW_COMPL_STATUS_INVALID_TOTAL_LENGTH
+};
+
+enum i40iw_completion_notify {
+	IW_CQ_COMPL_EVENT = 0,
+	IW_CQ_COMPL_SOLICITED = 1
+};
+
+struct i40iw_post_send {
+	i40iw_sgl sg_list;
+	u8 num_sges;
+};
+
+struct i40iw_post_inline_send {
+	void *data;
+	u32 len;
+};
+
+struct i40iw_post_send_w_inv {
+	i40iw_sgl sg_list;
+	u32 num_sges;
+	i40iw_stag remote_stag_to_inv;
+};
+
+struct i40iw_post_inline_send_w_inv {
+	void *data;
+	u32 len;
+	i40iw_stag remote_stag_to_inv;
+};
+
+struct i40iw_rdma_write {
+	i40iw_sgl lo_sg_list;
+	u8 num_lo_sges;
+	struct i40iw_sge rem_addr;
+};
+
+struct i40iw_inline_rdma_write {
+	void *data;
+	u32 len;
+	struct i40iw_sge rem_addr;
+};
+
+struct i40iw_rdma_read {
+	struct i40iw_sge lo_addr;
+	struct i40iw_sge rem_addr;
+};
+
+struct i40iw_bind_window {
+	i40iw_stag mr_stag;
+	u64 bind_length;
+	void *va;
+	enum i40iw_addressing_type addressing_type;
+	bool enable_reads;
+	bool enable_writes;
+	i40iw_stag mw_stag;
+};
+
+struct i40iw_inv_local_stag {
+	i40iw_stag target_stag;
+};
+
+struct i40iw_post_sq_info {
+	u64 wr_id;
+	u8 op_type;
+	bool signaled;
+	bool read_fence;
+	bool local_fence;
+	bool inline_data;
+	bool defer_flag;
+	union {
+		struct i40iw_post_send send;
+		struct i40iw_post_send send_w_sol;
+		struct i40iw_post_send_w_inv send_w_inv;
+		struct i40iw_post_send_w_inv send_w_sol_inv;
+		struct i40iw_rdma_write rdma_write;
+		struct i40iw_rdma_read rdma_read;
+		struct i40iw_rdma_read rdma_read_inv;
+		struct i40iw_bind_window bind_window;
+		struct i40iw_inv_local_stag inv_local_stag;
+		struct i40iw_inline_rdma_write inline_rdma_write;
+		struct i40iw_post_inline_send inline_send;
+		struct i40iw_post_inline_send inline_send_w_sol;
+		struct i40iw_post_inline_send_w_inv inline_send_w_inv;
+		struct i40iw_post_inline_send_w_inv inline_send_w_sol_inv;
+	} op;
+};
+
+struct i40iw_post_rq_info {
+	u64 wr_id;
+	i40iw_sgl sg_list;
+	u32 num_sges;
+};
+
+struct i40iw_cq_poll_info {
+	u64 wr_id;
+	i40iw_qp_handle qp_handle;
+	u32 bytes_xfered;
+	u32 tcp_seq_num;
+	u32 qp_id;
+	i40iw_stag inv_stag;
+	enum i40iw_completion_status comp_status;
+	u16 major_err;
+	u16 minor_err;
+	u8 op_type;
+	bool stag_invalid_set;
+	bool push_dropped;
+	bool error;
+	bool is_srq;
+	bool solicited_event;
+};
+
+struct i40iw_qp_uk_ops {
+	void (*iw_qp_post_wr)(struct i40iw_qp_uk *);
+	void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32);
+	enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *,
+						struct i40iw_post_sq_info *, bool);
+	enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *,
+					       struct i40iw_post_sq_info *, bool, bool);
+	enum i40iw_status_code (*iw_send)(struct i40iw_qp_uk *,
+					  struct i40iw_post_sq_info *, u32, bool);
+	enum i40iw_status_code (*iw_inline_rdma_write)(struct i40iw_qp_uk *,
+						       struct i40iw_post_sq_info *, bool);
+	enum i40iw_status_code (*iw_inline_send)(struct i40iw_qp_uk *,
+						 struct i40iw_post_sq_info *, u32, bool);
+	enum i40iw_status_code (*iw_stag_local_invalidate)(struct i40iw_qp_uk *,
+							   struct i40iw_post_sq_info *, bool);
+	enum i40iw_status_code (*iw_mw_bind)(struct i40iw_qp_uk *,
+					     struct i40iw_post_sq_info *, bool);
+	enum i40iw_status_code (*iw_post_receive)(struct i40iw_qp_uk *,
+						  struct i40iw_post_rq_info *);
+	enum i40iw_status_code (*iw_post_nop)(struct i40iw_qp_uk *, u64, bool, bool);
+};
+
+struct i40iw_cq_ops {
+	void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
+					   enum i40iw_completion_notify);
+	enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
+							struct i40iw_cq_poll_info *, bool);
+	enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
+	void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
+};
+
+struct i40iw_dev_uk;
+
+struct i40iw_device_uk_ops {
+	enum i40iw_status_code (*iwarp_cq_uk_init)(struct i40iw_cq_uk *,
+						   struct i40iw_cq_uk_init_info *);
+	enum i40iw_status_code (*iwarp_qp_uk_init)(struct i40iw_qp_uk *,
+						   struct i40iw_qp_uk_init_info *);
+};
+
+struct i40iw_dev_uk {
+	struct i40iw_device_uk_ops ops_uk;
+};
+
+struct i40iw_sq_uk_wr_trk_info {
+	u64 wrid;
+	u64 wr_len;
+};
+
+struct i40iw_qp_quanta {
+	u64 elem[I40IW_WQE_SIZE];
+};
+
+struct i40iw_qp_uk {
+	struct i40iw_qp_quanta *sq_base;
+	struct i40iw_qp_quanta *rq_base;
+	u32 __iomem *wqe_alloc_reg;
+	struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+	u64 *rq_wrid_array;
+	u64 *shadow_area;
+	u32 *push_db;
+	u64 *push_wqe;
+	struct i40iw_ring sq_ring;
+	struct i40iw_ring rq_ring;
+	struct i40iw_ring initial_ring;
+	u32 qp_id;
+	u32 sq_size;
+	u32 rq_size;
+	struct i40iw_qp_uk_ops ops;
+	bool use_srq;
+	u8 swqe_polarity;
+	u8 swqe_polarity_deferred;
+	u8 rwqe_polarity;
+	u8 rq_wqe_size;
+	u8 rq_wqe_size_multiplier;
+	u8 max_sq_frag_cnt;
+	u8 max_rq_frag_cnt;
+	bool deferred_flag;
+};
+
+struct i40iw_cq_uk {
+	struct i40iw_cqe *cq_base;
+	u32 __iomem *cqe_alloc_reg;
+	u64 *shadow_area;
+	u32 cq_id;
+	u32 cq_size;
+	struct i40iw_ring cq_ring;
+	u8 polarity;
+	bool avoid_mem_cflct;
+
+	struct i40iw_cq_ops ops;
+};
+
+struct i40iw_qp_uk_init_info {
+	struct i40iw_qp_quanta *sq;
+	struct i40iw_qp_quanta *rq;
+	u32 __iomem *wqe_alloc_reg;
+	u64 *shadow_area;
+	struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+	u64 *rq_wrid_array;
+	u32 *push_db;
+	u64 *push_wqe;
+	u32 qp_id;
+	u32 sq_size;
+	u32 rq_size;
+	u8 max_sq_frag_cnt;
+	u8 max_rq_frag_cnt;
+
+};
+
+struct i40iw_cq_uk_init_info {
+	u32 __iomem *cqe_alloc_reg;
+	struct i40iw_cqe *cq_base;
+	u64 *shadow_area;
+	u32 cq_size;
+	u32 cq_id;
+	bool avoid_mem_cflct;
+};
+
+void i40iw_device_init_uk(struct i40iw_dev_uk *dev);
+
+void i40iw_qp_post_wr(struct i40iw_qp_uk *qp);
+u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx,
+				u8 wqe_size);
+u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx);
+u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx);
+
+enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
+					struct i40iw_cq_uk_init_info *info);
+enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
+					struct i40iw_qp_uk_init_info *info);
+
+void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq);
+enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id,
+				 bool signaled, bool post_sq);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
+							 u8 *wqe_size);
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift);
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
new file mode 100644
index 0000000..1ceec81
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c

@@ -0,0 +1,1270 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <net/netevent.h>
+#include <net/neighbour.h>
+#include "i40iw.h"
+
+/**
+ * i40iw_arp_table - manage arp table
+ * @iwdev: iwarp device
+ * @ip_addr: ip address for device
+ * @mac_addr: mac address ptr
+ * @action: modify, delete or add
+ */
+int i40iw_arp_table(struct i40iw_device *iwdev,
+		    __be32 *ip_addr,
+		    bool ipv4,
+		    u8 *mac_addr,
+		    u32 action)
+{
+	int arp_index;
+	int err;
+	u32 ip[4];
+
+	if (ipv4) {
+		memset(ip, 0, sizeof(ip));
+		ip[0] = *ip_addr;
+	} else {
+		memcpy(ip, ip_addr, sizeof(ip));
+	}
+
+	for (arp_index = 0; (u32)arp_index < iwdev->arp_table_size; arp_index++)
+		if (memcmp(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip)) == 0)
+			break;
+	switch (action) {
+	case I40IW_ARP_ADD:
+		if (arp_index != iwdev->arp_table_size)
+			return -1;
+
+		arp_index = 0;
+		err = i40iw_alloc_resource(iwdev, iwdev->allocated_arps,
+					   iwdev->arp_table_size,
+					   (u32 *)&arp_index,
+					   &iwdev->next_arp_index);
+
+		if (err)
+			return err;
+
+		memcpy(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip));
+		ether_addr_copy(iwdev->arp_table[arp_index].mac_addr, mac_addr);
+		break;
+	case I40IW_ARP_RESOLVE:
+		if (arp_index == iwdev->arp_table_size)
+			return -1;
+		break;
+	case I40IW_ARP_DELETE:
+		if (arp_index == iwdev->arp_table_size)
+			return -1;
+		memset(iwdev->arp_table[arp_index].ip_addr, 0,
+		       sizeof(iwdev->arp_table[arp_index].ip_addr));
+		eth_zero_addr(iwdev->arp_table[arp_index].mac_addr);
+		i40iw_free_resource(iwdev, iwdev->allocated_arps, arp_index);
+		break;
+	default:
+		return -1;
+	}
+	return arp_index;
+}
+
+/**
+ * i40iw_wr32 - write 32 bits to hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ * @value: vvalue to write to register
+ */
+inline void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value)
+{
+	writel(value, hw->hw_addr + reg);
+}
+
+/**
+ * i40iw_rd32 - read a 32 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return value of register content
+ */
+inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
+{
+	return readl(hw->hw_addr + reg);
+}
+
+/**
+ * i40iw_inetaddr_event - system notifier for netdev events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ */
+int i40iw_inetaddr_event(struct notifier_block *notifier,
+			 unsigned long event,
+			 void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	struct net_device *event_netdev = ifa->ifa_dev->dev;
+	struct net_device *netdev;
+	struct net_device *upper_dev;
+	struct i40iw_device *iwdev;
+	struct i40iw_handler *hdl;
+	__be32 local_ipaddr;
+
+	hdl = i40iw_find_netdev(event_netdev);
+	if (!hdl)
+		return NOTIFY_DONE;
+
+	iwdev = &hdl->device;
+	netdev = iwdev->ldev->netdev;
+	upper_dev = netdev_master_upper_dev_get(netdev);
+	if (netdev != event_netdev)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_DOWN:
+		if (upper_dev)
+			local_ipaddr =
+				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+		else
+			local_ipaddr = ifa->ifa_address;
+		local_ipaddr = ntohl(local_ipaddr);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       &local_ipaddr,
+				       true,
+				       I40IW_ARP_DELETE);
+		return NOTIFY_OK;
+	case NETDEV_UP:
+		if (upper_dev)
+			local_ipaddr =
+				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+		else
+			local_ipaddr = ifa->ifa_address;
+		local_ipaddr = ntohl(local_ipaddr);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       &local_ipaddr,
+				       true,
+				       I40IW_ARP_ADD);
+		break;
+	case NETDEV_CHANGEADDR:
+		/* Add the address to the IP table */
+		if (upper_dev)
+			local_ipaddr =
+				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+		else
+			local_ipaddr = ifa->ifa_address;
+
+		local_ipaddr = ntohl(local_ipaddr);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       &local_ipaddr,
+				       true,
+				       I40IW_ARP_ADD);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_inet6addr_event - system notifier for ipv6 netdev events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ */
+int i40iw_inet6addr_event(struct notifier_block *notifier,
+			  unsigned long event,
+			  void *ptr)
+{
+	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+	struct net_device *event_netdev = ifa->idev->dev;
+	struct net_device *netdev;
+	struct i40iw_device *iwdev;
+	struct i40iw_handler *hdl;
+	__be32 local_ipaddr6[4];
+
+	hdl = i40iw_find_netdev(event_netdev);
+	if (!hdl)
+		return NOTIFY_DONE;
+
+	iwdev = &hdl->device;
+	netdev = iwdev->ldev->netdev;
+	if (netdev != event_netdev)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_DOWN:
+		i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       local_ipaddr6,
+				       false,
+				       I40IW_ARP_DELETE);
+		return NOTIFY_OK;
+	case NETDEV_UP:
+		/* Fall through */
+	case NETDEV_CHANGEADDR:
+		i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       local_ipaddr6,
+				       false,
+				       I40IW_ARP_ADD);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_net_event - system notifier for net events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: neighbor
+ */
+int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *ptr)
+{
+	struct neighbour *neigh = ptr;
+	struct i40iw_device *iwdev;
+	struct i40iw_handler *iwhdl;
+	__be32 *p;
+	u32 local_ipaddr[4];
+
+	switch (event) {
+	case NETEVENT_NEIGH_UPDATE:
+		iwhdl = i40iw_find_netdev((struct net_device *)neigh->dev);
+		if (!iwhdl)
+			return NOTIFY_DONE;
+		iwdev = &iwhdl->device;
+		p = (__be32 *)neigh->primary_key;
+		i40iw_copy_ip_ntohl(local_ipaddr, p);
+		if (neigh->nud_state & NUD_VALID) {
+			i40iw_manage_arp_cache(iwdev,
+					       neigh->ha,
+					       local_ipaddr,
+					       false,
+					       I40IW_ARP_ADD);
+
+		} else {
+			i40iw_manage_arp_cache(iwdev,
+					       neigh->ha,
+					       local_ipaddr,
+					       false,
+					       I40IW_ARP_DELETE);
+		}
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_get_cqp_request - get cqp struct
+ * @cqp: device cqp ptr
+ * @wait: cqp to be used in wait mode
+ */
+struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait)
+{
+	struct i40iw_cqp_request *cqp_request = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cqp->req_lock, flags);
+	if (!list_empty(&cqp->cqp_avail_reqs)) {
+		cqp_request = list_entry(cqp->cqp_avail_reqs.next,
+					 struct i40iw_cqp_request, list);
+		list_del_init(&cqp_request->list);
+	}
+	spin_unlock_irqrestore(&cqp->req_lock, flags);
+	if (!cqp_request) {
+		cqp_request = kzalloc(sizeof(*cqp_request), GFP_ATOMIC);
+		if (cqp_request) {
+			cqp_request->dynamic = true;
+			INIT_LIST_HEAD(&cqp_request->list);
+			init_waitqueue_head(&cqp_request->waitq);
+		}
+	}
+	if (!cqp_request) {
+		i40iw_pr_err("CQP Request Fail: No Memory");
+		return NULL;
+	}
+
+	if (wait) {
+		atomic_set(&cqp_request->refcount, 2);
+		cqp_request->waiting = true;
+	} else {
+		atomic_set(&cqp_request->refcount, 1);
+	}
+	return cqp_request;
+}
+
+/**
+ * i40iw_free_cqp_request - free cqp request
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request)
+{
+	unsigned long flags;
+
+	if (cqp_request->dynamic) {
+		kfree(cqp_request);
+	} else {
+		cqp_request->request_done = false;
+		cqp_request->callback_fcn = NULL;
+		cqp_request->waiting = false;
+
+		spin_lock_irqsave(&cqp->req_lock, flags);
+		list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
+		spin_unlock_irqrestore(&cqp->req_lock, flags);
+	}
+}
+
+/**
+ * i40iw_put_cqp_request - dec ref count and free if 0
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void i40iw_put_cqp_request(struct i40iw_cqp *cqp,
+			   struct i40iw_cqp_request *cqp_request)
+{
+	if (atomic_dec_and_test(&cqp_request->refcount))
+		i40iw_free_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * i40iw_free_qp - callback after destroy cqp completes
+ * @cqp_request: cqp request for destroy qp
+ * @num: not used
+ */
+static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
+{
+	struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
+	struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
+	struct i40iw_device *iwdev;
+	u32 qp_num = iwqp->ibqp.qp_num;
+
+	iwdev = iwqp->iwdev;
+
+	i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
+	i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+}
+
+/**
+ * i40iw_wait_event - wait for completion
+ * @iwdev: iwarp device
+ * @cqp_request: cqp request to wait
+ */
+static int i40iw_wait_event(struct i40iw_device *iwdev,
+			    struct i40iw_cqp_request *cqp_request)
+{
+	struct cqp_commands_info *info = &cqp_request->info;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	bool cqp_error = false;
+	int err_code = 0;
+	int timeout_ret = 0;
+
+	timeout_ret = wait_event_timeout(cqp_request->waitq,
+					 cqp_request->request_done,
+					 I40IW_EVENT_TIMEOUT);
+	if (!timeout_ret) {
+		i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n",
+			     info->cqp_cmd, timeout_ret);
+		err_code = -ETIME;
+		i40iw_request_reset(iwdev);
+		goto done;
+	}
+	cqp_error = cqp_request->compl_info.error;
+	if (cqp_error) {
+		i40iw_pr_err("error cqp command 0x%x completion maj = 0x%x min=0x%x\n",
+			     info->cqp_cmd, cqp_request->compl_info.maj_err_code,
+			     cqp_request->compl_info.min_err_code);
+		err_code = -EPROTO;
+		goto done;
+	}
+done:
+	i40iw_put_cqp_request(iwcqp, cqp_request);
+	return err_code;
+}
+
+/**
+ * i40iw_handle_cqp_op - process cqp command
+ * @iwdev: iwarp device
+ * @cqp_request: cqp request to process
+ */
+enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
+					   struct i40iw_cqp_request
+					   *cqp_request)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+	struct cqp_commands_info *info = &cqp_request->info;
+	int err_code = 0;
+
+	status = i40iw_process_cqp_cmd(dev, info);
+	if (status) {
+		i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd);
+		i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
+		return status;
+	}
+	if (cqp_request->waiting)
+		err_code = i40iw_wait_event(iwdev, cqp_request);
+	if (err_code)
+		status = I40IW_ERR_CQP_COMPL_ERROR;
+	return status;
+}
+
+/**
+ * i40iw_add_pdusecount - add pd refcount
+ * @iwpd: pd for refcount
+ */
+void i40iw_add_pdusecount(struct i40iw_pd *iwpd)
+{
+	atomic_inc(&iwpd->usecount);
+}
+
+/**
+ * i40iw_rem_pdusecount - decrement refcount for pd and free if 0
+ * @iwpd: pd for refcount
+ * @iwdev: iwarp device
+ */
+void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
+{
+	if (!atomic_dec_and_test(&iwpd->usecount))
+		return;
+	i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id);
+	kfree(iwpd);
+}
+
+/**
+ * i40iw_add_ref - add refcount for qp
+ * @ibqp: iqarp qp
+ */
+void i40iw_add_ref(struct ib_qp *ibqp)
+{
+	struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
+
+	atomic_inc(&iwqp->refcount);
+}
+
+/**
+ * i40iw_rem_ref - rem refcount for qp and free if 0
+ * @ibqp: iqarp qp
+ */
+void i40iw_rem_ref(struct ib_qp *ibqp)
+{
+	struct i40iw_qp *iwqp;
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev;
+	u32 qp_num;
+
+	iwqp = to_iwqp(ibqp);
+	if (!atomic_dec_and_test(&iwqp->refcount))
+		return;
+
+	iwdev = iwqp->iwdev;
+	qp_num = iwqp->ibqp.qp_num;
+	iwdev->qp_table[qp_num] = NULL;
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return;
+
+	cqp_request->callback_fcn = i40iw_free_qp;
+	cqp_request->param = (void *)&iwqp->sc_qp;
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_QP_DESTROY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
+	cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Destroy QP fail");
+}
+
+/**
+ * i40iw_get_qp - get qp address
+ * @device: iwarp device
+ * @qpn: qp number
+ */
+struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn)
+{
+	struct i40iw_device *iwdev = to_iwdev(device);
+
+	if ((qpn < IW_FIRST_QPN) || (qpn >= iwdev->max_qp))
+		return NULL;
+
+	return &iwdev->qp_table[qpn]->ibqp;
+}
+
+/**
+ * i40iw_debug_buf - print debug msg and buffer is mask set
+ * @dev: hardware control device structure
+ * @mask: mask to compare if to print debug buffer
+ * @buf: points buffer addr
+ * @size: saize of buffer to print
+ */
+void i40iw_debug_buf(struct i40iw_sc_dev *dev,
+		     enum i40iw_debug_flag mask,
+		     char *desc,
+		     u64 *buf,
+		     u32 size)
+{
+	u32 i;
+
+	if (!(dev->debug_mask & mask))
+		return;
+	i40iw_debug(dev, mask, "%s\n", desc);
+	i40iw_debug(dev, mask, "starting address virt=%p phy=%llxh\n", buf,
+		    (unsigned long long)virt_to_phys(buf));
+
+	for (i = 0; i < size; i += 8)
+		i40iw_debug(dev, mask, "index %03d val: %016llx\n", i, buf[i / 8]);
+}
+
+/**
+ * i40iw_get_hw_addr - return hw addr
+ * @par: points to shared dev
+ */
+u8 __iomem *i40iw_get_hw_addr(void *par)
+{
+	struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)par;
+
+	return dev->hw->hw_addr;
+}
+
+/**
+ * i40iw_remove_head - return head entry and remove from list
+ * @list: list for entry
+ */
+void *i40iw_remove_head(struct list_head *list)
+{
+	struct list_head *entry;
+
+	if (list_empty(list))
+		return NULL;
+
+	entry = (void *)list->next;
+	list_del(entry);
+	return (void *)entry;
+}
+
+/**
+ * i40iw_allocate_dma_mem - Memory alloc helper fn
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ */
+enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
+					      struct i40iw_dma_mem *mem,
+					      u64 size,
+					      u32 alignment)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+
+	if (!mem)
+		return I40IW_ERR_PARAM;
+	mem->size = ALIGN(size, alignment);
+	mem->va = dma_zalloc_coherent(&pcidev->dev, mem->size,
+				      (dma_addr_t *)&mem->pa, GFP_KERNEL);
+	if (!mem->va)
+		return I40IW_ERR_NO_MEMORY;
+	return 0;
+}
+
+/**
+ * i40iw_free_dma_mem - Memory free helper fn
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ */
+void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+
+	if (!mem || !mem->va)
+		return;
+
+	dma_free_coherent(&pcidev->dev, mem->size,
+			  mem->va, (dma_addr_t)mem->pa);
+	mem->va = NULL;
+}
+
+/**
+ * i40iw_allocate_virt_mem - virtual memory alloc helper fn
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ */
+enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
+					       struct i40iw_virt_mem *mem,
+					       u32 size)
+{
+	if (!mem)
+		return I40IW_ERR_PARAM;
+
+	mem->size = size;
+	mem->va = kzalloc(size, GFP_KERNEL);
+
+	if (mem->va)
+		return 0;
+	else
+		return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * i40iw_free_virt_mem - virtual memory free helper fn
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ */
+enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
+					   struct i40iw_virt_mem *mem)
+{
+	if (!mem)
+		return I40IW_ERR_PARAM;
+	kfree(mem->va);
+	mem->va = NULL;
+	return 0;
+}
+
+/**
+ * i40iw_cqp_sds_cmd - create cqp command for sd
+ * @dev: hardware control device structure
+ * @sd_info: information  for sd cqp
+ *
+ */
+enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
+					 struct i40iw_update_sds_info *sdinfo)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
+	       sizeof(cqp_info->in.u.update_pe_sds.info));
+	cqp_info->cqp_cmd = OP_UPDATE_PE_SDS;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.update_pe_sds.dev = dev;
+	cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Update SD's fail");
+	return status;
+}
+
+/**
+ * i40iw_term_modify_qp - modify qp for term message
+ * @qp: hardware control qp
+ * @next_state: qp's next state
+ * @term: terminate code
+ * @term_len: length
+ */
+void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len)
+{
+	struct i40iw_qp *iwqp;
+
+	iwqp = (struct i40iw_qp *)qp->back_qp;
+	i40iw_next_iw_state(iwqp, next_state, 0, term, term_len);
+};
+
+/**
+ * i40iw_terminate_done - after terminate is completed
+ * @qp: hardware control qp
+ * @timeout_occurred: indicates if terminate timer expired
+ */
+void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred)
+{
+	struct i40iw_qp *iwqp;
+	u32 next_iwarp_state = I40IW_QP_STATE_ERROR;
+	u8 hte = 0;
+	bool first_time;
+	unsigned long flags;
+
+	iwqp = (struct i40iw_qp *)qp->back_qp;
+	spin_lock_irqsave(&iwqp->lock, flags);
+	if (iwqp->hte_added) {
+		iwqp->hte_added = 0;
+		hte = 1;
+	}
+	first_time = !(qp->term_flags & I40IW_TERM_DONE);
+	qp->term_flags |= I40IW_TERM_DONE;
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	if (first_time) {
+		if (!timeout_occurred)
+			i40iw_terminate_del_timer(qp);
+		else
+			next_iwarp_state = I40IW_QP_STATE_CLOSING;
+
+		i40iw_next_iw_state(iwqp, next_iwarp_state, hte, 0, 0);
+		i40iw_cm_disconn(iwqp);
+	}
+}
+
+/**
+ * i40iw_terminate_imeout - timeout happened
+ * @context: points to iwarp qp
+ */
+static void i40iw_terminate_timeout(unsigned long context)
+{
+	struct i40iw_qp *iwqp = (struct i40iw_qp *)context;
+	struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
+
+	i40iw_terminate_done(qp, 1);
+}
+
+/**
+ * i40iw_terminate_start_timer - start terminate timeout
+ * @qp: hardware control qp
+ */
+void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
+{
+	struct i40iw_qp *iwqp;
+
+	iwqp = (struct i40iw_qp *)qp->back_qp;
+	init_timer(&iwqp->terminate_timer);
+	iwqp->terminate_timer.function = i40iw_terminate_timeout;
+	iwqp->terminate_timer.expires = jiffies + HZ;
+	iwqp->terminate_timer.data = (unsigned long)iwqp;
+	add_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * i40iw_terminate_del_timer - delete terminate timeout
+ * @qp: hardware control qp
+ */
+void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
+{
+	struct i40iw_qp *iwqp;
+
+	iwqp = (struct i40iw_qp *)qp->back_qp;
+	del_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * i40iw_cqp_generic_worker - generic worker for cqp
+ * @work: work pointer
+ */
+static void i40iw_cqp_generic_worker(struct work_struct *work)
+{
+	struct i40iw_virtchnl_work_info *work_info =
+	    &((struct virtchnl_work *)work)->work_info;
+
+	if (work_info->worker_vf_dev)
+		work_info->callback_fcn(work_info->worker_vf_dev);
+}
+
+/**
+ * i40iw_cqp_spawn_worker - spawn worket thread
+ * @iwdev: device struct pointer
+ * @work_info: work request info
+ * @iw_vf_idx: virtual function index
+ */
+void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
+			    struct i40iw_virtchnl_work_info *work_info,
+			    u32 iw_vf_idx)
+{
+	struct virtchnl_work *work;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	work = &iwdev->virtchnl_w[iw_vf_idx];
+	memcpy(&work->work_info, work_info, sizeof(*work_info));
+	INIT_WORK(&work->work, i40iw_cqp_generic_worker);
+	queue_work(iwdev->virtchnl_wq, &work->work);
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_worker -
+ * @work: work pointer for hmc info
+ */
+static void i40iw_cqp_manage_hmc_fcn_worker(struct work_struct *work)
+{
+	struct i40iw_cqp_request *cqp_request =
+	    ((struct virtchnl_work *)work)->cqp_request;
+	struct i40iw_ccq_cqe_info ccq_cqe_info;
+	struct i40iw_hmc_fcn_info *hmcfcninfo =
+			&cqp_request->info.in.u.manage_hmc_pm.info;
+	struct i40iw_device *iwdev =
+	    (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->back_dev;
+
+	ccq_cqe_info.cqp = NULL;
+	ccq_cqe_info.maj_err_code = cqp_request->compl_info.maj_err_code;
+	ccq_cqe_info.min_err_code = cqp_request->compl_info.min_err_code;
+	ccq_cqe_info.op_code = cqp_request->compl_info.op_code;
+	ccq_cqe_info.op_ret_val = cqp_request->compl_info.op_ret_val;
+	ccq_cqe_info.scratch = 0;
+	ccq_cqe_info.error = cqp_request->compl_info.error;
+	hmcfcninfo->callback_fcn(cqp_request->info.in.u.manage_hmc_pm.dev,
+				 hmcfcninfo->cqp_callback_param, &ccq_cqe_info);
+	i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_callback - called function after cqp completion
+ * @cqp_request: cqp request info struct for hmc fun
+ * @unused: unused param of callback
+ */
+static void i40iw_cqp_manage_hmc_fcn_callback(struct i40iw_cqp_request *cqp_request,
+					      u32 unused)
+{
+	struct virtchnl_work *work;
+	struct i40iw_hmc_fcn_info *hmcfcninfo =
+	    &cqp_request->info.in.u.manage_hmc_pm.info;
+	struct i40iw_device *iwdev =
+	    (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->
+	    back_dev;
+
+	if (hmcfcninfo && hmcfcninfo->callback_fcn) {
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s1\n", __func__);
+		atomic_inc(&cqp_request->refcount);
+		work = &iwdev->virtchnl_w[hmcfcninfo->iw_vf_idx];
+		work->cqp_request = cqp_request;
+		INIT_WORK(&work->work, i40iw_cqp_manage_hmc_fcn_worker);
+		queue_work(iwdev->virtchnl_wq, &work->work);
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s2\n", __func__);
+	} else {
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s: Something wrong\n", __func__);
+	}
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_cmd - issue cqp command to manage hmc
+ * @dev: hardware control device structure
+ * @hmcfcninfo: info for hmc
+ */
+enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
+						    struct i40iw_hmc_fcn_info *hmcfcninfo)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s\n", __func__);
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	cqp_request->callback_fcn = i40iw_cqp_manage_hmc_fcn_callback;
+	cqp_request->param = hmcfcninfo;
+	memcpy(&cqp_info->in.u.manage_hmc_pm.info, hmcfcninfo,
+	       sizeof(*hmcfcninfo));
+	cqp_info->in.u.manage_hmc_pm.dev = dev;
+	cqp_info->cqp_cmd = OP_MANAGE_HMC_PM_FUNC_TABLE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.manage_hmc_pm.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Manage HMC fail");
+	return status;
+}
+
+/**
+ * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm
+ * @iwdev: function device struct
+ * @values_mem: buffer for fpm
+ * @hmc_fn_id: function id for fpm
+ */
+enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
+						      struct i40iw_dma_mem *values_mem,
+						      u8 hmc_fn_id)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	cqp_request->param = NULL;
+	cqp_info->in.u.query_fpm_values.cqp = dev->cqp;
+	cqp_info->in.u.query_fpm_values.fpm_values_pa = values_mem->pa;
+	cqp_info->in.u.query_fpm_values.fpm_values_va = values_mem->va;
+	cqp_info->in.u.query_fpm_values.hmc_fn_id = hmc_fn_id;
+	cqp_info->cqp_cmd = OP_QUERY_FPM_VALUES;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.query_fpm_values.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Query FPM fail");
+	return status;
+}
+
+/**
+ * i40iw_cqp_commit_fpm_values_cmd - commit fpm values in hw
+ * @dev: hardware control device structure
+ * @values_mem: buffer with fpm values
+ * @hmc_fn_id: function id for fpm
+ */
+enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
+						       struct i40iw_dma_mem *values_mem,
+						       u8 hmc_fn_id)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	cqp_request->param = NULL;
+	cqp_info->in.u.commit_fpm_values.cqp = dev->cqp;
+	cqp_info->in.u.commit_fpm_values.fpm_values_pa = values_mem->pa;
+	cqp_info->in.u.commit_fpm_values.fpm_values_va = values_mem->va;
+	cqp_info->in.u.commit_fpm_values.hmc_fn_id = hmc_fn_id;
+	cqp_info->cqp_cmd = OP_COMMIT_FPM_VALUES;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.commit_fpm_values.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Commit FPM fail");
+	return status;
+}
+
+/**
+ * i40iw_vf_wait_vchnl_resp - wait for channel msg
+ * @iwdev: function's device struct
+ */
+enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
+{
+	struct i40iw_device *iwdev = dev->back_dev;
+	enum i40iw_status_code err_code = 0;
+	int timeout_ret;
+
+	i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n",
+		    __func__, __LINE__, dev, iwdev);
+	atomic_add(2, &iwdev->vchnl_msgs);
+	timeout_ret = wait_event_timeout(iwdev->vchnl_waitq,
+					 (atomic_read(&iwdev->vchnl_msgs) == 1),
+					 I40IW_VCHNL_EVENT_TIMEOUT);
+	atomic_dec(&iwdev->vchnl_msgs);
+	if (!timeout_ret) {
+		i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret);
+		err_code = I40IW_ERR_TIMEOUT;
+	}
+	return err_code;
+}
+
+/**
+ * i40iw_ieq_mpa_crc_ae - generate AE for crc error
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ */
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_qp_flush_info info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	i40iw_debug(dev, I40IW_DEBUG_AEQ, "%s entered\n", __func__);
+	memset(&info, 0, sizeof(info));
+	info.ae_code = I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR;
+	info.generate_ae = true;
+	info.ae_source = 0x3;
+	(void)i40iw_hw_flush_wqes(iwdev, qp, &info, false);
+}
+
+/**
+ * i40iw_init_hash_desc - initialize hash for crc calculation
+ * @desc: cryption type
+ */
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc)
+{
+	struct crypto_shash *tfm;
+	struct shash_desc *tdesc;
+
+	tfm = crypto_alloc_shash("crc32c", 0, 0);
+	if (IS_ERR(tfm))
+		return I40IW_ERR_MPA_CRC;
+
+	tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
+			GFP_KERNEL);
+	if (!tdesc) {
+		crypto_free_shash(tfm);
+		return I40IW_ERR_MPA_CRC;
+	}
+	tdesc->tfm = tfm;
+	*desc = tdesc;
+
+	return 0;
+}
+
+/**
+ * i40iw_free_hash_desc - free hash desc
+ * @desc: to be freed
+ */
+void i40iw_free_hash_desc(struct shash_desc *desc)
+{
+	if (desc) {
+		crypto_free_shash(desc->tfm);
+		kfree(desc);
+	}
+}
+
+/**
+ * i40iw_alloc_query_fpm_buf - allocate buffer for fpm
+ * @dev: hardware control device structure
+ * @mem: buffer ptr for fpm to be allocated
+ * @return: memory allocation status
+ */
+enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
+						 struct i40iw_dma_mem *mem)
+{
+	enum i40iw_status_code status;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	status = i40iw_obj_aligned_mem(iwdev, mem, I40IW_QUERY_FPM_BUF_SIZE,
+				       I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
+	return status;
+}
+
+/**
+ * i40iw_ieq_check_mpacrc - check if mpa crc is OK
+ * @desc: desc for hash
+ * @addr: address of buffer for crc
+ * @length: length of buffer
+ * @value: value to be compared
+ */
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
+					      void *addr,
+					      u32 length,
+					      u32 value)
+{
+	u32 crc = 0;
+	int ret;
+	enum i40iw_status_code ret_code = 0;
+
+	crypto_shash_init(desc);
+	ret = crypto_shash_update(desc, addr, length);
+	if (!ret)
+		crypto_shash_final(desc, (u8 *)&crc);
+	if (crc != value) {
+		i40iw_pr_err("mpa crc check fail\n");
+		ret_code = I40IW_ERR_MPA_CRC;
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_ieq_get_qp - get qp based on quad in puda buffer
+ * @dev: hardware control device structure
+ * @buf: receive puda buffer on exception q
+ */
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
+				     struct i40iw_puda_buf *buf)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_qp *iwqp;
+	struct i40iw_cm_node *cm_node;
+	u32 loc_addr[4], rem_addr[4];
+	u16 loc_port, rem_port;
+	struct ipv6hdr *ip6h;
+	struct iphdr *iph = (struct iphdr *)buf->iph;
+	struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+
+	if (iph->version == 4) {
+		memset(loc_addr, 0, sizeof(loc_addr));
+		loc_addr[0] = ntohl(iph->daddr);
+		memset(rem_addr, 0, sizeof(rem_addr));
+		rem_addr[0] = ntohl(iph->saddr);
+	} else {
+		ip6h = (struct ipv6hdr *)buf->iph;
+		i40iw_copy_ip_ntohl(loc_addr, ip6h->daddr.in6_u.u6_addr32);
+		i40iw_copy_ip_ntohl(rem_addr, ip6h->saddr.in6_u.u6_addr32);
+	}
+	loc_port = ntohs(tcph->dest);
+	rem_port = ntohs(tcph->source);
+
+	cm_node = i40iw_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
+				  loc_addr, false);
+	if (!cm_node)
+		return NULL;
+	iwqp = cm_node->iwqp;
+	return &iwqp->sc_qp;
+}
+
+/**
+ * i40iw_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @length: length of buffer
+ * @seqnum: seq number for tcp
+ */
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum)
+{
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	u16 iphlen;
+	u16 packetsize;
+	u8 *addr = (u8 *)buf->mem.va;
+
+	iphlen = (buf->ipv4) ? 20 : 40;
+	iph = (struct iphdr *)(addr + buf->maclen);
+	tcph = (struct tcphdr *)(addr + buf->maclen + iphlen);
+	packetsize = length + buf->tcphlen + iphlen;
+
+	iph->tot_len = htons(packetsize);
+	tcph->seq = htonl(seqnum);
+}
+
+/**
+ * i40iw_puda_get_tcpip_info - get tcpip info from puda buffer
+ * @info: to get information
+ * @buf: puda buffer
+ */
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+						 struct i40iw_puda_buf *buf)
+{
+	struct iphdr *iph;
+	struct ipv6hdr *ip6h;
+	struct tcphdr *tcph;
+	u16 iphlen;
+	u16 pkt_len;
+	u8 *mem = (u8 *)buf->mem.va;
+	struct ethhdr *ethh = (struct ethhdr *)buf->mem.va;
+
+	if (ethh->h_proto == htons(0x8100)) {
+		info->vlan_valid = true;
+		buf->vlan_id = ntohs(((struct vlan_ethhdr *)ethh)->h_vlan_TCI) & VLAN_VID_MASK;
+	}
+	buf->maclen = (info->vlan_valid) ? 18 : 14;
+	iphlen = (info->l3proto) ? 40 : 20;
+	buf->ipv4 = (info->l3proto) ? false : true;
+	buf->iph = mem + buf->maclen;
+	iph = (struct iphdr *)buf->iph;
+
+	buf->tcph = buf->iph + iphlen;
+	tcph = (struct tcphdr *)buf->tcph;
+
+	if (buf->ipv4) {
+		pkt_len = ntohs(iph->tot_len);
+	} else {
+		ip6h = (struct ipv6hdr *)buf->iph;
+		pkt_len = ntohs(ip6h->payload_len) + iphlen;
+	}
+
+	buf->totallen = pkt_len + buf->maclen;
+
+	if (info->payload_len < buf->totallen - 4) {
+		i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n",
+			     info->payload_len, buf->totallen);
+		return I40IW_ERR_INVALID_SIZE;
+	}
+
+	buf->tcphlen = (tcph->doff) << 2;
+	buf->datalen = pkt_len - iphlen - buf->tcphlen;
+	buf->data = (buf->datalen) ? buf->tcph + buf->tcphlen : NULL;
+	buf->hdrlen = buf->maclen + iphlen + buf->tcphlen;
+	buf->seqnum = ntohl(tcph->seq);
+	return 0;
+}
+
+/**
+ * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
+ * @dev: hardware control device structure
+ */
+static void i40iw_hw_stats_timeout(unsigned long dev)
+{
+	struct i40iw_sc_dev *pf_dev = (struct i40iw_sc_dev *)dev;
+	struct i40iw_dev_pestat *pf_devstat = &pf_dev->dev_pestat;
+	struct i40iw_dev_pestat *vf_devstat = NULL;
+	u16 iw_vf_idx;
+	unsigned long flags;
+
+	/*PF*/
+	pf_devstat->ops.iw_hw_stat_read_all(pf_devstat, &pf_devstat->hw_stats);
+	for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
+		spin_lock_irqsave(&pf_devstat->stats_lock, flags);
+		if (pf_dev->vf_dev[iw_vf_idx]) {
+			if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) {
+				vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->dev_pestat;
+				vf_devstat->ops.iw_hw_stat_read_all(vf_devstat, &vf_devstat->hw_stats);
+			}
+		}
+		spin_unlock_irqrestore(&pf_devstat->stats_lock, flags);
+	}
+
+	mod_timer(&pf_devstat->stats_timer,
+		  jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * i40iw_hw_stats_start_timer - Start periodic stats timer
+ * @dev: hardware control device structure
+ */
+void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *dev)
+{
+	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+
+	init_timer(&devstat->stats_timer);
+	devstat->stats_timer.function = i40iw_hw_stats_timeout;
+	devstat->stats_timer.data = (unsigned long)dev;
+	mod_timer(&devstat->stats_timer,
+		  jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * i40iw_hw_stats_del_timer - Delete periodic stats timer
+ * @dev: hardware control device structure
+ */
+void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *dev)
+{
+	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+
+	del_timer_sync(&devstat->stats_timer);
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
new file mode 100644
index 0000000..1fe3b84
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c

@@ -0,0 +1,2437 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <asm/byteorder.h>
+#include <net/ip.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include "i40iw.h"
+
+/**
+ * i40iw_query_device - get device attributes
+ * @ibdev: device pointer from stack
+ * @props: returning device attributes
+ * @udata: user data
+ */
+static int i40iw_query_device(struct ib_device *ibdev,
+			      struct ib_device_attr *props,
+			      struct ib_udata *udata)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+
+	if (udata->inlen || udata->outlen)
+		return -EINVAL;
+	memset(props, 0, sizeof(*props));
+	ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
+	props->fw_ver = I40IW_FW_VERSION;
+	props->device_cap_flags = iwdev->device_cap_flags;
+	props->vendor_id = iwdev->vendor_id;
+	props->vendor_part_id = iwdev->vendor_part_id;
+	props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
+	props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
+	props->max_qp = iwdev->max_qp;
+	props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1;
+	props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+	props->max_cq = iwdev->max_cq;
+	props->max_cqe = iwdev->max_cqe;
+	props->max_mr = iwdev->max_mr;
+	props->max_pd = iwdev->max_pd;
+	props->max_sge_rd = 1;
+	props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
+	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
+	props->atomic_cap = IB_ATOMIC_NONE;
+	props->max_map_per_fmr = 1;
+	return 0;
+}
+
+/**
+ * i40iw_query_port - get port attrubutes
+ * @ibdev: device pointer from stack
+ * @port: port number for query
+ * @props: returning device attributes
+ */
+static int i40iw_query_port(struct ib_device *ibdev,
+			    u8 port,
+			    struct ib_port_attr *props)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct net_device *netdev = iwdev->netdev;
+
+	memset(props, 0, sizeof(*props));
+
+	props->max_mtu = IB_MTU_4096;
+	if (netdev->mtu >= 4096)
+		props->active_mtu = IB_MTU_4096;
+	else if (netdev->mtu >= 2048)
+		props->active_mtu = IB_MTU_2048;
+	else if (netdev->mtu >= 1024)
+		props->active_mtu = IB_MTU_1024;
+	else if (netdev->mtu >= 512)
+		props->active_mtu = IB_MTU_512;
+	else
+		props->active_mtu = IB_MTU_256;
+
+	props->lid = 1;
+	if (netif_carrier_ok(iwdev->netdev))
+		props->state = IB_PORT_ACTIVE;
+	else
+		props->state = IB_PORT_DOWN;
+	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+		IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->active_width = IB_WIDTH_4X;
+	props->active_speed = 1;
+	props->max_msg_sz = 0x80000000;
+	return 0;
+}
+
+/**
+ * i40iw_alloc_ucontext - Allocate the user context data structure
+ * @ibdev: device pointer from stack
+ * @udata: user data
+ *
+ * This keeps track of all objects associated with a particular
+ * user-mode client.
+ */
+static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev,
+						struct ib_udata *udata)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_alloc_ucontext_req req;
+	struct i40iw_alloc_ucontext_resp uresp;
+	struct i40iw_ucontext *ucontext;
+
+	if (ib_copy_from_udata(&req, udata, sizeof(req)))
+		return ERR_PTR(-EINVAL);
+
+	if (req.userspace_ver != I40IW_ABI_USERSPACE_VER) {
+		i40iw_pr_err("Invalid userspace driver version detected. Detected version %d, should be %d\n",
+			     req.userspace_ver, I40IW_ABI_USERSPACE_VER);
+		return ERR_PTR(-EINVAL);
+	}
+
+	memset(&uresp, 0, sizeof(uresp));
+	uresp.max_qps = iwdev->max_qp;
+	uresp.max_pds = iwdev->max_pd;
+	uresp.wq_size = iwdev->max_qp_wr * 2;
+	uresp.kernel_ver = I40IW_ABI_KERNEL_VER;
+
+	ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
+	if (!ucontext)
+		return ERR_PTR(-ENOMEM);
+
+	ucontext->iwdev = iwdev;
+
+	if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+		kfree(ucontext);
+		return ERR_PTR(-EFAULT);
+	}
+
+	INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
+	spin_lock_init(&ucontext->cq_reg_mem_list_lock);
+	INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
+	spin_lock_init(&ucontext->qp_reg_mem_list_lock);
+
+	return &ucontext->ibucontext;
+}
+
+/**
+ * i40iw_dealloc_ucontext - deallocate the user context data structure
+ * @context: user context created during alloc
+ */
+static int i40iw_dealloc_ucontext(struct ib_ucontext *context)
+{
+	struct i40iw_ucontext *ucontext = to_ucontext(context);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+	if (!list_empty(&ucontext->cq_reg_mem_list)) {
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		return -EBUSY;
+	}
+	spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+	spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+	if (!list_empty(&ucontext->qp_reg_mem_list)) {
+		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+		return -EBUSY;
+	}
+	spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+	kfree(ucontext);
+	return 0;
+}
+
+/**
+ * i40iw_mmap - user memory map
+ * @context: context created during alloc
+ * @vma: kernel info for user memory map
+ */
+static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct i40iw_ucontext *ucontext;
+	u64 db_addr_offset;
+	u64 push_offset;
+
+	ucontext = to_ucontext(context);
+	if (ucontext->iwdev->sc_dev.is_pf) {
+		db_addr_offset = I40IW_DB_ADDR_OFFSET;
+		push_offset = I40IW_PUSH_OFFSET;
+		if (vma->vm_pgoff)
+			vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1;
+	} else {
+		db_addr_offset = I40IW_VF_DB_ADDR_OFFSET;
+		push_offset = I40IW_VF_PUSH_OFFSET;
+		if (vma->vm_pgoff)
+			vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1;
+	}
+
+	vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT;
+
+	if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) {
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		vma->vm_private_data = ucontext;
+	} else {
+		if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2)
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		else
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (io_remap_pfn_range(vma, vma->vm_start,
+			       vma->vm_pgoff + (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> PAGE_SHIFT),
+			       PAGE_SIZE, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * i40iw_alloc_push_page - allocate a push page for qp
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ */
+static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+
+	if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
+		return;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return;
+
+	atomic_inc(&cqp_request->refcount);
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
+	cqp_info->post_sq = 1;
+
+	cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+	cqp_info->in.u.manage_push_page.info.free_page = 0;
+	cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (!status)
+		qp->push_idx = cqp_request->compl_info.op_ret_val;
+	else
+		i40iw_pr_err("CQP-OP Push page fail");
+	i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+}
+
+/**
+ * i40iw_dealloc_push_page - free a push page for qp
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ */
+static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+
+	if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
+		return;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return;
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
+	cqp_info->post_sq = 1;
+
+	cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
+	cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+	cqp_info->in.u.manage_push_page.info.free_page = 1;
+	cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (!status)
+		qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
+	else
+		i40iw_pr_err("CQP-OP Push page fail");
+}
+
+/**
+ * i40iw_alloc_pd - allocate protection domain
+ * @ibdev: device pointer from stack
+ * @context: user context created during alloc
+ * @udata: user data
+ */
+static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
+				    struct ib_ucontext *context,
+				    struct ib_udata *udata)
+{
+	struct i40iw_pd *iwpd;
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_alloc_pd_resp uresp;
+	struct i40iw_sc_pd *sc_pd;
+	u32 pd_id = 0;
+	int err;
+
+	err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
+				   iwdev->max_pd, &pd_id, &iwdev->next_pd);
+	if (err) {
+		i40iw_pr_err("alloc resource failed\n");
+		return ERR_PTR(err);
+	}
+
+	iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL);
+	if (!iwpd) {
+		err = -ENOMEM;
+		goto free_res;
+	}
+
+	sc_pd = &iwpd->sc_pd;
+	dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id);
+
+	if (context) {
+		memset(&uresp, 0, sizeof(uresp));
+		uresp.pd_id = pd_id;
+		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+			err = -EFAULT;
+			goto error;
+		}
+	}
+
+	i40iw_add_pdusecount(iwpd);
+	return &iwpd->ibpd;
+error:
+	kfree(iwpd);
+free_res:
+	i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id);
+	return ERR_PTR(err);
+}
+
+/**
+ * i40iw_dealloc_pd - deallocate pd
+ * @ibpd: ptr of pd to be deallocated
+ */
+static int i40iw_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct i40iw_pd *iwpd = to_iwpd(ibpd);
+	struct i40iw_device *iwdev = to_iwdev(ibpd->device);
+
+	i40iw_rem_pdusecount(iwpd, iwdev);
+	return 0;
+}
+
+/**
+ * i40iw_qp_roundup - return round up qp ring size
+ * @wr_ring_size: ring size to round up
+ */
+static int i40iw_qp_roundup(u32 wr_ring_size)
+{
+	int scount = 1;
+
+	if (wr_ring_size < I40IWQP_SW_MIN_WQSIZE)
+		wr_ring_size = I40IWQP_SW_MIN_WQSIZE;
+
+	for (wr_ring_size--; scount <= 16; scount *= 2)
+		wr_ring_size |= wr_ring_size >> scount;
+	return ++wr_ring_size;
+}
+
+/**
+ * i40iw_get_pbl - Retrieve pbl from a list given a virtual
+ * address
+ * @va: user virtual address
+ * @pbl_list: pbl list to search in (QP's or CQ's)
+ */
+static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
+				       struct list_head *pbl_list)
+{
+	struct i40iw_pbl *iwpbl;
+
+	list_for_each_entry(iwpbl, pbl_list, list) {
+		if (iwpbl->user_base == va) {
+			list_del(&iwpbl->list);
+			return iwpbl;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * i40iw_free_qp_resources - free up memory resources for qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @qp_num: qp number assigned
+ */
+void i40iw_free_qp_resources(struct i40iw_device *iwdev,
+			     struct i40iw_qp *iwqp,
+			     u32 qp_num)
+{
+	i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
+	if (qp_num)
+		i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
+	i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem);
+	i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
+	kfree(iwqp->kqp.wrid_mem);
+	iwqp->kqp.wrid_mem = NULL;
+	kfree(iwqp->allocated_buffer);
+	iwqp->allocated_buffer = NULL;
+}
+
+/**
+ * i40iw_clean_cqes - clean cq entries for qp
+ * @iwqp: qp ptr (user or kernel)
+ * @iwcq: cq ptr
+ */
+static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
+{
+	struct i40iw_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
+
+	ukcq->ops.iw_cq_clean(&iwqp->sc_qp.qp_uk, ukcq);
+}
+
+/**
+ * i40iw_destroy_qp - destroy qp
+ * @ibqp: qp's ib pointer also to get to device's qp address
+ */
+static int i40iw_destroy_qp(struct ib_qp *ibqp)
+{
+	struct i40iw_qp *iwqp = to_iwqp(ibqp);
+
+	iwqp->destroyed = 1;
+
+	if (iwqp->ibqp_state >= IB_QPS_INIT && iwqp->ibqp_state < IB_QPS_RTS)
+		i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 0, 0, 0);
+
+	if (!iwqp->user_mode) {
+		if (iwqp->iwscq) {
+			i40iw_clean_cqes(iwqp, iwqp->iwscq);
+			if (iwqp->iwrcq != iwqp->iwscq)
+				i40iw_clean_cqes(iwqp, iwqp->iwrcq);
+		}
+	}
+
+	i40iw_rem_ref(&iwqp->ibqp);
+	return 0;
+}
+
+/**
+ * i40iw_setup_virt_qp - setup for allocation of virtual qp
+ * @dev: iwarp device
+ * @qp: qp ptr
+ * @init_info: initialize info to return
+ */
+static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
+			       struct i40iw_qp *iwqp,
+			       struct i40iw_qp_init_info *init_info)
+{
+	struct i40iw_pbl *iwpbl = iwqp->iwpbl;
+	struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
+
+	iwqp->page = qpmr->sq_page;
+	init_info->shadow_area_pa = cpu_to_le64(qpmr->shadow);
+	if (iwpbl->pbl_allocated) {
+		init_info->virtual_map = true;
+		init_info->sq_pa = qpmr->sq_pbl.idx;
+		init_info->rq_pa = qpmr->rq_pbl.idx;
+	} else {
+		init_info->sq_pa = qpmr->sq_pbl.addr;
+		init_info->rq_pa = qpmr->rq_pbl.addr;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_setup_kmode_qp - setup initialization for kernel mode qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: initialize info to return
+ */
+static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
+				struct i40iw_qp *iwqp,
+				struct i40iw_qp_init_info *info)
+{
+	struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem;
+	u32 sqdepth, rqdepth;
+	u32 sq_size, rq_size;
+	u8 sqshift, rqshift;
+	u32 size;
+	enum i40iw_status_code status;
+	struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+
+	ukinfo->max_sq_frag_cnt = I40IW_MAX_WQ_FRAGMENT_COUNT;
+
+	sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1);
+	rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
+
+	status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, &sqshift);
+	if (!status)
+		status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, &rqshift);
+
+	if (status)
+		return -ENOSYS;
+
+	sqdepth = sq_size << sqshift;
+	rqdepth = rq_size << rqshift;
+
+	size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3);
+	iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
+
+	ukinfo->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)iwqp->kqp.wrid_mem;
+	if (!ukinfo->sq_wrtrk_array)
+		return -ENOMEM;
+
+	ukinfo->rq_wrid_array = (u64 *)&ukinfo->sq_wrtrk_array[sqdepth];
+
+	size = (sqdepth + rqdepth) * I40IW_QP_WQE_MIN_SIZE;
+	size += (I40IW_SHADOW_AREA_SIZE << 3);
+
+	status = i40iw_allocate_dma_mem(iwdev->sc_dev.hw, mem, size, 256);
+	if (status) {
+		kfree(ukinfo->sq_wrtrk_array);
+		ukinfo->sq_wrtrk_array = NULL;
+		return -ENOMEM;
+	}
+
+	ukinfo->sq = mem->va;
+	info->sq_pa = mem->pa;
+
+	ukinfo->rq = &ukinfo->sq[sqdepth];
+	info->rq_pa = info->sq_pa + (sqdepth * I40IW_QP_WQE_MIN_SIZE);
+
+	ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
+	info->shadow_area_pa = info->rq_pa + (rqdepth * I40IW_QP_WQE_MIN_SIZE);
+
+	ukinfo->sq_size = sq_size;
+	ukinfo->rq_size = rq_size;
+	ukinfo->qp_id = iwqp->ibqp.qp_num;
+	return 0;
+}
+
+/**
+ * i40iw_create_qp - create qp
+ * @ibpd: ptr of pd
+ * @init_attr: attributes for qp
+ * @udata: user data for create qp
+ */
+static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
+				     struct ib_qp_init_attr *init_attr,
+				     struct ib_udata *udata)
+{
+	struct i40iw_pd *iwpd = to_iwpd(ibpd);
+	struct i40iw_device *iwdev = to_iwdev(ibpd->device);
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_qp *iwqp;
+	struct i40iw_ucontext *ucontext;
+	struct i40iw_create_qp_req req;
+	struct i40iw_create_qp_resp uresp;
+	u32 qp_num = 0;
+	void *mem;
+	enum i40iw_status_code ret;
+	int err_code;
+	int sq_size;
+	int rq_size;
+	struct i40iw_sc_qp *qp;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_qp_init_info init_info;
+	struct i40iw_create_qp_info *qp_info;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	struct i40iw_qp_host_ctx_info *ctx_info;
+	struct i40iwarp_offload_info *iwarp_info;
+	unsigned long flags;
+
+	if (init_attr->create_flags)
+		return ERR_PTR(-EINVAL);
+	if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
+		init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+
+	memset(&init_info, 0, sizeof(init_info));
+
+	sq_size = init_attr->cap.max_send_wr;
+	rq_size = init_attr->cap.max_recv_wr;
+
+	init_info.qp_uk_init_info.sq_size = sq_size;
+	init_info.qp_uk_init_info.rq_size = rq_size;
+	init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
+	init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
+
+	mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
+	if (!mem)
+		return ERR_PTR(-ENOMEM);
+
+	iwqp = (struct i40iw_qp *)mem;
+	qp = &iwqp->sc_qp;
+	qp->back_qp = (void *)iwqp;
+	qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
+
+	iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
+
+	if (i40iw_allocate_dma_mem(dev->hw,
+				   &iwqp->q2_ctx_mem,
+				   I40IW_Q2_BUFFER_SIZE + I40IW_QP_CTX_SIZE,
+				   256)) {
+		i40iw_pr_err("dma_mem failed\n");
+		err_code = -ENOMEM;
+		goto error;
+	}
+
+	init_info.q2 = iwqp->q2_ctx_mem.va;
+	init_info.q2_pa = iwqp->q2_ctx_mem.pa;
+
+	init_info.host_ctx = (void *)init_info.q2 + I40IW_Q2_BUFFER_SIZE;
+	init_info.host_ctx_pa = init_info.q2_pa + I40IW_Q2_BUFFER_SIZE;
+
+	err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_qps, iwdev->max_qp,
+					&qp_num, &iwdev->next_qp);
+	if (err_code) {
+		i40iw_pr_err("qp resource\n");
+		goto error;
+	}
+
+	iwqp->allocated_buffer = mem;
+	iwqp->iwdev = iwdev;
+	iwqp->iwpd = iwpd;
+	iwqp->ibqp.qp_num = qp_num;
+	qp = &iwqp->sc_qp;
+	iwqp->iwscq = to_iwcq(init_attr->send_cq);
+	iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
+
+	iwqp->host_ctx.va = init_info.host_ctx;
+	iwqp->host_ctx.pa = init_info.host_ctx_pa;
+	iwqp->host_ctx.size = I40IW_QP_CTX_SIZE;
+
+	init_info.pd = &iwpd->sc_pd;
+	init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
+	iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
+
+	if (init_attr->qp_type != IB_QPT_RC) {
+		err_code = -ENOSYS;
+		goto error;
+	}
+	if (iwdev->push_mode)
+		i40iw_alloc_push_page(iwdev, qp);
+	if (udata) {
+		err_code = ib_copy_from_udata(&req, udata, sizeof(req));
+		if (err_code) {
+			i40iw_pr_err("ib_copy_from_data\n");
+			goto error;
+		}
+		iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
+		if (ibpd->uobject && ibpd->uobject->context) {
+			iwqp->user_mode = 1;
+			ucontext = to_ucontext(ibpd->uobject->context);
+
+			if (req.user_wqe_buffers) {
+				spin_lock_irqsave(
+				    &ucontext->qp_reg_mem_list_lock, flags);
+				iwqp->iwpbl = i40iw_get_pbl(
+				    (unsigned long)req.user_wqe_buffers,
+				    &ucontext->qp_reg_mem_list);
+				spin_unlock_irqrestore(
+				    &ucontext->qp_reg_mem_list_lock, flags);
+
+				if (!iwqp->iwpbl) {
+					err_code = -ENODATA;
+					i40iw_pr_err("no pbl info\n");
+					goto error;
+				}
+			}
+		}
+		err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
+	} else {
+		err_code = i40iw_setup_kmode_qp(iwdev, iwqp, &init_info);
+	}
+
+	if (err_code) {
+		i40iw_pr_err("setup qp failed\n");
+		goto error;
+	}
+
+	init_info.type = I40IW_QP_TYPE_IWARP;
+	ret = dev->iw_priv_qp_ops->qp_init(qp, &init_info);
+	if (ret) {
+		err_code = -EPROTO;
+		i40iw_pr_err("qp_init fail\n");
+		goto error;
+	}
+	ctx_info = &iwqp->ctx_info;
+	iwarp_info = &iwqp->iwarp_info;
+	iwarp_info->rd_enable = true;
+	iwarp_info->wr_rdresp_en = true;
+	if (!iwqp->user_mode)
+		iwarp_info->priv_mode_en = true;
+	iwarp_info->ddp_ver = 1;
+	iwarp_info->rdmap_ver = 1;
+
+	ctx_info->iwarp_info_valid = true;
+	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+	if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) {
+		ctx_info->push_mode_en = false;
+	} else {
+		ctx_info->push_mode_en = true;
+		ctx_info->push_idx = qp->push_idx;
+	}
+
+	ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+					     (u64 *)iwqp->host_ctx.va,
+					     ctx_info);
+	ctx_info->iwarp_info_valid = false;
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request) {
+		err_code = -ENOMEM;
+		goto error;
+	}
+	cqp_info = &cqp_request->info;
+	qp_info = &cqp_request->info.in.u.qp_create.info;
+
+	memset(qp_info, 0, sizeof(*qp_info));
+
+	qp_info->cq_num_valid = true;
+	qp_info->next_iwarp_state = I40IW_QP_STATE_IDLE;
+
+	cqp_info->cqp_cmd = OP_QP_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_create.qp = qp;
+	cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+	ret = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (ret) {
+		i40iw_pr_err("CQP-OP QP create fail");
+		err_code = -EACCES;
+		goto error;
+	}
+
+	i40iw_add_ref(&iwqp->ibqp);
+	spin_lock_init(&iwqp->lock);
+	iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
+	iwdev->qp_table[qp_num] = iwqp;
+	i40iw_add_pdusecount(iwqp->iwpd);
+	if (ibpd->uobject && udata) {
+		memset(&uresp, 0, sizeof(uresp));
+		uresp.actual_sq_size = sq_size;
+		uresp.actual_rq_size = rq_size;
+		uresp.qp_id = qp_num;
+		uresp.push_idx = qp->push_idx;
+		err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		if (err_code) {
+			i40iw_pr_err("copy_to_udata failed\n");
+			i40iw_destroy_qp(&iwqp->ibqp);
+			   /* let the completion of the qp destroy free the qp */
+			return ERR_PTR(err_code);
+		}
+	}
+
+	return &iwqp->ibqp;
+error:
+	i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+	kfree(mem);
+	return ERR_PTR(err_code);
+}
+
+/**
+ * i40iw_query - query qp attributes
+ * @ibqp: qp pointer
+ * @attr: attributes pointer
+ * @attr_mask: Not used
+ * @init_attr: qp attributes to return
+ */
+static int i40iw_query_qp(struct ib_qp *ibqp,
+			  struct ib_qp_attr *attr,
+			  int attr_mask,
+			  struct ib_qp_init_attr *init_attr)
+{
+	struct i40iw_qp *iwqp = to_iwqp(ibqp);
+	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+	attr->qp_access_flags = 0;
+	attr->cap.max_send_wr = qp->qp_uk.sq_size;
+	attr->cap.max_recv_wr = qp->qp_uk.rq_size;
+	attr->cap.max_recv_sge = 1;
+	attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+	init_attr->event_handler = iwqp->ibqp.event_handler;
+	init_attr->qp_context = iwqp->ibqp.qp_context;
+	init_attr->send_cq = iwqp->ibqp.send_cq;
+	init_attr->recv_cq = iwqp->ibqp.recv_cq;
+	init_attr->srq = iwqp->ibqp.srq;
+	init_attr->cap = attr->cap;
+	return 0;
+}
+
+/**
+ * i40iw_hw_modify_qp - setup cqp for modify qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: info for modify qp
+ * @wait: flag to wait or not for modify qp completion
+ */
+void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
+			struct i40iw_modify_qp_info *info, bool wait)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_modify_qp_info *m_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+	if (!cqp_request)
+		return;
+
+	cqp_info = &cqp_request->info;
+	m_info = &cqp_info->in.u.qp_modify.info;
+	memcpy(m_info, info, sizeof(*m_info));
+	cqp_info->cqp_cmd = OP_QP_MODIFY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+	cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Modify QP fail");
+}
+
+/**
+ * i40iw_modify_qp - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data
+ */
+int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		    int attr_mask, struct ib_udata *udata)
+{
+	struct i40iw_qp *iwqp = to_iwqp(ibqp);
+	struct i40iw_device *iwdev = iwqp->iwdev;
+	struct i40iw_qp_host_ctx_info *ctx_info;
+	struct i40iwarp_offload_info *iwarp_info;
+	struct i40iw_modify_qp_info info;
+	u8 issue_modify_qp = 0;
+	u8 dont_wait = 0;
+	u32 err;
+	unsigned long flags;
+
+	memset(&info, 0, sizeof(info));
+	ctx_info = &iwqp->ctx_info;
+	iwarp_info = &iwqp->iwarp_info;
+
+	spin_lock_irqsave(&iwqp->lock, flags);
+
+	if (attr_mask & IB_QP_STATE) {
+		switch (attr->qp_state) {
+		case IB_QPS_INIT:
+		case IB_QPS_RTR:
+			if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_IDLE) {
+				err = -EINVAL;
+				goto exit;
+			}
+			if (iwqp->iwarp_state == I40IW_QP_STATE_INVALID) {
+				info.next_iwarp_state = I40IW_QP_STATE_IDLE;
+				issue_modify_qp = 1;
+			}
+			break;
+		case IB_QPS_RTS:
+			if ((iwqp->iwarp_state > (u32)I40IW_QP_STATE_RTS) ||
+			    (!iwqp->cm_id)) {
+				err = -EINVAL;
+				goto exit;
+			}
+
+			issue_modify_qp = 1;
+			iwqp->hw_tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+			iwqp->hte_added = 1;
+			info.next_iwarp_state = I40IW_QP_STATE_RTS;
+			info.tcp_ctx_valid = true;
+			info.ord_valid = true;
+			info.arp_cache_idx_valid = true;
+			info.cq_num_valid = true;
+			break;
+		case IB_QPS_SQD:
+			if (iwqp->hw_iwarp_state > (u32)I40IW_QP_STATE_RTS) {
+				err = 0;
+				goto exit;
+			}
+			if ((iwqp->iwarp_state == (u32)I40IW_QP_STATE_CLOSING) ||
+			    (iwqp->iwarp_state < (u32)I40IW_QP_STATE_RTS)) {
+				err = 0;
+				goto exit;
+			}
+			if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_CLOSING) {
+				err = -EINVAL;
+				goto exit;
+			}
+			info.next_iwarp_state = I40IW_QP_STATE_CLOSING;
+			issue_modify_qp = 1;
+			break;
+		case IB_QPS_SQE:
+			if (iwqp->iwarp_state >= (u32)I40IW_QP_STATE_TERMINATE) {
+				err = -EINVAL;
+				goto exit;
+			}
+			info.next_iwarp_state = I40IW_QP_STATE_TERMINATE;
+			issue_modify_qp = 1;
+			break;
+		case IB_QPS_ERR:
+		case IB_QPS_RESET:
+			if (iwqp->iwarp_state == (u32)I40IW_QP_STATE_ERROR) {
+				err = -EINVAL;
+				goto exit;
+			}
+			if (iwqp->sc_qp.term_flags)
+				del_timer(&iwqp->terminate_timer);
+			info.next_iwarp_state = I40IW_QP_STATE_ERROR;
+			if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
+			    iwdev->iw_status &&
+			    (iwqp->hw_tcp_state != I40IW_TCP_STATE_TIME_WAIT))
+				info.reset_tcp_conn = true;
+			else
+				dont_wait = 1;
+			issue_modify_qp = 1;
+			info.next_iwarp_state = I40IW_QP_STATE_ERROR;
+			break;
+		default:
+			err = -EINVAL;
+			goto exit;
+		}
+
+		iwqp->ibqp_state = attr->qp_state;
+
+		if (issue_modify_qp)
+			iwqp->iwarp_state = info.next_iwarp_state;
+		else
+			info.next_iwarp_state = iwqp->iwarp_state;
+	}
+	if (attr_mask & IB_QP_ACCESS_FLAGS) {
+		ctx_info->iwarp_info_valid = true;
+		if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+			iwarp_info->wr_rdresp_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+			iwarp_info->wr_rdresp_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+			iwarp_info->rd_enable = true;
+		if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
+			iwarp_info->bind_en = true;
+
+		if (iwqp->user_mode) {
+			iwarp_info->rd_enable = true;
+			iwarp_info->wr_rdresp_en = true;
+			iwarp_info->priv_mode_en = false;
+		}
+	}
+
+	if (ctx_info->iwarp_info_valid) {
+		struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+		int ret;
+
+		ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+		ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+		ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+						     (u64 *)iwqp->host_ctx.va,
+						     ctx_info);
+		if (ret) {
+			i40iw_pr_err("setting QP context\n");
+			err = -EINVAL;
+			goto exit;
+		}
+	}
+
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+
+	if (issue_modify_qp)
+		i40iw_hw_modify_qp(iwdev, iwqp, &info, true);
+
+	if (issue_modify_qp && (iwqp->ibqp_state > IB_QPS_RTS)) {
+		if (dont_wait) {
+			if (iwqp->cm_id && iwqp->hw_tcp_state) {
+				spin_lock_irqsave(&iwqp->lock, flags);
+				iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
+				iwqp->last_aeq = I40IW_AE_RESET_SENT;
+				spin_unlock_irqrestore(&iwqp->lock, flags);
+			}
+		}
+	}
+	return 0;
+exit:
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	return err;
+}
+
+/**
+ * cq_free_resources - free up recources for cq
+ * @iwdev: iwarp device
+ * @iwcq: cq ptr
+ */
+static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq)
+{
+	struct i40iw_sc_cq *cq = &iwcq->sc_cq;
+
+	if (!iwcq->user_mode)
+		i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwcq->kmem);
+	i40iw_free_resource(iwdev, iwdev->allocated_cqs, cq->cq_uk.cq_id);
+}
+
+/**
+ * cq_wq_destroy - send cq destroy cqp
+ * @iwdev: iwarp device
+ * @cq: hardware control cq
+ */
+static void cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return;
+
+	cqp_info = &cqp_request->info;
+
+	cqp_info->cqp_cmd = OP_CQ_DESTROY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.cq_destroy.cq = cq;
+	cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Destroy QP fail");
+}
+
+/**
+ * i40iw_destroy_cq - destroy cq
+ * @ib_cq: cq pointer
+ */
+static int i40iw_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct i40iw_cq *iwcq;
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_cq *cq;
+
+	if (!ib_cq) {
+		i40iw_pr_err("ib_cq == NULL\n");
+		return 0;
+	}
+
+	iwcq = to_iwcq(ib_cq);
+	iwdev = to_iwdev(ib_cq->device);
+	cq = &iwcq->sc_cq;
+	cq_wq_destroy(iwdev, cq);
+	cq_free_resources(iwdev, iwcq);
+	kfree(iwcq);
+	return 0;
+}
+
+/**
+ * i40iw_create_cq - create cq
+ * @ibdev: device pointer from stack
+ * @attr: attributes for cq
+ * @context: user context created during alloc
+ * @udata: user data
+ */
+static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
+				     const struct ib_cq_init_attr *attr,
+				     struct ib_ucontext *context,
+				     struct ib_udata *udata)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_cq *iwcq;
+	struct i40iw_pbl *iwpbl;
+	u32 cq_num = 0;
+	struct i40iw_sc_cq *cq;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_cq_init_info info;
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
+	unsigned long flags;
+	int err_code;
+	int entries = attr->cqe;
+
+	if (entries > iwdev->max_cqe)
+		return ERR_PTR(-EINVAL);
+
+	iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL);
+	if (!iwcq)
+		return ERR_PTR(-ENOMEM);
+
+	memset(&info, 0, sizeof(info));
+
+	err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_cqs,
+					iwdev->max_cq, &cq_num,
+					&iwdev->next_cq);
+	if (err_code)
+		goto error;
+
+	cq = &iwcq->sc_cq;
+	cq->back_cq = (void *)iwcq;
+	spin_lock_init(&iwcq->lock);
+
+	info.dev = dev;
+	ukinfo->cq_size = max(entries, 4);
+	ukinfo->cq_id = cq_num;
+	iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
+	info.ceqe_mask = 0;
+	info.ceq_id = 0;
+	info.ceq_id_valid = true;
+	info.ceqe_mask = 1;
+	info.type = I40IW_CQ_TYPE_IWARP;
+	if (context) {
+		struct i40iw_ucontext *ucontext;
+		struct i40iw_create_cq_req req;
+		struct i40iw_cq_mr *cqmr;
+
+		memset(&req, 0, sizeof(req));
+		iwcq->user_mode = true;
+		ucontext = to_ucontext(context);
+		if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req)))
+			goto cq_free_resources;
+
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer,
+				      &ucontext->cq_reg_mem_list);
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		if (!iwpbl) {
+			err_code = -EPROTO;
+			goto cq_free_resources;
+		}
+
+		iwcq->iwpbl = iwpbl;
+		iwcq->cq_mem_size = 0;
+		cqmr = &iwpbl->cq_mr;
+		info.shadow_area_pa = cpu_to_le64(cqmr->shadow);
+		if (iwpbl->pbl_allocated) {
+			info.virtual_map = true;
+			info.pbl_chunk_size = 1;
+			info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
+		} else {
+			info.cq_base_pa = cqmr->cq_pbl.addr;
+		}
+	} else {
+		/* Kmode allocations */
+		int rsize;
+		int shadow;
+
+		rsize = info.cq_uk_init_info.cq_size * sizeof(struct i40iw_cqe);
+		rsize = round_up(rsize, 256);
+		shadow = I40IW_SHADOW_AREA_SIZE << 3;
+		status = i40iw_allocate_dma_mem(dev->hw, &iwcq->kmem,
+						rsize + shadow, 256);
+		if (status) {
+			err_code = -ENOMEM;
+			goto cq_free_resources;
+		}
+		ukinfo->cq_base = iwcq->kmem.va;
+		info.cq_base_pa = iwcq->kmem.pa;
+		info.shadow_area_pa = info.cq_base_pa + rsize;
+		ukinfo->shadow_area = iwcq->kmem.va + rsize;
+	}
+
+	if (dev->iw_priv_cq_ops->cq_init(cq, &info)) {
+		i40iw_pr_err("init cq fail\n");
+		err_code = -EPROTO;
+		goto cq_free_resources;
+	}
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request) {
+		err_code = -ENOMEM;
+		goto cq_free_resources;
+	}
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_CQ_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.cq_create.cq = cq;
+	cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status) {
+		i40iw_pr_err("CQP-OP Create QP fail");
+		err_code = -EPROTO;
+		goto cq_free_resources;
+	}
+
+	if (context) {
+		struct i40iw_create_cq_resp resp;
+
+		memset(&resp, 0, sizeof(resp));
+		resp.cq_id = info.cq_uk_init_info.cq_id;
+		resp.cq_size = info.cq_uk_init_info.cq_size;
+		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+			i40iw_pr_err("copy to user data\n");
+			err_code = -EPROTO;
+			goto cq_destroy;
+		}
+	}
+
+	return (struct ib_cq *)iwcq;
+
+cq_destroy:
+	cq_wq_destroy(iwdev, cq);
+cq_free_resources:
+	cq_free_resources(iwdev, iwcq);
+error:
+	kfree(iwcq);
+	return ERR_PTR(err_code);
+}
+
+/**
+ * i40iw_get_user_access - get hw access from IB access
+ * @acc: IB access to return hw access
+ */
+static inline u16 i40iw_get_user_access(int acc)
+{
+	u16 access = 0;
+
+	access |= (acc & IB_ACCESS_LOCAL_WRITE) ? I40IW_ACCESS_FLAGS_LOCALWRITE : 0;
+	access |= (acc & IB_ACCESS_REMOTE_WRITE) ? I40IW_ACCESS_FLAGS_REMOTEWRITE : 0;
+	access |= (acc & IB_ACCESS_REMOTE_READ) ? I40IW_ACCESS_FLAGS_REMOTEREAD : 0;
+	access |= (acc & IB_ACCESS_MW_BIND) ? I40IW_ACCESS_FLAGS_BIND_WINDOW : 0;
+	return access;
+}
+
+/**
+ * i40iw_free_stag - free stag resource
+ * @iwdev: iwarp device
+ * @stag: stag to free
+ */
+static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag)
+{
+	u32 stag_idx;
+
+	stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
+	i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
+}
+
+/**
+ * i40iw_create_stag - create random stag
+ * @iwdev: iwarp device
+ */
+static u32 i40iw_create_stag(struct i40iw_device *iwdev)
+{
+	u32 stag = 0;
+	u32 stag_index = 0;
+	u32 next_stag_index;
+	u32 driver_key;
+	u32 random;
+	u8 consumer_key;
+	int ret;
+
+	get_random_bytes(&random, sizeof(random));
+	consumer_key = (u8)random;
+
+	driver_key = random & ~iwdev->mr_stagmask;
+	next_stag_index = (random & iwdev->mr_stagmask) >> 8;
+	next_stag_index %= iwdev->max_mr;
+
+	ret = i40iw_alloc_resource(iwdev,
+				   iwdev->allocated_mrs, iwdev->max_mr,
+				   &stag_index, &next_stag_index);
+	if (!ret) {
+		stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
+		stag |= driver_key;
+		stag += (u32)consumer_key;
+	}
+	return stag;
+}
+
+/**
+ * i40iw_next_pbl_addr - Get next pbl address
+ * @palloc: Poiner to allocated pbles
+ * @pbl: pointer to a pble
+ * @pinfo: info pointer
+ * @idx: index
+ */
+static inline u64 *i40iw_next_pbl_addr(struct i40iw_pble_alloc *palloc,
+				       u64 *pbl,
+				       struct i40iw_pble_info **pinfo,
+				       u32 *idx)
+{
+	*idx += 1;
+	if ((!(*pinfo)) || (*idx != (*pinfo)->cnt))
+		return ++pbl;
+	*idx = 0;
+	(*pinfo)++;
+	return (u64 *)(*pinfo)->addr;
+}
+
+/**
+ * i40iw_copy_user_pgaddrs - copy user page address to pble's os locally
+ * @iwmr: iwmr for IB's user page addresses
+ * @pbl: ple pointer to save 1 level or 0 level pble
+ * @level: indicated level 0, 1 or 2
+ */
+static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
+				    u64 *pbl,
+				    enum i40iw_pble_level level)
+{
+	struct ib_umem *region = iwmr->region;
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	int chunk_pages, entry, pg_shift, i;
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct i40iw_pble_info *pinfo;
+	struct scatterlist *sg;
+	u32 idx = 0;
+
+	pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
+	pg_shift = ffs(region->page_size) - 1;
+	for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+		chunk_pages = sg_dma_len(sg) >> pg_shift;
+		if ((iwmr->type == IW_MEMREG_TYPE_QP) &&
+		    !iwpbl->qp_mr.sq_page)
+			iwpbl->qp_mr.sq_page = sg_page(sg);
+		for (i = 0; i < chunk_pages; i++) {
+			*pbl = cpu_to_le64(sg_dma_address(sg) + region->page_size * i);
+			pbl = i40iw_next_pbl_addr(palloc, pbl, &pinfo, &idx);
+		}
+	}
+}
+
+/**
+ * i40iw_setup_pbles - copy user pg address to pble's
+ * @iwdev: iwarp device
+ * @iwmr: mr pointer for this memory registration
+ * @use_pbles: flag if to use pble's or memory (level 0)
+ */
+static int i40iw_setup_pbles(struct i40iw_device *iwdev,
+			     struct i40iw_mr *iwmr,
+			     bool use_pbles)
+{
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct i40iw_pble_info *pinfo;
+	u64 *pbl;
+	enum i40iw_status_code status;
+	enum i40iw_pble_level level = I40IW_LEVEL_1;
+
+	if (!use_pbles && (iwmr->page_cnt > MAX_SAVE_PAGE_ADDRS))
+		return -ENOMEM;
+
+	if (use_pbles) {
+		mutex_lock(&iwdev->pbl_mutex);
+		status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
+		mutex_unlock(&iwdev->pbl_mutex);
+		if (status)
+			return -ENOMEM;
+
+		iwpbl->pbl_allocated = true;
+		level = palloc->level;
+		pinfo = (level == I40IW_LEVEL_1) ? &palloc->level1 : palloc->level2.leaf;
+		pbl = (u64 *)pinfo->addr;
+	} else {
+		pbl = iwmr->pgaddrmem;
+	}
+
+	i40iw_copy_user_pgaddrs(iwmr, pbl, level);
+	return 0;
+}
+
+/**
+ * i40iw_handle_q_mem - handle memory for qp and cq
+ * @iwdev: iwarp device
+ * @req: information for q memory management
+ * @iwpbl: pble struct
+ * @use_pbles: flag to use pble
+ */
+static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
+			      struct i40iw_mem_reg_req *req,
+			      struct i40iw_pbl *iwpbl,
+			      bool use_pbles)
+{
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct i40iw_mr *iwmr = iwpbl->iwmr;
+	struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
+	struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
+	struct i40iw_hmc_pble *hmc_p;
+	u64 *arr = iwmr->pgaddrmem;
+	int err;
+	int total;
+
+	total = req->sq_pages + req->rq_pages + req->cq_pages;
+
+	err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
+	if (err)
+		return err;
+	if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
+		i40iw_free_pble(iwdev->pble_rsrc, palloc);
+		iwpbl->pbl_allocated = false;
+		return -ENOMEM;
+	}
+
+	if (use_pbles)
+		arr = (u64 *)palloc->level1.addr;
+	if (req->reg_type == IW_MEMREG_TYPE_QP) {
+		hmc_p = &qpmr->sq_pbl;
+		qpmr->shadow = (dma_addr_t)arr[total];
+		if (use_pbles) {
+			hmc_p->idx = palloc->level1.idx;
+			hmc_p = &qpmr->rq_pbl;
+			hmc_p->idx = palloc->level1.idx + req->sq_pages;
+		} else {
+			hmc_p->addr = arr[0];
+			hmc_p = &qpmr->rq_pbl;
+			hmc_p->addr = arr[1];
+		}
+	} else {		/* CQ */
+		hmc_p = &cqmr->cq_pbl;
+		cqmr->shadow = (dma_addr_t)arr[total];
+		if (use_pbles)
+			hmc_p->idx = palloc->level1.idx;
+		else
+			hmc_p->addr = arr[0];
+	}
+	return err;
+}
+
+/**
+ * i40iw_hwreg_mr - send cqp command for memory registration
+ * @iwdev: iwarp device
+ * @iwmr: iwarp mr pointer
+ * @access: access for MR
+ */
+static int i40iw_hwreg_mr(struct i40iw_device *iwdev,
+			  struct i40iw_mr *iwmr,
+			  u16 access)
+{
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	struct i40iw_reg_ns_stag_info *stag_info;
+	struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	enum i40iw_status_code status;
+	int err = 0;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
+	memset(stag_info, 0, sizeof(*stag_info));
+	stag_info->va = (void *)(unsigned long)iwpbl->user_base;
+	stag_info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
+	stag_info->stag_key = (u8)iwmr->stag;
+	stag_info->total_len = iwmr->length;
+	stag_info->access_rights = access;
+	stag_info->pd_id = iwpd->sc_pd.pd_id;
+	stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
+
+	if (iwmr->page_cnt > 1) {
+		if (palloc->level == I40IW_LEVEL_1) {
+			stag_info->first_pm_pbl_index = palloc->level1.idx;
+			stag_info->chunk_size = 1;
+		} else {
+			stag_info->first_pm_pbl_index = palloc->level2.root.idx;
+			stag_info->chunk_size = 3;
+		}
+	} else {
+		stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
+	}
+
+	cqp_info->cqp_cmd = OP_MR_REG_NON_SHARED;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->sc_dev;
+	cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
+
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status) {
+		err = -ENOMEM;
+		i40iw_pr_err("CQP-OP MR Reg fail");
+	}
+	return err;
+}
+
+/**
+ * i40iw_reg_user_mr - Register a user memory region
+ * @pd: ptr of pd
+ * @start: virtual start address
+ * @length: length of mr
+ * @virt: virtual address
+ * @acc: access of mr
+ * @udata: user data
+ */
+static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
+				       u64 start,
+				       u64 length,
+				       u64 virt,
+				       int acc,
+				       struct ib_udata *udata)
+{
+	struct i40iw_pd *iwpd = to_iwpd(pd);
+	struct i40iw_device *iwdev = to_iwdev(pd->device);
+	struct i40iw_ucontext *ucontext;
+	struct i40iw_pble_alloc *palloc;
+	struct i40iw_pbl *iwpbl;
+	struct i40iw_mr *iwmr;
+	struct ib_umem *region;
+	struct i40iw_mem_reg_req req;
+	u32 pbl_depth = 0;
+	u32 stag = 0;
+	u16 access;
+	u32 region_length;
+	bool use_pbles = false;
+	unsigned long flags;
+	int err = -ENOSYS;
+
+	region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+	if (IS_ERR(region))
+		return (struct ib_mr *)region;
+
+	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+		ib_umem_release(region);
+		return ERR_PTR(-EFAULT);
+	}
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr) {
+		ib_umem_release(region);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	iwpbl = &iwmr->iwpbl;
+	iwpbl->iwmr = iwmr;
+	iwmr->region = region;
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.device = pd->device;
+	ucontext = to_ucontext(pd->uobject->context);
+	region_length = region->length + (start & 0xfff);
+	pbl_depth = region_length >> 12;
+	pbl_depth += (region_length & (4096 - 1)) ? 1 : 0;
+	iwmr->length = region->length;
+
+	iwpbl->user_base = virt;
+	palloc = &iwpbl->pble_alloc;
+
+	iwmr->type = req.reg_type;
+	iwmr->page_cnt = pbl_depth;
+
+	switch (req.reg_type) {
+	case IW_MEMREG_TYPE_QP:
+		use_pbles = ((req.sq_pages + req.rq_pages) > 2);
+		err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+		if (err)
+			goto error;
+		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+		list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+		break;
+	case IW_MEMREG_TYPE_CQ:
+		use_pbles = (req.cq_pages > 1);
+		err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+		if (err)
+			goto error;
+
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		break;
+	case IW_MEMREG_TYPE_MEM:
+		access = I40IW_ACCESS_FLAGS_LOCALREAD;
+
+		use_pbles = (iwmr->page_cnt != 1);
+		err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
+		if (err)
+			goto error;
+
+		access |= i40iw_get_user_access(acc);
+		stag = i40iw_create_stag(iwdev);
+		if (!stag) {
+			err = -ENOMEM;
+			goto error;
+		}
+
+		iwmr->stag = stag;
+		iwmr->ibmr.rkey = stag;
+		iwmr->ibmr.lkey = stag;
+
+		err = i40iw_hwreg_mr(iwdev, iwmr, access);
+		if (err) {
+			i40iw_free_stag(iwdev, stag);
+			goto error;
+		}
+		break;
+	default:
+		goto error;
+	}
+
+	iwmr->type = req.reg_type;
+	if (req.reg_type == IW_MEMREG_TYPE_MEM)
+		i40iw_add_pdusecount(iwpd);
+	return &iwmr->ibmr;
+
+error:
+	if (palloc->level != I40IW_LEVEL_0)
+		i40iw_free_pble(iwdev->pble_rsrc, palloc);
+	ib_umem_release(region);
+	kfree(iwmr);
+	return ERR_PTR(err);
+}
+
+/**
+ * i40iw_reg_phys_mr - register kernel physical memory
+ * @pd: ibpd pointer
+ * @addr: physical address of memory to register
+ * @size: size of memory to register
+ * @acc: Access rights
+ * @iova_start: start of virtual address for physical buffers
+ */
+struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
+				u64 addr,
+				u64 size,
+				int acc,
+				u64 *iova_start)
+{
+	struct i40iw_pd *iwpd = to_iwpd(pd);
+	struct i40iw_device *iwdev = to_iwdev(pd->device);
+	struct i40iw_pbl *iwpbl;
+	struct i40iw_mr *iwmr;
+	enum i40iw_status_code status;
+	u32 stag;
+	u16 access = I40IW_ACCESS_FLAGS_LOCALREAD;
+	int ret;
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr)
+		return ERR_PTR(-ENOMEM);
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.device = pd->device;
+	iwpbl = &iwmr->iwpbl;
+	iwpbl->iwmr = iwmr;
+	iwmr->type = IW_MEMREG_TYPE_MEM;
+	iwpbl->user_base = *iova_start;
+	stag = i40iw_create_stag(iwdev);
+	if (!stag) {
+		ret = -EOVERFLOW;
+		goto err;
+	}
+	access |= i40iw_get_user_access(acc);
+	iwmr->stag = stag;
+	iwmr->ibmr.rkey = stag;
+	iwmr->ibmr.lkey = stag;
+	iwmr->page_cnt = 1;
+	iwmr->pgaddrmem[0]  = addr;
+	status = i40iw_hwreg_mr(iwdev, iwmr, access);
+	if (status) {
+		i40iw_free_stag(iwdev, stag);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	i40iw_add_pdusecount(iwpd);
+	return &iwmr->ibmr;
+ err:
+	kfree(iwmr);
+	return ERR_PTR(ret);
+}
+
+/**
+ * i40iw_get_dma_mr - register physical mem
+ * @pd: ptr of pd
+ * @acc: access for memory
+ */
+static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	u64 kva = 0;
+
+	return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
+}
+
+/**
+ * i40iw_del_mem_list - Deleting pbl list entries for CQ/QP
+ * @iwmr: iwmr for IB's user page addresses
+ * @ucontext: ptr to user context
+ */
+static void i40iw_del_memlist(struct i40iw_mr *iwmr,
+			      struct i40iw_ucontext *ucontext)
+{
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	unsigned long flags;
+
+	switch (iwmr->type) {
+	case IW_MEMREG_TYPE_CQ:
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		if (!list_empty(&ucontext->cq_reg_mem_list))
+			list_del(&iwpbl->list);
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		break;
+	case IW_MEMREG_TYPE_QP:
+		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+		if (!list_empty(&ucontext->qp_reg_mem_list))
+			list_del(&iwpbl->list);
+		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_dereg_mr - deregister mr
+ * @ib_mr: mr ptr for dereg
+ */
+static int i40iw_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct ib_pd *ibpd = ib_mr->pd;
+	struct i40iw_pd *iwpd = to_iwpd(ibpd);
+	struct i40iw_mr *iwmr = to_iwmr(ib_mr);
+	struct i40iw_device *iwdev = to_iwdev(ib_mr->device);
+	enum i40iw_status_code status;
+	struct i40iw_dealloc_stag_info *info;
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	u32 stag_idx;
+
+	if (iwmr->region)
+		ib_umem_release(iwmr->region);
+
+	if (iwmr->type != IW_MEMREG_TYPE_MEM) {
+		if (ibpd->uobject) {
+			struct i40iw_ucontext *ucontext;
+
+			ucontext = to_ucontext(ibpd->uobject->context);
+			i40iw_del_memlist(iwmr, ucontext);
+		}
+		if (iwpbl->pbl_allocated)
+			i40iw_free_pble(iwdev->pble_rsrc, palloc);
+		kfree(iwpbl->iwmr);
+		iwpbl->iwmr = NULL;
+		return 0;
+	}
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.dealloc_stag.info;
+	memset(info, 0, sizeof(*info));
+
+	info->pd_id = cpu_to_le32(iwpd->sc_pd.pd_id & 0x00007fff);
+	info->stag_idx = RS_64_1(ib_mr->rkey, I40IW_CQPSQ_STAG_IDX_SHIFT);
+	stag_idx = info->stag_idx;
+	info->mr = true;
+	if (iwpbl->pbl_allocated)
+		info->dealloc_pbl = true;
+
+	cqp_info->cqp_cmd = OP_DEALLOC_STAG;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.dealloc_stag.dev = &iwdev->sc_dev;
+	cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP dealloc failed for stag_idx = 0x%x\n", stag_idx);
+	i40iw_rem_pdusecount(iwpd, iwdev);
+	i40iw_free_stag(iwdev, iwmr->stag);
+	if (iwpbl->pbl_allocated)
+		i40iw_free_pble(iwdev->pble_rsrc, palloc);
+	kfree(iwmr);
+	return 0;
+}
+
+/**
+ * i40iw_show_rev
+ */
+static ssize_t i40iw_show_rev(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct i40iw_ib_device *iwibdev = container_of(dev,
+						       struct i40iw_ib_device,
+						       ibdev.dev);
+	u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
+
+	return sprintf(buf, "%x\n", hw_rev);
+}
+
+/**
+ * i40iw_show_fw_ver
+ */
+static ssize_t i40iw_show_fw_ver(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	u32 firmware_version = I40IW_FW_VERSION;
+
+	return sprintf(buf, "%u.%u\n", firmware_version,
+		       (firmware_version & 0x000000ff));
+}
+
+/**
+ * i40iw_show_hca
+ */
+static ssize_t i40iw_show_hca(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "I40IW\n");
+}
+
+/**
+ * i40iw_show_board
+ */
+static ssize_t i40iw_show_board(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
+
+static struct device_attribute *i40iw_dev_attributes[] = {
+	&dev_attr_hw_rev,
+	&dev_attr_fw_ver,
+	&dev_attr_hca_type,
+	&dev_attr_board_id
+};
+
+/**
+ * i40iw_copy_sg_list - copy sg list for qp
+ * @sg_list: copied into sg_list
+ * @sgl: copy from sgl
+ * @num_sges: count of sg entries
+ */
+static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, int num_sges)
+{
+	unsigned int i;
+
+	for (i = 0; (i < num_sges) && (i < I40IW_MAX_WQ_FRAGMENT_COUNT); i++) {
+		sg_list[i].tag_off = sgl[i].addr;
+		sg_list[i].len = sgl[i].length;
+		sg_list[i].stag = sgl[i].lkey;
+	}
+}
+
+/**
+ * i40iw_post_send -  kernel application wr
+ * @ibqp: qp ptr for wr
+ * @ib_wr: work request ptr
+ * @bad_wr: return of bad wr if err
+ */
+static int i40iw_post_send(struct ib_qp *ibqp,
+			   struct ib_send_wr *ib_wr,
+			   struct ib_send_wr **bad_wr)
+{
+	struct i40iw_qp *iwqp;
+	struct i40iw_qp_uk *ukqp;
+	struct i40iw_post_sq_info info;
+	enum i40iw_status_code ret;
+	int err = 0;
+	unsigned long flags;
+
+	iwqp = (struct i40iw_qp *)ibqp;
+	ukqp = &iwqp->sc_qp.qp_uk;
+
+	spin_lock_irqsave(&iwqp->lock, flags);
+	while (ib_wr) {
+		memset(&info, 0, sizeof(info));
+		info.wr_id = (u64)(ib_wr->wr_id);
+		if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
+			info.signaled = true;
+		if (ib_wr->send_flags & IB_SEND_FENCE)
+			info.read_fence = true;
+
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND:
+			if (ib_wr->send_flags & IB_SEND_SOLICITED)
+				info.op_type = I40IW_OP_TYPE_SEND_SOL;
+			else
+				info.op_type = I40IW_OP_TYPE_SEND;
+
+			if (ib_wr->send_flags & IB_SEND_INLINE) {
+				info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
+				info.op.inline_send.len = ib_wr->sg_list[0].length;
+				ret = ukqp->ops.iw_inline_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+			} else {
+				info.op.send.num_sges = ib_wr->num_sge;
+				info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
+				ret = ukqp->ops.iw_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+			}
+
+			if (ret)
+				err = -EIO;
+			break;
+		case IB_WR_RDMA_WRITE:
+			info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
+
+			if (ib_wr->send_flags & IB_SEND_INLINE) {
+				info.op.inline_rdma_write.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
+				info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
+				info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+				info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+				info.op.inline_rdma_write.rem_addr.len = ib_wr->sg_list->length;
+				ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
+			} else {
+				info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
+				info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+				info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+				info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+				info.op.rdma_write.rem_addr.len = ib_wr->sg_list->length;
+				ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
+			}
+
+			if (ret)
+				err = -EIO;
+			break;
+		case IB_WR_RDMA_READ:
+			info.op_type = I40IW_OP_TYPE_RDMA_READ;
+			info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+			info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+			info.op.rdma_read.rem_addr.len = ib_wr->sg_list->length;
+			info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
+			info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
+			info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
+			ret = ukqp->ops.iw_rdma_read(ukqp, &info, false, false);
+			if (ret)
+				err = -EIO;
+			break;
+		default:
+			err = -EINVAL;
+			i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
+				     ib_wr->opcode);
+			break;
+		}
+
+		if (err)
+			break;
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	else
+		ukqp->ops.iw_qp_post_wr(ukqp);
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+
+	return err;
+}
+
+/**
+ * i40iw_post_recv - post receive wr for kernel application
+ * @ibqp: ib qp pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+static int i40iw_post_recv(struct ib_qp *ibqp,
+			   struct ib_recv_wr *ib_wr,
+			   struct ib_recv_wr **bad_wr)
+{
+	struct i40iw_qp *iwqp;
+	struct i40iw_qp_uk *ukqp;
+	struct i40iw_post_rq_info post_recv;
+	struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
+	enum i40iw_status_code ret = 0;
+	unsigned long flags;
+
+	iwqp = (struct i40iw_qp *)ibqp;
+	ukqp = &iwqp->sc_qp.qp_uk;
+
+	memset(&post_recv, 0, sizeof(post_recv));
+	spin_lock_irqsave(&iwqp->lock, flags);
+	while (ib_wr) {
+		post_recv.num_sges = ib_wr->num_sge;
+		post_recv.wr_id = ib_wr->wr_id;
+		i40iw_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
+		post_recv.sg_list = sg_list;
+		ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
+		if (ret) {
+			i40iw_pr_err(" post_recv err %d\n", ret);
+			*bad_wr = ib_wr;
+			goto out;
+		}
+		ib_wr = ib_wr->next;
+	}
+ out:
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	if (ret)
+		return -ENOSYS;
+	return 0;
+}
+
+/**
+ * i40iw_poll_cq - poll cq for completion (kernel apps)
+ * @ibcq: cq to poll
+ * @num_entries: number of entries to poll
+ * @entry: wr of entry completed
+ */
+static int i40iw_poll_cq(struct ib_cq *ibcq,
+			 int num_entries,
+			 struct ib_wc *entry)
+{
+	struct i40iw_cq *iwcq;
+	int cqe_count = 0;
+	struct i40iw_cq_poll_info cq_poll_info;
+	enum i40iw_status_code ret;
+	struct i40iw_cq_uk *ukcq;
+	struct i40iw_sc_qp *qp;
+	unsigned long flags;
+
+	iwcq = (struct i40iw_cq *)ibcq;
+	ukcq = &iwcq->sc_cq.cq_uk;
+
+	spin_lock_irqsave(&iwcq->lock, flags);
+	while (cqe_count < num_entries) {
+		ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
+		if (ret == I40IW_ERR_QUEUE_EMPTY) {
+			break;
+		} else if (ret) {
+			if (!cqe_count)
+				cqe_count = -1;
+			break;
+		}
+		entry->wc_flags = 0;
+		entry->wr_id = cq_poll_info.wr_id;
+		if (!cq_poll_info.error)
+			entry->status = IB_WC_SUCCESS;
+		else
+			entry->status = IB_WC_WR_FLUSH_ERR;
+
+		switch (cq_poll_info.op_type) {
+		case I40IW_OP_TYPE_RDMA_WRITE:
+			entry->opcode = IB_WC_RDMA_WRITE;
+			break;
+		case I40IW_OP_TYPE_RDMA_READ_INV_STAG:
+		case I40IW_OP_TYPE_RDMA_READ:
+			entry->opcode = IB_WC_RDMA_READ;
+			break;
+		case I40IW_OP_TYPE_SEND_SOL:
+		case I40IW_OP_TYPE_SEND_SOL_INV:
+		case I40IW_OP_TYPE_SEND_INV:
+		case I40IW_OP_TYPE_SEND:
+			entry->opcode = IB_WC_SEND;
+			break;
+		case I40IW_OP_TYPE_REC:
+			entry->opcode = IB_WC_RECV;
+			break;
+		default:
+			entry->opcode = IB_WC_RECV;
+			break;
+		}
+
+		entry->vendor_err =
+		    cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
+		entry->ex.imm_data = 0;
+		qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
+		entry->qp = (struct ib_qp *)qp->back_qp;
+		entry->src_qp = cq_poll_info.qp_id;
+		entry->byte_len = cq_poll_info.bytes_xfered;
+		entry++;
+		cqe_count++;
+	}
+	spin_unlock_irqrestore(&iwcq->lock, flags);
+	return cqe_count;
+}
+
+/**
+ * i40iw_req_notify_cq - arm cq kernel application
+ * @ibcq: cq to arm
+ * @notify_flags: notofication flags
+ */
+static int i40iw_req_notify_cq(struct ib_cq *ibcq,
+			       enum ib_cq_notify_flags notify_flags)
+{
+	struct i40iw_cq *iwcq;
+	struct i40iw_cq_uk *ukcq;
+	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED;
+
+	iwcq = (struct i40iw_cq *)ibcq;
+	ukcq = &iwcq->sc_cq.cq_uk;
+	if (notify_flags == IB_CQ_NEXT_COMP)
+		cq_notify = IW_CQ_COMPL_EVENT;
+	ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
+	return 0;
+}
+
+/**
+ * i40iw_port_immutable - return port's immutable data
+ * @ibdev: ib dev struct
+ * @port_num: port number
+ * @immutable: immutable data for the port return
+ */
+static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
+				struct ib_port_immutable *immutable)
+{
+	struct ib_port_attr attr;
+	int err;
+
+	err = i40iw_query_port(ibdev, port_num, &attr);
+
+	if (err)
+		return err;
+
+	immutable->pkey_tbl_len = attr.pkey_tbl_len;
+	immutable->gid_tbl_len = attr.gid_tbl_len;
+	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+	return 0;
+}
+
+/**
+ * i40iw_get_protocol_stats - Populates the rdma_stats structure
+ * @ibdev: ib dev struct
+ * @stats: iw protocol stats struct
+ */
+static int i40iw_get_protocol_stats(struct ib_device *ibdev,
+				    union rdma_protocol_stats *stats)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+	struct timespec curr_time;
+	static struct timespec last_rd_time = {0, 0};
+	enum i40iw_status_code status = 0;
+	unsigned long flags;
+
+	curr_time = current_kernel_time();
+	memset(stats, 0, sizeof(*stats));
+
+	if (dev->is_pf) {
+		spin_lock_irqsave(&devstat->stats_lock, flags);
+		devstat->ops.iw_hw_stat_read_all(devstat,
+			&devstat->hw_stats);
+		spin_unlock_irqrestore(&devstat->stats_lock, flags);
+	} else {
+		if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1)
+			status = i40iw_vchnl_vf_get_pe_stats(dev,
+							     &devstat->hw_stats);
+
+		if (status)
+			return -ENOSYS;
+	}
+
+	stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] +
+				 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXPKTS];
+	stats->iw.ipInTruncatedPkts = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] +
+				      hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC];
+	stats->iw.ipInDiscards = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] +
+				 hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD];
+	stats->iw.ipOutNoRoutes = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] +
+				  hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE];
+	stats->iw.ipReasmReqds = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] +
+				 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS];
+	stats->iw.ipFragCreates = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] +
+				  hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS];
+	stats->iw.ipInMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] +
+				  hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS];
+	stats->iw.ipOutMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] +
+				   hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXMCPKTS];
+	stats->iw.tcpOutSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPTXSEG];
+	stats->iw.tcpInSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPRXSEGS];
+	stats->iw.tcpRetransSegs = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_TCPRTXSEG];
+
+	last_rd_time = curr_time;
+	return 0;
+}
+
+/**
+ * i40iw_query_gid - Query port GID
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: Entry index
+ * @gid: Global ID
+ */
+static int i40iw_query_gid(struct ib_device *ibdev,
+			   u8 port,
+			   int index,
+			   union ib_gid *gid)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+
+	memset(gid->raw, 0, sizeof(gid->raw));
+	ether_addr_copy(gid->raw, iwdev->netdev->dev_addr);
+	return 0;
+}
+
+/**
+ * i40iw_modify_port  Modify port properties
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @port_modify_mask: mask for port modifications
+ * @props: port properties
+ */
+static int i40iw_modify_port(struct ib_device *ibdev,
+			     u8 port,
+			     int port_modify_mask,
+			     struct ib_port_modify *props)
+{
+	return 0;
+}
+
+/**
+ * i40iw_query_pkey - Query partition key
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: index of pkey
+ * @pkey: pointer to store the pkey
+ */
+static int i40iw_query_pkey(struct ib_device *ibdev,
+			    u8 port,
+			    u16 index,
+			    u16 *pkey)
+{
+	*pkey = 0;
+	return 0;
+}
+
+/**
+ * i40iw_create_ah - create address handle
+ * @ibpd: ptr of pd
+ * @ah_attr: address handle attributes
+ */
+static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
+				     struct ib_ah_attr *attr)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+/**
+ * i40iw_destroy_ah - Destroy address handle
+ * @ah: pointer to address handle
+ */
+static int i40iw_destroy_ah(struct ib_ah *ah)
+{
+	return -ENOSYS;
+}
+
+/**
+ * i40iw_init_rdma_device - initialization of iwarp device
+ * @iwdev: iwarp device
+ */
+static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev)
+{
+	struct i40iw_ib_device *iwibdev;
+	struct net_device *netdev = iwdev->netdev;
+	struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
+
+	iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev));
+	if (!iwibdev) {
+		i40iw_pr_err("iwdev == NULL\n");
+		return NULL;
+	}
+	strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
+	iwibdev->ibdev.owner = THIS_MODULE;
+	iwdev->iwibdev = iwibdev;
+	iwibdev->iwdev = iwdev;
+
+	iwibdev->ibdev.node_type = RDMA_NODE_RNIC;
+	ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr);
+
+	iwibdev->ibdev.uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND);
+	iwibdev->ibdev.phys_port_cnt = 1;
+	iwibdev->ibdev.num_comp_vectors = 1;
+	iwibdev->ibdev.dma_device = &pcidev->dev;
+	iwibdev->ibdev.dev.parent = &pcidev->dev;
+	iwibdev->ibdev.query_port = i40iw_query_port;
+	iwibdev->ibdev.modify_port = i40iw_modify_port;
+	iwibdev->ibdev.query_pkey = i40iw_query_pkey;
+	iwibdev->ibdev.query_gid = i40iw_query_gid;
+	iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext;
+	iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext;
+	iwibdev->ibdev.mmap = i40iw_mmap;
+	iwibdev->ibdev.alloc_pd = i40iw_alloc_pd;
+	iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd;
+	iwibdev->ibdev.create_qp = i40iw_create_qp;
+	iwibdev->ibdev.modify_qp = i40iw_modify_qp;
+	iwibdev->ibdev.query_qp = i40iw_query_qp;
+	iwibdev->ibdev.destroy_qp = i40iw_destroy_qp;
+	iwibdev->ibdev.create_cq = i40iw_create_cq;
+	iwibdev->ibdev.destroy_cq = i40iw_destroy_cq;
+	iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
+	iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
+	iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
+	iwibdev->ibdev.get_protocol_stats = i40iw_get_protocol_stats;
+	iwibdev->ibdev.query_device = i40iw_query_device;
+	iwibdev->ibdev.create_ah = i40iw_create_ah;
+	iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
+	iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
+	if (!iwibdev->ibdev.iwcm) {
+		ib_dealloc_device(&iwibdev->ibdev);
+		i40iw_pr_err("iwcm == NULL\n");
+		return NULL;
+	}
+
+	iwibdev->ibdev.iwcm->add_ref = i40iw_add_ref;
+	iwibdev->ibdev.iwcm->rem_ref = i40iw_rem_ref;
+	iwibdev->ibdev.iwcm->get_qp = i40iw_get_qp;
+	iwibdev->ibdev.iwcm->connect = i40iw_connect;
+	iwibdev->ibdev.iwcm->accept = i40iw_accept;
+	iwibdev->ibdev.iwcm->reject = i40iw_reject;
+	iwibdev->ibdev.iwcm->create_listen = i40iw_create_listen;
+	iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen;
+	memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
+	       sizeof(iwibdev->ibdev.iwcm->ifname));
+	iwibdev->ibdev.get_port_immutable   = i40iw_port_immutable;
+	iwibdev->ibdev.poll_cq = i40iw_poll_cq;
+	iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
+	iwibdev->ibdev.post_send = i40iw_post_send;
+	iwibdev->ibdev.post_recv = i40iw_post_recv;
+
+	return iwibdev;
+}
+
+/**
+ * i40iw_port_ibevent - indicate port event
+ * @iwdev: iwarp device
+ */
+void i40iw_port_ibevent(struct i40iw_device *iwdev)
+{
+	struct i40iw_ib_device *iwibdev = iwdev->iwibdev;
+	struct ib_event event;
+
+	event.device = &iwibdev->ibdev;
+	event.element.port_num = 1;
+	event.event = iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+	ib_dispatch_event(&event);
+}
+
+/**
+ * i40iw_unregister_rdma_device - unregister of iwarp from IB
+ * @iwibdev: rdma device ptr
+ */
+static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
+		device_remove_file(&iwibdev->ibdev.dev,
+				   i40iw_dev_attributes[i]);
+	ib_unregister_device(&iwibdev->ibdev);
+}
+
+/**
+ * i40iw_destroy_rdma_device - destroy rdma device and free resources
+ * @iwibdev: IB device ptr
+ */
+void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
+{
+	if (!iwibdev)
+		return;
+
+	i40iw_unregister_rdma_device(iwibdev);
+	kfree(iwibdev->ibdev.iwcm);
+	iwibdev->ibdev.iwcm = NULL;
+	ib_dealloc_device(&iwibdev->ibdev);
+}
+
+/**
+ * i40iw_register_rdma_device - register iwarp device to IB
+ * @iwdev: iwarp device
+ */
+int i40iw_register_rdma_device(struct i40iw_device *iwdev)
+{
+	int i, ret;
+	struct i40iw_ib_device *iwibdev;
+
+	iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
+	if (!iwdev->iwibdev)
+		return -ENOSYS;
+	iwibdev = iwdev->iwibdev;
+
+	ret = ib_register_device(&iwibdev->ibdev, NULL);
+	if (ret)
+		goto error;
+
+	for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
+		ret =
+		    device_create_file(&iwibdev->ibdev.dev,
+				       i40iw_dev_attributes[i]);
+		if (ret) {
+			while (i > 0) {
+				i--;
+				device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
+			}
+			ib_unregister_device(&iwibdev->ibdev);
+			goto error;
+		}
+	}
+	return 0;
+error:
+	kfree(iwdev->iwibdev->ibdev.iwcm);
+	iwdev->iwibdev->ibdev.iwcm = NULL;
+	ib_dealloc_device(&iwdev->iwibdev->ibdev);
+	return -ENOSYS;
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
new file mode 100644
index 0000000..1101f77
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h

@@ -0,0 +1,173 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VERBS_H
+#define I40IW_VERBS_H
+
+struct i40iw_ucontext {
+	struct ib_ucontext ibucontext;
+	struct i40iw_device *iwdev;
+	struct list_head cq_reg_mem_list;
+	spinlock_t cq_reg_mem_list_lock; /* memory list for cq's */
+	struct list_head qp_reg_mem_list;
+	spinlock_t qp_reg_mem_list_lock; /* memory list for qp's */
+};
+
+struct i40iw_pd {
+	struct ib_pd ibpd;
+	struct i40iw_sc_pd sc_pd;
+	atomic_t usecount;
+};
+
+struct i40iw_hmc_pble {
+	union {
+		u32 idx;
+		dma_addr_t addr;
+	};
+};
+
+struct i40iw_cq_mr {
+	struct i40iw_hmc_pble cq_pbl;
+	dma_addr_t shadow;
+};
+
+struct i40iw_qp_mr {
+	struct i40iw_hmc_pble sq_pbl;
+	struct i40iw_hmc_pble rq_pbl;
+	dma_addr_t shadow;
+	struct page *sq_page;
+};
+
+struct i40iw_pbl {
+	struct list_head list;
+	union {
+		struct i40iw_qp_mr qp_mr;
+		struct i40iw_cq_mr cq_mr;
+	};
+
+	bool pbl_allocated;
+	u64 user_base;
+	struct i40iw_pble_alloc pble_alloc;
+	struct i40iw_mr *iwmr;
+};
+
+#define MAX_SAVE_PAGE_ADDRS     4
+struct i40iw_mr {
+	union {
+		struct ib_mr ibmr;
+		struct ib_mw ibmw;
+		struct ib_fmr ibfmr;
+	};
+	struct ib_umem *region;
+	u16 type;
+	u32 page_cnt;
+	u32 stag;
+	u64 length;
+	u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS];
+	struct i40iw_pbl iwpbl;
+};
+
+struct i40iw_cq {
+	struct ib_cq ibcq;
+	struct i40iw_sc_cq sc_cq;
+	u16 cq_head;
+	u16 cq_size;
+	u16 cq_number;
+	bool user_mode;
+	u32 polled_completions;
+	u32 cq_mem_size;
+	struct i40iw_dma_mem kmem;
+	spinlock_t lock; /* for poll cq */
+	struct i40iw_pbl *iwpbl;
+};
+
+struct disconn_work {
+	struct work_struct work;
+	struct i40iw_qp *iwqp;
+};
+
+struct iw_cm_id;
+struct ietf_mpa_frame;
+struct i40iw_ud_file;
+
+struct i40iw_qp_kmode {
+	struct i40iw_dma_mem dma_mem;
+	u64 *wrid_mem;
+};
+
+struct i40iw_qp {
+	struct ib_qp ibqp;
+	struct i40iw_sc_qp sc_qp;
+	struct i40iw_device *iwdev;
+	struct i40iw_cq *iwscq;
+	struct i40iw_cq *iwrcq;
+	struct i40iw_pd *iwpd;
+	struct i40iw_qp_host_ctx_info ctx_info;
+	struct i40iwarp_offload_info iwarp_info;
+	void *allocated_buffer;
+	atomic_t refcount;
+	struct iw_cm_id *cm_id;
+	void *cm_node;
+	struct ib_mr *lsmm_mr;
+	struct work_struct work;
+	enum ib_qp_state ibqp_state;
+	u32 iwarp_state;
+	u32 qp_mem_size;
+	u32 last_aeq;
+	atomic_t close_timer_started;
+	spinlock_t lock; /* for post work requests */
+	struct i40iw_qp_context *iwqp_context;
+	void *pbl_vbase;
+	dma_addr_t pbl_pbase;
+	struct page *page;
+	u8 active_conn:1;
+	u8 user_mode:1;
+	u8 hte_added:1;
+	u8 flush_issued:1;
+	u8 destroyed:1;
+	u8 sig_all:1;
+	u8 pau_mode:1;
+	u8 rsvd:1;
+	u16 term_sq_flush_code;
+	u16 term_rq_flush_code;
+	u8 hw_iwarp_state;
+	u8 hw_tcp_state;
+	struct i40iw_qp_kmode kqp;
+	struct i40iw_dma_mem host_ctx;
+	struct timer_list terminate_timer;
+	struct i40iw_pbl *iwpbl;
+	struct i40iw_dma_mem q2_ctx_mem;
+	struct i40iw_dma_mem ietf_mem;
+};
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c
new file mode 100644
index 0000000..cb0f183
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.c

@@ -0,0 +1,85 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+
+/**
+ * i40iw_manage_vf_pble_bp - manage vf pble
+ * @cqp: cqp for cqp' sq wqe
+ * @info: pble info
+ * @scratch: pointer for completion
+ * @post_sq: to post and ring
+ */
+enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
+					       struct i40iw_manage_vf_pble_info *info,
+					       u64 scratch,
+					       bool post_sq)
+{
+	u64 *wqe;
+	u64 temp, header, pd_pl_pba = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	temp = LS_64(info->pd_entry_cnt, I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT) |
+	    LS_64(info->first_pd_index, I40IW_CQPSQ_MVPBP_FIRST_PD_INX) |
+	    LS_64(info->sd_index, I40IW_CQPSQ_MVPBP_SD_INX);
+	set_64bit_val(wqe, 16, temp);
+
+	header = LS_64((info->inv_pd_ent ? 1 : 0), I40IW_CQPSQ_MVPBP_INV_PD_ENT) |
+	    LS_64(I40IW_CQP_OP_MANAGE_VF_PBLE_BP, I40IW_CQPSQ_OPCODE) |
+	    LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	set_64bit_val(wqe, 24, header);
+
+	pd_pl_pba = LS_64(info->pd_pl_pba >> 3, I40IW_CQPSQ_MVPBP_PD_PLPBA);
+	set_64bit_val(wqe, 32, pd_pl_pba);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE VF_PBLE_BP WQE", wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
+	i40iw_manage_vf_pble_bp
+};

diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h
new file mode 100644
index 0000000..f649f3a
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.h

@@ -0,0 +1,62 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VF_H
+#define I40IW_VF_H
+
+struct i40iw_sc_cqp;
+
+struct i40iw_manage_vf_pble_info {
+	u32 sd_index;
+	u16 first_pd_index;
+	u16 pd_entry_cnt;
+	u8 inv_pd_ent;
+	u64 pd_pl_pba;
+};
+
+struct i40iw_vf_cqp_ops {
+	enum i40iw_status_code (*manage_vf_pble_bp)(struct i40iw_sc_cqp *,
+						    struct i40iw_manage_vf_pble_info *,
+						    u64,
+						    bool);
+};
+
+enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
+					       struct i40iw_manage_vf_pble_info *info,
+					       u64 scratch,
+					       bool post_sq);
+
+extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
+
+#endif

diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
new file mode 100644
index 0000000..6b68f78
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c

@@ -0,0 +1,748 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * vchnl_vf_send_get_ver_req - Request Channel version
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_ver_req(struct i40iw_sc_dev *dev,
+							struct i40iw_virtchnl_req *vchnl_req)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_VER;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_VER_V0;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_vf_send_get_hmc_fcn_req - Request HMC Function from VF
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_hmc_fcn_req(struct i40iw_sc_dev *dev,
+							    struct i40iw_virtchnl_req *vchnl_req)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_HMC_FCN;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_HMC_FCN_V0;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_vf_send_get_pe_stats_req - Request PE stats from VF
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev *dev,
+							     struct i40iw_virtchnl_req  *vchnl_req)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_dev_hw_stats) - 1;
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_STATS;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_STATS_V0;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_vf_send_add_hmc_objs_req - Add HMC objects
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev *dev,
+							     struct i40iw_virtchnl_req *vchnl_req,
+							     enum i40iw_hmc_rsrc_type rsrc_type,
+							     u32 start_index,
+							     u32 rsrc_count)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+	struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0;
+	add_hmc_obj->obj_type = (u16)rsrc_type;
+	add_hmc_obj->start_index = start_index;
+	add_hmc_obj->obj_count = rsrc_count;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_vf_send_del_hmc_objs_req - del HMC objects
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ * @ rsrc_type - resource type to delete
+ * @ start_index - starting index for resource
+ * @ rsrc_count - number of resource type to delete
+ */
+static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev,
+							     struct i40iw_virtchnl_req *vchnl_req,
+							     enum i40iw_hmc_rsrc_type rsrc_type,
+							     u32 start_index,
+							     u32 rsrc_count)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+	struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0;
+	add_hmc_obj->obj_type = (u16)rsrc_type;
+	add_hmc_obj->start_index = start_index;
+	add_hmc_obj->obj_count = rsrc_count;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_pf_send_get_ver_resp - Send channel version to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev,
+				       u32 vf_id,
+				       struct i40iw_virtchnl_op_buf *vchnl_msg)
+{
+	enum i40iw_status_code ret_code;
+	u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u32) - 1];
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+	memset(resp_buffer, 0, sizeof(*resp_buffer));
+	vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+	vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+	vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+	*((u32 *)vchnl_msg_resp->iw_chnl_buf) = I40IW_VCHNL_CHNL_VER_V0;
+	ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_get_hmc_fcn_resp - Send HMC Function to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   struct i40iw_virtchnl_op_buf *vchnl_msg,
+					   u16 hmc_fcn)
+{
+	enum i40iw_status_code ret_code;
+	u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u16) - 1];
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+	memset(resp_buffer, 0, sizeof(*resp_buffer));
+	vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+	vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+	vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+	*((u16 *)vchnl_msg_resp->iw_chnl_buf) = hmc_fcn;
+	ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_get_pe_stats_resp - Send PE Stats to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ * @hw_stats: HW Stats struct
+ */
+
+static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
+					    u32 vf_id,
+					    struct i40iw_virtchnl_op_buf *vchnl_msg,
+					    struct i40iw_dev_hw_stats hw_stats)
+{
+	enum i40iw_status_code ret_code;
+	u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1];
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+	memset(resp_buffer, 0, sizeof(*resp_buffer));
+	vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+	vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+	vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+	*((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats;
+	ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_error_resp - Send an error response to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id,
+				     struct i40iw_virtchnl_op_buf *vchnl_msg,
+				     u16 op_ret_code)
+{
+	enum i40iw_status_code ret_code;
+	u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf)];
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+	memset(resp_buffer, 0, sizeof(resp_buffer));
+	vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+	vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+	vchnl_msg_resp->iw_op_ret_code = (u16)op_ret_code;
+	ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn
+ * @cqp_req_param: CQP Request param value
+ * @not_used: unused CQP callback parameter
+ */
+static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param,
+					struct i40iw_ccq_cqe_info *cqe_info)
+{
+	struct i40iw_vfdev *vf_dev = callback_param;
+	struct i40iw_virt_mem vf_dev_mem;
+
+	if (cqe_info->error) {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "CQP Completion Error on Get HMC Function.  Maj = 0x%04x, Minor = 0x%04x\n",
+			    cqe_info->maj_err_code, cqe_info->min_err_code);
+		dev->vf_dev[vf_dev->iw_vf_idx] = NULL;
+		vchnl_pf_send_error_resp(dev, vf_dev->vf_id, &vf_dev->vf_msg_buffer.vchnl_msg,
+					 (u16)I40IW_ERR_CQP_COMPL_ERROR);
+		vf_dev_mem.va = vf_dev;
+		vf_dev_mem.size = sizeof(*vf_dev);
+		i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "CQP Completion Operation Return information = 0x%08x\n",
+			    cqe_info->op_ret_val);
+		vf_dev->pmf_index = (u16)cqe_info->op_ret_val;
+		vf_dev->msg_count--;
+		vchnl_pf_send_get_hmc_fcn_resp(dev,
+					       vf_dev->vf_id,
+					       &vf_dev->vf_msg_buffer.vchnl_msg,
+					       vf_dev->pmf_index);
+	}
+}
+
+/**
+ * pf_add_hmc_obj - Callback for Add HMC Object
+ * @vf_dev: pointer to the VF Device
+ */
+static void pf_add_hmc_obj_callback(void *work_vf_dev)
+{
+	struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
+	struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
+	struct i40iw_hmc_create_obj_info info;
+	struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+	enum i40iw_status_code ret_code;
+
+	if (!vf_dev->pf_hmc_initialized) {
+		ret_code = i40iw_pf_init_vfhmc(vf_dev->pf_dev, (u8)vf_dev->pmf_index, NULL);
+		if (ret_code)
+			goto add_out;
+		vf_dev->pf_hmc_initialized = true;
+	}
+
+	add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+
+	memset(&info, 0, sizeof(info));
+	info.hmc_info = hmc_info;
+	info.is_pf = false;
+	info.rsrc_type = (u32)add_hmc_obj->obj_type;
+	info.entry_type = (info.rsrc_type == I40IW_HMC_IW_PBLE) ? I40IW_SD_TYPE_PAGED : I40IW_SD_TYPE_DIRECT;
+	info.start_idx = add_hmc_obj->start_index;
+	info.count = add_hmc_obj->obj_count;
+	i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
+		    "I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE.  Add %u type %u objects\n",
+		    info.count, info.rsrc_type);
+	ret_code = i40iw_sc_create_hmc_obj(vf_dev->pf_dev, &info);
+	if (!ret_code)
+		vf_dev->hmc_info.hmc_obj[add_hmc_obj->obj_type].cnt = add_hmc_obj->obj_count;
+add_out:
+	vf_dev->msg_count--;
+	vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
+}
+
+/**
+ * pf_del_hmc_obj_callback - Callback for delete HMC Object
+ * @work_vf_dev: pointer to the VF Device
+ */
+static void pf_del_hmc_obj_callback(void *work_vf_dev)
+{
+	struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
+	struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
+	struct i40iw_hmc_del_obj_info info;
+	struct i40iw_virtchnl_hmc_obj_range *del_hmc_obj;
+	enum i40iw_status_code ret_code = I40IW_SUCCESS;
+
+	if (!vf_dev->pf_hmc_initialized)
+		goto del_out;
+
+	del_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+
+	memset(&info, 0, sizeof(info));
+	info.hmc_info = hmc_info;
+	info.is_pf = false;
+	info.rsrc_type = (u32)del_hmc_obj->obj_type;
+	info.start_idx = del_hmc_obj->start_index;
+	info.count = del_hmc_obj->obj_count;
+	i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
+		    "I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE.  Delete %u type %u objects\n",
+		    info.count, info.rsrc_type);
+	ret_code = i40iw_sc_del_hmc_obj(vf_dev->pf_dev, &info, false);
+del_out:
+	vf_dev->msg_count--;
+	vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
+}
+
+/**
+ * i40iw_vchnl_recv_pf - Receive PF virtual channel messages
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @msg: Virtual channel message buffer pointer
+ * @len: Length of the virtual channels message
+ */
+enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   u8 *msg,
+					   u16 len)
+{
+	struct i40iw_virtchnl_op_buf *vchnl_msg = (struct i40iw_virtchnl_op_buf *)msg;
+	struct i40iw_vfdev *vf_dev = NULL;
+	struct i40iw_hmc_fcn_info hmc_fcn_info;
+	u16 iw_vf_idx;
+	u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT;
+	struct i40iw_virt_mem vf_dev_mem;
+	struct i40iw_virtchnl_work_info work_info;
+	struct i40iw_dev_pestat *devstat;
+	enum i40iw_status_code ret_code;
+	unsigned long flags;
+
+	if (!dev || !msg || !len)
+		return I40IW_ERR_PARAM;
+
+	if (!dev->vchnl_up)
+		return I40IW_ERR_NOT_READY;
+	if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) {
+		if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0)
+			vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+		else
+			vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+		return I40IW_SUCCESS;
+	}
+	for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT;
+	     iw_vf_idx++) {
+		if (!dev->vf_dev[iw_vf_idx]) {
+			if (first_avail_iw_vf ==
+			    I40IW_MAX_PE_ENABLED_VF_COUNT)
+				first_avail_iw_vf = iw_vf_idx;
+			continue;
+		}
+		if (dev->vf_dev[iw_vf_idx]->vf_id == vf_id) {
+			vf_dev = dev->vf_dev[iw_vf_idx];
+			break;
+		}
+	}
+	if (vf_dev) {
+		if (!vf_dev->msg_count) {
+			vf_dev->msg_count++;
+		} else {
+			i40iw_debug(dev, I40IW_DEBUG_VIRT,
+				    "VF%u already has a channel message in progress.\n",
+				    vf_id);
+			return I40IW_SUCCESS;
+		}
+	}
+	switch (vchnl_msg->iw_op_code) {
+	case I40IW_VCHNL_OP_GET_HMC_FCN:
+		if (!vf_dev &&
+		    (first_avail_iw_vf != I40IW_MAX_PE_ENABLED_VF_COUNT)) {
+			ret_code = i40iw_allocate_virt_mem(dev->hw, &vf_dev_mem, sizeof(struct i40iw_vfdev) +
+							   (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX));
+			if (!ret_code) {
+				vf_dev = vf_dev_mem.va;
+				vf_dev->stats_initialized = false;
+				vf_dev->pf_dev = dev;
+				vf_dev->msg_count = 1;
+				vf_dev->vf_id = vf_id;
+				vf_dev->iw_vf_idx = first_avail_iw_vf;
+				vf_dev->pf_hmc_initialized = false;
+				vf_dev->hmc_info.hmc_obj = (struct i40iw_hmc_obj_info *)(&vf_dev[1]);
+				i40iw_debug(dev, I40IW_DEBUG_VIRT,
+					    "vf_dev %p, hmc_info %p, hmc_obj %p\n",
+					    vf_dev, &vf_dev->hmc_info, vf_dev->hmc_info.hmc_obj);
+				dev->vf_dev[first_avail_iw_vf] = vf_dev;
+				iw_vf_idx = first_avail_iw_vf;
+			} else {
+				i40iw_debug(dev, I40IW_DEBUG_VIRT,
+					    "VF%u Unable to allocate a VF device structure.\n",
+					    vf_id);
+				vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg, (u16)I40IW_ERR_NO_MEMORY);
+				return I40IW_SUCCESS;
+			}
+			memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+			hmc_fcn_info.callback_fcn = pf_cqp_get_hmc_fcn_callback;
+			hmc_fcn_info.vf_id = vf_id;
+			hmc_fcn_info.iw_vf_idx = vf_dev->iw_vf_idx;
+			hmc_fcn_info.cqp_callback_param = vf_dev;
+			hmc_fcn_info.free_fcn = false;
+			ret_code = i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
+			if (ret_code)
+				i40iw_debug(dev, I40IW_DEBUG_VIRT,
+					    "VF%u error CQP HMC Function operation.\n",
+					    vf_id);
+			ret_code = i40iw_device_init_pestat(&vf_dev->dev_pestat);
+			if (ret_code)
+				i40iw_debug(dev, I40IW_DEBUG_VIRT,
+					    "VF%u - i40iw_device_init_pestat failed\n",
+					    vf_id);
+			vf_dev->dev_pestat.ops.iw_hw_stat_init(&vf_dev->dev_pestat,
+							      (u8)vf_dev->pmf_index,
+							      dev->hw, false);
+			vf_dev->stats_initialized = true;
+		} else {
+			if (vf_dev) {
+				vf_dev->msg_count--;
+				vchnl_pf_send_get_hmc_fcn_resp(dev, vf_id, vchnl_msg, vf_dev->pmf_index);
+			} else {
+				vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg,
+							 (u16)I40IW_ERR_NO_MEMORY);
+			}
+		}
+		break;
+	case I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE:
+		if (!vf_dev)
+			return I40IW_ERR_BAD_PTR;
+		work_info.worker_vf_dev = vf_dev;
+		work_info.callback_fcn = pf_add_hmc_obj_callback;
+		memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+		i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
+		break;
+	case I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE:
+		if (!vf_dev)
+			return I40IW_ERR_BAD_PTR;
+		work_info.worker_vf_dev = vf_dev;
+		work_info.callback_fcn = pf_del_hmc_obj_callback;
+		memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+		i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
+		break;
+	case I40IW_VCHNL_OP_GET_STATS:
+		if (!vf_dev)
+			return I40IW_ERR_BAD_PTR;
+		devstat = &vf_dev->dev_pestat;
+		spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+		devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
+		spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+		vf_dev->msg_count--;
+		vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats);
+		break;
+	default:
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "40iw_vchnl_recv_pf: Invalid OpCode 0x%x\n",
+			    vchnl_msg->iw_op_code);
+		vchnl_pf_send_error_resp(dev, vf_id,
+					 vchnl_msg, (u16)I40IW_ERR_NOT_IMPLEMENTED);
+	}
+	return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vchnl_recv_vf - Receive VF virtual channel messages
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @msg: Virtual channel message buffer pointer
+ * @len: Length of the virtual channels message
+ */
+enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   u8 *msg,
+					   u16 len)
+{
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)msg;
+	struct i40iw_virtchnl_req *vchnl_req;
+
+	vchnl_req = (struct i40iw_virtchnl_req *)(uintptr_t)vchnl_msg_resp->iw_chnl_op_ctx;
+	vchnl_req->ret_code = (enum i40iw_status_code)vchnl_msg_resp->iw_op_ret_code;
+	if (len == (sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1)) {
+		if (vchnl_req->parm_len && vchnl_req->parm)
+			memcpy(vchnl_req->parm, vchnl_msg_resp->iw_chnl_buf, vchnl_req->parm_len);
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: Got response, data size %u\n", __func__,
+			    vchnl_req->parm_len);
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: error length on response, Got %u, expected %u\n", __func__,
+			    len, (u32)(sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1));
+	}
+
+	return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vchnl_vf_get_ver - Request Channel version
+ * @dev: IWARP device pointer
+ * @vchnl_ver: Virtual channel message version pointer
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
+					      u32 *vchnl_ver)
+{
+	struct i40iw_virtchnl_req vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.parm = vchnl_ver;
+	vchnl_req.parm_len = sizeof(*vchnl_ver);
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_get_hmc_fcn - Request HMC Function
+ * @dev: IWARP device pointer
+ * @hmc_fcn: HMC function index pointer
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
+						  u16 *hmc_fcn)
+{
+	struct i40iw_virtchnl_req vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.parm = hmc_fcn;
+	vchnl_req.parm_len = sizeof(*hmc_fcn);
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_add_hmc_objs - Add HMC Object
+ * @dev: IWARP device pointer
+ * @rsrc_type: HMC Resource type
+ * @start_index: Starting index of the objects to be added
+ * @rsrc_count: Number of resources to be added
+ */
+enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
+						   enum i40iw_hmc_rsrc_type rsrc_type,
+						   u32 start_index,
+						   u32 rsrc_count)
+{
+	struct i40iw_virtchnl_req vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_add_hmc_objs_req(dev,
+						  &vchnl_req,
+						  rsrc_type,
+						  start_index,
+						  rsrc_count);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_del_hmc_obj - del HMC obj
+ * @dev: IWARP device pointer
+ * @rsrc_type: HMC Resource type
+ * @start_index: Starting index of the object to delete
+ * @rsrc_count: Number of resources to be delete
+ */
+enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
+						  enum i40iw_hmc_rsrc_type rsrc_type,
+						  u32 start_index,
+						  u32 rsrc_count)
+{
+	struct i40iw_virtchnl_req vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_del_hmc_objs_req(dev,
+						  &vchnl_req,
+						  rsrc_type,
+						  start_index,
+						  rsrc_count);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_get_pe_stats - Get PE stats
+ * @dev: IWARP device pointer
+ * @hw_stats: HW stats struct
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
+						   struct i40iw_dev_hw_stats *hw_stats)
+{
+	struct i40iw_virtchnl_req  vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.parm = hw_stats;
+	vchnl_req.parm_len = sizeof(*hw_stats);
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}

diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
new file mode 100644
index 0000000..24886ef
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h

@@ -0,0 +1,124 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VIRTCHNL_H
+#define I40IW_VIRTCHNL_H
+
+#include "i40iw_hmc.h"
+
+#pragma pack(push, 1)
+
+struct i40iw_virtchnl_op_buf {
+	u16 iw_op_code;
+	u16 iw_op_ver;
+	u16 iw_chnl_buf_len;
+	u16 rsvd;
+	u64 iw_chnl_op_ctx;
+	/* Member alignment MUST be maintained above this location */
+	u8 iw_chnl_buf[1];
+};
+
+struct i40iw_virtchnl_resp_buf {
+	u64 iw_chnl_op_ctx;
+	u16 iw_chnl_buf_len;
+	s16 iw_op_ret_code;
+	/* Member alignment MUST be maintained above this location */
+	u16 rsvd[2];
+	u8 iw_chnl_buf[1];
+};
+
+enum i40iw_virtchnl_ops {
+	I40IW_VCHNL_OP_GET_VER = 0,
+	I40IW_VCHNL_OP_GET_HMC_FCN,
+	I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE,
+	I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE,
+	I40IW_VCHNL_OP_GET_STATS
+};
+
+#define I40IW_VCHNL_OP_GET_VER_V0 0
+#define I40IW_VCHNL_OP_GET_HMC_FCN_V0 0
+#define I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0 0
+#define I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0 0
+#define I40IW_VCHNL_OP_GET_STATS_V0 0
+#define I40IW_VCHNL_CHNL_VER_V0 0
+
+struct i40iw_dev_hw_stats;
+
+struct i40iw_virtchnl_hmc_obj_range {
+	u16 obj_type;
+	u16 rsvd;
+	u32 start_index;
+	u32 obj_count;
+};
+
+enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   u8 *msg,
+					   u16 len);
+
+enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   u8 *msg,
+					   u16 len);
+
+struct i40iw_virtchnl_req {
+	struct i40iw_sc_dev *dev;
+	struct i40iw_virtchnl_op_buf *vchnl_msg;
+	void *parm;
+	u32 vf_id;
+	u16 parm_len;
+	s16 ret_code;
+};
+
+#pragma pack(pop)
+
+enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
+					      u32 *vchnl_ver);
+
+enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
+						  u16 *hmc_fcn);
+
+enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
+						   enum i40iw_hmc_rsrc_type rsrc_type,
+						   u32 start_index,
+						   u32 rsrc_count);
+
+enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
+						  enum i40iw_hmc_rsrc_type rsrc_type,
+						  u32 start_index,
+						  u32 rsrc_count);
+
+enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
+						   struct i40iw_dev_hw_stats *hw_stats);
+#endif

diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 4e85188..7493a83 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile

@@ -1,4 +1,4 @@
 obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 
-mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o
+mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o

diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
new file mode 100644
index 0000000..c1b9de8
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c

@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_ib.h"
+
+static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy)
+{
+	switch (mlx_policy) {
+	case MLX5_POLICY_DOWN:
+		return IFLA_VF_LINK_STATE_DISABLE;
+	case MLX5_POLICY_UP:
+		return IFLA_VF_LINK_STATE_ENABLE;
+	case MLX5_POLICY_FOLLOW:
+		return IFLA_VF_LINK_STATE_AUTO;
+	default:
+		return __IFLA_VF_LINK_STATE_MAX;
+	}
+}
+
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+			  struct ifla_vf_info *info)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *rep;
+	int err;
+
+	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+	if (!rep)
+		return -ENOMEM;
+
+	err = mlx5_query_hca_vport_context(mdev, 1, 1,  vf + 1, rep);
+	if (err) {
+		mlx5_ib_warn(dev, "failed to query port policy for vf %d (%d)\n",
+			     vf, err);
+		goto free;
+	}
+	memset(info, 0, sizeof(*info));
+	info->linkstate = mlx_to_net_policy(rep->policy);
+	if (info->linkstate == __IFLA_VF_LINK_STATE_MAX)
+		err = -EINVAL;
+
+free:
+	kfree(rep);
+	return err;
+}
+
+static inline enum port_state_policy net_to_mlx_policy(int policy)
+{
+	switch (policy) {
+	case IFLA_VF_LINK_STATE_DISABLE:
+		return MLX5_POLICY_DOWN;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		return MLX5_POLICY_UP;
+	case IFLA_VF_LINK_STATE_AUTO:
+		return MLX5_POLICY_FOLLOW;
+	default:
+		return MLX5_POLICY_INVALID;
+	}
+}
+
+int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
+			      u8 port, int state)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->policy = net_to_mlx_policy(state);
+	if (in->policy == MLX5_POLICY_INVALID) {
+		err = -EINVAL;
+		goto out;
+	}
+	in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+
+out:
+	kfree(in);
+	return err;
+}
+
+int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
+			 u8 port, struct ifla_vf_stats *stats)
+{
+	int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+	struct mlx5_core_dev *mdev;
+	struct mlx5_ib_dev *dev;
+	void *out;
+	int err;
+
+	dev = to_mdev(device);
+	mdev = dev->mdev;
+
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz);
+	if (err)
+		goto ex;
+
+	stats->rx_packets = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.packets);
+	stats->tx_packets = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.packets);
+	stats->rx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.octets);
+	stats->tx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.octets);
+	stats->multicast = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_multicast.packets);
+
+ex:
+	kfree(out);
+	return err;
+}
+
+static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
+	in->node_guid = guid;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+	kfree(in);
+	return err;
+}
+
+static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
+	in->port_guid = guid;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+	kfree(in);
+	return err;
+}
+
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+			u64 guid, int type)
+{
+	if (type == IFLA_VF_IB_NODE_GUID)
+		return set_vf_node_guid(device, vf, port, guid);
+	else if (type == IFLA_VF_IB_PORT_GUID)
+		return set_vf_port_guid(device, vf, port, guid);
+
+	return -EINVAL;
+}

diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 41d8a00..1534af1 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c

@@ -208,7 +208,7 @@
 		if (!out_cnt)
 			return IB_MAD_RESULT_FAILURE;
 
-		err = mlx5_core_query_vport_counter(dev->mdev, 0,
+		err = mlx5_core_query_vport_counter(dev->mdev, 0, 0,
 						    port_num, out_cnt, sz);
 		if (!err)
 			pma_cnt_ext_assign(pma_cnt_ext, out_cnt);

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index edd8b87..5acf346 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c

@@ -284,7 +284,7 @@
 
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
-	return !dev->mdev->issi;
+	return !MLX5_CAP_GEN(dev->mdev, ib_virt);
 }
 
 enum {
@@ -563,6 +563,9 @@
 	if (MLX5_CAP_GEN(mdev, cd))
 		props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
 
+	if (!mlx5_core_is_pf(mdev))
+		props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+
 	return 0;
 }
 
@@ -700,6 +703,7 @@
 	props->qkey_viol_cntr	= rep->qkey_violation_counter;
 	props->subnet_timeout	= rep->subnet_timeout;
 	props->init_type_reply	= rep->init_type_reply;
+	props->grh_required	= rep->grh_required;
 
 	err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
 	if (err)
@@ -2350,6 +2354,12 @@
 	dev->ib_dev.map_mr_sg		= mlx5_ib_map_mr_sg;
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+	if (mlx5_core_is_pf(mdev)) {
+		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
+		dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
+		dev->ib_dev.get_vf_stats	= mlx5_ib_get_vf_stats;
+		dev->ib_dev.set_vf_guid		= mlx5_ib_set_vf_guid;
+	}
 
 	mlx5_ib_internal_fill_odp_caps(dev);
 

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 76b2b42..b46c255 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -776,7 +776,6 @@
 void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 			      unsigned long end);
-
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 {
@@ -793,6 +792,15 @@
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
+			  u8 port, struct ifla_vf_info *info);
+int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
+			      u8 port, int state);
+int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
+			 u8 port, struct ifla_vf_stats *stats);
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+			u64 guid, int type);
+
 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 			       int index);
 

diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
index 495be09..e0fdb92 100644
--- a/drivers/infiniband/hw/qib/Kconfig
+++ b/drivers/infiniband/hw/qib/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_QIB
 	tristate "Intel PCIe HCA support"
-	depends on 64BIT
+	depends on 64BIT && INFINIBAND_RDMAVT
 	---help---
 	This is a low-level driver for Intel PCIe QLE InfiniBand host
 	channel adapters.  This driver does not support the Intel

diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index 57f8103..79ebd79 100644
--- a/drivers/infiniband/hw/qib/Makefile
+++ b/drivers/infiniband/hw/qib/Makefile

@@ -1,11 +1,11 @@
 obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o
 
-ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \
-	qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \
-	qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \
-	qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \
+ib_qib-y := qib_diag.o qib_driver.o qib_eeprom.o \
+	qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \
+	qib_mad.o qib_pcie.o qib_pio_copy.o \
+	qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o \
 	qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \
-	qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \
+	qib_user_pages.o qib_user_sdma.o qib_iba7220.o \
 	qib_sd7220.o qib_iba7322.o qib_verbs.o
 
 # 6120 has no fallback if no MSI interrupts, others can do INTx

diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 7df16f7..bbf0a16 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h

@@ -52,6 +52,7 @@
 #include <linux/kref.h>
 #include <linux/sched.h>
 #include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
 
 #include "qib_common.h"
 #include "qib_verbs.h"
@@ -229,9 +230,6 @@
 	u8 redirect_seq_cnt;
 	/* ctxt rcvhdrq head offset */
 	u32 head;
-	/* lookaside fields */
-	struct qib_qp *lookaside_qp;
-	u32 lookaside_qpn;
 	/* QPs waiting for context processing */
 	struct list_head qp_wait_list;
 #ifdef CONFIG_DEBUG_FS
@@ -240,7 +238,7 @@
 #endif
 };
 
-struct qib_sge_state;
+struct rvt_sge_state;
 
 struct qib_sdma_txreq {
 	int                 flags;
@@ -258,14 +256,14 @@
 
 struct qib_verbs_txreq {
 	struct qib_sdma_txreq   txreq;
-	struct qib_qp           *qp;
-	struct qib_swqe         *wqe;
+	struct rvt_qp           *qp;
+	struct rvt_swqe         *wqe;
 	u32                     dwords;
 	u16                     hdr_dwords;
 	u16                     hdr_inx;
 	struct qib_pio_header	*align_buf;
-	struct qib_mregion	*mr;
-	struct qib_sge_state    *ss;
+	struct rvt_mregion	*mr;
+	struct rvt_sge_state    *ss;
 };
 
 #define QIB_SDMA_TXREQ_F_USELARGEBUF  0x1
@@ -1096,8 +1094,6 @@
 	u16 psxmitwait_check_rate;
 	/* high volume overflow errors defered to tasklet */
 	struct tasklet_struct error_tasklet;
-	/* per device cq worker */
-	struct kthread_worker *worker;
 
 	int assigned_node_id; /* NUMA node closest to HCA */
 };
@@ -1135,8 +1131,9 @@
 extern struct qib_devdata *qib_lookup(int unit);
 extern u32 qib_cpulist_count;
 extern unsigned long *qib_cpulist;
-
+extern u16 qpt_mask;
 extern unsigned qib_cc_table_size;
+
 int qib_init(struct qib_devdata *, int);
 int init_chip_wc_pat(struct qib_devdata *dd, u32);
 int qib_enable_wc(struct qib_devdata *dd);
@@ -1323,7 +1320,7 @@
 void qib_sdma_intr(struct qib_pportdata *);
 void qib_user_sdma_send_desc(struct qib_pportdata *dd,
 			struct list_head *pktlist);
-int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
+int qib_sdma_verbs_send(struct qib_pportdata *, struct rvt_sge_state *,
 			u32, struct qib_verbs_txreq *);
 /* ppd->sdma_lock should be locked before calling this. */
 int qib_sdma_make_progress(struct qib_pportdata *dd);
@@ -1454,6 +1451,8 @@
 dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
 			  size_t, int);
 const char *qib_get_unit_name(int unit);
+const char *qib_get_card_name(struct rvt_dev_info *rdi);
+struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
 
 /*
  * Flush write combining store buffers (if present) and perform a write
@@ -1540,4 +1539,14 @@
 void qib_format_hwerrors(u64 hwerrs,
 			 const struct qib_hwerror_msgs *hwerrmsgs,
 			 size_t nhwerrmsgs, char *msg, size_t lmsg);
+
+void qib_stop_send_queue(struct rvt_qp *qp);
+void qib_quiesce_qp(struct rvt_qp *qp);
+void qib_flush_qp_waiters(struct rvt_qp *qp);
+int qib_mtu_to_path_mtu(u32 mtu);
+u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
+void qib_notify_error_qp(struct rvt_qp *qp);
+int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			   struct ib_qp_attr *attr);
+
 #endif                          /* _QIB_KERNEL_H */

diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 4fb78ab..1d6e63e 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h

@@ -742,14 +742,11 @@
 #define SIZE_OF_CRC 1
 
 #define QIB_DEFAULT_P_KEY 0xFFFF
-#define QIB_PERMISSIVE_LID 0xFFFF
 #define QIB_AETH_CREDIT_SHIFT 24
 #define QIB_AETH_CREDIT_MASK 0x1F
 #define QIB_AETH_CREDIT_INVAL 0x1F
 #define QIB_PSN_MASK 0xFFFFFF
 #define QIB_MSN_MASK 0xFFFFFF
-#define QIB_QPN_MASK 0xFFFFFF
-#define QIB_MULTICAST_LID_BASE 0xC000
 #define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK
 #define QIB_MULTICAST_QPN 0xFFFFFF
 

diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
deleted file mode 100644
index 2b45d0b..0000000
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ /dev/null

@@ -1,545 +0,0 @@
-/*
- * Copyright (c) 2013 Intel Corporation.  All rights reserved.
- * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/kthread.h>
-
-#include "qib_verbs.h"
-#include "qib.h"
-
-/**
- * qib_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicitated entry
- *
- * This may be called with qp->s_lock held.
- */
-void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
-{
-	struct qib_cq_wc *wc;
-	unsigned long flags;
-	u32 head;
-	u32 next;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	/*
-	 * Note that the head pointer might be writable by user processes.
-	 * Take care to verify it is a sane value.
-	 */
-	wc = cq->queue;
-	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
-		head = cq->ibcq.cqe;
-		next = 0;
-	} else
-		next = head + 1;
-	if (unlikely(next == wc->tail)) {
-		spin_unlock_irqrestore(&cq->lock, flags);
-		if (cq->ibcq.event_handler) {
-			struct ib_event ev;
-
-			ev.device = cq->ibcq.device;
-			ev.element.cq = &cq->ibcq;
-			ev.event = IB_EVENT_CQ_ERR;
-			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-		}
-		return;
-	}
-	if (cq->ip) {
-		wc->uqueue[head].wr_id = entry->wr_id;
-		wc->uqueue[head].status = entry->status;
-		wc->uqueue[head].opcode = entry->opcode;
-		wc->uqueue[head].vendor_err = entry->vendor_err;
-		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].ex.imm_data =
-			(__u32 __force)entry->ex.imm_data;
-		wc->uqueue[head].qp_num = entry->qp->qp_num;
-		wc->uqueue[head].src_qp = entry->src_qp;
-		wc->uqueue[head].wc_flags = entry->wc_flags;
-		wc->uqueue[head].pkey_index = entry->pkey_index;
-		wc->uqueue[head].slid = entry->slid;
-		wc->uqueue[head].sl = entry->sl;
-		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-		wc->uqueue[head].port_num = entry->port_num;
-		/* Make sure entry is written before the head index. */
-		smp_wmb();
-	} else
-		wc->kqueue[head] = *entry;
-	wc->head = next;
-
-	if (cq->notify == IB_CQ_NEXT_COMP ||
-	    (cq->notify == IB_CQ_SOLICITED &&
-	     (solicited || entry->status != IB_WC_SUCCESS))) {
-		struct kthread_worker *worker;
-		/*
-		 * This will cause send_complete() to be called in
-		 * another thread.
-		 */
-		smp_rmb();
-		worker = cq->dd->worker;
-		if (likely(worker)) {
-			cq->notify = IB_CQ_NONE;
-			cq->triggered++;
-			queue_kthread_work(worker, &cq->comptask);
-		}
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-}
-
-/**
- * qib_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	struct qib_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
-
-static void send_complete(struct kthread_work *work)
-{
-	struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
-
-	/*
-	 * The completion handler will most likely rearm the notification
-	 * and poll for all pending entries.  If a new completion entry
-	 * is added while we are in this routine, queue_work()
-	 * won't call us again until we return so we check triggered to
-	 * see if we need to call the handler again.
-	 */
-	for (;;) {
-		u8 triggered = cq->triggered;
-
-		/*
-		 * IPoIB connected mode assumes the callback is from a
-		 * soft IRQ. We simulate this by blocking "bottom halves".
-		 * See the implementation for ipoib_cm_handle_tx_wc(),
-		 * netif_tx_lock_bh() and netif_tx_lock().
-		 */
-		local_bh_disable();
-		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-		local_bh_enable();
-
-		if (cq->triggered == triggered)
-			return;
-	}
-}
-
-/**
- * qib_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the QLogic_IB driver
- * @udata: user data for libibverbs.so
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *qib_create_cq(struct ib_device *ibdev,
-			    const struct ib_cq_init_attr *attr,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata)
-{
-	int entries = attr->cqe;
-	struct qib_ibdev *dev = to_idev(ibdev);
-	struct qib_cq *cq;
-	struct qib_cq_wc *wc;
-	struct ib_cq *ret;
-	u32 sz;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	if (entries < 1 || entries > ib_qib_max_cqes) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Allocate the completion queue entries and head/tail pointers.
-	 * This is allocated separately so that it can be resized and
-	 * also mapped into user space.
-	 * We need to use vmalloc() in order to support mmap and large
-	 * numbers of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-	else
-		sz += sizeof(struct ib_wc) * (entries + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_cq;
-	}
-
-	/*
-	 * Return the address of the WC as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-
-		cq->ip = qib_create_mmap_info(dev, sz, context, wc);
-		if (!cq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wc;
-		}
-
-		err = ib_copy_to_udata(udata, &cq->ip->offset,
-				       sizeof(cq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		cq->ip = NULL;
-
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == ib_qib_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
-
-	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	/*
-	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-	 * The number of entries should be >= the number requested or return
-	 * an error.
-	 */
-	cq->dd = dd_from_dev(dev);
-	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
-	spin_lock_init(&cq->lock);
-	init_kthread_work(&cq->comptask, send_complete);
-	wc->head = 0;
-	wc->tail = 0;
-	cq->queue = wc;
-
-	ret = &cq->ibcq;
-
-	goto done;
-
-bail_ip:
-	kfree(cq->ip);
-bail_wc:
-	vfree(wc);
-bail_cq:
-	kfree(cq);
-done:
-	return ret;
-}
-
-/**
- * qib_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int qib_destroy_cq(struct ib_cq *ibcq)
-{
-	struct qib_ibdev *dev = to_idev(ibcq->device);
-	struct qib_cq *cq = to_icq(ibcq);
-
-	flush_kthread_work(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
-	if (cq->ip)
-		kref_put(&cq->ip->ref, qib_release_mmap_info);
-	else
-		vfree(cq->queue);
-	kfree(cq);
-
-	return 0;
-}
-
-/**
- * qib_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&cq->lock, flags);
-	/*
-	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-	 */
-	if (cq->notify != IB_CQ_NEXT_COMP)
-		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-	    cq->queue->head != cq->queue->tail)
-		ret = 1;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return ret;
-}
-
-/**
- * qib_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	struct qib_cq_wc *old_wc;
-	struct qib_cq_wc *wc;
-	u32 head, tail, n;
-	int ret;
-	u32 sz;
-
-	if (cqe < 1 || cqe > ib_qib_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-	else
-		sz += sizeof(struct ib_wc) * (cqe + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	/* Check that we can write the offset to mmap. */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		__u64 offset = 0;
-
-		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-		if (ret)
-			goto bail_free;
-	}
-
-	spin_lock_irq(&cq->lock);
-	/*
-	 * Make sure head and tail are sane since they
-	 * might be user writable.
-	 */
-	old_wc = cq->queue;
-	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
-	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	if (head < tail)
-		n = cq->ibcq.cqe + 1 + head - tail;
-	else
-		n = head - tail;
-	if (unlikely((u32)cqe < n)) {
-		ret = -EINVAL;
-		goto bail_unlock;
-	}
-	for (n = 0; tail != head; n++) {
-		if (cq->ip)
-			wc->uqueue[n] = old_wc->uqueue[tail];
-		else
-			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	cq->ibcq.cqe = cqe;
-	wc->head = n;
-	wc->tail = 0;
-	cq->queue = wc;
-	spin_unlock_irq(&cq->lock);
-
-	vfree(old_wc);
-
-	if (cq->ip) {
-		struct qib_ibdev *dev = to_idev(ibcq->device);
-		struct qib_mmap_info *ip = cq->ip;
-
-		qib_update_mmap_info(dev, ip, sz, wc);
-
-		/*
-		 * Return the offset to mmap.
-		 * See qib_mmap() for details.
-		 */
-		if (udata && udata->outlen >= sizeof(__u64)) {
-			ret = ib_copy_to_udata(udata, &ip->offset,
-					       sizeof(ip->offset));
-			if (ret)
-				goto bail;
-		}
-
-		spin_lock_irq(&dev->pending_lock);
-		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&cq->lock);
-bail_free:
-	vfree(wc);
-bail:
-	return ret;
-}
-
-int qib_cq_init(struct qib_devdata *dd)
-{
-	int ret = 0;
-	int cpu;
-	struct task_struct *task;
-
-	if (dd->worker)
-		return 0;
-	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
-	if (!dd->worker)
-		return -ENOMEM;
-	init_kthread_worker(dd->worker);
-	task = kthread_create_on_node(
-		kthread_worker_fn,
-		dd->worker,
-		dd->assigned_node_id,
-		"qib_cq%d", dd->unit);
-	if (IS_ERR(task))
-		goto task_fail;
-	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
-	kthread_bind(task, cpu);
-	wake_up_process(task);
-out:
-	return ret;
-task_fail:
-	ret = PTR_ERR(task);
-	kfree(dd->worker);
-	dd->worker = NULL;
-	goto out;
-}
-
-void qib_cq_exit(struct qib_devdata *dd)
-{
-	struct kthread_worker *worker;
-
-	worker = dd->worker;
-	if (!worker)
-		return;
-	/* blocks future queuing from send_complete() */
-	dd->worker = NULL;
-	smp_wmb();
-	flush_kthread_worker(worker);
-	kthread_stop(worker->task);
-	kfree(worker);
-}

diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index f58fdc3..67ee643 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c

@@ -90,6 +90,22 @@
 	return iname;
 }
 
+const char *qib_get_card_name(struct rvt_dev_info *rdi)
+{
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
+	return qib_get_unit_name(dd->unit);
+}
+
+struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
+{
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
+	return dd->pcidev;
+}
+
 /*
  * Return count of units with at least one port ACTIVE.
  */
@@ -306,7 +322,9 @@
 		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
 		struct qib_other_headers *ohdr = NULL;
 		struct qib_ibport *ibp = &ppd->ibport_data;
-		struct qib_qp *qp = NULL;
+		struct qib_devdata *dd = ppd->dd;
+		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+		struct rvt_qp *qp = NULL;
 		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
 		u16 lid  = be16_to_cpu(hdr->lrh[1]);
 		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
@@ -319,7 +337,7 @@
 		if (tlen < 24)
 			goto drop;
 
-		if (lid < QIB_MULTICAST_LID_BASE) {
+		if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 			lid &= ~((1 << ppd->lmc) - 1);
 			if (unlikely(lid != ppd->lid))
 				goto drop;
@@ -346,13 +364,16 @@
 		psn = be32_to_cpu(ohdr->bth[2]);
 
 		/* Get the destination QP number. */
-		qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+		qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		if (qp_num != QIB_MULTICAST_QPN) {
 			int ruc_res;
 
-			qp = qib_lookup_qpn(ibp, qp_num);
-			if (!qp)
+			rcu_read_lock();
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+			if (!qp) {
+				rcu_read_unlock();
 				goto drop;
+			}
 
 			/*
 			 * Handle only RC QPs - for other QP types drop error
@@ -361,9 +382,9 @@
 			spin_lock(&qp->r_lock);
 
 			/* Check for valid receive state. */
-			if (!(ib_qib_state_ops[qp->state] &
-			      QIB_PROCESS_RECV_OK)) {
-				ibp->n_pkt_drops++;
+			if (!(ib_rvt_state_ops[qp->state] &
+			      RVT_PROCESS_RECV_OK)) {
+				ibp->rvp.n_pkt_drops++;
 				goto unlock;
 			}
 
@@ -383,7 +404,7 @@
 				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
 					diff = qib_cmp24(psn, qp->r_psn);
 					if (!qp->r_nak_state && diff >= 0) {
-						ibp->n_rc_seqnak++;
+						ibp->rvp.n_rc_seqnak++;
 						qp->r_nak_state =
 							IB_NAK_PSN_ERROR;
 						/* Use the expected PSN. */
@@ -398,7 +419,7 @@
 						 */
 						if (list_empty(&qp->rspwait)) {
 							qp->r_flags |=
-								QIB_R_RSP_NAK;
+								RVT_R_RSP_NAK;
 							atomic_inc(
 								&qp->refcount);
 							list_add_tail(
@@ -419,12 +440,7 @@
 
 unlock:
 			spin_unlock(&qp->r_lock);
-			/*
-			 * Notify qib_destroy_qp() if it is waiting
-			 * for us to finish.
-			 */
-			if (atomic_dec_and_test(&qp->refcount))
-				wake_up(&qp->wait);
+			rcu_read_unlock();
 		} /* Unicast QP */
 	} /* Valid packet with TIDErr */
 
@@ -456,7 +472,7 @@
 	u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
 	int last;
 	u64 lval;
-	struct qib_qp *qp, *nqp;
+	struct rvt_qp *qp, *nqp;
 
 	l = rcd->head;
 	rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
@@ -549,15 +565,6 @@
 			updegr = 0;
 		}
 	}
-	/*
-	 * Notify qib_destroy_qp() if it is waiting
-	 * for lookaside_qp to finish.
-	 */
-	if (rcd->lookaside_qp) {
-		if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
-			wake_up(&rcd->lookaside_qp->wait);
-		rcd->lookaside_qp = NULL;
-	}
 
 	rcd->head = l;
 
@@ -567,17 +574,17 @@
 	 */
 	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
 		list_del_init(&qp->rspwait);
-		if (qp->r_flags & QIB_R_RSP_NAK) {
-			qp->r_flags &= ~QIB_R_RSP_NAK;
+		if (qp->r_flags & RVT_R_RSP_NAK) {
+			qp->r_flags &= ~RVT_R_RSP_NAK;
 			qib_send_rc_ack(qp);
 		}
-		if (qp->r_flags & QIB_R_RSP_SEND) {
+		if (qp->r_flags & RVT_R_RSP_SEND) {
 			unsigned long flags;
 
-			qp->r_flags &= ~QIB_R_RSP_SEND;
+			qp->r_flags &= ~RVT_R_RSP_SEND;
 			spin_lock_irqsave(&qp->s_lock, flags);
-			if (ib_qib_state_ops[qp->state] &
-					QIB_PROCESS_OR_FLUSH_SEND)
+			if (ib_rvt_state_ops[qp->state] &
+					RVT_PROCESS_OR_FLUSH_SEND)
 				qib_schedule_send(qp);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}

diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 4b927809..a3733f2 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c

@@ -2956,13 +2956,13 @@
 	struct qib_ibport *ibp = &ppd->ibport_data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 	if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED) {
 		cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
 		qib_snapshot_counters(ppd, &cs->sword, &cs->rword,
 				      &cs->spkts, &cs->rpkts, &cs->xmit_wait);
 		mod_timer(&cs->pma_timer,
-			  jiffies + usecs_to_jiffies(ibp->pma_sample_interval));
+		      jiffies + usecs_to_jiffies(ibp->rvp.pma_sample_interval));
 	} else if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 		u64 ta, tb, tc, td, te;
 
@@ -2975,11 +2975,11 @@
 		cs->rpkts = td - cs->rpkts;
 		cs->xmit_wait = te - cs->xmit_wait;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 }
 
 /*
- * Note that the caller has the ibp->lock held.
+ * Note that the caller has the ibp->rvp.lock held.
  */
 static void qib_set_cntr_6120_sample(struct qib_pportdata *ppd, u32 intv,
 				     u32 start)

diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 6c8ff10..82d7c4b 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c

@@ -2910,8 +2910,6 @@
 			spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
 			qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data);
 		}
-		if (dd->pport[i].ibport_data.smi_ah)
-			ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah);
 	}
 }
 
@@ -5497,7 +5495,7 @@
 	unsigned delay;
 	int ret;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		goto retry;
 
@@ -5515,7 +5513,7 @@
 			ret = PTR_ERR(ah);
 		else {
 			send_buf->ah = ah;
-			ibp->smi_ah = to_iah(ah);
+			ibp->smi_ah = ibah_to_rvtah(ah);
 			ret = 0;
 		}
 	} else {

diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 4ff340fe..3f062f0 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c

@@ -42,6 +42,7 @@
 #ifdef CONFIG_INFINIBAND_QIB_DCA
 #include <linux/dca.h>
 #endif
+#include <rdma/rdma_vt.h>
 
 #include "qib.h"
 #include "qib_common.h"
@@ -244,6 +245,13 @@
 		alloc_percpu(struct qib_pma_counters);
 	if (!ppd->ibport_data.pmastats)
 		return -ENOMEM;
+	ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+	ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+	ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+	if (!(ppd->ibport_data.rvp.rc_acks) ||
+	    !(ppd->ibport_data.rvp.rc_qacks) ||
+	    !(ppd->ibport_data.rvp.rc_delayed_comp))
+		return -ENOMEM;
 
 	if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
 		goto bail;
@@ -449,8 +457,6 @@
 	init_timer(&dd->intrchk_timer);
 	dd->intrchk_timer.function = verify_interrupt;
 	dd->intrchk_timer.data = (unsigned long) dd;
-
-	ret = qib_cq_init(dd);
 done:
 	return ret;
 }
@@ -631,6 +637,9 @@
 static void qib_free_pportdata(struct qib_pportdata *ppd)
 {
 	free_percpu(ppd->ibport_data.pmastats);
+	free_percpu(ppd->ibport_data.rvp.rc_acks);
+	free_percpu(ppd->ibport_data.rvp.rc_qacks);
+	free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
 	ppd->ibport_data.pmastats = NULL;
 }
 
@@ -1081,7 +1090,7 @@
 	qib_dbg_ibdev_exit(&dd->verbs_dev);
 #endif
 	free_percpu(dd->int_counter);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 }
 
 u64 qib_int_counter(struct qib_devdata *dd)
@@ -1120,9 +1129,12 @@
 {
 	unsigned long flags;
 	struct qib_devdata *dd;
-	int ret;
+	int ret, nports;
 
-	dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
+	/* extra is * number of ports */
+	nports = extra / sizeof(struct qib_pportdata);
+	dd = (struct qib_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+						    nports);
 	if (!dd)
 		return ERR_PTR(-ENOMEM);
 
@@ -1171,7 +1183,7 @@
 bail:
 	if (!list_empty(&dd->list))
 		list_del_init(&dd->list);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 	return ERR_PTR(ret);
 }
 
@@ -1421,7 +1433,6 @@
 	}
 	kfree(tmp);
 	kfree(dd->boardname);
-	qib_cq_exit(dd);
 }
 
 /*

diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index 086616d..a014fd4 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c

@@ -74,7 +74,7 @@
 	struct ib_event event;
 	struct qib_devdata *dd = ppd->dd;
 
-	event.device = &dd->verbs_dev.ibdev;
+	event.device = &dd->verbs_dev.rdi.ibdev;
 	event.element.port_num = ppd->port;
 	event.event = ev;
 	ib_dispatch_event(&event);

diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index d725c56..2c3c935 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c

@@ -46,20 +46,20 @@
  *
  */
 
-int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
+int qib_alloc_lkey(struct rvt_mregion *mr, int dma_region)
 {
 	unsigned long flags;
 	u32 r;
 	u32 n;
 	int ret = 0;
 	struct qib_ibdev *dev = to_idev(mr->pd->device);
-	struct qib_lkey_table *rkt = &dev->lk_table;
+	struct rvt_lkey_table *rkt = &dev->lk_table;
 
 	spin_lock_irqsave(&rkt->lock, flags);
 
 	/* special case for dma_mr lkey == 0 */
 	if (dma_region) {
-		struct qib_mregion *tmr;
+		struct rvt_mregion *tmr;
 
 		tmr = rcu_access_pointer(dev->dma_mr);
 		if (!tmr) {
@@ -90,8 +90,8 @@
 	 * bits are capped in qib_verbs.c to insure enough bits
 	 * for generation number
 	 */
-	mr->lkey = (r << (32 - ib_qib_lkey_table_size)) |
-		((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen)
+	mr->lkey = (r << (32 - ib_rvt_lkey_table_size)) |
+		((((1 << (24 - ib_rvt_lkey_table_size)) - 1) & rkt->gen)
 		 << 8);
 	if (mr->lkey == 0) {
 		mr->lkey |= 1 << 8;
@@ -114,13 +114,13 @@
  * qib_free_lkey - free an lkey
  * @mr: mr to free from tables
  */
-void qib_free_lkey(struct qib_mregion *mr)
+void qib_free_lkey(struct rvt_mregion *mr)
 {
 	unsigned long flags;
 	u32 lkey = mr->lkey;
 	u32 r;
 	struct qib_ibdev *dev = to_idev(mr->pd->device);
-	struct qib_lkey_table *rkt = &dev->lk_table;
+	struct rvt_lkey_table *rkt = &dev->lk_table;
 
 	spin_lock_irqsave(&rkt->lock, flags);
 	if (!mr->lkey_published)
@@ -128,7 +128,7 @@
 	if (lkey == 0)
 		RCU_INIT_POINTER(dev->dma_mr, NULL);
 	else {
-		r = lkey >> (32 - ib_qib_lkey_table_size);
+		r = lkey >> (32 - ib_rvt_lkey_table_size);
 		RCU_INIT_POINTER(rkt->table[r], NULL);
 	}
 	qib_put_mr(mr);
@@ -138,105 +138,6 @@
 }
 
 /**
- * qib_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @pd: protection domain
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
-		struct qib_sge *isge, struct ib_sge *sge, int acc)
-{
-	struct qib_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use LKEY == zero for kernel virtual addresses
-	 * (see qib_get_dma_mr and qib_dma.c).
-	 */
-	rcu_read_lock();
-	if (sge->lkey == 0) {
-		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
-			goto bail;
-		rcu_read_unlock();
-
-		isge->mr = mr;
-		isge->vaddr = (void *) sge->addr;
-		isge->length = sge->length;
-		isge->sge_length = sge->length;
-		isge->m = 0;
-		isge->n = 0;
-		goto ok;
-	}
-	mr = rcu_dereference(
-		rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
-		goto bail;
-
-	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
-		goto bail;
-	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
-		goto bail;
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off/QIB_SEGSZ;
-		n = entries_spanned_by_off%QIB_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= QIB_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	isge->mr = mr;
-	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	isge->length = mr->map[m]->segs[n].length - off;
-	isge->sge_length = sge->length;
-	isge->m = m;
-	isge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}
-
-/**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
  * @qp: qp for validation
  * @sge: SGE state
@@ -249,11 +150,11 @@
  *
  * increments the reference count upon success
  */
-int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 		u32 len, u64 vaddr, u32 rkey, int acc)
 {
-	struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct qib_mregion *mr;
+	struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
+	struct rvt_mregion *mr;
 	unsigned n, m;
 	size_t off;
 
@@ -263,7 +164,7 @@
 	 */
 	rcu_read_lock();
 	if (rkey == 0) {
-		struct qib_pd *pd = to_ipd(qp->ibqp.pd);
+		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
@@ -285,7 +186,7 @@
 	}
 
 	mr = rcu_dereference(
-		rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+		rkt->table[(rkey >> (32 - ib_rvt_lkey_table_size))]);
 	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 		goto bail;
 
@@ -308,15 +209,15 @@
 
 		entries_spanned_by_off = off >> mr->page_shift;
 		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off/QIB_SEGSZ;
-		n = entries_spanned_by_off%QIB_SEGSZ;
+		m = entries_spanned_by_off / RVT_SEGSZ;
+		n = entries_spanned_by_off % RVT_SEGSZ;
 	} else {
 		m = 0;
 		n = 0;
 		while (off >= mr->map[m]->segs[n].length) {
 			off -= mr->map[m]->segs[n].length;
 			n++;
-			if (n >= QIB_SEGSZ) {
+			if (n >= RVT_SEGSZ) {
 				m++;
 				n = 0;
 			}
@@ -335,58 +236,3 @@
 	return 0;
 }
 
-/*
- * Initialize the memory region specified by the work request.
- */
-int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr)
-{
-	struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct qib_pd *pd = to_ipd(qp->ibqp.pd);
-	struct qib_mr *mr = to_imr(wr->mr);
-	struct qib_mregion *mrg;
-	u32 key = wr->key;
-	unsigned i, n, m;
-	int ret = -EINVAL;
-	unsigned long flags;
-	u64 *page_list;
-	size_t ps;
-
-	spin_lock_irqsave(&rkt->lock, flags);
-	if (pd->user || key == 0)
-		goto bail;
-
-	mrg = rcu_dereference_protected(
-		rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
-		lockdep_is_held(&rkt->lock));
-	if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
-		goto bail;
-
-	if (mr->npages > mrg->max_segs)
-		goto bail;
-
-	ps = mr->ibmr.page_size;
-	if (mr->ibmr.length > ps * mr->npages)
-		goto bail;
-
-	mrg->user_base = mr->ibmr.iova;
-	mrg->iova = mr->ibmr.iova;
-	mrg->lkey = key;
-	mrg->length = mr->ibmr.length;
-	mrg->access_flags = wr->access;
-	page_list = mr->pages;
-	m = 0;
-	n = 0;
-	for (i = 0; i < mr->npages; i++) {
-		mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
-		mrg->map[m]->segs[n].length = ps;
-		if (++n == QIB_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-
-	ret = 0;
-bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	return ret;
-}

diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 9625e7c..0bd1837 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c

@@ -70,7 +70,7 @@
 	unsigned long flags;
 	unsigned long timeout;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		return;
 
@@ -79,7 +79,8 @@
 		return;
 
 	/* o14-2 */
-	if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+	if (ibp->rvp.trap_timeout &&
+	    time_before(jiffies, ibp->rvp.trap_timeout))
 		return;
 
 	send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR,
@@ -93,42 +94,42 @@
 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 	smp->class_version = 1;
 	smp->method = IB_MGMT_METHOD_TRAP;
-	ibp->tid++;
-	smp->tid = cpu_to_be64(ibp->tid);
+	ibp->rvp.tid++;
+	smp->tid = cpu_to_be64(ibp->rvp.tid);
 	smp->attr_id = IB_SMP_ATTR_NOTICE;
 	/* o14-1: smp->mkey = 0; */
 	memcpy(smp->data, data, len);
 
-	spin_lock_irqsave(&ibp->lock, flags);
-	if (!ibp->sm_ah) {
-		if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	if (!ibp->rvp.sm_ah) {
+		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
 			struct ib_ah *ah;
 
-			ah = qib_create_qp0_ah(ibp, ibp->sm_lid);
+			ah = qib_create_qp0_ah(ibp, ibp->rvp.sm_lid);
 			if (IS_ERR(ah))
 				ret = PTR_ERR(ah);
 			else {
 				send_buf->ah = ah;
-				ibp->sm_ah = to_iah(ah);
+				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
 				ret = 0;
 			}
 		} else
 			ret = -EINVAL;
 	} else {
-		send_buf->ah = &ibp->sm_ah->ibah;
+		send_buf->ah = &ibp->rvp.sm_ah->ibah;
 		ret = 0;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	if (!ret)
 		ret = ib_post_send_mad(send_buf, NULL);
 	if (!ret) {
 		/* 4.096 usec. */
-		timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
-		ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
 	} else {
 		ib_free_send_mad(send_buf);
-		ibp->trap_timeout = 0;
+		ibp->rvp.trap_timeout = 0;
 	}
 }
 
@@ -141,10 +142,10 @@
 	struct ib_mad_notice_attr data;
 
 	if (trap_num == IB_NOTICE_TRAP_BAD_PKEY)
-		ibp->pkey_violations++;
+		ibp->rvp.pkey_violations++;
 	else
-		ibp->qkey_violations++;
-	ibp->n_pkt_drops++;
+		ibp->rvp.qkey_violations++;
+	ibp->rvp.n_pkt_drops++;
 
 	/* Send violation trap */
 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@ -205,8 +206,11 @@
 /*
  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
  */
-void qib_cap_mask_chg(struct qib_ibport *ibp)
+void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
 {
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
 	struct ib_mad_notice_attr data;
 
 	data.generic_type = IB_NOTICE_TYPE_INFO;
@@ -217,8 +221,8 @@
 	data.toggle_count = 0;
 	memset(&data.details, 0, sizeof(data.details));
 	data.details.ntc_144.lid = data.issuer_lid;
-	data.details.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
-
+	data.details.ntc_144.new_cap_mask =
+					cpu_to_be32(ibp->rvp.port_cap_flags);
 	qib_send_trap(ibp, &data, sizeof(data));
 }
 
@@ -409,37 +413,38 @@
 	int ret = 0;
 
 	/* Is the mkey in the process of expiring? */
-	if (ibp->mkey_lease_timeout &&
-	    time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+	if (ibp->rvp.mkey_lease_timeout &&
+	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
 		/* Clear timeout and mkey protection field. */
-		ibp->mkey_lease_timeout = 0;
-		ibp->mkeyprot = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
+		ibp->rvp.mkeyprot = 0;
 	}
 
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->mkey == 0 ||
-	    ibp->mkey == smp->mkey)
+	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
+	    ibp->rvp.mkey == smp->mkey)
 		valid_mkey = 1;
 
 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
-	if (valid_mkey && ibp->mkey_lease_timeout &&
+	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
 	    (smp->method == IB_MGMT_METHOD_GET ||
 	     smp->method == IB_MGMT_METHOD_SET ||
 	     smp->method == IB_MGMT_METHOD_TRAP_REPRESS))
-		ibp->mkey_lease_timeout = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
 
 	if (!valid_mkey) {
 		switch (smp->method) {
 		case IB_MGMT_METHOD_GET:
 			/* Bad mkey not a violation below level 2 */
-			if (ibp->mkeyprot < 2)
+			if (ibp->rvp.mkeyprot < 2)
 				break;
 		case IB_MGMT_METHOD_SET:
 		case IB_MGMT_METHOD_TRAP_REPRESS:
-			if (ibp->mkey_violations != 0xFFFF)
-				++ibp->mkey_violations;
-			if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
-				ibp->mkey_lease_timeout = jiffies +
-					ibp->mkey_lease_period * HZ;
+			if (ibp->rvp.mkey_violations != 0xFFFF)
+				++ibp->rvp.mkey_violations;
+			if (!ibp->rvp.mkey_lease_timeout &&
+			    ibp->rvp.mkey_lease_period)
+				ibp->rvp.mkey_lease_timeout = jiffies +
+					ibp->rvp.mkey_lease_period * HZ;
 			/* Generate a trap notice. */
 			qib_bad_mkey(ibp, smp);
 			ret = 1;
@@ -489,15 +494,15 @@
 
 	/* Only return the mkey if the protection field allows it. */
 	if (!(smp->method == IB_MGMT_METHOD_GET &&
-	      ibp->mkey != smp->mkey &&
-	      ibp->mkeyprot == 1))
-		pip->mkey = ibp->mkey;
-	pip->gid_prefix = ibp->gid_prefix;
+	      ibp->rvp.mkey != smp->mkey &&
+	      ibp->rvp.mkeyprot == 1))
+		pip->mkey = ibp->rvp.mkey;
+	pip->gid_prefix = ibp->rvp.gid_prefix;
 	pip->lid = cpu_to_be16(ppd->lid);
-	pip->sm_lid = cpu_to_be16(ibp->sm_lid);
-	pip->cap_mask = cpu_to_be32(ibp->port_cap_flags);
+	pip->sm_lid = cpu_to_be16(ibp->rvp.sm_lid);
+	pip->cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
 	/* pip->diag_code; */
-	pip->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+	pip->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
 	pip->local_port_num = port;
 	pip->link_width_enabled = ppd->link_width_enabled;
 	pip->link_width_supported = ppd->link_width_supported;
@@ -508,7 +513,7 @@
 	pip->portphysstate_linkdown =
 		(dd->f_ibphys_portstate(ppd->lastibcstat) << 4) |
 		(get_linkdowndefaultstate(ppd) ? 1 : 2);
-	pip->mkeyprot_resv_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+	pip->mkeyprot_resv_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
 	pip->linkspeedactive_enabled = (ppd->link_speed_active << 4) |
 		ppd->link_speed_enabled;
 	switch (ppd->ibmtu) {
@@ -529,9 +534,9 @@
 		mtu = IB_MTU_256;
 		break;
 	}
-	pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->sm_sl;
+	pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->rvp.sm_sl;
 	pip->vlcap_inittype = ppd->vls_supported << 4;  /* InitType = 0 */
-	pip->vl_high_limit = ibp->vl_high_limit;
+	pip->vl_high_limit = ibp->rvp.vl_high_limit;
 	pip->vl_arb_high_cap =
 		dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_CAP);
 	pip->vl_arb_low_cap =
@@ -542,20 +547,20 @@
 	/* pip->vlstallcnt_hoqlife; */
 	pip->operationalvl_pei_peo_fpi_fpo =
 		dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OP_VLS) << 4;
-	pip->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+	pip->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
-	pip->pkey_violations = cpu_to_be16(ibp->pkey_violations);
-	pip->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+	pip->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+	pip->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
 	/* Only the hardware GUID is supported for now */
 	pip->guid_cap = QIB_GUIDS_PER_PORT;
-	pip->clientrereg_resv_subnetto = ibp->subnet_timeout;
+	pip->clientrereg_resv_subnetto = ibp->rvp.subnet_timeout;
 	/* 32.768 usec. response time (guessing) */
 	pip->resv_resptimevalue = 3;
 	pip->localphyerrors_overrunerrors =
 		(get_phyerrthreshold(ppd) << 4) |
 		get_overrunthreshold(ppd);
 	/* pip->max_credit_hint; */
-	if (ibp->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
+	if (ibp->rvp.port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
 		u32 v;
 
 		v = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKLATENCY);
@@ -685,13 +690,13 @@
 	event.device = ibdev;
 	event.element.port_num = port;
 
-	ibp->mkey = pip->mkey;
-	ibp->gid_prefix = pip->gid_prefix;
-	ibp->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
+	ibp->rvp.mkey = pip->mkey;
+	ibp->rvp.gid_prefix = pip->gid_prefix;
+	ibp->rvp.mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
 
 	lid = be16_to_cpu(pip->lid);
 	/* Must be a valid unicast LID address. */
-	if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE)
+	if (lid == 0 || lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
 		smp->status |= IB_SMP_INVALID_FIELD;
 	else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
 		if (ppd->lid != lid)
@@ -706,21 +711,21 @@
 	smlid = be16_to_cpu(pip->sm_lid);
 	msl = pip->neighbormtu_mastersmsl & 0xF;
 	/* Must be a valid unicast LID address. */
-	if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE)
+	if (smlid == 0 || smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
 		smp->status |= IB_SMP_INVALID_FIELD;
-	else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
-		spin_lock_irqsave(&ibp->lock, flags);
-		if (ibp->sm_ah) {
-			if (smlid != ibp->sm_lid)
-				ibp->sm_ah->attr.dlid = smlid;
-			if (msl != ibp->sm_sl)
-				ibp->sm_ah->attr.sl = msl;
+	else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
+		spin_lock_irqsave(&ibp->rvp.lock, flags);
+		if (ibp->rvp.sm_ah) {
+			if (smlid != ibp->rvp.sm_lid)
+				ibp->rvp.sm_ah->attr.dlid = smlid;
+			if (msl != ibp->rvp.sm_sl)
+				ibp->rvp.sm_ah->attr.sl = msl;
 		}
-		spin_unlock_irqrestore(&ibp->lock, flags);
-		if (smlid != ibp->sm_lid)
-			ibp->sm_lid = smlid;
-		if (msl != ibp->sm_sl)
-			ibp->sm_sl = msl;
+		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+		if (smlid != ibp->rvp.sm_lid)
+			ibp->rvp.sm_lid = smlid;
+		if (msl != ibp->rvp.sm_sl)
+			ibp->rvp.sm_sl = msl;
 		event.event = IB_EVENT_SM_CHANGE;
 		ib_dispatch_event(&event);
 	}
@@ -768,10 +773,10 @@
 		smp->status |= IB_SMP_INVALID_FIELD;
 	}
 
-	ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
-	ibp->vl_high_limit = pip->vl_high_limit;
+	ibp->rvp.mkeyprot = pip->mkeyprot_resv_lmc >> 6;
+	ibp->rvp.vl_high_limit = pip->vl_high_limit;
 	(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_LIMIT,
-				    ibp->vl_high_limit);
+				    ibp->rvp.vl_high_limit);
 
 	mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF);
 	if (mtu == -1)
@@ -789,13 +794,13 @@
 	}
 
 	if (pip->mkey_violations == 0)
-		ibp->mkey_violations = 0;
+		ibp->rvp.mkey_violations = 0;
 
 	if (pip->pkey_violations == 0)
-		ibp->pkey_violations = 0;
+		ibp->rvp.pkey_violations = 0;
 
 	if (pip->qkey_violations == 0)
-		ibp->qkey_violations = 0;
+		ibp->rvp.qkey_violations = 0;
 
 	ore = pip->localphyerrors_overrunerrors;
 	if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF))
@@ -804,7 +809,7 @@
 	if (set_overrunthreshold(ppd, (ore & 0xF)))
 		smp->status |= IB_SMP_INVALID_FIELD;
 
-	ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
+	ibp->rvp.subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
 
 	/*
 	 * Do the port state change now that the other link parameters
@@ -1028,7 +1033,7 @@
 		(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
 
 		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.ibdev;
+		event.device = &dd->verbs_dev.rdi.ibdev;
 		event.element.port_num = port;
 		ib_dispatch_event(&event);
 	}
@@ -1062,7 +1067,7 @@
 
 	memset(smp->data, 0, sizeof(smp->data));
 
-	if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP))
+	if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP))
 		smp->status |= IB_SMP_UNSUP_METHOD;
 	else
 		for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2)
@@ -1078,7 +1083,7 @@
 	u8 *p = (u8 *) smp->data;
 	unsigned i;
 
-	if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) {
+	if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP)) {
 		smp->status |= IB_SMP_UNSUP_METHOD;
 		return reply(smp);
 	}
@@ -1195,20 +1200,20 @@
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		goto bail;
 	}
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 	p->tick = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PMA_TICKS);
 	p->sample_status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
 	p->counter_width = 4;   /* 32 bit counters */
 	p->counter_mask0_9 = COUNTER_MASK0_9;
-	p->sample_start = cpu_to_be32(ibp->pma_sample_start);
-	p->sample_interval = cpu_to_be32(ibp->pma_sample_interval);
-	p->tag = cpu_to_be16(ibp->pma_tag);
-	p->counter_select[0] = ibp->pma_counter_select[0];
-	p->counter_select[1] = ibp->pma_counter_select[1];
-	p->counter_select[2] = ibp->pma_counter_select[2];
-	p->counter_select[3] = ibp->pma_counter_select[3];
-	p->counter_select[4] = ibp->pma_counter_select[4];
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	p->sample_start = cpu_to_be32(ibp->rvp.pma_sample_start);
+	p->sample_interval = cpu_to_be32(ibp->rvp.pma_sample_interval);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
+	p->counter_select[0] = ibp->rvp.pma_counter_select[0];
+	p->counter_select[1] = ibp->rvp.pma_counter_select[1];
+	p->counter_select[2] = ibp->rvp.pma_counter_select[2];
+	p->counter_select[3] = ibp->rvp.pma_counter_select[3];
+	p->counter_select[4] = ibp->rvp.pma_counter_select[4];
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 bail:
 	return reply((struct ib_smp *) pmp);
@@ -1233,7 +1238,7 @@
 		goto bail;
 	}
 
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 
 	/* Port Sampling code owns the PS* HW counters */
 	xmit_flags = ppd->cong_stats.flags;
@@ -1242,18 +1247,18 @@
 	if (status == IB_PMA_SAMPLE_STATUS_DONE ||
 	    (status == IB_PMA_SAMPLE_STATUS_RUNNING &&
 	     xmit_flags == IB_PMA_CONG_HW_CONTROL_TIMER)) {
-		ibp->pma_sample_start = be32_to_cpu(p->sample_start);
-		ibp->pma_sample_interval = be32_to_cpu(p->sample_interval);
-		ibp->pma_tag = be16_to_cpu(p->tag);
-		ibp->pma_counter_select[0] = p->counter_select[0];
-		ibp->pma_counter_select[1] = p->counter_select[1];
-		ibp->pma_counter_select[2] = p->counter_select[2];
-		ibp->pma_counter_select[3] = p->counter_select[3];
-		ibp->pma_counter_select[4] = p->counter_select[4];
-		dd->f_set_cntr_sample(ppd, ibp->pma_sample_interval,
-				      ibp->pma_sample_start);
+		ibp->rvp.pma_sample_start = be32_to_cpu(p->sample_start);
+		ibp->rvp.pma_sample_interval = be32_to_cpu(p->sample_interval);
+		ibp->rvp.pma_tag = be16_to_cpu(p->tag);
+		ibp->rvp.pma_counter_select[0] = p->counter_select[0];
+		ibp->rvp.pma_counter_select[1] = p->counter_select[1];
+		ibp->rvp.pma_counter_select[2] = p->counter_select[2];
+		ibp->rvp.pma_counter_select[3] = p->counter_select[3];
+		ibp->rvp.pma_counter_select[4] = p->counter_select[4];
+		dd->f_set_cntr_sample(ppd, ibp->rvp.pma_sample_interval,
+				      ibp->rvp.pma_sample_start);
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	ret = pma_get_portsamplescontrol(pmp, ibdev, port);
 
@@ -1357,8 +1362,8 @@
 	int i;
 
 	memset(pmp->data, 0, sizeof(pmp->data));
-	spin_lock_irqsave(&ibp->lock, flags);
-	p->tag = cpu_to_be16(ibp->pma_tag);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
 		p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 	else {
@@ -1373,11 +1378,11 @@
 			ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
 		}
 	}
-	for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+	for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++)
 		p->counter[i] = cpu_to_be32(
 			get_cache_hw_sample_counters(
-				ppd, ibp->pma_counter_select[i]));
-	spin_unlock_irqrestore(&ibp->lock, flags);
+				ppd, ibp->rvp.pma_counter_select[i]));
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	return reply((struct ib_smp *) pmp);
 }
@@ -1397,8 +1402,8 @@
 
 	/* Port Sampling code owns the PS* HW counters */
 	memset(pmp->data, 0, sizeof(pmp->data));
-	spin_lock_irqsave(&ibp->lock, flags);
-	p->tag = cpu_to_be16(ibp->pma_tag);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
 		p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 	else {
@@ -1415,11 +1420,11 @@
 			ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
 		}
 	}
-	for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+	for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++)
 		p->counter[i] = cpu_to_be64(
 			get_cache_hw_sample_counters(
-				ppd, ibp->pma_counter_select[i]));
-	spin_unlock_irqrestore(&ibp->lock, flags);
+				ppd, ibp->rvp.pma_counter_select[i]));
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	return reply((struct ib_smp *) pmp);
 }
@@ -1453,7 +1458,7 @@
 	cntrs.excessive_buffer_overrun_errors -=
 		ibp->z_excessive_buffer_overrun_errors;
 	cntrs.vl15_dropped -= ibp->z_vl15_dropped;
-	cntrs.vl15_dropped += ibp->n_vl15_dropped;
+	cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped;
 
 	memset(pmp->data, 0, sizeof(pmp->data));
 
@@ -1546,9 +1551,9 @@
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 
 	qib_get_counters(ppd, &cntrs);
-	spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+	spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 	xmit_wait_counter = xmit_wait_get_value_delta(ppd);
-	spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+	spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 
 	/* Adjust counters for any resets done. */
 	cntrs.symbol_error_counter -= ibp->z_symbol_error_counter;
@@ -1564,7 +1569,7 @@
 	cntrs.excessive_buffer_overrun_errors -=
 		ibp->z_excessive_buffer_overrun_errors;
 	cntrs.vl15_dropped -= ibp->z_vl15_dropped;
-	cntrs.vl15_dropped += ibp->n_vl15_dropped;
+	cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped;
 	cntrs.port_xmit_data -= ibp->z_port_xmit_data;
 	cntrs.port_rcv_data -= ibp->z_port_rcv_data;
 	cntrs.port_xmit_packets -= ibp->z_port_xmit_packets;
@@ -1743,7 +1748,7 @@
 			cntrs.excessive_buffer_overrun_errors;
 
 	if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
-		ibp->n_vl15_dropped = 0;
+		ibp->rvp.n_vl15_dropped = 0;
 		ibp->z_vl15_dropped = cntrs.vl15_dropped;
 	}
 
@@ -1778,11 +1783,11 @@
 	ret = pma_get_portcounters_cong(pmp, ibdev, port);
 
 	if (counter_select & IB_PMA_SEL_CONG_XMIT) {
-		spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+		spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 		ppd->cong_stats.counter = 0;
 		dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL,
 				      0x0);
-		spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+		spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 	}
 	if (counter_select & IB_PMA_SEL_CONG_PORT_DATA) {
 		ibp->z_port_xmit_data = cntrs.port_xmit_data;
@@ -1806,7 +1811,7 @@
 			cntrs.local_link_integrity_errors;
 		ibp->z_excessive_buffer_overrun_errors =
 			cntrs.excessive_buffer_overrun_errors;
-		ibp->n_vl15_dropped = 0;
+		ibp->rvp.n_vl15_dropped = 0;
 		ibp->z_vl15_dropped = cntrs.vl15_dropped;
 	}
 
@@ -1916,12 +1921,12 @@
 			ret = subn_get_vl_arb(smp, ibdev, port);
 			goto bail;
 		case IB_SMP_ATTR_SM_INFO:
-			if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) {
 				ret = IB_MAD_RESULT_SUCCESS |
 					IB_MAD_RESULT_CONSUMED;
 				goto bail;
 			}
-			if (ibp->port_cap_flags & IB_PORT_SM) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM) {
 				ret = IB_MAD_RESULT_SUCCESS;
 				goto bail;
 			}
@@ -1950,12 +1955,12 @@
 			ret = subn_set_vl_arb(smp, ibdev, port);
 			goto bail;
 		case IB_SMP_ATTR_SM_INFO:
-			if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) {
 				ret = IB_MAD_RESULT_SUCCESS |
 					IB_MAD_RESULT_CONSUMED;
 				goto bail;
 			}
-			if (ibp->port_cap_flags & IB_PORT_SM) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM) {
 				ret = IB_MAD_RESULT_SUCCESS;
 				goto bail;
 			}
@@ -2443,12 +2448,6 @@
 	return ret;
 }
 
-static void send_handler(struct ib_mad_agent *agent,
-			 struct ib_mad_send_wc *mad_send_wc)
-{
-	ib_free_send_mad(mad_send_wc->send_buf);
-}
-
 static void xmit_wait_timer_func(unsigned long opaque)
 {
 	struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
@@ -2456,7 +2455,7 @@
 	unsigned long flags;
 	u8 status;
 
-	spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+	spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_SAMPLE) {
 		status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
 		if (status == IB_PMA_SAMPLE_STATUS_DONE) {
@@ -2469,74 +2468,35 @@
 	ppd->cong_stats.counter = xmit_wait_get_value_delta(ppd);
 	dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0);
 done:
-	spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+	spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 	mod_timer(&ppd->cong_stats.timer, jiffies + HZ);
 }
 
-int qib_create_agents(struct qib_ibdev *dev)
+void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx)
 {
-	struct qib_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct qib_ibport *ibp;
-	int p;
-	int ret;
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
 
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
-					      NULL, 0, send_handler,
-					      NULL, NULL, 0);
-		if (IS_ERR(agent)) {
-			ret = PTR_ERR(agent);
-			goto err;
-		}
-
-		/* Initialize xmit_wait structure */
-		dd->pport[p].cong_stats.counter = 0;
-		init_timer(&dd->pport[p].cong_stats.timer);
-		dd->pport[p].cong_stats.timer.function = xmit_wait_timer_func;
-		dd->pport[p].cong_stats.timer.data =
-			(unsigned long)(&dd->pport[p]);
-		dd->pport[p].cong_stats.timer.expires = 0;
-		add_timer(&dd->pport[p].cong_stats.timer);
-
-		ibp->send_agent = agent;
-	}
-
-	return 0;
-
-err:
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-	}
-
-	return ret;
+	/* Initialize xmit_wait structure */
+	dd->pport[port_idx].cong_stats.counter = 0;
+	init_timer(&dd->pport[port_idx].cong_stats.timer);
+	dd->pport[port_idx].cong_stats.timer.function = xmit_wait_timer_func;
+	dd->pport[port_idx].cong_stats.timer.data =
+		(unsigned long)(&dd->pport[port_idx]);
+	dd->pport[port_idx].cong_stats.timer.expires = 0;
+	add_timer(&dd->pport[port_idx].cong_stats.timer);
 }
 
-void qib_free_agents(struct qib_ibdev *dev)
+void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
 {
-	struct qib_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct qib_ibport *ibp;
-	int p;
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
 
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-		if (ibp->sm_ah) {
-			ib_destroy_ah(&ibp->sm_ah->ibah);
-			ibp->sm_ah = NULL;
-		}
-		if (dd->pport[p].cong_stats.timer.data)
-			del_timer_sync(&dd->pport[p].cong_stats.timer);
-	}
+	if (dd->pport[port_idx].cong_stats.timer.data)
+		del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
+
+	if (dd->pport[port_idx].ibport_data.smi_ah)
+		ib_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
 }

diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c
deleted file mode 100644
index 34927b7..0000000
--- a/drivers/infiniband/hw/qib/qib_mmap.c
+++ /dev/null

@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "qib_verbs.h"
-
-/**
- * qib_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct qib_mmap_info
- */
-void qib_release_mmap_info(struct kref *ref)
-{
-	struct qib_mmap_info *ip =
-		container_of(ref, struct qib_mmap_info, ref);
-	struct qib_ibdev *dev = to_idev(ip->context->device);
-
-	spin_lock_irq(&dev->pending_lock);
-	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
-
-	vfree(ip->obj);
-	kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void qib_vma_open(struct vm_area_struct *vma)
-{
-	struct qib_mmap_info *ip = vma->vm_private_data;
-
-	kref_get(&ip->ref);
-}
-
-static void qib_vma_close(struct vm_area_struct *vma)
-{
-	struct qib_mmap_info *ip = vma->vm_private_data;
-
-	kref_put(&ip->ref, qib_release_mmap_info);
-}
-
-static const struct vm_operations_struct qib_vm_ops = {
-	.open =     qib_vma_open,
-	.close =    qib_vma_close,
-};
-
-/**
- * qib_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	struct qib_ibdev *dev = to_idev(context->device);
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long size = vma->vm_end - vma->vm_start;
-	struct qib_mmap_info *ip, *pp;
-	int ret = -EINVAL;
-
-	/*
-	 * Search the device's list of objects waiting for a mmap call.
-	 * Normally, this list is very short since a call to create a
-	 * CQ, QP, or SRQ is soon followed by a call to mmap().
-	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-				 pending_mmaps) {
-		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
-			continue;
-		/* Don't allow a mmap larger than the object. */
-		if (size > ip->size)
-			break;
-
-		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-
-		ret = remap_vmalloc_range(vma, ip->obj, 0);
-		if (ret)
-			goto done;
-		vma->vm_ops = &qib_vm_ops;
-		vma->vm_private_data = ip;
-		qib_vma_open(vma);
-		goto done;
-	}
-	spin_unlock_irq(&dev->pending_lock);
-done:
-	return ret;
-}
-
-/*
- * Allocate information for qib_mmap
- */
-struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev,
-					   u32 size,
-					   struct ib_ucontext *context,
-					   void *obj) {
-	struct qib_mmap_info *ip;
-
-	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
-	if (!ip)
-		goto bail;
-
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	INIT_LIST_HEAD(&ip->pending_mmaps);
-	ip->size = size;
-	ip->context = context;
-	ip->obj = obj;
-	kref_init(&ip->ref);
-
-bail:
-	return ip;
-}
-
-void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
-			  u32 size, void *obj)
-{
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	ip->size = size;
-	ip->obj = obj;
-}

diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
deleted file mode 100644
index 5f53304..0000000
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ /dev/null

@@ -1,490 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_smi.h>
-
-#include "qib.h"
-
-/* Fast memory region */
-struct qib_fmr {
-	struct ib_fmr ibfmr;
-	struct qib_mregion mr;        /* must be last */
-};
-
-static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct qib_fmr, ibfmr);
-}
-
-static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
-	int count)
-{
-	int m, i = 0;
-	int rval = 0;
-
-	m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	for (; i < m; i++) {
-		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
-		if (!mr->map[i])
-			goto bail;
-	}
-	mr->mapsz = m;
-	init_completion(&mr->comp);
-	/* count returning the ptr to user */
-	atomic_set(&mr->refcount, 1);
-	mr->pd = pd;
-	mr->max_segs = count;
-out:
-	return rval;
-bail:
-	while (i)
-		kfree(mr->map[--i]);
-	rval = -ENOMEM;
-	goto out;
-}
-
-static void deinit_qib_mregion(struct qib_mregion *mr)
-{
-	int i = mr->mapsz;
-
-	mr->mapsz = 0;
-	while (i)
-		kfree(mr->map[--i]);
-}
-
-
-/**
- * qib_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see qib_dma.c).
- */
-struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct qib_mr *mr = NULL;
-	struct ib_mr *ret;
-	int rval;
-
-	if (to_ipd(pd)->user) {
-		ret = ERR_PTR(-EPERM);
-		goto bail;
-	}
-
-	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	rval = init_qib_mregion(&mr->mr, pd, 0);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail;
-	}
-
-
-	rval = qib_alloc_lkey(&mr->mr, 1);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail_mregion;
-	}
-
-	mr->mr.access_flags = acc;
-	ret = &mr->ibmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_qib_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	goto done;
-}
-
-static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
-{
-	struct qib_mr *mr;
-	int rval = -ENOMEM;
-	int m;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
-	if (!mr)
-		goto bail;
-
-	rval = init_qib_mregion(&mr->mr, pd, count);
-	if (rval)
-		goto bail;
-
-	rval = qib_alloc_lkey(&mr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	mr->ibmr.lkey = mr->mr.lkey;
-	mr->ibmr.rkey = mr->mr.lkey;
-done:
-	return mr;
-
-bail_mregion:
-	deinit_qib_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	mr = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * qib_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the QLogic_IB driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			      u64 virt_addr, int mr_access_flags,
-			      struct ib_udata *udata)
-{
-	struct qib_mr *mr;
-	struct ib_umem *umem;
-	struct scatterlist *sg;
-	int n, m, entry;
-	struct ib_mr *ret;
-
-	if (length == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
-	if (IS_ERR(umem))
-		return (void *) umem;
-
-	n = umem->nmap;
-
-	mr = alloc_mr(n, pd);
-	if (IS_ERR(mr)) {
-		ret = (struct ib_mr *)mr;
-		ib_umem_release(umem);
-		goto bail;
-	}
-
-	mr->mr.user_base = start;
-	mr->mr.iova = virt_addr;
-	mr->mr.length = length;
-	mr->mr.offset = ib_umem_offset(umem);
-	mr->mr.access_flags = mr_access_flags;
-	mr->umem = umem;
-
-	if (is_power_of_2(umem->page_size))
-		mr->mr.page_shift = ilog2(umem->page_size);
-	m = 0;
-	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-			void *vaddr;
-
-			vaddr = page_address(sg_page(sg));
-			if (!vaddr) {
-				ret = ERR_PTR(-EINVAL);
-				goto bail;
-			}
-			mr->mr.map[m]->segs[n].vaddr = vaddr;
-			mr->mr.map[m]->segs[n].length = umem->page_size;
-			n++;
-			if (n == QIB_SEGSZ) {
-				m++;
-				n = 0;
-			}
-	}
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by qib_get_dma_mr()
- * or qib_reg_user_mr().
- */
-int qib_dereg_mr(struct ib_mr *ibmr)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	kfree(mr->pages);
-	qib_free_lkey(&mr->mr);
-
-	qib_put_mr(&mr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&mr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		qib_get_mr(&mr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_qib_mregion(&mr->mr);
-	if (mr->umem)
-		ib_umem_release(mr->umem);
-	kfree(mr);
-out:
-	return ret;
-}
-
-/*
- * Allocate a memory region usable with the
- * IB_WR_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- */
-struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
-			   enum ib_mr_type mr_type,
-			   u32 max_num_sg)
-{
-	struct qib_mr *mr;
-
-	if (mr_type != IB_MR_TYPE_MEM_REG)
-		return ERR_PTR(-EINVAL);
-
-	mr = alloc_mr(max_num_sg, pd);
-	if (IS_ERR(mr))
-		return (struct ib_mr *)mr;
-
-	mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
-	if (!mr->pages)
-		goto err;
-
-	return &mr->ibmr;
-
-err:
-	qib_dereg_mr(&mr->ibmr);
-	return ERR_PTR(-ENOMEM);
-}
-
-static int qib_set_page(struct ib_mr *ibmr, u64 addr)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-
-	if (unlikely(mr->npages == mr->mr.max_segs))
-		return -ENOMEM;
-
-	mr->pages[mr->npages++] = addr;
-
-	return 0;
-}
-
-int qib_map_mr_sg(struct ib_mr *ibmr,
-		  struct scatterlist *sg,
-		  int sg_nents)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-
-	mr->npages = 0;
-
-	return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page);
-}
-
-/**
- * qib_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			     struct ib_fmr_attr *fmr_attr)
-{
-	struct qib_fmr *fmr;
-	int m;
-	struct ib_fmr *ret;
-	int rval = -ENOMEM;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
-	if (!fmr)
-		goto bail;
-
-	rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
-	if (rval)
-		goto bail;
-
-	/*
-	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-	 * rkey.
-	 */
-	rval = qib_alloc_lkey(&fmr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	fmr->ibfmr.rkey = fmr->mr.lkey;
-	fmr->ibfmr.lkey = fmr->mr.lkey;
-	/*
-	 * Resources are allocated but no valid mapping (RKEY can't be
-	 * used).
-	 */
-	fmr->mr.access_flags = mr_access_flags;
-	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->mr.page_shift = fmr_attr->page_shift;
-
-	ret = &fmr->ibfmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_qib_mregion(&fmr->mr);
-bail:
-	kfree(fmr);
-	ret = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * qib_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		     int list_len, u64 iova)
-{
-	struct qib_fmr *fmr = to_ifmr(ibfmr);
-	struct qib_lkey_table *rkt;
-	unsigned long flags;
-	int m, n, i;
-	u32 ps;
-	int ret;
-
-	i = atomic_read(&fmr->mr.refcount);
-	if (i > 2)
-		return -EBUSY;
-
-	if (list_len > fmr->mr.max_segs) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	rkt = &to_idev(ibfmr->device)->lk_table;
-	spin_lock_irqsave(&rkt->lock, flags);
-	fmr->mr.user_base = iova;
-	fmr->mr.iova = iova;
-	ps = 1 << fmr->mr.page_shift;
-	fmr->mr.length = list_len * ps;
-	m = 0;
-	n = 0;
-	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-		fmr->mr.map[m]->segs[n].length = ps;
-		if (++n == QIB_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int qib_unmap_fmr(struct list_head *fmr_list)
-{
-	struct qib_fmr *fmr;
-	struct qib_lkey_table *rkt;
-	unsigned long flags;
-
-	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-		rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-		spin_lock_irqsave(&rkt->lock, flags);
-		fmr->mr.user_base = 0;
-		fmr->mr.iova = 0;
-		fmr->mr.length = 0;
-		spin_unlock_irqrestore(&rkt->lock, flags);
-	}
-	return 0;
-}
-
-/**
- * qib_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int qib_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-	struct qib_fmr *fmr = to_ifmr(ibfmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	qib_free_lkey(&fmr->mr);
-	qib_put_mr(&fmr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&fmr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		qib_get_mr(&fmr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_qib_mregion(&fmr->mr);
-	kfree(fmr);
-out:
-	return ret;
-}
-
-void mr_rcu_callback(struct rcu_head *list)
-{
-	struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
-
-	complete(&mr->comp);
-}

diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 3eff35c..575b737 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c

@@ -34,32 +34,38 @@
 
 #include <linux/err.h>
 #include <linux/vmalloc.h>
-#include <linux/jhash.h>
+#include <rdma/rdma_vt.h>
 #ifdef CONFIG_DEBUG_FS
 #include <linux/seq_file.h>
 #endif
 
 #include "qib.h"
 
-#define BITS_PER_PAGE           (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
+/*
+ * mask field which was present in now deleted qib_qpn_table
+ * is not present in rvt_qpn_table. Defining the same field
+ * as qpt_mask here instead of adding the mask field to
+ * rvt_qpn_table.
+ */
+u16 qpt_mask;
 
-static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
-			      struct qpn_map *map, unsigned off)
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
 {
-	return (map - qpt->map) * BITS_PER_PAGE + off;
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 }
 
-static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
-					struct qpn_map *map, unsigned off,
+static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
+					struct rvt_qpn_map *map, unsigned off,
 					unsigned n)
 {
-	if (qpt->mask) {
+	if (qpt_mask) {
 		off++;
-		if (((off & qpt->mask) >> 1) >= n)
-			off = (off | qpt->mask) + 2;
-	} else
-		off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
+		if (((off & qpt_mask) >> 1) >= n)
+			off = (off | qpt_mask) + 2;
+	} else {
+		off = find_next_zero_bit(map->page, RVT_BITS_PER_PAGE, off);
+	}
 	return off;
 }
 
@@ -100,7 +106,7 @@
 	32768                   /* 1E */
 };
 
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
 			 gfp_t gfp)
 {
 	unsigned long page = get_zeroed_page(gfp);
@@ -121,12 +127,15 @@
  * Allocate the next available QPN or
  * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
  */
-static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-		     enum ib_qp_type type, u8 port, gfp_t gfp)
+int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		  enum ib_qp_type type, u8 port, gfp_t gfp)
 {
 	u32 i, offset, max_scan, qpn;
-	struct qpn_map *map;
+	struct rvt_qpn_map *map;
 	u32 ret;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
 
 	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 		unsigned n;
@@ -143,12 +152,12 @@
 	}
 
 	qpn = qpt->last + 2;
-	if (qpn >= QPN_MAX)
+	if (qpn >= RVT_QPN_MAX)
 		qpn = 2;
-	if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
-		qpn = (qpn | qpt->mask) + 2;
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpn / BITS_PER_PAGE];
+	if (qpt_mask && ((qpn & qpt_mask) >> 1) >= dd->n_krcv_queues)
+		qpn = (qpn | qpt_mask) + 2;
+	offset = qpn & RVT_BITS_PER_PAGE_MASK;
+	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
 	max_scan = qpt->nmaps - !offset;
 	for (i = 0;;) {
 		if (unlikely(!map->page)) {
@@ -173,14 +182,14 @@
 			 * We just need to be sure we don't loop
 			 * forever.
 			 */
-		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
+		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
 		/*
 		 * In order to keep the number of pages allocated to a
 		 * minimum, we scan the all existing pages before increasing
 		 * the size of the bitmap table.
 		 */
 		if (++i > max_scan) {
-			if (qpt->nmaps == QPNMAP_ENTRIES)
+			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
 				break;
 			map = &qpt->map[qpt->nmaps++];
 			offset = 0;
@@ -200,706 +209,113 @@
 	return ret;
 }
 
-static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
-{
-	struct qpn_map *map;
-
-	map = qpt->map + qpn / BITS_PER_PAGE;
-	if (map->page)
-		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-}
-
-static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
-{
-	return jhash_1word(qpn, dev->qp_rnd) &
-		(dev->qp_table_size - 1);
-}
-
-
-/*
- * Put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
-{
-	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned long flags;
-	unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
-
-	atomic_inc(&qp->refcount);
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-
-	if (qp->ibqp.qp_num == 0)
-		rcu_assign_pointer(ibp->qp0, qp);
-	else if (qp->ibqp.qp_num == 1)
-		rcu_assign_pointer(ibp->qp1, qp);
-	else {
-		qp->next = dev->qp_table[n];
-		rcu_assign_pointer(dev->qp_table[n], qp);
-	}
-
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-}
-
-/*
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
-{
-	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
-	unsigned long flags;
-	int removed = 1;
-
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-
-	if (rcu_dereference_protected(ibp->qp0,
-			lockdep_is_held(&dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp0, NULL);
-	} else if (rcu_dereference_protected(ibp->qp1,
-			lockdep_is_held(&dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp1, NULL);
-	} else {
-		struct qib_qp *q;
-		struct qib_qp __rcu **qpp;
-
-		removed = 0;
-		qpp = &dev->qp_table[n];
-		for (; (q = rcu_dereference_protected(*qpp,
-				lockdep_is_held(&dev->qpt_lock))) != NULL;
-				qpp = &q->next)
-			if (q == qp) {
-				RCU_INIT_POINTER(*qpp,
-					rcu_dereference_protected(qp->next,
-					 lockdep_is_held(&dev->qpt_lock)));
-				removed = 1;
-				break;
-			}
-	}
-
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-	if (removed) {
-		synchronize_rcu();
-		atomic_dec(&qp->refcount);
-	}
-}
-
 /**
  * qib_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
  */
-unsigned qib_free_all_qps(struct qib_devdata *dd)
+unsigned qib_free_all_qps(struct rvt_dev_info *rdi)
 {
-	struct qib_ibdev *dev = &dd->verbs_dev;
-	unsigned long flags;
-	struct qib_qp *qp;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
 	unsigned n, qp_inuse = 0;
 
 	for (n = 0; n < dd->num_pports; n++) {
 		struct qib_ibport *ibp = &dd->pport[n].ibport_data;
 
-		if (!qib_mcast_tree_empty(ibp))
-			qp_inuse++;
 		rcu_read_lock();
-		if (rcu_dereference(ibp->qp0))
+		if (rcu_dereference(ibp->rvp.qp[0]))
 			qp_inuse++;
-		if (rcu_dereference(ibp->qp1))
+		if (rcu_dereference(ibp->rvp.qp[1]))
 			qp_inuse++;
 		rcu_read_unlock();
 	}
-
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-	for (n = 0; n < dev->qp_table_size; n++) {
-		qp = rcu_dereference_protected(dev->qp_table[n],
-			lockdep_is_held(&dev->qpt_lock));
-		RCU_INIT_POINTER(dev->qp_table[n], NULL);
-
-		for (; qp; qp = rcu_dereference_protected(qp->next,
-					lockdep_is_held(&dev->qpt_lock)))
-			qp_inuse++;
-	}
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-	synchronize_rcu();
-
 	return qp_inuse;
 }
 
-/**
- * qib_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
+void qib_notify_qp_reset(struct rvt_qp *qp)
 {
-	struct qib_qp *qp = NULL;
+	struct qib_qp_priv *priv = qp->priv;
 
-	rcu_read_lock();
-	if (unlikely(qpn <= 1)) {
-		if (qpn == 0)
-			qp = rcu_dereference(ibp->qp0);
-		else
-			qp = rcu_dereference(ibp->qp1);
-		if (qp)
-			atomic_inc(&qp->refcount);
-	} else {
-		struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
-		unsigned n = qpn_hash(dev, qpn);
-
-		for (qp = rcu_dereference(dev->qp_table[n]); qp;
-			qp = rcu_dereference(qp->next))
-			if (qp->ibqp.qp_num == qpn) {
-				atomic_inc(&qp->refcount);
-				break;
-			}
-	}
-	rcu_read_unlock();
-	return qp;
+	atomic_set(&priv->s_dma_busy, 0);
 }
 
-/**
- * qib_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type)
+void qib_notify_error_qp(struct rvt_qp *qp)
 {
-	qp->remote_qpn = 0;
-	qp->qkey = 0;
-	qp->qp_access_flags = 0;
-	atomic_set(&qp->s_dma_busy, 0);
-	qp->s_flags &= QIB_S_SIGNAL_REQ_WR;
-	qp->s_hdrwords = 0;
-	qp->s_wqe = NULL;
-	qp->s_draining = 0;
-	qp->s_next_psn = 0;
-	qp->s_last_psn = 0;
-	qp->s_sending_psn = 0;
-	qp->s_sending_hpsn = 0;
-	qp->s_psn = 0;
-	qp->r_psn = 0;
-	qp->r_msn = 0;
-	if (type == IB_QPT_RC) {
-		qp->s_state = IB_OPCODE_RC_SEND_LAST;
-		qp->r_state = IB_OPCODE_RC_SEND_LAST;
-	} else {
-		qp->s_state = IB_OPCODE_UC_SEND_LAST;
-		qp->r_state = IB_OPCODE_UC_SEND_LAST;
-	}
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_nak_state = 0;
-	qp->r_aflags = 0;
-	qp->r_flags = 0;
-	qp->s_head = 0;
-	qp->s_tail = 0;
-	qp->s_cur = 0;
-	qp->s_acked = 0;
-	qp->s_last = 0;
-	qp->s_ssn = 1;
-	qp->s_lsn = 0;
-	qp->s_mig_state = IB_MIG_MIGRATED;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-	qp->r_head_ack_queue = 0;
-	qp->s_tail_ack_queue = 0;
-	qp->s_num_rd_atomic = 0;
-	if (qp->r_rq.wq) {
-		qp->r_rq.wq->head = 0;
-		qp->r_rq.wq->tail = 0;
-	}
-	qp->r_sge.num_sge = 0;
-}
-
-static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
-{
-	unsigned n;
-
-	if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
-		qib_put_ss(&qp->s_rdma_read_sge);
-
-	qib_put_ss(&qp->r_sge);
-
-	if (clr_sends) {
-		while (qp->s_last != qp->s_head) {
-			struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-			unsigned i;
-
-			for (i = 0; i < wqe->wr.num_sge; i++) {
-				struct qib_sge *sge = &wqe->sg_list[i];
-
-				qib_put_mr(sge->mr);
-			}
-			if (qp->ibqp.qp_type == IB_QPT_UD ||
-			    qp->ibqp.qp_type == IB_QPT_SMI ||
-			    qp->ibqp.qp_type == IB_QPT_GSI)
-				atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
-			if (++qp->s_last >= qp->s_size)
-				qp->s_last = 0;
-		}
-		if (qp->s_rdma_mr) {
-			qib_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-	}
-
-	if (qp->ibqp.qp_type != IB_QPT_RC)
-		return;
-
-	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
-		struct qib_ack_entry *e = &qp->s_ack_queue[n];
-
-		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
-		    e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
-			e->rdma_sge.mr = NULL;
-		}
-	}
-}
-
-/**
- * qib_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
-{
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	int ret = 0;
 
-	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
-		goto bail;
-
-	qp->state = IB_QPS_ERR;
-
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
+	spin_lock(&dev->rdi.pending_lock);
+	if (!list_empty(&priv->iowait) && !(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+		list_del_init(&priv->iowait);
 	}
+	spin_unlock(&dev->rdi.pending_lock);
 
-	if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
-		qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;
-
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
-		qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
-		list_del_init(&qp->iowait);
-	}
-	spin_unlock(&dev->pending_lock);
-
-	if (!(qp->s_flags & QIB_S_BUSY)) {
+	if (!(qp->s_flags & RVT_S_BUSY)) {
 		qp->s_hdrwords = 0;
 		if (qp->s_rdma_mr) {
-			qib_put_mr(qp->s_rdma_mr);
+			rvt_put_mr(qp->s_rdma_mr);
 			qp->s_rdma_mr = NULL;
 		}
-		if (qp->s_tx) {
-			qib_put_txreq(qp->s_tx);
-			qp->s_tx = NULL;
+		if (priv->s_tx) {
+			qib_put_txreq(priv->s_tx);
+			priv->s_tx = NULL;
 		}
 	}
-
-	/* Schedule the sending tasklet to drain the send work queue. */
-	if (qp->s_last != qp->s_head)
-		qib_schedule_send(qp);
-
-	clear_mr_refs(qp, 0);
-
-	memset(&wc, 0, sizeof(wc));
-	wc.qp = &qp->ibqp;
-	wc.opcode = IB_WC_RECV;
-
-	if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) {
-		wc.wr_id = qp->r_wr_id;
-		wc.status = err;
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	}
-	wc.status = IB_WC_WR_FLUSH_ERR;
-
-	if (qp->r_rq.wq) {
-		struct qib_rwq *wq;
-		u32 head;
-		u32 tail;
-
-		spin_lock(&qp->r_rq.lock);
-
-		/* sanity check pointers before trusting them */
-		wq = qp->r_rq.wq;
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		while (tail != head) {
-			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-			if (++tail >= qp->r_rq.size)
-				tail = 0;
-			qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-		}
-		wq->tail = tail;
-
-		spin_unlock(&qp->r_rq.lock);
-	} else if (qp->ibqp.event_handler)
-		ret = 1;
-
-bail:
-	return ret;
 }
 
-/**
- * qib_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_udata *udata)
+static int mtu_to_enum(u32 mtu)
 {
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_qp *qp = to_iqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
-	struct ib_event ev;
-	int lastwqe = 0;
-	int mig = 0;
-	int ret;
-	u32 pmtu = 0; /* for gcc warning only */
+	int enum_mtu;
 
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ?
-		attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-		goto inval;
-
-	if (attr_mask & IB_QP_AV) {
-		if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
-			goto inval;
-		if (qib_check_ah(qp->ibqp.device, &attr->ah_attr))
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
-			goto inval;
-		if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
-			goto inval;
-		if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
-			goto inval;
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		if (attr->min_rnr_timer > 31)
-			goto inval;
-
-	if (attr_mask & IB_QP_PORT)
-		if (qp->ibqp.qp_type == IB_QPT_SMI ||
-		    qp->ibqp.qp_type == IB_QPT_GSI ||
-		    attr->port_num == 0 ||
-		    attr->port_num > ibqp->device->phys_port_cnt)
-			goto inval;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		if (attr->dest_qp_num > QIB_QPN_MASK)
-			goto inval;
-
-	if (attr_mask & IB_QP_RETRY_CNT)
-		if (attr->retry_cnt > 7)
-			goto inval;
-
-	if (attr_mask & IB_QP_RNR_RETRY)
-		if (attr->rnr_retry > 7)
-			goto inval;
-
-	/*
-	 * Don't allow invalid path_mtu values.  OK to set greater
-	 * than the active mtu (or even the max_cap, if we have tuned
-	 * that to a small mtu.  We'll set qp->path_mtu
-	 * to the lesser of requested attribute mtu and active,
-	 * for packetizing messages.
-	 * Note that the QP port has to be set in INIT and MTU in RTR.
-	 */
-	if (attr_mask & IB_QP_PATH_MTU) {
-		struct qib_devdata *dd = dd_from_dev(dev);
-		int mtu, pidx = qp->port_num - 1;
-
-		mtu = ib_mtu_enum_to_int(attr->path_mtu);
-		if (mtu == -1)
-			goto inval;
-		if (mtu > dd->pport[pidx].ibmtu) {
-			switch (dd->pport[pidx].ibmtu) {
-			case 4096:
-				pmtu = IB_MTU_4096;
-				break;
-			case 2048:
-				pmtu = IB_MTU_2048;
-				break;
-			case 1024:
-				pmtu = IB_MTU_1024;
-				break;
-			case 512:
-				pmtu = IB_MTU_512;
-				break;
-			case 256:
-				pmtu = IB_MTU_256;
-				break;
-			default:
-				pmtu = IB_MTU_2048;
-			}
-		} else
-			pmtu = attr->path_mtu;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		if (attr->path_mig_state == IB_MIG_REARM) {
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				goto inval;
-			if (new_state != IB_QPS_RTS)
-				goto inval;
-		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
-			if (qp->s_mig_state == IB_MIG_REARM)
-				goto inval;
-			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
-				goto inval;
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				mig = 1;
-		} else
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC)
-			goto inval;
-
-	switch (new_state) {
-	case IB_QPS_RESET:
-		if (qp->state != IB_QPS_RESET) {
-			qp->state = IB_QPS_RESET;
-			spin_lock(&dev->pending_lock);
-			if (!list_empty(&qp->iowait))
-				list_del_init(&qp->iowait);
-			spin_unlock(&dev->pending_lock);
-			qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
-			spin_unlock(&qp->s_lock);
-			spin_unlock_irq(&qp->r_lock);
-			/* Stop the sending work queue and retry timer */
-			cancel_work_sync(&qp->s_work);
-			del_timer_sync(&qp->s_timer);
-			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-			if (qp->s_tx) {
-				qib_put_txreq(qp->s_tx);
-				qp->s_tx = NULL;
-			}
-			remove_qp(dev, qp);
-			wait_event(qp->wait, !atomic_read(&qp->refcount));
-			spin_lock_irq(&qp->r_lock);
-			spin_lock(&qp->s_lock);
-			clear_mr_refs(qp, 1);
-			qib_reset_qp(qp, ibqp->qp_type);
-		}
+	switch (mtu) {
+	case 4096:
+		enum_mtu = IB_MTU_4096;
 		break;
-
-	case IB_QPS_RTR:
-		/* Allow event to retrigger if QP set to RTR more than once */
-		qp->r_flags &= ~QIB_R_COMM_EST;
-		qp->state = new_state;
+	case 2048:
+		enum_mtu = IB_MTU_2048;
 		break;
-
-	case IB_QPS_SQD:
-		qp->s_draining = qp->s_last != qp->s_cur;
-		qp->state = new_state;
+	case 1024:
+		enum_mtu = IB_MTU_1024;
 		break;
-
-	case IB_QPS_SQE:
-		if (qp->ibqp.qp_type == IB_QPT_RC)
-			goto inval;
-		qp->state = new_state;
+	case 512:
+		enum_mtu = IB_MTU_512;
 		break;
-
-	case IB_QPS_ERR:
-		lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+	case 256:
+		enum_mtu = IB_MTU_256;
 		break;
-
 	default:
-		qp->state = new_state;
-		break;
+		enum_mtu = IB_MTU_2048;
 	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		qp->s_pkey_index = attr->pkey_index;
-
-	if (attr_mask & IB_QP_PORT)
-		qp->port_num = attr->port_num;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		qp->remote_qpn = attr->dest_qp_num;
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK;
-		qp->s_psn = qp->s_next_psn;
-		qp->s_sending_psn = qp->s_next_psn;
-		qp->s_last_psn = qp->s_next_psn - 1;
-		qp->s_sending_hpsn = qp->s_last_psn;
-	}
-
-	if (attr_mask & IB_QP_RQ_PSN)
-		qp->r_psn = attr->rq_psn & QIB_PSN_MASK;
-
-	if (attr_mask & IB_QP_ACCESS_FLAGS)
-		qp->qp_access_flags = attr->qp_access_flags;
-
-	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
-		qp->s_srate = attr->ah_attr.static_rate;
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		qp->alt_ah_attr = attr->alt_ah_attr;
-		qp->s_alt_pkey_index = attr->alt_pkey_index;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		qp->s_mig_state = attr->path_mig_state;
-		if (mig) {
-			qp->remote_ah_attr = qp->alt_ah_attr;
-			qp->port_num = qp->alt_ah_attr.port_num;
-			qp->s_pkey_index = qp->s_alt_pkey_index;
-		}
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU) {
-		qp->path_mtu = pmtu;
-		qp->pmtu = ib_mtu_enum_to_int(pmtu);
-	}
-
-	if (attr_mask & IB_QP_RETRY_CNT) {
-		qp->s_retry_cnt = attr->retry_cnt;
-		qp->s_retry = attr->retry_cnt;
-	}
-
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp->s_rnr_retry_cnt = attr->rnr_retry;
-		qp->s_rnr_retry = attr->rnr_retry;
-	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-	if (attr_mask & IB_QP_TIMEOUT) {
-		qp->timeout = attr->timeout;
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		qp->qkey = attr->qkey;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-		insert_qp(dev, qp);
-
-	if (lastwqe) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	if (mig) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_PATH_MIG;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	ret = 0;
-	goto bail;
-
-inval:
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-	ret = -EINVAL;
-
-bail:
-	return ret;
+	return enum_mtu;
 }
 
-int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		 int attr_mask, struct ib_qp_init_attr *init_attr)
+int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			   struct ib_qp_attr *attr)
 {
-	struct qib_qp *qp = to_iqp(ibqp);
+	int mtu, pmtu, pidx = qp->port_num - 1;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
+	mtu = ib_mtu_enum_to_int(attr->path_mtu);
+	if (mtu == -1)
+		return -EINVAL;
 
-	attr->qp_state = qp->state;
-	attr->cur_qp_state = attr->qp_state;
-	attr->path_mtu = qp->path_mtu;
-	attr->path_mig_state = qp->s_mig_state;
-	attr->qkey = qp->qkey;
-	attr->rq_psn = qp->r_psn & QIB_PSN_MASK;
-	attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK;
-	attr->dest_qp_num = qp->remote_qpn;
-	attr->qp_access_flags = qp->qp_access_flags;
-	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-	attr->cap.max_send_sge = qp->s_max_sge;
-	attr->cap.max_recv_sge = qp->r_rq.max_sge;
-	attr->cap.max_inline_data = 0;
-	attr->ah_attr = qp->remote_ah_attr;
-	attr->alt_ah_attr = qp->alt_ah_attr;
-	attr->pkey_index = qp->s_pkey_index;
-	attr->alt_pkey_index = qp->s_alt_pkey_index;
-	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = qp->s_draining;
-	attr->max_rd_atomic = qp->s_max_rd_atomic;
-	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-	attr->min_rnr_timer = qp->r_min_rnr_timer;
-	attr->port_num = qp->port_num;
-	attr->timeout = qp->timeout;
-	attr->retry_cnt = qp->s_retry_cnt;
-	attr->rnr_retry = qp->s_rnr_retry_cnt;
-	attr->alt_port_num = qp->alt_ah_attr.port_num;
-	attr->alt_timeout = qp->alt_timeout;
-
-	init_attr->event_handler = qp->ibqp.event_handler;
-	init_attr->qp_context = qp->ibqp.qp_context;
-	init_attr->send_cq = qp->ibqp.send_cq;
-	init_attr->recv_cq = qp->ibqp.recv_cq;
-	init_attr->srq = qp->ibqp.srq;
-	init_attr->cap = attr->cap;
-	if (qp->s_flags & QIB_S_SIGNAL_REQ_WR)
-		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+	if (mtu > dd->pport[pidx].ibmtu)
+		pmtu = mtu_to_enum(dd->pport[pidx].ibmtu);
 	else
-		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->qp_type = qp->ibqp.qp_type;
-	init_attr->port_num = qp->port_num;
-	return 0;
+		pmtu = attr->path_mtu;
+	return pmtu;
+}
+
+int qib_mtu_to_path_mtu(u32 mtu)
+{
+	return mtu_to_enum(mtu);
+}
+
+u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
+{
+	return ib_mtu_enum_to_int(pmtu);
 }
 
 /**
@@ -908,7 +324,7 @@
  *
  * Returns the AETH.
  */
-__be32 qib_compute_aeth(struct qib_qp *qp)
+__be32 qib_compute_aeth(struct rvt_qp *qp)
 {
 	u32 aeth = qp->r_msn & QIB_MSN_MASK;
 
@@ -921,7 +337,7 @@
 	} else {
 		u32 min, max, x;
 		u32 credits;
-		struct qib_rwq *wq = qp->r_rq.wq;
+		struct rvt_rwq *wq = qp->r_rq.wq;
 		u32 head;
 		u32 tail;
 
@@ -962,315 +378,63 @@
 	return cpu_to_be32(aeth);
 }
 
-/**
- * qib_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata)
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp)
 {
-	struct qib_qp *qp;
-	int err;
-	struct qib_swqe *swq = NULL;
-	struct qib_ibdev *dev;
-	struct qib_devdata *dd;
-	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret;
-	gfp_t gfp;
+	struct qib_qp_priv *priv;
 
+	priv = kzalloc(sizeof(*priv), gfp);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+	priv->owner = qp;
 
-	if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
-	    init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
-	    init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
-		return ERR_PTR(-EINVAL);
-
-	/* GFP_NOIO is applicable in RC QPs only */
-	if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
-	    init_attr->qp_type != IB_QPT_RC)
-		return ERR_PTR(-EINVAL);
-
-	gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
-			GFP_NOIO : GFP_KERNEL;
-
-	/* Check receive queue parameters if no SRQ is specified. */
-	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > ib_qib_max_sges ||
-		    init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		if (init_attr->cap.max_send_sge +
-		    init_attr->cap.max_send_wr +
-		    init_attr->cap.max_recv_sge +
-		    init_attr->cap.max_recv_wr == 0) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
+	priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp);
+	if (!priv->s_hdr) {
+		kfree(priv);
+		return ERR_PTR(-ENOMEM);
 	}
+	init_waitqueue_head(&priv->wait_dma);
+	INIT_WORK(&priv->s_work, _qib_do_send);
+	INIT_LIST_HEAD(&priv->iowait);
 
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		if (init_attr->port_num == 0 ||
-		    init_attr->port_num > ibpd->device->phys_port_cnt) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	case IB_QPT_UC:
-	case IB_QPT_RC:
-	case IB_QPT_UD:
-		sz = sizeof(struct qib_sge) *
-			init_attr->cap.max_send_sge +
-			sizeof(struct qib_swqe);
-		swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
-				gfp, PAGE_KERNEL);
-		if (swq == NULL) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail;
-		}
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
-		if (init_attr->srq) {
-			struct qib_srq *srq = to_isrq(init_attr->srq);
-
-			if (srq->rq.max_sge > 1)
-				sg_list_sz = sizeof(*qp->r_sg_list) *
-					(srq->rq.max_sge - 1);
-		} else if (init_attr->cap.max_recv_sge > 1)
-			sg_list_sz = sizeof(*qp->r_sg_list) *
-				(init_attr->cap.max_recv_sge - 1);
-		qp = kzalloc(sz + sg_list_sz, gfp);
-		if (!qp) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_swq;
-		}
-		RCU_INIT_POINTER(qp->next, NULL);
-		qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
-		if (!qp->s_hdr) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_qp;
-		}
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-		if (init_attr->srq)
-			sz = 0;
-		else {
-			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-				sizeof(struct qib_rwqe);
-			if (gfp != GFP_NOIO)
-				qp->r_rq.wq = vmalloc_user(
-						sizeof(struct qib_rwq) +
-						qp->r_rq.size * sz);
-			else
-				qp->r_rq.wq = __vmalloc(
-						sizeof(struct qib_rwq) +
-						qp->r_rq.size * sz,
-						gfp, PAGE_KERNEL);
-
-			if (!qp->r_rq.wq) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
-			}
-		}
-
-		/*
-		 * ib_create_qp() will initialize qp->ibqp
-		 * except for qp->ibqp.qp_num.
-		 */
-		spin_lock_init(&qp->r_lock);
-		spin_lock_init(&qp->s_lock);
-		spin_lock_init(&qp->r_rq.lock);
-		atomic_set(&qp->refcount, 0);
-		init_waitqueue_head(&qp->wait);
-		init_waitqueue_head(&qp->wait_dma);
-		init_timer(&qp->s_timer);
-		qp->s_timer.data = (unsigned long)qp;
-		INIT_WORK(&qp->s_work, qib_do_send);
-		INIT_LIST_HEAD(&qp->iowait);
-		INIT_LIST_HEAD(&qp->rspwait);
-		qp->state = IB_QPS_RESET;
-		qp->s_wq = swq;
-		qp->s_size = init_attr->cap.max_send_wr + 1;
-		qp->s_max_sge = init_attr->cap.max_send_sge;
-		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = QIB_S_SIGNAL_REQ_WR;
-		dev = to_idev(ibpd->device);
-		dd = dd_from_dev(dev);
-		err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
-				init_attr->port_num, gfp);
-		if (err < 0) {
-			ret = ERR_PTR(err);
-			vfree(qp->r_rq.wq);
-			goto bail_qp;
-		}
-		qp->ibqp.qp_num = err;
-		qp->port_num = init_attr->port_num;
-		qib_reset_qp(qp, init_attr->qp_type);
-		break;
-
-	default:
-		/* Don't support raw QPs */
-		ret = ERR_PTR(-ENOSYS);
-		goto bail;
-	}
-
-	init_attr->cap.max_inline_data = 0;
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		if (!qp->r_rq.wq) {
-			__u64 offset = 0;
-
-			err = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		} else {
-			u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz;
-
-			qp->ip = qib_create_mmap_info(dev, s,
-						      ibpd->uobject->context,
-						      qp->r_rq.wq);
-			if (!qp->ip) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_ip;
-			}
-
-			err = ib_copy_to_udata(udata, &(qp->ip->offset),
-					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		}
-	}
-
-	spin_lock(&dev->n_qps_lock);
-	if (dev->n_qps_allocated == ib_qib_max_qps) {
-		spin_unlock(&dev->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_qps_allocated++;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &qp->ibqp;
-	goto bail;
-
-bail_ip:
-	if (qp->ip)
-		kref_put(&qp->ip->ref, qib_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
-bail_qp:
-	kfree(qp->s_hdr);
-	kfree(qp);
-bail_swq:
-	vfree(swq);
-bail:
-	return ret;
+	return priv;
 }
 
-/**
- * qib_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int qib_destroy_qp(struct ib_qp *ibqp)
+void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
+	struct qib_qp_priv *priv = qp->priv;
 
-	/* Make sure HW and driver activity is stopped. */
-	spin_lock_irq(&qp->s_lock);
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-		spin_lock(&dev->pending_lock);
-		if (!list_empty(&qp->iowait))
-			list_del_init(&qp->iowait);
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
-		spin_unlock_irq(&qp->s_lock);
-		cancel_work_sync(&qp->s_work);
-		del_timer_sync(&qp->s_timer);
-		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-		if (qp->s_tx) {
-			qib_put_txreq(qp->s_tx);
-			qp->s_tx = NULL;
-		}
-		remove_qp(dev, qp);
-		wait_event(qp->wait, !atomic_read(&qp->refcount));
-		clear_mr_refs(qp, 1);
-	} else
-		spin_unlock_irq(&qp->s_lock);
-
-	/* all user's cleaned up, mark it available */
-	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip)
-		kref_put(&qp->ip->ref, qib_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	vfree(qp->s_wq);
-	kfree(qp->s_hdr);
-	kfree(qp);
-	return 0;
+	kfree(priv->s_hdr);
+	kfree(priv);
 }
 
-/**
- * qib_init_qpn_table - initialize the QP number table for a device
- * @qpt: the QPN table
- */
-void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt)
+void qib_stop_send_queue(struct rvt_qp *qp)
 {
-	spin_lock_init(&qpt->lock);
-	qpt->last = 1;          /* start with QPN 2 */
-	qpt->nmaps = 1;
-	qpt->mask = dd->qpn_mask;
+	struct qib_qp_priv *priv = qp->priv;
+
+	cancel_work_sync(&priv->s_work);
+	del_timer_sync(&qp->s_timer);
 }
 
-/**
- * qib_free_qpn_table - free the QP number table for a device
- * @qpt: the QPN table
- */
-void qib_free_qpn_table(struct qib_qpn_table *qpt)
+void qib_quiesce_qp(struct rvt_qp *qp)
 {
-	int i;
+	struct qib_qp_priv *priv = qp->priv;
 
-	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
-		if (qpt->map[i].page)
-			free_page((unsigned long) qpt->map[i].page);
+	wait_event(priv->wait_dma, !atomic_read(&priv->s_dma_busy));
+	if (priv->s_tx) {
+		qib_put_txreq(priv->s_tx);
+		priv->s_tx = NULL;
+	}
+}
+
+void qib_flush_qp_waiters(struct rvt_qp *qp)
+{
+	struct qib_qp_priv *priv = qp->priv;
+	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+
+	spin_lock(&dev->rdi.pending_lock);
+	if (!list_empty(&priv->iowait))
+		list_del_init(&priv->iowait);
+	spin_unlock(&dev->rdi.pending_lock);
 }
 
 /**
@@ -1280,7 +444,7 @@
  *
  * The QP s_lock should be held.
  */
-void qib_get_credit(struct qib_qp *qp, u32 aeth)
+void qib_get_credit(struct rvt_qp *qp, u32 aeth)
 {
 	u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;
 
@@ -1290,31 +454,70 @@
 	 * honor the credit field.
 	 */
 	if (credit == QIB_AETH_CREDIT_INVAL) {
-		if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
-			qp->s_flags |= QIB_S_UNLIMITED_CREDIT;
-			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+		if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+			qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				qib_schedule_send(qp);
 			}
 		}
-	} else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
+	} else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
 		/* Compute new LSN (i.e., MSN + credit) */
 		credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
 		if (qib_cmp24(credit, qp->s_lsn) > 0) {
 			qp->s_lsn = credit;
-			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				qib_schedule_send(qp);
 			}
 		}
 	}
 }
 
+/**
+ * qib_check_send_wqe - validate wr/wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wr/wqe.  This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ */
+int qib_check_send_wqe(struct rvt_qp *qp,
+		       struct rvt_swqe *wqe)
+{
+	struct rvt_ah *ah;
+	int ret = 0;
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		if (wqe->length > 0x80000000U)
+			return -EINVAL;
+		break;
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		/* progress hint */
+		ret = 1;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
 #ifdef CONFIG_DEBUG_FS
 
 struct qib_qp_iter {
 	struct qib_ibdev *dev;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
 	int n;
 };
 
@@ -1340,14 +543,14 @@
 	struct qib_ibdev *dev = iter->dev;
 	int n = iter->n;
 	int ret = 1;
-	struct qib_qp *pqp = iter->qp;
-	struct qib_qp *qp;
+	struct rvt_qp *pqp = iter->qp;
+	struct rvt_qp *qp;
 
-	for (; n < dev->qp_table_size; n++) {
+	for (; n < dev->rdi.qp_dev->qp_table_size; n++) {
 		if (pqp)
 			qp = rcu_dereference(pqp->next);
 		else
-			qp = rcu_dereference(dev->qp_table[n]);
+			qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]);
 		pqp = qp;
 		if (qp) {
 			iter->qp = qp;
@@ -1364,10 +567,11 @@
 
 void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
 {
-	struct qib_swqe *wqe;
-	struct qib_qp *qp = iter->qp;
+	struct rvt_swqe *wqe;
+	struct rvt_qp *qp = iter->qp;
+	struct qib_qp_priv *priv = qp->priv;
 
-	wqe = get_swqe_ptr(qp, qp->s_last);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 	seq_printf(s,
 		   "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
 		   iter->n,
@@ -1377,8 +581,8 @@
 		   wqe->wr.opcode,
 		   qp->s_hdrwords,
 		   qp->s_flags,
-		   atomic_read(&qp->s_dma_busy),
-		   !list_empty(&qp->iowait),
+		   atomic_read(&priv->s_dma_busy),
+		   !list_empty(&priv->iowait),
 		   qp->timeout,
 		   wqe->ssn,
 		   qp->s_lsn,

diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index e6b7556..9088e26 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c

@@ -40,7 +40,7 @@
 
 static void rc_timeout(unsigned long arg);
 
-static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
+static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 		       u32 psn, u32 pmtu)
 {
 	u32 len;
@@ -54,9 +54,9 @@
 	return wqe->length - len;
 }
 
-static void start_timer(struct qib_qp *qp)
+static void start_timer(struct rvt_qp *qp)
 {
-	qp->s_flags |= QIB_S_TIMER;
+	qp->s_flags |= RVT_S_TIMER;
 	qp->s_timer.function = rc_timeout;
 	/* 4.096 usec. * (1 << qp->timeout) */
 	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
@@ -74,17 +74,17 @@
  * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
+static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
 			   struct qib_other_headers *ohdr, u32 pmtu)
 {
-	struct qib_ack_entry *e;
+	struct rvt_ack_entry *e;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
 	u32 bth2;
 
 	/* Don't send an ACK if we aren't supposed to. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto bail;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -95,7 +95,7 @@
 	case OP(RDMA_READ_RESPONSE_ONLY):
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 		if (e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		/* FALLTHROUGH */
@@ -112,7 +112,7 @@
 	case OP(ACKNOWLEDGE):
 		/* Check for no next entry in the queue. */
 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-			if (qp->s_flags & QIB_S_ACK_PENDING)
+			if (qp->s_flags & RVT_S_ACK_PENDING)
 				goto normal;
 			goto bail;
 		}
@@ -133,7 +133,7 @@
 			/* Copy SGE state in case we need to resend */
 			qp->s_rdma_mr = e->rdma_sge.mr;
 			if (qp->s_rdma_mr)
-				qib_get_mr(qp->s_rdma_mr);
+				rvt_get_mr(qp->s_rdma_mr);
 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
 			qp->s_ack_rdma_sge.num_sge = 1;
 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -172,7 +172,7 @@
 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
 		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
 		if (qp->s_rdma_mr)
-			qib_get_mr(qp->s_rdma_mr);
+			rvt_get_mr(qp->s_rdma_mr);
 		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu)
 			len = pmtu;
@@ -196,7 +196,7 @@
 		 * (see above).
 		 */
 		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~QIB_S_ACK_PENDING;
+		qp->s_flags &= ~RVT_S_ACK_PENDING;
 		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
 			ohdr->u.aeth =
@@ -218,7 +218,7 @@
 
 bail:
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING);
+	qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING);
 	return 0;
 }
 
@@ -226,63 +226,60 @@
  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_rc_req(struct qib_qp *qp)
+int qib_make_rc_req(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_other_headers *ohdr;
-	struct qib_sge_state *ss;
-	struct qib_swqe *wqe;
+	struct rvt_sge_state *ss;
+	struct rvt_swqe *wqe;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
 	u32 bth2;
 	u32 pmtu = qp->pmtu;
 	char newreq;
-	unsigned long flags;
 	int ret = 0;
 	int delta;
 
-	ohdr = &qp->s_hdr->u.oth;
+	ohdr = &priv->s_hdr->u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->u.l.oth;
-
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout resends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
+		ohdr = &priv->s_hdr->u.l.oth;
 
 	/* Sending responses has higher priority over sending requests. */
-	if ((qp->s_flags & QIB_S_RESP_PENDING) &&
+	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
 	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
 		goto done;
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
 		goto done;
 	}
 
-	if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK))
+	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
 		goto bail;
 
 	if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 		if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
-			qp->s_flags |= QIB_S_WAIT_PSN;
+			qp->s_flags |= RVT_S_WAIT_PSN;
 			goto bail;
 		}
 		qp->s_sending_psn = qp->s_psn;
@@ -294,10 +291,10 @@
 	bth0 = 0;
 
 	/* Send a request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	switch (qp->s_state) {
 	default:
-		if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/*
 		 * Resend an old request or start a new one.
@@ -317,11 +314,11 @@
 			 */
 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 			    qp->s_num_rd_atomic) {
-				qp->s_flags |= QIB_S_WAIT_FENCE;
+				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 			}
-			wqe->psn = qp->s_next_psn;
 			newreq = 1;
+			qp->s_psn = wqe->psn;
 		}
 		/*
 		 * Note that we have to be careful not to modify the
@@ -335,14 +332,12 @@
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
 				break;
@@ -363,14 +358,14 @@
 			break;
 
 		case IB_WR_RDMA_WRITE:
-			if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 				qp->s_lsn++;
 			/* FALLTHROUGH */
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
 
@@ -380,9 +375,7 @@
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				len = pmtu;
 				break;
@@ -411,19 +404,12 @@
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= QIB_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
 			}
 
 			ohdr->u.rc.reth.vaddr =
@@ -449,13 +435,12 @@
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= QIB_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
 			}
 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
@@ -498,11 +483,8 @@
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
-		else {
+		else
 			qp->s_psn++;
-			if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
@@ -522,8 +504,6 @@
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -563,8 +543,6 @@
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -618,9 +596,9 @@
 	delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
 	if (delta && delta % QIB_PSN_CREDIT == 0)
 		bth2 |= IB_BTH_REQ_ACK;
-	if (qp->s_flags & QIB_S_SEND_ONE) {
-		qp->s_flags &= ~QIB_S_SEND_ONE;
-		qp->s_flags |= QIB_S_WAIT_ACK;
+	if (qp->s_flags & RVT_S_SEND_ONE) {
+		qp->s_flags &= ~RVT_S_SEND_ONE;
+		qp->s_flags |= RVT_S_WAIT_ACK;
 		bth2 |= IB_BTH_REQ_ACK;
 	}
 	qp->s_len -= len;
@@ -629,13 +607,9 @@
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 }
 
@@ -647,7 +621,7 @@
  * Note that RDMA reads and atomics are handled in the
  * send side QP state and tasklet.
  */
-void qib_send_rc_ack(struct qib_qp *qp)
+void qib_send_rc_ack(struct rvt_qp *qp)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -665,11 +639,11 @@
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto unlock;
 
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-	if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
+	if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
 		goto queue_ack;
 
 	/* Construct the header with s_lock held so APM doesn't change it. */
@@ -758,9 +732,9 @@
 	goto done;
 
 queue_ack:
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		ibp->n_rc_qacks++;
-		qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING;
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		this_cpu_inc(*ibp->rvp.rc_qacks);
+		qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 		qp->s_nak_state = qp->r_nak_state;
 		qp->s_ack_psn = qp->r_ack_psn;
 
@@ -782,10 +756,10 @@
  * for the given QP.
  * Called at interrupt level with the QP s_lock held.
  */
-static void reset_psn(struct qib_qp *qp, u32 psn)
+static void reset_psn(struct rvt_qp *qp, u32 psn)
 {
 	u32 n = qp->s_acked;
-	struct qib_swqe *wqe = get_swqe_ptr(qp, n);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 	u32 opcode;
 
 	qp->s_cur = n;
@@ -808,7 +782,7 @@
 			n = 0;
 		if (n == qp->s_tail)
 			break;
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		diff = qib_cmp24(psn, wqe->psn);
 		if (diff < 0)
 			break;
@@ -854,22 +828,22 @@
 done:
 	qp->s_psn = psn;
 	/*
-	 * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer
+	 * Set RVT_S_WAIT_PSN as qib_rc_complete() may start the timer
 	 * asynchronously before the send tasklet can get scheduled.
 	 * Doing it in qib_make_rc_req() is too late.
 	 */
 	if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
-		qp->s_flags |= QIB_S_WAIT_PSN;
+		qp->s_flags |= RVT_S_WAIT_PSN;
 }
 
 /*
  * Back up requester to resend the last un-ACKed request.
  * The QP r_lock and s_lock should be held and interrupts disabled.
  */
-static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
+static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 {
-	struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	struct qib_ibport *ibp;
 
 	if (qp->s_retry == 0) {
@@ -878,7 +852,7 @@
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
 			qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-			qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			return;
 		} else /* XXX need to handle delayed completion */
 			return;
@@ -887,15 +861,15 @@
 
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
-		ibp->n_rc_resends++;
+		ibp->rvp.n_rc_resends++;
 	else
-		ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
 
-	qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR |
-			 QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN |
-			 QIB_S_WAIT_ACK);
+	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
+			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
+			 RVT_S_WAIT_ACK);
 	if (wait)
-		qp->s_flags |= QIB_S_SEND_ONE;
+		qp->s_flags |= RVT_S_SEND_ONE;
 	reset_psn(qp, psn);
 }
 
@@ -904,16 +878,16 @@
  */
 static void rc_timeout(unsigned long arg)
 {
-	struct qib_qp *qp = (struct qib_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	struct qib_ibport *ibp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->r_lock, flags);
 	spin_lock(&qp->s_lock);
-	if (qp->s_flags & QIB_S_TIMER) {
+	if (qp->s_flags & RVT_S_TIMER) {
 		ibp = to_iport(qp->ibqp.device, qp->port_num);
-		ibp->n_rc_timeouts++;
-		qp->s_flags &= ~QIB_S_TIMER;
+		ibp->rvp.n_rc_timeouts++;
+		qp->s_flags &= ~RVT_S_TIMER;
 		del_timer(&qp->s_timer);
 		qib_restart_rc(qp, qp->s_last_psn + 1, 1);
 		qib_schedule_send(qp);
@@ -927,12 +901,12 @@
  */
 void qib_rc_rnr_retry(unsigned long arg)
 {
-	struct qib_qp *qp = (struct qib_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (qp->s_flags & QIB_S_WAIT_RNR) {
-		qp->s_flags &= ~QIB_S_WAIT_RNR;
+	if (qp->s_flags & RVT_S_WAIT_RNR) {
+		qp->s_flags &= ~RVT_S_WAIT_RNR;
 		del_timer(&qp->s_timer);
 		qib_schedule_send(qp);
 	}
@@ -943,14 +917,14 @@
  * Set qp->s_sending_psn to the next PSN after the given one.
  * This would be psn+1 except when RDMA reads are present.
  */
-static void reset_sending_psn(struct qib_qp *qp, u32 psn)
+static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	u32 n = qp->s_last;
 
 	/* Find the work request corresponding to the given PSN. */
 	for (;;) {
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		if (qib_cmp24(psn, wqe->lpsn) <= 0) {
 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
 				qp->s_sending_psn = wqe->lpsn + 1;
@@ -968,16 +942,16 @@
 /*
  * This should be called with the QP s_lock held and interrupts disabled.
  */
-void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
+void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
 {
 	struct qib_other_headers *ohdr;
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct ib_wc wc;
 	unsigned i;
 	u32 opcode;
 	u32 psn;
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
 	/* Find out where the BTH is */
@@ -1002,22 +976,30 @@
 	 * there are still requests that haven't been acked.
 	 */
 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
-	    !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
-	    (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	    !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		start_timer(qp);
 
 	while (qp->s_last != qp->s_acked) {
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		u32 s_last;
+
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct qib_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
-			qib_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1025,25 +1007,23 @@
 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	}
 	/*
 	 * If we were waiting for sends to complete before resending,
 	 * and they are now complete, restart sending.
 	 */
-	if (qp->s_flags & QIB_S_WAIT_PSN &&
+	if (qp->s_flags & RVT_S_WAIT_PSN &&
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		qp->s_flags &= ~QIB_S_WAIT_PSN;
+		qp->s_flags &= ~RVT_S_WAIT_PSN;
 		qp->s_sending_psn = qp->s_psn;
 		qp->s_sending_hpsn = qp->s_psn - 1;
 		qib_schedule_send(qp);
 	}
 }
 
-static inline void update_last_psn(struct qib_qp *qp, u32 psn)
+static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
 {
 	qp->s_last_psn = psn;
 }
@@ -1053,8 +1033,8 @@
  * This is similar to qib_send_complete but has to check to be sure
  * that the SGEs are not being referenced if the SWQE is being resent.
  */
-static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
-					 struct qib_swqe *wqe,
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+					 struct rvt_swqe *wqe,
 					 struct qib_ibport *ibp)
 {
 	struct ib_wc wc;
@@ -1067,13 +1047,21 @@
 	 */
 	if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct qib_sge *sge = &wqe->sg_list[i];
+		u32 s_last;
 
-			qib_put_mr(sge->mr);
+		for (i = 0; i < wqe->wr.num_sge; i++) {
+			struct rvt_sge *sge = &wqe->sg_list[i];
+
+			rvt_put_mr(sge->mr);
 		}
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1081,12 +1069,10 @@
 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	} else
-		ibp->n_rc_delayed_comp++;
+		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
 
 	qp->s_retry = qp->s_retry_cnt;
 	update_last_psn(qp, wqe->lpsn);
@@ -1100,7 +1086,7 @@
 		if (++qp->s_cur >= qp->s_size)
 			qp->s_cur = 0;
 		qp->s_acked = qp->s_cur;
-		wqe = get_swqe_ptr(qp, qp->s_cur);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 		if (qp->s_acked != qp->s_tail) {
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
@@ -1110,7 +1096,7 @@
 			qp->s_acked = 0;
 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
 			qp->s_draining = 0;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	}
 	return wqe;
 }
@@ -1126,19 +1112,19 @@
  * Called at interrupt level with the QP s_lock held.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
+static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		     u64 val, struct qib_ctxtdata *rcd)
 {
 	struct qib_ibport *ibp;
 	enum ib_wc_status status;
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
 	int diff;
 
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
 		del_timer(&qp->s_timer);
 	}
 
@@ -1151,7 +1137,7 @@
 	ack_psn = psn;
 	if (aeth >> 29)
 		ack_psn--;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 
 	/*
@@ -1186,11 +1172,11 @@
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
 			/* Retry this request. */
-			if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) {
-				qp->r_flags |= QIB_R_RDMAR_SEQ;
+			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+				qp->r_flags |= RVT_R_RDMAR_SEQ;
 				qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 				if (list_empty(&qp->rspwait)) {
-					qp->r_flags |= QIB_R_RSP_SEND;
+					qp->r_flags |= RVT_R_RSP_SEND;
 					atomic_inc(&qp->refcount);
 					list_add_tail(&qp->rspwait,
 						      &rcd->qp_wait_list);
@@ -1213,14 +1199,14 @@
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 			qp->s_num_rd_atomic--;
 			/* Restart sending task if fence is complete */
-			if ((qp->s_flags & QIB_S_WAIT_FENCE) &&
+			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
 			    !qp->s_num_rd_atomic) {
-				qp->s_flags &= ~(QIB_S_WAIT_FENCE |
-						 QIB_S_WAIT_ACK);
+				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+						 RVT_S_WAIT_ACK);
 				qib_schedule_send(qp);
-			} else if (qp->s_flags & QIB_S_WAIT_RDMAR) {
-				qp->s_flags &= ~(QIB_S_WAIT_RDMAR |
-						 QIB_S_WAIT_ACK);
+			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
+						 RVT_S_WAIT_ACK);
 				qib_schedule_send(qp);
 			}
 		}
@@ -1231,7 +1217,7 @@
 
 	switch (aeth >> 29) {
 	case 0:         /* ACK */
-		ibp->n_rc_acks++;
+		this_cpu_inc(*ibp->rvp.rc_acks);
 		if (qp->s_acked != qp->s_tail) {
 			/*
 			 * We are expecting more ACKs so
@@ -1248,8 +1234,8 @@
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = psn + 1;
 		}
-		if (qp->s_flags & QIB_S_WAIT_ACK) {
-			qp->s_flags &= ~QIB_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			qib_schedule_send(qp);
 		}
 		qib_get_credit(qp, aeth);
@@ -1260,10 +1246,10 @@
 		goto bail;
 
 	case 1:         /* RNR NAK */
-		ibp->n_rnr_naks++;
+		ibp->rvp.n_rnr_naks++;
 		if (qp->s_acked == qp->s_tail)
 			goto bail;
-		if (qp->s_flags & QIB_S_WAIT_RNR)
+		if (qp->s_flags & RVT_S_WAIT_RNR)
 			goto bail;
 		if (qp->s_rnr_retry == 0) {
 			status = IB_WC_RNR_RETRY_EXC_ERR;
@@ -1275,12 +1261,12 @@
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 
-		ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
 
 		reset_psn(qp, psn);
 
-		qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK);
-		qp->s_flags |= QIB_S_WAIT_RNR;
+		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
+		qp->s_flags |= RVT_S_WAIT_RNR;
 		qp->s_timer.function = qib_rc_rnr_retry;
 		qp->s_timer.expires = jiffies + usecs_to_jiffies(
 			ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
@@ -1296,7 +1282,7 @@
 		switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
 			QIB_AETH_CREDIT_MASK) {
 		case 0: /* PSN sequence error */
-			ibp->n_seq_naks++;
+			ibp->rvp.n_seq_naks++;
 			/*
 			 * Back up to the responder's expected PSN.
 			 * Note that we might get a NAK in the middle of an
@@ -1309,21 +1295,21 @@
 
 		case 1: /* Invalid Request */
 			status = IB_WC_REM_INV_REQ_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 2: /* Remote Access Error */
 			status = IB_WC_REM_ACCESS_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 3: /* Remote Operation Error */
 			status = IB_WC_REM_OP_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 class_b:
 			if (qp->s_last == qp->s_acked) {
 				qib_send_complete(qp, wqe, status);
-				qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			}
 			break;
 
@@ -1349,18 +1335,18 @@
  * We have seen an out of sequence RDMA read middle or last packet.
  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  */
-static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
+static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
 			 struct qib_ctxtdata *rcd)
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
 		del_timer(&qp->s_timer);
 	}
 
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 
 	while (qib_cmp24(psn, wqe->lpsn) > 0) {
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
@@ -1370,11 +1356,11 @@
 		wqe = do_rc_completion(qp, wqe, ibp);
 	}
 
-	ibp->n_rdma_seq++;
-	qp->r_flags |= QIB_R_RDMAR_SEQ;
+	ibp->rvp.n_rdma_seq++;
+	qp->r_flags |= RVT_R_RDMAR_SEQ;
 	qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_SEND;
+		qp->r_flags |= RVT_R_RSP_SEND;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -1399,12 +1385,12 @@
 static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 			    struct qib_other_headers *ohdr,
 			    void *data, u32 tlen,
-			    struct qib_qp *qp,
+			    struct rvt_qp *qp,
 			    u32 opcode,
 			    u32 psn, u32 hdrsize, u32 pmtu,
 			    struct qib_ctxtdata *rcd)
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	enum ib_wc_status status;
 	unsigned long flags;
@@ -1425,7 +1411,7 @@
 			 * If send tasklet not running attempt to progress
 			 * SDMA queue.
 			 */
-			if (!(qp->s_flags & QIB_S_BUSY)) {
+			if (!(qp->s_flags & RVT_S_BUSY)) {
 				/* Acquire SDMA Lock */
 				spin_lock_irqsave(&ppd->sdma_lock, flags);
 				/* Invoke sdma make progress */
@@ -1437,11 +1423,12 @@
 	}
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto ack_done;
 
 	/* Ignore invalid responses. */
-	if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+	smp_read_barrier_depends(); /* see post_one_send */
+	if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
 		goto ack_done;
 
 	/* Ignore duplicate responses. */
@@ -1460,15 +1447,15 @@
 	 * Skip everything other than the PSN we expect, if we are waiting
 	 * for a reply to a restarted RDMA read or atomic op.
 	 */
-	if (qp->r_flags & QIB_R_RDMAR_SEQ) {
+	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
 		if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
 			goto ack_done;
-		qp->r_flags &= ~QIB_R_RDMAR_SEQ;
+		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
 	}
 
 	if (unlikely(qp->s_acked == qp->s_tail))
 		goto ack_done;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	status = IB_WC_SUCCESS;
 
 	switch (opcode) {
@@ -1487,7 +1474,7 @@
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
 		hdrsize += 4;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
 		/*
@@ -1515,10 +1502,10 @@
 		 * We got a response so update the timeout.
 		 * 4.096 usec. * (1 << qp->timeout)
 		 */
-		qp->s_flags |= QIB_S_TIMER;
+		qp->s_flags |= RVT_S_TIMER;
 		mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
-		if (qp->s_flags & QIB_S_WAIT_ACK) {
-			qp->s_flags &= ~QIB_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			qib_schedule_send(qp);
 		}
 
@@ -1553,7 +1540,7 @@
 		 * have to be careful to copy the data to the right
 		 * location.
 		 */
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
 						  wqe, psn, pmtu);
 		goto read_last;
@@ -1598,7 +1585,7 @@
 ack_err:
 	if (qp->s_last == qp->s_acked) {
 		qib_send_complete(qp, wqe, status);
-		qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
 ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1623,14 +1610,14 @@
  */
 static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 			    void *data,
-			    struct qib_qp *qp,
+			    struct rvt_qp *qp,
 			    u32 opcode,
 			    u32 psn,
 			    int diff,
 			    struct qib_ctxtdata *rcd)
 {
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	struct qib_ack_entry *e;
+	struct rvt_ack_entry *e;
 	unsigned long flags;
 	u8 i, prev;
 	int old_req;
@@ -1642,7 +1629,7 @@
 		 * Don't queue the NAK if we already sent one.
 		 */
 		if (!qp->r_nak_state) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
@@ -1652,7 +1639,7 @@
 			 * Otherwise, we end up propagating congestion.
 			 */
 			if (list_empty(&qp->rspwait)) {
-				qp->r_flags |= QIB_R_RSP_NAK;
+				qp->r_flags |= RVT_R_RSP_NAK;
 				atomic_inc(&qp->refcount);
 				list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 			}
@@ -1678,7 +1665,7 @@
 	 */
 	e = NULL;
 	old_req = 1;
-	ibp->n_rc_dupreq++;
+	ibp->rvp.n_rc_dupreq++;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -1732,7 +1719,7 @@
 		if (unlikely(offset + len != e->rdma_sge.sge_length))
 			goto unlock_done;
 		if (e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		if (len != 0) {
@@ -1740,7 +1727,7 @@
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
-			ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
 					 IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto unlock_done;
@@ -1791,7 +1778,7 @@
 		 * which doesn't accept a RDMA read response or atomic
 		 * response as an ACK for earlier SENDs or RDMA writes.
 		 */
-		if (!(qp->s_flags & QIB_S_RESP_PENDING)) {
+		if (!(qp->s_flags & RVT_S_RESP_PENDING)) {
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qp->r_nak_state = 0;
 			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
@@ -1805,7 +1792,7 @@
 		break;
 	}
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags |= QIB_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_RESP_PENDING;
 	qp->r_nak_state = 0;
 	qib_schedule_send(qp);
 
@@ -1818,13 +1805,13 @@
 	return 0;
 }
 
-void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
+void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
 {
 	unsigned long flags;
 	int lastwqe;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	lastwqe = qib_error_qp(qp, err);
+	lastwqe = rvt_error_qp(qp, err);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
 	if (lastwqe) {
@@ -1837,7 +1824,7 @@
 	}
 }
 
-static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
+static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
 {
 	unsigned next;
 
@@ -1862,7 +1849,7 @@
  * Called at interrupt level.
  */
 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 	struct qib_other_headers *ohdr;
@@ -1948,8 +1935,8 @@
 		break;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
-		qp->r_flags |= QIB_R_COMM_EST;
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
+		qp->r_flags |= RVT_R_COMM_EST;
 		if (qp->ibqp.event_handler) {
 			struct ib_event ev;
 
@@ -2026,9 +2013,9 @@
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto nack_inv;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
-		if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 			break;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -2047,7 +2034,7 @@
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 			     (ohdr->bth[0] &
 			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
@@ -2069,7 +2056,7 @@
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
 					 rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto nack_acc;
@@ -2096,7 +2083,7 @@
 		goto send_last;
 
 	case OP(RDMA_READ_REQUEST): {
-		struct qib_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u32 len;
 		u8 next;
 
@@ -2114,7 +2101,7 @@
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		reth = &ohdr->u.rc.reth;
@@ -2125,7 +2112,7 @@
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
 					 rkey, IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto nack_acc_unlck;
@@ -2157,7 +2144,7 @@
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= QIB_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		qib_schedule_send(qp);
 
 		goto sunlock;
@@ -2166,7 +2153,7 @@
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
 		struct ib_atomic_eth *ateth;
-		struct qib_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u64 vaddr;
 		atomic64_t *maddr;
 		u64 sdata;
@@ -2186,7 +2173,7 @@
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		ateth = &ohdr->u.atomic_eth;
@@ -2196,7 +2183,7 @@
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
 					  vaddr, rkey,
 					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc_unlck;
@@ -2208,7 +2195,7 @@
 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
 				      be64_to_cpu(ateth->compare_data),
 				      sdata);
-		qib_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		e->opcode = opcode;
 		e->sent = 0;
@@ -2221,7 +2208,7 @@
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= QIB_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		qib_schedule_send(qp);
 
 		goto sunlock;
@@ -2245,7 +2232,7 @@
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue RNR NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -2257,7 +2244,7 @@
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -2271,7 +2258,7 @@
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}

diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index b1aa21b..a5f07a6 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c

@@ -79,16 +79,16 @@
  * Validate a RWQE and fill in the SGE state.
  * Return 1 if OK.
  */
-static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
+static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
 {
 	int i, j, ret;
 	struct ib_wc wc;
-	struct qib_lkey_table *rkt;
-	struct qib_pd *pd;
-	struct qib_sge_state *ss;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_sge_state *ss;
 
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
 	ss = &qp->r_sge;
 	ss->sg_list = qp->r_sg_list;
 	qp->r_len = 0;
@@ -96,7 +96,7 @@
 		if (wqe->sg_list[i].length == 0)
 			continue;
 		/* Check LKEY */
-		if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
 				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
@@ -109,9 +109,9 @@
 
 bad_lkey:
 	while (j) {
-		struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
 
-		qib_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	ss->num_sge = 0;
 	memset(&wc, 0, sizeof(wc));
@@ -120,7 +120,7 @@
 	wc.opcode = IB_WC_RECV;
 	wc.qp = &qp->ibqp;
 	/* Signal solicited completion event. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 	ret = 0;
 bail:
 	return ret;
@@ -136,19 +136,19 @@
  *
  * Can be called from interrupt level.
  */
-int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
+int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
-	struct qib_rq *rq;
-	struct qib_rwq *wq;
-	struct qib_srq *srq;
-	struct qib_rwqe *wqe;
+	struct rvt_rq *rq;
+	struct rvt_rwq *wq;
+	struct rvt_srq *srq;
+	struct rvt_rwqe *wqe;
 	void (*handler)(struct ib_event *, void *);
 	u32 tail;
 	int ret;
 
 	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
+		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
 		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
@@ -158,7 +158,7 @@
 	}
 
 	spin_lock_irqsave(&rq->lock, flags);
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 		ret = 0;
 		goto unlock;
 	}
@@ -174,7 +174,7 @@
 	}
 	/* Make sure entry is read after head index is read. */
 	smp_rmb();
-	wqe = get_rwqe_ptr(rq, tail);
+	wqe = rvt_get_rwqe_ptr(rq, tail);
 	/*
 	 * Even though we update the tail index in memory, the verbs
 	 * consumer is not supposed to post more entries until a
@@ -190,7 +190,7 @@
 	qp->r_wr_id = wqe->wr_id;
 
 	ret = 1;
-	set_bit(QIB_R_WRID_VALID, &qp->r_aflags);
+	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
 	if (handler) {
 		u32 n;
 
@@ -227,7 +227,7 @@
  * Switch to alternate path.
  * The QP s_lock should be held and interrupts disabled.
  */
-void qib_migrate_qp(struct qib_qp *qp)
+void qib_migrate_qp(struct rvt_qp *qp)
 {
 	struct ib_event ev;
 
@@ -266,7 +266,7 @@
  * The s_lock will be acquired around the qib_migrate_qp() call.
  */
 int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		      int has_grh, struct qib_qp *qp, u32 bth0)
+		      int has_grh, struct rvt_qp *qp, u32 bth0)
 {
 	__be64 guid;
 	unsigned long flags;
@@ -279,7 +279,8 @@
 			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
 				goto err;
 			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid,
+				    ibp->rvp.gid_prefix, guid))
 				goto err;
 			if (!gid_ok(&hdr->u.l.grh.sgid,
 			    qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
@@ -311,7 +312,8 @@
 				goto err;
 			guid = get_sguid(ibp,
 					 qp->remote_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid,
+				    ibp->rvp.gid_prefix, guid))
 				goto err;
 			if (!gid_ok(&hdr->u.l.grh.sgid,
 			    qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
@@ -353,12 +355,15 @@
  * receive interrupts since this is a connected protocol and all packets
  * will pass through here.
  */
-static void qib_ruc_loopback(struct qib_qp *sqp)
+static void qib_ruc_loopback(struct rvt_qp *sqp)
 {
 	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct qib_qp *qp;
-	struct qib_swqe *wqe;
-	struct qib_sge *sge;
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+	struct rvt_qp *qp;
+	struct rvt_swqe *wqe;
+	struct rvt_sge *sge;
 	unsigned long flags;
 	struct ib_wc wc;
 	u64 sdata;
@@ -367,29 +372,33 @@
 	int release;
 	int ret;
 
+	rcu_read_lock();
 	/*
 	 * Note that we check the responder QP state after
 	 * checking the requester's state.
 	 */
-	qp = qib_lookup_qpn(ibp, sqp->remote_qpn);
+	qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
+	if (!qp)
+		goto done;
 
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
 	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) ||
-	    !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		goto unlock;
 
-	sqp->s_flags |= QIB_S_BUSY;
+	sqp->s_flags |= RVT_S_BUSY;
 
 again:
-	if (sqp->s_last == sqp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 		goto clr_busy;
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
+	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
 
 	/* Return if it is not OK to start a new work reqeust. */
-	if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
 			goto clr_busy;
 		/* We are in the error state, flush the work request. */
 		send_status = IB_WC_WR_FLUSH_ERR;
@@ -407,9 +416,9 @@
 	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 
-	if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) ||
+	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
 	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		/*
 		 * For RC, the requester would timeout and retry so
 		 * shortcut the timeouts and just signal too many retries.
@@ -458,7 +467,7 @@
 			goto inv_err;
 		if (wqe->length == 0)
 			break;
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
 					  wqe->rdma_wr.remote_addr,
 					  wqe->rdma_wr.rkey,
 					  IB_ACCESS_REMOTE_WRITE)))
@@ -471,7 +480,7 @@
 	case IB_WR_RDMA_READ:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 			goto inv_err;
-		if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
 					  wqe->rdma_wr.remote_addr,
 					  wqe->rdma_wr.rkey,
 					  IB_ACCESS_REMOTE_READ)))
@@ -489,7 +498,7 @@
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 			goto inv_err;
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
 					  wqe->atomic_wr.remote_addr,
 					  wqe->atomic_wr.rkey,
 					  IB_ACCESS_REMOTE_ATOMIC)))
@@ -502,7 +511,7 @@
 			(u64) atomic64_add_return(sdata, maddr) - sdata :
 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
 				      sdata, wqe->atomic_wr.swap);
-		qib_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		goto send_comp;
 
@@ -526,11 +535,11 @@
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (!release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--sqp->s_sge.num_sge)
 				*sge = *sqp->s_sge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -543,9 +552,9 @@
 		sqp->s_len -= len;
 	}
 	if (release)
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 
-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto send_comp;
 
 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -561,12 +570,12 @@
 	wc.sl = qp->remote_ah_attr.sl;
 	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       wqe->wr.send_flags & IB_SEND_SOLICITED);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
 flush_send:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 	qib_send_complete(sqp, wqe, send_status);
@@ -576,7 +585,7 @@
 	/* Handle RNR NAK */
 	if (qp->ibqp.qp_type == IB_QPT_UC)
 		goto send_comp;
-	ibp->n_rnr_naks++;
+	ibp->rvp.n_rnr_naks++;
 	/*
 	 * Note: we don't need the s_lock held since the BUSY flag
 	 * makes this single threaded.
@@ -588,9 +597,9 @@
 	if (sqp->s_rnr_retry_cnt < 7)
 		sqp->s_rnr_retry--;
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
 		goto clr_busy;
-	sqp->s_flags |= QIB_S_WAIT_RNR;
+	sqp->s_flags |= RVT_S_WAIT_RNR;
 	sqp->s_timer.function = qib_rc_rnr_retry;
 	sqp->s_timer.expires = jiffies +
 		usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
@@ -618,9 +627,9 @@
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	qib_send_complete(sqp, wqe, send_status);
 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
 
-		sqp->s_flags &= ~QIB_S_BUSY;
+		sqp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		if (lastwqe) {
 			struct ib_event ev;
@@ -633,12 +642,11 @@
 		goto done;
 	}
 clr_busy:
-	sqp->s_flags &= ~QIB_S_BUSY;
+	sqp->s_flags &= ~RVT_S_BUSY;
 unlock:
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 done:
-	if (qp && atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rcu_read_unlock();
 }
 
 /**
@@ -663,7 +671,7 @@
 	hdr->next_hdr = IB_GRH_NEXT_HDR;
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
-	hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
 	hdr->sgid.global.interface_id = grh->sgid_index ?
 		ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid;
 	hdr->dgid = grh->dgid;
@@ -672,9 +680,10 @@
 	return sizeof(struct ib_grh) / sizeof(u32);
 }
 
-void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
 			 u32 bth0, u32 bth2)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	u16 lrh0;
 	u32 nwords;
@@ -685,17 +694,18 @@
 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
 	lrh0 = QIB_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+		qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
 					       &qp->remote_ah_attr.grh,
 					       qp->s_hdrwords, nwords);
 		lrh0 = QIB_LRH_GRH;
 	}
 	lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
 		qp->remote_ah_attr.sl << 4;
-	qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+	priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+	priv->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	priv->s_hdr->lrh[2] =
+			cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	priv->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
 				       qp->remote_ah_attr.src_path_bits);
 	bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
@@ -707,20 +717,29 @@
 	this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 }
 
+void _qib_do_send(struct work_struct *work)
+{
+	struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv,
+						s_work);
+	struct rvt_qp *qp = priv->owner;
+
+	qib_do_send(qp);
+}
+
 /**
  * qib_do_send - perform a send on a QP
- * @work: contains a pointer to the QP
+ * @qp: pointer to the QP
  *
  * Process entries in the send work queue until credit or queue is
  * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
  * Otherwise, two threads could send packets out of order.
  */
-void qib_do_send(struct work_struct *work)
+void qib_do_send(struct rvt_qp *qp)
 {
-	struct qib_qp *qp = container_of(work, struct qib_qp, s_work);
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-	int (*make_req)(struct qib_qp *qp);
+	int (*make_req)(struct rvt_qp *qp);
 	unsigned long flags;
 
 	if ((qp->ibqp.qp_type == IB_QPT_RC ||
@@ -745,50 +764,59 @@
 		return;
 	}
 
-	qp->s_flags |= QIB_S_BUSY;
-
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags |= RVT_S_BUSY;
 
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (qp->s_hdrwords != 0) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			/*
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
-			if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords,
+			if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
 					   qp->s_cur_sge, qp->s_cur_size))
-				break;
+				return;
 			/* Record that s_hdr is empty. */
 			qp->s_hdrwords = 0;
+			spin_lock_irqsave(&qp->s_lock, flags);
 		}
 	} while (make_req(qp));
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /*
  * This should be called with s_lock held.
  */
-void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 		       enum ib_wc_status status)
 {
 	u32 old_last, last;
 	unsigned i;
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
+	last = qp->s_last;
+	old_last = last;
+	if (++last >= qp->s_size)
+		last = 0;
+	qp->s_last = last;
+	/* See post_send() */
+	barrier();
 	for (i = 0; i < wqe->wr.num_sge; i++) {
-		struct qib_sge *sge = &wqe->sg_list[i];
+		struct rvt_sge *sge = &wqe->sg_list[i];
 
-		qib_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 
 	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
 	    status != IB_WC_SUCCESS) {
 		struct ib_wc wc;
@@ -800,15 +828,10 @@
 		wc.qp = &qp->ibqp;
 		if (status == IB_WC_SUCCESS)
 			wc.byte_len = wqe->length;
-		qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
 			     status != IB_WC_SUCCESS);
 	}
 
-	last = qp->s_last;
-	old_last = last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
 	if (qp->s_cur == old_last)

diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index c6d6a54..891873b 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c

@@ -513,7 +513,9 @@
 static void complete_sdma_err_req(struct qib_pportdata *ppd,
 				  struct qib_verbs_txreq *tx)
 {
-	atomic_inc(&tx->qp->s_dma_busy);
+	struct qib_qp_priv *priv = tx->qp->priv;
+
+	atomic_inc(&priv->s_dma_busy);
 	/* no sdma descriptors, so no unmap_desc */
 	tx->txreq.start_idx = 0;
 	tx->txreq.next_descq_idx = 0;
@@ -531,18 +533,19 @@
  * 3) The SGE addresses are suitable for passing to dma_map_single().
  */
 int qib_sdma_verbs_send(struct qib_pportdata *ppd,
-			struct qib_sge_state *ss, u32 dwords,
+			struct rvt_sge_state *ss, u32 dwords,
 			struct qib_verbs_txreq *tx)
 {
 	unsigned long flags;
-	struct qib_sge *sge;
-	struct qib_qp *qp;
+	struct rvt_sge *sge;
+	struct rvt_qp *qp;
 	int ret = 0;
 	u16 tail;
 	__le64 *descqp;
 	u64 sdmadesc[2];
 	u32 dwoffset;
 	dma_addr_t addr;
+	struct qib_qp_priv *priv;
 
 	spin_lock_irqsave(&ppd->sdma_lock, flags);
 
@@ -621,7 +624,7 @@
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -644,8 +647,8 @@
 		descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD);
 	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ)
 		descqp[0] |= cpu_to_le64(SDMA_DESC_INTR);
-
-	atomic_inc(&tx->qp->s_dma_busy);
+	priv = tx->qp->priv;
+	atomic_inc(&priv->s_dma_busy);
 	tx->txreq.next_descq_idx = tail;
 	ppd->dd->f_sdma_update_tail(ppd, tail);
 	ppd->sdma_descq_added += tx->txreq.sg_count;
@@ -663,13 +666,14 @@
 		unmap_desc(ppd, tail);
 	}
 	qp = tx->qp;
+	priv = qp->priv;
 	qib_put_txreq(tx);
 	spin_lock(&qp->r_lock);
 	spin_lock(&qp->s_lock);
 	if (qp->ibqp.qp_type == IB_QPT_RC) {
 		/* XXX what about error sending RDMA read responses? */
-		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)
-			qib_error_qp(qp, IB_WC_GENERAL_ERR);
+		if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)
+			rvt_error_qp(qp, IB_WC_GENERAL_ERR);
 	} else if (qp->s_wqe)
 		qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
 	spin_unlock(&qp->s_lock);
@@ -679,8 +683,9 @@
 
 busy:
 	qp = tx->qp;
+	priv = qp->priv;
 	spin_lock(&qp->s_lock);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		struct qib_ibdev *dev;
 
 		/*
@@ -690,19 +695,19 @@
 		 */
 		tx->ss = ss;
 		tx->dwords = dwords;
-		qp->s_tx = tx;
+		priv->s_tx = tx;
 		dev = &ppd->dd->verbs_dev;
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			struct qib_ibport *ibp;
 
 			ibp = &ppd->ibport_data;
-			ibp->n_dmawait++;
-			qp->s_flags |= QIB_S_WAIT_DMA_DESC;
-			list_add_tail(&qp->iowait, &dev->dmawait);
+			ibp->rvp.n_dmawait++;
+			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			list_add_tail(&priv->iowait, &dev->dmawait);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock(&qp->s_lock);
 		ret = -EBUSY;
 	} else {

diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
deleted file mode 100644
index d623593..0000000
--- a/drivers/infiniband/hw/qib/qib_srq.c
+++ /dev/null

@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "qib_verbs.h"
-
-/**
- * qib_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: A pointer to the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_rwq *wq;
-	unsigned long flags;
-	int ret;
-
-	for (; wr; wr = wr->next) {
-		struct qib_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		wq = srq->rq.wq;
-		next = wq->head + 1;
-		if (next >= srq->rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&srq->rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libibverbs when creating a user SRQ
- */
-struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
-			      struct ib_srq_init_attr *srq_init_attr,
-			      struct ib_udata *udata)
-{
-	struct qib_ibdev *dev = to_idev(ibpd->device);
-	struct qib_srq *srq;
-	u32 sz;
-	struct ib_srq *ret;
-
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		ret = ERR_PTR(-ENOSYS);
-		goto done;
-	}
-
-	if (srq_init_attr->attr.max_sge == 0 ||
-	    srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
-	    srq_init_attr->attr.max_wr == 0 ||
-	    srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	srq->rq.max_sge = srq_init_attr->attr.max_sge;
-	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-		sizeof(struct qib_rwqe);
-	srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz);
-	if (!srq->rq.wq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_srq;
-	}
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-		u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz;
-
-		srq->ip =
-		    qib_create_mmap_info(dev, s, ibpd->uobject->context,
-					 srq->rq.wq);
-		if (!srq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wq;
-		}
-
-		err = ib_copy_to_udata(udata, &srq->ip->offset,
-				       sizeof(srq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		srq->ip = NULL;
-
-	/*
-	 * ib_create_srq() will initialize srq->ibsrq.
-	 */
-	spin_lock_init(&srq->rq.lock);
-	srq->rq.wq->head = 0;
-	srq->rq.wq->tail = 0;
-	srq->limit = srq_init_attr->attr.srq_limit;
-
-	spin_lock(&dev->n_srqs_lock);
-	if (dev->n_srqs_allocated == ib_qib_max_srqs) {
-		spin_unlock(&dev->n_srqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_srqs_allocated++;
-	spin_unlock(&dev->n_srqs_lock);
-
-	if (srq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &srq->ibsrq;
-	goto done;
-
-bail_ip:
-	kfree(srq->ip);
-bail_wq:
-	vfree(srq->rq.wq);
-bail_srq:
-	kfree(srq);
-done:
-	return ret;
-}
-
-/**
- * qib_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for libibverbs.so
- */
-int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		   enum ib_srq_attr_mask attr_mask,
-		   struct ib_udata *udata)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_rwq *wq;
-	int ret = 0;
-
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct qib_rwq *owq;
-		struct qib_rwqe *p;
-		u32 sz, size, n, head, tail;
-
-		/* Check that the requested sizes are below the limits. */
-		if ((attr->max_wr > ib_qib_max_srq_wrs) ||
-		    ((attr_mask & IB_SRQ_LIMIT) ?
-		     attr->srq_limit : srq->limit) > attr->max_wr) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		sz = sizeof(struct qib_rwqe) +
-			srq->rq.max_sge * sizeof(struct ib_sge);
-		size = attr->max_wr + 1;
-		wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
-		if (!wq) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		/* Check that we can write the offset to mmap. */
-		if (udata && udata->inlen >= sizeof(__u64)) {
-			__u64 offset_addr;
-			__u64 offset = 0;
-
-			ret = ib_copy_from_udata(&offset_addr, udata,
-						 sizeof(offset_addr));
-			if (ret)
-				goto bail_free;
-			udata->outbuf =
-				(void __user *) (unsigned long) offset_addr;
-			ret = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (ret)
-				goto bail_free;
-		}
-
-		spin_lock_irq(&srq->rq.lock);
-		/*
-		 * validate head and tail pointer values and compute
-		 * the number of remaining WQEs.
-		 */
-		owq = srq->rq.wq;
-		head = owq->head;
-		tail = owq->tail;
-		if (head >= srq->rq.size || tail >= srq->rq.size) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = head;
-		if (n < tail)
-			n += srq->rq.size - tail;
-		else
-			n -= tail;
-		if (size <= n) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = 0;
-		p = wq->wq;
-		while (tail != head) {
-			struct qib_rwqe *wqe;
-			int i;
-
-			wqe = get_rwqe_ptr(&srq->rq, tail);
-			p->wr_id = wqe->wr_id;
-			p->num_sge = wqe->num_sge;
-			for (i = 0; i < wqe->num_sge; i++)
-				p->sg_list[i] = wqe->sg_list[i];
-			n++;
-			p = (struct qib_rwqe *)((char *) p + sz);
-			if (++tail >= srq->rq.size)
-				tail = 0;
-		}
-		srq->rq.wq = wq;
-		srq->rq.size = size;
-		wq->head = n;
-		wq->tail = 0;
-		if (attr_mask & IB_SRQ_LIMIT)
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-
-		vfree(owq);
-
-		if (srq->ip) {
-			struct qib_mmap_info *ip = srq->ip;
-			struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
-			u32 s = sizeof(struct qib_rwq) + size * sz;
-
-			qib_update_mmap_info(dev, ip, s, wq);
-
-			/*
-			 * Return the offset to mmap.
-			 * See qib_mmap() for details.
-			 */
-			if (udata && udata->inlen >= sizeof(__u64)) {
-				ret = ib_copy_to_udata(udata, &ip->offset,
-						       sizeof(ip->offset));
-				if (ret)
-					goto bail;
-			}
-
-			/*
-			 * Put user mapping info onto the pending list
-			 * unless it already is on the list.
-			 */
-			spin_lock_irq(&dev->pending_lock);
-			if (list_empty(&ip->pending_mmaps))
-				list_add(&ip->pending_mmaps,
-					 &dev->pending_mmaps);
-			spin_unlock_irq(&dev->pending_lock);
-		}
-	} else if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irq(&srq->rq.lock);
-		if (attr->srq_limit >= srq->rq.size)
-			ret = -EINVAL;
-		else
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-	}
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&srq->rq.lock);
-bail_free:
-	vfree(wq);
-bail:
-	return ret;
-}
-
-int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-
-	attr->max_wr = srq->rq.size - 1;
-	attr->max_sge = srq->rq.max_sge;
-	attr->srq_limit = srq->limit;
-	return 0;
-}
-
-/**
- * qib_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int qib_destroy_srq(struct ib_srq *ibsrq)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_ibdev *dev = to_idev(ibsrq->device);
-
-	spin_lock(&dev->n_srqs_lock);
-	dev->n_srqs_allocated--;
-	spin_unlock(&dev->n_srqs_lock);
-	if (srq->ip)
-		kref_put(&srq->ip->ref, qib_release_mmap_info);
-	else
-		vfree(srq->rq.wq);
-	kfree(srq);
-
-	return 0;
-}

diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 81f56cd..fe4cf5e 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c

@@ -406,7 +406,13 @@
 #define QIB_DIAGC_ATTR(N) \
 	static struct qib_diagc_attr qib_diagc_attr_##N = { \
 		.attr = { .name = __stringify(N), .mode = 0664 }, \
-		.counter = offsetof(struct qib_ibport, n_##N) \
+		.counter = offsetof(struct qib_ibport, rvp.n_##N) \
+	}
+
+#define QIB_DIAGC_ATTR_PER_CPU(N) \
+	static struct qib_diagc_attr qib_diagc_attr_##N = { \
+		.attr = { .name = __stringify(N), .mode = 0664 }, \
+		.counter = offsetof(struct qib_ibport, rvp.z_##N) \
 	}
 
 struct qib_diagc_attr {
@@ -414,10 +420,11 @@
 	size_t counter;
 };
 
+QIB_DIAGC_ATTR_PER_CPU(rc_acks);
+QIB_DIAGC_ATTR_PER_CPU(rc_qacks);
+QIB_DIAGC_ATTR_PER_CPU(rc_delayed_comp);
+
 QIB_DIAGC_ATTR(rc_resends);
-QIB_DIAGC_ATTR(rc_acks);
-QIB_DIAGC_ATTR(rc_qacks);
-QIB_DIAGC_ATTR(rc_delayed_comp);
 QIB_DIAGC_ATTR(seq_naks);
 QIB_DIAGC_ATTR(rdma_seq);
 QIB_DIAGC_ATTR(rnr_naks);
@@ -449,6 +456,35 @@
 	NULL
 };
 
+static u64 get_all_cpu_total(u64 __percpu *cntr)
+{
+	int cpu;
+	u64 counter = 0;
+
+	for_each_possible_cpu(cpu)
+		counter += *per_cpu_ptr(cntr, cpu);
+	return counter;
+}
+
+#define def_write_per_cpu(cntr) \
+static void write_per_cpu_##cntr(struct qib_pportdata *ppd, u32 data)	\
+{									\
+	struct qib_devdata *dd = ppd->dd;				\
+	struct qib_ibport *qibp = &ppd->ibport_data;			\
+	/*  A write can only zero the counter */			\
+	if (data == 0)							\
+		qibp->rvp.z_##cntr = get_all_cpu_total(qibp->rvp.cntr); \
+	else								\
+		qib_dev_err(dd, "Per CPU cntrs can only be zeroed");	\
+}
+
+def_write_per_cpu(rc_acks)
+def_write_per_cpu(rc_qacks)
+def_write_per_cpu(rc_delayed_comp)
+
+#define READ_PER_CPU_CNTR(cntr) (get_all_cpu_total(qibp->rvp.cntr) - \
+							qibp->rvp.z_##cntr)
+
 static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
 			       char *buf)
 {
@@ -458,7 +494,16 @@
 		container_of(kobj, struct qib_pportdata, diagc_kobj);
 	struct qib_ibport *qibp = &ppd->ibport_data;
 
-	return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter));
+	if (!strncmp(dattr->attr.name, "rc_acks", 7))
+		return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks));
+	else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
+		return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks));
+	else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
+		return sprintf(buf, "%llu\n",
+					READ_PER_CPU_CNTR(rc_delayed_comp));
+	else
+		return sprintf(buf, "%u\n",
+				*(u32 *)((char *)qibp + dattr->counter));
 }
 
 static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
@@ -475,7 +520,15 @@
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
-	*(u32 *)((char *) qibp + dattr->counter) = val;
+
+	if (!strncmp(dattr->attr.name, "rc_acks", 7))
+		write_per_cpu_rc_acks(ppd, val);
+	else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
+		write_per_cpu_rc_qacks(ppd, val);
+	else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
+		write_per_cpu_rc_delayed_comp(ppd, val);
+	else
+		*(u32 *)((char *)qibp + dattr->counter) = val;
 	return size;
 }
 
@@ -502,7 +555,7 @@
 			char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
@@ -511,7 +564,7 @@
 			char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -533,7 +586,7 @@
 				 struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
@@ -545,7 +598,7 @@
 				  struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
@@ -557,7 +610,7 @@
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of user ports (contexts) available. */
@@ -572,7 +625,7 @@
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of free user ports (contexts) available. */
@@ -583,7 +636,7 @@
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	buf[sizeof(dd->serial)] = '\0';
@@ -597,7 +650,7 @@
 				size_t count)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -618,7 +671,7 @@
 			      struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 	int idx;
@@ -778,7 +831,7 @@
  */
 int qib_verbs_register_sysfs(struct qib_devdata *dd)
 {
-	struct ib_device *dev = &dd->verbs_dev.ibdev;
+	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {

diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 06a5645..7bdbc79 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c

@@ -41,61 +41,62 @@
  * qib_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_uc_req(struct qib_qp *qp)
+int qib_make_uc_req(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_other_headers *ohdr;
-	struct qib_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 hwords;
 	u32 bth0;
 	u32 len;
 	u32 pmtu = qp->pmtu;
 	int ret = 0;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
 	}
 
-	ohdr = &qp->s_hdr->u.oth;
+	ohdr = &priv->s_hdr->u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->u.l.oth;
+		ohdr = &priv->s_hdr->u.l.oth;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 	bth0 = 0;
 
 	/* Get the next send request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	qp->s_wqe = NULL;
 	switch (qp->s_state) {
 	default:
-		if (!(ib_qib_state_ops[qp->state] &
-		    QIB_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] &
+		    RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/*
 		 * Start a new request.
 		 */
-		wqe->psn = qp->s_next_psn;
-		qp->s_psn = qp->s_next_psn;
+		qp->s_psn = wqe->psn;
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
 		qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -214,15 +215,11 @@
 	qp->s_cur_sge = &qp->s_sge;
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
-			    qp->s_next_psn++ & QIB_PSN_MASK);
+			    qp->s_psn++ & QIB_PSN_MASK);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 }
 
@@ -240,7 +237,7 @@
  * Called at interrupt level.
  */
 void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_other_headers *ohdr;
 	u32 opcode;
@@ -278,10 +275,10 @@
 inv:
 		if (qp->r_state == OP(SEND_FIRST) ||
 		    qp->r_state == OP(SEND_MIDDLE)) {
-			set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 			qp->r_sge.num_sge = 0;
 		} else
-			qib_put_ss(&qp->r_sge);
+			rvt_put_ss(&qp->r_sge);
 		qp->r_state = OP(SEND_LAST);
 		switch (opcode) {
 		case OP(SEND_FIRST):
@@ -328,8 +325,8 @@
 		goto inv;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
-		qp->r_flags |= QIB_R_COMM_EST;
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
+		qp->r_flags |= RVT_R_COMM_EST;
 		if (qp->ibqp.event_handler) {
 			struct ib_event ev;
 
@@ -346,7 +343,7 @@
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
 send_first:
-		if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
 			qp->r_sge = qp->s_rdma_read_sge;
 		else {
 			ret = qib_get_rwqe(qp, 0);
@@ -400,7 +397,7 @@
 			goto rewind;
 		wc.opcode = IB_WC_RECV;
 		qib_copy_sge(&qp->r_sge, data, tlen, 0);
-		qib_put_ss(&qp->s_rdma_read_sge);
+		rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -414,7 +411,7 @@
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 			     (ohdr->bth[0] &
 				cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
@@ -438,7 +435,7 @@
 			int ok;
 
 			/* Check rkey */
-			ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
 					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto drop;
@@ -483,8 +480,8 @@
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
-			qib_put_ss(&qp->s_rdma_read_sge);
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
+			rvt_put_ss(&qp->s_rdma_read_sge);
 		else {
 			ret = qib_get_rwqe(qp, 1);
 			if (ret < 0)
@@ -495,7 +492,7 @@
 		wc.byte_len = qp->r_len;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		goto last_imm;
 
 	case OP(RDMA_WRITE_LAST):
@@ -511,7 +508,7 @@
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		break;
 
 	default:
@@ -523,10 +520,10 @@
 	return;
 
 rewind:
-	set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 	qp->r_sge.num_sge = 0;
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return;
 
 op_err:

diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 59193f6..d950213 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c

@@ -32,6 +32,7 @@
  */
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
 
 #include "qib.h"
 #include "qib_mad.h"
@@ -46,22 +47,26 @@
  * Note that the receive interrupt handler may be calling qib_ud_rcv()
  * while this is being called.
  */
-static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
+static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 {
 	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct qib_pportdata *ppd;
-	struct qib_qp *qp;
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+	struct rvt_qp *qp;
 	struct ib_ah_attr *ah_attr;
 	unsigned long flags;
-	struct qib_sge_state ssge;
-	struct qib_sge *sge;
+	struct rvt_sge_state ssge;
+	struct rvt_sge *sge;
 	struct ib_wc wc;
 	u32 length;
 	enum ib_qp_type sqptype, dqptype;
 
-	qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
+	rcu_read_lock();
+	qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn);
 	if (!qp) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
+		rcu_read_unlock();
 		return;
 	}
 
@@ -71,12 +76,12 @@
 			IB_QPT_UD : qp->ibqp.qp_type;
 
 	if (dqptype != sqptype ||
-	    !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto drop;
 	}
 
-	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
+	ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
 	ppd = ppd_from_ibp(ibp);
 
 	if (qp->ibqp.qp_num > 1) {
@@ -140,8 +145,8 @@
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & QIB_R_REUSE_SGE)
-		qp->r_flags &= ~QIB_R_REUSE_SGE;
+	if (qp->r_flags & RVT_R_REUSE_SGE)
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
 	else {
 		int ret;
 
@@ -152,14 +157,14 @@
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			goto bail_unlock;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= QIB_R_REUSE_SGE;
-		ibp->n_pkt_drops++;
+		qp->r_flags |= RVT_R_REUSE_SGE;
+		ibp->rvp.n_pkt_drops++;
 		goto bail_unlock;
 	}
 
@@ -189,7 +194,7 @@
 			if (--ssge.num_sge)
 				*sge = *ssge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -201,8 +206,8 @@
 		}
 		length -= len;
 	}
-	qib_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto bail_unlock;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -216,30 +221,31 @@
 	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 		     swqe->wr.send_flags & IB_SEND_SOLICITED);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
 bail_unlock:
 	spin_unlock_irqrestore(&qp->r_lock, flags);
 drop:
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rcu_read_unlock();
 }
 
 /**
  * qib_make_ud_req - construct a UD request packet
  * @qp: the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_ud_req(struct qib_qp *qp)
+int qib_make_ud_req(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct qib_pportdata *ppd;
 	struct qib_ibport *ibp;
-	struct qib_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 bth0;
@@ -248,28 +254,29 @@
 	int ret = 0;
 	int next_cur;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
 	}
 
-	if (qp->s_cur == qp->s_head)
+	/* see post_one_send() */
+	smp_read_barrier_depends();
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	next_cur = qp->s_cur + 1;
 	if (next_cur >= qp->s_size)
 		next_cur = 0;
@@ -277,9 +284,9 @@
 	/* Construct the header. */
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ppd = ppd_from_ibp(ibp);
-	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
-		if (ah_attr->dlid != QIB_PERMISSIVE_LID)
+	ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+	if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+		if (ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE))
 			this_cpu_inc(ibp->pmastats->n_multicast_xmit);
 		else
 			this_cpu_inc(ibp->pmastats->n_unicast_xmit);
@@ -287,6 +294,7 @@
 		this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
 		if (unlikely(lid == ppd->lid)) {
+			unsigned long flags;
 			/*
 			 * If DMAs are in progress, we can't generate
 			 * a completion for the loopback packet since
@@ -294,11 +302,12 @@
 			 * XXX Instead of waiting, we could queue a
 			 * zero length descriptor so we get a callback.
 			 */
-			if (atomic_read(&qp->s_dma_busy)) {
-				qp->s_flags |= QIB_S_WAIT_DMA;
+			if (atomic_read(&priv->s_dma_busy)) {
+				qp->s_flags |= RVT_S_WAIT_DMA;
 				goto bail;
 			}
 			qp->s_cur = next_cur;
+			local_irq_save(flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qib_ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, flags);
@@ -324,11 +333,11 @@
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		/* Header size in 32-bit words. */
-		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+		qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
 					       &ah_attr->grh,
 					       qp->s_hdrwords, nwords);
 		lrh0 = QIB_LRH_GRH;
-		ohdr = &qp->s_hdr->u.l.oth;
+		ohdr = &priv->s_hdr->u.l.oth;
 		/*
 		 * Don't worry about sending to locally attached multicast
 		 * QPs.  It is unspecified by the spec. what happens.
@@ -336,7 +345,7 @@
 	} else {
 		/* Header size in 32-bit words. */
 		lrh0 = QIB_LRH_BTH;
-		ohdr = &qp->s_hdr->u.oth;
+		ohdr = &priv->s_hdr->u.oth;
 	}
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		qp->s_hdrwords++;
@@ -349,15 +358,16 @@
 		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
 	else
 		lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
-	qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-	qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+	priv->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
+	priv->s_hdr->lrh[2] =
+			cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
 	lid = ppd->lid;
 	if (lid) {
 		lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
-		qp->s_hdr->lrh[3] = cpu_to_be16(lid);
+		priv->s_hdr->lrh[3] = cpu_to_be16(lid);
 	} else
-		qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
+		priv->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
 	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 		bth0 |= IB_BTH_SOLICITED;
 	bth0 |= extra_bytes << 20;
@@ -368,11 +378,11 @@
 	/*
 	 * Use the multicast QP if the destination LID is a multicast LID.
 	 */
-	ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
-		ah_attr->dlid != QIB_PERMISSIVE_LID ?
+	ohdr->bth[1] = ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+		ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
 		cpu_to_be32(QIB_MULTICAST_QPN) :
 		cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+	ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
@@ -382,13 +392,9 @@
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 }
 
@@ -426,7 +432,7 @@
  * Called at interrupt level.
  */
 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_other_headers *ohdr;
 	int opcode;
@@ -446,7 +452,7 @@
 		hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
 	}
 	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
+	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 
 	/*
 	 * Get the number of bytes the message was padded by
@@ -531,8 +537,8 @@
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & QIB_R_REUSE_SGE)
-		qp->r_flags &= ~QIB_R_REUSE_SGE;
+	if (qp->r_flags & RVT_R_REUSE_SGE)
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
 	else {
 		int ret;
 
@@ -543,13 +549,13 @@
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			return;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= QIB_R_REUSE_SGE;
+		qp->r_flags |= RVT_R_REUSE_SGE;
 		goto drop;
 	}
 	if (has_grh) {
@@ -559,8 +565,8 @@
 	} else
 		qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
 	qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
-	qib_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		return;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -576,15 +582,15 @@
 	/*
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
 	 */
-	wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 :
+	wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
 		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 		     (ohdr->bth[0] &
 			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }

diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index baf1e42..cbf6200 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c

@@ -41,6 +41,7 @@
 #include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/vmalloc.h>
+#include <rdma/rdma_vt.h>
 
 #include "qib.h"
 #include "qib_common.h"
@@ -49,8 +50,8 @@
 module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
 
-unsigned int ib_qib_lkey_table_size = 16;
-module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
+static unsigned int qib_lkey_table_size = 16;
+module_param_named(lkey_table_size, qib_lkey_table_size, uint,
 		   S_IRUGO);
 MODULE_PARM_DESC(lkey_table_size,
 		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
@@ -113,36 +114,6 @@
 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 
 /*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; qib_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = 0,
-	[IB_QPS_INIT] = QIB_POST_RECV_OK,
-	[IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
-	[IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
-	    QIB_PROCESS_NEXT_SEND_OK,
-	[IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
-	[IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
-	[IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
-	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
-};
-
-struct qib_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
-static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
-{
-	return container_of(ibucontext, struct qib_ucontext, ibucontext);
-}
-
-/*
  * Translate ib_wr_opcode into ib_wc_opcode.
  */
 const enum ib_wc_opcode ib_qib_wc_opcode[] = {
@@ -166,9 +137,9 @@
  * @data: the data to copy
  * @length: the length of the data
  */
-void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
+void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -184,11 +155,11 @@
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -208,9 +179,9 @@
  * @ss: the SGE state
  * @length: the number of bytes to skip
  */
-void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
+void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -225,11 +196,11 @@
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -248,10 +219,10 @@
  * Don't modify the qib_sge_state to get the count.
  * Return zero if any of the segments is not aligned.
  */
-static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
+static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
 {
-	struct qib_sge *sg_list = ss->sg_list;
-	struct qib_sge sge = ss->sge;
+	struct rvt_sge *sg_list = ss->sg_list;
+	struct rvt_sge sge = ss->sge;
 	u8 num_sge = ss->num_sge;
 	u32 ndesc = 1;  /* count the header */
 
@@ -276,7 +247,7 @@
 			if (--num_sge)
 				sge = *sg_list++;
 		} else if (sge.length == 0 && sge.mr->lkey) {
-			if (++sge.n >= QIB_SEGSZ) {
+			if (++sge.n >= RVT_SEGSZ) {
 				if (++sge.m >= sge.mr->mapsz)
 					break;
 				sge.n = 0;
@@ -294,9 +265,9 @@
 /*
  * Copy from the SGEs to the data buffer.
  */
-static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
+static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -314,7 +285,7 @@
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -330,242 +301,6 @@
 }
 
 /**
- * qib_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
-	int *scheduled)
-{
-	struct qib_swqe *wqe;
-	u32 next;
-	int i;
-	int j;
-	int acc;
-	int ret;
-	unsigned long flags;
-	struct qib_lkey_table *rkt;
-	struct qib_pd *pd;
-	int avoid_schedule = 0;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
-		goto bail_inval;
-
-	/* IB spec says that num_sge == 0 is OK. */
-	if (wr->num_sge > qp->s_max_sge)
-		goto bail_inval;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (wr->opcode == IB_WR_REG_MR) {
-		if (qib_reg_mr(qp, reg_wr(wr)))
-			goto bail_inval;
-	} else if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-			goto bail_inval;
-	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
-		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
-		if (wr->opcode != IB_WR_SEND &&
-		    wr->opcode != IB_WR_SEND_WITH_IMM)
-			goto bail_inval;
-		/* Check UD destination address PD */
-		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-			goto bail_inval;
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-		goto bail_inval;
-
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.pd);
-	wqe = get_swqe_ptr(qp, qp->s_head);
-
-	if (qp->ibqp.qp_type != IB_QPT_UC &&
-	    qp->ibqp.qp_type != IB_QPT_RC)
-		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-	else if (wr->opcode == IB_WR_REG_MR)
-		memcpy(&wqe->reg_wr, reg_wr(wr),
-			sizeof(wqe->reg_wr));
-	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		 wr->opcode == IB_WR_RDMA_WRITE ||
-		 wr->opcode == IB_WR_RDMA_READ)
-		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-	else
-		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-	wqe->length = 0;
-	j = 0;
-	if (wr->num_sge) {
-		acc = wr->opcode >= IB_WR_RDMA_READ ?
-			IB_ACCESS_LOCAL_WRITE : 0;
-		for (i = 0; i < wr->num_sge; i++) {
-			u32 length = wr->sg_list[i].length;
-			int ok;
-
-			if (length == 0)
-				continue;
-			ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
-					 &wr->sg_list[i], acc);
-			if (!ok)
-				goto bail_inval_free;
-			wqe->length += length;
-			j++;
-		}
-		wqe->wr.num_sge = j;
-	}
-	if (qp->ibqp.qp_type == IB_QPT_UC ||
-	    qp->ibqp.qp_type == IB_QPT_RC) {
-		if (wqe->length > 0x80000000U)
-			goto bail_inval_free;
-		if (wqe->length <= qp->pmtu)
-			avoid_schedule = 1;
-	} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
-				  qp->port_num - 1)->ibmtu) {
-		goto bail_inval_free;
-	} else {
-		atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
-		avoid_schedule = 1;
-	}
-	wqe->ssn = qp->s_ssn++;
-	qp->s_head = next;
-
-	ret = 0;
-	goto bail;
-
-bail_inval_free:
-	while (j) {
-		struct qib_sge *sge = &wqe->sg_list[--j];
-
-		qib_put_mr(sge->mr);
-	}
-bail_inval:
-	ret = -EINVAL;
-bail:
-	if (!ret && !wr->next && !avoid_schedule &&
-	 !qib_sdma_empty(
-	   dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
-		qib_schedule_send(qp);
-		*scheduled = 1;
-	}
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * qib_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			 struct ib_send_wr **bad_wr)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	int err = 0;
-	int scheduled = 0;
-
-	for (; wr; wr = wr->next) {
-		err = qib_post_one_send(qp, wr, &scheduled);
-		if (err) {
-			*bad_wr = wr;
-			goto bail;
-		}
-	}
-
-	/* Try to do the send work in the caller's context. */
-	if (!scheduled)
-		qib_do_send(&qp->s_work);
-
-bail:
-	return err;
-}
-
-/**
- * qib_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			    struct ib_recv_wr **bad_wr)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_rwq *wq = qp->r_rq.wq;
-	unsigned long flags;
-	int ret;
-
-	/* Check that state is OK to post receive. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
-		*bad_wr = wr;
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	for (; wr; wr = wr->next) {
-		struct qib_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = wq->head + 1;
-		if (next >= qp->r_rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
  * qib_qp_rcv - processing an incoming packet on a QP
  * @rcd: the context pointer
  * @hdr: the packet header
@@ -579,15 +314,15 @@
  * Called at interrupt level.
  */
 static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		       int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 
 	spin_lock(&qp->r_lock);
 
 	/* Check for valid receive state. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto unlock;
 	}
 
@@ -632,8 +367,10 @@
 	struct qib_pportdata *ppd = rcd->ppd;
 	struct qib_ibport *ibp = &ppd->ibport_data;
 	struct qib_ib_header *hdr = rhdr;
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 	struct qib_other_headers *ohdr;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
 	u32 qp_num;
 	int lnh;
 	u8 opcode;
@@ -645,7 +382,7 @@
 
 	/* Check for a valid destination LID (see ch. 7.11.1). */
 	lid = be16_to_cpu(hdr->lrh[1]);
-	if (lid < QIB_MULTICAST_LID_BASE) {
+	if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		lid &= ~((1 << ppd->lmc) - 1);
 		if (unlikely(lid != ppd->lid))
 			goto drop;
@@ -674,50 +411,40 @@
 #endif
 
 	/* Get the destination QP number. */
-	qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+	qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 	if (qp_num == QIB_MULTICAST_QPN) {
-		struct qib_mcast *mcast;
-		struct qib_mcast_qp *p;
+		struct rvt_mcast *mcast;
+		struct rvt_mcast_qp *p;
 
 		if (lnh != QIB_LRH_GRH)
 			goto drop;
-		mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
+		mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
 		if (mcast == NULL)
 			goto drop;
 		this_cpu_inc(ibp->pmastats->n_multicast_rcv);
 		list_for_each_entry_rcu(p, &mcast->qp_list, list)
 			qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 		/*
-		 * Notify qib_multicast_detach() if it is waiting for us
+		 * Notify rvt_multicast_detach() if it is waiting for us
 		 * to finish.
 		 */
 		if (atomic_dec_return(&mcast->refcount) <= 1)
 			wake_up(&mcast->wait);
 	} else {
-		if (rcd->lookaside_qp) {
-			if (rcd->lookaside_qpn != qp_num) {
-				if (atomic_dec_and_test(
-					&rcd->lookaside_qp->refcount))
-					wake_up(
-					 &rcd->lookaside_qp->wait);
-				rcd->lookaside_qp = NULL;
-			}
+		rcu_read_lock();
+		qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+		if (!qp) {
+			rcu_read_unlock();
+			goto drop;
 		}
-		if (!rcd->lookaside_qp) {
-			qp = qib_lookup_qpn(ibp, qp_num);
-			if (!qp)
-				goto drop;
-			rcd->lookaside_qp = qp;
-			rcd->lookaside_qpn = qp_num;
-		} else
-			qp = rcd->lookaside_qp;
 		this_cpu_inc(ibp->pmastats->n_unicast_rcv);
 		qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
+		rcu_read_unlock();
 	}
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
 
 /*
@@ -728,23 +455,25 @@
 {
 	struct qib_ibdev *dev = (struct qib_ibdev *) data;
 	struct list_head *list = &dev->memwait;
-	struct qib_qp *qp = NULL;
+	struct rvt_qp *qp = NULL;
+	struct qib_qp_priv *priv = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	if (!list_empty(list)) {
-		qp = list_entry(list->next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(list->next, struct qib_qp_priv, iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
 		if (!list_empty(list))
 			mod_timer(&dev->mem_timer, jiffies + 1);
 	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 	if (qp) {
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_KMEM) {
-			qp->s_flags &= ~QIB_S_WAIT_KMEM;
+		if (qp->s_flags & RVT_S_WAIT_KMEM) {
+			qp->s_flags &= ~RVT_S_WAIT_KMEM;
 			qib_schedule_send(qp);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -753,9 +482,9 @@
 	}
 }
 
-static void update_sge(struct qib_sge_state *ss, u32 length)
+static void update_sge(struct rvt_sge_state *ss, u32 length)
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	sge->vaddr += length;
 	sge->length -= length;
@@ -764,7 +493,7 @@
 		if (--ss->num_sge)
 			*sge = *ss->sg_list++;
 	} else if (sge->length == 0 && sge->mr->lkey) {
-		if (++sge->n >= QIB_SEGSZ) {
+		if (++sge->n >= RVT_SEGSZ) {
 			if (++sge->m >= sge->mr->mapsz)
 				return;
 			sge->n = 0;
@@ -810,7 +539,7 @@
 }
 #endif
 
-static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
+static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
 		    u32 length, unsigned flush_wc)
 {
 	u32 extra = 0;
@@ -947,30 +676,31 @@
 }
 
 static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
-					   struct qib_qp *qp)
+					   struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_verbs_txreq *tx;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	spin_lock(&dev->pending_lock);
+	spin_lock(&dev->rdi.pending_lock);
 
 	if (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
 
 		list_del(l);
-		spin_unlock(&dev->pending_lock);
+		spin_unlock(&dev->rdi.pending_lock);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 	} else {
-		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
-		    list_empty(&qp->iowait)) {
+		if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
+		    list_empty(&priv->iowait)) {
 			dev->n_txwait++;
-			qp->s_flags |= QIB_S_WAIT_TX;
-			list_add_tail(&qp->iowait, &dev->txwait);
+			qp->s_flags |= RVT_S_WAIT_TX;
+			list_add_tail(&priv->iowait, &dev->txwait);
 		}
-		qp->s_flags &= ~QIB_S_BUSY;
-		spin_unlock(&dev->pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		tx = ERR_PTR(-EBUSY);
 	}
@@ -978,22 +708,22 @@
 }
 
 static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
-					 struct qib_qp *qp)
+					 struct rvt_qp *qp)
 {
 	struct qib_verbs_txreq *tx;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	/* assume the list non empty */
 	if (likely(!list_empty(&dev->txreq_free))) {
 		struct list_head *l = dev->txreq_free.next;
 
 		list_del(l);
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 	} else {
 		/* call slow path to get the extra lock */
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 		tx =  __get_txreq(dev, qp);
 	}
 	return tx;
@@ -1002,16 +732,15 @@
 void qib_put_txreq(struct qib_verbs_txreq *tx)
 {
 	struct qib_ibdev *dev;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
+	struct qib_qp_priv *priv;
 	unsigned long flags;
 
 	qp = tx->qp;
 	dev = to_idev(qp->ibqp.device);
 
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
 	if (tx->mr) {
-		qib_put_mr(tx->mr);
+		rvt_put_mr(tx->mr);
 		tx->mr = NULL;
 	}
 	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
@@ -1022,21 +751,23 @@
 		kfree(tx->align_buf);
 	}
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 
 	/* Put struct back on free list */
 	list_add(&tx->txreq.list, &dev->txreq_free);
 
 	if (!list_empty(&dev->txwait)) {
 		/* Wake up first QP wanting a free struct */
-		qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(dev->txwait.next, struct qib_qp_priv,
+				  iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_TX) {
-			qp->s_flags &= ~QIB_S_WAIT_TX;
+		if (qp->s_flags & RVT_S_WAIT_TX) {
+			qp->s_flags &= ~RVT_S_WAIT_TX;
 			qib_schedule_send(qp);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1044,7 +775,7 @@
 		if (atomic_dec_and_test(&qp->refcount))
 			wake_up(&qp->wait);
 	} else
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 }
 
 /*
@@ -1055,36 +786,39 @@
  */
 void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 {
-	struct qib_qp *qp, *nqp;
-	struct qib_qp *qps[20];
+	struct rvt_qp *qp, *nqp;
+	struct qib_qp_priv *qpp, *nqpp;
+	struct rvt_qp *qps[20];
 	struct qib_ibdev *dev;
 	unsigned i, n;
 
 	n = 0;
 	dev = &ppd->dd->verbs_dev;
-	spin_lock(&dev->pending_lock);
+	spin_lock(&dev->rdi.pending_lock);
 
 	/* Search wait list for first QP wanting DMA descriptors. */
-	list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
+	list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
+		qp = qpp->owner;
+		nqp = nqpp->owner;
 		if (qp->port_num != ppd->port)
 			continue;
 		if (n == ARRAY_SIZE(qps))
 			break;
-		if (qp->s_tx->txreq.sg_count > avail)
+		if (qpp->s_tx->txreq.sg_count > avail)
 			break;
-		avail -= qp->s_tx->txreq.sg_count;
-		list_del_init(&qp->iowait);
+		avail -= qpp->s_tx->txreq.sg_count;
+		list_del_init(&qpp->iowait);
 		atomic_inc(&qp->refcount);
 		qps[n++] = qp;
 	}
 
-	spin_unlock(&dev->pending_lock);
+	spin_unlock(&dev->rdi.pending_lock);
 
 	for (i = 0; i < n; i++) {
 		qp = qps[i];
 		spin_lock(&qp->s_lock);
-		if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
-			qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
+		if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
+			qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
 			qib_schedule_send(qp);
 		}
 		spin_unlock(&qp->s_lock);
@@ -1100,7 +834,8 @@
 {
 	struct qib_verbs_txreq *tx =
 		container_of(cookie, struct qib_verbs_txreq, txreq);
-	struct qib_qp *qp = tx->qp;
+	struct rvt_qp *qp = tx->qp;
+	struct qib_qp_priv *priv = qp->priv;
 
 	spin_lock(&qp->s_lock);
 	if (tx->wqe)
@@ -1117,11 +852,11 @@
 		}
 		qib_rc_send_complete(qp, hdr);
 	}
-	if (atomic_dec_and_test(&qp->s_dma_busy)) {
+	if (atomic_dec_and_test(&priv->s_dma_busy)) {
 		if (qp->state == IB_QPS_RESET)
-			wake_up(&qp->wait_dma);
-		else if (qp->s_flags & QIB_S_WAIT_DMA) {
-			qp->s_flags &= ~QIB_S_WAIT_DMA;
+			wake_up(&priv->wait_dma);
+		else if (qp->s_flags & RVT_S_WAIT_DMA) {
+			qp->s_flags &= ~RVT_S_WAIT_DMA;
 			qib_schedule_send(qp);
 		}
 	}
@@ -1130,22 +865,23 @@
 	qib_put_txreq(tx);
 }
 
-static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
+static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	unsigned long flags;
 	int ret = 0;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			if (list_empty(&dev->memwait))
 				mod_timer(&dev->mem_timer, jiffies + 1);
-			qp->s_flags |= QIB_S_WAIT_KMEM;
-			list_add_tail(&qp->iowait, &dev->memwait);
+			qp->s_flags |= RVT_S_WAIT_KMEM;
+			list_add_tail(&priv->iowait, &dev->memwait);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1153,10 +889,11 @@
 	return ret;
 }
 
-static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
-			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
+static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
+			      u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 			      u32 plen, u32 dwords)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -1167,9 +904,9 @@
 	u32 ndesc;
 	int ret;
 
-	tx = qp->s_tx;
+	tx = priv->s_tx;
 	if (tx) {
-		qp->s_tx = NULL;
+		priv->s_tx = NULL;
 		/* resend previously constructed packet */
 		ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
 		goto bail;
@@ -1182,7 +919,6 @@
 	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 				       be16_to_cpu(hdr->lrh[0]) >> 12);
 	tx->qp = qp;
-	atomic_inc(&qp->refcount);
 	tx->wqe = qp->s_wqe;
 	tx->mr = qp->s_rdma_mr;
 	if (qp->s_rdma_mr)
@@ -1245,7 +981,7 @@
 	qib_put_txreq(tx);
 	ret = wait_kmem(dev, qp);
 unaligned:
-	ibp->n_unaligned++;
+	ibp->rvp.n_unaligned++;
 bail:
 	return ret;
 bail_tx:
@@ -1257,8 +993,9 @@
  * If we are now in the error state, return zero to flush the
  * send work request.
  */
-static int no_bufs_available(struct qib_qp *qp)
+static int no_bufs_available(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_devdata *dd;
 	unsigned long flags;
@@ -1271,25 +1008,25 @@
 	 * enabling the PIO avail interrupt.
 	 */
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			dev->n_piowait++;
-			qp->s_flags |= QIB_S_WAIT_PIO;
-			list_add_tail(&qp->iowait, &dev->piowait);
+			qp->s_flags |= RVT_S_WAIT_PIO;
+			list_add_tail(&priv->iowait, &dev->piowait);
 			dd = dd_from_dev(dev);
 			dd->f_wantpiobuf_intr(dd, 1);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
-static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
-			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
+static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
+			      u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 			      u32 plen, u32 dwords)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@ -1370,7 +1107,7 @@
 	}
 	qib_sendbuf_done(dd, pbufn);
 	if (qp->s_rdma_mr) {
-		qib_put_mr(qp->s_rdma_mr);
+		rvt_put_mr(qp->s_rdma_mr);
 		qp->s_rdma_mr = NULL;
 	}
 	if (qp->s_wqe) {
@@ -1394,10 +1131,10 @@
  * @len: the length of the packet in bytes
  *
  * Return zero if packet is sent or queued OK.
- * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise.
+ * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
  */
-int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
-		   u32 hdrwords, struct qib_sge_state *ss, u32 len)
+int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+		   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	u32 plen;
@@ -1529,10 +1266,11 @@
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
 	struct list_head *list;
-	struct qib_qp *qps[5];
-	struct qib_qp *qp;
+	struct rvt_qp *qps[5];
+	struct rvt_qp *qp;
 	unsigned long flags;
 	unsigned i, n;
+	struct qib_qp_priv *priv;
 
 	list = &dev->piowait;
 	n = 0;
@@ -1543,25 +1281,26 @@
 	 * could end up with QPs on the wait list with the interrupt
 	 * disabled.
 	 */
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	while (!list_empty(list)) {
 		if (n == ARRAY_SIZE(qps))
 			goto full;
-		qp = list_entry(list->next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(list->next, struct qib_qp_priv, iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
 		qps[n++] = qp;
 	}
 	dd->f_wantpiobuf_intr(dd, 0);
 full:
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 	for (i = 0; i < n; i++) {
 		qp = qps[i];
 
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_PIO) {
-			qp->s_flags &= ~QIB_S_WAIT_PIO;
+		if (qp->s_flags & RVT_S_WAIT_PIO) {
+			qp->s_flags &= ~RVT_S_WAIT_PIO;
 			qib_schedule_send(qp);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1572,82 +1311,24 @@
 	}
 }
 
-static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-			    struct ib_udata *uhw)
-{
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	struct qib_ibdev *dev = to_idev(ibdev);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-	memset(props, 0, sizeof(*props));
-
-	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id =
-		QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
-	props->vendor_part_id = dd->deviceid;
-	props->hw_ver = dd->minrev;
-	props->sys_image_guid = ib_qib_sys_image_guid;
-	props->max_mr_size = ~0ULL;
-	props->max_qp = ib_qib_max_qps;
-	props->max_qp_wr = ib_qib_max_qp_wrs;
-	props->max_sge = ib_qib_max_sges;
-	props->max_sge_rd = ib_qib_max_sges;
-	props->max_cq = ib_qib_max_cqs;
-	props->max_ah = ib_qib_max_ahs;
-	props->max_cqe = ib_qib_max_cqes;
-	props->max_mr = dev->lk_table.max;
-	props->max_fmr = dev->lk_table.max;
-	props->max_map_per_fmr = 32767;
-	props->max_pd = ib_qib_max_pds;
-	props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
-	props->max_qp_init_rd_atom = 255;
-	/* props->max_res_rd_atom */
-	props->max_srq = ib_qib_max_srqs;
-	props->max_srq_wr = ib_qib_max_srq_wrs;
-	props->max_srq_sge = ib_qib_max_srq_sges;
-	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_GLOB;
-	props->max_pkeys = qib_get_npkeys(dd);
-	props->max_mcast_grp = ib_qib_max_mcast_grps;
-	props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-		props->max_mcast_grp;
-
-	return 0;
-}
-
-static int qib_query_port(struct ib_device *ibdev, u8 port,
+static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
 			  struct ib_port_attr *props)
 {
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	struct qib_ibport *ibp = to_iport(ibdev, port);
-	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_pportdata *ppd = &dd->pport[port_num - 1];
 	enum ib_mtu mtu;
 	u16 lid = ppd->lid;
 
-	memset(props, 0, sizeof(*props));
 	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
 	props->lmc = ppd->lmc;
-	props->sm_lid = ibp->sm_lid;
-	props->sm_sl = ibp->sm_sl;
 	props->state = dd->f_iblink_state(ppd->lastibcstat);
 	props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
-	props->port_cap_flags = ibp->port_cap_flags;
 	props->gid_tbl_len = QIB_GUIDS_PER_PORT;
-	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = qib_get_npkeys(dd);
-	props->bad_pkey_cntr = ibp->pkey_violations;
-	props->qkey_viol_cntr = ibp->qkey_violations;
 	props->active_width = ppd->link_width_active;
 	/* See rate_show() */
 	props->active_speed = ppd->link_speed_active;
 	props->max_vl_num = qib_num_vls(ppd->vls_supported);
-	props->init_type_reply = 0;
 
 	props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
 	switch (ppd->ibmtu) {
@@ -1670,7 +1351,6 @@
 		mtu = IB_MTU_2048;
 	}
 	props->active_mtu = mtu;
-	props->subnet_timeout = ibp->subnet_timeout;
 
 	return 0;
 }
@@ -1714,185 +1394,70 @@
 	return ret;
 }
 
-static int qib_modify_port(struct ib_device *ibdev, u8 port,
-			   int port_modify_mask, struct ib_port_modify *props)
+static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
 {
-	struct qib_ibport *ibp = to_iport(ibdev, port);
-	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_pportdata *ppd = &dd->pport[port_num - 1];
 
-	ibp->port_cap_flags |= props->set_port_cap_mask;
-	ibp->port_cap_flags &= ~props->clr_port_cap_mask;
-	if (props->set_port_cap_mask || props->clr_port_cap_mask)
-		qib_cap_mask_chg(ibp);
-	if (port_modify_mask & IB_PORT_SHUTDOWN)
-		qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
-	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		ibp->qkey_violations = 0;
+	qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
+
 	return 0;
 }
 
-static int qib_query_gid(struct ib_device *ibdev, u8 port,
-			 int index, union ib_gid *gid)
+static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+			   int guid_index, __be64 *guid)
 {
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	int ret = 0;
+	struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 
-	if (!port || port > dd->num_pports)
-		ret = -EINVAL;
-	else {
-		struct qib_ibport *ibp = to_iport(ibdev, port);
-		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-
-		gid->global.subnet_prefix = ibp->gid_prefix;
-		if (index == 0)
-			gid->global.interface_id = ppd->guid;
-		else if (index < QIB_GUIDS_PER_PORT)
-			gid->global.interface_id = ibp->guids[index - 1];
-		else
-			ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
-				  struct ib_ucontext *context,
-				  struct ib_udata *udata)
-{
-	struct qib_ibdev *dev = to_idev(ibdev);
-	struct qib_pd *pd;
-	struct ib_pd *ret;
-
-	/*
-	 * This is actually totally arbitrary.  Some correctness tests
-	 * assume there's a maximum number of PDs that can be allocated.
-	 * We don't actually have this limit, but we fail the test if
-	 * we allow allocations of more than we report for this value.
-	 */
-
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock(&dev->n_pds_lock);
-	if (dev->n_pds_allocated == ib_qib_max_pds) {
-		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_pds_allocated++;
-	spin_unlock(&dev->n_pds_lock);
-
-	/* ib_alloc_pd() will initialize pd->ibpd. */
-	pd->user = udata != NULL;
-
-	ret = &pd->ibpd;
-
-bail:
-	return ret;
-}
-
-static int qib_dealloc_pd(struct ib_pd *ibpd)
-{
-	struct qib_pd *pd = to_ipd(ibpd);
-	struct qib_ibdev *dev = to_idev(ibpd->device);
-
-	spin_lock(&dev->n_pds_lock);
-	dev->n_pds_allocated--;
-	spin_unlock(&dev->n_pds_lock);
-
-	kfree(pd);
+	if (guid_index == 0)
+		*guid = ppd->guid;
+	else if (guid_index < QIB_GUIDS_PER_PORT)
+		*guid = ibp->guids[guid_index - 1];
+	else
+		return -EINVAL;
 
 	return 0;
 }
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
 {
-	/* A multicast address requires a GRH (see ch. 8.4.1). */
-	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
-	    ah_attr->dlid != QIB_PERMISSIVE_LID &&
-	    !(ah_attr->ah_flags & IB_AH_GRH))
-		goto bail;
-	if ((ah_attr->ah_flags & IB_AH_GRH) &&
-	    ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
-		goto bail;
-	if (ah_attr->dlid == 0)
-		goto bail;
-	if (ah_attr->port_num < 1 ||
-	    ah_attr->port_num > ibdev->phys_port_cnt)
-		goto bail;
-	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
-	    ib_rate_to_mult(ah_attr->static_rate) < 0)
-		goto bail;
 	if (ah_attr->sl > 15)
-		goto bail;
+		return -EINVAL;
+
 	return 0;
-bail:
-	return -EINVAL;
 }
 
-/**
- * qib_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *qib_create_ah(struct ib_pd *pd,
-				   struct ib_ah_attr *ah_attr)
+static void qib_notify_new_ah(struct ib_device *ibdev,
+			      struct ib_ah_attr *ah_attr,
+			      struct rvt_ah *ah)
 {
-	struct qib_ah *ah;
-	struct ib_ah *ret;
-	struct qib_ibdev *dev = to_idev(pd->device);
-	unsigned long flags;
+	struct qib_ibport *ibp;
+	struct qib_pportdata *ppd;
 
-	if (qib_check_ah(pd->device, ah_attr)) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
+	/*
+	 * Do not trust reading anything from rvt_ah at this point as it is not
+	 * done being setup. We can however modify things which we need to set.
+	 */
 
-	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
-	if (!ah) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	if (dev->n_ahs_allocated == ib_qib_max_ahs) {
-		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-		kfree(ah);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_ahs_allocated++;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	/* ib_create_ah() will initialize ah->ibah. */
-	ah->attr = *ah_attr;
-	atomic_set(&ah->refcount, 0);
-
-	ret = &ah->ibah;
-
-bail:
-	return ret;
+	ibp = to_iport(ibdev, ah_attr->port_num);
+	ppd = ppd_from_ibp(ibp);
+	ah->vl = ibp->sl_to_vl[ah->attr.sl];
+	ah->log_pmtu = ilog2(ppd->ibmtu);
 }
 
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
 {
 	struct ib_ah_attr attr;
 	struct ib_ah *ah = ERR_PTR(-EINVAL);
-	struct qib_qp *qp0;
+	struct rvt_qp *qp0;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.dlid = dlid;
 	attr.port_num = ppd_from_ibp(ibp)->port;
 	rcu_read_lock();
-	qp0 = rcu_dereference(ibp->qp0);
+	qp0 = rcu_dereference(ibp->rvp.qp[0]);
 	if (qp0)
 		ah = ib_create_ah(qp0->ibqp.pd, &attr);
 	rcu_read_unlock();
@@ -1900,51 +1465,6 @@
 }
 
 /**
- * qib_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int qib_destroy_ah(struct ib_ah *ibah)
-{
-	struct qib_ibdev *dev = to_idev(ibah->device);
-	struct qib_ah *ah = to_iah(ibah);
-	unsigned long flags;
-
-	if (atomic_read(&ah->refcount) != 0)
-		return -EBUSY;
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	dev->n_ahs_allocated--;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	kfree(ah);
-
-	return 0;
-}
-
-static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct qib_ah *ah = to_iah(ibah);
-
-	if (qib_check_ah(ibah->device, ah_attr))
-		return -EINVAL;
-
-	ah->attr = *ah_attr;
-
-	return 0;
-}
-
-static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct qib_ah *ah = to_iah(ibah);
-
-	*ah_attr = ah->attr;
-
-	return 0;
-}
-
-/**
  * qib_get_npkeys - return the size of the PKEY table for context 0
  * @dd: the qlogic_ib device
  */
@@ -1973,75 +1493,27 @@
 	return ret;
 }
 
-static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-			  u16 *pkey)
-{
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	int ret;
-
-	if (index >= qib_get_npkeys(dd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*pkey = qib_get_pkey(to_iport(ibdev, port), index);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the QLogic_IB driver
- */
-
-static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
-					      struct ib_udata *udata)
-{
-	struct qib_ucontext *context;
-	struct ib_ucontext *ret;
-
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	ret = &context->ibucontext;
-
-bail:
-	return ret;
-}
-
-static int qib_dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(to_iucontext(context));
-	return 0;
-}
-
 static void init_ibport(struct qib_pportdata *ppd)
 {
 	struct qib_verbs_counters cntrs;
 	struct qib_ibport *ibp = &ppd->ibport_data;
 
-	spin_lock_init(&ibp->lock);
+	spin_lock_init(&ibp->rvp.lock);
 	/* Set the prefix to the default value (see ch. 4.1.1) */
-	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
-	ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
-	ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
+	ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
+	ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
+	ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
 		IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
 		IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
 		IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
 		IB_PORT_OTHER_LOCAL_CHANGES_SUP;
 	if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
-		ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
-	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+		ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
+	ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+	ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+	ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+	ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+	ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
 
 	/* Snapshot current HW counters to "clear" them. */
 	qib_get_counters(ppd, &cntrs);
@@ -2061,26 +1533,55 @@
 	ibp->z_excessive_buffer_overrun_errors =
 		cntrs.excessive_buffer_overrun_errors;
 	ibp->z_vl15_dropped = cntrs.vl15_dropped;
-	RCU_INIT_POINTER(ibp->qp0, NULL);
-	RCU_INIT_POINTER(ibp->qp1, NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
 }
 
-static int qib_port_immutable(struct ib_device *ibdev, u8 port_num,
-			      struct ib_port_immutable *immutable)
+/**
+ * qib_fill_device_attr - Fill in rvt dev info device attributes.
+ * @dd: the device data structure
+ */
+static void qib_fill_device_attr(struct qib_devdata *dd)
 {
-	struct ib_port_attr attr;
-	int err;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 
-	err = qib_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
+	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
 
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
-	return 0;
+	rdi->dparms.props.max_pd = ib_qib_max_pds;
+	rdi->dparms.props.max_ah = ib_qib_max_ahs;
+	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+	rdi->dparms.props.page_size_cap = PAGE_SIZE;
+	rdi->dparms.props.vendor_id =
+		QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
+	rdi->dparms.props.vendor_part_id = dd->deviceid;
+	rdi->dparms.props.hw_ver = dd->minrev;
+	rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
+	rdi->dparms.props.max_mr_size = ~0ULL;
+	rdi->dparms.props.max_qp = ib_qib_max_qps;
+	rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
+	rdi->dparms.props.max_sge = ib_qib_max_sges;
+	rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
+	rdi->dparms.props.max_cq = ib_qib_max_cqs;
+	rdi->dparms.props.max_cqe = ib_qib_max_cqes;
+	rdi->dparms.props.max_ah = ib_qib_max_ahs;
+	rdi->dparms.props.max_mr = rdi->lkey_table.max;
+	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
+	rdi->dparms.props.max_map_per_fmr = 32767;
+	rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
+	rdi->dparms.props.max_qp_init_rd_atom = 255;
+	rdi->dparms.props.max_srq = ib_qib_max_srqs;
+	rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
+	rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
+	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
+	rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
+	rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
+	rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
+	rdi->dparms.props.max_total_mcast_qp_attach =
+					rdi->dparms.props.max_mcast_qp_attach *
+					rdi->dparms.props.max_mcast_grp;
 }
 
 /**
@@ -2091,68 +1592,20 @@
 int qib_register_ib_device(struct qib_devdata *dd)
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
+	struct ib_device *ibdev = &dev->rdi.ibdev;
 	struct qib_pportdata *ppd = dd->pport;
-	unsigned i, lk_tab_size;
+	unsigned i, ctxt;
 	int ret;
 
-	dev->qp_table_size = ib_qib_qp_table_size;
 	get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
-	dev->qp_table = kmalloc_array(
-				dev->qp_table_size,
-				sizeof(*dev->qp_table),
-				GFP_KERNEL);
-	if (!dev->qp_table) {
-		ret = -ENOMEM;
-		goto err_qpt;
-	}
-	for (i = 0; i < dev->qp_table_size; i++)
-		RCU_INIT_POINTER(dev->qp_table[i], NULL);
-
 	for (i = 0; i < dd->num_pports; i++)
 		init_ibport(ppd + i);
 
 	/* Only need to initialize non-zero fields. */
-	spin_lock_init(&dev->qpt_lock);
-	spin_lock_init(&dev->n_pds_lock);
-	spin_lock_init(&dev->n_ahs_lock);
-	spin_lock_init(&dev->n_cqs_lock);
-	spin_lock_init(&dev->n_qps_lock);
-	spin_lock_init(&dev->n_srqs_lock);
-	spin_lock_init(&dev->n_mcast_grps_lock);
-	init_timer(&dev->mem_timer);
-	dev->mem_timer.function = mem_timer;
-	dev->mem_timer.data = (unsigned long) dev;
+	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
 
-	qib_init_qpn_table(dd, &dev->qpn_table);
+	qpt_mask = dd->qpn_mask;
 
-	/*
-	 * The top ib_qib_lkey_table_size bits are used to index the
-	 * table.  The lower 8 bits can be owned by the user (copied from
-	 * the LKEY).  The remaining bits act as a generation number or tag.
-	 */
-	spin_lock_init(&dev->lk_table.lock);
-	/* insure generation is at least 4 bits see keys.c */
-	if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) {
-		qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
-			ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS);
-		ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS;
-	}
-	dev->lk_table.max = 1 << ib_qib_lkey_table_size;
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	dev->lk_table.table = (struct qib_mregion __rcu **)
-		vmalloc(lk_tab_size);
-	if (dev->lk_table.table == NULL) {
-		ret = -ENOMEM;
-		goto err_lk;
-	}
-	RCU_INIT_POINTER(dev->dma_mr, NULL);
-	for (i = 0; i < dev->lk_table.max; i++)
-		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
-	INIT_LIST_HEAD(&dev->pending_mmaps);
-	spin_lock_init(&dev->pending_lock);
-	dev->mmap_offset = PAGE_SIZE;
-	spin_lock_init(&dev->mmap_offset_lock);
 	INIT_LIST_HEAD(&dev->piowait);
 	INIT_LIST_HEAD(&dev->dmawait);
 	INIT_LIST_HEAD(&dev->txwait);
@@ -2194,110 +1647,91 @@
 	strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
 	ibdev->owner = THIS_MODULE;
 	ibdev->node_guid = ppd->guid;
-	ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
-	ibdev->uverbs_cmd_mask =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	ibdev->node_type = RDMA_NODE_IB_CA;
 	ibdev->phys_port_cnt = dd->num_pports;
-	ibdev->num_comp_vectors = 1;
 	ibdev->dma_device = &dd->pcidev->dev;
-	ibdev->query_device = qib_query_device;
 	ibdev->modify_device = qib_modify_device;
-	ibdev->query_port = qib_query_port;
-	ibdev->modify_port = qib_modify_port;
-	ibdev->query_pkey = qib_query_pkey;
-	ibdev->query_gid = qib_query_gid;
-	ibdev->alloc_ucontext = qib_alloc_ucontext;
-	ibdev->dealloc_ucontext = qib_dealloc_ucontext;
-	ibdev->alloc_pd = qib_alloc_pd;
-	ibdev->dealloc_pd = qib_dealloc_pd;
-	ibdev->create_ah = qib_create_ah;
-	ibdev->destroy_ah = qib_destroy_ah;
-	ibdev->modify_ah = qib_modify_ah;
-	ibdev->query_ah = qib_query_ah;
-	ibdev->create_srq = qib_create_srq;
-	ibdev->modify_srq = qib_modify_srq;
-	ibdev->query_srq = qib_query_srq;
-	ibdev->destroy_srq = qib_destroy_srq;
-	ibdev->create_qp = qib_create_qp;
-	ibdev->modify_qp = qib_modify_qp;
-	ibdev->query_qp = qib_query_qp;
-	ibdev->destroy_qp = qib_destroy_qp;
-	ibdev->post_send = qib_post_send;
-	ibdev->post_recv = qib_post_receive;
-	ibdev->post_srq_recv = qib_post_srq_receive;
-	ibdev->create_cq = qib_create_cq;
-	ibdev->destroy_cq = qib_destroy_cq;
-	ibdev->resize_cq = qib_resize_cq;
-	ibdev->poll_cq = qib_poll_cq;
-	ibdev->req_notify_cq = qib_req_notify_cq;
-	ibdev->get_dma_mr = qib_get_dma_mr;
-	ibdev->reg_user_mr = qib_reg_user_mr;
-	ibdev->dereg_mr = qib_dereg_mr;
-	ibdev->alloc_mr = qib_alloc_mr;
-	ibdev->map_mr_sg = qib_map_mr_sg;
-	ibdev->alloc_fmr = qib_alloc_fmr;
-	ibdev->map_phys_fmr = qib_map_phys_fmr;
-	ibdev->unmap_fmr = qib_unmap_fmr;
-	ibdev->dealloc_fmr = qib_dealloc_fmr;
-	ibdev->attach_mcast = qib_multicast_attach;
-	ibdev->detach_mcast = qib_multicast_detach;
 	ibdev->process_mad = qib_process_mad;
-	ibdev->mmap = qib_mmap;
-	ibdev->dma_ops = &qib_dma_mapping_ops;
-	ibdev->get_port_immutable = qib_port_immutable;
 
 	snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
 		 "Intel Infiniband HCA %s", init_utsname()->nodename);
 
-	ret = ib_register_device(ibdev, qib_create_port_files);
-	if (ret)
-		goto err_reg;
+	/*
+	 * Fill in rvt info object.
+	 */
+	dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
+	dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
+	dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
+	dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
+	dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
+	dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
+	dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
+	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
+	dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
+	dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
+	dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
+	dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
+	dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
+	dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
+	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
+	dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
+	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
+	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
+	dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
+	dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
+	dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
+	dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
+						qib_notify_create_mad_agent;
+	dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
+						qib_notify_free_mad_agent;
 
-	ret = qib_create_agents(dev);
+	dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
+	dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
+	dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
+	dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
+	dd->verbs_dev.rdi.dparms.qpn_start = 1;
+	dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
+	dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
+	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
+	dd->verbs_dev.rdi.dparms.qos_shift = 1;
+	dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
+	dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
+	dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
+	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
+	dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
+	dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
+	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+	dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
+
+	snprintf(dd->verbs_dev.rdi.dparms.cq_name,
+		 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
+		 "qib_cq%d", dd->unit);
+
+	qib_fill_device_attr(dd);
+
+	ppd = dd->pport;
+	for (i = 0; i < dd->num_pports; i++, ppd++) {
+		ctxt = ppd->hw_pidx;
+		rvt_init_port(&dd->verbs_dev.rdi,
+			      &ppd->ibport_data.rvp,
+			      i,
+			      dd->rcd[ctxt]->pkeys);
+	}
+
+	ret = rvt_register_device(&dd->verbs_dev.rdi);
 	if (ret)
-		goto err_agents;
+		goto err_tx;
 
 	ret = qib_verbs_register_sysfs(dd);
 	if (ret)
 		goto err_class;
 
-	goto bail;
+	return ret;
 
 err_class:
-	qib_free_agents(dev);
-err_agents:
-	ib_unregister_device(ibdev);
-err_reg:
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 err_tx:
 	while (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
@@ -2313,27 +1747,17 @@
 					sizeof(struct qib_pio_header),
 				  dev->pio_hdrs, dev->pio_hdrs_phys);
 err_hdrs:
-	vfree(dev->lk_table.table);
-err_lk:
-	kfree(dev->qp_table);
-err_qpt:
 	qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-bail:
 	return ret;
 }
 
 void qib_unregister_ib_device(struct qib_devdata *dd)
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
-	u32 qps_inuse;
-	unsigned lk_tab_size;
 
 	qib_verbs_unregister_sysfs(dd);
 
-	qib_free_agents(dev);
-
-	ib_unregister_device(ibdev);
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 
 	if (!list_empty(&dev->piowait))
 		qib_dev_err(dd, "piowait list not empty!\n");
@@ -2343,16 +1767,8 @@
 		qib_dev_err(dd, "txwait list not empty!\n");
 	if (!list_empty(&dev->memwait))
 		qib_dev_err(dd, "memwait list not empty!\n");
-	if (dev->dma_mr)
-		qib_dev_err(dd, "DMA MR not NULL!\n");
-
-	qps_inuse = qib_free_all_qps(dd);
-	if (qps_inuse)
-		qib_dev_err(dd, "QP memory leak! %u still in use\n",
-			    qps_inuse);
 
 	del_timer_sync(&dev->mem_timer);
-	qib_free_qpn_table(&dev->qpn_table);
 	while (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
 		struct qib_verbs_txreq *tx;
@@ -2366,21 +1782,36 @@
 				  dd->pport->sdma_descq_cnt *
 					sizeof(struct qib_pio_header),
 				  dev->pio_hdrs, dev->pio_hdrs_phys);
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	vfree(dev->lk_table.table);
-	kfree(dev->qp_table);
 }
 
-/*
- * This must be called with s_lock held.
+/**
+ * _qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules progress w/o regard to the s_flags.
+ *
+ * It is only used in post send, which doesn't hold
+ * the s_lock.
  */
-void qib_schedule_send(struct qib_qp *qp)
+void _qib_schedule_send(struct rvt_qp *qp)
 {
-	if (qib_send_ok(qp)) {
-		struct qib_ibport *ibp =
-			to_iport(qp->ibqp.device, qp->port_num);
-		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_ibport *ibp =
+		to_iport(qp->ibqp.device, qp->port_num);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_qp_priv *priv = qp->priv;
 
-		queue_work(ppd->qib_wq, &qp->s_work);
-	}
+	queue_work(ppd->qib_wq, &priv->s_work);
+}
+
+/**
+ * qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules qp progress.  The s_lock
+ * should be held.
+ */
+void qib_schedule_send(struct rvt_qp *qp)
+{
+	if (qib_send_ok(qp))
+		_qib_schedule_send(qp);
 }

diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 6c5e777..4b76a8d 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h

@@ -45,6 +45,8 @@
 #include <linux/completion.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_cq.h>
 
 struct qib_ctxtdata;
 struct qib_pportdata;
@@ -53,9 +55,7 @@
 
 #define QIB_MAX_RDMA_ATOMIC     16
 #define QIB_GUIDS_PER_PORT	5
-
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+#define QIB_PSN_SHIFT		8
 
 /*
  * Increment this value if any changes that break userspace ABI
@@ -63,12 +63,6 @@
  */
 #define QIB_UVERBS_ABI_VERSION       2
 
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
-
 #define IB_SEQ_NAK	(3 << 29)
 
 /* AETH NAK opcode values */
@@ -79,17 +73,6 @@
 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
 #define IB_NAK_INVALID_RD_REQUEST       0x64
 
-/* Flags for checking QP state (see ib_qib_state_ops[]) */
-#define QIB_POST_SEND_OK                0x01
-#define QIB_POST_RECV_OK                0x02
-#define QIB_PROCESS_RECV_OK             0x04
-#define QIB_PROCESS_SEND_OK             0x08
-#define QIB_PROCESS_NEXT_SEND_OK        0x10
-#define QIB_FLUSH_SEND			0x20
-#define QIB_FLUSH_RECV			0x40
-#define QIB_PROCESS_OR_FLUSH_SEND \
-	(QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND)
-
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE       0x00
 #define IB_PMA_SAMPLE_STATUS_STARTED    0x01
@@ -203,468 +186,21 @@
 } __packed;
 
 /*
- * There is one struct qib_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct qib_mcast_qp.
+ * qib specific data structure that will be hidden from rvt after the queue pair
+ * is made common.
  */
-struct qib_mcast_qp {
-	struct list_head list;
-	struct qib_qp *qp;
-};
-
-struct qib_mcast {
-	struct rb_node rb_node;
-	union ib_gid mgid;
-	struct list_head qp_list;
-	wait_queue_head_t wait;
-	atomic_t refcount;
-	int n_attached;
-};
-
-/* Protection domain */
-struct qib_pd {
-	struct ib_pd ibpd;
-	int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct qib_ah {
-	struct ib_ah ibah;
-	struct ib_ah_attr attr;
-	atomic_t refcount;
-};
-
-/*
- * This structure is used by qib_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct qib_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct qib_cq_wc {
-	u32 head;               /* index of next entry to fill */
-	u32 tail;               /* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
-};
-
-/*
- * The completion queue structure.
- */
-struct qib_cq {
-	struct ib_cq ibcq;
-	struct kthread_work comptask;
-	struct qib_devdata *dd;
-	spinlock_t lock; /* protect changes in this struct */
-	u8 notify;
-	u8 triggered;
-	struct qib_cq_wc *queue;
-	struct qib_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct qib_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of qib_segs that fit in a page. */
-#define QIB_SEGSZ     (PAGE_SIZE / sizeof(struct qib_seg))
-
-struct qib_segarray {
-	struct qib_seg segs[QIB_SEGSZ];
-};
-
-struct qib_mregion {
-	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-	u64 user_base;          /* User's address for this region */
-	u64 iova;               /* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;             /* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;           /* number of qib_segs in all the arrays */
-	u32 mapsz;              /* size of the map array */
-	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
-	u8  lkey_published;     /* in global table */
-	struct completion comp; /* complete when refcount goes to zero */
-	struct rcu_head list;
-	atomic_t refcount;
-	struct qib_segarray *map[0];    /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct qib_sge {
-	struct qib_mregion *mr;
-	void *vaddr;            /* kernel virtual address of segment */
-	u32 sge_length;         /* length of the SGE */
-	u32 length;             /* remaining length of the segment */
-	u16 m;                  /* current index: mr->map[m] */
-	u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct qib_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	u64 *pages;
-	u32 npages;
-	struct qib_mregion mr;  /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct qib_swqe {
-	union {
-		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_ud_wr ud_wr;
-		struct ib_reg_wr reg_wr;
-		struct ib_rdma_wr rdma_wr;
-		struct ib_atomic_wr atomic_wr;
-	};
-	u32 psn;                /* first packet sequence number */
-	u32 lpsn;               /* last packet sequence number */
-	u32 ssn;                /* send sequence number */
-	u32 length;             /* total length of data in sg_list */
-	struct qib_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct qib_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct qib_rwq {
-	u32 head;               /* new work requests posted to the head */
-	u32 tail;               /* receives pull requests from here. */
-	struct qib_rwqe wq[0];
-};
-
-struct qib_rq {
-	struct qib_rwq *wq;
-	u32 size;               /* size of RWQE array */
-	u8 max_sge;
-	spinlock_t lock /* protect changes in this struct */
-		____cacheline_aligned_in_smp;
-};
-
-struct qib_srq {
-	struct ib_srq ibsrq;
-	struct qib_rq rq;
-	struct qib_mmap_info *ip;
-	/* send signal when number of RWQEs < limit */
-	u32 limit;
-};
-
-struct qib_sge_state {
-	struct qib_sge *sg_list;      /* next SGE to be used if any */
-	struct qib_sge sge;   /* progress state for the current SGE */
-	u32 total_len;
-	u8 num_sge;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct qib_ack_entry {
-	u8 opcode;
-	u8 sent;
-	u32 psn;
-	u32 lpsn;
-	union {
-		struct qib_sge rdma_sge;
-		u64 atomic_data;
-	};
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct qib_qp {
-	struct ib_qp ibqp;
-	/* read mostly fields above and below */
-	struct ib_ah_attr remote_ah_attr;
-	struct ib_ah_attr alt_ah_attr;
-	struct qib_qp __rcu *next;            /* link list for QPN hash table */
-	struct qib_swqe *s_wq;  /* send work queue */
-	struct qib_mmap_info *ip;
-	struct qib_ib_header *s_hdr;     /* next packet header to send */
-	unsigned long timeout_jiffies;  /* computed from timeout */
-
-	enum ib_mtu path_mtu;
-	u32 remote_qpn;
-	u32 pmtu;		/* decoded from path_mtu */
-	u32 qkey;               /* QKEY for this QP (for UD or RD) */
-	u32 s_size;             /* send work queue size */
-	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-
-	u8 state;               /* QP state */
-	u8 qp_access_flags;
-	u8 alt_timeout;         /* Alternate path timeout for this QP */
-	u8 timeout;             /* Timeout for this QP */
-	u8 s_srate;
-	u8 s_mig_state;
-	u8 port_num;
-	u8 s_pkey_index;        /* PKEY index to use */
-	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
-	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-	u8 s_retry_cnt;         /* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-	u8 s_max_sge;           /* size of s_wq->sg_list */
-	u8 s_draining;
-
-	/* start of read/write fields */
-
-	atomic_t refcount ____cacheline_aligned_in_smp;
-	wait_queue_head_t wait;
-
-
-	struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]
-		____cacheline_aligned_in_smp;
-	struct qib_sge_state s_rdma_read_sge;
-
-	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
-	unsigned long r_aflags;
-	u64 r_wr_id;            /* ID for current receive WQE */
-	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-	u32 r_len;              /* total length of r_sge */
-	u32 r_rcv_len;          /* receive data len processed */
-	u32 r_psn;              /* expected rcv packet sequence number */
-	u32 r_msn;              /* message sequence number */
-
-	u8 r_state;             /* opcode of last packet received */
-	u8 r_flags;
-	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-
-	struct list_head rspwait;       /* link for waititing to respond */
-
-	struct qib_sge_state r_sge;     /* current receive data */
-	struct qib_rq r_rq;             /* receive work queue */
-
-	spinlock_t s_lock ____cacheline_aligned_in_smp;
-	struct qib_sge_state *s_cur_sge;
-	u32 s_flags;
-	struct qib_verbs_txreq *s_tx;
-	struct qib_swqe *s_wqe;
-	struct qib_sge_state s_sge;     /* current send request data */
-	struct qib_mregion *s_rdma_mr;
-	atomic_t s_dma_busy;
-	u32 s_cur_size;         /* size of send packet in bytes */
-	u32 s_len;              /* total length of s_sge */
-	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-	u32 s_next_psn;         /* PSN for next request */
-	u32 s_last_psn;         /* last response PSN processed */
-	u32 s_sending_psn;      /* lowest PSN that is being sent */
-	u32 s_sending_hpsn;     /* highest PSN that is being sent */
-	u32 s_psn;              /* current packet sequence number */
-	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-	u32 s_head;             /* new entries added here */
-	u32 s_tail;             /* next entry to process */
-	u32 s_cur;              /* current work queue entry */
-	u32 s_acked;            /* last un-ACK'ed entry */
-	u32 s_last;             /* last completed entry */
-	u32 s_ssn;              /* SSN of tail entry */
-	u32 s_lsn;              /* limit sequence number (credit) */
-	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-	u16 s_rdma_ack_cnt;
-	u8 s_state;             /* opcode of last packet sent */
-	u8 s_ack_state;         /* opcode of packet to ACK */
-	u8 s_nak_state;         /* non-zero if NAK is pending */
-	u8 r_nak_state;         /* non-zero if NAK is pending */
-	u8 s_retry;             /* requester retry counter */
-	u8 s_rnr_retry;         /* requester RNR retry counter */
-	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-
-	struct qib_sge_state s_ack_rdma_sge;
-	struct timer_list s_timer;
+struct qib_qp_priv {
+	struct qib_ib_header *s_hdr;    /* next packet header to send */
 	struct list_head iowait;        /* link for wait PIO buf */
-
+	atomic_t s_dma_busy;
+	struct qib_verbs_txreq *s_tx;
 	struct work_struct s_work;
-
 	wait_queue_head_t wait_dma;
-
-	struct qib_sge r_sg_list[0] /* verified SGEs */
-		____cacheline_aligned_in_smp;
+	struct rvt_qp *owner;
 };
 
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define QIB_R_WRID_VALID        0
-#define QIB_R_REWIND_SGE        1
-
-/*
- * Bit definitions for r_flags.
- */
-#define QIB_R_REUSE_SGE 0x01
-#define QIB_R_RDMAR_SEQ 0x02
-#define QIB_R_RSP_NAK   0x04
-#define QIB_R_RSP_SEND  0x08
-#define QIB_R_COMM_EST  0x10
-
-/*
- * Bit definitions for s_flags.
- *
- * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
- * QIB_S_BUSY - send tasklet is processing the QP
- * QIB_S_TIMER - the RC retry timer is active
- * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
- * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
- *                         before processing the next SWQE
- * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
- *                         before processing the next SWQE
- * QIB_S_WAIT_RNR - waiting for RNR timeout
- * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                  next send completion entry not via send DMA
- * QIB_S_WAIT_PIO - waiting for a send buffer to be available
- * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available
- * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
- * QIB_S_WAIT_KMEM - waiting for kernel memory to be available
- * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
- * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests
- * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK
- */
-#define QIB_S_SIGNAL_REQ_WR	0x0001
-#define QIB_S_BUSY		0x0002
-#define QIB_S_TIMER		0x0004
-#define QIB_S_RESP_PENDING	0x0008
-#define QIB_S_ACK_PENDING	0x0010
-#define QIB_S_WAIT_FENCE	0x0020
-#define QIB_S_WAIT_RDMAR	0x0040
-#define QIB_S_WAIT_RNR		0x0080
-#define QIB_S_WAIT_SSN_CREDIT	0x0100
-#define QIB_S_WAIT_DMA		0x0200
-#define QIB_S_WAIT_PIO		0x0400
-#define QIB_S_WAIT_TX		0x0800
-#define QIB_S_WAIT_DMA_DESC	0x1000
-#define QIB_S_WAIT_KMEM		0x2000
-#define QIB_S_WAIT_PSN		0x4000
-#define QIB_S_WAIT_ACK		0x8000
-#define QIB_S_SEND_ONE		0x10000
-#define QIB_S_UNLIMITED_CREDIT	0x20000
-
-/*
- * Wait flags that would prevent any packet type from being sent.
- */
-#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \
-	QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM)
-
-/*
- * Wait flags that would prevent send work requests from making progress.
- */
-#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \
-	QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \
-	QIB_S_WAIT_PSN | QIB_S_WAIT_ACK)
-
-#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND)
-
 #define QIB_PSN_CREDIT  16
 
-/*
- * Since struct qib_swqe is not a fixed size, we can't simply index into
- * struct qib_qp.s_wq.  This function does the array index computation.
- */
-static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp,
-					      unsigned n)
-{
-	return (struct qib_swqe *)((char *)qp->s_wq +
-				     (sizeof(struct qib_swqe) +
-				      qp->s_max_sge *
-				      sizeof(struct qib_sge)) * n);
-}
-
-/*
- * Since struct qib_rwqe is not a fixed size, we can't simply index into
- * struct qib_rwq.wq.  This function does the array index computation.
- */
-static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n)
-{
-	return (struct qib_rwqe *)
-		((char *) rq->wq->wq +
-		 (sizeof(struct qib_rwqe) +
-		  rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-	void *page;
-};
-
-struct qib_qpn_table {
-	spinlock_t lock; /* protect changes in this struct */
-	unsigned flags;         /* flags for QP0/1 allocated for each port */
-	u32 last;               /* last QP number allocated */
-	u32 nmaps;              /* size of the map table */
-	u16 limit;
-	u16 mask;
-	/* bit map of free QP numbers other than 0/1 */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-#define MAX_LKEY_TABLE_BITS 23
-
-struct qib_lkey_table {
-	spinlock_t lock; /* protect changes in this struct */
-	u32 next;               /* next unused index (speeds search) */
-	u32 gen;                /* generation count */
-	u32 max;                /* size of the table */
-	struct qib_mregion __rcu **table;
-};
-
 struct qib_opcode_stats {
 	u64 n_packets;          /* number of packets */
 	u64 n_bytes;            /* total number of bytes */
@@ -682,21 +218,9 @@
 };
 
 struct qib_ibport {
-	struct qib_qp __rcu *qp0;
-	struct qib_qp __rcu *qp1;
-	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
-	struct qib_ah *sm_ah;
-	struct qib_ah *smi_ah;
-	struct rb_root mcast_tree;
-	spinlock_t lock;		/* protect changes in this struct */
-
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-	unsigned long trap_timeout;
-	__be64 gid_prefix;      /* in network order */
-	__be64 mkey;
+	struct rvt_ibport rvp;
+	struct rvt_ah *smi_ah;
 	__be64 guids[QIB_GUIDS_PER_PORT	- 1];	/* writable GUIDs */
-	u64 tid;		/* TID for traps */
 	struct qib_pma_counters __percpu *pmastats;
 	u64 z_unicast_xmit;     /* starting count for PMA */
 	u64 z_unicast_rcv;      /* starting count for PMA */
@@ -715,82 +239,25 @@
 	u32 z_local_link_integrity_errors;      /* starting count for PMA */
 	u32 z_excessive_buffer_overrun_errors;  /* starting count for PMA */
 	u32 z_vl15_dropped;                     /* starting count for PMA */
-	u32 n_rc_resends;
-	u32 n_rc_acks;
-	u32 n_rc_qacks;
-	u32 n_rc_delayed_comp;
-	u32 n_seq_naks;
-	u32 n_rdma_seq;
-	u32 n_rnr_naks;
-	u32 n_other_naks;
-	u32 n_loop_pkts;
-	u32 n_pkt_drops;
-	u32 n_vl15_dropped;
-	u32 n_rc_timeouts;
-	u32 n_dmawait;
-	u32 n_unaligned;
-	u32 n_rc_dupreq;
-	u32 n_rc_seqnak;
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 pkey_violations;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 sm_lid;
-	u16 repress_traps;
-	u8 sm_sl;
-	u8 mkeyprot;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
 	u8 sl_to_vl[16];
-
 };
 
-
 struct qib_ibdev {
-	struct ib_device ibdev;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock; /* protect mmap_offset */
-	u32 mmap_offset;
-	struct qib_mregion __rcu *dma_mr;
+	struct rvt_dev_info rdi;
 
-	/* QP numbers are shared by all IB ports */
-	struct qib_qpn_table qpn_table;
-	struct qib_lkey_table lk_table;
 	struct list_head piowait;       /* list for wait PIO buf */
 	struct list_head dmawait;	/* list for wait DMA */
 	struct list_head txwait;        /* list for wait qib_verbs_txreq */
 	struct list_head memwait;       /* list for wait kernel memory */
 	struct list_head txreq_free;
 	struct timer_list mem_timer;
-	struct qib_qp __rcu **qp_table;
 	struct qib_pio_header *pio_hdrs;
 	dma_addr_t pio_hdrs_phys;
-	/* list of QPs waiting for RNR timer */
-	spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
-	u32 qp_table_size; /* size of the hash table */
 	u32 qp_rnd; /* random bytes for hash */
-	spinlock_t qpt_lock;
 
 	u32 n_piowait;
 	u32 n_txwait;
 
-	u32 n_pds_allocated;    /* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;    /* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;    /* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;    /* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
 #ifdef CONFIG_DEBUG_FS
 	/* per HCA debugfs */
 	struct dentry *qib_ibdev_dbg;
@@ -813,56 +280,27 @@
 	u32 vl15_dropped;
 };
 
-static inline struct qib_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct qib_mr, ibmr);
-}
-
-static inline struct qib_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct qib_pd, ibpd);
-}
-
-static inline struct qib_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct qib_ah, ibah);
-}
-
-static inline struct qib_cq *to_icq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct qib_cq, ibcq);
-}
-
-static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct qib_srq, ibsrq);
-}
-
-static inline struct qib_qp *to_iqp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct qib_qp, ibqp);
-}
-
 static inline struct qib_ibdev *to_idev(struct ib_device *ibdev)
 {
-	return container_of(ibdev, struct qib_ibdev, ibdev);
+	struct rvt_dev_info *rdi;
+
+	rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
+	return container_of(rdi, struct qib_ibdev, rdi);
 }
 
 /*
  * Send if not busy or waiting for I/O and either
  * a RC response is pending or we can process send work requests.
  */
-static inline int qib_send_ok(struct qib_qp *qp)
+static inline int qib_send_ok(struct rvt_qp *qp)
 {
-	return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) &&
-		(qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) ||
-		 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
+	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
+		(qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
+		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
-/*
- * This must be called with s_lock held.
- */
-void qib_schedule_send(struct qib_qp *qp);
+void _qib_schedule_send(struct rvt_qp *qp);
+void qib_schedule_send(struct rvt_qp *qp);
 
 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
 {
@@ -878,7 +316,7 @@
 
 void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 		   u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
-void qib_cap_mask_chg(struct qib_ibport *ibp);
+void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
 void qib_sys_guid_chg(struct qib_ibport *ibp);
 void qib_node_desc_chg(struct qib_ibport *ibp);
 int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -886,8 +324,8 @@
 		    const struct ib_mad_hdr *in, size_t in_mad_size,
 		    struct ib_mad_hdr *out, size_t *out_mad_size,
 		    u16 *out_mad_pkey_index);
-int qib_create_agents(struct qib_ibdev *dev);
-void qib_free_agents(struct qib_ibdev *dev);
+void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx);
+void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx);
 
 /*
  * Compare the lower 24 bits of the two values.
@@ -898,8 +336,6 @@
 	return (((int) a) - ((int) b)) << 8;
 }
 
-struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid);
-
 int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
 			  u64 *rwords, u64 *spkts, u64 *rpkts,
 			  u64 *xmit_wait);
@@ -907,35 +343,17 @@
 int qib_get_counters(struct qib_pportdata *ppd,
 		     struct qib_verbs_counters *cntrs);
 
-int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+__be32 qib_compute_aeth(struct rvt_qp *qp);
 
-int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int qib_mcast_tree_empty(struct qib_ibport *ibp);
-
-__be32 qib_compute_aeth(struct qib_qp *qp);
-
-struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn);
-
-struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata);
-
-int qib_destroy_qp(struct ib_qp *ibqp);
-
-int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err);
-
-int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_udata *udata);
-
-int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		 int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned qib_free_all_qps(struct qib_devdata *dd);
-
-void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
-
-void qib_free_qpn_table(struct qib_qpn_table *qpt);
+/*
+ * Functions provided by qib driver for rdmavt to use
+ */
+unsigned qib_free_all_qps(struct rvt_dev_info *rdi);
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp);
+void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+void qib_notify_qp_reset(struct rvt_qp *qp);
+int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		  enum ib_qp_type type, u8 port, gfp_t gfp);
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -949,7 +367,7 @@
 
 #endif
 
-void qib_get_credit(struct qib_qp *qp, u32 aeth);
+void qib_get_credit(struct rvt_qp *qp, u32 aeth);
 
 unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
 
@@ -957,166 +375,66 @@
 
 void qib_put_txreq(struct qib_verbs_txreq *tx);
 
-int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
-		   u32 hdrwords, struct qib_sge_state *ss, u32 len);
+int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+		   u32 hdrwords, struct rvt_sge_state *ss, u32 len);
 
-void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length,
+void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
 		  int release);
 
-void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release);
+void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
 
 void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
 
 void qib_rc_rnr_retry(unsigned long arg);
 
-void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
+void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr);
 
-void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err);
+void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 
-int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
+int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
 
 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
-
-int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
-
-void qib_free_lkey(struct qib_mregion *mr);
-
-int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
-		struct qib_sge *isge, struct ib_sge *sge, int acc);
-
-int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
-		u32 len, u64 vaddr, u32 rkey, int acc);
-
-int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr);
-
-struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
-			      struct ib_srq_init_attr *srq_init_attr,
-			      struct ib_udata *udata);
-
-int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		   enum ib_srq_attr_mask attr_mask,
-		   struct ib_udata *udata);
-
-int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int qib_destroy_srq(struct ib_srq *ibsrq);
-
-int qib_cq_init(struct qib_devdata *dd);
-
-void qib_cq_exit(struct qib_devdata *dd);
-
-void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
-
-int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *qib_create_cq(struct ib_device *ibdev,
-			    const struct ib_cq_init_attr *attr,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata);
-
-int qib_destroy_cq(struct ib_cq *ibcq);
-
-int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			      u64 virt_addr, int mr_access_flags,
-			      struct ib_udata *udata);
-
-int qib_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
-			   enum ib_mr_type mr_type,
-			   u32 max_entries);
-
-int qib_map_mr_sg(struct ib_mr *ibmr,
-		  struct scatterlist *sg,
-		  int sg_nents);
-
-int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr);
-
-struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			     struct ib_fmr_attr *fmr_attr);
-
-int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		     int list_len, u64 iova);
-
-int qib_unmap_fmr(struct list_head *fmr_list);
-
-int qib_dealloc_fmr(struct ib_fmr *ibfmr);
-
-static inline void qib_get_mr(struct qib_mregion *mr)
-{
-	atomic_inc(&mr->refcount);
-}
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 void mr_rcu_callback(struct rcu_head *list);
 
-static inline void qib_put_mr(struct qib_mregion *mr)
-{
-	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		call_rcu(&mr->list, mr_rcu_callback);
-}
+int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only);
 
-static inline void qib_put_ss(struct qib_sge_state *ss)
-{
-	while (ss->num_sge) {
-		qib_put_mr(ss->sge.mr);
-		if (--ss->num_sge)
-			ss->sge = *ss->sg_list++;
-	}
-}
-
-
-void qib_release_mmap_info(struct kref *ref);
-
-struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
-					   struct ib_ucontext *context,
-					   void *obj);
-
-void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
-			  u32 size, void *obj);
-
-int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int qib_get_rwqe(struct qib_qp *qp, int wr_id_only);
-
-void qib_migrate_qp(struct qib_qp *qp);
+void qib_migrate_qp(struct rvt_qp *qp);
 
 int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		      int has_grh, struct qib_qp *qp, u32 bth0);
+		      int has_grh, struct rvt_qp *qp, u32 bth0);
 
 u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
 		 struct ib_global_route *grh, u32 hwords, u32 nwords);
 
-void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
 			 u32 bth0, u32 bth2);
 
-void qib_do_send(struct work_struct *work);
+void _qib_do_send(struct work_struct *work);
 
-void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+void qib_do_send(struct rvt_qp *qp);
+
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 		       enum ib_wc_status status);
 
-void qib_send_rc_ack(struct qib_qp *qp);
+void qib_send_rc_ack(struct rvt_qp *qp);
 
-int qib_make_rc_req(struct qib_qp *qp);
+int qib_make_rc_req(struct rvt_qp *qp);
 
-int qib_make_uc_req(struct qib_qp *qp);
+int qib_make_uc_req(struct rvt_qp *qp);
 
-int qib_make_ud_req(struct qib_qp *qp);
+int qib_make_ud_req(struct rvt_qp *qp);
 
 int qib_register_ib_device(struct qib_devdata *);
 
@@ -1150,11 +468,11 @@
 #define IB_PHYSPORTSTATE_CFG_ENH 0x10
 #define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13
 
-extern const int ib_qib_state_ops[];
+extern const int ib_rvt_state_ops[];
 
 extern __be64 ib_qib_sys_image_guid;    /* in network order */
 
-extern unsigned int ib_qib_lkey_table_size;
+extern unsigned int ib_rvt_lkey_table_size;
 
 extern unsigned int ib_qib_max_cqes;
 
@@ -1178,6 +496,4 @@
 
 extern const u32 ib_qib_rnr_table[];
 
-extern struct ib_dma_mapping_ops qib_dma_mapping_ops;
-
 #endif                          /* QIB_VERBS_H */

diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
deleted file mode 100644
index b2fb528..0000000
--- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c
+++ /dev/null

@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-
-#include "qib.h"
-
-/**
- * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp)
-{
-	struct qib_mcast_qp *mqp;
-
-	mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
-	if (!mqp)
-		goto bail;
-
-	mqp->qp = qp;
-	atomic_inc(&qp->refcount);
-
-bail:
-	return mqp;
-}
-
-static void qib_mcast_qp_free(struct qib_mcast_qp *mqp)
-{
-	struct qib_qp *qp = mqp->qp;
-
-	/* Notify qib_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-
-	kfree(mqp);
-}
-
-/**
- * qib_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid)
-{
-	struct qib_mcast *mcast;
-
-	mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
-	if (!mcast)
-		goto bail;
-
-	mcast->mgid = *mgid;
-	INIT_LIST_HEAD(&mcast->qp_list);
-	init_waitqueue_head(&mcast->wait);
-	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
-
-bail:
-	return mcast;
-}
-
-static void qib_mcast_free(struct qib_mcast *mcast)
-{
-	struct qib_mcast_qp *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		qib_mcast_qp_free(p);
-
-	kfree(mcast);
-}
-
-/**
- * qib_mcast_find - search the global table for the given multicast GID
- * @ibp: the IB port structure
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid)
-{
-	struct rb_node *n;
-	unsigned long flags;
-	struct qib_mcast *mcast;
-
-	spin_lock_irqsave(&ibp->lock, flags);
-	n = ibp->mcast_tree.rb_node;
-	while (n) {
-		int ret;
-
-		mcast = rb_entry(n, struct qib_mcast, rb_node);
-
-		ret = memcmp(mgid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else {
-			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&ibp->lock, flags);
-			goto bail;
-		}
-	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
-}
-
-/**
- * qib_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp,
-			 struct qib_mcast *mcast, struct qib_mcast_qp *mqp)
-{
-	struct rb_node **n = &ibp->mcast_tree.rb_node;
-	struct rb_node *pn = NULL;
-	int ret;
-
-	spin_lock_irq(&ibp->lock);
-
-	while (*n) {
-		struct qib_mcast *tmcast;
-		struct qib_mcast_qp *p;
-
-		pn = *n;
-		tmcast = rb_entry(pn, struct qib_mcast, rb_node);
-
-		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0) {
-			n = &pn->rb_left;
-			continue;
-		}
-		if (ret > 0) {
-			n = &pn->rb_right;
-			continue;
-		}
-
-		/* Search the QP list to see if this is already there. */
-		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-			if (p->qp == mqp->qp) {
-				ret = ESRCH;
-				goto bail;
-			}
-		}
-		if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) {
-			ret = ENOMEM;
-			goto bail;
-		}
-
-		tmcast->n_attached++;
-
-		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-		ret = EEXIST;
-		goto bail;
-	}
-
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
-		ret = ENOMEM;
-		goto bail;
-	}
-
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
-
-	mcast->n_attached++;
-
-	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-	atomic_inc(&mcast->refcount);
-	rb_link_node(&mcast->rb_node, pn, n);
-	rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
-
-	ret = 0;
-
-bail:
-	spin_unlock_irq(&ibp->lock);
-
-	return ret;
-}
-
-int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_ibport *ibp;
-	struct qib_mcast *mcast;
-	struct qib_mcast_qp *mqp;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Allocate data structures since its better to do this outside of
-	 * spin locks and it will most likely be needed.
-	 */
-	mcast = qib_mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = qib_mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		qib_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	ibp = to_iport(ibqp->device, qp->port_num);
-	switch (qib_mcast_add(dev, ibp, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: OK to attach the same QP twice. */
-		qib_mcast_qp_free(mqp);
-		qib_mcast_free(mcast);
-		break;
-
-	case EEXIST:            /* The mcast wasn't used */
-		qib_mcast_free(mcast);
-		break;
-
-	case ENOMEM:
-		/* Exceeded the maximum number of mcast groups. */
-		qib_mcast_qp_free(mqp);
-		qib_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-
-	default:
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
-	struct qib_mcast *mcast = NULL;
-	struct qib_mcast_qp *p, *tmp, *delp = NULL;
-	struct rb_node *n;
-	int last = 0;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
-		return -EINVAL;
-
-	spin_lock_irq(&ibp->lock);
-
-	/* Find the GID in the mcast table. */
-	n = ibp->mcast_tree.rb_node;
-	while (1) {
-		if (n == NULL) {
-			spin_unlock_irq(&ibp->lock);
-			return -EINVAL;
-		}
-
-		mcast = rb_entry(n, struct qib_mcast, rb_node);
-		ret = memcmp(gid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else
-			break;
-	}
-
-	/* Search the QP list. */
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-		if (p->qp != qp)
-			continue;
-		/*
-		 * We found it, so remove it, but don't poison the forward
-		 * link until we are sure there are no list walkers.
-		 */
-		list_del_rcu(&p->list);
-		mcast->n_attached--;
-		delp = p;
-
-		/* If this was the last attached QP, remove the GID too. */
-		if (list_empty(&mcast->qp_list)) {
-			rb_erase(&mcast->rb_node, &ibp->mcast_tree);
-			last = 1;
-		}
-		break;
-	}
-
-	spin_unlock_irq(&ibp->lock);
-	/* QP not attached */
-	if (!delp)
-		return -EINVAL;
-	/*
-	 * Wait for any list walkers to finish before freeing the
-	 * list element.
-	 */
-	wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-	qib_mcast_qp_free(delp);
-
-	if (last) {
-		atomic_dec(&mcast->refcount);
-		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		qib_mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
-	}
-	return 0;
-}
-
-int qib_mcast_tree_empty(struct qib_ibport *ibp)
-{
-	return ibp->mcast_tree.rb_node == NULL;
-}

diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile
new file mode 100644
index 0000000..988b6a0
--- /dev/null
+++ b/drivers/infiniband/sw/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_INFINIBAND_RDMAVT)		+= rdmavt/

diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
new file mode 100644
index 0000000..11aa6a3
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/Kconfig

@@ -0,0 +1,6 @@
+config INFINIBAND_RDMAVT
+	tristate "RDMA verbs transport library"
+	depends on 64BIT
+	default m
+	---help---
+	This is a common software verbs provider for RDMA networks.

diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile
new file mode 100644
index 0000000..ccaa799
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/Makefile

@@ -0,0 +1,13 @@
+#
+# rdmavt driver
+#
+#
+#
+# Called from the kernel module build system.
+#
+obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
+
+rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \
+	trace.o
+
+CFLAGS_trace.o = -I$(src)

diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
new file mode 100644
index 0000000..16c4461
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/ah.c

@@ -0,0 +1,196 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "ah.h"
+#include "vt.h" /* for prints */
+
+/**
+ * rvt_check_ah - validate the attributes of AH
+ * @ibdev: the ib device
+ * @ah_attr: the attributes of the AH
+ *
+ * If driver supports a more detailed check_ah function call back to it
+ * otherwise just check the basics.
+ *
+ * Return: 0 on success
+ */
+int rvt_check_ah(struct ib_device *ibdev,
+		 struct ib_ah_attr *ah_attr)
+{
+	int err;
+	struct ib_port_attr port_attr;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	enum rdma_link_layer link = rdma_port_get_link_layer(ibdev,
+							     ah_attr->port_num);
+
+	err = ib_query_port(ibdev, ah_attr->port_num, &port_attr);
+	if (err)
+		return -EINVAL;
+	if (ah_attr->port_num < 1 ||
+	    ah_attr->port_num > ibdev->phys_port_cnt)
+		return -EINVAL;
+	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
+	    ib_rate_to_mbps(ah_attr->static_rate) < 0)
+		return -EINVAL;
+	if ((ah_attr->ah_flags & IB_AH_GRH) &&
+	    ah_attr->grh.sgid_index >= port_attr.gid_tbl_len)
+		return -EINVAL;
+	if (link != IB_LINK_LAYER_ETHERNET) {
+		if (ah_attr->dlid == 0)
+			return -EINVAL;
+		if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+		    ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
+		    !(ah_attr->ah_flags & IB_AH_GRH))
+			return -EINVAL;
+	}
+	if (rdi->driver_f.check_ah)
+		return rdi->driver_f.check_ah(ibdev, ah_attr);
+	return 0;
+}
+EXPORT_SYMBOL(rvt_check_ah);
+
+/**
+ * rvt_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: newly allocated ah
+ */
+struct ib_ah *rvt_create_ah(struct ib_pd *pd,
+			    struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah;
+	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
+	unsigned long flags;
+
+	if (rvt_check_ah(pd->device, ah_attr))
+		return ERR_PTR(-EINVAL);
+
+	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
+	if (dev->n_ahs_allocated == dev->dparms.props.max_ah) {
+		spin_unlock(&dev->n_ahs_lock);
+		kfree(ah);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dev->n_ahs_allocated++;
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+	ah->attr = *ah_attr;
+	atomic_set(&ah->refcount, 0);
+
+	if (dev->driver_f.notify_new_ah)
+		dev->driver_f.notify_new_ah(pd->device, ah_attr, ah);
+
+	return &ah->ibah;
+}
+
+/**
+ * rvt_destory_ah - Destory an address handle
+ * @ibah: address handle
+ *
+ * Return: 0 on success
+ */
+int rvt_destroy_ah(struct ib_ah *ibah)
+{
+	struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+	unsigned long flags;
+
+	if (atomic_read(&ah->refcount) != 0)
+		return -EBUSY;
+
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
+	dev->n_ahs_allocated--;
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+	kfree(ah);
+
+	return 0;
+}
+
+/**
+ * rvt_modify_ah - modify an ah with given attrs
+ * @ibah: address handle to modify
+ * @ah_attr: attrs to apply
+ *
+ * Return: 0 on success
+ */
+int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+
+	if (rvt_check_ah(ibah->device, ah_attr))
+		return -EINVAL;
+
+	ah->attr = *ah_attr;
+
+	return 0;
+}
+
+/**
+ * rvt_query_ah - return attrs for ah
+ * @ibah: address handle to query
+ * @ah_attr: return info in this
+ *
+ * Return: always 0
+ */
+int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+
+	*ah_attr = ah->attr;
+
+	return 0;
+}

diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
new file mode 100644
index 0000000..e9c36be
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/ah.h

@@ -0,0 +1,59 @@
+#ifndef DEF_RVTAH_H
+#define DEF_RVTAH_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+struct ib_ah *rvt_create_ah(struct ib_pd *pd,
+			    struct ib_ah_attr *ah_attr);
+int rvt_destroy_ah(struct ib_ah *ibah);
+int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+
+#endif          /* DEF_RVTAH_H */

diff --git a/drivers/staging/rdma/hfi1/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
similarity index 72%
rename from drivers/staging/rdma/hfi1/cq.c
rename to drivers/infiniband/sw/rdmavt/cq.c
index 4f046ff..b1ffc8b 100644
--- a/drivers/staging/rdma/hfi1/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -48,25 +45,23 @@
  *
  */
 
-#include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
-
-#include "verbs.h"
-#include "hfi.h"
+#include "cq.h"
+#include "vt.h"
 
 /**
- * hfi1_cq_enter - add a new entry to the completion queue
+ * rvt_cq_enter - add a new entry to the completion queue
  * @cq: completion queue
  * @entry: work completion entry to add
- * @sig: true if @entry is a solicited entry
+ * @sig: true if @entry is solicited
  *
  * This may be called with qp->s_lock held.
  */
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
+void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
 {
-	struct hfi1_cq_wc *wc;
+	struct rvt_cq_wc *wc;
 	unsigned long flags;
 	u32 head;
 	u32 next;
@@ -79,11 +74,13 @@
 	 */
 	wc = cq->queue;
 	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
+	if (head >= (unsigned)cq->ibcq.cqe) {
 		head = cq->ibcq.cqe;
 		next = 0;
-	} else
+	} else {
 		next = head + 1;
+	}
+
 	if (unlikely(next == wc->tail)) {
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
@@ -114,8 +111,9 @@
 		wc->uqueue[head].port_num = entry->port_num;
 		/* Make sure entry is written before the head index. */
 		smp_wmb();
-	} else
+	} else {
 		wc->kqueue[head] = *entry;
+	}
 	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -126,10 +124,10 @@
 		 * This will cause send_complete() to be called in
 		 * another thread.
 		 */
-		smp_read_barrier_depends(); /* see hfi1_cq_exit */
-		worker = cq->dd->worker;
+		smp_read_barrier_depends(); /* see rvt_cq_exit */
+		worker = cq->rdi->worker;
 		if (likely(worker)) {
-			cq->notify = IB_CQ_NONE;
+			cq->notify = RVT_CQ_NONE;
 			cq->triggered++;
 			queue_kthread_work(worker, &cq->comptask);
 		}
@@ -137,59 +135,11 @@
 
 	spin_unlock_irqrestore(&cq->lock, flags);
 }
-
-/**
- * hfi1_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct hfi1_cq *cq = to_icq(ibcq);
-	struct hfi1_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
+EXPORT_SYMBOL(rvt_cq_enter);
 
 static void send_complete(struct kthread_work *work)
 {
-	struct hfi1_cq *cq = container_of(work, struct hfi1_cq, comptask);
+	struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
 
 	/*
 	 * The completion handler will most likely rearm the notification
@@ -217,26 +167,25 @@
 }
 
 /**
- * hfi1_create_cq - create a completion queue
+ * rvt_create_cq - create a completion queue
  * @ibdev: the device this completion queue is attached to
  * @attr: creation attributes
- * @context: unused by the driver
+ * @context: unused by the QLogic_IB driver
  * @udata: user data for libibverbs.so
  *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
  * Called by ib_create_cq() in the generic verbs code.
+ *
+ * Return: pointer to the completion queue or negative errno values
+ * for failure.
  */
-struct ib_cq *hfi1_create_cq(
-	struct ib_device *ibdev,
-	const struct ib_cq_init_attr *attr,
-	struct ib_ucontext *context,
-	struct ib_udata *udata)
+struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
+			    const struct ib_cq_init_attr *attr,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata)
 {
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-	struct hfi1_cq *cq;
-	struct hfi1_cq_wc *wc;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct rvt_cq *cq;
+	struct rvt_cq_wc *wc;
 	struct ib_cq *ret;
 	u32 sz;
 	unsigned int entries = attr->cqe;
@@ -244,11 +193,11 @@
 	if (attr->flags)
 		return ERR_PTR(-EINVAL);
 
-	if (entries < 1 || entries > hfi1_max_cqes)
+	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
 		return ERR_PTR(-EINVAL);
 
 	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq)
 		return ERR_PTR(-ENOMEM);
 
@@ -272,12 +221,12 @@
 
 	/*
 	 * Return the address of the WC as the offset to mmap.
-	 * See hfi1_mmap() for details.
+	 * See rvt_mmap() for details.
 	 */
 	if (udata && udata->outlen >= sizeof(__u64)) {
 		int err;
 
-		cq->ip = hfi1_create_mmap_info(dev, sz, context, wc);
+		cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
 		if (!cq->ip) {
 			ret = ERR_PTR(-ENOMEM);
 			goto bail_wc;
@@ -289,23 +238,22 @@
 			ret = ERR_PTR(err);
 			goto bail_ip;
 		}
-	} else
-		cq->ip = NULL;
+	}
 
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == hfi1_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
+	spin_lock(&rdi->n_cqs_lock);
+	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
+		spin_unlock(&rdi->n_cqs_lock);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_ip;
 	}
 
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
+	rdi->n_cqs_allocated++;
+	spin_unlock(&rdi->n_cqs_lock);
 
 	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
+		spin_lock_irq(&rdi->pending_lock);
+		list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
 	}
 
 	/*
@@ -313,14 +261,11 @@
 	 * The number of entries should be >= the number requested or return
 	 * an error.
 	 */
-	cq->dd = dd_from_dev(dev);
+	cq->rdi = rdi;
 	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
+	cq->notify = RVT_CQ_NONE;
 	spin_lock_init(&cq->lock);
 	init_kthread_work(&cq->comptask, send_complete);
-	wc->head = 0;
-	wc->tail = 0;
 	cq->queue = wc;
 
 	ret = &cq->ibcq;
@@ -338,24 +283,24 @@
 }
 
 /**
- * hfi1_destroy_cq - destroy a completion queue
+ * rvt_destroy_cq - destroy a completion queue
  * @ibcq: the completion queue to destroy.
  *
- * Returns 0 for success.
- *
  * Called by ib_destroy_cq() in the generic verbs code.
+ *
+ * Return: always 0
  */
-int hfi1_destroy_cq(struct ib_cq *ibcq)
+int rvt_destroy_cq(struct ib_cq *ibcq)
 {
-	struct hfi1_ibdev *dev = to_idev(ibcq->device);
-	struct hfi1_cq *cq = to_icq(ibcq);
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_dev_info *rdi = cq->rdi;
 
 	flush_kthread_work(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
+	spin_lock(&rdi->n_cqs_lock);
+	rdi->n_cqs_allocated--;
+	spin_unlock(&rdi->n_cqs_lock);
 	if (cq->ip)
-		kref_put(&cq->ip->ref, hfi1_release_mmap_info);
+		kref_put(&cq->ip->ref, rvt_release_mmap_info);
 	else
 		vfree(cq->queue);
 	kfree(cq);
@@ -364,18 +309,18 @@
 }
 
 /**
- * hfi1_req_notify_cq - change the notification type for a completion queue
+ * rvt_req_notify_cq - change the notification type for a completion queue
  * @ibcq: the completion queue
  * @notify_flags: the type of notification to request
  *
- * Returns 0 for success.
- *
  * This may be called from interrupt context.  Also called by
  * ib_req_notify_cq() in the generic verbs code.
+ *
+ * Return: 0 for success.
  */
-int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
 {
-	struct hfi1_cq *cq = to_icq(ibcq);
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 	unsigned long flags;
 	int ret = 0;
 
@@ -397,24 +342,23 @@
 }
 
 /**
- * hfi1_resize_cq - change the size of the CQ
+ * rvt_resize_cq - change the size of the CQ
  * @ibcq: the completion queue
  *
- * Returns 0 for success.
+ * Return: 0 for success.
  */
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
-	struct hfi1_cq *cq = to_icq(ibcq);
-	struct hfi1_cq_wc *old_wc;
-	struct hfi1_cq_wc *wc;
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_cq_wc *old_wc;
+	struct rvt_cq_wc *wc;
 	u32 head, tail, n;
 	int ret;
 	u32 sz;
+	struct rvt_dev_info *rdi = cq->rdi;
 
-	if (cqe < 1 || cqe > hfi1_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
+		return -EINVAL;
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
@@ -425,10 +369,8 @@
 	else
 		sz += sizeof(struct ib_wc) * (cqe + 1);
 	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
+	if (!wc)
+		return -ENOMEM;
 
 	/* Check that we can write the offset to mmap. */
 	if (udata && udata->outlen >= sizeof(__u64)) {
@@ -446,11 +388,11 @@
 	 */
 	old_wc = cq->queue;
 	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
+	if (head > (u32)cq->ibcq.cqe)
+		head = (u32)cq->ibcq.cqe;
 	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
+	if (tail > (u32)cq->ibcq.cqe)
+		tail = (u32)cq->ibcq.cqe;
 	if (head < tail)
 		n = cq->ibcq.cqe + 1 + head - tail;
 	else
@@ -464,7 +406,7 @@
 			wc->uqueue[n] = old_wc->uqueue[tail];
 		else
 			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
+		if (tail == (u32)cq->ibcq.cqe)
 			tail = 0;
 		else
 			tail++;
@@ -478,80 +420,131 @@
 	vfree(old_wc);
 
 	if (cq->ip) {
-		struct hfi1_ibdev *dev = to_idev(ibcq->device);
-		struct hfi1_mmap_info *ip = cq->ip;
+		struct rvt_mmap_info *ip = cq->ip;
 
-		hfi1_update_mmap_info(dev, ip, sz, wc);
+		rvt_update_mmap_info(rdi, ip, sz, wc);
 
 		/*
 		 * Return the offset to mmap.
-		 * See hfi1_mmap() for details.
+		 * See rvt_mmap() for details.
 		 */
 		if (udata && udata->outlen >= sizeof(__u64)) {
 			ret = ib_copy_to_udata(udata, &ip->offset,
 					       sizeof(ip->offset));
 			if (ret)
-				goto bail;
+				return ret;
 		}
 
-		spin_lock_irq(&dev->pending_lock);
+		spin_lock_irq(&rdi->pending_lock);
 		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
+			list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
 	}
 
-	ret = 0;
-	goto bail;
+	return 0;
 
 bail_unlock:
 	spin_unlock_irq(&cq->lock);
 bail_free:
 	vfree(wc);
-bail:
 	return ret;
 }
 
-int hfi1_cq_init(struct hfi1_devdata *dd)
+/**
+ * rvt_poll_cq - poll for work completion entries
+ * @ibcq: the completion queue to poll
+ * @num_entries: the maximum number of entries to return
+ * @entry: pointer to array where work completions are placed
+ *
+ * This may be called from interrupt context.  Also called by ib_poll_cq()
+ * in the generic verbs code.
+ *
+ * Return: the number of completion entries polled.
+ */
+int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_cq_wc *wc;
+	unsigned long flags;
+	int npolled;
+	u32 tail;
+
+	/* The kernel can only poll a kernel completion queue */
+	if (cq->ip)
+		return -EINVAL;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	wc = cq->queue;
+	tail = wc->tail;
+	if (tail > (u32)cq->ibcq.cqe)
+		tail = (u32)cq->ibcq.cqe;
+	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+		if (tail == wc->head)
+			break;
+		/* The kernel doesn't need a RMB since it has the lock. */
+		*entry = wc->kqueue[tail];
+		if (tail >= cq->ibcq.cqe)
+			tail = 0;
+		else
+			tail++;
+	}
+	wc->tail = tail;
+
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return npolled;
+}
+
+/**
+ * rvt_driver_cq_init - Init cq resources on behalf of driver
+ * @rdi: rvt dev structure
+ *
+ * Return: 0 on success
+ */
+int rvt_driver_cq_init(struct rvt_dev_info *rdi)
 {
 	int ret = 0;
 	int cpu;
 	struct task_struct *task;
 
-	if (dd->worker)
+	if (rdi->worker)
 		return 0;
-	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
-	if (!dd->worker)
+	rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
+	if (!rdi->worker)
 		return -ENOMEM;
-	init_kthread_worker(dd->worker);
+	init_kthread_worker(rdi->worker);
 	task = kthread_create_on_node(
 		kthread_worker_fn,
-		dd->worker,
-		dd->assigned_node_id,
-		"hfi1_cq%d", dd->unit);
-	if (IS_ERR(task))
-		goto task_fail;
-	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
+		rdi->worker,
+		rdi->dparms.node,
+		"%s", rdi->dparms.cq_name);
+	if (IS_ERR(task)) {
+		kfree(rdi->worker);
+		rdi->worker = NULL;
+		return PTR_ERR(task);
+	}
+
+	cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
 	kthread_bind(task, cpu);
 	wake_up_process(task);
-out:
 	return ret;
-task_fail:
-	ret = PTR_ERR(task);
-	kfree(dd->worker);
-	dd->worker = NULL;
-	goto out;
 }
 
-void hfi1_cq_exit(struct hfi1_devdata *dd)
+/**
+ * rvt_cq_exit - tear down cq reources
+ * @rdi: rvt dev structure
+ */
+void rvt_cq_exit(struct rvt_dev_info *rdi)
 {
 	struct kthread_worker *worker;
 
-	worker = dd->worker;
+	worker = rdi->worker;
 	if (!worker)
 		return;
 	/* blocks future queuing from send_complete() */
-	dd->worker = NULL;
-	smp_wmb(); /* See hfi1_cq_enter */
+	rdi->worker = NULL;
+	smp_wmb(); /* See rdi_cq_enter */
 	flush_kthread_worker(worker);
 	kthread_stop(worker->task);
 	kfree(worker);

diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
new file mode 100644
index 0000000..6182c29
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/cq.h

@@ -0,0 +1,64 @@
+#ifndef DEF_RVTCQ_H
+#define DEF_RVTCQ_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_cq.h>
+
+struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
+			    const struct ib_cq_init_attr *attr,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata);
+int rvt_destroy_cq(struct ib_cq *ibcq);
+int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+int rvt_driver_cq_init(struct rvt_dev_info *rdi);
+void rvt_cq_exit(struct rvt_dev_info *rdi);
+#endif          /* DEF_RVTCQ_H */

diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
new file mode 100644
index 0000000..33076a5
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/dma.c

@@ -0,0 +1,184 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/types.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+
+#include "dma.h"
+
+#define BAD_DMA_ADDRESS ((u64)0)
+
+/*
+ * The following functions implement driver specific replacements
+ * for the ib_dma_*() functions.
+ *
+ * These functions return kernel virtual addresses instead of
+ * device bus addresses since the driver uses the CPU to copy
+ * data instead of using hardware DMA.
+ */
+
+static int rvt_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+	return dma_addr == BAD_DMA_ADDRESS;
+}
+
+static u64 rvt_dma_map_single(struct ib_device *dev, void *cpu_addr,
+			      size_t size, enum dma_data_direction direction)
+{
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return BAD_DMA_ADDRESS;
+
+	return (u64)cpu_addr;
+}
+
+static void rvt_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
+				 enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
+			    unsigned long offset, size_t size,
+			    enum dma_data_direction direction)
+{
+	u64 addr;
+
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return BAD_DMA_ADDRESS;
+
+	if (offset + size > PAGE_SIZE)
+		return BAD_DMA_ADDRESS;
+
+	addr = (u64)page_address(page);
+	if (addr)
+		addr += offset;
+
+	return addr;
+}
+
+static void rvt_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
+			       enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static int rvt_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+		      int nents, enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	u64 addr;
+	int i;
+	int ret = nents;
+
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return 0;
+
+	for_each_sg(sgl, sg, nents, i) {
+		addr = (u64)page_address(sg_page(sg));
+		if (!addr) {
+			ret = 0;
+			break;
+		}
+		sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+		sg->dma_length = sg->length;
+#endif
+	}
+	return ret;
+}
+
+static void rvt_unmap_sg(struct ib_device *dev,
+			 struct scatterlist *sg, int nents,
+			 enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr,
+				    size_t size, enum dma_data_direction dir)
+{
+}
+
+static void rvt_sync_single_for_device(struct ib_device *dev, u64 addr,
+				       size_t size,
+				       enum dma_data_direction dir)
+{
+}
+
+static void *rvt_dma_alloc_coherent(struct ib_device *dev, size_t size,
+				    u64 *dma_handle, gfp_t flag)
+{
+	struct page *p;
+	void *addr = NULL;
+
+	p = alloc_pages(flag, get_order(size));
+	if (p)
+		addr = page_address(p);
+	if (dma_handle)
+		*dma_handle = (u64)addr;
+	return addr;
+}
+
+static void rvt_dma_free_coherent(struct ib_device *dev, size_t size,
+				  void *cpu_addr, u64 dma_handle)
+{
+	free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = {
+	.mapping_error = rvt_mapping_error,
+	.map_single = rvt_dma_map_single,
+	.unmap_single = rvt_dma_unmap_single,
+	.map_page = rvt_dma_map_page,
+	.unmap_page = rvt_dma_unmap_page,
+	.map_sg = rvt_map_sg,
+	.unmap_sg = rvt_unmap_sg,
+	.sync_single_for_cpu = rvt_sync_single_for_cpu,
+	.sync_single_for_device = rvt_sync_single_for_device,
+	.alloc_coherent = rvt_dma_alloc_coherent,
+	.free_coherent = rvt_dma_free_coherent
+};

diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h
new file mode 100644
index 0000000..979f07e
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/dma.h

@@ -0,0 +1,53 @@
+#ifndef DEF_RDMAVTDMA_H
+#define DEF_RDMAVTDMA_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+extern struct ib_dma_mapping_ops rvt_default_dma_mapping_ops;
+
+#endif          /* DEF_RDMAVTDMA_H */

diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
new file mode 100644
index 0000000..f6e9977
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mad.c

@@ -0,0 +1,171 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/ib_mad.h>
+#include "mad.h"
+#include "vt.h"
+
+/**
+ * rvt_process_mad - process an incoming MAD packet
+ * @ibdev: the infiniband device this packet came in on
+ * @mad_flags: MAD flags
+ * @port_num: the port number this packet came in on, 1 based from ib core
+ * @in_wc: the work completion entry for this packet
+ * @in_grh: the global route header for this packet
+ * @in_mad: the incoming MAD
+ * @out_mad: any outgoing MAD reply
+ *
+ * Note that the verbs framework has already done the MAD sanity checks,
+ * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
+ * MADs.
+ *
+ * This is called by the ib_mad module.
+ *
+ * Return: IB_MAD_RESULT_SUCCESS or error
+ */
+int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		    const struct ib_mad_hdr *in, size_t in_mad_size,
+		    struct ib_mad_hdr *out, size_t *out_mad_size,
+		    u16 *out_mad_pkey_index)
+{
+	/*
+	 * MAD processing is quite different between hfi1 and qib. Therfore this
+	 * is expected to be provided by the driver. Other drivers in the future
+	 * may chose to implement this but it should not be made into a
+	 * requirement.
+	 */
+	if (ibport_num_to_idx(ibdev, port_num) < 0)
+		return -EINVAL;
+
+	return IB_MAD_RESULT_FAILURE;
+}
+
+static void rvt_send_mad_handler(struct ib_mad_agent *agent,
+				 struct ib_mad_send_wc *mad_send_wc)
+{
+	ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+/**
+ * rvt_create_mad_agents - create mad agents
+ * @rdi: rvt dev struct
+ *
+ * If driver needs to be notified of mad agent creation then call back
+ *
+ * Return 0 on success
+ */
+int rvt_create_mad_agents(struct rvt_dev_info *rdi)
+{
+	struct ib_mad_agent *agent;
+	struct rvt_ibport *rvp;
+	int p;
+	int ret;
+
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		agent = ib_register_mad_agent(&rdi->ibdev, p + 1,
+					      IB_QPT_SMI,
+					      NULL, 0, rvt_send_mad_handler,
+					      NULL, NULL, 0);
+		if (IS_ERR(agent)) {
+			ret = PTR_ERR(agent);
+			goto err;
+		}
+
+		rvp->send_agent = agent;
+
+		if (rdi->driver_f.notify_create_mad_agent)
+			rdi->driver_f.notify_create_mad_agent(rdi, p);
+	}
+
+	return 0;
+
+err:
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		if (rvp->send_agent) {
+			agent = rvp->send_agent;
+			rvp->send_agent = NULL;
+			ib_unregister_mad_agent(agent);
+			if (rdi->driver_f.notify_free_mad_agent)
+				rdi->driver_f.notify_free_mad_agent(rdi, p);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * rvt_free_mad_agents - free up mad agents
+ * @rdi: rvt dev struct
+ *
+ * If driver needs notification of mad agent removal make the call back
+ */
+void rvt_free_mad_agents(struct rvt_dev_info *rdi)
+{
+	struct ib_mad_agent *agent;
+	struct rvt_ibport *rvp;
+	int p;
+
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		if (rvp->send_agent) {
+			agent = rvp->send_agent;
+			rvp->send_agent = NULL;
+			ib_unregister_mad_agent(agent);
+		}
+		if (rvp->sm_ah) {
+			ib_destroy_ah(&rvp->sm_ah->ibah);
+			rvp->sm_ah = NULL;
+		}
+
+		if (rdi->driver_f.notify_free_mad_agent)
+			rdi->driver_f.notify_free_mad_agent(rdi, p);
+	}
+}
+

diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h
new file mode 100644
index 0000000..a9d6eec
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mad.h

@@ -0,0 +1,60 @@
+#ifndef DEF_RVTMAD_H
+#define DEF_RVTMAD_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		    const struct ib_mad_hdr *in, size_t in_mad_size,
+		    struct ib_mad_hdr *out, size_t *out_mad_size,
+		    u16 *out_mad_pkey_index);
+int rvt_create_mad_agents(struct rvt_dev_info *rdi);
+void rvt_free_mad_agents(struct rvt_dev_info *rdi);
+#endif          /* DEF_RVTMAD_H */

diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
new file mode 100644
index 0000000..983d319
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mcast.c

@@ -0,0 +1,417 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/rculist.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
+
+#include "mcast.h"
+
+/**
+ * rvt_driver_mcast - init resources for multicast
+ * @rdi: rvt dev struct
+ *
+ * This is per device that registers with rdmavt
+ */
+void rvt_driver_mcast_init(struct rvt_dev_info *rdi)
+{
+	/*
+	 * Anything that needs setup for multicast on a per driver or per rdi
+	 * basis should be done in here.
+	 */
+	spin_lock_init(&rdi->n_mcast_grps_lock);
+}
+
+/**
+ * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
+ * @qp: the QP to link
+ */
+static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp)
+{
+	struct rvt_mcast_qp *mqp;
+
+	mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
+	if (!mqp)
+		goto bail;
+
+	mqp->qp = qp;
+	atomic_inc(&qp->refcount);
+
+bail:
+	return mqp;
+}
+
+static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
+{
+	struct rvt_qp *qp = mqp->qp;
+
+	/* Notify hfi1_destroy_qp() if it is waiting. */
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+
+	kfree(mqp);
+}
+
+/**
+ * mcast_alloc - allocate the multicast GID structure
+ * @mgid: the multicast GID
+ *
+ * A list of QPs will be attached to this structure.
+ */
+static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
+{
+	struct rvt_mcast *mcast;
+
+	mcast = kzalloc(sizeof(*mcast), GFP_KERNEL);
+	if (!mcast)
+		goto bail;
+
+	mcast->mgid = *mgid;
+	INIT_LIST_HEAD(&mcast->qp_list);
+	init_waitqueue_head(&mcast->wait);
+	atomic_set(&mcast->refcount, 0);
+
+bail:
+	return mcast;
+}
+
+static void rvt_mcast_free(struct rvt_mcast *mcast)
+{
+	struct rvt_mcast_qp *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
+		rvt_mcast_qp_free(p);
+
+	kfree(mcast);
+}
+
+/**
+ * rvt_mcast_find - search the global table for the given multicast GID
+ * @ibp: the IB port structure
+ * @mgid: the multicast GID to search for
+ *
+ * The caller is responsible for decrementing the reference count if found.
+ *
+ * Return: NULL if not found.
+ */
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
+{
+	struct rb_node *n;
+	unsigned long flags;
+	struct rvt_mcast *found = NULL;
+
+	spin_lock_irqsave(&ibp->lock, flags);
+	n = ibp->mcast_tree.rb_node;
+	while (n) {
+		int ret;
+		struct rvt_mcast *mcast;
+
+		mcast = rb_entry(n, struct rvt_mcast, rb_node);
+
+		ret = memcmp(mgid->raw, mcast->mgid.raw,
+			     sizeof(union ib_gid));
+		if (ret < 0) {
+			n = n->rb_left;
+		} else if (ret > 0) {
+			n = n->rb_right;
+		} else {
+			atomic_inc(&mcast->refcount);
+			found = mcast;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&ibp->lock, flags);
+	return found;
+}
+EXPORT_SYMBOL(rvt_mcast_find);
+
+/**
+ * mcast_add - insert mcast GID into table and attach QP struct
+ * @mcast: the mcast GID table
+ * @mqp: the QP to attach
+ *
+ * Return: zero if both were added.  Return EEXIST if the GID was already in
+ * the table but the QP was added.  Return ESRCH if the QP was already
+ * attached and neither structure was added.
+ */
+static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
+			 struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp)
+{
+	struct rb_node **n = &ibp->mcast_tree.rb_node;
+	struct rb_node *pn = NULL;
+	int ret;
+
+	spin_lock_irq(&ibp->lock);
+
+	while (*n) {
+		struct rvt_mcast *tmcast;
+		struct rvt_mcast_qp *p;
+
+		pn = *n;
+		tmcast = rb_entry(pn, struct rvt_mcast, rb_node);
+
+		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
+			     sizeof(union ib_gid));
+		if (ret < 0) {
+			n = &pn->rb_left;
+			continue;
+		}
+		if (ret > 0) {
+			n = &pn->rb_right;
+			continue;
+		}
+
+		/* Search the QP list to see if this is already there. */
+		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
+			if (p->qp == mqp->qp) {
+				ret = ESRCH;
+				goto bail;
+			}
+		}
+		if (tmcast->n_attached ==
+		    rdi->dparms.props.max_mcast_qp_attach) {
+			ret = ENOMEM;
+			goto bail;
+		}
+
+		tmcast->n_attached++;
+
+		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
+		ret = EEXIST;
+		goto bail;
+	}
+
+	spin_lock(&rdi->n_mcast_grps_lock);
+	if (rdi->n_mcast_grps_allocated == rdi->dparms.props.max_mcast_grp) {
+		spin_unlock(&rdi->n_mcast_grps_lock);
+		ret = ENOMEM;
+		goto bail;
+	}
+
+	rdi->n_mcast_grps_allocated++;
+	spin_unlock(&rdi->n_mcast_grps_lock);
+
+	mcast->n_attached++;
+
+	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
+
+	atomic_inc(&mcast->refcount);
+	rb_link_node(&mcast->rb_node, pn, n);
+	rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
+
+	ret = 0;
+
+bail:
+	spin_unlock_irq(&ibp->lock);
+
+	return ret;
+}
+
+/**
+ * rvt_attach_mcast - attach a qp to a multicast group
+ * @ibqp: Infiniband qp
+ * @igd: multicast guid
+ * @lid: multicast lid
+ *
+ * Return: 0 on success
+ */
+int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1];
+	struct rvt_mcast *mcast;
+	struct rvt_mcast_qp *mqp;
+	int ret = -ENOMEM;
+
+	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+		return -EINVAL;
+
+	/*
+	 * Allocate data structures since its better to do this outside of
+	 * spin locks and it will most likely be needed.
+	 */
+	mcast = rvt_mcast_alloc(gid);
+	if (!mcast)
+		return -ENOMEM;
+
+	mqp = rvt_mcast_qp_alloc(qp);
+	if (!mqp)
+		goto bail_mcast;
+
+	switch (rvt_mcast_add(rdi, ibp, mcast, mqp)) {
+	case ESRCH:
+		/* Neither was used: OK to attach the same QP twice. */
+		ret = 0;
+		goto bail_mqp;
+	case EEXIST: /* The mcast wasn't used */
+		ret = 0;
+		goto bail_mcast;
+	case ENOMEM:
+		/* Exceeded the maximum number of mcast groups. */
+		ret = -ENOMEM;
+		goto bail_mqp;
+	default:
+		break;
+	}
+
+	return 0;
+
+bail_mqp:
+	rvt_mcast_qp_free(mqp);
+
+bail_mcast:
+	rvt_mcast_free(mcast);
+
+	return ret;
+}
+
+/**
+ * rvt_detach_mcast - remove a qp from a multicast group
+ * @ibqp: Infiniband qp
+ * @igd: multicast guid
+ * @lid: multicast lid
+ *
+ * Return: 0 on success
+ */
+int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1];
+	struct rvt_mcast *mcast = NULL;
+	struct rvt_mcast_qp *p, *tmp, *delp = NULL;
+	struct rb_node *n;
+	int last = 0;
+	int ret = 0;
+
+	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+		return -EINVAL;
+
+	spin_lock_irq(&ibp->lock);
+
+	/* Find the GID in the mcast table. */
+	n = ibp->mcast_tree.rb_node;
+	while (1) {
+		if (!n) {
+			spin_unlock_irq(&ibp->lock);
+			return -EINVAL;
+		}
+
+		mcast = rb_entry(n, struct rvt_mcast, rb_node);
+		ret = memcmp(gid->raw, mcast->mgid.raw,
+			     sizeof(union ib_gid));
+		if (ret < 0)
+			n = n->rb_left;
+		else if (ret > 0)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	/* Search the QP list. */
+	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
+		if (p->qp != qp)
+			continue;
+		/*
+		 * We found it, so remove it, but don't poison the forward
+		 * link until we are sure there are no list walkers.
+		 */
+		list_del_rcu(&p->list);
+		mcast->n_attached--;
+		delp = p;
+
+		/* If this was the last attached QP, remove the GID too. */
+		if (list_empty(&mcast->qp_list)) {
+			rb_erase(&mcast->rb_node, &ibp->mcast_tree);
+			last = 1;
+		}
+		break;
+	}
+
+	spin_unlock_irq(&ibp->lock);
+	/* QP not attached */
+	if (!delp)
+		return -EINVAL;
+
+	/*
+	 * Wait for any list walkers to finish before freeing the
+	 * list element.
+	 */
+	wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+	rvt_mcast_qp_free(delp);
+
+	if (last) {
+		atomic_dec(&mcast->refcount);
+		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
+		rvt_mcast_free(mcast);
+		spin_lock_irq(&rdi->n_mcast_grps_lock);
+		rdi->n_mcast_grps_allocated--;
+		spin_unlock_irq(&rdi->n_mcast_grps_lock);
+	}
+
+	return 0;
+}
+
+/**
+ *rvt_mast_tree_empty - determine if any qps are attached to any mcast group
+ *@rdi: rvt dev struct
+ *
+ * Return: in use count
+ */
+int rvt_mcast_tree_empty(struct rvt_dev_info *rdi)
+{
+	int i;
+	int in_use = 0;
+
+	for (i = 0; i < rdi->dparms.nports; i++)
+		if (rdi->ports[i]->mcast_tree.rb_node)
+			in_use++;
+	return in_use;
+}

diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h
new file mode 100644
index 0000000..29f5792
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mcast.h

@@ -0,0 +1,58 @@
+#ifndef DEF_RVTMCAST_H
+#define DEF_RVTMCAST_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+void rvt_driver_mcast_init(struct rvt_dev_info *rdi);
+int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int rvt_mcast_tree_empty(struct rvt_dev_info *rdi);
+
+#endif          /* DEF_RVTMCAST_H */

diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
new file mode 100644
index 0000000..e202b81
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mmap.c

@@ -0,0 +1,208 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+#include "mmap.h"
+
+/**
+ * rvt_mmap_init - init link list and lock for mem map
+ * @rdi: rvt dev struct
+ */
+void rvt_mmap_init(struct rvt_dev_info *rdi)
+{
+	INIT_LIST_HEAD(&rdi->pending_mmaps);
+	spin_lock_init(&rdi->pending_lock);
+	rdi->mmap_offset = PAGE_SIZE;
+	spin_lock_init(&rdi->mmap_offset_lock);
+}
+
+/**
+ * rvt_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct rvt_mmap_info
+ */
+void rvt_release_mmap_info(struct kref *ref)
+{
+	struct rvt_mmap_info *ip =
+		container_of(ref, struct rvt_mmap_info, ref);
+	struct rvt_dev_info *rdi = ib_to_rvt(ip->context->device);
+
+	spin_lock_irq(&rdi->pending_lock);
+	list_del(&ip->pending_mmaps);
+	spin_unlock_irq(&rdi->pending_lock);
+
+	vfree(ip->obj);
+	kfree(ip);
+}
+
+static void rvt_vma_open(struct vm_area_struct *vma)
+{
+	struct rvt_mmap_info *ip = vma->vm_private_data;
+
+	kref_get(&ip->ref);
+}
+
+static void rvt_vma_close(struct vm_area_struct *vma)
+{
+	struct rvt_mmap_info *ip = vma->vm_private_data;
+
+	kref_put(&ip->ref, rvt_release_mmap_info);
+}
+
+static const struct vm_operations_struct rvt_vm_ops = {
+	.open = rvt_vma_open,
+	.close = rvt_vma_close,
+};
+
+/**
+ * rvt_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ *
+ * Return: zero if the mmap is OK. Otherwise, return an errno.
+ */
+int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(context->device);
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct rvt_mmap_info *ip, *pp;
+	int ret = -EINVAL;
+
+	/*
+	 * Search the device's list of objects waiting for a mmap call.
+	 * Normally, this list is very short since a call to create a
+	 * CQ, QP, or SRQ is soon followed by a call to mmap().
+	 */
+	spin_lock_irq(&rdi->pending_lock);
+	list_for_each_entry_safe(ip, pp, &rdi->pending_mmaps,
+				 pending_mmaps) {
+		/* Only the creator is allowed to mmap the object */
+		if (context != ip->context || (__u64)offset != ip->offset)
+			continue;
+		/* Don't allow a mmap larger than the object. */
+		if (size > ip->size)
+			break;
+
+		list_del_init(&ip->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
+
+		ret = remap_vmalloc_range(vma, ip->obj, 0);
+		if (ret)
+			goto done;
+		vma->vm_ops = &rvt_vm_ops;
+		vma->vm_private_data = ip;
+		rvt_vma_open(vma);
+		goto done;
+	}
+	spin_unlock_irq(&rdi->pending_lock);
+done:
+	return ret;
+}
+
+/**
+ * rvt_create_mmap_info - allocate information for hfi1_mmap
+ * @rdi: rvt dev struct
+ * @size: size in bytes to map
+ * @context: user context
+ * @obj: opaque pointer to a cq, wq etc
+ *
+ * Return: rvt_mmap struct on success
+ */
+struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
+					   u32 size,
+					   struct ib_ucontext *context,
+					   void *obj)
+{
+	struct rvt_mmap_info *ip;
+
+	ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node);
+	if (!ip)
+		return ip;
+
+	size = PAGE_ALIGN(size);
+
+	spin_lock_irq(&rdi->mmap_offset_lock);
+	if (rdi->mmap_offset == 0)
+		rdi->mmap_offset = PAGE_SIZE;
+	ip->offset = rdi->mmap_offset;
+	rdi->mmap_offset += size;
+	spin_unlock_irq(&rdi->mmap_offset_lock);
+
+	INIT_LIST_HEAD(&ip->pending_mmaps);
+	ip->size = size;
+	ip->context = context;
+	ip->obj = obj;
+	kref_init(&ip->ref);
+
+	return ip;
+}
+
+/**
+ * rvt_update_mmap_info - update a mem map
+ * @rdi: rvt dev struct
+ * @ip: mmap info pointer
+ * @size: size to grow by
+ * @obj: opaque pointer to cq, wq, etc.
+ */
+void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip,
+			  u32 size, void *obj)
+{
+	size = PAGE_ALIGN(size);
+
+	spin_lock_irq(&rdi->mmap_offset_lock);
+	if (rdi->mmap_offset == 0)
+		rdi->mmap_offset = PAGE_SIZE;
+	ip->offset = rdi->mmap_offset;
+	rdi->mmap_offset += size;
+	spin_unlock_irq(&rdi->mmap_offset_lock);
+
+	ip->size = size;
+	ip->obj = obj;
+}

diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h
new file mode 100644
index 0000000..fab0e7b
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mmap.h

@@ -0,0 +1,63 @@
+#ifndef DEF_RDMAVTMMAP_H
+#define DEF_RDMAVTMMAP_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+void rvt_mmap_init(struct rvt_dev_info *rdi);
+void rvt_release_mmap_info(struct kref *ref);
+int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
+					   u32 size,
+					   struct ib_ucontext *context,
+					   void *obj);
+void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip,
+			  u32 size, void *obj);
+
+#endif          /* DEF_RDMAVTMMAP_H */

diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
new file mode 100644
index 0000000..0ff765b
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mr.c

@@ -0,0 +1,830 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
+#include <rdma/rdma_vt.h>
+#include "vt.h"
+#include "mr.h"
+
+/**
+ * rvt_driver_mr_init - Init MR resources per driver
+ * @rdi: rvt dev struct
+ *
+ * Do any intilization needed when a driver registers with rdmavt.
+ *
+ * Return: 0 on success or errno on failure
+ */
+int rvt_driver_mr_init(struct rvt_dev_info *rdi)
+{
+	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
+	unsigned lk_tab_size;
+	int i;
+
+	/*
+	 * The top hfi1_lkey_table_size bits are used to index the
+	 * table.  The lower 8 bits can be owned by the user (copied from
+	 * the LKEY).  The remaining bits act as a generation number or tag.
+	 */
+	if (!lkey_table_size)
+		return -EINVAL;
+
+	spin_lock_init(&rdi->lkey_table.lock);
+
+	/* ensure generation is at least 4 bits */
+	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
+		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
+			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
+		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
+		lkey_table_size = rdi->dparms.lkey_table_size;
+	}
+	rdi->lkey_table.max = 1 << lkey_table_size;
+	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
+	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
+			       vmalloc_node(lk_tab_size, rdi->dparms.node);
+	if (!rdi->lkey_table.table)
+		return -ENOMEM;
+
+	RCU_INIT_POINTER(rdi->dma_mr, NULL);
+	for (i = 0; i < rdi->lkey_table.max; i++)
+		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);
+
+	return 0;
+}
+
+/**
+ *rvt_mr_exit: clean up MR
+ *@rdi: rvt dev structure
+ *
+ * called when drivers have unregistered or perhaps failed to register with us
+ */
+void rvt_mr_exit(struct rvt_dev_info *rdi)
+{
+	if (rdi->dma_mr)
+		rvt_pr_err(rdi, "DMA MR not null!\n");
+
+	vfree(rdi->lkey_table.table);
+}
+
+static void rvt_deinit_mregion(struct rvt_mregion *mr)
+{
+	int i = mr->mapsz;
+
+	mr->mapsz = 0;
+	while (i)
+		kfree(mr->map[--i]);
+}
+
+static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
+			    int count)
+{
+	int m, i = 0;
+
+	mr->mapsz = 0;
+	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
+	for (; i < m; i++) {
+		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
+		if (!mr->map[i]) {
+			rvt_deinit_mregion(mr);
+			return -ENOMEM;
+		}
+		mr->mapsz++;
+	}
+	init_completion(&mr->comp);
+	/* count returning the ptr to user */
+	atomic_set(&mr->refcount, 1);
+	mr->pd = pd;
+	mr->max_segs = count;
+	return 0;
+}
+
+/**
+ * rvt_alloc_lkey - allocate an lkey
+ * @mr: memory region that this lkey protects
+ * @dma_region: 0->normal key, 1->restricted DMA key
+ *
+ * Returns 0 if successful, otherwise returns -errno.
+ *
+ * Increments mr reference count as required.
+ *
+ * Sets the lkey field mr for non-dma regions.
+ *
+ */
+static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
+{
+	unsigned long flags;
+	u32 r;
+	u32 n;
+	int ret = 0;
+	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
+	struct rvt_lkey_table *rkt = &dev->lkey_table;
+
+	rvt_get_mr(mr);
+	spin_lock_irqsave(&rkt->lock, flags);
+
+	/* special case for dma_mr lkey == 0 */
+	if (dma_region) {
+		struct rvt_mregion *tmr;
+
+		tmr = rcu_access_pointer(dev->dma_mr);
+		if (!tmr) {
+			rcu_assign_pointer(dev->dma_mr, mr);
+			mr->lkey_published = 1;
+		} else {
+			rvt_put_mr(mr);
+		}
+		goto success;
+	}
+
+	/* Find the next available LKEY */
+	r = rkt->next;
+	n = r;
+	for (;;) {
+		if (!rcu_access_pointer(rkt->table[r]))
+			break;
+		r = (r + 1) & (rkt->max - 1);
+		if (r == n)
+			goto bail;
+	}
+	rkt->next = (r + 1) & (rkt->max - 1);
+	/*
+	 * Make sure lkey is never zero which is reserved to indicate an
+	 * unrestricted LKEY.
+	 */
+	rkt->gen++;
+	/*
+	 * bits are capped to ensure enough bits for generation number
+	 */
+	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
+		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
+		 << 8);
+	if (mr->lkey == 0) {
+		mr->lkey |= 1 << 8;
+		rkt->gen++;
+	}
+	rcu_assign_pointer(rkt->table[r], mr);
+	mr->lkey_published = 1;
+success:
+	spin_unlock_irqrestore(&rkt->lock, flags);
+out:
+	return ret;
+bail:
+	rvt_put_mr(mr);
+	spin_unlock_irqrestore(&rkt->lock, flags);
+	ret = -ENOMEM;
+	goto out;
+}
+
+/**
+ * rvt_free_lkey - free an lkey
+ * @mr: mr to free from tables
+ */
+static void rvt_free_lkey(struct rvt_mregion *mr)
+{
+	unsigned long flags;
+	u32 lkey = mr->lkey;
+	u32 r;
+	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
+	struct rvt_lkey_table *rkt = &dev->lkey_table;
+	int freed = 0;
+
+	spin_lock_irqsave(&rkt->lock, flags);
+	if (!mr->lkey_published)
+		goto out;
+	if (lkey == 0) {
+		RCU_INIT_POINTER(dev->dma_mr, NULL);
+	} else {
+		r = lkey >> (32 - dev->dparms.lkey_table_size);
+		RCU_INIT_POINTER(rkt->table[r], NULL);
+	}
+	mr->lkey_published = 0;
+	freed++;
+out:
+	spin_unlock_irqrestore(&rkt->lock, flags);
+	if (freed) {
+		synchronize_rcu();
+		rvt_put_mr(mr);
+	}
+}
+
+static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
+{
+	struct rvt_mr *mr;
+	int rval = -ENOMEM;
+	int m;
+
+	/* Allocate struct plus pointers to first level page tables. */
+	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
+	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
+	if (!mr)
+		goto bail;
+
+	rval = rvt_init_mregion(&mr->mr, pd, count);
+	if (rval)
+		goto bail;
+	/*
+	 * ib_reg_phys_mr() will initialize mr->ibmr except for
+	 * lkey and rkey.
+	 */
+	rval = rvt_alloc_lkey(&mr->mr, 0);
+	if (rval)
+		goto bail_mregion;
+	mr->ibmr.lkey = mr->mr.lkey;
+	mr->ibmr.rkey = mr->mr.lkey;
+done:
+	return mr;
+
+bail_mregion:
+	rvt_deinit_mregion(&mr->mr);
+bail:
+	kfree(mr);
+	mr = ERR_PTR(rval);
+	goto done;
+}
+
+static void __rvt_free_mr(struct rvt_mr *mr)
+{
+	rvt_deinit_mregion(&mr->mr);
+	rvt_free_lkey(&mr->mr);
+	vfree(mr);
+}
+
+/**
+ * rvt_get_dma_mr - get a DMA memory region
+ * @pd: protection domain for this memory region
+ * @acc: access flags
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ * Note that all DMA addresses should be created via the
+ * struct ib_dma_mapping_ops functions (see dma.c).
+ */
+struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct rvt_mr *mr;
+	struct ib_mr *ret;
+	int rval;
+
+	if (ibpd_to_rvtpd(pd)->user)
+		return ERR_PTR(-EPERM);
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	rval = rvt_init_mregion(&mr->mr, pd, 0);
+	if (rval) {
+		ret = ERR_PTR(rval);
+		goto bail;
+	}
+
+	rval = rvt_alloc_lkey(&mr->mr, 1);
+	if (rval) {
+		ret = ERR_PTR(rval);
+		goto bail_mregion;
+	}
+
+	mr->mr.access_flags = acc;
+	ret = &mr->ibmr;
+done:
+	return ret;
+
+bail_mregion:
+	rvt_deinit_mregion(&mr->mr);
+bail:
+	kfree(mr);
+	goto done;
+}
+
+/**
+ * rvt_reg_user_mr - register a userspace memory region
+ * @pd: protection domain for this memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @mr_access_flags: access flags for this memory region
+ * @udata: unused by the driver
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ */
+struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+			      u64 virt_addr, int mr_access_flags,
+			      struct ib_udata *udata)
+{
+	struct rvt_mr *mr;
+	struct ib_umem *umem;
+	struct scatterlist *sg;
+	int n, m, entry;
+	struct ib_mr *ret;
+
+	if (length == 0)
+		return ERR_PTR(-EINVAL);
+
+	umem = ib_umem_get(pd->uobject->context, start, length,
+			   mr_access_flags, 0);
+	if (IS_ERR(umem))
+		return (void *)umem;
+
+	n = umem->nmap;
+
+	mr = __rvt_alloc_mr(n, pd);
+	if (IS_ERR(mr)) {
+		ret = (struct ib_mr *)mr;
+		goto bail_umem;
+	}
+
+	mr->mr.user_base = start;
+	mr->mr.iova = virt_addr;
+	mr->mr.length = length;
+	mr->mr.offset = ib_umem_offset(umem);
+	mr->mr.access_flags = mr_access_flags;
+	mr->umem = umem;
+
+	if (is_power_of_2(umem->page_size))
+		mr->mr.page_shift = ilog2(umem->page_size);
+	m = 0;
+	n = 0;
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+		void *vaddr;
+
+		vaddr = page_address(sg_page(sg));
+		if (!vaddr) {
+			ret = ERR_PTR(-EINVAL);
+			goto bail_inval;
+		}
+		mr->mr.map[m]->segs[n].vaddr = vaddr;
+		mr->mr.map[m]->segs[n].length = umem->page_size;
+		n++;
+		if (n == RVT_SEGSZ) {
+			m++;
+			n = 0;
+		}
+	}
+	return &mr->ibmr;
+
+bail_inval:
+	__rvt_free_mr(mr);
+
+bail_umem:
+	ib_umem_release(umem);
+
+	return ret;
+}
+
+/**
+ * rvt_dereg_mr - unregister and free a memory region
+ * @ibmr: the memory region to free
+ *
+ *
+ * Note that this is called to free MRs created by rvt_get_dma_mr()
+ * or rvt_reg_user_mr().
+ *
+ * Returns 0 on success.
+ */
+int rvt_dereg_mr(struct ib_mr *ibmr)
+{
+	struct rvt_mr *mr = to_imr(ibmr);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
+	int ret = 0;
+	unsigned long timeout;
+
+	rvt_free_lkey(&mr->mr);
+
+	rvt_put_mr(&mr->mr); /* will set completion if last */
+	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
+	if (!timeout) {
+		rvt_pr_err(rdi,
+			   "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
+			   mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
+		rvt_get_mr(&mr->mr);
+		ret = -EBUSY;
+		goto out;
+	}
+	rvt_deinit_mregion(&mr->mr);
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+	kfree(mr);
+out:
+	return ret;
+}
+
+/**
+ * rvt_alloc_mr - Allocate a memory region usable with the
+ * @pd: protection domain for this memory region
+ * @mr_type: mem region type
+ * @max_num_sg: Max number of segments allowed
+ *
+ * Return: the memory region on success, otherwise return an errno.
+ */
+struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
+			   enum ib_mr_type mr_type,
+			   u32 max_num_sg)
+{
+	struct rvt_mr *mr;
+
+	if (mr_type != IB_MR_TYPE_MEM_REG)
+		return ERR_PTR(-EINVAL);
+
+	mr = __rvt_alloc_mr(max_num_sg, pd);
+	if (IS_ERR(mr))
+		return (struct ib_mr *)mr;
+
+	return &mr->ibmr;
+}
+
+/**
+ * rvt_alloc_fmr - allocate a fast memory region
+ * @pd: the protection domain for this memory region
+ * @mr_access_flags: access flags for this memory region
+ * @fmr_attr: fast memory region attributes
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ */
+struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+			     struct ib_fmr_attr *fmr_attr)
+{
+	struct rvt_fmr *fmr;
+	int m;
+	struct ib_fmr *ret;
+	int rval = -ENOMEM;
+
+	/* Allocate struct plus pointers to first level page tables. */
+	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
+	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
+	if (!fmr)
+		goto bail;
+
+	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+	if (rval)
+		goto bail;
+
+	/*
+	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
+	 * rkey.
+	 */
+	rval = rvt_alloc_lkey(&fmr->mr, 0);
+	if (rval)
+		goto bail_mregion;
+	fmr->ibfmr.rkey = fmr->mr.lkey;
+	fmr->ibfmr.lkey = fmr->mr.lkey;
+	/*
+	 * Resources are allocated but no valid mapping (RKEY can't be
+	 * used).
+	 */
+	fmr->mr.access_flags = mr_access_flags;
+	fmr->mr.max_segs = fmr_attr->max_pages;
+	fmr->mr.page_shift = fmr_attr->page_shift;
+
+	ret = &fmr->ibfmr;
+done:
+	return ret;
+
+bail_mregion:
+	rvt_deinit_mregion(&fmr->mr);
+bail:
+	kfree(fmr);
+	ret = ERR_PTR(rval);
+	goto done;
+}
+
+/**
+ * rvt_map_phys_fmr - set up a fast memory region
+ * @ibmfr: the fast memory region to set up
+ * @page_list: the list of pages to associate with the fast memory region
+ * @list_len: the number of pages to associate with the fast memory region
+ * @iova: the virtual address of the start of the fast memory region
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success
+ */
+
+int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+		     int list_len, u64 iova)
+{
+	struct rvt_fmr *fmr = to_ifmr(ibfmr);
+	struct rvt_lkey_table *rkt;
+	unsigned long flags;
+	int m, n, i;
+	u32 ps;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
+
+	i = atomic_read(&fmr->mr.refcount);
+	if (i > 2)
+		return -EBUSY;
+
+	if (list_len > fmr->mr.max_segs)
+		return -EINVAL;
+
+	rkt = &rdi->lkey_table;
+	spin_lock_irqsave(&rkt->lock, flags);
+	fmr->mr.user_base = iova;
+	fmr->mr.iova = iova;
+	ps = 1 << fmr->mr.page_shift;
+	fmr->mr.length = list_len * ps;
+	m = 0;
+	n = 0;
+	for (i = 0; i < list_len; i++) {
+		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
+		fmr->mr.map[m]->segs[n].length = ps;
+		if (++n == RVT_SEGSZ) {
+			m++;
+			n = 0;
+		}
+	}
+	spin_unlock_irqrestore(&rkt->lock, flags);
+	return 0;
+}
+
+/**
+ * rvt_unmap_fmr - unmap fast memory regions
+ * @fmr_list: the list of fast memory regions to unmap
+ *
+ * Return: 0 on success.
+ */
+int rvt_unmap_fmr(struct list_head *fmr_list)
+{
+	struct rvt_fmr *fmr;
+	struct rvt_lkey_table *rkt;
+	unsigned long flags;
+	struct rvt_dev_info *rdi;
+
+	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
+		rdi = ib_to_rvt(fmr->ibfmr.device);
+		rkt = &rdi->lkey_table;
+		spin_lock_irqsave(&rkt->lock, flags);
+		fmr->mr.user_base = 0;
+		fmr->mr.iova = 0;
+		fmr->mr.length = 0;
+		spin_unlock_irqrestore(&rkt->lock, flags);
+	}
+	return 0;
+}
+
+/**
+ * rvt_dealloc_fmr - deallocate a fast memory region
+ * @ibfmr: the fast memory region to deallocate
+ *
+ * Return: 0 on success.
+ */
+int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
+{
+	struct rvt_fmr *fmr = to_ifmr(ibfmr);
+	int ret = 0;
+	unsigned long timeout;
+
+	rvt_free_lkey(&fmr->mr);
+	rvt_put_mr(&fmr->mr); /* will set completion if last */
+	timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
+	if (!timeout) {
+		rvt_get_mr(&fmr->mr);
+		ret = -EBUSY;
+		goto out;
+	}
+	rvt_deinit_mregion(&fmr->mr);
+	kfree(fmr);
+out:
+	return ret;
+}
+
+/**
+ * rvt_lkey_ok - check IB SGE for validity and initialize
+ * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
+ * @isge: outgoing internal SGE
+ * @sge: SGE to check
+ * @acc: access flags
+ *
+ * Check the IB SGE for validity and initialize our internal version
+ * of it.
+ *
+ * Return: 1 if valid and successful, otherwise returns 0.
+ *
+ * increments the reference count upon success
+ *
+ */
+int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
+		struct rvt_sge *isge, struct ib_sge *sge, int acc)
+{
+	struct rvt_mregion *mr;
+	unsigned n, m;
+	size_t off;
+	struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
+
+	/*
+	 * We use LKEY == zero for kernel virtual addresses
+	 * (see rvt_get_dma_mr and dma.c).
+	 */
+	rcu_read_lock();
+	if (sge->lkey == 0) {
+		if (pd->user)
+			goto bail;
+		mr = rcu_dereference(dev->dma_mr);
+		if (!mr)
+			goto bail;
+		atomic_inc(&mr->refcount);
+		rcu_read_unlock();
+
+		isge->mr = mr;
+		isge->vaddr = (void *)sge->addr;
+		isge->length = sge->length;
+		isge->sge_length = sge->length;
+		isge->m = 0;
+		isge->n = 0;
+		goto ok;
+	}
+	mr = rcu_dereference(
+		rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+		goto bail;
+
+	off = sge->addr - mr->user_base;
+	if (unlikely(sge->addr < mr->user_base ||
+		     off + sge->length > mr->length ||
+		     (mr->access_flags & acc) != acc))
+		goto bail;
+	atomic_inc(&mr->refcount);
+	rcu_read_unlock();
+
+	off += mr->offset;
+	if (mr->page_shift) {
+		/*
+		 * page sizes are uniform power of 2 so no loop is necessary
+		 * entries_spanned_by_off is the number of times the loop below
+		 * would have executed.
+		*/
+		size_t entries_spanned_by_off;
+
+		entries_spanned_by_off = off >> mr->page_shift;
+		off -= (entries_spanned_by_off << mr->page_shift);
+		m = entries_spanned_by_off / RVT_SEGSZ;
+		n = entries_spanned_by_off % RVT_SEGSZ;
+	} else {
+		m = 0;
+		n = 0;
+		while (off >= mr->map[m]->segs[n].length) {
+			off -= mr->map[m]->segs[n].length;
+			n++;
+			if (n >= RVT_SEGSZ) {
+				m++;
+				n = 0;
+			}
+		}
+	}
+	isge->mr = mr;
+	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
+	isge->length = mr->map[m]->segs[n].length - off;
+	isge->sge_length = sge->length;
+	isge->m = m;
+	isge->n = n;
+ok:
+	return 1;
+bail:
+	rcu_read_unlock();
+	return 0;
+}
+EXPORT_SYMBOL(rvt_lkey_ok);
+
+/**
+ * rvt_rkey_ok - check the IB virtual address, length, and RKEY
+ * @qp: qp for validation
+ * @sge: SGE state
+ * @len: length of data
+ * @vaddr: virtual address to place data
+ * @rkey: rkey to check
+ * @acc: access flags
+ *
+ * Return: 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
+ */
+int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
+		u32 len, u64 vaddr, u32 rkey, int acc)
+{
+	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
+	struct rvt_lkey_table *rkt = &dev->lkey_table;
+	struct rvt_mregion *mr;
+	unsigned n, m;
+	size_t off;
+
+	/*
+	 * We use RKEY == zero for kernel virtual addresses
+	 * (see rvt_get_dma_mr and dma.c).
+	 */
+	rcu_read_lock();
+	if (rkey == 0) {
+		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
+		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);
+
+		if (pd->user)
+			goto bail;
+		mr = rcu_dereference(rdi->dma_mr);
+		if (!mr)
+			goto bail;
+		atomic_inc(&mr->refcount);
+		rcu_read_unlock();
+
+		sge->mr = mr;
+		sge->vaddr = (void *)vaddr;
+		sge->length = len;
+		sge->sge_length = len;
+		sge->m = 0;
+		sge->n = 0;
+		goto ok;
+	}
+
+	mr = rcu_dereference(
+		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+		goto bail;
+
+	off = vaddr - mr->iova;
+	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
+		     (mr->access_flags & acc) == 0))
+		goto bail;
+	atomic_inc(&mr->refcount);
+	rcu_read_unlock();
+
+	off += mr->offset;
+	if (mr->page_shift) {
+		/*
+		 * page sizes are uniform power of 2 so no loop is necessary
+		 * entries_spanned_by_off is the number of times the loop below
+		 * would have executed.
+		*/
+		size_t entries_spanned_by_off;
+
+		entries_spanned_by_off = off >> mr->page_shift;
+		off -= (entries_spanned_by_off << mr->page_shift);
+		m = entries_spanned_by_off / RVT_SEGSZ;
+		n = entries_spanned_by_off % RVT_SEGSZ;
+	} else {
+		m = 0;
+		n = 0;
+		while (off >= mr->map[m]->segs[n].length) {
+			off -= mr->map[m]->segs[n].length;
+			n++;
+			if (n >= RVT_SEGSZ) {
+				m++;
+				n = 0;
+			}
+		}
+	}
+	sge->mr = mr;
+	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
+	sge->length = mr->map[m]->segs[n].length - off;
+	sge->sge_length = len;
+	sge->m = m;
+	sge->n = n;
+ok:
+	return 1;
+bail:
+	rcu_read_unlock();
+	return 0;
+}
+EXPORT_SYMBOL(rvt_rkey_ok);

diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h
new file mode 100644
index 0000000..6938051
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mr.h

@@ -0,0 +1,92 @@
+#ifndef DEF_RVTMR_H
+#define DEF_RVTMR_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+struct rvt_fmr {
+	struct ib_fmr ibfmr;
+	struct rvt_mregion mr;        /* must be last */
+};
+
+struct rvt_mr {
+	struct ib_mr ibmr;
+	struct ib_umem *umem;
+	struct rvt_mregion mr;  /* must be last */
+};
+
+static inline struct rvt_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct rvt_fmr, ibfmr);
+}
+
+static inline struct rvt_mr *to_imr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct rvt_mr, ibmr);
+}
+
+int rvt_driver_mr_init(struct rvt_dev_info *rdi);
+void rvt_mr_exit(struct rvt_dev_info *rdi);
+
+/* Mem Regions */
+struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+			      u64 virt_addr, int mr_access_flags,
+			      struct ib_udata *udata);
+int rvt_dereg_mr(struct ib_mr *ibmr);
+struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
+			   enum ib_mr_type mr_type,
+			   u32 max_num_sg);
+struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+			     struct ib_fmr_attr *fmr_attr);
+int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+		     int list_len, u64 iova);
+int rvt_unmap_fmr(struct list_head *fmr_list);
+int rvt_dealloc_fmr(struct ib_fmr *ibfmr);
+
+#endif          /* DEF_RVTMR_H */

diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
new file mode 100644
index 0000000..d1292f3
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/pd.c

@@ -0,0 +1,119 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "pd.h"
+
+/**
+ * rvt_alloc_pd - allocate a protection domain
+ * @ibdev: ib device
+ * @context: optional user context
+ * @udata: optional user data
+ *
+ * Allocate and keep track of a PD.
+ *
+ * Return: 0 on success
+ */
+struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
+			   struct ib_ucontext *context,
+			   struct ib_udata *udata)
+{
+	struct rvt_dev_info *dev = ib_to_rvt(ibdev);
+	struct rvt_pd *pd;
+	struct ib_pd *ret;
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd) {
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+	/*
+	 * While we could continue allocating protecetion domains, being
+	 * constrained only by system resources. The IBTA spec defines that
+	 * there is a max_pd limit that can be set and we need to check for
+	 * that.
+	 */
+
+	spin_lock(&dev->n_pds_lock);
+	if (dev->n_pds_allocated == dev->dparms.props.max_pd) {
+		spin_unlock(&dev->n_pds_lock);
+		kfree(pd);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	dev->n_pds_allocated++;
+	spin_unlock(&dev->n_pds_lock);
+
+	/* ib_alloc_pd() will initialize pd->ibpd. */
+	pd->user = udata ? 1 : 0;
+
+	ret = &pd->ibpd;
+
+bail:
+	return ret;
+}
+
+/**
+ * rvt_dealloc_pd - Free PD
+ * @ibpd: Free up PD
+ *
+ * Return: always 0
+ */
+int rvt_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
+	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+
+	spin_lock(&dev->n_pds_lock);
+	dev->n_pds_allocated--;
+	spin_unlock(&dev->n_pds_lock);
+
+	kfree(pd);
+
+	return 0;
+}

diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h
new file mode 100644
index 0000000..1892ca4
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/pd.h

@@ -0,0 +1,58 @@
+#ifndef DEF_RDMAVTPD_H
+#define DEF_RDMAVTPD_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
+			   struct ib_ucontext *context,
+			   struct ib_udata *udata);
+int rvt_dealloc_pd(struct ib_pd *ibpd);
+
+#endif          /* DEF_RDMAVTPD_H */

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
new file mode 100644
index 0000000..bd82a69
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/qp.c

@@ -0,0 +1,1696 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/hash.h>
+#include <linux/bitops.h>
+#include <linux/lockdep.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+#include "qp.h"
+#include "vt.h"
+#include "trace.h"
+
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; rvt_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
+	[IB_QPS_RESET] = 0,
+	[IB_QPS_INIT] = RVT_POST_RECV_OK,
+	[IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
+	[IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
+	    RVT_PROCESS_NEXT_SEND_OK,
+	[IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
+	[IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+	[IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
+	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+};
+EXPORT_SYMBOL(ib_rvt_state_ops);
+
+static void get_map_page(struct rvt_qpn_table *qpt,
+			 struct rvt_qpn_map *map,
+			 gfp_t gfp)
+{
+	unsigned long page = get_zeroed_page(gfp);
+
+	/*
+	 * Free the page if someone raced with us installing it.
+	 */
+
+	spin_lock(&qpt->lock);
+	if (map->page)
+		free_page(page);
+	else
+		map->page = (void *)page;
+	spin_unlock(&qpt->lock);
+}
+
+/**
+ * init_qpn_table - initialize the QP number table for a device
+ * @qpt: the QPN table
+ */
+static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
+{
+	u32 offset, i;
+	struct rvt_qpn_map *map;
+	int ret = 0;
+
+	if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
+		return -EINVAL;
+
+	spin_lock_init(&qpt->lock);
+
+	qpt->last = rdi->dparms.qpn_start;
+	qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;
+
+	/*
+	 * Drivers may want some QPs beyond what we need for verbs let them use
+	 * our qpn table. No need for two. Lets go ahead and mark the bitmaps
+	 * for those. The reserved range must be *after* the range which verbs
+	 * will pick from.
+	 */
+
+	/* Figure out number of bit maps needed before reserved range */
+	qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
+
+	/* This should always be zero */
+	offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
+
+	/* Starting with the first reserved bit map */
+	map = &qpt->map[qpt->nmaps];
+
+	rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
+		    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
+	for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
+		if (!map->page) {
+			get_map_page(qpt, map, GFP_KERNEL);
+			if (!map->page) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+		set_bit(offset, map->page);
+		offset++;
+		if (offset == RVT_BITS_PER_PAGE) {
+			/* next page */
+			qpt->nmaps++;
+			map++;
+			offset = 0;
+		}
+	}
+	return ret;
+}
+
+/**
+ * free_qpn_table - free the QP number table for a device
+ * @qpt: the QPN table
+ */
+static void free_qpn_table(struct rvt_qpn_table *qpt)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
+		free_page((unsigned long)qpt->map[i].page);
+}
+
+/**
+ * rvt_driver_qp_init - Init driver qp resources
+ * @rdi: rvt dev strucutre
+ *
+ * Return: 0 on success
+ */
+int rvt_driver_qp_init(struct rvt_dev_info *rdi)
+{
+	int i;
+	int ret = -ENOMEM;
+
+	if (!rdi->dparms.qp_table_size)
+		return -EINVAL;
+
+	/*
+	 * If driver is not doing any QP allocation then make sure it is
+	 * providing the necessary QP functions.
+	 */
+	if (!rdi->driver_f.free_all_qps ||
+	    !rdi->driver_f.qp_priv_alloc ||
+	    !rdi->driver_f.qp_priv_free ||
+	    !rdi->driver_f.notify_qp_reset)
+		return -EINVAL;
+
+	/* allocate parent object */
+	rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
+				   rdi->dparms.node);
+	if (!rdi->qp_dev)
+		return -ENOMEM;
+
+	/* allocate hash table */
+	rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
+	rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
+	rdi->qp_dev->qp_table =
+		kmalloc_node(rdi->qp_dev->qp_table_size *
+			     sizeof(*rdi->qp_dev->qp_table),
+			     GFP_KERNEL, rdi->dparms.node);
+	if (!rdi->qp_dev->qp_table)
+		goto no_qp_table;
+
+	for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
+		RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
+
+	spin_lock_init(&rdi->qp_dev->qpt_lock);
+
+	/* initialize qpn map */
+	if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
+		goto fail_table;
+
+	spin_lock_init(&rdi->n_qps_lock);
+
+	return 0;
+
+fail_table:
+	kfree(rdi->qp_dev->qp_table);
+	free_qpn_table(&rdi->qp_dev->qpn_table);
+
+no_qp_table:
+	kfree(rdi->qp_dev);
+
+	return ret;
+}
+
+/**
+ * free_all_qps - check for QPs still in use
+ * @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
+ */
+static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
+{
+	unsigned long flags;
+	struct rvt_qp *qp;
+	unsigned n, qp_inuse = 0;
+	spinlock_t *ql; /* work around too long line below */
+
+	if (rdi->driver_f.free_all_qps)
+		qp_inuse = rdi->driver_f.free_all_qps(rdi);
+
+	qp_inuse += rvt_mcast_tree_empty(rdi);
+
+	if (!rdi->qp_dev)
+		return qp_inuse;
+
+	ql = &rdi->qp_dev->qpt_lock;
+	spin_lock_irqsave(ql, flags);
+	for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
+		qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
+					       lockdep_is_held(ql));
+		RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
+
+		for (; qp; qp = rcu_dereference_protected(qp->next,
+							  lockdep_is_held(ql)))
+			qp_inuse++;
+	}
+	spin_unlock_irqrestore(ql, flags);
+	synchronize_rcu();
+	return qp_inuse;
+}
+
+/**
+ * rvt_qp_exit - clean up qps on device exit
+ * @rdi: rvt dev structure
+ *
+ * Check for qp leaks and free resources.
+ */
+void rvt_qp_exit(struct rvt_dev_info *rdi)
+{
+	u32 qps_inuse = rvt_free_all_qps(rdi);
+
+	if (qps_inuse)
+		rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
+			   qps_inuse);
+	if (!rdi->qp_dev)
+		return;
+
+	kfree(rdi->qp_dev->qp_table);
+	free_qpn_table(&rdi->qp_dev->qpn_table);
+	kfree(rdi->qp_dev);
+}
+
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
+{
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
+}
+
+/**
+ * alloc_qpn - Allocate the next available qpn or zero/one for QP type
+ *	       IB_QPT_SMI/IB_QPT_GSI
+ *@rdi:	rvt device info structure
+ *@qpt: queue pair number table pointer
+ *@port_num: IB port number, 1 based, comes from core
+ *
+ * Return: The queue pair number
+ */
+static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		     enum ib_qp_type type, u8 port_num, gfp_t gfp)
+{
+	u32 i, offset, max_scan, qpn;
+	struct rvt_qpn_map *map;
+	u32 ret;
+
+	if (rdi->driver_f.alloc_qpn)
+		return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp);
+
+	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
+		unsigned n;
+
+		ret = type == IB_QPT_GSI;
+		n = 1 << (ret + 2 * (port_num - 1));
+		spin_lock(&qpt->lock);
+		if (qpt->flags & n)
+			ret = -EINVAL;
+		else
+			qpt->flags |= n;
+		spin_unlock(&qpt->lock);
+		goto bail;
+	}
+
+	qpn = qpt->last + qpt->incr;
+	if (qpn >= RVT_QPN_MAX)
+		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
+	/* offset carries bit 0 */
+	offset = qpn & RVT_BITS_PER_PAGE_MASK;
+	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
+	max_scan = qpt->nmaps - !offset;
+	for (i = 0;;) {
+		if (unlikely(!map->page)) {
+			get_map_page(qpt, map, gfp);
+			if (unlikely(!map->page))
+				break;
+		}
+		do {
+			if (!test_and_set_bit(offset, map->page)) {
+				qpt->last = qpn;
+				ret = qpn;
+				goto bail;
+			}
+			offset += qpt->incr;
+			/*
+			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
+			 * That is OK.   It gets re-assigned below
+			 */
+			qpn = mk_qpn(qpt, map, offset);
+		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
+		/*
+		 * In order to keep the number of pages allocated to a
+		 * minimum, we scan the all existing pages before increasing
+		 * the size of the bitmap table.
+		 */
+		if (++i > max_scan) {
+			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
+				break;
+			map = &qpt->map[qpt->nmaps++];
+			/* start at incr with current bit 0 */
+			offset = qpt->incr | (offset & 1);
+		} else if (map < &qpt->map[qpt->nmaps]) {
+			++map;
+			/* start at incr with current bit 0 */
+			offset = qpt->incr | (offset & 1);
+		} else {
+			map = &qpt->map[0];
+			/* wrap to first map page, invert bit 0 */
+			offset = qpt->incr | ((offset & 1) ^ 1);
+		}
+		/* there can be no bits at shift and below */
+		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
+		qpn = mk_qpn(qpt, map, offset);
+	}
+
+	ret = -ENOMEM;
+
+bail:
+	return ret;
+}
+
+static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+	struct rvt_qpn_map *map;
+
+	map = qpt->map + qpn / RVT_BITS_PER_PAGE;
+	if (map->page)
+		clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
+/**
+ * rvt_clear_mr_refs - Drop help mr refs
+ * @qp: rvt qp data structure
+ * @clr_sends: If shoudl clear send side or not
+ */
+static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
+{
+	unsigned n;
+
+	if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
+		rvt_put_ss(&qp->s_rdma_read_sge);
+
+	rvt_put_ss(&qp->r_sge);
+
+	if (clr_sends) {
+		while (qp->s_last != qp->s_head) {
+			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
+			unsigned i;
+
+			for (i = 0; i < wqe->wr.num_sge; i++) {
+				struct rvt_sge *sge = &wqe->sg_list[i];
+
+				rvt_put_mr(sge->mr);
+			}
+			if (qp->ibqp.qp_type == IB_QPT_UD ||
+			    qp->ibqp.qp_type == IB_QPT_SMI ||
+			    qp->ibqp.qp_type == IB_QPT_GSI)
+				atomic_dec(&ibah_to_rvtah(
+						wqe->ud_wr.ah)->refcount);
+			if (++qp->s_last >= qp->s_size)
+				qp->s_last = 0;
+			smp_wmb(); /* see qp_set_savail */
+		}
+		if (qp->s_rdma_mr) {
+			rvt_put_mr(qp->s_rdma_mr);
+			qp->s_rdma_mr = NULL;
+		}
+	}
+
+	if (qp->ibqp.qp_type != IB_QPT_RC)
+		return;
+
+	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
+		struct rvt_ack_entry *e = &qp->s_ack_queue[n];
+
+		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
+		    e->rdma_sge.mr) {
+			rvt_put_mr(e->rdma_sge.mr);
+			e->rdma_sge.mr = NULL;
+		}
+	}
+}
+
+/**
+ * rvt_remove_qp - remove qp form table
+ * @rdi: rvt dev struct
+ * @qp: qp to remove
+ *
+ * Remove the QP from the table so it can't be found asynchronously by
+ * the receive routine.
+ */
+static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+	struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+	u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
+	unsigned long flags;
+	int removed = 1;
+
+	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
+
+	if (rcu_dereference_protected(rvp->qp[0],
+			lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
+		RCU_INIT_POINTER(rvp->qp[0], NULL);
+	} else if (rcu_dereference_protected(rvp->qp[1],
+			lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
+		RCU_INIT_POINTER(rvp->qp[1], NULL);
+	} else {
+		struct rvt_qp *q;
+		struct rvt_qp __rcu **qpp;
+
+		removed = 0;
+		qpp = &rdi->qp_dev->qp_table[n];
+		for (; (q = rcu_dereference_protected(*qpp,
+			lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
+			qpp = &q->next) {
+			if (q == qp) {
+				RCU_INIT_POINTER(*qpp,
+				     rcu_dereference_protected(qp->next,
+				     lockdep_is_held(&rdi->qp_dev->qpt_lock)));
+				removed = 1;
+				trace_rvt_qpremove(qp, n);
+				break;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
+	if (removed) {
+		synchronize_rcu();
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+}
+
+/**
+ * reset_qp - initialize the QP state to the reset state
+ * @qp: the QP to reset
+ * @type: the QP type
+ * r and s lock are required to be held by the caller
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		  enum ib_qp_type type)
+{
+	if (qp->state != IB_QPS_RESET) {
+		qp->state = IB_QPS_RESET;
+
+		/* Let drivers flush their waitlist */
+		rdi->driver_f.flush_qp_waiters(qp);
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
+		spin_unlock(&qp->s_lock);
+		spin_unlock(&qp->s_hlock);
+		spin_unlock_irq(&qp->r_lock);
+
+		/* Stop the send queue and the retry timer */
+		rdi->driver_f.stop_send_queue(qp);
+
+		/* Wait for things to stop */
+		rdi->driver_f.quiesce_qp(qp);
+
+		/* take qp out the hash and wait for it to be unused */
+		rvt_remove_qp(rdi, qp);
+		wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+		/* grab the lock b/c it was locked at call time */
+		spin_lock_irq(&qp->r_lock);
+		spin_lock(&qp->s_hlock);
+		spin_lock(&qp->s_lock);
+
+		rvt_clear_mr_refs(qp, 1);
+	}
+
+	/*
+	 * Let the driver do any tear down it needs to for a qp
+	 * that has been reset
+	 */
+	rdi->driver_f.notify_qp_reset(qp);
+
+	qp->remote_qpn = 0;
+	qp->qkey = 0;
+	qp->qp_access_flags = 0;
+	qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
+	qp->s_hdrwords = 0;
+	qp->s_wqe = NULL;
+	qp->s_draining = 0;
+	qp->s_next_psn = 0;
+	qp->s_last_psn = 0;
+	qp->s_sending_psn = 0;
+	qp->s_sending_hpsn = 0;
+	qp->s_psn = 0;
+	qp->r_psn = 0;
+	qp->r_msn = 0;
+	if (type == IB_QPT_RC) {
+		qp->s_state = IB_OPCODE_RC_SEND_LAST;
+		qp->r_state = IB_OPCODE_RC_SEND_LAST;
+	} else {
+		qp->s_state = IB_OPCODE_UC_SEND_LAST;
+		qp->r_state = IB_OPCODE_UC_SEND_LAST;
+	}
+	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
+	qp->r_nak_state = 0;
+	qp->r_aflags = 0;
+	qp->r_flags = 0;
+	qp->s_head = 0;
+	qp->s_tail = 0;
+	qp->s_cur = 0;
+	qp->s_acked = 0;
+	qp->s_last = 0;
+	qp->s_ssn = 1;
+	qp->s_lsn = 0;
+	qp->s_mig_state = IB_MIG_MIGRATED;
+	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+	qp->r_head_ack_queue = 0;
+	qp->s_tail_ack_queue = 0;
+	qp->s_num_rd_atomic = 0;
+	if (qp->r_rq.wq) {
+		qp->r_rq.wq->head = 0;
+		qp->r_rq.wq->tail = 0;
+	}
+	qp->r_sge.num_sge = 0;
+}
+
+/**
+ * rvt_create_qp - create a queue pair for a device
+ * @ibpd: the protection domain who's device we create the queue pair for
+ * @init_attr: the attributes of the queue pair
+ * @udata: user data for libibverbs.so
+ *
+ * Queue pair creation is mostly an rvt issue. However, drivers have their own
+ * unique idea of what queue pair numbers mean. For instance there is a reserved
+ * range for PSM.
+ *
+ * Return: the queue pair on success, otherwise returns an errno.
+ *
+ * Called by the ib_create_qp() core verbs function.
+ */
+struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata)
+{
+	struct rvt_qp *qp;
+	int err;
+	struct rvt_swqe *swq = NULL;
+	size_t sz;
+	size_t sg_list_sz;
+	struct ib_qp *ret = ERR_PTR(-ENOMEM);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
+	void *priv = NULL;
+	gfp_t gfp;
+
+	if (!rdi)
+		return ERR_PTR(-EINVAL);
+
+	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
+	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
+	    init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+		return ERR_PTR(-EINVAL);
+
+	/* GFP_NOIO is applicable to RC QP's only */
+
+	if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+	    init_attr->qp_type != IB_QPT_RC)
+		return ERR_PTR(-EINVAL);
+
+	gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+						GFP_NOIO : GFP_KERNEL;
+
+	/* Check receive queue parameters if no SRQ is specified. */
+	if (!init_attr->srq) {
+		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
+		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
+			return ERR_PTR(-EINVAL);
+
+		if (init_attr->cap.max_send_sge +
+		    init_attr->cap.max_send_wr +
+		    init_attr->cap.max_recv_sge +
+		    init_attr->cap.max_recv_wr == 0)
+			return ERR_PTR(-EINVAL);
+	}
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		if (init_attr->port_num == 0 ||
+		    init_attr->port_num > ibpd->device->phys_port_cnt)
+			return ERR_PTR(-EINVAL);
+	case IB_QPT_UC:
+	case IB_QPT_RC:
+	case IB_QPT_UD:
+		sz = sizeof(struct rvt_sge) *
+			init_attr->cap.max_send_sge +
+			sizeof(struct rvt_swqe);
+		if (gfp == GFP_NOIO)
+			swq = __vmalloc(
+				(init_attr->cap.max_send_wr + 1) * sz,
+				gfp, PAGE_KERNEL);
+		else
+			swq = vmalloc_node(
+				(init_attr->cap.max_send_wr + 1) * sz,
+				rdi->dparms.node);
+		if (!swq)
+			return ERR_PTR(-ENOMEM);
+
+		sz = sizeof(*qp);
+		sg_list_sz = 0;
+		if (init_attr->srq) {
+			struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
+
+			if (srq->rq.max_sge > 1)
+				sg_list_sz = sizeof(*qp->r_sg_list) *
+					(srq->rq.max_sge - 1);
+		} else if (init_attr->cap.max_recv_sge > 1)
+			sg_list_sz = sizeof(*qp->r_sg_list) *
+				(init_attr->cap.max_recv_sge - 1);
+		qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node);
+		if (!qp)
+			goto bail_swq;
+
+		RCU_INIT_POINTER(qp->next, NULL);
+
+		/*
+		 * Driver needs to set up it's private QP structure and do any
+		 * initialization that is needed.
+		 */
+		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
+		if (!priv)
+			goto bail_qp;
+		qp->priv = priv;
+		qp->timeout_jiffies =
+			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+				1000UL);
+		if (init_attr->srq) {
+			sz = 0;
+		} else {
+			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
+			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
+			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
+				sizeof(struct rvt_rwqe);
+			if (udata)
+				qp->r_rq.wq = vmalloc_user(
+						sizeof(struct rvt_rwq) +
+						qp->r_rq.size * sz);
+			else if (gfp == GFP_NOIO)
+				qp->r_rq.wq = __vmalloc(
+						sizeof(struct rvt_rwq) +
+						qp->r_rq.size * sz,
+						gfp, PAGE_KERNEL);
+			else
+				qp->r_rq.wq = vmalloc_node(
+						sizeof(struct rvt_rwq) +
+						qp->r_rq.size * sz,
+						rdi->dparms.node);
+			if (!qp->r_rq.wq)
+				goto bail_driver_priv;
+		}
+
+		/*
+		 * ib_create_qp() will initialize qp->ibqp
+		 * except for qp->ibqp.qp_num.
+		 */
+		spin_lock_init(&qp->r_lock);
+		spin_lock_init(&qp->s_hlock);
+		spin_lock_init(&qp->s_lock);
+		spin_lock_init(&qp->r_rq.lock);
+		atomic_set(&qp->refcount, 0);
+		init_waitqueue_head(&qp->wait);
+		init_timer(&qp->s_timer);
+		qp->s_timer.data = (unsigned long)qp;
+		INIT_LIST_HEAD(&qp->rspwait);
+		qp->state = IB_QPS_RESET;
+		qp->s_wq = swq;
+		qp->s_size = init_attr->cap.max_send_wr + 1;
+		qp->s_avail = init_attr->cap.max_send_wr;
+		qp->s_max_sge = init_attr->cap.max_send_sge;
+		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+			qp->s_flags = RVT_S_SIGNAL_REQ_WR;
+
+		err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
+				init_attr->qp_type,
+				init_attr->port_num, gfp);
+		if (err < 0) {
+			ret = ERR_PTR(err);
+			goto bail_rq_wq;
+		}
+		qp->ibqp.qp_num = err;
+		qp->port_num = init_attr->port_num;
+		rvt_reset_qp(rdi, qp, init_attr->qp_type);
+		break;
+
+	default:
+		/* Don't support raw QPs */
+		return ERR_PTR(-EINVAL);
+	}
+
+	init_attr->cap.max_inline_data = 0;
+
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See rvt_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		if (!qp->r_rq.wq) {
+			__u64 offset = 0;
+
+			err = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (err) {
+				ret = ERR_PTR(err);
+				goto bail_qpn;
+			}
+		} else {
+			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
+
+			qp->ip = rvt_create_mmap_info(rdi, s,
+						      ibpd->uobject->context,
+						      qp->r_rq.wq);
+			if (!qp->ip) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_qpn;
+			}
+
+			err = ib_copy_to_udata(udata, &qp->ip->offset,
+					       sizeof(qp->ip->offset));
+			if (err) {
+				ret = ERR_PTR(err);
+				goto bail_ip;
+			}
+		}
+		qp->pid = current->pid;
+	}
+
+	spin_lock(&rdi->n_qps_lock);
+	if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
+		spin_unlock(&rdi->n_qps_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_ip;
+	}
+
+	rdi->n_qps_allocated++;
+	/*
+	 * Maintain a busy_jiffies variable that will be added to the timeout
+	 * period in mod_retry_timer and add_retry_timer. This busy jiffies
+	 * is scaled by the number of rc qps created for the device to reduce
+	 * the number of timeouts occurring when there is a large number of
+	 * qps. busy_jiffies is incremented every rc qp scaling interval.
+	 * The scaling interval is selected based on extensive performance
+	 * evaluation of targeted workloads.
+	 */
+	if (init_attr->qp_type == IB_QPT_RC) {
+		rdi->n_rc_qps++;
+		rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
+	}
+	spin_unlock(&rdi->n_qps_lock);
+
+	if (qp->ip) {
+		spin_lock_irq(&rdi->pending_lock);
+		list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
+	}
+
+	ret = &qp->ibqp;
+
+	/*
+	 * We have our QP and its good, now keep track of what types of opcodes
+	 * can be processed on this QP. We do this by keeping track of what the
+	 * 3 high order bits of the opcode are.
+	 */
+	switch (init_attr->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	case IB_QPT_RC:
+		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	case IB_QPT_UC:
+		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	default:
+		ret = ERR_PTR(-EINVAL);
+		goto bail_ip;
+	}
+
+	return ret;
+
+bail_ip:
+	kref_put(&qp->ip->ref, rvt_release_mmap_info);
+
+bail_qpn:
+	free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+
+bail_rq_wq:
+	vfree(qp->r_rq.wq);
+
+bail_driver_priv:
+	rdi->driver_f.qp_priv_free(rdi, qp);
+
+bail_qp:
+	kfree(qp);
+
+bail_swq:
+	vfree(swq);
+
+	return ret;
+}
+
+/**
+ * rvt_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
+ * @err: the receive completion error to signal if a RWQE is active
+ *
+ * Flushes both send and receive work queues.
+ *
+ * Return: true if last WQE event should be generated.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
+ */
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
+{
+	struct ib_wc wc;
+	int ret = 0;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
+		goto bail;
+
+	qp->state = IB_QPS_ERR;
+
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+		del_timer(&qp->s_timer);
+	}
+
+	if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
+		qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
+
+	rdi->driver_f.notify_error_qp(qp);
+
+	/* Schedule the sending tasklet to drain the send work queue. */
+	if (ACCESS_ONCE(qp->s_last) != qp->s_head)
+		rdi->driver_f.schedule_send(qp);
+
+	rvt_clear_mr_refs(qp, 0);
+
+	memset(&wc, 0, sizeof(wc));
+	wc.qp = &qp->ibqp;
+	wc.opcode = IB_WC_RECV;
+
+	if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
+		wc.wr_id = qp->r_wr_id;
+		wc.status = err;
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+	}
+	wc.status = IB_WC_WR_FLUSH_ERR;
+
+	if (qp->r_rq.wq) {
+		struct rvt_rwq *wq;
+		u32 head;
+		u32 tail;
+
+		spin_lock(&qp->r_rq.lock);
+
+		/* sanity check pointers before trusting them */
+		wq = qp->r_rq.wq;
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
+		while (tail != head) {
+			wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+			if (++tail >= qp->r_rq.size)
+				tail = 0;
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+		}
+		wq->tail = tail;
+
+		spin_unlock(&qp->r_rq.lock);
+	} else if (qp->ibqp.event_handler) {
+		ret = 1;
+	}
+
+bail:
+	return ret;
+}
+EXPORT_SYMBOL(rvt_error_qp);
+
+/*
+ * Put the QP into the hash table.
+ * The hash table holds a reference to the QP.
+ */
+static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+	struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+	unsigned long flags;
+
+	atomic_inc(&qp->refcount);
+	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
+
+	if (qp->ibqp.qp_num <= 1) {
+		rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
+	} else {
+		u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
+
+		qp->next = rdi->qp_dev->qp_table[n];
+		rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
+		trace_rvt_qpinsert(qp, n);
+	}
+
+	spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
+}
+
+/**
+ * qib_modify_qp - modify the attributes of a queue pair
+ * @ibqp: the queue pair who's attributes we're modifying
+ * @attr: the new attributes
+ * @attr_mask: the mask of attributes to modify
+ * @udata: user data for libibverbs.so
+ *
+ * Return: 0 on success, otherwise returns an errno.
+ */
+int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		  int attr_mask, struct ib_udata *udata)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	enum ib_qp_state cur_state, new_state;
+	struct ib_event ev;
+	int lastwqe = 0;
+	int mig = 0;
+	int pmtu = 0; /* for gcc warning only */
+	enum rdma_link_layer link;
+
+	link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
+
+	spin_lock_irq(&qp->r_lock);
+	spin_lock(&qp->s_hlock);
+	spin_lock(&qp->s_lock);
+
+	cur_state = attr_mask & IB_QP_CUR_STATE ?
+		attr->cur_qp_state : qp->state;
+	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+				attr_mask, link))
+		goto inval;
+
+	if (rdi->driver_f.check_modify_qp &&
+	    rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
+		goto inval;
+
+	if (attr_mask & IB_QP_AV) {
+		if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+			goto inval;
+		if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
+			goto inval;
+	}
+
+	if (attr_mask & IB_QP_ALT_PATH) {
+		if (attr->alt_ah_attr.dlid >=
+		    be16_to_cpu(IB_MULTICAST_LID_BASE))
+			goto inval;
+		if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
+			goto inval;
+		if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
+			goto inval;
+	}
+
+	if (attr_mask & IB_QP_PKEY_INDEX)
+		if (attr->pkey_index >= rvt_get_npkeys(rdi))
+			goto inval;
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER)
+		if (attr->min_rnr_timer > 31)
+			goto inval;
+
+	if (attr_mask & IB_QP_PORT)
+		if (qp->ibqp.qp_type == IB_QPT_SMI ||
+		    qp->ibqp.qp_type == IB_QPT_GSI ||
+		    attr->port_num == 0 ||
+		    attr->port_num > ibqp->device->phys_port_cnt)
+			goto inval;
+
+	if (attr_mask & IB_QP_DEST_QPN)
+		if (attr->dest_qp_num > RVT_QPN_MASK)
+			goto inval;
+
+	if (attr_mask & IB_QP_RETRY_CNT)
+		if (attr->retry_cnt > 7)
+			goto inval;
+
+	if (attr_mask & IB_QP_RNR_RETRY)
+		if (attr->rnr_retry > 7)
+			goto inval;
+
+	/*
+	 * Don't allow invalid path_mtu values.  OK to set greater
+	 * than the active mtu (or even the max_cap, if we have tuned
+	 * that to a small mtu.  We'll set qp->path_mtu
+	 * to the lesser of requested attribute mtu and active,
+	 * for packetizing messages.
+	 * Note that the QP port has to be set in INIT and MTU in RTR.
+	 */
+	if (attr_mask & IB_QP_PATH_MTU) {
+		pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
+		if (pmtu < 0)
+			goto inval;
+	}
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE) {
+		if (attr->path_mig_state == IB_MIG_REARM) {
+			if (qp->s_mig_state == IB_MIG_ARMED)
+				goto inval;
+			if (new_state != IB_QPS_RTS)
+				goto inval;
+		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
+			if (qp->s_mig_state == IB_MIG_REARM)
+				goto inval;
+			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
+				goto inval;
+			if (qp->s_mig_state == IB_MIG_ARMED)
+				mig = 1;
+		} else {
+			goto inval;
+		}
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
+			goto inval;
+
+	switch (new_state) {
+	case IB_QPS_RESET:
+		if (qp->state != IB_QPS_RESET)
+			rvt_reset_qp(rdi, qp, ibqp->qp_type);
+		break;
+
+	case IB_QPS_RTR:
+		/* Allow event to re-trigger if QP set to RTR more than once */
+		qp->r_flags &= ~RVT_R_COMM_EST;
+		qp->state = new_state;
+		break;
+
+	case IB_QPS_SQD:
+		qp->s_draining = qp->s_last != qp->s_cur;
+		qp->state = new_state;
+		break;
+
+	case IB_QPS_SQE:
+		if (qp->ibqp.qp_type == IB_QPT_RC)
+			goto inval;
+		qp->state = new_state;
+		break;
+
+	case IB_QPS_ERR:
+		lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+		break;
+
+	default:
+		qp->state = new_state;
+		break;
+	}
+
+	if (attr_mask & IB_QP_PKEY_INDEX)
+		qp->s_pkey_index = attr->pkey_index;
+
+	if (attr_mask & IB_QP_PORT)
+		qp->port_num = attr->port_num;
+
+	if (attr_mask & IB_QP_DEST_QPN)
+		qp->remote_qpn = attr->dest_qp_num;
+
+	if (attr_mask & IB_QP_SQ_PSN) {
+		qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
+		qp->s_psn = qp->s_next_psn;
+		qp->s_sending_psn = qp->s_next_psn;
+		qp->s_last_psn = qp->s_next_psn - 1;
+		qp->s_sending_hpsn = qp->s_last_psn;
+	}
+
+	if (attr_mask & IB_QP_RQ_PSN)
+		qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS)
+		qp->qp_access_flags = attr->qp_access_flags;
+
+	if (attr_mask & IB_QP_AV) {
+		qp->remote_ah_attr = attr->ah_attr;
+		qp->s_srate = attr->ah_attr.static_rate;
+		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
+	}
+
+	if (attr_mask & IB_QP_ALT_PATH) {
+		qp->alt_ah_attr = attr->alt_ah_attr;
+		qp->s_alt_pkey_index = attr->alt_pkey_index;
+	}
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE) {
+		qp->s_mig_state = attr->path_mig_state;
+		if (mig) {
+			qp->remote_ah_attr = qp->alt_ah_attr;
+			qp->port_num = qp->alt_ah_attr.port_num;
+			qp->s_pkey_index = qp->s_alt_pkey_index;
+		}
+	}
+
+	if (attr_mask & IB_QP_PATH_MTU) {
+		qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
+		qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
+		qp->log_pmtu = ilog2(qp->pmtu);
+	}
+
+	if (attr_mask & IB_QP_RETRY_CNT) {
+		qp->s_retry_cnt = attr->retry_cnt;
+		qp->s_retry = attr->retry_cnt;
+	}
+
+	if (attr_mask & IB_QP_RNR_RETRY) {
+		qp->s_rnr_retry_cnt = attr->rnr_retry;
+		qp->s_rnr_retry = attr->rnr_retry;
+	}
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER)
+		qp->r_min_rnr_timer = attr->min_rnr_timer;
+
+	if (attr_mask & IB_QP_TIMEOUT) {
+		qp->timeout = attr->timeout;
+		qp->timeout_jiffies =
+			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+				1000UL);
+	}
+
+	if (attr_mask & IB_QP_QKEY)
+		qp->qkey = attr->qkey;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		qp->s_max_rd_atomic = attr->max_rd_atomic;
+
+	if (rdi->driver_f.modify_qp)
+		rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
+
+	spin_unlock(&qp->s_lock);
+	spin_unlock(&qp->s_hlock);
+	spin_unlock_irq(&qp->r_lock);
+
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+		rvt_insert_qp(rdi, qp);
+
+	if (lastwqe) {
+		ev.device = qp->ibqp.device;
+		ev.element.qp = &qp->ibqp;
+		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+	}
+	if (mig) {
+		ev.device = qp->ibqp.device;
+		ev.element.qp = &qp->ibqp;
+		ev.event = IB_EVENT_PATH_MIG;
+		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+	}
+	return 0;
+
+inval:
+	spin_unlock(&qp->s_lock);
+	spin_unlock(&qp->s_hlock);
+	spin_unlock_irq(&qp->r_lock);
+	return -EINVAL;
+}
+
+/** rvt_free_qpn - Free a qpn from the bit map
+ * @qpt: QP table
+ * @qpn: queue pair number to free
+ */
+static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+	struct rvt_qpn_map *map;
+
+	map = qpt->map + qpn / RVT_BITS_PER_PAGE;
+	if (map->page)
+		clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
+/**
+ * rvt_destroy_qp - destroy a queue pair
+ * @ibqp: the queue pair to destroy
+ *
+ * Note that this can be called while the QP is actively sending or
+ * receiving!
+ *
+ * Return: 0 on success.
+ */
+int rvt_destroy_qp(struct ib_qp *ibqp)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	spin_lock_irq(&qp->r_lock);
+	spin_lock(&qp->s_hlock);
+	spin_lock(&qp->s_lock);
+	rvt_reset_qp(rdi, qp, ibqp->qp_type);
+	spin_unlock(&qp->s_lock);
+	spin_unlock(&qp->s_hlock);
+	spin_unlock_irq(&qp->r_lock);
+
+	/* qpn is now available for use again */
+	rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+
+	spin_lock(&rdi->n_qps_lock);
+	rdi->n_qps_allocated--;
+	if (qp->ibqp.qp_type == IB_QPT_RC) {
+		rdi->n_rc_qps--;
+		rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
+	}
+	spin_unlock(&rdi->n_qps_lock);
+
+	if (qp->ip)
+		kref_put(&qp->ip->ref, rvt_release_mmap_info);
+	else
+		vfree(qp->r_rq.wq);
+	vfree(qp->s_wq);
+	rdi->driver_f.qp_priv_free(rdi, qp);
+	kfree(qp);
+	return 0;
+}
+
+/**
+ * rvt_query_qp - query an ipbq
+ * @ibqp: IB qp to query
+ * @attr: attr struct to fill in
+ * @attr_mask: attr mask ignored
+ * @init_attr: struct to fill in
+ *
+ * Return: always 0
+ */
+int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		 int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	attr->qp_state = qp->state;
+	attr->cur_qp_state = attr->qp_state;
+	attr->path_mtu = qp->path_mtu;
+	attr->path_mig_state = qp->s_mig_state;
+	attr->qkey = qp->qkey;
+	attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
+	attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
+	attr->dest_qp_num = qp->remote_qpn;
+	attr->qp_access_flags = qp->qp_access_flags;
+	attr->cap.max_send_wr = qp->s_size - 1;
+	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
+	attr->cap.max_send_sge = qp->s_max_sge;
+	attr->cap.max_recv_sge = qp->r_rq.max_sge;
+	attr->cap.max_inline_data = 0;
+	attr->ah_attr = qp->remote_ah_attr;
+	attr->alt_ah_attr = qp->alt_ah_attr;
+	attr->pkey_index = qp->s_pkey_index;
+	attr->alt_pkey_index = qp->s_alt_pkey_index;
+	attr->en_sqd_async_notify = 0;
+	attr->sq_draining = qp->s_draining;
+	attr->max_rd_atomic = qp->s_max_rd_atomic;
+	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
+	attr->min_rnr_timer = qp->r_min_rnr_timer;
+	attr->port_num = qp->port_num;
+	attr->timeout = qp->timeout;
+	attr->retry_cnt = qp->s_retry_cnt;
+	attr->rnr_retry = qp->s_rnr_retry_cnt;
+	attr->alt_port_num = qp->alt_ah_attr.port_num;
+	attr->alt_timeout = qp->alt_timeout;
+
+	init_attr->event_handler = qp->ibqp.event_handler;
+	init_attr->qp_context = qp->ibqp.qp_context;
+	init_attr->send_cq = qp->ibqp.send_cq;
+	init_attr->recv_cq = qp->ibqp.recv_cq;
+	init_attr->srq = qp->ibqp.srq;
+	init_attr->cap = attr->cap;
+	if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
+		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+	else
+		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+	init_attr->qp_type = qp->ibqp.qp_type;
+	init_attr->port_num = qp->port_num;
+	return 0;
+}
+
+/**
+ * rvt_post_receive - post a receive on a QP
+ * @ibqp: the QP to post the receive on
+ * @wr: the WR to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success otherwise errno
+ */
+int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		  struct ib_recv_wr **bad_wr)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_rwq *wq = qp->r_rq.wq;
+	unsigned long flags;
+	int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
+				!qp->ibqp.srq;
+
+	/* Check that state is OK to post receive. */
+	if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+
+	for (; wr; wr = wr->next) {
+		struct rvt_rwqe *wqe;
+		u32 next;
+		int i;
+
+		if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
+			*bad_wr = wr;
+			return -EINVAL;
+		}
+
+		spin_lock_irqsave(&qp->r_rq.lock, flags);
+		next = wq->head + 1;
+		if (next >= qp->r_rq.size)
+			next = 0;
+		if (next == wq->tail) {
+			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+			*bad_wr = wr;
+			return -ENOMEM;
+		}
+		if (unlikely(qp_err_flush)) {
+			struct ib_wc wc;
+
+			memset(&wc, 0, sizeof(wc));
+			wc.qp = &qp->ibqp;
+			wc.opcode = IB_WC_RECV;
+			wc.wr_id = wr->wr_id;
+			wc.status = IB_WC_WR_FLUSH_ERR;
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+		} else {
+			wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
+			wqe->wr_id = wr->wr_id;
+			wqe->num_sge = wr->num_sge;
+			for (i = 0; i < wr->num_sge; i++)
+				wqe->sg_list[i] = wr->sg_list[i];
+			/*
+			 * Make sure queue entry is written
+			 * before the head index.
+			 */
+			smp_wmb();
+			wq->head = next;
+		}
+		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+	}
+	return 0;
+}
+
+/**
+ * qp_get_savail - return number of avail send entries
+ *
+ * @qp - the qp
+ *
+ * This assumes the s_hlock is held but the s_last
+ * qp variable is uncontrolled.
+ */
+static inline u32 qp_get_savail(struct rvt_qp *qp)
+{
+	u32 slast;
+	u32 ret;
+
+	smp_read_barrier_depends(); /* see rc.c */
+	slast = ACCESS_ONCE(qp->s_last);
+	if (qp->s_head >= slast)
+		ret = qp->s_size - (qp->s_head - slast);
+	else
+		ret = slast - qp->s_head;
+	return ret - 1;
+}
+
+/**
+ * rvt_post_one_wr - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int rvt_post_one_wr(struct rvt_qp *qp,
+			   struct ib_send_wr *wr,
+			   int *call_send)
+{
+	struct rvt_swqe *wqe;
+	u32 next;
+	int i;
+	int j;
+	int acc;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+	u8 log_pmtu;
+	int ret;
+
+	/* IB spec says that num_sge == 0 is OK. */
+	if (unlikely(wr->num_sge > qp->s_max_sge))
+		return -EINVAL;
+
+	/*
+	 * Don't allow RDMA reads or atomic operations on UC or
+	 * undefined operations.
+	 * Make sure buffer is large enough to hold the result for atomics.
+	 */
+	if (qp->ibqp.qp_type == IB_QPT_UC) {
+		if ((unsigned)wr->opcode >= IB_WR_RDMA_READ)
+			return -EINVAL;
+	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
+		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+		if (wr->opcode != IB_WR_SEND &&
+		    wr->opcode != IB_WR_SEND_WITH_IMM)
+			return -EINVAL;
+		/* Check UD destination address PD */
+		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
+			return -EINVAL;
+	} else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
+		return -EINVAL;
+	} else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+		   (wr->num_sge == 0 ||
+		    wr->sg_list[0].length < sizeof(u64) ||
+		    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
+		return -EINVAL;
+	} else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+		return -EINVAL;
+	}
+	/* check for avail */
+	if (unlikely(!qp->s_avail)) {
+		qp->s_avail = qp_get_savail(qp);
+		if (WARN_ON(qp->s_avail > (qp->s_size - 1)))
+			rvt_pr_err(rdi,
+				   "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
+				   qp->ibqp.qp_num, qp->s_size, qp->s_avail,
+				   qp->s_head, qp->s_tail, qp->s_cur,
+				   qp->s_acked, qp->s_last);
+		if (!qp->s_avail)
+			return -ENOMEM;
+	}
+	next = qp->s_head + 1;
+	if (next >= qp->s_size)
+		next = 0;
+
+	rkt = &rdi->lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.pd);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_head);
+
+	if (qp->ibqp.qp_type != IB_QPT_UC &&
+	    qp->ibqp.qp_type != IB_QPT_RC)
+		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+		 wr->opcode == IB_WR_RDMA_WRITE ||
+		 wr->opcode == IB_WR_RDMA_READ)
+		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+	else
+		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
+	wqe->length = 0;
+	j = 0;
+	if (wr->num_sge) {
+		acc = wr->opcode >= IB_WR_RDMA_READ ?
+			IB_ACCESS_LOCAL_WRITE : 0;
+		for (i = 0; i < wr->num_sge; i++) {
+			u32 length = wr->sg_list[i].length;
+			int ok;
+
+			if (length == 0)
+				continue;
+			ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
+					 &wr->sg_list[i], acc);
+			if (!ok) {
+				ret = -EINVAL;
+				goto bail_inval_free;
+			}
+			wqe->length += length;
+			j++;
+		}
+		wqe->wr.num_sge = j;
+	}
+
+	/* general part of wqe valid - allow for driver checks */
+	if (rdi->driver_f.check_send_wqe) {
+		ret = rdi->driver_f.check_send_wqe(qp, wqe);
+		if (ret < 0)
+			goto bail_inval_free;
+		if (ret)
+			*call_send = ret;
+	}
+
+	log_pmtu = qp->log_pmtu;
+	if (qp->ibqp.qp_type != IB_QPT_UC &&
+	    qp->ibqp.qp_type != IB_QPT_RC) {
+		struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
+
+		log_pmtu = ah->log_pmtu;
+		atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+	}
+
+	wqe->ssn = qp->s_ssn++;
+	wqe->psn = qp->s_next_psn;
+	wqe->lpsn = wqe->psn +
+			(wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0);
+	qp->s_next_psn = wqe->lpsn + 1;
+	trace_rvt_post_one_wr(qp, wqe);
+	smp_wmb(); /* see request builders */
+	qp->s_avail--;
+	qp->s_head = next;
+
+	return 0;
+
+bail_inval_free:
+	/* release mr holds */
+	while (j) {
+		struct rvt_sge *sge = &wqe->sg_list[--j];
+
+		rvt_put_mr(sge->mr);
+	}
+	return ret;
+}
+
+/**
+ * rvt_post_send - post a send on a QP
+ * @ibqp: the QP to post the send on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success else errno
+ */
+int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		  struct ib_send_wr **bad_wr)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	unsigned long flags = 0;
+	int call_send;
+	unsigned nreq = 0;
+	int err = 0;
+
+	spin_lock_irqsave(&qp->s_hlock, flags);
+
+	/*
+	 * Ensure QP state is such that we can send. If not bail out early,
+	 * there is no need to do this every time we post a send.
+	 */
+	if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
+		spin_unlock_irqrestore(&qp->s_hlock, flags);
+		return -EINVAL;
+	}
+
+	/*
+	 * If the send queue is empty, and we only have a single WR then just go
+	 * ahead and kick the send engine into gear. Otherwise we will always
+	 * just schedule the send to happen later.
+	 */
+	call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
+
+	for (; wr; wr = wr->next) {
+		err = rvt_post_one_wr(qp, wr, &call_send);
+		if (unlikely(err)) {
+			*bad_wr = wr;
+			goto bail;
+		}
+		nreq++;
+	}
+bail:
+	spin_unlock_irqrestore(&qp->s_hlock, flags);
+	if (nreq) {
+		if (call_send)
+			rdi->driver_f.schedule_send_no_lock(qp);
+		else
+			rdi->driver_f.do_send(qp);
+	}
+	return err;
+}
+
+/**
+ * rvt_post_srq_receive - post a receive on a shared receive queue
+ * @ibsrq: the SRQ to post the receive on
+ * @wr: the list of work requests to post
+ * @bad_wr: A pointer to the first WR to cause a problem is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success else errno
+ */
+int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr)
+{
+	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+	struct rvt_rwq *wq;
+	unsigned long flags;
+
+	for (; wr; wr = wr->next) {
+		struct rvt_rwqe *wqe;
+		u32 next;
+		int i;
+
+		if ((unsigned)wr->num_sge > srq->rq.max_sge) {
+			*bad_wr = wr;
+			return -EINVAL;
+		}
+
+		spin_lock_irqsave(&srq->rq.lock, flags);
+		wq = srq->rq.wq;
+		next = wq->head + 1;
+		if (next >= srq->rq.size)
+			next = 0;
+		if (next == wq->tail) {
+			spin_unlock_irqrestore(&srq->rq.lock, flags);
+			*bad_wr = wr;
+			return -ENOMEM;
+		}
+
+		wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
+		wqe->wr_id = wr->wr_id;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		/* Make sure queue entry is written before the head index. */
+		smp_wmb();
+		wq->head = next;
+		spin_unlock_irqrestore(&srq->rq.lock, flags);
+	}
+	return 0;
+}

diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
new file mode 100644
index 0000000..8409f80
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/qp.h

@@ -0,0 +1,69 @@
+#ifndef DEF_RVTQP_H
+#define DEF_RVTQP_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+int rvt_driver_qp_init(struct rvt_dev_info *rdi);
+void rvt_qp_exit(struct rvt_dev_info *rdi);
+struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata);
+int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		  int attr_mask, struct ib_udata *udata);
+int rvt_destroy_qp(struct ib_qp *ibqp);
+int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		 int attr_mask, struct ib_qp_init_attr *init_attr);
+int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		  struct ib_recv_wr **bad_wr);
+int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		  struct ib_send_wr **bad_wr);
+int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr);
+#endif          /* DEF_RVTQP_H */

diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
similarity index 64%
rename from drivers/staging/rdma/hfi1/srq.c
rename to drivers/infiniband/sw/rdmavt/srq.c
index 67786d4..f7c48e90 100644
--- a/drivers/staging/rdma/hfi1/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,96 +49,50 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
-#include "verbs.h"
+#include "srq.h"
+#include "vt.h"
 
 /**
- * hfi1_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: A pointer to the first WR to cause a problem is put here
+ * rvt_driver_srq_init - init srq resources on a per driver basis
+ * @rdi: rvt dev structure
  *
- * This may be called from interrupt context.
+ * Do any initialization needed when a driver registers with rdmavt.
  */
-int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+void rvt_driver_srq_init(struct rvt_dev_info *rdi)
 {
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_rwq *wq;
-	unsigned long flags;
-	int ret;
-
-	for (; wr; wr = wr->next) {
-		struct hfi1_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		wq = srq->rq.wq;
-		next = wq->head + 1;
-		if (next >= srq->rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&srq->rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
+	spin_lock_init(&rdi->n_srqs_lock);
+	rdi->n_srqs_allocated = 0;
 }
 
 /**
- * hfi1_create_srq - create a shared receive queue
+ * rvt_create_srq - create a shared receive queue
  * @ibpd: the protection domain of the SRQ to create
  * @srq_init_attr: the attributes of the SRQ
  * @udata: data from libibverbs when creating a user SRQ
+ *
+ * Return: Allocated srq object
  */
-struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
-			       struct ib_srq_init_attr *srq_init_attr,
-			       struct ib_udata *udata)
+struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
+			      struct ib_srq_init_attr *srq_init_attr,
+			      struct ib_udata *udata)
 {
-	struct hfi1_ibdev *dev = to_idev(ibpd->device);
-	struct hfi1_srq *srq;
+	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+	struct rvt_srq *srq;
 	u32 sz;
 	struct ib_srq *ret;
 
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		ret = ERR_PTR(-ENOSYS);
-		goto done;
-	}
+	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
+		return ERR_PTR(-ENOSYS);
 
 	if (srq_init_attr->attr.max_sge == 0 ||
-	    srq_init_attr->attr.max_sge > hfi1_max_srq_sges ||
+	    srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge ||
 	    srq_init_attr->attr.max_wr == 0 ||
-	    srq_init_attr->attr.max_wr > hfi1_max_srq_wrs) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
+	    srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr)
+		return ERR_PTR(-EINVAL);
 
 	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
+	if (!srq)
+		return ERR_PTR(-ENOMEM);
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
@@ -149,8 +100,8 @@
 	srq->rq.size = srq_init_attr->attr.max_wr + 1;
 	srq->rq.max_sge = srq_init_attr->attr.max_sge;
 	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-		sizeof(struct hfi1_rwqe);
-	srq->rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) + srq->rq.size * sz);
+		sizeof(struct rvt_rwqe);
+	srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz);
 	if (!srq->rq.wq) {
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_srq;
@@ -158,15 +109,15 @@
 
 	/*
 	 * Return the address of the RWQ as the offset to mmap.
-	 * See hfi1_mmap() for details.
+	 * See rvt_mmap() for details.
 	 */
 	if (udata && udata->outlen >= sizeof(__u64)) {
 		int err;
-		u32 s = sizeof(struct hfi1_rwq) + srq->rq.size * sz;
+		u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz;
 
 		srq->ip =
-		    hfi1_create_mmap_info(dev, s, ibpd->uobject->context,
-					  srq->rq.wq);
+		    rvt_create_mmap_info(dev, s, ibpd->uobject->context,
+					 srq->rq.wq);
 		if (!srq->ip) {
 			ret = ERR_PTR(-ENOMEM);
 			goto bail_wq;
@@ -178,8 +129,9 @@
 			ret = ERR_PTR(err);
 			goto bail_ip;
 		}
-	} else
+	} else {
 		srq->ip = NULL;
+	}
 
 	/*
 	 * ib_create_srq() will initialize srq->ibsrq.
@@ -190,7 +142,7 @@
 	srq->limit = srq_init_attr->attr.srq_limit;
 
 	spin_lock(&dev->n_srqs_lock);
-	if (dev->n_srqs_allocated == hfi1_max_srqs) {
+	if (dev->n_srqs_allocated == dev->dparms.props.max_srq) {
 		spin_unlock(&dev->n_srqs_lock);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_ip;
@@ -205,8 +157,7 @@
 		spin_unlock_irq(&dev->pending_lock);
 	}
 
-	ret = &srq->ibsrq;
-	goto done;
+	return &srq->ibsrq;
 
 bail_ip:
 	kfree(srq->ip);
@@ -214,46 +165,44 @@
 	vfree(srq->rq.wq);
 bail_srq:
 	kfree(srq);
-done:
 	return ret;
 }
 
 /**
- * hfi1_modify_srq - modify a shared receive queue
+ * rvt_modify_srq - modify a shared receive queue
  * @ibsrq: the SRQ to modify
  * @attr: the new attributes of the SRQ
  * @attr_mask: indicates which attributes to modify
  * @udata: user data for libibverbs.so
+ *
+ * Return: 0 on success
  */
-int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask,
-		    struct ib_udata *udata)
+int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		   enum ib_srq_attr_mask attr_mask,
+		   struct ib_udata *udata)
 {
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_rwq *wq;
+	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+	struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
+	struct rvt_rwq *wq;
 	int ret = 0;
 
 	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct hfi1_rwq *owq;
-		struct hfi1_rwqe *p;
+		struct rvt_rwq *owq;
+		struct rvt_rwqe *p;
 		u32 sz, size, n, head, tail;
 
 		/* Check that the requested sizes are below the limits. */
-		if ((attr->max_wr > hfi1_max_srq_wrs) ||
+		if ((attr->max_wr > dev->dparms.props.max_srq_wr) ||
 		    ((attr_mask & IB_SRQ_LIMIT) ?
-		     attr->srq_limit : srq->limit) > attr->max_wr) {
-			ret = -EINVAL;
-			goto bail;
-		}
+		     attr->srq_limit : srq->limit) > attr->max_wr)
+			return -EINVAL;
 
-		sz = sizeof(struct hfi1_rwqe) +
+		sz = sizeof(struct rvt_rwqe) +
 			srq->rq.max_sge * sizeof(struct ib_sge);
 		size = attr->max_wr + 1;
-		wq = vmalloc_user(sizeof(struct hfi1_rwq) + size * sz);
-		if (!wq) {
-			ret = -ENOMEM;
-			goto bail;
-		}
+		wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz);
+		if (!wq)
+			return -ENOMEM;
 
 		/* Check that we can write the offset to mmap. */
 		if (udata && udata->inlen >= sizeof(__u64)) {
@@ -264,8 +213,8 @@
 						 sizeof(offset_addr));
 			if (ret)
 				goto bail_free;
-			udata->outbuf =
-				(void __user *) (unsigned long) offset_addr;
+			udata->outbuf = (void __user *)
+					(unsigned long)offset_addr;
 			ret = ib_copy_to_udata(udata, &offset,
 					       sizeof(offset));
 			if (ret)
@@ -296,16 +245,16 @@
 		n = 0;
 		p = wq->wq;
 		while (tail != head) {
-			struct hfi1_rwqe *wqe;
+			struct rvt_rwqe *wqe;
 			int i;
 
-			wqe = get_rwqe_ptr(&srq->rq, tail);
+			wqe = rvt_get_rwqe_ptr(&srq->rq, tail);
 			p->wr_id = wqe->wr_id;
 			p->num_sge = wqe->num_sge;
 			for (i = 0; i < wqe->num_sge; i++)
 				p->sg_list[i] = wqe->sg_list[i];
 			n++;
-			p = (struct hfi1_rwqe *)((char *)p + sz);
+			p = (struct rvt_rwqe *)((char *)p + sz);
 			if (++tail >= srq->rq.size)
 				tail = 0;
 		}
@@ -320,21 +269,21 @@
 		vfree(owq);
 
 		if (srq->ip) {
-			struct hfi1_mmap_info *ip = srq->ip;
-			struct hfi1_ibdev *dev = to_idev(srq->ibsrq.device);
-			u32 s = sizeof(struct hfi1_rwq) + size * sz;
+			struct rvt_mmap_info *ip = srq->ip;
+			struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device);
+			u32 s = sizeof(struct rvt_rwq) + size * sz;
 
-			hfi1_update_mmap_info(dev, ip, s, wq);
+			rvt_update_mmap_info(dev, ip, s, wq);
 
 			/*
 			 * Return the offset to mmap.
-			 * See hfi1_mmap() for details.
+			 * See rvt_mmap() for details.
 			 */
 			if (udata && udata->inlen >= sizeof(__u64)) {
 				ret = ib_copy_to_udata(udata, &ip->offset,
 						       sizeof(ip->offset));
 				if (ret)
-					goto bail;
+					return ret;
 			}
 
 			/*
@@ -355,19 +304,24 @@
 			srq->limit = attr->srq_limit;
 		spin_unlock_irq(&srq->rq.lock);
 	}
-	goto bail;
+	return ret;
 
 bail_unlock:
 	spin_unlock_irq(&srq->rq.lock);
 bail_free:
 	vfree(wq);
-bail:
 	return ret;
 }
 
-int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+/** rvt_query_srq - query srq data
+ * @ibsrq: srq to query
+ * @attr: return info in attr
+ *
+ * Return: always 0
+ */
+int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
 {
-	struct hfi1_srq *srq = to_isrq(ibsrq);
+	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
 
 	attr->max_wr = srq->rq.size - 1;
 	attr->max_sge = srq->rq.max_sge;
@@ -376,19 +330,21 @@
 }
 
 /**
- * hfi1_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
+ * rvt_destroy_srq - destory an srq
+ * @ibsrq: srq object to destroy
+ *
+ * Return always 0
  */
-int hfi1_destroy_srq(struct ib_srq *ibsrq)
+int rvt_destroy_srq(struct ib_srq *ibsrq)
 {
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_ibdev *dev = to_idev(ibsrq->device);
+	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+	struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
 
 	spin_lock(&dev->n_srqs_lock);
 	dev->n_srqs_allocated--;
 	spin_unlock(&dev->n_srqs_lock);
 	if (srq->ip)
-		kref_put(&srq->ip->ref, hfi1_release_mmap_info);
+		kref_put(&srq->ip->ref, rvt_release_mmap_info);
 	else
 		vfree(srq->rq.wq);
 	kfree(srq);

diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h
new file mode 100644
index 0000000..bf0eaaf
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/srq.h

@@ -0,0 +1,62 @@
+#ifndef DEF_RVTSRQ_H
+#define DEF_RVTSRQ_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+void rvt_driver_srq_init(struct rvt_dev_info *rdi);
+struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
+			      struct ib_srq_init_attr *srq_init_attr,
+			      struct ib_udata *udata);
+int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		   enum ib_srq_attr_mask attr_mask,
+		   struct ib_udata *udata);
+int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+int rvt_destroy_srq(struct ib_srq *ibsrq);
+
+#endif          /* DEF_RVTSRQ_H */

diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c
new file mode 100644
index 0000000..d593285
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace.c

@@ -0,0 +1,49 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"

diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
new file mode 100644
index 0000000..6c0457d
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace.h

@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR rdmavt
+
+#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RDMAVT_TRACE_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#define RDI_DEV_ENTRY(rdi)   __string(dev, rdi->driver_f.get_card_name(rdi))
+#define RDI_DEV_ASSIGN(rdi)  __assign_str(dev, rdi->driver_f.get_card_name(rdi))
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdmavt
+
+TRACE_EVENT(rvt_dbg,
+	TP_PROTO(struct rvt_dev_info *rdi,
+		 const char *msg),
+	TP_ARGS(rdi, msg),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(rdi)
+		__string(msg, msg)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(rdi);
+		__assign_str(msg, msg);
+	),
+	TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
+);
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_qphash
+DECLARE_EVENT_CLASS(rvt_qphash_template,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+		__field(u32, qpn)
+		__field(u32, bucket)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->bucket = bucket;
+	),
+	TP_printk(
+		"[%s] qpn 0x%x bucket %u",
+		__get_str(dev),
+		__entry->qpn,
+		__entry->bucket
+	)
+);
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket));
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket));
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_tx
+
+#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode  }
+#define show_wr_opcode(opcode)                             \
+__print_symbolic(opcode,                                   \
+	wr_opcode_name(RDMA_WRITE),                        \
+	wr_opcode_name(RDMA_WRITE_WITH_IMM),               \
+	wr_opcode_name(SEND),                              \
+	wr_opcode_name(SEND_WITH_IMM),                     \
+	wr_opcode_name(RDMA_READ),                         \
+	wr_opcode_name(ATOMIC_CMP_AND_SWP),                \
+	wr_opcode_name(ATOMIC_FETCH_AND_ADD),              \
+	wr_opcode_name(LSO),                               \
+	wr_opcode_name(SEND_WITH_INV),                     \
+	wr_opcode_name(RDMA_READ_WITH_INV),                \
+	wr_opcode_name(LOCAL_INV),                         \
+	wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP),         \
+	wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+
+#define POS_PRN \
+"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+
+TRACE_EVENT(
+	rvt_post_one_wr,
+	TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
+	TP_ARGS(qp, wqe),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+		__field(u64, wr_id)
+		__field(u32, qpn)
+		__field(u32, psn)
+		__field(u32, lpsn)
+		__field(u32, length)
+		__field(u32, opcode)
+		__field(u32, size)
+		__field(u32, avail)
+		__field(u32, head)
+		__field(u32, last)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+		__entry->wr_id = wqe->wr.wr_id;
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->psn = wqe->psn;
+		__entry->lpsn = wqe->lpsn;
+		__entry->length = wqe->length;
+		__entry->opcode = wqe->wr.opcode;
+		__entry->size = qp->s_size;
+		__entry->avail = qp->s_avail;
+		__entry->head = qp->s_head;
+		__entry->last = qp->s_last;
+	),
+	TP_printk(
+		POS_PRN,
+		__get_str(dev),
+		__entry->wr_id,
+		__entry->qpn,
+		__entry->psn,
+		__entry->lpsn,
+		__entry->length,
+		__entry->opcode, show_wr_opcode(__entry->opcode),
+		__entry->size,
+		__entry->avail,
+		__entry->head,
+		__entry->last
+	)
+);
+
+#endif /* __RDMAVT_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>

diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
new file mode 100644
index 0000000..6caf527
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/vt.c

@@ -0,0 +1,873 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include "vt.h"
+#include "trace.h"
+
+#define RVT_UVERBS_ABI_VERSION 2
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("RDMA Verbs Transport Library");
+
+static int rvt_init(void)
+{
+	/*
+	 * rdmavt does not need to do anything special when it starts up. All it
+	 * needs to do is sit and wait until a driver attempts registration.
+	 */
+	return 0;
+}
+module_init(rvt_init);
+
+static void rvt_cleanup(void)
+{
+	/*
+	 * Nothing to do at exit time either. The module won't be able to be
+	 * removed until all drivers are gone which means all the dev structs
+	 * are gone so there is really nothing to do.
+	 */
+}
+module_exit(rvt_cleanup);
+
+/**
+ * rvt_alloc_device - allocate rdi
+ * @size: how big of a structure to allocate
+ * @nports: number of ports to allocate array slots for
+ *
+ * Use IB core device alloc to allocate space for the rdi which is assumed to be
+ * inside of the ib_device. Any extra space that drivers require should be
+ * included in size.
+ *
+ * We also allocate a port array based on the number of ports.
+ *
+ * Return: pointer to allocated rdi
+ */
+struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
+{
+	struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM);
+
+	rdi = (struct rvt_dev_info *)ib_alloc_device(size);
+	if (!rdi)
+		return rdi;
+
+	rdi->ports = kcalloc(nports,
+			     sizeof(struct rvt_ibport **),
+			     GFP_KERNEL);
+	if (!rdi->ports)
+		ib_dealloc_device(&rdi->ibdev);
+
+	return rdi;
+}
+EXPORT_SYMBOL(rvt_alloc_device);
+
+static int rvt_query_device(struct ib_device *ibdev,
+			    struct ib_device_attr *props,
+			    struct ib_udata *uhw)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+
+	if (uhw->inlen || uhw->outlen)
+		return -EINVAL;
+	/*
+	 * Return rvt_dev_info.dparms.props contents
+	 */
+	*props = rdi->dparms.props;
+	return 0;
+}
+
+static int rvt_modify_device(struct ib_device *device,
+			     int device_modify_mask,
+			     struct ib_device_modify *device_modify)
+{
+	/*
+	 * There is currently no need to supply this based on qib and hfi1.
+	 * Future drivers may need to implement this though.
+	 */
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * rvt_query_port: Passes the query port call to the driver
+ * @ibdev: Verbs IB dev
+ * @port_num: port number, 1 based from ib core
+ * @props: structure to hold returned properties
+ *
+ * Return: 0 on success
+ */
+static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
+			  struct ib_port_attr *props)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct rvt_ibport *rvp;
+	int port_index = ibport_num_to_idx(ibdev, port_num);
+
+	if (port_index < 0)
+		return -EINVAL;
+
+	rvp = rdi->ports[port_index];
+	memset(props, 0, sizeof(*props));
+	props->sm_lid = rvp->sm_lid;
+	props->sm_sl = rvp->sm_sl;
+	props->port_cap_flags = rvp->port_cap_flags;
+	props->max_msg_sz = 0x80000000;
+	props->pkey_tbl_len = rvt_get_npkeys(rdi);
+	props->bad_pkey_cntr = rvp->pkey_violations;
+	props->qkey_viol_cntr = rvp->qkey_violations;
+	props->subnet_timeout = rvp->subnet_timeout;
+	props->init_type_reply = 0;
+
+	/* Populate the remaining ib_port_attr elements */
+	return rdi->driver_f.query_port_state(rdi, port_num, props);
+}
+
+/**
+ * rvt_modify_port
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @port_modify_mask: How to change the port
+ * @props: Structure to fill in
+ *
+ * Return: 0 on success
+ */
+static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
+			   int port_modify_mask, struct ib_port_modify *props)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct rvt_ibport *rvp;
+	int ret = 0;
+	int port_index = ibport_num_to_idx(ibdev, port_num);
+
+	if (port_index < 0)
+		return -EINVAL;
+
+	rvp = rdi->ports[port_index];
+	rvp->port_cap_flags |= props->set_port_cap_mask;
+	rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+
+	if (props->set_port_cap_mask || props->clr_port_cap_mask)
+		rdi->driver_f.cap_mask_chg(rdi, port_num);
+	if (port_modify_mask & IB_PORT_SHUTDOWN)
+		ret = rdi->driver_f.shut_down_port(rdi, port_num);
+	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
+		rvp->qkey_violations = 0;
+
+	return ret;
+}
+
+/**
+ * rvt_query_pkey - Return a pkey from the table at a given index
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @intex: Index into pkey table
+ *
+ * Return: 0 on failure pkey otherwise
+ */
+static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index,
+			  u16 *pkey)
+{
+	/*
+	 * Driver will be responsible for keeping rvt_dev_info.pkey_table up to
+	 * date. This function will just return that value. There is no need to
+	 * lock, if a stale value is read and sent to the user so be it there is
+	 * no way to protect against that anyway.
+	 */
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	int port_index;
+
+	port_index = ibport_num_to_idx(ibdev, port_num);
+	if (port_index < 0)
+		return -EINVAL;
+
+	if (index >= rvt_get_npkeys(rdi))
+		return -EINVAL;
+
+	*pkey = rvt_get_pkey(rdi, port_index, index);
+	return 0;
+}
+
+/**
+ * rvt_query_gid - Return a gid from the table
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @index: = Index in table
+ * @gid: Gid to return
+ *
+ * Return: 0 on success
+ */
+static int rvt_query_gid(struct ib_device *ibdev, u8 port_num,
+			 int guid_index, union ib_gid *gid)
+{
+	struct rvt_dev_info *rdi;
+	struct rvt_ibport *rvp;
+	int port_index;
+
+	/*
+	 * Driver is responsible for updating the guid table. Which will be used
+	 * to craft the return value. This will work similar to how query_pkey()
+	 * is being done.
+	 */
+	port_index = ibport_num_to_idx(ibdev, port_num);
+	if (port_index < 0)
+		return -EINVAL;
+
+	rdi = ib_to_rvt(ibdev);
+	rvp = rdi->ports[port_index];
+
+	gid->global.subnet_prefix = rvp->gid_prefix;
+
+	return rdi->driver_f.get_guid_be(rdi, rvp, guid_index,
+					 &gid->global.interface_id);
+}
+
+struct rvt_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
+						*ibucontext)
+{
+	return container_of(ibucontext, struct rvt_ucontext, ibucontext);
+}
+
+/**
+ * rvt_alloc_ucontext - Allocate a user context
+ * @ibdev: Vers IB dev
+ * @data: User data allocated
+ */
+static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
+					      struct ib_udata *udata)
+{
+	struct rvt_ucontext *context;
+
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+	return &context->ibucontext;
+}
+
+/**
+ *rvt_dealloc_ucontext - Free a user context
+ *@context - Free this
+ */
+static int rvt_dealloc_ucontext(struct ib_ucontext *context)
+{
+	kfree(to_iucontext(context));
+	return 0;
+}
+
+static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+				  struct ib_port_immutable *immutable)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct ib_port_attr attr;
+	int err, port_index;
+
+	port_index = ibport_num_to_idx(ibdev, port_num);
+	if (port_index < 0)
+		return -EINVAL;
+
+	err = rvt_query_port(ibdev, port_num, &attr);
+	if (err)
+		return err;
+
+	immutable->pkey_tbl_len = attr.pkey_tbl_len;
+	immutable->gid_tbl_len = attr.gid_tbl_len;
+	immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+	immutable->max_mad_size = rdi->dparms.max_mad_size;
+
+	return 0;
+}
+
+enum {
+	MISC,
+	QUERY_DEVICE,
+	MODIFY_DEVICE,
+	QUERY_PORT,
+	MODIFY_PORT,
+	QUERY_PKEY,
+	QUERY_GID,
+	ALLOC_UCONTEXT,
+	DEALLOC_UCONTEXT,
+	GET_PORT_IMMUTABLE,
+	CREATE_QP,
+	MODIFY_QP,
+	DESTROY_QP,
+	QUERY_QP,
+	POST_SEND,
+	POST_RECV,
+	POST_SRQ_RECV,
+	CREATE_AH,
+	DESTROY_AH,
+	MODIFY_AH,
+	QUERY_AH,
+	CREATE_SRQ,
+	MODIFY_SRQ,
+	DESTROY_SRQ,
+	QUERY_SRQ,
+	ATTACH_MCAST,
+	DETACH_MCAST,
+	GET_DMA_MR,
+	REG_USER_MR,
+	DEREG_MR,
+	ALLOC_MR,
+	ALLOC_FMR,
+	MAP_PHYS_FMR,
+	UNMAP_FMR,
+	DEALLOC_FMR,
+	MMAP,
+	CREATE_CQ,
+	DESTROY_CQ,
+	POLL_CQ,
+	REQ_NOTFIY_CQ,
+	RESIZE_CQ,
+	ALLOC_PD,
+	DEALLOC_PD,
+	_VERB_IDX_MAX /* Must always be last! */
+};
+
+static inline int check_driver_override(struct rvt_dev_info *rdi,
+					size_t offset, void *func)
+{
+	if (!*(void **)((void *)&rdi->ibdev + offset)) {
+		*(void **)((void *)&rdi->ibdev + offset) = func;
+		return 0;
+	}
+
+	return 1;
+}
+
+static noinline int check_support(struct rvt_dev_info *rdi, int verb)
+{
+	switch (verb) {
+	case MISC:
+		/*
+		 * These functions are not part of verbs specifically but are
+		 * required for rdmavt to function.
+		 */
+		if ((!rdi->driver_f.port_callback) ||
+		    (!rdi->driver_f.get_card_name) ||
+		    (!rdi->driver_f.get_pci_dev))
+			return -EINVAL;
+		break;
+
+	case QUERY_DEVICE:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_device),
+						    rvt_query_device);
+		break;
+
+	case MODIFY_DEVICE:
+		/*
+		 * rdmavt does not support modify device currently drivers must
+		 * provide.
+		 */
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 modify_device),
+					   rvt_modify_device))
+			return -EOPNOTSUPP;
+		break;
+
+	case QUERY_PORT:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 query_port),
+					   rvt_query_port))
+			if (!rdi->driver_f.query_port_state)
+				return -EINVAL;
+		break;
+
+	case MODIFY_PORT:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 modify_port),
+					   rvt_modify_port))
+			if (!rdi->driver_f.cap_mask_chg ||
+			    !rdi->driver_f.shut_down_port)
+				return -EINVAL;
+		break;
+
+	case QUERY_PKEY:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_pkey),
+				      rvt_query_pkey);
+		break;
+
+	case QUERY_GID:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 query_gid),
+					   rvt_query_gid))
+			if (!rdi->driver_f.get_guid_be)
+				return -EINVAL;
+		break;
+
+	case ALLOC_UCONTEXT:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    alloc_ucontext),
+				      rvt_alloc_ucontext);
+		break;
+
+	case DEALLOC_UCONTEXT:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    dealloc_ucontext),
+				      rvt_dealloc_ucontext);
+		break;
+
+	case GET_PORT_IMMUTABLE:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    get_port_immutable),
+				      rvt_get_port_immutable);
+		break;
+
+	case CREATE_QP:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 create_qp),
+					   rvt_create_qp))
+			if (!rdi->driver_f.qp_priv_alloc ||
+			    !rdi->driver_f.qp_priv_free ||
+			    !rdi->driver_f.notify_qp_reset ||
+			    !rdi->driver_f.flush_qp_waiters ||
+			    !rdi->driver_f.stop_send_queue ||
+			    !rdi->driver_f.quiesce_qp)
+				return -EINVAL;
+		break;
+
+	case MODIFY_QP:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 modify_qp),
+					   rvt_modify_qp))
+			if (!rdi->driver_f.notify_qp_reset ||
+			    !rdi->driver_f.schedule_send ||
+			    !rdi->driver_f.get_pmtu_from_attr ||
+			    !rdi->driver_f.flush_qp_waiters ||
+			    !rdi->driver_f.stop_send_queue ||
+			    !rdi->driver_f.quiesce_qp ||
+			    !rdi->driver_f.notify_error_qp ||
+			    !rdi->driver_f.mtu_from_qp ||
+			    !rdi->driver_f.mtu_to_path_mtu ||
+			    !rdi->driver_f.shut_down_port ||
+			    !rdi->driver_f.cap_mask_chg)
+				return -EINVAL;
+		break;
+
+	case DESTROY_QP:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 destroy_qp),
+					   rvt_destroy_qp))
+			if (!rdi->driver_f.qp_priv_free ||
+			    !rdi->driver_f.notify_qp_reset ||
+			    !rdi->driver_f.flush_qp_waiters ||
+			    !rdi->driver_f.stop_send_queue ||
+			    !rdi->driver_f.quiesce_qp)
+				return -EINVAL;
+		break;
+
+	case QUERY_QP:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_qp),
+						    rvt_query_qp);
+		break;
+
+	case POST_SEND:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 post_send),
+					   rvt_post_send))
+			if (!rdi->driver_f.schedule_send ||
+			    !rdi->driver_f.do_send)
+				return -EINVAL;
+		break;
+
+	case POST_RECV:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    post_recv),
+				      rvt_post_recv);
+		break;
+	case POST_SRQ_RECV:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    post_srq_recv),
+				      rvt_post_srq_recv);
+		break;
+
+	case CREATE_AH:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    create_ah),
+				      rvt_create_ah);
+		break;
+
+	case DESTROY_AH:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    destroy_ah),
+				      rvt_destroy_ah);
+		break;
+
+	case MODIFY_AH:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    modify_ah),
+				      rvt_modify_ah);
+		break;
+
+	case QUERY_AH:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_ah),
+				      rvt_query_ah);
+		break;
+
+	case CREATE_SRQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    create_srq),
+				      rvt_create_srq);
+		break;
+
+	case MODIFY_SRQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    modify_srq),
+				      rvt_modify_srq);
+		break;
+
+	case DESTROY_SRQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    destroy_srq),
+				      rvt_destroy_srq);
+		break;
+
+	case QUERY_SRQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_srq),
+				      rvt_query_srq);
+		break;
+
+	case ATTACH_MCAST:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    attach_mcast),
+				      rvt_attach_mcast);
+		break;
+
+	case DETACH_MCAST:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    detach_mcast),
+				      rvt_detach_mcast);
+		break;
+
+	case GET_DMA_MR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    get_dma_mr),
+				      rvt_get_dma_mr);
+		break;
+
+	case REG_USER_MR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    reg_user_mr),
+				      rvt_reg_user_mr);
+		break;
+
+	case DEREG_MR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    dereg_mr),
+				      rvt_dereg_mr);
+		break;
+
+	case ALLOC_FMR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    alloc_fmr),
+				      rvt_alloc_fmr);
+		break;
+
+	case ALLOC_MR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    alloc_mr),
+				      rvt_alloc_mr);
+		break;
+
+	case MAP_PHYS_FMR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    map_phys_fmr),
+				      rvt_map_phys_fmr);
+		break;
+
+	case UNMAP_FMR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    unmap_fmr),
+				      rvt_unmap_fmr);
+		break;
+
+	case DEALLOC_FMR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    dealloc_fmr),
+				      rvt_dealloc_fmr);
+		break;
+
+	case MMAP:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    mmap),
+				      rvt_mmap);
+		break;
+
+	case CREATE_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    create_cq),
+				      rvt_create_cq);
+		break;
+
+	case DESTROY_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    destroy_cq),
+				      rvt_destroy_cq);
+		break;
+
+	case POLL_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    poll_cq),
+				      rvt_poll_cq);
+		break;
+
+	case REQ_NOTFIY_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    req_notify_cq),
+				      rvt_req_notify_cq);
+		break;
+
+	case RESIZE_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    resize_cq),
+				      rvt_resize_cq);
+		break;
+
+	case ALLOC_PD:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    alloc_pd),
+				      rvt_alloc_pd);
+		break;
+
+	case DEALLOC_PD:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    dealloc_pd),
+				      rvt_dealloc_pd);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * rvt_register_device - register a driver
+ * @rdi: main dev structure for all of rdmavt operations
+ *
+ * It is up to drivers to allocate the rdi and fill in the appropriate
+ * information.
+ *
+ * Return: 0 on success otherwise an errno.
+ */
+int rvt_register_device(struct rvt_dev_info *rdi)
+{
+	int ret = 0, i;
+
+	if (!rdi)
+		return -EINVAL;
+
+	/*
+	 * Check to ensure drivers have setup the required helpers for the verbs
+	 * they want rdmavt to handle
+	 */
+	for (i = 0; i < _VERB_IDX_MAX; i++)
+		if (check_support(rdi, i)) {
+			pr_err("Driver support req not met at %d\n", i);
+			return -EINVAL;
+		}
+
+
+	/* Once we get past here we can use rvt_pr macros and tracepoints */
+	trace_rvt_dbg(rdi, "Driver attempting registration");
+	rvt_mmap_init(rdi);
+
+	/* Queue Pairs */
+	ret = rvt_driver_qp_init(rdi);
+	if (ret) {
+		pr_err("Error in driver QP init.\n");
+		return -EINVAL;
+	}
+
+	/* Address Handle */
+	spin_lock_init(&rdi->n_ahs_lock);
+	rdi->n_ahs_allocated = 0;
+
+	/* Shared Receive Queue */
+	rvt_driver_srq_init(rdi);
+
+	/* Multicast */
+	rvt_driver_mcast_init(rdi);
+
+	/* Mem Region */
+	ret = rvt_driver_mr_init(rdi);
+	if (ret) {
+		pr_err("Error in driver MR init.\n");
+		goto bail_no_mr;
+	}
+
+	/* Completion queues */
+	ret = rvt_driver_cq_init(rdi);
+	if (ret) {
+		pr_err("Error in driver CQ init.\n");
+		goto bail_mr;
+	}
+
+	/* DMA Operations */
+	rdi->ibdev.dma_ops =
+		rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops;
+
+	/* Protection Domain */
+	spin_lock_init(&rdi->n_pds_lock);
+	rdi->n_pds_allocated = 0;
+
+	/*
+	 * There are some things which could be set by underlying drivers but
+	 * really should be up to rdmavt to set. For instance drivers can't know
+	 * exactly which functions rdmavt supports, nor do they know the ABI
+	 * version, so we do all of this sort of stuff here.
+	 */
+	rdi->ibdev.uverbs_abi_ver = RVT_UVERBS_ABI_VERSION;
+	rdi->ibdev.uverbs_cmd_mask =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
+		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
+		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
+		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
+		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
+		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
+		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
+		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
+		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
+		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
+		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
+		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+	rdi->ibdev.node_type = RDMA_NODE_IB_CA;
+	rdi->ibdev.num_comp_vectors = 1;
+
+	/* We are now good to announce we exist */
+	ret =  ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
+	if (ret) {
+		rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
+		goto bail_cq;
+	}
+
+	rvt_create_mad_agents(rdi);
+
+	rvt_pr_info(rdi, "Registration with rdmavt done.\n");
+	return ret;
+
+bail_cq:
+	rvt_cq_exit(rdi);
+
+bail_mr:
+	rvt_mr_exit(rdi);
+
+bail_no_mr:
+	rvt_qp_exit(rdi);
+
+	return ret;
+}
+EXPORT_SYMBOL(rvt_register_device);
+
+/**
+ * rvt_unregister_device - remove a driver
+ * @rdi: rvt dev struct
+ */
+void rvt_unregister_device(struct rvt_dev_info *rdi)
+{
+	trace_rvt_dbg(rdi, "Driver is unregistering.");
+	if (!rdi)
+		return;
+
+	rvt_free_mad_agents(rdi);
+
+	ib_unregister_device(&rdi->ibdev);
+	rvt_cq_exit(rdi);
+	rvt_mr_exit(rdi);
+	rvt_qp_exit(rdi);
+}
+EXPORT_SYMBOL(rvt_unregister_device);
+
+/**
+ * rvt_init_port - init internal data for driver port
+ * @rdi: rvt dev strut
+ * @port: rvt port
+ * @port_index: 0 based index of ports, different from IB core port num
+ *
+ * Keep track of a list of ports. No need to have a detach port.
+ * They persist until the driver goes away.
+ *
+ * Return: always 0
+ */
+int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
+		  int port_index, u16 *pkey_table)
+{
+
+	rdi->ports[port_index] = port;
+	rdi->ports[port_index]->pkey_table = pkey_table;
+
+	return 0;
+}
+EXPORT_SYMBOL(rvt_init_port);

diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
new file mode 100644
index 0000000..6b01eaa
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/vt.h

@@ -0,0 +1,104 @@
+#ifndef DEF_RDMAVT_H
+#define DEF_RDMAVT_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <linux/pci.h>
+#include "dma.h"
+#include "pd.h"
+#include "qp.h"
+#include "ah.h"
+#include "mr.h"
+#include "srq.h"
+#include "mcast.h"
+#include "mmap.h"
+#include "cq.h"
+#include "mad.h"
+#include "mmap.h"
+
+#define rvt_pr_info(rdi, fmt, ...) \
+	__rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \
+		      rdi->driver_f.get_card_name(rdi), \
+		      fmt, \
+		      ##__VA_ARGS__)
+
+#define rvt_pr_warn(rdi, fmt, ...) \
+	__rvt_pr_warn(rdi->driver_f.get_pci_dev(rdi), \
+		      rdi->driver_f.get_card_name(rdi), \
+		      fmt, \
+		      ##__VA_ARGS__)
+
+#define rvt_pr_err(rdi, fmt, ...) \
+	__rvt_pr_err(rdi->driver_f.get_pci_dev(rdi), \
+		     rdi->driver_f.get_card_name(rdi), \
+		     fmt, \
+		     ##__VA_ARGS__)
+
+#define __rvt_pr_info(pdev, name, fmt, ...) \
+	dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+#define __rvt_pr_warn(pdev, name, fmt, ...) \
+	dev_warn(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+#define __rvt_pr_err(pdev, name, fmt, ...) \
+	dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	int port_index;
+
+	port_index = port_num - 1; /* IB ports start at 1 our arrays at 0 */
+	if ((port_index < 0) || (port_index >= rdi->dparms.nports))
+		return -EINVAL;
+
+	return port_index;
+}
+
+#endif          /* DEF_RDMAVT_H */

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 85be0de..caec8e9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h

@@ -388,7 +388,7 @@
 	struct dentry *mcg_dentry;
 	struct dentry *path_dentry;
 #endif
-	int	hca_caps;
+	u64	hca_caps;
 	struct ipoib_ethtool_st ethtool;
 	struct timer_list poll_timer;
 	unsigned max_send_sge;

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 899e6b7..f0e55e4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c

@@ -180,6 +180,7 @@
 	struct sk_buff *skb;
 	u64 mapping[IPOIB_UD_RX_SG];
 	union ib_gid *dgid;
+	union ib_gid *sgid;
 
 	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
 		       wr_id, wc->status);
@@ -203,13 +204,6 @@
 		return;
 	}
 
-	/*
-	 * Drop packets that this interface sent, ie multicast packets
-	 * that the HCA has replicated.
-	 */
-	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
-		goto repost;
-
 	memcpy(mapping, priv->rx_ring[wr_id].mapping,
 	       IPOIB_UD_RX_SG * sizeof *mapping);
 
@@ -239,6 +233,25 @@
 	else
 		skb->pkt_type = PACKET_MULTICAST;
 
+	sgid = &((struct ib_grh *)skb->data)->sgid;
+
+	/*
+	 * Drop packets that this interface sent, ie multicast packets
+	 * that the HCA has replicated.
+	 */
+	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) {
+		int need_repost = 1;
+
+		if ((wc->wc_flags & IB_WC_GRH) &&
+		    sgid->global.interface_id != priv->local_gid.global.interface_id)
+			need_repost = 0;
+
+		if (need_repost) {
+			dev_kfree_skb_any(skb);
+			goto repost;
+		}
+	}
+
 	skb_pull(skb, IB_GRH_BYTES);
 
 	skb->protocol = ((struct ipoib_header *) skb->data)->proto;

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 25509bb..80807d6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c

@@ -51,6 +51,7 @@
 #include <net/addrconf.h>
 #include <linux/inetdevice.h>
 #include <rdma/ib_cache.h>
+#include <linux/pci.h>
 
 #define DRV_VERSION "1.0.0"
 
@@ -1590,11 +1591,67 @@
 	priv->tx_ring = NULL;
 }
 
+static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
+}
+
+static int ipoib_get_vf_config(struct net_device *dev, int vf,
+			       struct ifla_vf_info *ivf)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
+	if (err)
+		return err;
+
+	ivf->vf = vf;
+
+	return 0;
+}
+
+static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
+		return -EINVAL;
+
+	return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type);
+}
+
+static int ipoib_get_vf_stats(struct net_device *dev, int vf,
+			      struct ifla_vf_stats *vf_stats)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
+}
+
 static const struct header_ops ipoib_header_ops = {
 	.create	= ipoib_hard_header,
 };
 
-static const struct net_device_ops ipoib_netdev_ops = {
+static const struct net_device_ops ipoib_netdev_ops_pf = {
+	.ndo_uninit		 = ipoib_uninit,
+	.ndo_open		 = ipoib_open,
+	.ndo_stop		 = ipoib_stop,
+	.ndo_change_mtu		 = ipoib_change_mtu,
+	.ndo_fix_features	 = ipoib_fix_features,
+	.ndo_start_xmit		 = ipoib_start_xmit,
+	.ndo_tx_timeout		 = ipoib_timeout,
+	.ndo_set_rx_mode	 = ipoib_set_mcast_list,
+	.ndo_get_iflink		 = ipoib_get_iflink,
+	.ndo_set_vf_link_state	 = ipoib_set_vf_link_state,
+	.ndo_get_vf_config	 = ipoib_get_vf_config,
+	.ndo_get_vf_stats	 = ipoib_get_vf_stats,
+	.ndo_set_vf_guid	 = ipoib_set_vf_guid,
+};
+
+static const struct net_device_ops ipoib_netdev_ops_vf = {
 	.ndo_uninit		 = ipoib_uninit,
 	.ndo_open		 = ipoib_open,
 	.ndo_stop		 = ipoib_stop,
@@ -1610,7 +1667,11 @@
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	dev->netdev_ops		 = &ipoib_netdev_ops;
+	if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
+		dev->netdev_ops	= &ipoib_netdev_ops_vf;
+	else
+		dev->netdev_ops	= &ipoib_netdev_ops_pf;
+
 	dev->header_ops		 = &ipoib_header_ops;
 
 	ipoib_set_ethtool_ops(dev);

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 60b30d3..411e446 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c

@@ -63,7 +63,6 @@
 struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np);
 
 static void isert_release_work(struct work_struct *work);
-static void isert_wait4flush(struct isert_conn *isert_conn);
 static void isert_recv_done(struct ib_cq *cq, struct ib_wc *wc);
 static void isert_send_done(struct ib_cq *cq, struct ib_wc *wc);
 static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc);
@@ -141,7 +140,7 @@
 	attr.qp_context = isert_conn;
 	attr.send_cq = comp->cq;
 	attr.recv_cq = comp->cq;
-	attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
+	attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
 	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
 	attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
 	isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
@@ -887,7 +886,7 @@
 		break;
 	case ISER_CONN_UP:
 		isert_conn_terminate(isert_conn);
-		isert_wait4flush(isert_conn);
+		ib_drain_qp(isert_conn->qp);
 		isert_handle_unbound_conn(isert_conn);
 		break;
 	case ISER_CONN_BOUND:
@@ -3213,36 +3212,6 @@
 	}
 }
 
-static void
-isert_beacon_done(struct ib_cq *cq, struct ib_wc *wc)
-{
-	struct isert_conn *isert_conn = wc->qp->qp_context;
-
-	isert_print_wc(wc, "beacon");
-
-	isert_info("conn %p completing wait_comp_err\n", isert_conn);
-	complete(&isert_conn->wait_comp_err);
-}
-
-static void
-isert_wait4flush(struct isert_conn *isert_conn)
-{
-	struct ib_recv_wr *bad_wr;
-	static struct ib_cqe cqe = { .done = isert_beacon_done };
-
-	isert_info("conn %p\n", isert_conn);
-
-	init_completion(&isert_conn->wait_comp_err);
-	isert_conn->beacon.wr_cqe = &cqe;
-	/* post an indication that all flush errors were consumed */
-	if (ib_post_recv(isert_conn->qp, &isert_conn->beacon, &bad_wr)) {
-		isert_err("conn %p failed to post beacon", isert_conn);
-		return;
-	}
-
-	wait_for_completion(&isert_conn->wait_comp_err);
-}
-
 /**
  * isert_put_unsol_pending_cmds() - Drop commands waiting for
  *     unsolicitate dataout
@@ -3288,7 +3257,7 @@
 	isert_conn_terminate(isert_conn);
 	mutex_unlock(&isert_conn->mutex);
 
-	isert_wait4flush(isert_conn);
+	ib_drain_qp(isert_conn->qp);
 	isert_put_unsol_pending_cmds(conn);
 	isert_wait4cmds(conn);
 	isert_wait4logout(isert_conn);
@@ -3300,7 +3269,7 @@
 {
 	struct isert_conn *isert_conn = conn->context;
 
-	isert_wait4flush(isert_conn);
+	ib_drain_qp(isert_conn->qp);
 	isert_put_conn(isert_conn);
 }
 

diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 192788a..147900c 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h

@@ -209,14 +209,12 @@
 	struct ib_qp		*qp;
 	struct isert_device	*device;
 	struct mutex		mutex;
-	struct completion	wait_comp_err;
 	struct kref		kref;
 	struct list_head	fr_pool;
 	int			fr_pool_size;
 	/* lock to protect fastreg pool */
 	spinlock_t		pool_lock;
 	struct work_struct	release_work;
-	struct ib_recv_wr       beacon;
 	bool                    logout_posted;
 	bool                    snd_w_inv;
 };

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1d13090..0bd3cb2 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c

@@ -839,7 +839,7 @@
 		if (srpt_set_ch_state(ch, CH_DISCONNECTED))
 			schedule_work(&ch->release_work);
 		else
-			WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num);
+			WARN_ONCE(1, "%s-%d\n", ch->sess_name, ch->qp->qp_num);
 	}
 }
 

diff --git a/drivers/input/input-compat.c b/drivers/input/input-compat.c
index 64ca711..d84d20b 100644
--- a/drivers/input/input-compat.c
+++ b/drivers/input/input-compat.c

@@ -17,7 +17,7 @@
 int input_event_from_user(const char __user *buffer,
 			  struct input_event *event)
 {
-	if (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) {
+	if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
 		struct input_event_compat compat_event;
 
 		if (copy_from_user(&compat_event, buffer,
@@ -41,7 +41,7 @@
 int input_event_to_user(char __user *buffer,
 			const struct input_event *event)
 {
-	if (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) {
+	if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
 		struct input_event_compat compat_event;
 
 		compat_event.time.tv_sec = event->time.tv_sec;
@@ -65,7 +65,7 @@
 int input_ff_effect_from_user(const char __user *buffer, size_t size,
 			      struct ff_effect *effect)
 {
-	if (INPUT_COMPAT_TEST) {
+	if (in_compat_syscall()) {
 		struct ff_effect_compat *compat_effect;
 
 		if (size != sizeof(struct ff_effect_compat))

diff --git a/drivers/input/input-compat.h b/drivers/input/input-compat.h
index 148f66f..1563160 100644
--- a/drivers/input/input-compat.h
+++ b/drivers/input/input-compat.h

@@ -17,18 +17,6 @@
 
 #ifdef CONFIG_COMPAT
 
-/* Note to the author of this code: did it ever occur to
-   you why the ifdefs are needed? Think about it again. -AK */
-#if defined(CONFIG_X86_64) || defined(CONFIG_TILE)
-#  define INPUT_COMPAT_TEST is_compat_task()
-#elif defined(CONFIG_S390)
-#  define INPUT_COMPAT_TEST test_thread_flag(TIF_31BIT)
-#elif defined(CONFIG_MIPS)
-#  define INPUT_COMPAT_TEST test_thread_flag(TIF_32BIT_ADDR)
-#else
-#  define INPUT_COMPAT_TEST test_thread_flag(TIF_32BIT)
-#endif
-
 struct input_event_compat {
 	struct compat_timeval time;
 	__u16 type;
@@ -67,7 +55,7 @@
 
 static inline size_t input_event_size(void)
 {
-	return (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) ?
+	return (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) ?
 		sizeof(struct input_event_compat) : sizeof(struct input_event);
 }
 

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 8806059..b87ffbd 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c

@@ -1015,7 +1015,7 @@
 {
 	int len = 0;
 
-	if (INPUT_COMPAT_TEST) {
+	if (in_compat_syscall()) {
 		u32 dword = bits >> 32;
 		if (dword || !skip_empty)
 			len += snprintf(buf, buf_size, "%x ", dword);

diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index cfd58e8..1c5914c 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c

@@ -817,26 +817,49 @@
 
 	ar2->udev = udev;
 
+	/* Sanity check, first interface must have an endpoint */
+	if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) {
+		dev_err(&interface->dev,
+			"%s(): interface 0 must have an endpoint\n", __func__);
+		r = -ENODEV;
+		goto fail1;
+	}
 	ar2->intf[0] = interface;
 	ar2->ep[0] = &alt->endpoint[0].desc;
 
+	/* Sanity check, the device must have two interfaces */
 	ar2->intf[1] = usb_ifnum_to_if(udev, 1);
+	if ((udev->actconfig->desc.bNumInterfaces < 2) || !ar2->intf[1]) {
+		dev_err(&interface->dev, "%s(): need 2 interfaces, found %d\n",
+			__func__, udev->actconfig->desc.bNumInterfaces);
+		r = -ENODEV;
+		goto fail1;
+	}
+
 	r = usb_driver_claim_interface(&ati_remote2_driver, ar2->intf[1], ar2);
 	if (r)
 		goto fail1;
+
+	/* Sanity check, second interface must have an endpoint */
 	alt = ar2->intf[1]->cur_altsetting;
+	if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) {
+		dev_err(&interface->dev,
+			"%s(): interface 1 must have an endpoint\n", __func__);
+		r = -ENODEV;
+		goto fail2;
+	}
 	ar2->ep[1] = &alt->endpoint[0].desc;
 
 	r = ati_remote2_urb_init(ar2);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	ar2->channel_mask = channel_mask;
 	ar2->mode_mask = mode_mask;
 
 	r = ati_remote2_setup(ar2, ar2->channel_mask);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	usb_make_path(udev, ar2->phys, sizeof(ar2->phys));
 	strlcat(ar2->phys, "/input0", sizeof(ar2->phys));
@@ -845,11 +868,11 @@
 
 	r = sysfs_create_group(&udev->dev.kobj, &ati_remote2_attr_group);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	r = ati_remote2_input_init(ar2);
 	if (r)
-		goto fail3;
+		goto fail4;
 
 	usb_set_intfdata(interface, ar2);
 
@@ -857,10 +880,11 @@
 
 	return 0;
 
- fail3:
+ fail4:
 	sysfs_remove_group(&udev->dev.kobj, &ati_remote2_attr_group);
- fail2:
+ fail3:
 	ati_remote2_urb_cleanup(ar2);
+ fail2:
 	usb_driver_release_interface(&ati_remote2_driver, ar2->intf[1]);
  fail1:
 	kfree(ar2);

diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c
index ac1fa5f..9c0ea36 100644
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c

@@ -1663,6 +1663,8 @@
 
 	pcu->ctrl_intf = usb_ifnum_to_if(pcu->udev,
 					 union_desc->bMasterInterface0);
+	if (!pcu->ctrl_intf)
+		return -EINVAL;
 
 	alt = pcu->ctrl_intf->cur_altsetting;
 	pcu->ep_ctrl = &alt->endpoint[0].desc;
@@ -1670,6 +1672,8 @@
 
 	pcu->data_intf = usb_ifnum_to_if(pcu->udev,
 					 union_desc->bSlaveInterface0);
+	if (!pcu->data_intf)
+		return -EINVAL;
 
 	alt = pcu->data_intf->cur_altsetting;
 	if (alt->desc.bNumEndpoints != 2) {

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 4eb9e4d..abe1a92 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c

@@ -664,7 +664,7 @@
 static int uinput_ff_upload_to_user(char __user *buffer,
 				    const struct uinput_ff_upload *ff_up)
 {
-	if (INPUT_COMPAT_TEST) {
+	if (in_compat_syscall()) {
 		struct uinput_ff_upload_compat ff_up_compat;
 
 		ff_up_compat.request_id = ff_up->request_id;
@@ -695,7 +695,7 @@
 static int uinput_ff_upload_from_user(const char __user *buffer,
 				      struct uinput_ff_upload *ff_up)
 {
-	if (INPUT_COMPAT_TEST) {
+	if (in_compat_syscall()) {
 		struct uinput_ff_upload_compat ff_up_compat;
 
 		if (copy_from_user(&ff_up_compat, buffer,

diff --git a/drivers/input/mouse/byd.c b/drivers/input/mouse/byd.c
index 9425e0f..fdc243ca 100644
--- a/drivers/input/mouse/byd.c
+++ b/drivers/input/mouse/byd.c

@@ -12,10 +12,12 @@
 #include <linux/input.h>
 #include <linux/libps2.h>
 #include <linux/serio.h>
+#include <linux/slab.h>
 
 #include "psmouse.h"
 #include "byd.h"
 
+/* PS2 Bits */
 #define PS2_Y_OVERFLOW	BIT_MASK(7)
 #define PS2_X_OVERFLOW	BIT_MASK(6)
 #define PS2_Y_SIGN	BIT_MASK(5)
@@ -26,69 +28,249 @@
 #define PS2_LEFT	BIT_MASK(0)
 
 /*
- * The touchpad reports gestures in the last byte of each packet. It can take
- * any of the following values:
+ * BYD pad constants
  */
 
-/* One-finger scrolling in one of the edge scroll zones. */
-#define BYD_SCROLLUP		0xCA
-#define BYD_SCROLLDOWN		0x36
-#define BYD_SCROLLLEFT		0xCB
-#define BYD_SCROLLRIGHT		0x35
-/* Two-finger scrolling. */
-#define BYD_2DOWN		0x2B
-#define BYD_2UP			0xD5
-#define BYD_2LEFT		0xD6
-#define BYD_2RIGHT		0x2A
-/* Pinching in or out. */
-#define BYD_ZOOMOUT		0xD8
-#define BYD_ZOOMIN		0x28
-/* Three-finger swipe. */
-#define BYD_3UP			0xD3
-#define BYD_3DOWN		0x2D
-#define BYD_3LEFT		0xD4
-#define BYD_3RIGHT		0x2C
-/* Four-finger swipe. */
-#define BYD_4UP			0xCD
-#define BYD_4DOWN		0x33
+/*
+ * True device resolution is unknown, however experiments show the
+ * resolution is about 111 units/mm.
+ * Absolute coordinate packets are in the range 0-255 for both X and Y
+ * we pick ABS_X/ABS_Y dimensions which are multiples of 256 and in
+ * the right ballpark given the touchpad's physical dimensions and estimate
+ * resolution per spec sheet, device active area dimensions are
+ * 101.6 x 60.1 mm.
+ */
+#define BYD_PAD_WIDTH		11264
+#define BYD_PAD_HEIGHT		6656
+#define BYD_PAD_RESOLUTION	111
 
-int byd_detect(struct psmouse *psmouse, bool set_properties)
+/*
+ * Given the above dimensions, relative packets velocity is in multiples of
+ * 1 unit / 11 milliseconds.  We use this dt to estimate distance traveled
+ */
+#define BYD_DT			11
+/* Time in jiffies used to timeout various touch events (64 ms) */
+#define BYD_TOUCH_TIMEOUT	msecs_to_jiffies(64)
+
+/* BYD commands reverse engineered from windows driver */
+
+/*
+ * Swipe gesture from off-pad to on-pad
+ *  0 : disable
+ *  1 : enable
+ */
+#define BYD_CMD_SET_OFFSCREEN_SWIPE		0x10cc
+/*
+ * Tap and drag delay time
+ *  0 : disable
+ *  1 - 8 : least to most delay
+ */
+#define BYD_CMD_SET_TAP_DRAG_DELAY_TIME		0x10cf
+/*
+ * Physical buttons function mapping
+ *  0 : enable
+ *  4 : normal
+ *  5 : left button custom command
+ *  6 : right button custom command
+ *  8 : disable
+ */
+#define BYD_CMD_SET_PHYSICAL_BUTTONS		0x10d0
+/*
+ * Absolute mode (1 byte X/Y resolution)
+ *  0 : disable
+ *  2 : enable
+ */
+#define BYD_CMD_SET_ABSOLUTE_MODE		0x10d1
+/*
+ * Two finger scrolling
+ *  1 : vertical
+ *  2 : horizontal
+ *  3 : vertical + horizontal
+ *  4 : disable
+ */
+#define BYD_CMD_SET_TWO_FINGER_SCROLL		0x10d2
+/*
+ * Handedness
+ *  1 : right handed
+ *  2 : left handed
+ */
+#define BYD_CMD_SET_HANDEDNESS			0x10d3
+/*
+ * Tap to click
+ *  1 : enable
+ *  2 : disable
+ */
+#define BYD_CMD_SET_TAP				0x10d4
+/*
+ * Tap and drag
+ *  1 : tap and hold to drag
+ *  2 : tap and hold to drag + lock
+ *  3 : disable
+ */
+#define BYD_CMD_SET_TAP_DRAG			0x10d5
+/*
+ * Touch sensitivity
+ *  1 - 7 : least to most sensitive
+ */
+#define BYD_CMD_SET_TOUCH_SENSITIVITY		0x10d6
+/*
+ * One finger scrolling
+ *  1 : vertical
+ *  2 : horizontal
+ *  3 : vertical + horizontal
+ *  4 : disable
+ */
+#define BYD_CMD_SET_ONE_FINGER_SCROLL		0x10d7
+/*
+ * One finger scrolling function
+ *  1 : free scrolling
+ *  2 : edge motion
+ *  3 : free scrolling + edge motion
+ *  4 : disable
+ */
+#define BYD_CMD_SET_ONE_FINGER_SCROLL_FUNC	0x10d8
+/*
+ * Sliding speed
+ *  1 - 5 : slowest to fastest
+ */
+#define BYD_CMD_SET_SLIDING_SPEED		0x10da
+/*
+ * Edge motion
+ *  1 : disable
+ *  2 : enable when dragging
+ *  3 : enable when dragging and pointing
+ */
+#define BYD_CMD_SET_EDGE_MOTION			0x10db
+/*
+ * Left edge region size
+ *  0 - 7 : smallest to largest width
+ */
+#define BYD_CMD_SET_LEFT_EDGE_REGION		0x10dc
+/*
+ * Top edge region size
+ *  0 - 9 : smallest to largest height
+ */
+#define BYD_CMD_SET_TOP_EDGE_REGION		0x10dd
+/*
+ * Disregard palm press as clicks
+ *  1 - 6 : smallest to largest
+ */
+#define BYD_CMD_SET_PALM_CHECK			0x10de
+/*
+ * Right edge region size
+ *  0 - 7 : smallest to largest width
+ */
+#define BYD_CMD_SET_RIGHT_EDGE_REGION		0x10df
+/*
+ * Bottom edge region size
+ *  0 - 9 : smallest to largest height
+ */
+#define BYD_CMD_SET_BOTTOM_EDGE_REGION		0x10e1
+/*
+ * Multitouch gestures
+ *  1 : enable
+ *  2 : disable
+ */
+#define BYD_CMD_SET_MULTITOUCH			0x10e3
+/*
+ * Edge motion speed
+ *  0 : control with finger pressure
+ *  1 - 9 : slowest to fastest
+ */
+#define BYD_CMD_SET_EDGE_MOTION_SPEED		0x10e4
+/*
+ * Two finger scolling function
+ *  0 : free scrolling
+ *  1 : free scrolling (with momentum)
+ *  2 : edge motion
+ *  3 : free scrolling (with momentum) + edge motion
+ *  4 : disable
+ */
+#define BYD_CMD_SET_TWO_FINGER_SCROLL_FUNC	0x10e5
+
+/*
+ * The touchpad generates a mixture of absolute and relative packets, indicated
+ * by the the last byte of each packet being set to one of the following:
+ */
+#define BYD_PACKET_ABSOLUTE			0xf8
+#define BYD_PACKET_RELATIVE			0x00
+/* Multitouch gesture packets */
+#define BYD_PACKET_PINCH_IN			0xd8
+#define BYD_PACKET_PINCH_OUT			0x28
+#define BYD_PACKET_ROTATE_CLOCKWISE		0x29
+#define BYD_PACKET_ROTATE_ANTICLOCKWISE		0xd7
+#define BYD_PACKET_TWO_FINGER_SCROLL_RIGHT	0x2a
+#define BYD_PACKET_TWO_FINGER_SCROLL_DOWN	0x2b
+#define BYD_PACKET_TWO_FINGER_SCROLL_UP		0xd5
+#define BYD_PACKET_TWO_FINGER_SCROLL_LEFT	0xd6
+#define BYD_PACKET_THREE_FINGER_SWIPE_RIGHT	0x2c
+#define BYD_PACKET_THREE_FINGER_SWIPE_DOWN	0x2d
+#define BYD_PACKET_THREE_FINGER_SWIPE_UP	0xd3
+#define BYD_PACKET_THREE_FINGER_SWIPE_LEFT	0xd4
+#define BYD_PACKET_FOUR_FINGER_DOWN		0x33
+#define BYD_PACKET_FOUR_FINGER_UP		0xcd
+#define BYD_PACKET_REGION_SCROLL_RIGHT		0x35
+#define BYD_PACKET_REGION_SCROLL_DOWN		0x36
+#define BYD_PACKET_REGION_SCROLL_UP		0xca
+#define BYD_PACKET_REGION_SCROLL_LEFT		0xcb
+#define BYD_PACKET_RIGHT_CORNER_CLICK		0xd2
+#define BYD_PACKET_LEFT_CORNER_CLICK		0x2e
+#define BYD_PACKET_LEFT_AND_RIGHT_CORNER_CLICK	0x2f
+#define BYD_PACKET_ONTO_PAD_SWIPE_RIGHT		0x37
+#define BYD_PACKET_ONTO_PAD_SWIPE_DOWN		0x30
+#define BYD_PACKET_ONTO_PAD_SWIPE_UP		0xd0
+#define BYD_PACKET_ONTO_PAD_SWIPE_LEFT		0xc9
+
+struct byd_data {
+	struct timer_list timer;
+	s32 abs_x;
+	s32 abs_y;
+	typeof(jiffies) last_touch_time;
+	bool btn_left;
+	bool btn_right;
+	bool touch;
+};
+
+static void byd_report_input(struct psmouse *psmouse)
 {
-	struct ps2dev *ps2dev = &psmouse->ps2dev;
-	unsigned char param[4];
+	struct byd_data *priv = psmouse->private;
+	struct input_dev *dev = psmouse->dev;
 
-	param[0] = 0x03;
-	param[1] = 0x00;
-	param[2] = 0x00;
-	param[3] = 0x00;
+	input_report_key(dev, BTN_TOUCH, priv->touch);
+	input_report_key(dev, BTN_TOOL_FINGER, priv->touch);
 
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
-		return -1;
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
-		return -1;
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
-		return -1;
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
-		return -1;
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO))
-		return -1;
+	input_report_abs(dev, ABS_X, priv->abs_x);
+	input_report_abs(dev, ABS_Y, priv->abs_y);
+	input_report_key(dev, BTN_LEFT, priv->btn_left);
+	input_report_key(dev, BTN_RIGHT, priv->btn_right);
 
-	if (param[1] != 0x03 || param[2] != 0x64)
-		return -ENODEV;
+	input_sync(dev);
+}
 
-	psmouse_dbg(psmouse, "BYD touchpad detected\n");
+static void byd_clear_touch(unsigned long data)
+{
+	struct psmouse *psmouse = (struct psmouse *)data;
+	struct byd_data *priv = psmouse->private;
 
-	if (set_properties) {
-		psmouse->vendor = "BYD";
-		psmouse->name = "TouchPad";
-	}
+	serio_pause_rx(psmouse->ps2dev.serio);
+	priv->touch = false;
 
-	return 0;
+	byd_report_input(psmouse);
+
+	serio_continue_rx(psmouse->ps2dev.serio);
+
+	/*
+	 * Move cursor back to center of pad when we lose touch - this
+	 * specifically improves user experience when moving cursor with one
+	 * finger, and pressing a button with another.
+	 */
+	priv->abs_x = BYD_PAD_WIDTH / 2;
+	priv->abs_y = BYD_PAD_HEIGHT / 2;
 }
 
 static psmouse_ret_t byd_process_byte(struct psmouse *psmouse)
 {
-	struct input_dev *dev = psmouse->dev;
+	struct byd_data *priv = psmouse->private;
 	u8 *pkt = psmouse->packet;
 
 	if (psmouse->pktcnt > 0 && !(pkt[0] & PS2_ALWAYS_1)) {
@@ -102,53 +284,34 @@
 
 	/* Otherwise, a full packet has been received */
 	switch (pkt[3]) {
-	case 0: {
+	case BYD_PACKET_ABSOLUTE:
+		/* Only use absolute packets for the start of movement. */
+		if (!priv->touch) {
+			/* needed to detect tap */
+			typeof(jiffies) tap_time =
+				priv->last_touch_time + BYD_TOUCH_TIMEOUT;
+			priv->touch = time_after(jiffies, tap_time);
+
+			/* init abs position */
+			priv->abs_x = pkt[1] * (BYD_PAD_WIDTH / 256);
+			priv->abs_y = (255 - pkt[2]) * (BYD_PAD_HEIGHT / 256);
+		}
+		break;
+	case BYD_PACKET_RELATIVE: {
 		/* Standard packet */
 		/* Sign-extend if a sign bit is set. */
-		unsigned int signx = pkt[0] & PS2_X_SIGN ? ~0xFF : 0;
-		unsigned int signy = pkt[0] & PS2_Y_SIGN ? ~0xFF : 0;
-		int dx = signx | (int) pkt[1];
-		int dy = signy | (int) pkt[2];
+		u32 signx = pkt[0] & PS2_X_SIGN ? ~0xFF : 0;
+		u32 signy = pkt[0] & PS2_Y_SIGN ? ~0xFF : 0;
+		s32 dx = signx | (int) pkt[1];
+		s32 dy = signy | (int) pkt[2];
 
-		input_report_rel(psmouse->dev, REL_X, dx);
-		input_report_rel(psmouse->dev, REL_Y, -dy);
+		/* Update position based on velocity */
+		priv->abs_x += dx * BYD_DT;
+		priv->abs_y -= dy * BYD_DT;
 
-		input_report_key(psmouse->dev, BTN_LEFT, pkt[0] & PS2_LEFT);
-		input_report_key(psmouse->dev, BTN_RIGHT, pkt[0] & PS2_RIGHT);
-		input_report_key(psmouse->dev, BTN_MIDDLE, pkt[0] & PS2_MIDDLE);
+		priv->touch = true;
 		break;
 	}
-
-	case BYD_SCROLLDOWN:
-	case BYD_2DOWN:
-		input_report_rel(dev, REL_WHEEL, -1);
-		break;
-
-	case BYD_SCROLLUP:
-	case BYD_2UP:
-		input_report_rel(dev, REL_WHEEL, 1);
-		break;
-
-	case BYD_SCROLLLEFT:
-	case BYD_2LEFT:
-		input_report_rel(dev, REL_HWHEEL, -1);
-		break;
-
-	case BYD_SCROLLRIGHT:
-	case BYD_2RIGHT:
-		input_report_rel(dev, REL_HWHEEL, 1);
-		break;
-
-	case BYD_ZOOMOUT:
-	case BYD_ZOOMIN:
-	case BYD_3UP:
-	case BYD_3DOWN:
-	case BYD_3LEFT:
-	case BYD_3RIGHT:
-	case BYD_4UP:
-	case BYD_4DOWN:
-		break;
-
 	default:
 		psmouse_warn(psmouse,
 			     "Unrecognized Z: pkt = %02x %02x %02x %02x\n",
@@ -157,134 +320,76 @@
 		return PSMOUSE_BAD_DATA;
 	}
 
-	input_sync(dev);
+	priv->btn_left = pkt[0] & PS2_LEFT;
+	priv->btn_right = pkt[0] & PS2_RIGHT;
+
+	byd_report_input(psmouse);
+
+	/* Reset time since last touch. */
+	if (priv->touch) {
+		priv->last_touch_time = jiffies;
+		mod_timer(&priv->timer, jiffies + BYD_TOUCH_TIMEOUT);
+	}
 
 	return PSMOUSE_FULL_PACKET;
 }
 
-/* Send a sequence of bytes, where each is ACKed before the next is sent. */
-static int byd_send_sequence(struct psmouse *psmouse, const u8 *seq, size_t len)
-{
-	unsigned int i;
-
-	for (i = 0; i < len; ++i) {
-		if (ps2_command(&psmouse->ps2dev, NULL, seq[i]))
-			return -1;
-	}
-	return 0;
-}
-
-/* Keep scrolling after fingers are removed. */
-#define SCROLL_INERTIAL		0x01
-#define SCROLL_NO_INERTIAL	0x02
-
-/* Clicking can be done by tapping or pressing. */
-#define CLICK_BOTH		0x01
-/* Clicking can only be done by pressing. */
-#define CLICK_PRESS_ONLY	0x02
-
-static int byd_enable(struct psmouse *psmouse)
-{
-	const u8 seq1[] = { 0xE2, 0x00, 0xE0, 0x02, 0xE0 };
-	const u8 seq2[] = {
-		0xD3, 0x01,
-		0xD0, 0x00,
-		0xD0, 0x04,
-		/* Whether clicking is done by tapping or pressing. */
-		0xD4, CLICK_PRESS_ONLY,
-		0xD5, 0x01,
-		0xD7, 0x03,
-		/* Vertical and horizontal one-finger scroll zone inertia. */
-		0xD8, SCROLL_INERTIAL,
-		0xDA, 0x05,
-		0xDB, 0x02,
-		0xE4, 0x05,
-		0xD6, 0x01,
-		0xDE, 0x04,
-		0xE3, 0x01,
-		0xCF, 0x00,
-		0xD2, 0x03,
-		/* Vertical and horizontal two-finger scrolling inertia. */
-		0xE5, SCROLL_INERTIAL,
-		0xD9, 0x02,
-		0xD9, 0x07,
-		0xDC, 0x03,
-		0xDD, 0x03,
-		0xDF, 0x03,
-		0xE1, 0x03,
-		0xD1, 0x00,
-		0xCE, 0x00,
-		0xCC, 0x00,
-		0xE0, 0x00,
-		0xE2, 0x01
-	};
-	u8 param[4];
-
-	if (byd_send_sequence(psmouse, seq1, ARRAY_SIZE(seq1)))
-		return -1;
-
-	/* Send a 0x01 command, which should return 4 bytes. */
-	if (ps2_command(&psmouse->ps2dev, param, 0x0401))
-		return -1;
-
-	if (byd_send_sequence(psmouse, seq2, ARRAY_SIZE(seq2)))
-		return -1;
-
-	return 0;
-}
-
-/*
- * Send the set of PS/2 commands required to make it identify as an
- * intellimouse with 4-byte instead of 3-byte packets.
- */
-static int byd_send_intellimouse_sequence(struct psmouse *psmouse)
+static int byd_reset_touchpad(struct psmouse *psmouse)
 {
 	struct ps2dev *ps2dev = &psmouse->ps2dev;
 	u8 param[4];
-	int i;
+	size_t i;
+
 	const struct {
 		u16 command;
 		u8 arg;
 	} seq[] = {
-		{ PSMOUSE_CMD_RESET_BAT, 0 },
-		{ PSMOUSE_CMD_RESET_BAT, 0 },
-		{ PSMOUSE_CMD_GETID, 0 },
-		{ PSMOUSE_CMD_SETSCALE11, 0 },
-		{ PSMOUSE_CMD_SETSCALE11, 0 },
-		{ PSMOUSE_CMD_SETSCALE11, 0 },
-		{ PSMOUSE_CMD_GETINFO, 0 },
-		{ PSMOUSE_CMD_SETRES, 0x03 },
+		/*
+		 * Intellimouse initialization sequence, to get 4-byte instead
+		 * of 3-byte packets.
+		 */
 		{ PSMOUSE_CMD_SETRATE, 0xC8 },
 		{ PSMOUSE_CMD_SETRATE, 0x64 },
 		{ PSMOUSE_CMD_SETRATE, 0x50 },
 		{ PSMOUSE_CMD_GETID, 0 },
-		{ PSMOUSE_CMD_SETRATE, 0xC8 },
-		{ PSMOUSE_CMD_SETRATE, 0xC8 },
-		{ PSMOUSE_CMD_SETRATE, 0x50 },
-		{ PSMOUSE_CMD_GETID, 0 },
-		{ PSMOUSE_CMD_SETRATE, 0x64 },
-		{ PSMOUSE_CMD_SETRES, 0x03 },
-		{ PSMOUSE_CMD_ENABLE, 0 }
+		{ PSMOUSE_CMD_ENABLE, 0 },
+		/*
+		 * BYD-specific initialization, which enables absolute mode and
+		 * (if desired), the touchpad's built-in gesture detection.
+		 */
+		{ 0x10E2, 0x00 },
+		{ 0x10E0, 0x02 },
+		/* The touchpad should reply with 4 seemingly-random bytes */
+		{ 0x14E0, 0x01 },
+		/* Pairs of parameters and values. */
+		{ BYD_CMD_SET_HANDEDNESS, 0x01 },
+		{ BYD_CMD_SET_PHYSICAL_BUTTONS, 0x04 },
+		{ BYD_CMD_SET_TAP, 0x02 },
+		{ BYD_CMD_SET_ONE_FINGER_SCROLL, 0x04 },
+		{ BYD_CMD_SET_ONE_FINGER_SCROLL_FUNC, 0x04 },
+		{ BYD_CMD_SET_EDGE_MOTION, 0x01 },
+		{ BYD_CMD_SET_PALM_CHECK, 0x00 },
+		{ BYD_CMD_SET_MULTITOUCH, 0x02 },
+		{ BYD_CMD_SET_TWO_FINGER_SCROLL, 0x04 },
+		{ BYD_CMD_SET_TWO_FINGER_SCROLL_FUNC, 0x04 },
+		{ BYD_CMD_SET_LEFT_EDGE_REGION, 0x00 },
+		{ BYD_CMD_SET_TOP_EDGE_REGION, 0x00 },
+		{ BYD_CMD_SET_RIGHT_EDGE_REGION, 0x00 },
+		{ BYD_CMD_SET_BOTTOM_EDGE_REGION, 0x00 },
+		{ BYD_CMD_SET_ABSOLUTE_MODE, 0x02 },
+		/* Finalize initialization. */
+		{ 0x10E0, 0x00 },
+		{ 0x10E2, 0x01 },
 	};
 
-	memset(param, 0, sizeof(param));
 	for (i = 0; i < ARRAY_SIZE(seq); ++i) {
+		memset(param, 0, sizeof(param));
 		param[0] = seq[i].arg;
 		if (ps2_command(ps2dev, param, seq[i].command))
-			return -1;
+			return -EIO;
 	}
 
-	return 0;
-}
-
-static int byd_reset_touchpad(struct psmouse *psmouse)
-{
-	if (byd_send_intellimouse_sequence(psmouse))
-		return -EIO;
-
-	if (byd_enable(psmouse))
-		return -EIO;
-
+	psmouse_set_state(psmouse, PSMOUSE_ACTIVATED);
 	return 0;
 }
 
@@ -314,9 +419,50 @@
 	return 0;
 }
 
+static void byd_disconnect(struct psmouse *psmouse)
+{
+	struct byd_data *priv = psmouse->private;
+
+	if (priv) {
+		del_timer(&priv->timer);
+		kfree(psmouse->private);
+		psmouse->private = NULL;
+	}
+}
+
+int byd_detect(struct psmouse *psmouse, bool set_properties)
+{
+	struct ps2dev *ps2dev = &psmouse->ps2dev;
+	u8 param[4] = {0x03, 0x00, 0x00, 0x00};
+
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
+		return -1;
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
+		return -1;
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
+		return -1;
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
+		return -1;
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO))
+		return -1;
+
+	if (param[1] != 0x03 || param[2] != 0x64)
+		return -ENODEV;
+
+	psmouse_dbg(psmouse, "BYD touchpad detected\n");
+
+	if (set_properties) {
+		psmouse->vendor = "BYD";
+		psmouse->name = "TouchPad";
+	}
+
+	return 0;
+}
+
 int byd_init(struct psmouse *psmouse)
 {
 	struct input_dev *dev = psmouse->dev;
+	struct byd_data *priv;
 
 	if (psmouse_reset(psmouse))
 		return -EIO;
@@ -324,14 +470,39 @@
 	if (byd_reset_touchpad(psmouse))
 		return -EIO;
 
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	memset(priv, 0, sizeof(*priv));
+	setup_timer(&priv->timer, byd_clear_touch, (unsigned long) psmouse);
+
+	psmouse->private = priv;
+	psmouse->disconnect = byd_disconnect;
 	psmouse->reconnect = byd_reconnect;
 	psmouse->protocol_handler = byd_process_byte;
 	psmouse->pktsize = 4;
 	psmouse->resync_time = 0;
 
-	__set_bit(BTN_MIDDLE, dev->keybit);
-	__set_bit(REL_WHEEL, dev->relbit);
-	__set_bit(REL_HWHEEL, dev->relbit);
+	__set_bit(INPUT_PROP_POINTER, dev->propbit);
+	/* Touchpad */
+	__set_bit(BTN_TOUCH, dev->keybit);
+	__set_bit(BTN_TOOL_FINGER, dev->keybit);
+	/* Buttons */
+	__set_bit(BTN_LEFT, dev->keybit);
+	__set_bit(BTN_RIGHT, dev->keybit);
+	__clear_bit(BTN_MIDDLE, dev->keybit);
+
+	/* Absolute position */
+	__set_bit(EV_ABS, dev->evbit);
+	input_set_abs_params(dev, ABS_X, 0, BYD_PAD_WIDTH, 0, 0);
+	input_set_abs_params(dev, ABS_Y, 0, BYD_PAD_HEIGHT, 0, 0);
+	input_abs_set_res(dev, ABS_X, BYD_PAD_RESOLUTION);
+	input_abs_set_res(dev, ABS_Y, BYD_PAD_RESOLUTION);
+	/* No relative support */
+	__clear_bit(EV_REL, dev->evbit);
+	__clear_bit(REL_X, dev->relbit);
+	__clear_bit(REL_Y, dev->relbit);
 
 	return 0;
 }

diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 39d1bec..5784e20 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c

@@ -846,7 +846,7 @@
 #ifdef CONFIG_MOUSE_PS2_BYD
 	{
 		.type		= PSMOUSE_BYD,
-		.name		= "BydPS/2",
+		.name		= "BYDPS/2",
 		.alias		= "byd",
 		.detect		= byd_detect,
 		.init		= byd_init,

diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 6025eb4..a41d832 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c

@@ -862,8 +862,9 @@
 	if (!SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap))
 		return;
 
-	/* Bug in FW 8.1, buttons are reported only when ExtBit is 1 */
-	if (SYN_ID_FULL(priv->identity) == 0x801 &&
+	/* Bug in FW 8.1 & 8.2, buttons are reported only when ExtBit is 1 */
+	if ((SYN_ID_FULL(priv->identity) == 0x801 ||
+	     SYN_ID_FULL(priv->identity) == 0x802) &&
 	    !((psmouse->packet[0] ^ psmouse->packet[3]) & 0x02))
 		return;
 

diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index da38f0a..faa295e 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c

@@ -126,7 +126,7 @@
 		return;
 
 	fh = to_rmi_function_handler(fn->dev.driver);
-	if (fn->irq_mask && fh->attention) {
+	if (fh->attention) {
 		bitmap_and(data->fn_irq_bits, data->irq_status, fn->irq_mask,
 				data->irq_count);
 		if (!bitmap_empty(data->fn_irq_bits, data->irq_count))
@@ -172,8 +172,7 @@
 	 * use irq_chip.
 	 */
 	list_for_each_entry(entry, &data->function_list, node)
-		if (entry->irq_mask)
-			process_one_interrupt(data, entry);
+		process_one_interrupt(data, entry);
 
 	if (data->input)
 		input_sync(data->input);

diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c
index 89272973..fb5fb91 100644
--- a/drivers/input/touchscreen/melfas_mip4.c
+++ b/drivers/input/touchscreen/melfas_mip4.c

@@ -1310,8 +1310,34 @@
 
 static DEVICE_ATTR(fw_version, S_IRUGO, mip4_sysfs_read_fw_version, NULL);
 
+static ssize_t mip4_sysfs_read_hw_version(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct mip4_ts *ts = i2c_get_clientdata(client);
+	size_t count;
+
+	/* Take lock to prevent racing with firmware update */
+	mutex_lock(&ts->input->mutex);
+
+	/*
+	 * product_name shows the name or version of the hardware
+	 * paired with current firmware in the chip.
+	 */
+	count = snprintf(buf, PAGE_SIZE, "%.*s\n",
+		(int)sizeof(ts->product_name), ts->product_name);
+
+	mutex_unlock(&ts->input->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(hw_version, S_IRUGO, mip4_sysfs_read_hw_version, NULL);
+
 static struct attribute *mip4_attrs[] = {
 	&dev_attr_fw_version.attr,
+	&dev_attr_hw_version.attr,
 	&dev_attr_update_fw.attr,
 	NULL,
 };
@@ -1512,6 +1538,6 @@
 module_i2c_driver(mip4_driver);
 
 MODULE_DESCRIPTION("MELFAS MIP4 Touchscreen");
-MODULE_VERSION("2016.03.03");
+MODULE_VERSION("2016.03.12");
 MODULE_AUTHOR("Sangwon Jee <jeesw@melfas.com>");
 MODULE_LICENSE("GPL");

diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c
index b6c4d03..880c40b 100644
--- a/drivers/input/touchscreen/sur40.c
+++ b/drivers/input/touchscreen/sur40.c

@@ -197,28 +197,34 @@
 static int sur40_init(struct sur40_state *dev)
 {
 	int result;
-	u8 buffer[24];
+	u8 *buffer;
+
+	buffer = kmalloc(24, GFP_KERNEL);
+	if (!buffer) {
+		result = -ENOMEM;
+		goto error;
+	}
 
 	/* stupidly replay the original MS driver init sequence */
 	result = sur40_command(dev, SUR40_GET_VERSION, 0x00, buffer, 12);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_GET_VERSION, 0x01, buffer, 12);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_GET_VERSION, 0x02, buffer, 12);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_UNKNOWN2,    0x00, buffer, 24);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_UNKNOWN1,    0x00, buffer,  5);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_GET_VERSION, 0x03, buffer, 12);
 
@@ -226,7 +232,8 @@
 	 * Discard the result buffer - no known data inside except
 	 * some version strings, maybe extract these sometime...
 	 */
-
+error:
+	kfree(buffer);
 	return result;
 }
 

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 7e8c441..3e12479 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig

@@ -32,14 +32,6 @@
 	bool
 	select PCI_MSI_IRQ_DOMAIN
 
-config HISILICON_IRQ_MBIGEN
-	bool "Support mbigen interrupt controller"
-	default n
-	depends on ARM_GIC_V3 && ARM_GIC_V3_ITS && GENERIC_MSI_IRQ_DOMAIN
-	help
-	 Enable the mbigen interrupt controller used on
-	 Hisilicon platform.
-
 config ARM_NVIC
 	bool
 	select IRQ_DOMAIN
@@ -114,6 +106,12 @@
 	select GENERIC_IRQ_CHIP
 	select IRQ_DOMAIN
 
+config HISILICON_IRQ_MBIGEN
+	bool
+	select ARM_GIC_V3
+	select ARM_GIC_V3_ITS
+	select GENERIC_MSI_IRQ_DOMAIN
+
 config IMGPDC_IRQ
 	bool
 	select GENERIC_IRQ_CHIP

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 4dd3eb8..d67baa2 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c

@@ -239,8 +239,11 @@
 static int mbigen_device_probe(struct platform_device *pdev)
 {
 	struct mbigen_device *mgn_chip;
-	struct resource *res;
+	struct platform_device *child;
 	struct irq_domain *domain;
+	struct device_node *np;
+	struct device *parent;
+	struct resource *res;
 	u32 num_pins;
 
 	mgn_chip = devm_kzalloc(&pdev->dev, sizeof(*mgn_chip), GFP_KERNEL);
@@ -254,23 +257,30 @@
 	if (IS_ERR(mgn_chip->base))
 		return PTR_ERR(mgn_chip->base);
 
-	if (of_property_read_u32(pdev->dev.of_node, "num-pins", &num_pins) < 0) {
-		dev_err(&pdev->dev, "No num-pins property\n");
-		return -EINVAL;
+	for_each_child_of_node(pdev->dev.of_node, np) {
+		if (!of_property_read_bool(np, "interrupt-controller"))
+			continue;
+
+		parent = platform_bus_type.dev_root;
+		child = of_platform_device_create(np, NULL, parent);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		if (of_property_read_u32(child->dev.of_node, "num-pins",
+					 &num_pins) < 0) {
+			dev_err(&pdev->dev, "No num-pins property\n");
+			return -EINVAL;
+		}
+
+		domain = platform_msi_create_device_domain(&child->dev, num_pins,
+							   mbigen_write_msg,
+							   &mbigen_domain_ops,
+							   mgn_chip);
+		if (!domain)
+			return -ENOMEM;
 	}
 
-	domain = platform_msi_create_device_domain(&pdev->dev, num_pins,
-							mbigen_write_msg,
-							&mbigen_domain_ops,
-							mgn_chip);
-
-	if (!domain)
-		return -ENOMEM;
-
 	platform_set_drvdata(pdev, mgn_chip);
-
-	dev_info(&pdev->dev, "Allocated %d MSIs\n", num_pins);
-
 	return 0;
 }
 

diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c
index 121ec30..50be963 100644
--- a/drivers/irqchip/irq-tegra.c
+++ b/drivers/irqchip/irq-tegra.c

@@ -275,22 +275,10 @@
 					    &parent_fwspec);
 }
 
-static void tegra_ictlr_domain_free(struct irq_domain *domain,
-				    unsigned int virq,
-				    unsigned int nr_irqs)
-{
-	unsigned int i;
-
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
-		irq_domain_reset_irq_data(d);
-	}
-}
-
 static const struct irq_domain_ops tegra_ictlr_domain_ops = {
 	.translate	= tegra_ictlr_domain_translate,
 	.alloc		= tegra_ictlr_domain_alloc,
-	.free		= tegra_ictlr_domain_free,
+	.free		= irq_domain_free_irqs_common,
 };
 
 static int __init tegra_ictlr_init(struct device_node *node,

diff --git a/drivers/isdn/hisax/isac.c b/drivers/isdn/hisax/isac.c
index 7fdf78f..df7e05c 100644
--- a/drivers/isdn/hisax/isac.c
+++ b/drivers/isdn/hisax/isac.c

@@ -215,9 +215,11 @@
 			if (count == 0)
 				count = 32;
 			isac_empty_fifo(cs, count);
-			if ((count = cs->rcvidx) > 0) {
+			count = cs->rcvidx;
+			if (count > 0) {
 				cs->rcvidx = 0;
-				if (!(skb = alloc_skb(count, GFP_ATOMIC)))
+				skb = alloc_skb(count, GFP_ATOMIC);
+				if (!skb)
 					printk(KERN_WARNING "HiSax: D receive out of memory\n");
 				else {
 					memcpy(skb_put(skb, count), cs->rcvbuf, count);
@@ -251,7 +253,8 @@
 				cs->tx_skb = NULL;
 			}
 		}
-		if ((cs->tx_skb = skb_dequeue(&cs->sq))) {
+		cs->tx_skb = skb_dequeue(&cs->sq);
+		if (cs->tx_skb) {
 			cs->tx_cnt = 0;
 			isac_fill_fifo(cs);
 		} else
@@ -313,7 +316,8 @@
 #if ARCOFI_USE
 			if (v1 & 0x08) {
 				if (!cs->dc.isac.mon_rx) {
-					if (!(cs->dc.isac.mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC))) {
+					cs->dc.isac.mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC);
+					if (!cs->dc.isac.mon_rx) {
 						if (cs->debug & L1_DEB_WARN)
 							debugl1(cs, "ISAC MON RX out of memory!");
 						cs->dc.isac.mocr &= 0xf0;
@@ -343,7 +347,8 @@
 		afterMONR0:
 			if (v1 & 0x80) {
 				if (!cs->dc.isac.mon_rx) {
-					if (!(cs->dc.isac.mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC))) {
+					cs->dc.isac.mon_rx = kmalloc(MAX_MON_FRAME, GFP_ATOMIC);
+					if (!cs->dc.isac.mon_rx) {
 						if (cs->debug & L1_DEB_WARN)
 							debugl1(cs, "ISAC MON RX out of memory!");
 						cs->dc.isac.mocr &= 0x0f;

diff --git a/drivers/isdn/mISDN/clock.c b/drivers/isdn/mISDN/clock.c
index 693fb7c..f8f659f 100644
--- a/drivers/isdn/mISDN/clock.c
+++ b/drivers/isdn/mISDN/clock.c

@@ -37,6 +37,7 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
+#include <linux/ktime.h>
 #include <linux/mISDNif.h>
 #include <linux/export.h>
 #include "core.h"
@@ -45,15 +46,15 @@
 static LIST_HEAD(iclock_list);
 static DEFINE_RWLOCK(iclock_lock);
 static u16 iclock_count;		/* counter of last clock */
-static struct timeval iclock_tv;	/* time stamp of last clock */
-static int iclock_tv_valid;		/* already received one timestamp */
+static ktime_t iclock_timestamp;	/* time stamp of last clock */
+static int iclock_timestamp_valid;	/* already received one timestamp */
 static struct mISDNclock *iclock_current;
 
 void
 mISDN_init_clock(u_int *dp)
 {
 	debug = dp;
-	do_gettimeofday(&iclock_tv);
+	iclock_timestamp = ktime_get();
 }
 
 static void
@@ -86,7 +87,7 @@
 	}
 	if (bestclock != iclock_current) {
 		/* no clock received yet */
-		iclock_tv_valid = 0;
+		iclock_timestamp_valid = 0;
 	}
 	iclock_current = bestclock;
 }
@@ -139,12 +140,11 @@
 EXPORT_SYMBOL(mISDN_unregister_clock);
 
 void
-mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv)
+mISDN_clock_update(struct mISDNclock *iclock, int samples, ktime_t *timestamp)
 {
 	u_long		flags;
-	struct timeval	tv_now;
-	time_t		elapsed_sec;
-	int		elapsed_8000th;
+	ktime_t		timestamp_now;
+	u16		delta;
 
 	write_lock_irqsave(&iclock_lock, flags);
 	if (iclock_current != iclock) {
@@ -156,33 +156,27 @@
 		write_unlock_irqrestore(&iclock_lock, flags);
 		return;
 	}
-	if (iclock_tv_valid) {
+	if (iclock_timestamp_valid) {
 		/* increment sample counter by given samples */
 		iclock_count += samples;
-		if (tv) { /* tv must be set, if function call is delayed */
-			iclock_tv.tv_sec = tv->tv_sec;
-			iclock_tv.tv_usec = tv->tv_usec;
-		} else
-			do_gettimeofday(&iclock_tv);
+		if (timestamp) { /* timestamp must be set, if function call is delayed */
+			iclock_timestamp = *timestamp;
+		} else	{
+			iclock_timestamp = ktime_get();
+		}
 	} else {
 		/* calc elapsed time by system clock */
-		if (tv) { /* tv must be set, if function call is delayed */
-			tv_now.tv_sec = tv->tv_sec;
-			tv_now.tv_usec = tv->tv_usec;
-		} else
-			do_gettimeofday(&tv_now);
-		elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec;
-		elapsed_8000th = (tv_now.tv_usec / 125)
-			- (iclock_tv.tv_usec / 125);
-		if (elapsed_8000th < 0) {
-			elapsed_sec -= 1;
-			elapsed_8000th += 8000;
+		if (timestamp) { /* timestamp must be set, if function call is delayed */
+			timestamp_now = *timestamp;
+		} else {
+			timestamp_now = ktime_get();
 		}
+		delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp),
+				(NSEC_PER_SEC / 8000));
 		/* add elapsed time to counter and set new timestamp */
-		iclock_count += elapsed_sec * 8000 + elapsed_8000th;
-		iclock_tv.tv_sec = tv_now.tv_sec;
-		iclock_tv.tv_usec = tv_now.tv_usec;
-		iclock_tv_valid = 1;
+		iclock_count += delta;
+		iclock_timestamp = timestamp_now;
+		iclock_timestamp_valid = 1;
 		if (*debug & DEBUG_CLOCK)
 			printk("Received first clock from source '%s'.\n",
 			       iclock_current ? iclock_current->name : "nothing");
@@ -195,22 +189,17 @@
 mISDN_clock_get(void)
 {
 	u_long		flags;
-	struct timeval	tv_now;
-	time_t		elapsed_sec;
-	int		elapsed_8000th;
+	ktime_t		timestamp_now;
+	u16		delta;
 	u16		count;
 
 	read_lock_irqsave(&iclock_lock, flags);
 	/* calc elapsed time by system clock */
-	do_gettimeofday(&tv_now);
-	elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec;
-	elapsed_8000th = (tv_now.tv_usec / 125) - (iclock_tv.tv_usec / 125);
-	if (elapsed_8000th < 0) {
-		elapsed_sec -= 1;
-		elapsed_8000th += 8000;
-	}
+	timestamp_now = ktime_get();
+	delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp),
+			(NSEC_PER_SEC / 8000));
 	/* add elapsed time to counter */
-	count =	iclock_count + elapsed_sec * 8000 + elapsed_8000th;
+	count =	iclock_count + delta;
 	read_unlock_irqrestore(&iclock_lock, flags);
 	return count;
 }

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 1f64151..2251478 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig

@@ -443,6 +443,7 @@
 	tristate "External LED on Dell Business Netbooks"
 	depends on LEDS_CLASS
 	depends on X86 && ACPI_WMI
+	depends on DELL_SMBIOS
 	help
 	  This adds support for the Latitude 2100 and similar
 	  notebooks that have an external LED.

diff --git a/drivers/leds/dell-led.c b/drivers/leds/dell-led.c
index c36acaf..b3d6e9c 100644
--- a/drivers/leds/dell-led.c
+++ b/drivers/leds/dell-led.c

@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/dmi.h>
 #include <linux/dell-led.h>
+#include "../platform/x86/dell-smbios.h"
 
 MODULE_AUTHOR("Louis Davis/Jim Dailey");
 MODULE_DESCRIPTION("Dell LED Control Driver");
@@ -42,120 +43,32 @@
 #define CMD_LED_OFF	17
 #define CMD_LED_BLINK	18
 
-struct app_wmi_args {
-	u16 class;
-	u16 selector;
-	u32 arg1;
-	u32 arg2;
-	u32 arg3;
-	u32 arg4;
-	u32 res1;
-	u32 res2;
-	u32 res3;
-	u32 res4;
-	char dummy[92];
-};
-
 #define GLOBAL_MIC_MUTE_ENABLE	0x364
 #define GLOBAL_MIC_MUTE_DISABLE	0x365
 
-struct dell_bios_data_token {
-	u16 tokenid;
-	u16 location;
-	u16 value;
-};
-
-struct __attribute__ ((__packed__)) dell_bios_calling_interface {
-	struct	dmi_header header;
-	u16	cmd_io_addr;
-	u8	cmd_io_code;
-	u32	supported_cmds;
-	struct	dell_bios_data_token damap[];
-};
-
-static struct dell_bios_data_token dell_mic_tokens[2];
-
-static int dell_wmi_perform_query(struct app_wmi_args *args)
-{
-	struct app_wmi_args *bios_return;
-	union acpi_object *obj;
-	struct acpi_buffer input;
-	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	acpi_status status;
-	u32 rc = -EINVAL;
-
-	input.length = 128;
-	input.pointer = args;
-
-	status = wmi_evaluate_method(DELL_APP_GUID, 0, 1, &input, &output);
-	if (!ACPI_SUCCESS(status))
-		goto err_out0;
-
-	obj = output.pointer;
-	if (!obj)
-		goto err_out0;
-
-	if (obj->type != ACPI_TYPE_BUFFER)
-		goto err_out1;
-
-	bios_return = (struct app_wmi_args *)obj->buffer.pointer;
-	rc = bios_return->res1;
-	if (rc)
-		goto err_out1;
-
-	memcpy(args, bios_return, sizeof(struct app_wmi_args));
-	rc = 0;
-
- err_out1:
-	kfree(obj);
- err_out0:
-	return rc;
-}
-
-static void __init find_micmute_tokens(const struct dmi_header *dm, void *dummy)
-{
-	struct dell_bios_calling_interface *calling_interface;
-	struct dell_bios_data_token *token;
-	int token_size = sizeof(struct dell_bios_data_token);
-	int i = 0;
-
-	if (dm->type == 0xda && dm->length > 17) {
-		calling_interface = container_of(dm,
-				struct dell_bios_calling_interface, header);
-
-		token = &calling_interface->damap[i];
-		while (token->tokenid != 0xffff) {
-			if (token->tokenid == GLOBAL_MIC_MUTE_DISABLE)
-				memcpy(&dell_mic_tokens[0], token, token_size);
-			else if (token->tokenid == GLOBAL_MIC_MUTE_ENABLE)
-				memcpy(&dell_mic_tokens[1], token, token_size);
-
-			i++;
-			token = &calling_interface->damap[i];
-		}
-	}
-}
-
 static int dell_micmute_led_set(int state)
 {
-	struct app_wmi_args args;
-	struct dell_bios_data_token *token;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 
 	if (!wmi_has_guid(DELL_APP_GUID))
 		return -ENODEV;
 
-	if (state == 0 || state == 1)
-		token = &dell_mic_tokens[state];
+	if (state == 0)
+		token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE);
+	else if (state == 1)
+		token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE);
 	else
 		return -EINVAL;
 
-	memset(&args, 0, sizeof(struct app_wmi_args));
+	if (!token)
+		return -ENODEV;
 
-	args.class = 1;
-	args.arg1 = token->location;
-	args.arg2 = token->value;
-
-	dell_wmi_perform_query(&args);
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
+	buffer->input[1] = token->value;
+	dell_smbios_send_request(1, 0);
+	dell_smbios_release_buffer();
 
 	return state;
 }
@@ -177,14 +90,6 @@
 }
 EXPORT_SYMBOL_GPL(dell_app_wmi_led_set);
 
-static int __init dell_micmute_led_init(void)
-{
-	memset(dell_mic_tokens, 0, sizeof(struct dell_bios_data_token) * 2);
-	dmi_walk(find_micmute_tokens, NULL);
-
-	return 0;
-}
-
 struct bios_args {
 	unsigned char length;
 	unsigned char result_code;
@@ -330,9 +235,6 @@
 	if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID))
 		return -ENODEV;
 
-	if (wmi_has_guid(DELL_APP_GUID))
-		error = dell_micmute_led_init();
-
 	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
 		error = led_off();
 		if (error != 0)

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 0d1fb6b..0dc9a80 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c

@@ -464,8 +464,13 @@
 	dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls;
 
 	dev->total_secs = dev->nr_luns * dev->sec_per_lun;
+	dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
+					sizeof(unsigned long), GFP_KERNEL);
+	if (!dev->lun_map)
+		return -ENOMEM;
 	INIT_LIST_HEAD(&dev->online_targets);
 	mutex_init(&dev->mlock);
+	spin_lock_init(&dev->lock);
 
 	return 0;
 }
@@ -585,6 +590,7 @@
 
 	return 0;
 err_init:
+	kfree(dev->lun_map);
 	kfree(dev);
 	return ret;
 }
@@ -607,6 +613,7 @@
 	up_write(&nvm_lock);
 
 	nvm_exit(dev);
+	kfree(dev->lun_map);
 	kfree(dev);
 }
 EXPORT_SYMBOL(nvm_unregister);

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index d65ec36..72e124a 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c

@@ -20,6 +20,68 @@
 
 #include "gennvm.h"
 
+static int gennvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
+{
+	struct gen_nvm *gn = dev->mp;
+	struct gennvm_area *area, *prev, *next;
+	sector_t begin = 0;
+	sector_t max_sectors = (dev->sec_size * dev->total_secs) >> 9;
+
+	if (len > max_sectors)
+		return -EINVAL;
+
+	area = kmalloc(sizeof(struct gennvm_area), GFP_KERNEL);
+	if (!area)
+		return -ENOMEM;
+
+	prev = NULL;
+
+	spin_lock(&dev->lock);
+	list_for_each_entry(next, &gn->area_list, list) {
+		if (begin + len > next->begin) {
+			begin = next->end;
+			prev = next;
+			continue;
+		}
+		break;
+	}
+
+	if ((begin + len) > max_sectors) {
+		spin_unlock(&dev->lock);
+		kfree(area);
+		return -EINVAL;
+	}
+
+	area->begin = *lba = begin;
+	area->end = begin + len;
+
+	if (prev) /* insert into sorted order */
+		list_add(&area->list, &prev->list);
+	else
+		list_add(&area->list, &gn->area_list);
+	spin_unlock(&dev->lock);
+
+	return 0;
+}
+
+static void gennvm_put_area(struct nvm_dev *dev, sector_t begin)
+{
+	struct gen_nvm *gn = dev->mp;
+	struct gennvm_area *area;
+
+	spin_lock(&dev->lock);
+	list_for_each_entry(area, &gn->area_list, list) {
+		if (area->begin != begin)
+			continue;
+
+		list_del(&area->list);
+		spin_unlock(&dev->lock);
+		kfree(area);
+		return;
+	}
+	spin_unlock(&dev->lock);
+}
+
 static void gennvm_blocks_free(struct nvm_dev *dev)
 {
 	struct gen_nvm *gn = dev->mp;
@@ -195,7 +257,7 @@
 		}
 	}
 
-	if (dev->ops->get_l2p_tbl) {
+	if ((dev->identity.dom & NVM_RSP_L2P) && dev->ops->get_l2p_tbl) {
 		ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs,
 							gennvm_block_map, dev);
 		if (ret) {
@@ -229,6 +291,7 @@
 
 	gn->dev = dev;
 	gn->nr_luns = dev->nr_luns;
+	INIT_LIST_HEAD(&gn->area_list);
 	dev->mp = gn;
 
 	ret = gennvm_luns_init(dev, gn);
@@ -419,10 +482,23 @@
 	return nvm_erase_ppa(dev, &addr, 1);
 }
 
+static int gennvm_reserve_lun(struct nvm_dev *dev, int lunid)
+{
+	return test_and_set_bit(lunid, dev->lun_map);
+}
+
+static void gennvm_release_lun(struct nvm_dev *dev, int lunid)
+{
+	WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
+}
+
 static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
 {
 	struct gen_nvm *gn = dev->mp;
 
+	if (unlikely(lunid >= dev->nr_luns))
+		return NULL;
+
 	return &gn->luns[lunid].vlun;
 }
 
@@ -464,7 +540,13 @@
 	.erase_blk		= gennvm_erase_blk,
 
 	.get_lun		= gennvm_get_lun,
+	.reserve_lun		= gennvm_reserve_lun,
+	.release_lun		= gennvm_release_lun,
 	.lun_info_print		= gennvm_lun_info_print,
+
+	.get_area		= gennvm_get_area,
+	.put_area		= gennvm_put_area,
+
 };
 
 static int __init gennvm_module_init(void)

diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
index 9c24b5b..04d7c23 100644
--- a/drivers/lightnvm/gennvm.h
+++ b/drivers/lightnvm/gennvm.h

@@ -39,8 +39,14 @@
 
 	int nr_luns;
 	struct gen_lun *luns;
+	struct list_head area_list;
 };
 
+struct gennvm_area {
+	struct list_head list;
+	sector_t begin;
+	sector_t end;	/* end is excluded */
+};
 #define gennvm_for_each_lun(bm, lun, i) \
 		for ((i) = 0, lun = &(bm)->luns[0]; \
 			(i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 8234378..3ab6495 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c

@@ -965,25 +965,11 @@
 
 static void rrpc_gc_free(struct rrpc *rrpc)
 {
-	struct rrpc_lun *rlun;
-	int i;
-
 	if (rrpc->krqd_wq)
 		destroy_workqueue(rrpc->krqd_wq);
 
 	if (rrpc->kgc_wq)
 		destroy_workqueue(rrpc->kgc_wq);
-
-	if (!rrpc->luns)
-		return;
-
-	for (i = 0; i < rrpc->nr_luns; i++) {
-		rlun = &rrpc->luns[i];
-
-		if (!rlun->blocks)
-			break;
-		vfree(rlun->blocks);
-	}
 }
 
 static int rrpc_gc_init(struct rrpc *rrpc)
@@ -1053,8 +1039,11 @@
 {
 	struct nvm_dev *dev = rrpc->dev;
 	sector_t i;
+	u64 slba;
 	int ret;
 
+	slba = rrpc->soffset >> (ilog2(dev->sec_size) - 9);
+
 	rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects);
 	if (!rrpc->trans_map)
 		return -ENOMEM;
@@ -1076,7 +1065,7 @@
 		return 0;
 
 	/* Bring up the mapping table from device */
-	ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, rrpc_l2p_update,
+	ret = dev->ops->get_l2p_tbl(dev, slba, rrpc->nr_sects, rrpc_l2p_update,
 									rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not read L2P table.\n");
@@ -1086,7 +1075,6 @@
 	return 0;
 }
 
-
 /* Minimum pages needed within a lun */
 #define PAGE_POOL_SIZE 16
 #define ADDR_POOL_SIZE 64
@@ -1141,6 +1129,23 @@
 
 static void rrpc_luns_free(struct rrpc *rrpc)
 {
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_lun *lun;
+	struct rrpc_lun *rlun;
+	int i;
+
+	if (!rrpc->luns)
+		return;
+
+	for (i = 0; i < rrpc->nr_luns; i++) {
+		rlun = &rrpc->luns[i];
+		lun = rlun->parent;
+		if (!lun)
+			break;
+		dev->mt->release_lun(dev, lun->id);
+		vfree(rlun->blocks);
+	}
+
 	kfree(rrpc->luns);
 }
 
@@ -1148,7 +1153,7 @@
 {
 	struct nvm_dev *dev = rrpc->dev;
 	struct rrpc_lun *rlun;
-	int i, j;
+	int i, j, ret = -EINVAL;
 
 	if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
 		pr_err("rrpc: number of pages per block too high.");
@@ -1164,25 +1169,26 @@
 
 	/* 1:1 mapping */
 	for (i = 0; i < rrpc->nr_luns; i++) {
-		struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
+		int lunid = lun_begin + i;
+		struct nvm_lun *lun;
+
+		if (dev->mt->reserve_lun(dev, lunid)) {
+			pr_err("rrpc: lun %u is already allocated\n", lunid);
+			goto err;
+		}
+
+		lun = dev->mt->get_lun(dev, lunid);
+		if (!lun)
+			goto err;
 
 		rlun = &rrpc->luns[i];
-		rlun->rrpc = rrpc;
 		rlun->parent = lun;
-		INIT_LIST_HEAD(&rlun->prio_list);
-		INIT_LIST_HEAD(&rlun->open_list);
-		INIT_LIST_HEAD(&rlun->closed_list);
-
-		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
-		spin_lock_init(&rlun->lock);
-
-		rrpc->total_blocks += dev->blks_per_lun;
-		rrpc->nr_sects += dev->sec_per_lun;
-
 		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
 						rrpc->dev->blks_per_lun);
-		if (!rlun->blocks)
+		if (!rlun->blocks) {
+			ret = -ENOMEM;
 			goto err;
+		}
 
 		for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
 			struct rrpc_block *rblk = &rlun->blocks[j];
@@ -1193,11 +1199,43 @@
 			INIT_LIST_HEAD(&rblk->prio);
 			spin_lock_init(&rblk->lock);
 		}
+
+		rlun->rrpc = rrpc;
+		INIT_LIST_HEAD(&rlun->prio_list);
+		INIT_LIST_HEAD(&rlun->open_list);
+		INIT_LIST_HEAD(&rlun->closed_list);
+
+		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
+		spin_lock_init(&rlun->lock);
+
+		rrpc->total_blocks += dev->blks_per_lun;
+		rrpc->nr_sects += dev->sec_per_lun;
+
 	}
 
 	return 0;
 err:
-	return -ENOMEM;
+	return ret;
+}
+
+/* returns 0 on success and stores the beginning address in *begin */
+static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvmm_type *mt = dev->mt;
+	sector_t size = rrpc->nr_sects * dev->sec_size;
+
+	size >>= 9;
+
+	return mt->get_area(dev, begin, size);
+}
+
+static void rrpc_area_free(struct rrpc *rrpc)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvmm_type *mt = dev->mt;
+
+	mt->put_area(dev, rrpc->soffset);
 }
 
 static void rrpc_free(struct rrpc *rrpc)
@@ -1206,6 +1244,7 @@
 	rrpc_map_free(rrpc);
 	rrpc_core_free(rrpc);
 	rrpc_luns_free(rrpc);
+	rrpc_area_free(rrpc);
 
 	kfree(rrpc);
 }
@@ -1327,6 +1366,7 @@
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
 	struct rrpc *rrpc;
+	sector_t soffset;
 	int ret;
 
 	if (!(dev->identity.dom & NVM_RSP_L2P)) {
@@ -1352,6 +1392,13 @@
 	/* simple round-robin strategy */
 	atomic_set(&rrpc->next_lun, -1);
 
+	ret = rrpc_area_init(rrpc, &soffset);
+	if (ret < 0) {
+		pr_err("nvm: rrpc: could not initialize area\n");
+		return ERR_PTR(ret);
+	}
+	rrpc->soffset = soffset;
+
 	ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
 	if (ret) {
 		pr_err("nvm: rrpc: could not initialize luns\n");

diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index 855f4a5..2653484 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h

@@ -97,6 +97,7 @@
 	struct nvm_dev *dev;
 	struct gendisk *disk;
 
+	sector_t soffset; /* logical sector offset */
 	u64 poffset; /* physical page offset */
 	int lun_offset;
 

diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index 0971984..5305923 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig

@@ -87,6 +87,17 @@
 	  Mailbox implementation for STMicroelectonics family chips with
 	  hardware for interprocessor communication.
 
+config TI_MESSAGE_MANAGER
+	tristate "Texas Instruments Message Manager Driver"
+	depends on ARCH_KEYSTONE
+	help
+	  An implementation of Message Manager slave driver for Keystone
+	  architecture SoCs from Texas Instruments. Message Manager is a
+	  communication entity found on few of Texas Instrument's keystone
+	  architecture SoCs. These may be used for communication between
+	  multiple processors within the SoC. Select this driver if your
+	  platform has support for the hardware block.
+
 config HI6220_MBOX
 	tristate "Hi6220 Mailbox"
 	depends on ARCH_HISI

diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 9486717..0be3e74 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile

@@ -20,6 +20,8 @@
 
 obj-$(CONFIG_STI_MBOX)		+= mailbox-sti.o
 
+obj-$(CONFIG_TI_MESSAGE_MANAGER) += ti-msgmgr.o
+
 obj-$(CONFIG_XGENE_SLIMPRO_MBOX) += mailbox-xgene-slimpro.o
 
 obj-$(CONFIG_HI6220_MBOX)	+= hi6220-mailbox.o

diff --git a/drivers/mailbox/ti-msgmgr.c b/drivers/mailbox/ti-msgmgr.c
new file mode 100644
index 0000000..54b9e4c
--- /dev/null
+++ b/drivers/mailbox/ti-msgmgr.c

@@ -0,0 +1,639 @@
+/*
+ * Texas Instruments' Message Manager Driver
+ *
+ * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/
+ *	Nishanth Menon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_controller.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/soc/ti/ti-msgmgr.h>
+
+#define Q_DATA_OFFSET(proxy, queue, reg)	\
+		     ((0x10000 * (proxy)) + (0x80 * (queue)) + ((reg) * 4))
+#define Q_STATE_OFFSET(queue)			((queue) * 0x4)
+#define Q_STATE_ENTRY_COUNT_MASK		(0xFFF000)
+
+/**
+ * struct ti_msgmgr_valid_queue_desc - SoC valid queues meant for this processor
+ * @queue_id:	Queue Number for this path
+ * @proxy_id:	Proxy ID representing the processor in SoC
+ * @is_tx:	Is this a receive path?
+ */
+struct ti_msgmgr_valid_queue_desc {
+	u8 queue_id;
+	u8 proxy_id;
+	bool is_tx;
+};
+
+/**
+ * struct ti_msgmgr_desc - Description of message manager integration
+ * @queue_count:	Number of Queues
+ * @max_message_size:	Message size in bytes
+ * @max_messages:	Number of messages
+ * @q_slices:		Number of queue engines
+ * @q_proxies:		Number of queue proxies per page
+ * @data_first_reg:	First data register for proxy data region
+ * @data_last_reg:	Last data register for proxy data region
+ * @tx_polled:		Do I need to use polled mechanism for tx
+ * @tx_poll_timeout_ms: Timeout in ms if polled
+ * @valid_queues:	List of Valid queues that the processor can access
+ * @num_valid_queues:	Number of valid queues
+ *
+ * This structure is used in of match data to describe how integration
+ * for a specific compatible SoC is done.
+ */
+struct ti_msgmgr_desc {
+	u8 queue_count;
+	u8 max_message_size;
+	u8 max_messages;
+	u8 q_slices;
+	u8 q_proxies;
+	u8 data_first_reg;
+	u8 data_last_reg;
+	bool tx_polled;
+	int tx_poll_timeout_ms;
+	const struct ti_msgmgr_valid_queue_desc *valid_queues;
+	int num_valid_queues;
+};
+
+/**
+ * struct ti_queue_inst - Description of a queue instance
+ * @name:	Queue Name
+ * @queue_id:	Queue Identifier as mapped on SoC
+ * @proxy_id:	Proxy Identifier as mapped on SoC
+ * @irq:	IRQ for Rx Queue
+ * @is_tx:	'true' if transmit queue, else, 'false'
+ * @queue_buff_start: First register of Data Buffer
+ * @queue_buff_end: Last (or confirmation) register of Data buffer
+ * @queue_state: Queue status register
+ * @chan:	Mailbox channel
+ * @rx_buff:	Receive buffer pointer allocated at probe, max_message_size
+ */
+struct ti_queue_inst {
+	char name[30];
+	u8 queue_id;
+	u8 proxy_id;
+	int irq;
+	bool is_tx;
+	void __iomem *queue_buff_start;
+	void __iomem *queue_buff_end;
+	void __iomem *queue_state;
+	struct mbox_chan *chan;
+	u32 *rx_buff;
+};
+
+/**
+ * struct ti_msgmgr_inst - Description of a Message Manager Instance
+ * @dev:	device pointer corresponding to the Message Manager instance
+ * @desc:	Description of the SoC integration
+ * @queue_proxy_region:	Queue proxy region where queue buffers are located
+ * @queue_state_debug_region:	Queue status register regions
+ * @num_valid_queues:	Number of valid queues defined for the processor
+ *		Note: other queues are probably reserved for other processors
+ *		in the SoC.
+ * @qinsts:	Array of valid Queue Instances for the Processor
+ * @mbox:	Mailbox Controller
+ * @chans:	Array for channels corresponding to the Queue Instances.
+ */
+struct ti_msgmgr_inst {
+	struct device *dev;
+	const struct ti_msgmgr_desc *desc;
+	void __iomem *queue_proxy_region;
+	void __iomem *queue_state_debug_region;
+	u8 num_valid_queues;
+	struct ti_queue_inst *qinsts;
+	struct mbox_controller mbox;
+	struct mbox_chan *chans;
+};
+
+/**
+ * ti_msgmgr_queue_get_num_messages() - Get the number of pending messages
+ * @qinst:	Queue instance for which we check the number of pending messages
+ *
+ * Return: number of messages pending in the queue (0 == no pending messages)
+ */
+static inline int ti_msgmgr_queue_get_num_messages(struct ti_queue_inst *qinst)
+{
+	u32 val;
+
+	/*
+	 * We cannot use relaxed operation here - update may happen
+	 * real-time.
+	 */
+	val = readl(qinst->queue_state) & Q_STATE_ENTRY_COUNT_MASK;
+	val >>= __ffs(Q_STATE_ENTRY_COUNT_MASK);
+
+	return val;
+}
+
+/**
+ * ti_msgmgr_queue_rx_interrupt() - Interrupt handler for receive Queue
+ * @irq:	Interrupt number
+ * @p:		Channel Pointer
+ *
+ * Return: -EINVAL if there is no instance
+ * IRQ_NONE if the interrupt is not ours.
+ * IRQ_HANDLED if the rx interrupt was successfully handled.
+ */
+static irqreturn_t ti_msgmgr_queue_rx_interrupt(int irq, void *p)
+{
+	struct mbox_chan *chan = p;
+	struct device *dev = chan->mbox->dev;
+	struct ti_msgmgr_inst *inst = dev_get_drvdata(dev);
+	struct ti_queue_inst *qinst = chan->con_priv;
+	const struct ti_msgmgr_desc *desc;
+	int msg_count, num_words;
+	struct ti_msgmgr_message message;
+	void __iomem *data_reg;
+	u32 *word_data;
+
+	if (WARN_ON(!inst)) {
+		dev_err(dev, "no platform drv data??\n");
+		return -EINVAL;
+	}
+
+	/* Do I have an invalid interrupt source? */
+	if (qinst->is_tx) {
+		dev_err(dev, "Cannot handle rx interrupt on tx channel %s\n",
+			qinst->name);
+		return IRQ_NONE;
+	}
+
+	/* Do I actually have messages to read? */
+	msg_count = ti_msgmgr_queue_get_num_messages(qinst);
+	if (!msg_count) {
+		/* Shared IRQ? */
+		dev_dbg(dev, "Spurious event - 0 pending data!\n");
+		return IRQ_NONE;
+	}
+
+	/*
+	 * I have no idea about the protocol being used to communicate with the
+	 * remote producer - 0 could be valid data, so I wont make a judgement
+	 * of how many bytes I should be reading. Let the client figure this
+	 * out.. I just read the full message and pass it on..
+	 */
+	desc = inst->desc;
+	message.len = desc->max_message_size;
+	message.buf = (u8 *)qinst->rx_buff;
+
+	/*
+	 * NOTE about register access involved here:
+	 * the hardware block is implemented with 32bit access operations and no
+	 * support for data splitting.  We don't want the hardware to misbehave
+	 * with sub 32bit access - For example: if the last register read is
+	 * split into byte wise access, it can result in the queue getting
+	 * stuck or indeterminate behavior. An out of order read operation may
+	 * result in weird data results as well.
+	 * Hence, we do not use memcpy_fromio or __ioread32_copy here, instead
+	 * we depend on readl for the purpose.
+	 *
+	 * Also note that the final register read automatically marks the
+	 * queue message as read.
+	 */
+	for (data_reg = qinst->queue_buff_start, word_data = qinst->rx_buff,
+	     num_words = (desc->max_message_size / sizeof(u32));
+	     num_words; num_words--, data_reg += sizeof(u32), word_data++)
+		*word_data = readl(data_reg);
+
+	/*
+	 * Last register read automatically clears the IRQ if only 1 message
+	 * is pending - so send the data up the stack..
+	 * NOTE: Client is expected to be as optimal as possible, since
+	 * we invoke the handler in IRQ context.
+	 */
+	mbox_chan_received_data(chan, (void *)&message);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ti_msgmgr_queue_peek_data() - Peek to see if there are any rx messages.
+ * @chan:	Channel Pointer
+ *
+ * Return: 'true' if there is pending rx data, 'false' if there is none.
+ */
+static bool ti_msgmgr_queue_peek_data(struct mbox_chan *chan)
+{
+	struct ti_queue_inst *qinst = chan->con_priv;
+	int msg_count;
+
+	if (qinst->is_tx)
+		return false;
+
+	msg_count = ti_msgmgr_queue_get_num_messages(qinst);
+
+	return msg_count ? true : false;
+}
+
+/**
+ * ti_msgmgr_last_tx_done() - See if all the tx messages are sent
+ * @chan:	Channel pointer
+ *
+ * Return: 'true' is no pending tx data, 'false' if there are any.
+ */
+static bool ti_msgmgr_last_tx_done(struct mbox_chan *chan)
+{
+	struct ti_queue_inst *qinst = chan->con_priv;
+	int msg_count;
+
+	if (!qinst->is_tx)
+		return false;
+
+	msg_count = ti_msgmgr_queue_get_num_messages(qinst);
+
+	/* if we have any messages pending.. */
+	return msg_count ? false : true;
+}
+
+/**
+ * ti_msgmgr_send_data() - Send data
+ * @chan:	Channel Pointer
+ * @data:	ti_msgmgr_message * Message Pointer
+ *
+ * Return: 0 if all goes good, else appropriate error messages.
+ */
+static int ti_msgmgr_send_data(struct mbox_chan *chan, void *data)
+{
+	struct device *dev = chan->mbox->dev;
+	struct ti_msgmgr_inst *inst = dev_get_drvdata(dev);
+	const struct ti_msgmgr_desc *desc;
+	struct ti_queue_inst *qinst = chan->con_priv;
+	int num_words, trail_bytes;
+	struct ti_msgmgr_message *message = data;
+	void __iomem *data_reg;
+	u32 *word_data;
+
+	if (WARN_ON(!inst)) {
+		dev_err(dev, "no platform drv data??\n");
+		return -EINVAL;
+	}
+	desc = inst->desc;
+
+	if (desc->max_message_size < message->len) {
+		dev_err(dev, "Queue %s message length %d > max %d\n",
+			qinst->name, message->len, desc->max_message_size);
+		return -EINVAL;
+	}
+
+	/* NOTE: Constraints similar to rx path exists here as well */
+	for (data_reg = qinst->queue_buff_start,
+	     num_words = message->len / sizeof(u32),
+	     word_data = (u32 *)message->buf;
+	     num_words; num_words--, data_reg += sizeof(u32), word_data++)
+		writel(*word_data, data_reg);
+
+	trail_bytes = message->len % sizeof(u32);
+	if (trail_bytes) {
+		u32 data_trail = *word_data;
+
+		/* Ensure all unused data is 0 */
+		data_trail &= 0xFFFFFFFF >> (8 * (sizeof(u32) - trail_bytes));
+		writel(data_trail, data_reg);
+		data_reg++;
+	}
+	/*
+	 * 'data_reg' indicates next register to write. If we did not already
+	 * write on tx complete reg(last reg), we must do so for transmit
+	 */
+	if (data_reg <= qinst->queue_buff_end)
+		writel(0, qinst->queue_buff_end);
+
+	return 0;
+}
+
+/**
+ * ti_msgmgr_queue_startup() - Startup queue
+ * @chan:	Channel pointer
+ *
+ * Return: 0 if all goes good, else return corresponding error message
+ */
+static int ti_msgmgr_queue_startup(struct mbox_chan *chan)
+{
+	struct ti_queue_inst *qinst = chan->con_priv;
+	struct device *dev = chan->mbox->dev;
+	int ret;
+
+	if (!qinst->is_tx) {
+		/*
+		 * With the expectation that the IRQ might be shared in SoC
+		 */
+		ret = request_irq(qinst->irq, ti_msgmgr_queue_rx_interrupt,
+				  IRQF_SHARED, qinst->name, chan);
+		if (ret) {
+			dev_err(dev, "Unable to get IRQ %d on %s(res=%d)\n",
+				qinst->irq, qinst->name, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ti_msgmgr_queue_shutdown() - Shutdown the queue
+ * @chan:	Channel pointer
+ */
+static void ti_msgmgr_queue_shutdown(struct mbox_chan *chan)
+{
+	struct ti_queue_inst *qinst = chan->con_priv;
+
+	if (!qinst->is_tx)
+		free_irq(qinst->irq, chan);
+}
+
+/**
+ * ti_msgmgr_of_xlate() - Translation of phandle to queue
+ * @mbox:	Mailbox controller
+ * @p:		phandle pointer
+ *
+ * Return: Mailbox channel corresponding to the queue, else return error
+ * pointer.
+ */
+static struct mbox_chan *ti_msgmgr_of_xlate(struct mbox_controller *mbox,
+					    const struct of_phandle_args *p)
+{
+	struct ti_msgmgr_inst *inst;
+	int req_qid, req_pid;
+	struct ti_queue_inst *qinst;
+	int i;
+
+	inst = container_of(mbox, struct ti_msgmgr_inst, mbox);
+	if (WARN_ON(!inst))
+		return ERR_PTR(-EINVAL);
+
+	/* #mbox-cells is 2 */
+	if (p->args_count != 2) {
+		dev_err(inst->dev, "Invalid arguments in dt[%d] instead of 2\n",
+			p->args_count);
+		return ERR_PTR(-EINVAL);
+	}
+	req_qid = p->args[0];
+	req_pid = p->args[1];
+
+	for (qinst = inst->qinsts, i = 0; i < inst->num_valid_queues;
+	     i++, qinst++) {
+		if (req_qid == qinst->queue_id && req_pid == qinst->proxy_id)
+			return qinst->chan;
+	}
+
+	dev_err(inst->dev, "Queue ID %d, Proxy ID %d is wrong on %s\n",
+		req_qid, req_pid, p->np->name);
+	return ERR_PTR(-ENOENT);
+}
+
+/**
+ * ti_msgmgr_queue_setup() - Setup data structures for each queue instance
+ * @idx:	index of the queue
+ * @dev:	pointer to the message manager device
+ * @np:		pointer to the of node
+ * @inst:	Queue instance pointer
+ * @d:		Message Manager instance description data
+ * @qd:		Queue description data
+ * @qinst:	Queue instance pointer
+ * @chan:	pointer to mailbox channel
+ *
+ * Return: 0 if all went well, else return corresponding error
+ */
+static int ti_msgmgr_queue_setup(int idx, struct device *dev,
+				 struct device_node *np,
+				 struct ti_msgmgr_inst *inst,
+				 const struct ti_msgmgr_desc *d,
+				 const struct ti_msgmgr_valid_queue_desc *qd,
+				 struct ti_queue_inst *qinst,
+				 struct mbox_chan *chan)
+{
+	qinst->proxy_id = qd->proxy_id;
+	qinst->queue_id = qd->queue_id;
+
+	if (qinst->queue_id > d->queue_count) {
+		dev_err(dev, "Queue Data [idx=%d] queuid %d > %d\n",
+			idx, qinst->queue_id, d->queue_count);
+		return -ERANGE;
+	}
+
+	qinst->is_tx = qd->is_tx;
+	snprintf(qinst->name, sizeof(qinst->name), "%s %s_%03d_%03d",
+		 dev_name(dev), qinst->is_tx ? "tx" : "rx", qinst->queue_id,
+		 qinst->proxy_id);
+
+	if (!qinst->is_tx) {
+		char of_rx_irq_name[7];
+
+		snprintf(of_rx_irq_name, sizeof(of_rx_irq_name),
+			 "rx_%03d", qinst->queue_id);
+
+		qinst->irq = of_irq_get_byname(np, of_rx_irq_name);
+		if (qinst->irq < 0) {
+			dev_crit(dev,
+				 "[%d]QID %d PID %d:No IRQ[%s]: %d\n",
+				 idx, qinst->queue_id, qinst->proxy_id,
+				 of_rx_irq_name, qinst->irq);
+			return qinst->irq;
+		}
+		/* Allocate usage buffer for rx */
+		qinst->rx_buff = devm_kzalloc(dev,
+					      d->max_message_size, GFP_KERNEL);
+		if (!qinst->rx_buff)
+			return -ENOMEM;
+	}
+
+	qinst->queue_buff_start = inst->queue_proxy_region +
+	    Q_DATA_OFFSET(qinst->proxy_id, qinst->queue_id, d->data_first_reg);
+	qinst->queue_buff_end = inst->queue_proxy_region +
+	    Q_DATA_OFFSET(qinst->proxy_id, qinst->queue_id, d->data_last_reg);
+	qinst->queue_state = inst->queue_state_debug_region +
+	    Q_STATE_OFFSET(qinst->queue_id);
+	qinst->chan = chan;
+
+	chan->con_priv = qinst;
+
+	dev_dbg(dev, "[%d] qidx=%d pidx=%d irq=%d q_s=%p q_e = %p\n",
+		idx, qinst->queue_id, qinst->proxy_id, qinst->irq,
+		qinst->queue_buff_start, qinst->queue_buff_end);
+	return 0;
+}
+
+/* Queue operations */
+static const struct mbox_chan_ops ti_msgmgr_chan_ops = {
+	.startup = ti_msgmgr_queue_startup,
+	.shutdown = ti_msgmgr_queue_shutdown,
+	.peek_data = ti_msgmgr_queue_peek_data,
+	.last_tx_done = ti_msgmgr_last_tx_done,
+	.send_data = ti_msgmgr_send_data,
+};
+
+/* Keystone K2G SoC integration details */
+static const struct ti_msgmgr_valid_queue_desc k2g_valid_queues[] = {
+	{.queue_id = 0, .proxy_id = 0, .is_tx = true,},
+	{.queue_id = 1, .proxy_id = 0, .is_tx = true,},
+	{.queue_id = 2, .proxy_id = 0, .is_tx = true,},
+	{.queue_id = 3, .proxy_id = 0, .is_tx = true,},
+	{.queue_id = 5, .proxy_id = 2, .is_tx = false,},
+	{.queue_id = 56, .proxy_id = 1, .is_tx = true,},
+	{.queue_id = 57, .proxy_id = 2, .is_tx = false,},
+	{.queue_id = 58, .proxy_id = 3, .is_tx = true,},
+	{.queue_id = 59, .proxy_id = 4, .is_tx = true,},
+	{.queue_id = 60, .proxy_id = 5, .is_tx = true,},
+	{.queue_id = 61, .proxy_id = 6, .is_tx = true,},
+};
+
+static const struct ti_msgmgr_desc k2g_desc = {
+	.queue_count = 64,
+	.max_message_size = 64,
+	.max_messages = 128,
+	.q_slices = 1,
+	.q_proxies = 1,
+	.data_first_reg = 16,
+	.data_last_reg = 31,
+	.tx_polled = false,
+	.valid_queues = k2g_valid_queues,
+	.num_valid_queues = ARRAY_SIZE(k2g_valid_queues),
+};
+
+static const struct of_device_id ti_msgmgr_of_match[] = {
+	{.compatible = "ti,k2g-message-manager", .data = &k2g_desc},
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ti_msgmgr_of_match);
+
+static int ti_msgmgr_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *of_id;
+	struct device_node *np;
+	struct resource *res;
+	const struct ti_msgmgr_desc *desc;
+	struct ti_msgmgr_inst *inst;
+	struct ti_queue_inst *qinst;
+	struct mbox_controller *mbox;
+	struct mbox_chan *chans;
+	int queue_count;
+	int i;
+	int ret = -EINVAL;
+	const struct ti_msgmgr_valid_queue_desc *queue_desc;
+
+	if (!dev->of_node) {
+		dev_err(dev, "no OF information\n");
+		return -EINVAL;
+	}
+	np = dev->of_node;
+
+	of_id = of_match_device(ti_msgmgr_of_match, dev);
+	if (!of_id) {
+		dev_err(dev, "OF data missing\n");
+		return -EINVAL;
+	}
+	desc = of_id->data;
+
+	inst = devm_kzalloc(dev, sizeof(*inst), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	inst->dev = dev;
+	inst->desc = desc;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "queue_proxy_region");
+	inst->queue_proxy_region = devm_ioremap_resource(dev, res);
+	if (IS_ERR(inst->queue_proxy_region))
+		return PTR_ERR(inst->queue_proxy_region);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "queue_state_debug_region");
+	inst->queue_state_debug_region = devm_ioremap_resource(dev, res);
+	if (IS_ERR(inst->queue_state_debug_region))
+		return PTR_ERR(inst->queue_state_debug_region);
+
+	dev_dbg(dev, "proxy region=%p, queue_state=%p\n",
+		inst->queue_proxy_region, inst->queue_state_debug_region);
+
+	queue_count = desc->num_valid_queues;
+	if (!queue_count || queue_count > desc->queue_count) {
+		dev_crit(dev, "Invalid Number of queues %d. Max %d\n",
+			 queue_count, desc->queue_count);
+		return -ERANGE;
+	}
+	inst->num_valid_queues = queue_count;
+
+	qinst = devm_kzalloc(dev, sizeof(*qinst) * queue_count, GFP_KERNEL);
+	if (!qinst)
+		return -ENOMEM;
+	inst->qinsts = qinst;
+
+	chans = devm_kzalloc(dev, sizeof(*chans) * queue_count, GFP_KERNEL);
+	if (!chans)
+		return -ENOMEM;
+	inst->chans = chans;
+
+	for (i = 0, queue_desc = desc->valid_queues;
+	     i < queue_count; i++, qinst++, chans++, queue_desc++) {
+		ret = ti_msgmgr_queue_setup(i, dev, np, inst,
+					    desc, queue_desc, qinst, chans);
+		if (ret)
+			return ret;
+	}
+
+	mbox = &inst->mbox;
+	mbox->dev = dev;
+	mbox->ops = &ti_msgmgr_chan_ops;
+	mbox->chans = inst->chans;
+	mbox->num_chans = inst->num_valid_queues;
+	mbox->txdone_irq = false;
+	mbox->txdone_poll = desc->tx_polled;
+	if (desc->tx_polled)
+		mbox->txpoll_period = desc->tx_poll_timeout_ms;
+	mbox->of_xlate = ti_msgmgr_of_xlate;
+
+	platform_set_drvdata(pdev, inst);
+	ret = mbox_controller_register(mbox);
+	if (ret)
+		dev_err(dev, "Failed to register mbox_controller(%d)\n", ret);
+
+	return ret;
+}
+
+static int ti_msgmgr_remove(struct platform_device *pdev)
+{
+	struct ti_msgmgr_inst *inst;
+
+	inst = platform_get_drvdata(pdev);
+	mbox_controller_unregister(&inst->mbox);
+
+	return 0;
+}
+
+static struct platform_driver ti_msgmgr_driver = {
+	.probe = ti_msgmgr_probe,
+	.remove = ti_msgmgr_remove,
+	.driver = {
+		   .name = "ti-msgmgr",
+		   .of_match_table = of_match_ptr(ti_msgmgr_of_match),
+	},
+};
+module_platform_driver(ti_msgmgr_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("TI message manager driver");
+MODULE_AUTHOR("Nishanth Menon");
+MODULE_ALIAS("platform:ti-msgmgr");

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 7df6b4f..bef7175 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c

@@ -322,7 +322,7 @@
 {
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
-	page_cache_release(page);
+	put_page(page);
 }
 static void free_buffers(struct page *page)
 {

diff --git a/drivers/media/usb/au0828/au0828-cards.c b/drivers/media/usb/au0828/au0828-cards.c
index ca861ae..6b469e8 100644
--- a/drivers/media/usb/au0828/au0828-cards.c
+++ b/drivers/media/usb/au0828/au0828-cards.c

@@ -228,10 +228,6 @@
 				"au8522", 0x8e >> 1, NULL);
 		if (sd == NULL)
 			pr_err("analog subdev registration failed\n");
-#ifdef CONFIG_MEDIA_CONTROLLER
-		if (sd)
-			dev->decoder = &sd->entity;
-#endif
 	}
 
 	/* Setup tuners */

diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c
index 5dc82e8..cc22b32 100644
--- a/drivers/media/usb/au0828/au0828-core.c
+++ b/drivers/media/usb/au0828/au0828-core.c

@@ -137,8 +137,14 @@
 #ifdef CONFIG_MEDIA_CONTROLLER
 	if (dev->media_dev &&
 		media_devnode_is_registered(&dev->media_dev->devnode)) {
+		/* clear enable_source, disable_source */
+		dev->media_dev->source_priv = NULL;
+		dev->media_dev->enable_source = NULL;
+		dev->media_dev->disable_source = NULL;
+
 		media_device_unregister(dev->media_dev);
 		media_device_cleanup(dev->media_dev);
+		kfree(dev->media_dev);
 		dev->media_dev = NULL;
 	}
 #endif
@@ -166,7 +172,7 @@
 	   Set the status so poll routines can check and avoid
 	   access after disconnect.
 	*/
-	dev->dev_state = DEV_DISCONNECTED;
+	set_bit(DEV_DISCONNECTED, &dev->dev_state);
 
 	au0828_rc_unregister(dev);
 	/* Digital TV */
@@ -192,7 +198,7 @@
 #ifdef CONFIG_MEDIA_CONTROLLER
 	struct media_device *mdev;
 
-	mdev = media_device_get_devres(&udev->dev);
+	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
 	if (!mdev)
 		return -ENOMEM;
 
@@ -456,7 +462,8 @@
 {
 #ifdef CONFIG_MEDIA_CONTROLLER
 	int ret;
-	struct media_entity *entity, *demod = NULL, *tuner = NULL;
+	struct media_entity *entity, *demod = NULL;
+	struct media_link *link;
 
 	if (!dev->media_dev)
 		return 0;
@@ -482,26 +489,37 @@
 	}
 
 	/*
-	 * Find tuner and demod to disable the link between
-	 * the two to avoid disable step when tuner is requested
-	 * by video or audio. Note that this step can't be done
-	 * until dvb graph is created during dvb register.
+	 * Find tuner, decoder and demod.
+	 *
+	 * The tuner and decoder should be cached, as they'll be used by
+	 *	au0828_enable_source.
+	 *
+	 * It also needs to disable the link between tuner and
+	 * decoder/demod, to avoid disable step when tuner is requested
+	 * by video or audio. Note that this step can't be done until dvb
+	 * graph is created during dvb register.
 	*/
 	media_device_for_each_entity(entity, dev->media_dev) {
-		if (entity->function == MEDIA_ENT_F_DTV_DEMOD)
+		switch (entity->function) {
+		case MEDIA_ENT_F_TUNER:
+			dev->tuner = entity;
+			break;
+		case MEDIA_ENT_F_ATV_DECODER:
+			dev->decoder = entity;
+			break;
+		case MEDIA_ENT_F_DTV_DEMOD:
 			demod = entity;
-		else if (entity->function == MEDIA_ENT_F_TUNER)
-			tuner = entity;
+			break;
+		}
 	}
-	/* Disable link between tuner and demod */
-	if (tuner && demod) {
-		struct media_link *link;
 
-		list_for_each_entry(link, &demod->links, list) {
-			if (link->sink->entity == demod &&
-			    link->source->entity == tuner) {
+	/* Disable link between tuner->demod and/or tuner->decoder */
+	if (dev->tuner) {
+		list_for_each_entry(link, &dev->tuner->links, list) {
+			if (demod && link->sink->entity == demod)
 				media_entity_setup_link(link, 0);
-			}
+			if (dev->decoder && link->sink->entity == dev->decoder)
+				media_entity_setup_link(link, 0);
 		}
 	}
 

diff --git a/drivers/media/usb/au0828/au0828-input.c b/drivers/media/usb/au0828/au0828-input.c
index b0f0679..3d6687f 100644
--- a/drivers/media/usb/au0828/au0828-input.c
+++ b/drivers/media/usb/au0828/au0828-input.c

@@ -130,7 +130,7 @@
 	bool first = true;
 
 	/* do nothing if device is disconnected */
-	if (ir->dev->dev_state == DEV_DISCONNECTED)
+	if (test_bit(DEV_DISCONNECTED, &ir->dev->dev_state))
 		return 0;
 
 	/* Check IR int */
@@ -260,7 +260,7 @@
 	cancel_delayed_work_sync(&ir->work);
 
 	/* do nothing if device is disconnected */
-	if (ir->dev->dev_state != DEV_DISCONNECTED) {
+	if (!test_bit(DEV_DISCONNECTED, &ir->dev->dev_state)) {
 		/* Disable IR */
 		au8522_rc_clear(ir, 0xe0, 1 << 4);
 	}

diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c
index 13f6dab..32d7db9 100644
--- a/drivers/media/usb/au0828/au0828-video.c
+++ b/drivers/media/usb/au0828/au0828-video.c

@@ -106,14 +106,13 @@
 
 static int check_dev(struct au0828_dev *dev)
 {
-	if (dev->dev_state & DEV_DISCONNECTED) {
+	if (test_bit(DEV_DISCONNECTED, &dev->dev_state)) {
 		pr_info("v4l2 ioctl: device not present\n");
 		return -ENODEV;
 	}
 
-	if (dev->dev_state & DEV_MISCONFIGURED) {
-		pr_info("v4l2 ioctl: device is misconfigured; "
-		       "close and open it again\n");
+	if (test_bit(DEV_MISCONFIGURED, &dev->dev_state)) {
+		pr_info("v4l2 ioctl: device is misconfigured; close and open it again\n");
 		return -EIO;
 	}
 	return 0;
@@ -521,8 +520,8 @@
 	if (!dev)
 		return 0;
 
-	if ((dev->dev_state & DEV_DISCONNECTED) ||
-	    (dev->dev_state & DEV_MISCONFIGURED))
+	if (test_bit(DEV_DISCONNECTED, &dev->dev_state) ||
+	    test_bit(DEV_MISCONFIGURED, &dev->dev_state))
 		return 0;
 
 	if (urb->status < 0) {
@@ -824,10 +823,10 @@
 	int ret = 0;
 
 	dev->stream_state = STREAM_INTERRUPT;
-	if (dev->dev_state == DEV_DISCONNECTED)
+	if (test_bit(DEV_DISCONNECTED, &dev->dev_state))
 		return -ENODEV;
 	else if (ret) {
-		dev->dev_state = DEV_MISCONFIGURED;
+		set_bit(DEV_MISCONFIGURED, &dev->dev_state);
 		dprintk(1, "%s device is misconfigured!\n", __func__);
 		return ret;
 	}
@@ -1026,7 +1025,7 @@
 	int ret;
 
 	dprintk(1,
-		"%s called std_set %d dev_state %d stream users %d users %d\n",
+		"%s called std_set %d dev_state %ld stream users %d users %d\n",
 		__func__, dev->std_set_in_tuner_core, dev->dev_state,
 		dev->streaming_users, dev->users);
 
@@ -1045,7 +1044,7 @@
 		au0828_analog_stream_enable(dev);
 		au0828_analog_stream_reset(dev);
 		dev->stream_state = STREAM_OFF;
-		dev->dev_state |= DEV_INITIALIZED;
+		set_bit(DEV_INITIALIZED, &dev->dev_state);
 	}
 	dev->users++;
 	mutex_unlock(&dev->lock);
@@ -1059,7 +1058,7 @@
 	struct video_device *vdev = video_devdata(filp);
 
 	dprintk(1,
-		"%s called std_set %d dev_state %d stream users %d users %d\n",
+		"%s called std_set %d dev_state %ld stream users %d users %d\n",
 		__func__, dev->std_set_in_tuner_core, dev->dev_state,
 		dev->streaming_users, dev->users);
 
@@ -1075,7 +1074,7 @@
 		del_timer_sync(&dev->vbi_timeout);
 	}
 
-	if (dev->dev_state == DEV_DISCONNECTED)
+	if (test_bit(DEV_DISCONNECTED, &dev->dev_state))
 		goto end;
 
 	if (dev->users == 1) {
@@ -1135,7 +1134,7 @@
 		.type = V4L2_TUNER_ANALOG_TV,
 	};
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	if (dev->std_set_in_tuner_core)
@@ -1207,7 +1206,7 @@
 	struct video_device *vdev = video_devdata(file);
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	strlcpy(cap->driver, "au0828", sizeof(cap->driver));
@@ -1250,7 +1249,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	f->fmt.pix.width = dev->width;
@@ -1269,7 +1268,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	return au0828_set_format(dev, VIDIOC_TRY_FMT, f);
@@ -1281,7 +1280,7 @@
 	struct au0828_dev *dev = video_drvdata(file);
 	int rc;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	rc = check_dev(dev);
@@ -1303,7 +1302,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	if (norm == dev->std)
@@ -1335,7 +1334,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	*norm = dev->std;
@@ -1357,7 +1356,7 @@
 		[AU0828_VMUX_DVB] = "DVB",
 	};
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	tmp = input->index;
@@ -1387,7 +1386,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	*i = dev->ctrl_input;
@@ -1398,7 +1397,7 @@
 {
 	int i;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	switch (AUVI_INPUT(index).type) {
@@ -1496,7 +1495,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	a->index = dev->ctrl_ainput;
@@ -1516,7 +1515,7 @@
 	if (a->index != dev->ctrl_ainput)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 	return 0;
 }
@@ -1534,7 +1533,7 @@
 	if (ret)
 		return ret;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	strcpy(t->name, "Auvitek tuner");
@@ -1554,7 +1553,7 @@
 	if (t->index != 0)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	au0828_init_tuner(dev);
@@ -1576,7 +1575,7 @@
 
 	if (freq->tuner != 0)
 		return -EINVAL;
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 	freq->frequency = dev->ctrl_freq;
 	return 0;
@@ -1591,7 +1590,7 @@
 	if (freq->tuner != 0)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	au0828_init_tuner(dev);
@@ -1617,7 +1616,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	format->fmt.vbi.samples_per_line = dev->vbi_width;
@@ -1643,7 +1642,7 @@
 	if (cc->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		return -EINVAL;
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	cc->bounds.left = 0;
@@ -1665,7 +1664,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	reg->val = au0828_read(dev, reg->reg);
@@ -1678,7 +1677,7 @@
 {
 	struct au0828_dev *dev = video_drvdata(file);
 
-	dprintk(1, "%s called std_set %d dev_state %d\n", __func__,
+	dprintk(1, "%s called std_set %d dev_state %ld\n", __func__,
 		dev->std_set_in_tuner_core, dev->dev_state);
 
 	return au0828_writereg(dev, reg->reg, reg->val);

diff --git a/drivers/media/usb/au0828/au0828.h b/drivers/media/usb/au0828/au0828.h
index ff7f851..87f3284 100644
--- a/drivers/media/usb/au0828/au0828.h
+++ b/drivers/media/usb/au0828/au0828.h

@@ -21,6 +21,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bitops.h>
 #include <linux/usb.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
@@ -121,9 +122,9 @@
 
 /* device state */
 enum au0828_dev_state {
-	DEV_INITIALIZED = 0x01,
-	DEV_DISCONNECTED = 0x02,
-	DEV_MISCONFIGURED = 0x04
+	DEV_INITIALIZED = 0,
+	DEV_DISCONNECTED = 1,
+	DEV_MISCONFIGURED = 2
 };
 
 struct au0828_dev;
@@ -247,7 +248,7 @@
 	int input_type;
 	int std_set_in_tuner_core;
 	unsigned int ctrl_input;
-	enum au0828_dev_state dev_state;
+	long unsigned int dev_state; /* defined at enum au0828_dev_state */;
 	enum au0828_stream_state stream_state;
 	wait_queue_head_t open;
 

diff --git a/drivers/media/v4l2-core/v4l2-mc.c b/drivers/media/v4l2-core/v4l2-mc.c
index 2a7b79b..2228cd3 100644
--- a/drivers/media/v4l2-core/v4l2-mc.c
+++ b/drivers/media/v4l2-core/v4l2-mc.c

@@ -34,7 +34,7 @@
 {
 	struct media_entity *entity;
 	struct media_entity *if_vid = NULL, *if_aud = NULL;
-	struct media_entity *tuner = NULL, *decoder = NULL, *dtv_demod = NULL;
+	struct media_entity *tuner = NULL, *decoder = NULL;
 	struct media_entity *io_v4l = NULL, *io_vbi = NULL, *io_swradio = NULL;
 	bool is_webcam = false;
 	u32 flags;

diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index df4c052c..f300f06 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c

@@ -349,7 +349,7 @@
 
 	if (dma->pages) {
 		for (i = 0; i < dma->nr_pages; i++)
-			page_cache_release(dma->pages[i]);
+			put_page(dma->pages[i]);
 		kfree(dma->pages);
 		dma->pages = NULL;
 	}

diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c
index acd1460..2a691da 100644
--- a/drivers/memory/fsl_ifc.c
+++ b/drivers/memory/fsl_ifc.c

@@ -260,7 +260,7 @@
 
 	/* get the Controller level irq */
 	fsl_ifc_ctrl_dev->irq = irq_of_parse_and_map(dev->dev.of_node, 0);
-	if (fsl_ifc_ctrl_dev->irq == NO_IRQ) {
+	if (fsl_ifc_ctrl_dev->irq == 0) {
 		dev_err(&dev->dev, "failed to get irq resource "
 							"for IFC\n");
 		ret = -ENODEV;

diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
index ef09ba0..d5cfb50 100644
--- a/drivers/memstick/host/r592.c
+++ b/drivers/memstick/host/r592.c

@@ -298,8 +298,7 @@
 	sg_count = dma_map_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ?
 		PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
 
-	if (sg_count != 1 ||
-			(sg_dma_len(&dev->req->sg) < dev->req->sg.length)) {
+	if (sg_count != 1 || sg_dma_len(&dev->req->sg) < R592_LFIFO_SIZE) {
 		message("problem in dma_map_sg");
 		return -EIO;
 	}

diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index e8b9331..9c677f3 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c

@@ -116,8 +116,8 @@
 {
 	struct inode *root;
 
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = IBMASMFS_MAGIC;
 	sb->s_op = &ibmasmfs_s_ops;
 	sb->s_time_gran = 1;

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index f42d9c4..f84a427 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c

@@ -728,7 +728,7 @@
 		if (dirty)
 			set_page_dirty(pages[i]);
 
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 		pages[i] = NULL;
 	}
 }

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 1d94607..6e4c55a 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c

@@ -356,11 +356,11 @@
 	 * They have to set these according to their abilities.
 	 */
 	host->max_segs = 1;
-	host->max_seg_size = PAGE_CACHE_SIZE;
+	host->max_seg_size = PAGE_SIZE;
 
-	host->max_req_size = PAGE_CACHE_SIZE;
+	host->max_req_size = PAGE_SIZE;
 	host->max_blk_size = 512;
-	host->max_blk_count = PAGE_CACHE_SIZE / 512;
+	host->max_blk_count = PAGE_SIZE / 512;
 
 	return host;
 }

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 8d870ce..d9a655f 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c

@@ -1513,7 +1513,7 @@
 		mmc->caps |= pd->caps;
 	mmc->max_segs = 32;
 	mmc->max_blk_size = 512;
-	mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs;
+	mmc->max_req_size = PAGE_SIZE * mmc->max_segs;
 	mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size;
 	mmc->max_seg_size = mmc->max_req_size;
 

diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 6754358..7fb0c03 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c

@@ -63,7 +63,7 @@
 		}
 	}
 
-	if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE ||
+	if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_SIZE ||
 			  (align & PAGE_MASK))) || !multiple) {
 		ret = -EINVAL;
 		goto pio;
@@ -133,7 +133,7 @@
 		}
 	}
 
-	if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE ||
+	if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_SIZE ||
 			  (align & PAGE_MASK))) || !multiple) {
 		ret = -EINVAL;
 		goto pio;

diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 03f6e74..0521b46 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c

@@ -1125,7 +1125,7 @@
 	mmc->caps2 |= pdata->capabilities2;
 	mmc->max_segs = 32;
 	mmc->max_blk_size = 512;
-	mmc->max_blk_count = (PAGE_CACHE_SIZE / mmc->max_blk_size) *
+	mmc->max_blk_count = (PAGE_SIZE / mmc->max_blk_size) *
 		mmc->max_segs;
 	mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
 	mmc->max_seg_size = mmc->max_req_size;

diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index b2752fe..807c06e 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c

@@ -1789,7 +1789,7 @@
 	/* Set .max_segs to some random number. Feel free to adjust. */
 	mmc->max_segs = 32;
 	mmc->max_blk_size = 512;
-	mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs;
+	mmc->max_req_size = PAGE_SIZE * mmc->max_segs;
 	mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size;
 	/*
 	 * Setting .max_seg_size to 1 page would simplify our page-mapping code,

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 42cc953..e83a279 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig

@@ -142,7 +142,7 @@
 
 config MTD_BCM63XX_PARTS
 	tristate "BCM63XX CFE partitioning support"
-	depends on BCM63XX
+	depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
 	select CRC32
 	help
 	  This provides partions parsing for BCM63xx devices with CFE

diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c
index 8282f47..845dd27 100644
--- a/drivers/mtd/bcm47xxpart.c
+++ b/drivers/mtd/bcm47xxpart.c

@@ -66,11 +66,13 @@
 {
 	uint32_t buf;
 	size_t bytes_read;
+	int err;
 
-	if (mtd_read(master, offset, sizeof(buf), &bytes_read,
-		     (uint8_t *)&buf) < 0) {
-		pr_err("mtd_read error while parsing (offset: 0x%X)!\n",
-			offset);
+	err  = mtd_read(master, offset, sizeof(buf), &bytes_read,
+			(uint8_t *)&buf);
+	if (err && !mtd_is_bitflip(err)) {
+		pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+			offset, err);
 		goto out_default;
 	}
 
@@ -95,6 +97,7 @@
 	int trx_part = -1;
 	int last_trx_part = -1;
 	int possible_nvram_sizes[] = { 0x8000, 0xF000, 0x10000, };
+	int err;
 
 	/*
 	 * Some really old flashes (like AT45DB*) had smaller erasesize-s, but
@@ -118,8 +121,8 @@
 	/* Parse block by block looking for magics */
 	for (offset = 0; offset <= master->size - blocksize;
 	     offset += blocksize) {
-		/* Nothing more in higher memory */
-		if (offset >= 0x2000000)
+		/* Nothing more in higher memory on BCM47XX (MIPS) */
+		if (config_enabled(CONFIG_BCM47XX) && offset >= 0x2000000)
 			break;
 
 		if (curr_part >= BCM47XXPART_MAX_PARTS) {
@@ -128,10 +131,11 @@
 		}
 
 		/* Read beginning of the block */
-		if (mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ,
-			     &bytes_read, (uint8_t *)buf) < 0) {
-			pr_err("mtd_read error while parsing (offset: 0x%X)!\n",
-			       offset);
+		err = mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ,
+			       &bytes_read, (uint8_t *)buf);
+		if (err && !mtd_is_bitflip(err)) {
+			pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+			       offset, err);
 			continue;
 		}
 
@@ -254,10 +258,11 @@
 		}
 
 		/* Read middle of the block */
-		if (mtd_read(master, offset + 0x8000, 0x4,
-			     &bytes_read, (uint8_t *)buf) < 0) {
-			pr_err("mtd_read error while parsing (offset: 0x%X)!\n",
-			       offset);
+		err = mtd_read(master, offset + 0x8000, 0x4, &bytes_read,
+			       (uint8_t *)buf);
+		if (err && !mtd_is_bitflip(err)) {
+			pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+			       offset, err);
 			continue;
 		}
 
@@ -277,10 +282,11 @@
 		}
 
 		offset = master->size - possible_nvram_sizes[i];
-		if (mtd_read(master, offset, 0x4, &bytes_read,
-			     (uint8_t *)buf) < 0) {
-			pr_err("mtd_read error while reading at offset 0x%X!\n",
-			       offset);
+		err = mtd_read(master, offset, 0x4, &bytes_read,
+			       (uint8_t *)buf);
+		if (err && !mtd_is_bitflip(err)) {
+			pr_err("mtd_read error while reading (offset 0x%X): %d\n",
+			       offset, err);
 			continue;
 		}
 

diff --git a/drivers/mtd/bcm63xxpart.c b/drivers/mtd/bcm63xxpart.c
index cec3188..41d1d31 100644
--- a/drivers/mtd/bcm63xxpart.c
+++ b/drivers/mtd/bcm63xxpart.c

@@ -24,6 +24,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bcm963xx_nvram.h>
 #include <linux/bcm963xx_tag.h>
 #include <linux/crc32.h>
 #include <linux/module.h>
@@ -34,12 +35,15 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
-#include <asm/mach-bcm63xx/bcm63xx_nvram.h>
-#include <asm/mach-bcm63xx/board_bcm963xx.h>
+#define BCM963XX_CFE_BLOCK_SIZE		SZ_64K	/* always at least 64KiB */
 
-#define BCM63XX_CFE_BLOCK_SIZE	SZ_64K		/* always at least 64KiB */
+#define BCM963XX_CFE_MAGIC_OFFSET	0x4e0
+#define BCM963XX_CFE_VERSION_OFFSET	0x570
+#define BCM963XX_NVRAM_OFFSET		0x580
 
-#define BCM63XX_CFE_MAGIC_OFFSET 0x4e0
+/* Ensure strings read from flash structs are null terminated */
+#define STR_NULL_TERMINATE(x) \
+	do { char *_str = (x); _str[sizeof(x) - 1] = 0; } while (0)
 
 static int bcm63xx_detect_cfe(struct mtd_info *master)
 {
@@ -58,68 +62,130 @@
 		return 0;
 
 	/* very old CFE's do not have the cfe-v string, so check for magic */
-	ret = mtd_read(master, BCM63XX_CFE_MAGIC_OFFSET, 8, &retlen,
+	ret = mtd_read(master, BCM963XX_CFE_MAGIC_OFFSET, 8, &retlen,
 		       (void *)buf);
 	buf[retlen] = 0;
 
 	return strncmp("CFE1CFE1", buf, 8);
 }
 
-static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
-					const struct mtd_partition **pparts,
-					struct mtd_part_parser_data *data)
+static int bcm63xx_read_nvram(struct mtd_info *master,
+	struct bcm963xx_nvram *nvram)
+{
+	u32 actual_crc, expected_crc;
+	size_t retlen;
+	int ret;
+
+	/* extract nvram data */
+	ret = mtd_read(master, BCM963XX_NVRAM_OFFSET, BCM963XX_NVRAM_V5_SIZE,
+			&retlen, (void *)nvram);
+	if (ret)
+		return ret;
+
+	ret = bcm963xx_nvram_checksum(nvram, &expected_crc, &actual_crc);
+	if (ret)
+		pr_warn("nvram checksum failed, contents may be invalid (expected %08x, got %08x)\n",
+			expected_crc, actual_crc);
+
+	if (!nvram->psi_size)
+		nvram->psi_size = BCM963XX_DEFAULT_PSI_SIZE;
+
+	return 0;
+}
+
+static int bcm63xx_read_image_tag(struct mtd_info *master, const char *name,
+	loff_t tag_offset, struct bcm_tag *buf)
+{
+	int ret;
+	size_t retlen;
+	u32 computed_crc;
+
+	ret = mtd_read(master, tag_offset, sizeof(*buf), &retlen, (void *)buf);
+	if (ret)
+		return ret;
+
+	if (retlen != sizeof(*buf))
+		return -EIO;
+
+	computed_crc = crc32_le(IMAGETAG_CRC_START, (u8 *)buf,
+				offsetof(struct bcm_tag, header_crc));
+	if (computed_crc == buf->header_crc) {
+		STR_NULL_TERMINATE(buf->board_id);
+		STR_NULL_TERMINATE(buf->tag_version);
+
+		pr_info("%s: CFE image tag found at 0x%llx with version %s, board type %s\n",
+			name, tag_offset, buf->tag_version, buf->board_id);
+
+		return 0;
+	}
+
+	pr_warn("%s: CFE image tag at 0x%llx CRC invalid (expected %08x, actual %08x)\n",
+		name, tag_offset, buf->header_crc, computed_crc);
+	return 1;
+}
+
+static int bcm63xx_parse_cfe_nor_partitions(struct mtd_info *master,
+	const struct mtd_partition **pparts, struct bcm963xx_nvram *nvram)
 {
 	/* CFE, NVRAM and global Linux are always present */
 	int nrparts = 3, curpart = 0;
-	struct bcm_tag *buf;
+	struct bcm_tag *buf = NULL;
 	struct mtd_partition *parts;
 	int ret;
-	size_t retlen;
 	unsigned int rootfsaddr, kerneladdr, spareaddr;
 	unsigned int rootfslen, kernellen, sparelen, totallen;
 	unsigned int cfelen, nvramlen;
 	unsigned int cfe_erasesize;
 	int i;
-	u32 computed_crc;
 	bool rootfs_first = false;
 
-	if (bcm63xx_detect_cfe(master))
-		return -EINVAL;
-
 	cfe_erasesize = max_t(uint32_t, master->erasesize,
-			      BCM63XX_CFE_BLOCK_SIZE);
+			      BCM963XX_CFE_BLOCK_SIZE);
 
 	cfelen = cfe_erasesize;
-	nvramlen = bcm63xx_nvram_get_psi_size() * SZ_1K;
+	nvramlen = nvram->psi_size * SZ_1K;
 	nvramlen = roundup(nvramlen, cfe_erasesize);
 
-	/* Allocate memory for buffer */
 	buf = vmalloc(sizeof(struct bcm_tag));
 	if (!buf)
 		return -ENOMEM;
 
 	/* Get the tag */
-	ret = mtd_read(master, cfelen, sizeof(struct bcm_tag), &retlen,
-		       (void *)buf);
+	ret = bcm63xx_read_image_tag(master, "rootfs", cfelen, buf);
+	if (!ret) {
+		STR_NULL_TERMINATE(buf->flash_image_start);
+		if (kstrtouint(buf->flash_image_start, 10, &rootfsaddr) ||
+				rootfsaddr < BCM963XX_EXTENDED_SIZE) {
+			pr_err("invalid rootfs address: %*ph\n",
+				(int)sizeof(buf->flash_image_start),
+				buf->flash_image_start);
+			goto invalid_tag;
+		}
 
-	if (retlen != sizeof(struct bcm_tag)) {
-		vfree(buf);
-		return -EIO;
-	}
+		STR_NULL_TERMINATE(buf->kernel_address);
+		if (kstrtouint(buf->kernel_address, 10, &kerneladdr) ||
+				kerneladdr < BCM963XX_EXTENDED_SIZE) {
+			pr_err("invalid kernel address: %*ph\n",
+				(int)sizeof(buf->kernel_address),
+				buf->kernel_address);
+			goto invalid_tag;
+		}
 
-	computed_crc = crc32_le(IMAGETAG_CRC_START, (u8 *)buf,
-				offsetof(struct bcm_tag, header_crc));
-	if (computed_crc == buf->header_crc) {
-		char *boardid = &(buf->board_id[0]);
-		char *tagversion = &(buf->tag_version[0]);
+		STR_NULL_TERMINATE(buf->kernel_length);
+		if (kstrtouint(buf->kernel_length, 10, &kernellen)) {
+			pr_err("invalid kernel length: %*ph\n",
+				(int)sizeof(buf->kernel_length),
+				buf->kernel_length);
+			goto invalid_tag;
+		}
 
-		sscanf(buf->flash_image_start, "%u", &rootfsaddr);
-		sscanf(buf->kernel_address, "%u", &kerneladdr);
-		sscanf(buf->kernel_length, "%u", &kernellen);
-		sscanf(buf->total_length, "%u", &totallen);
-
-		pr_info("CFE boot tag found with version %s and board type %s\n",
-			tagversion, boardid);
+		STR_NULL_TERMINATE(buf->total_length);
+		if (kstrtouint(buf->total_length, 10, &totallen)) {
+			pr_err("invalid total length: %*ph\n",
+				(int)sizeof(buf->total_length),
+				buf->total_length);
+			goto invalid_tag;
+		}
 
 		kerneladdr = kerneladdr - BCM963XX_EXTENDED_SIZE;
 		rootfsaddr = rootfsaddr - BCM963XX_EXTENDED_SIZE;
@@ -134,13 +200,14 @@
 			rootfsaddr = kerneladdr + kernellen;
 			rootfslen = spareaddr - rootfsaddr;
 		}
-	} else {
-		pr_warn("CFE boot tag CRC invalid (expected %08x, actual %08x)\n",
-			buf->header_crc, computed_crc);
+	} else if (ret > 0) {
+invalid_tag:
 		kernellen = 0;
 		rootfslen = 0;
 		rootfsaddr = 0;
 		spareaddr = cfelen;
+	} else {
+		goto out;
 	}
 	sparelen = master->size - spareaddr - nvramlen;
 
@@ -151,11 +218,10 @@
 	if (kernellen > 0)
 		nrparts++;
 
-	/* Ask kernel for more memory */
 	parts = kzalloc(sizeof(*parts) * nrparts + 10 * nrparts, GFP_KERNEL);
 	if (!parts) {
-		vfree(buf);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	/* Start building partition list */
@@ -206,9 +272,43 @@
 		sparelen);
 
 	*pparts = parts;
+	ret = 0;
+
+out:
 	vfree(buf);
 
+	if (ret)
+		return ret;
+
 	return nrparts;
+}
+
+static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
+					const struct mtd_partition **pparts,
+					struct mtd_part_parser_data *data)
+{
+	struct bcm963xx_nvram *nvram = NULL;
+	int ret;
+
+	if (bcm63xx_detect_cfe(master))
+		return -EINVAL;
+
+	nvram = vzalloc(sizeof(*nvram));
+	if (!nvram)
+		return -ENOMEM;
+
+	ret = bcm63xx_read_nvram(master, nvram);
+	if (ret)
+		goto out;
+
+	if (!mtd_type_is_nand(master))
+		ret = bcm63xx_parse_cfe_nor_partitions(master, pparts, nvram);
+	else
+		ret = -EINVAL;
+
+out:
+	vfree(nvram);
+	return ret;
 };
 
 static struct mtd_part_parser bcm63xx_cfe_parser = {

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index e2c0057..7c887f1 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c

@@ -75,7 +75,7 @@
 				break;
 			}
 
-		page_cache_release(page);
+		put_page(page);
 		pages--;
 		index++;
 	}
@@ -124,7 +124,7 @@
 			return PTR_ERR(page);
 
 		memcpy(buf, page_address(page) + offset, cpylen);
-		page_cache_release(page);
+		put_page(page);
 
 		if (retlen)
 			*retlen += cpylen;
@@ -164,7 +164,7 @@
 			unlock_page(page);
 			balance_dirty_pages_ratelimited(mapping);
 		}
-		page_cache_release(page);
+		put_page(page);
 
 		if (retlen)
 			*retlen += cpylen;

diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index c3a2695..e7b2e43 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c

@@ -72,13 +72,11 @@
  * @eccbytes: 8 bytes are used (1 for Hamming ECC, 7 for BCH ECC)
  * @eccpos: ecc positions (byte 7 is Hamming ECC, byte 8-14 are BCH ECC)
  * @oobfree: free pageinfo bytes (byte 0 until byte 6, byte 15
- * @oobavail: 8 available bytes remaining after ECC toll
  */
 static struct nand_ecclayout docg3_oobinfo = {
 	.eccbytes = 8,
 	.eccpos = {7, 8, 9, 10, 11, 12, 13, 14},
 	.oobfree = {{0, 7}, {15, 1} },
-	.oobavail = 8,
 };
 
 static inline u8 doc_readb(struct docg3 *docg3, u16 reg)
@@ -1438,7 +1436,7 @@
 		oobdelta = mtd->oobsize;
 		break;
 	case MTD_OPS_AUTO_OOB:
-		oobdelta = mtd->ecclayout->oobavail;
+		oobdelta = mtd->oobavail;
 		break;
 	default:
 		return -EINVAL;
@@ -1860,6 +1858,7 @@
 	mtd->_write_oob = doc_write_oob;
 	mtd->_block_isbad = doc_block_isbad;
 	mtd->ecclayout = &docg3_oobinfo;
+	mtd->oobavail = 8;
 	mtd->ecc_strength = DOC_ECC_BCH_T;
 
 	return 0;

diff --git a/drivers/mtd/devices/mtdram.c b/drivers/mtd/devices/mtdram.c
index 627a9bc..cbd8547 100644
--- a/drivers/mtd/devices/mtdram.c
+++ b/drivers/mtd/devices/mtdram.c

@@ -19,6 +19,7 @@
 
 static unsigned long total_size = CONFIG_MTDRAM_TOTAL_SIZE;
 static unsigned long erase_size = CONFIG_MTDRAM_ERASE_SIZE;
+static unsigned long writebuf_size = 64;
 #define MTDRAM_TOTAL_SIZE (total_size * 1024)
 #define MTDRAM_ERASE_SIZE (erase_size * 1024)
 
@@ -27,6 +28,8 @@
 MODULE_PARM_DESC(total_size, "Total device size in KiB");
 module_param(erase_size, ulong, 0);
 MODULE_PARM_DESC(erase_size, "Device erase block size in KiB");
+module_param(writebuf_size, ulong, 0);
+MODULE_PARM_DESC(writebuf_size, "Device write buf size in Bytes (Default: 64)");
 #endif
 
 // We could store these in the mtd structure, but we only support 1 device..
@@ -123,7 +126,7 @@
 	mtd->flags = MTD_CAP_RAM;
 	mtd->size = size;
 	mtd->writesize = 1;
-	mtd->writebufsize = 64; /* Mimic CFI NOR flashes */
+	mtd->writebufsize = writebuf_size;
 	mtd->erasesize = MTDRAM_ERASE_SIZE;
 	mtd->priv = mapped_address;
 

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 10bf304..08de4b2 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c

@@ -126,10 +126,7 @@
 	if (ops->oobbuf) {
 		size_t len, pages;
 
-		if (ops->mode == MTD_OPS_AUTO_OOB)
-			len = mtd->oobavail;
-		else
-			len = mtd->oobsize;
+		len = mtd_oobavail(mtd, ops);
 		pages = mtd_div_by_ws(mtd->size, mtd);
 		pages -= mtd_div_by_ws(from, mtd);
 		if (ops->ooboffs + ops->ooblen > pages * len)

diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index fc8b3d1..cb06bdd 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c

@@ -346,7 +346,7 @@
 	if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
 		return MTDSWAP_SCANNED_BAD;
 
-	ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
+	ops.ooblen = 2 * d->mtd->oobavail;
 	ops.oobbuf = d->oob_buf;
 	ops.ooboffs = 0;
 	ops.datbuf = NULL;
@@ -359,7 +359,7 @@
 
 	data = (struct mtdswap_oobdata *)d->oob_buf;
 	data2 = (struct mtdswap_oobdata *)
-		(d->oob_buf + d->mtd->ecclayout->oobavail);
+		(d->oob_buf + d->mtd->oobavail);
 
 	if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
 		eb->erase_count = le32_to_cpu(data->count);
@@ -933,7 +933,7 @@
 
 	ops.mode = MTD_OPS_AUTO_OOB;
 	ops.len = mtd->writesize;
-	ops.ooblen = mtd->ecclayout->oobavail;
+	ops.ooblen = mtd->oobavail;
 	ops.ooboffs = 0;
 	ops.datbuf = d->page_buf;
 	ops.oobbuf = d->oob_buf;
@@ -945,7 +945,7 @@
 		for (i = 0; i < mtd_pages; i++) {
 			patt = mtdswap_test_patt(test + i);
 			memset(d->page_buf, patt, mtd->writesize);
-			memset(d->oob_buf, patt, mtd->ecclayout->oobavail);
+			memset(d->oob_buf, patt, mtd->oobavail);
 			ret = mtd_write_oob(mtd, pos, &ops);
 			if (ret)
 				goto error;
@@ -964,7 +964,7 @@
 				if (p1[j] != patt)
 					goto error;
 
-			for (j = 0; j < mtd->ecclayout->oobavail; j++)
+			for (j = 0; j < mtd->oobavail; j++)
 				if (p2[j] != (unsigned char)patt)
 					goto error;
 
@@ -1387,7 +1387,7 @@
 	if (!d->page_buf)
 		goto page_buf_fail;
 
-	d->oob_buf = kmalloc(2 * mtd->ecclayout->oobavail, GFP_KERNEL);
+	d->oob_buf = kmalloc(2 * mtd->oobavail, GFP_KERNEL);
 	if (!d->oob_buf)
 		goto oob_buf_fail;
 
@@ -1417,7 +1417,6 @@
 	unsigned long part;
 	unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
 	uint64_t swap_size, use_size, size_limit;
-	struct nand_ecclayout *oinfo;
 	int ret;
 
 	parts = &partitions[0];
@@ -1447,17 +1446,10 @@
 		return;
 	}
 
-	oinfo = mtd->ecclayout;
-	if (!oinfo) {
-		printk(KERN_ERR "%s: mtd%d does not have OOB\n",
-			MTDSWAP_PREFIX, mtd->index);
-		return;
-	}
-
-	if (!mtd->oobsize || oinfo->oobavail < MTDSWAP_OOBSIZE) {
+	if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
 		printk(KERN_ERR "%s: Not enough free bytes in OOB, "
 			"%d available, %zu needed.\n",
-			MTDSWAP_PREFIX, oinfo->oobavail, MTDSWAP_OOBSIZE);
+			MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
 		return;
 	}
 

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 20f01b3..f05e0e9e 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig

@@ -74,6 +74,7 @@
 config MTD_NAND_GPIO
 	tristate "GPIO assisted NAND Flash driver"
 	depends on GPIOLIB || COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  This enables a NAND flash driver where control signals are
 	  connected to GPIO pins, and commands and data are communicated
@@ -310,6 +311,7 @@
 config MTD_NAND_CS553X
 	tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)"
 	depends on X86_32
+	depends on !UML && HAS_IOMEM
 	help
 	  The CS553x companion chips for the AMD Geode processor
 	  include NAND flash controllers with built-in hardware ECC
@@ -463,6 +465,7 @@
 config MTD_NAND_VF610_NFC
 	tristate "Support for Freescale NFC for VF610/MPC5125"
 	depends on (SOC_VF610 || COMPILE_TEST)
+	depends on HAS_IOMEM
 	help
 	  Enables support for NAND Flash Controller on some Freescale
 	  processors like the VF610, MPC5125, MCF54418 or Kinetis K70.
@@ -553,4 +556,11 @@
 	help
 	  Enables support for NAND controller on Hisilicon SoC Hip04.
 
+config MTD_NAND_QCOM
+	tristate "Support for NAND on QCOM SoCs"
+	depends on ARCH_QCOM
+	help
+	  Enables support for NAND flash chips on SoCs containing the EBI2 NAND
+	  controller. This controller is found on IPQ806x SoC.
+
 endif # MTD_NAND

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 9e36233..f553353 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile

@@ -56,5 +56,6 @@
 obj-$(CONFIG_MTD_NAND_SUNXI)		+= sunxi_nand.o
 obj-$(CONFIG_MTD_NAND_HISI504)	        += hisi504_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND)		+= brcmnand/
+obj-$(CONFIG_MTD_NAND_QCOM)		+= qcom_nandc.o
 
 nand-objs := nand_base.o nand_bbt.o nand_timings.o

diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index bddcf83..20cbaab 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c

@@ -65,6 +65,11 @@
 
 struct atmel_nand_caps {
 	bool pmecc_correct_erase_page;
+	uint8_t pmecc_max_correction;
+};
+
+struct atmel_nand_nfc_caps {
+	uint32_t rb_mask;
 };
 
 /* oob layout for large page size
@@ -111,6 +116,7 @@
 	/* Point to the sram bank which include readed data via NFC */
 	void			*data_in_sram;
 	bool			will_write_sram;
+	const struct atmel_nand_nfc_caps *caps;
 };
 static struct atmel_nfc	nand_nfc;
 
@@ -140,6 +146,7 @@
 	int			pmecc_cw_len;	/* Length of codeword */
 
 	void __iomem		*pmerrloc_base;
+	void __iomem		*pmerrloc_el_base;
 	void __iomem		*pmecc_rom_base;
 
 	/* lookup table for alpha_to and index_of */
@@ -468,6 +475,7 @@
  *                8-bits                13-bytes                 14-bytes
  *               12-bits                20-bytes                 21-bytes
  *               24-bits                39-bytes                 42-bytes
+ *               32-bits                52-bytes                 56-bytes
  */
 static int pmecc_get_ecc_bytes(int cap, int sector_size)
 {
@@ -813,7 +821,7 @@
 	sector_size = host->pmecc_sector_size;
 
 	while (err_nbr) {
-		tmp = pmerrloc_readl_el_relaxed(host->pmerrloc_base, i) - 1;
+		tmp = pmerrloc_readl_el_relaxed(host->pmerrloc_el_base, i) - 1;
 		byte_pos = tmp / 8;
 		bit_pos  = tmp % 8;
 
@@ -825,7 +833,7 @@
 			*(buf + byte_pos) ^= (1 << bit_pos);
 
 			pos = sector_num * host->pmecc_sector_size + byte_pos;
-			dev_info(host->dev, "Bit flip in data area, byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
+			dev_dbg(host->dev, "Bit flip in data area, byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
 				pos, bit_pos, err_byte, *(buf + byte_pos));
 		} else {
 			/* Bit flip in OOB area */
@@ -835,7 +843,7 @@
 			ecc[tmp] ^= (1 << bit_pos);
 
 			pos = tmp + nand_chip->ecc.layout->eccpos[0];
-			dev_info(host->dev, "Bit flip in OOB, oob_byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
+			dev_dbg(host->dev, "Bit flip in OOB, oob_byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
 				pos, bit_pos, err_byte, ecc[tmp]);
 		}
 
@@ -1017,6 +1025,9 @@
 	case 24:
 		val = PMECC_CFG_BCH_ERR24;
 		break;
+	case 32:
+		val = PMECC_CFG_BCH_ERR32;
+		break;
 	}
 
 	if (host->pmecc_sector_size == 512)
@@ -1078,6 +1089,9 @@
 
 	/* If device tree doesn't specify, use NAND's minimum ECC parameters */
 	if (host->pmecc_corr_cap == 0) {
+		if (*cap > host->caps->pmecc_max_correction)
+			return -EINVAL;
+
 		/* use the most fitable ecc bits (the near bigger one ) */
 		if (*cap <= 2)
 			host->pmecc_corr_cap = 2;
@@ -1089,6 +1103,8 @@
 			host->pmecc_corr_cap = 12;
 		else if (*cap <= 24)
 			host->pmecc_corr_cap = 24;
+		else if (*cap <= 32)
+			host->pmecc_corr_cap = 32;
 		else
 			return -EINVAL;
 	}
@@ -1205,6 +1221,8 @@
 		err_no = PTR_ERR(host->pmerrloc_base);
 		goto err;
 	}
+	host->pmerrloc_el_base = host->pmerrloc_base + ATMEL_PMERRLOC_SIGMAx +
+		(host->caps->pmecc_max_correction + 1) * 4;
 
 	if (!host->has_no_lookup_table) {
 		regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
@@ -1486,8 +1504,6 @@
 		ecc_writel(host->ecc, CR, ATMEL_ECC_RST);
 }
 
-static const struct of_device_id atmel_nand_dt_ids[];
-
 static int atmel_of_init_port(struct atmel_nand_host *host,
 			      struct device_node *np)
 {
@@ -1498,7 +1514,7 @@
 	enum of_gpio_flags flags = 0;
 
 	host->caps = (struct atmel_nand_caps *)
-		of_match_device(atmel_nand_dt_ids, host->dev)->data;
+		of_device_get_match_data(host->dev);
 
 	if (of_property_read_u32(np, "atmel,nand-addr-offset", &val) == 0) {
 		if (val >= 32) {
@@ -1547,10 +1563,16 @@
 	 * them from NAND ONFI parameters.
 	 */
 	if (of_property_read_u32(np, "atmel,pmecc-cap", &val) == 0) {
-		if ((val != 2) && (val != 4) && (val != 8) && (val != 12) &&
-				(val != 24)) {
+		if (val > host->caps->pmecc_max_correction) {
 			dev_err(host->dev,
-				"Unsupported PMECC correction capability: %d; should be 2, 4, 8, 12 or 24\n",
+				"Required ECC strength too high: %u max %u\n",
+				val, host->caps->pmecc_max_correction);
+			return -EINVAL;
+		}
+		if ((val != 2)  && (val != 4)  && (val != 8) &&
+		    (val != 12) && (val != 24) && (val != 32)) {
+			dev_err(host->dev,
+				"Required ECC strength not supported: %u\n",
 				val);
 			return -EINVAL;
 		}
@@ -1560,7 +1582,7 @@
 	if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) == 0) {
 		if ((val != 512) && (val != 1024)) {
 			dev_err(host->dev,
-				"Unsupported PMECC sector size: %d; should be 512 or 1024 bytes\n",
+				"Required ECC sector size not supported: %u\n",
 				val);
 			return -EINVAL;
 		}
@@ -1677,9 +1699,9 @@
 		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_XFR_DONE);
 		ret = IRQ_HANDLED;
 	}
-	if (pending & NFC_SR_RB_EDGE) {
+	if (pending & host->nfc->caps->rb_mask) {
 		complete(&host->nfc->comp_ready);
-		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_RB_EDGE);
+		nfc_writel(host->nfc->hsmc_regs, IDR, host->nfc->caps->rb_mask);
 		ret = IRQ_HANDLED;
 	}
 	if (pending & NFC_SR_CMD_DONE) {
@@ -1697,7 +1719,7 @@
 	if (flag & NFC_SR_XFR_DONE)
 		init_completion(&host->nfc->comp_xfer_done);
 
-	if (flag & NFC_SR_RB_EDGE)
+	if (flag & host->nfc->caps->rb_mask)
 		init_completion(&host->nfc->comp_ready);
 
 	if (flag & NFC_SR_CMD_DONE)
@@ -1715,7 +1737,7 @@
 	if (flag & NFC_SR_XFR_DONE)
 		comp[index++] = &host->nfc->comp_xfer_done;
 
-	if (flag & NFC_SR_RB_EDGE)
+	if (flag & host->nfc->caps->rb_mask)
 		comp[index++] = &host->nfc->comp_ready;
 
 	if (flag & NFC_SR_CMD_DONE)
@@ -1783,7 +1805,7 @@
 		dev_err(host->dev, "Lost the interrupt flags: 0x%08x\n",
 				mask & status);
 
-	return status & NFC_SR_RB_EDGE;
+	return status & host->nfc->caps->rb_mask;
 }
 
 static void nfc_select_chip(struct mtd_info *mtd, int chip)
@@ -1956,8 +1978,8 @@
 		}
 		/* fall through */
 	default:
-		nfc_prepare_interrupt(host, NFC_SR_RB_EDGE);
-		nfc_wait_interrupt(host, NFC_SR_RB_EDGE);
+		nfc_prepare_interrupt(host, host->nfc->caps->rb_mask);
+		nfc_wait_interrupt(host, host->nfc->caps->rb_mask);
 	}
 }
 
@@ -2304,17 +2326,34 @@
 	return 0;
 }
 
+/*
+ * AT91RM9200 does not have PMECC or PMECC Errloc peripherals for
+ * BCH ECC. Combined with the "atmel,has-pmecc", it is used to describe
+ * devices from the SAM9 family that have those.
+ */
 static const struct atmel_nand_caps at91rm9200_caps = {
 	.pmecc_correct_erase_page = false,
+	.pmecc_max_correction = 24,
 };
 
 static const struct atmel_nand_caps sama5d4_caps = {
 	.pmecc_correct_erase_page = true,
+	.pmecc_max_correction = 24,
+};
+
+/*
+ * The PMECC Errloc controller starting in SAMA5D2 is not compatible,
+ * as the increased correction strength requires more registers.
+ */
+static const struct atmel_nand_caps sama5d2_caps = {
+	.pmecc_correct_erase_page = true,
+	.pmecc_max_correction = 32,
 };
 
 static const struct of_device_id atmel_nand_dt_ids[] = {
 	{ .compatible = "atmel,at91rm9200-nand", .data = &at91rm9200_caps },
 	{ .compatible = "atmel,sama5d4-nand", .data = &sama5d4_caps },
+	{ .compatible = "atmel,sama5d2-nand", .data = &sama5d2_caps },
 	{ /* sentinel */ }
 };
 
@@ -2354,6 +2393,11 @@
 		}
 	}
 
+	nfc->caps = (const struct atmel_nand_nfc_caps *)
+		of_device_get_match_data(&pdev->dev);
+	if (!nfc->caps)
+		return -ENODEV;
+
 	nfc_writel(nfc->hsmc_regs, IDR, 0xffffffff);
 	nfc_readl(nfc->hsmc_regs, SR);	/* clear the NFC_SR */
 
@@ -2382,8 +2426,17 @@
 	return 0;
 }
 
+static const struct atmel_nand_nfc_caps sama5d3_nfc_caps = {
+	.rb_mask = NFC_SR_RB_EDGE0,
+};
+
+static const struct atmel_nand_nfc_caps sama5d4_nfc_caps = {
+	.rb_mask = NFC_SR_RB_EDGE3,
+};
+
 static const struct of_device_id atmel_nand_nfc_match[] = {
-	{ .compatible = "atmel,sama5d3-nfc" },
+	{ .compatible = "atmel,sama5d3-nfc", .data = &sama5d3_nfc_caps },
+	{ .compatible = "atmel,sama5d4-nfc", .data = &sama5d4_nfc_caps },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, atmel_nand_nfc_match);

diff --git a/drivers/mtd/nand/atmel_nand_ecc.h b/drivers/mtd/nand/atmel_nand_ecc.h
index 668e735..834d694 100644
--- a/drivers/mtd/nand/atmel_nand_ecc.h
+++ b/drivers/mtd/nand/atmel_nand_ecc.h

@@ -43,6 +43,7 @@
 #define		PMECC_CFG_BCH_ERR8		(2 << 0)
 #define		PMECC_CFG_BCH_ERR12		(3 << 0)
 #define		PMECC_CFG_BCH_ERR24		(4 << 0)
+#define		PMECC_CFG_BCH_ERR32		(5 << 0)
 
 #define		PMECC_CFG_SECTOR512		(0 << 4)
 #define		PMECC_CFG_SECTOR1024		(1 << 4)
@@ -108,7 +109,11 @@
 #define		PMERRLOC_ERR_NUM_MASK		(0x1f << 8)
 #define		PMERRLOC_CALC_DONE		(1 << 0)
 #define ATMEL_PMERRLOC_SIGMAx		0x028	/* Error location SIGMA x */
-#define ATMEL_PMERRLOC_ELx		0x08c	/* Error location x */
+
+/*
+ * The ATMEL_PMERRLOC_ELx register location depends from the number of
+ * bits corrected by the PMECC controller. Do not use it.
+ */
 
 /* Register access macros for PMECC */
 #define pmecc_readl_relaxed(addr, reg) \
@@ -136,7 +141,7 @@
 	readl_relaxed((addr) + ATMEL_PMERRLOC_SIGMAx + ((n) * 4))
 
 #define pmerrloc_readl_el_relaxed(addr, n) \
-	readl_relaxed((addr) + ATMEL_PMERRLOC_ELx + ((n) * 4))
+	readl_relaxed((addr) + ((n) * 4))
 
 /* Galois field dimension */
 #define PMECC_GF_DIMENSION_13			13

diff --git a/drivers/mtd/nand/atmel_nand_nfc.h b/drivers/mtd/nand/atmel_nand_nfc.h
index 4d5d262..0bbc1fa 100644
--- a/drivers/mtd/nand/atmel_nand_nfc.h
+++ b/drivers/mtd/nand/atmel_nand_nfc.h

@@ -42,7 +42,8 @@
 #define		NFC_SR_UNDEF		(1 << 21)
 #define		NFC_SR_AWB		(1 << 22)
 #define		NFC_SR_ASE		(1 << 23)
-#define		NFC_SR_RB_EDGE		(1 << 24)
+#define		NFC_SR_RB_EDGE0		(1 << 24)
+#define		NFC_SR_RB_EDGE3		(1 << 27)
 
 #define ATMEL_HSMC_NFC_IER	0x0c
 #define ATMEL_HSMC_NFC_IDR	0x10

diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index 844fc07..e052839 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c

@@ -311,6 +311,36 @@
 	[BRCMNAND_FC_BASE]		= 0x400,
 };
 
+/* BRCMNAND v7.1 */
+static const u16 brcmnand_regs_v71[] = {
+	[BRCMNAND_CMD_START]		=  0x04,
+	[BRCMNAND_CMD_EXT_ADDRESS]	=  0x08,
+	[BRCMNAND_CMD_ADDRESS]		=  0x0c,
+	[BRCMNAND_INTFC_STATUS]		=  0x14,
+	[BRCMNAND_CS_SELECT]		=  0x18,
+	[BRCMNAND_CS_XOR]		=  0x1c,
+	[BRCMNAND_LL_OP]		=  0x20,
+	[BRCMNAND_CS0_BASE]		=  0x50,
+	[BRCMNAND_CS1_BASE]		=     0,
+	[BRCMNAND_CORR_THRESHOLD]	=  0xdc,
+	[BRCMNAND_CORR_THRESHOLD_EXT]	=  0xe0,
+	[BRCMNAND_UNCORR_COUNT]		=  0xfc,
+	[BRCMNAND_CORR_COUNT]		= 0x100,
+	[BRCMNAND_CORR_EXT_ADDR]	= 0x10c,
+	[BRCMNAND_CORR_ADDR]		= 0x110,
+	[BRCMNAND_UNCORR_EXT_ADDR]	= 0x114,
+	[BRCMNAND_UNCORR_ADDR]		= 0x118,
+	[BRCMNAND_SEMAPHORE]		= 0x150,
+	[BRCMNAND_ID]			= 0x194,
+	[BRCMNAND_ID_EXT]		= 0x198,
+	[BRCMNAND_LL_RDATA]		= 0x19c,
+	[BRCMNAND_OOB_READ_BASE]	= 0x200,
+	[BRCMNAND_OOB_READ_10_BASE]	=     0,
+	[BRCMNAND_OOB_WRITE_BASE]	= 0x280,
+	[BRCMNAND_OOB_WRITE_10_BASE]	=     0,
+	[BRCMNAND_FC_BASE]		= 0x400,
+};
+
 enum brcmnand_cs_reg {
 	BRCMNAND_CS_CFG_EXT = 0,
 	BRCMNAND_CS_CFG,
@@ -406,7 +436,9 @@
 	}
 
 	/* Register offsets */
-	if (ctrl->nand_version >= 0x0600)
+	if (ctrl->nand_version >= 0x0701)
+		ctrl->reg_offsets = brcmnand_regs_v71;
+	else if (ctrl->nand_version >= 0x0600)
 		ctrl->reg_offsets = brcmnand_regs_v60;
 	else if (ctrl->nand_version >= 0x0500)
 		ctrl->reg_offsets = brcmnand_regs_v50;
@@ -796,7 +828,8 @@
 				    idx2 >= MTD_MAX_OOBFREE_ENTRIES_LARGE - 1)
 				break;
 		}
-		goto out;
+
+		return layout;
 	}
 
 	/*
@@ -847,10 +880,7 @@
 				idx2 >= MTD_MAX_OOBFREE_ENTRIES_LARGE - 1)
 			break;
 	}
-out:
-	/* Sum available OOB */
-	for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES_LARGE; i++)
-		layout->oobavail += layout->oobfree[i].length;
+
 	return layout;
 }
 

diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index aa1a616..e553aff 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c

@@ -537,7 +537,7 @@
 	return 0;
 }
 
-static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
+static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 {
 	return 0;
 }

diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index f170f3c..547c100 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c

@@ -794,7 +794,7 @@
 	}
 }
 
-static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
+static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs)
 {
 	/* This is our last resort if we couldn't find or create a BBT.  Just
 	   pretend all blocks are good. */

diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index df4165b..d86a60e 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c

@@ -225,7 +225,6 @@
 static struct nand_ecclayout docg4_oobinfo = {
 	.eccbytes = 9,
 	.eccpos = {7, 8, 9, 10, 11, 12, 13, 14, 15},
-	.oobavail = 5,
 	.oobfree = { {.offset = 2, .length = 5} }
 };
 
@@ -1121,7 +1120,7 @@
 	return ret;
 }
 
-static int docg4_block_neverbad(struct mtd_info *mtd, loff_t ofs, int getchip)
+static int docg4_block_neverbad(struct mtd_info *mtd, loff_t ofs)
 {
 	/* only called when module_param ignore_badblocks is set */
 	return 0;

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 235ddcb..8122c69 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c

@@ -1,7 +1,7 @@
 /*
  * Freescale GPMI NAND Flash Driver
  *
- * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
+ * Copyright (C) 2010-2015 Freescale Semiconductor, Inc.
  * Copyright (C) 2008 Embedded Alley Solutions, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -136,7 +136,7 @@
  *
  * We may have available oob space in this case.
  */
-static bool set_geometry_by_ecc_info(struct gpmi_nand_data *this)
+static int set_geometry_by_ecc_info(struct gpmi_nand_data *this)
 {
 	struct bch_geometry *geo = &this->bch_geometry;
 	struct nand_chip *chip = &this->nand;
@@ -145,7 +145,7 @@
 	unsigned int block_mark_bit_offset;
 
 	if (!(chip->ecc_strength_ds > 0 && chip->ecc_step_ds > 0))
-		return false;
+		return -EINVAL;
 
 	switch (chip->ecc_step_ds) {
 	case SZ_512:
@@ -158,19 +158,19 @@
 		dev_err(this->dev,
 			"unsupported nand chip. ecc bits : %d, ecc size : %d\n",
 			chip->ecc_strength_ds, chip->ecc_step_ds);
-		return false;
+		return -EINVAL;
 	}
 	geo->ecc_chunk_size = chip->ecc_step_ds;
 	geo->ecc_strength = round_up(chip->ecc_strength_ds, 2);
 	if (!gpmi_check_ecc(this))
-		return false;
+		return -EINVAL;
 
 	/* Keep the C >= O */
 	if (geo->ecc_chunk_size < mtd->oobsize) {
 		dev_err(this->dev,
 			"unsupported nand chip. ecc size: %d, oob size : %d\n",
 			chip->ecc_step_ds, mtd->oobsize);
-		return false;
+		return -EINVAL;
 	}
 
 	/* The default value, see comment in the legacy_set_geometry(). */
@@ -242,7 +242,7 @@
 				+ ALIGN(geo->ecc_chunk_count, 4);
 
 	if (!this->swap_block_mark)
-		return true;
+		return 0;
 
 	/* For bit swap. */
 	block_mark_bit_offset = mtd->writesize * 8 -
@@ -251,7 +251,7 @@
 
 	geo->block_mark_byte_offset = block_mark_bit_offset / 8;
 	geo->block_mark_bit_offset  = block_mark_bit_offset % 8;
-	return true;
+	return 0;
 }
 
 static int legacy_set_geometry(struct gpmi_nand_data *this)
@@ -285,7 +285,8 @@
 	geo->ecc_strength = get_ecc_strength(this);
 	if (!gpmi_check_ecc(this)) {
 		dev_err(this->dev,
-			"required ecc strength of the NAND chip: %d is not supported by the GPMI controller (%d)\n",
+			"ecc strength: %d cannot be supported by the controller (%d)\n"
+			"try to use minimum ecc strength that NAND chip required\n",
 			geo->ecc_strength,
 			this->devdata->bch_max_ecc_strength);
 		return -EINVAL;
@@ -366,10 +367,11 @@
 
 int common_nfc_set_geometry(struct gpmi_nand_data *this)
 {
-	if (of_property_read_bool(this->dev->of_node, "fsl,use-minimum-ecc")
-		&& set_geometry_by_ecc_info(this))
-		return 0;
-	return legacy_set_geometry(this);
+	if ((of_property_read_bool(this->dev->of_node, "fsl,use-minimum-ecc"))
+				|| legacy_set_geometry(this))
+		return set_geometry_by_ecc_info(this);
+
+	return 0;
 }
 
 struct dma_chan *get_dma_chan(struct gpmi_nand_data *this)
@@ -2033,9 +2035,54 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int gpmi_pm_suspend(struct device *dev)
+{
+	struct gpmi_nand_data *this = dev_get_drvdata(dev);
+
+	release_dma_channels(this);
+	return 0;
+}
+
+static int gpmi_pm_resume(struct device *dev)
+{
+	struct gpmi_nand_data *this = dev_get_drvdata(dev);
+	int ret;
+
+	ret = acquire_dma_channels(this);
+	if (ret < 0)
+		return ret;
+
+	/* re-init the GPMI registers */
+	this->flags &= ~GPMI_TIMING_INIT_OK;
+	ret = gpmi_init(this);
+	if (ret) {
+		dev_err(this->dev, "Error setting GPMI : %d\n", ret);
+		return ret;
+	}
+
+	/* re-init the BCH registers */
+	ret = bch_set_geometry(this);
+	if (ret) {
+		dev_err(this->dev, "Error setting BCH : %d\n", ret);
+		return ret;
+	}
+
+	/* re-init others */
+	gpmi_extra_init(this);
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops gpmi_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(gpmi_pm_suspend, gpmi_pm_resume)
+};
+
 static struct platform_driver gpmi_nand_driver = {
 	.driver = {
 		.name = "gpmi-nand",
+		.pm = &gpmi_pm_ops,
 		.of_match_table = gpmi_nand_id_table,
 	},
 	.probe   = gpmi_nand_probe,

diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c
index f8d37f3..96502b6 100644
--- a/drivers/mtd/nand/hisi504_nand.c
+++ b/drivers/mtd/nand/hisi504_nand.c

@@ -632,7 +632,6 @@
 }
 
 static struct nand_ecclayout nand_ecc_2K_16bits = {
-	.oobavail = 6,
 	.oobfree = { {2, 6} },
 };
 

diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c
index b19d2a9..673ceb2 100644
--- a/drivers/mtd/nand/jz4740_nand.c
+++ b/drivers/mtd/nand/jz4740_nand.c

@@ -427,9 +427,6 @@
 	chip->ecc.strength	= 4;
 	chip->ecc.options	= NAND_ECC_GENERIC_ERASED_CHECK;
 
-	if (pdata)
-		chip->ecc.layout = pdata->ecc_layout;
-
 	chip->chip_delay = 50;
 	chip->cmd_ctrl = jz_nand_cmd_ctrl;
 	chip->select_chip = jz_nand_select_chip;

diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c
index 9bc435d..d8c3e7a 100644
--- a/drivers/mtd/nand/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/lpc32xx_mlc.c

@@ -750,7 +750,7 @@
 	}
 
 	nand_chip->ecc.mode = NAND_ECC_HW;
-	nand_chip->ecc.size = mtd->writesize;
+	nand_chip->ecc.size = 512;
 	nand_chip->ecc.layout = &lpc32xx_nand_oob;
 	host->mlcsubpages = mtd->writesize / 512;
 

diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index 6b93e89..5d7843f 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c

@@ -626,7 +626,7 @@
 
 static int mpc5121_nfc_probe(struct platform_device *op)
 {
-	struct device_node *rootnode, *dn = op->dev.of_node;
+	struct device_node *dn = op->dev.of_node;
 	struct clk *clk;
 	struct device *dev = &op->dev;
 	struct mpc5121_nfc_prv *prv;
@@ -712,18 +712,15 @@
 	chip->ecc.mode = NAND_ECC_SOFT;
 
 	/* Support external chip-select logic on ADS5121 board */
-	rootnode = of_find_node_by_path("/");
-	if (of_device_is_compatible(rootnode, "fsl,mpc5121ads")) {
+	if (of_machine_is_compatible("fsl,mpc5121ads")) {
 		retval = ads5121_chipselect_init(mtd);
 		if (retval) {
 			dev_err(dev, "Chipselect init error!\n");
-			of_node_put(rootnode);
 			return retval;
 		}
 
 		chip->select_chip = ads5121_select_chip;
 	}
-	of_node_put(rootnode);
 
 	/* Enable NFC clock */
 	clk = devm_clk_get(dev, "ipg");

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index f2c8ff3..b6facac 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c

@@ -313,13 +313,12 @@
  * nand_block_bad - [DEFAULT] Read bad block marker from the chip
  * @mtd: MTD device structure
  * @ofs: offset from device start
- * @getchip: 0, if the chip is already selected
  *
  * Check, if the block is bad.
  */
-static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
+static int nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 {
-	int page, chipnr, res = 0, i = 0;
+	int page, res = 0, i = 0;
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	u16 bad;
 
@@ -328,15 +327,6 @@
 
 	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
 
-	if (getchip) {
-		chipnr = (int)(ofs >> chip->chip_shift);
-
-		nand_get_device(mtd, FL_READING);
-
-		/* Select the NAND device */
-		chip->select_chip(mtd, chipnr);
-	}
-
 	do {
 		if (chip->options & NAND_BUSWIDTH_16) {
 			chip->cmdfunc(mtd, NAND_CMD_READOOB,
@@ -361,11 +351,6 @@
 		i++;
 	} while (!res && i < 2 && (chip->bbt_options & NAND_BBT_SCAN2NDPAGE));
 
-	if (getchip) {
-		chip->select_chip(mtd, -1);
-		nand_release_device(mtd);
-	}
-
 	return res;
 }
 
@@ -503,19 +488,17 @@
  * nand_block_checkbad - [GENERIC] Check if a block is marked bad
  * @mtd: MTD device structure
  * @ofs: offset from device start
- * @getchip: 0, if the chip is already selected
  * @allowbbt: 1, if its allowed to access the bbt area
  *
  * Check, if the block is bad. Either by reading the bad block table or
  * calling of the scan function.
  */
-static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int getchip,
-			       int allowbbt)
+static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int allowbbt)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 
 	if (!chip->bbt)
-		return chip->block_bad(mtd, ofs, getchip);
+		return chip->block_bad(mtd, ofs);
 
 	/* Return info from the table */
 	return nand_isbad_bbt(mtd, ofs, allowbbt);
@@ -566,8 +549,8 @@
 		cond_resched();
 	} while (time_before(jiffies, timeo));
 
-	pr_warn_ratelimited(
-		"timeout while waiting for chip to become ready\n");
+	if (!chip->dev_ready(mtd))
+		pr_warn_ratelimited("timeout while waiting for chip to become ready\n");
 out:
 	led_trigger_event(nand_led_trigger, LED_OFF);
 }
@@ -1723,8 +1706,7 @@
 	int ret = 0;
 	uint32_t readlen = ops->len;
 	uint32_t oobreadlen = ops->ooblen;
-	uint32_t max_oobsize = ops->mode == MTD_OPS_AUTO_OOB ?
-		mtd->oobavail : mtd->oobsize;
+	uint32_t max_oobsize = mtd_oobavail(mtd, ops);
 
 	uint8_t *bufpoi, *oob, *buf;
 	int use_bufpoi;
@@ -2075,10 +2057,7 @@
 
 	stats = mtd->ecc_stats;
 
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		len = chip->ecc.layout->oobavail;
-	else
-		len = mtd->oobsize;
+	len = mtd_oobavail(mtd, ops);
 
 	if (unlikely(ops->ooboffs >= len)) {
 		pr_debug("%s: attempt to start read outside oob\n",
@@ -2575,8 +2554,7 @@
 	uint32_t writelen = ops->len;
 
 	uint32_t oobwritelen = ops->ooblen;
-	uint32_t oobmaxlen = ops->mode == MTD_OPS_AUTO_OOB ?
-				mtd->oobavail : mtd->oobsize;
+	uint32_t oobmaxlen = mtd_oobavail(mtd, ops);
 
 	uint8_t *oob = ops->oobbuf;
 	uint8_t *buf = ops->datbuf;
@@ -2766,10 +2744,7 @@
 	pr_debug("%s: to = 0x%08x, len = %i\n",
 			 __func__, (unsigned int)to, (int)ops->ooblen);
 
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		len = chip->ecc.layout->oobavail;
-	else
-		len = mtd->oobsize;
+	len = mtd_oobavail(mtd, ops);
 
 	/* Do not allow write past end of page */
 	if ((ops->ooboffs + ops->ooblen) > len) {
@@ -2957,7 +2932,7 @@
 	while (len) {
 		/* Check if we have a bad block, we do not erase bad blocks! */
 		if (nand_block_checkbad(mtd, ((loff_t) page) <<
-					chip->page_shift, 0, allowbbt)) {
+					chip->page_shift, allowbbt)) {
 			pr_warn("%s: attempt to erase a bad block at page 0x%08x\n",
 				    __func__, page);
 			instr->state = MTD_ERASE_FAILED;
@@ -3044,7 +3019,20 @@
  */
 static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
 {
-	return nand_block_checkbad(mtd, offs, 1, 0);
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	int chipnr = (int)(offs >> chip->chip_shift);
+	int ret;
+
+	/* Select the NAND device */
+	nand_get_device(mtd, FL_READING);
+	chip->select_chip(mtd, chipnr);
+
+	ret = nand_block_checkbad(mtd, offs, 0);
+
+	chip->select_chip(mtd, -1);
+	nand_release_device(mtd);
+
+	return ret;
 }
 
 /**
@@ -4287,10 +4275,8 @@
 		}
 
 		/* See nand_bch_init() for details. */
-		ecc->bytes = DIV_ROUND_UP(
-				ecc->strength * fls(8 * ecc->size), 8);
-		ecc->priv = nand_bch_init(mtd, ecc->size, ecc->bytes,
-					       &ecc->layout);
+		ecc->bytes = 0;
+		ecc->priv = nand_bch_init(mtd);
 		if (!ecc->priv) {
 			pr_warn("BCH ECC initialization failed!\n");
 			BUG();
@@ -4325,11 +4311,11 @@
 	 * The number of bytes available for a client to place data into
 	 * the out of band area.
 	 */
-	ecc->layout->oobavail = 0;
-	for (i = 0; ecc->layout->oobfree[i].length
-			&& i < ARRAY_SIZE(ecc->layout->oobfree); i++)
-		ecc->layout->oobavail += ecc->layout->oobfree[i].length;
-	mtd->oobavail = ecc->layout->oobavail;
+	mtd->oobavail = 0;
+	if (ecc->layout) {
+		for (i = 0; ecc->layout->oobfree[i].length; i++)
+			mtd->oobavail += ecc->layout->oobfree[i].length;
+	}
 
 	/* ECC sanity check: warn if it's too weak */
 	if (!nand_ecc_strength_good(mtd))

diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 4b6a708..2fbb523 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c

@@ -1373,5 +1373,3 @@
 
 	return ret;
 }
-
-EXPORT_SYMBOL(nand_scan_bbt);

diff --git a/drivers/mtd/nand/nand_bch.c b/drivers/mtd/nand/nand_bch.c
index a87c1b6..b585bae 100644
--- a/drivers/mtd/nand/nand_bch.c
+++ b/drivers/mtd/nand/nand_bch.c

@@ -107,9 +107,6 @@
 /**
  * nand_bch_init - [NAND Interface] Initialize NAND BCH error correction
  * @mtd:	MTD block structure
- * @eccsize:	ecc block size in bytes
- * @eccbytes:	ecc length in bytes
- * @ecclayout:	output default layout
  *
  * Returns:
  *  a pointer to a new NAND BCH control structure, or NULL upon failure
@@ -123,14 +120,21 @@
  * @eccsize = 512  (thus, m=13 is the smallest integer such that 2^m-1 > 512*8)
  * @eccbytes = 7   (7 bytes are required to store m*t = 13*4 = 52 bits)
  */
-struct nand_bch_control *
-nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes,
-	      struct nand_ecclayout **ecclayout)
+struct nand_bch_control *nand_bch_init(struct mtd_info *mtd)
 {
+	struct nand_chip *nand = mtd_to_nand(mtd);
 	unsigned int m, t, eccsteps, i;
-	struct nand_ecclayout *layout;
+	struct nand_ecclayout *layout = nand->ecc.layout;
 	struct nand_bch_control *nbc = NULL;
 	unsigned char *erased_page;
+	unsigned int eccsize = nand->ecc.size;
+	unsigned int eccbytes = nand->ecc.bytes;
+	unsigned int eccstrength = nand->ecc.strength;
+
+	if (!eccbytes && eccstrength) {
+		eccbytes = DIV_ROUND_UP(eccstrength * fls(8 * eccsize), 8);
+		nand->ecc.bytes = eccbytes;
+	}
 
 	if (!eccsize || !eccbytes) {
 		printk(KERN_WARNING "ecc parameters not supplied\n");
@@ -158,7 +162,7 @@
 	eccsteps = mtd->writesize/eccsize;
 
 	/* if no ecc placement scheme was provided, build one */
-	if (!*ecclayout) {
+	if (!layout) {
 
 		/* handle large page devices only */
 		if (mtd->oobsize < 64) {
@@ -184,7 +188,7 @@
 		layout->oobfree[0].offset = 2;
 		layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes;
 
-		*ecclayout = layout;
+		nand->ecc.layout = layout;
 	}
 
 	/* sanity checks */
@@ -192,7 +196,7 @@
 		printk(KERN_WARNING "eccsize %u is too large\n", eccsize);
 		goto fail;
 	}
-	if ((*ecclayout)->eccbytes != (eccsteps*eccbytes)) {
+	if (layout->eccbytes != (eccsteps*eccbytes)) {
 		printk(KERN_WARNING "invalid ecc layout\n");
 		goto fail;
 	}
@@ -216,6 +220,9 @@
 	for (i = 0; i < eccbytes; i++)
 		nbc->eccmask[i] ^= 0xff;
 
+	if (!eccstrength)
+		nand->ecc.strength = (eccbytes * 8) / fls(8 * eccsize);
+
 	return nbc;
 fail:
 	nand_bch_free(nbc);

diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index a8804a3..ccc05f5 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c

@@ -50,8 +50,8 @@
 		  SZ_16K, SZ_8K, SZ_4M, 0, 6, 1280, NAND_ECC_INFO(40, SZ_1K) },
 	{"H27UCG8T2ATR-BC 64G 3.3V 8-bit",
 		{ .id = {0xad, 0xde, 0x94, 0xda, 0x74, 0xc4} },
-		  SZ_8K, SZ_8K, SZ_2M, 0, 6, 640, NAND_ECC_INFO(40, SZ_1K),
-		  4 },
+		  SZ_8K, SZ_8K, SZ_2M, NAND_NEED_SCRAMBLING, 6, 640,
+		  NAND_ECC_INFO(40, SZ_1K), 4 },
 
 	LEGACY_ID_NAND("NAND 4MiB 5V 8-bit",   0x6B, 4, SZ_8K, SP_OPTIONS),
 	LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE3, 4, SZ_8K, SP_OPTIONS),

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 1fd5195..a58169a2 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c

@@ -1339,7 +1339,7 @@
 	int i;
 
 	for (i = 0; i < ns->held_cnt; i++)
-		page_cache_release(ns->held_pages[i]);
+		put_page(ns->held_pages[i]);
 }
 
 /* Get page cache pages in advance to provide NOFS memory allocation */
@@ -1349,8 +1349,8 @@
 	struct page *page;
 	struct address_space *mapping = file->f_mapping;
 
-	start_index = pos >> PAGE_CACHE_SHIFT;
-	end_index = (pos + count - 1) >> PAGE_CACHE_SHIFT;
+	start_index = pos >> PAGE_SHIFT;
+	end_index = (pos + count - 1) >> PAGE_SHIFT;
 	if (end_index - start_index + 1 > NS_MAX_HELD_PAGES)
 		return -EINVAL;
 	ns->held_cnt = 0;

diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c
index 220ddfc..dbc5b57 100644
--- a/drivers/mtd/nand/nuc900_nand.c
+++ b/drivers/mtd/nand/nuc900_nand.c

@@ -113,7 +113,7 @@
 {
 	unsigned int val;
 	spin_lock(&nand->lock);
-	val = __raw_readl(REG_SMISR);
+	val = __raw_readl(nand->reg + REG_SMISR);
 	val &= READYBUSY;
 	spin_unlock(&nand->lock);
 

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index c553f78..0749ca1 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c

@@ -1807,13 +1807,19 @@
 		goto return_error;
 	}
 
+	/*
+	 * Bail out earlier to let NAND_ECC_SOFT code create its own
+	 * ecclayout instead of using ours.
+	 */
+	if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW) {
+		nand_chip->ecc.mode = NAND_ECC_SOFT;
+		goto scan_tail;
+	}
+
 	/* populate MTD interface based on ECC scheme */
 	ecclayout		= &info->oobinfo;
+	nand_chip->ecc.layout	= ecclayout;
 	switch (info->ecc_opt) {
-	case OMAP_ECC_HAM1_CODE_SW:
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		break;
-
 	case OMAP_ECC_HAM1_CODE_HW:
 		pr_info("nand: using OMAP_ECC_HAM1_CODE_HW\n");
 		nand_chip->ecc.mode             = NAND_ECC_HW;
@@ -1861,10 +1867,7 @@
 		ecclayout->oobfree->offset	= 1 +
 				ecclayout->eccpos[ecclayout->eccbytes - 1] + 1;
 		/* software bch library is used for locating errors */
-		nand_chip->ecc.priv		= nand_bch_init(mtd,
-							nand_chip->ecc.size,
-							nand_chip->ecc.bytes,
-							&ecclayout);
+		nand_chip->ecc.priv		= nand_bch_init(mtd);
 		if (!nand_chip->ecc.priv) {
 			dev_err(&info->pdev->dev, "unable to use BCH library\n");
 			err = -EINVAL;
@@ -1925,10 +1928,7 @@
 		ecclayout->oobfree->offset	= 1 +
 				ecclayout->eccpos[ecclayout->eccbytes - 1] + 1;
 		/* software bch library is used for locating errors */
-		nand_chip->ecc.priv		= nand_bch_init(mtd,
-							nand_chip->ecc.size,
-							nand_chip->ecc.bytes,
-							&ecclayout);
+		nand_chip->ecc.priv		= nand_bch_init(mtd);
 		if (!nand_chip->ecc.priv) {
 			dev_err(&info->pdev->dev, "unable to use BCH library\n");
 			err = -EINVAL;
@@ -2002,9 +2002,6 @@
 		goto return_error;
 	}
 
-	if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW)
-		goto scan_tail;
-
 	/* all OOB bytes from oobfree->offset till end off OOB are free */
 	ecclayout->oobfree->length = mtd->oobsize - ecclayout->oobfree->offset;
 	/* check if NAND device's OOB is enough to store ECC signatures */
@@ -2015,7 +2012,6 @@
 		err = -EINVAL;
 		goto return_error;
 	}
-	nand_chip->ecc.layout = ecclayout;
 
 scan_tail:
 	/* second phase scan */

diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index a0e26de..e4e50da 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c

@@ -73,7 +73,6 @@
 	data->chip.bbt_options |= pdata->chip.bbt_options;
 
 	data->chip.ecc.hwctl = pdata->ctrl.hwcontrol;
-	data->chip.ecc.layout = pdata->chip.ecclayout;
 	data->chip.ecc.mode = NAND_ECC_SOFT;
 
 	platform_set_drvdata(pdev, data);

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 86fc245..d650885 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c

@@ -131,11 +131,23 @@
 #define READ_ID_BYTES		7
 
 /* macros for registers read/write */
-#define nand_writel(info, off, val)	\
-	writel_relaxed((val), (info)->mmio_base + (off))
+#define nand_writel(info, off, val)					\
+	do {								\
+		dev_vdbg(&info->pdev->dev,				\
+			 "%s():%d nand_writel(0x%x, 0x%04x)\n",		\
+			 __func__, __LINE__, (val), (off));		\
+		writel_relaxed((val), (info)->mmio_base + (off));	\
+	} while (0)
 
-#define nand_readl(info, off)		\
-	readl_relaxed((info)->mmio_base + (off))
+#define nand_readl(info, off)						\
+	({								\
+		unsigned int _v;					\
+		_v = readl_relaxed((info)->mmio_base + (off));		\
+		dev_vdbg(&info->pdev->dev,				\
+			 "%s():%d nand_readl(0x%04x) = 0x%x\n",		\
+			 __func__, __LINE__, (off), _v);		\
+		_v;							\
+	})
 
 /* error code and state */
 enum {
@@ -199,7 +211,6 @@
 	struct dma_chan		*dma_chan;
 	dma_cookie_t		dma_cookie;
 	int			drcmr_dat;
-	int			drcmr_cmd;
 
 	unsigned char		*data_buff;
 	unsigned char		*oob_buff;
@@ -222,15 +233,44 @@
 	int			use_spare;	/* use spare ? */
 	int			need_wait;
 
-	unsigned int		data_size;	/* data to be read from FIFO */
-	unsigned int		chunk_size;	/* split commands chunk size */
-	unsigned int		oob_size;
+	/* Amount of real data per full chunk */
+	unsigned int		chunk_size;
+
+	/* Amount of spare data per full chunk */
 	unsigned int		spare_size;
+
+	/* Number of full chunks (i.e chunk_size + spare_size) */
+	unsigned int            nfullchunks;
+
+	/*
+	 * Total number of chunks. If equal to nfullchunks, then there
+	 * are only full chunks. Otherwise, there is one last chunk of
+	 * size (last_chunk_size + last_spare_size)
+	 */
+	unsigned int            ntotalchunks;
+
+	/* Amount of real data in the last chunk */
+	unsigned int		last_chunk_size;
+
+	/* Amount of spare data in the last chunk */
+	unsigned int		last_spare_size;
+
 	unsigned int		ecc_size;
 	unsigned int		ecc_err_cnt;
 	unsigned int		max_bitflips;
 	int 			retcode;
 
+	/*
+	 * Variables only valid during command
+	 * execution. step_chunk_size and step_spare_size is the
+	 * amount of real data and spare data in the current
+	 * chunk. cur_chunk is the current chunk being
+	 * read/programmed.
+	 */
+	unsigned int		step_chunk_size;
+	unsigned int		step_spare_size;
+	unsigned int            cur_chunk;
+
 	/* cached register value */
 	uint32_t		reg_ndcr;
 	uint32_t		ndtr0cs0;
@@ -526,25 +566,6 @@
 	return 0;
 }
 
-/*
- * Set the data and OOB size, depending on the selected
- * spare and ECC configuration.
- * Only applicable to READ0, READOOB and PAGEPROG commands.
- */
-static void pxa3xx_set_datasize(struct pxa3xx_nand_info *info,
-				struct mtd_info *mtd)
-{
-	int oob_enable = info->reg_ndcr & NDCR_SPARE_EN;
-
-	info->data_size = mtd->writesize;
-	if (!oob_enable)
-		return;
-
-	info->oob_size = info->spare_size;
-	if (!info->use_ecc)
-		info->oob_size += info->ecc_size;
-}
-
 /**
  * NOTE: it is a must to set ND_RUN firstly, then write
  * command buffer, otherwise, it does not work.
@@ -660,28 +681,28 @@
 
 static void handle_data_pio(struct pxa3xx_nand_info *info)
 {
-	unsigned int do_bytes = min(info->data_size, info->chunk_size);
-
 	switch (info->state) {
 	case STATE_PIO_WRITING:
-		writesl(info->mmio_base + NDDB,
-			info->data_buff + info->data_buff_pos,
-			DIV_ROUND_UP(do_bytes, 4));
+		if (info->step_chunk_size)
+			writesl(info->mmio_base + NDDB,
+				info->data_buff + info->data_buff_pos,
+				DIV_ROUND_UP(info->step_chunk_size, 4));
 
-		if (info->oob_size > 0)
+		if (info->step_spare_size)
 			writesl(info->mmio_base + NDDB,
 				info->oob_buff + info->oob_buff_pos,
-				DIV_ROUND_UP(info->oob_size, 4));
+				DIV_ROUND_UP(info->step_spare_size, 4));
 		break;
 	case STATE_PIO_READING:
-		drain_fifo(info,
-			   info->data_buff + info->data_buff_pos,
-			   DIV_ROUND_UP(do_bytes, 4));
+		if (info->step_chunk_size)
+			drain_fifo(info,
+				   info->data_buff + info->data_buff_pos,
+				   DIV_ROUND_UP(info->step_chunk_size, 4));
 
-		if (info->oob_size > 0)
+		if (info->step_spare_size)
 			drain_fifo(info,
 				   info->oob_buff + info->oob_buff_pos,
-				   DIV_ROUND_UP(info->oob_size, 4));
+				   DIV_ROUND_UP(info->step_spare_size, 4));
 		break;
 	default:
 		dev_err(&info->pdev->dev, "%s: invalid state %d\n", __func__,
@@ -690,9 +711,8 @@
 	}
 
 	/* Update buffer pointers for multi-page read/write */
-	info->data_buff_pos += do_bytes;
-	info->oob_buff_pos += info->oob_size;
-	info->data_size -= do_bytes;
+	info->data_buff_pos += info->step_chunk_size;
+	info->oob_buff_pos += info->step_spare_size;
 }
 
 static void pxa3xx_nand_data_dma_irq(void *data)
@@ -733,8 +753,9 @@
 				info->state);
 		BUG();
 	}
-	info->sg.length = info->data_size +
-		(info->oob_size ? info->spare_size + info->ecc_size : 0);
+	info->sg.length = info->chunk_size;
+	if (info->use_spare)
+		info->sg.length += info->spare_size + info->ecc_size;
 	dma_map_sg(info->dma_chan->device->dev, &info->sg, 1, info->dma_dir);
 
 	tx = dmaengine_prep_slave_sg(info->dma_chan, &info->sg, 1, direction,
@@ -895,9 +916,11 @@
 	/* reset data and oob column point to handle data */
 	info->buf_start		= 0;
 	info->buf_count		= 0;
-	info->oob_size		= 0;
 	info->data_buff_pos	= 0;
 	info->oob_buff_pos	= 0;
+	info->step_chunk_size   = 0;
+	info->step_spare_size   = 0;
+	info->cur_chunk         = 0;
 	info->use_ecc		= 0;
 	info->use_spare		= 1;
 	info->retcode		= ERR_NONE;
@@ -909,8 +932,6 @@
 	case NAND_CMD_READ0:
 	case NAND_CMD_PAGEPROG:
 		info->use_ecc = 1;
-	case NAND_CMD_READOOB:
-		pxa3xx_set_datasize(info, mtd);
 		break;
 	case NAND_CMD_PARAM:
 		info->use_spare = 0;
@@ -969,6 +990,14 @@
 		if (command == NAND_CMD_READOOB)
 			info->buf_start += mtd->writesize;
 
+		if (info->cur_chunk < info->nfullchunks) {
+			info->step_chunk_size = info->chunk_size;
+			info->step_spare_size = info->spare_size;
+		} else {
+			info->step_chunk_size = info->last_chunk_size;
+			info->step_spare_size = info->last_spare_size;
+		}
+
 		/*
 		 * Multiple page read needs an 'extended command type' field,
 		 * which is either naked-read or last-read according to the
@@ -980,8 +1009,8 @@
 			info->ndcb0 |= NDCB0_DBC | (NAND_CMD_READSTART << 8)
 					| NDCB0_LEN_OVRD
 					| NDCB0_EXT_CMD_TYPE(ext_cmd_type);
-			info->ndcb3 = info->chunk_size +
-				      info->oob_size;
+			info->ndcb3 = info->step_chunk_size +
+				info->step_spare_size;
 		}
 
 		set_command_address(info, mtd->writesize, column, page_addr);
@@ -1001,8 +1030,6 @@
 				| NDCB0_EXT_CMD_TYPE(ext_cmd_type)
 				| addr_cycle
 				| command;
-			/* No data transfer in this case */
-			info->data_size = 0;
 			exec_cmd = 1;
 		}
 		break;
@@ -1014,6 +1041,14 @@
 			break;
 		}
 
+		if (info->cur_chunk < info->nfullchunks) {
+			info->step_chunk_size = info->chunk_size;
+			info->step_spare_size = info->spare_size;
+		} else {
+			info->step_chunk_size = info->last_chunk_size;
+			info->step_spare_size = info->last_spare_size;
+		}
+
 		/* Second command setting for large pages */
 		if (mtd->writesize > PAGE_CHUNK_SIZE) {
 			/*
@@ -1024,14 +1059,14 @@
 			info->ndcb0 |= NDCB0_CMD_TYPE(0x1)
 					| NDCB0_LEN_OVRD
 					| NDCB0_EXT_CMD_TYPE(ext_cmd_type);
-			info->ndcb3 = info->chunk_size +
-				      info->oob_size;
+			info->ndcb3 = info->step_chunk_size +
+				      info->step_spare_size;
 
 			/*
 			 * This is the command dispatch that completes a chunked
 			 * page program operation.
 			 */
-			if (info->data_size == 0) {
+			if (info->cur_chunk == info->ntotalchunks) {
 				info->ndcb0 = NDCB0_CMD_TYPE(0x1)
 					| NDCB0_EXT_CMD_TYPE(ext_cmd_type)
 					| command;
@@ -1058,7 +1093,7 @@
 				| command;
 		info->ndcb1 = (column & 0xFF);
 		info->ndcb3 = INIT_BUFFER_SIZE;
-		info->data_size = INIT_BUFFER_SIZE;
+		info->step_chunk_size = INIT_BUFFER_SIZE;
 		break;
 
 	case NAND_CMD_READID:
@@ -1068,7 +1103,7 @@
 				| command;
 		info->ndcb1 = (column & 0xFF);
 
-		info->data_size = 8;
+		info->step_chunk_size = 8;
 		break;
 	case NAND_CMD_STATUS:
 		info->buf_count = 1;
@@ -1076,7 +1111,7 @@
 				| NDCB0_ADDR_CYC(1)
 				| command;
 
-		info->data_size = 8;
+		info->step_chunk_size = 8;
 		break;
 
 	case NAND_CMD_ERASE1:
@@ -1217,6 +1252,7 @@
 	init_completion(&info->dev_ready);
 	do {
 		info->state = STATE_PREPARED;
+
 		exec_cmd = prepare_set_command(info, command, ext_cmd_type,
 					       column, page_addr);
 		if (!exec_cmd) {
@@ -1236,22 +1272,30 @@
 			break;
 		}
 
+		/* Only a few commands need several steps */
+		if (command != NAND_CMD_PAGEPROG &&
+		    command != NAND_CMD_READ0    &&
+		    command != NAND_CMD_READOOB)
+			break;
+
+		info->cur_chunk++;
+
 		/* Check if the sequence is complete */
-		if (info->data_size == 0 && command != NAND_CMD_PAGEPROG)
+		if (info->cur_chunk == info->ntotalchunks && command != NAND_CMD_PAGEPROG)
 			break;
 
 		/*
 		 * After a splitted program command sequence has issued
 		 * the command dispatch, the command sequence is complete.
 		 */
-		if (info->data_size == 0 &&
+		if (info->cur_chunk == (info->ntotalchunks + 1) &&
 		    command == NAND_CMD_PAGEPROG &&
 		    ext_cmd_type == EXT_CMD_TYPE_DISPATCH)
 			break;
 
 		if (command == NAND_CMD_READ0 || command == NAND_CMD_READOOB) {
 			/* Last read: issue a 'last naked read' */
-			if (info->data_size == info->chunk_size)
+			if (info->cur_chunk == info->ntotalchunks - 1)
 				ext_cmd_type = EXT_CMD_TYPE_LAST_RW;
 			else
 				ext_cmd_type = EXT_CMD_TYPE_NAKED_RW;
@@ -1261,7 +1305,7 @@
 		 * the command dispatch must be issued to complete.
 		 */
 		} else if (command == NAND_CMD_PAGEPROG &&
-			   info->data_size == 0) {
+			   info->cur_chunk == info->ntotalchunks) {
 				ext_cmd_type = EXT_CMD_TYPE_DISPATCH;
 		}
 	} while (1);
@@ -1506,6 +1550,8 @@
 			int strength, int ecc_stepsize, int page_size)
 {
 	if (strength == 1 && ecc_stepsize == 512 && page_size == 2048) {
+		info->nfullchunks = 1;
+		info->ntotalchunks = 1;
 		info->chunk_size = 2048;
 		info->spare_size = 40;
 		info->ecc_size = 24;
@@ -1514,6 +1560,8 @@
 		ecc->strength = 1;
 
 	} else if (strength == 1 && ecc_stepsize == 512 && page_size == 512) {
+		info->nfullchunks = 1;
+		info->ntotalchunks = 1;
 		info->chunk_size = 512;
 		info->spare_size = 8;
 		info->ecc_size = 8;
@@ -1527,6 +1575,8 @@
 	 */
 	} else if (strength == 4 && ecc_stepsize == 512 && page_size == 2048) {
 		info->ecc_bch = 1;
+		info->nfullchunks = 1;
+		info->ntotalchunks = 1;
 		info->chunk_size = 2048;
 		info->spare_size = 32;
 		info->ecc_size = 32;
@@ -1537,6 +1587,8 @@
 
 	} else if (strength == 4 && ecc_stepsize == 512 && page_size == 4096) {
 		info->ecc_bch = 1;
+		info->nfullchunks = 2;
+		info->ntotalchunks = 2;
 		info->chunk_size = 2048;
 		info->spare_size = 32;
 		info->ecc_size = 32;
@@ -1551,8 +1603,12 @@
 	 */
 	} else if (strength == 8 && ecc_stepsize == 512 && page_size == 4096) {
 		info->ecc_bch = 1;
+		info->nfullchunks = 4;
+		info->ntotalchunks = 5;
 		info->chunk_size = 1024;
 		info->spare_size = 0;
+		info->last_chunk_size = 0;
+		info->last_spare_size = 64;
 		info->ecc_size = 32;
 		ecc->mode = NAND_ECC_HW;
 		ecc->size = info->chunk_size;
@@ -1738,7 +1794,7 @@
 	if (ret < 0)
 		return ret;
 
-	if (use_dma) {
+	if (!np && use_dma) {
 		r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
 		if (r == NULL) {
 			dev_err(&pdev->dev,
@@ -1747,15 +1803,6 @@
 			goto fail_disable_clk;
 		}
 		info->drcmr_dat = r->start;
-
-		r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-		if (r == NULL) {
-			dev_err(&pdev->dev,
-				"no resource defined for cmd DMA\n");
-			ret = -ENXIO;
-			goto fail_disable_clk;
-		}
-		info->drcmr_cmd = r->start;
 	}
 
 	irq = platform_get_irq(pdev, 0);

diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
new file mode 100644
index 0000000..f550a57
--- /dev/null
+++ b/drivers/mtd/nand/qcom_nandc.c

@@ -0,0 +1,2223 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_mtd.h>
+#include <linux/delay.h>
+
+/* NANDc reg offsets */
+#define	NAND_FLASH_CMD			0x00
+#define	NAND_ADDR0			0x04
+#define	NAND_ADDR1			0x08
+#define	NAND_FLASH_CHIP_SELECT		0x0c
+#define	NAND_EXEC_CMD			0x10
+#define	NAND_FLASH_STATUS		0x14
+#define	NAND_BUFFER_STATUS		0x18
+#define	NAND_DEV0_CFG0			0x20
+#define	NAND_DEV0_CFG1			0x24
+#define	NAND_DEV0_ECC_CFG		0x28
+#define	NAND_DEV1_ECC_CFG		0x2c
+#define	NAND_DEV1_CFG0			0x30
+#define	NAND_DEV1_CFG1			0x34
+#define	NAND_READ_ID			0x40
+#define	NAND_READ_STATUS		0x44
+#define	NAND_DEV_CMD0			0xa0
+#define	NAND_DEV_CMD1			0xa4
+#define	NAND_DEV_CMD2			0xa8
+#define	NAND_DEV_CMD_VLD		0xac
+#define	SFLASHC_BURST_CFG		0xe0
+#define	NAND_ERASED_CW_DETECT_CFG	0xe8
+#define	NAND_ERASED_CW_DETECT_STATUS	0xec
+#define	NAND_EBI2_ECC_BUF_CFG		0xf0
+#define	FLASH_BUF_ACC			0x100
+
+#define	NAND_CTRL			0xf00
+#define	NAND_VERSION			0xf08
+#define	NAND_READ_LOCATION_0		0xf20
+#define	NAND_READ_LOCATION_1		0xf24
+
+/* dummy register offsets, used by write_reg_dma */
+#define	NAND_DEV_CMD1_RESTORE		0xdead
+#define	NAND_DEV_CMD_VLD_RESTORE	0xbeef
+
+/* NAND_FLASH_CMD bits */
+#define	PAGE_ACC			BIT(4)
+#define	LAST_PAGE			BIT(5)
+
+/* NAND_FLASH_CHIP_SELECT bits */
+#define	NAND_DEV_SEL			0
+#define	DM_EN				BIT(2)
+
+/* NAND_FLASH_STATUS bits */
+#define	FS_OP_ERR			BIT(4)
+#define	FS_READY_BSY_N			BIT(5)
+#define	FS_MPU_ERR			BIT(8)
+#define	FS_DEVICE_STS_ERR		BIT(16)
+#define	FS_DEVICE_WP			BIT(23)
+
+/* NAND_BUFFER_STATUS bits */
+#define	BS_UNCORRECTABLE_BIT		BIT(8)
+#define	BS_CORRECTABLE_ERR_MSK		0x1f
+
+/* NAND_DEVn_CFG0 bits */
+#define	DISABLE_STATUS_AFTER_WRITE	4
+#define	CW_PER_PAGE			6
+#define	UD_SIZE_BYTES			9
+#define	ECC_PARITY_SIZE_BYTES_RS	19
+#define	SPARE_SIZE_BYTES		23
+#define	NUM_ADDR_CYCLES			27
+#define	STATUS_BFR_READ			30
+#define	SET_RD_MODE_AFTER_STATUS	31
+
+/* NAND_DEVn_CFG0 bits */
+#define	DEV0_CFG1_ECC_DISABLE		0
+#define	WIDE_FLASH			1
+#define	NAND_RECOVERY_CYCLES		2
+#define	CS_ACTIVE_BSY			5
+#define	BAD_BLOCK_BYTE_NUM		6
+#define	BAD_BLOCK_IN_SPARE_AREA		16
+#define	WR_RD_BSY_GAP			17
+#define	ENABLE_BCH_ECC			27
+
+/* NAND_DEV0_ECC_CFG bits */
+#define	ECC_CFG_ECC_DISABLE		0
+#define	ECC_SW_RESET			1
+#define	ECC_MODE			4
+#define	ECC_PARITY_SIZE_BYTES_BCH	8
+#define	ECC_NUM_DATA_BYTES		16
+#define	ECC_FORCE_CLK_OPEN		30
+
+/* NAND_DEV_CMD1 bits */
+#define	READ_ADDR			0
+
+/* NAND_DEV_CMD_VLD bits */
+#define	READ_START_VLD			0
+
+/* NAND_EBI2_ECC_BUF_CFG bits */
+#define	NUM_STEPS			0
+
+/* NAND_ERASED_CW_DETECT_CFG bits */
+#define	ERASED_CW_ECC_MASK		1
+#define	AUTO_DETECT_RES			0
+#define	MASK_ECC			(1 << ERASED_CW_ECC_MASK)
+#define	RESET_ERASED_DET		(1 << AUTO_DETECT_RES)
+#define	ACTIVE_ERASED_DET		(0 << AUTO_DETECT_RES)
+#define	CLR_ERASED_PAGE_DET		(RESET_ERASED_DET | MASK_ECC)
+#define	SET_ERASED_PAGE_DET		(ACTIVE_ERASED_DET | MASK_ECC)
+
+/* NAND_ERASED_CW_DETECT_STATUS bits */
+#define	PAGE_ALL_ERASED			BIT(7)
+#define	CODEWORD_ALL_ERASED		BIT(6)
+#define	PAGE_ERASED			BIT(5)
+#define	CODEWORD_ERASED			BIT(4)
+#define	ERASED_PAGE			(PAGE_ALL_ERASED | PAGE_ERASED)
+#define	ERASED_CW			(CODEWORD_ALL_ERASED | CODEWORD_ERASED)
+
+/* Version Mask */
+#define	NAND_VERSION_MAJOR_MASK		0xf0000000
+#define	NAND_VERSION_MAJOR_SHIFT	28
+#define	NAND_VERSION_MINOR_MASK		0x0fff0000
+#define	NAND_VERSION_MINOR_SHIFT	16
+
+/* NAND OP_CMDs */
+#define	PAGE_READ			0x2
+#define	PAGE_READ_WITH_ECC		0x3
+#define	PAGE_READ_WITH_ECC_SPARE	0x4
+#define	PROGRAM_PAGE			0x6
+#define	PAGE_PROGRAM_WITH_ECC		0x7
+#define	PROGRAM_PAGE_SPARE		0x9
+#define	BLOCK_ERASE			0xa
+#define	FETCH_ID			0xb
+#define	RESET_DEVICE			0xd
+
+/*
+ * the NAND controller performs reads/writes with ECC in 516 byte chunks.
+ * the driver calls the chunks 'step' or 'codeword' interchangeably
+ */
+#define	NANDC_STEP_SIZE			512
+
+/*
+ * the largest page size we support is 8K, this will have 16 steps/codewords
+ * of 512 bytes each
+ */
+#define	MAX_NUM_STEPS			(SZ_8K / NANDC_STEP_SIZE)
+
+/* we read at most 3 registers per codeword scan */
+#define	MAX_REG_RD			(3 * MAX_NUM_STEPS)
+
+/* ECC modes supported by the controller */
+#define	ECC_NONE	BIT(0)
+#define	ECC_RS_4BIT	BIT(1)
+#define	ECC_BCH_4BIT	BIT(2)
+#define	ECC_BCH_8BIT	BIT(3)
+
+struct desc_info {
+	struct list_head node;
+
+	enum dma_data_direction dir;
+	struct scatterlist sgl;
+	struct dma_async_tx_descriptor *dma_desc;
+};
+
+/*
+ * holds the current register values that we want to write. acts as a contiguous
+ * chunk of memory which we use to write the controller registers through DMA.
+ */
+struct nandc_regs {
+	__le32 cmd;
+	__le32 addr0;
+	__le32 addr1;
+	__le32 chip_sel;
+	__le32 exec;
+
+	__le32 cfg0;
+	__le32 cfg1;
+	__le32 ecc_bch_cfg;
+
+	__le32 clrflashstatus;
+	__le32 clrreadstatus;
+
+	__le32 cmd1;
+	__le32 vld;
+
+	__le32 orig_cmd1;
+	__le32 orig_vld;
+
+	__le32 ecc_buf_cfg;
+};
+
+/*
+ * NAND controller data struct
+ *
+ * @controller:			base controller structure
+ * @host_list:			list containing all the chips attached to the
+ *				controller
+ * @dev:			parent device
+ * @base:			MMIO base
+ * @base_dma:			physical base address of controller registers
+ * @core_clk:			controller clock
+ * @aon_clk:			another controller clock
+ *
+ * @chan:			dma channel
+ * @cmd_crci:			ADM DMA CRCI for command flow control
+ * @data_crci:			ADM DMA CRCI for data flow control
+ * @desc_list:			DMA descriptor list (list of desc_infos)
+ *
+ * @data_buffer:		our local DMA buffer for page read/writes,
+ *				used when we can't use the buffer provided
+ *				by upper layers directly
+ * @buf_size/count/start:	markers for chip->read_buf/write_buf functions
+ * @reg_read_buf:		local buffer for reading back registers via DMA
+ * @reg_read_pos:		marker for data read in reg_read_buf
+ *
+ * @regs:			a contiguous chunk of memory for DMA register
+ *				writes. contains the register values to be
+ *				written to controller
+ * @cmd1/vld:			some fixed controller register values
+ * @ecc_modes:			supported ECC modes by the current controller,
+ *				initialized via DT match data
+ */
+struct qcom_nand_controller {
+	struct nand_hw_control controller;
+	struct list_head host_list;
+
+	struct device *dev;
+
+	void __iomem *base;
+	dma_addr_t base_dma;
+
+	struct clk *core_clk;
+	struct clk *aon_clk;
+
+	struct dma_chan *chan;
+	unsigned int cmd_crci;
+	unsigned int data_crci;
+	struct list_head desc_list;
+
+	u8		*data_buffer;
+	int		buf_size;
+	int		buf_count;
+	int		buf_start;
+
+	__le32 *reg_read_buf;
+	int reg_read_pos;
+
+	struct nandc_regs *regs;
+
+	u32 cmd1, vld;
+	u32 ecc_modes;
+};
+
+/*
+ * NAND chip structure
+ *
+ * @chip:			base NAND chip structure
+ * @node:			list node to add itself to host_list in
+ *				qcom_nand_controller
+ *
+ * @cs:				chip select value for this chip
+ * @cw_size:			the number of bytes in a single step/codeword
+ *				of a page, consisting of all data, ecc, spare
+ *				and reserved bytes
+ * @cw_data:			the number of bytes within a codeword protected
+ *				by ECC
+ * @use_ecc:			request the controller to use ECC for the
+ *				upcoming read/write
+ * @bch_enabled:		flag to tell whether BCH ECC mode is used
+ * @ecc_bytes_hw:		ECC bytes used by controller hardware for this
+ *				chip
+ * @status:			value to be returned if NAND_CMD_STATUS command
+ *				is executed
+ * @last_command:		keeps track of last command on this chip. used
+ *				for reading correct status
+ *
+ * @cfg0, cfg1, cfg0_raw..:	NANDc register configurations needed for
+ *				ecc/non-ecc mode for the current nand flash
+ *				device
+ */
+struct qcom_nand_host {
+	struct nand_chip chip;
+	struct list_head node;
+
+	int cs;
+	int cw_size;
+	int cw_data;
+	bool use_ecc;
+	bool bch_enabled;
+	int ecc_bytes_hw;
+	int spare_bytes;
+	int bbm_size;
+	u8 status;
+	int last_command;
+
+	u32 cfg0, cfg1;
+	u32 cfg0_raw, cfg1_raw;
+	u32 ecc_buf_cfg;
+	u32 ecc_bch_cfg;
+	u32 clrflashstatus;
+	u32 clrreadstatus;
+};
+
+static inline struct qcom_nand_host *to_qcom_nand_host(struct nand_chip *chip)
+{
+	return container_of(chip, struct qcom_nand_host, chip);
+}
+
+static inline struct qcom_nand_controller *
+get_qcom_nand_controller(struct nand_chip *chip)
+{
+	return container_of(chip->controller, struct qcom_nand_controller,
+			    controller);
+}
+
+static inline u32 nandc_read(struct qcom_nand_controller *nandc, int offset)
+{
+	return ioread32(nandc->base + offset);
+}
+
+static inline void nandc_write(struct qcom_nand_controller *nandc, int offset,
+			       u32 val)
+{
+	iowrite32(val, nandc->base + offset);
+}
+
+static __le32 *offset_to_nandc_reg(struct nandc_regs *regs, int offset)
+{
+	switch (offset) {
+	case NAND_FLASH_CMD:
+		return &regs->cmd;
+	case NAND_ADDR0:
+		return &regs->addr0;
+	case NAND_ADDR1:
+		return &regs->addr1;
+	case NAND_FLASH_CHIP_SELECT:
+		return &regs->chip_sel;
+	case NAND_EXEC_CMD:
+		return &regs->exec;
+	case NAND_FLASH_STATUS:
+		return &regs->clrflashstatus;
+	case NAND_DEV0_CFG0:
+		return &regs->cfg0;
+	case NAND_DEV0_CFG1:
+		return &regs->cfg1;
+	case NAND_DEV0_ECC_CFG:
+		return &regs->ecc_bch_cfg;
+	case NAND_READ_STATUS:
+		return &regs->clrreadstatus;
+	case NAND_DEV_CMD1:
+		return &regs->cmd1;
+	case NAND_DEV_CMD1_RESTORE:
+		return &regs->orig_cmd1;
+	case NAND_DEV_CMD_VLD:
+		return &regs->vld;
+	case NAND_DEV_CMD_VLD_RESTORE:
+		return &regs->orig_vld;
+	case NAND_EBI2_ECC_BUF_CFG:
+		return &regs->ecc_buf_cfg;
+	default:
+		return NULL;
+	}
+}
+
+static void nandc_set_reg(struct qcom_nand_controller *nandc, int offset,
+			  u32 val)
+{
+	struct nandc_regs *regs = nandc->regs;
+	__le32 *reg;
+
+	reg = offset_to_nandc_reg(regs, offset);
+
+	if (reg)
+		*reg = cpu_to_le32(val);
+}
+
+/* helper to configure address register values */
+static void set_address(struct qcom_nand_host *host, u16 column, int page)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		column >>= 1;
+
+	nandc_set_reg(nandc, NAND_ADDR0, page << 16 | column);
+	nandc_set_reg(nandc, NAND_ADDR1, page >> 16 & 0xff);
+}
+
+/*
+ * update_rw_regs:	set up read/write register values, these will be
+ *			written to the NAND controller registers via DMA
+ *
+ * @num_cw:		number of steps for the read/write operation
+ * @read:		read or write operation
+ */
+static void update_rw_regs(struct qcom_nand_host *host, int num_cw, bool read)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	u32 cmd, cfg0, cfg1, ecc_bch_cfg;
+
+	if (read) {
+		if (host->use_ecc)
+			cmd = PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE;
+		else
+			cmd = PAGE_READ | PAGE_ACC | LAST_PAGE;
+	} else {
+			cmd = PROGRAM_PAGE | PAGE_ACC | LAST_PAGE;
+	}
+
+	if (host->use_ecc) {
+		cfg0 = (host->cfg0 & ~(7U << CW_PER_PAGE)) |
+				(num_cw - 1) << CW_PER_PAGE;
+
+		cfg1 = host->cfg1;
+		ecc_bch_cfg = host->ecc_bch_cfg;
+	} else {
+		cfg0 = (host->cfg0_raw & ~(7U << CW_PER_PAGE)) |
+				(num_cw - 1) << CW_PER_PAGE;
+
+		cfg1 = host->cfg1_raw;
+		ecc_bch_cfg = 1 << ECC_CFG_ECC_DISABLE;
+	}
+
+	nandc_set_reg(nandc, NAND_FLASH_CMD, cmd);
+	nandc_set_reg(nandc, NAND_DEV0_CFG0, cfg0);
+	nandc_set_reg(nandc, NAND_DEV0_CFG1, cfg1);
+	nandc_set_reg(nandc, NAND_DEV0_ECC_CFG, ecc_bch_cfg);
+	nandc_set_reg(nandc, NAND_EBI2_ECC_BUF_CFG, host->ecc_buf_cfg);
+	nandc_set_reg(nandc, NAND_FLASH_STATUS, host->clrflashstatus);
+	nandc_set_reg(nandc, NAND_READ_STATUS, host->clrreadstatus);
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+}
+
+static int prep_dma_desc(struct qcom_nand_controller *nandc, bool read,
+			 int reg_off, const void *vaddr, int size,
+			 bool flow_control)
+{
+	struct desc_info *desc;
+	struct dma_async_tx_descriptor *dma_desc;
+	struct scatterlist *sgl;
+	struct dma_slave_config slave_conf;
+	enum dma_transfer_direction dir_eng;
+	int ret;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	sgl = &desc->sgl;
+
+	sg_init_one(sgl, vaddr, size);
+
+	if (read) {
+		dir_eng = DMA_DEV_TO_MEM;
+		desc->dir = DMA_FROM_DEVICE;
+	} else {
+		dir_eng = DMA_MEM_TO_DEV;
+		desc->dir = DMA_TO_DEVICE;
+	}
+
+	ret = dma_map_sg(nandc->dev, sgl, 1, desc->dir);
+	if (ret == 0) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	memset(&slave_conf, 0x00, sizeof(slave_conf));
+
+	slave_conf.device_fc = flow_control;
+	if (read) {
+		slave_conf.src_maxburst = 16;
+		slave_conf.src_addr = nandc->base_dma + reg_off;
+		slave_conf.slave_id = nandc->data_crci;
+	} else {
+		slave_conf.dst_maxburst = 16;
+		slave_conf.dst_addr = nandc->base_dma + reg_off;
+		slave_conf.slave_id = nandc->cmd_crci;
+	}
+
+	ret = dmaengine_slave_config(nandc->chan, &slave_conf);
+	if (ret) {
+		dev_err(nandc->dev, "failed to configure dma channel\n");
+		goto err;
+	}
+
+	dma_desc = dmaengine_prep_slave_sg(nandc->chan, sgl, 1, dir_eng, 0);
+	if (!dma_desc) {
+		dev_err(nandc->dev, "failed to prepare desc\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	desc->dma_desc = dma_desc;
+
+	list_add_tail(&desc->node, &nandc->desc_list);
+
+	return 0;
+err:
+	kfree(desc);
+
+	return ret;
+}
+
+/*
+ * read_reg_dma:	prepares a descriptor to read a given number of
+ *			contiguous registers to the reg_read_buf pointer
+ *
+ * @first:		offset of the first register in the contiguous block
+ * @num_regs:		number of registers to read
+ */
+static int read_reg_dma(struct qcom_nand_controller *nandc, int first,
+			int num_regs)
+{
+	bool flow_control = false;
+	void *vaddr;
+	int size;
+
+	if (first == NAND_READ_ID || first == NAND_FLASH_STATUS)
+		flow_control = true;
+
+	size = num_regs * sizeof(u32);
+	vaddr = nandc->reg_read_buf + nandc->reg_read_pos;
+	nandc->reg_read_pos += num_regs;
+
+	return prep_dma_desc(nandc, true, first, vaddr, size, flow_control);
+}
+
+/*
+ * write_reg_dma:	prepares a descriptor to write a given number of
+ *			contiguous registers
+ *
+ * @first:		offset of the first register in the contiguous block
+ * @num_regs:		number of registers to write
+ */
+static int write_reg_dma(struct qcom_nand_controller *nandc, int first,
+			 int num_regs)
+{
+	bool flow_control = false;
+	struct nandc_regs *regs = nandc->regs;
+	void *vaddr;
+	int size;
+
+	vaddr = offset_to_nandc_reg(regs, first);
+
+	if (first == NAND_FLASH_CMD)
+		flow_control = true;
+
+	if (first == NAND_DEV_CMD1_RESTORE)
+		first = NAND_DEV_CMD1;
+
+	if (first == NAND_DEV_CMD_VLD_RESTORE)
+		first = NAND_DEV_CMD_VLD;
+
+	size = num_regs * sizeof(u32);
+
+	return prep_dma_desc(nandc, false, first, vaddr, size, flow_control);
+}
+
+/*
+ * read_data_dma:	prepares a DMA descriptor to transfer data from the
+ *			controller's internal buffer to the buffer 'vaddr'
+ *
+ * @reg_off:		offset within the controller's data buffer
+ * @vaddr:		virtual address of the buffer we want to write to
+ * @size:		DMA transaction size in bytes
+ */
+static int read_data_dma(struct qcom_nand_controller *nandc, int reg_off,
+			 const u8 *vaddr, int size)
+{
+	return prep_dma_desc(nandc, true, reg_off, vaddr, size, false);
+}
+
+/*
+ * write_data_dma:	prepares a DMA descriptor to transfer data from
+ *			'vaddr' to the controller's internal buffer
+ *
+ * @reg_off:		offset within the controller's data buffer
+ * @vaddr:		virtual address of the buffer we want to read from
+ * @size:		DMA transaction size in bytes
+ */
+static int write_data_dma(struct qcom_nand_controller *nandc, int reg_off,
+			  const u8 *vaddr, int size)
+{
+	return prep_dma_desc(nandc, false, reg_off, vaddr, size, false);
+}
+
+/*
+ * helper to prepare dma descriptors to configure registers needed for reading a
+ * codeword/step in a page
+ */
+static void config_cw_read(struct qcom_nand_controller *nandc)
+{
+	write_reg_dma(nandc, NAND_FLASH_CMD, 3);
+	write_reg_dma(nandc, NAND_DEV0_CFG0, 3);
+	write_reg_dma(nandc, NAND_EBI2_ECC_BUF_CFG, 1);
+
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_FLASH_STATUS, 2);
+	read_reg_dma(nandc, NAND_ERASED_CW_DETECT_STATUS, 1);
+}
+
+/*
+ * helpers to prepare dma descriptors used to configure registers needed for
+ * writing a codeword/step in a page
+ */
+static void config_cw_write_pre(struct qcom_nand_controller *nandc)
+{
+	write_reg_dma(nandc, NAND_FLASH_CMD, 3);
+	write_reg_dma(nandc, NAND_DEV0_CFG0, 3);
+	write_reg_dma(nandc, NAND_EBI2_ECC_BUF_CFG, 1);
+}
+
+static void config_cw_write_post(struct qcom_nand_controller *nandc)
+{
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+
+	write_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+	write_reg_dma(nandc, NAND_READ_STATUS, 1);
+}
+
+/*
+ * the following functions are used within chip->cmdfunc() to perform different
+ * NAND_CMD_* commands
+ */
+
+/* sets up descriptors for NAND_CMD_PARAM */
+static int nandc_param(struct qcom_nand_host *host)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	/*
+	 * NAND_CMD_PARAM is called before we know much about the FLASH chip
+	 * in use. we configure the controller to perform a raw read of 512
+	 * bytes to read onfi params
+	 */
+	nandc_set_reg(nandc, NAND_FLASH_CMD, PAGE_READ | PAGE_ACC | LAST_PAGE);
+	nandc_set_reg(nandc, NAND_ADDR0, 0);
+	nandc_set_reg(nandc, NAND_ADDR1, 0);
+	nandc_set_reg(nandc, NAND_DEV0_CFG0, 0 << CW_PER_PAGE
+					| 512 << UD_SIZE_BYTES
+					| 5 << NUM_ADDR_CYCLES
+					| 0 << SPARE_SIZE_BYTES);
+	nandc_set_reg(nandc, NAND_DEV0_CFG1, 7 << NAND_RECOVERY_CYCLES
+					| 0 << CS_ACTIVE_BSY
+					| 17 << BAD_BLOCK_BYTE_NUM
+					| 1 << BAD_BLOCK_IN_SPARE_AREA
+					| 2 << WR_RD_BSY_GAP
+					| 0 << WIDE_FLASH
+					| 1 << DEV0_CFG1_ECC_DISABLE);
+	nandc_set_reg(nandc, NAND_EBI2_ECC_BUF_CFG, 1 << ECC_CFG_ECC_DISABLE);
+
+	/* configure CMD1 and VLD for ONFI param probing */
+	nandc_set_reg(nandc, NAND_DEV_CMD_VLD,
+		      (nandc->vld & ~(1 << READ_START_VLD))
+		      | 0 << READ_START_VLD);
+	nandc_set_reg(nandc, NAND_DEV_CMD1,
+		      (nandc->cmd1 & ~(0xFF << READ_ADDR))
+		      | NAND_CMD_PARAM << READ_ADDR);
+
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+
+	nandc_set_reg(nandc, NAND_DEV_CMD1_RESTORE, nandc->cmd1);
+	nandc_set_reg(nandc, NAND_DEV_CMD_VLD_RESTORE, nandc->vld);
+
+	write_reg_dma(nandc, NAND_DEV_CMD_VLD, 1);
+	write_reg_dma(nandc, NAND_DEV_CMD1, 1);
+
+	nandc->buf_count = 512;
+	memset(nandc->data_buffer, 0xff, nandc->buf_count);
+
+	config_cw_read(nandc);
+
+	read_data_dma(nandc, FLASH_BUF_ACC, nandc->data_buffer,
+		      nandc->buf_count);
+
+	/* restore CMD1 and VLD regs */
+	write_reg_dma(nandc, NAND_DEV_CMD1_RESTORE, 1);
+	write_reg_dma(nandc, NAND_DEV_CMD_VLD_RESTORE, 1);
+
+	return 0;
+}
+
+/* sets up descriptors for NAND_CMD_ERASE1 */
+static int erase_block(struct qcom_nand_host *host, int page_addr)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	nandc_set_reg(nandc, NAND_FLASH_CMD,
+		      BLOCK_ERASE | PAGE_ACC | LAST_PAGE);
+	nandc_set_reg(nandc, NAND_ADDR0, page_addr);
+	nandc_set_reg(nandc, NAND_ADDR1, 0);
+	nandc_set_reg(nandc, NAND_DEV0_CFG0,
+		      host->cfg0_raw & ~(7 << CW_PER_PAGE));
+	nandc_set_reg(nandc, NAND_DEV0_CFG1, host->cfg1_raw);
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+	nandc_set_reg(nandc, NAND_FLASH_STATUS, host->clrflashstatus);
+	nandc_set_reg(nandc, NAND_READ_STATUS, host->clrreadstatus);
+
+	write_reg_dma(nandc, NAND_FLASH_CMD, 3);
+	write_reg_dma(nandc, NAND_DEV0_CFG0, 2);
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+
+	write_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+	write_reg_dma(nandc, NAND_READ_STATUS, 1);
+
+	return 0;
+}
+
+/* sets up descriptors for NAND_CMD_READID */
+static int read_id(struct qcom_nand_host *host, int column)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	if (column == -1)
+		return 0;
+
+	nandc_set_reg(nandc, NAND_FLASH_CMD, FETCH_ID);
+	nandc_set_reg(nandc, NAND_ADDR0, column);
+	nandc_set_reg(nandc, NAND_ADDR1, 0);
+	nandc_set_reg(nandc, NAND_FLASH_CHIP_SELECT, DM_EN);
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+
+	write_reg_dma(nandc, NAND_FLASH_CMD, 4);
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_READ_ID, 1);
+
+	return 0;
+}
+
+/* sets up descriptors for NAND_CMD_RESET */
+static int reset(struct qcom_nand_host *host)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	nandc_set_reg(nandc, NAND_FLASH_CMD, RESET_DEVICE);
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+
+	write_reg_dma(nandc, NAND_FLASH_CMD, 1);
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+
+	return 0;
+}
+
+/* helpers to submit/free our list of dma descriptors */
+static int submit_descs(struct qcom_nand_controller *nandc)
+{
+	struct desc_info *desc;
+	dma_cookie_t cookie = 0;
+
+	list_for_each_entry(desc, &nandc->desc_list, node)
+		cookie = dmaengine_submit(desc->dma_desc);
+
+	if (dma_sync_wait(nandc->chan, cookie) != DMA_COMPLETE)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static void free_descs(struct qcom_nand_controller *nandc)
+{
+	struct desc_info *desc, *n;
+
+	list_for_each_entry_safe(desc, n, &nandc->desc_list, node) {
+		list_del(&desc->node);
+		dma_unmap_sg(nandc->dev, &desc->sgl, 1, desc->dir);
+		kfree(desc);
+	}
+}
+
+/* reset the register read buffer for next NAND operation */
+static void clear_read_regs(struct qcom_nand_controller *nandc)
+{
+	nandc->reg_read_pos = 0;
+	memset(nandc->reg_read_buf, 0,
+	       MAX_REG_RD * sizeof(*nandc->reg_read_buf));
+}
+
+static void pre_command(struct qcom_nand_host *host, int command)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	nandc->buf_count = 0;
+	nandc->buf_start = 0;
+	host->use_ecc = false;
+	host->last_command = command;
+
+	clear_read_regs(nandc);
+}
+
+/*
+ * this is called after NAND_CMD_PAGEPROG and NAND_CMD_ERASE1 to set our
+ * privately maintained status byte, this status byte can be read after
+ * NAND_CMD_STATUS is called
+ */
+static void parse_erase_write_errors(struct qcom_nand_host *host, int command)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int num_cw;
+	int i;
+
+	num_cw = command == NAND_CMD_PAGEPROG ? ecc->steps : 1;
+
+	for (i = 0; i < num_cw; i++) {
+		u32 flash_status = le32_to_cpu(nandc->reg_read_buf[i]);
+
+		if (flash_status & FS_MPU_ERR)
+			host->status &= ~NAND_STATUS_WP;
+
+		if (flash_status & FS_OP_ERR || (i == (num_cw - 1) &&
+						 (flash_status &
+						  FS_DEVICE_STS_ERR)))
+			host->status |= NAND_STATUS_FAIL;
+	}
+}
+
+static void post_command(struct qcom_nand_host *host, int command)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	switch (command) {
+	case NAND_CMD_READID:
+		memcpy(nandc->data_buffer, nandc->reg_read_buf,
+		       nandc->buf_count);
+		break;
+	case NAND_CMD_PAGEPROG:
+	case NAND_CMD_ERASE1:
+		parse_erase_write_errors(host, command);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Implements chip->cmdfunc. It's  only used for a limited set of commands.
+ * The rest of the commands wouldn't be called by upper layers. For example,
+ * NAND_CMD_READOOB would never be called because we have our own versions
+ * of read_oob ops for nand_ecc_ctrl.
+ */
+static void qcom_nandc_command(struct mtd_info *mtd, unsigned int command,
+			       int column, int page_addr)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	bool wait = false;
+	int ret = 0;
+
+	pre_command(host, command);
+
+	switch (command) {
+	case NAND_CMD_RESET:
+		ret = reset(host);
+		wait = true;
+		break;
+
+	case NAND_CMD_READID:
+		nandc->buf_count = 4;
+		ret = read_id(host, column);
+		wait = true;
+		break;
+
+	case NAND_CMD_PARAM:
+		ret = nandc_param(host);
+		wait = true;
+		break;
+
+	case NAND_CMD_ERASE1:
+		ret = erase_block(host, page_addr);
+		wait = true;
+		break;
+
+	case NAND_CMD_READ0:
+		/* we read the entire page for now */
+		WARN_ON(column != 0);
+
+		host->use_ecc = true;
+		set_address(host, 0, page_addr);
+		update_rw_regs(host, ecc->steps, true);
+		break;
+
+	case NAND_CMD_SEQIN:
+		WARN_ON(column != 0);
+		set_address(host, 0, page_addr);
+		break;
+
+	case NAND_CMD_PAGEPROG:
+	case NAND_CMD_STATUS:
+	case NAND_CMD_NONE:
+	default:
+		break;
+	}
+
+	if (ret) {
+		dev_err(nandc->dev, "failure executing command %d\n",
+			command);
+		free_descs(nandc);
+		return;
+	}
+
+	if (wait) {
+		ret = submit_descs(nandc);
+		if (ret)
+			dev_err(nandc->dev,
+				"failure submitting descs for command %d\n",
+				command);
+	}
+
+	free_descs(nandc);
+
+	post_command(host, command);
+}
+
+/*
+ * when using BCH ECC, the HW flags an error in NAND_FLASH_STATUS if it read
+ * an erased CW, and reports an erased CW in NAND_ERASED_CW_DETECT_STATUS.
+ *
+ * when using RS ECC, the HW reports the same erros when reading an erased CW,
+ * but it notifies that it is an erased CW by placing special characters at
+ * certain offsets in the buffer.
+ *
+ * verify if the page is erased or not, and fix up the page for RS ECC by
+ * replacing the special characters with 0xff.
+ */
+static bool erased_chunk_check_and_fixup(u8 *data_buf, int data_len)
+{
+	u8 empty1, empty2;
+
+	/*
+	 * an erased page flags an error in NAND_FLASH_STATUS, check if the page
+	 * is erased by looking for 0x54s at offsets 3 and 175 from the
+	 * beginning of each codeword
+	 */
+
+	empty1 = data_buf[3];
+	empty2 = data_buf[175];
+
+	/*
+	 * if the erased codework markers, if they exist override them with
+	 * 0xffs
+	 */
+	if ((empty1 == 0x54 && empty2 == 0xff) ||
+	    (empty1 == 0xff && empty2 == 0x54)) {
+		data_buf[3] = 0xff;
+		data_buf[175] = 0xff;
+	}
+
+	/*
+	 * check if the entire chunk contains 0xffs or not. if it doesn't, then
+	 * restore the original values at the special offsets
+	 */
+	if (memchr_inv(data_buf, 0xff, data_len)) {
+		data_buf[3] = empty1;
+		data_buf[175] = empty2;
+
+		return false;
+	}
+
+	return true;
+}
+
+struct read_stats {
+	__le32 flash;
+	__le32 buffer;
+	__le32 erased_cw;
+};
+
+/*
+ * reads back status registers set by the controller to notify page read
+ * errors. this is equivalent to what 'ecc->correct()' would do.
+ */
+static int parse_read_errors(struct qcom_nand_host *host, u8 *data_buf,
+			     u8 *oob_buf)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	unsigned int max_bitflips = 0;
+	struct read_stats *buf;
+	int i;
+
+	buf = (struct read_stats *)nandc->reg_read_buf;
+
+	for (i = 0; i < ecc->steps; i++, buf++) {
+		u32 flash, buffer, erased_cw;
+		int data_len, oob_len;
+
+		if (i == (ecc->steps - 1)) {
+			data_len = ecc->size - ((ecc->steps - 1) << 2);
+			oob_len = ecc->steps << 2;
+		} else {
+			data_len = host->cw_data;
+			oob_len = 0;
+		}
+
+		flash = le32_to_cpu(buf->flash);
+		buffer = le32_to_cpu(buf->buffer);
+		erased_cw = le32_to_cpu(buf->erased_cw);
+
+		if (flash & (FS_OP_ERR | FS_MPU_ERR)) {
+			bool erased;
+
+			/* ignore erased codeword errors */
+			if (host->bch_enabled) {
+				erased = (erased_cw & ERASED_CW) == ERASED_CW ?
+					 true : false;
+			} else {
+				erased = erased_chunk_check_and_fixup(data_buf,
+								      data_len);
+			}
+
+			if (erased) {
+				data_buf += data_len;
+				if (oob_buf)
+					oob_buf += oob_len + ecc->bytes;
+				continue;
+			}
+
+			if (buffer & BS_UNCORRECTABLE_BIT) {
+				int ret, ecclen, extraooblen;
+				void *eccbuf;
+
+				eccbuf = oob_buf ? oob_buf + oob_len : NULL;
+				ecclen = oob_buf ? host->ecc_bytes_hw : 0;
+				extraooblen = oob_buf ? oob_len : 0;
+
+				/*
+				 * make sure it isn't an erased page reported
+				 * as not-erased by HW because of a few bitflips
+				 */
+				ret = nand_check_erased_ecc_chunk(data_buf,
+					data_len, eccbuf, ecclen, oob_buf,
+					extraooblen, ecc->strength);
+				if (ret < 0) {
+					mtd->ecc_stats.failed++;
+				} else {
+					mtd->ecc_stats.corrected += ret;
+					max_bitflips =
+						max_t(unsigned int, max_bitflips, ret);
+				}
+			}
+		} else {
+			unsigned int stat;
+
+			stat = buffer & BS_CORRECTABLE_ERR_MSK;
+			mtd->ecc_stats.corrected += stat;
+			max_bitflips = max(max_bitflips, stat);
+		}
+
+		data_buf += data_len;
+		if (oob_buf)
+			oob_buf += oob_len + ecc->bytes;
+	}
+
+	return max_bitflips;
+}
+
+/*
+ * helper to perform the actual page read operation, used by ecc->read_page(),
+ * ecc->read_oob()
+ */
+static int read_page_ecc(struct qcom_nand_host *host, u8 *data_buf,
+			 u8 *oob_buf)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int i, ret;
+
+	/* queue cmd descs for each codeword */
+	for (i = 0; i < ecc->steps; i++) {
+		int data_size, oob_size;
+
+		if (i == (ecc->steps - 1)) {
+			data_size = ecc->size - ((ecc->steps - 1) << 2);
+			oob_size = (ecc->steps << 2) + host->ecc_bytes_hw +
+				   host->spare_bytes;
+		} else {
+			data_size = host->cw_data;
+			oob_size = host->ecc_bytes_hw + host->spare_bytes;
+		}
+
+		config_cw_read(nandc);
+
+		if (data_buf)
+			read_data_dma(nandc, FLASH_BUF_ACC, data_buf,
+				      data_size);
+
+		/*
+		 * when ecc is enabled, the controller doesn't read the real
+		 * or dummy bad block markers in each chunk. To maintain a
+		 * consistent layout across RAW and ECC reads, we just
+		 * leave the real/dummy BBM offsets empty (i.e, filled with
+		 * 0xffs)
+		 */
+		if (oob_buf) {
+			int j;
+
+			for (j = 0; j < host->bbm_size; j++)
+				*oob_buf++ = 0xff;
+
+			read_data_dma(nandc, FLASH_BUF_ACC + data_size,
+				      oob_buf, oob_size);
+		}
+
+		if (data_buf)
+			data_buf += data_size;
+		if (oob_buf)
+			oob_buf += oob_size;
+	}
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failure to read page/oob\n");
+
+	free_descs(nandc);
+
+	return ret;
+}
+
+/*
+ * a helper that copies the last step/codeword of a page (containing free oob)
+ * into our local buffer
+ */
+static int copy_last_cw(struct qcom_nand_host *host, int page)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int size;
+	int ret;
+
+	clear_read_regs(nandc);
+
+	size = host->use_ecc ? host->cw_data : host->cw_size;
+
+	/* prepare a clean read buffer */
+	memset(nandc->data_buffer, 0xff, size);
+
+	set_address(host, host->cw_size * (ecc->steps - 1), page);
+	update_rw_regs(host, 1, true);
+
+	config_cw_read(nandc);
+
+	read_data_dma(nandc, FLASH_BUF_ACC, nandc->data_buffer, size);
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failed to copy last codeword\n");
+
+	free_descs(nandc);
+
+	return ret;
+}
+
+/* implements ecc->read_page() */
+static int qcom_nandc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	u8 *data_buf, *oob_buf = NULL;
+	int ret;
+
+	data_buf = buf;
+	oob_buf = oob_required ? chip->oob_poi : NULL;
+
+	ret = read_page_ecc(host, data_buf, oob_buf);
+	if (ret) {
+		dev_err(nandc->dev, "failure to read page\n");
+		return ret;
+	}
+
+	return parse_read_errors(host, data_buf, oob_buf);
+}
+
+/* implements ecc->read_page_raw() */
+static int qcom_nandc_read_page_raw(struct mtd_info *mtd,
+				    struct nand_chip *chip, uint8_t *buf,
+				    int oob_required, int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	u8 *data_buf, *oob_buf;
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int i, ret;
+
+	data_buf = buf;
+	oob_buf = chip->oob_poi;
+
+	host->use_ecc = false;
+	update_rw_regs(host, ecc->steps, true);
+
+	for (i = 0; i < ecc->steps; i++) {
+		int data_size1, data_size2, oob_size1, oob_size2;
+		int reg_off = FLASH_BUF_ACC;
+
+		data_size1 = mtd->writesize - host->cw_size * (ecc->steps - 1);
+		oob_size1 = host->bbm_size;
+
+		if (i == (ecc->steps - 1)) {
+			data_size2 = ecc->size - data_size1 -
+				     ((ecc->steps - 1) << 2);
+			oob_size2 = (ecc->steps << 2) + host->ecc_bytes_hw +
+				    host->spare_bytes;
+		} else {
+			data_size2 = host->cw_data - data_size1;
+			oob_size2 = host->ecc_bytes_hw + host->spare_bytes;
+		}
+
+		config_cw_read(nandc);
+
+		read_data_dma(nandc, reg_off, data_buf, data_size1);
+		reg_off += data_size1;
+		data_buf += data_size1;
+
+		read_data_dma(nandc, reg_off, oob_buf, oob_size1);
+		reg_off += oob_size1;
+		oob_buf += oob_size1;
+
+		read_data_dma(nandc, reg_off, data_buf, data_size2);
+		reg_off += data_size2;
+		data_buf += data_size2;
+
+		read_data_dma(nandc, reg_off, oob_buf, oob_size2);
+		oob_buf += oob_size2;
+	}
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failure to read raw page\n");
+
+	free_descs(nandc);
+
+	return 0;
+}
+
+/* implements ecc->read_oob() */
+static int qcom_nandc_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
+			       int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int ret;
+
+	clear_read_regs(nandc);
+
+	host->use_ecc = true;
+	set_address(host, 0, page);
+	update_rw_regs(host, ecc->steps, true);
+
+	ret = read_page_ecc(host, NULL, chip->oob_poi);
+	if (ret)
+		dev_err(nandc->dev, "failure to read oob\n");
+
+	return ret;
+}
+
+/* implements ecc->write_page() */
+static int qcom_nandc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+				 const uint8_t *buf, int oob_required, int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	u8 *data_buf, *oob_buf;
+	int i, ret;
+
+	clear_read_regs(nandc);
+
+	data_buf = (u8 *)buf;
+	oob_buf = chip->oob_poi;
+
+	host->use_ecc = true;
+	update_rw_regs(host, ecc->steps, false);
+
+	for (i = 0; i < ecc->steps; i++) {
+		int data_size, oob_size;
+
+		if (i == (ecc->steps - 1)) {
+			data_size = ecc->size - ((ecc->steps - 1) << 2);
+			oob_size = (ecc->steps << 2) + host->ecc_bytes_hw +
+				   host->spare_bytes;
+		} else {
+			data_size = host->cw_data;
+			oob_size = ecc->bytes;
+		}
+
+		config_cw_write_pre(nandc);
+
+		write_data_dma(nandc, FLASH_BUF_ACC, data_buf, data_size);
+
+		/*
+		 * when ECC is enabled, we don't really need to write anything
+		 * to oob for the first n - 1 codewords since these oob regions
+		 * just contain ECC bytes that's written by the controller
+		 * itself. For the last codeword, we skip the bbm positions and
+		 * write to the free oob area.
+		 */
+		if (i == (ecc->steps - 1)) {
+			oob_buf += host->bbm_size;
+
+			write_data_dma(nandc, FLASH_BUF_ACC + data_size,
+				       oob_buf, oob_size);
+		}
+
+		config_cw_write_post(nandc);
+
+		data_buf += data_size;
+		oob_buf += oob_size;
+	}
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failure to write page\n");
+
+	free_descs(nandc);
+
+	return ret;
+}
+
+/* implements ecc->write_page_raw() */
+static int qcom_nandc_write_page_raw(struct mtd_info *mtd,
+				     struct nand_chip *chip, const uint8_t *buf,
+				     int oob_required, int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	u8 *data_buf, *oob_buf;
+	int i, ret;
+
+	clear_read_regs(nandc);
+
+	data_buf = (u8 *)buf;
+	oob_buf = chip->oob_poi;
+
+	host->use_ecc = false;
+	update_rw_regs(host, ecc->steps, false);
+
+	for (i = 0; i < ecc->steps; i++) {
+		int data_size1, data_size2, oob_size1, oob_size2;
+		int reg_off = FLASH_BUF_ACC;
+
+		data_size1 = mtd->writesize - host->cw_size * (ecc->steps - 1);
+		oob_size1 = host->bbm_size;
+
+		if (i == (ecc->steps - 1)) {
+			data_size2 = ecc->size - data_size1 -
+				     ((ecc->steps - 1) << 2);
+			oob_size2 = (ecc->steps << 2) + host->ecc_bytes_hw +
+				    host->spare_bytes;
+		} else {
+			data_size2 = host->cw_data - data_size1;
+			oob_size2 = host->ecc_bytes_hw + host->spare_bytes;
+		}
+
+		config_cw_write_pre(nandc);
+
+		write_data_dma(nandc, reg_off, data_buf, data_size1);
+		reg_off += data_size1;
+		data_buf += data_size1;
+
+		write_data_dma(nandc, reg_off, oob_buf, oob_size1);
+		reg_off += oob_size1;
+		oob_buf += oob_size1;
+
+		write_data_dma(nandc, reg_off, data_buf, data_size2);
+		reg_off += data_size2;
+		data_buf += data_size2;
+
+		write_data_dma(nandc, reg_off, oob_buf, oob_size2);
+		oob_buf += oob_size2;
+
+		config_cw_write_post(nandc);
+	}
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failure to write raw page\n");
+
+	free_descs(nandc);
+
+	return ret;
+}
+
+/*
+ * implements ecc->write_oob()
+ *
+ * the NAND controller cannot write only data or only oob within a codeword,
+ * since ecc is calculated for the combined codeword. we first copy the
+ * entire contents for the last codeword(data + oob), replace the old oob
+ * with the new one in chip->oob_poi, and then write the entire codeword.
+ * this read-copy-write operation results in a slight performance loss.
+ */
+static int qcom_nandc_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
+				int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	u8 *oob = chip->oob_poi;
+	int free_boff;
+	int data_size, oob_size;
+	int ret, status = 0;
+
+	host->use_ecc = true;
+
+	ret = copy_last_cw(host, page);
+	if (ret)
+		return ret;
+
+	clear_read_regs(nandc);
+
+	/* calculate the data and oob size for the last codeword/step */
+	data_size = ecc->size - ((ecc->steps - 1) << 2);
+	oob_size = ecc->steps << 2;
+
+	free_boff = ecc->layout->oobfree[0].offset;
+
+	/* override new oob content to last codeword */
+	memcpy(nandc->data_buffer + data_size, oob + free_boff, oob_size);
+
+	set_address(host, host->cw_size * (ecc->steps - 1), page);
+	update_rw_regs(host, 1, false);
+
+	config_cw_write_pre(nandc);
+	write_data_dma(nandc, FLASH_BUF_ACC, nandc->data_buffer,
+		       data_size + oob_size);
+	config_cw_write_post(nandc);
+
+	ret = submit_descs(nandc);
+
+	free_descs(nandc);
+
+	if (ret) {
+		dev_err(nandc->dev, "failure to write oob\n");
+		return -EIO;
+	}
+
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	status = chip->waitfunc(mtd, chip);
+
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
+}
+
+static int qcom_nandc_block_bad(struct mtd_info *mtd, loff_t ofs)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int page, ret, bbpos, bad = 0;
+	u32 flash_status;
+
+	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
+
+	/*
+	 * configure registers for a raw sub page read, the address is set to
+	 * the beginning of the last codeword, we don't care about reading ecc
+	 * portion of oob. we just want the first few bytes from this codeword
+	 * that contains the BBM
+	 */
+	host->use_ecc = false;
+
+	ret = copy_last_cw(host, page);
+	if (ret)
+		goto err;
+
+	flash_status = le32_to_cpu(nandc->reg_read_buf[0]);
+
+	if (flash_status & (FS_OP_ERR | FS_MPU_ERR)) {
+		dev_warn(nandc->dev, "error when trying to read BBM\n");
+		goto err;
+	}
+
+	bbpos = mtd->writesize - host->cw_size * (ecc->steps - 1);
+
+	bad = nandc->data_buffer[bbpos] != 0xff;
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		bad = bad || (nandc->data_buffer[bbpos + 1] != 0xff);
+err:
+	return bad;
+}
+
+static int qcom_nandc_block_markbad(struct mtd_info *mtd, loff_t ofs)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int page, ret, status = 0;
+
+	clear_read_regs(nandc);
+
+	/*
+	 * to mark the BBM as bad, we flash the entire last codeword with 0s.
+	 * we don't care about the rest of the content in the codeword since
+	 * we aren't going to use this block again
+	 */
+	memset(nandc->data_buffer, 0x00, host->cw_size);
+
+	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
+
+	/* prepare write */
+	host->use_ecc = false;
+	set_address(host, host->cw_size * (ecc->steps - 1), page);
+	update_rw_regs(host, 1, false);
+
+	config_cw_write_pre(nandc);
+	write_data_dma(nandc, FLASH_BUF_ACC, nandc->data_buffer, host->cw_size);
+	config_cw_write_post(nandc);
+
+	ret = submit_descs(nandc);
+
+	free_descs(nandc);
+
+	if (ret) {
+		dev_err(nandc->dev, "failure to update BBM\n");
+		return -EIO;
+	}
+
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	status = chip->waitfunc(mtd, chip);
+
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
+}
+
+/*
+ * the three functions below implement chip->read_byte(), chip->read_buf()
+ * and chip->write_buf() respectively. these aren't used for
+ * reading/writing page data, they are used for smaller data like reading
+ * id, status etc
+ */
+static uint8_t qcom_nandc_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	u8 *buf = nandc->data_buffer;
+	u8 ret = 0x0;
+
+	if (host->last_command == NAND_CMD_STATUS) {
+		ret = host->status;
+
+		host->status = NAND_STATUS_READY | NAND_STATUS_WP;
+
+		return ret;
+	}
+
+	if (nandc->buf_start < nandc->buf_count)
+		ret = buf[nandc->buf_start++];
+
+	return ret;
+}
+
+static void qcom_nandc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	int real_len = min_t(size_t, len, nandc->buf_count - nandc->buf_start);
+
+	memcpy(buf, nandc->data_buffer + nandc->buf_start, real_len);
+	nandc->buf_start += real_len;
+}
+
+static void qcom_nandc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
+				 int len)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	int real_len = min_t(size_t, len, nandc->buf_count - nandc->buf_start);
+
+	memcpy(nandc->data_buffer + nandc->buf_start, buf, real_len);
+
+	nandc->buf_start += real_len;
+}
+
+/* we support only one external chip for now */
+static void qcom_nandc_select_chip(struct mtd_info *mtd, int chipnr)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	if (chipnr <= 0)
+		return;
+
+	dev_warn(nandc->dev, "invalid chip select\n");
+}
+
+/*
+ * NAND controller page layout info
+ *
+ * Layout with ECC enabled:
+ *
+ * |----------------------|  |---------------------------------|
+ * |           xx.......yy|  |             *********xx.......yy|
+ * |    DATA   xx..ECC..yy|  |    DATA     **SPARE**xx..ECC..yy|
+ * |   (516)   xx.......yy|  |  (516-n*4)  **(n*4)**xx.......yy|
+ * |           xx.......yy|  |             *********xx.......yy|
+ * |----------------------|  |---------------------------------|
+ *     codeword 1,2..n-1                  codeword n
+ *  <---(528/532 Bytes)-->    <-------(528/532 Bytes)--------->
+ *
+ * n = Number of codewords in the page
+ * . = ECC bytes
+ * * = Spare/free bytes
+ * x = Unused byte(s)
+ * y = Reserved byte(s)
+ *
+ * 2K page: n = 4, spare = 16 bytes
+ * 4K page: n = 8, spare = 32 bytes
+ * 8K page: n = 16, spare = 64 bytes
+ *
+ * the qcom nand controller operates at a sub page/codeword level. each
+ * codeword is 528 and 532 bytes for 4 bit and 8 bit ECC modes respectively.
+ * the number of ECC bytes vary based on the ECC strength and the bus width.
+ *
+ * the first n - 1 codewords contains 516 bytes of user data, the remaining
+ * 12/16 bytes consist of ECC and reserved data. The nth codeword contains
+ * both user data and spare(oobavail) bytes that sum up to 516 bytes.
+ *
+ * When we access a page with ECC enabled, the reserved bytes(s) are not
+ * accessible at all. When reading, we fill up these unreadable positions
+ * with 0xffs. When writing, the controller skips writing the inaccessible
+ * bytes.
+ *
+ * Layout with ECC disabled:
+ *
+ * |------------------------------|  |---------------------------------------|
+ * |         yy          xx.......|  |         bb          *********xx.......|
+ * |  DATA1  yy  DATA2   xx..ECC..|  |  DATA1  bb  DATA2   **SPARE**xx..ECC..|
+ * | (size1) yy (size2)  xx.......|  | (size1) bb (size2)  **(n*4)**xx.......|
+ * |         yy          xx.......|  |         bb          *********xx.......|
+ * |------------------------------|  |---------------------------------------|
+ *         codeword 1,2..n-1                        codeword n
+ *  <-------(528/532 Bytes)------>    <-----------(528/532 Bytes)----------->
+ *
+ * n = Number of codewords in the page
+ * . = ECC bytes
+ * * = Spare/free bytes
+ * x = Unused byte(s)
+ * y = Dummy Bad Bock byte(s)
+ * b = Real Bad Block byte(s)
+ * size1/size2 = function of codeword size and 'n'
+ *
+ * when the ECC block is disabled, one reserved byte (or two for 16 bit bus
+ * width) is now accessible. For the first n - 1 codewords, these are dummy Bad
+ * Block Markers. In the last codeword, this position contains the real BBM
+ *
+ * In order to have a consistent layout between RAW and ECC modes, we assume
+ * the following OOB layout arrangement:
+ *
+ * |-----------|  |--------------------|
+ * |yyxx.......|  |bb*********xx.......|
+ * |yyxx..ECC..|  |bb*FREEOOB*xx..ECC..|
+ * |yyxx.......|  |bb*********xx.......|
+ * |yyxx.......|  |bb*********xx.......|
+ * |-----------|  |--------------------|
+ *  first n - 1       nth OOB region
+ *  OOB regions
+ *
+ * n = Number of codewords in the page
+ * . = ECC bytes
+ * * = FREE OOB bytes
+ * y = Dummy bad block byte(s) (inaccessible when ECC enabled)
+ * x = Unused byte(s)
+ * b = Real bad block byte(s) (inaccessible when ECC enabled)
+ *
+ * This layout is read as is when ECC is disabled. When ECC is enabled, the
+ * inaccessible Bad Block byte(s) are ignored when we write to a page/oob,
+ * and assumed as 0xffs when we read a page/oob. The ECC, unused and
+ * dummy/real bad block bytes are grouped as ecc bytes in nand_ecclayout (i.e,
+ * ecc->bytes is the sum of the three).
+ */
+
+static struct nand_ecclayout *
+qcom_nand_create_layout(struct qcom_nand_host *host)
+{
+	struct nand_chip *chip = &host->chip;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct nand_ecclayout *layout;
+	int i, j, steps, pos = 0, shift = 0;
+
+	layout = devm_kzalloc(nandc->dev, sizeof(*layout), GFP_KERNEL);
+	if (!layout)
+		return NULL;
+
+	steps = mtd->writesize / ecc->size;
+	layout->eccbytes = steps * ecc->bytes;
+
+	layout->oobfree[0].offset = (steps - 1) * ecc->bytes + host->bbm_size;
+	layout->oobfree[0].length = steps << 2;
+
+	/*
+	 * the oob bytes in the first n - 1 codewords are all grouped together
+	 * in the format:
+	 * DUMMY_BBM + UNUSED + ECC
+	 */
+	for (i = 0; i < steps - 1; i++) {
+		for (j = 0; j < ecc->bytes; j++)
+			layout->eccpos[pos++] = i * ecc->bytes + j;
+	}
+
+	/*
+	 * the oob bytes in the last codeword are grouped in the format:
+	 * BBM + FREE OOB + UNUSED + ECC
+	 */
+
+	/* fill up the bbm positions */
+	for (j = 0; j < host->bbm_size; j++)
+		layout->eccpos[pos++] = i * ecc->bytes + j;
+
+	/*
+	 * fill up the ecc and reserved positions, their indices are offseted
+	 * by the free oob region
+	 */
+	shift = layout->oobfree[0].length + host->bbm_size;
+
+	for (j = 0; j < (host->ecc_bytes_hw + host->spare_bytes); j++)
+		layout->eccpos[pos++] = i * ecc->bytes + shift + j;
+
+	return layout;
+}
+
+static int qcom_nand_host_setup(struct qcom_nand_host *host)
+{
+	struct nand_chip *chip = &host->chip;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	int cwperpage, bad_block_byte;
+	bool wide_bus;
+	int ecc_mode = 1;
+
+	/*
+	 * the controller requires each step consists of 512 bytes of data.
+	 * bail out if DT has populated a wrong step size.
+	 */
+	if (ecc->size != NANDC_STEP_SIZE) {
+		dev_err(nandc->dev, "invalid ecc size\n");
+		return -EINVAL;
+	}
+
+	wide_bus = chip->options & NAND_BUSWIDTH_16 ? true : false;
+
+	if (ecc->strength >= 8) {
+		/* 8 bit ECC defaults to BCH ECC on all platforms */
+		host->bch_enabled = true;
+		ecc_mode = 1;
+
+		if (wide_bus) {
+			host->ecc_bytes_hw = 14;
+			host->spare_bytes = 0;
+			host->bbm_size = 2;
+		} else {
+			host->ecc_bytes_hw = 13;
+			host->spare_bytes = 2;
+			host->bbm_size = 1;
+		}
+	} else {
+		/*
+		 * if the controller supports BCH for 4 bit ECC, the controller
+		 * uses lesser bytes for ECC. If RS is used, the ECC bytes is
+		 * always 10 bytes
+		 */
+		if (nandc->ecc_modes & ECC_BCH_4BIT) {
+			/* BCH */
+			host->bch_enabled = true;
+			ecc_mode = 0;
+
+			if (wide_bus) {
+				host->ecc_bytes_hw = 8;
+				host->spare_bytes = 2;
+				host->bbm_size = 2;
+			} else {
+				host->ecc_bytes_hw = 7;
+				host->spare_bytes = 4;
+				host->bbm_size = 1;
+			}
+		} else {
+			/* RS */
+			host->ecc_bytes_hw = 10;
+
+			if (wide_bus) {
+				host->spare_bytes = 0;
+				host->bbm_size = 2;
+			} else {
+				host->spare_bytes = 1;
+				host->bbm_size = 1;
+			}
+		}
+	}
+
+	/*
+	 * we consider ecc->bytes as the sum of all the non-data content in a
+	 * step. It gives us a clean representation of the oob area (even if
+	 * all the bytes aren't used for ECC).It is always 16 bytes for 8 bit
+	 * ECC and 12 bytes for 4 bit ECC
+	 */
+	ecc->bytes = host->ecc_bytes_hw + host->spare_bytes + host->bbm_size;
+
+	ecc->read_page		= qcom_nandc_read_page;
+	ecc->read_page_raw	= qcom_nandc_read_page_raw;
+	ecc->read_oob		= qcom_nandc_read_oob;
+	ecc->write_page		= qcom_nandc_write_page;
+	ecc->write_page_raw	= qcom_nandc_write_page_raw;
+	ecc->write_oob		= qcom_nandc_write_oob;
+
+	ecc->mode = NAND_ECC_HW;
+
+	ecc->layout = qcom_nand_create_layout(host);
+	if (!ecc->layout)
+		return -ENOMEM;
+
+	cwperpage = mtd->writesize / ecc->size;
+
+	/*
+	 * DATA_UD_BYTES varies based on whether the read/write command protects
+	 * spare data with ECC too. We protect spare data by default, so we set
+	 * it to main + spare data, which are 512 and 4 bytes respectively.
+	 */
+	host->cw_data = 516;
+
+	/*
+	 * total bytes in a step, either 528 bytes for 4 bit ECC, or 532 bytes
+	 * for 8 bit ECC
+	 */
+	host->cw_size = host->cw_data + ecc->bytes;
+
+	if (ecc->bytes * (mtd->writesize / ecc->size) > mtd->oobsize) {
+		dev_err(nandc->dev, "ecc data doesn't fit in OOB area\n");
+		return -EINVAL;
+	}
+
+	bad_block_byte = mtd->writesize - host->cw_size * (cwperpage - 1) + 1;
+
+	host->cfg0 = (cwperpage - 1) << CW_PER_PAGE
+				| host->cw_data << UD_SIZE_BYTES
+				| 0 << DISABLE_STATUS_AFTER_WRITE
+				| 5 << NUM_ADDR_CYCLES
+				| host->ecc_bytes_hw << ECC_PARITY_SIZE_BYTES_RS
+				| 0 << STATUS_BFR_READ
+				| 1 << SET_RD_MODE_AFTER_STATUS
+				| host->spare_bytes << SPARE_SIZE_BYTES;
+
+	host->cfg1 = 7 << NAND_RECOVERY_CYCLES
+				| 0 <<  CS_ACTIVE_BSY
+				| bad_block_byte << BAD_BLOCK_BYTE_NUM
+				| 0 << BAD_BLOCK_IN_SPARE_AREA
+				| 2 << WR_RD_BSY_GAP
+				| wide_bus << WIDE_FLASH
+				| host->bch_enabled << ENABLE_BCH_ECC;
+
+	host->cfg0_raw = (cwperpage - 1) << CW_PER_PAGE
+				| host->cw_size << UD_SIZE_BYTES
+				| 5 << NUM_ADDR_CYCLES
+				| 0 << SPARE_SIZE_BYTES;
+
+	host->cfg1_raw = 7 << NAND_RECOVERY_CYCLES
+				| 0 << CS_ACTIVE_BSY
+				| 17 << BAD_BLOCK_BYTE_NUM
+				| 1 << BAD_BLOCK_IN_SPARE_AREA
+				| 2 << WR_RD_BSY_GAP
+				| wide_bus << WIDE_FLASH
+				| 1 << DEV0_CFG1_ECC_DISABLE;
+
+	host->ecc_bch_cfg = host->bch_enabled << ECC_CFG_ECC_DISABLE
+				| 0 << ECC_SW_RESET
+				| host->cw_data << ECC_NUM_DATA_BYTES
+				| 1 << ECC_FORCE_CLK_OPEN
+				| ecc_mode << ECC_MODE
+				| host->ecc_bytes_hw << ECC_PARITY_SIZE_BYTES_BCH;
+
+	host->ecc_buf_cfg = 0x203 << NUM_STEPS;
+
+	host->clrflashstatus = FS_READY_BSY_N;
+	host->clrreadstatus = 0xc0;
+
+	dev_dbg(nandc->dev,
+		"cfg0 %x cfg1 %x ecc_buf_cfg %x ecc_bch cfg %x cw_size %d cw_data %d strength %d parity_bytes %d steps %d\n",
+		host->cfg0, host->cfg1, host->ecc_buf_cfg, host->ecc_bch_cfg,
+		host->cw_size, host->cw_data, ecc->strength, ecc->bytes,
+		cwperpage);
+
+	return 0;
+}
+
+static int qcom_nandc_alloc(struct qcom_nand_controller *nandc)
+{
+	int ret;
+
+	ret = dma_set_coherent_mask(nandc->dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(nandc->dev, "failed to set DMA mask\n");
+		return ret;
+	}
+
+	/*
+	 * we use the internal buffer for reading ONFI params, reading small
+	 * data like ID and status, and preforming read-copy-write operations
+	 * when writing to a codeword partially. 532 is the maximum possible
+	 * size of a codeword for our nand controller
+	 */
+	nandc->buf_size = 532;
+
+	nandc->data_buffer = devm_kzalloc(nandc->dev, nandc->buf_size,
+					GFP_KERNEL);
+	if (!nandc->data_buffer)
+		return -ENOMEM;
+
+	nandc->regs = devm_kzalloc(nandc->dev, sizeof(*nandc->regs),
+					GFP_KERNEL);
+	if (!nandc->regs)
+		return -ENOMEM;
+
+	nandc->reg_read_buf = devm_kzalloc(nandc->dev,
+				MAX_REG_RD * sizeof(*nandc->reg_read_buf),
+				GFP_KERNEL);
+	if (!nandc->reg_read_buf)
+		return -ENOMEM;
+
+	nandc->chan = dma_request_slave_channel(nandc->dev, "rxtx");
+	if (!nandc->chan) {
+		dev_err(nandc->dev, "failed to request slave channel\n");
+		return -ENODEV;
+	}
+
+	INIT_LIST_HEAD(&nandc->desc_list);
+	INIT_LIST_HEAD(&nandc->host_list);
+
+	spin_lock_init(&nandc->controller.lock);
+	init_waitqueue_head(&nandc->controller.wq);
+
+	return 0;
+}
+
+static void qcom_nandc_unalloc(struct qcom_nand_controller *nandc)
+{
+	dma_release_channel(nandc->chan);
+}
+
+/* one time setup of a few nand controller registers */
+static int qcom_nandc_setup(struct qcom_nand_controller *nandc)
+{
+	/* kill onenand */
+	nandc_write(nandc, SFLASHC_BURST_CFG, 0);
+
+	/* enable ADM DMA */
+	nandc_write(nandc, NAND_FLASH_CHIP_SELECT, DM_EN);
+
+	/* save the original values of these registers */
+	nandc->cmd1 = nandc_read(nandc, NAND_DEV_CMD1);
+	nandc->vld = nandc_read(nandc, NAND_DEV_CMD_VLD);
+
+	return 0;
+}
+
+static int qcom_nand_host_init(struct qcom_nand_controller *nandc,
+			       struct qcom_nand_host *host,
+			       struct device_node *dn)
+{
+	struct nand_chip *chip = &host->chip;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct device *dev = nandc->dev;
+	int ret;
+
+	ret = of_property_read_u32(dn, "reg", &host->cs);
+	if (ret) {
+		dev_err(dev, "can't get chip-select\n");
+		return -ENXIO;
+	}
+
+	nand_set_flash_node(chip, dn);
+	mtd->name = devm_kasprintf(dev, GFP_KERNEL, "qcom_nand.%d", host->cs);
+	mtd->owner = THIS_MODULE;
+	mtd->dev.parent = dev;
+
+	chip->cmdfunc		= qcom_nandc_command;
+	chip->select_chip	= qcom_nandc_select_chip;
+	chip->read_byte		= qcom_nandc_read_byte;
+	chip->read_buf		= qcom_nandc_read_buf;
+	chip->write_buf		= qcom_nandc_write_buf;
+
+	/*
+	 * the bad block marker is readable only when we read the last codeword
+	 * of a page with ECC disabled. currently, the nand_base and nand_bbt
+	 * helpers don't allow us to read BB from a nand chip with ECC
+	 * disabled (MTD_OPS_PLACE_OOB is set by default). use the block_bad
+	 * and block_markbad helpers until we permanently switch to using
+	 * MTD_OPS_RAW for all drivers (with the help of badblockbits)
+	 */
+	chip->block_bad		= qcom_nandc_block_bad;
+	chip->block_markbad	= qcom_nandc_block_markbad;
+
+	chip->controller = &nandc->controller;
+	chip->options |= NAND_NO_SUBPAGE_WRITE | NAND_USE_BOUNCE_BUFFER |
+			 NAND_SKIP_BBTSCAN;
+
+	/* set up initial status value */
+	host->status = NAND_STATUS_READY | NAND_STATUS_WP;
+
+	ret = nand_scan_ident(mtd, 1, NULL);
+	if (ret)
+		return ret;
+
+	ret = qcom_nand_host_setup(host);
+	if (ret)
+		return ret;
+
+	ret = nand_scan_tail(mtd);
+	if (ret)
+		return ret;
+
+	return mtd_device_register(mtd, NULL, 0);
+}
+
+/* parse custom DT properties here */
+static int qcom_nandc_parse_dt(struct platform_device *pdev)
+{
+	struct qcom_nand_controller *nandc = platform_get_drvdata(pdev);
+	struct device_node *np = nandc->dev->of_node;
+	int ret;
+
+	ret = of_property_read_u32(np, "qcom,cmd-crci", &nandc->cmd_crci);
+	if (ret) {
+		dev_err(nandc->dev, "command CRCI unspecified\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "qcom,data-crci", &nandc->data_crci);
+	if (ret) {
+		dev_err(nandc->dev, "data CRCI unspecified\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int qcom_nandc_probe(struct platform_device *pdev)
+{
+	struct qcom_nand_controller *nandc;
+	struct qcom_nand_host *host;
+	const void *dev_data;
+	struct device *dev = &pdev->dev;
+	struct device_node *dn = dev->of_node, *child;
+	struct resource *res;
+	int ret;
+
+	nandc = devm_kzalloc(&pdev->dev, sizeof(*nandc), GFP_KERNEL);
+	if (!nandc)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, nandc);
+	nandc->dev = dev;
+
+	dev_data = of_device_get_match_data(dev);
+	if (!dev_data) {
+		dev_err(&pdev->dev, "failed to get device data\n");
+		return -ENODEV;
+	}
+
+	nandc->ecc_modes = (unsigned long)dev_data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nandc->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(nandc->base))
+		return PTR_ERR(nandc->base);
+
+	nandc->base_dma = phys_to_dma(dev, (phys_addr_t)res->start);
+
+	nandc->core_clk = devm_clk_get(dev, "core");
+	if (IS_ERR(nandc->core_clk))
+		return PTR_ERR(nandc->core_clk);
+
+	nandc->aon_clk = devm_clk_get(dev, "aon");
+	if (IS_ERR(nandc->aon_clk))
+		return PTR_ERR(nandc->aon_clk);
+
+	ret = qcom_nandc_parse_dt(pdev);
+	if (ret)
+		return ret;
+
+	ret = qcom_nandc_alloc(nandc);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(nandc->core_clk);
+	if (ret)
+		goto err_core_clk;
+
+	ret = clk_prepare_enable(nandc->aon_clk);
+	if (ret)
+		goto err_aon_clk;
+
+	ret = qcom_nandc_setup(nandc);
+	if (ret)
+		goto err_setup;
+
+	for_each_available_child_of_node(dn, child) {
+		if (of_device_is_compatible(child, "qcom,nandcs")) {
+			host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
+			if (!host) {
+				of_node_put(child);
+				ret = -ENOMEM;
+				goto err_cs_init;
+			}
+
+			ret = qcom_nand_host_init(nandc, host, child);
+			if (ret) {
+				devm_kfree(dev, host);
+				continue;
+			}
+
+			list_add_tail(&host->node, &nandc->host_list);
+		}
+	}
+
+	if (list_empty(&nandc->host_list)) {
+		ret = -ENODEV;
+		goto err_cs_init;
+	}
+
+	return 0;
+
+err_cs_init:
+	list_for_each_entry(host, &nandc->host_list, node)
+		nand_release(nand_to_mtd(&host->chip));
+err_setup:
+	clk_disable_unprepare(nandc->aon_clk);
+err_aon_clk:
+	clk_disable_unprepare(nandc->core_clk);
+err_core_clk:
+	qcom_nandc_unalloc(nandc);
+
+	return ret;
+}
+
+static int qcom_nandc_remove(struct platform_device *pdev)
+{
+	struct qcom_nand_controller *nandc = platform_get_drvdata(pdev);
+	struct qcom_nand_host *host;
+
+	list_for_each_entry(host, &nandc->host_list, node)
+		nand_release(nand_to_mtd(&host->chip));
+
+	qcom_nandc_unalloc(nandc);
+
+	clk_disable_unprepare(nandc->aon_clk);
+	clk_disable_unprepare(nandc->core_clk);
+
+	return 0;
+}
+
+#define EBI2_NANDC_ECC_MODES	(ECC_RS_4BIT | ECC_BCH_8BIT)
+
+/*
+ * data will hold a struct pointer containing more differences once we support
+ * more controller variants
+ */
+static const struct of_device_id qcom_nandc_of_match[] = {
+	{	.compatible = "qcom,ipq806x-nand",
+		.data = (void *)EBI2_NANDC_ECC_MODES,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, qcom_nandc_of_match);
+
+static struct platform_driver qcom_nandc_driver = {
+	.driver = {
+		.name = "qcom-nandc",
+		.of_match_table = qcom_nandc_of_match,
+	},
+	.probe   = qcom_nandc_probe,
+	.remove  = qcom_nandc_remove,
+};
+module_platform_driver(qcom_nandc_driver);
+
+MODULE_AUTHOR("Archit Taneja <architt@codeaurora.org>");
+MODULE_DESCRIPTION("Qualcomm NAND Controller driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 01ac74f..9c9397b 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c

@@ -861,9 +861,6 @@
 	chip->ecc.mode	    = NAND_ECC_SOFT;
 #endif
 
-	if (set->ecc_layout != NULL)
-		chip->ecc.layout = set->ecc_layout;
-
 	if (set->disable_ecc)
 		chip->ecc.mode	= NAND_ECC_NONE;
 

diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 51e10a3..1c03eee 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c

@@ -60,6 +60,7 @@
 #define NFC_REG_ECC_ERR_CNT(x)	((0x0040 + (x)) & ~0x3)
 #define NFC_REG_USER_DATA(x)	(0x0050 + ((x) * 4))
 #define NFC_REG_SPARE_AREA	0x00A0
+#define NFC_REG_PAT_ID		0x00A4
 #define NFC_RAM0_BASE		0x0400
 #define NFC_RAM1_BASE		0x0800
 
@@ -538,6 +539,174 @@
 	sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
 }
 
+/* These seed values have been extracted from Allwinner's BSP */
+static const u16 sunxi_nfc_randomizer_page_seeds[] = {
+	0x2b75, 0x0bd0, 0x5ca3, 0x62d1, 0x1c93, 0x07e9, 0x2162, 0x3a72,
+	0x0d67, 0x67f9, 0x1be7, 0x077d, 0x032f, 0x0dac, 0x2716, 0x2436,
+	0x7922, 0x1510, 0x3860, 0x5287, 0x480f, 0x4252, 0x1789, 0x5a2d,
+	0x2a49, 0x5e10, 0x437f, 0x4b4e, 0x2f45, 0x216e, 0x5cb7, 0x7130,
+	0x2a3f, 0x60e4, 0x4dc9, 0x0ef0, 0x0f52, 0x1bb9, 0x6211, 0x7a56,
+	0x226d, 0x4ea7, 0x6f36, 0x3692, 0x38bf, 0x0c62, 0x05eb, 0x4c55,
+	0x60f4, 0x728c, 0x3b6f, 0x2037, 0x7f69, 0x0936, 0x651a, 0x4ceb,
+	0x6218, 0x79f3, 0x383f, 0x18d9, 0x4f05, 0x5c82, 0x2912, 0x6f17,
+	0x6856, 0x5938, 0x1007, 0x61ab, 0x3e7f, 0x57c2, 0x542f, 0x4f62,
+	0x7454, 0x2eac, 0x7739, 0x42d4, 0x2f90, 0x435a, 0x2e52, 0x2064,
+	0x637c, 0x66ad, 0x2c90, 0x0bad, 0x759c, 0x0029, 0x0986, 0x7126,
+	0x1ca7, 0x1605, 0x386a, 0x27f5, 0x1380, 0x6d75, 0x24c3, 0x0f8e,
+	0x2b7a, 0x1418, 0x1fd1, 0x7dc1, 0x2d8e, 0x43af, 0x2267, 0x7da3,
+	0x4e3d, 0x1338, 0x50db, 0x454d, 0x764d, 0x40a3, 0x42e6, 0x262b,
+	0x2d2e, 0x1aea, 0x2e17, 0x173d, 0x3a6e, 0x71bf, 0x25f9, 0x0a5d,
+	0x7c57, 0x0fbe, 0x46ce, 0x4939, 0x6b17, 0x37bb, 0x3e91, 0x76db,
+};
+
+/*
+ * sunxi_nfc_randomizer_ecc512_seeds and sunxi_nfc_randomizer_ecc1024_seeds
+ * have been generated using
+ * sunxi_nfc_randomizer_step(seed, (step_size * 8) + 15), which is what
+ * the randomizer engine does internally before de/scrambling OOB data.
+ *
+ * Those tables are statically defined to avoid calculating randomizer state
+ * at runtime.
+ */
+static const u16 sunxi_nfc_randomizer_ecc512_seeds[] = {
+	0x3346, 0x367f, 0x1f18, 0x769a, 0x4f64, 0x068c, 0x2ef1, 0x6b64,
+	0x28a9, 0x15d7, 0x30f8, 0x3659, 0x53db, 0x7c5f, 0x71d4, 0x4409,
+	0x26eb, 0x03cc, 0x655d, 0x47d4, 0x4daa, 0x0877, 0x712d, 0x3617,
+	0x3264, 0x49aa, 0x7f9e, 0x588e, 0x4fbc, 0x7176, 0x7f91, 0x6c6d,
+	0x4b95, 0x5fb7, 0x3844, 0x4037, 0x0184, 0x081b, 0x0ee8, 0x5b91,
+	0x293d, 0x1f71, 0x0e6f, 0x402b, 0x5122, 0x1e52, 0x22be, 0x3d2d,
+	0x75bc, 0x7c60, 0x6291, 0x1a2f, 0x61d4, 0x74aa, 0x4140, 0x29ab,
+	0x472d, 0x2852, 0x017e, 0x15e8, 0x5ec2, 0x17cf, 0x7d0f, 0x06b8,
+	0x117a, 0x6b94, 0x789b, 0x3126, 0x6ac5, 0x5be7, 0x150f, 0x51f8,
+	0x7889, 0x0aa5, 0x663d, 0x77e8, 0x0b87, 0x3dcb, 0x360d, 0x218b,
+	0x512f, 0x7dc9, 0x6a4d, 0x630a, 0x3547, 0x1dd2, 0x5aea, 0x69a5,
+	0x7bfa, 0x5e4f, 0x1519, 0x6430, 0x3a0e, 0x5eb3, 0x5425, 0x0c7a,
+	0x5540, 0x3670, 0x63c1, 0x31e9, 0x5a39, 0x2de7, 0x5979, 0x2891,
+	0x1562, 0x014b, 0x5b05, 0x2756, 0x5a34, 0x13aa, 0x6cb5, 0x2c36,
+	0x5e72, 0x1306, 0x0861, 0x15ef, 0x1ee8, 0x5a37, 0x7ac4, 0x45dd,
+	0x44c4, 0x7266, 0x2f41, 0x3ccc, 0x045e, 0x7d40, 0x7c66, 0x0fa0,
+};
+
+static const u16 sunxi_nfc_randomizer_ecc1024_seeds[] = {
+	0x2cf5, 0x35f1, 0x63a4, 0x5274, 0x2bd2, 0x778b, 0x7285, 0x32b6,
+	0x6a5c, 0x70d6, 0x757d, 0x6769, 0x5375, 0x1e81, 0x0cf3, 0x3982,
+	0x6787, 0x042a, 0x6c49, 0x1925, 0x56a8, 0x40a9, 0x063e, 0x7bd9,
+	0x4dbf, 0x55ec, 0x672e, 0x7334, 0x5185, 0x4d00, 0x232a, 0x7e07,
+	0x445d, 0x6b92, 0x528f, 0x4255, 0x53ba, 0x7d82, 0x2a2e, 0x3a4e,
+	0x75eb, 0x450c, 0x6844, 0x1b5d, 0x581a, 0x4cc6, 0x0379, 0x37b2,
+	0x419f, 0x0e92, 0x6b27, 0x5624, 0x01e3, 0x07c1, 0x44a5, 0x130c,
+	0x13e8, 0x5910, 0x0876, 0x60c5, 0x54e3, 0x5b7f, 0x2269, 0x509f,
+	0x7665, 0x36fd, 0x3e9a, 0x0579, 0x6295, 0x14ef, 0x0a81, 0x1bcc,
+	0x4b16, 0x64db, 0x0514, 0x4f07, 0x0591, 0x3576, 0x6853, 0x0d9e,
+	0x259f, 0x38b7, 0x64fb, 0x3094, 0x4693, 0x6ddd, 0x29bb, 0x0bc8,
+	0x3f47, 0x490e, 0x0c0e, 0x7933, 0x3c9e, 0x5840, 0x398d, 0x3e68,
+	0x4af1, 0x71f5, 0x57cf, 0x1121, 0x64eb, 0x3579, 0x15ac, 0x584d,
+	0x5f2a, 0x47e2, 0x6528, 0x6eac, 0x196e, 0x6b96, 0x0450, 0x0179,
+	0x609c, 0x06e1, 0x4626, 0x42c7, 0x273e, 0x486f, 0x0705, 0x1601,
+	0x145b, 0x407e, 0x062b, 0x57a5, 0x53f9, 0x5659, 0x4410, 0x3ccd,
+};
+
+static u16 sunxi_nfc_randomizer_step(u16 state, int count)
+{
+	state &= 0x7fff;
+
+	/*
+	 * This loop is just a simple implementation of a Fibonacci LFSR using
+	 * the x16 + x15 + 1 polynomial.
+	 */
+	while (count--)
+		state = ((state >> 1) |
+			 (((state ^ (state >> 1)) & 1) << 14)) & 0x7fff;
+
+	return state;
+}
+
+static u16 sunxi_nfc_randomizer_state(struct mtd_info *mtd, int page, bool ecc)
+{
+	const u16 *seeds = sunxi_nfc_randomizer_page_seeds;
+	int mod = mtd_div_by_ws(mtd->erasesize, mtd);
+
+	if (mod > ARRAY_SIZE(sunxi_nfc_randomizer_page_seeds))
+		mod = ARRAY_SIZE(sunxi_nfc_randomizer_page_seeds);
+
+	if (ecc) {
+		if (mtd->ecc_step_size == 512)
+			seeds = sunxi_nfc_randomizer_ecc512_seeds;
+		else
+			seeds = sunxi_nfc_randomizer_ecc1024_seeds;
+	}
+
+	return seeds[page % mod];
+}
+
+static void sunxi_nfc_randomizer_config(struct mtd_info *mtd,
+					int page, bool ecc)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
+	u32 ecc_ctl = readl(nfc->regs + NFC_REG_ECC_CTL);
+	u16 state;
+
+	if (!(nand->options & NAND_NEED_SCRAMBLING))
+		return;
+
+	ecc_ctl = readl(nfc->regs + NFC_REG_ECC_CTL);
+	state = sunxi_nfc_randomizer_state(mtd, page, ecc);
+	ecc_ctl = readl(nfc->regs + NFC_REG_ECC_CTL) & ~NFC_RANDOM_SEED_MSK;
+	writel(ecc_ctl | NFC_RANDOM_SEED(state), nfc->regs + NFC_REG_ECC_CTL);
+}
+
+static void sunxi_nfc_randomizer_enable(struct mtd_info *mtd)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
+
+	if (!(nand->options & NAND_NEED_SCRAMBLING))
+		return;
+
+	writel(readl(nfc->regs + NFC_REG_ECC_CTL) | NFC_RANDOM_EN,
+	       nfc->regs + NFC_REG_ECC_CTL);
+}
+
+static void sunxi_nfc_randomizer_disable(struct mtd_info *mtd)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
+
+	if (!(nand->options & NAND_NEED_SCRAMBLING))
+		return;
+
+	writel(readl(nfc->regs + NFC_REG_ECC_CTL) & ~NFC_RANDOM_EN,
+	       nfc->regs + NFC_REG_ECC_CTL);
+}
+
+static void sunxi_nfc_randomize_bbm(struct mtd_info *mtd, int page, u8 *bbm)
+{
+	u16 state = sunxi_nfc_randomizer_state(mtd, page, true);
+
+	bbm[0] ^= state;
+	bbm[1] ^= sunxi_nfc_randomizer_step(state, 8);
+}
+
+static void sunxi_nfc_randomizer_write_buf(struct mtd_info *mtd,
+					   const uint8_t *buf, int len,
+					   bool ecc, int page)
+{
+	sunxi_nfc_randomizer_config(mtd, page, ecc);
+	sunxi_nfc_randomizer_enable(mtd);
+	sunxi_nfc_write_buf(mtd, buf, len);
+	sunxi_nfc_randomizer_disable(mtd);
+}
+
+static void sunxi_nfc_randomizer_read_buf(struct mtd_info *mtd, uint8_t *buf,
+					  int len, bool ecc, int page)
+{
+	sunxi_nfc_randomizer_config(mtd, page, ecc);
+	sunxi_nfc_randomizer_enable(mtd);
+	sunxi_nfc_read_buf(mtd, buf, len);
+	sunxi_nfc_randomizer_disable(mtd);
+}
+
 static void sunxi_nfc_hw_ecc_enable(struct mtd_info *mtd)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
@@ -574,18 +743,20 @@
 				       u8 *data, int data_off,
 				       u8 *oob, int oob_off,
 				       int *cur_off,
-				       unsigned int *max_bitflips)
+				       unsigned int *max_bitflips,
+				       bool bbm, int page)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
 	struct nand_ecc_ctrl *ecc = &nand->ecc;
+	int raw_mode = 0;
 	u32 status;
 	int ret;
 
 	if (*cur_off != data_off)
 		nand->cmdfunc(mtd, NAND_CMD_RNDOUT, data_off, -1);
 
-	sunxi_nfc_read_buf(mtd, NULL, ecc->size);
+	sunxi_nfc_randomizer_read_buf(mtd, NULL, ecc->size, false, page);
 
 	if (data_off + ecc->size != oob_off)
 		nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1);
@@ -594,25 +765,54 @@
 	if (ret)
 		return ret;
 
+	sunxi_nfc_randomizer_enable(mtd);
 	writel(NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ECC_OP,
 	       nfc->regs + NFC_REG_CMD);
 
 	ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+	sunxi_nfc_randomizer_disable(mtd);
 	if (ret)
 		return ret;
 
+	*cur_off = oob_off + ecc->bytes + 4;
+
 	status = readl(nfc->regs + NFC_REG_ECC_ST);
+	if (status & NFC_ECC_PAT_FOUND(0)) {
+		u8 pattern = 0xff;
+
+		if (unlikely(!(readl(nfc->regs + NFC_REG_PAT_ID) & 0x1)))
+			pattern = 0x0;
+
+		memset(data, pattern, ecc->size);
+		memset(oob, pattern, ecc->bytes + 4);
+
+		return 1;
+	}
+
 	ret = NFC_ECC_ERR_CNT(0, readl(nfc->regs + NFC_REG_ECC_ERR_CNT(0)));
 
 	memcpy_fromio(data, nfc->regs + NFC_RAM0_BASE, ecc->size);
 
 	nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1);
-	sunxi_nfc_read_buf(mtd, oob, ecc->bytes + 4);
+	sunxi_nfc_randomizer_read_buf(mtd, oob, ecc->bytes + 4, true, page);
 
 	if (status & NFC_ECC_ERR(0)) {
+		/*
+		 * Re-read the data with the randomizer disabled to identify
+		 * bitflips in erased pages.
+		 */
+		if (nand->options & NAND_NEED_SCRAMBLING) {
+			nand->cmdfunc(mtd, NAND_CMD_RNDOUT, data_off, -1);
+			nand->read_buf(mtd, data, ecc->size);
+			nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1);
+			nand->read_buf(mtd, oob, ecc->bytes + 4);
+		}
+
 		ret = nand_check_erased_ecc_chunk(data,	ecc->size,
 						  oob, ecc->bytes + 4,
 						  NULL, 0, ecc->strength);
+		if (ret >= 0)
+			raw_mode = 1;
 	} else {
 		/*
 		 * The engine protects 4 bytes of OOB data per chunk.
@@ -620,6 +820,10 @@
 		 */
 		sunxi_nfc_user_data_to_buf(readl(nfc->regs + NFC_REG_USER_DATA(0)),
 					   oob);
+
+		/* De-randomize the Bad Block Marker. */
+		if (bbm && nand->options & NAND_NEED_SCRAMBLING)
+			sunxi_nfc_randomize_bbm(mtd, page, oob);
 	}
 
 	if (ret < 0) {
@@ -629,13 +833,12 @@
 		*max_bitflips = max_t(unsigned int, *max_bitflips, ret);
 	}
 
-	*cur_off = oob_off + ecc->bytes + 4;
-
-	return 0;
+	return raw_mode;
 }
 
 static void sunxi_nfc_hw_ecc_read_extra_oob(struct mtd_info *mtd,
-					    u8 *oob, int *cur_off)
+					    u8 *oob, int *cur_off,
+					    bool randomize, int page)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &nand->ecc;
@@ -649,7 +852,11 @@
 		nand->cmdfunc(mtd, NAND_CMD_RNDOUT,
 			      offset + mtd->writesize, -1);
 
-	sunxi_nfc_read_buf(mtd, oob + offset, len);
+	if (!randomize)
+		sunxi_nfc_read_buf(mtd, oob + offset, len);
+	else
+		sunxi_nfc_randomizer_read_buf(mtd, oob + offset, len,
+					      false, page);
 
 	*cur_off = mtd->oobsize + mtd->writesize;
 }
@@ -662,7 +869,8 @@
 static int sunxi_nfc_hw_ecc_write_chunk(struct mtd_info *mtd,
 					const u8 *data, int data_off,
 					const u8 *oob, int oob_off,
-					int *cur_off)
+					int *cur_off, bool bbm,
+					int page)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
@@ -672,11 +880,20 @@
 	if (data_off != *cur_off)
 		nand->cmdfunc(mtd, NAND_CMD_RNDIN, data_off, -1);
 
-	sunxi_nfc_write_buf(mtd, data, ecc->size);
+	sunxi_nfc_randomizer_write_buf(mtd, data, ecc->size, false, page);
 
 	/* Fill OOB data in */
-	writel(sunxi_nfc_buf_to_user_data(oob),
-	       nfc->regs + NFC_REG_USER_DATA(0));
+	if ((nand->options & NAND_NEED_SCRAMBLING) && bbm) {
+		u8 user_data[4];
+
+		memcpy(user_data, oob, 4);
+		sunxi_nfc_randomize_bbm(mtd, page, user_data);
+		writel(sunxi_nfc_buf_to_user_data(user_data),
+		       nfc->regs + NFC_REG_USER_DATA(0));
+	} else {
+		writel(sunxi_nfc_buf_to_user_data(oob),
+		       nfc->regs + NFC_REG_USER_DATA(0));
+	}
 
 	if (data_off + ecc->size != oob_off)
 		nand->cmdfunc(mtd, NAND_CMD_RNDIN, oob_off, -1);
@@ -685,11 +902,13 @@
 	if (ret)
 		return ret;
 
+	sunxi_nfc_randomizer_enable(mtd);
 	writel(NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD |
 	       NFC_ACCESS_DIR | NFC_ECC_OP,
 	       nfc->regs + NFC_REG_CMD);
 
 	ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+	sunxi_nfc_randomizer_disable(mtd);
 	if (ret)
 		return ret;
 
@@ -699,7 +918,8 @@
 }
 
 static void sunxi_nfc_hw_ecc_write_extra_oob(struct mtd_info *mtd,
-					     u8 *oob, int *cur_off)
+					     u8 *oob, int *cur_off,
+					     int page)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &nand->ecc;
@@ -713,7 +933,7 @@
 		nand->cmdfunc(mtd, NAND_CMD_RNDIN,
 			      offset + mtd->writesize, -1);
 
-	sunxi_nfc_write_buf(mtd, oob + offset, len);
+	sunxi_nfc_randomizer_write_buf(mtd, oob + offset, len, false, page);
 
 	*cur_off = mtd->oobsize + mtd->writesize;
 }
@@ -725,6 +945,7 @@
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	unsigned int max_bitflips = 0;
 	int ret, i, cur_off = 0;
+	bool raw_mode = false;
 
 	sunxi_nfc_hw_ecc_enable(mtd);
 
@@ -736,13 +957,17 @@
 
 		ret = sunxi_nfc_hw_ecc_read_chunk(mtd, data, data_off, oob,
 						  oob_off + mtd->writesize,
-						  &cur_off, &max_bitflips);
-		if (ret)
+						  &cur_off, &max_bitflips,
+						  !i, page);
+		if (ret < 0)
 			return ret;
+		else if (ret)
+			raw_mode = true;
 	}
 
 	if (oob_required)
-		sunxi_nfc_hw_ecc_read_extra_oob(mtd, chip->oob_poi, &cur_off);
+		sunxi_nfc_hw_ecc_read_extra_oob(mtd, chip->oob_poi, &cur_off,
+						!raw_mode, page);
 
 	sunxi_nfc_hw_ecc_disable(mtd);
 
@@ -767,13 +992,14 @@
 
 		ret = sunxi_nfc_hw_ecc_write_chunk(mtd, data, data_off, oob,
 						   oob_off + mtd->writesize,
-						   &cur_off);
+						   &cur_off, !i, page);
 		if (ret)
 			return ret;
 	}
 
-	if (oob_required)
-		sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi, &cur_off);
+	if (oob_required || (chip->options & NAND_NEED_SCRAMBLING))
+		sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi,
+						 &cur_off, page);
 
 	sunxi_nfc_hw_ecc_disable(mtd);
 
@@ -788,6 +1014,7 @@
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	unsigned int max_bitflips = 0;
 	int ret, i, cur_off = 0;
+	bool raw_mode = false;
 
 	sunxi_nfc_hw_ecc_enable(mtd);
 
@@ -799,13 +1026,16 @@
 
 		ret = sunxi_nfc_hw_ecc_read_chunk(mtd, data, data_off, oob,
 						  oob_off, &cur_off,
-						  &max_bitflips);
-		if (ret)
+						  &max_bitflips, !i, page);
+		if (ret < 0)
 			return ret;
+		else if (ret)
+			raw_mode = true;
 	}
 
 	if (oob_required)
-		sunxi_nfc_hw_ecc_read_extra_oob(mtd, chip->oob_poi, &cur_off);
+		sunxi_nfc_hw_ecc_read_extra_oob(mtd, chip->oob_poi, &cur_off,
+						!raw_mode, page);
 
 	sunxi_nfc_hw_ecc_disable(mtd);
 
@@ -829,13 +1059,15 @@
 		const u8 *oob = chip->oob_poi + (i * (ecc->bytes + 4));
 
 		ret = sunxi_nfc_hw_ecc_write_chunk(mtd, data, data_off,
-						   oob, oob_off, &cur_off);
+						   oob, oob_off, &cur_off,
+						   false, page);
 		if (ret)
 			return ret;
 	}
 
-	if (oob_required)
-		sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi, &cur_off);
+	if (oob_required || (chip->options & NAND_NEED_SCRAMBLING))
+		sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi,
+						 &cur_off, page);
 
 	sunxi_nfc_hw_ecc_disable(mtd);
 
@@ -1345,6 +1577,9 @@
 	if (nand->bbt_options & NAND_BBT_USE_FLASH)
 		nand->bbt_options |= NAND_BBT_NO_OOB;
 
+	if (nand->options & NAND_NEED_SCRAMBLING)
+		nand->options |= NAND_NO_SUBPAGE_WRITE;
+
 	ret = sunxi_nand_chip_init_timings(chip, np);
 	if (ret) {
 		dev_err(dev, "could not configure chip timings: %d\n", ret);

diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
index 034420f..293feb1 100644
--- a/drivers/mtd/nand/vf610_nfc.c
+++ b/drivers/mtd/nand/vf610_nfc.c

@@ -795,8 +795,6 @@
 			goto error;
 		}
 
-		/* propagate ecc.layout to mtd_info */
-		mtd->ecclayout = chip->ecc.layout;
 		chip->ecc.read_page = vf610_nfc_read_page;
 		chip->ecc.write_page = vf610_nfc_write_page;
 

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 43b3392..af28bb3 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c

@@ -1124,11 +1124,7 @@
 	pr_debug("%s: from = 0x%08x, len = %i\n", __func__, (unsigned int)from,
 			(int)len);
 
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
-	else
-		oobsize = mtd->oobsize;
-
+	oobsize = mtd_oobavail(mtd, ops);
 	oobcolumn = from & (mtd->oobsize - 1);
 
 	/* Do not allow reads past end of device */
@@ -1229,11 +1225,7 @@
 	pr_debug("%s: from = 0x%08x, len = %i\n", __func__, (unsigned int)from,
 			(int)len);
 
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
-	else
-		oobsize = mtd->oobsize;
-
+	oobsize = mtd_oobavail(mtd, ops);
 	oobcolumn = from & (mtd->oobsize - 1);
 
 	/* Do not allow reads past end of device */
@@ -1365,7 +1357,7 @@
 	ops->oobretlen = 0;
 
 	if (mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
+		oobsize = mtd->oobavail;
 	else
 		oobsize = mtd->oobsize;
 
@@ -1885,12 +1877,7 @@
 	/* Check zero length */
 	if (!len)
 		return 0;
-
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
-	else
-		oobsize = mtd->oobsize;
-
+	oobsize = mtd_oobavail(mtd, ops);
 	oobcolumn = to & (mtd->oobsize - 1);
 
 	column = to & (mtd->writesize - 1);
@@ -2063,7 +2050,7 @@
 	ops->oobretlen = 0;
 
 	if (mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
+		oobsize = mtd->oobavail;
 	else
 		oobsize = mtd->oobsize;
 
@@ -2599,6 +2586,7 @@
  */
 static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
 {
+	struct onenand_chip *this = mtd->priv;
 	int ret;
 
 	ret = onenand_block_isbad(mtd, ofs);
@@ -2610,7 +2598,7 @@
 	}
 
 	onenand_get_device(mtd, FL_WRITING);
-	ret = mtd_block_markbad(mtd, ofs);
+	ret = this->block_markbad(mtd, ofs);
 	onenand_release_device(mtd);
 	return ret;
 }
@@ -4049,12 +4037,10 @@
 	 * The number of bytes available for a client to place data into
 	 * the out of band area
 	 */
-	this->ecclayout->oobavail = 0;
+	mtd->oobavail = 0;
 	for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES &&
 	    this->ecclayout->oobfree[i].length; i++)
-		this->ecclayout->oobavail +=
-			this->ecclayout->oobfree[i].length;
-	mtd->oobavail = this->ecclayout->oobavail;
+		mtd->oobavail += this->ecclayout->oobfree[i].length;
 
 	mtd->ecclayout = this->ecclayout;
 	mtd->ecc_strength = 1;

diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c
index 08d0085..680188a 100644
--- a/drivers/mtd/onenand/onenand_bbt.c
+++ b/drivers/mtd/onenand/onenand_bbt.c

@@ -179,7 +179,7 @@
  * by the onenand_release function.
  *
  */
-int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd)
+static int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd)
 {
 	struct onenand_chip *this = mtd->priv;
 	struct bbm_info *bbm = this->bbm;
@@ -247,6 +247,3 @@
 
 	return onenand_scan_bbt(mtd, bbm->badblock_pattern);
 }
-
-EXPORT_SYMBOL(onenand_scan_bbt);
-EXPORT_SYMBOL(onenand_default_bbt);

diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
index 0dc9275..d42c98e 100644
--- a/drivers/mtd/spi-nor/Kconfig
+++ b/drivers/mtd/spi-nor/Kconfig

@@ -9,6 +9,7 @@
 
 config MTD_MT81xx_NOR
 	tristate "Mediatek MT81xx SPI NOR flash controller"
+	depends on HAS_IOMEM
 	help
 	  This enables access to SPI NOR flash, using MT81xx SPI NOR flash
 	  controller. This controller does not support generic SPI BUS, it only
@@ -30,7 +31,7 @@
 
 config SPI_FSL_QUADSPI
 	tristate "Freescale Quad SPI controller"
-	depends on ARCH_MXC || COMPILE_TEST
+	depends on ARCH_MXC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
 	depends on HAS_IOMEM
 	help
 	  This enables support for the Quad SPI controller in master mode.

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 54640f1..9ab2b51 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c

@@ -213,6 +213,7 @@
 	FSL_QUADSPI_IMX6SX,
 	FSL_QUADSPI_IMX7D,
 	FSL_QUADSPI_IMX6UL,
+	FSL_QUADSPI_LS1021A,
 };
 
 struct fsl_qspi_devtype_data {
@@ -258,6 +259,14 @@
 		       | QUADSPI_QUIRK_4X_INT_CLK,
 };
 
+static struct fsl_qspi_devtype_data ls1021a_data = {
+	.devtype = FSL_QUADSPI_LS1021A,
+	.rxfifo = 128,
+	.txfifo = 64,
+	.ahb_buf_size = 1024,
+	.driver_data = 0,
+};
+
 #define FSL_QSPI_MAX_CHIP	4
 struct fsl_qspi {
 	struct spi_nor nor[FSL_QSPI_MAX_CHIP];
@@ -275,6 +284,7 @@
 	u32 clk_rate;
 	unsigned int chip_base_addr; /* We may support two chips. */
 	bool has_second_chip;
+	bool big_endian;
 	struct mutex lock;
 	struct pm_qos_request pm_qos_req;
 };
@@ -300,6 +310,28 @@
 }
 
 /*
+ * R/W functions for big- or little-endian registers:
+ * The qSPI controller's endian is independent of the CPU core's endian.
+ * So far, although the CPU core is little-endian but the qSPI have two
+ * versions for big-endian and little-endian.
+ */
+static void qspi_writel(struct fsl_qspi *q, u32 val, void __iomem *addr)
+{
+	if (q->big_endian)
+		iowrite32be(val, addr);
+	else
+		iowrite32(val, addr);
+}
+
+static u32 qspi_readl(struct fsl_qspi *q, void __iomem *addr)
+{
+	if (q->big_endian)
+		return ioread32be(addr);
+	else
+		return ioread32(addr);
+}
+
+/*
  * An IC bug makes us to re-arrange the 32-bit data.
  * The following chips, such as IMX6SLX, have fixed this bug.
  */
@@ -310,14 +342,14 @@
 
 static inline void fsl_qspi_unlock_lut(struct fsl_qspi *q)
 {
-	writel(QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
-	writel(QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
 }
 
 static inline void fsl_qspi_lock_lut(struct fsl_qspi *q)
 {
-	writel(QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
-	writel(QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR);
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR);
 }
 
 static irqreturn_t fsl_qspi_irq_handler(int irq, void *dev_id)
@@ -326,8 +358,8 @@
 	u32 reg;
 
 	/* clear interrupt */
-	reg = readl(q->iobase + QUADSPI_FR);
-	writel(reg, q->iobase + QUADSPI_FR);
+	reg = qspi_readl(q, q->iobase + QUADSPI_FR);
+	qspi_writel(q, reg, q->iobase + QUADSPI_FR);
 
 	if (reg & QUADSPI_FR_TFF_MASK)
 		complete(&q->c);
@@ -348,7 +380,7 @@
 
 	/* Clear all the LUT table */
 	for (i = 0; i < QUADSPI_LUT_NUM; i++)
-		writel(0, base + QUADSPI_LUT_BASE + i * 4);
+		qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
 
 	/* Quad Read */
 	lut_base = SEQID_QUAD_READ * 4;
@@ -364,14 +396,15 @@
 		dummy = 8;
 	}
 
-	writel(LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
 			base + QUADSPI_LUT(lut_base));
-	writel(LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
+	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
 			base + QUADSPI_LUT(lut_base + 1));
 
 	/* Write enable */
 	lut_base = SEQID_WREN * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_WREN), base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_WREN),
+			base + QUADSPI_LUT(lut_base));
 
 	/* Page Program */
 	lut_base = SEQID_PP * 4;
@@ -385,13 +418,15 @@
 		addrlen = ADDR32BIT;
 	}
 
-	writel(LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
 			base + QUADSPI_LUT(lut_base));
-	writel(LUT0(FSL_WRITE, PAD1, 0), base + QUADSPI_LUT(lut_base + 1));
+	qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
+			base + QUADSPI_LUT(lut_base + 1));
 
 	/* Read Status */
 	lut_base = SEQID_RDSR * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_RDSR) | LUT1(FSL_READ, PAD1, 0x1),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_RDSR) |
+			LUT1(FSL_READ, PAD1, 0x1),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Erase a sector */
@@ -400,40 +435,46 @@
 	cmd = q->nor[0].erase_opcode;
 	addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
 
-	writel(LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Erase the whole chip */
 	lut_base = SEQID_CHIP_ERASE * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_CHIP_ERASE),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_CHIP_ERASE),
 			base + QUADSPI_LUT(lut_base));
 
 	/* READ ID */
 	lut_base = SEQID_RDID * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_RDID) | LUT1(FSL_READ, PAD1, 0x8),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_RDID) |
+			LUT1(FSL_READ, PAD1, 0x8),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Write Register */
 	lut_base = SEQID_WRSR * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_WRSR) | LUT1(FSL_WRITE, PAD1, 0x2),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_WRSR) |
+			LUT1(FSL_WRITE, PAD1, 0x2),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Read Configuration Register */
 	lut_base = SEQID_RDCR * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_RDCR) | LUT1(FSL_READ, PAD1, 0x1),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_RDCR) |
+			LUT1(FSL_READ, PAD1, 0x1),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Write disable */
 	lut_base = SEQID_WRDI * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_WRDI), base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_WRDI),
+			base + QUADSPI_LUT(lut_base));
 
 	/* Enter 4 Byte Mode (Micron) */
 	lut_base = SEQID_EN4B * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_EN4B), base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_EN4B),
+			base + QUADSPI_LUT(lut_base));
 
 	/* Enter 4 Byte Mode (Spansion) */
 	lut_base = SEQID_BRWR * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_BRWR), base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_BRWR),
+			base + QUADSPI_LUT(lut_base));
 
 	fsl_qspi_lock_lut(q);
 }
@@ -488,15 +529,16 @@
 			q->chip_base_addr, addr, len, cmd);
 
 	/* save the reg */
-	reg = readl(base + QUADSPI_MCR);
+	reg = qspi_readl(q, base + QUADSPI_MCR);
 
-	writel(q->memmap_phy + q->chip_base_addr + addr, base + QUADSPI_SFAR);
-	writel(QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
+	qspi_writel(q, q->memmap_phy + q->chip_base_addr + addr,
+			base + QUADSPI_SFAR);
+	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
 			base + QUADSPI_RBCT);
-	writel(reg | QUADSPI_MCR_CLR_RXF_MASK, base + QUADSPI_MCR);
+	qspi_writel(q, reg | QUADSPI_MCR_CLR_RXF_MASK, base + QUADSPI_MCR);
 
 	do {
-		reg2 = readl(base + QUADSPI_SR);
+		reg2 = qspi_readl(q, base + QUADSPI_SR);
 		if (reg2 & (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)) {
 			udelay(1);
 			dev_dbg(q->dev, "The controller is busy, 0x%x\n", reg2);
@@ -507,21 +549,22 @@
 
 	/* trigger the LUT now */
 	seqid = fsl_qspi_get_seqid(q, cmd);
-	writel((seqid << QUADSPI_IPCR_SEQID_SHIFT) | len, base + QUADSPI_IPCR);
+	qspi_writel(q, (seqid << QUADSPI_IPCR_SEQID_SHIFT) | len,
+			base + QUADSPI_IPCR);
 
 	/* Wait for the interrupt. */
 	if (!wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000))) {
 		dev_err(q->dev,
 			"cmd 0x%.2x timeout, addr@%.8x, FR:0x%.8x, SR:0x%.8x\n",
-			cmd, addr, readl(base + QUADSPI_FR),
-			readl(base + QUADSPI_SR));
+			cmd, addr, qspi_readl(q, base + QUADSPI_FR),
+			qspi_readl(q, base + QUADSPI_SR));
 		err = -ETIMEDOUT;
 	} else {
 		err = 0;
 	}
 
 	/* restore the MCR */
-	writel(reg, base + QUADSPI_MCR);
+	qspi_writel(q, reg, base + QUADSPI_MCR);
 
 	return err;
 }
@@ -533,7 +576,7 @@
 	int i = 0;
 
 	while (len > 0) {
-		tmp = readl(q->iobase + QUADSPI_RBDR + i * 4);
+		tmp = qspi_readl(q, q->iobase + QUADSPI_RBDR + i * 4);
 		tmp = fsl_qspi_endian_xchg(q, tmp);
 		dev_dbg(q->dev, "chip addr:0x%.8x, rcv:0x%.8x\n",
 				q->chip_base_addr, tmp);
@@ -561,9 +604,9 @@
 {
 	u32 reg;
 
-	reg = readl(q->iobase + QUADSPI_MCR);
+	reg = qspi_readl(q, q->iobase + QUADSPI_MCR);
 	reg |= QUADSPI_MCR_SWRSTHD_MASK | QUADSPI_MCR_SWRSTSD_MASK;
-	writel(reg, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, reg, q->iobase + QUADSPI_MCR);
 
 	/*
 	 * The minimum delay : 1 AHB + 2 SFCK clocks.
@@ -572,7 +615,7 @@
 	udelay(1);
 
 	reg &= ~(QUADSPI_MCR_SWRSTHD_MASK | QUADSPI_MCR_SWRSTSD_MASK);
-	writel(reg, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, reg, q->iobase + QUADSPI_MCR);
 }
 
 static int fsl_qspi_nor_write(struct fsl_qspi *q, struct spi_nor *nor,
@@ -586,20 +629,20 @@
 		q->chip_base_addr, to, count);
 
 	/* clear the TX FIFO. */
-	tmp = readl(q->iobase + QUADSPI_MCR);
-	writel(tmp | QUADSPI_MCR_CLR_TXF_MASK, q->iobase + QUADSPI_MCR);
+	tmp = qspi_readl(q, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, tmp | QUADSPI_MCR_CLR_TXF_MASK, q->iobase + QUADSPI_MCR);
 
 	/* fill the TX data to the FIFO */
 	for (j = 0, i = ((count + 3) / 4); j < i; j++) {
 		tmp = fsl_qspi_endian_xchg(q, *txbuf);
-		writel(tmp, q->iobase + QUADSPI_TBDR);
+		qspi_writel(q, tmp, q->iobase + QUADSPI_TBDR);
 		txbuf++;
 	}
 
 	/* fill the TXFIFO upto 16 bytes for i.MX7d */
 	if (needs_fill_txfifo(q))
 		for (; i < 4; i++)
-			writel(tmp, q->iobase + QUADSPI_TBDR);
+			qspi_writel(q, tmp, q->iobase + QUADSPI_TBDR);
 
 	/* Trigger it */
 	ret = fsl_qspi_runcmd(q, opcode, to, count);
@@ -615,10 +658,10 @@
 	int nor_size = q->nor_size;
 	void __iomem *base = q->iobase;
 
-	writel(nor_size + q->memmap_phy, base + QUADSPI_SFA1AD);
-	writel(nor_size * 2 + q->memmap_phy, base + QUADSPI_SFA2AD);
-	writel(nor_size * 3 + q->memmap_phy, base + QUADSPI_SFB1AD);
-	writel(nor_size * 4 + q->memmap_phy, base + QUADSPI_SFB2AD);
+	qspi_writel(q, nor_size + q->memmap_phy, base + QUADSPI_SFA1AD);
+	qspi_writel(q, nor_size * 2 + q->memmap_phy, base + QUADSPI_SFA2AD);
+	qspi_writel(q, nor_size * 3 + q->memmap_phy, base + QUADSPI_SFB1AD);
+	qspi_writel(q, nor_size * 4 + q->memmap_phy, base + QUADSPI_SFB2AD);
 }
 
 /*
@@ -640,24 +683,26 @@
 	int seqid;
 
 	/* AHB configuration for access buffer 0/1/2 .*/
-	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
-	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF1CR);
-	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
+	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
+	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF1CR);
+	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
 	/*
 	 * Set ADATSZ with the maximum AHB buffer size to improve the
 	 * read performance.
 	 */
-	writel(QUADSPI_BUF3CR_ALLMST_MASK | ((q->devtype_data->ahb_buf_size / 8)
-			<< QUADSPI_BUF3CR_ADATSZ_SHIFT), base + QUADSPI_BUF3CR);
+	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
+			((q->devtype_data->ahb_buf_size / 8)
+			<< QUADSPI_BUF3CR_ADATSZ_SHIFT),
+			base + QUADSPI_BUF3CR);
 
 	/* We only use the buffer3 */
-	writel(0, base + QUADSPI_BUF0IND);
-	writel(0, base + QUADSPI_BUF1IND);
-	writel(0, base + QUADSPI_BUF2IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF0IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
 
 	/* Set the default lut sequence for AHB Read. */
 	seqid = fsl_qspi_get_seqid(q, q->nor[0].read_opcode);
-	writel(seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
+	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
 		q->iobase + QUADSPI_BFGENCR);
 }
 
@@ -713,7 +758,7 @@
 		return ret;
 
 	/* Reset the module */
-	writel(QUADSPI_MCR_SWRSTSD_MASK | QUADSPI_MCR_SWRSTHD_MASK,
+	qspi_writel(q, QUADSPI_MCR_SWRSTSD_MASK | QUADSPI_MCR_SWRSTHD_MASK,
 		base + QUADSPI_MCR);
 	udelay(1);
 
@@ -721,24 +766,24 @@
 	fsl_qspi_init_lut(q);
 
 	/* Disable the module */
-	writel(QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
 			base + QUADSPI_MCR);
 
-	reg = readl(base + QUADSPI_SMPR);
-	writel(reg & ~(QUADSPI_SMPR_FSDLY_MASK
+	reg = qspi_readl(q, base + QUADSPI_SMPR);
+	qspi_writel(q, reg & ~(QUADSPI_SMPR_FSDLY_MASK
 			| QUADSPI_SMPR_FSPHS_MASK
 			| QUADSPI_SMPR_HSENA_MASK
 			| QUADSPI_SMPR_DDRSMP_MASK), base + QUADSPI_SMPR);
 
 	/* Enable the module */
-	writel(QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
+	qspi_writel(q, QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
 			base + QUADSPI_MCR);
 
 	/* clear all interrupt status */
-	writel(0xffffffff, q->iobase + QUADSPI_FR);
+	qspi_writel(q, 0xffffffff, q->iobase + QUADSPI_FR);
 
 	/* enable the interrupt */
-	writel(QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
+	qspi_writel(q, QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
 
 	return 0;
 }
@@ -776,6 +821,7 @@
 	{ .compatible = "fsl,imx6sx-qspi", .data = (void *)&imx6sx_data, },
 	{ .compatible = "fsl,imx7d-qspi", .data = (void *)&imx7d_data, },
 	{ .compatible = "fsl,imx6ul-qspi", .data = (void *)&imx6ul_data, },
+	{ .compatible = "fsl,ls1021a-qspi", .data = (void *)&ls1021a_data, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, fsl_qspi_dt_ids);
@@ -954,6 +1000,7 @@
 	if (IS_ERR(q->iobase))
 		return PTR_ERR(q->iobase);
 
+	q->big_endian = of_property_read_bool(np, "big-endian");
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					"QuadSPI-memory");
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
@@ -1101,8 +1148,8 @@
 	}
 
 	/* disable the hardware */
-	writel(QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
-	writel(0x0, q->iobase + QUADSPI_RSER);
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER);
 
 	mutex_destroy(&q->lock);
 

diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index d5f850d..8bed1a4c 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c

@@ -371,8 +371,8 @@
 	return ret;
 }
 
-static int __init mtk_nor_init(struct mt8173_nor *mt8173_nor,
-			       struct device_node *flash_node)
+static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
+			struct device_node *flash_node)
 {
 	int ret;
 	struct spi_nor *nor;

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index ed0c19c..157841d 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c

@@ -61,14 +61,20 @@
 	u16		addr_width;
 
 	u16		flags;
-#define	SECT_4K			0x01	/* SPINOR_OP_BE_4K works uniformly */
-#define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
-#define	SST_WRITE		0x04	/* use SST byte programming */
-#define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
-#define	SECT_4K_PMC		0x10	/* SPINOR_OP_BE_4K_PMC works uniformly */
-#define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
-#define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
-#define	USE_FSR			0x80	/* use flag status register */
+#define SECT_4K			BIT(0)	/* SPINOR_OP_BE_4K works uniformly */
+#define SPI_NOR_NO_ERASE	BIT(1)	/* No erase command needed */
+#define SST_WRITE		BIT(2)	/* use SST byte programming */
+#define SPI_NOR_NO_FR		BIT(3)	/* Can't do fastread */
+#define SECT_4K_PMC		BIT(4)	/* SPINOR_OP_BE_4K_PMC works uniformly */
+#define SPI_NOR_DUAL_READ	BIT(5)	/* Flash supports Dual Read */
+#define SPI_NOR_QUAD_READ	BIT(6)	/* Flash supports Quad Read */
+#define USE_FSR			BIT(7)	/* use flag status register */
+#define SPI_NOR_HAS_LOCK	BIT(8)	/* Flash supports lock/unlock via SR */
+#define SPI_NOR_HAS_TB		BIT(9)	/*
+					 * Flash SR has Top/Bottom (TB) protect
+					 * bit. Must be used with
+					 * SPI_NOR_HAS_LOCK.
+					 */
 };
 
 #define JEDEC_MFR(info)	((info)->id[0])
@@ -434,32 +440,58 @@
 	} else {
 		pow = ((sr & mask) ^ mask) >> shift;
 		*len = mtd->size >> pow;
-		*ofs = mtd->size - *len;
+		if (nor->flags & SNOR_F_HAS_SR_TB && sr & SR_TB)
+			*ofs = 0;
+		else
+			*ofs = mtd->size - *len;
 	}
 }
 
 /*
- * Return 1 if the entire region is locked, 0 otherwise
+ * Return 1 if the entire region is locked (if @locked is true) or unlocked (if
+ * @locked is false); 0 otherwise
  */
-static int stm_is_locked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
-			    u8 sr)
+static int stm_check_lock_status_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
+				    u8 sr, bool locked)
 {
 	loff_t lock_offs;
 	uint64_t lock_len;
 
+	if (!len)
+		return 1;
+
 	stm_get_locked_range(nor, sr, &lock_offs, &lock_len);
 
-	return (ofs + len <= lock_offs + lock_len) && (ofs >= lock_offs);
+	if (locked)
+		/* Requested range is a sub-range of locked range */
+		return (ofs + len <= lock_offs + lock_len) && (ofs >= lock_offs);
+	else
+		/* Requested range does not overlap with locked range */
+		return (ofs >= lock_offs + lock_len) || (ofs + len <= lock_offs);
+}
+
+static int stm_is_locked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
+			    u8 sr)
+{
+	return stm_check_lock_status_sr(nor, ofs, len, sr, true);
+}
+
+static int stm_is_unlocked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
+			      u8 sr)
+{
+	return stm_check_lock_status_sr(nor, ofs, len, sr, false);
 }
 
 /*
  * Lock a region of the flash. Compatible with ST Micro and similar flash.
- * Supports only the block protection bits BP{0,1,2} in the status register
+ * Supports the block protection bits BP{0,1,2} in the status register
  * (SR). Does not support these features found in newer SR bitfields:
- *   - TB: top/bottom protect - only handle TB=0 (top protect)
  *   - SEC: sector/block protect - only handle SEC=0 (block protect)
  *   - CMP: complement protect - only support CMP=0 (range is not complemented)
  *
+ * Support for the following is provided conditionally for some flash:
+ *   - TB: top/bottom protect
+ *
  * Sample table portion for 8MB flash (Winbond w25q64fw):
  *
  *   SEC  |  TB   |  BP2  |  BP1  |  BP0  |  Prot Length  | Protected Portion
@@ -472,6 +504,13 @@
  *    0   |   0   |   1   |   0   |   1   |  2 MB         | Upper 1/4
  *    0   |   0   |   1   |   1   |   0   |  4 MB         | Upper 1/2
  *    X   |   X   |   1   |   1   |   1   |  8 MB         | ALL
+ *  ------|-------|-------|-------|-------|---------------|-------------------
+ *    0   |   1   |   0   |   0   |   1   |  128 KB       | Lower 1/64
+ *    0   |   1   |   0   |   1   |   0   |  256 KB       | Lower 1/32
+ *    0   |   1   |   0   |   1   |   1   |  512 KB       | Lower 1/16
+ *    0   |   1   |   1   |   0   |   0   |  1 MB         | Lower 1/8
+ *    0   |   1   |   1   |   0   |   1   |  2 MB         | Lower 1/4
+ *    0   |   1   |   1   |   1   |   0   |  4 MB         | Lower 1/2
  *
  * Returns negative on errors, 0 on success.
  */
@@ -481,20 +520,39 @@
 	int status_old, status_new;
 	u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
 	u8 shift = ffs(mask) - 1, pow, val;
+	loff_t lock_len;
+	bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB;
+	bool use_top;
 	int ret;
 
 	status_old = read_sr(nor);
 	if (status_old < 0)
 		return status_old;
 
-	/* SPI NOR always locks to the end */
-	if (ofs + len != mtd->size) {
-		/* Does combined region extend to end? */
-		if (!stm_is_locked_sr(nor, ofs + len, mtd->size - ofs - len,
-				      status_old))
-			return -EINVAL;
-		len = mtd->size - ofs;
-	}
+	/* If nothing in our range is unlocked, we don't need to do anything */
+	if (stm_is_locked_sr(nor, ofs, len, status_old))
+		return 0;
+
+	/* If anything below us is unlocked, we can't use 'bottom' protection */
+	if (!stm_is_locked_sr(nor, 0, ofs, status_old))
+		can_be_bottom = false;
+
+	/* If anything above us is unlocked, we can't use 'top' protection */
+	if (!stm_is_locked_sr(nor, ofs + len, mtd->size - (ofs + len),
+				status_old))
+		can_be_top = false;
+
+	if (!can_be_bottom && !can_be_top)
+		return -EINVAL;
+
+	/* Prefer top, if both are valid */
+	use_top = can_be_top;
+
+	/* lock_len: length of region that should end up locked */
+	if (use_top)
+		lock_len = mtd->size - ofs;
+	else
+		lock_len = ofs + len;
 
 	/*
 	 * Need smallest pow such that:
@@ -505,7 +563,7 @@
 	 *
 	 *   pow = ceil(log2(size / len)) = log2(size) - floor(log2(len))
 	 */
-	pow = ilog2(mtd->size) - ilog2(len);
+	pow = ilog2(mtd->size) - ilog2(lock_len);
 	val = mask - (pow << shift);
 	if (val & ~mask)
 		return -EINVAL;
@@ -513,10 +571,20 @@
 	if (!(val & mask))
 		return -EINVAL;
 
-	status_new = (status_old & ~mask) | val;
+	status_new = (status_old & ~mask & ~SR_TB) | val;
+
+	/* Disallow further writes if WP pin is asserted */
+	status_new |= SR_SRWD;
+
+	if (!use_top)
+		status_new |= SR_TB;
+
+	/* Don't bother if they're the same */
+	if (status_new == status_old)
+		return 0;
 
 	/* Only modify protection if it will not unlock other areas */
-	if ((status_new & mask) <= (status_old & mask))
+	if ((status_new & mask) < (status_old & mask))
 		return -EINVAL;
 
 	write_enable(nor);
@@ -537,17 +605,40 @@
 	int status_old, status_new;
 	u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
 	u8 shift = ffs(mask) - 1, pow, val;
+	loff_t lock_len;
+	bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB;
+	bool use_top;
 	int ret;
 
 	status_old = read_sr(nor);
 	if (status_old < 0)
 		return status_old;
 
-	/* Cannot unlock; would unlock larger region than requested */
-	if (stm_is_locked_sr(nor, ofs - mtd->erasesize, mtd->erasesize,
-			     status_old))
+	/* If nothing in our range is locked, we don't need to do anything */
+	if (stm_is_unlocked_sr(nor, ofs, len, status_old))
+		return 0;
+
+	/* If anything below us is locked, we can't use 'top' protection */
+	if (!stm_is_unlocked_sr(nor, 0, ofs, status_old))
+		can_be_top = false;
+
+	/* If anything above us is locked, we can't use 'bottom' protection */
+	if (!stm_is_unlocked_sr(nor, ofs + len, mtd->size - (ofs + len),
+				status_old))
+		can_be_bottom = false;
+
+	if (!can_be_bottom && !can_be_top)
 		return -EINVAL;
 
+	/* Prefer top, if both are valid */
+	use_top = can_be_top;
+
+	/* lock_len: length of region that should remain locked */
+	if (use_top)
+		lock_len = mtd->size - (ofs + len);
+	else
+		lock_len = ofs;
+
 	/*
 	 * Need largest pow such that:
 	 *
@@ -557,8 +648,8 @@
 	 *
 	 *   pow = floor(log2(size / len)) = log2(size) - ceil(log2(len))
 	 */
-	pow = ilog2(mtd->size) - order_base_2(mtd->size - (ofs + len));
-	if (ofs + len == mtd->size) {
+	pow = ilog2(mtd->size) - order_base_2(lock_len);
+	if (lock_len == 0) {
 		val = 0; /* fully unlocked */
 	} else {
 		val = mask - (pow << shift);
@@ -567,10 +658,21 @@
 			return -EINVAL;
 	}
 
-	status_new = (status_old & ~mask) | val;
+	status_new = (status_old & ~mask & ~SR_TB) | val;
+
+	/* Don't protect status register if we're fully unlocked */
+	if (lock_len == mtd->size)
+		status_new &= ~SR_SRWD;
+
+	if (!use_top)
+		status_new |= SR_TB;
+
+	/* Don't bother if they're the same */
+	if (status_new == status_old)
+		return 0;
 
 	/* Only modify protection if it will not lock other areas */
-	if ((status_new & mask) >= (status_old & mask))
+	if ((status_new & mask) > (status_old & mask))
 		return -EINVAL;
 
 	write_enable(nor);
@@ -762,8 +864,8 @@
 	{ "n25q032a",	 INFO(0x20bb16, 0, 64 * 1024,   64, SPI_NOR_QUAD_READ) },
 	{ "n25q064",     INFO(0x20ba17, 0, 64 * 1024,  128, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q064a",    INFO(0x20bb17, 0, 64 * 1024,  128, SECT_4K | SPI_NOR_QUAD_READ) },
-	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, SPI_NOR_QUAD_READ) },
-	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SPI_NOR_QUAD_READ) },
+	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
+	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
 	{ "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
@@ -797,6 +899,7 @@
 	{ "s25fl008k",  INFO(0xef4014,      0,  64 * 1024,  16, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "s25fl016k",  INFO(0xef4015,      0,  64 * 1024,  32, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "s25fl064k",  INFO(0xef4017,      0,  64 * 1024, 128, SECT_4K) },
+	{ "s25fl116k",  INFO(0x014015,      0,  64 * 1024,  32, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "s25fl132k",  INFO(0x014016,      0,  64 * 1024,  64, SECT_4K) },
 	{ "s25fl164k",  INFO(0x014017,      0,  64 * 1024, 128, SECT_4K) },
 	{ "s25fl204k",  INFO(0x014013,      0,  64 * 1024,   8, SECT_4K | SPI_NOR_DUAL_READ) },
@@ -860,11 +963,23 @@
 	{ "w25x16", INFO(0xef3015, 0, 64 * 1024,  32, SECT_4K) },
 	{ "w25x32", INFO(0xef3016, 0, 64 * 1024,  64, SECT_4K) },
 	{ "w25q32", INFO(0xef4016, 0, 64 * 1024,  64, SECT_4K) },
-	{ "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{
+		"w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64,
+			SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+			SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
+	},
 	{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
 	{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
-	{ "w25q64dw", INFO(0xef6017, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
-	{ "w25q128fw", INFO(0xef6018, 0, 64 * 1024, 256, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{
+		"w25q64dw", INFO(0xef6017, 0, 64 * 1024, 128,
+			SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+			SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
+	},
+	{
+		"w25q128fw", INFO(0xef6018, 0, 64 * 1024, 256,
+			SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+			SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
+	},
 	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
@@ -1100,45 +1215,6 @@
 	return 0;
 }
 
-static int micron_quad_enable(struct spi_nor *nor)
-{
-	int ret;
-	u8 val;
-
-	ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error %d reading EVCR\n", ret);
-		return ret;
-	}
-
-	write_enable(nor);
-
-	/* set EVCR, enable quad I/O */
-	nor->cmd_buf[0] = val & ~EVCR_QUAD_EN_MICRON;
-	ret = nor->write_reg(nor, SPINOR_OP_WD_EVCR, nor->cmd_buf, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error while writing EVCR register\n");
-		return ret;
-	}
-
-	ret = spi_nor_wait_till_ready(nor);
-	if (ret)
-		return ret;
-
-	/* read EVCR and check it */
-	ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error %d reading EVCR\n", ret);
-		return ret;
-	}
-	if (val & EVCR_QUAD_EN_MICRON) {
-		dev_err(nor->dev, "Micron EVCR Quad bit not clear\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
 {
 	int status;
@@ -1152,12 +1228,7 @@
 		}
 		return status;
 	case SNOR_MFR_MICRON:
-		status = micron_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Micron quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
+		return 0;
 	default:
 		status = spansion_quad_enable(nor);
 		if (status) {
@@ -1233,9 +1304,11 @@
 
 	if (JEDEC_MFR(info) == SNOR_MFR_ATMEL ||
 	    JEDEC_MFR(info) == SNOR_MFR_INTEL ||
-	    JEDEC_MFR(info) == SNOR_MFR_SST) {
+	    JEDEC_MFR(info) == SNOR_MFR_SST ||
+	    info->flags & SPI_NOR_HAS_LOCK) {
 		write_enable(nor);
 		write_sr(nor, 0);
+		spi_nor_wait_till_ready(nor);
 	}
 
 	if (!mtd->name)
@@ -1249,7 +1322,8 @@
 	mtd->_read = spi_nor_read;
 
 	/* NOR protection support for STmicro/Micron chips and similar */
-	if (JEDEC_MFR(info) == SNOR_MFR_MICRON) {
+	if (JEDEC_MFR(info) == SNOR_MFR_MICRON ||
+			info->flags & SPI_NOR_HAS_LOCK) {
 		nor->flash_lock = stm_lock;
 		nor->flash_unlock = stm_unlock;
 		nor->flash_is_locked = stm_is_locked;
@@ -1269,6 +1343,8 @@
 
 	if (info->flags & USE_FSR)
 		nor->flags |= SNOR_F_USE_FSR;
+	if (info->flags & SPI_NOR_HAS_TB)
+		nor->flags |= SNOR_F_HAS_SR_TB;
 
 #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
 	/* prefer "small sector" erase if possible */

diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c
index 3176212..1cb3f77 100644
--- a/drivers/mtd/tests/oobtest.c
+++ b/drivers/mtd/tests/oobtest.c

@@ -215,19 +215,19 @@
 			pr_info("ignoring error as within bitflip_limit\n");
 		}
 
-		if (use_offset != 0 || use_len < mtd->ecclayout->oobavail) {
+		if (use_offset != 0 || use_len < mtd->oobavail) {
 			int k;
 
 			ops.mode      = MTD_OPS_AUTO_OOB;
 			ops.len       = 0;
 			ops.retlen    = 0;
-			ops.ooblen    = mtd->ecclayout->oobavail;
+			ops.ooblen    = mtd->oobavail;
 			ops.oobretlen = 0;
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
 			ops.oobbuf    = readbuf;
 			err = mtd_read_oob(mtd, addr, &ops);
-			if (err || ops.oobretlen != mtd->ecclayout->oobavail) {
+			if (err || ops.oobretlen != mtd->oobavail) {
 				pr_err("error: readoob failed at %#llx\n",
 						(long long)addr);
 				errcnt += 1;
@@ -244,7 +244,7 @@
 			/* verify post-(use_offset + use_len) area for 0xff */
 			k = use_offset + use_len;
 			bitflips += memffshow(addr, k, readbuf + k,
-					      mtd->ecclayout->oobavail - k);
+					      mtd->oobavail - k);
 
 			if (bitflips > bitflip_limit) {
 				pr_err("error: verify failed at %#llx\n",
@@ -269,8 +269,8 @@
 	struct mtd_oob_ops ops;
 	int err = 0;
 	loff_t addr = (loff_t)ebnum * mtd->erasesize;
-	size_t len = mtd->ecclayout->oobavail * pgcnt;
-	size_t oobavail = mtd->ecclayout->oobavail;
+	size_t len = mtd->oobavail * pgcnt;
+	size_t oobavail = mtd->oobavail;
 	size_t bitflips;
 	int i;
 
@@ -394,8 +394,8 @@
 		goto out;
 
 	use_offset = 0;
-	use_len = mtd->ecclayout->oobavail;
-	use_len_max = mtd->ecclayout->oobavail;
+	use_len = mtd->oobavail;
+	use_len_max = mtd->oobavail;
 	vary_offset = 0;
 
 	/* First test: write all OOB, read it back and verify */
@@ -460,8 +460,8 @@
 
 	/* Write all eraseblocks */
 	use_offset = 0;
-	use_len = mtd->ecclayout->oobavail;
-	use_len_max = mtd->ecclayout->oobavail;
+	use_len = mtd->oobavail;
+	use_len_max = mtd->oobavail;
 	vary_offset = 1;
 	prandom_seed_state(&rnd_state, 5);
 
@@ -471,8 +471,8 @@
 
 	/* Check all eraseblocks */
 	use_offset = 0;
-	use_len = mtd->ecclayout->oobavail;
-	use_len_max = mtd->ecclayout->oobavail;
+	use_len = mtd->oobavail;
+	use_len_max = mtd->oobavail;
 	vary_offset = 1;
 	prandom_seed_state(&rnd_state, 5);
 	err = verify_all_eraseblocks();
@@ -480,8 +480,8 @@
 		goto out;
 
 	use_offset = 0;
-	use_len = mtd->ecclayout->oobavail;
-	use_len_max = mtd->ecclayout->oobavail;
+	use_len = mtd->oobavail;
+	use_len_max = mtd->oobavail;
 	vary_offset = 0;
 
 	/* Fourth test: try to write off end of device */
@@ -501,7 +501,7 @@
 	ops.retlen    = 0;
 	ops.ooblen    = 1;
 	ops.oobretlen = 0;
-	ops.ooboffs   = mtd->ecclayout->oobavail;
+	ops.ooboffs   = mtd->oobavail;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = writebuf;
 	pr_info("attempting to start write past end of OOB\n");
@@ -521,7 +521,7 @@
 	ops.retlen    = 0;
 	ops.ooblen    = 1;
 	ops.oobretlen = 0;
-	ops.ooboffs   = mtd->ecclayout->oobavail;
+	ops.ooboffs   = mtd->oobavail;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = readbuf;
 	pr_info("attempting to start read past end of OOB\n");
@@ -543,7 +543,7 @@
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail + 1;
+		ops.ooblen    = mtd->oobavail + 1;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
@@ -563,7 +563,7 @@
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail + 1;
+		ops.ooblen    = mtd->oobavail + 1;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
@@ -587,7 +587,7 @@
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail;
+		ops.ooblen    = mtd->oobavail;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 1;
 		ops.datbuf    = NULL;
@@ -607,7 +607,7 @@
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail;
+		ops.ooblen    = mtd->oobavail;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 1;
 		ops.datbuf    = NULL;
@@ -638,7 +638,7 @@
 	for (i = 0; i < ebcnt - 1; ++i) {
 		int cnt = 2;
 		int pg;
-		size_t sz = mtd->ecclayout->oobavail;
+		size_t sz = mtd->oobavail;
 		if (bbt[i] || bbt[i + 1])
 			continue;
 		addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize;
@@ -673,13 +673,12 @@
 	for (i = 0; i < ebcnt - 1; ++i) {
 		if (bbt[i] || bbt[i + 1])
 			continue;
-		prandom_bytes_state(&rnd_state, writebuf,
-					mtd->ecclayout->oobavail * 2);
+		prandom_bytes_state(&rnd_state, writebuf, mtd->oobavail * 2);
 		addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize;
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail * 2;
+		ops.ooblen    = mtd->oobavail * 2;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
@@ -688,7 +687,7 @@
 		if (err)
 			goto out;
 		if (memcmpshow(addr, readbuf, writebuf,
-			       mtd->ecclayout->oobavail * 2)) {
+			       mtd->oobavail * 2)) {
 			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;

diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c
index 2a45ac2..989036c 100644
--- a/drivers/mtd/ubi/misc.c
+++ b/drivers/mtd/ubi/misc.c

@@ -153,3 +153,52 @@
 			return 0;
 	return 1;
 }
+
+/* Normal UBI messages */
+void ubi_msg(const struct ubi_device *ubi, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_notice(UBI_NAME_STR "%d: %pV\n", ubi->ubi_num, &vaf);
+
+	va_end(args);
+}
+
+/* UBI warning messages */
+void ubi_warn(const struct ubi_device *ubi, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_warn(UBI_NAME_STR "%d warning: %ps: %pV\n",
+		ubi->ubi_num, __builtin_return_address(0), &vaf);
+
+	va_end(args);
+}
+
+/* UBI error messages */
+void ubi_err(const struct ubi_device *ubi, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_err(UBI_NAME_STR "%d error: %ps: %pV\n",
+	       ubi->ubi_num, __builtin_return_address(0), &vaf);
+	va_end(args);
+}

diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 2974b67..dadc6a9 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h

@@ -49,15 +49,19 @@
 /* UBI name used for character devices, sysfs, etc */
 #define UBI_NAME_STR "ubi"
 
+struct ubi_device;
+
 /* Normal UBI messages */
-#define ubi_msg(ubi, fmt, ...) pr_notice(UBI_NAME_STR "%d: " fmt "\n", \
-					 ubi->ubi_num, ##__VA_ARGS__)
+__printf(2, 3)
+void ubi_msg(const struct ubi_device *ubi, const char *fmt, ...);
+
 /* UBI warning messages */
-#define ubi_warn(ubi, fmt, ...) pr_warn(UBI_NAME_STR "%d warning: %s: " fmt "\n", \
-					ubi->ubi_num, __func__, ##__VA_ARGS__)
+__printf(2, 3)
+void ubi_warn(const struct ubi_device *ubi, const char *fmt, ...);
+
 /* UBI error messages */
-#define ubi_err(ubi, fmt, ...) pr_err(UBI_NAME_STR "%d error: %s: " fmt "\n", \
-				      ubi->ubi_num, __func__, ##__VA_ARGS__)
+__printf(2, 3)
+void ubi_err(const struct ubi_device *ubi, const char *fmt, ...);
 
 /* Background thread name pattern */
 #define UBI_BGT_NAME_PATTERN "ubi_bgt%dd"

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index fa086e0..50454be 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c

@@ -2264,6 +2264,57 @@
 	mutex_unlock(&ps->smi_mutex);
 }
 
+static int _mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page,
+				     int reg, int val)
+{
+	int ret;
+
+	ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page);
+	if (ret < 0)
+		goto restore_page_0;
+
+	ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val);
+restore_page_0:
+	_mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0);
+
+	return ret;
+}
+
+static int _mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page,
+				    int reg)
+{
+	int ret;
+
+	ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page);
+	if (ret < 0)
+		goto restore_page_0;
+
+	ret = _mv88e6xxx_phy_read_indirect(ds, port, reg);
+restore_page_0:
+	_mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0);
+
+	return ret;
+}
+
+static int mv88e6xxx_power_on_serdes(struct dsa_switch *ds)
+{
+	int ret;
+
+	ret = _mv88e6xxx_phy_page_read(ds, REG_FIBER_SERDES, PAGE_FIBER_SERDES,
+				       MII_BMCR);
+	if (ret < 0)
+		return ret;
+
+	if (ret & BMCR_PDOWN) {
+		ret &= ~BMCR_PDOWN;
+		ret = _mv88e6xxx_phy_page_write(ds, REG_FIBER_SERDES,
+						PAGE_FIBER_SERDES, MII_BMCR,
+						ret);
+	}
+
+	return ret;
+}
+
 static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
 {
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -2367,6 +2418,23 @@
 			goto abort;
 	}
 
+	/* If this port is connected to a SerDes, make sure the SerDes is not
+	 * powered down.
+	 */
+	if (mv88e6xxx_6352_family(ds)) {
+		ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS);
+		if (ret < 0)
+			goto abort;
+		ret &= PORT_STATUS_CMODE_MASK;
+		if ((ret == PORT_STATUS_CMODE_100BASE_X) ||
+		    (ret == PORT_STATUS_CMODE_1000BASE_X) ||
+		    (ret == PORT_STATUS_CMODE_SGMII)) {
+			ret = mv88e6xxx_power_on_serdes(ds);
+			if (ret < 0)
+				goto abort;
+		}
+	}
+
 	/* Port Control 2: don't force a good FCS, set the maximum frame size to
 	 * 10240 bytes, disable 802.1q tags checking, don't discard tagged or
 	 * untagged frames on this port, do a destination address lookup on all
@@ -2714,13 +2782,9 @@
 	int ret;
 
 	mutex_lock(&ps->smi_mutex);
-	ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page);
-	if (ret < 0)
-		goto error;
-	ret = _mv88e6xxx_phy_read_indirect(ds, port, reg);
-error:
-	_mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0);
+	ret = _mv88e6xxx_phy_page_read(ds, port, page, reg);
 	mutex_unlock(&ps->smi_mutex);
+
 	return ret;
 }
 
@@ -2731,14 +2795,9 @@
 	int ret;
 
 	mutex_lock(&ps->smi_mutex);
-	ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page);
-	if (ret < 0)
-		goto error;
-
-	ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val);
-error:
-	_mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0);
+	ret = _mv88e6xxx_phy_page_write(ds, port, page, reg, val);
 	mutex_unlock(&ps->smi_mutex);
+
 	return ret;
 }
 

diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 9a038ab..26a424a 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h

@@ -28,6 +28,10 @@
 #define SMI_CMD_OP_45_READ_DATA_INC	((3 << 10) | SMI_CMD_BUSY)
 #define SMI_DATA		0x01
 
+/* Fiber/SERDES Registers are located at SMI address F, page 1 */
+#define REG_FIBER_SERDES	0x0f
+#define PAGE_FIBER_SERDES	0x01
+
 #define REG_PORT(p)		(0x10 + (p))
 #define PORT_STATUS		0x00
 #define PORT_STATUS_PAUSE_EN	BIT(15)
@@ -45,6 +49,10 @@
 #define PORT_STATUS_MGMII	BIT(6) /* 6185 */
 #define PORT_STATUS_TX_PAUSED	BIT(5)
 #define PORT_STATUS_FLOW_CTRL	BIT(4)
+#define PORT_STATUS_CMODE_MASK	0x0f
+#define PORT_STATUS_CMODE_100BASE_X	0x8
+#define PORT_STATUS_CMODE_1000BASE_X	0x9
+#define PORT_STATUS_CMODE_SGMII		0xa
 #define PORT_PCS_CTRL		0x01
 #define PORT_PCS_CTRL_RGMII_DELAY_RXCLK	BIT(15)
 #define PORT_PCS_CTRL_RGMII_DELAY_TXCLK	BIT(14)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index aabbd51..12a009d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

@@ -2653,7 +2653,7 @@
 	/* Write request msg to hwrm channel */
 	__iowrite32_copy(bp->bar0, data, msg_len / 4);
 
-	for (i = msg_len; i < HWRM_MAX_REQ_LEN; i += 4)
+	for (i = msg_len; i < BNXT_HWRM_MAX_REQ_LEN; i += 4)
 		writel(0, bp->bar0 + i);
 
 	/* currently supports only one outstanding message */
@@ -3391,11 +3391,11 @@
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 		struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
 
+		cpr->cp_doorbell = bp->bar1 + i * 0x80;
 		rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_CMPL, i,
 					      INVALID_STATS_CTX_ID);
 		if (rc)
 			goto err_out;
-		cpr->cp_doorbell = bp->bar1 + i * 0x80;
 		BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
 		bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id;
 	}
@@ -3830,6 +3830,7 @@
 	struct hwrm_ver_get_input req = {0};
 	struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr;
 
+	bp->hwrm_max_req_len = HWRM_MAX_REQ_LEN;
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VER_GET, -1, -1);
 	req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
 	req.hwrm_intf_min = HWRM_VERSION_MINOR;
@@ -3855,6 +3856,9 @@
 	if (!bp->hwrm_cmd_timeout)
 		bp->hwrm_cmd_timeout = DFLT_HWRM_CMD_TIMEOUT;
 
+	if (resp->hwrm_intf_maj >= 1)
+		bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len);
+
 hwrm_ver_get_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -4555,7 +4559,7 @@
 		if (bp->link_info.req_flow_ctrl & BNXT_LINK_PAUSE_RX)
 			req->auto_pause |= PORT_PHY_CFG_REQ_AUTO_PAUSE_RX;
 		if (bp->link_info.req_flow_ctrl & BNXT_LINK_PAUSE_TX)
-			req->auto_pause |= PORT_PHY_CFG_REQ_AUTO_PAUSE_RX;
+			req->auto_pause |= PORT_PHY_CFG_REQ_AUTO_PAUSE_TX;
 		req->enables |=
 			cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_AUTO_PAUSE);
 	} else {

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index ec04c47..709b95b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h

@@ -477,6 +477,7 @@
 #define RING_CMP(idx)		((idx) & bp->cp_ring_mask)
 #define NEXT_CMP(idx)		RING_CMP(ADV_RAW_CMP(idx, 1))
 
+#define BNXT_HWRM_MAX_REQ_LEN		(bp->hwrm_max_req_len)
 #define DFLT_HWRM_CMD_TIMEOUT		500
 #define HWRM_CMD_TIMEOUT		(bp->hwrm_cmd_timeout)
 #define HWRM_RESET_TIMEOUT		((HWRM_CMD_TIMEOUT) * 4)
@@ -953,6 +954,7 @@
 	dma_addr_t		hw_tx_port_stats_map;
 	int			hw_port_stats_size;
 
+	u16			hwrm_max_req_len;
 	int			hwrm_cmd_timeout;
 	struct mutex		hwrm_cmd_lock;	/* serialize hwrm messages */
 	struct hwrm_ver_get_output	ver_resp;

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 9ada166..2e472f6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c

@@ -855,10 +855,8 @@
 	if (BNXT_VF(bp))
 		return;
 	epause->autoneg = !!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL);
-	epause->rx_pause =
-		((link_info->auto_pause_setting & BNXT_LINK_PAUSE_RX) != 0);
-	epause->tx_pause =
-		((link_info->auto_pause_setting & BNXT_LINK_PAUSE_TX) != 0);
+	epause->rx_pause = !!(link_info->req_flow_ctrl & BNXT_LINK_PAUSE_RX);
+	epause->tx_pause = !!(link_info->req_flow_ctrl & BNXT_LINK_PAUSE_TX);
 }
 
 static int bnxt_set_pauseparam(struct net_device *dev,

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 6746fd0..cf6445d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c

@@ -1171,6 +1171,7 @@
 	struct enet_cb *tx_cb_ptr;
 	struct netdev_queue *txq;
 	unsigned int pkts_compl = 0;
+	unsigned int bytes_compl = 0;
 	unsigned int c_index;
 	unsigned int txbds_ready;
 	unsigned int txbds_processed = 0;
@@ -1193,16 +1194,13 @@
 		tx_cb_ptr = &priv->tx_cbs[ring->clean_ptr];
 		if (tx_cb_ptr->skb) {
 			pkts_compl++;
-			dev->stats.tx_packets++;
-			dev->stats.tx_bytes += tx_cb_ptr->skb->len;
+			bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent;
 			dma_unmap_single(&dev->dev,
 					 dma_unmap_addr(tx_cb_ptr, dma_addr),
 					 dma_unmap_len(tx_cb_ptr, dma_len),
 					 DMA_TO_DEVICE);
 			bcmgenet_free_cb(tx_cb_ptr);
 		} else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) {
-			dev->stats.tx_bytes +=
-				dma_unmap_len(tx_cb_ptr, dma_len);
 			dma_unmap_page(&dev->dev,
 				       dma_unmap_addr(tx_cb_ptr, dma_addr),
 				       dma_unmap_len(tx_cb_ptr, dma_len),
@@ -1220,6 +1218,9 @@
 	ring->free_bds += txbds_processed;
 	ring->c_index = (ring->c_index + txbds_processed) & DMA_C_INDEX_MASK;
 
+	dev->stats.tx_packets += pkts_compl;
+	dev->stats.tx_bytes += bytes_compl;
+
 	if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
 		txq = netdev_get_tx_queue(dev, ring->queue);
 		if (netif_tx_queue_stopped(txq))
@@ -1296,7 +1297,7 @@
 
 	tx_cb_ptr->skb = skb;
 
-	skb_len = skb_headlen(skb) < ETH_ZLEN ? ETH_ZLEN : skb_headlen(skb);
+	skb_len = skb_headlen(skb);
 
 	mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE);
 	ret = dma_mapping_error(kdev, mapping);
@@ -1464,6 +1465,11 @@
 		goto out;
 	}
 
+	/* Retain how many bytes will be sent on the wire, without TSB inserted
+	 * by transmit checksum offload
+	 */
+	GENET_CB(skb)->bytes_sent = skb->len;
+
 	/* set the SKB transmit checksum */
 	if (priv->desc_64b_en) {
 		skb = bcmgenet_put_tx_csum(dev, skb);

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 9673675..1e2dc34 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h

@@ -531,6 +531,12 @@
 	u32		flags;
 };
 
+struct bcmgenet_skb_cb {
+	unsigned int bytes_sent;	/* bytes on the wire (no TSB) */
+};
+
+#define GENET_CB(skb)	((struct bcmgenet_skb_cb *)((skb)->cb))
+
 struct bcmgenet_tx_ring {
 	spinlock_t	lock;		/* ring lock */
 	struct napi_struct napi;	/* NAPI per tx queue */

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 3ce6095..48a7d7d 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c

@@ -917,7 +917,10 @@
 		unsigned int frag_len = bp->rx_buffer_size;
 
 		if (offset + frag_len > len) {
-			BUG_ON(frag != last_frag);
+			if (unlikely(frag != last_frag)) {
+				dev_kfree_skb_any(skb);
+				return -1;
+			}
 			frag_len = len - offset;
 		}
 		skb_copy_to_linear_data_offset(skb, offset,
@@ -945,8 +948,23 @@
 	return 0;
 }
 
+static inline void macb_init_rx_ring(struct macb *bp)
+{
+	dma_addr_t addr;
+	int i;
+
+	addr = bp->rx_buffers_dma;
+	for (i = 0; i < RX_RING_SIZE; i++) {
+		bp->rx_ring[i].addr = addr;
+		bp->rx_ring[i].ctrl = 0;
+		addr += bp->rx_buffer_size;
+	}
+	bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
+}
+
 static int macb_rx(struct macb *bp, int budget)
 {
+	bool reset_rx_queue = false;
 	int received = 0;
 	unsigned int tail;
 	int first_frag = -1;
@@ -972,10 +990,18 @@
 
 		if (ctrl & MACB_BIT(RX_EOF)) {
 			int dropped;
-			BUG_ON(first_frag == -1);
+
+			if (unlikely(first_frag == -1)) {
+				reset_rx_queue = true;
+				continue;
+			}
 
 			dropped = macb_rx_frame(bp, first_frag, tail);
 			first_frag = -1;
+			if (unlikely(dropped < 0)) {
+				reset_rx_queue = true;
+				continue;
+			}
 			if (!dropped) {
 				received++;
 				budget--;
@@ -983,6 +1009,26 @@
 		}
 	}
 
+	if (unlikely(reset_rx_queue)) {
+		unsigned long flags;
+		u32 ctrl;
+
+		netdev_err(bp->dev, "RX queue corruption: reset it\n");
+
+		spin_lock_irqsave(&bp->lock, flags);
+
+		ctrl = macb_readl(bp, NCR);
+		macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
+
+		macb_init_rx_ring(bp);
+		macb_writel(bp, RBQP, bp->rx_ring_dma);
+
+		macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
+
+		spin_unlock_irqrestore(&bp->lock, flags);
+		return received;
+	}
+
 	if (first_frag != -1)
 		bp->rx_tail = first_frag;
 	else
@@ -1100,7 +1146,7 @@
 			macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
 
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
-				macb_writel(bp, ISR, MACB_BIT(RXUBR));
+				queue_writel(queue, ISR, MACB_BIT(RXUBR));
 		}
 
 		if (status & MACB_BIT(ISR_ROVR)) {
@@ -1523,15 +1569,8 @@
 static void macb_init_rings(struct macb *bp)
 {
 	int i;
-	dma_addr_t addr;
 
-	addr = bp->rx_buffers_dma;
-	for (i = 0; i < RX_RING_SIZE; i++) {
-		bp->rx_ring[i].addr = addr;
-		bp->rx_ring[i].ctrl = 0;
-		addr += bp->rx_buffer_size;
-	}
-	bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
+	macb_init_rx_ring(bp);
 
 	for (i = 0; i < TX_RING_SIZE; i++) {
 		bp->queues[0].tx_ring[i].addr = 0;
@@ -2957,9 +2996,10 @@
 	phy_node =  of_get_next_available_child(np, NULL);
 	if (phy_node) {
 		int gpio = of_get_named_gpio(phy_node, "reset-gpios", 0);
-		if (gpio_is_valid(gpio))
+		if (gpio_is_valid(gpio)) {
 			bp->reset_gpio = gpio_to_desc(gpio);
-		gpiod_set_value(bp->reset_gpio, GPIOD_OUT_HIGH);
+			gpiod_direction_output(bp->reset_gpio, 1);
+		}
 	}
 	of_node_put(phy_node);
 
@@ -3029,7 +3069,8 @@
 		mdiobus_free(bp->mii_bus);
 
 		/* Shutdown the PHY if there is a GPIO reset */
-		gpiod_set_value(bp->reset_gpio, GPIOD_OUT_LOW);
+		if (bp->reset_gpio)
+			gpiod_set_value(bp->reset_gpio, 0);
 
 		unregister_netdev(dev);
 		clk_disable_unprepare(bp->tx_clk);

diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index 4d187f2..4686a85 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig

@@ -96,6 +96,17 @@
 
 	  If unsure, say N.
 
+config CHELSIO_T4_UWIRE
+	bool "Unified Wire Support for Chelsio T5 cards"
+	default n
+	depends on CHELSIO_T4
+	---help---
+	  Enable unified-wire offload features.
+	  Say Y here if you want to enable unified-wire over Ethernet
+	  in the driver.
+
+	  If unsure, say N.
+
 config CHELSIO_T4_FCOE
 	bool "Fibre Channel over Ethernet (FCoE) Support for Chelsio T5 cards"
 	default n

diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index ace0ab9..85c9282 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile

@@ -7,4 +7,5 @@
 cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
+cxgb4-$(CONFIG_CHELSIO_T4_UWIRE) +=  cxgb4_ppm.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 1dac6c6..984a3cc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h

@@ -404,6 +404,9 @@
 	MAX_CTRL_QUEUES = NCHAN,      /* # of control Tx queues */
 	MAX_RDMA_QUEUES = NCHAN,      /* # of streaming RDMA Rx queues */
 	MAX_RDMA_CIQS = 32,        /* # of  RDMA concentrator IQs */
+
+	/* # of streaming iSCSIT Rx queues */
+	MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS,
 };
 
 enum {
@@ -420,8 +423,8 @@
 enum {
 	INGQ_EXTRAS = 2,        /* firmware event queue and */
 				/*   forwarded interrupts */
-	MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES
-		   + MAX_RDMA_CIQS + INGQ_EXTRAS,
+	MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES +
+		   MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS,
 };
 
 struct adapter;
@@ -508,6 +511,15 @@
 
 typedef int (*rspq_handler_t)(struct sge_rspq *q, const __be64 *rsp,
 			      const struct pkt_gl *gl);
+typedef void (*rspq_flush_handler_t)(struct sge_rspq *q);
+/* LRO related declarations for ULD */
+struct t4_lro_mgr {
+#define MAX_LRO_SESSIONS		64
+	u8 lro_session_cnt;         /* # of sessions to aggregate */
+	unsigned long lro_pkts;     /* # of LRO super packets */
+	unsigned long lro_merged;   /* # of wire packets merged by LRO */
+	struct sk_buff_head lroq;   /* list of aggregated sessions */
+};
 
 struct sge_rspq {                   /* state for an SGE response queue */
 	struct napi_struct napi;
@@ -532,6 +544,8 @@
 	struct adapter *adap;
 	struct net_device *netdev;  /* associated net device */
 	rspq_handler_t handler;
+	rspq_flush_handler_t flush_handler;
+	struct t4_lro_mgr lro_mgr;
 #ifdef CONFIG_NET_RX_BUSY_POLL
 #define CXGB_POLL_STATE_IDLE		0
 #define CXGB_POLL_STATE_NAPI		BIT(0) /* NAPI owns this poll */
@@ -641,6 +655,7 @@
 
 	struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
 	struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS];
+	struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES];
 	struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
 	struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
 	struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
@@ -652,9 +667,11 @@
 	u16 ethqsets;               /* # of active Ethernet queue sets */
 	u16 ethtxq_rover;           /* Tx queue to clean up next */
 	u16 iscsiqsets;              /* # of active iSCSI queue sets */
+	u16 niscsitq;               /* # of available iSCST Rx queues */
 	u16 rdmaqs;                 /* # of available RDMA Rx queues */
 	u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
 	u16 iscsi_rxq[MAX_OFLD_QSETS];
+	u16 iscsit_rxq[MAX_ISCSIT_QUEUES];
 	u16 rdma_rxq[MAX_RDMA_QUEUES];
 	u16 rdma_ciq[MAX_RDMA_CIQS];
 	u16 timer_val[SGE_NTIMERS];
@@ -681,6 +698,7 @@
 
 #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
 #define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++)
+#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++)
 #define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++)
 #define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++)
 
@@ -747,6 +765,8 @@
 	struct list_head rcu_node;
 	struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
 
+	void *iscsi_ppm;
+
 	struct tid_info tids;
 	void **tid_release_head;
 	spinlock_t tid_release_lock;
@@ -1113,7 +1133,8 @@
 int t4_ofld_send(struct adapter *adap, struct sk_buff *skb);
 int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		     struct net_device *dev, int intr_idx,
-		     struct sge_fl *fl, rspq_handler_t hnd, int cong);
+		     struct sge_fl *fl, rspq_handler_t hnd,
+		     rspq_flush_handler_t flush_handler, int cong);
 int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 			 struct net_device *dev, struct netdev_queue *netdevq,
 			 unsigned int iqid);

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index e6a4072..0bb41e9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c

@@ -2334,12 +2334,14 @@
 	struct adapter *adap = seq->private;
 	int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
 	int iscsi_entries = DIV_ROUND_UP(adap->sge.iscsiqsets, 4);
+	int iscsit_entries = DIV_ROUND_UP(adap->sge.niscsitq, 4);
 	int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4);
 	int ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4);
 	int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
 	int i, r = (uintptr_t)v - 1;
 	int iscsi_idx = r - eth_entries;
-	int rdma_idx = iscsi_idx - iscsi_entries;
+	int iscsit_idx = iscsi_idx - iscsi_entries;
+	int rdma_idx = iscsit_idx - iscsit_entries;
 	int ciq_idx = rdma_idx - rdma_entries;
 	int ctrl_idx =  ciq_idx - ciq_entries;
 	int fq_idx =  ctrl_idx - ctrl_entries;
@@ -2453,6 +2455,35 @@
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
+	} else if (iscsit_idx < iscsit_entries) {
+		const struct sge_ofld_rxq *rx =
+			&adap->sge.iscsitrxq[iscsit_idx * 4];
+		int n = min(4, adap->sge.niscsitq - 4 * iscsit_idx);
+
+		S("QType:", "iSCSIT");
+		R("RspQ ID:", rspq.abs_id);
+		R("RspQ size:", rspq.size);
+		R("RspQE size:", rspq.iqe_len);
+		R("RspQ CIDX:", rspq.cidx);
+		R("RspQ Gen:", rspq.gen);
+		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+		S3("u", "Intr pktcnt:",
+		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
+		R("FL ID:", fl.cntxt_id);
+		R("FL size:", fl.size - 8);
+		R("FL pend:", fl.pend_cred);
+		R("FL avail:", fl.avail);
+		R("FL PIDX:", fl.pidx);
+		R("FL CIDX:", fl.cidx);
+		RL("RxPackets:", stats.pkts);
+		RL("RxImmPkts:", stats.imm);
+		RL("RxNoMem:", stats.nomem);
+		RL("FLAllocErr:", fl.alloc_failed);
+		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLMapErr:", fl.mapping_err);
+		RL("FLLow:", fl.low);
+		RL("FLStarving:", fl.starving);
+
 	} else if (rdma_idx < rdma_entries) {
 		const struct sge_ofld_rxq *rx =
 				&adap->sge.rdmarxq[rdma_idx * 4];
@@ -2543,6 +2574,7 @@
 {
 	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
 	       DIV_ROUND_UP(adap->sge.iscsiqsets, 4) +
+	       DIV_ROUND_UP(adap->sge.niscsitq, 4) +
 	       DIV_ROUND_UP(adap->sge.rdmaqs, 4) +
 	       DIV_ROUND_UP(adap->sge.rdmaciqs, 4) +
 	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index adad73f..d1e3f09 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c

@@ -227,7 +227,7 @@
 static LIST_HEAD(adap_rcu_list);
 static DEFINE_SPINLOCK(adap_rcu_lock);
 static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
-static const char *uld_str[] = { "RDMA", "iSCSI" };
+static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" };
 
 static void link_report(struct net_device *dev)
 {
@@ -664,6 +664,13 @@
 	return 0;
 }
 
+/* Flush the aggregated lro sessions */
+static void uldrx_flush_handler(struct sge_rspq *q)
+{
+	if (ulds[q->uld].lro_flush)
+		ulds[q->uld].lro_flush(&q->lro_mgr);
+}
+
 /**
  *	uldrx_handler - response queue handler for ULD queues
  *	@q: the response queue that received the packet
@@ -677,6 +684,7 @@
 			 const struct pkt_gl *gl)
 {
 	struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
+	int ret;
 
 	/* FW can send CPLs encapsulated in a CPL_FW4_MSG.
 	 */
@@ -684,10 +692,19 @@
 	    ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
 		rsp += 2;
 
-	if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
+	if (q->flush_handler)
+		ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld],
+						  rsp, gl, &q->lro_mgr,
+						  &q->napi);
+	else
+		ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld],
+					      rsp, gl);
+
+	if (ret) {
 		rxq->stats.nomem++;
 		return -1;
 	}
+
 	if (gl == NULL)
 		rxq->stats.imm++;
 	else if (gl == CXGB4_MSG_AN)
@@ -754,6 +771,10 @@
 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d",
 			 adap->port[0]->name, i);
 
+	for_each_iscsitrxq(&adap->sge, i)
+		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d",
+			 adap->port[0]->name, i);
+
 	for_each_rdmarxq(&adap->sge, i)
 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
 			 adap->port[0]->name, i);
@@ -767,6 +788,7 @@
 {
 	struct sge *s = &adap->sge;
 	int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
+	int iscsitqidx = 0;
 	int msi_index = 2;
 
 	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
@@ -792,6 +814,15 @@
 			goto unwind;
 		msi_index++;
 	}
+	for_each_iscsitrxq(s, iscsitqidx) {
+		err = request_irq(adap->msix_info[msi_index].vec,
+				  t4_sge_intr_msix, 0,
+				  adap->msix_info[msi_index].desc,
+				  &s->iscsitrxq[iscsitqidx].rspq);
+		if (err)
+			goto unwind;
+		msi_index++;
+	}
 	for_each_rdmarxq(s, rdmaqidx) {
 		err = request_irq(adap->msix_info[msi_index].vec,
 				  t4_sge_intr_msix, 0,
@@ -819,6 +850,9 @@
 	while (--rdmaqidx >= 0)
 		free_irq(adap->msix_info[--msi_index].vec,
 			 &s->rdmarxq[rdmaqidx].rspq);
+	while (--iscsitqidx >= 0)
+		free_irq(adap->msix_info[--msi_index].vec,
+			 &s->iscsitrxq[iscsitqidx].rspq);
 	while (--iscsiqidx >= 0)
 		free_irq(adap->msix_info[--msi_index].vec,
 			 &s->iscsirxq[iscsiqidx].rspq);
@@ -840,6 +874,9 @@
 	for_each_iscsirxq(s, i)
 		free_irq(adap->msix_info[msi_index++].vec,
 			 &s->iscsirxq[i].rspq);
+	for_each_iscsitrxq(s, i)
+		free_irq(adap->msix_info[msi_index++].vec,
+			 &s->iscsitrxq[i].rspq);
 	for_each_rdmarxq(s, i)
 		free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
 	for_each_rdmaciq(s, i)
@@ -984,7 +1021,7 @@
 
 static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
 			   unsigned int nq, unsigned int per_chan, int msi_idx,
-			   u16 *ids)
+			   u16 *ids, bool lro)
 {
 	int i, err;
 
@@ -994,7 +1031,9 @@
 		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
 				       adap->port[i / per_chan],
 				       msi_idx, q->fl.size ? &q->fl : NULL,
-				       uldrx_handler, 0);
+				       uldrx_handler,
+				       lro ? uldrx_flush_handler : NULL,
+				       0);
 		if (err)
 			return err;
 		memset(&q->stats, 0, sizeof(q->stats));
@@ -1024,7 +1063,7 @@
 		msi_idx = 1;         /* vector 0 is for non-queue interrupts */
 	else {
 		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
-				       NULL, NULL, -1);
+				       NULL, NULL, NULL, -1);
 		if (err)
 			return err;
 		msi_idx = -((int)s->intrq.abs_id + 1);
@@ -1044,7 +1083,7 @@
 	 *    new/deleted queues.
 	 */
 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
-			       msi_idx, NULL, fwevtq_handler, -1);
+			       msi_idx, NULL, fwevtq_handler, NULL, -1);
 	if (err) {
 freeout:	t4_free_sge_resources(adap);
 		return err;
@@ -1062,6 +1101,7 @@
 			err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
 					       msi_idx, &q->fl,
 					       t4_ethrx_handler,
+					       NULL,
 					       t4_get_mps_bg_map(adap,
 								 pi->tx_chan));
 			if (err)
@@ -1087,18 +1127,19 @@
 			goto freeout;
 	}
 
-#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids) do { \
-	err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids); \
+#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \
+	err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \
 	if (err) \
 		goto freeout; \
 	if (msi_idx > 0) \
 		msi_idx += nq; \
 } while (0)
 
-	ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq);
-	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq);
+	ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false);
+	ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true);
+	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false);
 	j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
-	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq);
+	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false);
 
 #undef ALLOC_OFLD_RXQS
 
@@ -2430,6 +2471,9 @@
 	} else if (uld == CXGB4_ULD_ISCSI) {
 		lli.rxq_ids = adap->sge.iscsi_rxq;
 		lli.nrxq = adap->sge.iscsiqsets;
+	} else if (uld == CXGB4_ULD_ISCSIT) {
+		lli.rxq_ids = adap->sge.iscsit_rxq;
+		lli.nrxq = adap->sge.niscsitq;
 	}
 	lli.ntxq = adap->sge.iscsiqsets;
 	lli.nchan = adap->params.nports;
@@ -2437,6 +2481,10 @@
 	lli.wr_cred = adap->params.ofldq_wr_cred;
 	lli.adapter_type = adap->params.chip;
 	lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
+	lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
+	lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
+	lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
+	lli.iscsi_ppm = &adap->iscsi_ppm;
 	lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
 	lli.udb_density = 1 << adap->params.sge.eq_qpp;
 	lli.ucq_density = 1 << adap->params.sge.iq_qpp;
@@ -4336,6 +4384,9 @@
 		s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
 				adap->params.nports;
 		s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
+
+		if (!is_t4(adap->params.chip))
+			s->niscsitq = s->iscsiqsets;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
@@ -4362,6 +4413,16 @@
 		r->fl.size = 72;
 	}
 
+	if (!is_t4(adap->params.chip)) {
+		for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) {
+			struct sge_ofld_rxq *r = &s->iscsitrxq[i];
+
+			init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
+			r->rspq.uld = CXGB4_ULD_ISCSIT;
+			r->fl.size = 72;
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
 		struct sge_ofld_rxq *r = &s->rdmarxq[i];
 
@@ -4436,9 +4497,13 @@
 
 	want = s->max_ethqsets + EXTRA_VECS;
 	if (is_offload(adap)) {
-		want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets;
+		want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets	+
+			s->niscsitq;
 		/* need nchan for each possible ULD */
-		ofld_need = 3 * nchan;
+		if (is_t4(adap->params.chip))
+			ofld_need = 3 * nchan;
+		else
+			ofld_need = 4 * nchan;
 	}
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
@@ -4470,12 +4535,16 @@
 		if (allocated < want) {
 			s->rdmaqs = nchan;
 			s->rdmaciqs = nchan;
+
+			if (!is_t4(adap->params.chip))
+				s->niscsitq = nchan;
 		}
 
 		/* leftovers go to OFLD */
 		i = allocated - EXTRA_VECS - s->max_ethqsets -
-		    s->rdmaqs - s->rdmaciqs;
+		    s->rdmaqs - s->rdmaciqs - s->niscsitq;
 		s->iscsiqsets = (i / nchan) * nchan;  /* round down */
+
 	}
 	for (i = 0; i < allocated; ++i)
 		adap->msix_info[i].vec = entries[i].vector;

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.c
new file mode 100644
index 0000000..d88a7a7
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.c

@@ -0,0 +1,464 @@
+/*
+ * cxgb4_ppm.c: Chelsio common library for T4/T5 iSCSI PagePod Manager
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Karen Xie (kxie@chelsio.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+
+#include "cxgb4_ppm.h"
+
+/* Direct Data Placement -
+ * Directly place the iSCSI Data-In or Data-Out PDU's payload into
+ * pre-posted final destination host-memory buffers based on the
+ * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
+ * in Data-Out PDUs. The host memory address is programmed into
+ * h/w in the format of pagepod entries. The location of the
+ * pagepod entry is encoded into ddp tag which is used as the base
+ * for ITT/TTT.
+ */
+
+/* Direct-Data Placement page size adjustment
+ */
+int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz)
+{
+	struct cxgbi_tag_format *tformat = &ppm->tformat;
+	int i;
+
+	for (i = 0; i < DDP_PGIDX_MAX; i++) {
+		if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT +
+					 tformat->pgsz_order[i])) {
+			pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
+				 __func__, ppm->ndev->name, pgsz, i);
+			return i;
+		}
+	}
+	pr_info("ippm: ddp page size %lu not supported.\n", pgsz);
+	return DDP_PGIDX_MAX;
+}
+
+/* DDP setup & teardown
+ */
+static int ppm_find_unused_entries(unsigned long *bmap,
+				   unsigned int max_ppods,
+				   unsigned int start,
+				   unsigned int nr,
+				   unsigned int align_mask)
+{
+	unsigned long i;
+
+	i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask);
+
+	if (unlikely(i >= max_ppods) && (start > nr))
+		i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1,
+					       align_mask);
+	if (unlikely(i >= max_ppods))
+		return -ENOSPC;
+
+	bitmap_set(bmap, i, nr);
+	return (int)i;
+}
+
+static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count,
+			     unsigned long caller_data)
+{
+	struct cxgbi_ppod_data *pdata = ppm->ppod_data + i;
+
+	pdata->caller_data = caller_data;
+	pdata->npods = count;
+
+	if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1))
+		pdata->color = 0;
+	else
+		pdata->color++;
+}
+
+static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count,
+			       unsigned long caller_data)
+{
+	struct cxgbi_ppm_pool *pool;
+	unsigned int cpu;
+	int i;
+
+	cpu = get_cpu();
+	pool = per_cpu_ptr(ppm->pool, cpu);
+	spin_lock_bh(&pool->lock);
+	put_cpu();
+
+	i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max,
+				    pool->next, count, 0);
+	if (i < 0) {
+		pool->next = 0;
+		spin_unlock_bh(&pool->lock);
+		return -ENOSPC;
+	}
+
+	pool->next = i + count;
+	if (pool->next >= ppm->pool_index_max)
+		pool->next = 0;
+
+	spin_unlock_bh(&pool->lock);
+
+	pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
+		 __func__, cpu, i, count, i + cpu * ppm->pool_index_max,
+		pool->next);
+
+	i += cpu * ppm->pool_index_max;
+	ppm_mark_entries(ppm, i, count, caller_data);
+
+	return i;
+}
+
+static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count,
+			   unsigned long caller_data)
+{
+	int i;
+
+	spin_lock_bh(&ppm->map_lock);
+	i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max,
+				    ppm->next, count, 0);
+	if (i < 0) {
+		ppm->next = 0;
+		spin_unlock_bh(&ppm->map_lock);
+		pr_debug("ippm: NO suitable entries %u available.\n",
+			 count);
+		return -ENOSPC;
+	}
+
+	ppm->next = i + count;
+	if (ppm->next >= ppm->bmap_index_max)
+		ppm->next = 0;
+
+	spin_unlock_bh(&ppm->map_lock);
+
+	pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
+		 __func__, i, count, i + ppm->pool_rsvd, ppm->next,
+		 caller_data);
+
+	i += ppm->pool_rsvd;
+	ppm_mark_entries(ppm, i, count, caller_data);
+
+	return i;
+}
+
+static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count)
+{
+	pr_debug("%s: idx %d + %d.\n", __func__, i, count);
+
+	if (i < ppm->pool_rsvd) {
+		unsigned int cpu;
+		struct cxgbi_ppm_pool *pool;
+
+		cpu = i / ppm->pool_index_max;
+		i %= ppm->pool_index_max;
+
+		pool = per_cpu_ptr(ppm->pool, cpu);
+		spin_lock_bh(&pool->lock);
+		bitmap_clear(pool->bmap, i, count);
+
+		if (i < pool->next)
+			pool->next = i;
+		spin_unlock_bh(&pool->lock);
+
+		pr_debug("%s: cpu %u, idx %d, next %u.\n",
+			 __func__, cpu, i, pool->next);
+	} else {
+		spin_lock_bh(&ppm->map_lock);
+
+		i -= ppm->pool_rsvd;
+		bitmap_clear(ppm->ppod_bmap, i, count);
+
+		if (i < ppm->next)
+			ppm->next = i;
+		spin_unlock_bh(&ppm->map_lock);
+
+		pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next);
+	}
+}
+
+void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx)
+{
+	struct cxgbi_ppod_data *pdata;
+
+	if (idx >= ppm->ppmax) {
+		pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax);
+		return;
+	}
+
+	pdata = ppm->ppod_data + idx;
+	if (!pdata->npods) {
+		pr_warn("ippm: idx %u, npods 0.\n", idx);
+		return;
+	}
+
+	pr_debug("release idx %u, npods %u.\n", idx, pdata->npods);
+	ppm_unmark_entries(ppm, idx, pdata->npods);
+}
+EXPORT_SYMBOL(cxgbi_ppm_ppod_release);
+
+int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages,
+			    u32 per_tag_pg_idx, u32 *ppod_idx,
+			    u32 *ddp_tag, unsigned long caller_data)
+{
+	struct cxgbi_ppod_data *pdata;
+	unsigned int npods;
+	int idx = -1;
+	unsigned int hwidx;
+	u32 tag;
+
+	npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+	if (!npods) {
+		pr_warn("%s: pages %u -> npods %u, full.\n",
+			__func__, nr_pages, npods);
+		return -EINVAL;
+	}
+
+	/* grab from cpu pool first */
+	idx = ppm_get_cpu_entries(ppm, npods, caller_data);
+	/* try the general pool */
+	if (idx < 0)
+		idx = ppm_get_entries(ppm, npods, caller_data);
+	if (idx < 0) {
+		pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
+			 nr_pages, npods, ppm->next, caller_data);
+		return idx;
+	}
+
+	pdata = ppm->ppod_data + idx;
+	hwidx = ppm->base_idx + idx;
+
+	tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color);
+
+	if (per_tag_pg_idx)
+		tag |= (per_tag_pg_idx << 30) & 0xC0000000;
+
+	*ppod_idx = idx;
+	*ddp_tag = tag;
+
+	pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
+		 nr_pages, tag, idx, npods, caller_data);
+
+	return npods;
+}
+EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve);
+
+void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
+			     unsigned int tid, unsigned int offset,
+			     unsigned int length,
+			     struct cxgbi_pagepod_hdr *hdr)
+{
+	/* The ddp tag in pagepod should be with bit 31:30 set to 0.
+	 * The ddp Tag on the wire should be with non-zero 31:30 to the peer
+	 */
+	tag &= 0x3FFFFFFF;
+
+	hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
+
+	hdr->rsvd = 0;
+	hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask);
+	hdr->max_offset = htonl(length);
+	hdr->page_offset = htonl(offset);
+
+	pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
+		 tag, tid, length, offset);
+}
+EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr);
+
+static void ppm_free(struct cxgbi_ppm *ppm)
+{
+	vfree(ppm);
+}
+
+static void ppm_destroy(struct kref *kref)
+{
+	struct cxgbi_ppm *ppm = container_of(kref,
+					     struct cxgbi_ppm,
+					     refcnt);
+	pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
+		ppm->ndev->name, ppm);
+
+	*ppm->ppm_pp = NULL;
+
+	free_percpu(ppm->pool);
+	ppm_free(ppm);
+}
+
+int cxgbi_ppm_release(struct cxgbi_ppm *ppm)
+{
+	if (ppm) {
+		int rv;
+
+		rv = kref_put(&ppm->refcnt, ppm_destroy);
+		return rv;
+	}
+	return 1;
+}
+
+static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
+						 unsigned int *pcpu_ppmax)
+{
+	struct cxgbi_ppm_pool *pools;
+	unsigned int ppmax = (*total) / num_possible_cpus();
+	unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3;
+	unsigned int bmap;
+	unsigned int alloc_sz;
+	unsigned int count = 0;
+	unsigned int cpu;
+
+	/* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
+	if (ppmax > max)
+		ppmax = max;
+
+	/* pool size must be multiple of unsigned long */
+	bmap = BITS_TO_LONGS(ppmax);
+	ppmax = (bmap * sizeof(unsigned long)) << 3;
+
+	alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
+	pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool));
+
+	if (!pools)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu);
+
+		memset(ppool, 0, alloc_sz);
+		spin_lock_init(&ppool->lock);
+		count += ppmax;
+	}
+
+	*total = count;
+	*pcpu_ppmax = ppmax;
+
+	return pools;
+}
+
+int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
+		   struct pci_dev *pdev, void *lldev,
+		   struct cxgbi_tag_format *tformat,
+		   unsigned int ppmax,
+		   unsigned int llimit,
+		   unsigned int start,
+		   unsigned int reserve_factor)
+{
+	struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
+	struct cxgbi_ppm_pool *pool = NULL;
+	unsigned int ppmax_pool = 0;
+	unsigned int pool_index_max = 0;
+	unsigned int alloc_sz;
+	unsigned int ppod_bmap_size;
+
+	if (ppm) {
+		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
+			ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax);
+		kref_get(&ppm->refcnt);
+		return 1;
+	}
+
+	if (reserve_factor) {
+		ppmax_pool = ppmax / reserve_factor;
+		pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
+
+		pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
+			 ndev->name, ppmax, ppmax_pool, pool_index_max);
+	}
+
+	ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool);
+	alloc_sz = sizeof(struct cxgbi_ppm) +
+			ppmax * (sizeof(struct cxgbi_ppod_data)) +
+			ppod_bmap_size * sizeof(unsigned long);
+
+	ppm = vmalloc(alloc_sz);
+	if (!ppm)
+		goto release_ppm_pool;
+
+	memset(ppm, 0, alloc_sz);
+
+	ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]);
+
+	if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) {
+		unsigned int start = ppmax - ppmax_pool;
+		unsigned int end = ppod_bmap_size >> 3;
+
+		bitmap_set(ppm->ppod_bmap, ppmax, end - start);
+		pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
+			__func__, ppmax, ppmax_pool, ppod_bmap_size, start,
+			end);
+	}
+
+	spin_lock_init(&ppm->map_lock);
+	kref_init(&ppm->refcnt);
+
+	memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format));
+
+	ppm->ppm_pp = ppm_pp;
+	ppm->ndev = ndev;
+	ppm->pdev = pdev;
+	ppm->lldev = lldev;
+	ppm->ppmax = ppmax;
+	ppm->next = 0;
+	ppm->llimit = llimit;
+	ppm->base_idx = start > llimit ?
+			(start - llimit + 1) >> PPOD_SIZE_SHIFT : 0;
+	ppm->bmap_index_max = ppmax - ppmax_pool;
+
+	ppm->pool = pool;
+	ppm->pool_rsvd = ppmax_pool;
+	ppm->pool_index_max = pool_index_max;
+
+	/* check one more time */
+	if (*ppm_pp) {
+		ppm_free(ppm);
+		ppm = (struct cxgbi_ppm *)(*ppm_pp);
+
+		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
+			ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax);
+
+		kref_get(&ppm->refcnt);
+		return 1;
+	}
+	*ppm_pp = ppm;
+
+	ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE);
+
+	pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
+		ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE,
+		ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd,
+		ppm->pool_index_max);
+
+	return 0;
+
+release_ppm_pool:
+	free_percpu(pool);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(cxgbi_ppm_init);
+
+unsigned int cxgbi_tagmask_set(unsigned int ppmax)
+{
+	unsigned int bits = fls(ppmax);
+
+	if (bits > PPOD_IDX_MAX_SIZE)
+		bits = PPOD_IDX_MAX_SIZE;
+
+	pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
+		ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT));
+
+	return 1 << (bits + PPOD_IDX_SHIFT);
+}

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.h
new file mode 100644
index 0000000..d487326
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.h

@@ -0,0 +1,310 @@
+/*
+ * cxgb4_ppm.h: Chelsio common library for T4/T5 iSCSI ddp operation
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Karen Xie (kxie@chelsio.com)
+ */
+
+#ifndef	__CXGB4PPM_H__
+#define	__CXGB4PPM_H__
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
+
+struct cxgbi_pagepod_hdr {
+	u32 vld_tid;
+	u32 pgsz_tag_clr;
+	u32 max_offset;
+	u32 page_offset;
+	u64 rsvd;
+};
+
+#define PPOD_PAGES_MAX			4
+struct cxgbi_pagepod {
+	struct cxgbi_pagepod_hdr hdr;
+	u64 addr[PPOD_PAGES_MAX + 1];
+};
+
+/* ddp tag format
+ * for a 32-bit tag:
+ * bit #
+ * 31 .....   .....  0
+ *     X   Y...Y Z...Z, where
+ *     ^   ^^^^^ ^^^^
+ *     |   |      |____ when ddp bit = 0: color bits
+ *     |   |
+ *     |   |____ when ddp bit = 0: idx into the ddp memory region
+ *     |
+ *     |____ ddp bit: 0 - ddp tag, 1 - non-ddp tag
+ *
+ *  [page selector:2] [sw/free bits] [0] [idx] [color:6]
+ */
+
+#define DDP_PGIDX_MAX		4
+#define DDP_PGSZ_BASE_SHIFT	12	/* base page 4K */
+
+struct cxgbi_task_tag_info {
+	unsigned char flags;
+#define CXGBI_PPOD_INFO_FLAG_VALID	0x1
+#define CXGBI_PPOD_INFO_FLAG_MAPPED	0x2
+	unsigned char cid;
+	unsigned short pg_shift;
+	unsigned int npods;
+	unsigned int idx;
+	unsigned int tag;
+	struct cxgbi_pagepod_hdr hdr;
+	int nents;
+	int nr_pages;
+	struct scatterlist *sgl;
+};
+
+struct cxgbi_tag_format {
+	unsigned char pgsz_order[DDP_PGIDX_MAX];
+	unsigned char pgsz_idx_dflt;
+	unsigned char free_bits:4;
+	unsigned char color_bits:4;
+	unsigned char idx_bits;
+	unsigned char rsvd_bits;
+	unsigned int  no_ddp_mask;
+	unsigned int  idx_mask;
+	unsigned int  color_mask;
+	unsigned int  idx_clr_mask;
+	unsigned int  rsvd_mask;
+};
+
+struct cxgbi_ppod_data {
+	unsigned char pg_idx:2;
+	unsigned char color:6;
+	unsigned char chan_id;
+	unsigned short npods;
+	unsigned long caller_data;
+};
+
+/* per cpu ppm pool */
+struct cxgbi_ppm_pool {
+	unsigned int base;		/* base index */
+	unsigned int next;		/* next possible free index */
+	spinlock_t lock;		/* ppm pool lock */
+	unsigned long bmap[0];
+} ____cacheline_aligned_in_smp;
+
+struct cxgbi_ppm {
+	struct kref refcnt;
+	struct net_device *ndev;	/* net_device, 1st port */
+	struct pci_dev *pdev;
+	void *lldev;
+	void **ppm_pp;
+	struct cxgbi_tag_format tformat;
+	unsigned int ppmax;
+	unsigned int llimit;
+	unsigned int base_idx;
+
+	unsigned int pool_rsvd;
+	unsigned int pool_index_max;
+	struct cxgbi_ppm_pool __percpu *pool;
+	/* map lock */
+	spinlock_t map_lock;		/* ppm map lock */
+	unsigned int bmap_index_max;
+	unsigned int next;
+	unsigned long *ppod_bmap;
+	struct cxgbi_ppod_data ppod_data[0];
+};
+
+#define DDP_THRESHOLD		512
+
+#define PPOD_PAGES_SHIFT	2       /*  4 pages per pod */
+
+#define IPPOD_SIZE               sizeof(struct cxgbi_pagepod)  /*  64 */
+#define PPOD_SIZE_SHIFT         6
+
+/* page pods are allocated in groups of this size (must be power of 2) */
+#define PPOD_CLUSTER_SIZE	16U
+
+#define ULPMEM_DSGL_MAX_NPPODS	16	/*  1024/PPOD_SIZE */
+#define ULPMEM_IDATA_MAX_NPPODS	3	/* (PPOD_SIZE * 3 + ulptx hdr) < 256B */
+#define PCIE_MEMWIN_MAX_NPPODS	16	/*  1024/PPOD_SIZE */
+
+#define PPOD_COLOR_SHIFT	0
+#define PPOD_COLOR(x)		((x) << PPOD_COLOR_SHIFT)
+
+#define PPOD_IDX_SHIFT          6
+#define PPOD_IDX_MAX_SIZE       24
+
+#define PPOD_TID_SHIFT		0
+#define PPOD_TID(x)		((x) << PPOD_TID_SHIFT)
+
+#define PPOD_TAG_SHIFT		6
+#define PPOD_TAG(x)		((x) << PPOD_TAG_SHIFT)
+
+#define PPOD_VALID_SHIFT	24
+#define PPOD_VALID(x)		((x) << PPOD_VALID_SHIFT)
+#define PPOD_VALID_FLAG		PPOD_VALID(1U)
+
+#define PPOD_PI_EXTRACT_CTL_SHIFT	31
+#define PPOD_PI_EXTRACT_CTL(x)		((x) << PPOD_PI_EXTRACT_CTL_SHIFT)
+#define PPOD_PI_EXTRACT_CTL_FLAG	V_PPOD_PI_EXTRACT_CTL(1U)
+
+#define PPOD_PI_TYPE_SHIFT		29
+#define PPOD_PI_TYPE_MASK		0x3
+#define PPOD_PI_TYPE(x)			((x) << PPOD_PI_TYPE_SHIFT)
+
+#define PPOD_PI_CHECK_CTL_SHIFT		27
+#define PPOD_PI_CHECK_CTL_MASK		0x3
+#define PPOD_PI_CHECK_CTL(x)		((x) << PPOD_PI_CHECK_CTL_SHIFT)
+
+#define PPOD_PI_REPORT_CTL_SHIFT	25
+#define PPOD_PI_REPORT_CTL_MASK		0x3
+#define PPOD_PI_REPORT_CTL(x)		((x) << PPOD_PI_REPORT_CTL_SHIFT)
+
+static inline int cxgbi_ppm_is_ddp_tag(struct cxgbi_ppm *ppm, u32 tag)
+{
+	return !(tag & ppm->tformat.no_ddp_mask);
+}
+
+static inline int cxgbi_ppm_sw_tag_is_usable(struct cxgbi_ppm *ppm,
+					     u32 tag)
+{
+	/* the sw tag must be using <= 31 bits */
+	return !(tag & 0x80000000U);
+}
+
+static inline int cxgbi_ppm_make_non_ddp_tag(struct cxgbi_ppm *ppm,
+					     u32 sw_tag,
+					     u32 *final_tag)
+{
+	struct cxgbi_tag_format *tformat = &ppm->tformat;
+
+	if (!cxgbi_ppm_sw_tag_is_usable(ppm, sw_tag)) {
+		pr_info("sw_tag 0x%x NOT usable.\n", sw_tag);
+		return -EINVAL;
+	}
+
+	if (!sw_tag) {
+		*final_tag = tformat->no_ddp_mask;
+	} else {
+		unsigned int shift = tformat->idx_bits + tformat->color_bits;
+		u32 lower = sw_tag & tformat->idx_clr_mask;
+		u32 upper = (sw_tag >> shift) << (shift + 1);
+
+		*final_tag = upper | tformat->no_ddp_mask | lower;
+	}
+	return 0;
+}
+
+static inline u32 cxgbi_ppm_decode_non_ddp_tag(struct cxgbi_ppm *ppm,
+					       u32 tag)
+{
+	struct cxgbi_tag_format *tformat = &ppm->tformat;
+	unsigned int shift = tformat->idx_bits + tformat->color_bits;
+	u32 lower = tag & tformat->idx_clr_mask;
+	u32 upper = (tag >> tformat->rsvd_bits) << shift;
+
+	return upper | lower;
+}
+
+static inline u32 cxgbi_ppm_ddp_tag_get_idx(struct cxgbi_ppm *ppm,
+					    u32 ddp_tag)
+{
+	u32 hw_idx = (ddp_tag >> PPOD_IDX_SHIFT) &
+			ppm->tformat.idx_mask;
+
+	return hw_idx - ppm->base_idx;
+}
+
+static inline u32 cxgbi_ppm_make_ddp_tag(unsigned int hw_idx,
+					 unsigned char color)
+{
+	return (hw_idx << PPOD_IDX_SHIFT) | ((u32)color);
+}
+
+static inline unsigned long
+cxgbi_ppm_get_tag_caller_data(struct cxgbi_ppm *ppm,
+			      u32 ddp_tag)
+{
+	u32 idx = cxgbi_ppm_ddp_tag_get_idx(ppm, ddp_tag);
+
+	return ppm->ppod_data[idx].caller_data;
+}
+
+/* sw bits are the free bits */
+static inline int cxgbi_ppm_ddp_tag_update_sw_bits(struct cxgbi_ppm *ppm,
+						   u32 val, u32 orig_tag,
+						   u32 *final_tag)
+{
+	struct cxgbi_tag_format *tformat = &ppm->tformat;
+	u32 v = val >> tformat->free_bits;
+
+	if (v) {
+		pr_info("sw_bits 0x%x too large, avail bits %u.\n",
+			val, tformat->free_bits);
+		return -EINVAL;
+	}
+	if (!cxgbi_ppm_is_ddp_tag(ppm, orig_tag))
+		return -EINVAL;
+
+	*final_tag = (val << tformat->rsvd_bits) |
+		     (orig_tag & ppm->tformat.rsvd_mask);
+	return 0;
+}
+
+static inline void cxgbi_ppm_ppod_clear(struct cxgbi_pagepod *ppod)
+{
+	ppod->hdr.vld_tid = 0U;
+}
+
+static inline void cxgbi_tagmask_check(unsigned int tagmask,
+				       struct cxgbi_tag_format *tformat)
+{
+	unsigned int bits = fls(tagmask);
+
+	/* reserve top most 2 bits for page selector */
+	tformat->free_bits = 32 - 2 - bits;
+	tformat->rsvd_bits = bits;
+	tformat->color_bits = PPOD_IDX_SHIFT;
+	tformat->idx_bits = bits - 1 - PPOD_IDX_SHIFT;
+	tformat->no_ddp_mask = 1 << (bits - 1);
+	tformat->idx_mask = (1 << tformat->idx_bits) - 1;
+	tformat->color_mask = (1 << PPOD_IDX_SHIFT) - 1;
+	tformat->idx_clr_mask = (1 << (bits - 1)) - 1;
+	tformat->rsvd_mask = (1 << bits) - 1;
+
+	pr_info("ippm: tagmask 0x%x, rsvd %u=%u+%u+1, mask 0x%x,0x%x, "
+		"pg %u,%u,%u,%u.\n",
+		tagmask, tformat->rsvd_bits, tformat->idx_bits,
+		tformat->color_bits, tformat->no_ddp_mask, tformat->rsvd_mask,
+		tformat->pgsz_order[0], tformat->pgsz_order[1],
+		tformat->pgsz_order[2], tformat->pgsz_order[3]);
+}
+
+int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz);
+void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
+			     unsigned int tid, unsigned int offset,
+			     unsigned int length,
+			     struct cxgbi_pagepod_hdr *hdr);
+void cxgbi_ppm_ppod_release(struct cxgbi_ppm *, u32 idx);
+int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *, unsigned short nr_pages,
+			    u32 per_tag_pg_idx, u32 *ppod_idx, u32 *ddp_tag,
+			    unsigned long caller_data);
+int cxgbi_ppm_init(void **ppm_pp, struct net_device *, struct pci_dev *,
+		   void *lldev, struct cxgbi_tag_format *,
+		   unsigned int ppmax, unsigned int llimit,
+		   unsigned int start,
+		   unsigned int reserve_factor);
+int cxgbi_ppm_release(struct cxgbi_ppm *ppm);
+void cxgbi_tagmask_check(unsigned int tagmask, struct cxgbi_tag_format *);
+unsigned int cxgbi_tagmask_set(unsigned int ppmax);
+
+#endif	/*__CXGB4PPM_H__*/

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index cf711d5..f3c58aa 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h

@@ -191,6 +191,7 @@
 enum cxgb4_uld {
 	CXGB4_ULD_RDMA,
 	CXGB4_ULD_ISCSI,
+	CXGB4_ULD_ISCSIT,
 	CXGB4_ULD_MAX
 };
 
@@ -212,6 +213,7 @@
 struct net_device;
 struct pkt_gl;
 struct tp_tcp_stats;
+struct t4_lro_mgr;
 
 struct cxgb4_range {
 	unsigned int start;
@@ -273,6 +275,10 @@
 	unsigned int max_ordird_qp;          /* Max ORD/IRD depth per RDMA QP */
 	unsigned int max_ird_adapter;        /* Max IRD memory per adapter */
 	bool ulptx_memwrite_dsgl;            /* use of T5 DSGL allowed */
+	unsigned int iscsi_tagmask;	     /* iscsi ddp tag mask */
+	unsigned int iscsi_pgsz_order;	     /* iscsi ddp page size orders */
+	unsigned int iscsi_llimit;	     /* chip's iscsi region llimit */
+	void **iscsi_ppm;		     /* iscsi page pod manager */
 	int nodeid;			     /* device numa node id */
 };
 
@@ -283,6 +289,11 @@
 			  const struct pkt_gl *gl);
 	int (*state_change)(void *handle, enum cxgb4_state new_state);
 	int (*control)(void *handle, enum cxgb4_control control, ...);
+	int (*lro_rx_handler)(void *handle, const __be64 *rsp,
+			      const struct pkt_gl *gl,
+			      struct t4_lro_mgr *lro_mgr,
+			      struct napi_struct *napi);
+	void (*lro_flush)(struct t4_lro_mgr *);
 };
 
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);

diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 5b0f3ef..60a2603 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c

@@ -48,8 +48,6 @@
 #include "t4_regs.h"
 #include "t4_values.h"
 
-#define VLAN_NONE 0xfff
-
 /* identifies sync vs async L2T_WRITE_REQs */
 #define SYNC_WR_S    12
 #define SYNC_WR_V(x) ((x) << SYNC_WR_S)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h
index 4e2d47a..79665bd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h

@@ -39,6 +39,8 @@
 #include <linux/if_ether.h>
 #include <linux/atomic.h>
 
+#define VLAN_NONE 0xfff
+
 enum { L2T_SIZE = 4096 };     /* # of L2T entries */
 
 enum {

diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index deca4a2..13b144b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c

@@ -2157,8 +2157,11 @@
 
 	while (likely(budget_left)) {
 		rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
-		if (!is_new_response(rc, q))
+		if (!is_new_response(rc, q)) {
+			if (q->flush_handler)
+				q->flush_handler(q);
 			break;
+		}
 
 		dma_rmb();
 		rsp_type = RSPD_TYPE_G(rc->type_gen);
@@ -2544,7 +2547,8 @@
  */
 int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		     struct net_device *dev, int intr_idx,
-		     struct sge_fl *fl, rspq_handler_t hnd, int cong)
+		     struct sge_fl *fl, rspq_handler_t hnd,
+		     rspq_flush_handler_t flush_hnd, int cong)
 {
 	int ret, flsz = 0;
 	struct fw_iq_cmd c;
@@ -2648,6 +2652,10 @@
 	iq->size--;                           /* subtract status entry */
 	iq->netdev = dev;
 	iq->handler = hnd;
+	iq->flush_handler = flush_hnd;
+
+	memset(&iq->lro_mgr, 0, sizeof(struct t4_lro_mgr));
+	skb_queue_head_init(&iq->lro_mgr.lroq);
 
 	/* set offset to -1 to distinguish ingress queues without FL */
 	iq->offset = fl ? 0 : -1;
@@ -2992,6 +3000,7 @@
 
 	/* clean up RDMA and iSCSI Rx queues */
 	t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq);
+	t4_free_ofld_rxqs(adap, adap->sge.niscsitq, adap->sge.iscsitrxq);
 	t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq);
 	t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq);
 

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 1d2d1da..80417fc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h

@@ -51,6 +51,7 @@
 	CPL_TX_PKT            = 0xE,
 	CPL_L2T_WRITE_REQ     = 0x12,
 	CPL_TID_RELEASE       = 0x1A,
+	CPL_TX_DATA_ISO	      = 0x1F,
 
 	CPL_CLOSE_LISTSRV_RPL = 0x20,
 	CPL_L2T_WRITE_RPL     = 0x23,
@@ -344,6 +345,87 @@
 	u8 status;
 };
 
+struct tcp_options {
+	__be16 mss;
+	__u8 wsf;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8:4;
+	__u8 unknown:1;
+	__u8:1;
+	__u8 sack:1;
+	__u8 tstamp:1;
+#else
+	__u8 tstamp:1;
+	__u8 sack:1;
+	__u8:1;
+	__u8 unknown:1;
+	__u8:4;
+#endif
+};
+
+struct cpl_pass_accept_req {
+	union opcode_tid ot;
+	__be16 rsvd;
+	__be16 len;
+	__be32 hdr_len;
+	__be16 vlan;
+	__be16 l2info;
+	__be32 tos_stid;
+	struct tcp_options tcpopt;
+};
+
+/* cpl_pass_accept_req.hdr_len fields */
+#define SYN_RX_CHAN_S    0
+#define SYN_RX_CHAN_M    0xF
+#define SYN_RX_CHAN_V(x) ((x) << SYN_RX_CHAN_S)
+#define SYN_RX_CHAN_G(x) (((x) >> SYN_RX_CHAN_S) & SYN_RX_CHAN_M)
+
+#define TCP_HDR_LEN_S    10
+#define TCP_HDR_LEN_M    0x3F
+#define TCP_HDR_LEN_V(x) ((x) << TCP_HDR_LEN_S)
+#define TCP_HDR_LEN_G(x) (((x) >> TCP_HDR_LEN_S) & TCP_HDR_LEN_M)
+
+#define IP_HDR_LEN_S    16
+#define IP_HDR_LEN_M    0x3FF
+#define IP_HDR_LEN_V(x) ((x) << IP_HDR_LEN_S)
+#define IP_HDR_LEN_G(x) (((x) >> IP_HDR_LEN_S) & IP_HDR_LEN_M)
+
+#define ETH_HDR_LEN_S    26
+#define ETH_HDR_LEN_M    0x1F
+#define ETH_HDR_LEN_V(x) ((x) << ETH_HDR_LEN_S)
+#define ETH_HDR_LEN_G(x) (((x) >> ETH_HDR_LEN_S) & ETH_HDR_LEN_M)
+
+/* cpl_pass_accept_req.l2info fields */
+#define SYN_MAC_IDX_S    0
+#define SYN_MAC_IDX_M    0x1FF
+#define SYN_MAC_IDX_V(x) ((x) << SYN_MAC_IDX_S)
+#define SYN_MAC_IDX_G(x) (((x) >> SYN_MAC_IDX_S) & SYN_MAC_IDX_M)
+
+#define SYN_XACT_MATCH_S    9
+#define SYN_XACT_MATCH_V(x) ((x) << SYN_XACT_MATCH_S)
+#define SYN_XACT_MATCH_F    SYN_XACT_MATCH_V(1U)
+
+#define SYN_INTF_S    12
+#define SYN_INTF_M    0xF
+#define SYN_INTF_V(x) ((x) << SYN_INTF_S)
+#define SYN_INTF_G(x) (((x) >> SYN_INTF_S) & SYN_INTF_M)
+
+enum {                     /* TCP congestion control algorithms */
+	CONG_ALG_RENO,
+	CONG_ALG_TAHOE,
+	CONG_ALG_NEWRENO,
+	CONG_ALG_HIGHSPEED
+};
+
+#define CONG_CNTRL_S    14
+#define CONG_CNTRL_M    0x3
+#define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S)
+#define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M)
+
+#define T5_ISS_S    18
+#define T5_ISS_V(x) ((x) << T5_ISS_S)
+#define T5_ISS_F    T5_ISS_V(1U)
+
 struct cpl_pass_accept_rpl {
 	WR_HDR;
 	union opcode_tid ot;
@@ -818,6 +900,110 @@
 #define ISCSI_DDP_V(x) ((x) << ISCSI_DDP_S)
 #define ISCSI_DDP_F    ISCSI_DDP_V(1U)
 
+struct cpl_rx_data_ddp {
+	union opcode_tid ot;
+	__be16 urg;
+	__be16 len;
+	__be32 seq;
+	union {
+		__be32 nxt_seq;
+		__be32 ddp_report;
+	};
+	__be32 ulp_crc;
+	__be32 ddpvld;
+};
+
+#define cpl_rx_iscsi_ddp cpl_rx_data_ddp
+
+struct cpl_iscsi_data {
+	union opcode_tid ot;
+	__u8 rsvd0[2];
+	__be16 len;
+	__be32 seq;
+	__be16 urg;
+	__u8 rsvd1;
+	__u8 status;
+};
+
+struct cpl_tx_data_iso {
+	__be32 op_to_scsi;
+	__u8   reserved1;
+	__u8   ahs_len;
+	__be16 mpdu;
+	__be32 burst_size;
+	__be32 len;
+	__be32 reserved2_seglen_offset;
+	__be32 datasn_offset;
+	__be32 buffer_offset;
+	__be32 reserved3;
+
+	/* encapsulated CPL_TX_DATA follows here */
+};
+
+/* cpl_tx_data_iso.op_to_scsi fields */
+#define CPL_TX_DATA_ISO_OP_S	24
+#define CPL_TX_DATA_ISO_OP_M	0xff
+#define CPL_TX_DATA_ISO_OP_V(x)	((x) << CPL_TX_DATA_ISO_OP_S)
+#define CPL_TX_DATA_ISO_OP_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_OP_S) & CPL_TX_DATA_ISO_OP_M)
+
+#define CPL_TX_DATA_ISO_FIRST_S		23
+#define CPL_TX_DATA_ISO_FIRST_M		0x1
+#define CPL_TX_DATA_ISO_FIRST_V(x)	((x) << CPL_TX_DATA_ISO_FIRST_S)
+#define CPL_TX_DATA_ISO_FIRST_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_FIRST_S) & CPL_TX_DATA_ISO_FIRST_M)
+#define CPL_TX_DATA_ISO_FIRST_F	CPL_TX_DATA_ISO_FIRST_V(1U)
+
+#define CPL_TX_DATA_ISO_LAST_S		22
+#define CPL_TX_DATA_ISO_LAST_M		0x1
+#define CPL_TX_DATA_ISO_LAST_V(x)	((x) << CPL_TX_DATA_ISO_LAST_S)
+#define CPL_TX_DATA_ISO_LAST_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_LAST_S) & CPL_TX_DATA_ISO_LAST_M)
+#define CPL_TX_DATA_ISO_LAST_F	CPL_TX_DATA_ISO_LAST_V(1U)
+
+#define CPL_TX_DATA_ISO_CPLHDRLEN_S	21
+#define CPL_TX_DATA_ISO_CPLHDRLEN_M	0x1
+#define CPL_TX_DATA_ISO_CPLHDRLEN_V(x)	((x) << CPL_TX_DATA_ISO_CPLHDRLEN_S)
+#define CPL_TX_DATA_ISO_CPLHDRLEN_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_CPLHDRLEN_S) & CPL_TX_DATA_ISO_CPLHDRLEN_M)
+#define CPL_TX_DATA_ISO_CPLHDRLEN_F	CPL_TX_DATA_ISO_CPLHDRLEN_V(1U)
+
+#define CPL_TX_DATA_ISO_HDRCRC_S	20
+#define CPL_TX_DATA_ISO_HDRCRC_M	0x1
+#define CPL_TX_DATA_ISO_HDRCRC_V(x)	((x) << CPL_TX_DATA_ISO_HDRCRC_S)
+#define CPL_TX_DATA_ISO_HDRCRC_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_HDRCRC_S) & CPL_TX_DATA_ISO_HDRCRC_M)
+#define CPL_TX_DATA_ISO_HDRCRC_F	CPL_TX_DATA_ISO_HDRCRC_V(1U)
+
+#define CPL_TX_DATA_ISO_PLDCRC_S	19
+#define CPL_TX_DATA_ISO_PLDCRC_M	0x1
+#define CPL_TX_DATA_ISO_PLDCRC_V(x)	((x) << CPL_TX_DATA_ISO_PLDCRC_S)
+#define CPL_TX_DATA_ISO_PLDCRC_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_PLDCRC_S) & CPL_TX_DATA_ISO_PLDCRC_M)
+#define CPL_TX_DATA_ISO_PLDCRC_F	CPL_TX_DATA_ISO_PLDCRC_V(1U)
+
+#define CPL_TX_DATA_ISO_IMMEDIATE_S	18
+#define CPL_TX_DATA_ISO_IMMEDIATE_M	0x1
+#define CPL_TX_DATA_ISO_IMMEDIATE_V(x)	((x) << CPL_TX_DATA_ISO_IMMEDIATE_S)
+#define CPL_TX_DATA_ISO_IMMEDIATE_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_IMMEDIATE_S) & CPL_TX_DATA_ISO_IMMEDIATE_M)
+#define CPL_TX_DATA_ISO_IMMEDIATE_F	CPL_TX_DATA_ISO_IMMEDIATE_V(1U)
+
+#define CPL_TX_DATA_ISO_SCSI_S		16
+#define CPL_TX_DATA_ISO_SCSI_M		0x3
+#define CPL_TX_DATA_ISO_SCSI_V(x)	((x) << CPL_TX_DATA_ISO_SCSI_S)
+#define CPL_TX_DATA_ISO_SCSI_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_SCSI_S) & CPL_TX_DATA_ISO_SCSI_M)
+
+/* cpl_tx_data_iso.reserved2_seglen_offset fields */
+#define CPL_TX_DATA_ISO_SEGLEN_OFFSET_S		0
+#define CPL_TX_DATA_ISO_SEGLEN_OFFSET_M		0xffffff
+#define CPL_TX_DATA_ISO_SEGLEN_OFFSET_V(x)	\
+	((x) << CPL_TX_DATA_ISO_SEGLEN_OFFSET_S)
+#define CPL_TX_DATA_ISO_SEGLEN_OFFSET_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_SEGLEN_OFFSET_S) & \
+	 CPL_TX_DATA_ISO_SEGLEN_OFFSET_M)
+
 struct cpl_rx_data {
 	union opcode_tid ot;
 	__be16 rsvd;
@@ -854,6 +1040,15 @@
 #define RX_FORCE_ACK_V(x) ((x) << RX_FORCE_ACK_S)
 #define RX_FORCE_ACK_F    RX_FORCE_ACK_V(1U)
 
+#define RX_DACK_MODE_S    29
+#define RX_DACK_MODE_M    0x3
+#define RX_DACK_MODE_V(x) ((x) << RX_DACK_MODE_S)
+#define RX_DACK_MODE_G(x) (((x) >> RX_DACK_MODE_S) & RX_DACK_MODE_M)
+
+#define RX_DACK_CHANGE_S    31
+#define RX_DACK_CHANGE_V(x) ((x) << RX_DACK_CHANGE_S)
+#define RX_DACK_CHANGE_F    RX_DACK_CHANGE_V(1U)
+
 struct cpl_rx_pkt {
 	struct rss_header rsshdr;
 	u8 opcode;
@@ -1090,6 +1285,12 @@
 	__be64 rsvd1;
 };
 
+enum {
+	CPL_FW4_ACK_FLAGS_SEQVAL	= 0x1,	/* seqn valid */
+	CPL_FW4_ACK_FLAGS_CH		= 0x2,	/* channel change complete */
+	CPL_FW4_ACK_FLAGS_FLOWC		= 0x4,	/* fw_flowc_wr complete */
+};
+
 struct cpl_fw6_msg {
 	u8 opcode;
 	u8 type;
@@ -1115,6 +1316,17 @@
 	__u8    rsvd[2];
 };
 
+struct cpl_tx_data {
+	union opcode_tid ot;
+	__be32 len;
+	__be32 rsvd;
+	__be32 flags;
+};
+
+/* cpl_tx_data.flags field */
+#define TX_FORCE_S	13
+#define TX_FORCE_V(x)	((x) << TX_FORCE_S)
+
 enum {
 	ULP_TX_MEM_READ = 2,
 	ULP_TX_MEM_WRITE = 3,
@@ -1143,6 +1355,11 @@
 	struct ulptx_sge_pair sge[0];
 };
 
+struct ulptx_idata {
+	__be32 cmd_more;
+	__be32 len;
+};
+
 #define ULPTX_NSGE_S    0
 #define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S)
 

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index c8661c7..7ad6d4e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h

@@ -101,6 +101,7 @@
 	FW_RI_BIND_MW_WR               = 0x18,
 	FW_RI_FR_NSMR_WR               = 0x19,
 	FW_RI_INV_LSTAG_WR             = 0x1a,
+	FW_ISCSI_TX_DATA_WR	       = 0x45,
 	FW_LASTC2E_WR                  = 0x70
 };
 
@@ -561,7 +562,12 @@
 	FW_FLOWC_MNEM_SNDBUF,
 	FW_FLOWC_MNEM_MSS,
 	FW_FLOWC_MNEM_TXDATAPLEN_MAX,
-	FW_FLOWC_MNEM_SCHEDCLASS = 11,
+	FW_FLOWC_MNEM_TCPSTATE,
+	FW_FLOWC_MNEM_EOSTATE,
+	FW_FLOWC_MNEM_SCHEDCLASS,
+	FW_FLOWC_MNEM_DCBPRIO,
+	FW_FLOWC_MNEM_SND_SCALE,
+	FW_FLOWC_MNEM_RCV_SCALE,
 };
 
 struct fw_flowc_mnemval {

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 37c0815..08243c2 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c

@@ -943,8 +943,8 @@
 		else
 			val &= ~FEC_RACC_OPTIONS;
 		writel(val, fep->hwp + FEC_RACC);
+		writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL);
 	}
-	writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL);
 #endif
 
 	/*

diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 79a210a..ea83712 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c

@@ -35,6 +35,7 @@
 #include "fman.h"
 #include "fman_muram.h"
 
+#include <linux/fsl/guts.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/module.h>
@@ -1871,6 +1872,90 @@
 	return -EINVAL;
 }
 
+static int fman_reset(struct fman *fman)
+{
+	u32 count;
+	int err = 0;
+
+	if (fman->state->rev_info.major < 6) {
+		iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc);
+		/* Wait for reset completion */
+		count = 100;
+		do {
+			udelay(1);
+		} while (((ioread32be(&fman->fpm_regs->fm_rstc)) &
+			 FPM_RSTC_FM_RESET) && --count);
+		if (count == 0)
+			err = -EBUSY;
+
+		goto _return;
+	} else {
+		struct device_node *guts_node;
+		struct ccsr_guts __iomem *guts_regs;
+		u32 devdisr2, reg;
+
+		/* Errata A007273 */
+		guts_node =
+			of_find_compatible_node(NULL, NULL,
+						"fsl,qoriq-device-config-2.0");
+		if (!guts_node) {
+			dev_err(fman->dev, "%s: Couldn't find guts node\n",
+				__func__);
+			goto guts_node;
+		}
+
+		guts_regs = of_iomap(guts_node, 0);
+		if (!guts_regs) {
+			dev_err(fman->dev, "%s: Couldn't map %s regs\n",
+				__func__, guts_node->full_name);
+			goto guts_regs;
+		}
+#define FMAN1_ALL_MACS_MASK	0xFCC00000
+#define FMAN2_ALL_MACS_MASK	0x000FCC00
+		/* Read current state */
+		devdisr2 = ioread32be(&guts_regs->devdisr2);
+		if (fman->dts_params.id == 0)
+			reg = devdisr2 & ~FMAN1_ALL_MACS_MASK;
+		else
+			reg = devdisr2 & ~FMAN2_ALL_MACS_MASK;
+
+		/* Enable all MACs */
+		iowrite32be(reg, &guts_regs->devdisr2);
+
+		/* Perform FMan reset */
+		iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc);
+
+		/* Wait for reset completion */
+		count = 100;
+		do {
+			udelay(1);
+		} while (((ioread32be(&fman->fpm_regs->fm_rstc)) &
+			 FPM_RSTC_FM_RESET) && --count);
+		if (count == 0) {
+			iounmap(guts_regs);
+			of_node_put(guts_node);
+			err = -EBUSY;
+			goto _return;
+		}
+
+		/* Restore devdisr2 value */
+		iowrite32be(devdisr2, &guts_regs->devdisr2);
+
+		iounmap(guts_regs);
+		of_node_put(guts_node);
+
+		goto _return;
+
+guts_regs:
+		of_node_put(guts_node);
+guts_node:
+		dev_dbg(fman->dev, "%s: Didn't perform FManV3 reset due to Errata A007273!\n",
+			__func__);
+	}
+_return:
+	return err;
+}
+
 static int fman_init(struct fman *fman)
 {
 	struct fman_cfg *cfg = NULL;
@@ -1914,22 +1999,9 @@
 		fman->liodn_base[i] = liodn_base;
 	}
 
-	/* FMan Reset (supported only for FMan V2) */
-	if (fman->state->rev_info.major >= 6) {
-		/* Errata A007273 */
-		dev_dbg(fman->dev, "%s: FManV3 reset is not supported!\n",
-			__func__);
-	} else {
-		iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc);
-		/* Wait for reset completion */
-		count = 100;
-		do {
-			udelay(1);
-		} while (((ioread32be(&fman->fpm_regs->fm_rstc)) &
-			 FPM_RSTC_FM_RESET) && --count);
-		if (count == 0)
-			return -EBUSY;
-	}
+	err = fman_reset(fman);
+	if (err)
+		return err;
 
 	if (ioread32be(&fman->qmi_regs->fmqm_gs) & QMI_GS_HALT_NOT_BUSY) {
 		resume(fman->fpm_regs);

diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 1cbcb9f..e8d36aa 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h

@@ -147,6 +147,8 @@
 
 #define HNSV2_TXD_BUFNUM_S 0
 #define HNSV2_TXD_BUFNUM_M (0x7 << HNSV2_TXD_BUFNUM_S)
+#define HNSV2_TXD_PORTID_S	4
+#define HNSV2_TXD_PORTID_M	(0X7 << HNSV2_TXD_PORTID_S)
 #define HNSV2_TXD_RI_B   1
 #define HNSV2_TXD_L4CS_B   2
 #define HNSV2_TXD_L3CS_B   3
@@ -467,7 +469,7 @@
 				   u32 *tx_usecs, u32 *rx_usecs);
 	void (*get_rx_max_coalesced_frames)(struct hnae_handle *handle,
 					    u32 *tx_frames, u32 *rx_frames);
-	void (*set_coalesce_usecs)(struct hnae_handle *handle, u32 timeout);
+	int (*set_coalesce_usecs)(struct hnae_handle *handle, u32 timeout);
 	int (*set_coalesce_frames)(struct hnae_handle *handle,
 				   u32 coalesce_frames);
 	void (*set_promisc_mode)(struct hnae_handle *handle, u32 en);
@@ -516,6 +518,7 @@
 	int q_num;
 	int vf_id;
 	u32 eport_id;
+	u32 dport_id;	/* v2 tx bd should fill the dport_id */
 	enum hnae_port_type port_type;
 	struct list_head node;    /* list to hnae_ae_dev->handle_list */
 	struct hnae_buf_ops *bops; /* operation for the buffer */

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index d4f92ed..a1cb461 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c

@@ -159,11 +159,6 @@
 		ae_handle->qs[i]->tx_ring.q = ae_handle->qs[i];
 
 		ring_pair_cb->used_by_vf = 1;
-		if (port_idx < DSAF_SERVICE_PORT_NUM_PER_DSAF)
-			ring_pair_cb->port_id_in_dsa = port_idx;
-		else
-			ring_pair_cb->port_id_in_dsa = 0;
-
 		ring_pair_cb++;
 	}
 
@@ -175,6 +170,7 @@
 	ae_handle->phy_node = vf_cb->mac_cb->phy_node;
 	ae_handle->if_support = vf_cb->mac_cb->if_support;
 	ae_handle->port_type = vf_cb->mac_cb->mac_type;
+	ae_handle->dport_id = port_idx;
 
 	return ae_handle;
 vf_id_err:
@@ -419,7 +415,10 @@
 
 static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en)
 {
+	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+
 	hns_dsaf_set_promisc_mode(hns_ae_get_dsaf_dev(handle->dev), en);
+	hns_mac_set_promisc(mac_cb, (u8)!!en);
 }
 
 static int hns_ae_get_autoneg(struct hnae_handle *handle)
@@ -449,59 +448,46 @@
 static void hns_ae_get_coalesce_usecs(struct hnae_handle *handle,
 				      u32 *tx_usecs, u32 *rx_usecs)
 {
-	int port;
+	struct ring_pair_cb *ring_pair =
+		container_of(handle->qs[0], struct ring_pair_cb, q);
 
-	port = hns_ae_map_eport_to_dport(handle->eport_id);
-
-	*tx_usecs = hns_rcb_get_coalesce_usecs(
-		hns_ae_get_dsaf_dev(handle->dev),
-		hns_dsaf_get_comm_idx_by_port(port));
-	*rx_usecs = hns_rcb_get_coalesce_usecs(
-		hns_ae_get_dsaf_dev(handle->dev),
-		hns_dsaf_get_comm_idx_by_port(port));
+	*tx_usecs = hns_rcb_get_coalesce_usecs(ring_pair->rcb_common,
+					       ring_pair->port_id_in_comm);
+	*rx_usecs = hns_rcb_get_coalesce_usecs(ring_pair->rcb_common,
+					       ring_pair->port_id_in_comm);
 }
 
 static void hns_ae_get_rx_max_coalesced_frames(struct hnae_handle *handle,
 					       u32 *tx_frames, u32 *rx_frames)
 {
-	int port;
+	struct ring_pair_cb *ring_pair =
+		container_of(handle->qs[0], struct ring_pair_cb, q);
 
-	assert(handle);
-
-	port = hns_ae_map_eport_to_dport(handle->eport_id);
-
-	*tx_frames = hns_rcb_get_coalesced_frames(
-		hns_ae_get_dsaf_dev(handle->dev), port);
-	*rx_frames = hns_rcb_get_coalesced_frames(
-		hns_ae_get_dsaf_dev(handle->dev), port);
+	*tx_frames = hns_rcb_get_coalesced_frames(ring_pair->rcb_common,
+						  ring_pair->port_id_in_comm);
+	*rx_frames = hns_rcb_get_coalesced_frames(ring_pair->rcb_common,
+						  ring_pair->port_id_in_comm);
 }
 
-static void hns_ae_set_coalesce_usecs(struct hnae_handle *handle,
-				      u32 timeout)
+static int hns_ae_set_coalesce_usecs(struct hnae_handle *handle,
+				     u32 timeout)
 {
-	int port;
+	struct ring_pair_cb *ring_pair =
+		container_of(handle->qs[0], struct ring_pair_cb, q);
 
-	assert(handle);
-
-	port = hns_ae_map_eport_to_dport(handle->eport_id);
-
-	hns_rcb_set_coalesce_usecs(hns_ae_get_dsaf_dev(handle->dev),
-				   port, timeout);
+	return hns_rcb_set_coalesce_usecs(
+		ring_pair->rcb_common, ring_pair->port_id_in_comm, timeout);
 }
 
 static int  hns_ae_set_coalesce_frames(struct hnae_handle *handle,
 				       u32 coalesce_frames)
 {
-	int port;
-	int ret;
+	struct ring_pair_cb *ring_pair =
+		container_of(handle->qs[0], struct ring_pair_cb, q);
 
-	assert(handle);
-
-	port = hns_ae_map_eport_to_dport(handle->eport_id);
-
-	ret = hns_rcb_set_coalesced_frames(hns_ae_get_dsaf_dev(handle->dev),
-					   port, coalesce_frames);
-	return ret;
+	return hns_rcb_set_coalesced_frames(
+		ring_pair->rcb_common,
+		ring_pair->port_id_in_comm, coalesce_frames);
 }
 
 void hns_ae_update_stats(struct hnae_handle *handle,
@@ -787,7 +773,8 @@
 		memcpy(key, ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE);
 
 	/* update the current hash->queue mappings from the shadow RSS table */
-	memcpy(indir, ppe_cb->rss_indir_table, HNS_PPEV2_RSS_IND_TBL_SIZE);
+	memcpy(indir, ppe_cb->rss_indir_table,
+	       HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
 
 	return 0;
 }
@@ -799,10 +786,11 @@
 
 	/* set the RSS Hash Key if specififed by the user */
 	if (key)
-		hns_ppe_set_rss_key(ppe_cb, (int *)key);
+		hns_ppe_set_rss_key(ppe_cb, (u32 *)key);
 
 	/* update the shadow RSS table with user specified qids */
-	memcpy(ppe_cb->rss_indir_table, indir, HNS_PPEV2_RSS_IND_TBL_SIZE);
+	memcpy(ppe_cb->rss_indir_table, indir,
+	       HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
 
 	/* now update the hardware */
 	hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table);

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index b8517b0..44abb08 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c

@@ -290,6 +290,24 @@
 	return 0;
 }
 
+static void hns_gmac_set_uc_match(void *mac_drv, u16 en)
+{
+	struct mac_driver *drv = mac_drv;
+
+	dsaf_set_dev_bit(drv, GMAC_REC_FILT_CONTROL_REG,
+			 GMAC_UC_MATCH_EN_B, !en);
+	dsaf_set_dev_bit(drv, GMAC_STATION_ADDR_HIGH_2_REG,
+			 GMAC_ADDR_EN_B, !en);
+}
+
+static void hns_gmac_set_promisc(void *mac_drv, u8 en)
+{
+	struct mac_driver *drv = mac_drv;
+
+	if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG)
+		hns_gmac_set_uc_match(mac_drv, en);
+}
+
 static void hns_gmac_init(void *mac_drv)
 {
 	u32 port;
@@ -305,6 +323,8 @@
 	mdelay(10);
 	hns_gmac_disable(mac_drv, MAC_COMM_MODE_RX_AND_TX);
 	hns_gmac_tx_loop_pkt_dis(mac_drv);
+	if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG)
+		hns_gmac_set_uc_match(mac_drv, 0);
 }
 
 void hns_gmac_update_stats(void *mac_drv)
@@ -402,14 +422,17 @@
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
-	if (drv->mac_id >= DSAF_SERVICE_NW_NUM) {
-		u32 high_val = mac_addr[1] | (mac_addr[0] << 8);
+	u32 high_val = mac_addr[1] | (mac_addr[0] << 8);
 
-		u32 low_val = mac_addr[5] | (mac_addr[4] << 8)
-			| (mac_addr[3] << 16) | (mac_addr[2] << 24);
-		dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val);
-		dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG, high_val);
-	}
+	u32 low_val = mac_addr[5] | (mac_addr[4] << 8)
+		| (mac_addr[3] << 16) | (mac_addr[2] << 24);
+
+	u32 val = dsaf_read_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG);
+	u32 sta_addr_en = dsaf_get_bit(val, GMAC_ADDR_EN_B);
+
+	dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val);
+	dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG,
+		       high_val | (sta_addr_en << GMAC_ADDR_EN_B));
 }
 
 static int hns_gmac_config_loopback(void *mac_drv, enum hnae_loop loop_mode,
@@ -641,7 +664,8 @@
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(g_gmac_stats_string); i++) {
-		snprintf(buff, ETH_GSTRING_LEN, g_gmac_stats_string[i].desc);
+		snprintf(buff, ETH_GSTRING_LEN, "%s",
+			 g_gmac_stats_string[i].desc);
 		buff = buff + ETH_GSTRING_LEN;
 	}
 }
@@ -699,6 +723,7 @@
 	mac_drv->get_sset_count = hns_gmac_get_sset_count;
 	mac_drv->get_strings = hns_gmac_get_strings;
 	mac_drv->update_stats = hns_gmac_update_stats;
+	mac_drv->set_promiscuous = hns_gmac_set_promisc;
 
 	return (void *)mac_drv;
 }

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 5ef0e96..a38084a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c

@@ -467,8 +467,13 @@
 	struct mac_driver *drv = hns_mac_get_drv(mac_cb);
 	u32 buf_size = mac_cb->dsaf_dev->buf_size;
 	u32 new_frm = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	u32 max_frm = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver) ?
+			MAC_MAX_MTU : MAC_MAX_MTU_V2;
 
-	if ((new_mtu < MAC_MIN_MTU) || (new_frm > MAC_MAX_MTU) ||
+	if (mac_cb->mac_type == HNAE_PORT_DEBUG)
+		max_frm = MAC_MAX_MTU_DBG;
+
+	if ((new_mtu < MAC_MIN_MTU) || (new_frm > max_frm) ||
 	    (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size))
 		return -EINVAL;
 
@@ -861,6 +866,14 @@
 	return mac_ctrl_drv->get_sset_count(stringset);
 }
 
+void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en)
+{
+	struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
+
+	if (mac_ctrl_drv->set_promiscuous)
+		mac_ctrl_drv->set_promiscuous(mac_ctrl_drv, en);
+}
+
 int hns_mac_get_regs_count(struct hns_mac_cb *mac_cb)
 {
 	struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 0b05219..823b6e7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h

@@ -26,7 +26,9 @@
 
 #define MAC_DEFAULT_MTU	(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN + ETH_DATA_LEN)
 #define MAC_MAX_MTU		9600
+#define MAC_MAX_MTU_V2		9728
 #define MAC_MIN_MTU		68
+#define MAC_MAX_MTU_DBG		MAC_DEFAULT_MTU
 
 #define MAC_DEFAULT_PAUSE_TIME 0xff
 
@@ -365,7 +367,7 @@
 	/*config rx pause enable*/
 	void (*set_rx_ignore_pause_frames)(void *mac_drv, u32 enable);
 	/* config rx mode for promiscuous*/
-	int (*set_promiscuous)(void *mac_drv, u8 enable);
+	void (*set_promiscuous)(void *mac_drv, u8 enable);
 	/* get mac id */
 	void (*mac_get_id)(void *mac_drv, u8 *mac_id);
 	void (*mac_pausefrm_cfg)(void *mac_drv, u32 rx_en, u32 tx_en);
@@ -453,4 +455,6 @@
 void hns_set_led_opt(struct hns_mac_cb *mac_cb);
 int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb,
 			enum hnae_led_state status);
+void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en);
+
 #endif /* _HNS_DSAF_MAC_H */

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 38fc5be..5978a5c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c

@@ -748,8 +748,9 @@
  */
 static void hns_dsaf_rocee_bp_en(struct dsaf_device *dsaf_dev)
 {
-	dsaf_set_dev_bit(dsaf_dev, DSAF_XGE_CTRL_SIG_CFG_0_REG,
-			 DSAF_FC_XGE_TX_PAUSE_S, 1);
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+		dsaf_set_dev_bit(dsaf_dev, DSAF_XGE_CTRL_SIG_CFG_0_REG,
+				 DSAF_FC_XGE_TX_PAUSE_S, 1);
 }
 
 /* set msk for dsaf exception irq*/
@@ -2218,17 +2219,17 @@
 	/* dsaf onode registers */
 	for (i = 0; i < DSAF_XOD_NUM; i++) {
 		p[311 + i] = dsaf_read_dev(ddev,
-				DSAF_XOD_ETS_TSA_TC0_TC3_CFG_0_REG + j * 0x90);
+				DSAF_XOD_ETS_TSA_TC0_TC3_CFG_0_REG + i * 0x90);
 		p[319 + i] = dsaf_read_dev(ddev,
-				DSAF_XOD_ETS_TSA_TC4_TC7_CFG_0_REG + j * 0x90);
+				DSAF_XOD_ETS_TSA_TC4_TC7_CFG_0_REG + i * 0x90);
 		p[327 + i] = dsaf_read_dev(ddev,
-				DSAF_XOD_ETS_BW_TC0_TC3_CFG_0_REG + j * 0x90);
+				DSAF_XOD_ETS_BW_TC0_TC3_CFG_0_REG + i * 0x90);
 		p[335 + i] = dsaf_read_dev(ddev,
-				DSAF_XOD_ETS_BW_TC4_TC7_CFG_0_REG + j * 0x90);
+				DSAF_XOD_ETS_BW_TC4_TC7_CFG_0_REG + i * 0x90);
 		p[343 + i] = dsaf_read_dev(ddev,
-				DSAF_XOD_ETS_BW_OFFSET_CFG_0_REG + j * 0x90);
+				DSAF_XOD_ETS_BW_OFFSET_CFG_0_REG + i * 0x90);
 		p[351 + i] = dsaf_read_dev(ddev,
-				DSAF_XOD_ETS_TOKEN_CFG_0_REG + j * 0x90);
+				DSAF_XOD_ETS_TOKEN_CFG_0_REG + i * 0x90);
 	}
 
 	p[359] = dsaf_read_dev(ddev, DSAF_XOD_PFS_CFG_0_0_REG + port * 0x90);

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 607c3be..e69b022 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c

@@ -244,31 +244,35 @@
  */
 phy_interface_t hns_mac_get_phy_if(struct hns_mac_cb *mac_cb)
 {
-	u32 hilink3_mode;
-	u32 hilink4_mode;
+	u32 mode;
+	u32 reg;
+	u32 shift;
+	bool is_ver1 = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver);
 	void __iomem *sys_ctl_vaddr = mac_cb->sys_ctl_vaddr;
-	int dev_id = mac_cb->mac_id;
+	int mac_id = mac_cb->mac_id;
 	phy_interface_t phy_if = PHY_INTERFACE_MODE_NA;
 
-	hilink3_mode = dsaf_read_reg(sys_ctl_vaddr, HNS_MAC_HILINK3_REG);
-	hilink4_mode = dsaf_read_reg(sys_ctl_vaddr, HNS_MAC_HILINK4_REG);
-	if (dev_id >= 0 && dev_id <= 3) {
-		if (hilink4_mode == 0)
-			phy_if = PHY_INTERFACE_MODE_SGMII;
-		else
-			phy_if = PHY_INTERFACE_MODE_XGMII;
-	} else if (dev_id >= 4 && dev_id <= 5) {
-		if (hilink3_mode == 0)
-			phy_if = PHY_INTERFACE_MODE_SGMII;
-		else
-			phy_if = PHY_INTERFACE_MODE_XGMII;
-	} else {
+	if (is_ver1 && (mac_id >= 6 && mac_id <= 7)) {
 		phy_if = PHY_INTERFACE_MODE_SGMII;
+	} else if (mac_id >= 0 && mac_id <= 3) {
+		reg = is_ver1 ? HNS_MAC_HILINK4_REG : HNS_MAC_HILINK4V2_REG;
+		mode = dsaf_read_reg(sys_ctl_vaddr, reg);
+		/* mac_id 0, 1, 2, 3 ---> hilink4 lane 0, 1, 2, 3 */
+		shift = is_ver1 ? 0 : mac_id;
+		if (dsaf_get_bit(mode, shift))
+			phy_if = PHY_INTERFACE_MODE_XGMII;
+		else
+			phy_if = PHY_INTERFACE_MODE_SGMII;
+	} else if (mac_id >= 4 && mac_id <= 7) {
+		reg = is_ver1 ? HNS_MAC_HILINK3_REG : HNS_MAC_HILINK3V2_REG;
+		mode = dsaf_read_reg(sys_ctl_vaddr, reg);
+		/* mac_id 4, 5, 6, 7 ---> hilink3 lane 2, 3, 0, 1 */
+		shift = is_ver1 ? 0 : mac_id <= 5 ? mac_id - 2 : mac_id - 6;
+		if (dsaf_get_bit(mode, shift))
+			phy_if = PHY_INTERFACE_MODE_XGMII;
+		else
+			phy_if = PHY_INTERFACE_MODE_SGMII;
 	}
-
-	dev_dbg(mac_cb->dev,
-		"hilink3_mode=%d, hilink4_mode=%d dev_id=%d, phy_if=%d\n",
-		hilink3_mode, hilink4_mode, dev_id, phy_if);
 	return phy_if;
 }
 

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index f302ef9..5b7ae5f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c

@@ -27,7 +27,7 @@
 void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb,
 			 const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM])
 {
-	int key_item = 0;
+	u32 key_item;
 
 	for (key_item = 0; key_item < HNS_PPEV2_RSS_KEY_NUM; key_item++)
 		dsaf_write_dev(ppe_cb, PPEV2_RSS_KEY_REG + key_item * 0x4,
@@ -343,6 +343,9 @@
 	if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) {
 		hns_ppe_set_vlan_strip(ppe_cb, 0);
 
+		dsaf_write_dev(ppe_cb, PPE_CFG_MAX_FRAME_LEN_REG,
+			       HNS_PPEV2_MAX_FRAME_LEN);
+
 		/* set default RSS key in h/w */
 		hns_ppe_set_rss_key(ppe_cb, ppe_cb->rss_key);
 

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
index 0f5cb69..e9c0ec2 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h

@@ -30,6 +30,8 @@
 #define HNS_PPEV2_RSS_KEY_SIZE 40 /* in bytes or 320 bits */
 #define HNS_PPEV2_RSS_KEY_NUM (HNS_PPEV2_RSS_KEY_SIZE / sizeof(u32))
 
+#define HNS_PPEV2_MAX_FRAME_LEN 0X980
+
 enum ppe_qid_mode {
 	PPE_QID_MODE0 = 0, /* fixed queue id mode */
 	PPE_QID_MODE1,	   /* switch:128VM non switch:6Port/4VM/4TC */

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 1218880..28ee26e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c

@@ -215,9 +215,9 @@
 		dsaf_write_dev(q, RCB_RING_RX_RING_BD_LEN_REG,
 			       bd_size_type);
 		dsaf_write_dev(q, RCB_RING_RX_RING_BD_NUM_REG,
-			       ring_pair->port_id_in_dsa);
+			       ring_pair->port_id_in_comm);
 		dsaf_write_dev(q, RCB_RING_RX_RING_PKTLINE_REG,
-			       ring_pair->port_id_in_dsa);
+			       ring_pair->port_id_in_comm);
 	} else {
 		dsaf_write_dev(q, RCB_RING_TX_RING_BASEADDR_L_REG,
 			       (u32)dma);
@@ -227,9 +227,9 @@
 		dsaf_write_dev(q, RCB_RING_TX_RING_BD_LEN_REG,
 			       bd_size_type);
 		dsaf_write_dev(q, RCB_RING_TX_RING_BD_NUM_REG,
-			       ring_pair->port_id_in_dsa);
+			       ring_pair->port_id_in_comm);
 		dsaf_write_dev(q, RCB_RING_TX_RING_PKTLINE_REG,
-			       ring_pair->port_id_in_dsa);
+			       ring_pair->port_id_in_comm);
 	}
 }
 
@@ -256,50 +256,16 @@
 		       desc_cnt);
 }
 
-/**
- *hns_rcb_set_port_coalesced_frames - set rcb port coalesced frames
- *@rcb_common: rcb_common device
- *@port_idx:port index
- *@coalesced_frames:BD num for coalesced frames
- */
-static int  hns_rcb_set_port_coalesced_frames(struct rcb_common_cb *rcb_common,
-					      u32 port_idx,
-					      u32 coalesced_frames)
+static void hns_rcb_set_port_timeout(
+	struct rcb_common_cb *rcb_common, u32 port_idx, u32 timeout)
 {
-	if (coalesced_frames >= rcb_common->desc_num ||
-	    coalesced_frames > HNS_RCB_MAX_COALESCED_FRAMES)
-		return -EINVAL;
-
-	dsaf_write_dev(rcb_common, RCB_CFG_PKTLINE_REG + port_idx * 4,
-		       coalesced_frames);
-	return 0;
-}
-
-/**
- *hns_rcb_get_port_coalesced_frames - set rcb port coalesced frames
- *@rcb_common: rcb_common device
- *@port_idx:port index
- * return coaleseced frames value
- */
-static u32 hns_rcb_get_port_coalesced_frames(struct rcb_common_cb *rcb_common,
-					     u32 port_idx)
-{
-	if (port_idx >= HNS_RCB_SERVICE_NW_ENGINE_NUM)
-		port_idx = 0;
-
-	return dsaf_read_dev(rcb_common,
-			     RCB_CFG_PKTLINE_REG + port_idx * 4);
-}
-
-/**
- *hns_rcb_set_timeout - set rcb port coalesced time_out
- *@rcb_common: rcb_common device
- *@time_out:time for coalesced time_out
- */
-static void hns_rcb_set_timeout(struct rcb_common_cb *rcb_common,
-				u32 timeout)
-{
-	dsaf_write_dev(rcb_common, RCB_CFG_OVERTIME_REG, timeout);
+	if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver))
+		dsaf_write_dev(rcb_common, RCB_CFG_OVERTIME_REG,
+			       timeout * HNS_RCB_CLK_FREQ_MHZ);
+	else
+		dsaf_write_dev(rcb_common,
+			       RCB_PORT_CFG_OVERTIME_REG + port_idx * 4,
+			       timeout);
 }
 
 static int hns_rcb_common_get_port_num(struct rcb_common_cb *rcb_common)
@@ -361,10 +327,11 @@
 
 	for (i = 0; i < port_num; i++) {
 		hns_rcb_set_port_desc_cnt(rcb_common, i, rcb_common->desc_num);
-		(void)hns_rcb_set_port_coalesced_frames(
-			rcb_common, i, rcb_common->coalesced_frames);
+		(void)hns_rcb_set_coalesced_frames(
+			rcb_common, i, HNS_RCB_DEF_COALESCED_FRAMES);
+		hns_rcb_set_port_timeout(
+			rcb_common, i, HNS_RCB_DEF_COALESCED_USECS);
 	}
-	hns_rcb_set_timeout(rcb_common, rcb_common->timeout);
 
 	dsaf_write_dev(rcb_common, RCB_COM_CFG_ENDIAN_REG,
 		       HNS_RCB_COMMON_ENDIAN);
@@ -460,7 +427,8 @@
 	hns_rcb_ring_get_cfg(&ring_pair_cb->q, TX_RING);
 }
 
-static int hns_rcb_get_port(struct rcb_common_cb *rcb_common, int ring_idx)
+static int hns_rcb_get_port_in_comm(
+	struct rcb_common_cb *rcb_common, int ring_idx)
 {
 	int comm_index = rcb_common->comm_index;
 	int port;
@@ -470,7 +438,7 @@
 		q_num = (int)rcb_common->max_q_per_vf * rcb_common->max_vfn;
 		port = ring_idx / q_num;
 	} else {
-		port = HNS_RCB_SERVICE_NW_ENGINE_NUM + comm_index - 1;
+		port = 0; /* config debug-ports port_id_in_comm to 0*/
 	}
 
 	return port;
@@ -518,7 +486,8 @@
 		ring_pair_cb->index = i;
 		ring_pair_cb->q.io_base =
 			RCB_COMM_BASE_TO_RING_BASE(rcb_common->io_base, i);
-		ring_pair_cb->port_id_in_dsa = hns_rcb_get_port(rcb_common, i);
+		ring_pair_cb->port_id_in_comm =
+			hns_rcb_get_port_in_comm(rcb_common, i);
 		ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX] =
 		is_ver1 ? irq_of_parse_and_map(np, base_irq_idx + i * 2) :
 			  platform_get_irq(pdev, base_irq_idx + i * 3 + 1);
@@ -534,82 +503,95 @@
 /**
  *hns_rcb_get_coalesced_frames - get rcb port coalesced frames
  *@rcb_common: rcb_common device
- *@comm_index:port index
- *return coalesced_frames
+ *@port_idx:port id in comm
+ *
+ *Returns: coalesced_frames
  */
-u32 hns_rcb_get_coalesced_frames(struct dsaf_device *dsaf_dev, int port)
+u32 hns_rcb_get_coalesced_frames(
+	struct rcb_common_cb *rcb_common, u32 port_idx)
 {
-	int comm_index =  hns_dsaf_get_comm_idx_by_port(port);
-	struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[comm_index];
-
-	return hns_rcb_get_port_coalesced_frames(rcb_comm, port);
+	return dsaf_read_dev(rcb_common, RCB_CFG_PKTLINE_REG + port_idx * 4);
 }
 
 /**
  *hns_rcb_get_coalesce_usecs - get rcb port coalesced time_out
  *@rcb_common: rcb_common device
- *@comm_index:port index
- *return time_out
+ *@port_idx:port id in comm
+ *
+ *Returns: time_out
  */
-u32 hns_rcb_get_coalesce_usecs(struct dsaf_device *dsaf_dev, int comm_index)
+u32 hns_rcb_get_coalesce_usecs(
+	struct rcb_common_cb *rcb_common, u32 port_idx)
 {
-	struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[comm_index];
-
-	return rcb_comm->timeout;
+	if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver))
+		return dsaf_read_dev(rcb_common, RCB_CFG_OVERTIME_REG) /
+		       HNS_RCB_CLK_FREQ_MHZ;
+	else
+		return dsaf_read_dev(rcb_common,
+				     RCB_PORT_CFG_OVERTIME_REG + port_idx * 4);
 }
 
 /**
  *hns_rcb_set_coalesce_usecs - set rcb port coalesced time_out
  *@rcb_common: rcb_common device
- *@comm_index: comm :index
- *@etx_usecs:tx time for coalesced time_out
- *@rx_usecs:rx time for coalesced time_out
+ *@port_idx:port id in comm
+ *@timeout:tx/rx time for coalesced time_out
+ *
+ * Returns:
+ * Zero for success, or an error code in case of failure
  */
-void hns_rcb_set_coalesce_usecs(struct dsaf_device *dsaf_dev,
-				int port, u32 timeout)
+int hns_rcb_set_coalesce_usecs(
+	struct rcb_common_cb *rcb_common, u32 port_idx, u32 timeout)
 {
-	int comm_index =  hns_dsaf_get_comm_idx_by_port(port);
-	struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[comm_index];
+	u32 old_timeout = hns_rcb_get_coalesce_usecs(rcb_common, port_idx);
 
-	if (rcb_comm->timeout == timeout)
-		return;
+	if (timeout == old_timeout)
+		return 0;
 
-	if (comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) {
-		dev_err(dsaf_dev->dev,
-			"error: not support coalesce_usecs setting!\n");
-		return;
+	if (AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver)) {
+		if (rcb_common->comm_index == HNS_DSAF_COMM_SERVICE_NW_IDX) {
+			dev_err(rcb_common->dsaf_dev->dev,
+				"error: not support coalesce_usecs setting!\n");
+			return -EINVAL;
+		}
 	}
-	rcb_comm->timeout = timeout;
-	hns_rcb_set_timeout(rcb_comm, rcb_comm->timeout);
+	if (timeout > HNS_RCB_MAX_COALESCED_USECS) {
+		dev_err(rcb_common->dsaf_dev->dev,
+			"error: not support coalesce %dus!\n", timeout);
+		return -EINVAL;
+	}
+	hns_rcb_set_port_timeout(rcb_common, port_idx, timeout);
+	return 0;
 }
 
 /**
  *hns_rcb_set_coalesced_frames - set rcb coalesced frames
  *@rcb_common: rcb_common device
- *@tx_frames:tx BD num for coalesced frames
- *@rx_frames:rx BD num for coalesced frames
- *Return 0 on success, negative on failure
+ *@port_idx:port id in comm
+ *@coalesced_frames:tx/rx BD num for coalesced frames
+ *
+ * Returns:
+ * Zero for success, or an error code in case of failure
  */
-int hns_rcb_set_coalesced_frames(struct dsaf_device *dsaf_dev,
-				 int port, u32 coalesced_frames)
+int hns_rcb_set_coalesced_frames(
+	struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames)
 {
-	int comm_index =  hns_dsaf_get_comm_idx_by_port(port);
-	struct rcb_common_cb *rcb_comm = dsaf_dev->rcb_common[comm_index];
-	u32 coalesced_reg_val;
-	int ret;
+	u32 old_waterline = hns_rcb_get_coalesced_frames(rcb_common, port_idx);
 
-	coalesced_reg_val = hns_rcb_get_port_coalesced_frames(rcb_comm, port);
-
-	if (coalesced_reg_val == coalesced_frames)
+	if (coalesced_frames == old_waterline)
 		return 0;
 
-	if (coalesced_frames >= HNS_RCB_MIN_COALESCED_FRAMES) {
-		ret = hns_rcb_set_port_coalesced_frames(rcb_comm, port,
-							coalesced_frames);
-		return ret;
-	} else {
+	if (coalesced_frames >= rcb_common->desc_num ||
+	    coalesced_frames > HNS_RCB_MAX_COALESCED_FRAMES ||
+	    coalesced_frames < HNS_RCB_MIN_COALESCED_FRAMES) {
+		dev_err(rcb_common->dsaf_dev->dev,
+			"error: not support coalesce_frames setting!\n");
 		return -EINVAL;
 	}
+
+	dsaf_write_dev(rcb_common, RCB_CFG_PKTLINE_REG + port_idx * 4,
+		       coalesced_frames);
+	return 0;
 }
 
 /**
@@ -749,8 +731,6 @@
 	rcb_common->dsaf_dev = dsaf_dev;
 
 	rcb_common->desc_num = dsaf_dev->desc_num;
-	rcb_common->coalesced_frames = HNS_RCB_DEF_COALESCED_FRAMES;
-	rcb_common->timeout = HNS_RCB_MAX_TIME_OUT;
 
 	hns_rcb_get_queue_mode(dsaf_mode, comm_index, &max_vfn, &max_q_per_vf);
 	rcb_common->max_vfn = max_vfn;
@@ -951,6 +931,10 @@
 void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data)
 {
 	u32 *regs = data;
+	bool is_ver1 = AE_IS_VER1(rcb_com->dsaf_dev->dsaf_ver);
+	bool is_dbg = (rcb_com->comm_index != HNS_DSAF_COMM_SERVICE_NW_IDX);
+	u32 reg_tmp;
+	u32 reg_num_tmp;
 	u32 i = 0;
 
 	/*rcb common registers */
@@ -1004,12 +988,16 @@
 			= dsaf_read_dev(rcb_com, RCB_CFG_PKTLINE_REG + 4 * i);
 	}
 
-	regs[70] = dsaf_read_dev(rcb_com, RCB_CFG_OVERTIME_REG);
-	regs[71] = dsaf_read_dev(rcb_com, RCB_CFG_PKTLINE_INT_NUM_REG);
-	regs[72] = dsaf_read_dev(rcb_com, RCB_CFG_OVERTIME_INT_NUM_REG);
+	reg_tmp = is_ver1 ? RCB_CFG_OVERTIME_REG : RCB_PORT_CFG_OVERTIME_REG;
+	reg_num_tmp = (is_ver1 || is_dbg) ? 1 : 6;
+	for (i = 0; i < reg_num_tmp; i++)
+		regs[70 + i] = dsaf_read_dev(rcb_com, reg_tmp);
+
+	regs[76] = dsaf_read_dev(rcb_com, RCB_CFG_PKTLINE_INT_NUM_REG);
+	regs[77] = dsaf_read_dev(rcb_com, RCB_CFG_OVERTIME_INT_NUM_REG);
 
 	/* mark end of rcb common regs */
-	for (i = 73; i < 80; i++)
+	for (i = 78; i < 80; i++)
 		regs[i] = 0xcccccccc;
 }
 

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
index 81fe9f8..eb61014 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h

@@ -38,7 +38,9 @@
 #define HNS_RCB_MAX_COALESCED_FRAMES		1023
 #define HNS_RCB_MIN_COALESCED_FRAMES		1
 #define HNS_RCB_DEF_COALESCED_FRAMES		50
-#define HNS_RCB_MAX_TIME_OUT			0x500
+#define HNS_RCB_CLK_FREQ_MHZ			350
+#define HNS_RCB_MAX_COALESCED_USECS		0x3ff
+#define HNS_RCB_DEF_COALESCED_USECS		3
 
 #define HNS_RCB_COMMON_ENDIAN			1
 
@@ -82,7 +84,7 @@
 
 	int virq[HNS_RCB_IRQ_NUM_PER_QUEUE];
 
-	u8 port_id_in_dsa;
+	u8 port_id_in_comm;
 	u8 used_by_vf;
 
 	struct hns_ring_hw_stats hw_stats;
@@ -97,8 +99,6 @@
 
 	u8 comm_index;
 	u32 ring_num;
-	u32 coalesced_frames; /* frames  threshold of  rx interrupt   */
-	u32 timeout; /* time threshold of  rx interrupt  */
 	u32 desc_num; /*  desc num per queue*/
 
 	struct ring_pair_cb ring_pair_cb[0];
@@ -125,13 +125,14 @@
 void hns_rcb_init_hw(struct ring_pair_cb *ring);
 void hns_rcb_reset_ring_hw(struct hnae_queue *q);
 void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag);
-
-u32 hns_rcb_get_coalesced_frames(struct dsaf_device *dsaf_dev, int comm_index);
-u32 hns_rcb_get_coalesce_usecs(struct dsaf_device *dsaf_dev, int comm_index);
-void hns_rcb_set_coalesce_usecs(struct dsaf_device *dsaf_dev,
-				int comm_index, u32 timeout);
-int hns_rcb_set_coalesced_frames(struct dsaf_device *dsaf_dev,
-				 int comm_index, u32 coalesce_frames);
+u32 hns_rcb_get_coalesced_frames(
+	struct rcb_common_cb *rcb_common, u32 port_idx);
+u32 hns_rcb_get_coalesce_usecs(
+	struct rcb_common_cb *rcb_common, u32 port_idx);
+int hns_rcb_set_coalesce_usecs(
+	struct rcb_common_cb *rcb_common, u32 port_idx, u32 timeout);
+int hns_rcb_set_coalesced_frames(
+	struct rcb_common_cb *rcb_common, u32 port_idx, u32 coalesced_frames);
 void hns_rcb_update_stats(struct hnae_queue *queue);
 
 void hns_rcb_get_stats(struct hnae_queue *queue, u64 *data);

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 60d695d..7d7204f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h

@@ -103,6 +103,8 @@
 /*serdes offset**/
 #define HNS_MAC_HILINK3_REG DSAF_SUB_SC_HILINK3_CRG_CTRL0_REG
 #define HNS_MAC_HILINK4_REG DSAF_SUB_SC_HILINK4_CRG_CTRL0_REG
+#define HNS_MAC_HILINK3V2_REG DSAF_SUB_SC_HILINK3_CRG_CTRL1_REG
+#define HNS_MAC_HILINK4V2_REG DSAF_SUB_SC_HILINK4_CRG_CTRL1_REG
 #define HNS_MAC_LANE0_CTLEDFE_REG 0x000BFFCCULL
 #define HNS_MAC_LANE1_CTLEDFE_REG 0x000BFFBCULL
 #define HNS_MAC_LANE2_CTLEDFE_REG 0x000BFFACULL
@@ -404,6 +406,7 @@
 #define RCB_CFG_OVERTIME_REG			0x9300
 #define RCB_CFG_PKTLINE_INT_NUM_REG		0x9304
 #define RCB_CFG_OVERTIME_INT_NUM_REG		0x9308
+#define RCB_PORT_CFG_OVERTIME_REG		0x9430
 
 #define RCB_RING_RX_RING_BASEADDR_L_REG		0x00000
 #define RCB_RING_RX_RING_BASEADDR_H_REG		0x00004
@@ -922,6 +925,8 @@
 #define GMAC_LP_REG_CF2MI_LP_EN_B	2
 
 #define GMAC_MODE_CHANGE_EB_B	0
+#define GMAC_UC_MATCH_EN_B	0
+#define GMAC_ADDR_EN_B		16
 
 #define GMAC_RECV_CTRL_STRIP_PAD_EN_B	3
 #define GMAC_RECV_CTRL_RUNT_PKT_EN_B	4

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index 802d554..fd90f37 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c

@@ -7,7 +7,7 @@
  * (at your option) any later version.
  */
 
-#include <asm-generic/io-64-nonatomic-hi-lo.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/of_mdio.h>
 #include "hns_dsaf_main.h"
 #include "hns_dsaf_mac.h"

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 3f77ff7..687204b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c

@@ -48,7 +48,6 @@
 	struct iphdr *iphdr;
 	struct ipv6hdr *ipv6hdr;
 	struct sk_buff *skb;
-	int skb_tmp_len;
 	__be16 protocol;
 	u8 bn_pid = 0;
 	u8 rrcfv = 0;
@@ -66,10 +65,14 @@
 	desc->addr = cpu_to_le64(dma);
 	desc->tx.send_size = cpu_to_le16((u16)size);
 
-	/*config bd buffer end */
+	/* config bd buffer end */
 	hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1);
 	hnae_set_field(bn_pid, HNSV2_TXD_BUFNUM_M, 0, buf_num - 1);
 
+	/* fill port_id in the tx bd for sending management pkts */
+	hnae_set_field(bn_pid, HNSV2_TXD_PORTID_M,
+		       HNSV2_TXD_PORTID_S, ring->q->handle->dport_id);
+
 	if (type == DESC_TYPE_SKB) {
 		skb = (struct sk_buff *)priv;
 
@@ -90,13 +93,13 @@
 				hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1);
 
 				/* check for tcp/udp header */
-				if (iphdr->protocol == IPPROTO_TCP) {
+				if (iphdr->protocol == IPPROTO_TCP &&
+				    skb_is_gso(skb)) {
 					hnae_set_bit(tvsvsn,
 						     HNSV2_TXD_TSE_B, 1);
-					skb_tmp_len = SKB_TMP_LEN(skb);
 					l4_len = tcp_hdrlen(skb);
-					mss = mtu - skb_tmp_len - ETH_FCS_LEN;
-					paylen = skb->len - skb_tmp_len;
+					mss = skb_shinfo(skb)->gso_size;
+					paylen = skb->len - SKB_TMP_LEN(skb);
 				}
 			} else if (skb->protocol == htons(ETH_P_IPV6)) {
 				hnae_set_bit(tvsvsn, HNSV2_TXD_IPV6_B, 1);
@@ -104,13 +107,13 @@
 				hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1);
 
 				/* check for tcp/udp header */
-				if (ipv6hdr->nexthdr == IPPROTO_TCP) {
+				if (ipv6hdr->nexthdr == IPPROTO_TCP &&
+				    skb_is_gso(skb) && skb_is_gso_v6(skb)) {
 					hnae_set_bit(tvsvsn,
 						     HNSV2_TXD_TSE_B, 1);
-					skb_tmp_len = SKB_TMP_LEN(skb);
 					l4_len = tcp_hdrlen(skb);
-					mss = mtu - skb_tmp_len - ETH_FCS_LEN;
-					paylen = skb->len - skb_tmp_len;
+					mss = skb_shinfo(skb)->gso_size;
+					paylen = skb->len - SKB_TMP_LEN(skb);
 				}
 			}
 			desc->tx.ip_offset = ip_offset;
@@ -564,6 +567,7 @@
 	struct sk_buff *skb;
 	struct hnae_desc *desc;
 	struct hnae_desc_cb *desc_cb;
+	struct ethhdr *eh;
 	unsigned char *va;
 	int bnum, length, i;
 	int pull_len;
@@ -670,6 +674,14 @@
 		return -EFAULT;
 	}
 
+	/* filter out multicast pkt with the same src mac as this port */
+	eh = eth_hdr(skb);
+	if (unlikely(is_multicast_ether_addr(eh->h_dest) &&
+		     ether_addr_equal(ndev->dev_addr, eh->h_source))) {
+		dev_kfree_skb_any(skb);
+		return -EFAULT;
+	}
+
 	ring->stats.rx_pkts++;
 	ring->stats.rx_bytes += skb->len;
 
@@ -901,10 +913,7 @@
 static void hns_nic_tx_fini_pro(struct hns_nic_ring_data *ring_data)
 {
 	struct hnae_ring *ring = ring_data->ring;
-	int head = ring->next_to_clean;
-
-	/* for hardware bug fixed */
-	head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
+	int head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
 
 	if (head != ring->next_to_clean) {
 		ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
@@ -947,8 +956,8 @@
 		napi_complete(napi);
 		ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
 			ring_data->ring, 0);
-
-		ring_data->fini_process(ring_data);
+		if (ring_data->fini_process)
+			ring_data->fini_process(ring_data);
 		return 0;
 	}
 
@@ -1711,6 +1720,7 @@
 {
 	struct hnae_handle *h = priv->ae_handle;
 	struct hns_nic_ring_data *rd;
+	bool is_ver1 = AE_IS_VER1(priv->enet_ver);
 	int i;
 
 	if (h->q_num > NIC_MAX_Q_PER_VF) {
@@ -1728,7 +1738,7 @@
 		rd->queue_index = i;
 		rd->ring = &h->qs[i]->tx_ring;
 		rd->poll_one = hns_nic_tx_poll_one;
-		rd->fini_process = hns_nic_tx_fini_pro;
+		rd->fini_process = is_ver1 ? hns_nic_tx_fini_pro : NULL;
 
 		netif_napi_add(priv->netdev, &rd->napi,
 			       hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM);
@@ -1740,7 +1750,7 @@
 		rd->ring = &h->qs[i - h->q_num]->rx_ring;
 		rd->poll_one = hns_nic_rx_poll_one;
 		rd->ex_process = hns_nic_rx_up_pro;
-		rd->fini_process = hns_nic_rx_fini_pro;
+		rd->fini_process = is_ver1 ? hns_nic_rx_fini_pro : NULL;
 
 		netif_napi_add(priv->netdev, &rd->napi,
 			       hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM);
@@ -1804,7 +1814,7 @@
 	h = hnae_get_handle(&priv->netdev->dev,
 			    priv->ae_node, priv->port_id, NULL);
 	if (IS_ERR_OR_NULL(h)) {
-		ret = PTR_ERR(h);
+		ret = -ENODEV;
 		dev_dbg(priv->dev, "has not handle, register notifier!\n");
 		goto out;
 	}

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 3c4a3bc..3d746c8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c

@@ -794,8 +794,10 @@
 	    (!ops->set_coalesce_frames))
 		return -ESRCH;
 
-	ops->set_coalesce_usecs(priv->ae_handle,
-					ec->rx_coalesce_usecs);
+	ret = ops->set_coalesce_usecs(priv->ae_handle,
+				      ec->rx_coalesce_usecs);
+	if (ret)
+		return ret;
 
 	ret = ops->set_coalesce_frames(
 		priv->ae_handle,
@@ -1013,8 +1015,8 @@
 	struct phy_device *phy_dev = priv->phy;
 
 	retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED);
-	retval = phy_write(phy_dev, HNS_LED_FC_REG, value);
-	retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_COPPER);
+	retval |= phy_write(phy_dev, HNS_LED_FC_REG, value);
+	retval |= phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_COPPER);
 	if (retval) {
 		netdev_err(netdev, "mdiobus_write fail !\n");
 		return retval;
@@ -1173,18 +1175,15 @@
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_ae_ops *ops;
-	u32 ret;
 
 	if (AE_IS_VER1(priv->enet_ver)) {
 		netdev_err(netdev,
 			   "RSS feature is not supported on this hardware\n");
-		return -EOPNOTSUPP;
+		return 0;
 	}
 
 	ops = priv->ae_handle->dev->ops;
-	ret = ops->get_rss_key_size(priv->ae_handle);
-
-	return ret;
+	return ops->get_rss_key_size(priv->ae_handle);
 }
 
 static u32
@@ -1192,18 +1191,15 @@
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_ae_ops *ops;
-	u32 ret;
 
 	if (AE_IS_VER1(priv->enet_ver)) {
 		netdev_err(netdev,
 			   "RSS feature is not supported on this hardware\n");
-		return -EOPNOTSUPP;
+		return 0;
 	}
 
 	ops = priv->ae_handle->dev->ops;
-	ret = ops->get_rss_indir_size(priv->ae_handle);
-
-	return ret;
+	return ops->get_rss_indir_size(priv->ae_handle);
 }
 
 static int
@@ -1211,7 +1207,6 @@
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_ae_ops *ops;
-	int ret;
 
 	if (AE_IS_VER1(priv->enet_ver)) {
 		netdev_err(netdev,
@@ -1224,9 +1219,7 @@
 	if (!indir)
 		return 0;
 
-	ret = ops->get_rss(priv->ae_handle, indir, key, hfunc);
-
-	return 0;
+	return ops->get_rss(priv->ae_handle, indir, key, hfunc);
 }
 
 static int
@@ -1235,7 +1228,6 @@
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_ae_ops *ops;
-	int ret;
 
 	if (AE_IS_VER1(priv->enet_ver)) {
 		netdev_err(netdev,
@@ -1252,7 +1244,22 @@
 	if (!indir)
 		return 0;
 
-	ret = ops->set_rss(priv->ae_handle, indir, key, hfunc);
+	return ops->set_rss(priv->ae_handle, indir, key, hfunc);
+}
+
+static int hns_get_rxnfc(struct net_device *netdev,
+			 struct ethtool_rxnfc *cmd,
+			 u32 *rule_locs)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = priv->ae_handle->q_num;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
 
 	return 0;
 }
@@ -1280,6 +1287,7 @@
 	.get_rxfh_indir_size = hns_get_rss_indir_size,
 	.get_rxfh = hns_get_rss,
 	.set_rxfh = hns_set_rss,
+	.get_rxnfc = hns_get_rxnfc,
 };
 
 void hns_ethtool_set_ops(struct net_device *ndev)

diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index b4729ba..3b3c63e 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile

@@ -41,6 +41,7 @@
 	i40e_diag.o	\
 	i40e_txrx.o	\
 	i40e_ptp.o	\
+	i40e_client.o   \
 	i40e_virtchnl_pf.o
 
 i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 2f6210a..1ce6e9c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h

@@ -58,6 +58,7 @@
 #ifdef I40E_FCOE
 #include "i40e_fcoe.h"
 #endif
+#include "i40e_client.h"
 #include "i40e_virtchnl.h"
 #include "i40e_virtchnl_pf.h"
 #include "i40e_txrx.h"
@@ -190,6 +191,7 @@
 	u16 search_hint;
 	u16 list[0];
 #define I40E_PILE_VALID_BIT  0x8000
+#define I40E_IWARP_IRQ_PILE_ID  (I40E_PILE_VALID_BIT - 2)
 };
 
 #define I40E_DEFAULT_ATR_SAMPLE_RATE	20
@@ -282,6 +284,8 @@
 #endif /* I40E_FCOE */
 	u16 num_lan_qps;           /* num lan queues this PF has set up */
 	u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
+	u16 num_iwarp_msix;        /* num of iwarp vectors for this PF */
+	int iwarp_base_vector;
 	int queues_left;           /* queues left unclaimed */
 	u16 alloc_rss_size;        /* allocated RSS queues */
 	u16 rss_size_max;          /* HW defined max RSS queues */
@@ -329,6 +333,7 @@
 #define I40E_FLAG_16BYTE_RX_DESC_ENABLED	BIT_ULL(13)
 #define I40E_FLAG_CLEAN_ADMINQ			BIT_ULL(14)
 #define I40E_FLAG_FILTER_SYNC			BIT_ULL(15)
+#define I40E_FLAG_SERVICE_CLIENT_REQUESTED	BIT_ULL(16)
 #define I40E_FLAG_PROCESS_MDD_EVENT		BIT_ULL(17)
 #define I40E_FLAG_PROCESS_VFLR_EVENT		BIT_ULL(18)
 #define I40E_FLAG_SRIOV_ENABLED			BIT_ULL(19)
@@ -571,6 +576,8 @@
 	struct kobject *kobj;  /* sysfs object */
 	bool current_isup;     /* Sync 'link up' logging */
 
+	void *priv;	/* client driver data reference. */
+
 	/* VSI specific handlers */
 	irqreturn_t (*irq_handler)(int irq, void *data);
 
@@ -728,6 +735,10 @@
 			      struct i40e_vsi_context *ctxt,
 			      u8 enabled_tc, bool is_add);
 #endif
+void i40e_service_event_schedule(struct i40e_pf *pf);
+void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
+				  u8 *msg, u16 len);
+
 int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool enable);
 int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count);
 struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
@@ -750,6 +761,17 @@
 static inline void i40e_dbg_init(void) {}
 static inline void i40e_dbg_exit(void) {}
 #endif /* CONFIG_DEBUG_FS*/
+/* needed by client drivers */
+int i40e_lan_add_device(struct i40e_pf *pf);
+int i40e_lan_del_device(struct i40e_pf *pf);
+void i40e_client_subtask(struct i40e_pf *pf);
+void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
+void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi);
+void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
+void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
+void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
+			   enum i40e_client_type type);
 /**
  * i40e_irq_dynamic_enable - Enable default interrupt generation settings
  * @vsi: pointer to a vsi

diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
new file mode 100644
index 0000000..0e6ac84
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c

@@ -0,0 +1,1012 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <linux/errno.h>
+
+#include "i40e.h"
+#include "i40e_prototype.h"
+#include "i40e_client.h"
+
+static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
+
+static LIST_HEAD(i40e_devices);
+static DEFINE_MUTEX(i40e_device_mutex);
+
+static LIST_HEAD(i40e_clients);
+static DEFINE_MUTEX(i40e_client_mutex);
+
+static LIST_HEAD(i40e_client_instances);
+static DEFINE_MUTEX(i40e_client_instance_mutex);
+
+static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+				     struct i40e_client *client,
+				     u32 vf_id, u8 *msg, u16 len);
+
+static int i40e_client_setup_qvlist(struct i40e_info *ldev,
+				    struct i40e_client *client,
+				    struct i40e_qvlist_info *qvlist_info);
+
+static void i40e_client_request_reset(struct i40e_info *ldev,
+				      struct i40e_client *client,
+				      u32 reset_level);
+
+static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+				       struct i40e_client *client,
+				       bool is_vf, u32 vf_id,
+				       u32 flag, u32 valid_flag);
+
+static struct i40e_ops i40e_lan_ops = {
+	.virtchnl_send = i40e_client_virtchnl_send,
+	.setup_qvlist = i40e_client_setup_qvlist,
+	.request_reset = i40e_client_request_reset,
+	.update_vsi_ctxt = i40e_client_update_vsi_ctxt,
+};
+
+/**
+ * i40e_client_type_to_vsi_type - convert client type to vsi type
+ * @client_type: the i40e_client type
+ *
+ * returns the related vsi type value
+ **/
+static
+enum i40e_vsi_type i40e_client_type_to_vsi_type(enum i40e_client_type type)
+{
+	switch (type) {
+	case I40E_CLIENT_IWARP:
+		return I40E_VSI_IWARP;
+
+	case I40E_CLIENT_VMDQ2:
+		return I40E_VSI_VMDQ2;
+
+	default:
+		pr_err("i40e: Client type unknown\n");
+		return I40E_VSI_TYPE_UNKNOWN;
+	}
+}
+
+/**
+ * i40e_client_get_params - Get the params that can change at runtime
+ * @vsi: the VSI with the message
+ * @param: clinet param struct
+ *
+ **/
+static
+int i40e_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params)
+{
+	struct i40e_dcbx_config *dcb_cfg = &vsi->back->hw.local_dcbx_config;
+	int i = 0;
+
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		u8 tc = dcb_cfg->etscfg.prioritytable[i];
+		u16 qs_handle;
+
+		/* If TC is not enabled for VSI use TC0 for UP */
+		if (!(vsi->tc_config.enabled_tc & BIT(tc)))
+			tc = 0;
+
+		qs_handle = le16_to_cpu(vsi->info.qs_handle[tc]);
+		params->qos.prio_qos[i].tc = tc;
+		params->qos.prio_qos[i].qs_handle = qs_handle;
+		if (qs_handle == I40E_AQ_VSI_QS_HANDLE_INVALID) {
+			dev_err(&vsi->back->pdev->dev, "Invalid queue set handle for TC = %d, vsi id = %d\n",
+				tc, vsi->id);
+			return -EINVAL;
+		}
+	}
+
+	params->mtu = vsi->netdev->mtu;
+	return 0;
+}
+
+/**
+ * i40e_notify_client_of_vf_msg - call the client vf message callback
+ * @vsi: the VSI with the message
+ * @vf_id: the absolute VF id that sent the message
+ * @msg: message buffer
+ * @len: length of the message
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void
+i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!vsi)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == vsi->back) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->virtchnl_receive) {
+				dev_dbg(&vsi->back->pdev->dev,
+					"Cannot locate client instance virtual channel receive routine\n");
+				continue;
+			}
+			cdev->client->ops->virtchnl_receive(&cdev->lan_info,
+							    cdev->client,
+							    vf_id, msg, len);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_l2_param_changes - call the client notify callback
+ * @vsi: the VSI with l2 param changes
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi)
+{
+	struct i40e_client_instance *cdev;
+	struct i40e_params params;
+
+	if (!vsi)
+		return;
+	memset(&params, 0, sizeof(params));
+	i40e_client_get_params(vsi, &params);
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == vsi->back) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->l2_param_change) {
+				dev_dbg(&vsi->back->pdev->dev,
+					"Cannot locate client instance l2_param_change routine\n");
+				continue;
+			}
+			cdev->lan_info.params = params;
+			cdev->client->ops->l2_param_change(&cdev->lan_info,
+							   cdev->client,
+							   &params);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_netdev_open - call the client open callback
+ * @vsi: the VSI with netdev opened
+ *
+ * If there is a client to this netdev, call the client with open
+ **/
+void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!vsi)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.netdev == vsi->netdev) {
+			if (!cdev->client ||
+			    !cdev->client->ops || !cdev->client->ops->open) {
+				dev_dbg(&vsi->back->pdev->dev,
+					"Cannot locate client instance open routine\n");
+				continue;
+			}
+			cdev->client->ops->open(&cdev->lan_info, cdev->client);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_client_release_qvlist
+ * @ldev: pointer to L2 context.
+ *
+ **/
+static void i40e_client_release_qvlist(struct i40e_info *ldev)
+{
+	struct i40e_qvlist_info *qvlist_info = ldev->qvlist_info;
+	u32 i;
+
+	if (!ldev->qvlist_info)
+		return;
+
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		struct i40e_pf *pf = ldev->pf;
+		struct i40e_qv_info *qv_info;
+		u32 reg_idx;
+
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		reg_idx = I40E_PFINT_LNKLSTN(qv_info->v_idx - 1);
+		wr32(&pf->hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
+	}
+	kfree(ldev->qvlist_info);
+	ldev->qvlist_info = NULL;
+}
+
+/**
+ * i40e_notify_client_of_netdev_close - call the client close callback
+ * @vsi: the VSI with netdev closed
+ * @reset: true when close called due to a reset pending
+ *
+ * If there is a client to this netdev, call the client with close
+ **/
+void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!vsi)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.netdev == vsi->netdev) {
+			if (!cdev->client ||
+			    !cdev->client->ops || !cdev->client->ops->close) {
+				dev_dbg(&vsi->back->pdev->dev,
+					"Cannot locate client instance close routine\n");
+				continue;
+			}
+			cdev->client->ops->close(&cdev->lan_info, cdev->client,
+						 reset);
+			i40e_client_release_qvlist(&cdev->lan_info);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_vf_reset - call the client vf reset callback
+ * @pf: PF device pointer
+ * @vf_id: asolute id of VF being reset
+ *
+ * If there is a client attached to this PF, notify when a VF is reset
+ **/
+void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!pf)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == pf) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->vf_reset) {
+				dev_dbg(&pf->pdev->dev,
+					"Cannot locate client instance VF reset routine\n");
+				continue;
+			}
+			cdev->client->ops->vf_reset(&cdev->lan_info,
+						    cdev->client, vf_id);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_vf_enable - call the client vf notification callback
+ * @pf: PF device pointer
+ * @num_vfs: the number of VFs currently enabled, 0 for disable
+ *
+ * If there is a client attached to this PF, call its VF notification routine
+ **/
+void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!pf)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == pf) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->vf_enable) {
+				dev_dbg(&pf->pdev->dev,
+					"Cannot locate client instance VF enable routine\n");
+				continue;
+			}
+			cdev->client->ops->vf_enable(&cdev->lan_info,
+						     cdev->client, num_vfs);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_vf_client_capable - ask the client if it likes the specified VF
+ * @pf: PF device pointer
+ * @vf_id: the VF in question
+ *
+ * If there is a client of the specified type attached to this PF, call
+ * its vf_capable routine
+ **/
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
+			   enum i40e_client_type type)
+{
+	struct i40e_client_instance *cdev;
+	int capable = false;
+
+	if (!pf)
+		return false;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == pf) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->vf_capable ||
+			    !(cdev->client->type == type)) {
+				dev_dbg(&pf->pdev->dev,
+					"Cannot locate client instance VF capability routine\n");
+				continue;
+			}
+			capable = cdev->client->ops->vf_capable(&cdev->lan_info,
+								cdev->client,
+								vf_id);
+			break;
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+	return capable;
+}
+
+/**
+ * i40e_vsi_lookup - finds a matching VSI from the PF list starting at start_vsi
+ * @pf: board private structure
+ * @type: vsi type
+ * @start_vsi: a VSI pointer from where to start the search
+ *
+ * Returns non NULL on success or NULL for failure
+ **/
+struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf,
+				 enum i40e_vsi_type type,
+				 struct i40e_vsi *start_vsi)
+{
+	struct i40e_vsi *vsi;
+	int i = 0;
+
+	if (start_vsi) {
+		for (i = 0; i < pf->num_alloc_vsi; i++) {
+			vsi = pf->vsi[i];
+			if (vsi == start_vsi)
+				break;
+		}
+	}
+	for (; i < pf->num_alloc_vsi; i++) {
+		vsi = pf->vsi[i];
+		if (vsi && vsi->type == type)
+			return vsi;
+	}
+
+	return NULL;
+}
+
+/**
+ * i40e_client_add_instance - add a client instance struct to the instance list
+ * @pf: pointer to the board struct
+ * @client: pointer to a client struct in the client list.
+ *
+ * Returns cdev ptr on success, NULL on failure
+ **/
+static
+struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
+						      struct i40e_client *client)
+{
+	struct i40e_client_instance *cdev;
+	struct netdev_hw_addr *mac = NULL;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if ((cdev->lan_info.pf == pf) && (cdev->client == client)) {
+			cdev = NULL;
+			goto out;
+		}
+	}
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		goto out;
+
+	cdev->lan_info.pf = (void *)pf;
+	cdev->lan_info.netdev = vsi->netdev;
+	cdev->lan_info.pcidev = pf->pdev;
+	cdev->lan_info.fid = pf->hw.pf_id;
+	cdev->lan_info.ftype = I40E_CLIENT_FTYPE_PF;
+	cdev->lan_info.hw_addr = pf->hw.hw_addr;
+	cdev->lan_info.ops = &i40e_lan_ops;
+	cdev->lan_info.version.major = I40E_CLIENT_VERSION_MAJOR;
+	cdev->lan_info.version.minor = I40E_CLIENT_VERSION_MINOR;
+	cdev->lan_info.version.build = I40E_CLIENT_VERSION_BUILD;
+	cdev->lan_info.fw_maj_ver = pf->hw.aq.fw_maj_ver;
+	cdev->lan_info.fw_min_ver = pf->hw.aq.fw_min_ver;
+	cdev->lan_info.fw_build = pf->hw.aq.fw_build;
+	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state);
+
+	if (i40e_client_get_params(vsi, &cdev->lan_info.params)) {
+		kfree(cdev);
+		cdev = NULL;
+		goto out;
+	}
+
+	cdev->lan_info.msix_count = pf->num_iwarp_msix;
+	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+
+	mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
+			       struct netdev_hw_addr, list);
+	if (mac)
+		ether_addr_copy(cdev->lan_info.lanmac, mac->addr);
+	else
+		dev_err(&pf->pdev->dev, "MAC address list is empty!\n");
+
+	cdev->client = client;
+	INIT_LIST_HEAD(&cdev->list);
+	list_add(&cdev->list, &i40e_client_instances);
+out:
+	mutex_unlock(&i40e_client_instance_mutex);
+	return cdev;
+}
+
+/**
+ * i40e_client_del_instance - removes a client instance from the list
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+static
+int i40e_client_del_instance(struct i40e_pf *pf, struct i40e_client *client)
+{
+	struct i40e_client_instance *cdev, *tmp;
+	int ret = -ENODEV;
+
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) {
+		if ((cdev->lan_info.pf != pf) || (cdev->client != client))
+			continue;
+
+		dev_info(&pf->pdev->dev, "Deleted instance of Client %s, of dev %d bus=0x%02x func=0x%02x)\n",
+			 client->name, pf->hw.pf_id,
+			 pf->hw.bus.device, pf->hw.bus.func);
+		list_del(&cdev->list);
+		kfree(cdev);
+		ret = 0;
+		break;
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+	return ret;
+}
+
+/**
+ * i40e_client_subtask - client maintenance work
+ * @pf: board private structure
+ **/
+void i40e_client_subtask(struct i40e_pf *pf)
+{
+	struct i40e_client_instance *cdev;
+	struct i40e_client *client;
+	int ret = 0;
+
+	if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
+		return;
+	pf->flags &= ~I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+
+	/* If we're down or resetting, just bail */
+	if (test_bit(__I40E_DOWN, &pf->state) ||
+	    test_bit(__I40E_CONFIG_BUSY, &pf->state))
+		return;
+
+	/* Check client state and instantiate client if client registered */
+	mutex_lock(&i40e_client_mutex);
+	list_for_each_entry(client, &i40e_clients, list) {
+		/* first check client is registered */
+		if (!test_bit(__I40E_CLIENT_REGISTERED, &client->state))
+			continue;
+
+		/* Do we also need the LAN VSI to be up, to create instance */
+		if (!(client->flags & I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE)) {
+			/* check if L2 VSI is up, if not we are not ready */
+			if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
+				continue;
+		}
+
+		/* Add the client instance to the instance list */
+		cdev = i40e_client_add_instance(pf, client);
+		if (!cdev)
+			continue;
+
+		/* Also up the ref_cnt of no. of instances of this client */
+		atomic_inc(&client->ref_cnt);
+		dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
+			 client->name, pf->hw.pf_id,
+			 pf->hw.bus.device, pf->hw.bus.func);
+
+		/* Send an Open request to the client */
+		atomic_inc(&cdev->ref_cnt);
+		if (client->ops && client->ops->open)
+			ret = client->ops->open(&cdev->lan_info, client);
+		atomic_dec(&cdev->ref_cnt);
+		if (!ret) {
+			set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+		} else {
+			/* remove client instance */
+			i40e_client_del_instance(pf, client);
+			atomic_dec(&client->ref_cnt);
+			continue;
+		}
+	}
+	mutex_unlock(&i40e_client_mutex);
+}
+
+/**
+ * i40e_lan_add_device - add a lan device struct to the list of lan devices
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or none 0 on error
+ **/
+int i40e_lan_add_device(struct i40e_pf *pf)
+{
+	struct i40e_device *ldev;
+	int ret = 0;
+
+	mutex_lock(&i40e_device_mutex);
+	list_for_each_entry(ldev, &i40e_devices, list) {
+		if (ldev->pf == pf) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ldev->pf = pf;
+	INIT_LIST_HEAD(&ldev->list);
+	list_add(&ldev->list, &i40e_devices);
+	dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x func=0x%02x\n",
+		 pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func);
+
+	/* Since in some cases register may have happened before a device gets
+	 * added, we can schedule a subtask to go initiate the clients.
+	 */
+	pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+	i40e_service_event_schedule(pf);
+
+out:
+	mutex_unlock(&i40e_device_mutex);
+	return ret;
+}
+
+/**
+ * i40e_lan_del_device - removes a lan device from the device list
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_lan_del_device(struct i40e_pf *pf)
+{
+	struct i40e_device *ldev, *tmp;
+	int ret = -ENODEV;
+
+	mutex_lock(&i40e_device_mutex);
+	list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) {
+		if (ldev->pf == pf) {
+			dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x func=0x%02x\n",
+				 pf->hw.pf_id, pf->hw.bus.device,
+				 pf->hw.bus.func);
+			list_del(&ldev->list);
+			kfree(ldev);
+			ret = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&i40e_device_mutex);
+	return ret;
+}
+
+/**
+ * i40e_client_release - release client specific resources
+ * @client: pointer to the registered client
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_release(struct i40e_client *client)
+{
+	struct i40e_client_instance *cdev, *tmp;
+	struct i40e_pf *pf = NULL;
+	int ret = 0;
+
+	LIST_HEAD(cdevs_tmp);
+
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) {
+		if (strncmp(cdev->client->name, client->name,
+			    I40E_CLIENT_STR_LENGTH))
+			continue;
+		if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+			if (atomic_read(&cdev->ref_cnt) > 0) {
+				ret = I40E_ERR_NOT_READY;
+				goto out;
+			}
+			pf = (struct i40e_pf *)cdev->lan_info.pf;
+			if (client->ops && client->ops->close)
+				client->ops->close(&cdev->lan_info, client,
+						   false);
+			i40e_client_release_qvlist(&cdev->lan_info);
+			clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+
+			dev_warn(&pf->pdev->dev,
+				 "Client %s instance for PF id %d closed\n",
+				 client->name, pf->hw.pf_id);
+		}
+		/* delete the client instance from the list */
+		list_del(&cdev->list);
+		list_add(&cdev->list, &cdevs_tmp);
+		atomic_dec(&client->ref_cnt);
+		dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n",
+			 client->name);
+	}
+out:
+	mutex_unlock(&i40e_client_instance_mutex);
+
+	/* free the client device and release its vsi */
+	list_for_each_entry_safe(cdev, tmp, &cdevs_tmp, list) {
+		kfree(cdev);
+	}
+	return ret;
+}
+
+/**
+ * i40e_client_prepare - prepare client specific resources
+ * @client: pointer to the registered client
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_prepare(struct i40e_client *client)
+{
+	struct i40e_device *ldev;
+	struct i40e_pf *pf;
+	int ret = 0;
+
+	mutex_lock(&i40e_device_mutex);
+	list_for_each_entry(ldev, &i40e_devices, list) {
+		pf = ldev->pf;
+		/* Start the client subtask */
+		pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+		i40e_service_event_schedule(pf);
+	}
+	mutex_unlock(&i40e_device_mutex);
+	return ret;
+}
+
+/**
+ * i40e_client_virtchnl_send - TBD
+ * @ldev: pointer to L2 context
+ * @client: Client pointer
+ * @vf_id: absolute VF identifier
+ * @msg: message buffer
+ * @len: length of message buffer
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+				     struct i40e_client *client,
+				     u32 vf_id, u8 *msg, u16 len)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_hw *hw = &pf->hw;
+	i40e_status err;
+
+	err = i40e_aq_send_msg_to_vf(hw, vf_id, I40E_VIRTCHNL_OP_IWARP,
+				     0, msg, len, NULL);
+	if (err)
+		dev_err(&pf->pdev->dev, "Unable to send iWarp message to VF, error %d, aq status %d\n",
+			err, hw->aq.asq_last_status);
+
+	return err;
+}
+
+/**
+ * i40e_client_setup_qvlist
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @qv_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_setup_qvlist(struct i40e_info *ldev,
+				    struct i40e_client *client,
+				    struct i40e_qvlist_info *qvlist_info)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_qv_info *qv_info;
+	u32 v_idx, i, reg_idx, reg;
+	u32 size;
+
+	size = sizeof(struct i40e_qvlist_info) +
+	       (sizeof(struct i40e_qv_info) * (qvlist_info->num_vectors - 1));
+	ldev->qvlist_info = kzalloc(size, GFP_KERNEL);
+	ldev->qvlist_info->num_vectors = qvlist_info->num_vectors;
+
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+
+		/* Validate vector id belongs to this client */
+		if ((v_idx >= (pf->iwarp_base_vector + pf->num_iwarp_msix)) ||
+		    (v_idx < pf->iwarp_base_vector))
+			goto err;
+
+		ldev->qvlist_info->qv_info[i] = *qv_info;
+		reg_idx = I40E_PFINT_LNKLSTN(v_idx - 1);
+
+		if (qv_info->ceq_idx == I40E_QUEUE_INVALID_IDX) {
+			/* Special case - No CEQ mapped on this vector */
+			wr32(hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
+		} else {
+			reg = (qv_info->ceq_idx &
+			       I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (I40E_QUEUE_TYPE_PE_CEQ <<
+			       I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+			wr32(hw, reg_idx, reg);
+
+			reg = (I40E_PFINT_CEQCTL_CAUSE_ENA_MASK |
+			       (v_idx << I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT) |
+			       (qv_info->itr_idx <<
+				I40E_PFINT_CEQCTL_ITR_INDX_SHIFT) |
+			       (I40E_QUEUE_END_OF_LIST <<
+				I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT));
+			wr32(hw, I40E_PFINT_CEQCTL(qv_info->ceq_idx), reg);
+		}
+		if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg = (I40E_PFINT_AEQCTL_CAUSE_ENA_MASK |
+			       (v_idx << I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT) |
+			       (qv_info->itr_idx <<
+				I40E_PFINT_AEQCTL_ITR_INDX_SHIFT));
+
+			wr32(hw, I40E_PFINT_AEQCTL, reg);
+		}
+	}
+
+	return 0;
+err:
+	kfree(ldev->qvlist_info);
+	ldev->qvlist_info = NULL;
+	return -EINVAL;
+}
+
+/**
+ * i40e_client_request_reset
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @level: reset level
+ **/
+static void i40e_client_request_reset(struct i40e_info *ldev,
+				      struct i40e_client *client,
+				      u32 reset_level)
+{
+	struct i40e_pf *pf = ldev->pf;
+
+	switch (reset_level) {
+	case I40E_CLIENT_RESET_LEVEL_PF:
+		set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+		break;
+	case I40E_CLIENT_RESET_LEVEL_CORE:
+		set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+		break;
+	default:
+		dev_warn(&pf->pdev->dev,
+			 "Client %s instance for PF id %d request an unsupported reset: %d.\n",
+			 client->name, pf->hw.pf_id, reset_level);
+		break;
+	}
+
+	i40e_service_event_schedule(pf);
+}
+
+/**
+ * i40e_client_update_vsi_ctxt
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @is_vf: if this for the VF
+ * @vf_id: if is_vf true this carries the vf_id
+ * @flag: Any device level setting that needs to be done for PE
+ * @valid_flag: Bits in this match up and enable changing of flag bits
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+				       struct i40e_client *client,
+				       bool is_vf, u32 vf_id,
+				       u32 flag, u32 valid_flag)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	struct i40e_vsi_context ctxt;
+	bool update = true;
+	i40e_status err;
+
+	/* TODO: for now do not allow setting VF's VSI setting */
+	if (is_vf)
+		return -EINVAL;
+
+	ctxt.seid = pf->main_vsi_seid;
+	ctxt.pf_num = pf->hw.pf_id;
+	err = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+	if (err) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get PF vsi config, err %s aq_err %s\n",
+			 i40e_stat_str(&pf->hw, err),
+			 i40e_aq_str(&pf->hw,
+				     pf->hw.aq.asq_last_status));
+		return -ENOENT;
+	}
+
+	if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
+	    (flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+		ctxt.info.valid_sections =
+			cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	} else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
+		  !(flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+		ctxt.info.valid_sections =
+			cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags &= ~I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	} else {
+		update = false;
+		dev_warn(&pf->pdev->dev,
+			 "Client %s instance for PF id %d request an unsupported Config: %x.\n",
+			 client->name, pf->hw.pf_id, flag);
+	}
+
+	if (update) {
+		err = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "update VSI ctxt for PE failed, err %s aq_err %s\n",
+				 i40e_stat_str(&pf->hw, err),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+		}
+	}
+	return err;
+}
+
+/**
+ * i40e_register_client - Register a i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_register_client(struct i40e_client *client)
+{
+	int ret = 0;
+	enum i40e_vsi_type vsi_type;
+
+	if (!client) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (strlen(client->name) == 0) {
+		pr_info("i40e: Failed to register client with no name\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	mutex_lock(&i40e_client_mutex);
+	if (i40e_client_is_registered(client)) {
+		pr_info("i40e: Client %s has already been registered!\n",
+			client->name);
+		mutex_unlock(&i40e_client_mutex);
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if ((client->version.major != I40E_CLIENT_VERSION_MAJOR) ||
+	    (client->version.minor != I40E_CLIENT_VERSION_MINOR)) {
+		pr_info("i40e: Failed to register client %s due to mismatched client interface version\n",
+			client->name);
+		pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n",
+			client->version.major, client->version.minor,
+			client->version.build,
+			i40e_client_interface_version_str);
+		mutex_unlock(&i40e_client_mutex);
+		ret = -EIO;
+		goto out;
+	}
+
+	vsi_type = i40e_client_type_to_vsi_type(client->type);
+	if (vsi_type == I40E_VSI_TYPE_UNKNOWN) {
+		pr_info("i40e: Failed to register client %s due to unknown client type %d\n",
+			client->name, client->type);
+		mutex_unlock(&i40e_client_mutex);
+		ret = -EIO;
+		goto out;
+	}
+	list_add(&client->list, &i40e_clients);
+	set_bit(__I40E_CLIENT_REGISTERED, &client->state);
+	mutex_unlock(&i40e_client_mutex);
+
+	if (i40e_client_prepare(client)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	pr_info("i40e: Registered client %s with return code %d\n",
+		client->name, ret);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(i40e_register_client);
+
+/**
+ * i40e_unregister_client - Unregister a i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_unregister_client(struct i40e_client *client)
+{
+	int ret = 0;
+
+	/* When a unregister request comes through we would have to send
+	 * a close for each of the client instances that were opened.
+	 * client_release function is called to handle this.
+	 */
+	if (!client || i40e_client_release(client)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/* TODO: check if device is in reset, or if that matters? */
+	mutex_lock(&i40e_client_mutex);
+	if (!i40e_client_is_registered(client)) {
+		pr_info("i40e: Client %s has not been registered\n",
+			client->name);
+		mutex_unlock(&i40e_client_mutex);
+		ret = -ENODEV;
+		goto out;
+	}
+	if (atomic_read(&client->ref_cnt) == 0) {
+		clear_bit(__I40E_CLIENT_REGISTERED, &client->state);
+		list_del(&client->list);
+		pr_info("i40e: Unregistered client %s with return code %d\n",
+			client->name, ret);
+	} else {
+		ret = I40E_ERR_NOT_READY;
+		pr_err("i40e: Client %s failed unregister - client has open instances\n",
+		       client->name);
+	}
+
+	mutex_unlock(&i40e_client_mutex);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(i40e_unregister_client);

diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
new file mode 100644
index 0000000..bf6b453
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h

@@ -0,0 +1,232 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_CLIENT_H_
+#define _I40E_CLIENT_H_
+
+#define I40E_CLIENT_STR_LENGTH 10
+
+/* Client interface version should be updated anytime there is a change in the
+ * existing APIs or data structures.
+ */
+#define I40E_CLIENT_VERSION_MAJOR 0
+#define I40E_CLIENT_VERSION_MINOR 01
+#define I40E_CLIENT_VERSION_BUILD 00
+#define I40E_CLIENT_VERSION_STR     \
+	XSTRINGIFY(I40E_CLIENT_VERSION_MAJOR) "." \
+	XSTRINGIFY(I40E_CLIENT_VERSION_MINOR) "." \
+	XSTRINGIFY(I40E_CLIENT_VERSION_BUILD)
+
+struct i40e_client_version {
+	u8 major;
+	u8 minor;
+	u8 build;
+	u8 rsvd;
+};
+
+enum i40e_client_state {
+	__I40E_CLIENT_NULL,
+	__I40E_CLIENT_REGISTERED
+};
+
+enum i40e_client_instance_state {
+	__I40E_CLIENT_INSTANCE_NONE,
+	__I40E_CLIENT_INSTANCE_OPENED,
+};
+
+enum i40e_client_type {
+	I40E_CLIENT_IWARP,
+	I40E_CLIENT_VMDQ2
+};
+
+struct i40e_ops;
+struct i40e_client;
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+ */
+#define I40E_QUEUE_TYPE_PE_AEQ  0x80
+#define I40E_QUEUE_INVALID_IDX	0xFFFF
+
+struct i40e_qv_info {
+	u32 v_idx; /* msix_vector */
+	u16 ceq_idx;
+	u16 aeq_idx;
+	u8 itr_idx;
+};
+
+struct i40e_qvlist_info {
+	u32 num_vectors;
+	struct i40e_qv_info qv_info[1];
+};
+
+#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF
+
+/* set of LAN parameters useful for clients managed by LAN */
+
+/* Struct to hold per priority info */
+struct i40e_prio_qos_params {
+	u16 qs_handle; /* qs handle for prio */
+	u8 tc; /* TC mapped to prio */
+	u8 reserved;
+};
+
+#define I40E_CLIENT_MAX_USER_PRIORITY        8
+/* Struct to hold Client QoS */
+struct i40e_qos_params {
+	struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY];
+};
+
+struct i40e_params {
+	struct i40e_qos_params qos;
+	u16 mtu;
+};
+
+/* Structure to hold Lan device info for a client device */
+struct i40e_info {
+	struct i40e_client_version version;
+	u8 lanmac[6];
+	struct net_device *netdev;
+	struct pci_dev *pcidev;
+	u8 __iomem *hw_addr;
+	u8 fid;	/* function id, PF id or VF id */
+#define I40E_CLIENT_FTYPE_PF 0
+#define I40E_CLIENT_FTYPE_VF 1
+	u8 ftype; /* function type, PF or VF */
+	void *pf;
+
+	/* All L2 params that could change during the life span of the PF
+	 * and needs to be communicated to the client when they change
+	 */
+	struct i40e_qvlist_info *qvlist_info;
+	struct i40e_params params;
+	struct i40e_ops *ops;
+
+	u16 msix_count;	 /* number of msix vectors*/
+	/* Array down below will be dynamically allocated based on msix_count */
+	struct msix_entry *msix_entries;
+	u16 itr_index; /* Which ITR index the PE driver is suppose to use */
+	u16 fw_maj_ver;                 /* firmware major version */
+	u16 fw_min_ver;                 /* firmware minor version */
+	u32 fw_build;                   /* firmware build number */
+};
+
+#define I40E_CLIENT_RESET_LEVEL_PF   1
+#define I40E_CLIENT_RESET_LEVEL_CORE 2
+#define I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE  BIT(1)
+
+struct i40e_ops {
+	/* setup_q_vector_list enables queues with a particular vector */
+	int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client,
+			    struct i40e_qvlist_info *qv_info);
+
+	int (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client,
+			     u32 vf_id, u8 *msg, u16 len);
+
+	/* If the PE Engine is unresponsive, RDMA driver can request a reset.
+	 * The level helps determine the level of reset being requested.
+	 */
+	void (*request_reset)(struct i40e_info *ldev,
+			      struct i40e_client *client, u32 level);
+
+	/* API for the RDMA driver to set certain VSI flags that control
+	 * PE Engine.
+	 */
+	int (*update_vsi_ctxt)(struct i40e_info *ldev,
+			       struct i40e_client *client,
+			       bool is_vf, u32 vf_id,
+			       u32 flag, u32 valid_flag);
+};
+
+struct i40e_client_ops {
+	/* Should be called from register_client() or whenever PF is ready
+	 * to create a specific client instance.
+	 */
+	int (*open)(struct i40e_info *ldev, struct i40e_client *client);
+
+	/* Should be called when netdev is unavailable or when unregister
+	 * call comes in. If the close is happenening due to a reset being
+	 * triggered set the reset bit to true.
+	 */
+	void (*close)(struct i40e_info *ldev, struct i40e_client *client,
+		      bool reset);
+
+	/* called when some l2 managed parameters changes - mtu */
+	void (*l2_param_change)(struct i40e_info *ldev,
+				struct i40e_client *client,
+				struct i40e_params *params);
+
+	int (*virtchnl_receive)(struct i40e_info *ldev,
+				struct i40e_client *client, u32 vf_id,
+				u8 *msg, u16 len);
+
+	/* called when a VF is reset by the PF */
+	void (*vf_reset)(struct i40e_info *ldev,
+			 struct i40e_client *client, u32 vf_id);
+
+	/* called when the number of VFs changes */
+	void (*vf_enable)(struct i40e_info *ldev,
+			  struct i40e_client *client, u32 num_vfs);
+
+	/* returns true if VF is capable of specified offload */
+	int (*vf_capable)(struct i40e_info *ldev,
+			  struct i40e_client *client, u32 vf_id);
+};
+
+/* Client device */
+struct i40e_client_instance {
+	struct list_head list;
+	struct i40e_info lan_info;
+	struct i40e_client *client;
+	unsigned long  state;
+	/* A count of all the in-progress calls to the client */
+	atomic_t ref_cnt;
+};
+
+struct i40e_client {
+	struct list_head list;		/* list of registered clients */
+	char name[I40E_CLIENT_STR_LENGTH];
+	struct i40e_client_version version;
+	unsigned long state;		/* client state */
+	atomic_t ref_cnt;  /* Count of all the client devices of this kind */
+	u32 flags;
+#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE	BIT(0)
+#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS	BIT(2)
+	enum i40e_client_type type;
+	struct i40e_client_ops *ops;	/* client ops provided by the client */
+};
+
+static inline bool i40e_client_is_registered(struct i40e_client *client)
+{
+	return test_bit(__I40E_CLIENT_REGISTERED, &client->state);
+}
+
+/* used by clients */
+int i40e_register_client(struct i40e_client *client);
+int i40e_unregister_client(struct i40e_client *client);
+
+#endif /* _I40E_CLIENT_H_ */

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 70d9605..6700643 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c

@@ -289,7 +289,7 @@
  *
  * If not already scheduled, this puts the task into the work queue
  **/
-static void i40e_service_event_schedule(struct i40e_pf *pf)
+void i40e_service_event_schedule(struct i40e_pf *pf)
 {
 	if (!test_bit(__I40E_DOWN, &pf->state) &&
 	    !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) &&
@@ -2230,7 +2230,7 @@
 	netdev->mtu = new_mtu;
 	if (netif_running(netdev))
 		i40e_vsi_reinit_locked(vsi);
-
+	i40e_notify_client_of_l2_param_changes(vsi);
 	return 0;
 }
 
@@ -4169,6 +4169,9 @@
 		free_irq(pf->msix_entries[0].vector, pf);
 	}
 
+	i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
+		      I40E_IWARP_IRQ_PILE_ID);
+
 	i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
 	for (i = 0; i < pf->num_alloc_vsi; i++)
 		if (pf->vsi[i])
@@ -4212,12 +4215,17 @@
  **/
 static void i40e_vsi_close(struct i40e_vsi *vsi)
 {
+	bool reset = false;
+
 	if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
 		i40e_down(vsi);
 	i40e_vsi_free_irq(vsi);
 	i40e_vsi_free_tx_resources(vsi);
 	i40e_vsi_free_rx_resources(vsi);
 	vsi->current_netdev_flags = 0;
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
+		reset = true;
+	i40e_notify_client_of_netdev_close(vsi, reset);
 }
 
 /**
@@ -4850,6 +4858,12 @@
 	ctxt.info = vsi->info;
 	i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
 
+	if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+		ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	}
+
 	/* Update the VSI after updating the VSI queue-mapping information */
 	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
 	if (ret) {
@@ -4993,6 +5007,7 @@
 			if (pf->vsi[v]->netdev)
 				i40e_dcbnl_set_all(pf->vsi[v]);
 		}
+		i40e_notify_client_of_l2_param_changes(pf->vsi[v]);
 	}
 }
 
@@ -5191,6 +5206,11 @@
 		}
 		i40e_fdir_filter_restore(vsi);
 	}
+
+	/* On the next run of the service_task, notify any clients of the new
+	 * opened netdev
+	 */
+	pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
 	i40e_service_event_schedule(pf);
 
 	return 0;
@@ -5379,6 +5399,8 @@
 		geneve_get_rx_port(netdev);
 #endif
 
+	i40e_notify_client_of_netdev_open(vsi);
+
 	return 0;
 }
 
@@ -6043,6 +6065,7 @@
 	case I40E_VSI_SRIOV:
 	case I40E_VSI_VMDQ2:
 	case I40E_VSI_CTRL:
+	case I40E_VSI_IWARP:
 	case I40E_VSI_MIRROR:
 	default:
 		/* there is no notification for other VSIs */
@@ -7148,6 +7171,7 @@
 	i40e_vc_process_vflr_event(pf);
 	i40e_watchdog_subtask(pf);
 	i40e_fdir_reinit_subtask(pf);
+	i40e_client_subtask(pf);
 	i40e_sync_filters_subtask(pf);
 	i40e_sync_udp_filters_subtask(pf);
 	i40e_clean_adminq_subtask(pf);
@@ -7550,6 +7574,7 @@
 	int vectors_left;
 	int v_budget, i;
 	int v_actual;
+	int iwarp_requested = 0;
 
 	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
 		return -ENODEV;
@@ -7563,6 +7588,7 @@
 	 *		is governed by number of cpus in the system.
 	 *	- assumes symmetric Tx/Rx pairing
 	 *   - The number of VMDq pairs
+	 *   - The CPU count within the NUMA node if iWARP is enabled
 #ifdef I40E_FCOE
 	 *   - The number of FCOE qps.
 #endif
@@ -7609,6 +7635,16 @@
 	}
 
 #endif
+	/* can we reserve enough for iWARP? */
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		if (!vectors_left)
+			pf->num_iwarp_msix = 0;
+		else if (vectors_left < pf->num_iwarp_msix)
+			pf->num_iwarp_msix = 1;
+		v_budget += pf->num_iwarp_msix;
+		vectors_left -= pf->num_iwarp_msix;
+	}
+
 	/* any vectors left over go for VMDq support */
 	if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
 		int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps;
@@ -7643,6 +7679,8 @@
 		 * of these features based on the policy and at the end disable
 		 * the features that did not get any vectors.
 		 */
+		iwarp_requested = pf->num_iwarp_msix;
+		pf->num_iwarp_msix = 0;
 #ifdef I40E_FCOE
 		pf->num_fcoe_qps = 0;
 		pf->num_fcoe_msix = 0;
@@ -7681,17 +7719,33 @@
 			pf->num_lan_msix = 1;
 			break;
 		case 3:
+			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+				pf->num_lan_msix = 1;
+				pf->num_iwarp_msix = 1;
+			} else {
+				pf->num_lan_msix = 2;
+			}
 #ifdef I40E_FCOE
 			/* give one vector to FCoE */
 			if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
 				pf->num_lan_msix = 1;
 				pf->num_fcoe_msix = 1;
 			}
-#else
-			pf->num_lan_msix = 2;
 #endif
 			break;
 		default:
+			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+				pf->num_iwarp_msix = min_t(int, (vec / 3),
+						 iwarp_requested);
+				pf->num_vmdq_vsis = min_t(int, (vec / 3),
+						  I40E_DEFAULT_NUM_VMDQ_VSI);
+			} else {
+				pf->num_vmdq_vsis = min_t(int, (vec / 2),
+						  I40E_DEFAULT_NUM_VMDQ_VSI);
+			}
+			pf->num_lan_msix = min_t(int,
+			       (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
+							      pf->num_lan_msix);
 #ifdef I40E_FCOE
 			/* give one vector to FCoE */
 			if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
@@ -7699,8 +7753,6 @@
 				vec--;
 			}
 #endif
-			/* give the rest to the PF */
-			pf->num_lan_msix = min_t(int, vec, pf->num_lan_qps);
 			break;
 		}
 	}
@@ -7710,6 +7762,12 @@
 		dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
 		pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
 	}
+
+	if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
+	    (pf->num_iwarp_msix == 0)) {
+		dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
+		pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+	}
 #ifdef I40E_FCOE
 
 	if ((pf->flags & I40E_FLAG_FCOE_ENABLED) && (pf->num_fcoe_msix == 0)) {
@@ -7801,6 +7859,7 @@
 		vectors = i40e_init_msix(pf);
 		if (vectors < 0) {
 			pf->flags &= ~(I40E_FLAG_MSIX_ENABLED	|
+				       I40E_FLAG_IWARP_ENABLED	|
 #ifdef I40E_FCOE
 				       I40E_FLAG_FCOE_ENABLED	|
 #endif
@@ -8474,6 +8533,12 @@
 		pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
 	}
 
+	if (pf->hw.func_caps.iwarp) {
+		pf->flags |= I40E_FLAG_IWARP_ENABLED;
+		/* IWARP needs one extra vector for CQP just like MISC.*/
+		pf->num_iwarp_msix = (int)num_online_cpus() + 1;
+	}
+
 #ifdef I40E_FCOE
 	i40e_init_pf_fcoe(pf);
 
@@ -9328,6 +9393,13 @@
 				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
 		}
 
+		if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+			ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+			ctxt.info.queueing_opt_flags |=
+						I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+		}
+
 		ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
 		ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
 		if (pf->vf[vsi->vf_id].spoofchk) {
@@ -9351,6 +9423,10 @@
 		break;
 
 #endif /* I40E_FCOE */
+	case I40E_VSI_IWARP:
+		/* send down message to iWARP */
+		break;
+
 	default:
 		return -ENODEV;
 	}
@@ -10467,6 +10543,7 @@
 
 		/* make sure all the fancies are disabled */
 		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
+			       I40E_FLAG_IWARP_ENABLED	|
 #ifdef I40E_FCOE
 			       I40E_FLAG_FCOE_ENABLED	|
 #endif
@@ -10484,6 +10561,7 @@
 		queues_left -= pf->num_lan_qps;
 
 		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
+			       I40E_FLAG_IWARP_ENABLED	|
 #ifdef I40E_FCOE
 			       I40E_FLAG_FCOE_ENABLED	|
 #endif
@@ -11059,7 +11137,17 @@
 	}
 #endif /* CONFIG_PCI_IOV */
 
-	pfs_found++;
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
+						      pf->num_iwarp_msix,
+						      I40E_IWARP_IRQ_PILE_ID);
+		if (pf->iwarp_base_vector < 0) {
+			dev_info(&pdev->dev,
+				 "failed to get tracking for %d vectors for IWARP err=%d\n",
+				 pf->num_iwarp_msix, pf->iwarp_base_vector);
+			pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+		}
+	}
 
 	i40e_dbg_pf_init(pf);
 
@@ -11070,6 +11158,12 @@
 	mod_timer(&pf->service_timer,
 		  round_jiffies(jiffies + pf->service_timer_period));
 
+	/* add this PF to client device list and launch a client service task */
+	err = i40e_lan_add_device(pf);
+	if (err)
+		dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
+			 err);
+
 #ifdef I40E_FCOE
 	/* create FCoE interface */
 	i40e_fcoe_vsi_setup(pf);
@@ -11245,6 +11339,13 @@
 	if (pf->vsi[pf->lan_vsi])
 		i40e_vsi_release(pf->vsi[pf->lan_vsi]);
 
+	/* remove attached clients */
+	ret_code = i40e_lan_del_device(pf);
+	if (ret_code) {
+		dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+			 ret_code);
+	}
+
 	/* shutdown and destroy the HMC */
 	if (hw->hmc.hmc_obj) {
 		ret_code = i40e_shutdown_lan_hmc(hw);

diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 0a0baf7..3335f9d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h

@@ -78,7 +78,7 @@
 	I40E_DEBUG_DCB			= 0x00000400,
 	I40E_DEBUG_DIAG			= 0x00000800,
 	I40E_DEBUG_FD			= 0x00001000,
-
+	I40E_DEBUG_IWARP		= 0x00F00000,
 	I40E_DEBUG_AQ_MESSAGE		= 0x01000000,
 	I40E_DEBUG_AQ_DESCRIPTOR	= 0x02000000,
 	I40E_DEBUG_AQ_DESC_BUFFER	= 0x04000000,
@@ -160,6 +160,7 @@
 	I40E_VSI_MIRROR	= 5,
 	I40E_VSI_SRIOV	= 6,
 	I40E_VSI_FDIR	= 7,
+	I40E_VSI_IWARP	= 8,
 	I40E_VSI_TYPE_UNKNOWN
 };
 

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
index 3226946b..ab866cf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h

@@ -81,6 +81,9 @@
 	I40E_VIRTCHNL_OP_GET_STATS = 15,
 	I40E_VIRTCHNL_OP_FCOE = 16,
 	I40E_VIRTCHNL_OP_EVENT = 17,
+	I40E_VIRTCHNL_OP_IWARP = 20,
+	I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
+	I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
 };
 
 /* Virtual channel message descriptor. This overlays the admin queue
@@ -348,6 +351,37 @@
 	int severity;
 };
 
+/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
+ * VF uses this message to request PF to map IWARP vectors to IWARP queues.
+ * The request for this originates from the VF IWARP driver through
+ * a client interface between VF LAN and VF IWARP driver.
+ * A vector could have an AEQ and CEQ attached to it although
+ * there is a single AEQ per VF IWARP instance in which case
+ * most vectors will have an INVALID_IDX for aeq and valid idx for ceq.
+ * There will never be a case where there will be multiple CEQs attached
+ * to a single vector.
+ * PF configures interrupt mapping and returns status.
+ */
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+*/
+#define I40E_QUEUE_TYPE_PE_AEQ  0x80
+#define I40E_QUEUE_INVALID_IDX  0xFFFF
+
+struct i40e_virtchnl_iwarp_qv_info {
+	u32 v_idx; /* msix_vector */
+	u16 ceq_idx;
+	u16 aeq_idx;
+	u8 itr_idx;
+};
+
+struct i40e_virtchnl_iwarp_qvlist_info {
+	u32 num_vectors;
+	struct i40e_virtchnl_iwarp_qv_info qv_info[1];
+};
+
 /* VF reset states - these are written into the RSTAT register:
  * I40E_VFGEN_RSTAT1 on the PF
  * I40E_VFGEN_RSTAT on the VF

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index acd2693..816c6bb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

@@ -352,6 +352,136 @@
 }
 
 /**
+ * i40e_release_iwarp_qvlist
+ * @vf: pointer to the VF.
+ *
+ **/
+static void i40e_release_iwarp_qvlist(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info = vf->qvlist_info;
+	u32 msix_vf;
+	u32 i;
+
+	if (!vf->qvlist_info)
+		return;
+
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		struct i40e_virtchnl_iwarp_qv_info *qv_info;
+		u32 next_q_index, next_q_type;
+		struct i40e_hw *hw = &pf->hw;
+		u32 v_idx, reg_idx, reg;
+
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+		if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+			/* Figure out the queue after CEQ and make that the
+			 * first queue.
+			 */
+			reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+			reg = rd32(hw, I40E_VPINT_CEQCTL(reg_idx));
+			next_q_index = (reg & I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK)
+					>> I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT;
+			next_q_type = (reg & I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK)
+					>> I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT;
+
+			reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+			reg = (next_q_index &
+			       I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (next_q_type <<
+			       I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+			wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+		}
+	}
+	kfree(vf->qvlist_info);
+	vf->qvlist_info = NULL;
+}
+
+/**
+ * i40e_config_iwarp_qvlist
+ * @vf: pointer to the VF info
+ * @qvlist_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
+				    struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_virtchnl_iwarp_qv_info *qv_info;
+	u32 v_idx, i, reg_idx, reg;
+	u32 next_q_idx, next_q_type;
+	u32 msix_vf, size;
+
+	size = sizeof(struct i40e_virtchnl_iwarp_qvlist_info) +
+	       (sizeof(struct i40e_virtchnl_iwarp_qv_info) *
+						(qvlist_info->num_vectors - 1));
+	vf->qvlist_info = kzalloc(size, GFP_KERNEL);
+	vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
+
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+
+		/* Validate vector id belongs to this vf */
+		if (!i40e_vc_isvalid_vector_id(vf, v_idx))
+			goto err;
+
+		vf->qvlist_info->qv_info[i] = *qv_info;
+
+		reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+		/* We might be sharing the interrupt, so get the first queue
+		 * index and type, push it down the list by adding the new
+		 * queue on top. Also link it with the new queue in CEQCTL.
+		 */
+		reg = rd32(hw, I40E_VPINT_LNKLSTN(reg_idx));
+		next_q_idx = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) >>
+				I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT);
+		next_q_type = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK) >>
+				I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+		if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+			reg = (I40E_VPINT_CEQCTL_CAUSE_ENA_MASK |
+			(v_idx << I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT) |
+			(qv_info->itr_idx << I40E_VPINT_CEQCTL_ITR_INDX_SHIFT) |
+			(next_q_type << I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT) |
+			(next_q_idx << I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT));
+			wr32(hw, I40E_VPINT_CEQCTL(reg_idx), reg);
+
+			reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+			reg = (qv_info->ceq_idx &
+			       I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (I40E_QUEUE_TYPE_PE_CEQ <<
+			       I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+			wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+		}
+
+		if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg = (I40E_VPINT_AEQCTL_CAUSE_ENA_MASK |
+			(v_idx << I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT) |
+			(qv_info->itr_idx << I40E_VPINT_AEQCTL_ITR_INDX_SHIFT));
+
+			wr32(hw, I40E_VPINT_AEQCTL(vf->vf_id), reg);
+		}
+	}
+
+	return 0;
+err:
+	kfree(vf->qvlist_info);
+	vf->qvlist_info = NULL;
+	return -EINVAL;
+}
+
+/**
  * i40e_config_vsi_tx_queue
  * @vf: pointer to the VF info
  * @vsi_id: id of VSI as provided by the FW
@@ -850,9 +980,11 @@
 	/* reallocate VF resources to reset the VSI state */
 	i40e_free_vf_res(vf);
 	if (!i40e_alloc_vf_res(vf)) {
+		int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
 		i40e_enable_vf_mappings(vf);
 		set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
 		clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states);
+		i40e_notify_client_of_vf_reset(pf, abs_vf_id);
 	}
 	/* tell the VF the reset is done */
 	wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
@@ -877,11 +1009,7 @@
 	while (test_and_set_bit(__I40E_VF_DISABLE, &pf->state))
 		usleep_range(1000, 2000);
 
-	for (i = 0; i < pf->num_alloc_vfs; i++)
-		if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
-			i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
-					       false);
-
+	i40e_notify_client_of_vf_enable(pf, 0);
 	for (i = 0; i < pf->num_alloc_vfs; i++)
 		if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
 			i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
@@ -953,6 +1081,7 @@
 			goto err_iov;
 		}
 	}
+	i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
 	/* allocate memory */
 	vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
 	if (!vfs) {
@@ -1206,6 +1335,13 @@
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!vsi->info.pvid)
 		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	if (i40e_vf_client_capable(pf, vf->vf_id, I40E_CLIENT_IWARP) &&
+	    (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_IWARP)) {
+		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_IWARP;
+		set_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states);
+	}
+
 	if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
 		if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)
 			vfres->vf_offload_flags |=
@@ -1827,6 +1963,72 @@
 }
 
 /**
+ * i40e_vc_iwarp_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the VF for the iwarp msgs
+ **/
+static int i40e_vc_iwarp_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+	struct i40e_pf *pf = vf->pf;
+	int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id;
+	i40e_status aq_ret = 0;
+
+	if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+	    !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto error_param;
+	}
+
+	i40e_notify_client_of_vf_msg(pf->vsi[pf->lan_vsi], abs_vf_id,
+				     msg, msglen);
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_IWARP,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_iwarp_qvmap_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @config: config qvmap or release it
+ *
+ * called from the VF for the iwarp msgs
+ **/
+static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen,
+				   bool config)
+{
+	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info =
+				(struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+	i40e_status aq_ret = 0;
+
+	if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+	    !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (config) {
+		if (i40e_config_iwarp_qvlist(vf, qvlist_info))
+			aq_ret = I40E_ERR_PARAM;
+	} else {
+		i40e_release_iwarp_qvlist(vf);
+	}
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf,
+			       config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP :
+			       I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
+			       aq_ret);
+}
+
+/**
  * i40e_vc_validate_vf_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
@@ -1921,6 +2123,32 @@
 	case I40E_VIRTCHNL_OP_GET_STATS:
 		valid_len = sizeof(struct i40e_virtchnl_queue_select);
 		break;
+	case I40E_VIRTCHNL_OP_IWARP:
+		/* These messages are opaque to us and will be validated in
+		 * the RDMA client code. We just need to check for nonzero
+		 * length. The firmware will enforce max length restrictions.
+		 */
+		if (msglen)
+			valid_len = msglen;
+		else
+			err_msg_format = true;
+		break;
+	case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+		valid_len = 0;
+		break;
+	case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+		valid_len = sizeof(struct i40e_virtchnl_iwarp_qvlist_info);
+		if (msglen >= valid_len) {
+			struct i40e_virtchnl_iwarp_qvlist_info *qv =
+				(struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+			if (qv->num_vectors == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += ((qv->num_vectors - 1) *
+				sizeof(struct i40e_virtchnl_iwarp_qv_info));
+		}
+		break;
 	/* These are always errors coming from the VF. */
 	case I40E_VIRTCHNL_OP_EVENT:
 	case I40E_VIRTCHNL_OP_UNKNOWN:
@@ -2010,6 +2238,15 @@
 	case I40E_VIRTCHNL_OP_GET_STATS:
 		ret = i40e_vc_get_stats_msg(vf, msg, msglen);
 		break;
+	case I40E_VIRTCHNL_OP_IWARP:
+		ret = i40e_vc_iwarp_msg(vf, msg, msglen);
+		break;
+	case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, true);
+		break;
+	case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false);
+		break;
 	case I40E_VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index e74642a..e7b2fba 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h

@@ -58,6 +58,7 @@
 enum i40e_vf_states {
 	I40E_VF_STAT_INIT = 0,
 	I40E_VF_STAT_ACTIVE,
+	I40E_VF_STAT_IWARPENA,
 	I40E_VF_STAT_FCOEENA,
 	I40E_VF_STAT_DISABLED,
 };
@@ -66,6 +67,7 @@
 enum i40e_vf_capabilities {
 	I40E_VIRTCHNL_VF_CAP_PRIVILEGE = 0,
 	I40E_VIRTCHNL_VF_CAP_L2,
+	I40E_VIRTCHNL_VF_CAP_IWARP,
 };
 
 /* VF information structure */
@@ -106,6 +108,8 @@
 	bool link_forced;
 	bool link_up;		/* only valid if VF link is forced */
 	bool spoofchk;
+	/* RDMA Client */
+	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info;
 };
 
 void i40e_free_vfs(struct i40e_pf *pf);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 84fa28c..e4949af 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h

@@ -661,9 +661,7 @@
 #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP		(u32)(1 << 9)
 #define IXGBE_FLAG2_PTP_PPS_ENABLED		(u32)(1 << 10)
 #define IXGBE_FLAG2_PHY_INTERRUPT		(u32)(1 << 11)
-#ifdef CONFIG_IXGBE_VXLAN
 #define IXGBE_FLAG2_VXLAN_REREG_NEEDED		BIT(12)
-#endif
 #define IXGBE_FLAG2_VLAN_PROMISC		BIT(13)
 
 	/* Tx fast path data */
@@ -675,6 +673,9 @@
 	int num_rx_queues;
 	u16 rx_itr_setting;
 
+	/* Port number used to identify VXLAN traffic */
+	__be16 vxlan_port;
+
 	/* TX */
 	struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
 
@@ -782,9 +783,6 @@
 	u32 timer_event_accumulator;
 	u32 vferr_refcount;
 	struct ixgbe_mac_addr *mac_table;
-#ifdef CONFIG_IXGBE_VXLAN
-	u16 vxlan_port;
-#endif
 	struct kobject *info_kobj;
 #ifdef CONFIG_IXGBE_HWMON
 	struct hwmon_buff *ixgbe_hwmon_buff;
@@ -879,6 +877,8 @@
 extern char ixgbe_default_device_descr[];
 #endif /* IXGBE_FCOE */
 
+int ixgbe_open(struct net_device *netdev);
+int ixgbe_close(struct net_device *netdev);
 void ixgbe_up(struct ixgbe_adapter *adapter);
 void ixgbe_down(struct ixgbe_adapter *adapter);
 void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 726e0ee..b3530e1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c

@@ -2053,7 +2053,7 @@
 
 		if (if_running)
 			/* indicate we're in test mode */
-			dev_close(netdev);
+			ixgbe_close(netdev);
 		else
 			ixgbe_reset(adapter);
 
@@ -2091,7 +2091,7 @@
 		/* clear testing bit and return adapter to previous state */
 		clear_bit(__IXGBE_TESTING, &adapter->state);
 		if (if_running)
-			dev_open(netdev);
+			ixgbe_open(netdev);
 		else if (hw->mac.ops.disable_tx_laser)
 			hw->mac.ops.disable_tx_laser(hw);
 	} else {

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 569cb07..7df3fe2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

@@ -4531,9 +4531,7 @@
 	case ixgbe_mac_X550:
 	case ixgbe_mac_X550EM_x:
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_VXLANCTRL, 0);
-#ifdef CONFIG_IXGBE_VXLAN
 		adapter->vxlan_port = 0;
-#endif
 		break;
 	default:
 		break;
@@ -5994,7 +5992,7 @@
  * handler is registered with the OS, the watchdog timer is started,
  * and the stack is notified that the interface is ready.
  **/
-static int ixgbe_open(struct net_device *netdev)
+int ixgbe_open(struct net_device *netdev)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -6096,7 +6094,7 @@
  * needs to be disabled.  A global MAC reset is issued to stop the
  * hardware, and all transmit and receive resources are freed.
  **/
-static int ixgbe_close(struct net_device *netdev)
+int ixgbe_close(struct net_device *netdev)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -7560,11 +7558,10 @@
 		struct ipv6hdr *ipv6;
 	} hdr;
 	struct tcphdr *th;
+	unsigned int hlen;
 	struct sk_buff *skb;
-#ifdef CONFIG_IXGBE_VXLAN
-	u8 encap = false;
-#endif /* CONFIG_IXGBE_VXLAN */
 	__be16 vlan_id;
+	int l4_proto;
 
 	/* if ring doesn't have a interrupt vector, cannot perform ATR */
 	if (!q_vector)
@@ -7576,62 +7573,50 @@
 
 	ring->atr_count++;
 
+	/* currently only IPv4/IPv6 with TCP is supported */
+	if ((first->protocol != htons(ETH_P_IP)) &&
+	    (first->protocol != htons(ETH_P_IPV6)))
+		return;
+
 	/* snag network header to get L4 type and address */
 	skb = first->skb;
 	hdr.network = skb_network_header(skb);
-	if (!skb->encapsulation) {
-		th = tcp_hdr(skb);
-	} else {
 #ifdef CONFIG_IXGBE_VXLAN
+	if (skb->encapsulation &&
+	    first->protocol == htons(ETH_P_IP) &&
+	    hdr.ipv4->protocol != IPPROTO_UDP) {
 		struct ixgbe_adapter *adapter = q_vector->adapter;
 
-		if (!adapter->vxlan_port)
-			return;
-		if (first->protocol != htons(ETH_P_IP) ||
-		    hdr.ipv4->version != IPVERSION ||
-		    hdr.ipv4->protocol != IPPROTO_UDP) {
-			return;
-		}
-		if (ntohs(udp_hdr(skb)->dest) != adapter->vxlan_port)
-			return;
-		encap = true;
-		hdr.network = skb_inner_network_header(skb);
-		th = inner_tcp_hdr(skb);
-#else
-		return;
-#endif /* CONFIG_IXGBE_VXLAN */
+		/* verify the port is recognized as VXLAN */
+		if (adapter->vxlan_port &&
+		    udp_hdr(skb)->dest == adapter->vxlan_port)
+			hdr.network = skb_inner_network_header(skb);
 	}
+#endif /* CONFIG_IXGBE_VXLAN */
 
 	/* Currently only IPv4/IPv6 with TCP is supported */
 	switch (hdr.ipv4->version) {
 	case IPVERSION:
-		if (hdr.ipv4->protocol != IPPROTO_TCP)
-			return;
+		/* access ihl as u8 to avoid unaligned access on ia64 */
+		hlen = (hdr.network[0] & 0x0F) << 2;
+		l4_proto = hdr.ipv4->protocol;
 		break;
 	case 6:
-		if (likely((unsigned char *)th - hdr.network ==
-			   sizeof(struct ipv6hdr))) {
-			if (hdr.ipv6->nexthdr != IPPROTO_TCP)
-				return;
-		} else {
-			__be16 frag_off;
-			u8 l4_hdr;
-
-			ipv6_skip_exthdr(skb, hdr.network - skb->data +
-					      sizeof(struct ipv6hdr),
-					 &l4_hdr, &frag_off);
-			if (unlikely(frag_off))
-				return;
-			if (l4_hdr != IPPROTO_TCP)
-				return;
-		}
+		hlen = hdr.network - skb->data;
+		l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
+		hlen -= hdr.network - skb->data;
 		break;
 	default:
 		return;
 	}
 
-	/* skip this packet since it is invalid or the socket is closing */
-	if (!th || th->fin)
+	if (l4_proto != IPPROTO_TCP)
+		return;
+
+	th = (struct tcphdr *)(hdr.network + hlen);
+
+	/* skip this packet since the socket is closing */
+	if (th->fin)
 		return;
 
 	/* sample on all syn packets or once every atr sample count */
@@ -7682,10 +7667,8 @@
 		break;
 	}
 
-#ifdef CONFIG_IXGBE_VXLAN
-	if (encap)
+	if (hdr.network != skb_network_header(skb))
 		input.formatted.flow_type |= IXGBE_ATR_L4TYPE_TUNNEL_MASK;
-#endif /* CONFIG_IXGBE_VXLAN */
 
 	/* This assumes the Rx queue and Tx queue are bound to the same CPU */
 	ixgbe_fdir_add_signature_filter_82599(&q_vector->adapter->hw,
@@ -8209,10 +8192,17 @@
 static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
 			       struct tc_cls_u32_offload *cls)
 {
+	u32 uhtid = TC_U32_USERHTID(cls->knode.handle);
+	u32 loc;
 	int err;
 
+	if ((uhtid != 0x800) && (uhtid >= IXGBE_MAX_LINK_HANDLE))
+		return -EINVAL;
+
+	loc = cls->knode.handle & 0xfffff;
+
 	spin_lock(&adapter->fdir_perfect_lock);
-	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, cls->knode.handle);
+	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, loc);
 	spin_unlock(&adapter->fdir_perfect_lock);
 	return err;
 }
@@ -8221,20 +8211,30 @@
 					    __be16 protocol,
 					    struct tc_cls_u32_offload *cls)
 {
+	u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
+
+	if (uhtid >= IXGBE_MAX_LINK_HANDLE)
+		return -EINVAL;
+
 	/* This ixgbe devices do not support hash tables at the moment
 	 * so abort when given hash tables.
 	 */
 	if (cls->hnode.divisor > 0)
 		return -EINVAL;
 
-	set_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables);
+	set_bit(uhtid - 1, &adapter->tables);
 	return 0;
 }
 
 static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter,
 					    struct tc_cls_u32_offload *cls)
 {
-	clear_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables);
+	u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
+
+	if (uhtid >= IXGBE_MAX_LINK_HANDLE)
+		return -EINVAL;
+
+	clear_bit(uhtid - 1, &adapter->tables);
 	return 0;
 }
 
@@ -8252,27 +8252,29 @@
 #endif
 	int i, err = 0;
 	u8 queue;
-	u32 handle;
+	u32 uhtid, link_uhtid;
 
 	memset(&mask, 0, sizeof(union ixgbe_atr_input));
-	handle = cls->knode.handle;
+	uhtid = TC_U32_USERHTID(cls->knode.handle);
+	link_uhtid = TC_U32_USERHTID(cls->knode.link_handle);
 
-	/* At the moment cls_u32 jumps to transport layer and skips past
+	/* At the moment cls_u32 jumps to network layer and skips past
 	 * L2 headers. The canonical method to match L2 frames is to use
 	 * negative values. However this is error prone at best but really
 	 * just broken because there is no way to "know" what sort of hdr
-	 * is in front of the transport layer. Fix cls_u32 to support L2
+	 * is in front of the network layer. Fix cls_u32 to support L2
 	 * headers when needed.
 	 */
 	if (protocol != htons(ETH_P_IP))
 		return -EINVAL;
 
-	if (cls->knode.link_handle ||
-	    cls->knode.link_handle >= IXGBE_MAX_LINK_HANDLE) {
+	if (link_uhtid) {
 		struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps;
-		u32 uhtid = TC_U32_USERHTID(cls->knode.link_handle);
 
-		if (!test_bit(uhtid, &adapter->tables))
+		if (link_uhtid >= IXGBE_MAX_LINK_HANDLE)
+			return -EINVAL;
+
+		if (!test_bit(link_uhtid - 1, &adapter->tables))
 			return -EINVAL;
 
 		for (i = 0; nexthdr[i].jump; i++) {
@@ -8288,10 +8290,7 @@
 			    nexthdr->mask != cls->knode.sel->keys[0].mask)
 				return -EINVAL;
 
-			if (uhtid >= IXGBE_MAX_LINK_HANDLE)
-				return -EINVAL;
-
-			adapter->jump_tables[uhtid] = nexthdr->jump;
+			adapter->jump_tables[link_uhtid] = nexthdr->jump;
 		}
 		return 0;
 	}
@@ -8308,13 +8307,13 @@
 	 * To add support for new nodes update ixgbe_model.h parse structures
 	 * this function _should_ be generic try not to hardcode values here.
 	 */
-	if (TC_U32_USERHTID(handle) == 0x800) {
+	if (uhtid == 0x800) {
 		field_ptr = adapter->jump_tables[0];
 	} else {
-		if (TC_U32_USERHTID(handle) >= ARRAY_SIZE(adapter->jump_tables))
+		if (uhtid >= IXGBE_MAX_LINK_HANDLE)
 			return -EINVAL;
 
-		field_ptr = adapter->jump_tables[TC_U32_USERHTID(handle)];
+		field_ptr = adapter->jump_tables[uhtid];
 	}
 
 	if (!field_ptr)
@@ -8332,8 +8331,7 @@
 		int j;
 
 		for (j = 0; field_ptr[j].val; j++) {
-			if (field_ptr[j].off == off &&
-			    field_ptr[j].mask == m) {
+			if (field_ptr[j].off == off) {
 				field_ptr[j].val(input, &mask, val, m);
 				input->filter.formatted.flow_type |=
 					field_ptr[j].type;
@@ -8393,8 +8391,8 @@
 	return -EINVAL;
 }
 
-int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-		     struct tc_to_netdev *tc)
+static int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+			    struct tc_to_netdev *tc)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 
@@ -8554,7 +8552,6 @@
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_hw *hw = &adapter->hw;
-	u16 new_port = ntohs(port);
 
 	if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
 		return;
@@ -8562,18 +8559,18 @@
 	if (sa_family == AF_INET6)
 		return;
 
-	if (adapter->vxlan_port == new_port)
+	if (adapter->vxlan_port == port)
 		return;
 
 	if (adapter->vxlan_port) {
 		netdev_info(dev,
 			    "Hit Max num of VXLAN ports, not adding port %d\n",
-			    new_port);
+			    ntohs(port));
 		return;
 	}
 
-	adapter->vxlan_port = new_port;
-	IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, new_port);
+	adapter->vxlan_port = port;
+	IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, ntohs(port));
 }
 
 /**
@@ -8586,7 +8583,6 @@
 				 __be16 port)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
-	u16 new_port = ntohs(port);
 
 	if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
 		return;
@@ -8594,9 +8590,9 @@
 	if (sa_family == AF_INET6)
 		return;
 
-	if (adapter->vxlan_port != new_port) {
+	if (adapter->vxlan_port != port) {
 		netdev_info(dev, "Port %d was not found, not deleting\n",
-			    new_port);
+			    ntohs(port));
 		return;
 	}
 
@@ -9265,17 +9261,6 @@
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 	netdev->priv_flags |= IFF_SUPP_NOFCS;
 
-#ifdef CONFIG_IXGBE_VXLAN
-	switch (adapter->hw.mac.type) {
-	case ixgbe_mac_X550:
-	case ixgbe_mac_X550EM_x:
-		netdev->hw_enc_features |= NETIF_F_RXCSUM;
-		break;
-	default:
-		break;
-	}
-#endif /* CONFIG_IXGBE_VXLAN */
-
 #ifdef CONFIG_IXGBE_DCB
 	netdev->dcbnl_ops = &dcbnl_ops;
 #endif
@@ -9329,6 +9314,8 @@
 		goto err_sw_init;
 	}
 
+	/* Set hw->mac.addr to permanent MAC address */
+	ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
 	ixgbe_mac_set_default_filter(adapter);
 
 	setup_timer(&adapter->service_timer, &ixgbe_service_timer,

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
index ce48872..74c53ad 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h

@@ -32,7 +32,6 @@
 
 struct ixgbe_mat_field {
 	unsigned int off;
-	unsigned int mask;
 	int (*val)(struct ixgbe_fdir_filter *input,
 		   union ixgbe_atr_input *mask,
 		   u32 val, u32 m);
@@ -58,35 +57,27 @@
 }
 
 static struct ixgbe_mat_field ixgbe_ipv4_fields[] = {
-	{ .off = 12, .mask = -1, .val = ixgbe_mat_prgm_sip,
+	{ .off = 12, .val = ixgbe_mat_prgm_sip,
 	  .type = IXGBE_ATR_FLOW_TYPE_IPV4},
-	{ .off = 16, .mask = -1, .val = ixgbe_mat_prgm_dip,
+	{ .off = 16, .val = ixgbe_mat_prgm_dip,
 	  .type = IXGBE_ATR_FLOW_TYPE_IPV4},
 	{ .val = NULL } /* terminal node */
 };
 
-static inline int ixgbe_mat_prgm_sport(struct ixgbe_fdir_filter *input,
+static inline int ixgbe_mat_prgm_ports(struct ixgbe_fdir_filter *input,
 				       union ixgbe_atr_input *mask,
 				       u32 val, u32 m)
 {
 	input->filter.formatted.src_port = val & 0xffff;
 	mask->formatted.src_port = m & 0xffff;
-	return 0;
-};
+	input->filter.formatted.dst_port = val >> 16;
+	mask->formatted.dst_port = m >> 16;
 
-static inline int ixgbe_mat_prgm_dport(struct ixgbe_fdir_filter *input,
-				       union ixgbe_atr_input *mask,
-				       u32 val, u32 m)
-{
-	input->filter.formatted.dst_port = val & 0xffff;
-	mask->formatted.dst_port = m & 0xffff;
 	return 0;
 };
 
 static struct ixgbe_mat_field ixgbe_tcp_fields[] = {
-	{.off = 0, .mask = 0xffff, .val = ixgbe_mat_prgm_sport,
-	 .type = IXGBE_ATR_FLOW_TYPE_TCPV4},
-	{.off = 2, .mask = 0xffff, .val = ixgbe_mat_prgm_dport,
+	{.off = 0, .val = ixgbe_mat_prgm_ports,
 	 .type = IXGBE_ATR_FLOW_TYPE_TCPV4},
 	{ .val = NULL } /* terminal node */
 };

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 87aca3f..68a9c64 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c

@@ -355,7 +355,7 @@
 		command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL);
 		if (!(command & IXGBE_SB_IOSF_CTRL_BUSY))
 			break;
-		usleep_range(10, 20);
+		udelay(10);
 	}
 	if (ctrl)
 		*ctrl = command;

diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index c48aef6..d7aa4b2 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c

@@ -680,7 +680,7 @@
 
 		if (if_running)
 			/* indicate we're in test mode */
-			dev_close(netdev);
+			ixgbevf_close(netdev);
 		else
 			ixgbevf_reset(adapter);
 
@@ -692,7 +692,7 @@
 
 		clear_bit(__IXGBEVF_TESTING, &adapter->state);
 		if (if_running)
-			dev_open(netdev);
+			ixgbevf_open(netdev);
 	} else {
 		hw_dbg(&adapter->hw, "online testing starting\n");
 		/* Online tests */

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 68ec7daa..991eeae 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h

@@ -486,6 +486,8 @@
 extern const char ixgbevf_driver_name[];
 extern const char ixgbevf_driver_version[];
 
+int ixgbevf_open(struct net_device *netdev);
+int ixgbevf_close(struct net_device *netdev);
 void ixgbevf_up(struct ixgbevf_adapter *adapter);
 void ixgbevf_down(struct ixgbevf_adapter *adapter);
 void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter);

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 0ea14c0..b0edae9 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c

@@ -3122,7 +3122,7 @@
  * handler is registered with the OS, the watchdog timer is started,
  * and the stack is notified that the interface is ready.
  **/
-static int ixgbevf_open(struct net_device *netdev)
+int ixgbevf_open(struct net_device *netdev)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -3205,7 +3205,7 @@
  * needs to be disabled.  A global MAC reset is issued to stop the
  * hardware, and all transmit and receive resources are freed.
  **/
-static int ixgbevf_close(struct net_device *netdev)
+int ixgbevf_close(struct net_device *netdev)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 
@@ -3692,19 +3692,23 @@
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct sockaddr *addr = p;
+	int err;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	ether_addr_copy(netdev->dev_addr, addr->sa_data);
-	ether_addr_copy(hw->mac.addr, addr->sa_data);
-
 	spin_lock_bh(&adapter->mbx_lock);
 
-	hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0);
+	err = hw->mac.ops.set_rar(hw, 0, addr->sa_data, 0);
 
 	spin_unlock_bh(&adapter->mbx_lock);
 
+	if (err)
+		return -EPERM;
+
+	ether_addr_copy(hw->mac.addr, addr->sa_data);
+	ether_addr_copy(netdev->dev_addr, addr->sa_data);
+
 	return 0;
 }
 

diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 61a98f4..4d613a4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c

@@ -408,8 +408,10 @@
 
 	/* if nacked the address was rejected, use "perm_addr" */
 	if (!ret_val &&
-	    (msgbuf[0] == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_NACK)))
+	    (msgbuf[0] == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_NACK))) {
 		ixgbevf_get_mac_addr_vf(hw, hw->mac.addr);
+		return IXGBE_ERR_MBX;
+	}
 
 	return ret_val;
 }

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 577f7ca..7fc4902 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c

@@ -260,7 +260,6 @@
 
 #define MVNETA_VLAN_TAG_LEN             4
 
-#define MVNETA_CPU_D_CACHE_LINE_SIZE    32
 #define MVNETA_TX_CSUM_DEF_SIZE		1600
 #define MVNETA_TX_CSUM_MAX_SIZE		9800
 #define MVNETA_ACC_MODE_EXT1		1
@@ -300,7 +299,7 @@
 #define MVNETA_RX_PKT_SIZE(mtu) \
 	ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \
 	      ETH_HLEN + ETH_FCS_LEN,			     \
-	      MVNETA_CPU_D_CACHE_LINE_SIZE)
+	      cache_line_size())
 
 #define IS_TSO_HEADER(txq, addr) \
 	((addr >= txq->tso_hdrs_phys) && \
@@ -2764,9 +2763,6 @@
 	if (rxq->descs == NULL)
 		return -ENOMEM;
 
-	BUG_ON(rxq->descs !=
-	       PTR_ALIGN(rxq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE));
-
 	rxq->last_desc = rxq->size - 1;
 
 	/* Set Rx descriptors queue starting address */
@@ -2837,10 +2833,6 @@
 	if (txq->descs == NULL)
 		return -ENOMEM;
 
-	/* Make sure descriptor address is cache line size aligned  */
-	BUG_ON(txq->descs !=
-	       PTR_ALIGN(txq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE));
-
 	txq->last_desc = txq->size - 1;
 
 	/* Set maximum bandwidth for enabled TXQs */
@@ -3050,6 +3042,20 @@
 	return mtu;
 }
 
+static void mvneta_percpu_enable(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE);
+}
+
+static void mvneta_percpu_disable(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	disable_percpu_irq(pp->dev->irq);
+}
+
 /* Change the device mtu */
 static int mvneta_change_mtu(struct net_device *dev, int mtu)
 {
@@ -3074,6 +3080,7 @@
 	 * reallocation of the queues
 	 */
 	mvneta_stop_dev(pp);
+	on_each_cpu(mvneta_percpu_disable, pp, true);
 
 	mvneta_cleanup_txqs(pp);
 	mvneta_cleanup_rxqs(pp);
@@ -3097,6 +3104,7 @@
 		return ret;
 	}
 
+	on_each_cpu(mvneta_percpu_enable, pp, true);
 	mvneta_start_dev(pp);
 	mvneta_port_up(pp);
 
@@ -3250,20 +3258,6 @@
 	pp->phy_dev = NULL;
 }
 
-static void mvneta_percpu_enable(void *arg)
-{
-	struct mvneta_port *pp = arg;
-
-	enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE);
-}
-
-static void mvneta_percpu_disable(void *arg)
-{
-	struct mvneta_port *pp = arg;
-
-	disable_percpu_irq(pp->dev->irq);
-}
-
 /* Electing a CPU must be done in an atomic way: it should be done
  * after or before the removal/insertion of a CPU and this function is
  * not reentrant.

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index c797971a..868a957 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c

@@ -321,7 +321,6 @@
 /* Lbtd 802.3 type */
 #define MVPP2_IP_LBDT_TYPE		0xfffa
 
-#define MVPP2_CPU_D_CACHE_LINE_SIZE	32
 #define MVPP2_TX_CSUM_MAX_SIZE		9800
 
 /* Timeout constants */
@@ -377,7 +376,7 @@
 
 #define MVPP2_RX_PKT_SIZE(mtu) \
 	ALIGN((mtu) + MVPP2_MH_SIZE + MVPP2_VLAN_TAG_LEN + \
-	      ETH_HLEN + ETH_FCS_LEN, MVPP2_CPU_D_CACHE_LINE_SIZE)
+	      ETH_HLEN + ETH_FCS_LEN, cache_line_size())
 
 #define MVPP2_RX_BUF_SIZE(pkt_size)	((pkt_size) + NET_SKB_PAD)
 #define MVPP2_RX_TOTAL_SIZE(buf_size)	((buf_size) + MVPP2_SKB_SHINFO_SIZE)
@@ -4493,10 +4492,6 @@
 	if (!aggr_txq->descs)
 		return -ENOMEM;
 
-	/* Make sure descriptor address is cache line size aligned  */
-	BUG_ON(aggr_txq->descs !=
-	       PTR_ALIGN(aggr_txq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE));
-
 	aggr_txq->last_desc = aggr_txq->size - 1;
 
 	/* Aggr TXQ no reset WA */
@@ -4526,9 +4521,6 @@
 	if (!rxq->descs)
 		return -ENOMEM;
 
-	BUG_ON(rxq->descs !=
-	       PTR_ALIGN(rxq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE));
-
 	rxq->last_desc = rxq->size - 1;
 
 	/* Zero occupied and non-occupied counters - direct access */
@@ -4616,10 +4608,6 @@
 	if (!txq->descs)
 		return -ENOMEM;
 
-	/* Make sure descriptor address is cache line size aligned  */
-	BUG_ON(txq->descs !=
-	       PTR_ALIGN(txq->descs, MVPP2_CPU_D_CACHE_LINE_SIZE));
-
 	txq->last_desc = txq->size - 1;
 
 	/* Set Tx descriptors queue starting address - indirect access */
@@ -6059,8 +6047,10 @@
 
 		/* Map physical Rx queue to port's logical Rx queue */
 		rxq = devm_kzalloc(dev, sizeof(*rxq), GFP_KERNEL);
-		if (!rxq)
+		if (!rxq) {
+			err = -ENOMEM;
 			goto err_free_percpu;
+		}
 		/* Map this Rx queue to a physical queue */
 		rxq->id = port->first_rxq + queue;
 		rxq->port = port->id;

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 7f2126b..e0b68af 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c

@@ -1690,8 +1690,8 @@
 		return -ENOMEM;
 
 	eth->base = devm_ioremap_resource(&pdev->dev, res);
-	if (!eth->base)
-		return -EADDRNOTAVAIL;
+	if (IS_ERR(eth->base))
+		return PTR_ERR(eth->base);
 
 	spin_lock_init(&eth->page_lock);
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 42d8de8..6aa7397 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c

@@ -39,8 +39,6 @@
 
 #include "mlx4.h"
 
-static const u8 zero_gid[16];	/* automatically initialized to 0 */
-
 int mlx4_get_mgm_entry_size(struct mlx4_dev *dev)
 {
 	return 1 << dev->oper_log_mgm_entry_size;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 97f5114..eb926e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c

@@ -407,6 +407,12 @@
 const char *mlx5_command_str(int command)
 {
 	switch (command) {
+	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+		return "QUERY_HCA_VPORT_CONTEXT";
+
+	case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+		return "MODIFY_HCA_VPORT_CONTEXT";
+
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
 		return "QUERY_HCA_CAP";
 

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index aa1ab47..75c7ae6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c

@@ -98,88 +98,55 @@
 {
 	int err;
 
-	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR);
-	if (err)
-		return err;
-
-	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX);
+	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
 	if (err)
 		return err;
 
 	if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, pg)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ODP,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ODP,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, atomic)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, roce)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, vport_group_manager) &&
 	    MLX5_CAP_GEN(dev, eswitch_flow_table)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
-					 HCA_CAP_OPMOD_GET_CUR);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
 		if (err)
 			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
-					 HCA_CAP_OPMOD_GET_MAX);
+	}
+
+	if (MLX5_CAP_GEN(dev, vector_calc)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC);
 		if (err)
 			return err;
 	}

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 72a94e7..3f3b2fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c

@@ -341,8 +341,9 @@
 	}
 }
 
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
-		       enum mlx5_cap_mode cap_mode)
+static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
+				   enum mlx5_cap_type cap_type,
+				   enum mlx5_cap_mode cap_mode)
 {
 	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
 	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
@@ -392,6 +393,16 @@
 	return err;
 }
 
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
+{
+	int ret;
+
+	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
+	if (ret)
+		return ret;
+	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
+}
+
 static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
 {
 	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
@@ -419,8 +430,7 @@
 	int err;
 
 	if (MLX5_CAP_GEN(dev, atomic)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
-					 HCA_CAP_OPMOD_GET_CUR);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
 		if (err)
 			return err;
 	} else {
@@ -462,11 +472,7 @@
 	if (!set_ctx)
 		goto query_ex;
 
-	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX);
-	if (err)
-		goto query_ex;
-
-	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR);
+	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
 	if (err)
 		goto query_ex;
 

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 90ab09e..bd51840 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c

@@ -852,7 +852,8 @@
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
 
 int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
-				  u8 port_num, void *out, size_t out_sz)
+				  int vf, u8 port_num, void *out,
+				  size_t out_sz)
 {
 	int	in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
 	int	is_group_manager;
@@ -871,7 +872,7 @@
 	if (other_vport) {
 		if (is_group_manager) {
 			MLX5_SET(query_vport_counter_in, in, other_vport, 1);
-			MLX5_SET(query_vport_counter_in, in, vport_number, 0);
+			MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1);
 		} else {
 			err = -EPERM;
 			goto free;
@@ -890,3 +891,70 @@
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
+
+int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
+				       u8 other_vport, u8 port_num,
+				       int vf,
+				       struct mlx5_hca_vport_context *req)
+{
+	int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in);
+	u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)];
+	int is_group_manager;
+	void *in;
+	int err;
+	void *ctx;
+
+	mlx5_core_dbg(dev, "vf %d\n", vf);
+	is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+	in = kzalloc(in_sz, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	memset(out, 0, sizeof(out));
+	MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT);
+	if (other_vport) {
+		if (is_group_manager) {
+			MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1);
+			MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf);
+		} else {
+			err = -EPERM;
+			goto ex;
+		}
+	}
+
+	if (MLX5_CAP_GEN(dev, num_ports) > 1)
+		MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num);
+
+	ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context);
+	MLX5_SET(hca_vport_context, ctx, field_select, req->field_select);
+	MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware);
+	MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi);
+	MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw);
+	MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy);
+	MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state);
+	MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state);
+	MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
+	MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
+	MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+	MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm);
+	MLX5_SET(hca_vport_context, ctx, cap_mask2, req->cap_mask2);
+	MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm);
+	MLX5_SET(hca_vport_context, ctx, lid, req->lid);
+	MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply);
+	MLX5_SET(hca_vport_context, ctx, lmc, req->lmc);
+	MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout);
+	MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid);
+	MLX5_SET(hca_vport_context, ctx, sm_sl, req->sm_sl);
+	MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter);
+	MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter);
+	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+	if (err)
+		goto ex;
+
+	err = mlx5_cmd_status_to_err_v2(out);
+
+ex:
+	kfree(in);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index ab264e1..75683fb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h

@@ -45,7 +45,7 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
-#include <asm-generic/io-64-nonatomic-hi-lo.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
 
 #include "nfp_net_ctrl.h"
 

diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index ffd0acc..2017b01 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c

@@ -2750,7 +2750,7 @@
 int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 		       enum qed_int_mode int_mode)
 {
-	int rc;
+	int rc = 0;
 
 	/* Configure AEU signal change to produce attentions */
 	qed_wr(p_hwfn, p_ptt, IGU_REG_ATTENTION_ENABLE, 0);

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index ef33270..6d31f92 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h

@@ -18,7 +18,7 @@
  */
 #define DRV_NAME  	"qlge"
 #define DRV_STRING 	"QLogic 10 Gigabit PCI-E Ethernet Driver "
-#define DRV_VERSION	"1.00.00.34"
+#define DRV_VERSION	"1.00.00.35"
 
 #define WQ_ADDR_ALIGN	0x3	/* 4 byte alignment */
 

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 4e1a7db..087e14a 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c

@@ -1377,11 +1377,11 @@
 
 		/* TAG and timestamp required flag */
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-		skb_tx_timestamp(skb);
 		desc->tagh_tsr = (ts_skb->tag >> 4) | TX_TSR;
 		desc->ds_tagl |= le16_to_cpu(ts_skb->tag << 12);
 	}
 
+	skb_tx_timestamp(skb);
 	/* Descriptor type must be set after all the above writes */
 	dma_wmb();
 	desc->die_dt = DT_FEND;

diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index b02eed1..73427e2 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c

@@ -155,11 +155,11 @@
 	return 0;
 
 err_rx_irq_unmap:
-	while (--i)
+	while (i--)
 		irq_dispose_mapping(priv->rxq[i]->irq_no);
 	i = SXGBE_TX_QUEUES;
 err_tx_irq_unmap:
-	while (--i)
+	while (i--)
 		irq_dispose_mapping(priv->txq[i]->irq_no);
 	irq_dispose_mapping(priv->irq);
 err_drv_remove:

diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 3f57110..a733868 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c

@@ -1791,9 +1791,11 @@
 	unsigned int val, chip_id, revision;
 	const char *version_string;
 	unsigned long irq_flags;
+#ifdef SMC_USE_DMA
 	struct dma_slave_config	config;
 	dma_cap_mask_t mask;
 	struct pxad_param param;
+#endif
 
 	DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
 

diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index e13228f..011386f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c

@@ -199,11 +199,6 @@
 {
 	unsigned int tdes1 = p->des1;
 
-	if (mode == STMMAC_CHAIN_MODE)
-		norm_set_tx_desc_len_on_chain(p, len);
-	else
-		norm_set_tx_desc_len_on_ring(p, len);
-
 	if (is_fs)
 		tdes1 |= TDES1_FIRST_SEGMENT;
 	else
@@ -217,10 +212,15 @@
 	if (ls)
 		tdes1 |= TDES1_LAST_SEGMENT;
 
-	if (tx_own)
-		tdes1 |= TDES0_OWN;
-
 	p->des1 = tdes1;
+
+	if (mode == STMMAC_CHAIN_MODE)
+		norm_set_tx_desc_len_on_chain(p, len);
+	else
+		norm_set_tx_desc_len_on_ring(p, len);
+
+	if (tx_own)
+		p->des0 |= TDES0_OWN;
 }
 
 static void ndesc_set_tx_ic(struct dma_desc *p)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4c5ce98..78464fa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

@@ -278,7 +278,6 @@
  */
 bool stmmac_eee_init(struct stmmac_priv *priv)
 {
-	char *phy_bus_name = priv->plat->phy_bus_name;
 	unsigned long flags;
 	bool ret = false;
 
@@ -290,7 +289,7 @@
 		goto out;
 
 	/* Never init EEE in case of a switch is attached */
-	if (phy_bus_name && (!strcmp(phy_bus_name, "fixed")))
+	if (priv->phydev->is_pseudo_fixed_link)
 		goto out;
 
 	/* MAC core supports the EEE feature. */
@@ -827,12 +826,8 @@
 		phydev = of_phy_connect(dev, priv->plat->phy_node,
 					&stmmac_adjust_link, 0, interface);
 	} else {
-		if (priv->plat->phy_bus_name)
-			snprintf(bus_id, MII_BUS_ID_SIZE, "%s-%x",
-				 priv->plat->phy_bus_name, priv->plat->bus_id);
-		else
-			snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x",
-				 priv->plat->bus_id);
+		snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x",
+			 priv->plat->bus_id);
 
 		snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id,
 			 priv->plat->phy_addr);
@@ -871,9 +866,8 @@
 	}
 
 	/* If attached to a switch, there is no reason to poll phy handler */
-	if (priv->plat->phy_bus_name)
-		if (!strcmp(priv->plat->phy_bus_name, "fixed"))
-			phydev->irq = PHY_IGNORE_INTERRUPT;
+	if (phydev->is_pseudo_fixed_link)
+		phydev->irq = PHY_IGNORE_INTERRUPT;
 
 	pr_debug("stmmac_init_phy:  %s: attached to PHY (UID 0x%x)"
 		 " Link = %d\n", dev->name, phydev->phy_id, phydev->link);

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index ea76129..06704ca 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c

@@ -198,20 +198,12 @@
 	struct mii_bus *new_bus;
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
-	int addr, found;
 	struct device_node *mdio_node = priv->plat->mdio_node;
+	int addr, found;
 
 	if (!mdio_bus_data)
 		return 0;
 
-	if (IS_ENABLED(CONFIG_OF)) {
-		if (mdio_node) {
-			netdev_dbg(ndev, "FOUND MDIO subnode\n");
-		} else {
-			netdev_warn(ndev, "No MDIO subnode found\n");
-		}
-	}
-
 	new_bus = mdiobus_alloc();
 	if (new_bus == NULL)
 		return -ENOMEM;

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index dcbd2a1..cf37ea5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c

@@ -132,6 +132,69 @@
 }
 
 /**
+ * stmmac_dt_phy - parse device-tree driver parameters to allocate PHY resources
+ * @plat: driver data platform structure
+ * @np: device tree node
+ * @dev: device pointer
+ * Description:
+ * The mdio bus will be allocated in case of a phy transceiver is on board;
+ * it will be NULL if the fixed-link is configured.
+ * If there is the "snps,dwmac-mdio" sub-node the mdio will be allocated
+ * in any case (for DSA, mdio must be registered even if fixed-link).
+ * The table below sums the supported configurations:
+ *	-------------------------------
+ *	snps,phy-addr	|     Y
+ *	-------------------------------
+ *	phy-handle	|     Y
+ *	-------------------------------
+ *	fixed-link	|     N
+ *	-------------------------------
+ *	snps,dwmac-mdio	|
+ *	  even if	|     Y
+ *	fixed-link	|
+ *	-------------------------------
+ *
+ * It returns 0 in case of success otherwise -ENODEV.
+ */
+static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
+			 struct device_node *np, struct device *dev)
+{
+	bool mdio = true;
+
+	/* If phy-handle property is passed from DT, use it as the PHY */
+	plat->phy_node = of_parse_phandle(np, "phy-handle", 0);
+	if (plat->phy_node)
+		dev_dbg(dev, "Found phy-handle subnode\n");
+
+	/* If phy-handle is not specified, check if we have a fixed-phy */
+	if (!plat->phy_node && of_phy_is_fixed_link(np)) {
+		if ((of_phy_register_fixed_link(np) < 0))
+			return -ENODEV;
+
+		dev_dbg(dev, "Found fixed-link subnode\n");
+		plat->phy_node = of_node_get(np);
+		mdio = false;
+	}
+
+	/* If snps,dwmac-mdio is passed from DT, always register the MDIO */
+	for_each_child_of_node(np, plat->mdio_node) {
+		if (of_device_is_compatible(plat->mdio_node, "snps,dwmac-mdio"))
+			break;
+	}
+
+	if (plat->mdio_node) {
+		dev_dbg(dev, "Found MDIO subnode\n");
+		mdio = true;
+	}
+
+	if (mdio)
+		plat->mdio_bus_data =
+			devm_kzalloc(dev, sizeof(struct stmmac_mdio_bus_data),
+				     GFP_KERNEL);
+	return 0;
+}
+
+/**
  * stmmac_probe_config_dt - parse device-tree driver parameters
  * @pdev: platform_device structure
  * @plat: driver data platform structure
@@ -146,7 +209,6 @@
 	struct device_node *np = pdev->dev.of_node;
 	struct plat_stmmacenet_data *plat;
 	struct stmmac_dma_cfg *dma_cfg;
-	struct device_node *child_node = NULL;
 
 	plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
 	if (!plat)
@@ -166,36 +228,15 @@
 	/* Default to phy auto-detection */
 	plat->phy_addr = -1;
 
-	/* If we find a phy-handle property, use it as the PHY */
-	plat->phy_node = of_parse_phandle(np, "phy-handle", 0);
-
-	/* If phy-handle is not specified, check if we have a fixed-phy */
-	if (!plat->phy_node && of_phy_is_fixed_link(np)) {
-		if ((of_phy_register_fixed_link(np) < 0))
-			return ERR_PTR(-ENODEV);
-
-		plat->phy_node = of_node_get(np);
-	}
-
-	for_each_child_of_node(np, child_node)
-		if (of_device_is_compatible(child_node,	"snps,dwmac-mdio")) {
-			plat->mdio_node = child_node;
-			break;
-		}
-
 	/* "snps,phy-addr" is not a standard property. Mark it as deprecated
 	 * and warn of its use. Remove this when phy node support is added.
 	 */
 	if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0)
 		dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
 
-	if ((plat->phy_node && !of_phy_is_fixed_link(np)) || !plat->mdio_node)
-		plat->mdio_bus_data = NULL;
-	else
-		plat->mdio_bus_data =
-			devm_kzalloc(&pdev->dev,
-				     sizeof(struct stmmac_mdio_bus_data),
-				     GFP_KERNEL);
+	/* To Configure PHY by using all device-tree supported properties */
+	if (stmmac_dt_phy(plat, np, &pdev->dev))
+		return ERR_PTR(-ENODEV);
 
 	of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size);
 

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 192631a..bc16889 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c

@@ -843,8 +843,8 @@
 	if (info) {
 		fl6->daddr = info->key.u.ipv6.dst;
 		fl6->saddr = info->key.u.ipv6.src;
-		fl6->flowi6_tos = RT_TOS(info->key.tos);
-		fl6->flowlabel = info->key.label;
+		fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos),
+						   info->key.label);
 		dst_cache = &info->dst_cache;
 	} else {
 		prio = geneve->tos;
@@ -855,8 +855,8 @@
 			use_cache = false;
 		}
 
-		fl6->flowi6_tos = RT_TOS(prio);
-		fl6->flowlabel = geneve->label;
+		fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
+						   geneve->label);
 		fl6->daddr = geneve->remote.sin6.sin6_addr;
 		dst_cache = &geneve->dst_cache;
 	}
@@ -1049,7 +1049,8 @@
 		if (unlikely(err))
 			goto err;
 
-		prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb);
+		prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
+					   iip, skb);
 		ttl = geneve->ttl;
 		if (!ttl && ipv6_addr_is_multicast(&fl6.daddr))
 			ttl = 1;

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index b4c6878..8b3bd8e 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h

@@ -619,6 +619,7 @@
 #define NETVSC_PACKET_SIZE                      4096
 
 #define VRSS_SEND_TAB_SIZE 16
+#define VRSS_CHANNEL_MAX 64
 
 #define RNDIS_MAX_PKT_DEFAULT 8
 #define RNDIS_PKT_ALIGN_DEFAULT 8
@@ -700,13 +701,13 @@
 
 	struct net_device *ndev;
 
-	struct vmbus_channel *chn_table[NR_CPUS];
+	struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX];
 	u32 send_table[VRSS_SEND_TAB_SIZE];
 	u32 max_chn;
 	u32 num_chn;
 	spinlock_t sc_lock; /* Protects num_sc_offered variable */
 	u32 num_sc_offered;
-	atomic_t queue_sends[NR_CPUS];
+	atomic_t queue_sends[VRSS_CHANNEL_MAX];
 
 	/* Holds rndis device info */
 	void *extension;
@@ -718,7 +719,7 @@
 	/* The sub channel callback buffer */
 	unsigned char *sub_cb_buf;
 
-	struct multi_send_data msd[NR_CPUS];
+	struct multi_send_data msd[VRSS_CHANNEL_MAX];
 	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
 	u32 pkt_align; /* alignment bytes, e.g. 8 */
 

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 0860849..b8121eb 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c

@@ -858,6 +858,7 @@
 	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
 	struct netvsc_device_info device_info;
 	int limit = ETH_DATA_LEN;
+	u32 num_chn;
 	int ret = 0;
 
 	if (nvdev == NULL || nvdev->destroy)
@@ -873,6 +874,8 @@
 	if (ret)
 		goto out;
 
+	num_chn = nvdev->num_chn;
+
 	nvdev->start_remove = true;
 	rndis_filter_device_remove(hdev);
 
@@ -883,7 +886,7 @@
 
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
-	device_info.num_chn = nvdev->num_chn;
+	device_info.num_chn = num_chn;
 	device_info.max_num_vrss_chns = max_num_vrss_chns;
 	rndis_filter_device_add(hdev, &device_info);
 

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 47d07c5..c4e1e04 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c

@@ -986,12 +986,6 @@
 
 	nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
 
-	spin_lock_irqsave(&nvscdev->sc_lock, flags);
-	nvscdev->num_sc_offered--;
-	spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
-	if (nvscdev->num_sc_offered == 0)
-		complete(&nvscdev->channel_init_wait);
-
 	if (chn_index >= nvscdev->num_chn)
 		return;
 
@@ -1004,6 +998,12 @@
 
 	if (ret == 0)
 		nvscdev->chn_table[chn_index] = new_sc;
+
+	spin_lock_irqsave(&nvscdev->sc_lock, flags);
+	nvscdev->num_sc_offered--;
+	spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
+	if (nvscdev->num_sc_offered == 0)
+		complete(&nvscdev->channel_init_wait);
 }
 
 int rndis_filter_device_add(struct hv_device *dev,
@@ -1113,9 +1113,9 @@
 	if (ret || rsscap.num_recv_que < 2)
 		goto out;
 
-	num_rss_qs = min(device_info->max_num_vrss_chns, rsscap.num_recv_que);
+	net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que);
 
-	net_device->max_chn = rsscap.num_recv_que;
+	num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn);
 
 	/*
 	 * We will limit the VRSS channels to the number CPUs in the NUMA node

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 1e901c7..b3ffaee 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c

@@ -277,12 +277,16 @@
 	if (!priv)
 		return -ENOMEM;
 
-	gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+	if (phydev->drv->phy_id != ATH8030_PHY_ID)
+		goto does_not_require_reset_workaround;
+
+	gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
 	if (IS_ERR(gpiod_reset))
 		return PTR_ERR(gpiod_reset);
 
 	priv->gpiod_reset = gpiod_reset;
 
+does_not_require_reset_workaround:
 	phydev->priv = priv;
 
 	return 0;
@@ -362,10 +366,10 @@
 
 				at803x_context_save(phydev, &context);
 
-				gpiod_set_value(priv->gpiod_reset, 0);
-				msleep(1);
 				gpiod_set_value(priv->gpiod_reset, 1);
 				msleep(1);
+				gpiod_set_value(priv->gpiod_reset, 0);
+				msleep(1);
 
 				at803x_context_restore(phydev, &context);
 

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index b881a7b1..9636da0 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c

@@ -339,6 +339,8 @@
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"),
+	BCM7XXX_40NM_EPHY(PHY_ID_BCM7346, "Broadcom BCM7346"),
+	BCM7XXX_40NM_EPHY(PHY_ID_BCM7362, "Broadcom BCM7362"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7429, "Broadcom BCM7429"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7435, "Broadcom BCM7435"),
@@ -348,6 +350,8 @@
 	{ PHY_ID_BCM7250, 0xfffffff0, },
 	{ PHY_ID_BCM7364, 0xfffffff0, },
 	{ PHY_ID_BCM7366, 0xfffffff0, },
+	{ PHY_ID_BCM7346, 0xfffffff0, },
+	{ PHY_ID_BCM7362, 0xfffffff0, },
 	{ PHY_ID_BCM7425, 0xfffffff0, },
 	{ PHY_ID_BCM7429, 0xfffffff0, },
 	{ PHY_ID_BCM7439, 0xfffffff0, },

diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c
index f70522c..1352965 100644
--- a/drivers/net/phy/mdio-sun4i.c
+++ b/drivers/net/phy/mdio-sun4i.c

@@ -122,6 +122,7 @@
 			return -EPROBE_DEFER;
 
 		dev_info(&pdev->dev, "no regulator found\n");
+		data->regulator = NULL;
 	} else {
 		ret = regulator_enable(data->regulator);
 		if (ret)
@@ -137,7 +138,8 @@
 	return 0;
 
 err_out_disable_regulator:
-	regulator_disable(data->regulator);
+	if (data->regulator)
+		regulator_disable(data->regulator);
 err_out_free_mdiobus:
 	mdiobus_free(bus);
 	return ret;

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 4fd8610..f572b31 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c

@@ -2307,7 +2307,7 @@
 
 	pch->ppp = NULL;
 	pch->chan = chan;
-	pch->chan_net = net;
+	pch->chan_net = get_net(net);
 	chan->ppp = pch;
 	init_ppp_file(&pch->file, CHANNEL);
 	pch->file.hdrlen = chan->hdrlen;
@@ -2404,6 +2404,8 @@
 	spin_lock_bh(&pn->all_channels_lock);
 	list_del(&pch->list);
 	spin_unlock_bh(&pn->all_channels_lock);
+	put_net(pch->chan_net);
+	pch->chan_net = NULL;
 
 	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 01f08a7..9cfe6ae 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c

@@ -24,6 +24,7 @@
 #include <linux/skbuff.h>
 #include <linux/crc32.h>
 #include <linux/ethtool.h>
+#include <linux/reboot.h>
 
 #define DRV_NAME        "rionet"
 #define DRV_VERSION     "0.3"
@@ -48,6 +49,8 @@
 #define RIONET_TX_RING_SIZE	CONFIG_RIONET_TX_SIZE
 #define RIONET_RX_RING_SIZE	CONFIG_RIONET_RX_SIZE
 #define RIONET_MAX_NETS		8
+#define RIONET_MSG_SIZE         RIO_MAX_MSG_SIZE
+#define RIONET_MAX_MTU          (RIONET_MSG_SIZE - ETH_HLEN)
 
 struct rionet_private {
 	struct rio_mport *mport;
@@ -60,6 +63,7 @@
 	spinlock_t lock;
 	spinlock_t tx_lock;
 	u32 msg_enable;
+	bool open;
 };
 
 struct rionet_peer {
@@ -71,6 +75,7 @@
 struct rionet_net {
 	struct net_device *ndev;
 	struct list_head peers;
+	spinlock_t lock;	/* net info access lock */
 	struct rio_dev **active;
 	int nact;	/* number of active peers */
 };
@@ -232,26 +237,32 @@
 	struct net_device *ndev = dev_id;
 	struct rionet_private *rnet = netdev_priv(ndev);
 	struct rionet_peer *peer;
+	unsigned char netid = rnet->mport->id;
 
 	if (netif_msg_intr(rnet))
 		printk(KERN_INFO "%s: doorbell sid %4.4x tid %4.4x info %4.4x",
 		       DRV_NAME, sid, tid, info);
 	if (info == RIONET_DOORBELL_JOIN) {
-		if (!nets[rnet->mport->id].active[sid]) {
-			list_for_each_entry(peer,
-					   &nets[rnet->mport->id].peers, node) {
+		if (!nets[netid].active[sid]) {
+			spin_lock(&nets[netid].lock);
+			list_for_each_entry(peer, &nets[netid].peers, node) {
 				if (peer->rdev->destid == sid) {
-					nets[rnet->mport->id].active[sid] =
-								peer->rdev;
-					nets[rnet->mport->id].nact++;
+					nets[netid].active[sid] = peer->rdev;
+					nets[netid].nact++;
 				}
 			}
+			spin_unlock(&nets[netid].lock);
+
 			rio_mport_send_doorbell(mport, sid,
 						RIONET_DOORBELL_JOIN);
 		}
 	} else if (info == RIONET_DOORBELL_LEAVE) {
-		nets[rnet->mport->id].active[sid] = NULL;
-		nets[rnet->mport->id].nact--;
+		spin_lock(&nets[netid].lock);
+		if (nets[netid].active[sid]) {
+			nets[netid].active[sid] = NULL;
+			nets[netid].nact--;
+		}
+		spin_unlock(&nets[netid].lock);
 	} else {
 		if (netif_msg_intr(rnet))
 			printk(KERN_WARNING "%s: unhandled doorbell\n",
@@ -280,7 +291,7 @@
 	struct net_device *ndev = dev_id;
 	struct rionet_private *rnet = netdev_priv(ndev);
 
-	spin_lock(&rnet->lock);
+	spin_lock(&rnet->tx_lock);
 
 	if (netif_msg_intr(rnet))
 		printk(KERN_INFO
@@ -299,14 +310,16 @@
 	if (rnet->tx_cnt < RIONET_TX_RING_SIZE)
 		netif_wake_queue(ndev);
 
-	spin_unlock(&rnet->lock);
+	spin_unlock(&rnet->tx_lock);
 }
 
 static int rionet_open(struct net_device *ndev)
 {
 	int i, rc = 0;
-	struct rionet_peer *peer, *tmp;
+	struct rionet_peer *peer;
 	struct rionet_private *rnet = netdev_priv(ndev);
+	unsigned char netid = rnet->mport->id;
+	unsigned long flags;
 
 	if (netif_msg_ifup(rnet))
 		printk(KERN_INFO "%s: open\n", DRV_NAME);
@@ -345,20 +358,13 @@
 	netif_carrier_on(ndev);
 	netif_start_queue(ndev);
 
-	list_for_each_entry_safe(peer, tmp,
-				 &nets[rnet->mport->id].peers, node) {
-		if (!(peer->res = rio_request_outb_dbell(peer->rdev,
-							 RIONET_DOORBELL_JOIN,
-							 RIONET_DOORBELL_LEAVE)))
-		{
-			printk(KERN_ERR "%s: error requesting doorbells\n",
-			       DRV_NAME);
-			continue;
-		}
-
+	spin_lock_irqsave(&nets[netid].lock, flags);
+	list_for_each_entry(peer, &nets[netid].peers, node) {
 		/* Send a join message */
 		rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
 	}
+	spin_unlock_irqrestore(&nets[netid].lock, flags);
+	rnet->open = true;
 
       out:
 	return rc;
@@ -367,7 +373,9 @@
 static int rionet_close(struct net_device *ndev)
 {
 	struct rionet_private *rnet = netdev_priv(ndev);
-	struct rionet_peer *peer, *tmp;
+	struct rionet_peer *peer;
+	unsigned char netid = rnet->mport->id;
+	unsigned long flags;
 	int i;
 
 	if (netif_msg_ifup(rnet))
@@ -375,18 +383,21 @@
 
 	netif_stop_queue(ndev);
 	netif_carrier_off(ndev);
+	rnet->open = false;
 
 	for (i = 0; i < RIONET_RX_RING_SIZE; i++)
 		kfree_skb(rnet->rx_skb[i]);
 
-	list_for_each_entry_safe(peer, tmp,
-				 &nets[rnet->mport->id].peers, node) {
-		if (nets[rnet->mport->id].active[peer->rdev->destid]) {
+	spin_lock_irqsave(&nets[netid].lock, flags);
+	list_for_each_entry(peer, &nets[netid].peers, node) {
+		if (nets[netid].active[peer->rdev->destid]) {
 			rio_send_doorbell(peer->rdev, RIONET_DOORBELL_LEAVE);
-			nets[rnet->mport->id].active[peer->rdev->destid] = NULL;
+			nets[netid].active[peer->rdev->destid] = NULL;
 		}
-		rio_release_outb_dbell(peer->rdev, peer->res);
+		if (peer->res)
+			rio_release_outb_dbell(peer->rdev, peer->res);
 	}
+	spin_unlock_irqrestore(&nets[netid].lock, flags);
 
 	rio_release_inb_dbell(rnet->mport, RIONET_DOORBELL_JOIN,
 			      RIONET_DOORBELL_LEAVE);
@@ -400,22 +411,38 @@
 {
 	struct rio_dev *rdev = to_rio_dev(dev);
 	unsigned char netid = rdev->net->hport->id;
-	struct rionet_peer *peer, *tmp;
+	struct rionet_peer *peer;
+	int state, found = 0;
+	unsigned long flags;
 
-	if (dev_rionet_capable(rdev)) {
-		list_for_each_entry_safe(peer, tmp, &nets[netid].peers, node) {
-			if (peer->rdev == rdev) {
-				if (nets[netid].active[rdev->destid]) {
-					nets[netid].active[rdev->destid] = NULL;
-					nets[netid].nact--;
+	if (!dev_rionet_capable(rdev))
+		return;
+
+	spin_lock_irqsave(&nets[netid].lock, flags);
+	list_for_each_entry(peer, &nets[netid].peers, node) {
+		if (peer->rdev == rdev) {
+			list_del(&peer->node);
+			if (nets[netid].active[rdev->destid]) {
+				state = atomic_read(&rdev->state);
+				if (state != RIO_DEVICE_GONE &&
+				    state != RIO_DEVICE_INITIALIZING) {
+					rio_send_doorbell(rdev,
+							RIONET_DOORBELL_LEAVE);
 				}
-
-				list_del(&peer->node);
-				kfree(peer);
-				break;
+				nets[netid].active[rdev->destid] = NULL;
+				nets[netid].nact--;
 			}
+			found = 1;
+			break;
 		}
 	}
+	spin_unlock_irqrestore(&nets[netid].lock, flags);
+
+	if (found) {
+		if (peer->res)
+			rio_release_outb_dbell(rdev, peer->res);
+		kfree(peer);
+	}
 }
 
 static void rionet_get_drvinfo(struct net_device *ndev,
@@ -443,6 +470,17 @@
 	rnet->msg_enable = value;
 }
 
+static int rionet_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	if ((new_mtu < 68) || (new_mtu > RIONET_MAX_MTU)) {
+		printk(KERN_ERR "%s: Invalid MTU size %d\n",
+		       ndev->name, new_mtu);
+		return -EINVAL;
+	}
+	ndev->mtu = new_mtu;
+	return 0;
+}
+
 static const struct ethtool_ops rionet_ethtool_ops = {
 	.get_drvinfo = rionet_get_drvinfo,
 	.get_msglevel = rionet_get_msglevel,
@@ -454,7 +492,7 @@
 	.ndo_open		= rionet_open,
 	.ndo_stop		= rionet_close,
 	.ndo_start_xmit		= rionet_start_xmit,
-	.ndo_change_mtu		= eth_change_mtu,
+	.ndo_change_mtu		= rionet_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
@@ -478,6 +516,7 @@
 	/* Set up private area */
 	rnet = netdev_priv(ndev);
 	rnet->mport = mport;
+	rnet->open = false;
 
 	/* Set the default MAC address */
 	device_id = rio_local_get_device_id(mport);
@@ -489,7 +528,7 @@
 	ndev->dev_addr[5] = device_id & 0xff;
 
 	ndev->netdev_ops = &rionet_netdev_ops;
-	ndev->mtu = RIO_MAX_MSG_SIZE - 14;
+	ndev->mtu = RIONET_MAX_MTU;
 	ndev->features = NETIF_F_LLTX;
 	SET_NETDEV_DEV(ndev, &mport->dev);
 	ndev->ethtool_ops = &rionet_ethtool_ops;
@@ -500,8 +539,11 @@
 	rnet->msg_enable = RIONET_DEFAULT_MSGLEVEL;
 
 	rc = register_netdev(ndev);
-	if (rc != 0)
+	if (rc != 0) {
+		free_pages((unsigned long)nets[mport->id].active,
+			   get_order(rionet_active_bytes));
 		goto out;
+	}
 
 	printk(KERN_INFO "%s: %s %s Version %s, MAC %pM, %s\n",
 	       ndev->name,
@@ -515,8 +557,6 @@
 	return rc;
 }
 
-static unsigned long net_table[RIONET_MAX_NETS/sizeof(unsigned long) + 1];
-
 static int rionet_add_dev(struct device *dev, struct subsys_interface *sif)
 {
 	int rc = -ENODEV;
@@ -525,19 +565,16 @@
 	struct net_device *ndev = NULL;
 	struct rio_dev *rdev = to_rio_dev(dev);
 	unsigned char netid = rdev->net->hport->id;
-	int oldnet;
 
 	if (netid >= RIONET_MAX_NETS)
 		return rc;
 
-	oldnet = test_and_set_bit(netid, net_table);
-
 	/*
 	 * If first time through this net, make sure local device is rionet
 	 * capable and setup netdev (this step will be skipped in later probes
 	 * on the same net).
 	 */
-	if (!oldnet) {
+	if (!nets[netid].ndev) {
 		rio_local_read_config_32(rdev->net->hport, RIO_SRC_OPS_CAR,
 					 &lsrc_ops);
 		rio_local_read_config_32(rdev->net->hport, RIO_DST_OPS_CAR,
@@ -555,30 +592,56 @@
 			rc = -ENOMEM;
 			goto out;
 		}
-		nets[netid].ndev = ndev;
+
 		rc = rionet_setup_netdev(rdev->net->hport, ndev);
 		if (rc) {
 			printk(KERN_ERR "%s: failed to setup netdev (rc=%d)\n",
 			       DRV_NAME, rc);
+			free_netdev(ndev);
 			goto out;
 		}
 
 		INIT_LIST_HEAD(&nets[netid].peers);
+		spin_lock_init(&nets[netid].lock);
 		nets[netid].nact = 0;
-	} else if (nets[netid].ndev == NULL)
-		goto out;
+		nets[netid].ndev = ndev;
+	}
 
 	/*
 	 * If the remote device has mailbox/doorbell capabilities,
 	 * add it to the peer list.
 	 */
 	if (dev_rionet_capable(rdev)) {
-		if (!(peer = kmalloc(sizeof(struct rionet_peer), GFP_KERNEL))) {
+		struct rionet_private *rnet;
+		unsigned long flags;
+
+		rnet = netdev_priv(nets[netid].ndev);
+
+		peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+		if (!peer) {
 			rc = -ENOMEM;
 			goto out;
 		}
 		peer->rdev = rdev;
+		peer->res = rio_request_outb_dbell(peer->rdev,
+						RIONET_DOORBELL_JOIN,
+						RIONET_DOORBELL_LEAVE);
+		if (!peer->res) {
+			pr_err("%s: error requesting doorbells\n", DRV_NAME);
+			kfree(peer);
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		spin_lock_irqsave(&nets[netid].lock, flags);
 		list_add_tail(&peer->node, &nets[netid].peers);
+		spin_unlock_irqrestore(&nets[netid].lock, flags);
+		pr_debug("%s: %s add peer %s\n",
+			 DRV_NAME, __func__, rio_name(rdev));
+
+		/* If netdev is already opened, send join request to new peer */
+		if (rnet->open)
+			rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
 	}
 
 	return 0;
@@ -586,6 +649,61 @@
 	return rc;
 }
 
+static int rionet_shutdown(struct notifier_block *nb, unsigned long code,
+			   void *unused)
+{
+	struct rionet_peer *peer;
+	unsigned long flags;
+	int i;
+
+	pr_debug("%s: %s\n", DRV_NAME, __func__);
+
+	for (i = 0; i < RIONET_MAX_NETS; i++) {
+		if (!nets[i].ndev)
+			continue;
+
+		spin_lock_irqsave(&nets[i].lock, flags);
+		list_for_each_entry(peer, &nets[i].peers, node) {
+			if (nets[i].active[peer->rdev->destid]) {
+				rio_send_doorbell(peer->rdev,
+						  RIONET_DOORBELL_LEAVE);
+				nets[i].active[peer->rdev->destid] = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&nets[i].lock, flags);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void rionet_remove_mport(struct device *dev,
+				struct class_interface *class_intf)
+{
+	struct rio_mport *mport = to_rio_mport(dev);
+	struct net_device *ndev;
+	int id = mport->id;
+
+	pr_debug("%s %s\n", __func__, mport->name);
+
+	WARN(nets[id].nact, "%s called when connected to %d peers\n",
+	     __func__, nets[id].nact);
+	WARN(!nets[id].ndev, "%s called for mport without NDEV\n",
+	     __func__);
+
+	if (nets[id].ndev) {
+		ndev = nets[id].ndev;
+		netif_stop_queue(ndev);
+		unregister_netdev(ndev);
+
+		free_pages((unsigned long)nets[id].active,
+			   get_order(sizeof(void *) *
+			   RIO_MAX_ROUTE_ENTRIES(mport->sys_size)));
+		nets[id].active = NULL;
+		free_netdev(ndev);
+		nets[id].ndev = NULL;
+	}
+}
+
 #ifdef MODULE
 static struct rio_device_id rionet_id_table[] = {
 	{RIO_DEVICE(RIO_ANY_ID, RIO_ANY_ID)},
@@ -602,40 +720,43 @@
 	.remove_dev	= rionet_remove_dev,
 };
 
+static struct notifier_block rionet_notifier = {
+	.notifier_call = rionet_shutdown,
+};
+
+/* the rio_mport_interface is used to handle local mport devices */
+static struct class_interface rio_mport_interface __refdata = {
+	.class = &rio_mport_class,
+	.add_dev = NULL,
+	.remove_dev = rionet_remove_mport,
+};
+
 static int __init rionet_init(void)
 {
+	int ret;
+
+	ret = register_reboot_notifier(&rionet_notifier);
+	if (ret) {
+		pr_err("%s: failed to register reboot notifier (err=%d)\n",
+		       DRV_NAME, ret);
+		return ret;
+	}
+
+	ret = class_interface_register(&rio_mport_interface);
+	if (ret) {
+		pr_err("%s: class_interface_register error: %d\n",
+		       DRV_NAME, ret);
+		return ret;
+	}
+
 	return subsys_interface_register(&rionet_interface);
 }
 
 static void __exit rionet_exit(void)
 {
-	struct rionet_private *rnet;
-	struct net_device *ndev;
-	struct rionet_peer *peer, *tmp;
-	int i;
-
-	for (i = 0; i < RIONET_MAX_NETS; i++) {
-		if (nets[i].ndev != NULL) {
-			ndev = nets[i].ndev;
-			rnet = netdev_priv(ndev);
-			unregister_netdev(ndev);
-
-			list_for_each_entry_safe(peer,
-						 tmp, &nets[i].peers, node) {
-				list_del(&peer->node);
-				kfree(peer);
-			}
-
-			free_pages((unsigned long)nets[i].active,
-				 get_order(sizeof(void *) *
-				 RIO_MAX_ROUTE_ENTRIES(rnet->mport->sys_size)));
-			nets[i].active = NULL;
-
-			free_netdev(ndev);
-		}
-	}
-
+	unregister_reboot_notifier(&rionet_notifier);
 	subsys_interface_unregister(&rionet_interface);
+	class_interface_unregister(&rio_mport_interface);
 }
 
 late_initcall(rionet_init);

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 26c64d2..a0f64cb 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c

@@ -1198,6 +1198,9 @@
 		goto err_dev_open;
 	}
 
+	dev_uc_sync_multiple(port_dev, dev);
+	dev_mc_sync_multiple(port_dev, dev);
+
 	err = vlan_vids_add_by_dev(port_dev, dev);
 	if (err) {
 		netdev_err(dev, "Failed to add vlan ids to device %s\n",
@@ -1261,6 +1264,8 @@
 	vlan_vids_del_by_dev(port_dev, dev);
 
 err_vids_add:
+	dev_uc_unsync(port_dev, dev);
+	dev_mc_unsync(port_dev, dev);
 	dev_close(port_dev);
 
 err_dev_open:

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index afdf950..510e90a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c

@@ -622,7 +622,8 @@
 
 	/* Re-attach the filter to persist device */
 	if (!skip_filter && (tun->filter_attached == true)) {
-		err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+		err = __sk_attach_filter(&tun->fprog, tfile->socket.sk,
+					 lockdep_rtnl_is_held());
 		if (!err)
 			goto out;
 	}
@@ -1822,7 +1823,7 @@
 
 	for (i = 0; i < n; i++) {
 		tfile = rtnl_dereference(tun->tfiles[i]);
-		sk_detach_filter(tfile->socket.sk);
+		__sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held());
 	}
 
 	tun->filter_attached = false;
@@ -1835,7 +1836,8 @@
 
 	for (i = 0; i < tun->numqueues; i++) {
 		tfile = rtnl_dereference(tun->tfiles[i]);
-		ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
+		ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk,
+					 lockdep_rtnl_is_held());
 		if (ret) {
 			tun_detach_filter(tun, i);
 			return ret;

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 86ba30b..2fb31ed 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c

@@ -1626,6 +1626,13 @@
 	  .driver_info = (unsigned long) &wwan_info,
 	},
 
+	/* Telit LE910 V2 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x0036,
+		USB_CLASS_COMM,
+		USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE),
+	  .driver_info = (unsigned long)&wwan_noarp_info,
+	},
+
 	/* DW5812 LTE Verizon Mobile Broadband Card
 	 * Unlike DW5550 this device requires FLAG_NOARP
 	 */

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index d36d5eb..f20890e 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c

@@ -3261,54 +3261,6 @@
 	tasklet_schedule(&dev->bh);
 }
 
-struct rtnl_link_stats64 *lan78xx_get_stats64(struct net_device *netdev,
-					      struct rtnl_link_stats64 *storage)
-{
-	struct lan78xx_net *dev = netdev_priv(netdev);
-	struct lan78xx_statstage64 stats;
-
-	/* curr_stat is updated by timer.
-	 * periodic reading from HW will prevent from entering USB auto suspend.
-	 * if autosuspend is disabled, read from HW.
-	 */
-	if (!dev->udev->dev.power.runtime_auto)
-		lan78xx_update_stats(dev);
-
-	mutex_lock(&dev->stats.access_lock);
-	memcpy(&stats, &dev->stats.curr_stat, sizeof(stats));
-	mutex_unlock(&dev->stats.access_lock);
-
-	/* calc by driver */
-	storage->rx_packets = (__u64)netdev->stats.rx_packets;
-	storage->tx_packets = (__u64)netdev->stats.tx_packets;
-	storage->rx_bytes = (__u64)netdev->stats.rx_bytes;
-	storage->tx_bytes = (__u64)netdev->stats.tx_bytes;
-
-	/* use counter */
-	storage->rx_length_errors = stats.rx_undersize_frame_errors +
-				    stats.rx_oversize_frame_errors;
-	storage->rx_crc_errors = stats.rx_fcs_errors;
-	storage->rx_frame_errors = stats.rx_alignment_errors;
-	storage->rx_fifo_errors = stats.rx_dropped_frames;
-	storage->rx_over_errors = stats.rx_oversize_frame_errors;
-	storage->rx_errors = stats.rx_fcs_errors +
-			     stats.rx_alignment_errors +
-			     stats.rx_fragment_errors +
-			     stats.rx_jabber_errors +
-			     stats.rx_undersize_frame_errors +
-			     stats.rx_oversize_frame_errors +
-			     stats.rx_dropped_frames;
-
-	storage->tx_carrier_errors = stats.tx_carrier_errors;
-	storage->tx_errors = stats.tx_fcs_errors +
-			     stats.tx_excess_deferral_errors +
-			     stats.tx_carrier_errors;
-
-	storage->multicast = stats.rx_multicast_frames;
-
-	return storage;
-}
-
 static const struct net_device_ops lan78xx_netdev_ops = {
 	.ndo_open		= lan78xx_open,
 	.ndo_stop		= lan78xx_stop,
@@ -3322,7 +3274,6 @@
 	.ndo_set_features	= lan78xx_set_features,
 	.ndo_vlan_rx_add_vid	= lan78xx_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= lan78xx_vlan_rx_kill_vid,
-	.ndo_get_stats64	= lan78xx_get_stats64,
 };
 
 static void lan78xx_stat_monitor(unsigned long param)

diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c
index 1bfe0fc..22e1a9a 100644
--- a/drivers/net/usb/plusb.c
+++ b/drivers/net/usb/plusb.c

@@ -38,7 +38,7 @@
  * HEADS UP:  this handshaking isn't all that robust.  This driver
  * gets confused easily if you unplug one end of the cable then
  * try to connect it again; you'll need to restart both ends. The
- * "naplink" software (used by some PlayStation/2 deveopers) does
+ * "naplink" software (used by some PlayStation/2 developers) does
  * the handshaking much better!   Also, sometimes this hardware
  * seems to get wedged under load.  Prolific docs are weak, and
  * don't identify differences between PL2301 and PL2302, much less

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 7d717c6..9d1fce8 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c

@@ -844,6 +844,7 @@
 	{QMI_FIXED_INTF(0x19d2, 0x1426, 2)},	/* ZTE MF91 */
 	{QMI_FIXED_INTF(0x19d2, 0x1428, 2)},	/* Telewell TW-LTE 4G v2 */
 	{QMI_FIXED_INTF(0x19d2, 0x2002, 4)},	/* ZTE (Vodafone) K3765-Z */
+	{QMI_FIXED_INTF(0x2001, 0x7e19, 4)},	/* D-Link DWM-221 B1 */
 	{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)},    /* Sierra Wireless MC7700 */
 	{QMI_FIXED_INTF(0x114f, 0x68a2, 8)},    /* Sierra Wireless MC7750 */
 	{QMI_FIXED_INTF(0x1199, 0x68a2, 8)},	/* Sierra Wireless MC7710 in QMI mode */

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 800106a7..1c0fa36 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c

@@ -1143,7 +1143,7 @@
 static bool vxlan_remcsum(struct vxlanhdr *unparsed,
 			  struct sk_buff *skb, u32 vxflags)
 {
-	size_t start, offset, plen;
+	size_t start, offset;
 
 	if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
 		goto out;
@@ -1151,9 +1151,7 @@
 	start = vxlan_rco_start(unparsed->vx_vni);
 	offset = start + vxlan_rco_offset(unparsed->vx_vni);
 
-	plen = sizeof(struct vxlanhdr) + offset + sizeof(u16);
-
-	if (!pskb_may_pull(skb, plen))
+	if (!pskb_may_pull(skb, offset + sizeof(u16)))
 		return false;
 
 	skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
@@ -1810,10 +1808,9 @@
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = oif;
-	fl6.flowi6_tos = RT_TOS(tos);
 	fl6.daddr = *daddr;
 	fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
-	fl6.flowlabel = label;
+	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
 	fl6.flowi6_mark = skb->mark;
 	fl6.flowi6_proto = IPPROTO_UDP;
 

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index 588803a..6ccba0d 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c

@@ -357,20 +357,6 @@
 	return 0;
 }
 
-static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
-				phys_addr_t *db_addr,
-				resource_size_t *db_size)
-{
-	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
-
-	if (db_addr)
-		*db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
-	if (db_size)
-		*db_size = sizeof(u32);
-
-	return 0;
-}
-
 static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
@@ -415,20 +401,6 @@
 	return 0;
 }
 
-static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
-				  phys_addr_t *spad_addr)
-{
-	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
-
-	if (idx < 0 || idx >= ndev->spad_count)
-		return -EINVAL;
-
-	if (spad_addr)
-		*spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
-					   ndev->peer_spad + (idx << 2));
-	return 0;
-}
-
 static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
@@ -472,12 +444,10 @@
 	.db_clear		= amd_ntb_db_clear,
 	.db_set_mask		= amd_ntb_db_set_mask,
 	.db_clear_mask		= amd_ntb_db_clear_mask,
-	.peer_db_addr		= amd_ntb_peer_db_addr,
 	.peer_db_set		= amd_ntb_peer_db_set,
 	.spad_count		= amd_ntb_spad_count,
 	.spad_read		= amd_ntb_spad_read,
 	.spad_write		= amd_ntb_spad_write,
-	.peer_spad_addr		= amd_ntb_peer_spad_addr,
 	.peer_spad_read		= amd_ntb_peer_spad_read,
 	.peer_spad_write	= amd_ntb_peer_spad_write,
 };

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index ec4775f..2ef9d91 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c

@@ -124,6 +124,7 @@
 
 	bool client_ready;
 	bool link_is_up;
+	bool active;
 
 	u8 qp_num;	/* Only 64 QP's are allowed.  0-63 */
 	u64 qp_bit;
@@ -719,6 +720,7 @@
 static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
 {
 	qp->link_is_up = false;
+	qp->active = false;
 
 	qp->tx_index = 0;
 	qp->rx_index = 0;
@@ -827,7 +829,7 @@
 	struct pci_dev *pdev = ndev->pdev;
 	resource_size_t size;
 	u32 val;
-	int rc, i, spad;
+	int rc = 0, i, spad;
 
 	/* send the local info, in the opposite order of the way we read it */
 	for (i = 0; i < nt->mw_count; i++) {
@@ -897,6 +899,13 @@
 out1:
 	for (i = 0; i < nt->mw_count; i++)
 		ntb_free_mw(nt, i);
+
+	/* if there's an actual failure, we should just bail */
+	if (rc < 0) {
+		ntb_link_disable(ndev);
+		return;
+	}
+
 out:
 	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
 		schedule_delayed_work(&nt->link_work,
@@ -926,11 +935,13 @@
 	if (val & BIT(qp->qp_num)) {
 		dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num);
 		qp->link_is_up = true;
+		qp->active = true;
 
 		if (qp->event_handler)
 			qp->event_handler(qp->cb_data, qp->link_is_up);
 
-		tasklet_schedule(&qp->rxc_db_work);
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
 	} else if (nt->link_is_up)
 		schedule_delayed_work(&qp->link_work,
 				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
@@ -1411,7 +1422,8 @@
 
 	if (i == qp->rx_max_entry) {
 		/* there is more work to do */
-		tasklet_schedule(&qp->rxc_db_work);
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
 	} else if (ntb_db_read(qp->ndev) & BIT_ULL(qp->qp_num)) {
 		/* the doorbell bit is set: clear it */
 		ntb_db_clear(qp->ndev, BIT_ULL(qp->qp_num));
@@ -1422,7 +1434,8 @@
 		 * ntb_process_rxc and clearing the doorbell bit:
 		 * there might be some more work to do.
 		 */
-		tasklet_schedule(&qp->rxc_db_work);
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
 	}
 }
 
@@ -1760,6 +1773,8 @@
 
 	pdev = qp->ndev->pdev;
 
+	qp->active = false;
+
 	if (qp->tx_dma_chan) {
 		struct dma_chan *chan = qp->tx_dma_chan;
 		/* Putting the dma_chan to NULL will force any new traffic to be
@@ -1793,7 +1808,7 @@
 	qp_bit = BIT_ULL(qp->qp_num);
 
 	ntb_db_set_mask(qp->ndev, qp_bit);
-	tasklet_disable(&qp->rxc_db_work);
+	tasklet_kill(&qp->rxc_db_work);
 
 	cancel_delayed_work_sync(&qp->link_work);
 
@@ -1886,7 +1901,8 @@
 
 	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
 
-	tasklet_schedule(&qp->rxc_db_work);
+	if (qp->active)
+		tasklet_schedule(&qp->rxc_db_work);
 
 	return 0;
 }
@@ -2069,7 +2085,8 @@
 		qp_num = __ffs(db_bits);
 		qp = &nt->qp_vec[qp_num];
 
-		tasklet_schedule(&qp->rxc_db_work);
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
 
 		db_bits &= ~BIT_ULL(qp_num);
 	}

diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index c8a37ba..8dfce9c 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c

@@ -178,7 +178,7 @@
 	atomic_dec(&pctx->dma_sync);
 }
 
-static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
 			 char *src, size_t size)
 {
 	struct perf_ctx *perf = pctx->perf;
@@ -189,7 +189,8 @@
 	dma_cookie_t cookie;
 	size_t src_off, dst_off;
 	struct perf_mw *mw = &perf->mw;
-	u64 vbase, dst_vaddr;
+	void __iomem *vbase;
+	void __iomem *dst_vaddr;
 	dma_addr_t dst_phys;
 	int retries = 0;
 
@@ -204,14 +205,14 @@
 	}
 
 	device = chan->device;
-	src_off = (size_t)src & ~PAGE_MASK;
-	dst_off = (size_t)dst & ~PAGE_MASK;
+	src_off = (uintptr_t)src & ~PAGE_MASK;
+	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;
 
 	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
 		return -ENODEV;
 
-	vbase = (u64)(u64 *)mw->vbase;
-	dst_vaddr = (u64)(u64 *)dst;
+	vbase = mw->vbase;
+	dst_vaddr = dst;
 	dst_phys = mw->phys_addr + (dst_vaddr - vbase);
 
 	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
@@ -261,13 +262,13 @@
 	return 0;
 }
 
-static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
 			  u64 buf_size, u64 win_size, u64 total)
 {
 	int chunks, total_chunks, i;
 	int copied_chunks = 0;
 	u64 copied = 0, result;
-	char *tmp = dst;
+	char __iomem *tmp = dst;
 	u64 perf, diff_us;
 	ktime_t kstart, kstop, kdiff;
 
@@ -324,7 +325,7 @@
 	struct perf_ctx *perf = pctx->perf;
 	struct pci_dev *pdev = perf->ntb->pdev;
 	struct perf_mw *mw = &perf->mw;
-	char *dst;
+	char __iomem *dst;
 	u64 win_size, buf_size, total;
 	void *src;
 	int rc, node, i;
@@ -364,7 +365,7 @@
 	if (buf_size > MAX_TEST_SIZE)
 		buf_size = MAX_TEST_SIZE;
 
-	dst = (char *)mw->vbase;
+	dst = (char __iomem *)mw->vbase;
 
 	atomic_inc(&perf->tsync);
 	while (atomic_read(&perf->tsync) != perf->perf_threads)
@@ -424,6 +425,7 @@
 {
 	struct perf_mw *mw = &perf->mw;
 	size_t xlat_size, buf_size;
+	int rc;
 
 	if (!size)
 		return -EINVAL;
@@ -447,6 +449,13 @@
 		mw->buf_size = 0;
 	}
 
+	rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
+	if (rc) {
+		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
+		perf_free_mw(perf);
+		return -EIO;
+	}
+
 	return 0;
 }
 
@@ -541,6 +550,8 @@
 		return 0;
 
 	buf = kmalloc(64, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
 	out_offset = snprintf(buf, 64, "%d\n", perf->run);
 	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
 	kfree(buf);
@@ -548,6 +559,21 @@
 	return ret;
 }
 
+static void threads_cleanup(struct perf_ctx *perf)
+{
+	struct pthr_ctx *pctx;
+	int i;
+
+	perf->run = false;
+	for (i = 0; i < MAX_THREADS; i++) {
+		pctx = &perf->pthr_ctx[i];
+		if (pctx->thread) {
+			kthread_stop(pctx->thread);
+			pctx->thread = NULL;
+		}
+	}
+}
+
 static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 				 size_t count, loff_t *offp)
 {
@@ -563,17 +589,9 @@
 	if (atomic_read(&perf->tsync) == 0)
 		perf->run = false;
 
-	if (perf->run) {
-		/* lets stop the threads */
-		perf->run = false;
-		for (i = 0; i < MAX_THREADS; i++) {
-			if (perf->pthr_ctx[i].thread) {
-				kthread_stop(perf->pthr_ctx[i].thread);
-				perf->pthr_ctx[i].thread = NULL;
-			} else
-				break;
-		}
-	} else {
+	if (perf->run)
+		threads_cleanup(perf);
+	else {
 		perf->run = true;
 
 		if (perf->perf_threads > MAX_THREADS) {
@@ -604,17 +622,11 @@
 				kthread_create_on_node(ntb_perf_thread,
 						       (void *)pctx,
 						       node, "ntb_perf %d", i);
-			if (pctx->thread)
+			if (IS_ERR(pctx->thread)) {
+				pctx->thread = NULL;
+				goto err;
+			} else
 				wake_up_process(pctx->thread);
-			else {
-				perf->run = false;
-				for (i = 0; i < MAX_THREADS; i++) {
-					if (pctx->thread) {
-						kthread_stop(pctx->thread);
-						pctx->thread = NULL;
-					}
-				}
-			}
 
 			if (perf->run == false)
 				return -ENXIO;
@@ -623,6 +635,10 @@
 	}
 
 	return count;
+
+err:
+	threads_cleanup(perf);
+	return -ENXIO;
 }
 
 static const struct file_operations ntb_perf_debugfs_run = {

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index c32cbb5..f068b65 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c

@@ -1204,7 +1204,7 @@
 {
 	struct btt *btt = bdev->bd_disk->private_data;
 
-	btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector);
+	btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, rw, sector);
 	page_endio(page, rw & WRITE, 0);
 	return 0;
 }

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index ca5721c..12c86fa 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c

@@ -99,7 +99,7 @@
 		if (unlikely(bad_pmem))
 			rc = -EIO;
 		else {
-			memcpy_from_pmem(mem + off, pmem_addr, len);
+			rc = memcpy_from_pmem(mem + off, pmem_addr, len);
 			flush_dcache_page(page);
 		}
 	} else {
@@ -151,7 +151,7 @@
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
 	int rc;
 
-	rc = pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
+	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
 	if (rw & WRITE)
 		wmb_pmem();
 
@@ -295,7 +295,7 @@
 
 		if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
 			return -EIO;
-		memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
+		return memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
 	} else {
 		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
 		wmb_pmem();

diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 42a01a9..9461dd6 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c

@@ -146,6 +146,14 @@
 	};
 };
 
+struct nvme_nvm_completion {
+	__le64	result;		/* Used by LightNVM to return ppa completions */
+	__le16	sq_head;	/* how much of this queue may be reclaimed */
+	__le16	sq_id;		/* submission queue that generated this entry */
+	__u16	command_id;	/* of the command which completed */
+	__le16	status;		/* did the command fail, and if so, why? */
+};
+
 #define NVME_NVM_LP_MLC_PAIRS 886
 struct nvme_nvm_lp_mlc {
 	__u16			num_pairs;
@@ -507,6 +515,10 @@
 static void nvme_nvm_end_io(struct request *rq, int error)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
+	struct nvme_nvm_completion *cqe = rq->special;
+
+	if (cqe)
+		rqd->ppa_status = le64_to_cpu(cqe->result);
 
 	nvm_end_io(rqd, error);
 
@@ -526,7 +538,8 @@
 	if (IS_ERR(rq))
 		return -ENOMEM;
 
-	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
+	cmd = kzalloc(sizeof(struct nvme_nvm_command) +
+				sizeof(struct nvme_nvm_completion), GFP_KERNEL);
 	if (!cmd) {
 		blk_mq_free_request(rq);
 		return -ENOMEM;
@@ -545,7 +558,7 @@
 
 	rq->cmd = (unsigned char *)cmd;
 	rq->cmd_len = sizeof(struct nvme_nvm_command);
-	rq->special = (void *)0;
+	rq->special = cmd + 1;
 
 	rq->end_io_data = rqd;
 

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f8db70a..24ccda3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c

@@ -723,6 +723,13 @@
 	blk_mq_end_request(req, error);
 }
 
+/* We read the CQE phase first to check if the rest of the entry is valid */
+static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
+		u16 phase)
+{
+	return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
+}
+
 static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 {
 	u16 head, phase;
@@ -730,13 +737,10 @@
 	head = nvmeq->cq_head;
 	phase = nvmeq->cq_phase;
 
-	for (;;) {
+	while (nvme_cqe_valid(nvmeq, head, phase)) {
 		struct nvme_completion cqe = nvmeq->cqes[head];
-		u16 status = le16_to_cpu(cqe.status);
 		struct request *req;
 
-		if ((status & 1) != phase)
-			break;
 		if (++head == nvmeq->q_depth) {
 			head = 0;
 			phase = !phase;
@@ -767,7 +771,7 @@
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
 			memcpy(req->special, &cqe, sizeof(cqe));
-		blk_mq_complete_request(req, status >> 1);
+		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
 
 	}
 
@@ -808,18 +812,16 @@
 static irqreturn_t nvme_irq_check(int irq, void *data)
 {
 	struct nvme_queue *nvmeq = data;
-	struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head];
-	if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase)
-		return IRQ_NONE;
-	return IRQ_WAKE_THREAD;
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
 }
 
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
 
-	if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
-	    nvmeq->cq_phase) {
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
 		spin_lock_irq(&nvmeq->q_lock);
 		__nvme_process_cq(nvmeq, &tag);
 		spin_unlock_irq(&nvmeq->q_lock);

diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index b48ac630..a0e5260 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c

@@ -239,8 +239,8 @@
 {
 	struct inode *root_inode;
 
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = OPROFILEFS_MAGIC;
 	sb->s_op = &s_ops;
 	sb->s_time_gran = 1;

diff --git a/drivers/pcmcia/db1xxx_ss.c b/drivers/pcmcia/db1xxx_ss.c
index 4c2fa05..944674e 100644
--- a/drivers/pcmcia/db1xxx_ss.c
+++ b/drivers/pcmcia/db1xxx_ss.c

@@ -56,6 +56,7 @@
 	int	stschg_irq;	/* card-status-change irq */
 	int	card_irq;	/* card irq */
 	int	eject_irq;	/* db1200/pb1200 have these */
+	int	insert_gpio;	/* db1000 carddetect gpio */
 
 #define BOARD_TYPE_DEFAULT	0	/* most boards */
 #define BOARD_TYPE_DB1200	1	/* IRQs aren't gpios */
@@ -83,7 +84,7 @@
 /* carddetect gpio: low-active */
 static int db1000_card_inserted(struct db1x_pcmcia_sock *sock)
 {
-	return !gpio_get_value(irq_to_gpio(sock->insert_irq));
+	return !gpio_get_value(sock->insert_gpio);
 }
 
 static int db1x_card_inserted(struct db1x_pcmcia_sock *sock)
@@ -457,9 +458,15 @@
 	r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "card");
 	sock->card_irq = r ? r->start : 0;
 
-	/* insert: irq which triggers on card insertion/ejection */
+	/* insert: irq which triggers on card insertion/ejection
+	 * BIG FAT NOTE: on DB1000/1100/1500/1550 we pass a GPIO here!
+	 */
 	r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "insert");
 	sock->insert_irq = r ? r->start : -1;
+	if (sock->board_type == BOARD_TYPE_DEFAULT) {
+		sock->insert_gpio = r ? r->start : -1;
+		sock->insert_irq = r ? gpio_to_irq(r->start) : -1;
+	}
 
 	/* stschg: irq which trigger on card status change (optional) */
 	r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "stschg");

diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c
index 4621051..9cfa544 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx.c

@@ -762,19 +762,18 @@
 
 	if (of_property_read_bool(dev_np, "fsl,input-sel")) {
 		np = of_parse_phandle(dev_np, "fsl,input-sel", 0);
-		if (np) {
-			ipctl->input_sel_base = of_iomap(np, 0);
-			if (IS_ERR(ipctl->input_sel_base)) {
-				of_node_put(np);
-				dev_err(&pdev->dev,
-					"iomuxc input select base address not found\n");
-				return PTR_ERR(ipctl->input_sel_base);
-			}
-		} else {
+		if (!np) {
 			dev_err(&pdev->dev, "iomuxc fsl,input-sel property not found\n");
 			return -EINVAL;
 		}
+
+		ipctl->input_sel_base = of_iomap(np, 0);
 		of_node_put(np);
+		if (!ipctl->input_sel_base) {
+			dev_err(&pdev->dev,
+				"iomuxc input select base address not found\n");
+			return -ENOMEM;
+		}
 	}
 
 	imx_pinctrl_desc.name = dev_name(&pdev->dev);

diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 85536b4..6c2c816f 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c

@@ -665,6 +665,35 @@
 	spin_unlock(&pctrl->lock);
 }
 
+static void intel_gpio_irq_enable(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct intel_pinctrl *pctrl = gpiochip_get_data(gc);
+	const struct intel_community *community;
+	unsigned pin = irqd_to_hwirq(d);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pctrl->lock, flags);
+
+	community = intel_get_community(pctrl, pin);
+	if (community) {
+		unsigned padno = pin_to_padno(community, pin);
+		unsigned gpp_size = community->gpp_size;
+		unsigned gpp_offset = padno % gpp_size;
+		unsigned gpp = padno / gpp_size;
+		u32 value;
+
+		/* Clear interrupt status first to avoid unexpected interrupt */
+		writel(BIT(gpp_offset), community->regs + GPI_IS + gpp * 4);
+
+		value = readl(community->regs + community->ie_offset + gpp * 4);
+		value |= BIT(gpp_offset);
+		writel(value, community->regs + community->ie_offset + gpp * 4);
+	}
+
+	spin_unlock_irqrestore(&pctrl->lock, flags);
+}
+
 static void intel_gpio_irq_mask_unmask(struct irq_data *d, bool mask)
 {
 	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -741,8 +770,9 @@
 		value |= PADCFG0_RXINV;
 	} else if (type & IRQ_TYPE_EDGE_RISING) {
 		value |= PADCFG0_RXEVCFG_EDGE << PADCFG0_RXEVCFG_SHIFT;
-	} else if (type & IRQ_TYPE_LEVEL_LOW) {
-		value |= PADCFG0_RXINV;
+	} else if (type & IRQ_TYPE_LEVEL_MASK) {
+		if (type & IRQ_TYPE_LEVEL_LOW)
+			value |= PADCFG0_RXINV;
 	} else {
 		value |= PADCFG0_RXEVCFG_DISABLED << PADCFG0_RXEVCFG_SHIFT;
 	}
@@ -852,6 +882,7 @@
 
 static struct irq_chip intel_gpio_irqchip = {
 	.name = "intel-gpio",
+	.irq_enable = intel_gpio_irq_enable,
 	.irq_ack = intel_gpio_irq_ack,
 	.irq_mask = intel_gpio_irq_mask,
 	.irq_unmask = intel_gpio_irq_unmask,

diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c
index 3524061..c8969dd 100644
--- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c
+++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c

@@ -990,7 +990,7 @@
 		int val;
 
 		if (pull)
-			pullidx = data_out ? 1 : 2;
+			pullidx = data_out ? 2 : 1;
 
 		seq_printf(s, " gpio-%-3d (%-20.20s) in  %s %s",
 			   gpio,

diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
index 856f736..2673cd9 100644
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c

@@ -469,27 +469,27 @@
 	"mfio83",
 };
 
-static const char * const pistachio_sys_pll_lock_groups[] = {
+static const char * const pistachio_audio_pll_lock_groups[] = {
 	"mfio84",
 };
 
-static const char * const pistachio_wifi_pll_lock_groups[] = {
+static const char * const pistachio_rpu_v_pll_lock_groups[] = {
 	"mfio85",
 };
 
-static const char * const pistachio_bt_pll_lock_groups[] = {
+static const char * const pistachio_rpu_l_pll_lock_groups[] = {
 	"mfio86",
 };
 
-static const char * const pistachio_rpu_v_pll_lock_groups[] = {
+static const char * const pistachio_sys_pll_lock_groups[] = {
 	"mfio87",
 };
 
-static const char * const pistachio_rpu_l_pll_lock_groups[] = {
+static const char * const pistachio_wifi_pll_lock_groups[] = {
 	"mfio88",
 };
 
-static const char * const pistachio_audio_pll_lock_groups[] = {
+static const char * const pistachio_bt_pll_lock_groups[] = {
 	"mfio89",
 };
 
@@ -559,12 +559,12 @@
 	PISTACHIO_FUNCTION_DREQ4,
 	PISTACHIO_FUNCTION_DREQ5,
 	PISTACHIO_FUNCTION_MIPS_PLL_LOCK,
+	PISTACHIO_FUNCTION_AUDIO_PLL_LOCK,
+	PISTACHIO_FUNCTION_RPU_V_PLL_LOCK,
+	PISTACHIO_FUNCTION_RPU_L_PLL_LOCK,
 	PISTACHIO_FUNCTION_SYS_PLL_LOCK,
 	PISTACHIO_FUNCTION_WIFI_PLL_LOCK,
 	PISTACHIO_FUNCTION_BT_PLL_LOCK,
-	PISTACHIO_FUNCTION_RPU_V_PLL_LOCK,
-	PISTACHIO_FUNCTION_RPU_L_PLL_LOCK,
-	PISTACHIO_FUNCTION_AUDIO_PLL_LOCK,
 	PISTACHIO_FUNCTION_DEBUG_RAW_CCA_IND,
 	PISTACHIO_FUNCTION_DEBUG_ED_SEC20_CCA_IND,
 	PISTACHIO_FUNCTION_DEBUG_ED_SEC40_CCA_IND,
@@ -620,12 +620,12 @@
 	FUNCTION(dreq4),
 	FUNCTION(dreq5),
 	FUNCTION(mips_pll_lock),
+	FUNCTION(audio_pll_lock),
+	FUNCTION(rpu_v_pll_lock),
+	FUNCTION(rpu_l_pll_lock),
 	FUNCTION(sys_pll_lock),
 	FUNCTION(wifi_pll_lock),
 	FUNCTION(bt_pll_lock),
-	FUNCTION(rpu_v_pll_lock),
-	FUNCTION(rpu_l_pll_lock),
-	FUNCTION(audio_pll_lock),
 	FUNCTION(debug_raw_cca_ind),
 	FUNCTION(debug_ed_sec20_cca_ind),
 	FUNCTION(debug_ed_sec40_cca_ind),

diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c
index 412c6b7..a13f2b6 100644
--- a/drivers/pinctrl/pinctrl-xway.c
+++ b/drivers/pinctrl/pinctrl-xway.c

@@ -1573,6 +1573,22 @@
 	return 0;
 }
 
+/*
+ * gpiolib gpiod_to_irq callback function.
+ * Returns the mapped IRQ (external interrupt) number for a given GPIO pin.
+ */
+static int xway_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct ltq_pinmux_info *info = dev_get_drvdata(chip->parent);
+	int i;
+
+	for (i = 0; i < info->num_exin; i++)
+		if (info->exin[i] == offset)
+			return ltq_eiu_get_irq(i);
+
+	return -1;
+}
+
 static struct gpio_chip xway_chip = {
 	.label = "gpio-xway",
 	.direction_input = xway_gpio_dir_in,
@@ -1581,6 +1597,7 @@
 	.set = xway_gpio_set,
 	.request = gpiochip_generic_request,
 	.free = gpiochip_generic_free,
+	.to_irq = xway_gpio_to_irq,
 	.base = -1,
 };
 

diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c
index b5d81ce..b68ae42 100644
--- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c
+++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c

@@ -237,7 +237,7 @@
 		.pins = gpio##id##_pins,		\
 		.npins = (unsigned)ARRAY_SIZE(gpio##id##_pins),	\
 		.funcs = (int[]){			\
-			qca_mux_NA, /* gpio mode */	\
+			qca_mux_gpio, /* gpio mode */	\
 			qca_mux_##f1,			\
 			qca_mux_##f2,			\
 			qca_mux_##f3,			\
@@ -254,11 +254,11 @@
 			qca_mux_##f14			\
 		},				        \
 		.nfuncs = 15,				\
-		.ctl_reg = 0x1000 + 0x10 * id,		\
-		.io_reg = 0x1004 + 0x10 * id,		\
-		.intr_cfg_reg = 0x1008 + 0x10 * id,	\
-		.intr_status_reg = 0x100c + 0x10 * id,	\
-		.intr_target_reg = 0x400 + 0x4 * id,	\
+		.ctl_reg = 0x0 + 0x1000 * id,		\
+		.io_reg = 0x4 + 0x1000 * id,		\
+		.intr_cfg_reg = 0x8 + 0x1000 * id,	\
+		.intr_status_reg = 0xc + 0x1000 * id,	\
+		.intr_target_reg = 0x8 + 0x1000 * id,	\
 		.mux_bit = 2,			\
 		.pull_bit = 0,			\
 		.drv_bit = 6,			\
@@ -414,7 +414,7 @@
 	.nfunctions = ARRAY_SIZE(ipq4019_functions),
 	.groups = ipq4019_groups,
 	.ngroups = ARRAY_SIZE(ipq4019_groups),
-	.ngpios = 70,
+	.ngpios = 100,
 };
 
 static int ipq4019_pinctrl_probe(struct platform_device *pdev)

diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c
index dc3609f..ee0c1f2 100644
--- a/drivers/pinctrl/sh-pfc/core.c
+++ b/drivers/pinctrl/sh-pfc/core.c

@@ -546,7 +546,9 @@
 			return ret;
 	}
 
-	pinctrl_provide_dummies();
+	/* Enable dummy states for those platforms without pinctrl support */
+	if (!of_have_populated_dt())
+		pinctrl_provide_dummies();
 
 	ret = sh_pfc_init_ranges(pfc);
 	if (ret < 0)

diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
index 00265f0..8b381d6 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c

@@ -485,6 +485,7 @@
 	.pins = sun8i_a33_pins,
 	.npins = ARRAY_SIZE(sun8i_a33_pins),
 	.irq_banks = 2,
+	.irq_bank_base = 1,
 };
 
 static int sun8i_a33_pinctrl_probe(struct platform_device *pdev)

diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index 12a1dfa..3b017db 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c

@@ -579,7 +579,7 @@
 static int sunxi_pinctrl_irq_set_type(struct irq_data *d, unsigned int type)
 {
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
-	u32 reg = sunxi_irq_cfg_reg(d->hwirq);
+	u32 reg = sunxi_irq_cfg_reg(d->hwirq, pctl->desc->irq_bank_base);
 	u8 index = sunxi_irq_cfg_offset(d->hwirq);
 	unsigned long flags;
 	u32 regval;
@@ -626,7 +626,8 @@
 static void sunxi_pinctrl_irq_ack(struct irq_data *d)
 {
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
-	u32 status_reg = sunxi_irq_status_reg(d->hwirq);
+	u32 status_reg = sunxi_irq_status_reg(d->hwirq,
+					      pctl->desc->irq_bank_base);
 	u8 status_idx = sunxi_irq_status_offset(d->hwirq);
 
 	/* Clear the IRQ */
@@ -636,7 +637,7 @@
 static void sunxi_pinctrl_irq_mask(struct irq_data *d)
 {
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
-	u32 reg = sunxi_irq_ctrl_reg(d->hwirq);
+	u32 reg = sunxi_irq_ctrl_reg(d->hwirq, pctl->desc->irq_bank_base);
 	u8 idx = sunxi_irq_ctrl_offset(d->hwirq);
 	unsigned long flags;
 	u32 val;
@@ -653,7 +654,7 @@
 static void sunxi_pinctrl_irq_unmask(struct irq_data *d)
 {
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
-	u32 reg = sunxi_irq_ctrl_reg(d->hwirq);
+	u32 reg = sunxi_irq_ctrl_reg(d->hwirq, pctl->desc->irq_bank_base);
 	u8 idx = sunxi_irq_ctrl_offset(d->hwirq);
 	unsigned long flags;
 	u32 val;
@@ -745,7 +746,7 @@
 	if (bank == pctl->desc->irq_banks)
 		return;
 
-	reg = sunxi_irq_status_reg_from_bank(bank);
+	reg = sunxi_irq_status_reg_from_bank(bank, pctl->desc->irq_bank_base);
 	val = readl(pctl->membase + reg);
 
 	if (val) {
@@ -1024,9 +1025,11 @@
 
 	for (i = 0; i < pctl->desc->irq_banks; i++) {
 		/* Mask and clear all IRQs before registering a handler */
-		writel(0, pctl->membase + sunxi_irq_ctrl_reg_from_bank(i));
+		writel(0, pctl->membase + sunxi_irq_ctrl_reg_from_bank(i,
+						pctl->desc->irq_bank_base));
 		writel(0xffffffff,
-			pctl->membase + sunxi_irq_status_reg_from_bank(i));
+		       pctl->membase + sunxi_irq_status_reg_from_bank(i,
+						pctl->desc->irq_bank_base));
 
 		irq_set_chained_handler_and_data(pctl->irq[i],
 						 sunxi_pinctrl_irq_handler,

diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h
index e248e81..0afce1a 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h

@@ -97,6 +97,7 @@
 	int				npins;
 	unsigned			pin_base;
 	unsigned			irq_banks;
+	unsigned			irq_bank_base;
 	bool				irq_read_needs_mux;
 };
 
@@ -233,12 +234,12 @@
 	return pin_num * PULL_PINS_BITS;
 }
 
-static inline u32 sunxi_irq_cfg_reg(u16 irq)
+static inline u32 sunxi_irq_cfg_reg(u16 irq, unsigned bank_base)
 {
 	u8 bank = irq / IRQ_PER_BANK;
 	u8 reg = (irq % IRQ_PER_BANK) / IRQ_CFG_IRQ_PER_REG * 0x04;
 
-	return IRQ_CFG_REG + bank * IRQ_MEM_SIZE + reg;
+	return IRQ_CFG_REG + (bank_base + bank) * IRQ_MEM_SIZE + reg;
 }
 
 static inline u32 sunxi_irq_cfg_offset(u16 irq)
@@ -247,16 +248,16 @@
 	return irq_num * IRQ_CFG_IRQ_BITS;
 }
 
-static inline u32 sunxi_irq_ctrl_reg_from_bank(u8 bank)
+static inline u32 sunxi_irq_ctrl_reg_from_bank(u8 bank, unsigned bank_base)
 {
-	return IRQ_CTRL_REG + bank * IRQ_MEM_SIZE;
+	return IRQ_CTRL_REG + (bank_base + bank) * IRQ_MEM_SIZE;
 }
 
-static inline u32 sunxi_irq_ctrl_reg(u16 irq)
+static inline u32 sunxi_irq_ctrl_reg(u16 irq, unsigned bank_base)
 {
 	u8 bank = irq / IRQ_PER_BANK;
 
-	return sunxi_irq_ctrl_reg_from_bank(bank);
+	return sunxi_irq_ctrl_reg_from_bank(bank, bank_base);
 }
 
 static inline u32 sunxi_irq_ctrl_offset(u16 irq)
@@ -265,16 +266,16 @@
 	return irq_num * IRQ_CTRL_IRQ_BITS;
 }
 
-static inline u32 sunxi_irq_status_reg_from_bank(u8 bank)
+static inline u32 sunxi_irq_status_reg_from_bank(u8 bank, unsigned bank_base)
 {
-	return IRQ_STATUS_REG + bank * IRQ_MEM_SIZE;
+	return IRQ_STATUS_REG + (bank_base + bank) * IRQ_MEM_SIZE;
 }
 
-static inline u32 sunxi_irq_status_reg(u16 irq)
+static inline u32 sunxi_irq_status_reg(u16 irq, unsigned bank_base)
 {
 	u8 bank = irq / IRQ_PER_BANK;
 
-	return sunxi_irq_status_reg_from_bank(bank);
+	return sunxi_irq_status_reg_from_bank(bank, bank_base);
 }
 
 static inline u32 sunxi_irq_status_offset(u16 irq)

diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 9973ceb..07462d7 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c

@@ -309,8 +309,7 @@
 		 * much memory to the process.
 		 */
 		down_read(&current->mm->mmap_sem);
-		ret = get_user_pages(current, current->mm, address, 1,
-				     !is_write, 0, &page, NULL);
+		ret = get_user_pages(address, 1, !is_write, 0, &page, NULL);
 		up_read(&current->mm->mmap_sem);
 		if (ret < 0)
 			break;

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 69f93a5..ed2004b 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig

@@ -91,10 +91,21 @@
 
 	  If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
+config DELL_SMBIOS
+	tristate "Dell SMBIOS Support"
+	depends on DCDBAS
+	default n
+	---help---
+	This module provides common functions for kernel modules using
+	Dell SMBIOS.
+
+	If you have a Dell laptop, say Y or M here.
+
 config DELL_LAPTOP
 	tristate "Dell Laptop Extras"
 	depends on X86
-	depends on DCDBAS
+	depends on DELL_SMBIOS
+	depends on DMI
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
 	depends on RFKILL || RFKILL = n
@@ -110,8 +121,10 @@
 config DELL_WMI
 	tristate "Dell WMI extras"
 	depends on ACPI_WMI
+	depends on DMI
 	depends on INPUT
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
+	depends on DELL_SMBIOS
 	select INPUT_SPARSEKMAP
 	---help---
 	  Say Y here if you want to support WMI-based hotkeys on Dell laptops.

diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 40574e7..448443c 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile

@@ -11,6 +11,7 @@
 obj-$(CONFIG_MSI_LAPTOP)	+= msi-laptop.o
 obj-$(CONFIG_ACPI_CMPC)		+= classmate-laptop.o
 obj-$(CONFIG_COMPAL_LAPTOP)	+= compal-laptop.o
+obj-$(CONFIG_DELL_SMBIOS)	+= dell-smbios.o
 obj-$(CONFIG_DELL_LAPTOP)	+= dell-laptop.o
 obj-$(CONFIG_DELL_WMI)		+= dell-wmi.o
 obj-$(CONFIG_DELL_WMI_AIO)	+= dell-wmi-aio.o

diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c
index 1e1e594..0056294 100644
--- a/drivers/platform/x86/alienware-wmi.c
+++ b/drivers/platform/x86/alienware-wmi.c

@@ -33,6 +33,9 @@
 #define WMAX_METHOD_BRIGHTNESS		0x3
 #define WMAX_METHOD_ZONE_CONTROL	0x4
 #define WMAX_METHOD_HDMI_CABLE		0x5
+#define WMAX_METHOD_AMPLIFIER_CABLE	0x6
+#define WMAX_METHOD_DEEP_SLEEP_CONTROL	0x0B
+#define WMAX_METHOD_DEEP_SLEEP_STATUS	0x0C
 
 MODULE_AUTHOR("Mario Limonciello <mario_limonciello@dell.com>");
 MODULE_DESCRIPTION("Alienware special feature control");
@@ -60,6 +63,8 @@
 struct quirk_entry {
 	u8 num_zones;
 	u8 hdmi_mux;
+	u8 amplifier;
+	u8 deepslp;
 };
 
 static struct quirk_entry *quirks;
@@ -67,16 +72,43 @@
 static struct quirk_entry quirk_unknown = {
 	.num_zones = 2,
 	.hdmi_mux = 0,
+	.amplifier = 0,
+	.deepslp = 0,
 };
 
-static struct quirk_entry quirk_x51_family = {
+static struct quirk_entry quirk_x51_r1_r2 = {
 	.num_zones = 3,
-	.hdmi_mux = 0.
+	.hdmi_mux = 0,
+	.amplifier = 0,
+	.deepslp = 0,
+};
+
+static struct quirk_entry quirk_x51_r3 = {
+	.num_zones = 4,
+	.hdmi_mux = 0,
+	.amplifier = 1,
+	.deepslp = 0,
 };
 
 static struct quirk_entry quirk_asm100 = {
 	.num_zones = 2,
 	.hdmi_mux = 1,
+	.amplifier = 0,
+	.deepslp = 0,
+};
+
+static struct quirk_entry quirk_asm200 = {
+	.num_zones = 2,
+	.hdmi_mux = 1,
+	.amplifier = 0,
+	.deepslp = 1,
+};
+
+static struct quirk_entry quirk_asm201 = {
+	.num_zones = 2,
+	.hdmi_mux = 1,
+	.amplifier = 1,
+	.deepslp = 1,
 };
 
 static int __init dmi_matched(const struct dmi_system_id *dmi)
@@ -88,12 +120,12 @@
 static const struct dmi_system_id alienware_quirks[] __initconst = {
 	{
 	 .callback = dmi_matched,
-	 .ident = "Alienware X51 R1",
+	 .ident = "Alienware X51 R3",
 	 .matches = {
 		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "Alienware X51"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "Alienware X51 R3"),
 		     },
-	 .driver_data = &quirk_x51_family,
+	 .driver_data = &quirk_x51_r3,
 	 },
 	{
 	 .callback = dmi_matched,
@@ -102,17 +134,44 @@
 		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
 		     DMI_MATCH(DMI_PRODUCT_NAME, "Alienware X51 R2"),
 		     },
-	 .driver_data = &quirk_x51_family,
+	 .driver_data = &quirk_x51_r1_r2,
 	 },
 	{
-		.callback = dmi_matched,
-		.ident = "Alienware ASM100",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "ASM100"),
-		},
-		.driver_data = &quirk_asm100,
-	},
+	 .callback = dmi_matched,
+	 .ident = "Alienware X51 R1",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "Alienware X51"),
+		     },
+	 .driver_data = &quirk_x51_r1_r2,
+	 },
+	{
+	 .callback = dmi_matched,
+	 .ident = "Alienware ASM100",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "ASM100"),
+		     },
+	 .driver_data = &quirk_asm100,
+	 },
+	{
+	 .callback = dmi_matched,
+	 .ident = "Alienware ASM200",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "ASM200"),
+		     },
+	 .driver_data = &quirk_asm200,
+	 },
+	{
+	 .callback = dmi_matched,
+	 .ident = "Alienware ASM201",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "ASM201"),
+		     },
+	 .driver_data = &quirk_asm201,
+	 },
 	{}
 };
 
@@ -133,7 +192,7 @@
 	u32 percentage;
 };
 
-struct hdmi_args {
+struct wmax_basic_args {
 	u8 arg;
 };
 
@@ -170,7 +229,7 @@
 
 /*
  * Helpers used for zone control
-*/
+ */
 static int parse_rgb(const char *buf, struct platform_zone *zone)
 {
 	long unsigned int rgb;
@@ -210,7 +269,7 @@
 
 /*
  * Individual RGB zone control
-*/
+ */
 static int alienware_update_led(struct platform_zone *zone)
 {
 	int method_id;
@@ -218,16 +277,16 @@
 	char *guid;
 	struct acpi_buffer input;
 	struct legacy_led_args legacy_args;
-	struct wmax_led_args wmax_args;
+	struct wmax_led_args wmax_basic_args;
 	if (interface == WMAX) {
-		wmax_args.led_mask = 1 << zone->location;
-		wmax_args.colors = zone->colors;
-		wmax_args.state = lighting_control_state;
+		wmax_basic_args.led_mask = 1 << zone->location;
+		wmax_basic_args.colors = zone->colors;
+		wmax_basic_args.state = lighting_control_state;
 		guid = WMAX_CONTROL_GUID;
 		method_id = WMAX_METHOD_ZONE_CONTROL;
 
-		input.length = (acpi_size) sizeof(wmax_args);
-		input.pointer = &wmax_args;
+		input.length = (acpi_size) sizeof(wmax_basic_args);
+		input.pointer = &wmax_basic_args;
 	} else {
 		legacy_args.colors = zone->colors;
 		legacy_args.brightness = global_brightness;
@@ -283,7 +342,7 @@
 
 /*
  * LED Brightness (Global)
-*/
+ */
 static int wmax_brightness(int brightness)
 {
 	acpi_status status;
@@ -327,7 +386,7 @@
 
 /*
  * Lighting control state device attribute (Global)
-*/
+ */
 static ssize_t show_control_state(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
@@ -435,11 +494,7 @@
 	kfree(zone_attrs);
 }
 
-/*
-	The HDMI mux sysfs node indicates the status of the HDMI input mux.
-	It can toggle between standard system GPU output and HDMI input.
-*/
-static acpi_status alienware_hdmi_command(struct hdmi_args *in_args,
+static acpi_status alienware_wmax_command(struct wmax_basic_args *in_args,
 					  u32 command, int *out_data)
 {
 	acpi_status status;
@@ -467,16 +522,20 @@
 
 }
 
+/*
+ *	The HDMI mux sysfs node indicates the status of the HDMI input mux.
+ *	It can toggle between standard system GPU output and HDMI input.
+ */
 static ssize_t show_hdmi_cable(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
 	acpi_status status;
 	u32 out_data;
-	struct hdmi_args in_args = {
+	struct wmax_basic_args in_args = {
 		.arg = 0,
 	};
 	status =
-	    alienware_hdmi_command(&in_args, WMAX_METHOD_HDMI_CABLE,
+	    alienware_wmax_command(&in_args, WMAX_METHOD_HDMI_CABLE,
 				   (u32 *) &out_data);
 	if (ACPI_SUCCESS(status)) {
 		if (out_data == 0)
@@ -495,11 +554,11 @@
 {
 	acpi_status status;
 	u32 out_data;
-	struct hdmi_args in_args = {
+	struct wmax_basic_args in_args = {
 		.arg = 0,
 	};
 	status =
-	    alienware_hdmi_command(&in_args, WMAX_METHOD_HDMI_STATUS,
+	    alienware_wmax_command(&in_args, WMAX_METHOD_HDMI_STATUS,
 				   (u32 *) &out_data);
 
 	if (ACPI_SUCCESS(status)) {
@@ -519,7 +578,7 @@
 				  const char *buf, size_t count)
 {
 	acpi_status status;
-	struct hdmi_args args;
+	struct wmax_basic_args args;
 	if (strcmp(buf, "gpu\n") == 0)
 		args.arg = 1;
 	else if (strcmp(buf, "input\n") == 0)
@@ -528,7 +587,7 @@
 		args.arg = 3;
 	pr_debug("alienware-wmi: setting hdmi to %d : %s", args.arg, buf);
 
-	status = alienware_hdmi_command(&args, WMAX_METHOD_HDMI_SOURCE, NULL);
+	status = alienware_wmax_command(&args, WMAX_METHOD_HDMI_SOURCE, NULL);
 
 	if (ACPI_FAILURE(status))
 		pr_err("alienware-wmi: HDMI toggle failed: results: %u\n",
@@ -563,11 +622,144 @@
 
 	ret = sysfs_create_group(&dev->dev.kobj, &hdmi_attribute_group);
 	if (ret)
-		goto error_create_hdmi;
-	return 0;
+		remove_hdmi(dev);
+	return ret;
+}
 
-error_create_hdmi:
-	remove_hdmi(dev);
+/*
+ * Alienware GFX amplifier support
+ * - Currently supports reading cable status
+ * - Leaving expansion room to possibly support dock/undock events later
+ */
+static ssize_t show_amplifier_status(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	acpi_status status;
+	u32 out_data;
+	struct wmax_basic_args in_args = {
+		.arg = 0,
+	};
+	status =
+	    alienware_wmax_command(&in_args, WMAX_METHOD_AMPLIFIER_CABLE,
+				   (u32 *) &out_data);
+	if (ACPI_SUCCESS(status)) {
+		if (out_data == 0)
+			return scnprintf(buf, PAGE_SIZE,
+					 "[unconnected] connected unknown\n");
+		else if (out_data == 1)
+			return scnprintf(buf, PAGE_SIZE,
+					 "unconnected [connected] unknown\n");
+	}
+	pr_err("alienware-wmi: unknown amplifier cable status: %d\n", status);
+	return scnprintf(buf, PAGE_SIZE, "unconnected connected [unknown]\n");
+}
+
+static DEVICE_ATTR(status, S_IRUGO, show_amplifier_status, NULL);
+
+static struct attribute *amplifier_attrs[] = {
+	&dev_attr_status.attr,
+	NULL,
+};
+
+static struct attribute_group amplifier_attribute_group = {
+	.name = "amplifier",
+	.attrs = amplifier_attrs,
+};
+
+static void remove_amplifier(struct platform_device *dev)
+{
+	if (quirks->amplifier > 0)
+		sysfs_remove_group(&dev->dev.kobj, &amplifier_attribute_group);
+}
+
+static int create_amplifier(struct platform_device *dev)
+{
+	int ret;
+
+	ret = sysfs_create_group(&dev->dev.kobj, &amplifier_attribute_group);
+	if (ret)
+		remove_amplifier(dev);
+	return ret;
+}
+
+/*
+ * Deep Sleep Control support
+ * - Modifies BIOS setting for deep sleep control allowing extra wakeup events
+ */
+static ssize_t show_deepsleep_status(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	acpi_status status;
+	u32 out_data;
+	struct wmax_basic_args in_args = {
+		.arg = 0,
+	};
+	status = alienware_wmax_command(&in_args, WMAX_METHOD_DEEP_SLEEP_STATUS,
+					(u32 *) &out_data);
+	if (ACPI_SUCCESS(status)) {
+		if (out_data == 0)
+			return scnprintf(buf, PAGE_SIZE,
+					 "[disabled] s5 s5_s4\n");
+		else if (out_data == 1)
+			return scnprintf(buf, PAGE_SIZE,
+					 "disabled [s5] s5_s4\n");
+		else if (out_data == 2)
+			return scnprintf(buf, PAGE_SIZE,
+					 "disabled s5 [s5_s4]\n");
+	}
+	pr_err("alienware-wmi: unknown deep sleep status: %d\n", status);
+	return scnprintf(buf, PAGE_SIZE, "disabled s5 s5_s4 [unknown]\n");
+}
+
+static ssize_t toggle_deepsleep(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	acpi_status status;
+	struct wmax_basic_args args;
+
+	if (strcmp(buf, "disabled\n") == 0)
+		args.arg = 0;
+	else if (strcmp(buf, "s5\n") == 0)
+		args.arg = 1;
+	else
+		args.arg = 2;
+	pr_debug("alienware-wmi: setting deep sleep to %d : %s", args.arg, buf);
+
+	status = alienware_wmax_command(&args, WMAX_METHOD_DEEP_SLEEP_CONTROL,
+					NULL);
+
+	if (ACPI_FAILURE(status))
+		pr_err("alienware-wmi: deep sleep control failed: results: %u\n",
+			status);
+	return count;
+}
+
+static DEVICE_ATTR(deepsleep, S_IRUGO | S_IWUSR, show_deepsleep_status, toggle_deepsleep);
+
+static struct attribute *deepsleep_attrs[] = {
+	&dev_attr_deepsleep.attr,
+	NULL,
+};
+
+static struct attribute_group deepsleep_attribute_group = {
+	.name = "deepsleep",
+	.attrs = deepsleep_attrs,
+};
+
+static void remove_deepsleep(struct platform_device *dev)
+{
+	if (quirks->deepslp > 0)
+		sysfs_remove_group(&dev->dev.kobj, &deepsleep_attribute_group);
+}
+
+static int create_deepsleep(struct platform_device *dev)
+{
+	int ret;
+
+	ret = sysfs_create_group(&dev->dev.kobj, &deepsleep_attribute_group);
+	if (ret)
+		remove_deepsleep(dev);
 	return ret;
 }
 
@@ -606,6 +798,18 @@
 			goto fail_prep_hdmi;
 	}
 
+	if (quirks->amplifier > 0) {
+		ret = create_amplifier(platform_device);
+		if (ret)
+			goto fail_prep_amplifier;
+	}
+
+	if (quirks->deepslp > 0) {
+		ret = create_deepsleep(platform_device);
+		if (ret)
+			goto fail_prep_deepsleep;
+	}
+
 	ret = alienware_zone_init(platform_device);
 	if (ret)
 		goto fail_prep_zones;
@@ -614,6 +818,8 @@
 
 fail_prep_zones:
 	alienware_zone_exit(platform_device);
+fail_prep_deepsleep:
+fail_prep_amplifier:
 fail_prep_hdmi:
 	platform_device_del(platform_device);
 fail_platform_device2:

diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 131fee2..091ca7a 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c

@@ -272,6 +272,15 @@
 	},
 	{
 		.callback = dmi_matched,
+		.ident = "ASUSTeK COMPUTER INC. X75VD",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X75VD"),
+		},
+		.driver_data = &quirk_asus_wapf4,
+	},
+	{
+		.callback = dmi_matched,
 		.ident = "ASUSTeK COMPUTER INC. 1015E",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),

diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index aaeeae8..2c2f02b 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c

@@ -28,12 +28,11 @@
 #include <linux/acpi.h>
 #include <linux/mm.h>
 #include <linux/i8042.h>
-#include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <acpi/video.h>
-#include "../../firmware/dcdbas.h"
 #include "dell-rbtn.h"
+#include "dell-smbios.h"
 
 #define BRIGHTNESS_TOKEN 0x7d
 #define KBD_LED_OFF_TOKEN 0x01E1
@@ -44,33 +43,6 @@
 #define KBD_LED_AUTO_75_TOKEN 0x02EC
 #define KBD_LED_AUTO_100_TOKEN 0x02F6
 
-/* This structure will be modified by the firmware when we enter
- * system management mode, hence the volatiles */
-
-struct calling_interface_buffer {
-	u16 class;
-	u16 select;
-	volatile u32 input[4];
-	volatile u32 output[4];
-} __packed;
-
-struct calling_interface_token {
-	u16 tokenID;
-	u16 location;
-	union {
-		u16 value;
-		u16 stringlength;
-	};
-};
-
-struct calling_interface_structure {
-	struct dmi_header header;
-	u16 cmdIOAddress;
-	u8 cmdIOCode;
-	u32 supportedCmds;
-	struct calling_interface_token tokens[];
-} __packed;
-
 struct quirk_entry {
 	u8 touchpad_led;
 
@@ -103,11 +75,6 @@
 	.kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
 };
 
-static int da_command_address;
-static int da_command_code;
-static int da_num_tokens;
-static struct calling_interface_token *da_tokens;
-
 static struct platform_driver platform_driver = {
 	.driver = {
 		.name = "dell-laptop",
@@ -306,126 +273,6 @@
 	{ }
 };
 
-static struct calling_interface_buffer *buffer;
-static DEFINE_MUTEX(buffer_mutex);
-
-static void clear_buffer(void)
-{
-	memset(buffer, 0, sizeof(struct calling_interface_buffer));
-}
-
-static void get_buffer(void)
-{
-	mutex_lock(&buffer_mutex);
-	clear_buffer();
-}
-
-static void release_buffer(void)
-{
-	mutex_unlock(&buffer_mutex);
-}
-
-static void __init parse_da_table(const struct dmi_header *dm)
-{
-	/* Final token is a terminator, so we don't want to copy it */
-	int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1;
-	struct calling_interface_token *new_da_tokens;
-	struct calling_interface_structure *table =
-		container_of(dm, struct calling_interface_structure, header);
-
-	/* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
-	   6 bytes of entry */
-
-	if (dm->length < 17)
-		return;
-
-	da_command_address = table->cmdIOAddress;
-	da_command_code = table->cmdIOCode;
-
-	new_da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) *
-				 sizeof(struct calling_interface_token),
-				 GFP_KERNEL);
-
-	if (!new_da_tokens)
-		return;
-	da_tokens = new_da_tokens;
-
-	memcpy(da_tokens+da_num_tokens, table->tokens,
-	       sizeof(struct calling_interface_token) * tokens);
-
-	da_num_tokens += tokens;
-}
-
-static void __init find_tokens(const struct dmi_header *dm, void *dummy)
-{
-	switch (dm->type) {
-	case 0xd4: /* Indexed IO */
-	case 0xd5: /* Protected Area Type 1 */
-	case 0xd6: /* Protected Area Type 2 */
-		break;
-	case 0xda: /* Calling interface */
-		parse_da_table(dm);
-		break;
-	}
-}
-
-static int find_token_id(int tokenid)
-{
-	int i;
-
-	for (i = 0; i < da_num_tokens; i++) {
-		if (da_tokens[i].tokenID == tokenid)
-			return i;
-	}
-
-	return -1;
-}
-
-static int find_token_location(int tokenid)
-{
-	int id;
-
-	id = find_token_id(tokenid);
-	if (id == -1)
-		return -1;
-
-	return da_tokens[id].location;
-}
-
-static struct calling_interface_buffer *
-dell_send_request(struct calling_interface_buffer *buffer, int class,
-		  int select)
-{
-	struct smi_cmd command;
-
-	command.magic = SMI_CMD_MAGIC;
-	command.command_address = da_command_address;
-	command.command_code = da_command_code;
-	command.ebx = virt_to_phys(buffer);
-	command.ecx = 0x42534931;
-
-	buffer->class = class;
-	buffer->select = select;
-
-	dcdbas_smi_request(&command);
-
-	return buffer;
-}
-
-static inline int dell_smi_error(int value)
-{
-	switch (value) {
-	case 0: /* Completed successfully */
-		return 0;
-	case -1: /* Completed with error */
-		return -EIO;
-	case -2: /* Function not supported */
-		return -ENXIO;
-	default: /* Unknown error */
-		return -EINVAL;
-	}
-}
-
 /*
  * Derived from information in smbios-wireless-ctl:
  *
@@ -548,6 +395,7 @@
 
 static int dell_rfkill_set(void *data, bool blocked)
 {
+	struct calling_interface_buffer *buffer;
 	int disable = blocked ? 1 : 0;
 	unsigned long radio = (unsigned long)data;
 	int hwswitch_bit = (unsigned long)data - 1;
@@ -555,19 +403,19 @@
 	int status;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
 
 	if (ret != 0)
 		goto out;
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = 0x2;
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	hwswitch = buffer->output[1];
 
@@ -577,27 +425,28 @@
 	    (status & BIT(0)) && !(status & BIT(16)))
 		disable = 1;
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = (1 | (radio<<8) | (disable << 16));
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 
  out:
-	release_buffer();
-	return dell_smi_error(ret);
+	dell_smbios_release_buffer();
+	return dell_smbios_error(ret);
 }
 
 /* Must be called with the buffer held */
 static void dell_rfkill_update_sw_state(struct rfkill *rfkill, int radio,
-					int status)
+					int status,
+					struct calling_interface_buffer *buffer)
 {
 	if (status & BIT(0)) {
 		/* Has hw-switch, sync sw_state to BIOS */
 		int block = rfkill_blocked(rfkill);
-		clear_buffer();
+		dell_smbios_clear_buffer();
 		buffer->input[0] = (1 | (radio << 8) | (block << 16));
-		dell_send_request(buffer, 17, 11);
+		dell_smbios_send_request(17, 11);
 	} else {
 		/* No hw-switch, sync BIOS state to sw_state */
 		rfkill_set_sw_state(rfkill, !!(status & BIT(radio + 16)));
@@ -613,30 +462,31 @@
 
 static void dell_rfkill_query(struct rfkill *rfkill, void *data)
 {
+	struct calling_interface_buffer *buffer;
 	int radio = ((unsigned long)data & 0xF);
 	int hwswitch;
 	int status;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
 
 	if (ret != 0 || !(status & BIT(0))) {
-		release_buffer();
+		dell_smbios_release_buffer();
 		return;
 	}
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = 0x2;
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	hwswitch = buffer->output[1];
 
-	release_buffer();
+	dell_smbios_release_buffer();
 
 	if (ret != 0)
 		return;
@@ -653,25 +503,26 @@
 
 static int dell_debugfs_show(struct seq_file *s, void *data)
 {
+	struct calling_interface_buffer *buffer;
 	int hwswitch_state;
 	int hwswitch_ret;
 	int status;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = 0x2;
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	hwswitch_ret = buffer->output[0];
 	hwswitch_state = buffer->output[1];
 
-	release_buffer();
+	dell_smbios_release_buffer();
 
 	seq_printf(s, "return:\t%d\n", ret);
 	seq_printf(s, "status:\t0x%X\n", status);
@@ -752,23 +603,24 @@
 
 static void dell_update_rfkill(struct work_struct *ignored)
 {
+	struct calling_interface_buffer *buffer;
 	int hwswitch = 0;
 	int status;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
 
 	if (ret != 0)
 		goto out;
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = 0x2;
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 
 	if (ret == 0 && (status & BIT(0)))
@@ -776,20 +628,21 @@
 
 	if (wifi_rfkill) {
 		dell_rfkill_update_hw_state(wifi_rfkill, 1, status, hwswitch);
-		dell_rfkill_update_sw_state(wifi_rfkill, 1, status);
+		dell_rfkill_update_sw_state(wifi_rfkill, 1, status, buffer);
 	}
 	if (bluetooth_rfkill) {
 		dell_rfkill_update_hw_state(bluetooth_rfkill, 2, status,
 					    hwswitch);
-		dell_rfkill_update_sw_state(bluetooth_rfkill, 2, status);
+		dell_rfkill_update_sw_state(bluetooth_rfkill, 2, status,
+					    buffer);
 	}
 	if (wwan_rfkill) {
 		dell_rfkill_update_hw_state(wwan_rfkill, 3, status, hwswitch);
-		dell_rfkill_update_sw_state(wwan_rfkill, 3, status);
+		dell_rfkill_update_sw_state(wwan_rfkill, 3, status, buffer);
 	}
 
  out:
-	release_buffer();
+	dell_smbios_release_buffer();
 }
 static DECLARE_DELAYED_WORK(dell_rfkill_work, dell_update_rfkill);
 
@@ -833,6 +686,7 @@
 
 static int __init dell_setup_rfkill(void)
 {
+	struct calling_interface_buffer *buffer;
 	int status, ret, whitelisted;
 	const char *product;
 
@@ -848,11 +702,11 @@
 	if (!force_rfkill && !whitelisted)
 		return 0;
 
-	get_buffer();
-	dell_send_request(buffer, 17, 11);
+	buffer = dell_smbios_get_buffer();
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
-	release_buffer();
+	dell_smbios_release_buffer();
 
 	/* dell wireless info smbios call is not supported */
 	if (ret != 0)
@@ -1005,51 +859,53 @@
 
 static int dell_send_intensity(struct backlight_device *bd)
 {
-	int token;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int ret;
 
-	token = find_token_location(BRIGHTNESS_TOKEN);
-	if (token == -1)
+	token = dell_smbios_find_token(BRIGHTNESS_TOKEN);
+	if (!token)
 		return -ENODEV;
 
-	get_buffer();
-	buffer->input[0] = token;
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
 	buffer->input[1] = bd->props.brightness;
 
 	if (power_supply_is_system_supplied() > 0)
-		dell_send_request(buffer, 1, 2);
+		dell_smbios_send_request(1, 2);
 	else
-		dell_send_request(buffer, 1, 1);
+		dell_smbios_send_request(1, 1);
 
-	ret = dell_smi_error(buffer->output[0]);
+	ret = dell_smbios_error(buffer->output[0]);
 
-	release_buffer();
+	dell_smbios_release_buffer();
 	return ret;
 }
 
 static int dell_get_intensity(struct backlight_device *bd)
 {
-	int token;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int ret;
 
-	token = find_token_location(BRIGHTNESS_TOKEN);
-	if (token == -1)
+	token = dell_smbios_find_token(BRIGHTNESS_TOKEN);
+	if (!token)
 		return -ENODEV;
 
-	get_buffer();
-	buffer->input[0] = token;
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
 
 	if (power_supply_is_system_supplied() > 0)
-		dell_send_request(buffer, 0, 2);
+		dell_smbios_send_request(0, 2);
 	else
-		dell_send_request(buffer, 0, 1);
+		dell_smbios_send_request(0, 1);
 
 	if (buffer->output[0])
-		ret = dell_smi_error(buffer->output[0]);
+		ret = dell_smbios_error(buffer->output[0]);
 	else
 		ret = buffer->output[1];
 
-	release_buffer();
+	dell_smbios_release_buffer();
 	return ret;
 }
 
@@ -1293,17 +1149,18 @@
 
 static int kbd_get_info(struct kbd_info *info)
 {
+	struct calling_interface_buffer *buffer;
 	u8 units;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
 	buffer->input[0] = 0x0;
-	dell_send_request(buffer, 4, 11);
+	dell_smbios_send_request(4, 11);
 	ret = buffer->output[0];
 
 	if (ret) {
-		ret = dell_smi_error(ret);
+		ret = dell_smbios_error(ret);
 		goto out;
 	}
 
@@ -1323,7 +1180,7 @@
 		info->days = (buffer->output[3] >> 24) & 0xFF;
 
  out:
-	release_buffer();
+	dell_smbios_release_buffer();
 	return ret;
 }
 
@@ -1382,16 +1239,17 @@
 
 static int kbd_get_state(struct kbd_state *state)
 {
+	struct calling_interface_buffer *buffer;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
 	buffer->input[0] = 0x1;
-	dell_send_request(buffer, 4, 11);
+	dell_smbios_send_request(4, 11);
 	ret = buffer->output[0];
 
 	if (ret) {
-		ret = dell_smi_error(ret);
+		ret = dell_smbios_error(ret);
 		goto out;
 	}
 
@@ -1407,15 +1265,16 @@
 	state->level = (buffer->output[2] >> 16) & 0xFF;
 
  out:
-	release_buffer();
+	dell_smbios_release_buffer();
 	return ret;
 }
 
 static int kbd_set_state(struct kbd_state *state)
 {
+	struct calling_interface_buffer *buffer;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 	buffer->input[0] = 0x2;
 	buffer->input[1] = BIT(state->mode_bit) & 0xFFFF;
 	buffer->input[1] |= (state->triggers & 0xFF) << 16;
@@ -1423,11 +1282,11 @@
 	buffer->input[1] |= (state->timeout_unit & 0x3) << 30;
 	buffer->input[2] = state->als_setting & 0xFF;
 	buffer->input[2] |= (state->level & 0xFF) << 16;
-	dell_send_request(buffer, 4, 11);
+	dell_smbios_send_request(4, 11);
 	ret = buffer->output[0];
-	release_buffer();
+	dell_smbios_release_buffer();
 
-	return dell_smi_error(ret);
+	return dell_smbios_error(ret);
 }
 
 static int kbd_set_state_safe(struct kbd_state *state, struct kbd_state *old)
@@ -1452,50 +1311,52 @@
 
 static int kbd_set_token_bit(u8 bit)
 {
-	int id;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int ret;
 
 	if (bit >= ARRAY_SIZE(kbd_tokens))
 		return -EINVAL;
 
-	id = find_token_id(kbd_tokens[bit]);
-	if (id == -1)
+	token = dell_smbios_find_token(kbd_tokens[bit]);
+	if (!token)
 		return -EINVAL;
 
-	get_buffer();
-	buffer->input[0] = da_tokens[id].location;
-	buffer->input[1] = da_tokens[id].value;
-	dell_send_request(buffer, 1, 0);
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
+	buffer->input[1] = token->value;
+	dell_smbios_send_request(1, 0);
 	ret = buffer->output[0];
-	release_buffer();
+	dell_smbios_release_buffer();
 
-	return dell_smi_error(ret);
+	return dell_smbios_error(ret);
 }
 
 static int kbd_get_token_bit(u8 bit)
 {
-	int id;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int ret;
 	int val;
 
 	if (bit >= ARRAY_SIZE(kbd_tokens))
 		return -EINVAL;
 
-	id = find_token_id(kbd_tokens[bit]);
-	if (id == -1)
+	token = dell_smbios_find_token(kbd_tokens[bit]);
+	if (!token)
 		return -EINVAL;
 
-	get_buffer();
-	buffer->input[0] = da_tokens[id].location;
-	dell_send_request(buffer, 0, 0);
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
+	dell_smbios_send_request(0, 0);
 	ret = buffer->output[0];
 	val = buffer->output[1];
-	release_buffer();
+	dell_smbios_release_buffer();
 
 	if (ret)
-		return dell_smi_error(ret);
+		return dell_smbios_error(ret);
 
-	return (val == da_tokens[id].value);
+	return (val == token->value);
 }
 
 static int kbd_get_first_active_token_bit(void)
@@ -1597,7 +1458,7 @@
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(kbd_tokens); ++i)
-		if (find_token_id(kbd_tokens[i]) != -1)
+		if (dell_smbios_find_token(kbd_tokens[i]))
 			kbd_token_bits |= BIT(i);
 }
 
@@ -2111,8 +1972,9 @@
 
 static int __init dell_init(void)
 {
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int max_intensity = 0;
-	int token;
 	int ret;
 
 	if (!dmi_check_system(dell_device_table))
@@ -2122,13 +1984,6 @@
 	/* find if this machine support other functions */
 	dmi_check_system(dell_quirks);
 
-	dmi_walk(find_tokens, NULL);
-
-	if (!da_tokens)  {
-		pr_info("Unable to find dmi tokens\n");
-		return -ENODEV;
-	}
-
 	ret = platform_driver_register(&platform_driver);
 	if (ret)
 		goto fail_platform_driver;
@@ -2141,16 +1996,6 @@
 	if (ret)
 		goto fail_platform_device2;
 
-	/*
-	 * Allocate buffer below 4GB for SMI data--only 32-bit physical addr
-	 * is passed to SMI handler.
-	 */
-	buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32);
-	if (!buffer) {
-		ret = -ENOMEM;
-		goto fail_buffer;
-	}
-
 	ret = dell_setup_rfkill();
 
 	if (ret) {
@@ -2171,14 +2016,14 @@
 	if (acpi_video_get_backlight_type() != acpi_backlight_vendor)
 		return 0;
 
-	token = find_token_location(BRIGHTNESS_TOKEN);
-	if (token != -1) {
-		get_buffer();
-		buffer->input[0] = token;
-		dell_send_request(buffer, 0, 2);
+	token = dell_smbios_find_token(BRIGHTNESS_TOKEN);
+	if (token) {
+		buffer = dell_smbios_get_buffer();
+		buffer->input[0] = token->location;
+		dell_smbios_send_request(0, 2);
 		if (buffer->output[0] == 0)
 			max_intensity = buffer->output[3];
-		release_buffer();
+		dell_smbios_release_buffer();
 	}
 
 	if (max_intensity) {
@@ -2208,15 +2053,12 @@
 fail_backlight:
 	dell_cleanup_rfkill();
 fail_rfkill:
-	free_page((unsigned long)buffer);
-fail_buffer:
 	platform_device_del(platform_device);
 fail_platform_device2:
 	platform_device_put(platform_device);
 fail_platform_device1:
 	platform_driver_unregister(&platform_driver);
 fail_platform_driver:
-	kfree(da_tokens);
 	return ret;
 }
 
@@ -2232,8 +2074,6 @@
 		platform_device_unregister(platform_device);
 		platform_driver_unregister(&platform_driver);
 	}
-	kfree(da_tokens);
-	free_page((unsigned long)buffer);
 }
 
 /* dell-rbtn.c driver export functions which will not work correctly (and could

diff --git a/drivers/platform/x86/dell-rbtn.c b/drivers/platform/x86/dell-rbtn.c
index cd410e3..b51a200 100644
--- a/drivers/platform/x86/dell-rbtn.c
+++ b/drivers/platform/x86/dell-rbtn.c

@@ -217,6 +217,21 @@
 static const struct acpi_device_id rbtn_ids[] = {
 	{ "DELRBTN", 0 },
 	{ "DELLABCE", 0 },
+
+	/*
+	 * This driver can also handle the "DELLABC6" device that
+	 * appears on the XPS 13 9350, but that device is disabled
+	 * by the DSDT unless booted with acpi_osi="!Windows 2012"
+	 * acpi_osi="!Windows 2013".  Even if we boot that and bind
+	 * the driver, we seem to have inconsistent behavior in
+	 * which NetworkManager can get out of sync with the rfkill
+	 * state.
+	 *
+	 * On the XPS 13 9350 and similar laptops, we're not supposed to
+	 * use DELLABC6 at all.  Instead, we handle the rfkill button
+	 * via the intel-hid driver.
+	 */
+
 	{ "", 0 },
 };
 

diff --git a/drivers/platform/x86/dell-smbios.c b/drivers/platform/x86/dell-smbios.c
new file mode 100644
index 0000000..d2412ab
--- /dev/null
+++ b/drivers/platform/x86/dell-smbios.c

@@ -0,0 +1,193 @@
+/*
+ *  Common functions for kernel modules using Dell SMBIOS
+ *
+ *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com>
+ *  Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com>
+ *
+ *  Based on documentation in the libsmbios package:
+ *  Copyright (C) 2005-2014 Dell Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include "../../firmware/dcdbas.h"
+#include "dell-smbios.h"
+
+struct calling_interface_structure {
+	struct dmi_header header;
+	u16 cmdIOAddress;
+	u8 cmdIOCode;
+	u32 supportedCmds;
+	struct calling_interface_token tokens[];
+} __packed;
+
+static struct calling_interface_buffer *buffer;
+static DEFINE_MUTEX(buffer_mutex);
+
+static int da_command_address;
+static int da_command_code;
+static int da_num_tokens;
+static struct calling_interface_token *da_tokens;
+
+int dell_smbios_error(int value)
+{
+	switch (value) {
+	case 0: /* Completed successfully */
+		return 0;
+	case -1: /* Completed with error */
+		return -EIO;
+	case -2: /* Function not supported */
+		return -ENXIO;
+	default: /* Unknown error */
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(dell_smbios_error);
+
+struct calling_interface_buffer *dell_smbios_get_buffer(void)
+{
+	mutex_lock(&buffer_mutex);
+	dell_smbios_clear_buffer();
+	return buffer;
+}
+EXPORT_SYMBOL_GPL(dell_smbios_get_buffer);
+
+void dell_smbios_clear_buffer(void)
+{
+	memset(buffer, 0, sizeof(struct calling_interface_buffer));
+}
+EXPORT_SYMBOL_GPL(dell_smbios_clear_buffer);
+
+void dell_smbios_release_buffer(void)
+{
+	mutex_unlock(&buffer_mutex);
+}
+EXPORT_SYMBOL_GPL(dell_smbios_release_buffer);
+
+void dell_smbios_send_request(int class, int select)
+{
+	struct smi_cmd command;
+
+	command.magic = SMI_CMD_MAGIC;
+	command.command_address = da_command_address;
+	command.command_code = da_command_code;
+	command.ebx = virt_to_phys(buffer);
+	command.ecx = 0x42534931;
+
+	buffer->class = class;
+	buffer->select = select;
+
+	dcdbas_smi_request(&command);
+}
+EXPORT_SYMBOL_GPL(dell_smbios_send_request);
+
+struct calling_interface_token *dell_smbios_find_token(int tokenid)
+{
+	int i;
+
+	for (i = 0; i < da_num_tokens; i++) {
+		if (da_tokens[i].tokenID == tokenid)
+			return &da_tokens[i];
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dell_smbios_find_token);
+
+static void __init parse_da_table(const struct dmi_header *dm)
+{
+	/* Final token is a terminator, so we don't want to copy it */
+	int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1;
+	struct calling_interface_token *new_da_tokens;
+	struct calling_interface_structure *table =
+		container_of(dm, struct calling_interface_structure, header);
+
+	/* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
+	   6 bytes of entry */
+
+	if (dm->length < 17)
+		return;
+
+	da_command_address = table->cmdIOAddress;
+	da_command_code = table->cmdIOCode;
+
+	new_da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) *
+				 sizeof(struct calling_interface_token),
+				 GFP_KERNEL);
+
+	if (!new_da_tokens)
+		return;
+	da_tokens = new_da_tokens;
+
+	memcpy(da_tokens+da_num_tokens, table->tokens,
+	       sizeof(struct calling_interface_token) * tokens);
+
+	da_num_tokens += tokens;
+}
+
+static void __init find_tokens(const struct dmi_header *dm, void *dummy)
+{
+	switch (dm->type) {
+	case 0xd4: /* Indexed IO */
+	case 0xd5: /* Protected Area Type 1 */
+	case 0xd6: /* Protected Area Type 2 */
+		break;
+	case 0xda: /* Calling interface */
+		parse_da_table(dm);
+		break;
+	}
+}
+
+static int __init dell_smbios_init(void)
+{
+	int ret;
+
+	dmi_walk(find_tokens, NULL);
+
+	if (!da_tokens)  {
+		pr_info("Unable to find dmi tokens\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Allocate buffer below 4GB for SMI data--only 32-bit physical addr
+	 * is passed to SMI handler.
+	 */
+	buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32);
+	if (!buffer) {
+		ret = -ENOMEM;
+		goto fail_buffer;
+	}
+
+	return 0;
+
+fail_buffer:
+	kfree(da_tokens);
+	return ret;
+}
+
+static void __exit dell_smbios_exit(void)
+{
+	kfree(da_tokens);
+	free_page((unsigned long)buffer);
+}
+
+subsys_initcall(dell_smbios_init);
+module_exit(dell_smbios_exit);
+
+MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
+MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
+MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
+MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS");
+MODULE_LICENSE("GPL");

diff --git a/drivers/platform/x86/dell-smbios.h b/drivers/platform/x86/dell-smbios.h
new file mode 100644
index 0000000..ec7d40a
--- /dev/null
+++ b/drivers/platform/x86/dell-smbios.h

@@ -0,0 +1,46 @@
+/*
+ *  Common functions for kernel modules using Dell SMBIOS
+ *
+ *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com>
+ *  Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com>
+ *
+ *  Based on documentation in the libsmbios package:
+ *  Copyright (C) 2005-2014 Dell Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#ifndef _DELL_SMBIOS_H_
+#define _DELL_SMBIOS_H_
+
+/* This structure will be modified by the firmware when we enter
+ * system management mode, hence the volatiles */
+
+struct calling_interface_buffer {
+	u16 class;
+	u16 select;
+	volatile u32 input[4];
+	volatile u32 output[4];
+} __packed;
+
+struct calling_interface_token {
+	u16 tokenID;
+	u16 location;
+	union {
+		u16 value;
+		u16 stringlength;
+	};
+};
+
+int dell_smbios_error(int value);
+
+struct calling_interface_buffer *dell_smbios_get_buffer(void);
+void dell_smbios_clear_buffer(void);
+void dell_smbios_release_buffer(void);
+void dell_smbios_send_request(int class, int select);
+
+struct calling_interface_token *dell_smbios_find_token(int tokenid);
+#endif

diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 368e193..15c6f11 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c

@@ -37,6 +37,7 @@
 #include <linux/string.h>
 #include <linux/dmi.h>
 #include <acpi/video.h>
+#include "dell-smbios.h"
 
 MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
 MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
@@ -47,10 +48,37 @@
 #define DELL_DESCRIPTOR_GUID "8D9DDCBC-A997-11DA-B012-B622A1EF5492"
 
 static u32 dell_wmi_interface_version;
+static bool wmi_requires_smbios_request;
 
 MODULE_ALIAS("wmi:"DELL_EVENT_GUID);
 MODULE_ALIAS("wmi:"DELL_DESCRIPTOR_GUID);
 
+static int __init dmi_matched(const struct dmi_system_id *dmi)
+{
+	wmi_requires_smbios_request = 1;
+	return 1;
+}
+
+static const struct dmi_system_id dell_wmi_smbios_list[] __initconst = {
+	{
+		.callback = dmi_matched,
+		.ident = "Dell Inspiron M5110",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron M5110"),
+		},
+	},
+	{
+		.callback = dmi_matched,
+		.ident = "Dell Vostro V131",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Vostro V131"),
+		},
+	},
+	{ }
+};
+
 /*
  * Certain keys are flagged as KE_IGNORE. All of these are either
  * notifications (rather than requests for change) or are also sent
@@ -90,8 +118,11 @@
 
 	{ KE_IGNORE, 0xe020, { KEY_MUTE } },
 
-	/* Shortcut and audio panel keys */
-	{ KE_IGNORE, 0xe025, { KEY_RESERVED } },
+	/* Dell Instant Launch key */
+	{ KE_KEY, 0xe025, { KEY_PROG4 } },
+	{ KE_KEY, 0xe029, { KEY_PROG4 } },
+
+	/* Audio panel key */
 	{ KE_IGNORE, 0xe026, { KEY_RESERVED } },
 
 	{ KE_IGNORE, 0xe02e, { KEY_VOLUMEDOWN } },
@@ -120,7 +151,10 @@
 
 };
 
-static const struct dell_bios_hotkey_table *dell_bios_hotkey_table;
+struct dell_dmi_results {
+	int err;
+	struct key_entry *keymap;
+};
 
 /* Uninitialized entries here are KEY_RESERVED == 0. */
 static const u16 bios_to_linux_keycode[256] __initconst = {
@@ -166,6 +200,30 @@
 	[255]	= KEY_PROG3,
 };
 
+/*
+ * These are applied if the 0xB2 DMI hotkey table is present and doesn't
+ * override them.
+ */
+static const struct key_entry dell_wmi_extra_keymap[] __initconst = {
+	/* Fn-lock */
+	{ KE_IGNORE, 0x151, { KEY_RESERVED } },
+
+	/* Change keyboard illumination */
+	{ KE_IGNORE, 0x152, { KEY_KBDILLUMTOGGLE } },
+
+	/*
+	 * Radio disable (notify only -- there is no model for which the
+	 * WMI event is supposed to trigger an action).
+	 */
+	{ KE_IGNORE, 0x153, { KEY_RFKILL } },
+
+	/* RGB keyboard backlight control */
+	{ KE_IGNORE, 0x154, { KEY_RESERVED } },
+
+	/* Stealth mode toggle */
+	{ KE_IGNORE, 0x155, { KEY_RESERVED } },
+};
+
 static struct input_dev *dell_wmi_input_dev;
 
 static void dell_wmi_process_key(int reported_key)
@@ -188,6 +246,9 @@
 	    acpi_video_handles_brightness_key_presses())
 		return;
 
+	if (reported_key == 0xe025 && !wmi_requires_smbios_request)
+		return;
+
 	sparse_keymap_report_entry(dell_wmi_input_dev, key, 1, true);
 }
 
@@ -337,20 +398,60 @@
 	kfree(obj);
 }
 
-static const struct key_entry * __init dell_wmi_prepare_new_keymap(void)
+static bool have_scancode(u32 scancode, const struct key_entry *keymap, int len)
 {
-	int hotkey_num = (dell_bios_hotkey_table->header.length - 4) /
-				sizeof(struct dell_bios_keymap_entry);
-	struct key_entry *keymap;
 	int i;
 
-	keymap = kcalloc(hotkey_num + 1, sizeof(struct key_entry), GFP_KERNEL);
-	if (!keymap)
-		return NULL;
+	for (i = 0; i < len; i++)
+		if (keymap[i].code == scancode)
+			return true;
+
+	return false;
+}
+
+static void __init handle_dmi_entry(const struct dmi_header *dm,
+
+				    void *opaque)
+
+{
+	struct dell_dmi_results *results = opaque;
+	struct dell_bios_hotkey_table *table;
+	int hotkey_num, i, pos = 0;
+	struct key_entry *keymap;
+	int num_bios_keys;
+
+	if (results->err || results->keymap)
+		return;		/* We already found the hotkey table. */
+
+	if (dm->type != 0xb2)
+		return;
+
+	table = container_of(dm, struct dell_bios_hotkey_table, header);
+
+	hotkey_num = (table->header.length -
+		      sizeof(struct dell_bios_hotkey_table)) /
+				sizeof(struct dell_bios_keymap_entry);
+	if (hotkey_num < 1) {
+		/*
+		 * Historically, dell-wmi would ignore a DMI entry of
+		 * fewer than 7 bytes.  Sizes between 4 and 8 bytes are
+		 * nonsensical (both the header and all entries are 4
+		 * bytes), so we approximate the old behavior by
+		 * ignoring tables with fewer than one entry.
+		 */
+		return;
+	}
+
+	keymap = kcalloc(hotkey_num + ARRAY_SIZE(dell_wmi_extra_keymap) + 1,
+			 sizeof(struct key_entry), GFP_KERNEL);
+	if (!keymap) {
+		results->err = -ENOMEM;
+		return;
+	}
 
 	for (i = 0; i < hotkey_num; i++) {
 		const struct dell_bios_keymap_entry *bios_entry =
-					&dell_bios_hotkey_table->keymap[i];
+					&table->keymap[i];
 
 		/* Uninitialized entries are 0 aka KEY_RESERVED. */
 		u16 keycode = (bios_entry->keycode <
@@ -370,20 +471,39 @@
 		}
 
 		if (keycode == KEY_KBDILLUMTOGGLE)
-			keymap[i].type = KE_IGNORE;
+			keymap[pos].type = KE_IGNORE;
 		else
-			keymap[i].type = KE_KEY;
-		keymap[i].code = bios_entry->scancode;
-		keymap[i].keycode = keycode;
+			keymap[pos].type = KE_KEY;
+		keymap[pos].code = bios_entry->scancode;
+		keymap[pos].keycode = keycode;
+
+		pos++;
 	}
 
-	keymap[hotkey_num].type = KE_END;
+	num_bios_keys = pos;
 
-	return keymap;
+	for (i = 0; i < ARRAY_SIZE(dell_wmi_extra_keymap); i++) {
+		const struct key_entry *entry = &dell_wmi_extra_keymap[i];
+
+		/*
+		 * Check if we've already found this scancode.  This takes
+		 * quadratic time, but it doesn't matter unless the list
+		 * of extra keys gets very long.
+		 */
+		if (!have_scancode(entry->code, keymap, num_bios_keys)) {
+			keymap[pos] = *entry;
+			pos++;
+		}
+	}
+
+	keymap[pos].type = KE_END;
+
+	results->keymap = keymap;
 }
 
 static int __init dell_wmi_input_setup(void)
 {
+	struct dell_dmi_results dmi_results = {};
 	int err;
 
 	dell_wmi_input_dev = input_allocate_device();
@@ -394,20 +514,31 @@
 	dell_wmi_input_dev->phys = "wmi/input0";
 	dell_wmi_input_dev->id.bustype = BUS_HOST;
 
-	if (dell_new_hk_type) {
-		const struct key_entry *keymap = dell_wmi_prepare_new_keymap();
-		if (!keymap) {
-			err = -ENOMEM;
-			goto err_free_dev;
-		}
+	if (dmi_walk(handle_dmi_entry, &dmi_results)) {
+		/*
+		 * Historically, dell-wmi ignored dmi_walk errors.  A failure
+		 * is certainly surprising, but it probably just indicates
+		 * a very old laptop.
+		 */
+		pr_warn("no DMI; using the old-style hotkey interface\n");
+	}
 
-		err = sparse_keymap_setup(dell_wmi_input_dev, keymap, NULL);
+	if (dmi_results.err) {
+		err = dmi_results.err;
+		goto err_free_dev;
+	}
+
+	if (dmi_results.keymap) {
+		dell_new_hk_type = true;
+
+		err = sparse_keymap_setup(dell_wmi_input_dev,
+					  dmi_results.keymap, NULL);
 
 		/*
 		 * Sparse keymap library makes a copy of keymap so we
 		 * don't need the original one that was allocated.
 		 */
-		kfree(keymap);
+		kfree(dmi_results.keymap);
 	} else {
 		err = sparse_keymap_setup(dell_wmi_input_dev,
 					  dell_wmi_legacy_keymap, NULL);
@@ -434,15 +565,6 @@
 	input_unregister_device(dell_wmi_input_dev);
 }
 
-static void __init find_hk_type(const struct dmi_header *dm, void *dummy)
-{
-	if (dm->type == 0xb2 && dm->length > 6) {
-		dell_new_hk_type = true;
-		dell_bios_hotkey_table =
-			container_of(dm, struct dell_bios_hotkey_table, header);
-	}
-}
-
 /*
  * Descriptor buffer is 128 byte long and contains:
  *
@@ -509,6 +631,38 @@
 	return 0;
 }
 
+/*
+ * According to Dell SMBIOS documentation:
+ *
+ * 17  3  Application Program Registration
+ *
+ *     cbArg1 Application ID 1 = 0x00010000
+ *     cbArg2 Application ID 2
+ *            QUICKSET/DCP = 0x51534554 "QSET"
+ *            ALS Driver   = 0x416c7353 "AlsS"
+ *            Latitude ON  = 0x4c6f6e52 "LonR"
+ *     cbArg3 Application version or revision number
+ *     cbArg4 0 = Unregister application
+ *            1 = Register application
+ *     cbRes1 Standard return codes (0, -1, -2)
+ */
+
+static int dell_wmi_events_set_enabled(bool enable)
+{
+	struct calling_interface_buffer *buffer;
+	int ret;
+
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = 0x10000;
+	buffer->input[1] = 0x51534554;
+	buffer->input[3] = enable;
+	dell_smbios_send_request(17, 3);
+	ret = buffer->output[0];
+	dell_smbios_release_buffer();
+
+	return dell_smbios_error(ret);
+}
+
 static int __init dell_wmi_init(void)
 {
 	int err;
@@ -524,8 +678,6 @@
 	if (err)
 		return err;
 
-	dmi_walk(find_hk_type, NULL);
-
 	err = dell_wmi_input_setup();
 	if (err)
 		return err;
@@ -538,12 +690,26 @@
 		return -ENODEV;
 	}
 
+	dmi_check_system(dell_wmi_smbios_list);
+
+	if (wmi_requires_smbios_request) {
+		err = dell_wmi_events_set_enabled(true);
+		if (err) {
+			pr_err("Failed to enable WMI events\n");
+			wmi_remove_notify_handler(DELL_EVENT_GUID);
+			dell_wmi_input_destroy();
+			return err;
+		}
+	}
+
 	return 0;
 }
 module_init(dell_wmi_init);
 
 static void __exit dell_wmi_exit(void)
 {
+	if (wmi_requires_smbios_request)
+		dell_wmi_events_set_enabled(false);
 	wmi_remove_notify_handler(DELL_EVENT_GUID);
 	dell_wmi_input_destroy();
 }

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 1c62caf..ffc84cc 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c

@@ -114,6 +114,7 @@
 #define KEY2_CODE	0x411
 #define KEY3_CODE	0x412
 #define KEY4_CODE	0x413
+#define KEY5_CODE	0x420
 
 #define MAX_HOTKEY_RINGBUFFER_SIZE 100
 #define RINGBUFFERSIZE 40
@@ -149,7 +150,7 @@
 	char phys[32];
 	struct backlight_device *bl_device;
 	struct platform_device *pf_device;
-	int keycode1, keycode2, keycode3, keycode4;
+	int keycode1, keycode2, keycode3, keycode4, keycode5;
 
 	unsigned int max_brightness;
 	unsigned int brightness_changed;
@@ -823,6 +824,7 @@
 	set_bit(fujitsu->keycode2, input->keybit);
 	set_bit(fujitsu->keycode3, input->keybit);
 	set_bit(fujitsu->keycode4, input->keybit);
+	set_bit(fujitsu->keycode5, input->keybit);
 	set_bit(KEY_UNKNOWN, input->keybit);
 
 	error = input_register_device(input);
@@ -962,6 +964,9 @@
 			case KEY4_CODE:
 				keycode = fujitsu->keycode4;
 				break;
+			case KEY5_CODE:
+				keycode = fujitsu->keycode5;
+				break;
 			case 0:
 				keycode = 0;
 				break;
@@ -1072,6 +1077,7 @@
 	fujitsu->keycode2 = KEY_PROG2;
 	fujitsu->keycode3 = KEY_PROG3;
 	fujitsu->keycode4 = KEY_PROG4;
+	fujitsu->keycode5 = KEY_RFKILL;
 	dmi_check_system(fujitsu_dmi_table);
 
 	result = acpi_bus_register_driver(&acpi_fujitsu_driver);

diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index fb4dd7b..6f145f2 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c

@@ -157,7 +157,6 @@
 static struct rfkill *wifi_rfkill;
 static struct rfkill *bluetooth_rfkill;
 static struct rfkill *wwan_rfkill;
-static struct rfkill *gps_rfkill;
 
 struct rfkill2_device {
 	u8 id;
@@ -613,10 +612,6 @@
 			rfkill_set_states(wwan_rfkill,
 					  hp_wmi_get_sw_state(HPWMI_WWAN),
 					  hp_wmi_get_hw_state(HPWMI_WWAN));
-		if (gps_rfkill)
-			rfkill_set_states(gps_rfkill,
-					  hp_wmi_get_sw_state(HPWMI_GPS),
-					  hp_wmi_get_hw_state(HPWMI_GPS));
 		break;
 	case HPWMI_CPU_BATTERY_THROTTLE:
 		pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n");
@@ -746,7 +741,7 @@
 						(void *) HPWMI_BLUETOOTH);
 		if (!bluetooth_rfkill) {
 			err = -ENOMEM;
-			goto register_wifi_error;
+			goto register_bluetooth_error;
 		}
 		rfkill_init_sw_state(bluetooth_rfkill,
 				     hp_wmi_get_sw_state(HPWMI_BLUETOOTH));
@@ -764,7 +759,7 @@
 					   (void *) HPWMI_WWAN);
 		if (!wwan_rfkill) {
 			err = -ENOMEM;
-			goto register_bluetooth_error;
+			goto register_wwan_error;
 		}
 		rfkill_init_sw_state(wwan_rfkill,
 				     hp_wmi_get_sw_state(HPWMI_WWAN));
@@ -775,35 +770,13 @@
 			goto register_wwan_error;
 	}
 
-	if (wireless & 0x8) {
-		gps_rfkill = rfkill_alloc("hp-gps", &device->dev,
-						RFKILL_TYPE_GPS,
-						&hp_wmi_rfkill_ops,
-						(void *) HPWMI_GPS);
-		if (!gps_rfkill) {
-			err = -ENOMEM;
-			goto register_wwan_error;
-		}
-		rfkill_init_sw_state(gps_rfkill,
-				     hp_wmi_get_sw_state(HPWMI_GPS));
-		rfkill_set_hw_state(gps_rfkill,
-				    hp_wmi_get_hw_state(HPWMI_GPS));
-		err = rfkill_register(gps_rfkill);
-		if (err)
-			goto register_gps_error;
-	}
-
 	return 0;
-register_gps_error:
-	rfkill_destroy(gps_rfkill);
-	gps_rfkill = NULL;
-	if (bluetooth_rfkill)
-		rfkill_unregister(bluetooth_rfkill);
+
 register_wwan_error:
 	rfkill_destroy(wwan_rfkill);
 	wwan_rfkill = NULL;
-	if (gps_rfkill)
-		rfkill_unregister(gps_rfkill);
+	if (bluetooth_rfkill)
+		rfkill_unregister(bluetooth_rfkill);
 register_bluetooth_error:
 	rfkill_destroy(bluetooth_rfkill);
 	bluetooth_rfkill = NULL;
@@ -907,7 +880,6 @@
 	wifi_rfkill = NULL;
 	bluetooth_rfkill = NULL;
 	wwan_rfkill = NULL;
-	gps_rfkill = NULL;
 	rfkill2_count = 0;
 
 	if (hp_wmi_bios_2009_later() || hp_wmi_rfkill_setup(device))
@@ -960,10 +932,6 @@
 		rfkill_unregister(wwan_rfkill);
 		rfkill_destroy(wwan_rfkill);
 	}
-	if (gps_rfkill) {
-		rfkill_unregister(gps_rfkill);
-		rfkill_destroy(gps_rfkill);
-	}
 
 	return 0;
 }
@@ -999,10 +967,6 @@
 		rfkill_set_states(wwan_rfkill,
 				  hp_wmi_get_sw_state(HPWMI_WWAN),
 				  hp_wmi_get_hw_state(HPWMI_WWAN));
-	if (gps_rfkill)
-		rfkill_set_states(gps_rfkill,
-				  hp_wmi_get_sw_state(HPWMI_GPS),
-				  hp_wmi_get_hw_state(HPWMI_GPS));
 
 	return 0;
 }

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index d78ee15..be3bc2f 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c

@@ -865,6 +865,20 @@
 		},
 	},
 	{
+		.ident = "Lenovo ideapad Y700-15ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-15ISK"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad Y700 Touch-15ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700 Touch-15ISK"),
+		},
+	},
+	{
 		.ident = "Lenovo ideapad Y700-17ISK",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),

diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index e20f23e..f93abc8 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c

@@ -180,8 +180,7 @@
 		return -ENODEV;
 	}
 
-	priv = devm_kzalloc(&device->dev,
-			    sizeof(struct intel_hid_priv *), GFP_KERNEL);
+	priv = devm_kzalloc(&device->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 	dev_set_drvdata(&device->dev, priv);

diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c
index 092519e..3fb1d85 100644
--- a/drivers/platform/x86/intel_pmc_ipc.c
+++ b/drivers/platform/x86/intel_pmc_ipc.c

@@ -67,7 +67,8 @@
 /* exported resources from IFWI */
 #define PLAT_RESOURCE_IPC_INDEX		0
 #define PLAT_RESOURCE_IPC_SIZE		0x1000
-#define PLAT_RESOURCE_GCR_SIZE		0x1000
+#define PLAT_RESOURCE_GCR_OFFSET	0x1008
+#define PLAT_RESOURCE_GCR_SIZE		0x4
 #define PLAT_RESOURCE_BIOS_DATA_INDEX	1
 #define PLAT_RESOURCE_BIOS_IFACE_INDEX	2
 #define PLAT_RESOURCE_TELEM_SSRAM_INDEX	3
@@ -766,7 +767,7 @@
 	}
 	ipcdev.ipc_base = addr;
 
-	ipcdev.gcr_base = res->start + size;
+	ipcdev.gcr_base = res->start + PLAT_RESOURCE_GCR_OFFSET;
 	ipcdev.gcr_size = PLAT_RESOURCE_GCR_SIZE;
 	dev_info(&pdev->dev, "ipc res: %pR\n", res);
 
@@ -824,7 +825,8 @@
 		goto err_device;
 	}
 
-	if (request_irq(ipcdev.irq, ioc, 0, "intel_pmc_ipc", &ipcdev)) {
+	if (request_irq(ipcdev.irq, ioc, IRQF_NO_SUSPEND,
+			"intel_pmc_ipc", &ipcdev)) {
 		dev_err(&pdev->dev, "Failed to request irq\n");
 		ret = -EBUSY;
 		goto err_irq;

diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index f94b730..e81daff 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c

@@ -24,7 +24,6 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/sfi.h>
-#include <linux/module.h>
 #include <asm/intel-mid.h>
 #include <asm/intel_scu_ipc.h>
 
@@ -611,28 +610,6 @@
 	return 0;
 }
 
-/**
- *	ipc_remove	-	remove a bound IPC device
- *	@pdev: PCI device
- *
- *	In practice the SCU is not removable but this function is also
- *	called for each device on a module unload or cleanup which is the
- *	path that will get used.
- *
- *	Free up the mappings and release the PCI resources
- */
-static void ipc_remove(struct pci_dev *pdev)
-{
-	struct intel_scu_ipc_dev *scu = pci_get_drvdata(pdev);
-
-	mutex_lock(&ipclock);
-	scu->dev = NULL;
-	mutex_unlock(&ipclock);
-
-	iounmap(scu->i2c_base);
-	intel_scu_devices_destroy();
-}
-
 static const struct pci_device_id pci_ids[] = {
 	{
 		PCI_VDEVICE(INTEL, PCI_DEVICE_ID_LINCROFT),
@@ -650,17 +627,13 @@
 		0,
 	}
 };
-MODULE_DEVICE_TABLE(pci, pci_ids);
 
 static struct pci_driver ipc_driver = {
+	.driver = {
+		.suppress_bind_attrs = true,
+	},
 	.name = "intel_scu_ipc",
 	.id_table = pci_ids,
 	.probe = ipc_probe,
-	.remove = ipc_remove,
 };
-
-module_pci_driver(ipc_driver);
-
-MODULE_AUTHOR("Sreedhara DS <sreedhara.ds@intel.com>");
-MODULE_DESCRIPTION("Intel SCU IPC driver");
-MODULE_LICENSE("GPL");
+builtin_pci_driver(ipc_driver);

diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c
index f97019b..397119f 100644
--- a/drivers/platform/x86/intel_telemetry_pltdrv.c
+++ b/drivers/platform/x86/intel_telemetry_pltdrv.c

@@ -1030,8 +1030,19 @@
 	switch (telem_unit) {
 	case TELEM_PSS:
 		ret = intel_punit_ipc_command(
+				IPC_PUNIT_BIOS_READ_TELE_TRACE_CTRL,
+				0, 0, NULL, &temp);
+		if (ret) {
+			pr_err("PSS TRACE_CTRL Read Failed\n");
+			goto out;
+		}
+
+		TELEM_CLEAR_VERBOSITY_BITS(temp);
+		TELEM_SET_VERBOSITY_BITS(temp, verbosity);
+
+		ret = intel_punit_ipc_command(
 				IPC_PUNIT_BIOS_WRITE_TELE_TRACE_CTRL,
-				0, 0, &verbosity, NULL);
+				0, 0, &temp, NULL);
 		if (ret) {
 			pr_err("PSS TRACE_CTRL Verbosity Set Failed\n");
 			goto out;

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index a268a7a..e305ab5 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c

@@ -6653,18 +6653,16 @@
 	switch (b) {
 	case 16:
 		bright_maxlvl = 15;
-		pr_info("detected a 16-level brightness capable ThinkPad\n");
 		break;
 	case 8:
 	case 0:
 		bright_maxlvl = 7;
-		pr_info("detected a 8-level brightness capable ThinkPad\n");
 		break;
 	default:
-		pr_info("Unsupported brightness interface\n");
 		tp_features.bright_unkfw = 1;
 		bright_maxlvl = b - 1;
 	}
+	pr_debug("detected %u brightness levels\n", bright_maxlvl + 1);
 }
 
 static int __init brightness_init(struct ibm_init_struct *iibm)

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 7383307..df1f1a7 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c

@@ -36,6 +36,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/proc_fs.h>
@@ -117,6 +118,7 @@
 #define HCI_LCD_BRIGHTNESS		0x002a
 #define HCI_WIRELESS			0x0056
 #define HCI_ACCELEROMETER		0x006d
+#define HCI_COOLING_METHOD		0x007f
 #define HCI_KBD_ILLUMINATION		0x0095
 #define HCI_ECO_MODE			0x0097
 #define HCI_ACCELEROMETER2		0x00a6
@@ -186,6 +188,7 @@
 	int usbsc_bat_level;
 	int usbsc_mode_base;
 	int hotkey_event_type;
+	int max_cooling_method;
 
 	unsigned int illumination_supported:1;
 	unsigned int video_supported:1;
@@ -205,6 +208,7 @@
 	unsigned int panel_power_on_supported:1;
 	unsigned int usb_three_supported:1;
 	unsigned int wwan_supported:1;
+	unsigned int cooling_method_supported:1;
 	unsigned int sysfs_created:1;
 	unsigned int special_functions;
 
@@ -217,6 +221,10 @@
 
 static struct toshiba_acpi_dev *toshiba_acpi;
 
+static bool disable_hotkeys;
+module_param(disable_hotkeys, bool, 0444);
+MODULE_PARM_DESC(disable_hotkeys, "Disables the hotkeys activation");
+
 static const struct acpi_device_id toshiba_device_ids[] = {
 	{"TOS6200", 0},
 	{"TOS6207", 0},
@@ -1194,6 +1202,53 @@
 	return out[0] == TOS_SUCCESS ? 0 : -EIO;
 }
 
+/* Cooling Method */
+static void toshiba_cooling_method_available(struct toshiba_acpi_dev *dev)
+{
+	u32 in[TCI_WORDS] = { HCI_GET, HCI_COOLING_METHOD, 0, 0, 0, 0 };
+	u32 out[TCI_WORDS];
+	acpi_status status;
+
+	dev->cooling_method_supported = 0;
+	dev->max_cooling_method = 0;
+
+	status = tci_raw(dev, in, out);
+	if (ACPI_FAILURE(status))
+		pr_err("ACPI call to get Cooling Method failed\n");
+
+	if (out[0] != TOS_SUCCESS && out[0] != TOS_SUCCESS2)
+		return;
+
+	dev->cooling_method_supported = 1;
+	dev->max_cooling_method = out[3];
+}
+
+static int toshiba_cooling_method_get(struct toshiba_acpi_dev *dev, u32 *state)
+{
+	u32 result = hci_read(dev, HCI_COOLING_METHOD, state);
+
+	if (result == TOS_FAILURE)
+		pr_err("ACPI call to get Cooling Method failed\n");
+
+	if (result == TOS_NOT_SUPPORTED)
+		return -ENODEV;
+
+	return (result == TOS_SUCCESS || result == TOS_SUCCESS2) ? 0 : -EIO;
+}
+
+static int toshiba_cooling_method_set(struct toshiba_acpi_dev *dev, u32 state)
+{
+	u32 result = hci_write(dev, HCI_COOLING_METHOD, state);
+
+	if (result == TOS_FAILURE)
+		pr_err("ACPI call to get Cooling Method failed\n");
+
+	if (result == TOS_NOT_SUPPORTED)
+		return -ENODEV;
+
+	return (result == TOS_SUCCESS || result == TOS_SUCCESS2) ? 0 : -EIO;
+}
+
 /* Transflective Backlight */
 static int get_tr_backlight_status(struct toshiba_acpi_dev *dev, u32 *status)
 {
@@ -2239,6 +2294,54 @@
 }
 static DEVICE_ATTR_RW(usb_three);
 
+static ssize_t cooling_method_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct toshiba_acpi_dev *toshiba = dev_get_drvdata(dev);
+	int state;
+	int ret;
+
+	ret = toshiba_cooling_method_get(toshiba, &state);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d %d\n", state, toshiba->max_cooling_method);
+}
+
+static ssize_t cooling_method_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct toshiba_acpi_dev *toshiba = dev_get_drvdata(dev);
+	int state;
+	int ret;
+
+	ret = kstrtoint(buf, 0, &state);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check for supported values
+	 * Depending on the laptop model, some only support these two:
+	 * 0 - Maximum Performance
+	 * 1 - Battery Optimized
+	 *
+	 * While some others support all three methods:
+	 * 0 - Maximum Performance
+	 * 1 - Performance
+	 * 2 - Battery Optimized
+	 */
+	if (state < 0 || state > toshiba->max_cooling_method)
+		return -EINVAL;
+
+	ret = toshiba_cooling_method_set(toshiba, state);
+	if (ret)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_RW(cooling_method);
+
 static struct attribute *toshiba_attributes[] = {
 	&dev_attr_version.attr,
 	&dev_attr_fan.attr,
@@ -2255,6 +2358,7 @@
 	&dev_attr_kbd_function_keys.attr,
 	&dev_attr_panel_power_on.attr,
 	&dev_attr_usb_three.attr,
+	&dev_attr_cooling_method.attr,
 	NULL,
 };
 
@@ -2289,6 +2393,8 @@
 		exists = (drv->panel_power_on_supported) ? true : false;
 	else if (attr == &dev_attr_usb_three.attr)
 		exists = (drv->usb_three_supported) ? true : false;
+	else if (attr == &dev_attr_cooling_method.attr)
+		exists = (drv->cooling_method_supported) ? true : false;
 
 	return exists ? attr->mode : 0;
 }
@@ -2591,6 +2697,11 @@
 	acpi_handle ec_handle;
 	int error;
 
+	if (disable_hotkeys) {
+		pr_info("Hotkeys disabled by module parameter\n");
+		return 0;
+	}
+
 	if (wmi_has_guid(TOSHIBA_WMI_EVENT_GUID)) {
 		pr_info("WMI event detected, hotkeys will not be monitored\n");
 		return 0;
@@ -2779,6 +2890,8 @@
 		pr_cont(" usb3");
 	if (dev->wwan_supported)
 		pr_cont(" wwan");
+	if (dev->cooling_method_supported)
+		pr_cont(" cooling-method");
 
 	pr_cont("\n");
 }
@@ -2963,6 +3076,8 @@
 	if (dev->wwan_supported)
 		toshiba_acpi_setup_wwan_rfkill(dev);
 
+	toshiba_cooling_method_available(dev);
+
 	print_supported_features(dev);
 
 	ret = sysfs_create_group(&dev->acpi_dev->dev.kobj,

diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index 8099456..8986382 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c

@@ -47,6 +47,10 @@
 #define RK3368_SOC_CON15_FLASH0		BIT(14)
 #define RK3368_SOC_FLASH_SUPPLY_NUM	2
 
+#define RK3399_PMUGRF_CON0		0x180
+#define RK3399_PMUGRF_CON0_VSEL		BIT(8)
+#define RK3399_PMUGRF_VSEL_SUPPLY_NUM	9
+
 struct rockchip_iodomain;
 
 /**
@@ -181,6 +185,25 @@
 		dev_warn(iod->dev, "couldn't update flash0 ctrl\n");
 }
 
+static void rk3399_pmu_iodomain_init(struct rockchip_iodomain *iod)
+{
+	int ret;
+	u32 val;
+
+	/* if no pmu io supply we should leave things alone */
+	if (!iod->supplies[RK3399_PMUGRF_VSEL_SUPPLY_NUM].reg)
+		return;
+
+	/*
+	 * set pmu io iodomain to also use this framework
+	 * instead of a special gpio.
+	 */
+	val = RK3399_PMUGRF_CON0_VSEL | (RK3399_PMUGRF_CON0_VSEL << 16);
+	ret = regmap_write(iod->grf, RK3399_PMUGRF_CON0, val);
+	if (ret < 0)
+		dev_warn(iod->dev, "couldn't update pmu io iodomain ctrl\n");
+}
+
 /*
  * On the rk3188 the io-domains are handled by a shared register with the
  * lower 8 bits being still being continuing drive-strength settings.
@@ -252,6 +275,33 @@
 	},
 };
 
+static const struct rockchip_iodomain_soc_data soc_data_rk3399 = {
+	.grf_offset = 0xe640,
+	.supply_names = {
+		"bt656",		/* APIO2_VDD */
+		"audio",		/* APIO5_VDD */
+		"sdmmc",		/* SDMMC0_VDD */
+		"gpio1830",		/* APIO4_VDD */
+	},
+};
+
+static const struct rockchip_iodomain_soc_data soc_data_rk3399_pmu = {
+	.grf_offset = 0x180,
+	.supply_names = {
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		"pmu1830",		/* PMUIO2_VDD */
+	},
+	.init = rk3399_pmu_iodomain_init,
+};
+
 static const struct of_device_id rockchip_iodomain_match[] = {
 	{
 		.compatible = "rockchip,rk3188-io-voltage-domain",
@@ -269,6 +319,14 @@
 		.compatible = "rockchip,rk3368-pmu-io-voltage-domain",
 		.data = (void *)&soc_data_rk3368_pmu
 	},
+	{
+		.compatible = "rockchip,rk3399-io-voltage-domain",
+		.data = (void *)&soc_data_rk3399
+	},
+	{
+		.compatible = "rockchip,rk3399-pmu-io-voltage-domain",
+		.data = (void *)&soc_data_rk3399_pmu
+	},
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, rockchip_iodomain_match);

diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 8cf0dae..c182efc 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig

@@ -316,7 +316,7 @@
 
 config PWM_RENESAS_TPU
 	tristate "Renesas TPU PWM support"
-	depends on ARCH_SHMOBILE || COMPILE_TEST
+	depends on ARCH_RENESAS || COMPILE_TEST
 	depends on HAS_IOMEM
 	help
 	  This driver exposes the Timer Pulse Unit (TPU) PWM controller found

diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c
index 423ce08..5d5adee 100644
--- a/drivers/pwm/pwm-brcmstb.c
+++ b/drivers/pwm/pwm-brcmstb.c

@@ -274,8 +274,8 @@
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	p->base = devm_ioremap_resource(&pdev->dev, res);
-	if (!p->base) {
-		ret = -ENOMEM;
+	if (IS_ERR(p->base)) {
+		ret = PTR_ERR(p->base);
 		goto out_clk;
 	}
 

diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c
index 8a029f9..2fb30de 100644
--- a/drivers/pwm/pwm-img.c
+++ b/drivers/pwm/pwm-img.c

@@ -237,6 +237,11 @@
 	}
 
 	clk_rate = clk_get_rate(pwm->pwm_clk);
+	if (!clk_rate) {
+		dev_err(&pdev->dev, "pwm clock has no frequency\n");
+		ret = -EINVAL;
+		goto disable_pwmclk;
+	}
 
 	/* The maximum input clock divider is 512 */
 	val = (u64)NSEC_PER_SEC * 512 * pwm->data->max_timebase;

diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c
index 9163085..9861fed 100644
--- a/drivers/pwm/pwm-lpc18xx-sct.c
+++ b/drivers/pwm/pwm-lpc18xx-sct.c

@@ -360,6 +360,11 @@
 	}
 
 	lpc18xx_pwm->clk_rate = clk_get_rate(lpc18xx_pwm->pwm_clk);
+	if (!lpc18xx_pwm->clk_rate) {
+		dev_err(&pdev->dev, "pwm clock has no frequency\n");
+		ret = -EINVAL;
+		goto disable_pwmclk;
+	}
 
 	mutex_init(&lpc18xx_pwm->res_lock);
 	mutex_init(&lpc18xx_pwm->period_lock);

diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c
index 826634e..b7e6ecb 100644
--- a/drivers/pwm/pwm-omap-dmtimer.c
+++ b/drivers/pwm/pwm-omap-dmtimer.c

@@ -31,6 +31,7 @@
 #include <linux/time.h>
 
 #define DM_TIMER_LOAD_MIN 0xfffffffe
+#define DM_TIMER_MAX      0xffffffff
 
 struct pwm_omap_dmtimer_chip {
 	struct pwm_chip chip;
@@ -46,13 +47,9 @@
 	return container_of(chip, struct pwm_omap_dmtimer_chip, chip);
 }
 
-static int pwm_omap_dmtimer_calc_value(unsigned long clk_rate, int ns)
+static u32 pwm_omap_dmtimer_get_clock_cycles(unsigned long clk_rate, int ns)
 {
-	u64 c = (u64)clk_rate * ns;
-
-	do_div(c, NSEC_PER_SEC);
-
-	return DM_TIMER_LOAD_MIN - c;
+	return DIV_ROUND_CLOSEST_ULL((u64)clk_rate * ns, NSEC_PER_SEC);
 }
 
 static void pwm_omap_dmtimer_start(struct pwm_omap_dmtimer_chip *omap)
@@ -99,12 +96,14 @@
 				   int duty_ns, int period_ns)
 {
 	struct pwm_omap_dmtimer_chip *omap = to_pwm_omap_dmtimer_chip(chip);
-	int load_value, match_value;
+	u32 period_cycles, duty_cycles;
+	u32 load_value, match_value;
 	struct clk *fclk;
 	unsigned long clk_rate;
 	bool timer_active;
 
-	dev_dbg(chip->dev, "duty cycle: %d, period %d\n", duty_ns, period_ns);
+	dev_dbg(chip->dev, "requested duty cycle: %d ns, period: %d ns\n",
+		duty_ns, period_ns);
 
 	mutex_lock(&omap->mutex);
 	if (duty_ns == pwm_get_duty_cycle(pwm) &&
@@ -117,15 +116,13 @@
 	fclk = omap->pdata->get_fclk(omap->dm_timer);
 	if (!fclk) {
 		dev_err(chip->dev, "invalid pmtimer fclk\n");
-		mutex_unlock(&omap->mutex);
-		return -EINVAL;
+		goto err_einval;
 	}
 
 	clk_rate = clk_get_rate(fclk);
 	if (!clk_rate) {
 		dev_err(chip->dev, "invalid pmtimer fclk rate\n");
-		mutex_unlock(&omap->mutex);
-		return -EINVAL;
+		goto err_einval;
 	}
 
 	dev_dbg(chip->dev, "clk rate: %luHz\n", clk_rate);
@@ -133,11 +130,51 @@
 	/*
 	 * Calculate the appropriate load and match values based on the
 	 * specified period and duty cycle. The load value determines the
-	 * cycle time and the match value determines the duty cycle.
+	 * period time and the match value determines the duty time.
+	 *
+	 * The period lasts for (DM_TIMER_MAX-load_value+1) clock cycles.
+	 * Similarly, the active time lasts (match_value-load_value+1) cycles.
+	 * The non-active time is the remainder: (DM_TIMER_MAX-match_value)
+	 * clock cycles.
+	 *
+	 * NOTE: It is required that: load_value <= match_value < DM_TIMER_MAX
+	 *
+	 * References:
+	 *   OMAP4430/60/70 TRM sections 22.2.4.10 and 22.2.4.11
+	 *   AM335x Sitara TRM sections 20.1.3.5 and 20.1.3.6
 	 */
-	load_value = pwm_omap_dmtimer_calc_value(clk_rate, period_ns);
-	match_value = pwm_omap_dmtimer_calc_value(clk_rate,
-						  period_ns - duty_ns);
+	period_cycles = pwm_omap_dmtimer_get_clock_cycles(clk_rate, period_ns);
+	duty_cycles = pwm_omap_dmtimer_get_clock_cycles(clk_rate, duty_ns);
+
+	if (period_cycles < 2) {
+		dev_info(chip->dev,
+			 "period %d ns too short for clock rate %lu Hz\n",
+			 period_ns, clk_rate);
+		goto err_einval;
+	}
+
+	if (duty_cycles < 1) {
+		dev_dbg(chip->dev,
+			"duty cycle %d ns is too short for clock rate %lu Hz\n",
+			duty_ns, clk_rate);
+		dev_dbg(chip->dev, "using minimum of 1 clock cycle\n");
+		duty_cycles = 1;
+	} else if (duty_cycles >= period_cycles) {
+		dev_dbg(chip->dev,
+			"duty cycle %d ns is too long for period %d ns at clock rate %lu Hz\n",
+			duty_ns, period_ns, clk_rate);
+		dev_dbg(chip->dev, "using maximum of 1 clock cycle less than period\n");
+		duty_cycles = period_cycles - 1;
+	}
+
+	dev_dbg(chip->dev, "effective duty cycle: %lld ns, period: %lld ns\n",
+		DIV_ROUND_CLOSEST_ULL((u64)NSEC_PER_SEC * duty_cycles,
+				      clk_rate),
+		DIV_ROUND_CLOSEST_ULL((u64)NSEC_PER_SEC * period_cycles,
+				      clk_rate));
+
+	load_value = (DM_TIMER_MAX - period_cycles) + 1;
+	match_value = load_value + duty_cycles - 1;
 
 	/*
 	 * We MUST stop the associated dual-mode timer before attempting to
@@ -166,6 +203,11 @@
 	mutex_unlock(&omap->mutex);
 
 	return 0;
+
+err_einval:
+	mutex_unlock(&omap->mutex);
+
+	return -EINVAL;
 }
 
 static int pwm_omap_dmtimer_set_polarity(struct pwm_chip *chip,

diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig
index 3e3be57..b5a10d3 100644
--- a/drivers/rapidio/Kconfig
+++ b/drivers/rapidio/Kconfig

@@ -67,6 +67,14 @@
 
 endchoice
 
+config RAPIDIO_MPORT_CDEV
+	tristate "RapidIO /dev mport device driver"
+	depends on RAPIDIO
+	help
+	  This option includes generic RapidIO mport device driver which
+	  allows to user space applications to perform RapidIO-specific
+	  operations through selected RapidIO mport.
+
 menu "RapidIO Switch drivers"
 	depends on RAPIDIO
 

diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile
index 9432c49..927dbf8 100644
--- a/drivers/rapidio/devices/Makefile
+++ b/drivers/rapidio/devices/Makefile

@@ -5,3 +5,4 @@
 obj-$(CONFIG_RAPIDIO_TSI721)	+= tsi721_mport.o
 tsi721_mport-y			:= tsi721.o
 tsi721_mport-$(CONFIG_RAPIDIO_DMA_ENGINE) += tsi721_dma.o
+obj-$(CONFIG_RAPIDIO_MPORT_CDEV) += rio_mport_cdev.o

diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
new file mode 100644
index 0000000..5d4d918
--- /dev/null
+++ b/drivers/rapidio/devices/rio_mport_cdev.c

@@ -0,0 +1,2720 @@
+/*
+ * RapidIO mport character device
+ *
+ * Copyright 2014-2015 Integrated Device Technology, Inc.
+ *    Alexandre Bounine <alexandre.bounine@idt.com>
+ * Copyright 2014-2015 Prodrive Technologies
+ *    Andre van Herk <andre.van.herk@prodrive-technologies.com>
+ *    Jerry Jacobs <jerry.jacobs@prodrive-technologies.com>
+ * Copyright (C) 2014 Texas Instruments Incorporated
+ *    Aurelien Jacquiot <a-jacquiot@ti.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/cdev.h>
+#include <linux/ioctl.h>
+#include <linux/uaccess.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+#include <linux/err.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/kfifo.h>
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+#include <linux/dmaengine.h>
+#endif
+
+#include <linux/rio.h>
+#include <linux/rio_ids.h>
+#include <linux/rio_drv.h>
+#include <linux/rio_mport_cdev.h>
+
+#include "../rio.h"
+
+#define DRV_NAME	"rio_mport"
+#define DRV_PREFIX	DRV_NAME ": "
+#define DEV_NAME	"rio_mport"
+#define DRV_VERSION     "1.0.0"
+
+/* Debug output filtering masks */
+enum {
+	DBG_NONE	= 0,
+	DBG_INIT	= BIT(0), /* driver init */
+	DBG_EXIT	= BIT(1), /* driver exit */
+	DBG_MPORT	= BIT(2), /* mport add/remove */
+	DBG_RDEV	= BIT(3), /* RapidIO device add/remove */
+	DBG_DMA		= BIT(4), /* DMA transfer messages */
+	DBG_MMAP	= BIT(5), /* mapping messages */
+	DBG_IBW		= BIT(6), /* inbound window */
+	DBG_EVENT	= BIT(7), /* event handling messages */
+	DBG_OBW		= BIT(8), /* outbound window messages */
+	DBG_DBELL	= BIT(9), /* doorbell messages */
+	DBG_ALL		= ~0,
+};
+
+#ifdef DEBUG
+#define rmcd_debug(level, fmt, arg...)		\
+	do {					\
+		if (DBG_##level & dbg_level)	\
+			pr_debug(DRV_PREFIX "%s: " fmt "\n", __func__, ##arg); \
+	} while (0)
+#else
+#define rmcd_debug(level, fmt, arg...) \
+		no_printk(KERN_DEBUG pr_fmt(DRV_PREFIX fmt "\n"), ##arg)
+#endif
+
+#define rmcd_warn(fmt, arg...) \
+	pr_warn(DRV_PREFIX "%s WARNING " fmt "\n", __func__, ##arg)
+
+#define rmcd_error(fmt, arg...) \
+	pr_err(DRV_PREFIX "%s ERROR " fmt "\n", __func__, ##arg)
+
+MODULE_AUTHOR("Jerry Jacobs <jerry.jacobs@prodrive-technologies.com>");
+MODULE_AUTHOR("Aurelien Jacquiot <a-jacquiot@ti.com>");
+MODULE_AUTHOR("Alexandre Bounine <alexandre.bounine@idt.com>");
+MODULE_AUTHOR("Andre van Herk <andre.van.herk@prodrive-technologies.com>");
+MODULE_DESCRIPTION("RapidIO mport character device driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static int dma_timeout = 3000; /* DMA transfer timeout in msec */
+module_param(dma_timeout, int, S_IRUGO);
+MODULE_PARM_DESC(dma_timeout, "DMA Transfer Timeout in msec (default: 3000)");
+
+#ifdef DEBUG
+static u32 dbg_level = DBG_NONE;
+module_param(dbg_level, uint, S_IWUSR | S_IWGRP | S_IRUGO);
+MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)");
+#endif
+
+/*
+ * An internal DMA coherent buffer
+ */
+struct mport_dma_buf {
+	void		*ib_base;
+	dma_addr_t	ib_phys;
+	u32		ib_size;
+	u64		ib_rio_base;
+	bool		ib_map;
+	struct file	*filp;
+};
+
+/*
+ * Internal memory mapping structure
+ */
+enum rio_mport_map_dir {
+	MAP_INBOUND,
+	MAP_OUTBOUND,
+	MAP_DMA,
+};
+
+struct rio_mport_mapping {
+	struct list_head node;
+	struct mport_dev *md;
+	enum rio_mport_map_dir dir;
+	u32 rioid;
+	u64 rio_addr;
+	dma_addr_t phys_addr; /* for mmap */
+	void *virt_addr; /* kernel address, for dma_free_coherent */
+	u64 size;
+	struct kref ref; /* refcount of vmas sharing the mapping */
+	struct file *filp;
+};
+
+struct rio_mport_dma_map {
+	int valid;
+	uint64_t length;
+	void *vaddr;
+	dma_addr_t paddr;
+};
+
+#define MPORT_MAX_DMA_BUFS	16
+#define MPORT_EVENT_DEPTH	10
+
+/*
+ * mport_dev  driver-specific structure that represents mport device
+ * @active    mport device status flag
+ * @node      list node to maintain list of registered mports
+ * @cdev      character device
+ * @dev       associated device object
+ * @mport     associated subsystem's master port device object
+ * @buf_mutex lock for buffer handling
+ * @file_mutex - lock for open files list
+ * @file_list  - list of open files on given mport
+ * @properties properties of this mport
+ * @portwrites queue of inbound portwrites
+ * @pw_lock    lock for port write queue
+ * @mappings   queue for memory mappings
+ * @dma_chan   DMA channels associated with this device
+ * @dma_ref:
+ * @comp:
+ */
+struct mport_dev {
+	atomic_t		active;
+	struct list_head	node;
+	struct cdev		cdev;
+	struct device		dev;
+	struct rio_mport	*mport;
+	struct mutex		buf_mutex;
+	struct mutex		file_mutex;
+	struct list_head	file_list;
+	struct rio_mport_properties	properties;
+	struct list_head		doorbells;
+	spinlock_t			db_lock;
+	struct list_head		portwrites;
+	spinlock_t			pw_lock;
+	struct list_head	mappings;
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct dma_chan *dma_chan;
+	struct kref	dma_ref;
+	struct completion comp;
+#endif
+};
+
+/*
+ * mport_cdev_priv - data structure specific to individual file object
+ *                   associated with an open device
+ * @md    master port character device object
+ * @async_queue - asynchronous notification queue
+ * @list - file objects tracking list
+ * @db_filters    inbound doorbell filters for this descriptor
+ * @pw_filters    portwrite filters for this descriptor
+ * @event_fifo    event fifo for this descriptor
+ * @event_rx_wait wait queue for this descriptor
+ * @fifo_lock     lock for event_fifo
+ * @event_mask    event mask for this descriptor
+ * @dmach DMA engine channel allocated for specific file object
+ */
+struct mport_cdev_priv {
+	struct mport_dev	*md;
+	struct fasync_struct	*async_queue;
+	struct list_head	list;
+	struct list_head	db_filters;
+	struct list_head        pw_filters;
+	struct kfifo            event_fifo;
+	wait_queue_head_t       event_rx_wait;
+	spinlock_t              fifo_lock;
+	unsigned int            event_mask; /* RIO_DOORBELL, RIO_PORTWRITE */
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct dma_chan		*dmach;
+	struct list_head	async_list;
+	struct list_head	pend_list;
+	spinlock_t              req_lock;
+	struct mutex		dma_lock;
+	struct kref		dma_ref;
+	struct completion	comp;
+#endif
+};
+
+/*
+ * rio_mport_pw_filter - structure to describe a portwrite filter
+ * md_node   node in mport device's list
+ * priv_node node in private file object's list
+ * priv      reference to private data
+ * filter    actual portwrite filter
+ */
+struct rio_mport_pw_filter {
+	struct list_head md_node;
+	struct list_head priv_node;
+	struct mport_cdev_priv *priv;
+	struct rio_pw_filter filter;
+};
+
+/*
+ * rio_mport_db_filter - structure to describe a doorbell filter
+ * @data_node reference to device node
+ * @priv_node node in private data
+ * @priv      reference to private data
+ * @filter    actual doorbell filter
+ */
+struct rio_mport_db_filter {
+	struct list_head data_node;
+	struct list_head priv_node;
+	struct mport_cdev_priv *priv;
+	struct rio_doorbell_filter filter;
+};
+
+static LIST_HEAD(mport_devs);
+static DEFINE_MUTEX(mport_devs_lock);
+
+#if (0) /* used by commented out portion of poll function : FIXME */
+static DECLARE_WAIT_QUEUE_HEAD(mport_cdev_wait);
+#endif
+
+static struct class *dev_class;
+static dev_t dev_number;
+
+static struct workqueue_struct *dma_wq;
+
+static void mport_release_mapping(struct kref *ref);
+
+static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg,
+			      int local)
+{
+	struct rio_mport *mport = priv->md->mport;
+	struct rio_mport_maint_io maint_io;
+	u32 *buffer;
+	u32 offset;
+	size_t length;
+	int ret, i;
+
+	if (unlikely(copy_from_user(&maint_io, arg, sizeof(maint_io))))
+		return -EFAULT;
+
+	if ((maint_io.offset % 4) ||
+	    (maint_io.length == 0) || (maint_io.length % 4))
+		return -EINVAL;
+
+	buffer = vmalloc(maint_io.length);
+	if (buffer == NULL)
+		return -ENOMEM;
+	length = maint_io.length/sizeof(u32);
+	offset = maint_io.offset;
+
+	for (i = 0; i < length; i++) {
+		if (local)
+			ret = __rio_local_read_config_32(mport,
+				offset, &buffer[i]);
+		else
+			ret = rio_mport_read_config_32(mport, maint_io.rioid,
+				maint_io.hopcount, offset, &buffer[i]);
+		if (ret)
+			goto out;
+
+		offset += 4;
+	}
+
+	if (unlikely(copy_to_user(maint_io.buffer, buffer, maint_io.length)))
+		ret = -EFAULT;
+out:
+	vfree(buffer);
+	return ret;
+}
+
+static int rio_mport_maint_wr(struct mport_cdev_priv *priv, void __user *arg,
+			      int local)
+{
+	struct rio_mport *mport = priv->md->mport;
+	struct rio_mport_maint_io maint_io;
+	u32 *buffer;
+	u32 offset;
+	size_t length;
+	int ret = -EINVAL, i;
+
+	if (unlikely(copy_from_user(&maint_io, arg, sizeof(maint_io))))
+		return -EFAULT;
+
+	if ((maint_io.offset % 4) ||
+	    (maint_io.length == 0) || (maint_io.length % 4))
+		return -EINVAL;
+
+	buffer = vmalloc(maint_io.length);
+	if (buffer == NULL)
+		return -ENOMEM;
+	length = maint_io.length;
+
+	if (unlikely(copy_from_user(buffer, maint_io.buffer, length))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	offset = maint_io.offset;
+	length /= sizeof(u32);
+
+	for (i = 0; i < length; i++) {
+		if (local)
+			ret = __rio_local_write_config_32(mport,
+							  offset, buffer[i]);
+		else
+			ret = rio_mport_write_config_32(mport, maint_io.rioid,
+							maint_io.hopcount,
+							offset, buffer[i]);
+		if (ret)
+			goto out;
+
+		offset += 4;
+	}
+
+out:
+	vfree(buffer);
+	return ret;
+}
+
+
+/*
+ * Inbound/outbound memory mapping functions
+ */
+static int
+rio_mport_create_outbound_mapping(struct mport_dev *md, struct file *filp,
+				  u32 rioid, u64 raddr, u32 size,
+				  dma_addr_t *paddr)
+{
+	struct rio_mport *mport = md->mport;
+	struct rio_mport_mapping *map;
+	int ret;
+
+	rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%x", rioid, raddr, size);
+
+	map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	ret = rio_map_outb_region(mport, rioid, raddr, size, 0, paddr);
+	if (ret < 0)
+		goto err_map_outb;
+
+	map->dir = MAP_OUTBOUND;
+	map->rioid = rioid;
+	map->rio_addr = raddr;
+	map->size = size;
+	map->phys_addr = *paddr;
+	map->filp = filp;
+	map->md = md;
+	kref_init(&map->ref);
+	list_add_tail(&map->node, &md->mappings);
+	return 0;
+err_map_outb:
+	kfree(map);
+	return ret;
+}
+
+static int
+rio_mport_get_outbound_mapping(struct mport_dev *md, struct file *filp,
+			       u32 rioid, u64 raddr, u32 size,
+			       dma_addr_t *paddr)
+{
+	struct rio_mport_mapping *map;
+	int err = -ENOMEM;
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry(map, &md->mappings, node) {
+		if (map->dir != MAP_OUTBOUND)
+			continue;
+		if (rioid == map->rioid &&
+		    raddr == map->rio_addr && size == map->size) {
+			*paddr = map->phys_addr;
+			err = 0;
+			break;
+		} else if (rioid == map->rioid &&
+			   raddr < (map->rio_addr + map->size - 1) &&
+			   (raddr + size) > map->rio_addr) {
+			err = -EBUSY;
+			break;
+		}
+	}
+
+	/* If not found, create new */
+	if (err == -ENOMEM)
+		err = rio_mport_create_outbound_mapping(md, filp, rioid, raddr,
+						size, paddr);
+	mutex_unlock(&md->buf_mutex);
+	return err;
+}
+
+static int rio_mport_obw_map(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *data = priv->md;
+	struct rio_mmap map;
+	dma_addr_t paddr;
+	int ret;
+
+	if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_mmap))))
+		return -EFAULT;
+
+	rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%llx",
+		   map.rioid, map.rio_addr, map.length);
+
+	ret = rio_mport_get_outbound_mapping(data, filp, map.rioid,
+					     map.rio_addr, map.length, &paddr);
+	if (ret < 0) {
+		rmcd_error("Failed to set OBW err= %d", ret);
+		return ret;
+	}
+
+	map.handle = paddr;
+
+	if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_mmap))))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * rio_mport_obw_free() - unmap an OutBound Window from RapidIO address space
+ *
+ * @priv: driver private data
+ * @arg:  buffer handle returned by allocation routine
+ */
+static int rio_mport_obw_free(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	u64 handle;
+	struct rio_mport_mapping *map, *_map;
+
+	if (!md->mport->ops->unmap_outb)
+		return -EPROTONOSUPPORT;
+
+	if (copy_from_user(&handle, arg, sizeof(u64)))
+		return -EFAULT;
+
+	rmcd_debug(OBW, "h=0x%llx", handle);
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		if (map->dir == MAP_OUTBOUND && map->phys_addr == handle) {
+			if (map->filp == filp) {
+				rmcd_debug(OBW, "kref_put h=0x%llx", handle);
+				map->filp = NULL;
+				kref_put(&map->ref, mport_release_mapping);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	return 0;
+}
+
+/*
+ * maint_hdid_set() - Set the host Device ID
+ * @priv: driver private data
+ * @arg:	Device Id
+ */
+static int maint_hdid_set(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	uint16_t hdid;
+
+	if (copy_from_user(&hdid, arg, sizeof(uint16_t)))
+		return -EFAULT;
+
+	md->mport->host_deviceid = hdid;
+	md->properties.hdid = hdid;
+	rio_local_set_device_id(md->mport, hdid);
+
+	rmcd_debug(MPORT, "Set host device Id to %d", hdid);
+
+	return 0;
+}
+
+/*
+ * maint_comptag_set() - Set the host Component Tag
+ * @priv: driver private data
+ * @arg:	Component Tag
+ */
+static int maint_comptag_set(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	uint32_t comptag;
+
+	if (copy_from_user(&comptag, arg, sizeof(uint32_t)))
+		return -EFAULT;
+
+	rio_local_write_config_32(md->mport, RIO_COMPONENT_TAG_CSR, comptag);
+
+	rmcd_debug(MPORT, "Set host Component Tag to %d", comptag);
+
+	return 0;
+}
+
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+
+struct mport_dma_req {
+	struct list_head node;
+	struct file *filp;
+	struct mport_cdev_priv *priv;
+	enum rio_transfer_sync sync;
+	struct sg_table sgt;
+	struct page **page_list;
+	unsigned int nr_pages;
+	struct rio_mport_mapping *map;
+	struct dma_chan *dmach;
+	enum dma_data_direction dir;
+	dma_cookie_t cookie;
+	enum dma_status	status;
+	struct completion req_comp;
+};
+
+struct mport_faf_work {
+	struct work_struct work;
+	struct mport_dma_req *req;
+};
+
+static void mport_release_def_dma(struct kref *dma_ref)
+{
+	struct mport_dev *md =
+			container_of(dma_ref, struct mport_dev, dma_ref);
+
+	rmcd_debug(EXIT, "DMA_%d", md->dma_chan->chan_id);
+	rio_release_dma(md->dma_chan);
+	md->dma_chan = NULL;
+}
+
+static void mport_release_dma(struct kref *dma_ref)
+{
+	struct mport_cdev_priv *priv =
+			container_of(dma_ref, struct mport_cdev_priv, dma_ref);
+
+	rmcd_debug(EXIT, "DMA_%d", priv->dmach->chan_id);
+	complete(&priv->comp);
+}
+
+static void dma_req_free(struct mport_dma_req *req)
+{
+	struct mport_cdev_priv *priv = req->priv;
+	unsigned int i;
+
+	dma_unmap_sg(req->dmach->device->dev,
+		     req->sgt.sgl, req->sgt.nents, req->dir);
+	sg_free_table(&req->sgt);
+	if (req->page_list) {
+		for (i = 0; i < req->nr_pages; i++)
+			put_page(req->page_list[i]);
+		kfree(req->page_list);
+	}
+
+	if (req->map) {
+		mutex_lock(&req->map->md->buf_mutex);
+		kref_put(&req->map->ref, mport_release_mapping);
+		mutex_unlock(&req->map->md->buf_mutex);
+	}
+
+	kref_put(&priv->dma_ref, mport_release_dma);
+
+	kfree(req);
+}
+
+static void dma_xfer_callback(void *param)
+{
+	struct mport_dma_req *req = (struct mport_dma_req *)param;
+	struct mport_cdev_priv *priv = req->priv;
+
+	req->status = dma_async_is_tx_complete(priv->dmach, req->cookie,
+					       NULL, NULL);
+	complete(&req->req_comp);
+}
+
+static void dma_faf_cleanup(struct work_struct *_work)
+{
+	struct mport_faf_work *work = container_of(_work,
+						struct mport_faf_work, work);
+	struct mport_dma_req *req = work->req;
+
+	dma_req_free(req);
+	kfree(work);
+}
+
+static void dma_faf_callback(void *param)
+{
+	struct mport_dma_req *req = (struct mport_dma_req *)param;
+	struct mport_faf_work *work;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return;
+
+	INIT_WORK(&work->work, dma_faf_cleanup);
+	work->req = req;
+	queue_work(dma_wq, &work->work);
+}
+
+/*
+ * prep_dma_xfer() - Configure and send request to DMAengine to prepare DMA
+ *                   transfer object.
+ * Returns pointer to DMA transaction descriptor allocated by DMA driver on
+ * success or ERR_PTR (and/or NULL) if failed. Caller must check returned
+ * non-NULL pointer using IS_ERR macro.
+ */
+static struct dma_async_tx_descriptor
+*prep_dma_xfer(struct dma_chan *chan, struct rio_transfer_io *transfer,
+	struct sg_table *sgt, int nents, enum dma_transfer_direction dir,
+	enum dma_ctrl_flags flags)
+{
+	struct rio_dma_data tx_data;
+
+	tx_data.sg = sgt->sgl;
+	tx_data.sg_len = nents;
+	tx_data.rio_addr_u = 0;
+	tx_data.rio_addr = transfer->rio_addr;
+	if (dir == DMA_MEM_TO_DEV) {
+		switch (transfer->method) {
+		case RIO_EXCHANGE_NWRITE:
+			tx_data.wr_type = RDW_ALL_NWRITE;
+			break;
+		case RIO_EXCHANGE_NWRITE_R_ALL:
+			tx_data.wr_type = RDW_ALL_NWRITE_R;
+			break;
+		case RIO_EXCHANGE_NWRITE_R:
+			tx_data.wr_type = RDW_LAST_NWRITE_R;
+			break;
+		case RIO_EXCHANGE_DEFAULT:
+			tx_data.wr_type = RDW_DEFAULT;
+			break;
+		default:
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	return rio_dma_prep_xfer(chan, transfer->rioid, &tx_data, dir, flags);
+}
+
+/* Request DMA channel associated with this mport device.
+ * Try to request DMA channel for every new process that opened given
+ * mport. If a new DMA channel is not available use default channel
+ * which is the first DMA channel opened on mport device.
+ */
+static int get_dma_channel(struct mport_cdev_priv *priv)
+{
+	mutex_lock(&priv->dma_lock);
+	if (!priv->dmach) {
+		priv->dmach = rio_request_mport_dma(priv->md->mport);
+		if (!priv->dmach) {
+			/* Use default DMA channel if available */
+			if (priv->md->dma_chan) {
+				priv->dmach = priv->md->dma_chan;
+				kref_get(&priv->md->dma_ref);
+			} else {
+				rmcd_error("Failed to get DMA channel");
+				mutex_unlock(&priv->dma_lock);
+				return -ENODEV;
+			}
+		} else if (!priv->md->dma_chan) {
+			/* Register default DMA channel if we do not have one */
+			priv->md->dma_chan = priv->dmach;
+			kref_init(&priv->md->dma_ref);
+			rmcd_debug(DMA, "Register DMA_chan %d as default",
+				   priv->dmach->chan_id);
+		}
+
+		kref_init(&priv->dma_ref);
+		init_completion(&priv->comp);
+	}
+
+	kref_get(&priv->dma_ref);
+	mutex_unlock(&priv->dma_lock);
+	return 0;
+}
+
+static void put_dma_channel(struct mport_cdev_priv *priv)
+{
+	kref_put(&priv->dma_ref, mport_release_dma);
+}
+
+/*
+ * DMA transfer functions
+ */
+static int do_dma_request(struct mport_dma_req *req,
+			  struct rio_transfer_io *xfer,
+			  enum rio_transfer_sync sync, int nents)
+{
+	struct mport_cdev_priv *priv;
+	struct sg_table *sgt;
+	struct dma_chan *chan;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	unsigned long tmo = msecs_to_jiffies(dma_timeout);
+	enum dma_transfer_direction dir;
+	long wret;
+	int ret = 0;
+
+	priv = req->priv;
+	sgt = &req->sgt;
+
+	chan = priv->dmach;
+	dir = (req->dir == DMA_FROM_DEVICE) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
+
+	rmcd_debug(DMA, "%s(%d) uses %s for DMA_%s",
+		   current->comm, task_pid_nr(current),
+		   dev_name(&chan->dev->device),
+		   (dir == DMA_DEV_TO_MEM)?"READ":"WRITE");
+
+	/* Initialize DMA transaction request */
+	tx = prep_dma_xfer(chan, xfer, sgt, nents, dir,
+			   DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+
+	if (!tx) {
+		rmcd_debug(DMA, "prep error for %s A:0x%llx L:0x%llx",
+			(dir == DMA_DEV_TO_MEM)?"READ":"WRITE",
+			xfer->rio_addr, xfer->length);
+		ret = -EIO;
+		goto err_out;
+	} else if (IS_ERR(tx)) {
+		ret = PTR_ERR(tx);
+		rmcd_debug(DMA, "prep error %d for %s A:0x%llx L:0x%llx", ret,
+			(dir == DMA_DEV_TO_MEM)?"READ":"WRITE",
+			xfer->rio_addr, xfer->length);
+		goto err_out;
+	}
+
+	if (sync == RIO_TRANSFER_FAF)
+		tx->callback = dma_faf_callback;
+	else
+		tx->callback = dma_xfer_callback;
+	tx->callback_param = req;
+
+	req->dmach = chan;
+	req->sync = sync;
+	req->status = DMA_IN_PROGRESS;
+	init_completion(&req->req_comp);
+
+	cookie = dmaengine_submit(tx);
+	req->cookie = cookie;
+
+	rmcd_debug(DMA, "pid=%d DMA_%s tx_cookie = %d", task_pid_nr(current),
+		   (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie);
+
+	if (dma_submit_error(cookie)) {
+		rmcd_error("submit err=%d (addr:0x%llx len:0x%llx)",
+			   cookie, xfer->rio_addr, xfer->length);
+		ret = -EIO;
+		goto err_out;
+	}
+
+	dma_async_issue_pending(chan);
+
+	if (sync == RIO_TRANSFER_ASYNC) {
+		spin_lock(&priv->req_lock);
+		list_add_tail(&req->node, &priv->async_list);
+		spin_unlock(&priv->req_lock);
+		return cookie;
+	} else if (sync == RIO_TRANSFER_FAF)
+		return 0;
+
+	wret = wait_for_completion_interruptible_timeout(&req->req_comp, tmo);
+
+	if (wret == 0) {
+		/* Timeout on wait occurred */
+		rmcd_error("%s(%d) timed out waiting for DMA_%s %d",
+		       current->comm, task_pid_nr(current),
+		       (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie);
+		return -ETIMEDOUT;
+	} else if (wret == -ERESTARTSYS) {
+		/* Wait_for_completion was interrupted by a signal but DMA may
+		 * be in progress
+		 */
+		rmcd_error("%s(%d) wait for DMA_%s %d was interrupted",
+			current->comm, task_pid_nr(current),
+			(dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie);
+		return -EINTR;
+	}
+
+	if (req->status != DMA_COMPLETE) {
+		/* DMA transaction completion was signaled with error */
+		rmcd_error("%s(%d) DMA_%s %d completed with status %d (ret=%d)",
+			current->comm, task_pid_nr(current),
+			(dir == DMA_DEV_TO_MEM)?"READ":"WRITE",
+			cookie, req->status, ret);
+		ret = -EIO;
+	}
+
+err_out:
+	return ret;
+}
+
+/*
+ * rio_dma_transfer() - Perform RapidIO DMA data transfer to/from
+ *                      the remote RapidIO device
+ * @filp: file pointer associated with the call
+ * @transfer_mode: DMA transfer mode
+ * @sync: synchronization mode
+ * @dir: DMA transfer direction (DMA_MEM_TO_DEV = write OR
+ *                               DMA_DEV_TO_MEM = read)
+ * @xfer: data transfer descriptor structure
+ */
+static int
+rio_dma_transfer(struct file *filp, uint32_t transfer_mode,
+		 enum rio_transfer_sync sync, enum dma_data_direction dir,
+		 struct rio_transfer_io *xfer)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	unsigned long nr_pages = 0;
+	struct page **page_list = NULL;
+	struct mport_dma_req *req;
+	struct mport_dev *md = priv->md;
+	struct dma_chan *chan;
+	int i, ret;
+	int nents;
+
+	if (xfer->length == 0)
+		return -EINVAL;
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	ret = get_dma_channel(priv);
+	if (ret) {
+		kfree(req);
+		return ret;
+	}
+
+	/*
+	 * If parameter loc_addr != NULL, we are transferring data from/to
+	 * data buffer allocated in user-space: lock in memory user-space
+	 * buffer pages and build an SG table for DMA transfer request
+	 *
+	 * Otherwise (loc_addr == NULL) contiguous kernel-space buffer is
+	 * used for DMA data transfers: build single entry SG table using
+	 * offset within the internal buffer specified by handle parameter.
+	 */
+	if (xfer->loc_addr) {
+		unsigned long offset;
+		long pinned;
+
+		offset = (unsigned long)xfer->loc_addr & ~PAGE_MASK;
+		nr_pages = PAGE_ALIGN(xfer->length + offset) >> PAGE_SHIFT;
+
+		page_list = kmalloc_array(nr_pages,
+					  sizeof(*page_list), GFP_KERNEL);
+		if (page_list == NULL) {
+			ret = -ENOMEM;
+			goto err_req;
+		}
+
+		down_read(&current->mm->mmap_sem);
+		pinned = get_user_pages(
+				(unsigned long)xfer->loc_addr & PAGE_MASK,
+				nr_pages, dir == DMA_FROM_DEVICE, 0,
+				page_list, NULL);
+		up_read(&current->mm->mmap_sem);
+
+		if (pinned != nr_pages) {
+			if (pinned < 0) {
+				rmcd_error("get_user_pages err=%ld", pinned);
+				nr_pages = 0;
+			} else
+				rmcd_error("pinned %ld out of %ld pages",
+					   pinned, nr_pages);
+			ret = -EFAULT;
+			goto err_pg;
+		}
+
+		ret = sg_alloc_table_from_pages(&req->sgt, page_list, nr_pages,
+					offset, xfer->length, GFP_KERNEL);
+		if (ret) {
+			rmcd_error("sg_alloc_table failed with err=%d", ret);
+			goto err_pg;
+		}
+
+		req->page_list = page_list;
+		req->nr_pages = nr_pages;
+	} else {
+		dma_addr_t baddr;
+		struct rio_mport_mapping *map;
+
+		baddr = (dma_addr_t)xfer->handle;
+
+		mutex_lock(&md->buf_mutex);
+		list_for_each_entry(map, &md->mappings, node) {
+			if (baddr >= map->phys_addr &&
+			    baddr < (map->phys_addr + map->size)) {
+				kref_get(&map->ref);
+				req->map = map;
+				break;
+			}
+		}
+		mutex_unlock(&md->buf_mutex);
+
+		if (req->map == NULL) {
+			ret = -ENOMEM;
+			goto err_req;
+		}
+
+		if (xfer->length + xfer->offset > map->size) {
+			ret = -EINVAL;
+			goto err_req;
+		}
+
+		ret = sg_alloc_table(&req->sgt, 1, GFP_KERNEL);
+		if (unlikely(ret)) {
+			rmcd_error("sg_alloc_table failed for internal buf");
+			goto err_req;
+		}
+
+		sg_set_buf(req->sgt.sgl,
+			   map->virt_addr + (baddr - map->phys_addr) +
+				xfer->offset, xfer->length);
+	}
+
+	req->dir = dir;
+	req->filp = filp;
+	req->priv = priv;
+	chan = priv->dmach;
+
+	nents = dma_map_sg(chan->device->dev,
+			   req->sgt.sgl, req->sgt.nents, dir);
+	if (nents == -EFAULT) {
+		rmcd_error("Failed to map SG list");
+		return -EFAULT;
+	}
+
+	ret = do_dma_request(req, xfer, sync, nents);
+
+	if (ret >= 0) {
+		if (sync == RIO_TRANSFER_SYNC)
+			goto sync_out;
+		return ret; /* return ASYNC cookie */
+	}
+
+	if (ret == -ETIMEDOUT || ret == -EINTR) {
+		/*
+		 * This can happen only in case of SYNC transfer.
+		 * Do not free unfinished request structure immediately.
+		 * Place it into pending list and deal with it later
+		 */
+		spin_lock(&priv->req_lock);
+		list_add_tail(&req->node, &priv->pend_list);
+		spin_unlock(&priv->req_lock);
+		return ret;
+	}
+
+
+	rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
+sync_out:
+	dma_unmap_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir);
+	sg_free_table(&req->sgt);
+err_pg:
+	if (page_list) {
+		for (i = 0; i < nr_pages; i++)
+			put_page(page_list[i]);
+		kfree(page_list);
+	}
+err_req:
+	if (req->map) {
+		mutex_lock(&md->buf_mutex);
+		kref_put(&req->map->ref, mport_release_mapping);
+		mutex_unlock(&md->buf_mutex);
+	}
+	put_dma_channel(priv);
+	kfree(req);
+	return ret;
+}
+
+static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct rio_transaction transaction;
+	struct rio_transfer_io *transfer;
+	enum dma_data_direction dir;
+	int i, ret = 0;
+
+	if (unlikely(copy_from_user(&transaction, arg, sizeof(transaction))))
+		return -EFAULT;
+
+	if (transaction.count != 1)
+		return -EINVAL;
+
+	if ((transaction.transfer_mode &
+	     priv->md->properties.transfer_mode) == 0)
+		return -ENODEV;
+
+	transfer = vmalloc(transaction.count * sizeof(struct rio_transfer_io));
+	if (!transfer)
+		return -ENOMEM;
+
+	if (unlikely(copy_from_user(transfer, transaction.block,
+	      transaction.count * sizeof(struct rio_transfer_io)))) {
+		ret = -EFAULT;
+		goto out_free;
+	}
+
+	dir = (transaction.dir == RIO_TRANSFER_DIR_READ) ?
+					DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	for (i = 0; i < transaction.count && ret == 0; i++)
+		ret = rio_dma_transfer(filp, transaction.transfer_mode,
+			transaction.sync, dir, &transfer[i]);
+
+	if (unlikely(copy_to_user(transaction.block, transfer,
+	      transaction.count * sizeof(struct rio_transfer_io))))
+		ret = -EFAULT;
+
+out_free:
+	vfree(transfer);
+
+	return ret;
+}
+
+static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv;
+	struct mport_dev *md;
+	struct rio_async_tx_wait w_param;
+	struct mport_dma_req *req;
+	dma_cookie_t cookie;
+	unsigned long tmo;
+	long wret;
+	int found = 0;
+	int ret;
+
+	priv = (struct mport_cdev_priv *)filp->private_data;
+	md = priv->md;
+
+	if (unlikely(copy_from_user(&w_param, arg, sizeof(w_param))))
+		return -EFAULT;
+
+	cookie = w_param.token;
+	if (w_param.timeout)
+		tmo = msecs_to_jiffies(w_param.timeout);
+	else /* Use default DMA timeout */
+		tmo = msecs_to_jiffies(dma_timeout);
+
+	spin_lock(&priv->req_lock);
+	list_for_each_entry(req, &priv->async_list, node) {
+		if (req->cookie == cookie) {
+			list_del(&req->node);
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock(&priv->req_lock);
+
+	if (!found)
+		return -EAGAIN;
+
+	wret = wait_for_completion_interruptible_timeout(&req->req_comp, tmo);
+
+	if (wret == 0) {
+		/* Timeout on wait occurred */
+		rmcd_error("%s(%d) timed out waiting for ASYNC DMA_%s",
+		       current->comm, task_pid_nr(current),
+		       (req->dir == DMA_FROM_DEVICE)?"READ":"WRITE");
+		ret = -ETIMEDOUT;
+		goto err_tmo;
+	} else if (wret == -ERESTARTSYS) {
+		/* Wait_for_completion was interrupted by a signal but DMA may
+		 * be still in progress
+		 */
+		rmcd_error("%s(%d) wait for ASYNC DMA_%s was interrupted",
+			current->comm, task_pid_nr(current),
+			(req->dir == DMA_FROM_DEVICE)?"READ":"WRITE");
+		ret = -EINTR;
+		goto err_tmo;
+	}
+
+	if (req->status != DMA_COMPLETE) {
+		/* DMA transaction completion signaled with transfer error */
+		rmcd_error("%s(%d) ASYNC DMA_%s completion with status %d",
+			current->comm, task_pid_nr(current),
+			(req->dir == DMA_FROM_DEVICE)?"READ":"WRITE",
+			req->status);
+		ret = -EIO;
+	} else
+		ret = 0;
+
+	if (req->status != DMA_IN_PROGRESS && req->status != DMA_PAUSED)
+		dma_req_free(req);
+
+	return ret;
+
+err_tmo:
+	/* Return request back into async queue */
+	spin_lock(&priv->req_lock);
+	list_add_tail(&req->node, &priv->async_list);
+	spin_unlock(&priv->req_lock);
+	return ret;
+}
+
+static int rio_mport_create_dma_mapping(struct mport_dev *md, struct file *filp,
+			uint64_t size, struct rio_mport_mapping **mapping)
+{
+	struct rio_mport_mapping *map;
+
+	map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->virt_addr = dma_alloc_coherent(md->mport->dev.parent, size,
+					    &map->phys_addr, GFP_KERNEL);
+	if (map->virt_addr == NULL) {
+		kfree(map);
+		return -ENOMEM;
+	}
+
+	map->dir = MAP_DMA;
+	map->size = size;
+	map->filp = filp;
+	map->md = md;
+	kref_init(&map->ref);
+	mutex_lock(&md->buf_mutex);
+	list_add_tail(&map->node, &md->mappings);
+	mutex_unlock(&md->buf_mutex);
+	*mapping = map;
+
+	return 0;
+}
+
+static int rio_mport_alloc_dma(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	struct rio_dma_mem map;
+	struct rio_mport_mapping *mapping = NULL;
+	int ret;
+
+	if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_dma_mem))))
+		return -EFAULT;
+
+	ret = rio_mport_create_dma_mapping(md, filp, map.length, &mapping);
+	if (ret)
+		return ret;
+
+	map.dma_handle = mapping->phys_addr;
+
+	if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_dma_mem)))) {
+		mutex_lock(&md->buf_mutex);
+		kref_put(&mapping->ref, mport_release_mapping);
+		mutex_unlock(&md->buf_mutex);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int rio_mport_free_dma(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	u64 handle;
+	int ret = -EFAULT;
+	struct rio_mport_mapping *map, *_map;
+
+	if (copy_from_user(&handle, arg, sizeof(u64)))
+		return -EFAULT;
+	rmcd_debug(EXIT, "filp=%p", filp);
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		if (map->dir == MAP_DMA && map->phys_addr == handle &&
+		    map->filp == filp) {
+			kref_put(&map->ref, mport_release_mapping);
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (ret == -EFAULT) {
+		rmcd_debug(DMA, "ERR no matching mapping");
+		return ret;
+	}
+
+	return 0;
+}
+#else
+static int rio_mport_transfer_ioctl(struct file *filp, void *arg)
+{
+	return -ENODEV;
+}
+
+static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
+{
+	return -ENODEV;
+}
+
+static int rio_mport_alloc_dma(struct file *filp, void __user *arg)
+{
+	return -ENODEV;
+}
+
+static int rio_mport_free_dma(struct file *filp, void __user *arg)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
+
+/*
+ * Inbound/outbound memory mapping functions
+ */
+
+static int
+rio_mport_create_inbound_mapping(struct mport_dev *md, struct file *filp,
+				u64 raddr, u32 size,
+				struct rio_mport_mapping **mapping)
+{
+	struct rio_mport *mport = md->mport;
+	struct rio_mport_mapping *map;
+	int ret;
+
+	map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->virt_addr = dma_alloc_coherent(mport->dev.parent, size,
+					    &map->phys_addr, GFP_KERNEL);
+	if (map->virt_addr == NULL) {
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+
+	if (raddr == RIO_MAP_ANY_ADDR)
+		raddr = map->phys_addr;
+	ret = rio_map_inb_region(mport, map->phys_addr, raddr, size, 0);
+	if (ret < 0)
+		goto err_map_inb;
+
+	map->dir = MAP_INBOUND;
+	map->rio_addr = raddr;
+	map->size = size;
+	map->filp = filp;
+	map->md = md;
+	kref_init(&map->ref);
+	mutex_lock(&md->buf_mutex);
+	list_add_tail(&map->node, &md->mappings);
+	mutex_unlock(&md->buf_mutex);
+	*mapping = map;
+	return 0;
+
+err_map_inb:
+	dma_free_coherent(mport->dev.parent, size,
+			  map->virt_addr, map->phys_addr);
+err_dma_alloc:
+	kfree(map);
+	return ret;
+}
+
+static int
+rio_mport_get_inbound_mapping(struct mport_dev *md, struct file *filp,
+			      u64 raddr, u32 size,
+			      struct rio_mport_mapping **mapping)
+{
+	struct rio_mport_mapping *map;
+	int err = -ENOMEM;
+
+	if (raddr == RIO_MAP_ANY_ADDR)
+		goto get_new;
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry(map, &md->mappings, node) {
+		if (map->dir != MAP_INBOUND)
+			continue;
+		if (raddr == map->rio_addr && size == map->size) {
+			/* allow exact match only */
+			*mapping = map;
+			err = 0;
+			break;
+		} else if (raddr < (map->rio_addr + map->size - 1) &&
+			   (raddr + size) > map->rio_addr) {
+			err = -EBUSY;
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (err != -ENOMEM)
+		return err;
+get_new:
+	/* not found, create new */
+	return rio_mport_create_inbound_mapping(md, filp, raddr, size, mapping);
+}
+
+static int rio_mport_map_inbound(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	struct rio_mmap map;
+	struct rio_mport_mapping *mapping = NULL;
+	int ret;
+
+	if (!md->mport->ops->map_inb)
+		return -EPROTONOSUPPORT;
+	if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_mmap))))
+		return -EFAULT;
+
+	rmcd_debug(IBW, "%s filp=%p", dev_name(&priv->md->dev), filp);
+
+	ret = rio_mport_get_inbound_mapping(md, filp, map.rio_addr,
+					    map.length, &mapping);
+	if (ret)
+		return ret;
+
+	map.handle = mapping->phys_addr;
+	map.rio_addr = mapping->rio_addr;
+
+	if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_mmap)))) {
+		/* Delete mapping if it was created by this request */
+		if (ret == 0 && mapping->filp == filp) {
+			mutex_lock(&md->buf_mutex);
+			kref_put(&mapping->ref, mport_release_mapping);
+			mutex_unlock(&md->buf_mutex);
+		}
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * rio_mport_inbound_free() - unmap from RapidIO address space and free
+ *                    previously allocated inbound DMA coherent buffer
+ * @priv: driver private data
+ * @arg:  buffer handle returned by allocation routine
+ */
+static int rio_mport_inbound_free(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	u64 handle;
+	struct rio_mport_mapping *map, *_map;
+
+	rmcd_debug(IBW, "%s filp=%p", dev_name(&priv->md->dev), filp);
+
+	if (!md->mport->ops->unmap_inb)
+		return -EPROTONOSUPPORT;
+
+	if (copy_from_user(&handle, arg, sizeof(u64)))
+		return -EFAULT;
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		if (map->dir == MAP_INBOUND && map->phys_addr == handle) {
+			if (map->filp == filp) {
+				map->filp = NULL;
+				kref_put(&map->ref, mport_release_mapping);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	return 0;
+}
+
+/*
+ * maint_port_idx_get() - Get the port index of the mport instance
+ * @priv: driver private data
+ * @arg:  port index
+ */
+static int maint_port_idx_get(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	uint32_t port_idx = md->mport->index;
+
+	rmcd_debug(MPORT, "port_index=%d", port_idx);
+
+	if (copy_to_user(arg, &port_idx, sizeof(port_idx)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int rio_mport_add_event(struct mport_cdev_priv *priv,
+			       struct rio_event *event)
+{
+	int overflow;
+
+	if (!(priv->event_mask & event->header))
+		return -EACCES;
+
+	spin_lock(&priv->fifo_lock);
+	overflow = kfifo_avail(&priv->event_fifo) < sizeof(*event)
+		|| kfifo_in(&priv->event_fifo, (unsigned char *)event,
+			sizeof(*event)) != sizeof(*event);
+	spin_unlock(&priv->fifo_lock);
+
+	wake_up_interruptible(&priv->event_rx_wait);
+
+	if (overflow) {
+		dev_warn(&priv->md->dev, DRV_NAME ": event fifo overflow\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void rio_mport_doorbell_handler(struct rio_mport *mport, void *dev_id,
+				       u16 src, u16 dst, u16 info)
+{
+	struct mport_dev *data = dev_id;
+	struct mport_cdev_priv *priv;
+	struct rio_mport_db_filter *db_filter;
+	struct rio_event event;
+	int handled;
+
+	event.header = RIO_DOORBELL;
+	event.u.doorbell.rioid = src;
+	event.u.doorbell.payload = info;
+
+	handled = 0;
+	spin_lock(&data->db_lock);
+	list_for_each_entry(db_filter, &data->doorbells, data_node) {
+		if (((db_filter->filter.rioid == 0xffffffff ||
+		      db_filter->filter.rioid == src)) &&
+		      info >= db_filter->filter.low &&
+		      info <= db_filter->filter.high) {
+			priv = db_filter->priv;
+			rio_mport_add_event(priv, &event);
+			handled = 1;
+		}
+	}
+	spin_unlock(&data->db_lock);
+
+	if (!handled)
+		dev_warn(&data->dev,
+			"%s: spurious DB received from 0x%x, info=0x%04x\n",
+			__func__, src, info);
+}
+
+static int rio_mport_add_db_filter(struct mport_cdev_priv *priv,
+				   void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_mport_db_filter *db_filter;
+	struct rio_doorbell_filter filter;
+	unsigned long flags;
+	int ret;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	if (filter.low > filter.high)
+		return -EINVAL;
+
+	ret = rio_request_inb_dbell(md->mport, md, filter.low, filter.high,
+				    rio_mport_doorbell_handler);
+	if (ret) {
+		rmcd_error("%s failed to register IBDB, err=%d",
+			   dev_name(&md->dev), ret);
+		return ret;
+	}
+
+	db_filter = kzalloc(sizeof(*db_filter), GFP_KERNEL);
+	if (db_filter == NULL) {
+		rio_release_inb_dbell(md->mport, filter.low, filter.high);
+		return -ENOMEM;
+	}
+
+	db_filter->filter = filter;
+	db_filter->priv = priv;
+	spin_lock_irqsave(&md->db_lock, flags);
+	list_add_tail(&db_filter->priv_node, &priv->db_filters);
+	list_add_tail(&db_filter->data_node, &md->doorbells);
+	spin_unlock_irqrestore(&md->db_lock, flags);
+
+	return 0;
+}
+
+static void rio_mport_delete_db_filter(struct rio_mport_db_filter *db_filter)
+{
+	list_del(&db_filter->data_node);
+	list_del(&db_filter->priv_node);
+	kfree(db_filter);
+}
+
+static int rio_mport_remove_db_filter(struct mport_cdev_priv *priv,
+				      void __user *arg)
+{
+	struct rio_mport_db_filter *db_filter;
+	struct rio_doorbell_filter filter;
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	spin_lock_irqsave(&priv->md->db_lock, flags);
+	list_for_each_entry(db_filter, &priv->db_filters, priv_node) {
+		if (db_filter->filter.rioid == filter.rioid &&
+		    db_filter->filter.low == filter.low &&
+		    db_filter->filter.high == filter.high) {
+			rio_mport_delete_db_filter(db_filter);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&priv->md->db_lock, flags);
+
+	if (!ret)
+		rio_release_inb_dbell(priv->md->mport, filter.low, filter.high);
+
+	return ret;
+}
+
+static int rio_mport_match_pw(union rio_pw_msg *msg,
+			      struct rio_pw_filter *filter)
+{
+	if ((msg->em.comptag & filter->mask) < filter->low ||
+		(msg->em.comptag & filter->mask) > filter->high)
+		return 0;
+	return 1;
+}
+
+static int rio_mport_pw_handler(struct rio_mport *mport, void *context,
+				union rio_pw_msg *msg, int step)
+{
+	struct mport_dev *md = context;
+	struct mport_cdev_priv *priv;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_event event;
+	int handled;
+
+	event.header = RIO_PORTWRITE;
+	memcpy(event.u.portwrite.payload, msg->raw, RIO_PW_MSG_SIZE);
+
+	handled = 0;
+	spin_lock(&md->pw_lock);
+	list_for_each_entry(pw_filter, &md->portwrites, md_node) {
+		if (rio_mport_match_pw(msg, &pw_filter->filter)) {
+			priv = pw_filter->priv;
+			rio_mport_add_event(priv, &event);
+			handled = 1;
+		}
+	}
+	spin_unlock(&md->pw_lock);
+
+	if (!handled) {
+		printk_ratelimited(KERN_WARNING DRV_NAME
+			": mport%d received spurious PW from 0x%08x\n",
+			mport->id, msg->em.comptag);
+	}
+
+	return 0;
+}
+
+static int rio_mport_add_pw_filter(struct mport_cdev_priv *priv,
+				   void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_pw_filter filter;
+	unsigned long flags;
+	int hadd = 0;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	pw_filter = kzalloc(sizeof(*pw_filter), GFP_KERNEL);
+	if (pw_filter == NULL)
+		return -ENOMEM;
+
+	pw_filter->filter = filter;
+	pw_filter->priv = priv;
+	spin_lock_irqsave(&md->pw_lock, flags);
+	if (list_empty(&md->portwrites))
+		hadd = 1;
+	list_add_tail(&pw_filter->priv_node, &priv->pw_filters);
+	list_add_tail(&pw_filter->md_node, &md->portwrites);
+	spin_unlock_irqrestore(&md->pw_lock, flags);
+
+	if (hadd) {
+		int ret;
+
+		ret = rio_add_mport_pw_handler(md->mport, md,
+					       rio_mport_pw_handler);
+		if (ret) {
+			dev_err(&md->dev,
+				"%s: failed to add IB_PW handler, err=%d\n",
+				__func__, ret);
+			return ret;
+		}
+		rio_pw_enable(md->mport, 1);
+	}
+
+	return 0;
+}
+
+static void rio_mport_delete_pw_filter(struct rio_mport_pw_filter *pw_filter)
+{
+	list_del(&pw_filter->md_node);
+	list_del(&pw_filter->priv_node);
+	kfree(pw_filter);
+}
+
+static int rio_mport_match_pw_filter(struct rio_pw_filter *a,
+				     struct rio_pw_filter *b)
+{
+	if ((a->mask == b->mask) && (a->low == b->low) && (a->high == b->high))
+		return 1;
+	return 0;
+}
+
+static int rio_mport_remove_pw_filter(struct mport_cdev_priv *priv,
+				      void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_pw_filter filter;
+	unsigned long flags;
+	int ret = -EINVAL;
+	int hdel = 0;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	spin_lock_irqsave(&md->pw_lock, flags);
+	list_for_each_entry(pw_filter, &priv->pw_filters, priv_node) {
+		if (rio_mport_match_pw_filter(&pw_filter->filter, &filter)) {
+			rio_mport_delete_pw_filter(pw_filter);
+			ret = 0;
+			break;
+		}
+	}
+
+	if (list_empty(&md->portwrites))
+		hdel = 1;
+	spin_unlock_irqrestore(&md->pw_lock, flags);
+
+	if (hdel) {
+		rio_del_mport_pw_handler(md->mport, priv->md,
+					 rio_mport_pw_handler);
+		rio_pw_enable(md->mport, 0);
+	}
+
+	return ret;
+}
+
+/*
+ * rio_release_dev - release routine for kernel RIO device object
+ * @dev: kernel device object associated with a RIO device structure
+ *
+ * Frees a RIO device struct associated a RIO device struct.
+ * The RIO device struct is freed.
+ */
+static void rio_release_dev(struct device *dev)
+{
+	struct rio_dev *rdev;
+
+	rdev = to_rio_dev(dev);
+	pr_info(DRV_PREFIX "%s: %s\n", __func__, rio_name(rdev));
+	kfree(rdev);
+}
+
+
+static void rio_release_net(struct device *dev)
+{
+	struct rio_net *net;
+
+	net = to_rio_net(dev);
+	rmcd_debug(RDEV, "net_%d", net->id);
+	kfree(net);
+}
+
+
+/*
+ * rio_mport_add_riodev - creates a kernel RIO device object
+ *
+ * Allocates a RIO device data structure and initializes required fields based
+ * on device's configuration space contents.
+ * If the device has switch capabilities, then a switch specific portion is
+ * allocated and configured.
+ */
+static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
+				   void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_rdev_info dev_info;
+	struct rio_dev *rdev;
+	struct rio_switch *rswitch = NULL;
+	struct rio_mport *mport;
+	size_t size;
+	u32 rval;
+	u32 swpinfo = 0;
+	u16 destid;
+	u8 hopcount;
+	int err;
+
+	if (copy_from_user(&dev_info, arg, sizeof(dev_info)))
+		return -EFAULT;
+
+	rmcd_debug(RDEV, "name:%s ct:0x%x did:0x%x hc:0x%x", dev_info.name,
+		   dev_info.comptag, dev_info.destid, dev_info.hopcount);
+
+	if (bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name)) {
+		rmcd_debug(RDEV, "device %s already exists", dev_info.name);
+		return -EEXIST;
+	}
+
+	size = sizeof(struct rio_dev);
+	mport = md->mport;
+	destid = (u16)dev_info.destid;
+	hopcount = (u8)dev_info.hopcount;
+
+	if (rio_mport_read_config_32(mport, destid, hopcount,
+				     RIO_PEF_CAR, &rval))
+		return -EIO;
+
+	if (rval & RIO_PEF_SWITCH) {
+		rio_mport_read_config_32(mport, destid, hopcount,
+					 RIO_SWP_INFO_CAR, &swpinfo);
+		size += (RIO_GET_TOTAL_PORTS(swpinfo) *
+			 sizeof(rswitch->nextdev[0])) + sizeof(*rswitch);
+	}
+
+	rdev = kzalloc(size, GFP_KERNEL);
+	if (rdev == NULL)
+		return -ENOMEM;
+
+	if (mport->net == NULL) {
+		struct rio_net *net;
+
+		net = rio_alloc_net(mport);
+		if (!net) {
+			err = -ENOMEM;
+			rmcd_debug(RDEV, "failed to allocate net object");
+			goto cleanup;
+		}
+
+		net->id = mport->id;
+		net->hport = mport;
+		dev_set_name(&net->dev, "rnet_%d", net->id);
+		net->dev.parent = &mport->dev;
+		net->dev.release = rio_release_net;
+		err = rio_add_net(net);
+		if (err) {
+			rmcd_debug(RDEV, "failed to register net, err=%d", err);
+			kfree(net);
+			goto cleanup;
+		}
+	}
+
+	rdev->net = mport->net;
+	rdev->pef = rval;
+	rdev->swpinfo = swpinfo;
+	rio_mport_read_config_32(mport, destid, hopcount,
+				 RIO_DEV_ID_CAR, &rval);
+	rdev->did = rval >> 16;
+	rdev->vid = rval & 0xffff;
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_DEV_INFO_CAR,
+				 &rdev->device_rev);
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_ASM_ID_CAR,
+				 &rval);
+	rdev->asm_did = rval >> 16;
+	rdev->asm_vid = rval & 0xffff;
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_ASM_INFO_CAR,
+				 &rval);
+	rdev->asm_rev = rval >> 16;
+
+	if (rdev->pef & RIO_PEF_EXT_FEATURES) {
+		rdev->efptr = rval & 0xffff;
+		rdev->phys_efptr = rio_mport_get_physefb(mport, 0, destid,
+							 hopcount);
+
+		rdev->em_efptr = rio_mport_get_feature(mport, 0, destid,
+						hopcount, RIO_EFB_ERR_MGMNT);
+	}
+
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_SRC_OPS_CAR,
+				 &rdev->src_ops);
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_DST_OPS_CAR,
+				 &rdev->dst_ops);
+
+	rdev->comp_tag = dev_info.comptag;
+	rdev->destid = destid;
+	/* hopcount is stored as specified by a caller, regardles of EP or SW */
+	rdev->hopcount = hopcount;
+
+	if (rdev->pef & RIO_PEF_SWITCH) {
+		rswitch = rdev->rswitch;
+		rswitch->route_table = NULL;
+	}
+
+	if (strlen(dev_info.name))
+		dev_set_name(&rdev->dev, "%s", dev_info.name);
+	else if (rdev->pef & RIO_PEF_SWITCH)
+		dev_set_name(&rdev->dev, "%02x:s:%04x", mport->id,
+			     rdev->comp_tag & RIO_CTAG_UDEVID);
+	else
+		dev_set_name(&rdev->dev, "%02x:e:%04x", mport->id,
+			     rdev->comp_tag & RIO_CTAG_UDEVID);
+
+	INIT_LIST_HEAD(&rdev->net_list);
+	rdev->dev.parent = &mport->net->dev;
+	rio_attach_device(rdev);
+	rdev->dev.release = rio_release_dev;
+
+	if (rdev->dst_ops & RIO_DST_OPS_DOORBELL)
+		rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE],
+				   0, 0xffff);
+	err = rio_add_device(rdev);
+	if (err)
+		goto cleanup;
+	rio_dev_get(rdev);
+
+	return 0;
+cleanup:
+	kfree(rdev);
+	return err;
+}
+
+static int rio_mport_del_riodev(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct rio_rdev_info dev_info;
+	struct rio_dev *rdev = NULL;
+	struct device  *dev;
+	struct rio_mport *mport;
+	struct rio_net *net;
+
+	if (copy_from_user(&dev_info, arg, sizeof(dev_info)))
+		return -EFAULT;
+
+	mport = priv->md->mport;
+
+	/* If device name is specified, removal by name has priority */
+	if (strlen(dev_info.name)) {
+		dev = bus_find_device_by_name(&rio_bus_type, NULL,
+					      dev_info.name);
+		if (dev)
+			rdev = to_rio_dev(dev);
+	} else {
+		do {
+			rdev = rio_get_comptag(dev_info.comptag, rdev);
+			if (rdev && rdev->dev.parent == &mport->net->dev &&
+			    rdev->destid == (u16)dev_info.destid &&
+			    rdev->hopcount == (u8)dev_info.hopcount)
+				break;
+		} while (rdev);
+	}
+
+	if (!rdev) {
+		rmcd_debug(RDEV,
+			"device name:%s ct:0x%x did:0x%x hc:0x%x not found",
+			dev_info.name, dev_info.comptag, dev_info.destid,
+			dev_info.hopcount);
+		return -ENODEV;
+	}
+
+	net = rdev->net;
+	rio_dev_put(rdev);
+	rio_del_device(rdev, RIO_DEVICE_SHUTDOWN);
+
+	if (list_empty(&net->devices)) {
+		rio_free_net(net);
+		mport->net = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Mport cdev management
+ */
+
+/*
+ * mport_cdev_open() - Open character device (mport)
+ */
+static int mport_cdev_open(struct inode *inode, struct file *filp)
+{
+	int ret;
+	int minor = iminor(inode);
+	struct mport_dev *chdev;
+	struct mport_cdev_priv *priv;
+
+	/* Test for valid device */
+	if (minor >= RIO_MAX_MPORTS) {
+		rmcd_error("Invalid minor device number");
+		return -EINVAL;
+	}
+
+	chdev = container_of(inode->i_cdev, struct mport_dev, cdev);
+
+	rmcd_debug(INIT, "%s filp=%p", dev_name(&chdev->dev), filp);
+
+	if (atomic_read(&chdev->active) == 0)
+		return -ENODEV;
+
+	get_device(&chdev->dev);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		put_device(&chdev->dev);
+		return -ENOMEM;
+	}
+
+	priv->md = chdev;
+
+	mutex_lock(&chdev->file_mutex);
+	list_add_tail(&priv->list, &chdev->file_list);
+	mutex_unlock(&chdev->file_mutex);
+
+	INIT_LIST_HEAD(&priv->db_filters);
+	INIT_LIST_HEAD(&priv->pw_filters);
+	spin_lock_init(&priv->fifo_lock);
+	init_waitqueue_head(&priv->event_rx_wait);
+	ret = kfifo_alloc(&priv->event_fifo,
+			  sizeof(struct rio_event) * MPORT_EVENT_DEPTH,
+			  GFP_KERNEL);
+	if (ret < 0) {
+		dev_err(&chdev->dev, DRV_NAME ": kfifo_alloc failed\n");
+		ret = -ENOMEM;
+		goto err_fifo;
+	}
+
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	INIT_LIST_HEAD(&priv->async_list);
+	INIT_LIST_HEAD(&priv->pend_list);
+	spin_lock_init(&priv->req_lock);
+	mutex_init(&priv->dma_lock);
+#endif
+
+	filp->private_data = priv;
+	goto out;
+err_fifo:
+	kfree(priv);
+out:
+	return ret;
+}
+
+static int mport_cdev_fasync(int fd, struct file *filp, int mode)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+
+	return fasync_helper(fd, filp, mode, &priv->async_queue);
+}
+
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+static void mport_cdev_release_dma(struct file *filp)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md;
+	struct mport_dma_req *req, *req_next;
+	unsigned long tmo = msecs_to_jiffies(dma_timeout);
+	long wret;
+	LIST_HEAD(list);
+
+	rmcd_debug(EXIT, "from filp=%p %s(%d)",
+		   filp, current->comm, task_pid_nr(current));
+
+	if (!priv->dmach) {
+		rmcd_debug(EXIT, "No DMA channel for filp=%p", filp);
+		return;
+	}
+
+	md = priv->md;
+
+	flush_workqueue(dma_wq);
+
+	spin_lock(&priv->req_lock);
+	if (!list_empty(&priv->async_list)) {
+		rmcd_debug(EXIT, "async list not empty filp=%p %s(%d)",
+			   filp, current->comm, task_pid_nr(current));
+		list_splice_init(&priv->async_list, &list);
+	}
+	spin_unlock(&priv->req_lock);
+
+	if (!list_empty(&list)) {
+		rmcd_debug(EXIT, "temp list not empty");
+		list_for_each_entry_safe(req, req_next, &list, node) {
+			rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s",
+				   req->filp, req->cookie,
+				   completion_done(&req->req_comp)?"yes":"no");
+			list_del(&req->node);
+			dma_req_free(req);
+		}
+	}
+
+	if (!list_empty(&priv->pend_list)) {
+		rmcd_debug(EXIT, "Free pending DMA requests for filp=%p %s(%d)",
+			   filp, current->comm, task_pid_nr(current));
+		list_for_each_entry_safe(req,
+					 req_next, &priv->pend_list, node) {
+			rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s",
+				   req->filp, req->cookie,
+				   completion_done(&req->req_comp)?"yes":"no");
+			list_del(&req->node);
+			dma_req_free(req);
+		}
+	}
+
+	put_dma_channel(priv);
+	wret = wait_for_completion_interruptible_timeout(&priv->comp, tmo);
+
+	if (wret <= 0) {
+		rmcd_error("%s(%d) failed waiting for DMA release err=%ld",
+			current->comm, task_pid_nr(current), wret);
+	}
+
+	spin_lock(&priv->req_lock);
+
+	if (!list_empty(&priv->pend_list)) {
+		rmcd_debug(EXIT, "ATTN: pending DMA requests, filp=%p %s(%d)",
+			   filp, current->comm, task_pid_nr(current));
+	}
+
+	spin_unlock(&priv->req_lock);
+
+	if (priv->dmach != priv->md->dma_chan) {
+		rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)",
+			   filp, current->comm, task_pid_nr(current));
+		rio_release_dma(priv->dmach);
+	} else {
+		rmcd_debug(EXIT, "Adjust default DMA channel refcount");
+		kref_put(&md->dma_ref, mport_release_def_dma);
+	}
+
+	priv->dmach = NULL;
+}
+#else
+#define mport_cdev_release_dma(priv) do {} while (0)
+#endif
+
+/*
+ * mport_cdev_release() - Release character device
+ */
+static int mport_cdev_release(struct inode *inode, struct file *filp)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *chdev;
+	struct rio_mport_pw_filter *pw_filter, *pw_filter_next;
+	struct rio_mport_db_filter *db_filter, *db_filter_next;
+	struct rio_mport_mapping *map, *_map;
+	unsigned long flags;
+
+	rmcd_debug(EXIT, "%s filp=%p", dev_name(&priv->md->dev), filp);
+
+	chdev = priv->md;
+	mport_cdev_release_dma(filp);
+
+	priv->event_mask = 0;
+
+	spin_lock_irqsave(&chdev->pw_lock, flags);
+	if (!list_empty(&priv->pw_filters)) {
+		list_for_each_entry_safe(pw_filter, pw_filter_next,
+					 &priv->pw_filters, priv_node)
+			rio_mport_delete_pw_filter(pw_filter);
+	}
+	spin_unlock_irqrestore(&chdev->pw_lock, flags);
+
+	spin_lock_irqsave(&chdev->db_lock, flags);
+	list_for_each_entry_safe(db_filter, db_filter_next,
+				 &priv->db_filters, priv_node) {
+		rio_mport_delete_db_filter(db_filter);
+	}
+	spin_unlock_irqrestore(&chdev->db_lock, flags);
+
+	kfifo_free(&priv->event_fifo);
+
+	mutex_lock(&chdev->buf_mutex);
+	list_for_each_entry_safe(map, _map, &chdev->mappings, node) {
+		if (map->filp == filp) {
+			rmcd_debug(EXIT, "release mapping %p filp=%p",
+				   map->virt_addr, filp);
+			kref_put(&map->ref, mport_release_mapping);
+		}
+	}
+	mutex_unlock(&chdev->buf_mutex);
+
+	mport_cdev_fasync(-1, filp, 0);
+	filp->private_data = NULL;
+	mutex_lock(&chdev->file_mutex);
+	list_del(&priv->list);
+	mutex_unlock(&chdev->file_mutex);
+	put_device(&chdev->dev);
+	kfree(priv);
+	return 0;
+}
+
+/*
+ * mport_cdev_ioctl() - IOCTLs for character device
+ */
+static long mport_cdev_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	int err = -EINVAL;
+	struct mport_cdev_priv *data = filp->private_data;
+	struct mport_dev *md = data->md;
+
+	if (atomic_read(&md->active) == 0)
+		return -ENODEV;
+
+	switch (cmd) {
+	case RIO_MPORT_MAINT_READ_LOCAL:
+		return rio_mport_maint_rd(data, (void __user *)arg, 1);
+	case RIO_MPORT_MAINT_WRITE_LOCAL:
+		return rio_mport_maint_wr(data, (void __user *)arg, 1);
+	case RIO_MPORT_MAINT_READ_REMOTE:
+		return rio_mport_maint_rd(data, (void __user *)arg, 0);
+	case RIO_MPORT_MAINT_WRITE_REMOTE:
+		return rio_mport_maint_wr(data, (void __user *)arg, 0);
+	case RIO_MPORT_MAINT_HDID_SET:
+		return maint_hdid_set(data, (void __user *)arg);
+	case RIO_MPORT_MAINT_COMPTAG_SET:
+		return maint_comptag_set(data, (void __user *)arg);
+	case RIO_MPORT_MAINT_PORT_IDX_GET:
+		return maint_port_idx_get(data, (void __user *)arg);
+	case RIO_MPORT_GET_PROPERTIES:
+		md->properties.hdid = md->mport->host_deviceid;
+		if (copy_to_user((void __user *)arg, &(data->md->properties),
+				 sizeof(data->md->properties)))
+			return -EFAULT;
+		return 0;
+	case RIO_ENABLE_DOORBELL_RANGE:
+		return rio_mport_add_db_filter(data, (void __user *)arg);
+	case RIO_DISABLE_DOORBELL_RANGE:
+		return rio_mport_remove_db_filter(data, (void __user *)arg);
+	case RIO_ENABLE_PORTWRITE_RANGE:
+		return rio_mport_add_pw_filter(data, (void __user *)arg);
+	case RIO_DISABLE_PORTWRITE_RANGE:
+		return rio_mport_remove_pw_filter(data, (void __user *)arg);
+	case RIO_SET_EVENT_MASK:
+		data->event_mask = arg;
+		return 0;
+	case RIO_GET_EVENT_MASK:
+		if (copy_to_user((void __user *)arg, &data->event_mask,
+				    sizeof(data->event_mask)))
+			return -EFAULT;
+		return 0;
+	case RIO_MAP_OUTBOUND:
+		return rio_mport_obw_map(filp, (void __user *)arg);
+	case RIO_MAP_INBOUND:
+		return rio_mport_map_inbound(filp, (void __user *)arg);
+	case RIO_UNMAP_OUTBOUND:
+		return rio_mport_obw_free(filp, (void __user *)arg);
+	case RIO_UNMAP_INBOUND:
+		return rio_mport_inbound_free(filp, (void __user *)arg);
+	case RIO_ALLOC_DMA:
+		return rio_mport_alloc_dma(filp, (void __user *)arg);
+	case RIO_FREE_DMA:
+		return rio_mport_free_dma(filp, (void __user *)arg);
+	case RIO_WAIT_FOR_ASYNC:
+		return rio_mport_wait_for_async_dma(filp, (void __user *)arg);
+	case RIO_TRANSFER:
+		return rio_mport_transfer_ioctl(filp, (void __user *)arg);
+	case RIO_DEV_ADD:
+		return rio_mport_add_riodev(data, (void __user *)arg);
+	case RIO_DEV_DEL:
+		return rio_mport_del_riodev(data, (void __user *)arg);
+	default:
+		break;
+	}
+
+	return err;
+}
+
+/*
+ * mport_release_mapping - free mapping resources and info structure
+ * @ref: a pointer to the kref within struct rio_mport_mapping
+ *
+ * NOTE: Shall be called while holding buf_mutex.
+ */
+static void mport_release_mapping(struct kref *ref)
+{
+	struct rio_mport_mapping *map =
+			container_of(ref, struct rio_mport_mapping, ref);
+	struct rio_mport *mport = map->md->mport;
+
+	rmcd_debug(MMAP, "type %d mapping @ %p (phys = %pad) for %s",
+		   map->dir, map->virt_addr,
+		   &map->phys_addr, mport->name);
+
+	list_del(&map->node);
+
+	switch (map->dir) {
+	case MAP_INBOUND:
+		rio_unmap_inb_region(mport, map->phys_addr);
+	case MAP_DMA:
+		dma_free_coherent(mport->dev.parent, map->size,
+				  map->virt_addr, map->phys_addr);
+		break;
+	case MAP_OUTBOUND:
+		rio_unmap_outb_region(mport, map->rioid, map->rio_addr);
+		break;
+	}
+	kfree(map);
+}
+
+static void mport_mm_open(struct vm_area_struct *vma)
+{
+	struct rio_mport_mapping *map = vma->vm_private_data;
+
+rmcd_debug(MMAP, "0x%pad", &map->phys_addr);
+	kref_get(&map->ref);
+}
+
+static void mport_mm_close(struct vm_area_struct *vma)
+{
+	struct rio_mport_mapping *map = vma->vm_private_data;
+
+rmcd_debug(MMAP, "0x%pad", &map->phys_addr);
+	mutex_lock(&map->md->buf_mutex);
+	kref_put(&map->ref, mport_release_mapping);
+	mutex_unlock(&map->md->buf_mutex);
+}
+
+static const struct vm_operations_struct vm_ops = {
+	.open =	mport_mm_open,
+	.close = mport_mm_close,
+};
+
+static int mport_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md;
+	size_t size = vma->vm_end - vma->vm_start;
+	dma_addr_t baddr;
+	unsigned long offset;
+	int found = 0, ret;
+	struct rio_mport_mapping *map;
+
+	rmcd_debug(MMAP, "0x%x bytes at offset 0x%lx",
+		   (unsigned int)size, vma->vm_pgoff);
+
+	md = priv->md;
+	baddr = ((dma_addr_t)vma->vm_pgoff << PAGE_SHIFT);
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry(map, &md->mappings, node) {
+		if (baddr >= map->phys_addr &&
+		    baddr < (map->phys_addr + map->size)) {
+			found = 1;
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (!found)
+		return -ENOMEM;
+
+	offset = baddr - map->phys_addr;
+
+	if (size + offset > map->size)
+		return -EINVAL;
+
+	vma->vm_pgoff = offset >> PAGE_SHIFT;
+	rmcd_debug(MMAP, "MMAP adjusted offset = 0x%lx", vma->vm_pgoff);
+
+	if (map->dir == MAP_INBOUND || map->dir == MAP_DMA)
+		ret = dma_mmap_coherent(md->mport->dev.parent, vma,
+				map->virt_addr, map->phys_addr, map->size);
+	else if (map->dir == MAP_OUTBOUND) {
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		ret = vm_iomap_memory(vma, map->phys_addr, map->size);
+	} else {
+		rmcd_error("Attempt to mmap unsupported mapping type");
+		ret = -EIO;
+	}
+
+	if (!ret) {
+		vma->vm_private_data = map;
+		vma->vm_ops = &vm_ops;
+		mport_mm_open(vma);
+	} else {
+		rmcd_error("MMAP exit with err=%d", ret);
+	}
+
+	return ret;
+}
+
+static unsigned int mport_cdev_poll(struct file *filp, poll_table *wait)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+
+	poll_wait(filp, &priv->event_rx_wait, wait);
+	if (kfifo_len(&priv->event_fifo))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+static ssize_t mport_read(struct file *filp, char __user *buf, size_t count,
+			loff_t *ppos)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	int copied;
+	ssize_t ret;
+
+	if (!count)
+		return 0;
+
+	if (kfifo_is_empty(&priv->event_fifo) &&
+	    (filp->f_flags & O_NONBLOCK))
+		return -EAGAIN;
+
+	if (count % sizeof(struct rio_event))
+		return -EINVAL;
+
+	ret = wait_event_interruptible(priv->event_rx_wait,
+					kfifo_len(&priv->event_fifo) != 0);
+	if (ret)
+		return ret;
+
+	while (ret < count) {
+		if (kfifo_to_user(&priv->event_fifo, buf,
+		      sizeof(struct rio_event), &copied))
+			return -EFAULT;
+		ret += copied;
+		buf += copied;
+	}
+
+	return ret;
+}
+
+static ssize_t mport_write(struct file *filp, const char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct rio_mport *mport = priv->md->mport;
+	struct rio_event event;
+	int len, ret;
+
+	if (!count)
+		return 0;
+
+	if (count % sizeof(event))
+		return -EINVAL;
+
+	len = 0;
+	while ((count - len) >= (int)sizeof(event)) {
+		if (copy_from_user(&event, buf, sizeof(event)))
+			return -EFAULT;
+
+		if (event.header != RIO_DOORBELL)
+			return -EINVAL;
+
+		ret = rio_mport_send_doorbell(mport,
+					      (u16)event.u.doorbell.rioid,
+					      event.u.doorbell.payload);
+		if (ret < 0)
+			return ret;
+
+		len += sizeof(event);
+		buf += sizeof(event);
+	}
+
+	return len;
+}
+
+static const struct file_operations mport_fops = {
+	.owner		= THIS_MODULE,
+	.open		= mport_cdev_open,
+	.release	= mport_cdev_release,
+	.poll		= mport_cdev_poll,
+	.read		= mport_read,
+	.write		= mport_write,
+	.mmap		= mport_cdev_mmap,
+	.fasync		= mport_cdev_fasync,
+	.unlocked_ioctl = mport_cdev_ioctl
+};
+
+/*
+ * Character device management
+ */
+
+static void mport_device_release(struct device *dev)
+{
+	struct mport_dev *md;
+
+	rmcd_debug(EXIT, "%s", dev_name(dev));
+	md = container_of(dev, struct mport_dev, dev);
+	kfree(md);
+}
+
+/*
+ * mport_cdev_add() - Create mport_dev from rio_mport
+ * @mport:	RapidIO master port
+ */
+static struct mport_dev *mport_cdev_add(struct rio_mport *mport)
+{
+	int ret = 0;
+	struct mport_dev *md;
+	struct rio_mport_attr attr;
+
+	md = kzalloc(sizeof(struct mport_dev), GFP_KERNEL);
+	if (!md) {
+		rmcd_error("Unable allocate a device object");
+		return NULL;
+	}
+
+	md->mport = mport;
+	mutex_init(&md->buf_mutex);
+	mutex_init(&md->file_mutex);
+	INIT_LIST_HEAD(&md->file_list);
+	cdev_init(&md->cdev, &mport_fops);
+	md->cdev.owner = THIS_MODULE;
+	ret = cdev_add(&md->cdev, MKDEV(MAJOR(dev_number), mport->id), 1);
+	if (ret < 0) {
+		kfree(md);
+		rmcd_error("Unable to register a device, err=%d", ret);
+		return NULL;
+	}
+
+	md->dev.devt = md->cdev.dev;
+	md->dev.class = dev_class;
+	md->dev.parent = &mport->dev;
+	md->dev.release = mport_device_release;
+	dev_set_name(&md->dev, DEV_NAME "%d", mport->id);
+	atomic_set(&md->active, 1);
+
+	ret = device_register(&md->dev);
+	if (ret) {
+		rmcd_error("Failed to register mport %d (err=%d)",
+		       mport->id, ret);
+		goto err_cdev;
+	}
+
+	get_device(&md->dev);
+
+	INIT_LIST_HEAD(&md->doorbells);
+	spin_lock_init(&md->db_lock);
+	INIT_LIST_HEAD(&md->portwrites);
+	spin_lock_init(&md->pw_lock);
+	INIT_LIST_HEAD(&md->mappings);
+
+	md->properties.id = mport->id;
+	md->properties.sys_size = mport->sys_size;
+	md->properties.hdid = mport->host_deviceid;
+	md->properties.index = mport->index;
+
+	/* The transfer_mode property will be returned through mport query
+	 * interface
+	 */
+#ifdef CONFIG_PPC /* for now: only on Freescale's SoCs */
+	md->properties.transfer_mode |= RIO_TRANSFER_MODE_MAPPED;
+#else
+	md->properties.transfer_mode |= RIO_TRANSFER_MODE_TRANSFER;
+#endif
+	ret = rio_query_mport(mport, &attr);
+	if (!ret) {
+		md->properties.flags = attr.flags;
+		md->properties.link_speed = attr.link_speed;
+		md->properties.link_width = attr.link_width;
+		md->properties.dma_max_sge = attr.dma_max_sge;
+		md->properties.dma_max_size = attr.dma_max_size;
+		md->properties.dma_align = attr.dma_align;
+		md->properties.cap_sys_size = 0;
+		md->properties.cap_transfer_mode = 0;
+		md->properties.cap_addr_size = 0;
+	} else
+		pr_info(DRV_PREFIX "Failed to obtain info for %s cdev(%d:%d)\n",
+			mport->name, MAJOR(dev_number), mport->id);
+
+	mutex_lock(&mport_devs_lock);
+	list_add_tail(&md->node, &mport_devs);
+	mutex_unlock(&mport_devs_lock);
+
+	pr_info(DRV_PREFIX "Added %s cdev(%d:%d)\n",
+		mport->name, MAJOR(dev_number), mport->id);
+
+	return md;
+
+err_cdev:
+	cdev_del(&md->cdev);
+	kfree(md);
+	return NULL;
+}
+
+/*
+ * mport_cdev_terminate_dma() - Stop all active DMA data transfers and release
+ *                              associated DMA channels.
+ */
+static void mport_cdev_terminate_dma(struct mport_dev *md)
+{
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct mport_cdev_priv *client;
+
+	rmcd_debug(DMA, "%s", dev_name(&md->dev));
+
+	mutex_lock(&md->file_mutex);
+	list_for_each_entry(client, &md->file_list, list) {
+		if (client->dmach) {
+			dmaengine_terminate_all(client->dmach);
+			rio_release_dma(client->dmach);
+		}
+	}
+	mutex_unlock(&md->file_mutex);
+
+	if (md->dma_chan) {
+		dmaengine_terminate_all(md->dma_chan);
+		rio_release_dma(md->dma_chan);
+		md->dma_chan = NULL;
+	}
+#endif
+}
+
+
+/*
+ * mport_cdev_kill_fasync() - Send SIGIO signal to all processes with open
+ *                            mport_cdev files.
+ */
+static int mport_cdev_kill_fasync(struct mport_dev *md)
+{
+	unsigned int files = 0;
+	struct mport_cdev_priv *client;
+
+	mutex_lock(&md->file_mutex);
+	list_for_each_entry(client, &md->file_list, list) {
+		if (client->async_queue)
+			kill_fasync(&client->async_queue, SIGIO, POLL_HUP);
+		files++;
+	}
+	mutex_unlock(&md->file_mutex);
+	return files;
+}
+
+/*
+ * mport_cdev_remove() - Remove mport character device
+ * @dev:	Mport device to remove
+ */
+static void mport_cdev_remove(struct mport_dev *md)
+{
+	struct rio_mport_mapping *map, *_map;
+
+	rmcd_debug(EXIT, "Remove %s cdev", md->mport->name);
+	atomic_set(&md->active, 0);
+	mport_cdev_terminate_dma(md);
+	rio_del_mport_pw_handler(md->mport, md, rio_mport_pw_handler);
+	cdev_del(&(md->cdev));
+	mport_cdev_kill_fasync(md);
+
+	flush_workqueue(dma_wq);
+
+	/* TODO: do we need to give clients some time to close file
+	 * descriptors? Simple wait for XX, or kref?
+	 */
+
+	/*
+	 * Release DMA buffers allocated for the mport device.
+	 * Disable associated inbound Rapidio requests mapping if applicable.
+	 */
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		kref_put(&map->ref, mport_release_mapping);
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (!list_empty(&md->mappings))
+		rmcd_warn("WARNING: %s pending mappings on removal",
+			  md->mport->name);
+
+	rio_release_inb_dbell(md->mport, 0, 0x0fff);
+
+	device_unregister(&md->dev);
+	put_device(&md->dev);
+}
+
+/*
+ * RIO rio_mport_interface driver
+ */
+
+/*
+ * mport_add_mport() - Add rio_mport from LDM device struct
+ * @dev:		Linux device model struct
+ * @class_intf:	Linux class_interface
+ */
+static int mport_add_mport(struct device *dev,
+		struct class_interface *class_intf)
+{
+	struct rio_mport *mport = NULL;
+	struct mport_dev *chdev = NULL;
+
+	mport = to_rio_mport(dev);
+	if (!mport)
+		return -ENODEV;
+
+	chdev = mport_cdev_add(mport);
+	if (!chdev)
+		return -ENODEV;
+
+	return 0;
+}
+
+/*
+ * mport_remove_mport() - Remove rio_mport from global list
+ * TODO remove device from global mport_dev list
+ */
+static void mport_remove_mport(struct device *dev,
+		struct class_interface *class_intf)
+{
+	struct rio_mport *mport = NULL;
+	struct mport_dev *chdev;
+	int found = 0;
+
+	mport = to_rio_mport(dev);
+	rmcd_debug(EXIT, "Remove %s", mport->name);
+
+	mutex_lock(&mport_devs_lock);
+	list_for_each_entry(chdev, &mport_devs, node) {
+		if (chdev->mport->id == mport->id) {
+			atomic_set(&chdev->active, 0);
+			list_del(&chdev->node);
+			found = 1;
+			break;
+		}
+	}
+	mutex_unlock(&mport_devs_lock);
+
+	if (found)
+		mport_cdev_remove(chdev);
+}
+
+/* the rio_mport_interface is used to handle local mport devices */
+static struct class_interface rio_mport_interface __refdata = {
+	.class		= &rio_mport_class,
+	.add_dev	= mport_add_mport,
+	.remove_dev	= mport_remove_mport,
+};
+
+/*
+ * Linux kernel module
+ */
+
+/*
+ * mport_init - Driver module loading
+ */
+static int __init mport_init(void)
+{
+	int ret;
+
+	/* Create device class needed by udev */
+	dev_class = class_create(THIS_MODULE, DRV_NAME);
+	if (!dev_class) {
+		rmcd_error("Unable to create " DRV_NAME " class");
+		return -EINVAL;
+	}
+
+	ret = alloc_chrdev_region(&dev_number, 0, RIO_MAX_MPORTS, DRV_NAME);
+	if (ret < 0)
+		goto err_chr;
+
+	rmcd_debug(INIT, "Registered class with major=%d", MAJOR(dev_number));
+
+	/* Register to rio_mport_interface */
+	ret = class_interface_register(&rio_mport_interface);
+	if (ret) {
+		rmcd_error("class_interface_register() failed, err=%d", ret);
+		goto err_cli;
+	}
+
+	dma_wq = create_singlethread_workqueue("dma_wq");
+	if (!dma_wq) {
+		rmcd_error("failed to create DMA work queue");
+		ret = -ENOMEM;
+		goto err_wq;
+	}
+
+	return 0;
+
+err_wq:
+	class_interface_unregister(&rio_mport_interface);
+err_cli:
+	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
+err_chr:
+	class_destroy(dev_class);
+	return ret;
+}
+
+/**
+ * mport_exit - Driver module unloading
+ */
+static void __exit mport_exit(void)
+{
+	class_interface_unregister(&rio_mport_interface);
+	class_destroy(dev_class);
+	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
+	destroy_workqueue(dma_wq);
+}
+
+module_init(mport_init);
+module_exit(mport_exit);

diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index eeca70d..b5b4556 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c

@@ -36,7 +36,11 @@
 
 #include "tsi721.h"
 
-#define DEBUG_PW	/* Inbound Port-Write debugging */
+#ifdef DEBUG
+u32 dbg_level = DBG_INIT | DBG_EXIT;
+module_param(dbg_level, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)");
+#endif
 
 static void tsi721_omsg_handler(struct tsi721_device *priv, int ch);
 static void tsi721_imsg_handler(struct tsi721_device *priv, int ch);
@@ -143,9 +147,9 @@
 							& TSI721_DMAC_STS_RUN) {
 		udelay(1);
 		if (++i >= 5000000) {
-			dev_dbg(&priv->pdev->dev,
-				"%s : DMA[%d] read timeout ch_status=%x\n",
-				__func__, priv->mdma.ch_id, ch_stat);
+			tsi_debug(MAINT, &priv->pdev->dev,
+				"DMA[%d] read timeout ch_status=%x",
+				priv->mdma.ch_id, ch_stat);
 			if (!do_wr)
 				*data = 0xffffffff;
 			err = -EIO;
@@ -157,10 +161,12 @@
 		/* If DMA operation aborted due to error,
 		 * reinitialize DMA channel
 		 */
-		dev_dbg(&priv->pdev->dev, "%s : DMA ABORT ch_stat=%x\n",
-			__func__, ch_stat);
-		dev_dbg(&priv->pdev->dev, "OP=%d : destid=%x hc=%x off=%x\n",
-			do_wr ? MAINT_WR : MAINT_RD, destid, hopcount, offset);
+		tsi_debug(MAINT, &priv->pdev->dev, "DMA ABORT ch_stat=%x",
+			  ch_stat);
+		tsi_debug(MAINT, &priv->pdev->dev,
+			  "OP=%d : destid=%x hc=%x off=%x",
+			  do_wr ? MAINT_WR : MAINT_RD,
+			  destid, hopcount, offset);
 		iowrite32(TSI721_DMAC_INT_ALL, regs + TSI721_DMAC_INT);
 		iowrite32(TSI721_DMAC_CTL_INIT, regs + TSI721_DMAC_CTL);
 		udelay(10);
@@ -236,16 +242,15 @@
 
 /**
  * tsi721_pw_handler - Tsi721 inbound port-write interrupt handler
- * @mport: RapidIO master port structure
+ * @priv:  tsi721 device private structure
  *
  * Handles inbound port-write interrupts. Copies PW message from an internal
  * buffer into PW message FIFO and schedules deferred routine to process
  * queued messages.
  */
 static int
-tsi721_pw_handler(struct rio_mport *mport)
+tsi721_pw_handler(struct tsi721_device *priv)
 {
-	struct tsi721_device *priv = mport->priv;
 	u32 pw_stat;
 	u32 pw_buf[TSI721_RIO_PW_MSG_SIZE/sizeof(u32)];
 
@@ -283,30 +288,15 @@
 {
 	struct tsi721_device *priv = container_of(work, struct tsi721_device,
 						    pw_work);
-	u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)]; /* Use full size PW message
-							buffer for RIO layer */
+	union rio_pw_msg pwmsg;
 
 	/*
 	 * Process port-write messages
 	 */
-	while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)msg_buffer,
+	while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)&pwmsg,
 			 TSI721_RIO_PW_MSG_SIZE, &priv->pw_fifo_lock)) {
-		/* Process one message */
-#ifdef DEBUG_PW
-		{
-		u32 i;
-		pr_debug("%s : Port-Write Message:", __func__);
-		for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); ) {
-			pr_debug("0x%02x: %08x %08x %08x %08x", i*4,
-				msg_buffer[i], msg_buffer[i + 1],
-				msg_buffer[i + 2], msg_buffer[i + 3]);
-			i += 4;
-		}
-		pr_debug("\n");
-		}
-#endif
 		/* Pass the port-write message to RIO core for processing */
-		rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer);
+		rio_inb_pwrite_handler(&priv->mport, &pwmsg);
 	}
 }
 
@@ -354,8 +344,8 @@
 	offset = (((mport->sys_size) ? RIO_TT_CODE_16 : RIO_TT_CODE_8) << 18) |
 		 (destid << 2);
 
-	dev_dbg(&priv->pdev->dev,
-		"Send Doorbell 0x%04x to destID 0x%x\n", data, destid);
+	tsi_debug(DBELL, &priv->pdev->dev,
+		  "Send Doorbell 0x%04x to destID 0x%x", data, destid);
 	iowrite16be(data, priv->odb_base + offset);
 
 	return 0;
@@ -363,16 +353,15 @@
 
 /**
  * tsi721_dbell_handler - Tsi721 doorbell interrupt handler
- * @mport: RapidIO master port structure
+ * @priv: tsi721 device-specific data structure
  *
  * Handles inbound doorbell interrupts. Copies doorbell entry from an internal
  * buffer into DB message FIFO and schedules deferred  routine to process
  * queued DBs.
  */
 static int
-tsi721_dbell_handler(struct rio_mport *mport)
+tsi721_dbell_handler(struct tsi721_device *priv)
 {
-	struct tsi721_device *priv = mport->priv;
 	u32 regval;
 
 	/* Disable IDB interrupts */
@@ -404,7 +393,7 @@
 	/*
 	 * Process queued inbound doorbells
 	 */
-	mport = priv->mport;
+	mport = &priv->mport;
 
 	wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE;
 	rd_ptr = ioread32(priv->regs + TSI721_IDQ_RP(IDB_QUEUE)) % IDB_QSIZE;
@@ -430,10 +419,10 @@
 			dbell->dinb(mport, dbell->dev_id, DBELL_SID(idb.bytes),
 				    DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
 		} else {
-			dev_dbg(&priv->pdev->dev,
-				"spurious inb doorbell, sid %2.2x tid %2.2x"
-				" info %4.4x\n", DBELL_SID(idb.bytes),
-				DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
+			tsi_debug(DBELL, &priv->pdev->dev,
+				  "spurious IDB sid %2.2x tid %2.2x info %4.4x",
+				  DBELL_SID(idb.bytes), DBELL_TID(idb.bytes),
+				  DBELL_INF(idb.bytes));
 		}
 
 		wr_ptr = ioread32(priv->regs +
@@ -457,15 +446,14 @@
 /**
  * tsi721_irqhandler - Tsi721 interrupt handler
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles Tsi721 interrupts signaled using MSI and INTA. Checks reported
  * interrupt events and calls an event-specific handler(s).
  */
 static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
 {
-	struct rio_mport *mport = (struct rio_mport *)ptr;
-	struct tsi721_device *priv = mport->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	u32 dev_int;
 	u32 dev_ch_int;
 	u32 intval;
@@ -488,10 +476,10 @@
 			intval = ioread32(priv->regs +
 						TSI721_SR_CHINT(IDB_QUEUE));
 			if (intval & TSI721_SR_CHINT_IDBQRCV)
-				tsi721_dbell_handler(mport);
+				tsi721_dbell_handler(priv);
 			else
-				dev_info(&priv->pdev->dev,
-					"Unsupported SR_CH_INT %x\n", intval);
+				tsi_info(&priv->pdev->dev,
+					"Unsupported SR_CH_INT %x", intval);
 
 			/* Clear interrupts */
 			iowrite32(intval,
@@ -545,7 +533,7 @@
 		/* Service SRIO MAC interrupts */
 		intval = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
 		if (intval & TSI721_RIO_EM_INT_STAT_PW_RX)
-			tsi721_pw_handler(mport);
+			tsi721_pw_handler(priv);
 	}
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
@@ -553,8 +541,8 @@
 		int ch;
 
 		if (dev_ch_int & TSI721_INT_BDMA_CHAN_M) {
-			dev_dbg(&priv->pdev->dev,
-				"IRQ from DMA channel 0x%08x\n", dev_ch_int);
+			tsi_debug(DMA, &priv->pdev->dev,
+				  "IRQ from DMA channel 0x%08x", dev_ch_int);
 
 			for (ch = 0; ch < TSI721_DMA_MAXCH; ch++) {
 				if (!(dev_ch_int & TSI721_INT_BDMA_CHAN(ch)))
@@ -613,13 +601,13 @@
 /**
  * tsi721_omsg_msix - MSI-X interrupt handler for outbound messaging
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles outbound messaging interrupts signaled using MSI-X.
  */
 static irqreturn_t tsi721_omsg_msix(int irq, void *ptr)
 {
-	struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	int mbox;
 
 	mbox = (irq - priv->msix[TSI721_VECT_OMB0_DONE].vector) % RIO_MAX_MBOX;
@@ -630,13 +618,13 @@
 /**
  * tsi721_imsg_msix - MSI-X interrupt handler for inbound messaging
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles inbound messaging interrupts signaled using MSI-X.
  */
 static irqreturn_t tsi721_imsg_msix(int irq, void *ptr)
 {
-	struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	int mbox;
 
 	mbox = (irq - priv->msix[TSI721_VECT_IMB0_RCV].vector) % RIO_MAX_MBOX;
@@ -647,19 +635,19 @@
 /**
  * tsi721_srio_msix - Tsi721 MSI-X SRIO MAC interrupt handler
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles Tsi721 interrupts from SRIO MAC.
  */
 static irqreturn_t tsi721_srio_msix(int irq, void *ptr)
 {
-	struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	u32 srio_int;
 
 	/* Service SRIO MAC interrupts */
 	srio_int = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
 	if (srio_int & TSI721_RIO_EM_INT_STAT_PW_RX)
-		tsi721_pw_handler((struct rio_mport *)ptr);
+		tsi721_pw_handler(priv);
 
 	return IRQ_HANDLED;
 }
@@ -667,7 +655,7 @@
 /**
  * tsi721_sr2pc_ch_msix - Tsi721 MSI-X SR2PC Channel interrupt handler
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles Tsi721 interrupts from SR2PC Channel.
  * NOTE: At this moment services only one SR2PC channel associated with inbound
@@ -675,13 +663,13 @@
  */
 static irqreturn_t tsi721_sr2pc_ch_msix(int irq, void *ptr)
 {
-	struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	u32 sr_ch_int;
 
 	/* Service Inbound DB interrupt from SR2PC channel */
 	sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
 	if (sr_ch_int & TSI721_SR_CHINT_IDBQRCV)
-		tsi721_dbell_handler((struct rio_mport *)ptr);
+		tsi721_dbell_handler(priv);
 
 	/* Clear interrupts */
 	iowrite32(sr_ch_int, priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
@@ -693,32 +681,31 @@
 
 /**
  * tsi721_request_msix - register interrupt service for MSI-X mode.
- * @mport: RapidIO master port structure
+ * @priv: tsi721 device-specific data structure
  *
  * Registers MSI-X interrupt service routines for interrupts that are active
  * immediately after mport initialization. Messaging interrupt service routines
  * should be registered during corresponding open requests.
  */
-static int tsi721_request_msix(struct rio_mport *mport)
+static int tsi721_request_msix(struct tsi721_device *priv)
 {
-	struct tsi721_device *priv = mport->priv;
 	int err = 0;
 
 	err = request_irq(priv->msix[TSI721_VECT_IDB].vector,
 			tsi721_sr2pc_ch_msix, 0,
-			priv->msix[TSI721_VECT_IDB].irq_name, (void *)mport);
+			priv->msix[TSI721_VECT_IDB].irq_name, (void *)priv);
 	if (err)
-		goto out;
+		return err;
 
 	err = request_irq(priv->msix[TSI721_VECT_PWRX].vector,
 			tsi721_srio_msix, 0,
-			priv->msix[TSI721_VECT_PWRX].irq_name, (void *)mport);
-	if (err)
-		free_irq(
-			priv->msix[TSI721_VECT_IDB].vector,
-			(void *)mport);
-out:
-	return err;
+			priv->msix[TSI721_VECT_PWRX].irq_name, (void *)priv);
+	if (err) {
+		free_irq(priv->msix[TSI721_VECT_IDB].vector, (void *)priv);
+		return err;
+	}
+
+	return 0;
 }
 
 /**
@@ -770,8 +757,8 @@
 
 	err = pci_enable_msix_exact(priv->pdev, entries, ARRAY_SIZE(entries));
 	if (err) {
-		dev_err(&priv->pdev->dev,
-			"Failed to enable MSI-X (err=%d)\n", err);
+		tsi_err(&priv->pdev->dev,
+			"Failed to enable MSI-X (err=%d)", err);
 		return err;
 	}
 
@@ -831,27 +818,209 @@
 }
 #endif /* CONFIG_PCI_MSI */
 
-static int tsi721_request_irq(struct rio_mport *mport)
+static int tsi721_request_irq(struct tsi721_device *priv)
 {
-	struct tsi721_device *priv = mport->priv;
 	int err;
 
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX)
-		err = tsi721_request_msix(mport);
+		err = tsi721_request_msix(priv);
 	else
 #endif
 		err = request_irq(priv->pdev->irq, tsi721_irqhandler,
 			  (priv->flags & TSI721_USING_MSI) ? 0 : IRQF_SHARED,
-			  DRV_NAME, (void *)mport);
+			  DRV_NAME, (void *)priv);
 
 	if (err)
-		dev_err(&priv->pdev->dev,
-			"Unable to allocate interrupt, Error: %d\n", err);
+		tsi_err(&priv->pdev->dev,
+			"Unable to allocate interrupt, err=%d", err);
 
 	return err;
 }
 
+static void tsi721_free_irq(struct tsi721_device *priv)
+{
+#ifdef CONFIG_PCI_MSI
+	if (priv->flags & TSI721_USING_MSIX) {
+		free_irq(priv->msix[TSI721_VECT_IDB].vector, (void *)priv);
+		free_irq(priv->msix[TSI721_VECT_PWRX].vector, (void *)priv);
+	} else
+#endif
+	free_irq(priv->pdev->irq, (void *)priv);
+}
+
+static int
+tsi721_obw_alloc(struct tsi721_device *priv, struct tsi721_obw_bar *pbar,
+		 u32 size, int *win_id)
+{
+	u64 win_base;
+	u64 bar_base;
+	u64 bar_end;
+	u32 align;
+	struct tsi721_ob_win *win;
+	struct tsi721_ob_win *new_win = NULL;
+	int new_win_idx = -1;
+	int i = 0;
+
+	bar_base = pbar->base;
+	bar_end =  bar_base + pbar->size;
+	win_base = bar_base;
+	align = size/TSI721_PC2SR_ZONES;
+
+	while (i < TSI721_IBWIN_NUM) {
+		for (i = 0; i < TSI721_IBWIN_NUM; i++) {
+			if (!priv->ob_win[i].active) {
+				if (new_win == NULL) {
+					new_win = &priv->ob_win[i];
+					new_win_idx = i;
+				}
+				continue;
+			}
+
+			/*
+			 * If this window belongs to the current BAR check it
+			 * for overlap
+			 */
+			win = &priv->ob_win[i];
+
+			if (win->base >= bar_base && win->base < bar_end) {
+				if (win_base < (win->base + win->size) &&
+						(win_base + size) > win->base) {
+					/* Overlap detected */
+					win_base = win->base + win->size;
+					win_base = ALIGN(win_base, align);
+					break;
+				}
+			}
+		}
+	}
+
+	if (win_base + size > bar_end)
+		return -ENOMEM;
+
+	if (!new_win) {
+		tsi_err(&priv->pdev->dev, "OBW count tracking failed");
+		return -EIO;
+	}
+
+	new_win->active = true;
+	new_win->base = win_base;
+	new_win->size = size;
+	new_win->pbar = pbar;
+	priv->obwin_cnt--;
+	pbar->free -= size;
+	*win_id = new_win_idx;
+	return 0;
+}
+
+static int tsi721_map_outb_win(struct rio_mport *mport, u16 destid, u64 rstart,
+			u32 size, u32 flags, dma_addr_t *laddr)
+{
+	struct tsi721_device *priv = mport->priv;
+	int i;
+	struct tsi721_obw_bar *pbar;
+	struct tsi721_ob_win *ob_win;
+	int obw = -1;
+	u32 rval;
+	u64 rio_addr;
+	u32 zsize;
+	int ret = -ENOMEM;
+
+	tsi_debug(OBW, &priv->pdev->dev,
+		  "did=%d ra=0x%llx sz=0x%x", destid, rstart, size);
+
+	if (!is_power_of_2(size) || (size < 0x8000) || (rstart & (size - 1)))
+		return -EINVAL;
+
+	if (priv->obwin_cnt == 0)
+		return -EBUSY;
+
+	for (i = 0; i < 2; i++) {
+		if (priv->p2r_bar[i].free >= size) {
+			pbar = &priv->p2r_bar[i];
+			ret = tsi721_obw_alloc(priv, pbar, size, &obw);
+			if (!ret)
+				break;
+		}
+	}
+
+	if (ret)
+		return ret;
+
+	WARN_ON(obw == -1);
+	ob_win = &priv->ob_win[obw];
+	ob_win->destid = destid;
+	ob_win->rstart = rstart;
+	tsi_debug(OBW, &priv->pdev->dev,
+		  "allocated OBW%d @%llx", obw, ob_win->base);
+
+	/*
+	 * Configure Outbound Window
+	 */
+
+	zsize = size/TSI721_PC2SR_ZONES;
+	rio_addr = rstart;
+
+	/*
+	 * Program Address Translation Zones:
+	 *  This implementation uses all 8 zones associated wit window.
+	 */
+	for (i = 0; i < TSI721_PC2SR_ZONES; i++) {
+
+		while (ioread32(priv->regs + TSI721_ZONE_SEL) &
+			TSI721_ZONE_SEL_GO) {
+			udelay(1);
+		}
+
+		rval = (u32)(rio_addr & TSI721_LUT_DATA0_ADD) |
+			TSI721_LUT_DATA0_NREAD | TSI721_LUT_DATA0_NWR;
+		iowrite32(rval, priv->regs + TSI721_LUT_DATA0);
+		rval = (u32)(rio_addr >> 32);
+		iowrite32(rval, priv->regs + TSI721_LUT_DATA1);
+		rval = destid;
+		iowrite32(rval, priv->regs + TSI721_LUT_DATA2);
+
+		rval = TSI721_ZONE_SEL_GO | (obw << 3) | i;
+		iowrite32(rval, priv->regs + TSI721_ZONE_SEL);
+
+		rio_addr += zsize;
+	}
+
+	iowrite32(TSI721_OBWIN_SIZE(size) << 8,
+		  priv->regs + TSI721_OBWINSZ(obw));
+	iowrite32((u32)(ob_win->base >> 32), priv->regs + TSI721_OBWINUB(obw));
+	iowrite32((u32)(ob_win->base & TSI721_OBWINLB_BA) | TSI721_OBWINLB_WEN,
+		  priv->regs + TSI721_OBWINLB(obw));
+
+	*laddr = ob_win->base;
+	return 0;
+}
+
+static void tsi721_unmap_outb_win(struct rio_mport *mport,
+				  u16 destid, u64 rstart)
+{
+	struct tsi721_device *priv = mport->priv;
+	struct tsi721_ob_win *ob_win;
+	int i;
+
+	tsi_debug(OBW, &priv->pdev->dev, "did=%d ra=0x%llx", destid, rstart);
+
+	for (i = 0; i < TSI721_OBWIN_NUM; i++) {
+		ob_win = &priv->ob_win[i];
+
+		if (ob_win->active &&
+		    ob_win->destid == destid && ob_win->rstart == rstart) {
+			tsi_debug(OBW, &priv->pdev->dev,
+				  "free OBW%d @%llx", i, ob_win->base);
+			ob_win->active = false;
+			iowrite32(0, priv->regs + TSI721_OBWINLB(i));
+			ob_win->pbar->free += ob_win->size;
+			priv->obwin_cnt++;
+			break;
+		}
+	}
+}
+
 /**
  * tsi721_init_pc2sr_mapping - initializes outbound (PCIe->SRIO)
  * translation regions.
@@ -861,11 +1030,41 @@
  */
 static void tsi721_init_pc2sr_mapping(struct tsi721_device *priv)
 {
-	int i;
+	int i, z;
+	u32 rval;
 
 	/* Disable all PC2SR translation windows */
 	for (i = 0; i < TSI721_OBWIN_NUM; i++)
 		iowrite32(0, priv->regs + TSI721_OBWINLB(i));
+
+	/* Initialize zone lookup tables to avoid ECC errors on reads */
+	iowrite32(0, priv->regs + TSI721_LUT_DATA0);
+	iowrite32(0, priv->regs + TSI721_LUT_DATA1);
+	iowrite32(0, priv->regs + TSI721_LUT_DATA2);
+
+	for (i = 0; i < TSI721_OBWIN_NUM; i++) {
+		for (z = 0; z < TSI721_PC2SR_ZONES; z++) {
+			while (ioread32(priv->regs + TSI721_ZONE_SEL) &
+				TSI721_ZONE_SEL_GO) {
+				udelay(1);
+			}
+			rval = TSI721_ZONE_SEL_GO | (i << 3) | z;
+			iowrite32(rval, priv->regs + TSI721_ZONE_SEL);
+		}
+	}
+
+	if (priv->p2r_bar[0].size == 0 && priv->p2r_bar[1].size == 0) {
+		priv->obwin_cnt = 0;
+		return;
+	}
+
+	priv->p2r_bar[0].free = priv->p2r_bar[0].size;
+	priv->p2r_bar[1].free = priv->p2r_bar[1].size;
+
+	for (i = 0; i < TSI721_OBWIN_NUM; i++)
+		priv->ob_win[i].active = false;
+
+	priv->obwin_cnt = TSI721_OBWIN_NUM;
 }
 
 /**
@@ -885,45 +1084,148 @@
 		u64 rstart, u32 size, u32 flags)
 {
 	struct tsi721_device *priv = mport->priv;
-	int i;
+	int i, avail = -1;
 	u32 regval;
+	struct tsi721_ib_win *ib_win;
+	bool direct = (lstart == rstart);
+	u64 ibw_size;
+	dma_addr_t loc_start;
+	u64 ibw_start;
+	struct tsi721_ib_win_mapping *map = NULL;
+	int ret = -EBUSY;
 
-	if (!is_power_of_2(size) || size < 0x1000 ||
-	    ((u64)lstart & (size - 1)) || (rstart & (size - 1)))
-		return -EINVAL;
+	if (direct) {
+		/* Calculate minimal acceptable window size and base address */
 
-	/* Search for free inbound translation window */
+		ibw_size = roundup_pow_of_two(size);
+		ibw_start = lstart & ~(ibw_size - 1);
+
+		tsi_debug(IBW, &priv->pdev->dev,
+			"Direct (RIO_0x%llx -> PCIe_0x%pad), size=0x%x, ibw_start = 0x%llx",
+			rstart, &lstart, size, ibw_start);
+
+		while ((lstart + size) > (ibw_start + ibw_size)) {
+			ibw_size *= 2;
+			ibw_start = lstart & ~(ibw_size - 1);
+			if (ibw_size > 0x80000000) { /* Limit max size to 2GB */
+				return -EBUSY;
+			}
+		}
+
+		loc_start = ibw_start;
+
+		map = kzalloc(sizeof(struct tsi721_ib_win_mapping), GFP_ATOMIC);
+		if (map == NULL)
+			return -ENOMEM;
+
+	} else {
+		tsi_debug(IBW, &priv->pdev->dev,
+			"Translated (RIO_0x%llx -> PCIe_0x%pad), size=0x%x",
+			rstart, &lstart, size);
+
+		if (!is_power_of_2(size) || size < 0x1000 ||
+		    ((u64)lstart & (size - 1)) || (rstart & (size - 1)))
+			return -EINVAL;
+		if (priv->ibwin_cnt == 0)
+			return -EBUSY;
+		ibw_start = rstart;
+		ibw_size = size;
+		loc_start = lstart;
+	}
+
+	/*
+	 * Scan for overlapping with active regions and mark the first available
+	 * IB window at the same time.
+	 */
 	for (i = 0; i < TSI721_IBWIN_NUM; i++) {
-		regval = ioread32(priv->regs + TSI721_IBWIN_LB(i));
-		if (!(regval & TSI721_IBWIN_LB_WEN))
+		ib_win = &priv->ib_win[i];
+
+		if (!ib_win->active) {
+			if (avail == -1) {
+				avail = i;
+				ret = 0;
+			}
+		} else if (ibw_start < (ib_win->rstart + ib_win->size) &&
+			   (ibw_start + ibw_size) > ib_win->rstart) {
+			/* Return error if address translation involved */
+			if (direct && ib_win->xlat) {
+				ret = -EFAULT;
+				break;
+			}
+
+			/*
+			 * Direct mappings usually are larger than originally
+			 * requested fragments - check if this new request fits
+			 * into it.
+			 */
+			if (rstart >= ib_win->rstart &&
+			    (rstart + size) <= (ib_win->rstart +
+							ib_win->size)) {
+				/* We are in - no further mapping required */
+				map->lstart = lstart;
+				list_add_tail(&map->node, &ib_win->mappings);
+				return 0;
+			}
+
+			ret = -EFAULT;
 			break;
+		}
 	}
 
-	if (i >= TSI721_IBWIN_NUM) {
-		dev_err(&priv->pdev->dev,
-			"Unable to find free inbound window\n");
-		return -EBUSY;
+	if (ret)
+		goto out;
+	i = avail;
+
+	/* Sanity check: available IB window must be disabled at this point */
+	regval = ioread32(priv->regs + TSI721_IBWIN_LB(i));
+	if (WARN_ON(regval & TSI721_IBWIN_LB_WEN)) {
+		ret = -EIO;
+		goto out;
 	}
 
-	iowrite32(TSI721_IBWIN_SIZE(size) << 8,
+	ib_win = &priv->ib_win[i];
+	ib_win->active = true;
+	ib_win->rstart = ibw_start;
+	ib_win->lstart = loc_start;
+	ib_win->size = ibw_size;
+	ib_win->xlat = (lstart != rstart);
+	INIT_LIST_HEAD(&ib_win->mappings);
+
+	/*
+	 * When using direct IBW mapping and have larger than requested IBW size
+	 * we can have multiple local memory blocks mapped through the same IBW
+	 * To handle this situation we maintain list of "clients" for such IBWs.
+	 */
+	if (direct) {
+		map->lstart = lstart;
+		list_add_tail(&map->node, &ib_win->mappings);
+	}
+
+	iowrite32(TSI721_IBWIN_SIZE(ibw_size) << 8,
 			priv->regs + TSI721_IBWIN_SZ(i));
 
-	iowrite32(((u64)lstart >> 32), priv->regs + TSI721_IBWIN_TUA(i));
-	iowrite32(((u64)lstart & TSI721_IBWIN_TLA_ADD),
+	iowrite32(((u64)loc_start >> 32), priv->regs + TSI721_IBWIN_TUA(i));
+	iowrite32(((u64)loc_start & TSI721_IBWIN_TLA_ADD),
 		  priv->regs + TSI721_IBWIN_TLA(i));
 
-	iowrite32(rstart >> 32, priv->regs + TSI721_IBWIN_UB(i));
-	iowrite32((rstart & TSI721_IBWIN_LB_BA) | TSI721_IBWIN_LB_WEN,
+	iowrite32(ibw_start >> 32, priv->regs + TSI721_IBWIN_UB(i));
+	iowrite32((ibw_start & TSI721_IBWIN_LB_BA) | TSI721_IBWIN_LB_WEN,
 		priv->regs + TSI721_IBWIN_LB(i));
-	dev_dbg(&priv->pdev->dev,
-		"Configured IBWIN%d mapping (RIO_0x%llx -> PCIe_0x%llx)\n",
-		i, rstart, (unsigned long long)lstart);
+
+	priv->ibwin_cnt--;
+
+	tsi_debug(IBW, &priv->pdev->dev,
+		"Configured IBWIN%d (RIO_0x%llx -> PCIe_0x%pad), size=0x%llx",
+		i, ibw_start, &loc_start, ibw_size);
 
 	return 0;
+out:
+	kfree(map);
+	return ret;
 }
 
 /**
- * fsl_rio_unmap_inb_mem -- Unmapping inbound memory region.
+ * tsi721_rio_unmap_inb_mem -- Unmapping inbound memory region.
  * @mport: RapidIO master port
  * @lstart: Local memory space start address.
  */
@@ -931,25 +1233,56 @@
 				dma_addr_t lstart)
 {
 	struct tsi721_device *priv = mport->priv;
+	struct tsi721_ib_win *ib_win;
 	int i;
-	u64 addr;
-	u32 regval;
+
+	tsi_debug(IBW, &priv->pdev->dev,
+		"Unmap IBW mapped to PCIe_0x%pad", &lstart);
 
 	/* Search for matching active inbound translation window */
 	for (i = 0; i < TSI721_IBWIN_NUM; i++) {
-		regval = ioread32(priv->regs + TSI721_IBWIN_LB(i));
-		if (regval & TSI721_IBWIN_LB_WEN) {
-			regval = ioread32(priv->regs + TSI721_IBWIN_TUA(i));
-			addr = (u64)regval << 32;
-			regval = ioread32(priv->regs + TSI721_IBWIN_TLA(i));
-			addr |= regval & TSI721_IBWIN_TLA_ADD;
+		ib_win = &priv->ib_win[i];
 
-			if (addr == (u64)lstart) {
-				iowrite32(0, priv->regs + TSI721_IBWIN_LB(i));
-				break;
+		/* Address translating IBWs must to be an exact march */
+		if (!ib_win->active ||
+		    (ib_win->xlat && lstart != ib_win->lstart))
+			continue;
+
+		if (lstart >= ib_win->lstart &&
+		    lstart < (ib_win->lstart + ib_win->size)) {
+
+			if (!ib_win->xlat) {
+				struct tsi721_ib_win_mapping *map;
+				int found = 0;
+
+				list_for_each_entry(map,
+						    &ib_win->mappings, node) {
+					if (map->lstart == lstart) {
+						list_del(&map->node);
+						kfree(map);
+						found = 1;
+						break;
+					}
+				}
+
+				if (!found)
+					continue;
+
+				if (!list_empty(&ib_win->mappings))
+					break;
 			}
+
+			tsi_debug(IBW, &priv->pdev->dev, "Disable IBWIN_%d", i);
+			iowrite32(0, priv->regs + TSI721_IBWIN_LB(i));
+			ib_win->active = false;
+			priv->ibwin_cnt++;
+			break;
 		}
 	}
+
+	if (i == TSI721_IBWIN_NUM)
+		tsi_debug(IBW, &priv->pdev->dev,
+			"IB window mapped to %pad not found", &lstart);
 }
 
 /**
@@ -966,6 +1299,27 @@
 	/* Disable all SR2PC inbound windows */
 	for (i = 0; i < TSI721_IBWIN_NUM; i++)
 		iowrite32(0, priv->regs + TSI721_IBWIN_LB(i));
+	priv->ibwin_cnt = TSI721_IBWIN_NUM;
+}
+
+/*
+ * tsi721_close_sr2pc_mapping - closes all active inbound (SRIO->PCIe)
+ * translation regions.
+ * @priv: pointer to tsi721 device private data
+ */
+static void tsi721_close_sr2pc_mapping(struct tsi721_device *priv)
+{
+	struct tsi721_ib_win *ib_win;
+	int i;
+
+	/* Disable all active SR2PC inbound windows */
+	for (i = 0; i < TSI721_IBWIN_NUM; i++) {
+		ib_win = &priv->ib_win[i];
+		if (ib_win->active) {
+			iowrite32(0, priv->regs + TSI721_IBWIN_LB(i));
+			ib_win->active = false;
+		}
+	}
 }
 
 /**
@@ -982,7 +1336,7 @@
 	spin_lock_init(&priv->pw_fifo_lock);
 	if (kfifo_alloc(&priv->pw_fifo,
 			TSI721_RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
-		dev_err(&priv->pdev->dev, "PW FIFO allocation failed\n");
+		tsi_err(&priv->pdev->dev, "PW FIFO allocation failed");
 		return -ENOMEM;
 	}
 
@@ -991,6 +1345,11 @@
 	return 0;
 }
 
+static void tsi721_port_write_free(struct tsi721_device *priv)
+{
+	kfifo_free(&priv->pw_fifo);
+}
+
 static int tsi721_doorbell_init(struct tsi721_device *priv)
 {
 	/* Outbound Doorbells do not require any setup.
@@ -1009,8 +1368,9 @@
 	if (!priv->idb_base)
 		return -ENOMEM;
 
-	dev_dbg(&priv->pdev->dev, "Allocated IDB buffer @ %p (phys = %llx)\n",
-		priv->idb_base, (unsigned long long)priv->idb_dma);
+	tsi_debug(DBELL, &priv->pdev->dev,
+		  "Allocated IDB buffer @ %p (phys = %pad)",
+		  priv->idb_base, &priv->idb_dma);
 
 	iowrite32(TSI721_IDQ_SIZE_VAL(IDB_QSIZE),
 		priv->regs + TSI721_IDQ_SIZE(IDB_QUEUE));
@@ -1056,9 +1416,8 @@
 	int		bd_num = 2;
 	void __iomem	*regs;
 
-	dev_dbg(&priv->pdev->dev,
-		"Init Block DMA Engine for Maintenance requests, CH%d\n",
-		TSI721_DMACH_MAINT);
+	tsi_debug(MAINT, &priv->pdev->dev,
+		  "Init BDMA_%d Maintenance requests", TSI721_DMACH_MAINT);
 
 	/*
 	 * Initialize DMA channel for maintenance requests
@@ -1078,8 +1437,8 @@
 	priv->mdma.bd_phys = bd_phys;
 	priv->mdma.bd_base = bd_ptr;
 
-	dev_dbg(&priv->pdev->dev, "DMA descriptors @ %p (phys = %llx)\n",
-		bd_ptr, (unsigned long long)bd_phys);
+	tsi_debug(MAINT, &priv->pdev->dev, "DMA descriptors @ %p (phys = %pad)",
+		  bd_ptr, &bd_phys);
 
 	/* Allocate space for descriptor status FIFO */
 	sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
@@ -1101,9 +1460,9 @@
 	priv->mdma.sts_base = sts_ptr;
 	priv->mdma.sts_size = sts_size;
 
-	dev_dbg(&priv->pdev->dev,
-		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
-		sts_ptr, (unsigned long long)sts_phys, sts_size);
+	tsi_debug(MAINT, &priv->pdev->dev,
+		"desc status FIFO @ %p (phys = %pad) size=0x%x",
+		sts_ptr, &sts_phys, sts_size);
 
 	/* Initialize DMA descriptors ring */
 	bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
@@ -1304,11 +1663,14 @@
 	struct tsi721_device *priv = mport->priv;
 	struct tsi721_omsg_desc *desc;
 	u32 tx_slot;
+	unsigned long flags;
 
 	if (!priv->omsg_init[mbox] ||
 	    len > TSI721_MSG_MAX_SIZE || len < 8)
 		return -EINVAL;
 
+	spin_lock_irqsave(&priv->omsg_ring[mbox].lock, flags);
+
 	tx_slot = priv->omsg_ring[mbox].tx_slot;
 
 	/* Copy copy message into transfer buffer */
@@ -1320,9 +1682,11 @@
 	/* Build descriptor associated with buffer */
 	desc = priv->omsg_ring[mbox].omd_base;
 	desc[tx_slot].type_id = cpu_to_le32((DTYPE4 << 29) | rdev->destid);
+#ifdef TSI721_OMSG_DESC_INT
+	/* Request IOF_DONE interrupt generation for each N-th frame in queue */
 	if (tx_slot % 4 == 0)
 		desc[tx_slot].type_id |= cpu_to_le32(TSI721_OMD_IOF);
-
+#endif
 	desc[tx_slot].msg_info =
 		cpu_to_le32((mport->sys_size << 26) | (mbox << 22) |
 			    (0xe << 12) | (len & 0xff8));
@@ -1348,6 +1712,8 @@
 		priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
 	ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
 
+	spin_unlock_irqrestore(&priv->omsg_ring[mbox].lock, flags);
+
 	return 0;
 }
 
@@ -1361,20 +1727,23 @@
 static void tsi721_omsg_handler(struct tsi721_device *priv, int ch)
 {
 	u32 omsg_int;
+	struct rio_mport *mport = &priv->mport;
+	void *dev_id = NULL;
+	u32 tx_slot = 0xffffffff;
+	int do_callback = 0;
 
 	spin_lock(&priv->omsg_ring[ch].lock);
 
 	omsg_int = ioread32(priv->regs + TSI721_OBDMAC_INT(ch));
 
 	if (omsg_int & TSI721_OBDMAC_INT_ST_FULL)
-		dev_info(&priv->pdev->dev,
-			"OB MBOX%d: Status FIFO is full\n", ch);
+		tsi_info(&priv->pdev->dev,
+			"OB MBOX%d: Status FIFO is full", ch);
 
 	if (omsg_int & (TSI721_OBDMAC_INT_DONE | TSI721_OBDMAC_INT_IOF_DONE)) {
 		u32 srd_ptr;
 		u64 *sts_ptr, last_ptr = 0, prev_ptr = 0;
 		int i, j;
-		u32 tx_slot;
 
 		/*
 		 * Find last successfully processed descriptor
@@ -1402,7 +1771,7 @@
 		priv->omsg_ring[ch].sts_rdptr = srd_ptr;
 		iowrite32(srd_ptr, priv->regs + TSI721_OBDMAC_DSRP(ch));
 
-		if (!priv->mport->outb_msg[ch].mcback)
+		if (!mport->outb_msg[ch].mcback)
 			goto no_sts_update;
 
 		/* Inform upper layer about transfer completion */
@@ -1424,14 +1793,19 @@
 				goto no_sts_update;
 		}
 
+		if (tx_slot >= priv->omsg_ring[ch].size)
+			tsi_debug(OMSG, &priv->pdev->dev,
+				  "OB_MSG tx_slot=%x > size=%x",
+				  tx_slot, priv->omsg_ring[ch].size);
+		WARN_ON(tx_slot >= priv->omsg_ring[ch].size);
+
 		/* Move slot index to the next message to be sent */
 		++tx_slot;
 		if (tx_slot == priv->omsg_ring[ch].size)
 			tx_slot = 0;
-		BUG_ON(tx_slot >= priv->omsg_ring[ch].size);
-		priv->mport->outb_msg[ch].mcback(priv->mport,
-				priv->omsg_ring[ch].dev_id, ch,
-				tx_slot);
+
+		dev_id = priv->omsg_ring[ch].dev_id;
+		do_callback = 1;
 	}
 
 no_sts_update:
@@ -1442,20 +1816,20 @@
 		* reinitialize OB MSG channel
 		*/
 
-		dev_dbg(&priv->pdev->dev, "OB MSG ABORT ch_stat=%x\n",
-			ioread32(priv->regs + TSI721_OBDMAC_STS(ch)));
+		tsi_debug(OMSG, &priv->pdev->dev, "OB MSG ABORT ch_stat=%x",
+			  ioread32(priv->regs + TSI721_OBDMAC_STS(ch)));
 
 		iowrite32(TSI721_OBDMAC_INT_ERROR,
 				priv->regs + TSI721_OBDMAC_INT(ch));
-		iowrite32(TSI721_OBDMAC_CTL_INIT,
+		iowrite32(TSI721_OBDMAC_CTL_RETRY_THR | TSI721_OBDMAC_CTL_INIT,
 				priv->regs + TSI721_OBDMAC_CTL(ch));
 		ioread32(priv->regs + TSI721_OBDMAC_CTL(ch));
 
 		/* Inform upper level to clear all pending tx slots */
-		if (priv->mport->outb_msg[ch].mcback)
-			priv->mport->outb_msg[ch].mcback(priv->mport,
-					priv->omsg_ring[ch].dev_id, ch,
-					priv->omsg_ring[ch].tx_slot);
+		dev_id = priv->omsg_ring[ch].dev_id;
+		tx_slot = priv->omsg_ring[ch].tx_slot;
+		do_callback = 1;
+
 		/* Synch tx_slot tracking */
 		iowrite32(priv->omsg_ring[ch].tx_slot,
 			priv->regs + TSI721_OBDMAC_DRDCNT(ch));
@@ -1477,6 +1851,9 @@
 	}
 
 	spin_unlock(&priv->omsg_ring[ch].lock);
+
+	if (mport->outb_msg[ch].mcback && do_callback)
+		mport->outb_msg[ch].mcback(mport, dev_id, ch, tx_slot);
 }
 
 /**
@@ -1514,9 +1891,8 @@
 				&priv->omsg_ring[mbox].omq_phys[i],
 				GFP_KERNEL);
 		if (priv->omsg_ring[mbox].omq_base[i] == NULL) {
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate OB MSG data buffer for"
-				" MBOX%d\n", mbox);
+			tsi_debug(OMSG, &priv->pdev->dev,
+				  "ENOMEM for OB_MSG_%d data buffer", mbox);
 			rc = -ENOMEM;
 			goto out_buf;
 		}
@@ -1528,9 +1904,8 @@
 				(entries + 1) * sizeof(struct tsi721_omsg_desc),
 				&priv->omsg_ring[mbox].omd_phys, GFP_KERNEL);
 	if (priv->omsg_ring[mbox].omd_base == NULL) {
-		dev_dbg(&priv->pdev->dev,
-			"Unable to allocate OB MSG descriptor memory "
-			"for MBOX%d\n", mbox);
+		tsi_debug(OMSG, &priv->pdev->dev,
+			"ENOMEM for OB_MSG_%d descriptor memory", mbox);
 		rc = -ENOMEM;
 		goto out_buf;
 	}
@@ -1544,9 +1919,8 @@
 						sizeof(struct tsi721_dma_sts),
 			&priv->omsg_ring[mbox].sts_phys, GFP_KERNEL);
 	if (priv->omsg_ring[mbox].sts_base == NULL) {
-		dev_dbg(&priv->pdev->dev,
-			"Unable to allocate OB MSG descriptor status FIFO "
-			"for MBOX%d\n", mbox);
+		tsi_debug(OMSG, &priv->pdev->dev,
+			"ENOMEM for OB_MSG_%d status FIFO", mbox);
 		rc = -ENOMEM;
 		goto out_desc;
 	}
@@ -1575,32 +1949,28 @@
 
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX) {
+		int idx = TSI721_VECT_OMB0_DONE + mbox;
+
 		/* Request interrupt service if we are in MSI-X mode */
-		rc = request_irq(
-			priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
-			tsi721_omsg_msix, 0,
-			priv->msix[TSI721_VECT_OMB0_DONE + mbox].irq_name,
-			(void *)mport);
+		rc = request_irq(priv->msix[idx].vector, tsi721_omsg_msix, 0,
+				 priv->msix[idx].irq_name, (void *)priv);
 
 		if (rc) {
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate MSI-X interrupt for "
-				"OBOX%d-DONE\n", mbox);
+			tsi_debug(OMSG, &priv->pdev->dev,
+				"Unable to get MSI-X IRQ for OBOX%d-DONE",
+				mbox);
 			goto out_stat;
 		}
 
-		rc = request_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
-			tsi721_omsg_msix, 0,
-			priv->msix[TSI721_VECT_OMB0_INT + mbox].irq_name,
-			(void *)mport);
+		idx = TSI721_VECT_OMB0_INT + mbox;
+		rc = request_irq(priv->msix[idx].vector, tsi721_omsg_msix, 0,
+				 priv->msix[idx].irq_name, (void *)priv);
 
 		if (rc)	{
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate MSI-X interrupt for "
-				"MBOX%d-INT\n", mbox);
-			free_irq(
-				priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
-				(void *)mport);
+			tsi_debug(OMSG, &priv->pdev->dev,
+				"Unable to get MSI-X IRQ for MBOX%d-INT", mbox);
+			idx = TSI721_VECT_OMB0_DONE + mbox;
+			free_irq(priv->msix[idx].vector, (void *)priv);
 			goto out_stat;
 		}
 	}
@@ -1621,7 +1991,8 @@
 	mb();
 
 	/* Initialize Outbound Message engine */
-	iowrite32(TSI721_OBDMAC_CTL_INIT, priv->regs + TSI721_OBDMAC_CTL(mbox));
+	iowrite32(TSI721_OBDMAC_CTL_RETRY_THR | TSI721_OBDMAC_CTL_INIT,
+		  priv->regs + TSI721_OBDMAC_CTL(mbox));
 	ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
 	udelay(10);
 
@@ -1684,9 +2055,9 @@
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX) {
 		free_irq(priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
-			 (void *)mport);
+			 (void *)priv);
 		free_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
-			 (void *)mport);
+			 (void *)priv);
 	}
 #endif /* CONFIG_PCI_MSI */
 
@@ -1731,30 +2102,28 @@
 {
 	u32 mbox = ch - 4;
 	u32 imsg_int;
+	struct rio_mport *mport = &priv->mport;
 
 	spin_lock(&priv->imsg_ring[mbox].lock);
 
 	imsg_int = ioread32(priv->regs + TSI721_IBDMAC_INT(ch));
 
 	if (imsg_int & TSI721_IBDMAC_INT_SRTO)
-		dev_info(&priv->pdev->dev, "IB MBOX%d SRIO timeout\n",
-			mbox);
+		tsi_info(&priv->pdev->dev, "IB MBOX%d SRIO timeout", mbox);
 
 	if (imsg_int & TSI721_IBDMAC_INT_PC_ERROR)
-		dev_info(&priv->pdev->dev, "IB MBOX%d PCIe error\n",
-			mbox);
+		tsi_info(&priv->pdev->dev, "IB MBOX%d PCIe error", mbox);
 
 	if (imsg_int & TSI721_IBDMAC_INT_FQ_LOW)
-		dev_info(&priv->pdev->dev,
-			"IB MBOX%d IB free queue low\n", mbox);
+		tsi_info(&priv->pdev->dev, "IB MBOX%d IB free queue low", mbox);
 
 	/* Clear IB channel interrupts */
 	iowrite32(imsg_int, priv->regs + TSI721_IBDMAC_INT(ch));
 
 	/* If an IB Msg is received notify the upper layer */
 	if (imsg_int & TSI721_IBDMAC_INT_DQ_RCV &&
-		priv->mport->inb_msg[mbox].mcback)
-		priv->mport->inb_msg[mbox].mcback(priv->mport,
+		mport->inb_msg[mbox].mcback)
+		mport->inb_msg[mbox].mcback(mport,
 				priv->imsg_ring[mbox].dev_id, mbox, -1);
 
 	if (!(priv->flags & TSI721_USING_MSIX)) {
@@ -1810,8 +2179,8 @@
 				   GFP_KERNEL);
 
 	if (priv->imsg_ring[mbox].buf_base == NULL) {
-		dev_err(&priv->pdev->dev,
-			"Failed to allocate buffers for IB MBOX%d\n", mbox);
+		tsi_err(&priv->pdev->dev,
+			"Failed to allocate buffers for IB MBOX%d", mbox);
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -1824,8 +2193,8 @@
 				   GFP_KERNEL);
 
 	if (priv->imsg_ring[mbox].imfq_base == NULL) {
-		dev_err(&priv->pdev->dev,
-			"Failed to allocate free queue for IB MBOX%d\n", mbox);
+		tsi_err(&priv->pdev->dev,
+			"Failed to allocate free queue for IB MBOX%d", mbox);
 		rc = -ENOMEM;
 		goto out_buf;
 	}
@@ -1837,8 +2206,8 @@
 				   &priv->imsg_ring[mbox].imd_phys, GFP_KERNEL);
 
 	if (priv->imsg_ring[mbox].imd_base == NULL) {
-		dev_err(&priv->pdev->dev,
-			"Failed to allocate descriptor memory for IB MBOX%d\n",
+		tsi_err(&priv->pdev->dev,
+			"Failed to allocate descriptor memory for IB MBOX%d",
 			mbox);
 		rc = -ENOMEM;
 		goto out_dma;
@@ -1859,7 +2228,7 @@
 	 * once when first inbound mailbox is requested.
 	 */
 	if (!(priv->flags & TSI721_IMSGID_SET)) {
-		iowrite32((u32)priv->mport->host_deviceid,
+		iowrite32((u32)priv->mport.host_deviceid,
 			priv->regs + TSI721_IB_DEVID);
 		priv->flags |= TSI721_IMSGID_SET;
 	}
@@ -1890,31 +2259,29 @@
 
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX) {
+		int idx = TSI721_VECT_IMB0_RCV + mbox;
+
 		/* Request interrupt service if we are in MSI-X mode */
-		rc = request_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
-			tsi721_imsg_msix, 0,
-			priv->msix[TSI721_VECT_IMB0_RCV + mbox].irq_name,
-			(void *)mport);
+		rc = request_irq(priv->msix[idx].vector, tsi721_imsg_msix, 0,
+				 priv->msix[idx].irq_name, (void *)priv);
 
 		if (rc) {
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate MSI-X interrupt for "
-				"IBOX%d-DONE\n", mbox);
+			tsi_debug(IMSG, &priv->pdev->dev,
+				"Unable to get MSI-X IRQ for IBOX%d-DONE",
+				mbox);
 			goto out_desc;
 		}
 
-		rc = request_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
-			tsi721_imsg_msix, 0,
-			priv->msix[TSI721_VECT_IMB0_INT + mbox].irq_name,
-			(void *)mport);
+		idx = TSI721_VECT_IMB0_INT + mbox;
+		rc = request_irq(priv->msix[idx].vector, tsi721_imsg_msix, 0,
+				 priv->msix[idx].irq_name, (void *)priv);
 
 		if (rc)	{
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate MSI-X interrupt for "
-				"IBOX%d-INT\n", mbox);
+			tsi_debug(IMSG, &priv->pdev->dev,
+				"Unable to get MSI-X IRQ for IBOX%d-INT", mbox);
 			free_irq(
 				priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
-				(void *)mport);
+				(void *)priv);
 			goto out_desc;
 		}
 	}
@@ -1985,9 +2352,9 @@
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX) {
 		free_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
-				(void *)mport);
+				(void *)priv);
 		free_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
-				(void *)mport);
+				(void *)priv);
 	}
 #endif /* CONFIG_PCI_MSI */
 
@@ -2034,8 +2401,8 @@
 
 	rx_slot = priv->imsg_ring[mbox].rx_slot;
 	if (priv->imsg_ring[mbox].imq_base[rx_slot]) {
-		dev_err(&priv->pdev->dev,
-			"Error adding inbound buffer %d, buffer exists\n",
+		tsi_err(&priv->pdev->dev,
+			"Error adding inbound buffer %d, buffer exists",
 			rx_slot);
 		rc = -EINVAL;
 		goto out;
@@ -2153,6 +2520,39 @@
 }
 
 /**
+ * tsi721_query_mport - Fetch inbound message from the Tsi721 MSG Queue
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @mbox: Inbound mailbox number
+ *
+ * Returns pointer to the message on success or NULL on failure.
+ */
+static int tsi721_query_mport(struct rio_mport *mport,
+			      struct rio_mport_attr *attr)
+{
+	struct tsi721_device *priv = mport->priv;
+	u32 rval;
+
+	rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_ERR_STS_CSR(0)));
+	if (rval & RIO_PORT_N_ERR_STS_PORT_OK) {
+		rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_CTL2_CSR(0)));
+		attr->link_speed = (rval & RIO_PORT_N_CTL2_SEL_BAUD) >> 28;
+		rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_CTL_CSR(0)));
+		attr->link_width = (rval & RIO_PORT_N_CTL_IPW) >> 27;
+	} else
+		attr->link_speed = RIO_LINK_DOWN;
+
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	attr->flags = RIO_MPORT_DMA | RIO_MPORT_DMA_SG;
+	attr->dma_max_sge = 0;
+	attr->dma_max_size = TSI721_BDMA_MAX_BCOUNT;
+	attr->dma_align = 0;
+#else
+	attr->flags = 0;
+#endif
+	return 0;
+}
+
+/**
  * tsi721_disable_ints - disables all device interrupts
  * @priv: pointer to tsi721 private data
  */
@@ -2203,6 +2603,34 @@
 	iowrite32(0, priv->regs + TSI721_RIO_EM_DEV_INT_EN);
 }
 
+static struct rio_ops tsi721_rio_ops = {
+	.lcread			= tsi721_lcread,
+	.lcwrite		= tsi721_lcwrite,
+	.cread			= tsi721_cread_dma,
+	.cwrite			= tsi721_cwrite_dma,
+	.dsend			= tsi721_dsend,
+	.open_inb_mbox		= tsi721_open_inb_mbox,
+	.close_inb_mbox		= tsi721_close_inb_mbox,
+	.open_outb_mbox		= tsi721_open_outb_mbox,
+	.close_outb_mbox	= tsi721_close_outb_mbox,
+	.add_outb_message	= tsi721_add_outb_message,
+	.add_inb_buffer		= tsi721_add_inb_buffer,
+	.get_inb_message	= tsi721_get_inb_message,
+	.map_inb		= tsi721_rio_map_inb_mem,
+	.unmap_inb		= tsi721_rio_unmap_inb_mem,
+	.pwenable		= tsi721_pw_enable,
+	.query_mport		= tsi721_query_mport,
+	.map_outb		= tsi721_map_outb_win,
+	.unmap_outb		= tsi721_unmap_outb_win,
+};
+
+static void tsi721_mport_release(struct device *dev)
+{
+	struct rio_mport *mport = to_rio_mport(dev);
+
+	tsi_debug(EXIT, dev, "%s id=%d", mport->name, mport->id);
+}
+
 /**
  * tsi721_setup_mport - Setup Tsi721 as RapidIO subsystem master port
  * @priv: pointer to tsi721 private data
@@ -2213,46 +2641,20 @@
 {
 	struct pci_dev *pdev = priv->pdev;
 	int err = 0;
-	struct rio_ops *ops;
+	struct rio_mport *mport = &priv->mport;
 
-	struct rio_mport *mport;
+	err = rio_mport_initialize(mport);
+	if (err)
+		return err;
 
-	ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL);
-	if (!ops) {
-		dev_dbg(&pdev->dev, "Unable to allocate memory for rio_ops\n");
-		return -ENOMEM;
-	}
-
-	ops->lcread = tsi721_lcread;
-	ops->lcwrite = tsi721_lcwrite;
-	ops->cread = tsi721_cread_dma;
-	ops->cwrite = tsi721_cwrite_dma;
-	ops->dsend = tsi721_dsend;
-	ops->open_inb_mbox = tsi721_open_inb_mbox;
-	ops->close_inb_mbox = tsi721_close_inb_mbox;
-	ops->open_outb_mbox = tsi721_open_outb_mbox;
-	ops->close_outb_mbox = tsi721_close_outb_mbox;
-	ops->add_outb_message = tsi721_add_outb_message;
-	ops->add_inb_buffer = tsi721_add_inb_buffer;
-	ops->get_inb_message = tsi721_get_inb_message;
-	ops->map_inb = tsi721_rio_map_inb_mem;
-	ops->unmap_inb = tsi721_rio_unmap_inb_mem;
-
-	mport = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
-	if (!mport) {
-		kfree(ops);
-		dev_dbg(&pdev->dev, "Unable to allocate memory for mport\n");
-		return -ENOMEM;
-	}
-
-	mport->ops = ops;
+	mport->ops = &tsi721_rio_ops;
 	mport->index = 0;
 	mport->sys_size = 0; /* small system */
 	mport->phy_type = RIO_PHY_SERIAL;
 	mport->priv = (void *)priv;
 	mport->phys_efptr = 0x100;
 	mport->dev.parent = &pdev->dev;
-	priv->mport = mport;
+	mport->dev.release = tsi721_mport_release;
 
 	INIT_LIST_HEAD(&mport->dbells);
 
@@ -2270,31 +2672,28 @@
 	else if (!pci_enable_msi(pdev))
 		priv->flags |= TSI721_USING_MSI;
 	else
-		dev_info(&pdev->dev,
-			 "MSI/MSI-X is not available. Using legacy INTx.\n");
+		tsi_debug(MPORT, &pdev->dev,
+			 "MSI/MSI-X is not available. Using legacy INTx.");
 #endif /* CONFIG_PCI_MSI */
 
-	err = tsi721_request_irq(mport);
+	err = tsi721_request_irq(priv);
 
-	if (!err) {
-		tsi721_interrupts_init(priv);
-		ops->pwenable = tsi721_pw_enable;
-	} else {
-		dev_err(&pdev->dev, "Unable to get assigned PCI IRQ "
-			"vector %02X err=0x%x\n", pdev->irq, err);
-		goto err_exit;
+	if (err) {
+		tsi_err(&pdev->dev, "Unable to get PCI IRQ %02X (err=0x%x)",
+			pdev->irq, err);
+		return err;
 	}
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
-	tsi721_register_dma(priv);
+	err = tsi721_register_dma(priv);
+	if (err)
+		goto err_exit;
 #endif
 	/* Enable SRIO link */
 	iowrite32(ioread32(priv->regs + TSI721_DEVCTL) |
 		  TSI721_DEVCTL_SRBOOT_CMPL,
 		  priv->regs + TSI721_DEVCTL);
 
-	rio_register_mport(mport);
-
 	if (mport->host_deviceid >= 0)
 		iowrite32(RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER |
 			  RIO_PORT_GEN_DISCOVERED,
@@ -2302,11 +2701,16 @@
 	else
 		iowrite32(0, priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR));
 
+	err = rio_register_mport(mport);
+	if (err) {
+		tsi721_unregister_dma(priv);
+		goto err_exit;
+	}
+
 	return 0;
 
 err_exit:
-	kfree(mport);
-	kfree(ops);
+	tsi721_free_irq(priv);
 	return err;
 }
 
@@ -2317,15 +2721,14 @@
 	int err;
 
 	priv = kzalloc(sizeof(struct tsi721_device), GFP_KERNEL);
-	if (priv == NULL) {
-		dev_err(&pdev->dev, "Failed to allocate memory for device\n");
+	if (!priv) {
 		err = -ENOMEM;
 		goto err_exit;
 	}
 
 	err = pci_enable_device(pdev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to enable PCI device\n");
+		tsi_err(&pdev->dev, "Failed to enable PCI device");
 		goto err_clean;
 	}
 
@@ -2333,13 +2736,12 @@
 
 #ifdef DEBUG
 	{
-	int i;
-	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
-		dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n",
-			i, (unsigned long long)pci_resource_start(pdev, i),
-			(unsigned long)pci_resource_len(pdev, i),
-			pci_resource_flags(pdev, i));
-	}
+		int i;
+
+		for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+			tsi_debug(INIT, &pdev->dev, "res%d %pR",
+				  i, &pdev->resource[i]);
+		}
 	}
 #endif
 	/*
@@ -2350,8 +2752,7 @@
 	if (!(pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM) ||
 	    pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM_64 ||
 	    pci_resource_len(pdev, BAR_0) < TSI721_REG_SPACE_SIZE) {
-		dev_err(&pdev->dev,
-			"Missing or misconfigured CSR BAR0, aborting.\n");
+		tsi_err(&pdev->dev, "Missing or misconfigured CSR BAR0");
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
@@ -2360,8 +2761,7 @@
 	if (!(pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM) ||
 	    pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM_64 ||
 	    pci_resource_len(pdev, BAR_1) < TSI721_DB_WIN_SIZE) {
-		dev_err(&pdev->dev,
-			"Missing or misconfigured Doorbell BAR1, aborting.\n");
+		tsi_err(&pdev->dev, "Missing or misconfigured Doorbell BAR1");
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
@@ -2373,20 +2773,32 @@
 	 * It may be a good idea to keep them disabled using HW configuration
 	 * to save PCI memory space.
 	 */
-	if ((pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM) &&
-	    (pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM_64)) {
-		dev_info(&pdev->dev, "Outbound BAR2 is not used but enabled.\n");
+
+	priv->p2r_bar[0].size = priv->p2r_bar[1].size = 0;
+
+	if (pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM_64) {
+		if (pci_resource_flags(pdev, BAR_2) & IORESOURCE_PREFETCH)
+			tsi_debug(INIT, &pdev->dev,
+				 "Prefetchable OBW BAR2 will not be used");
+		else {
+			priv->p2r_bar[0].base = pci_resource_start(pdev, BAR_2);
+			priv->p2r_bar[0].size = pci_resource_len(pdev, BAR_2);
+		}
 	}
 
-	if ((pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM) &&
-	    (pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM_64)) {
-		dev_info(&pdev->dev, "Outbound BAR4 is not used but enabled.\n");
+	if (pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM_64) {
+		if (pci_resource_flags(pdev, BAR_4) & IORESOURCE_PREFETCH)
+			tsi_debug(INIT, &pdev->dev,
+				 "Prefetchable OBW BAR4 will not be used");
+		else {
+			priv->p2r_bar[1].base = pci_resource_start(pdev, BAR_4);
+			priv->p2r_bar[1].size = pci_resource_len(pdev, BAR_4);
+		}
 	}
 
 	err = pci_request_regions(pdev, DRV_NAME);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot obtain PCI resources, "
-			"aborting.\n");
+		tsi_err(&pdev->dev, "Unable to obtain PCI resources");
 		goto err_disable_pdev;
 	}
 
@@ -2394,16 +2806,14 @@
 
 	priv->regs = pci_ioremap_bar(pdev, BAR_0);
 	if (!priv->regs) {
-		dev_err(&pdev->dev,
-			"Unable to map device registers space, aborting\n");
+		tsi_err(&pdev->dev, "Unable to map device registers space");
 		err = -ENOMEM;
 		goto err_free_res;
 	}
 
 	priv->odb_base = pci_ioremap_bar(pdev, BAR_1);
 	if (!priv->odb_base) {
-		dev_err(&pdev->dev,
-			"Unable to map outbound doorbells space, aborting\n");
+		tsi_err(&pdev->dev, "Unable to map outbound doorbells space");
 		err = -ENOMEM;
 		goto err_unmap_bars;
 	}
@@ -2412,25 +2822,23 @@
 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
-			dev_info(&pdev->dev, "Unable to set DMA mask\n");
+			tsi_err(&pdev->dev, "Unable to set DMA mask");
 			goto err_unmap_bars;
 		}
 
 		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
-			dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
+			tsi_info(&pdev->dev, "Unable to set consistent DMA mask");
 	} else {
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 		if (err)
-			dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
+			tsi_info(&pdev->dev, "Unable to set consistent DMA mask");
 	}
 
 	BUG_ON(!pci_is_pcie(pdev));
 
-	/* Clear "no snoop" and "relaxed ordering" bits, use default MRRS. */
+	/* Clear "no snoop" and "relaxed ordering" bits. */
 	pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
-		PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_RELAX_EN |
-		PCI_EXP_DEVCTL_NOSNOOP_EN,
-		PCI_EXP_DEVCTL_READRQ_512B);
+		PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN, 0);
 
 	/* Adjust PCIe completion timeout. */
 	pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL2, 0xf, 0x2);
@@ -2452,7 +2860,7 @@
 	tsi721_init_sr2pc_mapping(priv);
 
 	if (tsi721_bdma_maint_init(priv)) {
-		dev_err(&pdev->dev, "BDMA initialization failed, aborting\n");
+		tsi_err(&pdev->dev, "BDMA initialization failed");
 		err = -ENOMEM;
 		goto err_unmap_bars;
 	}
@@ -2471,9 +2879,13 @@
 	if (err)
 		goto err_free_consistent;
 
+	pci_set_drvdata(pdev, priv);
+	tsi721_interrupts_init(priv);
+
 	return 0;
 
 err_free_consistent:
+	tsi721_port_write_free(priv);
 	tsi721_doorbell_free(priv);
 err_free_bdma:
 	tsi721_bdma_maint_free(priv);
@@ -2493,6 +2905,53 @@
 	return err;
 }
 
+static void tsi721_remove(struct pci_dev *pdev)
+{
+	struct tsi721_device *priv = pci_get_drvdata(pdev);
+
+	tsi_debug(EXIT, &pdev->dev, "enter");
+
+	tsi721_disable_ints(priv);
+	tsi721_free_irq(priv);
+	flush_scheduled_work();
+	rio_unregister_mport(&priv->mport);
+
+	tsi721_unregister_dma(priv);
+	tsi721_bdma_maint_free(priv);
+	tsi721_doorbell_free(priv);
+	tsi721_port_write_free(priv);
+	tsi721_close_sr2pc_mapping(priv);
+
+	if (priv->regs)
+		iounmap(priv->regs);
+	if (priv->odb_base)
+		iounmap(priv->odb_base);
+#ifdef CONFIG_PCI_MSI
+	if (priv->flags & TSI721_USING_MSIX)
+		pci_disable_msix(priv->pdev);
+	else if (priv->flags & TSI721_USING_MSI)
+		pci_disable_msi(priv->pdev);
+#endif
+	pci_release_regions(pdev);
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+	kfree(priv);
+	tsi_debug(EXIT, &pdev->dev, "exit");
+}
+
+static void tsi721_shutdown(struct pci_dev *pdev)
+{
+	struct tsi721_device *priv = pci_get_drvdata(pdev);
+
+	tsi_debug(EXIT, &pdev->dev, "enter");
+
+	tsi721_disable_ints(priv);
+	tsi721_dma_stop_all(priv);
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+}
+
 static const struct pci_device_id tsi721_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_IDT, PCI_DEVICE_ID_TSI721) },
 	{ 0, }	/* terminate list */
@@ -2504,14 +2963,11 @@
 	.name		= "tsi721",
 	.id_table	= tsi721_pci_tbl,
 	.probe		= tsi721_probe,
+	.remove		= tsi721_remove,
+	.shutdown	= tsi721_shutdown,
 };
 
-static int __init tsi721_init(void)
-{
-	return pci_register_driver(&tsi721_driver);
-}
-
-device_initcall(tsi721_init);
+module_pci_driver(tsi721_driver);
 
 MODULE_DESCRIPTION("IDT Tsi721 PCIExpress-to-SRIO bridge driver");
 MODULE_AUTHOR("Integrated Device Technology, Inc.");

diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
index 9d25025..5456dbd 100644
--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h

@@ -21,6 +21,46 @@
 #ifndef __TSI721_H
 #define __TSI721_H
 
+/* Debug output filtering masks */
+enum {
+	DBG_NONE	= 0,
+	DBG_INIT	= BIT(0), /* driver init */
+	DBG_EXIT	= BIT(1), /* driver exit */
+	DBG_MPORT	= BIT(2), /* mport add/remove */
+	DBG_MAINT	= BIT(3), /* maintenance ops messages */
+	DBG_DMA		= BIT(4), /* DMA transfer messages */
+	DBG_DMAV	= BIT(5), /* verbose DMA transfer messages */
+	DBG_IBW		= BIT(6), /* inbound window */
+	DBG_EVENT	= BIT(7), /* event handling messages */
+	DBG_OBW		= BIT(8), /* outbound window messages */
+	DBG_DBELL	= BIT(9), /* doorbell messages */
+	DBG_OMSG	= BIT(10), /* doorbell messages */
+	DBG_IMSG	= BIT(11), /* doorbell messages */
+	DBG_ALL		= ~0,
+};
+
+#ifdef DEBUG
+extern u32 dbg_level;
+
+#define tsi_debug(level, dev, fmt, arg...)				\
+	do {								\
+		if (DBG_##level & dbg_level)				\
+			dev_dbg(dev, "%s: " fmt "\n", __func__, ##arg);	\
+	} while (0)
+#else
+#define tsi_debug(level, dev, fmt, arg...) \
+		no_printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##arg)
+#endif
+
+#define tsi_info(dev, fmt, arg...) \
+	dev_info(dev, "%s: " fmt "\n", __func__, ##arg)
+
+#define tsi_warn(dev, fmt, arg...) \
+	dev_warn(dev, "%s: WARNING " fmt "\n", __func__, ##arg)
+
+#define tsi_err(dev, fmt, arg...) \
+	dev_err(dev, "%s: ERROR " fmt "\n", __func__, ##arg)
+
 #define DRV_NAME	"tsi721"
 
 #define DEFAULT_HOPCOUNT	0xff
@@ -674,7 +714,7 @@
 	struct dma_chan		dchan;
 	struct tsi721_tx_desc	*tx_desc;
 	spinlock_t		lock;
-	struct list_head	active_list;
+	struct tsi721_tx_desc	*active_tx;
 	struct list_head	queue;
 	struct list_head	free_list;
 	struct tasklet_struct	tasklet;
@@ -808,9 +848,38 @@
 };
 #endif /* CONFIG_PCI_MSI */
 
+struct tsi721_ib_win_mapping {
+	struct list_head node;
+	dma_addr_t	lstart;
+};
+
+struct tsi721_ib_win {
+	u64		rstart;
+	u32		size;
+	dma_addr_t	lstart;
+	bool		active;
+	bool		xlat;
+	struct list_head mappings;
+};
+
+struct tsi721_obw_bar {
+	u64		base;
+	u64		size;
+	u64		free;
+};
+
+struct tsi721_ob_win {
+	u64		base;
+	u32		size;
+	u16		destid;
+	u64		rstart;
+	bool		active;
+	struct tsi721_obw_bar *pbar;
+};
+
 struct tsi721_device {
 	struct pci_dev	*pdev;
-	struct rio_mport *mport;
+	struct rio_mport mport;
 	u32		flags;
 	void __iomem	*regs;
 #ifdef CONFIG_PCI_MSI
@@ -843,11 +912,25 @@
 	/* Outbound Messaging */
 	int		omsg_init[TSI721_OMSG_CHNUM];
 	struct tsi721_omsg_ring	omsg_ring[TSI721_OMSG_CHNUM];
+
+	/* Inbound Mapping Windows */
+	struct tsi721_ib_win ib_win[TSI721_IBWIN_NUM];
+	int		ibwin_cnt;
+
+	/* Outbound Mapping Windows */
+	struct tsi721_obw_bar p2r_bar[2];
+	struct tsi721_ob_win  ob_win[TSI721_OBWIN_NUM];
+	int		obwin_cnt;
 };
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 extern void tsi721_bdma_handler(struct tsi721_bdma_chan *bdma_chan);
 extern int tsi721_register_dma(struct tsi721_device *priv);
+extern void tsi721_unregister_dma(struct tsi721_device *priv);
+extern void tsi721_dma_stop_all(struct tsi721_device *priv);
+#else
+#define tsi721_dma_stop_all(priv) do {} while (0)
+#define tsi721_unregister_dma(priv) do {} while (0)
 #endif
 
 #endif

diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
index 4729594..155cae1e 100644
--- a/drivers/rapidio/devices/tsi721_dma.c
+++ b/drivers/rapidio/devices/tsi721_dma.c

@@ -30,6 +30,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/kfifo.h>
+#include <linux/sched.h>
 #include <linux/delay.h>
 #include "../../dma/dmaengine.h"
 
@@ -63,14 +64,6 @@
 	return container_of(txd, struct tsi721_tx_desc, txd);
 }
 
-static inline
-struct tsi721_tx_desc *tsi721_dma_first_active(
-				struct tsi721_bdma_chan *bdma_chan)
-{
-	return list_first_entry(&bdma_chan->active_list,
-				struct tsi721_tx_desc, desc_node);
-}
-
 static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 {
 	struct tsi721_dma_desc *bd_ptr;
@@ -83,7 +76,7 @@
 	struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
 #endif
 
-	dev_dbg(dev, "Init Block DMA Engine, CH%d\n", bdma_chan->id);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d", bdma_chan->id);
 
 	/*
 	 * Allocate space for DMA descriptors
@@ -91,7 +84,7 @@
 	 */
 	bd_ptr = dma_zalloc_coherent(dev,
 				(bd_num + 1) * sizeof(struct tsi721_dma_desc),
-				&bd_phys, GFP_KERNEL);
+				&bd_phys, GFP_ATOMIC);
 	if (!bd_ptr)
 		return -ENOMEM;
 
@@ -99,8 +92,9 @@
 	bdma_chan->bd_phys = bd_phys;
 	bdma_chan->bd_base = bd_ptr;
 
-	dev_dbg(dev, "DMA descriptors @ %p (phys = %llx)\n",
-		bd_ptr, (unsigned long long)bd_phys);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+		  "DMAC%d descriptors @ %p (phys = %pad)",
+		  bdma_chan->id, bd_ptr, &bd_phys);
 
 	/* Allocate space for descriptor status FIFO */
 	sts_size = ((bd_num + 1) >= TSI721_DMA_MINSTSSZ) ?
@@ -108,7 +102,7 @@
 	sts_size = roundup_pow_of_two(sts_size);
 	sts_ptr = dma_zalloc_coherent(dev,
 				     sts_size * sizeof(struct tsi721_dma_sts),
-				     &sts_phys, GFP_KERNEL);
+				     &sts_phys, GFP_ATOMIC);
 	if (!sts_ptr) {
 		/* Free space allocated for DMA descriptors */
 		dma_free_coherent(dev,
@@ -122,9 +116,9 @@
 	bdma_chan->sts_base = sts_ptr;
 	bdma_chan->sts_size = sts_size;
 
-	dev_dbg(dev,
-		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
-		sts_ptr, (unsigned long long)sts_phys, sts_size);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+		"DMAC%d desc status FIFO @ %p (phys = %pad) size=0x%x",
+		bdma_chan->id, sts_ptr, &sts_phys, sts_size);
 
 	/* Initialize DMA descriptors ring using added link descriptor */
 	bd_ptr[bd_num].type_id = cpu_to_le32(DTYPE3 << 29);
@@ -163,8 +157,9 @@
 				 priv->msix[idx].irq_name, (void *)bdma_chan);
 
 		if (rc) {
-			dev_dbg(dev, "Unable to get MSI-X for BDMA%d-DONE\n",
-				bdma_chan->id);
+			tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+				  "Unable to get MSI-X for DMAC%d-DONE",
+				  bdma_chan->id);
 			goto err_out;
 		}
 
@@ -174,8 +169,9 @@
 				priv->msix[idx].irq_name, (void *)bdma_chan);
 
 		if (rc)	{
-			dev_dbg(dev, "Unable to get MSI-X for BDMA%d-INT\n",
-				bdma_chan->id);
+			tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+				  "Unable to get MSI-X for DMAC%d-INT",
+				  bdma_chan->id);
 			free_irq(
 				priv->msix[TSI721_VECT_DMA0_DONE +
 					    bdma_chan->id].vector,
@@ -286,7 +282,7 @@
 	/* Disable BDMA channel interrupts */
 	iowrite32(0, bdma_chan->regs + TSI721_DMAC_INTE);
 	if (bdma_chan->active)
-		tasklet_schedule(&bdma_chan->tasklet);
+		tasklet_hi_schedule(&bdma_chan->tasklet);
 }
 
 #ifdef CONFIG_PCI_MSI
@@ -301,7 +297,8 @@
 {
 	struct tsi721_bdma_chan *bdma_chan = ptr;
 
-	tsi721_bdma_handler(bdma_chan);
+	if (bdma_chan->active)
+		tasklet_hi_schedule(&bdma_chan->tasklet);
 	return IRQ_HANDLED;
 }
 #endif /* CONFIG_PCI_MSI */
@@ -310,20 +307,22 @@
 static void tsi721_start_dma(struct tsi721_bdma_chan *bdma_chan)
 {
 	if (!tsi721_dma_is_idle(bdma_chan)) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"BUG: Attempt to start non-idle channel\n");
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d Attempt to start non-idle channel",
+			bdma_chan->id);
 		return;
 	}
 
 	if (bdma_chan->wr_count == bdma_chan->wr_count_next) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"BUG: Attempt to start DMA with no BDs ready\n");
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d Attempt to start DMA with no BDs ready %d",
+			bdma_chan->id, task_pid_nr(current));
 		return;
 	}
 
-	dev_dbg(bdma_chan->dchan.device->dev,
-		"%s: chan_%d (wrc=%d)\n", __func__, bdma_chan->id,
-		bdma_chan->wr_count_next);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d (wrc=%d) %d",
+		  bdma_chan->id, bdma_chan->wr_count_next,
+		  task_pid_nr(current));
 
 	iowrite32(bdma_chan->wr_count_next,
 		bdma_chan->regs + TSI721_DMAC_DWRCNT);
@@ -425,10 +424,11 @@
 	struct tsi721_dma_desc *bd_ptr = NULL;
 	u32 idx, rd_idx;
 	u32 add_count = 0;
+	struct device *ch_dev = &dchan->dev->device;
 
 	if (!tsi721_dma_is_idle(bdma_chan)) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"BUG: Attempt to use non-idle channel\n");
+		tsi_err(ch_dev, "DMAC%d ERR: Attempt to use non-idle channel",
+			bdma_chan->id);
 		return -EIO;
 	}
 
@@ -439,7 +439,7 @@
 	rio_addr = desc->rio_addr;
 	next_addr = -1;
 	bcount = 0;
-	sys_size = dma_to_mport(bdma_chan->dchan.device)->sys_size;
+	sys_size = dma_to_mport(dchan->device)->sys_size;
 
 	rd_idx = ioread32(bdma_chan->regs + TSI721_DMAC_DRDCNT);
 	rd_idx %= (bdma_chan->bd_num + 1);
@@ -451,18 +451,18 @@
 		add_count++;
 	}
 
-	dev_dbg(dchan->device->dev, "%s: BD ring status: rdi=%d wri=%d\n",
-		__func__, rd_idx, idx);
+	tsi_debug(DMA, ch_dev, "DMAC%d BD ring status: rdi=%d wri=%d",
+		  bdma_chan->id, rd_idx, idx);
 
 	for_each_sg(desc->sg, sg, desc->sg_len, i) {
 
-		dev_dbg(dchan->device->dev, "sg%d/%d addr: 0x%llx len: %d\n",
-			i, desc->sg_len,
+		tsi_debug(DMAV, ch_dev, "DMAC%d sg%d/%d addr: 0x%llx len: %d",
+			bdma_chan->id, i, desc->sg_len,
 			(unsigned long long)sg_dma_address(sg), sg_dma_len(sg));
 
 		if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
-			dev_err(dchan->device->dev,
-				"%s: SG entry %d is too large\n", __func__, i);
+			tsi_err(ch_dev, "DMAC%d SG entry %d is too large",
+				bdma_chan->id, i);
 			err = -EINVAL;
 			break;
 		}
@@ -479,17 +479,16 @@
 		} else if (next_addr != -1) {
 			/* Finalize descriptor using total byte count value */
 			tsi721_desc_fill_end(bd_ptr, bcount, 0);
-			dev_dbg(dchan->device->dev,
-				"%s: prev desc final len: %d\n",
-				__func__, bcount);
+			tsi_debug(DMAV, ch_dev,	"DMAC%d prev desc final len: %d",
+				  bdma_chan->id, bcount);
 		}
 
 		desc->rio_addr = rio_addr;
 
 		if (i && idx == rd_idx) {
-			dev_dbg(dchan->device->dev,
-				"%s: HW descriptor ring is full @ %d\n",
-				__func__, i);
+			tsi_debug(DMAV, ch_dev,
+				  "DMAC%d HW descriptor ring is full @ %d",
+				  bdma_chan->id, i);
 			desc->sg = sg;
 			desc->sg_len -= i;
 			break;
@@ -498,13 +497,12 @@
 		bd_ptr = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[idx];
 		err = tsi721_desc_fill_init(desc, bd_ptr, sg, sys_size);
 		if (err) {
-			dev_err(dchan->device->dev,
-				"Failed to build desc: err=%d\n", err);
+			tsi_err(ch_dev, "Failed to build desc: err=%d", err);
 			break;
 		}
 
-		dev_dbg(dchan->device->dev, "bd_ptr = %p did=%d raddr=0x%llx\n",
-			bd_ptr, desc->destid, desc->rio_addr);
+		tsi_debug(DMAV, ch_dev, "DMAC%d bd_ptr = %p did=%d raddr=0x%llx",
+			  bdma_chan->id, bd_ptr, desc->destid, desc->rio_addr);
 
 		next_addr = sg_dma_address(sg);
 		bcount = sg_dma_len(sg);
@@ -519,8 +517,9 @@
 entry_done:
 		if (sg_is_last(sg)) {
 			tsi721_desc_fill_end(bd_ptr, bcount, 0);
-			dev_dbg(dchan->device->dev, "%s: last desc final len: %d\n",
-				__func__, bcount);
+			tsi_debug(DMAV, ch_dev,
+				  "DMAC%d last desc final len: %d",
+				  bdma_chan->id, bcount);
 			desc->sg_len = 0;
 		} else {
 			rio_addr += sg_dma_len(sg);
@@ -534,35 +533,43 @@
 	return err;
 }
 
-static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan)
+static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan,
+				struct tsi721_tx_desc *desc)
 {
-	struct tsi721_tx_desc *desc;
 	int err;
 
-	dev_dbg(bdma_chan->dchan.device->dev, "%s: Enter\n", __func__);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d", bdma_chan->id);
+
+	if (!tsi721_dma_is_idle(bdma_chan))
+		return;
 
 	/*
-	 * If there are any new transactions in the queue add them
-	 * into the processing list
-	 */
-	if (!list_empty(&bdma_chan->queue))
-		list_splice_init(&bdma_chan->queue, &bdma_chan->active_list);
+	 * If there is no data transfer in progress, fetch new descriptor from
+	 * the pending queue.
+	*/
 
-	/* Start new transaction (if available) */
-	if (!list_empty(&bdma_chan->active_list)) {
-		desc = tsi721_dma_first_active(bdma_chan);
+	if (desc == NULL && bdma_chan->active_tx == NULL &&
+					!list_empty(&bdma_chan->queue)) {
+		desc = list_first_entry(&bdma_chan->queue,
+					struct tsi721_tx_desc, desc_node);
+		list_del_init((&desc->desc_node));
+		bdma_chan->active_tx = desc;
+	}
+
+	if (desc) {
 		err = tsi721_submit_sg(desc);
 		if (!err)
 			tsi721_start_dma(bdma_chan);
 		else {
 			tsi721_dma_tx_err(bdma_chan, desc);
-			dev_dbg(bdma_chan->dchan.device->dev,
-				"ERR: tsi721_submit_sg failed with err=%d\n",
-				err);
+			tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+				"DMAC%d ERR: tsi721_submit_sg failed with err=%d",
+				bdma_chan->id, err);
 		}
 	}
 
-	dev_dbg(bdma_chan->dchan.device->dev, "%s: Exit\n", __func__);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d Exit",
+		  bdma_chan->id);
 }
 
 static void tsi721_dma_tasklet(unsigned long data)
@@ -571,22 +578,84 @@
 	u32 dmac_int, dmac_sts;
 
 	dmac_int = ioread32(bdma_chan->regs + TSI721_DMAC_INT);
-	dev_dbg(bdma_chan->dchan.device->dev, "%s: DMAC%d_INT = 0x%x\n",
-		__func__, bdma_chan->id, dmac_int);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d_INT = 0x%x",
+		  bdma_chan->id, dmac_int);
 	/* Clear channel interrupts */
 	iowrite32(dmac_int, bdma_chan->regs + TSI721_DMAC_INT);
 
 	if (dmac_int & TSI721_DMAC_INT_ERR) {
+		int i = 10000;
+		struct tsi721_tx_desc *desc;
+
+		desc = bdma_chan->active_tx;
 		dmac_sts = ioread32(bdma_chan->regs + TSI721_DMAC_STS);
-		dev_err(bdma_chan->dchan.device->dev,
-			"%s: DMA ERROR - DMAC%d_STS = 0x%x\n",
-			__func__, bdma_chan->id, dmac_sts);
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d_STS = 0x%x did=%d raddr=0x%llx",
+			bdma_chan->id, dmac_sts, desc->destid, desc->rio_addr);
+
+		/* Re-initialize DMA channel if possible */
+
+		if ((dmac_sts & TSI721_DMAC_STS_ABORT) == 0)
+			goto err_out;
+
+		tsi721_clr_stat(bdma_chan);
+
+		spin_lock(&bdma_chan->lock);
+
+		/* Put DMA channel into init state */
+		iowrite32(TSI721_DMAC_CTL_INIT,
+			  bdma_chan->regs + TSI721_DMAC_CTL);
+		do {
+			udelay(1);
+			dmac_sts = ioread32(bdma_chan->regs + TSI721_DMAC_STS);
+			i--;
+		} while ((dmac_sts & TSI721_DMAC_STS_ABORT) && i);
+
+		if (dmac_sts & TSI721_DMAC_STS_ABORT) {
+			tsi_err(&bdma_chan->dchan.dev->device,
+				"Failed to re-initiate DMAC%d",	bdma_chan->id);
+			spin_unlock(&bdma_chan->lock);
+			goto err_out;
+		}
+
+		/* Setup DMA descriptor pointers */
+		iowrite32(((u64)bdma_chan->bd_phys >> 32),
+			bdma_chan->regs + TSI721_DMAC_DPTRH);
+		iowrite32(((u64)bdma_chan->bd_phys & TSI721_DMAC_DPTRL_MASK),
+			bdma_chan->regs + TSI721_DMAC_DPTRL);
+
+		/* Setup descriptor status FIFO */
+		iowrite32(((u64)bdma_chan->sts_phys >> 32),
+			bdma_chan->regs + TSI721_DMAC_DSBH);
+		iowrite32(((u64)bdma_chan->sts_phys & TSI721_DMAC_DSBL_MASK),
+			bdma_chan->regs + TSI721_DMAC_DSBL);
+		iowrite32(TSI721_DMAC_DSSZ_SIZE(bdma_chan->sts_size),
+			bdma_chan->regs + TSI721_DMAC_DSSZ);
+
+		/* Clear interrupt bits */
+		iowrite32(TSI721_DMAC_INT_ALL,
+			bdma_chan->regs + TSI721_DMAC_INT);
+
+		ioread32(bdma_chan->regs + TSI721_DMAC_INT);
+
+		bdma_chan->wr_count = bdma_chan->wr_count_next = 0;
+		bdma_chan->sts_rdptr = 0;
+		udelay(10);
+
+		desc = bdma_chan->active_tx;
+		desc->status = DMA_ERROR;
+		dma_cookie_complete(&desc->txd);
+		list_add(&desc->desc_node, &bdma_chan->free_list);
+		bdma_chan->active_tx = NULL;
+		if (bdma_chan->active)
+			tsi721_advance_work(bdma_chan, NULL);
+		spin_unlock(&bdma_chan->lock);
 	}
 
 	if (dmac_int & TSI721_DMAC_INT_STFULL) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"%s: DMAC%d descriptor status FIFO is full\n",
-			__func__, bdma_chan->id);
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d descriptor status FIFO is full",
+			bdma_chan->id);
 	}
 
 	if (dmac_int & (TSI721_DMAC_INT_DONE | TSI721_DMAC_INT_IOFDONE)) {
@@ -594,7 +663,7 @@
 
 		tsi721_clr_stat(bdma_chan);
 		spin_lock(&bdma_chan->lock);
-		desc = tsi721_dma_first_active(bdma_chan);
+		desc = bdma_chan->active_tx;
 
 		if (desc->sg_len == 0) {
 			dma_async_tx_callback callback = NULL;
@@ -606,17 +675,21 @@
 				callback = desc->txd.callback;
 				param = desc->txd.callback_param;
 			}
-			list_move(&desc->desc_node, &bdma_chan->free_list);
+			list_add(&desc->desc_node, &bdma_chan->free_list);
+			bdma_chan->active_tx = NULL;
+			if (bdma_chan->active)
+				tsi721_advance_work(bdma_chan, NULL);
 			spin_unlock(&bdma_chan->lock);
 			if (callback)
 				callback(param);
-			spin_lock(&bdma_chan->lock);
+		} else {
+			if (bdma_chan->active)
+				tsi721_advance_work(bdma_chan,
+						    bdma_chan->active_tx);
+			spin_unlock(&bdma_chan->lock);
 		}
-
-		tsi721_advance_work(bdma_chan);
-		spin_unlock(&bdma_chan->lock);
 	}
-
+err_out:
 	/* Re-Enable BDMA channel interrupts */
 	iowrite32(TSI721_DMAC_INT_ALL, bdma_chan->regs + TSI721_DMAC_INTE);
 }
@@ -629,8 +702,9 @@
 
 	/* Check if the descriptor is detached from any lists */
 	if (!list_empty(&desc->desc_node)) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"%s: wrong state of descriptor %p\n", __func__, txd);
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d wrong state of descriptor %p",
+			bdma_chan->id, txd);
 		return -EIO;
 	}
 
@@ -655,25 +729,25 @@
 	struct tsi721_tx_desc *desc = NULL;
 	int i;
 
-	dev_dbg(dchan->device->dev, "%s: for channel %d\n",
-		__func__, bdma_chan->id);
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id);
 
 	if (bdma_chan->bd_base)
 		return TSI721_DMA_TX_QUEUE_SZ;
 
 	/* Initialize BDMA channel */
 	if (tsi721_bdma_ch_init(bdma_chan, dma_desc_per_channel)) {
-		dev_err(dchan->device->dev, "Unable to initialize data DMA"
-			" channel %d, aborting\n", bdma_chan->id);
+		tsi_err(&dchan->dev->device, "Unable to initialize DMAC%d",
+			bdma_chan->id);
 		return -ENODEV;
 	}
 
 	/* Allocate queue of transaction descriptors */
 	desc = kcalloc(TSI721_DMA_TX_QUEUE_SZ, sizeof(struct tsi721_tx_desc),
-			GFP_KERNEL);
+			GFP_ATOMIC);
 	if (!desc) {
-		dev_err(dchan->device->dev,
-			"Failed to allocate logical descriptors\n");
+		tsi_err(&dchan->dev->device,
+			"DMAC%d Failed to allocate logical descriptors",
+			bdma_chan->id);
 		tsi721_bdma_ch_free(bdma_chan);
 		return -ENOMEM;
 	}
@@ -714,15 +788,11 @@
 {
 	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
 
-	dev_dbg(dchan->device->dev, "%s: for channel %d\n",
-		__func__, bdma_chan->id);
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id);
 
 	if (bdma_chan->bd_base == NULL)
 		return;
 
-	BUG_ON(!list_empty(&bdma_chan->active_list));
-	BUG_ON(!list_empty(&bdma_chan->queue));
-
 	tsi721_bdma_interrupt_enable(bdma_chan, 0);
 	bdma_chan->active = false;
 	tsi721_sync_dma_irq(bdma_chan);
@@ -736,20 +806,26 @@
 enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
 				 struct dma_tx_state *txstate)
 {
-	return dma_cookie_status(dchan, cookie, txstate);
+	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
+	enum dma_status	status;
+
+	spin_lock_bh(&bdma_chan->lock);
+	status = dma_cookie_status(dchan, cookie, txstate);
+	spin_unlock_bh(&bdma_chan->lock);
+	return status;
 }
 
 static void tsi721_issue_pending(struct dma_chan *dchan)
 {
 	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
 
-	dev_dbg(dchan->device->dev, "%s: Enter\n", __func__);
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id);
 
+	spin_lock_bh(&bdma_chan->lock);
 	if (tsi721_dma_is_idle(bdma_chan) && bdma_chan->active) {
-		spin_lock_bh(&bdma_chan->lock);
-		tsi721_advance_work(bdma_chan);
-		spin_unlock_bh(&bdma_chan->lock);
+		tsi721_advance_work(bdma_chan, NULL);
 	}
+	spin_unlock_bh(&bdma_chan->lock);
 }
 
 static
@@ -759,18 +835,19 @@
 			void *tinfo)
 {
 	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
-	struct tsi721_tx_desc *desc, *_d;
+	struct tsi721_tx_desc *desc;
 	struct rio_dma_ext *rext = tinfo;
 	enum dma_rtype rtype;
 	struct dma_async_tx_descriptor *txd = NULL;
 
 	if (!sgl || !sg_len) {
-		dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
-		return NULL;
+		tsi_err(&dchan->dev->device, "DMAC%d No SG list",
+			bdma_chan->id);
+		return ERR_PTR(-EINVAL);
 	}
 
-	dev_dbg(dchan->device->dev, "%s: %s\n", __func__,
-		(dir == DMA_DEV_TO_MEM)?"READ":"WRITE");
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d %s", bdma_chan->id,
+		  (dir == DMA_DEV_TO_MEM)?"READ":"WRITE");
 
 	if (dir == DMA_DEV_TO_MEM)
 		rtype = NREAD;
@@ -788,30 +865,36 @@
 			break;
 		}
 	} else {
-		dev_err(dchan->device->dev,
-			"%s: Unsupported DMA direction option\n", __func__);
-		return NULL;
+		tsi_err(&dchan->dev->device,
+			"DMAC%d Unsupported DMA direction option",
+			bdma_chan->id);
+		return ERR_PTR(-EINVAL);
 	}
 
 	spin_lock_bh(&bdma_chan->lock);
 
-	list_for_each_entry_safe(desc, _d, &bdma_chan->free_list, desc_node) {
-		if (async_tx_test_ack(&desc->txd)) {
-			list_del_init(&desc->desc_node);
-			desc->destid = rext->destid;
-			desc->rio_addr = rext->rio_addr;
-			desc->rio_addr_u = 0;
-			desc->rtype = rtype;
-			desc->sg_len	= sg_len;
-			desc->sg	= sgl;
-			txd		= &desc->txd;
-			txd->flags	= flags;
-			break;
-		}
+	if (!list_empty(&bdma_chan->free_list)) {
+		desc = list_first_entry(&bdma_chan->free_list,
+				struct tsi721_tx_desc, desc_node);
+		list_del_init(&desc->desc_node);
+		desc->destid = rext->destid;
+		desc->rio_addr = rext->rio_addr;
+		desc->rio_addr_u = 0;
+		desc->rtype = rtype;
+		desc->sg_len	= sg_len;
+		desc->sg	= sgl;
+		txd		= &desc->txd;
+		txd->flags	= flags;
 	}
 
 	spin_unlock_bh(&bdma_chan->lock);
 
+	if (!txd) {
+		tsi_debug(DMA, &dchan->dev->device,
+			  "DMAC%d free TXD is not available", bdma_chan->id);
+		return ERR_PTR(-EBUSY);
+	}
+
 	return txd;
 }
 
@@ -819,16 +902,18 @@
 {
 	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
 	struct tsi721_tx_desc *desc, *_d;
-	u32 dmac_int;
 	LIST_HEAD(list);
 
-	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id);
 
 	spin_lock_bh(&bdma_chan->lock);
 
 	bdma_chan->active = false;
 
-	if (!tsi721_dma_is_idle(bdma_chan)) {
+	while (!tsi721_dma_is_idle(bdma_chan)) {
+
+		udelay(5);
+#if (0)
 		/* make sure to stop the transfer */
 		iowrite32(TSI721_DMAC_CTL_SUSP,
 			  bdma_chan->regs + TSI721_DMAC_CTL);
@@ -837,9 +922,11 @@
 		do {
 			dmac_int = ioread32(bdma_chan->regs + TSI721_DMAC_INT);
 		} while ((dmac_int & TSI721_DMAC_INT_SUSP) == 0);
+#endif
 	}
 
-	list_splice_init(&bdma_chan->active_list, &list);
+	if (bdma_chan->active_tx)
+		list_add(&bdma_chan->active_tx->desc_node, &list);
 	list_splice_init(&bdma_chan->queue, &list);
 
 	list_for_each_entry_safe(desc, _d, &list, desc_node)
@@ -850,12 +937,42 @@
 	return 0;
 }
 
+static void tsi721_dma_stop(struct tsi721_bdma_chan *bdma_chan)
+{
+	if (!bdma_chan->active)
+		return;
+	spin_lock_bh(&bdma_chan->lock);
+	if (!tsi721_dma_is_idle(bdma_chan)) {
+		int timeout = 100000;
+
+		/* stop the transfer in progress */
+		iowrite32(TSI721_DMAC_CTL_SUSP,
+			  bdma_chan->regs + TSI721_DMAC_CTL);
+
+		/* Wait until DMA channel stops */
+		while (!tsi721_dma_is_idle(bdma_chan) && --timeout)
+			udelay(1);
+	}
+
+	spin_unlock_bh(&bdma_chan->lock);
+}
+
+void tsi721_dma_stop_all(struct tsi721_device *priv)
+{
+	int i;
+
+	for (i = 0; i < TSI721_DMA_MAXCH; i++) {
+		if (i != TSI721_DMACH_MAINT)
+			tsi721_dma_stop(&priv->bdma[i]);
+	}
+}
+
 int tsi721_register_dma(struct tsi721_device *priv)
 {
 	int i;
 	int nr_channels = 0;
 	int err;
-	struct rio_mport *mport = priv->mport;
+	struct rio_mport *mport = &priv->mport;
 
 	INIT_LIST_HEAD(&mport->dma.channels);
 
@@ -875,7 +992,7 @@
 
 		spin_lock_init(&bdma_chan->lock);
 
-		INIT_LIST_HEAD(&bdma_chan->active_list);
+		bdma_chan->active_tx = NULL;
 		INIT_LIST_HEAD(&bdma_chan->queue);
 		INIT_LIST_HEAD(&bdma_chan->free_list);
 
@@ -901,7 +1018,33 @@
 
 	err = dma_async_device_register(&mport->dma);
 	if (err)
-		dev_err(&priv->pdev->dev, "Failed to register DMA device\n");
+		tsi_err(&priv->pdev->dev, "Failed to register DMA device");
 
 	return err;
 }
+
+void tsi721_unregister_dma(struct tsi721_device *priv)
+{
+	struct rio_mport *mport = &priv->mport;
+	struct dma_chan *chan, *_c;
+	struct tsi721_bdma_chan *bdma_chan;
+
+	tsi721_dma_stop_all(priv);
+	dma_async_device_unregister(&mport->dma);
+
+	list_for_each_entry_safe(chan, _c, &mport->dma.channels,
+					device_node) {
+		bdma_chan = to_tsi721_chan(chan);
+		if (bdma_chan->active) {
+			tsi721_bdma_interrupt_enable(bdma_chan, 0);
+			bdma_chan->active = false;
+			tsi721_sync_dma_irq(bdma_chan);
+			tasklet_kill(&bdma_chan->tasklet);
+			INIT_LIST_HEAD(&bdma_chan->free_list);
+			kfree(bdma_chan->tx_desc);
+			tsi721_bdma_ch_free(bdma_chan);
+		}
+
+		list_del(&chan->device_node);
+	}
+}

diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index f301f05..128350f 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c

@@ -131,6 +131,17 @@
 	return 0;
 }
 
+static void rio_device_shutdown(struct device *dev)
+{
+	struct rio_dev *rdev = to_rio_dev(dev);
+	struct rio_driver *rdrv = rdev->driver;
+
+	dev_dbg(dev, "RIO: %s\n", __func__);
+
+	if (rdrv && rdrv->shutdown)
+		rdrv->shutdown(rdev);
+}
+
 /**
  *  rio_register_driver - register a new RIO driver
  *  @rdrv: the RIO driver structure to register
@@ -229,6 +240,7 @@
 	.bus_groups = rio_bus_groups,
 	.probe = rio_device_probe,
 	.remove = rio_device_remove,
+	.shutdown = rio_device_shutdown,
 	.uevent	= rio_uevent,
 };
 

diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index d6a126c..a63a380 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c

@@ -39,6 +39,13 @@
 
 static void rio_init_em(struct rio_dev *rdev);
 
+struct rio_id_table {
+	u16 start;	/* logical minimal id */
+	u32 max;	/* max number of IDs in table */
+	spinlock_t lock;
+	unsigned long table[0];
+};
+
 static int next_destid = 0;
 static int next_comptag = 1;
 
@@ -62,7 +69,7 @@
 static u16 rio_destid_alloc(struct rio_net *net)
 {
 	int destid;
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	spin_lock(&idtab->lock);
 	destid = find_first_zero_bit(idtab->table, idtab->max);
@@ -88,7 +95,7 @@
 static int rio_destid_reserve(struct rio_net *net, u16 destid)
 {
 	int oldbit;
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	destid -= idtab->start;
 	spin_lock(&idtab->lock);
@@ -106,7 +113,7 @@
  */
 static void rio_destid_free(struct rio_net *net, u16 destid)
 {
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	destid -= idtab->start;
 	spin_lock(&idtab->lock);
@@ -121,7 +128,7 @@
 static u16 rio_destid_first(struct rio_net *net)
 {
 	int destid;
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	spin_lock(&idtab->lock);
 	destid = find_first_bit(idtab->table, idtab->max);
@@ -141,7 +148,7 @@
 static u16 rio_destid_next(struct rio_net *net, u16 from)
 {
 	int destid;
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	spin_lock(&idtab->lock);
 	destid = find_next_bit(idtab->table, idtab->max, from);
@@ -187,19 +194,6 @@
 }
 
 /**
- * rio_local_set_device_id - Set the base/extended device id for a port
- * @port: RIO master port
- * @did: Device ID value to be written
- *
- * Writes the base/extended device id from a device.
- */
-static void rio_local_set_device_id(struct rio_mport *port, u16 did)
-{
-	rio_local_write_config_32(port, RIO_DID_CSR, RIO_SET_DID(port->sys_size,
-				did));
-}
-
-/**
  * rio_clear_locks- Release all host locks and signal enumeration complete
  * @net: RIO network to run on
  *
@@ -449,9 +443,6 @@
 
 		if (do_enum)
 			rio_route_clr_table(rdev, RIO_GLOBAL_TABLE, 0);
-
-		list_add_tail(&rswitch->node, &net->switches);
-
 	} else {
 		if (do_enum)
 			/*Enable Input Output Port (transmitter reviever)*/
@@ -461,13 +452,9 @@
 			     rdev->comp_tag & RIO_CTAG_UDEVID);
 	}
 
-	rdev->dev.parent = &port->dev;
+	rdev->dev.parent = &net->dev;
 	rio_attach_device(rdev);
-
-	device_initialize(&rdev->dev);
 	rdev->dev.release = rio_release_dev;
-	rio_dev_get(rdev);
-
 	rdev->dma_mask = DMA_BIT_MASK(32);
 	rdev->dev.dma_mask = &rdev->dma_mask;
 	rdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
@@ -480,6 +467,8 @@
 	if (ret)
 		goto cleanup;
 
+	rio_dev_get(rdev);
+
 	return rdev;
 
 cleanup:
@@ -621,8 +610,6 @@
 	rdev = rio_setup_device(net, port, RIO_ANY_DESTID(port->sys_size),
 					hopcount, 1);
 	if (rdev) {
-		/* Add device to the global and bus/net specific list. */
-		list_add_tail(&rdev->net_list, &net->devices);
 		rdev->prev = prev;
 		if (prev && rio_is_switch(prev))
 			prev->rswitch->nextdev[prev_port] = rdev;
@@ -778,8 +765,6 @@
 
 	/* Setup new RIO device */
 	if ((rdev = rio_setup_device(net, port, destid, hopcount, 0))) {
-		/* Add device to the global and bus/net specific list. */
-		list_add_tail(&rdev->net_list, &net->devices);
 		rdev->prev = prev;
 		if (prev && rio_is_switch(prev))
 			prev->rswitch->nextdev[prev_port] = rdev;
@@ -864,50 +849,71 @@
 	return result & RIO_PORT_N_ERR_STS_PORT_OK;
 }
 
-/**
- * rio_alloc_net- Allocate and configure a new RIO network
- * @port: Master port associated with the RIO network
- * @do_enum: Enumeration/Discovery mode flag
- * @start: logical minimal start id for new net
- *
- * Allocates a RIO network structure, initializes per-network
- * list heads, and adds the associated master port to the
- * network list of associated master ports. Returns a
- * RIO network pointer on success or %NULL on failure.
- */
-static struct rio_net *rio_alloc_net(struct rio_mport *port,
-					       int do_enum, u16 start)
+static void rio_scan_release_net(struct rio_net *net)
+{
+	pr_debug("RIO-SCAN: %s: net_%d\n", __func__, net->id);
+	kfree(net->enum_data);
+}
+
+static void rio_scan_release_dev(struct device *dev)
 {
 	struct rio_net *net;
 
-	net = kzalloc(sizeof(struct rio_net), GFP_KERNEL);
-	if (net && do_enum) {
-		net->destid_table.table = kcalloc(
-			BITS_TO_LONGS(RIO_MAX_ROUTE_ENTRIES(port->sys_size)),
-			sizeof(long),
-			GFP_KERNEL);
+	net = to_rio_net(dev);
+	pr_debug("RIO-SCAN: %s: net_%d\n", __func__, net->id);
+	kfree(net);
+}
 
-		if (net->destid_table.table == NULL) {
+/*
+ * rio_scan_alloc_net - Allocate and configure a new RIO network
+ * @mport: Master port associated with the RIO network
+ * @do_enum: Enumeration/Discovery mode flag
+ * @start: logical minimal start id for new net
+ *
+ * Allocates a new RIO network structure and initializes enumerator-specific
+ * part of it (if required).
+ * Returns a RIO network pointer on success or %NULL on failure.
+ */
+static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport,
+					  int do_enum, u16 start)
+{
+	struct rio_net *net;
+
+	net = rio_alloc_net(mport);
+
+	if (net && do_enum) {
+		struct rio_id_table *idtab;
+		size_t size;
+
+		size = sizeof(struct rio_id_table) +
+				BITS_TO_LONGS(
+					RIO_MAX_ROUTE_ENTRIES(mport->sys_size)
+					) * sizeof(long);
+
+		idtab = kzalloc(size, GFP_KERNEL);
+
+		if (idtab == NULL) {
 			pr_err("RIO: failed to allocate destID table\n");
-			kfree(net);
+			rio_free_net(net);
 			net = NULL;
 		} else {
-			net->destid_table.start = start;
-			net->destid_table.max =
-					RIO_MAX_ROUTE_ENTRIES(port->sys_size);
-			spin_lock_init(&net->destid_table.lock);
+			net->enum_data = idtab;
+			net->release = rio_scan_release_net;
+			idtab->start = start;
+			idtab->max = RIO_MAX_ROUTE_ENTRIES(mport->sys_size);
+			spin_lock_init(&idtab->lock);
 		}
 	}
 
 	if (net) {
-		INIT_LIST_HEAD(&net->node);
-		INIT_LIST_HEAD(&net->devices);
-		INIT_LIST_HEAD(&net->switches);
-		INIT_LIST_HEAD(&net->mports);
-		list_add_tail(&port->nnode, &net->mports);
-		net->hport = port;
-		net->id = port->id;
+		net->id = mport->id;
+		net->hport = mport;
+		dev_set_name(&net->dev, "rnet_%d", net->id);
+		net->dev.parent = &mport->dev;
+		net->dev.release = rio_scan_release_dev;
+		rio_add_net(net);
 	}
+
 	return net;
 }
 
@@ -968,17 +974,6 @@
 }
 
 /**
- * rio_pw_enable - Enables/disables port-write handling by a master port
- * @port: Master port associated with port-write handling
- * @enable:  1=enable,  0=disable
- */
-static void rio_pw_enable(struct rio_mport *port, int enable)
-{
-	if (port->ops->pwenable)
-		port->ops->pwenable(port, enable);
-}
-
-/**
  * rio_enum_mport- Start enumeration through a master port
  * @mport: Master port to send transactions
  * @flags: Enumeration control flags
@@ -1016,7 +1011,7 @@
 
 	/* If master port has an active link, allocate net and enum peers */
 	if (rio_mport_is_active(mport)) {
-		net = rio_alloc_net(mport, 1, 0);
+		net = rio_scan_alloc_net(mport, 1, 0);
 		if (!net) {
 			printk(KERN_ERR "RIO: failed to allocate new net\n");
 			rc = -ENOMEM;
@@ -1133,7 +1128,7 @@
 enum_done:
 		pr_debug("RIO: ... enumeration done\n");
 
-		net = rio_alloc_net(mport, 0, 0);
+		net = rio_scan_alloc_net(mport, 0, 0);
 		if (!net) {
 			printk(KERN_ERR "RIO: Failed to allocate new net\n");
 			goto bail;

diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index e220edc..0dcaa66 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c

@@ -30,6 +30,20 @@
 
 #include "rio.h"
 
+/*
+ * struct rio_pwrite - RIO portwrite event
+ * @node:    Node in list of doorbell events
+ * @pwcback: Doorbell event callback
+ * @context: Handler specific context to pass on event
+ */
+struct rio_pwrite {
+	struct list_head node;
+
+	int (*pwcback)(struct rio_mport *mport, void *context,
+		       union rio_pw_msg *msg, int step);
+	void *context;
+};
+
 MODULE_DESCRIPTION("RapidIO Subsystem Core");
 MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
 MODULE_AUTHOR("Alexandre Bounine <alexandre.bounine@idt.com>");
@@ -42,6 +56,7 @@
 	"Destination ID assignment to local RapidIO controllers");
 
 static LIST_HEAD(rio_devices);
+static LIST_HEAD(rio_nets);
 static DEFINE_SPINLOCK(rio_global_list_lock);
 
 static LIST_HEAD(rio_mports);
@@ -68,6 +83,89 @@
 }
 
 /**
+ * rio_query_mport - Query mport device attributes
+ * @port: mport device to query
+ * @mport_attr: mport attributes data structure
+ *
+ * Returns attributes of specified mport through the
+ * pointer to attributes data structure.
+ */
+int rio_query_mport(struct rio_mport *port,
+		    struct rio_mport_attr *mport_attr)
+{
+	if (!port->ops->query_mport)
+		return -ENODATA;
+	return port->ops->query_mport(port, mport_attr);
+}
+EXPORT_SYMBOL(rio_query_mport);
+
+/**
+ * rio_alloc_net- Allocate and initialize a new RIO network data structure
+ * @mport: Master port associated with the RIO network
+ *
+ * Allocates a RIO network structure, initializes per-network
+ * list heads, and adds the associated master port to the
+ * network list of associated master ports. Returns a
+ * RIO network pointer on success or %NULL on failure.
+ */
+struct rio_net *rio_alloc_net(struct rio_mport *mport)
+{
+	struct rio_net *net;
+
+	net = kzalloc(sizeof(struct rio_net), GFP_KERNEL);
+	if (net) {
+		INIT_LIST_HEAD(&net->node);
+		INIT_LIST_HEAD(&net->devices);
+		INIT_LIST_HEAD(&net->switches);
+		INIT_LIST_HEAD(&net->mports);
+		mport->net = net;
+	}
+	return net;
+}
+EXPORT_SYMBOL_GPL(rio_alloc_net);
+
+int rio_add_net(struct rio_net *net)
+{
+	int err;
+
+	err = device_register(&net->dev);
+	if (err)
+		return err;
+	spin_lock(&rio_global_list_lock);
+	list_add_tail(&net->node, &rio_nets);
+	spin_unlock(&rio_global_list_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rio_add_net);
+
+void rio_free_net(struct rio_net *net)
+{
+	spin_lock(&rio_global_list_lock);
+	if (!list_empty(&net->node))
+		list_del(&net->node);
+	spin_unlock(&rio_global_list_lock);
+	if (net->release)
+		net->release(net);
+	device_unregister(&net->dev);
+}
+EXPORT_SYMBOL_GPL(rio_free_net);
+
+/**
+ * rio_local_set_device_id - Set the base/extended device id for a port
+ * @port: RIO master port
+ * @did: Device ID value to be written
+ *
+ * Writes the base/extended device id from a device.
+ */
+void rio_local_set_device_id(struct rio_mport *port, u16 did)
+{
+	rio_local_write_config_32(port, RIO_DID_CSR,
+				  RIO_SET_DID(port->sys_size, did));
+}
+EXPORT_SYMBOL_GPL(rio_local_set_device_id);
+
+/**
  * rio_add_device- Adds a RIO device to the device model
  * @rdev: RIO device
  *
@@ -79,12 +177,19 @@
 {
 	int err;
 
-	err = device_add(&rdev->dev);
+	atomic_set(&rdev->state, RIO_DEVICE_RUNNING);
+	err = device_register(&rdev->dev);
 	if (err)
 		return err;
 
 	spin_lock(&rio_global_list_lock);
 	list_add_tail(&rdev->global_list, &rio_devices);
+	if (rdev->net) {
+		list_add_tail(&rdev->net_list, &rdev->net->devices);
+		if (rdev->pef & RIO_PEF_SWITCH)
+			list_add_tail(&rdev->rswitch->node,
+				      &rdev->net->switches);
+	}
 	spin_unlock(&rio_global_list_lock);
 
 	rio_create_sysfs_dev_files(rdev);
@@ -93,6 +198,33 @@
 }
 EXPORT_SYMBOL_GPL(rio_add_device);
 
+/*
+ * rio_del_device - removes a RIO device from the device model
+ * @rdev: RIO device
+ * @state: device state to set during removal process
+ *
+ * Removes the RIO device to the kernel device list and subsystem's device list.
+ * Clears sysfs entries for the removed device.
+ */
+void rio_del_device(struct rio_dev *rdev, enum rio_device_state state)
+{
+	pr_debug("RIO: %s: removing %s\n", __func__, rio_name(rdev));
+	atomic_set(&rdev->state, state);
+	spin_lock(&rio_global_list_lock);
+	list_del(&rdev->global_list);
+	if (rdev->net) {
+		list_del(&rdev->net_list);
+		if (rdev->pef & RIO_PEF_SWITCH) {
+			list_del(&rdev->rswitch->node);
+			kfree(rdev->rswitch->route_table);
+		}
+	}
+	spin_unlock(&rio_global_list_lock);
+	rio_remove_sysfs_dev_files(rdev);
+	device_unregister(&rdev->dev);
+}
+EXPORT_SYMBOL_GPL(rio_del_device);
+
 /**
  * rio_request_inb_mbox - request inbound mailbox service
  * @mport: RIO master port from which to allocate the mailbox resource
@@ -258,7 +390,9 @@
 	dbell->dinb = dinb;
 	dbell->dev_id = dev_id;
 
+	mutex_lock(&mport->lock);
 	list_add_tail(&dbell->node, &mport->dbells);
+	mutex_unlock(&mport->lock);
 
       out:
 	return rc;
@@ -322,12 +456,15 @@
 	int rc = 0, found = 0;
 	struct rio_dbell *dbell;
 
+	mutex_lock(&mport->lock);
 	list_for_each_entry(dbell, &mport->dbells, node) {
 		if ((dbell->res->start == start) && (dbell->res->end == end)) {
+			list_del(&dbell->node);
 			found = 1;
 			break;
 		}
 	}
+	mutex_unlock(&mport->lock);
 
 	/* If we can't find an exact match, fail */
 	if (!found) {
@@ -335,9 +472,6 @@
 		goto out;
 	}
 
-	/* Delete from list */
-	list_del(&dbell->node);
-
 	/* Release the doorbell resource */
 	rc = release_resource(dbell->res);
 
@@ -394,7 +528,71 @@
 }
 
 /**
- * rio_request_inb_pwrite - request inbound port-write message service
+ * rio_add_mport_pw_handler - add port-write message handler into the list
+ *                            of mport specific pw handlers
+ * @mport:   RIO master port to bind the portwrite callback
+ * @context: Handler specific context to pass on event
+ * @pwcback: Callback to execute when portwrite is received
+ *
+ * Returns 0 if the request has been satisfied.
+ */
+int rio_add_mport_pw_handler(struct rio_mport *mport, void *context,
+			     int (*pwcback)(struct rio_mport *mport,
+			     void *context, union rio_pw_msg *msg, int step))
+{
+	int rc = 0;
+	struct rio_pwrite *pwrite;
+
+	pwrite = kzalloc(sizeof(struct rio_pwrite), GFP_KERNEL);
+	if (!pwrite) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	pwrite->pwcback = pwcback;
+	pwrite->context = context;
+	mutex_lock(&mport->lock);
+	list_add_tail(&pwrite->node, &mport->pwrites);
+	mutex_unlock(&mport->lock);
+out:
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rio_add_mport_pw_handler);
+
+/**
+ * rio_del_mport_pw_handler - remove port-write message handler from the list
+ *                            of mport specific pw handlers
+ * @mport:   RIO master port to bind the portwrite callback
+ * @context: Registered handler specific context to pass on event
+ * @pwcback: Registered callback function
+ *
+ * Returns 0 if the request has been satisfied.
+ */
+int rio_del_mport_pw_handler(struct rio_mport *mport, void *context,
+			     int (*pwcback)(struct rio_mport *mport,
+			     void *context, union rio_pw_msg *msg, int step))
+{
+	int rc = -EINVAL;
+	struct rio_pwrite *pwrite;
+
+	mutex_lock(&mport->lock);
+	list_for_each_entry(pwrite, &mport->pwrites, node) {
+		if (pwrite->pwcback == pwcback && pwrite->context == context) {
+			list_del(&pwrite->node);
+			kfree(pwrite);
+			rc = 0;
+			break;
+		}
+	}
+	mutex_unlock(&mport->lock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rio_del_mport_pw_handler);
+
+/**
+ * rio_request_inb_pwrite - request inbound port-write message service for
+ *                          specific RapidIO device
  * @rdev: RIO device to which register inbound port-write callback routine
  * @pwcback: Callback routine to execute when port-write is received
  *
@@ -419,6 +617,7 @@
 
 /**
  * rio_release_inb_pwrite - release inbound port-write message service
+ *                          associated with specific RapidIO device
  * @rdev: RIO device which registered for inbound port-write callback
  *
  * Removes callback from the rio_dev structure. Returns 0 if the request
@@ -440,6 +639,24 @@
 EXPORT_SYMBOL_GPL(rio_release_inb_pwrite);
 
 /**
+ * rio_pw_enable - Enables/disables port-write handling by a master port
+ * @mport: Master port associated with port-write handling
+ * @enable:  1=enable,  0=disable
+ */
+void rio_pw_enable(struct rio_mport *mport, int enable)
+{
+	if (mport->ops->pwenable) {
+		mutex_lock(&mport->lock);
+
+		if ((enable && ++mport->pwe_refcnt == 1) ||
+		    (!enable && mport->pwe_refcnt && --mport->pwe_refcnt == 0))
+			mport->ops->pwenable(mport, enable);
+		mutex_unlock(&mport->lock);
+	}
+}
+EXPORT_SYMBOL_GPL(rio_pw_enable);
+
+/**
  * rio_map_inb_region -- Map inbound memory region.
  * @mport: Master port.
  * @local: physical address of memory region to be mapped
@@ -483,6 +700,56 @@
 EXPORT_SYMBOL_GPL(rio_unmap_inb_region);
 
 /**
+ * rio_map_outb_region -- Map outbound memory region.
+ * @mport: Master port.
+ * @destid: destination id window points to
+ * @rbase: RIO base address window translates to
+ * @size: Size of the memory region
+ * @rflags: Flags for mapping.
+ * @local: physical address of memory region mapped
+ *
+ * Return: 0 -- Success.
+ *
+ * This function will create the mapping from RIO space to local memory.
+ */
+int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase,
+			u32 size, u32 rflags, dma_addr_t *local)
+{
+	int rc = 0;
+	unsigned long flags;
+
+	if (!mport->ops->map_outb)
+		return -ENODEV;
+
+	spin_lock_irqsave(&rio_mmap_lock, flags);
+	rc = mport->ops->map_outb(mport, destid, rbase, size,
+		rflags, local);
+	spin_unlock_irqrestore(&rio_mmap_lock, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rio_map_outb_region);
+
+/**
+ * rio_unmap_inb_region -- Unmap the inbound memory region
+ * @mport: Master port
+ * @destid: destination id mapping points to
+ * @rstart: RIO base address window translates to
+ */
+void rio_unmap_outb_region(struct rio_mport *mport, u16 destid, u64 rstart)
+{
+	unsigned long flags;
+
+	if (!mport->ops->unmap_outb)
+		return;
+
+	spin_lock_irqsave(&rio_mmap_lock, flags);
+	mport->ops->unmap_outb(mport, destid, rstart);
+	spin_unlock_irqrestore(&rio_mmap_lock, flags);
+}
+EXPORT_SYMBOL_GPL(rio_unmap_outb_region);
+
+/**
  * rio_mport_get_physefb - Helper function that returns register offset
  *                      for Physical Layer Extended Features Block.
  * @port: Master port to issue transaction
@@ -864,52 +1131,66 @@
 }
 
 /**
- * rio_inb_pwrite_handler - process inbound port-write message
+ * rio_inb_pwrite_handler - inbound port-write message handler
+ * @mport:  mport device associated with port-write
  * @pw_msg: pointer to inbound port-write message
  *
  * Processes an inbound port-write message. Returns 0 if the request
  * has been satisfied.
  */
-int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg)
+int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg)
 {
 	struct rio_dev *rdev;
 	u32 err_status, em_perrdet, em_ltlerrdet;
 	int rc, portnum;
-
-	rdev = rio_get_comptag((pw_msg->em.comptag & RIO_CTAG_UDEVID), NULL);
-	if (rdev == NULL) {
-		/* Device removed or enumeration error */
-		pr_debug("RIO: %s No matching device for CTag 0x%08x\n",
-			__func__, pw_msg->em.comptag);
-		return -EIO;
-	}
-
-	pr_debug("RIO: Port-Write message from %s\n", rio_name(rdev));
+	struct rio_pwrite *pwrite;
 
 #ifdef DEBUG_PW
 	{
-	u32 i;
-	for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32);) {
+		u32 i;
+
+		pr_debug("%s: PW to mport_%d:\n", __func__, mport->id);
+		for (i = 0; i < RIO_PW_MSG_SIZE / sizeof(u32); i = i + 4) {
 			pr_debug("0x%02x: %08x %08x %08x %08x\n",
-				 i*4, pw_msg->raw[i], pw_msg->raw[i + 1],
-				 pw_msg->raw[i + 2], pw_msg->raw[i + 3]);
-			i += 4;
-	}
+				i * 4, pw_msg->raw[i], pw_msg->raw[i + 1],
+				pw_msg->raw[i + 2], pw_msg->raw[i + 3]);
+		}
 	}
 #endif
 
-	/* Call an external service function (if such is registered
-	 * for this device). This may be the service for endpoints that send
-	 * device-specific port-write messages. End-point messages expected
-	 * to be handled completely by EP specific device driver.
+	rdev = rio_get_comptag((pw_msg->em.comptag & RIO_CTAG_UDEVID), NULL);
+	if (rdev) {
+		pr_debug("RIO: Port-Write message from %s\n", rio_name(rdev));
+	} else {
+		pr_debug("RIO: %s No matching device for CTag 0x%08x\n",
+			__func__, pw_msg->em.comptag);
+	}
+
+	/* Call a device-specific handler (if it is registered for the device).
+	 * This may be the service for endpoints that send device-specific
+	 * port-write messages. End-point messages expected to be handled
+	 * completely by EP specific device driver.
 	 * For switches rc==0 signals that no standard processing required.
 	 */
-	if (rdev->pwcback != NULL) {
+	if (rdev && rdev->pwcback) {
 		rc = rdev->pwcback(rdev, pw_msg, 0);
 		if (rc == 0)
 			return 0;
 	}
 
+	mutex_lock(&mport->lock);
+	list_for_each_entry(pwrite, &mport->pwrites, node)
+		pwrite->pwcback(mport, pwrite->context, pw_msg, 0);
+	mutex_unlock(&mport->lock);
+
+	if (!rdev)
+		return 0;
+
+	/*
+	 * FIXME: The code below stays as it was before for now until we decide
+	 * how to do default PW handling in combination with per-mport callbacks
+	 */
+
 	portnum = pw_msg->em.is_port & 0xFF;
 
 	/* Check if device and route to it are functional:
@@ -1909,32 +2190,31 @@
 	return hdid[index];
 }
 
+int rio_mport_initialize(struct rio_mport *mport)
+{
+	if (next_portid >= RIO_MAX_MPORTS) {
+		pr_err("RIO: reached specified max number of mports\n");
+		return -ENODEV;
+	}
+
+	atomic_set(&mport->state, RIO_DEVICE_INITIALIZING);
+	mport->id = next_portid++;
+	mport->host_deviceid = rio_get_hdid(mport->id);
+	mport->nscan = NULL;
+	mutex_init(&mport->lock);
+	mport->pwe_refcnt = 0;
+	INIT_LIST_HEAD(&mport->pwrites);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rio_mport_initialize);
+
 int rio_register_mport(struct rio_mport *port)
 {
 	struct rio_scan_node *scan = NULL;
 	int res = 0;
 
-	if (next_portid >= RIO_MAX_MPORTS) {
-		pr_err("RIO: reached specified max number of mports\n");
-		return 1;
-	}
-
-	port->id = next_portid++;
-	port->host_deviceid = rio_get_hdid(port->id);
-	port->nscan = NULL;
-
-	dev_set_name(&port->dev, "rapidio%d", port->id);
-	port->dev.class = &rio_mport_class;
-
-	res = device_register(&port->dev);
-	if (res)
-		dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n",
-			port->id, res);
-	else
-		dev_dbg(&port->dev, "RIO: mport%d registered\n", port->id);
-
 	mutex_lock(&rio_mport_list_lock);
-	list_add_tail(&port->node, &rio_mports);
 
 	/*
 	 * Check if there are any registered enumeration/discovery operations
@@ -1948,13 +2228,74 @@
 				break;
 		}
 	}
+
+	list_add_tail(&port->node, &rio_mports);
 	mutex_unlock(&rio_mport_list_lock);
 
-	pr_debug("RIO: %s %s id=%d\n", __func__, port->name, port->id);
-	return 0;
+	dev_set_name(&port->dev, "rapidio%d", port->id);
+	port->dev.class = &rio_mport_class;
+	atomic_set(&port->state, RIO_DEVICE_RUNNING);
+
+	res = device_register(&port->dev);
+	if (res)
+		dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n",
+			port->id, res);
+	else
+		dev_dbg(&port->dev, "RIO: registered mport%d\n", port->id);
+
+	return res;
 }
 EXPORT_SYMBOL_GPL(rio_register_mport);
 
+static int rio_mport_cleanup_callback(struct device *dev, void *data)
+{
+	struct rio_dev *rdev = to_rio_dev(dev);
+
+	if (dev->bus == &rio_bus_type)
+		rio_del_device(rdev, RIO_DEVICE_SHUTDOWN);
+	return 0;
+}
+
+static int rio_net_remove_children(struct rio_net *net)
+{
+	/*
+	 * Unregister all RapidIO devices residing on this net (this will
+	 * invoke notification of registered subsystem interfaces as well).
+	 */
+	device_for_each_child(&net->dev, NULL, rio_mport_cleanup_callback);
+	return 0;
+}
+
+int rio_unregister_mport(struct rio_mport *port)
+{
+	pr_debug("RIO: %s %s id=%d\n", __func__, port->name, port->id);
+
+	/* Transition mport to the SHUTDOWN state */
+	if (atomic_cmpxchg(&port->state,
+			   RIO_DEVICE_RUNNING,
+			   RIO_DEVICE_SHUTDOWN) != RIO_DEVICE_RUNNING) {
+		pr_err("RIO: %s unexpected state transition for mport %s\n",
+			__func__, port->name);
+	}
+
+	if (port->net && port->net->hport == port) {
+		rio_net_remove_children(port->net);
+		rio_free_net(port->net);
+	}
+
+	/*
+	 * Unregister all RapidIO devices attached to this mport (this will
+	 * invoke notification of registered subsystem interfaces as well).
+	 */
+	mutex_lock(&rio_mport_list_lock);
+	list_del(&port->node);
+	mutex_unlock(&rio_mport_list_lock);
+	device_unregister(&port->dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rio_unregister_mport);
+
 EXPORT_SYMBOL_GPL(rio_local_get_device_id);
 EXPORT_SYMBOL_GPL(rio_get_device);
 EXPORT_SYMBOL_GPL(rio_get_asm);

diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h
index 2d0550e..625d09a 100644
--- a/drivers/rapidio/rio.h
+++ b/drivers/rapidio/rio.h

@@ -28,6 +28,7 @@
 extern int rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid,
 				    u8 hopcount);
 extern int rio_create_sysfs_dev_files(struct rio_dev *rdev);
+extern void rio_remove_sysfs_dev_files(struct rio_dev *rdev);
 extern int rio_lock_device(struct rio_mport *port, u16 destid,
 			u8 hopcount, int wait_ms);
 extern int rio_unlock_device(struct rio_mport *port, u16 destid, u8 hopcount);
@@ -38,7 +39,11 @@
 extern int rio_route_clr_table(struct rio_dev *rdev, u16 table, int lock);
 extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock);
 extern struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from);
+extern struct rio_net *rio_alloc_net(struct rio_mport *mport);
+extern int rio_add_net(struct rio_net *net);
+extern void rio_free_net(struct rio_net *net);
 extern int rio_add_device(struct rio_dev *rdev);
+extern void rio_del_device(struct rio_dev *rdev, enum rio_device_state state);
 extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid,
 				 u8 hopcount, u8 port_num);
 extern int rio_register_scan(int mport_id, struct rio_scan *scan_ops);

diff --git a/drivers/remoteproc/st_remoteproc.c b/drivers/remoteproc/st_remoteproc.c
index 6bb04d4..6f056ca 100644
--- a/drivers/remoteproc/st_remoteproc.c
+++ b/drivers/remoteproc/st_remoteproc.c

@@ -189,9 +189,9 @@
 	}
 
 	ddata->boot_base = syscon_regmap_lookup_by_phandle(np, "st,syscfg");
-	if (!ddata->boot_base) {
+	if (IS_ERR(ddata->boot_base)) {
 		dev_err(dev, "Boot base not found\n");
-		return -EINVAL;
+		return PTR_ERR(ddata->boot_base);
 	}
 
 	err = of_property_read_u32_index(np, "st,syscfg", 1,

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 544bd34..3e84315 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig

@@ -589,7 +589,7 @@
 	default y
 	help
 	  Say Y here if you want to expose temperature sensor data on
-	  rtc-rv3029c2.
+	  rtc-rv3029.
 
 config RTC_DRV_RV8803
 	tristate "Micro Crystal RV8803"

diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c
index d41bbcd..ba0d619 100644
--- a/drivers/rtc/rtc-abx80x.c
+++ b/drivers/rtc/rtc-abx80x.c

@@ -49,7 +49,20 @@
 
 #define ABX8XX_REG_CD_TIMER_CTL	0x18
 
+#define ABX8XX_REG_OSC		0x1c
+#define ABX8XX_OSC_FOS		BIT(3)
+#define ABX8XX_OSC_BOS		BIT(4)
+#define ABX8XX_OSC_ACAL_512	BIT(5)
+#define ABX8XX_OSC_ACAL_1024	BIT(6)
+
+#define ABX8XX_OSC_OSEL		BIT(7)
+
+#define ABX8XX_REG_OSS		0x1d
+#define ABX8XX_OSS_OF		BIT(1)
+#define ABX8XX_OSS_OMODE	BIT(4)
+
 #define ABX8XX_REG_CFG_KEY	0x1f
+#define ABX8XX_CFG_KEY_OSC	0xa1
 #define ABX8XX_CFG_KEY_MISC	0x9d
 
 #define ABX8XX_REG_ID0		0x28
@@ -81,6 +94,20 @@
 	[ABX80X] = {.pn = 0}
 };
 
+static int abx80x_is_rc_mode(struct i2c_client *client)
+{
+	int flags = 0;
+
+	flags =  i2c_smbus_read_byte_data(client, ABX8XX_REG_OSS);
+	if (flags < 0) {
+		dev_err(&client->dev,
+			"Failed to read autocalibration attribute\n");
+		return flags;
+	}
+
+	return (flags & ABX8XX_OSS_OMODE) ? 1 : 0;
+}
+
 static int abx80x_enable_trickle_charger(struct i2c_client *client,
 					 u8 trickle_cfg)
 {
@@ -112,7 +139,23 @@
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	unsigned char buf[8];
-	int err;
+	int err, flags, rc_mode = 0;
+
+	/* Read the Oscillator Failure only in XT mode */
+	rc_mode = abx80x_is_rc_mode(client);
+	if (rc_mode < 0)
+		return rc_mode;
+
+	if (!rc_mode) {
+		flags = i2c_smbus_read_byte_data(client, ABX8XX_REG_OSS);
+		if (flags < 0)
+			return flags;
+
+		if (flags & ABX8XX_OSS_OF) {
+			dev_err(dev, "Oscillator failure, data is invalid.\n");
+			return -EINVAL;
+		}
+	}
 
 	err = i2c_smbus_read_i2c_block_data(client, ABX8XX_REG_HTH,
 					    sizeof(buf), buf);
@@ -140,7 +183,7 @@
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	unsigned char buf[8];
-	int err;
+	int err, flags;
 
 	if (tm->tm_year < 100)
 		return -EINVAL;
@@ -161,6 +204,18 @@
 		return -EIO;
 	}
 
+	/* Clear the OF bit of Oscillator Status Register */
+	flags = i2c_smbus_read_byte_data(client, ABX8XX_REG_OSS);
+	if (flags < 0)
+		return flags;
+
+	err = i2c_smbus_write_byte_data(client, ABX8XX_REG_OSS,
+					flags & ~ABX8XX_OSS_OF);
+	if (err < 0) {
+		dev_err(&client->dev, "Unable to write oscillator status register\n");
+		return err;
+	}
+
 	return 0;
 }
 
@@ -248,6 +303,174 @@
 	return 0;
 }
 
+static int abx80x_rtc_set_autocalibration(struct device *dev,
+					  int autocalibration)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int retval, flags = 0;
+
+	if ((autocalibration != 0) && (autocalibration != 1024) &&
+	    (autocalibration != 512)) {
+		dev_err(dev, "autocalibration value outside permitted range\n");
+		return -EINVAL;
+	}
+
+	flags = i2c_smbus_read_byte_data(client, ABX8XX_REG_OSC);
+	if (flags < 0)
+		return flags;
+
+	if (autocalibration == 0) {
+		flags &= ~(ABX8XX_OSC_ACAL_512 | ABX8XX_OSC_ACAL_1024);
+	} else if (autocalibration == 1024) {
+		/* 1024 autocalibration is 0x10 */
+		flags |= ABX8XX_OSC_ACAL_1024;
+		flags &= ~(ABX8XX_OSC_ACAL_512);
+	} else {
+		/* 512 autocalibration is 0x11 */
+		flags |= (ABX8XX_OSC_ACAL_1024 | ABX8XX_OSC_ACAL_512);
+	}
+
+	/* Unlock write access to Oscillator Control Register */
+	retval = i2c_smbus_write_byte_data(client, ABX8XX_REG_CFG_KEY,
+					   ABX8XX_CFG_KEY_OSC);
+	if (retval < 0) {
+		dev_err(dev, "Failed to write CONFIG_KEY register\n");
+		return retval;
+	}
+
+	retval = i2c_smbus_write_byte_data(client, ABX8XX_REG_OSC, flags);
+
+	return retval;
+}
+
+static int abx80x_rtc_get_autocalibration(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int flags = 0, autocalibration;
+
+	flags =  i2c_smbus_read_byte_data(client, ABX8XX_REG_OSC);
+	if (flags < 0)
+		return flags;
+
+	if (flags & ABX8XX_OSC_ACAL_512)
+		autocalibration = 512;
+	else if (flags & ABX8XX_OSC_ACAL_1024)
+		autocalibration = 1024;
+	else
+		autocalibration = 0;
+
+	return autocalibration;
+}
+
+static ssize_t autocalibration_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	int retval;
+	unsigned long autocalibration = 0;
+
+	retval = kstrtoul(buf, 10, &autocalibration);
+	if (retval < 0) {
+		dev_err(dev, "Failed to store RTC autocalibration attribute\n");
+		return -EINVAL;
+	}
+
+	retval = abx80x_rtc_set_autocalibration(dev, autocalibration);
+
+	return retval ? retval : count;
+}
+
+static ssize_t autocalibration_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	int autocalibration = 0;
+
+	autocalibration = abx80x_rtc_get_autocalibration(dev);
+	if (autocalibration < 0) {
+		dev_err(dev, "Failed to read RTC autocalibration\n");
+		sprintf(buf, "0\n");
+		return autocalibration;
+	}
+
+	return sprintf(buf, "%d\n", autocalibration);
+}
+
+static DEVICE_ATTR_RW(autocalibration);
+
+static ssize_t oscillator_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int retval, flags, rc_mode = 0;
+
+	if (strncmp(buf, "rc", 2) == 0) {
+		rc_mode = 1;
+	} else if (strncmp(buf, "xtal", 4) == 0) {
+		rc_mode = 0;
+	} else {
+		dev_err(dev, "Oscillator selection value outside permitted ones\n");
+		return -EINVAL;
+	}
+
+	flags =  i2c_smbus_read_byte_data(client, ABX8XX_REG_OSC);
+	if (flags < 0)
+		return flags;
+
+	if (rc_mode == 0)
+		flags &= ~(ABX8XX_OSC_OSEL);
+	else
+		flags |= (ABX8XX_OSC_OSEL);
+
+	/* Unlock write access on Oscillator Control register */
+	retval = i2c_smbus_write_byte_data(client, ABX8XX_REG_CFG_KEY,
+					   ABX8XX_CFG_KEY_OSC);
+	if (retval < 0) {
+		dev_err(dev, "Failed to write CONFIG_KEY register\n");
+		return retval;
+	}
+
+	retval = i2c_smbus_write_byte_data(client, ABX8XX_REG_OSC, flags);
+	if (retval < 0) {
+		dev_err(dev, "Failed to write Oscillator Control register\n");
+		return retval;
+	}
+
+	return retval ? retval : count;
+}
+
+static ssize_t oscillator_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	int rc_mode = 0;
+	struct i2c_client *client = to_i2c_client(dev);
+
+	rc_mode = abx80x_is_rc_mode(client);
+
+	if (rc_mode < 0) {
+		dev_err(dev, "Failed to read RTC oscillator selection\n");
+		sprintf(buf, "\n");
+		return rc_mode;
+	}
+
+	if (rc_mode)
+		return sprintf(buf, "rc\n");
+	else
+		return sprintf(buf, "xtal\n");
+}
+
+static DEVICE_ATTR_RW(oscillator);
+
+static struct attribute *rtc_calib_attrs[] = {
+	&dev_attr_autocalibration.attr,
+	&dev_attr_oscillator.attr,
+	NULL,
+};
+
+static const struct attribute_group rtc_calib_attr_group = {
+	.attrs		= rtc_calib_attrs,
+};
+
 static int abx80x_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct i2c_client *client = to_i2c_client(dev);
@@ -303,6 +526,13 @@
 	return (trickle_cfg | i);
 }
 
+static void rtc_calib_remove_sysfs_group(void *_dev)
+{
+	struct device *dev = _dev;
+
+	sysfs_remove_group(&dev->kobj, &rtc_calib_attr_group);
+}
+
 static int abx80x_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
@@ -405,6 +635,24 @@
 		}
 	}
 
+	/* Export sysfs entries */
+	err = sysfs_create_group(&(&client->dev)->kobj, &rtc_calib_attr_group);
+	if (err) {
+		dev_err(&client->dev, "Failed to create sysfs group: %d\n",
+			err);
+		return err;
+	}
+
+	err = devm_add_action(&client->dev, rtc_calib_remove_sysfs_group,
+			      &client->dev);
+	if (err) {
+		rtc_calib_remove_sysfs_group(&client->dev);
+		dev_err(&client->dev,
+			"Failed to add sysfs cleanup action: %d\n",
+			err);
+		return err;
+	}
+
 	return 0;
 }
 

diff --git a/drivers/rtc/rtc-asm9260.c b/drivers/rtc/rtc-asm9260.c
index 14e08c4..355fdb9 100644
--- a/drivers/rtc/rtc-asm9260.c
+++ b/drivers/rtc/rtc-asm9260.c

@@ -255,7 +255,7 @@
 	.alarm_irq_enable	= asm9260_alarm_irq_enable,
 };
 
-static int __init asm9260_rtc_probe(struct platform_device *pdev)
+static int asm9260_rtc_probe(struct platform_device *pdev)
 {
 	struct asm9260_rtc_priv *priv;
 	struct device *dev = &pdev->dev;
@@ -323,7 +323,7 @@
 	return ret;
 }
 
-static int __exit asm9260_rtc_remove(struct platform_device *pdev)
+static int asm9260_rtc_remove(struct platform_device *pdev)
 {
 	struct asm9260_rtc_priv *priv = platform_get_drvdata(pdev);
 

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index a82937e..d107a8e 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c

@@ -176,7 +176,13 @@
 		bin2bcd(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
 	buf[M41T80_REG_MON] =
 		bin2bcd(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
+
 	/* assume 20YY not 19YY */
+	if (tm->tm_year < 100 || tm->tm_year > 199) {
+		dev_err(&client->dev, "Year must be between 2000 and 2099. It's %d.\n",
+			tm->tm_year + 1900);
+		return -EINVAL;
+	}
 	buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
 	if (i2c_transfer(client->adapter, msgs, 1) != 1) {

diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c
index 1c91ce8..025bb33 100644
--- a/drivers/rtc/rtc-mcp795.c
+++ b/drivers/rtc/rtc-mcp795.c

@@ -20,6 +20,7 @@
 #include <linux/printk.h>
 #include <linux/spi/spi.h>
 #include <linux/rtc.h>
+#include <linux/of.h>
 
 /* MCP795 Instructions, see datasheet table 3-1 */
 #define MCP795_EEREAD	0x03
@@ -183,9 +184,18 @@
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id mcp795_of_match[] = {
+	{ .compatible = "maxim,mcp795" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, mcp795_of_match);
+#endif
+
 static struct spi_driver mcp795_driver = {
 		.driver = {
 				.name = "rtc-mcp795",
+				.of_match_table = of_match_ptr(mcp795_of_match),
 		},
 		.probe = mcp795_probe,
 };

diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 8d9f35c..f623038 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c

@@ -61,11 +61,14 @@
 	struct i2c_client *client = dev_id;
 	struct rv8803_data *rv8803 = i2c_get_clientdata(client);
 	unsigned long events = 0;
-	int flags;
+	int flags, try = 0;
 
 	mutex_lock(&rv8803->flags_lock);
 
-	flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+	do {
+		flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+		try++;
+	} while ((flags == -ENXIO) && (try < 3));
 	if (flags <= 0) {
 		mutex_unlock(&rv8803->flags_lock);
 		return IRQ_NONE;
@@ -424,7 +427,7 @@
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
 	struct rv8803_data *rv8803;
-	int err, flags;
+	int err, flags, try = 0;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
 				     I2C_FUNC_SMBUS_I2C_BLOCK)) {
@@ -441,7 +444,16 @@
 	rv8803->client = client;
 	i2c_set_clientdata(client, rv8803);
 
-	flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+	/*
+	 * There is a 60µs window where the RTC may not reply on the i2c bus in
+	 * that case, the transfer is not ACKed. In that case, ensure there are
+	 * multiple attempts.
+	 */
+	do {
+		flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+		try++;
+	} while ((flags == -ENXIO) && (try < 3));
+
 	if (flags < 0)
 		return flags;
 
@@ -476,8 +488,12 @@
 		return PTR_ERR(rv8803->rtc);
 	}
 
-	err = i2c_smbus_write_byte_data(rv8803->client, RV8803_EXT,
-					RV8803_EXT_WADA);
+	try = 0;
+	do {
+		err = i2c_smbus_write_byte_data(rv8803->client, RV8803_EXT,
+						RV8803_EXT_WADA);
+		try++;
+	} while ((err == -ENXIO) && (try < 3));
 	if (err)
 		return err;
 

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index ffb860d..d01ad7e 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c

@@ -501,18 +501,27 @@
 
 	info->rtc_clk = devm_clk_get(&pdev->dev, "rtc");
 	if (IS_ERR(info->rtc_clk)) {
-		dev_err(&pdev->dev, "failed to find rtc clock\n");
-		return PTR_ERR(info->rtc_clk);
+		ret = PTR_ERR(info->rtc_clk);
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to find rtc clock\n");
+		else
+			dev_dbg(&pdev->dev, "probe deferred due to missing rtc clk\n");
+		return ret;
 	}
 	clk_prepare_enable(info->rtc_clk);
 
 	if (info->data->needs_src_clk) {
 		info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src");
 		if (IS_ERR(info->rtc_src_clk)) {
-			dev_err(&pdev->dev,
-				"failed to find rtc source clock\n");
+			ret = PTR_ERR(info->rtc_src_clk);
+			if (ret != -EPROBE_DEFER)
+				dev_err(&pdev->dev,
+					"failed to find rtc source clock\n");
+			else
+				dev_dbg(&pdev->dev,
+					"probe deferred due to missing rtc src clk\n");
 			clk_disable_unprepare(info->rtc_clk);
-			return PTR_ERR(info->rtc_src_clk);
+			return ret;
 		}
 		clk_prepare_enable(info->rtc_src_clk);
 	}

diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 17ad574..1e56018 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c

@@ -317,17 +317,17 @@
 	struct alias_pav_group *group;
 	struct dasd_uid uid;
 
+	spin_lock(get_ccwdev_lock(device->cdev));
 	private->uid.type = lcu->uac->unit[private->uid.real_unit_addr].ua_type;
 	private->uid.base_unit_addr =
 		lcu->uac->unit[private->uid.real_unit_addr].base_ua;
 	uid = private->uid;
-
+	spin_unlock(get_ccwdev_lock(device->cdev));
 	/* if we have no PAV anyway, we don't need to bother with PAV groups */
 	if (lcu->pav == NO_PAV) {
 		list_move(&device->alias_list, &lcu->active_devices);
 		return 0;
 	}
-
 	group = _find_group(lcu, &uid);
 	if (!group) {
 		group = kzalloc(sizeof(*group), GFP_ATOMIC);
@@ -397,130 +397,6 @@
 	return 0;
 }
 
-/*
- * This function tries to lock all devices on an lcu via trylock
- * return NULL on success otherwise return first failed device
- */
-static struct dasd_device *_trylock_all_devices_on_lcu(struct alias_lcu *lcu,
-						      struct dasd_device *pos)
-
-{
-	struct alias_pav_group *pavgroup;
-	struct dasd_device *device;
-
-	list_for_each_entry(device, &lcu->active_devices, alias_list) {
-		if (device == pos)
-			continue;
-		if (!spin_trylock(get_ccwdev_lock(device->cdev)))
-			return device;
-	}
-	list_for_each_entry(device, &lcu->inactive_devices, alias_list) {
-		if (device == pos)
-			continue;
-		if (!spin_trylock(get_ccwdev_lock(device->cdev)))
-			return device;
-	}
-	list_for_each_entry(pavgroup, &lcu->grouplist, group) {
-		list_for_each_entry(device, &pavgroup->baselist, alias_list) {
-			if (device == pos)
-				continue;
-			if (!spin_trylock(get_ccwdev_lock(device->cdev)))
-				return device;
-		}
-		list_for_each_entry(device, &pavgroup->aliaslist, alias_list) {
-			if (device == pos)
-				continue;
-			if (!spin_trylock(get_ccwdev_lock(device->cdev)))
-				return device;
-		}
-	}
-	return NULL;
-}
-
-/*
- * unlock all devices except the one that is specified as pos
- * stop if enddev is specified and reached
- */
-static void _unlock_all_devices_on_lcu(struct alias_lcu *lcu,
-				       struct dasd_device *pos,
-				       struct dasd_device *enddev)
-
-{
-	struct alias_pav_group *pavgroup;
-	struct dasd_device *device;
-
-	list_for_each_entry(device, &lcu->active_devices, alias_list) {
-		if (device == pos)
-			continue;
-		if (device == enddev)
-			return;
-		spin_unlock(get_ccwdev_lock(device->cdev));
-	}
-	list_for_each_entry(device, &lcu->inactive_devices, alias_list) {
-		if (device == pos)
-			continue;
-		if (device == enddev)
-			return;
-		spin_unlock(get_ccwdev_lock(device->cdev));
-	}
-	list_for_each_entry(pavgroup, &lcu->grouplist, group) {
-		list_for_each_entry(device, &pavgroup->baselist, alias_list) {
-			if (device == pos)
-				continue;
-			if (device == enddev)
-				return;
-			spin_unlock(get_ccwdev_lock(device->cdev));
-		}
-		list_for_each_entry(device, &pavgroup->aliaslist, alias_list) {
-			if (device == pos)
-				continue;
-			if (device == enddev)
-				return;
-			spin_unlock(get_ccwdev_lock(device->cdev));
-		}
-	}
-}
-
-/*
- *  this function is needed because the locking order
- *  device lock -> lcu lock
- *  needs to be assured when iterating over devices in an LCU
- *
- *  if a device is specified in pos then the device lock is already hold
- */
-static void _trylock_and_lock_lcu_irqsave(struct alias_lcu *lcu,
-					  struct dasd_device *pos,
-					  unsigned long *flags)
-{
-	struct dasd_device *failed;
-
-	do {
-		spin_lock_irqsave(&lcu->lock, *flags);
-		failed = _trylock_all_devices_on_lcu(lcu, pos);
-		if (failed) {
-			_unlock_all_devices_on_lcu(lcu, pos, failed);
-			spin_unlock_irqrestore(&lcu->lock, *flags);
-			cpu_relax();
-		}
-	} while (failed);
-}
-
-static void _trylock_and_lock_lcu(struct alias_lcu *lcu,
-				  struct dasd_device *pos)
-{
-	struct dasd_device *failed;
-
-	do {
-		spin_lock(&lcu->lock);
-		failed = _trylock_all_devices_on_lcu(lcu, pos);
-		if (failed) {
-			_unlock_all_devices_on_lcu(lcu, pos, failed);
-			spin_unlock(&lcu->lock);
-			cpu_relax();
-		}
-	} while (failed);
-}
-
 static int read_unit_address_configuration(struct dasd_device *device,
 					   struct alias_lcu *lcu)
 {
@@ -615,7 +491,7 @@
 	if (rc)
 		return rc;
 
-	_trylock_and_lock_lcu_irqsave(lcu, NULL, &flags);
+	spin_lock_irqsave(&lcu->lock, flags);
 	lcu->pav = NO_PAV;
 	for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) {
 		switch (lcu->uac->unit[i].ua_type) {
@@ -634,7 +510,6 @@
 				 alias_list) {
 		_add_device_to_lcu(lcu, device, refdev);
 	}
-	_unlock_all_devices_on_lcu(lcu, NULL, NULL);
 	spin_unlock_irqrestore(&lcu->lock, flags);
 	return 0;
 }
@@ -722,8 +597,7 @@
 
 	lcu = private->lcu;
 	rc = 0;
-	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
-	spin_lock(&lcu->lock);
+	spin_lock_irqsave(&lcu->lock, flags);
 	if (!(lcu->flags & UPDATE_PENDING)) {
 		rc = _add_device_to_lcu(lcu, device, device);
 		if (rc)
@@ -733,8 +607,7 @@
 		list_move(&device->alias_list, &lcu->active_devices);
 		_schedule_lcu_update(lcu, device);
 	}
-	spin_unlock(&lcu->lock);
-	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
+	spin_unlock_irqrestore(&lcu->lock, flags);
 	return rc;
 }
 
@@ -933,15 +806,27 @@
 	struct alias_pav_group *pavgroup;
 	struct dasd_device *device;
 
-	list_for_each_entry(device, &lcu->active_devices, alias_list)
+	list_for_each_entry(device, &lcu->active_devices, alias_list) {
+		spin_lock(get_ccwdev_lock(device->cdev));
 		dasd_device_set_stop_bits(device, DASD_STOPPED_SU);
-	list_for_each_entry(device, &lcu->inactive_devices, alias_list)
+		spin_unlock(get_ccwdev_lock(device->cdev));
+	}
+	list_for_each_entry(device, &lcu->inactive_devices, alias_list) {
+		spin_lock(get_ccwdev_lock(device->cdev));
 		dasd_device_set_stop_bits(device, DASD_STOPPED_SU);
+		spin_unlock(get_ccwdev_lock(device->cdev));
+	}
 	list_for_each_entry(pavgroup, &lcu->grouplist, group) {
-		list_for_each_entry(device, &pavgroup->baselist, alias_list)
+		list_for_each_entry(device, &pavgroup->baselist, alias_list) {
+			spin_lock(get_ccwdev_lock(device->cdev));
 			dasd_device_set_stop_bits(device, DASD_STOPPED_SU);
-		list_for_each_entry(device, &pavgroup->aliaslist, alias_list)
+			spin_unlock(get_ccwdev_lock(device->cdev));
+		}
+		list_for_each_entry(device, &pavgroup->aliaslist, alias_list) {
+			spin_lock(get_ccwdev_lock(device->cdev));
 			dasd_device_set_stop_bits(device, DASD_STOPPED_SU);
+			spin_unlock(get_ccwdev_lock(device->cdev));
+		}
 	}
 }
 
@@ -950,15 +835,27 @@
 	struct alias_pav_group *pavgroup;
 	struct dasd_device *device;
 
-	list_for_each_entry(device, &lcu->active_devices, alias_list)
+	list_for_each_entry(device, &lcu->active_devices, alias_list) {
+		spin_lock(get_ccwdev_lock(device->cdev));
 		dasd_device_remove_stop_bits(device, DASD_STOPPED_SU);
-	list_for_each_entry(device, &lcu->inactive_devices, alias_list)
+		spin_unlock(get_ccwdev_lock(device->cdev));
+	}
+	list_for_each_entry(device, &lcu->inactive_devices, alias_list) {
+		spin_lock(get_ccwdev_lock(device->cdev));
 		dasd_device_remove_stop_bits(device, DASD_STOPPED_SU);
+		spin_unlock(get_ccwdev_lock(device->cdev));
+	}
 	list_for_each_entry(pavgroup, &lcu->grouplist, group) {
-		list_for_each_entry(device, &pavgroup->baselist, alias_list)
+		list_for_each_entry(device, &pavgroup->baselist, alias_list) {
+			spin_lock(get_ccwdev_lock(device->cdev));
 			dasd_device_remove_stop_bits(device, DASD_STOPPED_SU);
-		list_for_each_entry(device, &pavgroup->aliaslist, alias_list)
+			spin_unlock(get_ccwdev_lock(device->cdev));
+		}
+		list_for_each_entry(device, &pavgroup->aliaslist, alias_list) {
+			spin_lock(get_ccwdev_lock(device->cdev));
 			dasd_device_remove_stop_bits(device, DASD_STOPPED_SU);
+			spin_unlock(get_ccwdev_lock(device->cdev));
+		}
 	}
 }
 
@@ -984,48 +881,32 @@
 	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
 	reset_summary_unit_check(lcu, device, suc_data->reason);
 
-	_trylock_and_lock_lcu_irqsave(lcu, NULL, &flags);
+	spin_lock_irqsave(&lcu->lock, flags);
 	_unstop_all_devices_on_lcu(lcu);
 	_restart_all_base_devices_on_lcu(lcu);
 	/* 3. read new alias configuration */
 	_schedule_lcu_update(lcu, device);
 	lcu->suc_data.device = NULL;
 	dasd_put_device(device);
-	_unlock_all_devices_on_lcu(lcu, NULL, NULL);
 	spin_unlock_irqrestore(&lcu->lock, flags);
 }
 
-/*
- * note: this will be called from int handler context (cdev locked)
- */
-void dasd_alias_handle_summary_unit_check(struct dasd_device *device,
-					  struct irb *irb)
+void dasd_alias_handle_summary_unit_check(struct work_struct *work)
 {
+	struct dasd_device *device = container_of(work, struct dasd_device,
+						  suc_work);
 	struct dasd_eckd_private *private = device->private;
 	struct alias_lcu *lcu;
-	char reason;
-	char *sense;
-
-	sense = dasd_get_sense(irb);
-	if (sense) {
-		reason = sense[8];
-		DBF_DEV_EVENT(DBF_NOTICE, device, "%s %x",
-			    "eckd handle summary unit check: reason", reason);
-	} else {
-		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
-			    "eckd handle summary unit check:"
-			    " no reason code available");
-		return;
-	}
+	unsigned long flags;
 
 	lcu = private->lcu;
 	if (!lcu) {
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
 			    "device not ready to handle summary"
 			    " unit check (no lcu structure)");
-		return;
+		goto out;
 	}
-	_trylock_and_lock_lcu(lcu, device);
+	spin_lock_irqsave(&lcu->lock, flags);
 	/* If this device is about to be removed just return and wait for
 	 * the next interrupt on a different device
 	 */
@@ -1033,27 +914,26 @@
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
 			    "device is in offline processing,"
 			    " don't do summary unit check handling");
-		_unlock_all_devices_on_lcu(lcu, device, NULL);
-		spin_unlock(&lcu->lock);
-		return;
+		goto out_unlock;
 	}
 	if (lcu->suc_data.device) {
 		/* already scheduled or running */
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
 			    "previous instance of summary unit check worker"
 			    " still pending");
-		_unlock_all_devices_on_lcu(lcu, device, NULL);
-		spin_unlock(&lcu->lock);
-		return ;
+		goto out_unlock;
 	}
 	_stop_all_devices_on_lcu(lcu);
 	/* prepare for lcu_update */
-	private->lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING;
-	lcu->suc_data.reason = reason;
+	lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING;
+	lcu->suc_data.reason = private->suc_reason;
 	lcu->suc_data.device = device;
 	dasd_get_device(device);
-	_unlock_all_devices_on_lcu(lcu, device, NULL);
-	spin_unlock(&lcu->lock);
 	if (!schedule_work(&lcu->suc_data.worker))
 		dasd_put_device(device);
+out_unlock:
+	spin_unlock_irqrestore(&lcu->lock, flags);
+out:
+	clear_bit(DASD_FLAG_SUC, &device->flags);
+	dasd_put_device(device);
 };

diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 75c032d..c1b4ae5 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c

@@ -1682,6 +1682,8 @@
 
 	/* setup work queue for validate server*/
 	INIT_WORK(&device->kick_validate, dasd_eckd_do_validate_server);
+	/* setup work queue for summary unit check */
+	INIT_WORK(&device->suc_work, dasd_alias_handle_summary_unit_check);
 
 	if (!ccw_device_is_pathgroup(device->cdev)) {
 		dev_warn(&device->cdev->dev,
@@ -2549,14 +2551,6 @@
 		    device->state == DASD_STATE_ONLINE &&
 		    !test_bit(DASD_FLAG_OFFLINE, &device->flags) &&
 		    !test_bit(DASD_FLAG_SUSPENDED, &device->flags)) {
-			/*
-			 * the state change could be caused by an alias
-			 * reassignment remove device from alias handling
-			 * to prevent new requests from being scheduled on
-			 * the wrong alias device
-			 */
-			dasd_alias_remove_device(device);
-
 			/* schedule worker to reload device */
 			dasd_reload_device(device);
 		}
@@ -2571,7 +2565,27 @@
 	/* summary unit check */
 	if ((sense[27] & DASD_SENSE_BIT_0) && (sense[7] == 0x0D) &&
 	    (scsw_dstat(&irb->scsw) & DEV_STAT_UNIT_CHECK)) {
-		dasd_alias_handle_summary_unit_check(device, irb);
+		if (test_and_set_bit(DASD_FLAG_SUC, &device->flags)) {
+			DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+				      "eckd suc: device already notified");
+			return;
+		}
+		sense = dasd_get_sense(irb);
+		if (!sense) {
+			DBF_DEV_EVENT(DBF_WARNING, device, "%s",
+				      "eckd suc: no reason code available");
+			clear_bit(DASD_FLAG_SUC, &device->flags);
+			return;
+
+		}
+		private->suc_reason = sense[8];
+		DBF_DEV_EVENT(DBF_NOTICE, device, "%s %x",
+			      "eckd handle summary unit check: reason",
+			      private->suc_reason);
+		dasd_get_device(device);
+		if (!schedule_work(&device->suc_work))
+			dasd_put_device(device);
+
 		return;
 	}
 
@@ -4495,6 +4509,12 @@
 	struct dasd_uid uid;
 	unsigned long flags;
 
+	/*
+	 * remove device from alias handling to prevent new requests
+	 * from being scheduled on the wrong alias device
+	 */
+	dasd_alias_remove_device(device);
+
 	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
 	old_base = private->uid.base_unit_addr;
 	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);

diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h
index f8f91ee..6d9a6d3 100644
--- a/drivers/s390/block/dasd_eckd.h
+++ b/drivers/s390/block/dasd_eckd.h

@@ -525,6 +525,7 @@
 	int count;
 
 	u32 fcx_max_data;
+	char suc_reason;
 };
 
 
@@ -534,7 +535,7 @@
 int dasd_alias_add_device(struct dasd_device *);
 int dasd_alias_remove_device(struct dasd_device *);
 struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *);
-void dasd_alias_handle_summary_unit_check(struct dasd_device *, struct irb *);
+void dasd_alias_handle_summary_unit_check(struct work_struct *);
 void dasd_eckd_reset_ccw_to_base_io(struct dasd_ccw_req *);
 void dasd_alias_lcu_setup_complete(struct dasd_device *);
 void dasd_alias_wait_for_lcu_setup(struct dasd_device *);

diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 8de29be..0f0add9 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h

@@ -470,6 +470,7 @@
 	struct work_struct restore_device;
 	struct work_struct reload_device;
 	struct work_struct kick_validate;
+	struct work_struct suc_work;
 	struct timer_list timer;
 
 	debug_info_t *debug_area;
@@ -542,6 +543,7 @@
 #define DASD_FLAG_SAFE_OFFLINE_RUNNING	11	/* safe offline running */
 #define DASD_FLAG_ABORTALL	12	/* Abort all noretry requests */
 #define DASD_FLAG_PATH_VERIFY	13	/* Path verification worker running */
+#define DASD_FLAG_SUC		14	/* unhandled summary unit check */
 
 #define DASD_SLEEPON_START_TAG	((void *) 1)
 #define DASD_SLEEPON_END_TAG	((void *) 2)

diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.h b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.h
index 22dd8d6..2fd9c76 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.h
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.h

@@ -25,21 +25,4 @@
 
 #define T5_ISS_VALID		(1 << 18)
 
-struct ulptx_idata {
-	__be32 cmd_more;
-	__be32 len;
-};
-
-struct cpl_rx_data_ddp {
-	union opcode_tid ot;
-	__be16 urg;
-	__be16 len;
-	__be32 seq;
-	union {
-		__be32 nxt_seq;
-		__be32 ddp_report;
-	};
-	__be32 ulp_crc;
-	__be32 ddpvld;
-};
 #endif	/* __CXGB4I_H__ */

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 5bcdf8d..a404a41 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c

@@ -332,7 +332,7 @@
 {
 	int rel_port = -1, group_id;
 	struct alua_port_group *pg, *old_pg = NULL;
-	bool pg_updated;
+	bool pg_updated = false;
 	unsigned long flags;
 
 	group_id = scsi_vpd_tpg_id(sdev, &rel_port);

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 266b909..f3032ca 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c

@@ -958,22 +958,21 @@
 	case FCPIO_INVALID_PARAM:    /* some parameter in request invalid */
 	case FCPIO_REQ_NOT_SUPPORTED:/* request type is not supported */
 	default:
-		shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
-			     fnic_fcpio_status_to_str(hdr_status));
 		sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status;
 		break;
 	}
 
+	/* Break link with the SCSI command */
+	CMD_SP(sc) = NULL;
+	CMD_FLAGS(sc) |= FNIC_IO_DONE;
+
+	spin_unlock_irqrestore(io_lock, flags);
+
 	if (hdr_status != FCPIO_SUCCESS) {
 		atomic64_inc(&fnic_stats->io_stats.io_failures);
 		shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
 			     fnic_fcpio_status_to_str(hdr_status));
 	}
-	/* Break link with the SCSI command */
-	CMD_SP(sc) = NULL;
-	CMD_FLAGS(sc) |= FNIC_IO_DONE;
-
-	spin_unlock_irqrestore(io_lock, flags);
 
 	fnic_release_ioreq_buf(fnic, io_req, sc);
 

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index a544366..f57d02c 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c

@@ -2860,7 +2860,7 @@
 	}
 
 	vports = lpfc_create_vport_work_array(phba);
-	if (vports != NULL)
+	if (vports != NULL) {
 		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			struct Scsi_Host *shost;
 			shost = lpfc_shost_from_vport(vports[i]);
@@ -2877,7 +2877,8 @@
 			}
 			spin_unlock_irq(shost->host_lock);
 		}
-		lpfc_destroy_vport_work_array(phba, vports);
+	}
+	lpfc_destroy_vport_work_array(phba, vports);
 
 	lpfc_unblock_mgmt_io(phba);
 	return 0;

diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index 4484e63..fce414a 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h

@@ -2097,7 +2097,7 @@
 	u8 UnevenSpanSupport;
 
 	u8 supportmax256vd;
-	u8 allow_fw_scan;
+	u8 pd_list_not_supported;
 	u16 fw_supported_vd_count;
 	u16 fw_supported_pd_count;
 

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 5c08568..e6ebc7a 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c

@@ -1838,7 +1838,7 @@
 	struct megasas_instance *instance;
 
 	instance = megasas_lookup_instance(sdev->host->host_no);
-	if (instance->allow_fw_scan) {
+	if (instance->pd_list_not_supported) {
 		if (sdev->channel < MEGASAS_MAX_PD_CHANNELS &&
 			sdev->type == TYPE_DISK) {
 			pd_index = (sdev->channel * MEGASAS_MAX_DEV_PER_CHANNEL) +
@@ -1874,7 +1874,8 @@
 		pd_index =
 			(sdev->channel * MEGASAS_MAX_DEV_PER_CHANNEL) +
 			sdev->id;
-		if ((instance->allow_fw_scan || instance->pd_list[pd_index].driveState ==
+		if ((instance->pd_list_not_supported ||
+			instance->pd_list[pd_index].driveState ==
 			MR_PD_STATE_SYSTEM)) {
 			goto scan_target;
 		}
@@ -4087,7 +4088,13 @@
 
 	switch (ret) {
 	case DCMD_FAILED:
-		megaraid_sas_kill_hba(instance);
+		dev_info(&instance->pdev->dev, "MR_DCMD_PD_LIST_QUERY "
+			"failed/not supported by firmware\n");
+
+		if (instance->ctrl_context)
+			megaraid_sas_kill_hba(instance);
+		else
+			instance->pd_list_not_supported = 1;
 		break;
 	case DCMD_TIMEOUT:
 
@@ -5034,7 +5041,6 @@
 	case PCI_DEVICE_ID_DELL_PERC5:
 	default:
 		instance->instancet = &megasas_instance_template_xscale;
-		instance->allow_fw_scan = 1;
 		break;
 	}
 
@@ -6650,12 +6656,13 @@
 	}
 
 	for (i = 0; i < ioc->sge_count; i++) {
-		if (kbuff_arr[i])
+		if (kbuff_arr[i]) {
 			dma_free_coherent(&instance->pdev->dev,
 					  le32_to_cpu(kern_sge32[i].length),
 					  kbuff_arr[i],
 					  le32_to_cpu(kern_sge32[i].phys_addr));
 			kbuff_arr[i] = NULL;
+		}
 	}
 
 	megasas_return_cmd(instance, cmd);

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 9852319..8a44d15 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c

@@ -1881,15 +1881,17 @@
 		else
 			vha->req->cnt = vha->req->length -
 			    (vha->req->ring_index - cnt);
+
+		if (unlikely(vha->req->cnt < (req_cnt + 2))) {
+			ql_dbg(ql_dbg_io, vha, 0x305a,
+			    "qla_target(%d): There is no room in the request ring: vha->req->ring_index=%d, vha->req->cnt=%d, req_cnt=%d Req-out=%d Req-in=%d Req-Length=%d\n",
+			    vha->vp_idx, vha->req->ring_index,
+			    vha->req->cnt, req_cnt, cnt, cnt_in,
+			    vha->req->length);
+			return -EAGAIN;
+		}
 	}
 
-	if (unlikely(vha->req->cnt < (req_cnt + 2))) {
-		ql_dbg(ql_dbg_io, vha, 0x305a,
-		    "qla_target(%d): There is no room in the request ring: vha->req->ring_index=%d, vha->req->cnt=%d, req_cnt=%d Req-out=%d Req-in=%d Req-Length=%d\n",
-		    vha->vp_idx, vha->req->ring_index,
-		    vha->req->cnt, req_cnt, cnt, cnt_in, vha->req->length);
-		return -EAGAIN;
-	}
 	vha->req->cnt -= req_cnt;
 
 	return 0;

diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c
index c126966..ce79de8 100644
--- a/drivers/scsi/scsi_common.c
+++ b/drivers/scsi/scsi_common.c

@@ -278,8 +278,16 @@
 		ucp[3] = 0;
 		put_unaligned_be64(info, &ucp[4]);
 	} else if ((buf[0] & 0x7f) == 0x70) {
-		buf[0] |= 0x80;
-		put_unaligned_be64(info, &buf[3]);
+		/*
+		 * Only set the 'VALID' bit if we can represent the value
+		 * correctly; otherwise just fill out the lower bytes and
+		 * clear the 'VALID' flag.
+		 */
+		if (info <= 0xffffffffUL)
+			buf[0] |= 0x80;
+		else
+			buf[0] &= 0x7f;
+		put_unaligned_be32((u32)info, &buf[3]);
 	}
 
 	return 0;

diff --git a/drivers/scsi/scsi_sas_internal.h b/drivers/scsi/scsi_sas_internal.h
index 6266a5d..e659912 100644
--- a/drivers/scsi/scsi_sas_internal.h
+++ b/drivers/scsi/scsi_sas_internal.h

@@ -4,7 +4,7 @@
 #define SAS_HOST_ATTRS		0
 #define SAS_PHY_ATTRS		17
 #define SAS_PORT_ATTRS		1
-#define SAS_RPORT_ATTRS		7
+#define SAS_RPORT_ATTRS		8
 #define SAS_END_DEV_ATTRS	5
 #define SAS_EXPANDER_ATTRS	7
 

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index d164419..92ffd24 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c

@@ -1105,7 +1105,7 @@
 	if (attr == &dev_attr_vpd_pg80 && !sdev->vpd_pg80)
 		return 0;
 
-	if (attr == &dev_attr_vpd_pg83 && sdev->vpd_pg83)
+	if (attr == &dev_attr_vpd_pg83 && !sdev->vpd_pg83)
 		return 0;
 
 	return S_IRUGO;

diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 80520e2..b6f958193 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c

@@ -1286,6 +1286,7 @@
 sas_rphy_simple_attr(identify.sas_address, sas_address, "0x%016llx\n",
 		unsigned long long);
 sas_rphy_simple_attr(identify.phy_identifier, phy_identifier, "%d\n", u8);
+sas_rphy_simple_attr(scsi_target_id, scsi_target_id, "%d\n", u32);
 
 /* only need 8 bytes of data plus header (4 or 8) */
 #define BUF_SIZE 64
@@ -1886,6 +1887,7 @@
 	SETUP_RPORT_ATTRIBUTE(rphy_device_type);
 	SETUP_RPORT_ATTRIBUTE(rphy_sas_address);
 	SETUP_RPORT_ATTRIBUTE(rphy_phy_identifier);
+	SETUP_RPORT_ATTRIBUTE(rphy_scsi_target_id);
 	SETUP_OPTIONAL_RPORT_ATTRIBUTE(rphy_enclosure_identifier,
 				       get_enclosure_identifier);
 	SETUP_OPTIONAL_RPORT_ATTRIBUTE(rphy_bay_identifier,

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 5a5457a..1bd0753 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c

@@ -2891,7 +2891,7 @@
 	if (sdkp->opt_xfer_blocks &&
 	    sdkp->opt_xfer_blocks <= dev_max &&
 	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
-	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
+	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
 		rw_max = q->limits.io_opt =
 			sdkp->opt_xfer_blocks * sdp->sector_size;
 	else

diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 71c5138..dbf1882c 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c

@@ -4941,7 +4941,7 @@
  out_unmap:
 	if (res > 0) {
 		for (j=0; j < res; j++)
-			page_cache_release(pages[j]);
+			put_page(pages[j]);
 		res = 0;
 	}
 	kfree(pages);
@@ -4963,7 +4963,7 @@
 		/* FIXME: cache flush missing for rw==READ
 		 * FIXME: call the correct reference counting function
 		 */
-		page_cache_release(page);
+		put_page(page);
 	}
 	kfree(STbp->mapped_pages);
 	STbp->mapped_pages = NULL;

diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 5f45307..097894a 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig

@@ -37,6 +37,7 @@
 	depends on SCSI && SCSI_DMA
 	select PM_DEVFREQ
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select NLS
 	---help---
 	This selects the support for UFS devices in Linux, say Y and make
 	  sure that you know the name of your UFS host adapter (the card

diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index 4f38d00..3aedf73 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, Linux Foundation. All rights reserved.
+ * Copyright (c) 2013-2016, Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -16,8 +16,8 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
-
 #include <linux/phy/phy-qcom-ufs.h>
+
 #include "ufshcd.h"
 #include "ufshcd-pltfrm.h"
 #include "unipro.h"
@@ -58,6 +58,12 @@
 			len * 4, false);
 }
 
+static void ufs_qcom_dump_regs_wrapper(struct ufs_hba *hba, int offset, int len,
+		char *prefix, void *priv)
+{
+	ufs_qcom_dump_regs(hba, offset, len, prefix);
+}
+
 static int ufs_qcom_get_connected_tx_lanes(struct ufs_hba *hba, u32 *tx_lanes)
 {
 	int err = 0;
@@ -106,9 +112,11 @@
 	if (!host->is_lane_clks_enabled)
 		return;
 
-	clk_disable_unprepare(host->tx_l1_sync_clk);
+	if (host->hba->lanes_per_direction > 1)
+		clk_disable_unprepare(host->tx_l1_sync_clk);
 	clk_disable_unprepare(host->tx_l0_sync_clk);
-	clk_disable_unprepare(host->rx_l1_sync_clk);
+	if (host->hba->lanes_per_direction > 1)
+		clk_disable_unprepare(host->rx_l1_sync_clk);
 	clk_disable_unprepare(host->rx_l0_sync_clk);
 
 	host->is_lane_clks_enabled = false;
@@ -132,21 +140,24 @@
 	if (err)
 		goto disable_rx_l0;
 
-	err = ufs_qcom_host_clk_enable(dev, "rx_lane1_sync_clk",
-		host->rx_l1_sync_clk);
-	if (err)
-		goto disable_tx_l0;
+	if (host->hba->lanes_per_direction > 1) {
+		err = ufs_qcom_host_clk_enable(dev, "rx_lane1_sync_clk",
+			host->rx_l1_sync_clk);
+		if (err)
+			goto disable_tx_l0;
 
-	err = ufs_qcom_host_clk_enable(dev, "tx_lane1_sync_clk",
-		host->tx_l1_sync_clk);
-	if (err)
-		goto disable_rx_l1;
+		err = ufs_qcom_host_clk_enable(dev, "tx_lane1_sync_clk",
+			host->tx_l1_sync_clk);
+		if (err)
+			goto disable_rx_l1;
+	}
 
 	host->is_lane_clks_enabled = true;
 	goto out;
 
 disable_rx_l1:
-	clk_disable_unprepare(host->rx_l1_sync_clk);
+	if (host->hba->lanes_per_direction > 1)
+		clk_disable_unprepare(host->rx_l1_sync_clk);
 disable_tx_l0:
 	clk_disable_unprepare(host->tx_l0_sync_clk);
 disable_rx_l0:
@@ -170,14 +181,16 @@
 	if (err)
 		goto out;
 
-	err = ufs_qcom_host_clk_get(dev, "rx_lane1_sync_clk",
-		&host->rx_l1_sync_clk);
-	if (err)
-		goto out;
+	/* In case of single lane per direction, don't read lane1 clocks */
+	if (host->hba->lanes_per_direction > 1) {
+		err = ufs_qcom_host_clk_get(dev, "rx_lane1_sync_clk",
+			&host->rx_l1_sync_clk);
+		if (err)
+			goto out;
 
-	err = ufs_qcom_host_clk_get(dev, "tx_lane1_sync_clk",
-		&host->tx_l1_sync_clk);
-
+		err = ufs_qcom_host_clk_get(dev, "tx_lane1_sync_clk",
+			&host->tx_l1_sync_clk);
+	}
 out:
 	return err;
 }
@@ -267,9 +280,8 @@
 	ret = ufs_qcom_phy_calibrate_phy(phy, is_rate_B);
 
 	if (ret) {
-		dev_err(hba->dev,
-		"%s: ufs_qcom_phy_calibrate_phy()failed, ret = %d\n",
-		__func__, ret);
+		dev_err(hba->dev, "%s: ufs_qcom_phy_calibrate_phy() failed, ret = %d\n",
+			__func__, ret);
 		goto out;
 	}
 
@@ -519,6 +531,18 @@
 			err = ufs_qcom_set_dme_vs_core_clk_ctrl_clear_div(hba,
 									  150);
 
+		/*
+		 * Some UFS devices (and may be host) have issues if LCC is
+		 * enabled. So we are setting PA_Local_TX_LCC_Enable to 0
+		 * before link startup which will make sure that both host
+		 * and device TX LCC are disabled once link startup is
+		 * completed.
+		 */
+		if (ufshcd_get_local_unipro_ver(hba) != UFS_UNIPRO_VER_1_41)
+			err = ufshcd_dme_set(hba,
+					UIC_ARG_MIB(PA_LOCAL_TX_LCC_ENABLE),
+					0);
+
 		break;
 	case POST_CHANGE:
 		ufs_qcom_link_startup_post_change(hba);
@@ -962,6 +986,10 @@
 			goto out;
 		}
 
+		/* enable the device ref clock before changing to HS mode */
+		if (!ufshcd_is_hs_mode(&hba->pwr_info) &&
+			ufshcd_is_hs_mode(dev_req_params))
+			ufs_qcom_dev_ref_clk_ctrl(host, true);
 		break;
 	case POST_CHANGE:
 		if (ufs_qcom_cfg_timers(hba, dev_req_params->gear_rx,
@@ -989,6 +1017,11 @@
 		memcpy(&host->dev_req_params,
 				dev_req_params, sizeof(*dev_req_params));
 		ufs_qcom_update_bus_bw_vote(host);
+
+		/* disable the device ref clock if entered PWM mode */
+		if (ufshcd_is_hs_mode(&hba->pwr_info) &&
+			!ufshcd_is_hs_mode(dev_req_params))
+			ufs_qcom_dev_ref_clk_ctrl(host, false);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1090,6 +1123,9 @@
 			ufs_qcom_phy_disable_iface_clk(host->generic_phy);
 			goto out;
 		}
+		/* enable the device ref clock for HS mode*/
+		if (ufshcd_is_hs_mode(&hba->pwr_info))
+			ufs_qcom_dev_ref_clk_ctrl(host, true);
 		vote = host->bus_vote.saved_vote;
 		if (vote == host->bus_vote.min_bw_vote)
 			ufs_qcom_update_bus_bw_vote(host);
@@ -1367,6 +1403,74 @@
 	return err;
 }
 
+static void ufs_qcom_print_hw_debug_reg_all(struct ufs_hba *hba,
+		void *priv, void (*print_fn)(struct ufs_hba *hba,
+		int offset, int num_regs, char *str, void *priv))
+{
+	u32 reg;
+	struct ufs_qcom_host *host;
+
+	if (unlikely(!hba)) {
+		pr_err("%s: hba is NULL\n", __func__);
+		return;
+	}
+	if (unlikely(!print_fn)) {
+		dev_err(hba->dev, "%s: print_fn is NULL\n", __func__);
+		return;
+	}
+
+	host = ufshcd_get_variant(hba);
+	if (!(host->dbg_print_en & UFS_QCOM_DBG_PRINT_REGS_EN))
+		return;
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_REG_OCSC);
+	print_fn(hba, reg, 44, "UFS_UFS_DBG_RD_REG_OCSC ", priv);
+
+	reg = ufshcd_readl(hba, REG_UFS_CFG1);
+	reg |= UFS_BIT(17);
+	ufshcd_writel(hba, reg, REG_UFS_CFG1);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_EDTL_RAM);
+	print_fn(hba, reg, 32, "UFS_UFS_DBG_RD_EDTL_RAM ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_DESC_RAM);
+	print_fn(hba, reg, 128, "UFS_UFS_DBG_RD_DESC_RAM ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_PRDT_RAM);
+	print_fn(hba, reg, 64, "UFS_UFS_DBG_RD_PRDT_RAM ", priv);
+
+	ufshcd_writel(hba, (reg & ~UFS_BIT(17)), REG_UFS_CFG1);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_UAWM);
+	print_fn(hba, reg, 4, "UFS_DBG_RD_REG_UAWM ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_UARM);
+	print_fn(hba, reg, 4, "UFS_DBG_RD_REG_UARM ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_TXUC);
+	print_fn(hba, reg, 48, "UFS_DBG_RD_REG_TXUC ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_RXUC);
+	print_fn(hba, reg, 27, "UFS_DBG_RD_REG_RXUC ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_DFC);
+	print_fn(hba, reg, 19, "UFS_DBG_RD_REG_DFC ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_TRLUT);
+	print_fn(hba, reg, 34, "UFS_DBG_RD_REG_TRLUT ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_TMRLUT);
+	print_fn(hba, reg, 9, "UFS_DBG_RD_REG_TMRLUT ", priv);
+}
+
+static void ufs_qcom_enable_test_bus(struct ufs_qcom_host *host)
+{
+	if (host->dbg_print_en & UFS_QCOM_DBG_PRINT_TEST_BUS_EN)
+		ufshcd_rmwl(host->hba, TEST_BUS_EN, TEST_BUS_EN, REG_UFS_CFG1);
+	else
+		ufshcd_rmwl(host->hba, TEST_BUS_EN, 0, REG_UFS_CFG1);
+}
+
 static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host)
 {
 	/* provide a legal default configuration */
@@ -1475,6 +1579,7 @@
 	ufshcd_rmwl(host->hba, mask,
 		    (u32)host->testbus.select_minor << offset,
 		    reg);
+	ufs_qcom_enable_test_bus(host);
 	ufshcd_release(host->hba);
 	pm_runtime_put_sync(host->hba->dev);
 
@@ -1491,8 +1596,10 @@
 	ufs_qcom_dump_regs(hba, REG_UFS_SYS1CLK_1US, 16,
 			"HCI Vendor Specific Registers ");
 
+	ufs_qcom_print_hw_debug_reg_all(hba, NULL, ufs_qcom_dump_regs_wrapper);
 	ufs_qcom_testbus_read(hba);
 }
+
 /**
  * struct ufs_hba_qcom_vops - UFS QCOM specific variant operations
  *
@@ -1537,7 +1644,7 @@
  * ufs_qcom_remove - set driver_data of the device to NULL
  * @pdev: pointer to platform device handle
  *
- * Always return 0
+ * Always returns 0
  */
 static int ufs_qcom_remove(struct platform_device *pdev)
 {

diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h
index 36249b3..a19307a 100644
--- a/drivers/scsi/ufs/ufs-qcom.h
+++ b/drivers/scsi/ufs/ufs-qcom.h

@@ -241,6 +241,15 @@
 	struct ufs_qcom_testbus testbus;
 };
 
+static inline u32
+ufs_qcom_get_debug_reg_offset(struct ufs_qcom_host *host, u32 reg)
+{
+	if (host->hw_ver.major <= 0x02)
+		return UFS_CNTLR_2_x_x_VEN_REGS_OFFSET(reg);
+
+	return UFS_CNTLR_3_x_x_VEN_REGS_OFFSET(reg);
+};
+
 #define ufs_qcom_is_link_off(hba) ufshcd_is_link_off(hba)
 #define ufs_qcom_is_link_active(hba) ufshcd_is_link_active(hba)
 #define ufs_qcom_is_link_hibern8(hba) ufshcd_is_link_hibern8(hba)

diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 54a16ce..b291fa6 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h

@@ -43,6 +43,7 @@
 #define GENERAL_UPIU_REQUEST_SIZE 32
 #define QUERY_DESC_MAX_SIZE       255
 #define QUERY_DESC_MIN_SIZE       2
+#define QUERY_DESC_HDR_SIZE       2
 #define QUERY_OSF_SIZE            (GENERAL_UPIU_REQUEST_SIZE - \
 					(sizeof(struct utp_upiu_header)))
 
@@ -195,6 +196,37 @@
 	UNIT_DESC_PARAM_LARGE_UNIT_SIZE_M1	= 0x22,
 };
 
+/* Device descriptor parameters offsets in bytes*/
+enum device_desc_param {
+	DEVICE_DESC_PARAM_LEN			= 0x0,
+	DEVICE_DESC_PARAM_TYPE			= 0x1,
+	DEVICE_DESC_PARAM_DEVICE_TYPE		= 0x2,
+	DEVICE_DESC_PARAM_DEVICE_CLASS		= 0x3,
+	DEVICE_DESC_PARAM_DEVICE_SUB_CLASS	= 0x4,
+	DEVICE_DESC_PARAM_PRTCL			= 0x5,
+	DEVICE_DESC_PARAM_NUM_LU		= 0x6,
+	DEVICE_DESC_PARAM_NUM_WLU		= 0x7,
+	DEVICE_DESC_PARAM_BOOT_ENBL		= 0x8,
+	DEVICE_DESC_PARAM_DESC_ACCSS_ENBL	= 0x9,
+	DEVICE_DESC_PARAM_INIT_PWR_MODE		= 0xA,
+	DEVICE_DESC_PARAM_HIGH_PR_LUN		= 0xB,
+	DEVICE_DESC_PARAM_SEC_RMV_TYPE		= 0xC,
+	DEVICE_DESC_PARAM_SEC_LU		= 0xD,
+	DEVICE_DESC_PARAM_BKOP_TERM_LT		= 0xE,
+	DEVICE_DESC_PARAM_ACTVE_ICC_LVL		= 0xF,
+	DEVICE_DESC_PARAM_SPEC_VER		= 0x10,
+	DEVICE_DESC_PARAM_MANF_DATE		= 0x12,
+	DEVICE_DESC_PARAM_MANF_NAME		= 0x14,
+	DEVICE_DESC_PARAM_PRDCT_NAME		= 0x15,
+	DEVICE_DESC_PARAM_SN			= 0x16,
+	DEVICE_DESC_PARAM_OEM_ID		= 0x17,
+	DEVICE_DESC_PARAM_MANF_ID		= 0x18,
+	DEVICE_DESC_PARAM_UD_OFFSET		= 0x1A,
+	DEVICE_DESC_PARAM_UD_LEN		= 0x1B,
+	DEVICE_DESC_PARAM_RTT_CAP		= 0x1C,
+	DEVICE_DESC_PARAM_FRQ_RTC		= 0x1D,
+};
+
 /*
  * Logical Unit Write Protect
  * 00h: LU not write protected
@@ -469,6 +501,7 @@
 	struct regulator *reg;
 	const char *name;
 	bool enabled;
+	bool unused;
 	int min_uV;
 	int max_uV;
 	int min_uA;

diff --git a/drivers/scsi/ufs/ufs_quirks.h b/drivers/scsi/ufs/ufs_quirks.h
new file mode 100644
index 0000000..ee4ab85
--- /dev/null
+++ b/drivers/scsi/ufs/ufs_quirks.h

@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UFS_QUIRKS_H_
+#define _UFS_QUIRKS_H_
+
+/* return true if s1 is a prefix of s2 */
+#define STR_PRFX_EQUAL(s1, s2) !strncmp(s1, s2, strlen(s1))
+
+#define UFS_ANY_VENDOR 0xFFFF
+#define UFS_ANY_MODEL  "ANY_MODEL"
+
+#define MAX_MODEL_LEN 16
+
+#define UFS_VENDOR_TOSHIBA     0x198
+#define UFS_VENDOR_SAMSUNG     0x1CE
+
+/**
+ * ufs_device_info - ufs device details
+ * @wmanufacturerid: card details
+ * @model: card model
+ */
+struct ufs_device_info {
+	u16 wmanufacturerid;
+	char model[MAX_MODEL_LEN + 1];
+};
+
+/**
+ * ufs_dev_fix - ufs device quirk info
+ * @card: ufs card details
+ * @quirk: device quirk
+ */
+struct ufs_dev_fix {
+	struct ufs_device_info card;
+	unsigned int quirk;
+};
+
+#define END_FIX { { 0 }, 0 }
+
+/* add specific device quirk */
+#define UFS_FIX(_vendor, _model, _quirk) \
+		{					  \
+			.card.wmanufacturerid = (_vendor),\
+			.card.model = (_model),		  \
+			.quirk = (_quirk),		  \
+		}
+
+/*
+ * If UFS device is having issue in processing LCC (Line Control
+ * Command) coming from UFS host controller then enable this quirk.
+ * When this quirk is enabled, host controller driver should disable
+ * the LCC transmission on UFS host controller (by clearing
+ * TX_LCC_ENABLE attribute of host to 0).
+ */
+#define UFS_DEVICE_QUIRK_BROKEN_LCC (1 << 0)
+
+/*
+ * Some UFS devices don't need VCCQ rail for device operations. Enabling this
+ * quirk for such devices will make sure that VCCQ rail is not voted.
+ */
+#define UFS_DEVICE_NO_VCCQ (1 << 1)
+
+/*
+ * Some vendor's UFS device sends back to back NACs for the DL data frames
+ * causing the host controller to raise the DFES error status. Sometimes
+ * such UFS devices send back to back NAC without waiting for new
+ * retransmitted DL frame from the host and in such cases it might be possible
+ * the Host UniPro goes into bad state without raising the DFES error
+ * interrupt. If this happens then all the pending commands would timeout
+ * only after respective SW command (which is generally too large).
+ *
+ * We can workaround such device behaviour like this:
+ * - As soon as SW sees the DL NAC error, it should schedule the error handler
+ * - Error handler would sleep for 50ms to see if there are any fatal errors
+ *   raised by UFS controller.
+ *    - If there are fatal errors then SW does normal error recovery.
+ *    - If there are no fatal errors then SW sends the NOP command to device
+ *      to check if link is alive.
+ *        - If NOP command times out, SW does normal error recovery
+ *        - If NOP command succeed, skip the error handling.
+ *
+ * If DL NAC error is seen multiple times with some vendor's UFS devices then
+ * enable this quirk to initiate quick error recovery and also silence related
+ * error logs to reduce spamming of kernel logs.
+ */
+#define UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS (1 << 2)
+
+/*
+ * Some UFS devices may not work properly after resume if the link was kept
+ * in off state during suspend. Enabling this quirk will not allow the
+ * link to be kept in off state during suspend.
+ */
+#define UFS_DEVICE_QUIRK_NO_LINK_OFF	(1 << 3)
+
+/*
+ * Few Toshiba UFS device models advertise RX_MIN_ACTIVATETIME_CAPABILITY as
+ * 600us which may not be enough for reliable hibern8 exit hardware sequence
+ * from UFS device.
+ * To workaround this issue, host should set its PA_TACTIVATE time to 1ms even
+ * if device advertises RX_MIN_ACTIVATETIME_CAPABILITY less than 1ms.
+ */
+#define UFS_DEVICE_QUIRK_PA_TACTIVATE	(1 << 4)
+
+/*
+ * Some UFS memory devices may have really low read/write throughput in
+ * FAST AUTO mode, enable this quirk to make sure that FAST AUTO mode is
+ * never enabled for such devices.
+ */
+#define UFS_DEVICE_NO_FASTAUTO		(1 << 5)
+
+/*
+ * It seems some UFS devices may keep drawing more than sleep current
+ * (atleast for 500us) from UFS rails (especially from VCCQ rail).
+ * To avoid this situation, add 2ms delay before putting these UFS
+ * rails in LPM mode.
+ */
+#define UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM	(1 << 6)
+
+struct ufs_hba;
+void ufs_advertise_fixup_device(struct ufs_hba *hba);
+
+static struct ufs_dev_fix ufs_fixups[] = {
+	/* UFS cards deviations table */
+	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL,
+		UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM),
+	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL, UFS_DEVICE_NO_VCCQ),
+	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL,
+		UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS),
+	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL,
+		UFS_DEVICE_NO_FASTAUTO),
+	UFS_FIX(UFS_VENDOR_TOSHIBA, UFS_ANY_MODEL,
+		UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM),
+	UFS_FIX(UFS_VENDOR_TOSHIBA, "THGLF2G9C8KBADG",
+		UFS_DEVICE_QUIRK_PA_TACTIVATE),
+	UFS_FIX(UFS_VENDOR_TOSHIBA, "THGLF2G9D8KBADG",
+		UFS_DEVICE_QUIRK_PA_TACTIVATE),
+
+	END_FIX
+};
+#endif /* UFS_QUIRKS_H_ */

diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
index d2a7b12..718f12e 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c

@@ -40,6 +40,8 @@
 #include "ufshcd.h"
 #include "ufshcd-pltfrm.h"
 
+#define UFSHCD_DEFAULT_LANES_PER_DIRECTION		2
+
 static int ufshcd_parse_clock_info(struct ufs_hba *hba)
 {
 	int ret = 0;
@@ -277,6 +279,21 @@
 }
 EXPORT_SYMBOL_GPL(ufshcd_pltfrm_shutdown);
 
+static void ufshcd_init_lanes_per_dir(struct ufs_hba *hba)
+{
+	struct device *dev = hba->dev;
+	int ret;
+
+	ret = of_property_read_u32(dev->of_node, "lanes-per-direction",
+		&hba->lanes_per_direction);
+	if (ret) {
+		dev_dbg(hba->dev,
+			"%s: failed to read lanes-per-direction, ret=%d\n",
+			__func__, ret);
+		hba->lanes_per_direction = UFSHCD_DEFAULT_LANES_PER_DIRECTION;
+	}
+}
+
 /**
  * ufshcd_pltfrm_init - probe routine of the driver
  * @pdev: pointer to Platform device handle
@@ -331,6 +348,8 @@
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
+	ufshcd_init_lanes_per_dir(hba);
+
 	err = ufshcd_init(hba, mmio_base, irq);
 	if (err) {
 		dev_err(dev, "Initialization failed\n");

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 9c1b94b..f8fa72c 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c

@@ -39,8 +39,10 @@
 
 #include <linux/async.h>
 #include <linux/devfreq.h>
-
+#include <linux/nls.h>
+#include <linux/of.h>
 #include "ufshcd.h"
+#include "ufs_quirks.h"
 #include "unipro.h"
 
 #define UFSHCD_ENABLE_INTRS	(UTP_TRANSFER_REQ_COMPL |\
@@ -131,9 +133,11 @@
 /* UFSHCD UIC layer error flags */
 enum {
 	UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
-	UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
-	UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
-	UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
+	UFSHCD_UIC_DL_NAC_RECEIVED_ERROR = (1 << 1), /* Data link layer error */
+	UFSHCD_UIC_DL_TCx_REPLAY_ERROR = (1 << 2), /* Data link layer error */
+	UFSHCD_UIC_NL_ERROR = (1 << 3), /* Network layer error */
+	UFSHCD_UIC_TL_ERROR = (1 << 4), /* Transport Layer error */
+	UFSHCD_UIC_DME_ERROR = (1 << 5), /* DME error */
 };
 
 /* Interrupt configuration options */
@@ -193,6 +197,7 @@
 static int __ufshcd_setup_clocks(struct ufs_hba *hba, bool on,
 				 bool skip_ref_clk);
 static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on);
+static int ufshcd_set_vccq_rail_unused(struct ufs_hba *hba, bool unused);
 static int ufshcd_uic_hibern8_exit(struct ufs_hba *hba);
 static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba);
 static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba);
@@ -231,6 +236,16 @@
 	}
 }
 
+/* replace non-printable or non-ASCII characters with spaces */
+static inline void ufshcd_remove_non_printable(char *val)
+{
+	if (!val)
+		return;
+
+	if (*val < 0x20 || *val > 0x7e)
+		*val = ' ';
+}
+
 /*
  * ufshcd_wait_for_register - wait for register value to change
  * @hba - per-adapter interface
@@ -239,11 +254,13 @@
  * @val - wait condition
  * @interval_us - polling interval in microsecs
  * @timeout_ms - timeout in millisecs
+ * @can_sleep - perform sleep or just spin
  *
  * Returns -ETIMEDOUT on error, zero on success
  */
-static int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
-		u32 val, unsigned long interval_us, unsigned long timeout_ms)
+int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
+				u32 val, unsigned long interval_us,
+				unsigned long timeout_ms, bool can_sleep)
 {
 	int err = 0;
 	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
@@ -252,9 +269,10 @@
 	val = val & mask;
 
 	while ((ufshcd_readl(hba, reg) & mask) != val) {
-		/* wakeup within 50us of expiry */
-		usleep_range(interval_us, interval_us + 50);
-
+		if (can_sleep)
+			usleep_range(interval_us, interval_us + 50);
+		else
+			udelay(interval_us);
 		if (time_after(jiffies, timeout)) {
 			if ((ufshcd_readl(hba, reg) & mask) != val)
 				err = -ETIMEDOUT;
@@ -552,6 +570,34 @@
 	return (ufshcd_readl(hba, REG_CONTROLLER_ENABLE) & 0x1) ? 0 : 1;
 }
 
+u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba)
+{
+	/* HCI version 1.0 and 1.1 supports UniPro 1.41 */
+	if ((hba->ufs_version == UFSHCI_VERSION_10) ||
+	    (hba->ufs_version == UFSHCI_VERSION_11))
+		return UFS_UNIPRO_VER_1_41;
+	else
+		return UFS_UNIPRO_VER_1_6;
+}
+EXPORT_SYMBOL(ufshcd_get_local_unipro_ver);
+
+static bool ufshcd_is_unipro_pa_params_tuning_req(struct ufs_hba *hba)
+{
+	/*
+	 * If both host and device support UniPro ver1.6 or later, PA layer
+	 * parameters tuning happens during link startup itself.
+	 *
+	 * We can manually tune PA layer parameters if either host or device
+	 * doesn't support UniPro ver 1.6 or later. But to keep manual tuning
+	 * logic simple, we will only do manual tuning if local unipro version
+	 * doesn't support ver1.6 or later.
+	 */
+	if (ufshcd_get_local_unipro_ver(hba) < UFS_UNIPRO_VER_1_6)
+		return true;
+	else
+		return false;
+}
+
 static void ufshcd_ungate_work(struct work_struct *work)
 {
 	int ret;
@@ -1458,7 +1504,7 @@
 	 */
 	err = ufshcd_wait_for_register(hba,
 			REG_UTP_TRANSFER_REQ_DOOR_BELL,
-			mask, ~mask, 1000, 1000);
+			mask, ~mask, 1000, 1000, true);
 
 	return err;
 }
@@ -1857,21 +1903,7 @@
 	return ret;
 }
 
-/**
- * ufshcd_query_descriptor - API function for sending descriptor requests
- * hba: per-adapter instance
- * opcode: attribute opcode
- * idn: attribute idn to access
- * index: index field
- * selector: selector field
- * desc_buf: the buffer that contains the descriptor
- * buf_len: length parameter passed to the device
- *
- * Returns 0 for success, non-zero in case of failure.
- * The buf_len parameter will contain, on return, the length parameter
- * received on the response.
- */
-static int ufshcd_query_descriptor(struct ufs_hba *hba,
+static int __ufshcd_query_descriptor(struct ufs_hba *hba,
 			enum query_opcode opcode, enum desc_idn idn, u8 index,
 			u8 selector, u8 *desc_buf, int *buf_len)
 {
@@ -1936,6 +1968,39 @@
 }
 
 /**
+ * ufshcd_query_descriptor_retry - API function for sending descriptor
+ * requests
+ * hba: per-adapter instance
+ * opcode: attribute opcode
+ * idn: attribute idn to access
+ * index: index field
+ * selector: selector field
+ * desc_buf: the buffer that contains the descriptor
+ * buf_len: length parameter passed to the device
+ *
+ * Returns 0 for success, non-zero in case of failure.
+ * The buf_len parameter will contain, on return, the length parameter
+ * received on the response.
+ */
+int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
+			enum query_opcode opcode, enum desc_idn idn, u8 index,
+			u8 selector, u8 *desc_buf, int *buf_len)
+{
+	int err;
+	int retries;
+
+	for (retries = QUERY_REQ_RETRIES; retries > 0; retries--) {
+		err = __ufshcd_query_descriptor(hba, opcode, idn, index,
+						selector, desc_buf, buf_len);
+		if (!err || err == -EINVAL)
+			break;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(ufshcd_query_descriptor_retry);
+
+/**
  * ufshcd_read_desc_param - read the specified descriptor parameter
  * @hba: Pointer to adapter instance
  * @desc_id: descriptor idn value
@@ -1977,9 +2042,9 @@
 			return -ENOMEM;
 	}
 
-	ret = ufshcd_query_descriptor(hba, UPIU_QUERY_OPCODE_READ_DESC,
-				      desc_id, desc_index, 0, desc_buf,
-				      &buff_len);
+	ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+					desc_id, desc_index, 0, desc_buf,
+					&buff_len);
 
 	if (ret || (buff_len < ufs_query_desc_max_size[desc_id]) ||
 	    (desc_buf[QUERY_DESC_LENGTH_OFFSET] !=
@@ -2017,6 +2082,82 @@
 	return ufshcd_read_desc(hba, QUERY_DESC_IDN_POWER, 0, buf, size);
 }
 
+int ufshcd_read_device_desc(struct ufs_hba *hba, u8 *buf, u32 size)
+{
+	return ufshcd_read_desc(hba, QUERY_DESC_IDN_DEVICE, 0, buf, size);
+}
+EXPORT_SYMBOL(ufshcd_read_device_desc);
+
+/**
+ * ufshcd_read_string_desc - read string descriptor
+ * @hba: pointer to adapter instance
+ * @desc_index: descriptor index
+ * @buf: pointer to buffer where descriptor would be read
+ * @size: size of buf
+ * @ascii: if true convert from unicode to ascii characters
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+int ufshcd_read_string_desc(struct ufs_hba *hba, int desc_index, u8 *buf,
+				u32 size, bool ascii)
+{
+	int err = 0;
+
+	err = ufshcd_read_desc(hba,
+				QUERY_DESC_IDN_STRING, desc_index, buf, size);
+
+	if (err) {
+		dev_err(hba->dev, "%s: reading String Desc failed after %d retries. err = %d\n",
+			__func__, QUERY_REQ_RETRIES, err);
+		goto out;
+	}
+
+	if (ascii) {
+		int desc_len;
+		int ascii_len;
+		int i;
+		char *buff_ascii;
+
+		desc_len = buf[0];
+		/* remove header and divide by 2 to move from UTF16 to UTF8 */
+		ascii_len = (desc_len - QUERY_DESC_HDR_SIZE) / 2 + 1;
+		if (size < ascii_len + QUERY_DESC_HDR_SIZE) {
+			dev_err(hba->dev, "%s: buffer allocated size is too small\n",
+					__func__);
+			err = -ENOMEM;
+			goto out;
+		}
+
+		buff_ascii = kmalloc(ascii_len, GFP_KERNEL);
+		if (!buff_ascii) {
+			err = -ENOMEM;
+			goto out_free_buff;
+		}
+
+		/*
+		 * the descriptor contains string in UTF16 format
+		 * we need to convert to utf-8 so it can be displayed
+		 */
+		utf16s_to_utf8s((wchar_t *)&buf[QUERY_DESC_HDR_SIZE],
+				desc_len - QUERY_DESC_HDR_SIZE,
+				UTF16_BIG_ENDIAN, buff_ascii, ascii_len);
+
+		/* replace non-printable or non-ASCII characters with spaces */
+		for (i = 0; i < ascii_len; i++)
+			ufshcd_remove_non_printable(&buff_ascii[i]);
+
+		memset(buf + QUERY_DESC_HDR_SIZE, 0,
+				size - QUERY_DESC_HDR_SIZE);
+		memcpy(buf + QUERY_DESC_HDR_SIZE, buff_ascii, ascii_len);
+		buf[QUERY_DESC_LENGTH_OFFSET] = ascii_len + QUERY_DESC_HDR_SIZE;
+out_free_buff:
+		kfree(buff_ascii);
+	}
+out:
+	return err;
+}
+EXPORT_SYMBOL(ufshcd_read_string_desc);
+
 /**
  * ufshcd_read_unit_desc_param - read the specified unit descriptor parameter
  * @hba: Pointer to adapter instance
@@ -2814,6 +2955,23 @@
 }
 
 /**
+ * ufshcd_hba_stop - Send controller to reset state
+ * @hba: per adapter instance
+ * @can_sleep: perform sleep or just spin
+ */
+static inline void ufshcd_hba_stop(struct ufs_hba *hba, bool can_sleep)
+{
+	int err;
+
+	ufshcd_writel(hba, CONTROLLER_DISABLE,  REG_CONTROLLER_ENABLE);
+	err = ufshcd_wait_for_register(hba, REG_CONTROLLER_ENABLE,
+					CONTROLLER_ENABLE, CONTROLLER_DISABLE,
+					10, 1, can_sleep);
+	if (err)
+		dev_err(hba->dev, "%s: Controller disable failed\n", __func__);
+}
+
+/**
  * ufshcd_hba_enable - initialize the controller
  * @hba: per adapter instance
  *
@@ -2833,18 +2991,9 @@
 	 * development and testing of this driver. msleep can be changed to
 	 * mdelay and retry count can be reduced based on the controller.
 	 */
-	if (!ufshcd_is_hba_active(hba)) {
-
+	if (!ufshcd_is_hba_active(hba))
 		/* change controller state to "reset state" */
-		ufshcd_hba_stop(hba);
-
-		/*
-		 * This delay is based on the testing done with UFS host
-		 * controller FPGA. The delay can be changed based on the
-		 * host controller used.
-		 */
-		msleep(5);
-	}
+		ufshcd_hba_stop(hba, true);
 
 	/* UniPro link is disabled at this point */
 	ufshcd_set_link_off(hba);
@@ -3365,31 +3514,18 @@
 }
 
 /**
- * ufshcd_transfer_req_compl - handle SCSI and query command completion
+ * __ufshcd_transfer_req_compl - handle SCSI and query command completion
  * @hba: per adapter instance
+ * @completed_reqs: requests to complete
  */
-static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
+static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
+					unsigned long completed_reqs)
 {
 	struct ufshcd_lrb *lrbp;
 	struct scsi_cmnd *cmd;
-	unsigned long completed_reqs;
-	u32 tr_doorbell;
 	int result;
 	int index;
 
-	/* Resetting interrupt aggregation counters first and reading the
-	 * DOOR_BELL afterward allows us to handle all the completed requests.
-	 * In order to prevent other interrupts starvation the DB is read once
-	 * after reset. The down side of this solution is the possibility of
-	 * false interrupt if device completes another request after resetting
-	 * aggregation and before reading the DB.
-	 */
-	if (ufshcd_is_intr_aggr_allowed(hba))
-		ufshcd_reset_intr_aggr(hba);
-
-	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-	completed_reqs = tr_doorbell ^ hba->outstanding_reqs;
-
 	for_each_set_bit(index, &completed_reqs, hba->nutrs) {
 		lrbp = &hba->lrb[index];
 		cmd = lrbp->cmd;
@@ -3419,6 +3555,31 @@
 }
 
 /**
+ * ufshcd_transfer_req_compl - handle SCSI and query command completion
+ * @hba: per adapter instance
+ */
+static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
+{
+	unsigned long completed_reqs;
+	u32 tr_doorbell;
+
+	/* Resetting interrupt aggregation counters first and reading the
+	 * DOOR_BELL afterward allows us to handle all the completed requests.
+	 * In order to prevent other interrupts starvation the DB is read once
+	 * after reset. The down side of this solution is the possibility of
+	 * false interrupt if device completes another request after resetting
+	 * aggregation and before reading the DB.
+	 */
+	if (ufshcd_is_intr_aggr_allowed(hba))
+		ufshcd_reset_intr_aggr(hba);
+
+	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	completed_reqs = tr_doorbell ^ hba->outstanding_reqs;
+
+	__ufshcd_transfer_req_compl(hba, completed_reqs);
+}
+
+/**
  * ufshcd_disable_ee - disable exception event
  * @hba: per-adapter instance
  * @mask: exception event to disable
@@ -3630,7 +3791,7 @@
  */
 static int ufshcd_urgent_bkops(struct ufs_hba *hba)
 {
-	return ufshcd_bkops_ctrl(hba, BKOPS_STATUS_PERF_IMPACT);
+	return ufshcd_bkops_ctrl(hba, hba->urgent_bkops_lvl);
 }
 
 static inline int ufshcd_get_ee_status(struct ufs_hba *hba, u32 *status)
@@ -3639,6 +3800,43 @@
 			QUERY_ATTR_IDN_EE_STATUS, 0, 0, status);
 }
 
+static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba)
+{
+	int err;
+	u32 curr_status = 0;
+
+	if (hba->is_urgent_bkops_lvl_checked)
+		goto enable_auto_bkops;
+
+	err = ufshcd_get_bkops_status(hba, &curr_status);
+	if (err) {
+		dev_err(hba->dev, "%s: failed to get BKOPS status %d\n",
+				__func__, err);
+		goto out;
+	}
+
+	/*
+	 * We are seeing that some devices are raising the urgent bkops
+	 * exception events even when BKOPS status doesn't indicate performace
+	 * impacted or critical. Handle these device by determining their urgent
+	 * bkops status at runtime.
+	 */
+	if (curr_status < BKOPS_STATUS_PERF_IMPACT) {
+		dev_err(hba->dev, "%s: device raised urgent BKOPS exception for bkops status %d\n",
+				__func__, curr_status);
+		/* update the current status as the urgent bkops level */
+		hba->urgent_bkops_lvl = curr_status;
+		hba->is_urgent_bkops_lvl_checked = true;
+	}
+
+enable_auto_bkops:
+	err = ufshcd_enable_auto_bkops(hba);
+out:
+	if (err < 0)
+		dev_err(hba->dev, "%s: failed to handle urgent bkops %d\n",
+				__func__, err);
+}
+
 /**
  * ufshcd_exception_event_handler - handle exceptions raised by device
  * @work: pointer to work data
@@ -3662,17 +3860,95 @@
 	}
 
 	status &= hba->ee_ctrl_mask;
-	if (status & MASK_EE_URGENT_BKOPS) {
-		err = ufshcd_urgent_bkops(hba);
-		if (err < 0)
-			dev_err(hba->dev, "%s: failed to handle urgent bkops %d\n",
-					__func__, err);
-	}
+
+	if (status & MASK_EE_URGENT_BKOPS)
+		ufshcd_bkops_exception_event_handler(hba);
+
 out:
 	pm_runtime_put_sync(hba->dev);
 	return;
 }
 
+/* Complete requests that have door-bell cleared */
+static void ufshcd_complete_requests(struct ufs_hba *hba)
+{
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+}
+
+/**
+ * ufshcd_quirk_dl_nac_errors - This function checks if error handling is
+ *				to recover from the DL NAC errors or not.
+ * @hba: per-adapter instance
+ *
+ * Returns true if error handling is required, false otherwise
+ */
+static bool ufshcd_quirk_dl_nac_errors(struct ufs_hba *hba)
+{
+	unsigned long flags;
+	bool err_handling = true;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	/*
+	 * UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS only workaround the
+	 * device fatal error and/or DL NAC & REPLAY timeout errors.
+	 */
+	if (hba->saved_err & (CONTROLLER_FATAL_ERROR | SYSTEM_BUS_FATAL_ERROR))
+		goto out;
+
+	if ((hba->saved_err & DEVICE_FATAL_ERROR) ||
+	    ((hba->saved_err & UIC_ERROR) &&
+	     (hba->saved_uic_err & UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))
+		goto out;
+
+	if ((hba->saved_err & UIC_ERROR) &&
+	    (hba->saved_uic_err & UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)) {
+		int err;
+		/*
+		 * wait for 50ms to see if we can get any other errors or not.
+		 */
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		msleep(50);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+
+		/*
+		 * now check if we have got any other severe errors other than
+		 * DL NAC error?
+		 */
+		if ((hba->saved_err & INT_FATAL_ERRORS) ||
+		    ((hba->saved_err & UIC_ERROR) &&
+		    (hba->saved_uic_err & ~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)))
+			goto out;
+
+		/*
+		 * As DL NAC is the only error received so far, send out NOP
+		 * command to confirm if link is still active or not.
+		 *   - If we don't get any response then do error recovery.
+		 *   - If we get response then clear the DL NAC error bit.
+		 */
+
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		err = ufshcd_verify_dev_init(hba);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+
+		if (err)
+			goto out;
+
+		/* Link seems to be alive hence ignore the DL NAC errors */
+		if (hba->saved_uic_err == UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)
+			hba->saved_err &= ~UIC_ERROR;
+		/* clear NAC error */
+		hba->saved_uic_err &= ~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR;
+		if (!hba->saved_uic_err) {
+			err_handling = false;
+			goto out;
+		}
+	}
+out:
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	return err_handling;
+}
+
 /**
  * ufshcd_err_handler - handle UFS errors that require s/w attention
  * @work: pointer to work structure
@@ -3685,6 +3961,7 @@
 	u32 err_tm = 0;
 	int err = 0;
 	int tag;
+	bool needs_reset = false;
 
 	hba = container_of(work, struct ufs_hba, eh_work);
 
@@ -3692,40 +3969,86 @@
 	ufshcd_hold(hba, false);
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
+	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
 		goto out;
-	}
 
 	hba->ufshcd_state = UFSHCD_STATE_RESET;
 	ufshcd_set_eh_in_progress(hba);
 
 	/* Complete requests that have door-bell cleared by h/w */
-	ufshcd_transfer_req_compl(hba);
-	ufshcd_tmc_handler(hba);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	ufshcd_complete_requests(hba);
 
+	if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
+		bool ret;
+
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		/* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */
+		ret = ufshcd_quirk_dl_nac_errors(hba);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+		if (!ret)
+			goto skip_err_handling;
+	}
+	if ((hba->saved_err & INT_FATAL_ERRORS) ||
+	    ((hba->saved_err & UIC_ERROR) &&
+	    (hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR |
+				   UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
+				   UFSHCD_UIC_DL_TCx_REPLAY_ERROR))))
+		needs_reset = true;
+
+	/*
+	 * if host reset is required then skip clearing the pending
+	 * transfers forcefully because they will automatically get
+	 * cleared after link startup.
+	 */
+	if (needs_reset)
+		goto skip_pending_xfer_clear;
+
+	/* release lock as clear command might sleep */
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	/* Clear pending transfer requests */
-	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs)
-		if (ufshcd_clear_cmd(hba, tag))
-			err_xfer |= 1 << tag;
+	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
+		if (ufshcd_clear_cmd(hba, tag)) {
+			err_xfer = true;
+			goto lock_skip_pending_xfer_clear;
+		}
+	}
 
 	/* Clear pending task management requests */
-	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs)
-		if (ufshcd_clear_tm_cmd(hba, tag))
-			err_tm |= 1 << tag;
+	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
+		if (ufshcd_clear_tm_cmd(hba, tag)) {
+			err_tm = true;
+			goto lock_skip_pending_xfer_clear;
+		}
+	}
+
+lock_skip_pending_xfer_clear:
+	spin_lock_irqsave(hba->host->host_lock, flags);
 
 	/* Complete the requests that are cleared by s/w */
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	ufshcd_transfer_req_compl(hba);
-	ufshcd_tmc_handler(hba);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	ufshcd_complete_requests(hba);
 
+	if (err_xfer || err_tm)
+		needs_reset = true;
+
+skip_pending_xfer_clear:
 	/* Fatal errors need reset */
-	if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) ||
-			((hba->saved_err & UIC_ERROR) &&
-			 (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) {
+	if (needs_reset) {
+		unsigned long max_doorbells = (1UL << hba->nutrs) - 1;
+
+		/*
+		 * ufshcd_reset_and_restore() does the link reinitialization
+		 * which will need atleast one empty doorbell slot to send the
+		 * device management commands (NOP and query commands).
+		 * If there is no slot empty at this moment then free up last
+		 * slot forcefully.
+		 */
+		if (hba->outstanding_reqs == max_doorbells)
+			__ufshcd_transfer_req_compl(hba,
+						    (1UL << (hba->nutrs - 1)));
+
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		err = ufshcd_reset_and_restore(hba);
+		spin_lock_irqsave(hba->host->host_lock, flags);
 		if (err) {
 			dev_err(hba->dev, "%s: reset and restore failed\n",
 					__func__);
@@ -3739,9 +4062,19 @@
 		hba->saved_err = 0;
 		hba->saved_uic_err = 0;
 	}
+
+skip_err_handling:
+	if (!needs_reset) {
+		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+		if (hba->saved_err || hba->saved_uic_err)
+			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
+			    __func__, hba->saved_err, hba->saved_uic_err);
+	}
+
 	ufshcd_clear_eh_in_progress(hba);
 
 out:
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	scsi_unblock_requests(hba->host);
 	ufshcd_release(hba);
 	pm_runtime_put_sync(hba->dev);
@@ -3759,6 +4092,14 @@
 	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
 	if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
 		hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
+	else if (hba->dev_quirks &
+		   UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
+		if (reg & UIC_DATA_LINK_LAYER_ERROR_NAC_RECEIVED)
+			hba->uic_error |=
+				UFSHCD_UIC_DL_NAC_RECEIVED_ERROR;
+		else if (reg & UIC_DATA_LINK_LAYER_ERROR_TCx_REPLAY_TIMEOUT)
+			hba->uic_error |= UFSHCD_UIC_DL_TCx_REPLAY_ERROR;
+	}
 
 	/* UIC NL/TL/DME errors needs software retry */
 	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
@@ -3796,15 +4137,18 @@
 	}
 
 	if (queue_eh_work) {
+		/*
+		 * update the transfer error masks to sticky bits, let's do this
+		 * irrespective of current ufshcd_state.
+		 */
+		hba->saved_err |= hba->errors;
+		hba->saved_uic_err |= hba->uic_error;
+
 		/* handle fatal errors only when link is functional */
 		if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
 			/* block commands from scsi mid-layer */
 			scsi_block_requests(hba->host);
 
-			/* transfer error masks to sticky bits */
-			hba->saved_err |= hba->errors;
-			hba->saved_uic_err |= hba->uic_error;
-
 			hba->ufshcd_state = UFSHCD_STATE_ERROR;
 			schedule_work(&hba->eh_work);
 		}
@@ -3897,7 +4241,7 @@
 	/* poll for max. 1 sec to clear door bell register by h/w */
 	err = ufshcd_wait_for_register(hba,
 			REG_UTP_TASK_REQ_DOOR_BELL,
-			mask, 0, 1000, 1000);
+			mask, 0, 1000, 1000, true);
 out:
 	return err;
 }
@@ -4179,7 +4523,7 @@
 
 	/* Reset the host controller */
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	ufshcd_hba_stop(hba);
+	ufshcd_hba_stop(hba, false);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	err = ufshcd_hba_enable(hba);
@@ -4466,6 +4810,164 @@
 	return ret;
 }
 
+static int ufs_get_device_info(struct ufs_hba *hba,
+				struct ufs_device_info *card_data)
+{
+	int err;
+	u8 model_index;
+	u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0};
+	u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE];
+
+	err = ufshcd_read_device_desc(hba, desc_buf,
+					QUERY_DESC_DEVICE_MAX_SIZE);
+	if (err) {
+		dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n",
+			__func__, err);
+		goto out;
+	}
+
+	/*
+	 * getting vendor (manufacturerID) and Bank Index in big endian
+	 * format
+	 */
+	card_data->wmanufacturerid = desc_buf[DEVICE_DESC_PARAM_MANF_ID] << 8 |
+				     desc_buf[DEVICE_DESC_PARAM_MANF_ID + 1];
+
+	model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
+
+	err = ufshcd_read_string_desc(hba, model_index, str_desc_buf,
+					QUERY_DESC_STRING_MAX_SIZE, ASCII_STD);
+	if (err) {
+		dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
+			__func__, err);
+		goto out;
+	}
+
+	str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
+	strlcpy(card_data->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
+		min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
+		      MAX_MODEL_LEN));
+
+	/* Null terminate the model string */
+	card_data->model[MAX_MODEL_LEN] = '\0';
+
+out:
+	return err;
+}
+
+void ufs_advertise_fixup_device(struct ufs_hba *hba)
+{
+	int err;
+	struct ufs_dev_fix *f;
+	struct ufs_device_info card_data;
+
+	card_data.wmanufacturerid = 0;
+
+	err = ufs_get_device_info(hba, &card_data);
+	if (err) {
+		dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
+			__func__, err);
+		return;
+	}
+
+	for (f = ufs_fixups; f->quirk; f++) {
+		if (((f->card.wmanufacturerid == card_data.wmanufacturerid) ||
+		    (f->card.wmanufacturerid == UFS_ANY_VENDOR)) &&
+		    (STR_PRFX_EQUAL(f->card.model, card_data.model) ||
+		     !strcmp(f->card.model, UFS_ANY_MODEL)))
+			hba->dev_quirks |= f->quirk;
+	}
+}
+
+/**
+ * ufshcd_tune_pa_tactivate - Tunes PA_TActivate of local UniPro
+ * @hba: per-adapter instance
+ *
+ * PA_TActivate parameter can be tuned manually if UniPro version is less than
+ * 1.61. PA_TActivate needs to be greater than or equal to peerM-PHY's
+ * RX_MIN_ACTIVATETIME_CAPABILITY attribute. This optimal value can help reduce
+ * the hibern8 exit latency.
+ *
+ * Returns zero on success, non-zero error value on failure.
+ */
+static int ufshcd_tune_pa_tactivate(struct ufs_hba *hba)
+{
+	int ret = 0;
+	u32 peer_rx_min_activatetime = 0, tuned_pa_tactivate;
+
+	ret = ufshcd_dme_peer_get(hba,
+				  UIC_ARG_MIB_SEL(
+					RX_MIN_ACTIVATETIME_CAPABILITY,
+					UIC_ARG_MPHY_RX_GEN_SEL_INDEX(0)),
+				  &peer_rx_min_activatetime);
+	if (ret)
+		goto out;
+
+	/* make sure proper unit conversion is applied */
+	tuned_pa_tactivate =
+		((peer_rx_min_activatetime * RX_MIN_ACTIVATETIME_UNIT_US)
+		 / PA_TACTIVATE_TIME_UNIT_US);
+	ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TACTIVATE),
+			     tuned_pa_tactivate);
+
+out:
+	return ret;
+}
+
+/**
+ * ufshcd_tune_pa_hibern8time - Tunes PA_Hibern8Time of local UniPro
+ * @hba: per-adapter instance
+ *
+ * PA_Hibern8Time parameter can be tuned manually if UniPro version is less than
+ * 1.61. PA_Hibern8Time needs to be maximum of local M-PHY's
+ * TX_HIBERN8TIME_CAPABILITY & peer M-PHY's RX_HIBERN8TIME_CAPABILITY.
+ * This optimal value can help reduce the hibern8 exit latency.
+ *
+ * Returns zero on success, non-zero error value on failure.
+ */
+static int ufshcd_tune_pa_hibern8time(struct ufs_hba *hba)
+{
+	int ret = 0;
+	u32 local_tx_hibern8_time_cap = 0, peer_rx_hibern8_time_cap = 0;
+	u32 max_hibern8_time, tuned_pa_hibern8time;
+
+	ret = ufshcd_dme_get(hba,
+			     UIC_ARG_MIB_SEL(TX_HIBERN8TIME_CAPABILITY,
+					UIC_ARG_MPHY_TX_GEN_SEL_INDEX(0)),
+				  &local_tx_hibern8_time_cap);
+	if (ret)
+		goto out;
+
+	ret = ufshcd_dme_peer_get(hba,
+				  UIC_ARG_MIB_SEL(RX_HIBERN8TIME_CAPABILITY,
+					UIC_ARG_MPHY_RX_GEN_SEL_INDEX(0)),
+				  &peer_rx_hibern8_time_cap);
+	if (ret)
+		goto out;
+
+	max_hibern8_time = max(local_tx_hibern8_time_cap,
+			       peer_rx_hibern8_time_cap);
+	/* make sure proper unit conversion is applied */
+	tuned_pa_hibern8time = ((max_hibern8_time * HIBERN8TIME_UNIT_US)
+				/ PA_HIBERN8_TIME_UNIT_US);
+	ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HIBERN8TIME),
+			     tuned_pa_hibern8time);
+out:
+	return ret;
+}
+
+static void ufshcd_tune_unipro_params(struct ufs_hba *hba)
+{
+	if (ufshcd_is_unipro_pa_params_tuning_req(hba)) {
+		ufshcd_tune_pa_tactivate(hba);
+		ufshcd_tune_pa_hibern8time(hba);
+	}
+
+	if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TACTIVATE)
+		/* set 1ms timeout for PA_TACTIVATE */
+		ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TACTIVATE), 10);
+}
+
 /**
  * ufshcd_probe_hba - probe hba to detect device and initialize
  * @hba: per-adapter instance
@@ -4482,6 +4984,10 @@
 
 	ufshcd_init_pwr_info(hba);
 
+	/* set the default level for urgent bkops */
+	hba->urgent_bkops_lvl = BKOPS_STATUS_PERF_IMPACT;
+	hba->is_urgent_bkops_lvl_checked = false;
+
 	/* UniPro link is active now */
 	ufshcd_set_link_active(hba);
 
@@ -4493,6 +4999,14 @@
 	if (ret)
 		goto out;
 
+	ufs_advertise_fixup_device(hba);
+	ufshcd_tune_unipro_params(hba);
+
+	ret = ufshcd_set_vccq_rail_unused(hba,
+		(hba->dev_quirks & UFS_DEVICE_NO_VCCQ) ? true : false);
+	if (ret)
+		goto out;
+
 	/* UFS device is also active now */
 	ufshcd_set_ufs_dev_active(hba);
 	ufshcd_force_reset_auto_bkops(hba);
@@ -4567,6 +5081,41 @@
 	ufshcd_probe_hba(hba);
 }
 
+static enum blk_eh_timer_return ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
+{
+	unsigned long flags;
+	struct Scsi_Host *host;
+	struct ufs_hba *hba;
+	int index;
+	bool found = false;
+
+	if (!scmd || !scmd->device || !scmd->device->host)
+		return BLK_EH_NOT_HANDLED;
+
+	host = scmd->device->host;
+	hba = shost_priv(host);
+	if (!hba)
+		return BLK_EH_NOT_HANDLED;
+
+	spin_lock_irqsave(host->host_lock, flags);
+
+	for_each_set_bit(index, &hba->outstanding_reqs, hba->nutrs) {
+		if (hba->lrb[index].cmd == scmd) {
+			found = true;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(host->host_lock, flags);
+
+	/*
+	 * Bypass SCSI error handling and reset the block layer timer if this
+	 * SCSI command was not actually dispatched to UFS driver, otherwise
+	 * let SCSI layer handle the error as usual.
+	 */
+	return found ? BLK_EH_NOT_HANDLED : BLK_EH_RESET_TIMER;
+}
+
 static struct scsi_host_template ufshcd_driver_template = {
 	.module			= THIS_MODULE,
 	.name			= UFSHCD,
@@ -4579,6 +5128,7 @@
 	.eh_abort_handler	= ufshcd_abort,
 	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
 	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
+	.eh_timed_out		= ufshcd_eh_timed_out,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
@@ -4607,13 +5157,24 @@
 static inline int ufshcd_config_vreg_lpm(struct ufs_hba *hba,
 					 struct ufs_vreg *vreg)
 {
-	return ufshcd_config_vreg_load(hba->dev, vreg, UFS_VREG_LPM_LOAD_UA);
+	if (!vreg)
+		return 0;
+	else if (vreg->unused)
+		return 0;
+	else
+		return ufshcd_config_vreg_load(hba->dev, vreg,
+					       UFS_VREG_LPM_LOAD_UA);
 }
 
 static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
 					 struct ufs_vreg *vreg)
 {
-	return ufshcd_config_vreg_load(hba->dev, vreg, vreg->max_uA);
+	if (!vreg)
+		return 0;
+	else if (vreg->unused)
+		return 0;
+	else
+		return ufshcd_config_vreg_load(hba->dev, vreg, vreg->max_uA);
 }
 
 static int ufshcd_config_vreg(struct device *dev,
@@ -4648,7 +5209,9 @@
 {
 	int ret = 0;
 
-	if (!vreg || vreg->enabled)
+	if (!vreg)
+		goto out;
+	else if (vreg->enabled || vreg->unused)
 		goto out;
 
 	ret = ufshcd_config_vreg(dev, vreg, true);
@@ -4668,7 +5231,9 @@
 {
 	int ret = 0;
 
-	if (!vreg || !vreg->enabled)
+	if (!vreg)
+		goto out;
+	else if (!vreg->enabled || vreg->unused)
 		goto out;
 
 	ret = regulator_disable(vreg->reg);
@@ -4774,6 +5339,36 @@
 	return 0;
 }
 
+static int ufshcd_set_vccq_rail_unused(struct ufs_hba *hba, bool unused)
+{
+	int ret = 0;
+	struct ufs_vreg_info *info = &hba->vreg_info;
+
+	if (!info)
+		goto out;
+	else if (!info->vccq)
+		goto out;
+
+	if (unused) {
+		/* shut off the rail here */
+		ret = ufshcd_toggle_vreg(hba->dev, info->vccq, false);
+		/*
+		 * Mark this rail as no longer used, so it doesn't get enabled
+		 * later by mistake
+		 */
+		if (!ret)
+			info->vccq->unused = true;
+	} else {
+		/*
+		 * rail should have been already enabled hence just make sure
+		 * that unused flag is cleared.
+		 */
+		info->vccq->unused = false;
+	}
+out:
+	return ret;
+}
+
 static int __ufshcd_setup_clocks(struct ufs_hba *hba, bool on,
 					bool skip_ref_clk)
 {
@@ -5093,10 +5688,20 @@
 		   (!check_for_bkops || (check_for_bkops &&
 		    !hba->auto_bkops_enabled))) {
 		/*
+		 * Let's make sure that link is in low power mode, we are doing
+		 * this currently by putting the link in Hibern8. Otherway to
+		 * put the link in low power mode is to send the DME end point
+		 * to device and then send the DME reset command to local
+		 * unipro. But putting the link in hibern8 is much faster.
+		 */
+		ret = ufshcd_uic_hibern8_enter(hba);
+		if (ret)
+			goto out;
+		/*
 		 * Change controller state to "reset state" which
 		 * should also put the link in off/reset state
 		 */
-		ufshcd_hba_stop(hba);
+		ufshcd_hba_stop(hba, true);
 		/*
 		 * TODO: Check if we need any delay to make sure that
 		 * controller is reset
@@ -5111,6 +5716,16 @@
 static void ufshcd_vreg_set_lpm(struct ufs_hba *hba)
 {
 	/*
+	 * It seems some UFS devices may keep drawing more than sleep current
+	 * (atleast for 500us) from UFS rails (especially from VCCQ rail).
+	 * To avoid this situation, add 2ms delay before putting these UFS
+	 * rails in LPM mode.
+	 */
+	if (!ufshcd_is_link_active(hba) &&
+	    hba->dev_quirks & UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM)
+		usleep_range(2000, 2100);
+
+	/*
 	 * If UFS device is either in UFS_Sleep turn off VCC rail to save some
 	 * power.
 	 *
@@ -5572,7 +6187,7 @@
 	scsi_remove_host(hba->host);
 	/* disable interrupts */
 	ufshcd_disable_intr(hba, hba->intr_mask);
-	ufshcd_hba_stop(hba);
+	ufshcd_hba_stop(hba, true);
 
 	scsi_host_put(hba->host);
 
@@ -5836,6 +6451,21 @@
 	init_waitqueue_head(&hba->dev_cmd.tag_wq);
 
 	ufshcd_init_clk_gating(hba);
+
+	/*
+	 * In order to avoid any spurious interrupt immediately after
+	 * registering UFS controller interrupt handler, clear any pending UFS
+	 * interrupt status and disable all the UFS interrupts.
+	 */
+	ufshcd_writel(hba, ufshcd_readl(hba, REG_INTERRUPT_STATUS),
+		      REG_INTERRUPT_STATUS);
+	ufshcd_writel(hba, 0, REG_INTERRUPT_ENABLE);
+	/*
+	 * Make sure that UFS interrupts are disabled and any pending interrupt
+	 * status is cleared before registering UFS interrupt handler.
+	 */
+	mb();
+
 	/* IRQ registration */
 	err = devm_request_irq(dev, irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba);
 	if (err) {

diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index e3931d0..4bb6566 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h

@@ -54,6 +54,7 @@
 #include <linux/clk.h>
 #include <linux/completion.h>
 #include <linux/regulator/consumer.h>
+#include "unipro.h"
 
 #include <asm/irq.h>
 #include <asm/byteorder.h>
@@ -383,6 +384,9 @@
  * @clk_list_head: UFS host controller clocks list node head
  * @pwr_info: holds current power mode
  * @max_pwr_info: keeps the device max valid pwm
+ * @urgent_bkops_lvl: keeps track of urgent bkops level for device
+ * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
+ *  device is known or not.
  */
 struct ufs_hba {
 	void __iomem *mmio_base;
@@ -470,6 +474,9 @@
 
 	unsigned int quirks;	/* Deviations from standard UFSHCI spec. */
 
+	/* Device deviations from standard UFS device spec. */
+	unsigned int dev_quirks;
+
 	wait_queue_head_t tm_wq;
 	wait_queue_head_t tm_tag_wq;
 	unsigned long tm_condition;
@@ -509,6 +516,8 @@
 
 	bool wlun_dev_clr_ua;
 
+	/* Number of lanes available (1 or 2) for Rx/Tx */
+	u32 lanes_per_direction;
 	struct ufs_pa_layer_attr pwr_info;
 	struct ufs_pwr_mode_info max_pwr_info;
 
@@ -533,6 +542,9 @@
 	struct devfreq *devfreq;
 	struct ufs_clk_scaling clk_scaling;
 	bool is_sys_suspended;
+
+	enum bkops_status urgent_bkops_lvl;
+	bool is_urgent_bkops_lvl_checked;
 };
 
 /* Returns true if clocks can be gated. Otherwise false */
@@ -588,15 +600,9 @@
 void ufshcd_dealloc_host(struct ufs_hba *);
 int ufshcd_init(struct ufs_hba * , void __iomem * , unsigned int);
 void ufshcd_remove(struct ufs_hba *);
-
-/**
- * ufshcd_hba_stop - Send controller to reset state
- * @hba: per adapter instance
- */
-static inline void ufshcd_hba_stop(struct ufs_hba *hba)
-{
-	ufshcd_writel(hba, CONTROLLER_DISABLE,  REG_CONTROLLER_ENABLE);
-}
+int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
+				u32 val, unsigned long interval_us,
+				unsigned long timeout_ms, bool can_sleep);
 
 static inline void check_upiu_size(void)
 {
@@ -682,11 +688,27 @@
 	return ufshcd_dme_get_attr(hba, attr_sel, mib_val, DME_PEER);
 }
 
+int ufshcd_read_device_desc(struct ufs_hba *hba, u8 *buf, u32 size);
+
+static inline bool ufshcd_is_hs_mode(struct ufs_pa_layer_attr *pwr_info)
+{
+	return (pwr_info->pwr_rx == FAST_MODE ||
+		pwr_info->pwr_rx == FASTAUTO_MODE) &&
+		(pwr_info->pwr_tx == FAST_MODE ||
+		pwr_info->pwr_tx == FASTAUTO_MODE);
+}
+
+#define ASCII_STD true
+
+int ufshcd_read_string_desc(struct ufs_hba *hba, int desc_index, u8 *buf,
+				u32 size, bool ascii);
+
 /* Expose Query-Request API */
 int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
 	enum flag_idn idn, bool *flag_res);
 int ufshcd_hold(struct ufs_hba *hba, bool async);
 void ufshcd_release(struct ufs_hba *hba);
+u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba);
 
 /* Wrapper functions for safely calling variant operations */
 static inline const char *ufshcd_get_var_name(struct ufs_hba *hba)

diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 0ae0967..4cb1cc63f 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h

@@ -92,6 +92,7 @@
 	UFSHCI_VERSION_10 = 0x00010000, /* 1.0 */
 	UFSHCI_VERSION_11 = 0x00010100, /* 1.1 */
 	UFSHCI_VERSION_20 = 0x00000200, /* 2.0 */
+	UFSHCI_VERSION_21 = 0x00000210, /* 2.1 */
 };
 
 /*
@@ -170,6 +171,8 @@
 #define UIC_DATA_LINK_LAYER_ERROR		UFS_BIT(31)
 #define UIC_DATA_LINK_LAYER_ERROR_CODE_MASK	0x7FFF
 #define UIC_DATA_LINK_LAYER_ERROR_PA_INIT	0x2000
+#define UIC_DATA_LINK_LAYER_ERROR_NAC_RECEIVED	0x0001
+#define UIC_DATA_LINK_LAYER_ERROR_TCx_REPLAY_TIMEOUT 0x0002
 
 /* UECN - Host UIC Error Code Network Layer 40h */
 #define UIC_NETWORK_LAYER_ERROR			UFS_BIT(31)
@@ -209,6 +212,7 @@
 
 /* GenSelectorIndex calculation macros for M-PHY attributes */
 #define UIC_ARG_MPHY_TX_GEN_SEL_INDEX(lane) (lane)
+#define UIC_ARG_MPHY_RX_GEN_SEL_INDEX(lane) (PA_MAXDATALANES + (lane))
 
 #define UIC_ARG_MIB_SEL(attr, sel)	((((attr) & 0xFFFF) << 16) |\
 					 ((sel) & 0xFFFF))

diff --git a/drivers/scsi/ufs/unipro.h b/drivers/scsi/ufs/unipro.h
index 816a8a4..e2854e4 100644
--- a/drivers/scsi/ufs/unipro.h
+++ b/drivers/scsi/ufs/unipro.h

@@ -15,6 +15,7 @@
 /*
  * M-TX Configuration Attributes
  */
+#define TX_HIBERN8TIME_CAPABILITY		0x000F
 #define TX_MODE					0x0021
 #define TX_HSRATE_SERIES			0x0022
 #define TX_HSGEAR				0x0023
@@ -48,8 +49,12 @@
 #define RX_ENTER_HIBERN8			0x00A7
 #define RX_BYPASS_8B10B_ENABLE			0x00A8
 #define RX_TERMINATION_FORCE_ENABLE		0x0089
+#define RX_MIN_ACTIVATETIME_CAPABILITY		0x008F
+#define RX_HIBERN8TIME_CAPABILITY		0x0092
 
 #define is_mphy_tx_attr(attr)			(attr < RX_MODE)
+#define RX_MIN_ACTIVATETIME_UNIT_US		100
+#define HIBERN8TIME_UNIT_US			100
 /*
  * PHY Adpater attributes
  */
@@ -70,6 +75,7 @@
 #define PA_MAXRXSPEEDFAST	0x1541
 #define PA_MAXRXSPEEDSLOW	0x1542
 #define PA_TXLINKSTARTUPHS	0x1544
+#define PA_LOCAL_TX_LCC_ENABLE	0x155E
 #define PA_TXSPEEDFAST		0x1565
 #define PA_TXSPEEDSLOW		0x1566
 #define PA_REMOTEVERINFO	0x15A0
@@ -110,6 +116,12 @@
 #define PA_STALLNOCONFIGTIME	0x15A3
 #define PA_SAVECONFIGTIME	0x15A4
 
+#define PA_TACTIVATE_TIME_UNIT_US	10
+#define PA_HIBERN8_TIME_UNIT_US		100
+
+/* PHY Adapter Protocol Constants */
+#define PA_MAXDATALANES	4
+
 /* PA power modes */
 enum {
 	FAST_MODE	= 1,
@@ -143,6 +155,16 @@
 	UFS_HS_G3,		/* HS Gear 3 */
 };
 
+enum ufs_unipro_ver {
+	UFS_UNIPRO_VER_RESERVED = 0,
+	UFS_UNIPRO_VER_1_40 = 1, /* UniPro version 1.40 */
+	UFS_UNIPRO_VER_1_41 = 2, /* UniPro version 1.41 */
+	UFS_UNIPRO_VER_1_6 = 3,  /* UniPro version 1.6 */
+	UFS_UNIPRO_VER_MAX = 4,  /* UniPro unsupported version */
+	/* UniPro version field mask in PA_LOCALVERINFO */
+	UFS_UNIPRO_VER_MASK = 0xF,
+};
+
 /*
  * Data Link Layer Attributes
  */

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index e7a19be..50769078 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c

@@ -211,11 +211,15 @@
 			 struct spi_transfer *transfer)
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
-	unsigned int bpw = transfer->bits_per_word;
+	unsigned int bpw;
 
 	if (!master->dma_rx)
 		return false;
 
+	if (!transfer)
+		return false;
+
+	bpw = transfer->bits_per_word;
 	if (!bpw)
 		bpw = spi->bits_per_word;
 
@@ -333,8 +337,9 @@
 static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx,
 		struct spi_imx_config *config)
 {
-	u32 ctrl = MX51_ECSPI_CTRL_ENABLE, cfg = 0;
+	u32 ctrl = MX51_ECSPI_CTRL_ENABLE;
 	u32 clk = config->speed_hz, delay, reg;
+	u32 cfg = readl(spi_imx->base + MX51_ECSPI_CONFIG);
 
 	/*
 	 * The hardware seems to have a race condition when changing modes. The
@@ -358,13 +363,20 @@
 
 	if (config->mode & SPI_CPHA)
 		cfg |= MX51_ECSPI_CONFIG_SCLKPHA(config->cs);
+	else
+		cfg &= ~MX51_ECSPI_CONFIG_SCLKPHA(config->cs);
 
 	if (config->mode & SPI_CPOL) {
 		cfg |= MX51_ECSPI_CONFIG_SCLKPOL(config->cs);
 		cfg |= MX51_ECSPI_CONFIG_SCLKCTL(config->cs);
+	} else {
+		cfg &= ~MX51_ECSPI_CONFIG_SCLKPOL(config->cs);
+		cfg &= ~MX51_ECSPI_CONFIG_SCLKCTL(config->cs);
 	}
 	if (config->mode & SPI_CS_HIGH)
 		cfg |= MX51_ECSPI_CONFIG_SSBPOL(config->cs);
+	else
+		cfg &= ~MX51_ECSPI_CONFIG_SSBPOL(config->cs);
 
 	if (spi_imx->usedma)
 		ctrl |= MX51_ECSPI_CTRL_SMC;

diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 0caa3c8..43a02e3 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c

@@ -423,16 +423,12 @@
 
 	if (mcspi_dma->dma_tx) {
 		struct dma_async_tx_descriptor *tx;
-		struct scatterlist sg;
 
 		dmaengine_slave_config(mcspi_dma->dma_tx, &cfg);
 
-		sg_init_table(&sg, 1);
-		sg_dma_address(&sg) = xfer->tx_dma;
-		sg_dma_len(&sg) = xfer->len;
-
-		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_tx, &sg, 1,
-		DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_tx, xfer->tx_sg.sgl,
+					     xfer->tx_sg.nents, DMA_MEM_TO_DEV,
+					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (tx) {
 			tx->callback = omap2_mcspi_tx_callback;
 			tx->callback_param = spi;
@@ -478,20 +474,15 @@
 
 	if (mcspi_dma->dma_rx) {
 		struct dma_async_tx_descriptor *tx;
-		struct scatterlist sg;
 
 		dmaengine_slave_config(mcspi_dma->dma_rx, &cfg);
 
 		if ((l & OMAP2_MCSPI_CHCONF_TURBO) && mcspi->fifo_depth == 0)
 			dma_count -= es;
 
-		sg_init_table(&sg, 1);
-		sg_dma_address(&sg) = xfer->rx_dma;
-		sg_dma_len(&sg) = dma_count;
-
-		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_rx, &sg, 1,
-				DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT |
-				DMA_CTRL_ACK);
+		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_rx, xfer->rx_sg.sgl,
+					     xfer->rx_sg.nents, DMA_DEV_TO_MEM,
+					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (tx) {
 			tx->callback = omap2_mcspi_rx_callback;
 			tx->callback_param = spi;
@@ -505,8 +496,6 @@
 	omap2_mcspi_set_dma_req(spi, 1, 1);
 
 	wait_for_completion(&mcspi_dma->dma_rx_completion);
-	dma_unmap_single(mcspi->dev, xfer->rx_dma, count,
-			 DMA_FROM_DEVICE);
 
 	if (mcspi->fifo_depth > 0)
 		return count;
@@ -619,8 +608,6 @@
 
 	if (tx != NULL) {
 		wait_for_completion(&mcspi_dma->dma_tx_completion);
-		dma_unmap_single(mcspi->dev, xfer->tx_dma, xfer->len,
-				 DMA_TO_DEVICE);
 
 		if (mcspi->fifo_depth > 0) {
 			irqstat_reg = mcspi->base + OMAP2_MCSPI_IRQSTATUS;
@@ -1087,6 +1074,16 @@
 		gpio_free(spi->cs_gpio);
 }
 
+static bool omap2_mcspi_can_dma(struct spi_master *master,
+				struct spi_device *spi,
+				struct spi_transfer *xfer)
+{
+	if (xfer->len < DMA_MIN_BYTES)
+		return false;
+
+	return true;
+}
+
 static int omap2_mcspi_work_one(struct omap2_mcspi *mcspi,
 		struct spi_device *spi, struct spi_transfer *t)
 {
@@ -1268,32 +1265,6 @@
 		return -EINVAL;
 	}
 
-	if (len < DMA_MIN_BYTES)
-		goto skip_dma_map;
-
-	if (mcspi_dma->dma_tx && tx_buf != NULL) {
-		t->tx_dma = dma_map_single(mcspi->dev, (void *) tx_buf,
-				len, DMA_TO_DEVICE);
-		if (dma_mapping_error(mcspi->dev, t->tx_dma)) {
-			dev_dbg(mcspi->dev, "dma %cX %d bytes error\n",
-					'T', len);
-			return -EINVAL;
-		}
-	}
-	if (mcspi_dma->dma_rx && rx_buf != NULL) {
-		t->rx_dma = dma_map_single(mcspi->dev, rx_buf, t->len,
-				DMA_FROM_DEVICE);
-		if (dma_mapping_error(mcspi->dev, t->rx_dma)) {
-			dev_dbg(mcspi->dev, "dma %cX %d bytes error\n",
-					'R', len);
-			if (tx_buf != NULL)
-				dma_unmap_single(mcspi->dev, t->tx_dma,
-						len, DMA_TO_DEVICE);
-			return -EINVAL;
-		}
-	}
-
-skip_dma_map:
 	return omap2_mcspi_work_one(mcspi, spi, t);
 }
 
@@ -1377,6 +1348,7 @@
 	master->transfer_one = omap2_mcspi_transfer_one;
 	master->set_cs = omap2_mcspi_set_cs;
 	master->cleanup = omap2_mcspi_cleanup;
+	master->can_dma = omap2_mcspi_can_dma;
 	master->dev.of_node = node;
 	master->max_speed_hz = OMAP2_MCSPI_MAX_FREQ;
 	master->min_speed_hz = OMAP2_MCSPI_MAX_FREQ >> 15;

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 8f50a40..6c6c001 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c

@@ -534,7 +534,7 @@
 	if (WARN_ON(rs->speed > MAX_SCLK_OUT))
 		rs->speed = MAX_SCLK_OUT;
 
-	/* the minimum divsor is 2 */
+	/* the minimum divisor is 2 */
 	if (rs->max_freq < 2 * rs->speed) {
 		clk_set_rate(rs->spiclk, 2 * rs->speed);
 		rs->max_freq = clk_get_rate(rs->spiclk);
@@ -730,23 +730,27 @@
 	master->transfer_one = rockchip_spi_transfer_one;
 	master->handle_err = rockchip_spi_handle_err;
 
-	rs->dma_tx.ch = dma_request_slave_channel(rs->dev, "tx");
-	if (IS_ERR_OR_NULL(rs->dma_tx.ch)) {
+	rs->dma_tx.ch = dma_request_chan(rs->dev, "tx");
+	if (IS_ERR(rs->dma_tx.ch)) {
 		/* Check tx to see if we need defer probing driver */
 		if (PTR_ERR(rs->dma_tx.ch) == -EPROBE_DEFER) {
 			ret = -EPROBE_DEFER;
 			goto err_get_fifo_len;
 		}
 		dev_warn(rs->dev, "Failed to request TX DMA channel\n");
+		rs->dma_tx.ch = NULL;
 	}
 
-	rs->dma_rx.ch = dma_request_slave_channel(rs->dev, "rx");
-	if (!rs->dma_rx.ch) {
-		if (rs->dma_tx.ch) {
+	rs->dma_rx.ch = dma_request_chan(rs->dev, "rx");
+	if (IS_ERR(rs->dma_rx.ch)) {
+		if (PTR_ERR(rs->dma_rx.ch) == -EPROBE_DEFER) {
 			dma_release_channel(rs->dma_tx.ch);
 			rs->dma_tx.ch = NULL;
+			ret = -EPROBE_DEFER;
+			goto err_get_fifo_len;
 		}
 		dev_warn(rs->dev, "Failed to request RX DMA channel\n");
+		rs->dma_rx.ch = NULL;
 	}
 
 	if (rs->dma_tx.ch && rs->dma_rx.ch) {

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index de2f2f9..0239b45 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c

@@ -1209,7 +1209,7 @@
 	struct spi_master *master =
 		container_of(work, struct spi_master, pump_messages);
 
-	__spi_pump_messages(master, true, false);
+	__spi_pump_messages(master, true, master->bus_lock_flag);
 }
 
 static int spi_init_queue(struct spi_master *master)
@@ -2853,7 +2853,7 @@
  */
 int spi_sync(struct spi_device *spi, struct spi_message *message)
 {
-	return __spi_sync(spi, message, 0);
+	return __spi_sync(spi, message, spi->master->bus_lock_flag);
 }
 EXPORT_SYMBOL_GPL(spi_sync);
 

diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index f0ca4a1..cf84581 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig

@@ -60,8 +60,6 @@
 
 source "drivers/staging/speakup/Kconfig"
 
-source "drivers/staging/ste_rmi4/Kconfig"
-
 source "drivers/staging/nvec/Kconfig"
 
 source "drivers/staging/media/Kconfig"

diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 22464a0..7d6448d 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile

@@ -21,7 +21,6 @@
 obj-$(CONFIG_FB_XGI)		+= xgifb/
 obj-$(CONFIG_USB_EMXX)		+= emxx_udc/
 obj-$(CONFIG_SPEAKUP)		+= speakup/
-obj-$(CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4)	+= ste_rmi4/
 obj-$(CONFIG_MFD_NVEC)		+= nvec/
 obj-$(CONFIG_STAGING_RDMA)	+= rdma/
 obj-$(CONFIG_ANDROID)		+= android/

diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index bceb813..8536567 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c

@@ -1141,14 +1141,16 @@
 	return PTR_ERR_OR_ZERO(vaddr);
 }
 
-static void ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-				       enum dma_data_direction direction)
+static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+				      enum dma_data_direction direction)
 {
 	struct ion_buffer *buffer = dmabuf->priv;
 
 	mutex_lock(&buffer->lock);
 	ion_buffer_kmap_put(buffer);
 	mutex_unlock(&buffer->lock);
+
+	return 0;
 }
 
 static struct dma_buf_ops dma_buf_ops = {

diff --git a/drivers/staging/android/ion/ion_test.c b/drivers/staging/android/ion/ion_test.c
index da34bc12..83a3af0 100644
--- a/drivers/staging/android/ion/ion_test.c
+++ b/drivers/staging/android/ion/ion_test.c

@@ -285,8 +285,8 @@
 {
 	ion_test_pdev = platform_device_register_simple("ion-test",
 							-1, NULL, 0);
-	if (!ion_test_pdev)
-		return -ENODEV;
+	if (IS_ERR(ion_test_pdev))
+		return PTR_ERR(ion_test_pdev);
 
 	return platform_driver_probe(&ion_test_platform_driver, ion_test_probe);
 }

diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index d1226c9..dcaf7e8 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c

@@ -246,24 +246,24 @@
 {
 	if (dev->mmio)
 		writel(data, dev->mmio + reg);
-
-	outl(data, dev->iobase + reg);
+	else
+		outl(data, dev->iobase + reg);
 }
 
 static void ni_writew(struct comedi_device *dev, uint16_t data, int reg)
 {
 	if (dev->mmio)
 		writew(data, dev->mmio + reg);
-
-	outw(data, dev->iobase + reg);
+	else
+		outw(data, dev->iobase + reg);
 }
 
 static void ni_writeb(struct comedi_device *dev, uint8_t data, int reg)
 {
 	if (dev->mmio)
 		writeb(data, dev->mmio + reg);
-
-	outb(data, dev->iobase + reg);
+	else
+		outb(data, dev->iobase + reg);
 }
 
 static uint32_t ni_readl(struct comedi_device *dev, int reg)

diff --git a/drivers/staging/fsl-mc/bus/mc-bus.c b/drivers/staging/fsl-mc/bus/mc-bus.c
index 9f77c37b..b594556 100644
--- a/drivers/staging/fsl-mc/bus/mc-bus.c
+++ b/drivers/staging/fsl-mc/bus/mc-bus.c

@@ -260,14 +260,14 @@
 
 	error = dprc_open(mc_io, 0, container_id, &dprc_handle);
 	if (error < 0) {
-		dev_err(&mc_io->dev, "dprc_open() failed: %d\n", error);
+		dev_err(mc_io->dev, "dprc_open() failed: %d\n", error);
 		return error;
 	}
 
 	memset(&attr, 0, sizeof(attr));
 	error = dprc_get_attributes(mc_io, 0, dprc_handle, &attr);
 	if (error < 0) {
-		dev_err(&mc_io->dev, "dprc_get_attributes() failed: %d\n",
+		dev_err(mc_io->dev, "dprc_get_attributes() failed: %d\n",
 			error);
 		goto common_cleanup;
 	}

diff --git a/drivers/staging/fsl-mc/bus/mc-sys.c b/drivers/staging/fsl-mc/bus/mc-sys.c
index 8101c46..810a611 100644
--- a/drivers/staging/fsl-mc/bus/mc-sys.c
+++ b/drivers/staging/fsl-mc/bus/mc-sys.c

@@ -328,7 +328,7 @@
 			     MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
 
 		if (time_after_eq(jiffies, jiffies_until_timeout)) {
-			dev_dbg(&mc_io->dev,
+			dev_dbg(mc_io->dev,
 				"MC command timed out (portal: %#llx, obj handle: %#x, command: %#x)\n",
 				 mc_io->portal_phys_addr,
 				 (unsigned int)
@@ -370,7 +370,7 @@
 		udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
 		timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
 		if (timeout_usecs == 0) {
-			dev_dbg(&mc_io->dev,
+			dev_dbg(mc_io->dev,
 				"MC command timed out (portal: %#llx, obj handle: %#x, command: %#x)\n",
 				 mc_io->portal_phys_addr,
 				 (unsigned int)
@@ -426,7 +426,7 @@
 		goto common_exit;
 
 	if (status != MC_CMD_STATUS_OK) {
-		dev_dbg(&mc_io->dev,
+		dev_dbg(mc_io->dev,
 			"MC command failed: portal: %#llx, obj handle: %#x, command: %#x, status: %s (%#x)\n",
 			 mc_io->portal_phys_addr,
 			 (unsigned int)MC_CMD_HDR_READ_TOKEN(cmd->header),

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index dab4862..1333543 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h

@@ -88,7 +88,7 @@
 } while (0)
 
 #ifndef LIBCFS_VMALLOC_SIZE
-#define LIBCFS_VMALLOC_SIZE	(2 << PAGE_CACHE_SHIFT) /* 2 pages */
+#define LIBCFS_VMALLOC_SIZE	(2 << PAGE_SHIFT) /* 2 pages */
 #endif
 
 #define LIBCFS_ALLOC_PRE(size, mask)					    \

diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
index 0f2fd79..837eb22 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h

@@ -57,7 +57,7 @@
 #include "../libcfs_cpu.h"
 #endif
 
-#define CFS_PAGE_MASK		   (~((__u64)PAGE_CACHE_SIZE-1))
+#define CFS_PAGE_MASK		   (~((__u64)PAGE_SIZE-1))
 #define page_index(p)       ((p)->index)
 
 #define memory_pressure_get() (current->flags & PF_MEMALLOC)
@@ -67,7 +67,7 @@
 #if BITS_PER_LONG == 32
 /* limit to lowmem on 32-bit systems */
 #define NUM_CACHEPAGES \
-	min(totalram_pages, 1UL << (30 - PAGE_CACHE_SHIFT) * 3 / 4)
+	min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4)
 #else
 #define NUM_CACHEPAGES totalram_pages
 #endif

diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h
index 08f193c..1c679cb 100644
--- a/drivers/staging/lustre/include/linux/lnet/types.h
+++ b/drivers/staging/lustre/include/linux/lnet/types.h

@@ -514,7 +514,7 @@
 	/**
 	 * Starting offset of the fragment within the page. Note that the
 	 * end of the fragment must not pass the end of the page; i.e.,
-	 * kiov_len + kiov_offset <= PAGE_CACHE_SIZE.
+	 * kiov_len + kiov_offset <= PAGE_SIZE.
 	 */
 	unsigned int	 kiov_offset;
 } lnet_kiov_t;

diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
index 3e1f24e..d4ce06d 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c

@@ -291,7 +291,7 @@
 
 	for (nob = i = 0; i < niov; i++) {
 		if ((kiov[i].kiov_offset && i > 0) ||
-		    (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_CACHE_SIZE && i < niov - 1))
+		    (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_SIZE && i < niov - 1))
 			return NULL;
 
 		pages[i] = kiov[i].kiov_page;

diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index c90e510..c3d628b 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c

@@ -517,7 +517,7 @@
 		max = TCD_MAX_PAGES;
 	} else {
 		max = max / num_possible_cpus();
-		max <<= (20 - PAGE_CACHE_SHIFT);
+		max <<= (20 - PAGE_SHIFT);
 	}
 	rc = cfs_tracefile_init(max);
 

diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c
index ec3bc04..244eb89 100644
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c

@@ -182,7 +182,7 @@
 	if (tcd->tcd_cur_pages > 0) {
 		__LASSERT(!list_empty(&tcd->tcd_pages));
 		tage = cfs_tage_from_list(tcd->tcd_pages.prev);
-		if (tage->used + len <= PAGE_CACHE_SIZE)
+		if (tage->used + len <= PAGE_SIZE)
 			return tage;
 	}
 
@@ -260,7 +260,7 @@
 	 * from here: this will lead to infinite recursion.
 	 */
 
-	if (len > PAGE_CACHE_SIZE) {
+	if (len > PAGE_SIZE) {
 		pr_err("cowardly refusing to write %lu bytes in a page\n", len);
 		return NULL;
 	}
@@ -349,7 +349,7 @@
 	for (i = 0; i < 2; i++) {
 		tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
 		if (!tage) {
-			if (needed + known_size > PAGE_CACHE_SIZE)
+			if (needed + known_size > PAGE_SIZE)
 				mask |= D_ERROR;
 
 			cfs_trace_put_tcd(tcd);
@@ -360,7 +360,7 @@
 		string_buf = (char *)page_address(tage->page) +
 					tage->used + known_size;
 
-		max_nob = PAGE_CACHE_SIZE - tage->used - known_size;
+		max_nob = PAGE_SIZE - tage->used - known_size;
 		if (max_nob <= 0) {
 			printk(KERN_EMERG "negative max_nob: %d\n",
 			       max_nob);
@@ -424,7 +424,7 @@
 	__LASSERT(debug_buf == string_buf);
 
 	tage->used += needed;
-	__LASSERT(tage->used <= PAGE_CACHE_SIZE);
+	__LASSERT(tage->used <= PAGE_SIZE);
 
 console:
 	if ((mask & libcfs_printk) == 0) {
@@ -835,7 +835,7 @@
 
 int cfs_trace_allocate_string_buffer(char **str, int nob)
 {
-	if (nob > 2 * PAGE_CACHE_SIZE)	    /* string must be "sensible" */
+	if (nob > 2 * PAGE_SIZE)	    /* string must be "sensible" */
 		return -EINVAL;
 
 	*str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
@@ -951,7 +951,7 @@
 	}
 
 	mb /= num_possible_cpus();
-	pages = mb << (20 - PAGE_CACHE_SHIFT);
+	pages = mb << (20 - PAGE_SHIFT);
 
 	cfs_tracefile_write_lock();
 
@@ -977,7 +977,7 @@
 
 	cfs_tracefile_read_unlock();
 
-	return (total_pages >> (20 - PAGE_CACHE_SHIFT)) + 1;
+	return (total_pages >> (20 - PAGE_SHIFT)) + 1;
 }
 
 static int tracefiled(void *arg)

diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h
index 4c77f90..ac84e7f 100644
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.h
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.h

@@ -87,7 +87,7 @@
 extern int  libcfs_panic_in_progress;
 int cfs_trace_max_debug_mb(void);
 
-#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT))
+#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
 #define TCD_STOCK_PAGES (TCD_MAX_PAGES)
 #define CFS_TRACEFILE_SIZE (500 << 20)
 
@@ -96,7 +96,7 @@
 /*
  * Private declare for tracefile
  */
-#define TCD_MAX_PAGES (5 << (20 - PAGE_CACHE_SHIFT))
+#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
 #define TCD_STOCK_PAGES (TCD_MAX_PAGES)
 
 #define CFS_TRACEFILE_SIZE (500 << 20)
@@ -257,7 +257,7 @@
 do {								    \
 	__LASSERT(tage);					\
 	__LASSERT(tage->page);				  \
-	__LASSERT(tage->used <= PAGE_CACHE_SIZE);			 \
+	__LASSERT(tage->used <= PAGE_SIZE);			 \
 	__LASSERT(page_count(tage->page) > 0);		      \
 } while (0)
 

diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
index c74514f..75d3121 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-md.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-md.c

@@ -139,7 +139,7 @@
 		for (i = 0; i < (int)niov; i++) {
 			/* We take the page pointer on trust */
 			if (lmd->md_iov.kiov[i].kiov_offset +
-			    lmd->md_iov.kiov[i].kiov_len > PAGE_CACHE_SIZE)
+			    lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE)
 				return -EINVAL; /* invalid length */
 
 			total_length += lmd->md_iov.kiov[i].kiov_len;

diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index 0009a8d..f19aa93 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c

@@ -549,12 +549,12 @@
 		if (len <= frag_len) {
 			dst->kiov_len = len;
 			LASSERT(dst->kiov_offset + dst->kiov_len
-					<= PAGE_CACHE_SIZE);
+					<= PAGE_SIZE);
 			return niov;
 		}
 
 		dst->kiov_len = frag_len;
-		LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_CACHE_SIZE);
+		LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
 
 		len -= frag_len;
 		dst++;
@@ -887,7 +887,7 @@
 	rbp = &the_lnet.ln_rtrpools[cpt][0];
 
 	LASSERT(msg->msg_len <= LNET_MTU);
-	while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_CACHE_SIZE) {
+	while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) {
 		rbp++;
 		LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]);
 	}

diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index cc0c275..891fd59 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c

@@ -166,9 +166,9 @@
 	nalloc = 16;	/* first guess at max interfaces */
 	toobig = 0;
 	for (;;) {
-		if (nalloc * sizeof(*ifr) > PAGE_CACHE_SIZE) {
+		if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
 			toobig = 1;
-			nalloc = PAGE_CACHE_SIZE / sizeof(*ifr);
+			nalloc = PAGE_SIZE / sizeof(*ifr);
 			CWARN("Too many interfaces: only enumerating first %d\n",
 			      nalloc);
 		}

diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 61459cf..b01dc42 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c

@@ -27,8 +27,8 @@
 #define LNET_NRB_SMALL_PAGES	1
 #define LNET_NRB_LARGE_MIN	256	/* min value for each CPT */
 #define LNET_NRB_LARGE		(LNET_NRB_LARGE_MIN * 4)
-#define LNET_NRB_LARGE_PAGES   ((LNET_MTU + PAGE_CACHE_SIZE - 1) >> \
-				 PAGE_CACHE_SHIFT)
+#define LNET_NRB_LARGE_PAGES   ((LNET_MTU + PAGE_SIZE - 1) >> \
+				 PAGE_SHIFT)
 
 static char *forwarding = "";
 module_param(forwarding, charp, 0444);
@@ -1338,7 +1338,7 @@
 			return NULL;
 		}
 
-		rb->rb_kiov[i].kiov_len = PAGE_CACHE_SIZE;
+		rb->rb_kiov[i].kiov_len = PAGE_SIZE;
 		rb->rb_kiov[i].kiov_offset = 0;
 		rb->rb_kiov[i].kiov_page = page;
 	}

diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
index eebc924..dcb6e50 100644
--- a/drivers/staging/lustre/lnet/selftest/brw_test.c
+++ b/drivers/staging/lustre/lnet/selftest/brw_test.c

@@ -90,7 +90,7 @@
 		 * NB: this is not going to work for variable page size,
 		 * but we have to keep it for compatibility
 		 */
-		len = npg * PAGE_CACHE_SIZE;
+		len = npg * PAGE_SIZE;
 
 	} else {
 		test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;
@@ -104,7 +104,7 @@
 		opc = breq->blk_opc;
 		flags = breq->blk_flags;
 		len = breq->blk_len;
-		npg = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	}
 
 	if (npg > LNET_MAX_IOV || npg <= 0)
@@ -167,13 +167,13 @@
 
 	if (pattern == LST_BRW_CHECK_SIMPLE) {
 		memcpy(addr, &magic, BRW_MSIZE);
-		addr += PAGE_CACHE_SIZE - BRW_MSIZE;
+		addr += PAGE_SIZE - BRW_MSIZE;
 		memcpy(addr, &magic, BRW_MSIZE);
 		return;
 	}
 
 	if (pattern == LST_BRW_CHECK_FULL) {
-		for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++)
+		for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++)
 			memcpy(addr + i * BRW_MSIZE, &magic, BRW_MSIZE);
 		return;
 	}
@@ -198,7 +198,7 @@
 		if (data != magic)
 			goto bad_data;
 
-		addr += PAGE_CACHE_SIZE - BRW_MSIZE;
+		addr += PAGE_SIZE - BRW_MSIZE;
 		data = *((__u64 *)addr);
 		if (data != magic)
 			goto bad_data;
@@ -207,7 +207,7 @@
 	}
 
 	if (pattern == LST_BRW_CHECK_FULL) {
-		for (i = 0; i < PAGE_CACHE_SIZE / BRW_MSIZE; i++) {
+		for (i = 0; i < PAGE_SIZE / BRW_MSIZE; i++) {
 			data = *(((__u64 *)addr) + i);
 			if (data != magic)
 				goto bad_data;
@@ -278,7 +278,7 @@
 		opc = breq->blk_opc;
 		flags = breq->blk_flags;
 		npg = breq->blk_npg;
-		len = npg * PAGE_CACHE_SIZE;
+		len = npg * PAGE_SIZE;
 
 	} else {
 		test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;
@@ -292,7 +292,7 @@
 		opc = breq->blk_opc;
 		flags = breq->blk_flags;
 		len = breq->blk_len;
-		npg = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	}
 
 	rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
@@ -463,10 +463,10 @@
 			reply->brw_status = EINVAL;
 			return 0;
 		}
-		npg = reqst->brw_len >> PAGE_CACHE_SHIFT;
+		npg = reqst->brw_len >> PAGE_SHIFT;
 
 	} else {
-		npg = (reqst->brw_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	}
 
 	replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;

diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c
index 5c7cb72..79ee6c0 100644
--- a/drivers/staging/lustre/lnet/selftest/conctl.c
+++ b/drivers/staging/lustre/lnet/selftest/conctl.c

@@ -743,7 +743,7 @@
 	if (args->lstio_tes_param &&
 	    (args->lstio_tes_param_len <= 0 ||
 	     args->lstio_tes_param_len >
-	     PAGE_CACHE_SIZE - sizeof(lstcon_test_t)))
+	     PAGE_SIZE - sizeof(lstcon_test_t)))
 		return -EINVAL;
 
 	LIBCFS_ALLOC(batch_name, args->lstio_tes_bat_nmlen + 1);
@@ -819,7 +819,7 @@
 
 	opc = data->ioc_u32[0];
 
-	if (data->ioc_plen1 > PAGE_CACHE_SIZE)
+	if (data->ioc_plen1 > PAGE_SIZE)
 		return -EINVAL;
 
 	LIBCFS_ALLOC(buf, data->ioc_plen1);

diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index bcd7888..35a227d 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c

@@ -786,8 +786,8 @@
 	test_bulk_req_t *brq = &req->tsr_u.bulk_v0;
 
 	brq->blk_opc = param->blk_opc;
-	brq->blk_npg = (param->blk_size + PAGE_CACHE_SIZE - 1) /
-			PAGE_CACHE_SIZE;
+	brq->blk_npg = (param->blk_size + PAGE_SIZE - 1) /
+			PAGE_SIZE;
 	brq->blk_flags = param->blk_flags;
 
 	return 0;
@@ -822,7 +822,7 @@
 	if (transop == LST_TRANS_TSBCLIADD) {
 		npg = sfw_id_pages(test->tes_span);
 		nob = !(feats & LST_FEAT_BULK_LEN) ?
-		      npg * PAGE_CACHE_SIZE :
+		      npg * PAGE_SIZE :
 		      sizeof(lnet_process_id_packed_t) * test->tes_span;
 	}
 
@@ -851,8 +851,8 @@
 			LASSERT(nob > 0);
 
 			len = !(feats & LST_FEAT_BULK_LEN) ?
-			      PAGE_CACHE_SIZE :
-			      min_t(int, nob, PAGE_CACHE_SIZE);
+			      PAGE_SIZE :
+			      min_t(int, nob, PAGE_SIZE);
 			nob -= len;
 
 			bulk->bk_iovs[i].kiov_offset = 0;

diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
index 926c397..e2c5323 100644
--- a/drivers/staging/lustre/lnet/selftest/framework.c
+++ b/drivers/staging/lustre/lnet/selftest/framework.c

@@ -1161,7 +1161,7 @@
 		int len;
 
 		if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-			len = npg * PAGE_CACHE_SIZE;
+			len = npg * PAGE_SIZE;
 
 		} else {
 			len = sizeof(lnet_process_id_packed_t) *

diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index 69be7d6..7d7748d 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c

@@ -90,7 +90,7 @@
 static int
 srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob)
 {
-	nob = min_t(int, nob, PAGE_CACHE_SIZE);
+	nob = min_t(int, nob, PAGE_SIZE);
 
 	LASSERT(nob > 0);
 	LASSERT(i >= 0 && i < bk->bk_niov);

diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h
index 288522d..e689ca1 100644
--- a/drivers/staging/lustre/lnet/selftest/selftest.h
+++ b/drivers/staging/lustre/lnet/selftest/selftest.h

@@ -390,10 +390,10 @@
 	} tsi_u;
 } sfw_test_instance_t;
 
-/* XXX: trailing (PAGE_CACHE_SIZE % sizeof(lnet_process_id_t)) bytes at
- * the end of pages are not used */
+/* XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of
+ * pages are not used */
 #define SFW_MAX_CONCUR	   LST_MAX_CONCUR
-#define SFW_ID_PER_PAGE    (PAGE_CACHE_SIZE / sizeof(lnet_process_id_packed_t))
+#define SFW_ID_PER_PAGE    (PAGE_SIZE / sizeof(lnet_process_id_packed_t))
 #define SFW_MAX_NDESTS	   (LNET_MAX_IOV * SFW_ID_PER_PAGE)
 #define sfw_id_pages(n)    (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
 

diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig
index a09b51c..8ac7cd4 100644
--- a/drivers/staging/lustre/lustre/Kconfig
+++ b/drivers/staging/lustre/lustre/Kconfig

@@ -1,7 +1,7 @@
 config LUSTRE_FS
 	tristate "Lustre file system client support"
 	depends on m && !MIPS && !XTENSA && !SUPERH
-	select LNET
+	depends on LNET
 	select CRYPTO
 	select CRYPTO_CRC32
 	select CRYPTO_CRC32_PCLMUL if X86

diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
index 33e0b99..c6c7f54 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h

@@ -52,7 +52,7 @@
 		return;
 
 	if (PagePrivate(page))
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
 
 	cancel_dirty_page(page);
 	ClearPageMappedToDisk(page);

diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index b5088b1..242bb1e 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h

@@ -1118,7 +1118,7 @@
 	{							 \
 		type *value;				      \
 								  \
-		CLASSERT(PAGE_CACHE_SIZE >= sizeof (*value));       \
+		CLASSERT(PAGE_SIZE >= sizeof (*value));       \
 								  \
 		value = kzalloc(sizeof(*value), GFP_NOFS);	\
 		if (!value)				\

diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index da8bc6e..5aae1d0 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h

@@ -1022,16 +1022,16 @@
  * MDS_READPAGE page size
  *
  * This is the directory page size packed in MDS_READPAGE RPC.
- * It's different than PAGE_CACHE_SIZE because the client needs to
+ * It's different than PAGE_SIZE because the client needs to
  * access the struct lu_dirpage header packed at the beginning of
  * the "page" and without this there isn't any way to know find the
- * lu_dirpage header is if client and server PAGE_CACHE_SIZE differ.
+ * lu_dirpage header is if client and server PAGE_SIZE differ.
  */
 #define LU_PAGE_SHIFT 12
 #define LU_PAGE_SIZE  (1UL << LU_PAGE_SHIFT)
 #define LU_PAGE_MASK  (~(LU_PAGE_SIZE - 1))
 
-#define LU_PAGE_COUNT (1 << (PAGE_CACHE_SHIFT - LU_PAGE_SHIFT))
+#define LU_PAGE_COUNT (1 << (PAGE_SHIFT - LU_PAGE_SHIFT))
 
 /** @} lu_dir */
 

diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index df94f9f..af77eb3 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h

@@ -155,12 +155,12 @@
 		if (cli->cl_max_mds_easize < body->max_mdsize) {
 			cli->cl_max_mds_easize = body->max_mdsize;
 			cli->cl_default_mds_easize =
-			    min_t(__u32, body->max_mdsize, PAGE_CACHE_SIZE);
+			    min_t(__u32, body->max_mdsize, PAGE_SIZE);
 		}
 		if (cli->cl_max_mds_cookiesize < body->max_cookiesize) {
 			cli->cl_max_mds_cookiesize = body->max_cookiesize;
 			cli->cl_default_mds_cookiesize =
-			    min_t(__u32, body->max_cookiesize, PAGE_CACHE_SIZE);
+			    min_t(__u32, body->max_cookiesize, PAGE_SIZE);
 		}
 	}
 }

diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index 4fa1a18..69586a5 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h

@@ -99,21 +99,21 @@
  */
 #define PTLRPC_MAX_BRW_BITS	(LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS)
 #define PTLRPC_MAX_BRW_SIZE	(1 << PTLRPC_MAX_BRW_BITS)
-#define PTLRPC_MAX_BRW_PAGES	(PTLRPC_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define PTLRPC_MAX_BRW_PAGES	(PTLRPC_MAX_BRW_SIZE >> PAGE_SHIFT)
 
 #define ONE_MB_BRW_SIZE		(1 << LNET_MTU_BITS)
 #define MD_MAX_BRW_SIZE		(1 << LNET_MTU_BITS)
-#define MD_MAX_BRW_PAGES	(MD_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define MD_MAX_BRW_PAGES	(MD_MAX_BRW_SIZE >> PAGE_SHIFT)
 #define DT_MAX_BRW_SIZE		PTLRPC_MAX_BRW_SIZE
-#define DT_MAX_BRW_PAGES	(DT_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define DT_MAX_BRW_PAGES	(DT_MAX_BRW_SIZE >> PAGE_SHIFT)
 #define OFD_MAX_BRW_SIZE	(1 << LNET_MTU_BITS)
 
 /* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
 # if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
 #  error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
 # endif
-# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE))
-#  error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE"
+# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE))
+#  error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE"
 # endif
 # if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT)
 #  error "PTLRPC_MAX_BRW_SIZE too big"

diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index 4a0f2e8..4264d97 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h

@@ -272,7 +272,7 @@
 	int		 cl_grant_shrink_interval; /* seconds */
 
 	/* A chunk is an optimal size used by osc_extent to determine
-	 * the extent size. A chunk is max(PAGE_CACHE_SIZE, OST block size)
+	 * the extent size. A chunk is max(PAGE_SIZE, OST block size)
 	 */
 	int		  cl_chunkbits;
 	int		  cl_chunk;
@@ -1318,7 +1318,7 @@
 
 static inline int cli_brw_size(struct obd_device *obd)
 {
-	return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+	return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT;
 }
 
 #endif /* __OBD_H */

diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index 225262fa..f8ee3a3 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h

@@ -500,7 +500,7 @@
 
 #ifdef POISON_BULK
 #define POISON_PAGE(page, val) do {		  \
-	memset(kmap(page), val, PAGE_CACHE_SIZE); \
+	memset(kmap(page), val, PAGE_SIZE); \
 	kunmap(page);				  \
 } while (0)
 #else

diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
index aced41a..96141d1 100644
--- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c

@@ -758,9 +758,9 @@
 				 * --bug 17336
 				 */
 				loff_t size = cl_isize_read(inode);
-				loff_t cur_index = start >> PAGE_CACHE_SHIFT;
+				loff_t cur_index = start >> PAGE_SHIFT;
 				loff_t size_index = (size - 1) >>
-						    PAGE_CACHE_SHIFT;
+						    PAGE_SHIFT;
 
 				if ((size == 0 && cur_index != 0) ||
 				    size_index < cur_index)

diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
index b586d5a..7dd7df5 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c

@@ -307,8 +307,8 @@
 	cli->cl_avail_grant = 0;
 	/* FIXME: Should limit this for the sum of all cl_dirty_max. */
 	cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
-	if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8)
-		cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3);
+	if (cli->cl_dirty_max >> PAGE_SHIFT > totalram_pages / 8)
+		cli->cl_dirty_max = totalram_pages << (PAGE_SHIFT - 3);
 	INIT_LIST_HEAD(&cli->cl_cache_waiters);
 	INIT_LIST_HEAD(&cli->cl_loi_ready_list);
 	INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
@@ -353,15 +353,15 @@
 	 * In the future this should likely be increased. LU-1431
 	 */
 	cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES,
-					  LNET_MTU >> PAGE_CACHE_SHIFT);
+					  LNET_MTU >> PAGE_SHIFT);
 
 	if (!strcmp(name, LUSTRE_MDC_NAME)) {
 		cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
-	} else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) {
+	} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) {
 		cli->cl_max_rpcs_in_flight = 2;
-	} else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) {
+	} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 256 /* MB */) {
 		cli->cl_max_rpcs_in_flight = 3;
-	} else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) {
+	} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 512 /* MB */) {
 		cli->cl_max_rpcs_in_flight = 4;
 	} else {
 		cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;

diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 3e937b0..b913ba9 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c

@@ -107,7 +107,7 @@
 /*
  * 50 ldlm locks for 1MB of RAM.
  */
-#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_CACHE_SHIFT)) * 50)
+#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_SHIFT)) * 50)
 
 /*
  * Maximal possible grant step plan in %.

diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
index c7904a9..74e193e 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c

@@ -546,7 +546,7 @@
 {
 	int avail;
 
-	avail = min_t(int, LDLM_MAXREQSIZE, PAGE_CACHE_SIZE - 512) - req_size;
+	avail = min_t(int, LDLM_MAXREQSIZE, PAGE_SIZE - 512) - req_size;
 	if (likely(avail >= 0))
 		avail /= (int)sizeof(struct lustre_handle);
 	else

diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 4e0a3e5..e4c8288 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c

@@ -134,9 +134,8 @@
  * a header lu_dirpage which describes the start/end hash, and whether this
  * page is empty (contains no dir entry) or hash collide with next page.
  * After client receives reply, several pages will be integrated into dir page
- * in PAGE_CACHE_SIZE (if PAGE_CACHE_SIZE greater than LU_PAGE_SIZE), and the
- * lu_dirpage for this integrated page will be adjusted. See
- * lmv_adjust_dirpages().
+ * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the lu_dirpage
+ * for this integrated page will be adjusted. See lmv_adjust_dirpages().
  *
  */
 
@@ -153,7 +152,7 @@
 	struct page **page_pool;
 	struct page *page;
 	struct lu_dirpage *dp;
-	int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_CACHE_SHIFT;
+	int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_SHIFT;
 	int nrdpgs = 0; /* number of pages read actually */
 	int npages;
 	int i;
@@ -193,8 +192,8 @@
 		if (body->valid & OBD_MD_FLSIZE)
 			cl_isize_write(inode, body->size);
 
-		nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_CACHE_SIZE-1)
-			 >> PAGE_CACHE_SHIFT;
+		nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
+			 >> PAGE_SHIFT;
 		SetPageUptodate(page0);
 	}
 	unlock_page(page0);
@@ -209,7 +208,7 @@
 		page = page_pool[i];
 
 		if (rc < 0 || i >= nrdpgs) {
-			page_cache_release(page);
+			put_page(page);
 			continue;
 		}
 
@@ -230,7 +229,7 @@
 			CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: %d\n",
 			       offset, ret);
 		}
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	if (page_pool != &page0)
@@ -247,7 +246,7 @@
 			truncate_complete_page(page->mapping, page);
 		unlock_page(page);
 	}
-	page_cache_release(page);
+	put_page(page);
 }
 
 /*
@@ -273,7 +272,7 @@
 	if (found > 0 && !radix_tree_exceptional_entry(page)) {
 		struct lu_dirpage *dp;
 
-		page_cache_get(page);
+		get_page(page);
 		spin_unlock_irq(&mapping->tree_lock);
 		/*
 		 * In contrast to find_lock_page() we are sure that directory
@@ -313,7 +312,7 @@
 				page = NULL;
 			}
 		} else {
-			page_cache_release(page);
+			put_page(page);
 			page = ERR_PTR(-EIO);
 		}
 
@@ -1507,7 +1506,7 @@
 			st.st_gid     = body->gid;
 			st.st_rdev    = body->rdev;
 			st.st_size    = body->size;
-			st.st_blksize = PAGE_CACHE_SIZE;
+			st.st_blksize = PAGE_SIZE;
 			st.st_blocks  = body->blocks;
 			st.st_atime   = body->atime;
 			st.st_mtime   = body->mtime;

diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 973f5cd..e3c0f1d 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h

@@ -310,10 +310,10 @@
 /* default to about 40meg of readahead on a given system.  That much tied
  * up in 512k readahead requests serviced at 40ms each is about 1GB/s.
  */
-#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_CACHE_SHIFT))
+#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_SHIFT))
 
 /* default to read-ahead full files smaller than 2MB on the second read */
-#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_CACHE_SHIFT))
+#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT))
 
 enum ra_stat {
 	RA_STAT_HIT = 0,
@@ -657,7 +657,7 @@
 #if BITS_PER_LONG == 32
 	return 1;
 #elif defined(CONFIG_COMPAT)
-	return unlikely(is_compat_task() || (sbi->ll_flags & LL_SBI_32BIT_API));
+	return unlikely(in_compat_syscall() || (sbi->ll_flags & LL_SBI_32BIT_API));
 #else
 	return unlikely(sbi->ll_flags & LL_SBI_32BIT_API);
 #endif
@@ -975,13 +975,13 @@
 static inline void ll_invalidate_page(struct page *vmpage)
 {
 	struct address_space *mapping = vmpage->mapping;
-	loff_t offset = vmpage->index << PAGE_CACHE_SHIFT;
+	loff_t offset = vmpage->index << PAGE_SHIFT;
 
 	LASSERT(PageLocked(vmpage));
 	if (!mapping)
 		return;
 
-	ll_teardown_mmaps(mapping, offset, offset + PAGE_CACHE_SIZE);
+	ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
 	truncate_complete_page(mapping, vmpage);
 }
 

diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 6d6bb33..b57a992 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c

@@ -85,7 +85,7 @@
 
 	si_meminfo(&si);
 	pages = si.totalram - si.totalhigh;
-	if (pages >> (20 - PAGE_CACHE_SHIFT) < 512)
+	if (pages >> (20 - PAGE_SHIFT) < 512)
 		lru_page_max = pages / 2;
 	else
 		lru_page_max = (pages / 4) * 3;
@@ -272,12 +272,12 @@
 	    valid != CLIENT_CONNECT_MDT_REQD) {
 		char *buf;
 
-		buf = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+		buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
 		if (!buf) {
 			err = -ENOMEM;
 			goto out_md_fid;
 		}
-		obd_connect_flags2str(buf, PAGE_CACHE_SIZE,
+		obd_connect_flags2str(buf, PAGE_SIZE,
 				      valid ^ CLIENT_CONNECT_MDT_REQD, ",");
 		LCONSOLE_ERROR_MSG(0x170, "Server %s does not support feature(s) needed for correct operation of this client (%s). Please upgrade server or downgrade client.\n",
 				   sbi->ll_md_exp->exp_obd->obd_name, buf);
@@ -335,7 +335,7 @@
 	if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
 		sbi->ll_md_brw_size = data->ocd_brw_size;
 	else
-		sbi->ll_md_brw_size = PAGE_CACHE_SIZE;
+		sbi->ll_md_brw_size = PAGE_SIZE;
 
 	if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) {
 		LCONSOLE_INFO("Layout lock feature supported.\n");

diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 69445a9..5b484e6 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c

@@ -58,7 +58,7 @@
 		     size_t count)
 {
 	policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
-				 (vma->vm_pgoff << PAGE_CACHE_SHIFT);
+				 (vma->vm_pgoff << PAGE_SHIFT);
 	policy->l_extent.end = (policy->l_extent.start + count - 1) |
 			       ~CFS_PAGE_MASK;
 }
@@ -321,7 +321,7 @@
 
 		vmpage = vio->u.fault.ft_vmpage;
 		if (result != 0 && vmpage) {
-			page_cache_release(vmpage);
+			put_page(vmpage);
 			vmf->page = NULL;
 		}
 	}
@@ -360,7 +360,7 @@
 		lock_page(vmpage);
 		if (unlikely(!vmpage->mapping)) { /* unlucky */
 			unlock_page(vmpage);
-			page_cache_release(vmpage);
+			put_page(vmpage);
 			vmf->page = NULL;
 
 			if (!printed && ++count > 16) {
@@ -457,7 +457,7 @@
 	LASSERTF(last > first, "last %llu first %llu\n", last, first);
 	if (mapping_mapped(mapping)) {
 		rc = 0;
-		unmap_mapping_range(mapping, first + PAGE_CACHE_SIZE - 1,
+		unmap_mapping_range(mapping, first + PAGE_SIZE - 1,
 				    last - first + 1, 0);
 	}
 

diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index b725fc1..f169c0d 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c

@@ -218,7 +218,7 @@
 		offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset;
 		bio_for_each_segment(bvec, bio, iter) {
 			BUG_ON(bvec.bv_offset != 0);
-			BUG_ON(bvec.bv_len != PAGE_CACHE_SIZE);
+			BUG_ON(bvec.bv_len != PAGE_SIZE);
 
 			pages[page_count] = bvec.bv_page;
 			offsets[page_count] = offset;
@@ -232,7 +232,7 @@
 			(rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ,
 			page_count);
 
-	pvec->ldp_size = page_count << PAGE_CACHE_SHIFT;
+	pvec->ldp_size = page_count << PAGE_SHIFT;
 	pvec->ldp_nr = page_count;
 
 	/* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
@@ -507,7 +507,7 @@
 
 	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
-	lo->lo_blocksize = PAGE_CACHE_SIZE;
+	lo->lo_blocksize = PAGE_SIZE;
 	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
@@ -525,11 +525,11 @@
 	lo->lo_queue->queuedata = lo;
 
 	/* queue parameters */
-	CLASSERT(PAGE_CACHE_SIZE < (1 << (sizeof(unsigned short) * 8)));
+	CLASSERT(PAGE_SIZE < (1 << (sizeof(unsigned short) * 8)));
 	blk_queue_logical_block_size(lo->lo_queue,
-				     (unsigned short)PAGE_CACHE_SIZE);
+				     (unsigned short)PAGE_SIZE);
 	blk_queue_max_hw_sectors(lo->lo_queue,
-				 LLOOP_MAX_SEGMENTS << (PAGE_CACHE_SHIFT - 9));
+				 LLOOP_MAX_SEGMENTS << (PAGE_SHIFT - 9));
 	blk_queue_max_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
 
 	set_capacity(disks[lo->lo_number], size);

diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 45941a6..27ab126 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c

@@ -233,7 +233,7 @@
 	pages_number = sbi->ll_ra_info.ra_max_pages;
 	spin_unlock(&sbi->ll_lock);
 
-	mult = 1 << (20 - PAGE_CACHE_SHIFT);
+	mult = 1 << (20 - PAGE_SHIFT);
 	return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
 }
 
@@ -251,12 +251,12 @@
 	if (rc)
 		return rc;
 
-	pages_number *= 1 << (20 - PAGE_CACHE_SHIFT); /* MB -> pages */
+	pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
 
 	if (pages_number > totalram_pages / 2) {
 
 		CERROR("can't set file readahead more than %lu MB\n",
-		       totalram_pages >> (20 - PAGE_CACHE_SHIFT + 1)); /*1/2 of RAM*/
+		       totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/
 		return -ERANGE;
 	}
 
@@ -281,7 +281,7 @@
 	pages_number = sbi->ll_ra_info.ra_max_pages_per_file;
 	spin_unlock(&sbi->ll_lock);
 
-	mult = 1 << (20 - PAGE_CACHE_SHIFT);
+	mult = 1 << (20 - PAGE_SHIFT);
 	return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
 }
 
@@ -326,7 +326,7 @@
 	pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages;
 	spin_unlock(&sbi->ll_lock);
 
-	mult = 1 << (20 - PAGE_CACHE_SHIFT);
+	mult = 1 << (20 - PAGE_SHIFT);
 	return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
 }
 
@@ -349,7 +349,7 @@
 	 */
 	if (pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
 		CERROR("can't set max_read_ahead_whole_mb more than max_read_ahead_per_file_mb: %lu\n",
-		       sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_CACHE_SHIFT));
+		       sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_SHIFT));
 		return -ERANGE;
 	}
 
@@ -366,7 +366,7 @@
 	struct super_block     *sb    = m->private;
 	struct ll_sb_info      *sbi   = ll_s2sbi(sb);
 	struct cl_client_cache *cache = &sbi->ll_cache;
-	int shift = 20 - PAGE_CACHE_SHIFT;
+	int shift = 20 - PAGE_SHIFT;
 	int max_cached_mb;
 	int unused_mb;
 
@@ -405,7 +405,7 @@
 		return -EFAULT;
 	kernbuf[count] = 0;
 
-	mult = 1 << (20 - PAGE_CACHE_SHIFT);
+	mult = 1 << (20 - PAGE_SHIFT);
 	buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) -
 		  kernbuf;
 	rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
@@ -415,7 +415,7 @@
 	if (pages_number < 0 || pages_number > totalram_pages) {
 		CERROR("%s: can't set max cache more than %lu MB\n",
 		       ll_get_fsname(sb, NULL, 0),
-		       totalram_pages >> (20 - PAGE_CACHE_SHIFT));
+		       totalram_pages >> (20 - PAGE_SHIFT));
 		return -ERANGE;
 	}
 

diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 34614ac..edab6c5 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c

@@ -146,10 +146,10 @@
 		 */
 		io->ci_lockreq = CILR_NEVER;
 
-		pos = vmpage->index << PAGE_CACHE_SHIFT;
+		pos = vmpage->index << PAGE_SHIFT;
 
 		/* Create a temp IO to serve write. */
-		result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE);
+		result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE);
 		if (result == 0) {
 			cio->cui_fd = LUSTRE_FPRIVATE(file);
 			cio->cui_iter = NULL;
@@ -498,7 +498,7 @@
 		}
 		if (rc != 1)
 			unlock_page(vmpage);
-		page_cache_release(vmpage);
+		put_page(vmpage);
 	} else {
 		which = RA_STAT_FAILED_GRAB_PAGE;
 		msg   = "g_c_p_n failed";
@@ -521,13 +521,13 @@
  * striped over, rather than having a constant value for all files here.
  */
 
-/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_CACHE_SHIFT)).
+/* RAS_INCREASE_STEP should be (1UL << (inode->i_blkbits - PAGE_SHIFT)).
  * Temporarily set RAS_INCREASE_STEP to 1MB. After 4MB RPC is enabled
  * by default, this should be adjusted corresponding with max_read_ahead_mb
  * and max_read_ahead_per_file_mb otherwise the readahead budget can be used
  * up quickly which will affect read performance significantly. See LU-2816
  */
-#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define RAS_INCREASE_STEP(inode) (ONE_MB_BRW_SIZE >> PAGE_SHIFT)
 
 static inline int stride_io_mode(struct ll_readahead_state *ras)
 {
@@ -739,7 +739,7 @@
 			end = rpc_boundary;
 
 		/* Truncate RA window to end of file */
-		end = min(end, (unsigned long)((kms - 1) >> PAGE_CACHE_SHIFT));
+		end = min(end, (unsigned long)((kms - 1) >> PAGE_SHIFT));
 
 		ras->ras_next_readahead = max(end, end + 1);
 		RAS_CDEBUG(ras);
@@ -776,7 +776,7 @@
 	if (reserved != 0)
 		ll_ra_count_put(ll_i2sbi(inode), reserved);
 
-	if (ra_end == end + 1 && ra_end == (kms >> PAGE_CACHE_SHIFT))
+	if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT))
 		ll_ra_stats_inc(mapping, RA_STAT_EOF);
 
 	/* if we didn't get to the end of the region we reserved from
@@ -985,8 +985,8 @@
 	if (ras->ras_requests == 2 && !ras->ras_request_index) {
 		__u64 kms_pages;
 
-		kms_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
-			    PAGE_CACHE_SHIFT;
+		kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >>
+			    PAGE_SHIFT;
 
 		CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
 		       ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages_per_file);
@@ -1173,7 +1173,7 @@
 		 * PageWriteback or clean the page.
 		 */
 		result = cl_sync_file_range(inode, offset,
-					    offset + PAGE_CACHE_SIZE - 1,
+					    offset + PAGE_SIZE - 1,
 					    CL_FSYNC_LOCAL, 1);
 		if (result > 0) {
 			/* actually we may have written more than one page.
@@ -1211,7 +1211,7 @@
 	int ignore_layout = 0;
 
 	if (wbc->range_cyclic) {
-		start = mapping->writeback_index << PAGE_CACHE_SHIFT;
+		start = mapping->writeback_index << PAGE_SHIFT;
 		end = OBD_OBJECT_EOF;
 	} else {
 		start = wbc->range_start;
@@ -1241,7 +1241,7 @@
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
 		if (end == OBD_OBJECT_EOF)
 			end = i_size_read(inode);
-		mapping->writeback_index = (end >> PAGE_CACHE_SHIFT) + 1;
+		mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
 	}
 	return result;
 }

diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 7a5db67..69aa15e 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c

@@ -87,7 +87,7 @@
 	 * below because they are run with page locked and all our io is
 	 * happening with locked page too
 	 */
-	if (offset == 0 && length == PAGE_CACHE_SIZE) {
+	if (offset == 0 && length == PAGE_SIZE) {
 		env = cl_env_get(&refcheck);
 		if (!IS_ERR(env)) {
 			inode = vmpage->mapping->host;
@@ -193,8 +193,8 @@
 		return -EFBIG;
 	}
 
-	*max_pages = (user_addr + size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	*max_pages -= user_addr >> PAGE_CACHE_SHIFT;
+	*max_pages = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	*max_pages -= user_addr >> PAGE_SHIFT;
 
 	*pages = libcfs_kvzalloc(*max_pages * sizeof(**pages), GFP_NOFS);
 	if (*pages) {
@@ -217,7 +217,7 @@
 	for (i = 0; i < npages; i++) {
 		if (do_dirty)
 			set_page_dirty_lock(pages[i]);
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 	}
 	kvfree(pages);
 }
@@ -357,7 +357,7 @@
  * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc.
  */
 #define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) *	  \
-		       PAGE_CACHE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
+		       PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
 static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 			       loff_t file_offset)
 {
@@ -382,8 +382,8 @@
 	CDEBUG(D_VFSTRACE,
 	       "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
 	       inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE,
-	       file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
-	       MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
+	       file_offset, file_offset, count >> PAGE_SHIFT,
+	       MAX_DIO_SIZE >> PAGE_SHIFT);
 
 	/* Check that all user buffers are aligned as well */
 	if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
@@ -432,8 +432,8 @@
 			 * page worth of page pointers = 4MB on i386.
 			 */
 			if (result == -ENOMEM &&
-			    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
-				   PAGE_CACHE_SIZE) {
+			    size > (PAGE_SIZE / sizeof(*pages)) *
+			    PAGE_SIZE) {
 				size = ((((size / 2) - 1) |
 					 ~CFS_PAGE_MASK) + 1) &
 					CFS_PAGE_MASK;
@@ -474,10 +474,10 @@
 			  loff_t pos, unsigned len, unsigned flags,
 			  struct page **pagep, void **fsdata)
 {
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	struct page *page;
 	int rc;
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned from = pos & (PAGE_SIZE - 1);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
@@ -488,7 +488,7 @@
 	rc = ll_prepare_write(file, page, from, from + len);
 	if (rc) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return rc;
 }
@@ -497,12 +497,12 @@
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
 {
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned from = pos & (PAGE_SIZE - 1);
 	int rc;
 
 	rc = ll_commit_write(file, page, from, from + copied);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return rc ?: copied;
 }

diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index fb0c26e..85a8359 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c

@@ -512,9 +512,9 @@
 		vio->cui_ra_window_set = 1;
 		bead->lrr_start = cl_index(obj, pos);
 		/*
-		 * XXX: explicit PAGE_CACHE_SIZE
+		 * XXX: explicit PAGE_SIZE
 		 */
-		bead->lrr_count = cl_index(obj, tot + PAGE_CACHE_SIZE - 1);
+		bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1);
 		ll_ra_read_in(file, bead);
 	}
 
@@ -959,7 +959,7 @@
 		 * We're completely overwriting an existing page, so _don't_
 		 * set it up to date until commit_write
 		 */
-		if (from == 0 && to == PAGE_CACHE_SIZE) {
+		if (from == 0 && to == PAGE_SIZE) {
 			CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n");
 			POISON_PAGE(page, 0x11);
 		} else
@@ -1022,7 +1022,7 @@
 			set_page_dirty(vmpage);
 			vvp_write_pending(cl2ccc(obj), cp);
 		} else if (result == -EDQUOT) {
-			pgoff_t last_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+			pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT;
 			bool need_clip = true;
 
 			/*
@@ -1040,7 +1040,7 @@
 			 * being.
 			 */
 			if (last_index > pg->cp_index) {
-				to = PAGE_CACHE_SIZE;
+				to = PAGE_SIZE;
 				need_clip = false;
 			} else if (last_index == pg->cp_index) {
 				int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;

diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 850bae7..33ca3eb 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c

@@ -57,7 +57,7 @@
 	struct page *vmpage = cp->cpg_page;
 
 	LASSERT(vmpage);
-	page_cache_release(vmpage);
+	put_page(vmpage);
 }
 
 static void vvp_page_fini(const struct lu_env *env,
@@ -164,12 +164,12 @@
 	LASSERT(vmpage);
 	LASSERT(PageLocked(vmpage));
 
-	offset = vmpage->index << PAGE_CACHE_SHIFT;
+	offset = vmpage->index << PAGE_SHIFT;
 
 	/*
 	 * XXX is it safe to call this with the page lock held?
 	 */
-	ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_CACHE_SIZE);
+	ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE);
 	return 0;
 }
 
@@ -537,7 +537,7 @@
 	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
 
 	cpg->cpg_page = vmpage;
-	page_cache_get(vmpage);
+	get_page(vmpage);
 
 	INIT_LIST_HEAD(&cpg->cpg_pending_linkage);
 	if (page->cp_type == CPT_CACHEABLE) {

diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 0f776cf..9abb7c2 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c

@@ -2017,7 +2017,7 @@
  * |s|e|f|p|ent| 0 | ... | 0 |
  * '-----------------   -----'
  *
- * However, on hosts where the native VM page size (PAGE_CACHE_SIZE) is
+ * However, on hosts where the native VM page size (PAGE_SIZE) is
  * larger than LU_PAGE_SIZE, a single host page may contain multiple
  * lu_dirpages. After reading the lu_dirpages from the MDS, the
  * ldp_hash_end of the first lu_dirpage refers to the one immediately
@@ -2048,7 +2048,7 @@
  * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
  *   to the first entry of the next lu_dirpage.
  */
-#if PAGE_CACHE_SIZE > LU_PAGE_SIZE
+#if PAGE_SIZE > LU_PAGE_SIZE
 static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
 {
 	int i;
@@ -2101,7 +2101,7 @@
 }
 #else
 #define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0)
-#endif	/* PAGE_CACHE_SIZE > LU_PAGE_SIZE */
+#endif	/* PAGE_SIZE > LU_PAGE_SIZE */
 
 static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data,
 			struct page **pages, struct ptlrpc_request **request)
@@ -2110,7 +2110,7 @@
 	struct lmv_obd		*lmv = &obd->u.lmv;
 	__u64			offset = op_data->op_offset;
 	int			rc;
-	int			ncfspgs; /* pages read in PAGE_CACHE_SIZE */
+	int			ncfspgs; /* pages read in PAGE_SIZE */
 	int			nlupgs; /* pages read in LU_PAGE_SIZE */
 	struct lmv_tgt_desc	*tgt;
 
@@ -2129,8 +2129,8 @@
 	if (rc != 0)
 		return rc;
 
-	ncfspgs = ((*request)->rq_bulk->bd_nob_transferred + PAGE_CACHE_SIZE - 1)
-		 >> PAGE_CACHE_SHIFT;
+	ncfspgs = ((*request)->rq_bulk->bd_nob_transferred + PAGE_SIZE - 1)
+		 >> PAGE_SHIFT;
 	nlupgs = (*request)->rq_bulk->bd_nob_transferred >> LU_PAGE_SHIFT;
 	LASSERT(!((*request)->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK));
 	LASSERT(ncfspgs > 0 && ncfspgs <= op_data->op_npages);

diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index 55dd8ef..b91d3ff 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c

@@ -1002,10 +1002,10 @@
 
 	/* NB req now owns desc and will free it when it gets freed */
 	for (i = 0; i < op_data->op_npages; i++)
-		ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_CACHE_SIZE);
+		ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE);
 
 	mdc_readdir_pack(req, op_data->op_offset,
-			 PAGE_CACHE_SIZE * op_data->op_npages,
+			 PAGE_SIZE * op_data->op_npages,
 			 &op_data->op_fid1);
 
 	ptlrpc_request_set_replen(req);
@@ -1037,7 +1037,7 @@
 	if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
 		CERROR("Unexpected # bytes transferred: %d (%ld expected)\n",
 		       req->rq_bulk->bd_nob_transferred,
-		       PAGE_CACHE_SIZE * op_data->op_npages);
+		       PAGE_SIZE * op_data->op_npages);
 		ptlrpc_req_finished(req);
 		return -EPROTO;
 	}

diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index 65caffe..3924b09 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c

@@ -1113,7 +1113,7 @@
 }
 
 enum {
-	CONFIG_READ_NRPAGES_INIT = 1 << (20 - PAGE_CACHE_SHIFT),
+	CONFIG_READ_NRPAGES_INIT = 1 << (20 - PAGE_SHIFT),
 	CONFIG_READ_NRPAGES      = 4
 };
 
@@ -1137,19 +1137,19 @@
 	LASSERT(cfg->cfg_instance);
 	LASSERT(cfg->cfg_sb == cfg->cfg_instance);
 
-	inst = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+	inst = kzalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!inst)
 		return -ENOMEM;
 
-	pos = snprintf(inst, PAGE_CACHE_SIZE, "%p", cfg->cfg_instance);
-	if (pos >= PAGE_CACHE_SIZE) {
+	pos = snprintf(inst, PAGE_SIZE, "%p", cfg->cfg_instance);
+	if (pos >= PAGE_SIZE) {
 		kfree(inst);
 		return -E2BIG;
 	}
 
 	++pos;
 	buf   = inst + pos;
-	bufsz = PAGE_CACHE_SIZE - pos;
+	bufsz = PAGE_SIZE - pos;
 
 	while (datalen > 0) {
 		int   entry_len = sizeof(*entry);
@@ -1181,7 +1181,7 @@
 		/* Keep this swab for normal mixed endian handling. LU-1644 */
 		if (mne_swab)
 			lustre_swab_mgs_nidtbl_entry(entry);
-		if (entry->mne_length > PAGE_CACHE_SIZE) {
+		if (entry->mne_length > PAGE_SIZE) {
 			CERROR("MNE too large (%u)\n", entry->mne_length);
 			break;
 		}
@@ -1282,7 +1282,7 @@
 
 		rc = -ENOMEM;
 		lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
-		if (!lcfg) {
+		if (IS_ERR(lcfg)) {
 			CERROR("mgc: cannot allocate memory\n");
 			break;
 		}
@@ -1371,7 +1371,7 @@
 	}
 	body->mcb_offset = cfg->cfg_last_idx + 1;
 	body->mcb_type   = cld->cld_type;
-	body->mcb_bits   = PAGE_CACHE_SHIFT;
+	body->mcb_bits   = PAGE_SHIFT;
 	body->mcb_units  = nrpages;
 
 	/* allocate bulk transfer descriptor */
@@ -1383,7 +1383,7 @@
 	}
 
 	for (i = 0; i < nrpages; i++)
-		ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_CACHE_SIZE);
+		ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE);
 
 	ptlrpc_request_set_replen(req);
 	rc = ptlrpc_queue_wait(req);
@@ -1411,7 +1411,7 @@
 		goto out;
 	}
 
-	if (ealen > nrpages << PAGE_CACHE_SHIFT) {
+	if (ealen > nrpages << PAGE_SHIFT) {
 		rc = -EINVAL;
 		goto out;
 	}
@@ -1439,7 +1439,7 @@
 
 		ptr = kmap(pages[i]);
 		rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr,
-					     min_t(int, ealen, PAGE_CACHE_SIZE),
+					     min_t(int, ealen, PAGE_SIZE),
 					     mne_swab);
 		kunmap(pages[i]);
 		if (rc2 < 0) {
@@ -1448,7 +1448,7 @@
 			break;
 		}
 
-		ealen -= PAGE_CACHE_SIZE;
+		ealen -= PAGE_SIZE;
 	}
 
 out:

diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
index 231a2f2..3945800 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c

@@ -1477,7 +1477,7 @@
 	/*
 	 * XXX for now.
 	 */
-	return (loff_t)idx << PAGE_CACHE_SHIFT;
+	return (loff_t)idx << PAGE_SHIFT;
 }
 EXPORT_SYMBOL(cl_offset);
 
@@ -1489,13 +1489,13 @@
 	/*
 	 * XXX for now.
 	 */
-	return offset >> PAGE_CACHE_SHIFT;
+	return offset >> PAGE_SHIFT;
 }
 EXPORT_SYMBOL(cl_index);
 
 int cl_page_size(const struct cl_object *obj)
 {
-	return 1 << PAGE_CACHE_SHIFT;
+	return 1 << PAGE_SHIFT;
 }
 EXPORT_SYMBOL(cl_page_size);
 

diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index 1a938e1..c2cf015 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c

@@ -461,9 +461,9 @@
 		CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
 		ret = -EINVAL;
 	}
-	if ((u64val & ~CFS_PAGE_MASK) >= PAGE_CACHE_SIZE) {
+	if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) {
 		CWARN("mask failed: u64val %llu >= %llu\n", u64val,
-		      (__u64)PAGE_CACHE_SIZE);
+		      (__u64)PAGE_SIZE);
 		ret = -EINVAL;
 	}
 
@@ -509,7 +509,7 @@
 	 * For clients with less memory, a larger fraction is needed
 	 * for other purposes (mostly for BGL).
 	 */
-	if (totalram_pages <= 512 << (20 - PAGE_CACHE_SHIFT))
+	if (totalram_pages <= 512 << (20 - PAGE_SHIFT))
 		obd_max_dirty_pages = totalram_pages / 4;
 	else
 		obd_max_dirty_pages = totalram_pages / 2;

diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
index 9496c09..b41b65e2 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c

@@ -47,7 +47,6 @@
 #include "../../include/lustre/lustre_idl.h"
 
 #include <linux/fs.h>
-#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
 
 void obdo_refresh_inode(struct inode *dst, struct obdo *src, u32 valid)
 {
@@ -71,8 +70,8 @@
 	if (valid & OBD_MD_FLBLKSZ && src->o_blksize > (1 << dst->i_blkbits))
 		dst->i_blkbits = ffs(src->o_blksize) - 1;
 
-	if (dst->i_blkbits < PAGE_CACHE_SHIFT)
-		dst->i_blkbits = PAGE_CACHE_SHIFT;
+	if (dst->i_blkbits < PAGE_SHIFT)
+		dst->i_blkbits = PAGE_SHIFT;
 
 	/* allocation of space */
 	if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks)

diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
index fd333b9..e6bf414 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c

@@ -100,7 +100,7 @@
 				 char *buf)
 {
 	return sprintf(buf, "%ul\n",
-			obd_max_dirty_pages / (1 << (20 - PAGE_CACHE_SHIFT)));
+			obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT)));
 }
 
 static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr,
@@ -113,14 +113,14 @@
 	if (rc)
 		return rc;
 
-	val *= 1 << (20 - PAGE_CACHE_SHIFT); /* convert to pages */
+	val *= 1 << (20 - PAGE_SHIFT); /* convert to pages */
 
 	if (val > ((totalram_pages / 10) * 9)) {
 		/* Somebody wants to assign too much memory to dirty pages */
 		return -EINVAL;
 	}
 
-	if (val < 4 << (20 - PAGE_CACHE_SHIFT)) {
+	if (val < 4 << (20 - PAGE_SHIFT)) {
 		/* Less than 4 Mb for dirty cache is also bad */
 		return -EINVAL;
 	}

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 65a4746..978568a 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c

@@ -840,8 +840,8 @@
 
 #if BITS_PER_LONG == 32
 	/* limit hashtable size for lowmem systems to low RAM */
-	if (cache_size > 1 << (30 - PAGE_CACHE_SHIFT))
-		cache_size = 1 << (30 - PAGE_CACHE_SHIFT) * 3 / 4;
+	if (cache_size > 1 << (30 - PAGE_SHIFT))
+		cache_size = 1 << (30 - PAGE_SHIFT) * 3 / 4;
 #endif
 
 	/* clear off unreasonable cache setting. */
@@ -853,7 +853,7 @@
 		lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
 	}
 	cache_size = cache_size / 100 * lu_cache_percent *
-		(PAGE_CACHE_SIZE / 1024);
+		(PAGE_SIZE / 1024);
 
 	for (bits = 1; (1 << bits) < cache_size; ++bits) {
 		;

diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index 64ffe24..1e83669 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c

@@ -278,7 +278,7 @@
 	struct page *vmpage      = ep->ep_vmpage;
 
 	atomic_dec(&eco->eo_npages);
-	page_cache_release(vmpage);
+	put_page(vmpage);
 }
 
 static int echo_page_prep(const struct lu_env *env,
@@ -373,7 +373,7 @@
 	struct echo_object *eco = cl2echo_obj(obj);
 
 	ep->ep_vmpage = vmpage;
-	page_cache_get(vmpage);
+	get_page(vmpage);
 	mutex_init(&ep->ep_lock);
 	cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops);
 	atomic_inc(&eco->eo_npages);
@@ -1138,7 +1138,7 @@
 	LASSERT(rc == 0);
 
 	rc = cl_echo_enqueue0(env, eco, offset,
-			      offset + npages * PAGE_CACHE_SIZE - 1,
+			      offset + npages * PAGE_SIZE - 1,
 			      rw == READ ? LCK_PR : LCK_PW, &lh.cookie,
 			      CEF_NEVER);
 	if (rc < 0)
@@ -1311,11 +1311,11 @@
 	int      delta;
 
 	/* no partial pages on the client */
-	LASSERT(count == PAGE_CACHE_SIZE);
+	LASSERT(count == PAGE_SIZE);
 
 	addr = kmap(page);
 
-	for (delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
+	for (delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
 		if (rw == OBD_BRW_WRITE) {
 			stripe_off = offset + delta;
 			stripe_id = id;
@@ -1341,11 +1341,11 @@
 	int     rc2;
 
 	/* no partial pages on the client */
-	LASSERT(count == PAGE_CACHE_SIZE);
+	LASSERT(count == PAGE_SIZE);
 
 	addr = kmap(page);
 
-	for (rc = delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
+	for (rc = delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
 		stripe_off = offset + delta;
 		stripe_id = id;
 
@@ -1391,7 +1391,7 @@
 		return -EINVAL;
 
 	/* XXX think again with misaligned I/O */
-	npages = count >> PAGE_CACHE_SHIFT;
+	npages = count >> PAGE_SHIFT;
 
 	if (rw == OBD_BRW_WRITE)
 		brw_flags = OBD_BRW_ASYNC;
@@ -1408,7 +1408,7 @@
 
 	for (i = 0, pgp = pga, off = offset;
 	     i < npages;
-	     i++, pgp++, off += PAGE_CACHE_SIZE) {
+	     i++, pgp++, off += PAGE_SIZE) {
 
 		LASSERT(!pgp->pg);      /* for cleanup */
 
@@ -1418,7 +1418,7 @@
 			goto out;
 
 		pages[i] = pgp->pg;
-		pgp->count = PAGE_CACHE_SIZE;
+		pgp->count = PAGE_SIZE;
 		pgp->off = off;
 		pgp->flag = brw_flags;
 
@@ -1473,8 +1473,8 @@
 	if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0)
 		return -EINVAL;
 
-	npages = batch >> PAGE_CACHE_SHIFT;
-	tot_pages = count >> PAGE_CACHE_SHIFT;
+	npages = batch >> PAGE_SHIFT;
+	tot_pages = count >> PAGE_SHIFT;
 
 	lnb = kcalloc(npages, sizeof(struct niobuf_local), GFP_NOFS);
 	rnb = kcalloc(npages, sizeof(struct niobuf_remote), GFP_NOFS);
@@ -1497,9 +1497,9 @@
 		if (tot_pages < npages)
 			npages = tot_pages;
 
-		for (i = 0; i < npages; i++, off += PAGE_CACHE_SIZE) {
+		for (i = 0; i < npages; i++, off += PAGE_SIZE) {
 			rnb[i].offset = off;
-			rnb[i].len = PAGE_CACHE_SIZE;
+			rnb[i].len = PAGE_SIZE;
 			rnb[i].flags = brw_flags;
 		}
 
@@ -1878,7 +1878,7 @@
 {
 	LCONSOLE_INFO("Echo OBD driver; http://www.lustre.org/\n");
 
-	LASSERT(PAGE_CACHE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
+	LASSERT(PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
 
 	return echo_client_init();
 }

diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
index 57c43c5..a3358c3 100644
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c

@@ -162,15 +162,15 @@
 	if (rc)
 		return rc;
 
-	pages_number *= 1 << (20 - PAGE_CACHE_SHIFT); /* MB -> pages */
+	pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
 
 	if (pages_number <= 0 ||
-	    pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_CACHE_SHIFT) ||
+	    pages_number > OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) ||
 	    pages_number > totalram_pages / 4) /* 1/4 of RAM */
 		return -ERANGE;
 
 	client_obd_list_lock(&cli->cl_loi_list_lock);
-	cli->cl_dirty_max = (u32)(pages_number << PAGE_CACHE_SHIFT);
+	cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT);
 	osc_wake_cache_waiters(cli);
 	client_obd_list_unlock(&cli->cl_loi_list_lock);
 
@@ -182,7 +182,7 @@
 {
 	struct obd_device *dev = m->private;
 	struct client_obd *cli = &dev->u.cli;
-	int shift = 20 - PAGE_CACHE_SHIFT;
+	int shift = 20 - PAGE_SHIFT;
 
 	seq_printf(m,
 		   "used_mb: %d\n"
@@ -211,7 +211,7 @@
 		return -EFAULT;
 	kernbuf[count] = 0;
 
-	mult = 1 << (20 - PAGE_CACHE_SHIFT);
+	mult = 1 << (20 - PAGE_SHIFT);
 	buffer += lprocfs_find_named_value(kernbuf, "used_mb:", &count) -
 		  kernbuf;
 	rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
@@ -569,12 +569,12 @@
 
 	/* if the max_pages is specified in bytes, convert to pages */
 	if (val >= ONE_MB_BRW_SIZE)
-		val >>= PAGE_CACHE_SHIFT;
+		val >>= PAGE_SHIFT;
 
-	chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_CACHE_SHIFT)) - 1);
+	chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1);
 	/* max_pages_per_rpc must be chunk aligned */
 	val = (val + ~chunk_mask) & chunk_mask;
-	if (val == 0 || val > ocd->ocd_brw_size >> PAGE_CACHE_SHIFT) {
+	if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) {
 		return -ERANGE;
 	}
 	client_obd_list_lock(&cli->cl_loi_list_lock);

diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 6336311..5f25bf8 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c

@@ -544,7 +544,7 @@
 		return -ERANGE;
 
 	LASSERT(cur->oe_osclock == victim->oe_osclock);
-	ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_CACHE_SHIFT;
+	ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT;
 	chunk_start = cur->oe_start >> ppc_bits;
 	chunk_end = cur->oe_end >> ppc_bits;
 	if (chunk_start != (victim->oe_end >> ppc_bits) + 1 &&
@@ -647,8 +647,8 @@
 	lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
 	LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
 
-	LASSERT(cli->cl_chunkbits >= PAGE_CACHE_SHIFT);
-	ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
+	LASSERT(cli->cl_chunkbits >= PAGE_SHIFT);
+	ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
 	chunk_mask = ~((1 << ppc_bits) - 1);
 	chunksize = 1 << cli->cl_chunkbits;
 	chunk = index >> ppc_bits;
@@ -871,8 +871,8 @@
 
 	if (!sent) {
 		lost_grant = ext->oe_grants;
-	} else if (blocksize < PAGE_CACHE_SIZE &&
-		   last_count != PAGE_CACHE_SIZE) {
+	} else if (blocksize < PAGE_SIZE &&
+		   last_count != PAGE_SIZE) {
 		/* For short writes we shouldn't count parts of pages that
 		 * span a whole chunk on the OST side, or our accounting goes
 		 * wrong.  Should match the code in filter_grant_check.
@@ -884,7 +884,7 @@
 		if (end)
 			count += blocksize - end;
 
-		lost_grant = PAGE_CACHE_SIZE - count;
+		lost_grant = PAGE_SIZE - count;
 	}
 	if (ext->oe_grants > 0)
 		osc_free_grant(cli, nr_pages, lost_grant);
@@ -967,7 +967,7 @@
 	struct osc_async_page *oap;
 	struct osc_async_page *tmp;
 	int pages_in_chunk = 0;
-	int ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
+	int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
 	__u64 trunc_chunk = trunc_index >> ppc_bits;
 	int grants = 0;
 	int nr_pages = 0;
@@ -1125,7 +1125,7 @@
 	if (!(last->oap_async_flags & ASYNC_COUNT_STABLE)) {
 		last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
 		LASSERT(last->oap_count > 0);
-		LASSERT(last->oap_page_off + last->oap_count <= PAGE_CACHE_SIZE);
+		LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE);
 		last->oap_async_flags |= ASYNC_COUNT_STABLE;
 	}
 
@@ -1134,7 +1134,7 @@
 	 */
 	list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
 		if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) {
-			oap->oap_count = PAGE_CACHE_SIZE - oap->oap_page_off;
+			oap->oap_count = PAGE_SIZE - oap->oap_page_off;
 			oap->oap_async_flags |= ASYNC_COUNT_STABLE;
 		}
 	}
@@ -1158,7 +1158,7 @@
 	struct osc_object *obj = ext->oe_obj;
 	struct client_obd *cli = osc_cli(obj);
 	struct osc_extent *next;
-	int ppc_bits = cli->cl_chunkbits - PAGE_CACHE_SHIFT;
+	int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
 	pgoff_t chunk = index >> ppc_bits;
 	pgoff_t end_chunk;
 	pgoff_t end_index;
@@ -1293,9 +1293,9 @@
 		return 0;
 	else if (cl_offset(obj, page->cp_index + 1) > kms)
 		/* catch sub-page write at end of file */
-		return kms % PAGE_CACHE_SIZE;
+		return kms % PAGE_SIZE;
 	else
-		return PAGE_CACHE_SIZE;
+		return PAGE_SIZE;
 }
 
 static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
@@ -1376,10 +1376,10 @@
 	assert_spin_locked(&cli->cl_loi_list_lock.lock);
 	LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
 	atomic_inc(&obd_dirty_pages);
-	cli->cl_dirty += PAGE_CACHE_SIZE;
+	cli->cl_dirty += PAGE_SIZE;
 	pga->flag |= OBD_BRW_FROM_GRANT;
 	CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n",
-	       PAGE_CACHE_SIZE, pga, pga->pg);
+	       PAGE_SIZE, pga, pga->pg);
 	osc_update_next_shrink(cli);
 }
 
@@ -1396,11 +1396,11 @@
 
 	pga->flag &= ~OBD_BRW_FROM_GRANT;
 	atomic_dec(&obd_dirty_pages);
-	cli->cl_dirty -= PAGE_CACHE_SIZE;
+	cli->cl_dirty -= PAGE_SIZE;
 	if (pga->flag & OBD_BRW_NOCACHE) {
 		pga->flag &= ~OBD_BRW_NOCACHE;
 		atomic_dec(&obd_dirty_transit_pages);
-		cli->cl_dirty_transit -= PAGE_CACHE_SIZE;
+		cli->cl_dirty_transit -= PAGE_SIZE;
 	}
 }
 
@@ -1456,7 +1456,7 @@
  * used, we should return these grants to OST. There're two cases where grants
  * can be lost:
  * 1. truncate;
- * 2. blocksize at OST is less than PAGE_CACHE_SIZE and a partial page was
+ * 2. blocksize at OST is less than PAGE_SIZE and a partial page was
  *    written. In this case OST may use less chunks to serve this partial
  *    write. OSTs don't actually know the page size on the client side. so
  *    clients have to calculate lost grant by the blocksize on the OST.
@@ -1469,7 +1469,7 @@
 
 	client_obd_list_lock(&cli->cl_loi_list_lock);
 	atomic_sub(nr_pages, &obd_dirty_pages);
-	cli->cl_dirty -= nr_pages << PAGE_CACHE_SHIFT;
+	cli->cl_dirty -= nr_pages << PAGE_SHIFT;
 	cli->cl_lost_grant += lost_grant;
 	if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
 		/* borrow some grant from truncate to avoid the case that
@@ -1512,11 +1512,11 @@
 	if (rc < 0)
 		return 0;
 
-	if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max &&
+	if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
 	    atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
 		osc_consume_write_grant(cli, &oap->oap_brw_page);
 		if (transient) {
-			cli->cl_dirty_transit += PAGE_CACHE_SIZE;
+			cli->cl_dirty_transit += PAGE_SIZE;
 			atomic_inc(&obd_dirty_transit_pages);
 			oap->oap_brw_flags |= OBD_BRW_NOCACHE;
 		}
@@ -1562,7 +1562,7 @@
 	 * of queued writes and create a discontiguous rpc stream
 	 */
 	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) ||
-	    cli->cl_dirty_max < PAGE_CACHE_SIZE     ||
+	    cli->cl_dirty_max < PAGE_SIZE     ||
 	    cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync) {
 		rc = -EDQUOT;
 		goto out;
@@ -1632,7 +1632,7 @@
 
 		ocw->ocw_rc = -EDQUOT;
 		/* we can't dirty more */
-		if ((cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max) ||
+		if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) ||
 		    (atomic_read(&obd_dirty_pages) + 1 >
 		     obd_max_dirty_pages)) {
 			CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n",

diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index d720b1a..ce9ddd5 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c

@@ -410,7 +410,7 @@
 	int result;
 
 	opg->ops_from = 0;
-	opg->ops_to = PAGE_CACHE_SIZE;
+	opg->ops_to = PAGE_SIZE;
 
 	result = osc_prep_async_page(osc, opg, vmpage,
 				     cl_offset(obj, page->cp_index));
@@ -487,9 +487,9 @@
 /* LRU pages are freed in batch mode. OSC should at least free this
  * number of pages to avoid running out of LRU budget, and..
  */
-static const int lru_shrink_min = 2 << (20 - PAGE_CACHE_SHIFT);  /* 2M */
+static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT);  /* 2M */
 /* free this number at most otherwise it will take too long time to finish. */
-static const int lru_shrink_max = 32 << (20 - PAGE_CACHE_SHIFT); /* 32M */
+static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */
 
 /* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
  * we should free slots aggressively. In this way, slots are freed in a steady

diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 74805f1..30526eb 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c

@@ -826,7 +826,7 @@
 		oa->o_undirty = 0;
 	} else {
 		long max_in_flight = (cli->cl_max_pages_per_rpc <<
-				      PAGE_CACHE_SHIFT)*
+				      PAGE_SHIFT)*
 				     (cli->cl_max_rpcs_in_flight + 1);
 		oa->o_undirty = max(cli->cl_dirty_max, max_in_flight);
 	}
@@ -909,11 +909,11 @@
 static int osc_shrink_grant(struct client_obd *cli)
 {
 	__u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) *
-			     (cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT);
+			     (cli->cl_max_pages_per_rpc << PAGE_SHIFT);
 
 	client_obd_list_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_avail_grant <= target_bytes)
-		target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+		target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
 	client_obd_list_unlock(&cli->cl_loi_list_lock);
 
 	return osc_shrink_grant_to_target(cli, target_bytes);
@@ -929,8 +929,8 @@
 	 * We don't want to shrink below a single RPC, as that will negatively
 	 * impact block allocation and long-term performance.
 	 */
-	if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT)
-		target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+	if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_SHIFT)
+		target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
 
 	if (target_bytes >= cli->cl_avail_grant) {
 		client_obd_list_unlock(&cli->cl_loi_list_lock);
@@ -978,7 +978,7 @@
 		 * cli_brw_size(obd->u.cli.cl_import->imp_obd->obd_self_export)
 		 * Keep comment here so that it can be found by searching.
 		 */
-		int brw_size = client->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
+		int brw_size = client->cl_max_pages_per_rpc << PAGE_SHIFT;
 
 		if (client->cl_import->imp_state == LUSTRE_IMP_FULL &&
 		    client->cl_avail_grant > brw_size)
@@ -1052,7 +1052,7 @@
 	}
 
 	/* determine the appropriate chunk size used by osc_extent. */
-	cli->cl_chunkbits = max_t(int, PAGE_CACHE_SHIFT, ocd->ocd_blocksize);
+	cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize);
 	client_obd_list_unlock(&cli->cl_loi_list_lock);
 
 	CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n",
@@ -1317,9 +1317,9 @@
 		LASSERT(pg->count > 0);
 		/* make sure there is no gap in the middle of page array */
 		LASSERTF(page_count == 1 ||
-			 (ergo(i == 0, poff + pg->count == PAGE_CACHE_SIZE) &&
+			 (ergo(i == 0, poff + pg->count == PAGE_SIZE) &&
 			  ergo(i > 0 && i < page_count - 1,
-			       poff == 0 && pg->count == PAGE_CACHE_SIZE)   &&
+			       poff == 0 && pg->count == PAGE_SIZE)   &&
 			  ergo(i == page_count - 1, poff == 0)),
 			 "i: %d/%d pg: %p off: %llu, count: %u\n",
 			 i, page_count, pg, pg->off, pg->count);
@@ -1877,7 +1877,7 @@
 						oap->oap_count;
 			else
 				LASSERT(oap->oap_page_off + oap->oap_count ==
-					PAGE_CACHE_SIZE);
+					PAGE_SIZE);
 		}
 	}
 
@@ -1993,7 +1993,7 @@
 		tmp->oap_request = ptlrpc_request_addref(req);
 
 	client_obd_list_lock(&cli->cl_loi_list_lock);
-	starting_offset >>= PAGE_CACHE_SHIFT;
+	starting_offset >>= PAGE_SHIFT;
 	if (cmd == OBD_BRW_READ) {
 		cli->cl_r_in_flight++;
 		lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count);
@@ -2790,12 +2790,12 @@
 						CFS_PAGE_MASK;
 
 		if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
-		    fm_key->fiemap.fm_start + PAGE_CACHE_SIZE - 1)
+		    fm_key->fiemap.fm_start + PAGE_SIZE - 1)
 			policy.l_extent.end = OBD_OBJECT_EOF;
 		else
 			policy.l_extent.end = (fm_key->fiemap.fm_start +
 				fm_key->fiemap.fm_length +
-				PAGE_CACHE_SIZE - 1) & CFS_PAGE_MASK;
+				PAGE_SIZE - 1) & CFS_PAGE_MASK;
 
 		ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
 		mode = ldlm_lock_match(exp->exp_obd->obd_namespace,

diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
index 1b7673e..cf3ac8e 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c

@@ -174,12 +174,12 @@
 	LASSERT(page);
 	LASSERT(pageoffset >= 0);
 	LASSERT(len > 0);
-	LASSERT(pageoffset + len <= PAGE_CACHE_SIZE);
+	LASSERT(pageoffset + len <= PAGE_SIZE);
 
 	desc->bd_nob += len;
 
 	if (pin)
-		page_cache_get(page);
+		get_page(page);
 
 	ptlrpc_add_bulk_page(desc, page, pageoffset, len);
 }
@@ -206,7 +206,7 @@
 
 	if (unpin) {
 		for (i = 0; i < desc->bd_iov_count; i++)
-			page_cache_release(desc->bd_iov[i].kiov_page);
+			put_page(desc->bd_iov[i].kiov_page);
 	}
 
 	kfree(desc);

diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
index b4eddf2..cd94fed 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/import.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c

@@ -1092,7 +1092,7 @@
 
 		if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
 			cli->cl_max_pages_per_rpc =
-				min(ocd->ocd_brw_size >> PAGE_CACHE_SHIFT,
+				min(ocd->ocd_brw_size >> PAGE_SHIFT,
 				    cli->cl_max_pages_per_rpc);
 		else if (imp->imp_connect_op == MDS_CONNECT ||
 			 imp->imp_connect_op == MGS_CONNECT)

diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
index cee04ef..c95a91c 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c

@@ -308,7 +308,7 @@
 	 * hose a kernel by allowing the request history to grow too
 	 * far.
 	 */
-	bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	bufpages = (svc->srv_buf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (val > totalram_pages / (2 * bufpages))
 		return -ERANGE;
 
@@ -1226,7 +1226,7 @@
 	const char prefix[] = "connection=";
 	const int prefix_len = sizeof(prefix) - 1;
 
-	if (count > PAGE_CACHE_SIZE - 1 || count <= prefix_len)
+	if (count > PAGE_SIZE - 1 || count <= prefix_len)
 		return -EINVAL;
 
 	kbuf = kzalloc(count + 1, GFP_NOFS);

diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
index 5f27d9c..30d9a16 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/recover.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c

@@ -195,7 +195,7 @@
 	}
 
 	list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
-		LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON,
+		LASSERTF((long)req > PAGE_SIZE && req != LP_POISON,
 			 "req %p bad\n", req);
 		LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
 		if (!ptlrpc_no_resend(req))

diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index 72d5b9b..d3872b8 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c

@@ -58,7 +58,7 @@
  * bulk encryption page pools	   *
  ****************************************/
 
-#define POINTERS_PER_PAGE	(PAGE_CACHE_SIZE / sizeof(void *))
+#define POINTERS_PER_PAGE	(PAGE_SIZE / sizeof(void *))
 #define PAGES_PER_POOL		(POINTERS_PER_PAGE)
 
 #define IDLE_IDX_MAX	 (100)

diff --git a/drivers/staging/most/hdm-dim2/dim2_hdm.c b/drivers/staging/most/hdm-dim2/dim2_hdm.c
index 0dc86ad..a364495 100644
--- a/drivers/staging/most/hdm-dim2/dim2_hdm.c
+++ b/drivers/staging/most/hdm-dim2/dim2_hdm.c

@@ -771,7 +771,7 @@
 	dev->netinfo_task = kthread_run(&deliver_netinfo_thread, (void *)dev,
 					"dim2_netinfo");
 	if (IS_ERR(dev->netinfo_task))
-		ret = PTR_ERR(dev->netinfo_task);
+		return PTR_ERR(dev->netinfo_task);
 
 	for (i = 0; i < DMA_CHANNELS; i++) {
 		struct most_channel_capability *cap = dev->capabilities + i;

diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 9d47c5d..163f21a 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c

@@ -49,7 +49,6 @@
 		17, 18, 19, 20, 21, 22,
 		33, 34, 35, 36, 37, 38,
 		49, 50, 51, 52, 53, 54, },
-	.oobavail = 32,
 	.oobfree = {
 		{.offset = 8,
 			.length = 8},

diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.h b/drivers/staging/mt29f_spinand/mt29f_spinand.h
index ae62975..457dc7f 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.h
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.h

@@ -78,7 +78,6 @@
 #define BL_ALL_UNLOCKED    0
 
 struct spinand_info {
-	struct nand_ecclayout *ecclayout;
 	struct spi_device *spi;
 	void *priv;
 };

diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/staging/rdma/hfi1/Kconfig
index fd25078..3e668d8 100644
--- a/drivers/staging/rdma/hfi1/Kconfig
+++ b/drivers/staging/rdma/hfi1/Kconfig

@@ -1,6 +1,7 @@
 config INFINIBAND_HFI1
 	tristate "Intel OPA Gen1 support"
-	depends on X86_64
+	depends on X86_64 && INFINIBAND_RDMAVT
+	select MMU_NOTIFIER
 	default m
 	---help---
 	This is a low-level driver for Intel OPA Gen1 adapter.
@@ -25,13 +26,3 @@
 	---help---
 	This is a configuration flag to enable verbose
 	SDMA debug
-config PRESCAN_RXQ
-	bool "Enable prescanning of the RX queue for ECNs"
-	depends on INFINIBAND_HFI1
-	default n
-	---help---
-	This option toggles the prescanning of the receive queue for
-	Explicit Congestion Notifications. If an ECN is detected, it
-	is processed as quickly as possible, the ECN is toggled off.
-	After the prescanning step, the receive queue is processed as
-	usual.

diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile
index 68c5a31..8dc5938 100644
--- a/drivers/staging/rdma/hfi1/Makefile
+++ b/drivers/staging/rdma/hfi1/Makefile

@@ -7,10 +7,12 @@
 #
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
 
-hfi1-y := chip.o cq.o device.o diag.o dma.o driver.o efivar.o eprom.o file_ops.o firmware.o \
-	init.o intr.o keys.o mad.o mmap.o mr.o pcie.o pio.o pio_copy.o \
-	qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \
-	uc.o ud.o user_pages.o user_sdma.o verbs_mcast.o verbs.o
+hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \
+	eprom.o file_ops.o firmware.o \
+	init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
+	qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
+	uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
+	verbs_txreq.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)

diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/staging/rdma/hfi1/affinity.c
new file mode 100644
index 0000000..2cb8ca7
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/affinity.c

@@ -0,0 +1,430 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/topology.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+#include "trace.h"
+
+struct cpu_mask_set {
+	struct cpumask mask;
+	struct cpumask used;
+	uint gen;
+};
+
+struct hfi1_affinity {
+	struct cpu_mask_set def_intr;
+	struct cpu_mask_set rcv_intr;
+	struct cpu_mask_set proc;
+	/* spin lock to protect affinity struct */
+	spinlock_t lock;
+};
+
+/* Name of IRQ types, indexed by enum irq_type */
+static const char * const irq_type_names[] = {
+	"SDMA",
+	"RCVCTXT",
+	"GENERAL",
+	"OTHER",
+};
+
+static inline void init_cpu_mask_set(struct cpu_mask_set *set)
+{
+	cpumask_clear(&set->mask);
+	cpumask_clear(&set->used);
+	set->gen = 0;
+}
+
+/*
+ * Interrupt affinity.
+ *
+ * non-rcv avail gets a default mask that
+ * starts as possible cpus with threads reset
+ * and each rcv avail reset.
+ *
+ * rcv avail gets node relative 1 wrapping back
+ * to the node relative 1 as necessary.
+ *
+ */
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+{
+	int node = pcibus_to_node(dd->pcidev->bus);
+	struct hfi1_affinity *info;
+	const struct cpumask *local_mask;
+	int curr_cpu, possible, i, ht;
+
+	if (node < 0)
+		node = numa_node_id();
+	dd->node = node;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	spin_lock_init(&info->lock);
+
+	init_cpu_mask_set(&info->def_intr);
+	init_cpu_mask_set(&info->rcv_intr);
+	init_cpu_mask_set(&info->proc);
+
+	local_mask = cpumask_of_node(dd->node);
+	if (cpumask_first(local_mask) >= nr_cpu_ids)
+		local_mask = topology_core_cpumask(0);
+	/* use local mask as default */
+	cpumask_copy(&info->def_intr.mask, local_mask);
+	/*
+	 * Remove HT cores from the default mask.  Do this in two steps below.
+	 */
+	possible = cpumask_weight(&info->def_intr.mask);
+	ht = cpumask_weight(topology_sibling_cpumask(
+					cpumask_first(&info->def_intr.mask)));
+	/*
+	 * Step 1.  Skip over the first N HT siblings and use them as the
+	 * "real" cores.  Assumes that HT cores are not enumerated in
+	 * succession (except in the single core case).
+	 */
+	curr_cpu = cpumask_first(&info->def_intr.mask);
+	for (i = 0; i < possible / ht; i++)
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+	/*
+	 * Step 2.  Remove the remaining HT siblings.  Use cpumask_next() to
+	 * skip any gaps.
+	 */
+	for (; i < possible; i++) {
+		cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+	}
+
+	/*  fill in the receive list */
+	possible = cpumask_weight(&info->def_intr.mask);
+	curr_cpu = cpumask_first(&info->def_intr.mask);
+	if (possible == 1) {
+		/*  only one CPU, everyone will use it */
+		cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
+	} else {
+		/*
+		 * Retain the first CPU in the default list for the control
+		 * context.
+		 */
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+		/*
+		 * Remove the remaining kernel receive queues from
+		 * the default list and add them to the receive list.
+		 */
+		for (i = 0; i < dd->n_krcv_queues - 1; i++) {
+			cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
+			cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
+			curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+			if (curr_cpu >= nr_cpu_ids)
+				break;
+		}
+	}
+
+	cpumask_copy(&info->proc.mask, cpu_online_mask);
+	dd->affinity = info;
+	return 0;
+}
+
+void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
+{
+	kfree(dd->affinity);
+}
+
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+	int ret;
+	cpumask_var_t diff;
+	struct cpu_mask_set *set;
+	struct sdma_engine *sde = NULL;
+	struct hfi1_ctxtdata *rcd = NULL;
+	char extra[64];
+	int cpu = -1;
+
+	extra[0] = '\0';
+	cpumask_clear(&msix->mask);
+
+	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
+	if (!ret)
+		return -ENOMEM;
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+		sde = (struct sdma_engine *)msix->arg;
+		scnprintf(extra, 64, "engine %u", sde->this_idx);
+		/* fall through */
+	case IRQ_GENERAL:
+		set = &dd->affinity->def_intr;
+		break;
+	case IRQ_RCVCTXT:
+		rcd = (struct hfi1_ctxtdata *)msix->arg;
+		if (rcd->ctxt == HFI1_CTRL_CTXT) {
+			set = &dd->affinity->def_intr;
+			cpu = cpumask_first(&set->mask);
+		} else {
+			set = &dd->affinity->rcv_intr;
+		}
+		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+		break;
+	default:
+		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
+		return -EINVAL;
+	}
+
+	/*
+	 * The control receive context is placed on a particular CPU, which
+	 * is set above.  Skip accounting for it.  Everything else finds its
+	 * CPU here.
+	 */
+	if (cpu == -1) {
+		spin_lock(&dd->affinity->lock);
+		if (cpumask_equal(&set->mask, &set->used)) {
+			/*
+			 * We've used up all the CPUs, bump up the generation
+			 * and reset the 'used' map
+			 */
+			set->gen++;
+			cpumask_clear(&set->used);
+		}
+		cpumask_andnot(diff, &set->mask, &set->used);
+		cpu = cpumask_first(diff);
+		cpumask_set_cpu(cpu, &set->used);
+		spin_unlock(&dd->affinity->lock);
+	}
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+		sde->cpu = cpu;
+		break;
+	case IRQ_GENERAL:
+	case IRQ_RCVCTXT:
+	case IRQ_OTHER:
+		break;
+	}
+
+	cpumask_set_cpu(cpu, &msix->mask);
+	dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
+		    msix->msix.vector, irq_type_names[msix->type],
+		    extra, cpu);
+	irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+	free_cpumask_var(diff);
+	return 0;
+}
+
+void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
+			   struct hfi1_msix_entry *msix)
+{
+	struct cpu_mask_set *set = NULL;
+	struct hfi1_ctxtdata *rcd;
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+	case IRQ_GENERAL:
+		set = &dd->affinity->def_intr;
+		break;
+	case IRQ_RCVCTXT:
+		rcd = (struct hfi1_ctxtdata *)msix->arg;
+		/* only do accounting for non control contexts */
+		if (rcd->ctxt != HFI1_CTRL_CTXT)
+			set = &dd->affinity->rcv_intr;
+		break;
+	default:
+		return;
+	}
+
+	if (set) {
+		spin_lock(&dd->affinity->lock);
+		cpumask_andnot(&set->used, &set->used, &msix->mask);
+		if (cpumask_empty(&set->used) && set->gen) {
+			set->gen--;
+			cpumask_copy(&set->used, &set->mask);
+		}
+		spin_unlock(&dd->affinity->lock);
+	}
+
+	irq_set_affinity_hint(msix->msix.vector, NULL);
+	cpumask_clear(&msix->mask);
+}
+
+int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
+{
+	int cpu = -1, ret;
+	cpumask_var_t diff, mask, intrs;
+	const struct cpumask *node_mask,
+		*proc_mask = tsk_cpus_allowed(current);
+	struct cpu_mask_set *set = &dd->affinity->proc;
+	char buf[1024];
+
+	/*
+	 * check whether process/context affinity has already
+	 * been set
+	 */
+	if (cpumask_weight(proc_mask) == 1) {
+		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s",
+			  current->pid, current->comm, buf);
+		/*
+		 * Mark the pre-set CPU as used. This is atomic so we don't
+		 * need the lock
+		 */
+		cpu = cpumask_first(proc_mask);
+		cpumask_set_cpu(cpu, &set->used);
+		goto done;
+	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s",
+			  current->pid, current->comm, buf);
+		goto done;
+	}
+
+	/*
+	 * The process does not have a preset CPU affinity so find one to
+	 * recommend. We prefer CPUs on the same NUMA as the device.
+	 */
+
+	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
+	if (!ret)
+		goto done;
+	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+	if (!ret)
+		goto free_diff;
+	ret = zalloc_cpumask_var(&intrs, GFP_KERNEL);
+	if (!ret)
+		goto free_mask;
+
+	spin_lock(&dd->affinity->lock);
+	/*
+	 * If we've used all available CPUs, clear the mask and start
+	 * overloading.
+	 */
+	if (cpumask_equal(&set->mask, &set->used)) {
+		set->gen++;
+		cpumask_clear(&set->used);
+	}
+
+	/* CPUs used by interrupt handlers */
+	cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
+			     &dd->affinity->def_intr.mask :
+			     &dd->affinity->def_intr.used));
+	cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
+				  &dd->affinity->rcv_intr.mask :
+				  &dd->affinity->rcv_intr.used));
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs));
+	hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf);
+
+	/*
+	 * If we don't have a NUMA node requested, preference is towards
+	 * device NUMA node
+	 */
+	if (node == -1)
+		node = dd->node;
+	node_mask = cpumask_of_node(node);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask));
+	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf);
+
+	/* diff will hold all unused cpus */
+	cpumask_andnot(diff, &set->mask, &set->used);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff));
+	hfi1_cdbg(PROC, "unused CPUs (all) %s", buf);
+
+	/* get cpumask of available CPUs on preferred NUMA */
+	cpumask_and(mask, diff, node_mask);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
+	hfi1_cdbg(PROC, "available cpus on NUMA %s", buf);
+
+	/*
+	 * At first, we don't want to place processes on the same
+	 * CPUs as interrupt handlers.
+	 */
+	cpumask_andnot(diff, mask, intrs);
+	if (!cpumask_empty(diff))
+		cpumask_copy(mask, diff);
+
+	/*
+	 * if we don't have a cpu on the preferred NUMA, get
+	 * the list of the remaining available CPUs
+	 */
+	if (cpumask_empty(mask)) {
+		cpumask_andnot(diff, &set->mask, &set->used);
+		cpumask_andnot(mask, diff, node_mask);
+	}
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
+	hfi1_cdbg(PROC, "possible CPUs for process %s", buf);
+
+	cpu = cpumask_first(mask);
+	if (cpu >= nr_cpu_ids) /* empty */
+		cpu = -1;
+	else
+		cpumask_set_cpu(cpu, &set->used);
+	spin_unlock(&dd->affinity->lock);
+
+	free_cpumask_var(intrs);
+free_mask:
+	free_cpumask_var(mask);
+free_diff:
+	free_cpumask_var(diff);
+done:
+	return cpu;
+}
+
+void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
+{
+	struct cpu_mask_set *set = &dd->affinity->proc;
+
+	if (cpu < 0)
+		return;
+	spin_lock(&dd->affinity->lock);
+	cpumask_clear_cpu(cpu, &set->used);
+	if (cpumask_empty(&set->used) && set->gen) {
+		set->gen--;
+		cpumask_copy(&set->used, &set->mask);
+	}
+	spin_unlock(&dd->affinity->lock);
+}
+

diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/staging/rdma/hfi1/affinity.h
new file mode 100644
index 0000000..b287e49
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/affinity.h

@@ -0,0 +1,91 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_AFFINITY_H
+#define _HFI1_AFFINITY_H
+
+#include "hfi.h"
+
+enum irq_type {
+	IRQ_SDMA,
+	IRQ_RCVCTXT,
+	IRQ_GENERAL,
+	IRQ_OTHER
+};
+
+/* Can be used for both memory and cpu */
+enum affinity_flags {
+	AFF_AUTO,
+	AFF_NUMA_LOCAL,
+	AFF_DEV_LOCAL,
+	AFF_IRQ_LOCAL
+};
+
+struct hfi1_msix_entry;
+
+/* Initialize driver affinity data */
+int hfi1_dev_affinity_init(struct hfi1_devdata *);
+/* Free driver affinity data */
+void hfi1_dev_affinity_free(struct hfi1_devdata *);
+/*
+ * Set IRQ affinity to a CPU. The function will determine the
+ * CPU and set the affinity to it.
+ */
+int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+/*
+ * Remove the IRQ's CPU affinity. This function also updates
+ * any internal CPU tracking data
+ */
+void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+/*
+ * Determine a CPU affinity for a user process, if the process does not
+ * have an affinity set yet.
+ */
+int hfi1_get_proc_affinity(struct hfi1_devdata *, int);
+/* Release a CPU used by a user process. */
+void hfi1_put_proc_affinity(struct hfi1_devdata *, int);
+
+#endif /* _HFI1_AFFINITY_H */

diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/staging/rdma/hfi1/aspm.h
new file mode 100644
index 0000000..0d58fe3
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/aspm.h

@@ -0,0 +1,309 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _ASPM_H
+#define _ASPM_H
+
+#include "hfi.h"
+
+extern uint aspm_mode;
+
+enum aspm_mode {
+	ASPM_MODE_DISABLED = 0,	/* ASPM always disabled, performance mode */
+	ASPM_MODE_ENABLED = 1,	/* ASPM always enabled, power saving mode */
+	ASPM_MODE_DYNAMIC = 2,	/* ASPM enabled/disabled dynamically */
+};
+
+/* Time after which the timer interrupt will re-enable ASPM */
+#define ASPM_TIMER_MS 1000
+/* Time for which interrupts are ignored after a timer has been scheduled */
+#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
+/* Two interrupts within this time trigger ASPM disable */
+#define ASPM_TRIGGER_MS 1
+#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
+#define ASPM_L1_SUPPORTED(reg) \
+	(((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
+
+static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+	u32 up, dn;
+
+	/*
+	 * If the driver does not have access to the upstream component,
+	 * it cannot support ASPM L1 at all.
+	 */
+	if (!parent)
+		return false;
+
+	pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
+	dn = ASPM_L1_SUPPORTED(dn);
+
+	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
+	up = ASPM_L1_SUPPORTED(up);
+
+	/* ASPM works on A-step but is reported as not supported */
+	return (!!dn || is_ax(dd)) && !!up;
+}
+
+/* Set L1 entrance latency for slower entry to L1 */
+static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
+{
+	u32 l1_ent_lat = 0x4u;
+	u32 reg32;
+
+	pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
+	reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
+	reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
+	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
+}
+
+static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+
+	/*
+	 * If the driver does not have access to the upstream component,
+	 * it cannot support ASPM L1 at all.
+	 */
+	if (!parent)
+		return;
+
+	/* Enable ASPM L1 first in upstream component and then downstream */
+	pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC,
+					   PCI_EXP_LNKCTL_ASPM_L1);
+	pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC,
+					   PCI_EXP_LNKCTL_ASPM_L1);
+}
+
+static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+
+	/* Disable ASPM L1 first in downstream component and then upstream */
+	pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC, 0x0);
+	if (parent)
+		pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+						   PCI_EXP_LNKCTL_ASPMC, 0x0);
+}
+
+static inline void aspm_enable(struct hfi1_devdata *dd)
+{
+	if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
+	    !dd->aspm_supported)
+		return;
+
+	aspm_hw_enable_l1(dd);
+	dd->aspm_enabled = true;
+}
+
+static inline void aspm_disable(struct hfi1_devdata *dd)
+{
+	if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
+		return;
+
+	aspm_hw_disable_l1(dd);
+	dd->aspm_enabled = false;
+}
+
+static inline void aspm_disable_inc(struct hfi1_devdata *dd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->aspm_lock, flags);
+	aspm_disable(dd);
+	atomic_inc(&dd->aspm_disabled_cnt);
+	spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+static inline void aspm_enable_dec(struct hfi1_devdata *dd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->aspm_lock, flags);
+	if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
+		aspm_enable(dd);
+	spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+/* ASPM processing for each receive context interrupt */
+static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
+{
+	bool restart_timer;
+	bool close_interrupts;
+	unsigned long flags;
+	ktime_t now, prev;
+
+	/* Quickest exit for minimum impact */
+	if (!rcd->aspm_intr_supported)
+		return;
+
+	spin_lock_irqsave(&rcd->aspm_lock, flags);
+	/* PSM contexts are open */
+	if (!rcd->aspm_intr_enable)
+		goto unlock;
+
+	prev = rcd->aspm_ts_last_intr;
+	now = ktime_get();
+	rcd->aspm_ts_last_intr = now;
+
+	/* An interrupt pair close together in time */
+	close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
+
+	/* Don't push out our timer till this much time has elapsed */
+	restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
+				    ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
+	restart_timer = restart_timer && close_interrupts;
+
+	/* Disable ASPM and schedule timer */
+	if (rcd->aspm_enabled && close_interrupts) {
+		aspm_disable_inc(rcd->dd);
+		rcd->aspm_enabled = false;
+		restart_timer = true;
+	}
+
+	if (restart_timer) {
+		mod_timer(&rcd->aspm_timer,
+			  jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
+		rcd->aspm_ts_timer_sched = now;
+	}
+unlock:
+	spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Timer function for re-enabling ASPM in the absence of interrupt activity */
+static inline void aspm_ctx_timer_function(unsigned long data)
+{
+	struct hfi1_ctxtdata *rcd = (struct hfi1_ctxtdata *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rcd->aspm_lock, flags);
+	aspm_enable_dec(rcd->dd);
+	rcd->aspm_enabled = true;
+	spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+static inline void aspm_disable_all(struct hfi1_devdata *dd)
+{
+	struct hfi1_ctxtdata *rcd;
+	unsigned long flags;
+	unsigned i;
+
+	for (i = 0; i < dd->first_user_ctxt; i++) {
+		rcd = dd->rcd[i];
+		del_timer_sync(&rcd->aspm_timer);
+		spin_lock_irqsave(&rcd->aspm_lock, flags);
+		rcd->aspm_intr_enable = false;
+		spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+	}
+
+	aspm_disable(dd);
+	atomic_set(&dd->aspm_disabled_cnt, 0);
+}
+
+/* Re-enable interrupt processing for verbs contexts */
+static inline void aspm_enable_all(struct hfi1_devdata *dd)
+{
+	struct hfi1_ctxtdata *rcd;
+	unsigned long flags;
+	unsigned i;
+
+	aspm_enable(dd);
+
+	if (aspm_mode != ASPM_MODE_DYNAMIC)
+		return;
+
+	for (i = 0; i < dd->first_user_ctxt; i++) {
+		rcd = dd->rcd[i];
+		spin_lock_irqsave(&rcd->aspm_lock, flags);
+		rcd->aspm_intr_enable = true;
+		rcd->aspm_enabled = true;
+		spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+	}
+}
+
+static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
+{
+	spin_lock_init(&rcd->aspm_lock);
+	setup_timer(&rcd->aspm_timer, aspm_ctx_timer_function,
+		    (unsigned long)rcd);
+	rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
+		aspm_mode == ASPM_MODE_DYNAMIC &&
+		rcd->ctxt < rcd->dd->first_user_ctxt;
+}
+
+static inline void aspm_init(struct hfi1_devdata *dd)
+{
+	unsigned i;
+
+	spin_lock_init(&dd->aspm_lock);
+	dd->aspm_supported = aspm_hw_l1_supported(dd);
+
+	for (i = 0; i < dd->first_user_ctxt; i++)
+		aspm_ctx_init(dd->rcd[i]);
+
+	/* Start with ASPM disabled */
+	aspm_hw_set_l1_ent_latency(dd);
+	dd->aspm_enabled = false;
+	aspm_hw_disable_l1(dd);
+
+	/* Now turn on ASPM if configured */
+	aspm_enable_all(dd);
+}
+
+static inline void aspm_exit(struct hfi1_devdata *dd)
+{
+	aspm_disable_all(dd);
+
+	/* Turn on ASPM on exit to conserve power */
+	aspm_enable(dd);
+}
+
+#endif /* _ASPM_H */

diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c
index 46a1830..16eb653 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/staging/rdma/hfi1/chip.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -64,6 +61,8 @@
 #include "sdma.h"
 #include "eprom.h"
 #include "efivar.h"
+#include "platform.h"
+#include "aspm.h"
 
 #define NUM_IB_PORTS 1
 
@@ -420,10 +419,10 @@
 	SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
 /*23*/	FLAG_ENTRY("PioWriteQwValidParity",
-	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+	SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
 /*24*/	FLAG_ENTRY("PioBlockQwCountParity",
-	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+	SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
 /*25*/	FLAG_ENTRY("PioVlfVlLenParity",
 	SEC_SPC_FREEZE,
@@ -509,6 +508,12 @@
 		| SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
 		| SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
 
+/* SendEgressErrInfo bits that correspond to a PortXmitDiscard counter */
+#define PORT_DISCARD_EGRESS_ERRS \
+	(SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK \
+	| SEND_EGRESS_ERR_INFO_VL_MAPPING_ERR_SMASK \
+	| SEND_EGRESS_ERR_INFO_VL_ERR_SMASK)
+
 /*
  * TXE Egress Error flags
  */
@@ -936,7 +941,7 @@
 	FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
 	FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
 	FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
-		D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
+		    D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
 	FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
 };
 
@@ -950,7 +955,7 @@
 	FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
 	FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
 	FLAG_ENTRY0("Serdes internal loopback failure",
-					FAILED_SERDES_INTERNAL_LOOPBACK),
+		    FAILED_SERDES_INTERNAL_LOOPBACK),
 	FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
 	FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
 	FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
@@ -958,7 +963,8 @@
 	FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
 	FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
 	FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
-	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
+	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT),
+	FLAG_ENTRY0("Host Handshake Timeout",  HOST_HANDSHAKE_TIMEOUT)
 };
 
 /*
@@ -978,7 +984,6 @@
 	FLAG_ENTRY0("Link going down", 0x0100),
 };
 
-
 static u32 encoded_size(u32 size);
 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
@@ -1140,11 +1145,8 @@
 	/*
 	 * accessor for stat element, context either dd or ppd
 	 */
-	u64 (*rw_cntr)(const struct cntr_entry *,
-			       void *context,
-			       int vl,
-			       int mode,
-			       u64 data);
+	u64 (*rw_cntr)(const struct cntr_entry *, void *context, int vl,
+		       int mode, u64 data);
 };
 
 #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
@@ -1188,7 +1190,7 @@
 #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
 #define OVR_ELM(ctx) \
 CNTR_ELEM("RcvHdrOvr" #ctx, \
-	  (RCV_HDR_OVFL_CNT + ctx*0x100), \
+	  (RCV_HDR_OVFL_CNT + ctx * 0x100), \
 	  0, CNTR_NORMAL, port_access_u64_csr)
 
 /* 32bit TXE */
@@ -1274,7 +1276,6 @@
 {
 	u64 ret;
 
-
 	if (mode == CNTR_MODE_R) {
 		ret = read_csr(dd, csr);
 	} else if (mode == CNTR_MODE_W) {
@@ -1291,17 +1292,65 @@
 
 /* Dev Access */
 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
-			    void *context, int vl, int mode, u64 data)
+			      void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
+	u64 csr = entry->csr;
 
-	if (vl != CNTR_INVALID_VL)
-		return 0;
-	return read_write_csr(dd, entry->csr, mode, data);
+	if (entry->flags & CNTR_SDMA) {
+		if (vl == CNTR_INVALID_VL)
+			return 0;
+		csr += 0x100 * vl;
+	} else {
+		if (vl != CNTR_INVALID_VL)
+			return 0;
+	}
+	return read_write_csr(dd, csr, mode, data);
+}
+
+static u64 access_sde_err_cnt(const struct cntr_entry *entry,
+			      void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].err_cnt;
+	return 0;
+}
+
+static u64 access_sde_int_cnt(const struct cntr_entry *entry,
+			      void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].sdma_int_cnt;
+	return 0;
+}
+
+static u64 access_sde_idle_int_cnt(const struct cntr_entry *entry,
+				   void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].idle_int_cnt;
+	return 0;
+}
+
+static u64 access_sde_progress_int_cnt(const struct cntr_entry *entry,
+				       void *context, int idx, int mode,
+				       u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].progress_int_cnt;
+	return 0;
 }
 
 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
-			    int vl, int mode, u64 data)
+			      int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 
@@ -1322,7 +1371,7 @@
 }
 
 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
-			    int vl, int mode, u64 data)
+			      int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 	u32 csr = entry->csr;
@@ -1346,7 +1395,7 @@
 
 /* Port Access */
 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
-			     int vl, int mode, u64 data)
+			       int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1356,7 +1405,7 @@
 }
 
 static u64 port_access_u64_csr(const struct cntr_entry *entry,
-			     void *context, int vl, int mode, u64 data)
+			       void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 	u64 val;
@@ -1396,7 +1445,7 @@
 }
 
 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
-			       int vl, int mode, u64 data)
+				 int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1406,7 +1455,7 @@
 }
 
 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
-			       int vl, int mode, u64 data)
+				 int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1427,18 +1476,25 @@
 }
 
 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
-				    void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
-	struct hfi1_pportdata *ppd = context;
+	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
+	u64 zero = 0;
+	u64 *counter;
 
-	if (vl != CNTR_INVALID_VL)
-		return 0;
+	if (vl == CNTR_INVALID_VL)
+		counter = &ppd->port_xmit_discards;
+	else if (vl >= 0 && vl < C_VL_COUNT)
+		counter = &ppd->port_xmit_discards_vl[vl];
+	else
+		counter = &zero;
 
-	return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
+	return read_write_sw(ppd->dd, counter, mode, data);
 }
 
 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
-				     void *context, int vl, int mode, u64 data)
+				       void *context, int vl, int mode,
+				       u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1450,7 +1506,7 @@
 }
 
 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
-				     void *context, int vl, int mode, u64 data)
+				      void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1475,7 +1531,6 @@
 			  u64 __percpu *cntr,
 			  int vl, int mode, u64 data)
 {
-
 	u64 ret = 0;
 
 	if (vl != CNTR_INVALID_VL)
@@ -1507,7 +1562,7 @@
 }
 
 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
-			      void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 
@@ -1523,6 +1578,14 @@
 	return dd->verbs_dev.n_piowait;
 }
 
+static u64 access_sw_pio_drain(const struct cntr_entry *entry,
+			       void *context, int vl, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	return dd->verbs_dev.n_piodrain;
+}
+
 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
 			      void *context, int vl, int mode, u64 data)
 {
@@ -1540,11 +1603,12 @@
 }
 
 static u64 access_sw_send_schedule(const struct cntr_entry *entry,
-			       void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
 
-	return dd->verbs_dev.n_send_schedule;
+	return read_write_cpu(dd, &dd->z_send_schedule, dd->send_schedule, vl,
+			      mode, data);
 }
 
 /* Software counters for the error status bits within MISC_ERR_STATUS */
@@ -3882,8 +3946,8 @@
 			      void *context, int vl, int mode, u64 data)      \
 {									      \
 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
-	return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,	      \
-			      ppd->ibport_data.cntr, vl,		      \
+	return read_write_cpu(ppd->dd, &ppd->ibport_data.rvp.z_ ##cntr,	      \
+			      ppd->ibport_data.rvp.cntr, vl,		      \
 			      mode, data);				      \
 }
 
@@ -3900,7 +3964,7 @@
 	if (vl != CNTR_INVALID_VL)					      \
 		return 0;						      \
 									      \
-	return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,	      \
+	return read_write_sw(ppd->dd, &ppd->ibport_data.rvp.n_ ##cntr,	      \
 			     mode, data);				      \
 }
 
@@ -4063,10 +4127,28 @@
 			    access_sw_vtx_wait),
 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
 			    access_sw_pio_wait),
+[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL,
+			    access_sw_pio_drain),
 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
 			    access_sw_kmem_wait),
 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
 			    access_sw_send_schedule),
+[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
+				      SEND_DMA_DESC_FETCHED_CNT, 0,
+				      CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				      dev_access_u32_csr),
+[C_SDMA_INT_CNT] = CNTR_ELEM("SDMAInt", 0, 0,
+			     CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+			     access_sde_int_cnt),
+[C_SDMA_ERR_CNT] = CNTR_ELEM("SDMAErrCt", 0, 0,
+			     CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+			     access_sde_err_cnt),
+[C_SDMA_IDLE_INT_CNT] = CNTR_ELEM("SDMAIdInt", 0, 0,
+				  CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				  access_sde_idle_int_cnt),
+[C_SDMA_PROGRESS_INT_CNT] = CNTR_ELEM("SDMAPrIntCn", 0, 0,
+				      CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				      access_sde_progress_int_cnt),
 /* MISC_ERR_STATUS */
 [C_MISC_PLL_LOCK_FAIL_ERR] = CNTR_ELEM("MISC_PLL_LOCK_FAIL_ERR", 0, 0,
 				CNTR_NORMAL,
@@ -4876,28 +4958,28 @@
 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				      CNTR_SYNTH | CNTR_VL),
 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				     CNTR_SYNTH | CNTR_VL),
 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				      CNTR_SYNTH | CNTR_VL),
 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_link_dn_cnt),
+			     access_sw_link_dn_cnt),
 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_link_up_cnt),
+			   access_sw_link_up_cnt),
 [C_SW_UNKNOWN_FRAME] = CNTR_ELEM("UnknownFrame", 0, 0, CNTR_NORMAL,
 				 access_sw_unknown_frame_cnt),
 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_xmit_discards),
+			     access_sw_xmit_discards),
 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
-			CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
-			access_sw_xmit_discards),
+				CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
+				access_sw_xmit_discards),
 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
-			access_xmit_constraint_errs),
+				 access_xmit_constraint_errs),
 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
-			access_rcv_constraint_errs),
+				access_rcv_constraint_errs),
 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
@@ -4913,9 +4995,9 @@
 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
 			       access_sw_cpu_rc_acks),
 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
-			       access_sw_cpu_rc_qacks),
+				access_sw_cpu_rc_qacks),
 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
-			       access_sw_cpu_rc_delayed_comp),
+				       access_sw_cpu_rc_delayed_comp),
 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
@@ -5064,7 +5146,7 @@
  * the buffer.  End in '*' if the buffer is too short.
  */
 static char *flag_string(char *buf, int buf_len, u64 flags,
-				struct flag_table *table, int table_size)
+			 struct flag_table *table, int table_size)
 {
 	char extra[32];
 	char *p = buf;
@@ -5125,10 +5207,8 @@
 	if (source < ARRAY_SIZE(cce_misc_names))
 		strncpy(buf, cce_misc_names[source], bsize);
 	else
-		snprintf(buf,
-			bsize,
-			"Reserved%u",
-			source + IS_GENERAL_ERR_START);
+		snprintf(buf, bsize, "Reserved%u",
+			 source + IS_GENERAL_ERR_START);
 
 	return buf;
 }
@@ -5167,7 +5247,7 @@
 	if (source < ARRAY_SIZE(various_names))
 		strncpy(buf, various_names[source], bsize);
 	else
-		snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
+		snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
 	return buf;
 }
 
@@ -5252,51 +5332,56 @@
 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
+			   cce_err_status_flags,
+			   ARRAY_SIZE(cce_err_status_flags));
 }
 
 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
+			   rxe_err_status_flags,
+			   ARRAY_SIZE(rxe_err_status_flags));
 }
 
 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags, misc_err_status_flags,
-			ARRAY_SIZE(misc_err_status_flags));
+			   ARRAY_SIZE(misc_err_status_flags));
 }
 
 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
+			   pio_err_status_flags,
+			   ARRAY_SIZE(pio_err_status_flags));
 }
 
 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			sdma_err_status_flags,
-			ARRAY_SIZE(sdma_err_status_flags));
+			   sdma_err_status_flags,
+			   ARRAY_SIZE(sdma_err_status_flags));
 }
 
 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-		egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
+			   egress_err_status_flags,
+			   ARRAY_SIZE(egress_err_status_flags));
 }
 
 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-		egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
+			   egress_err_info_flags,
+			   ARRAY_SIZE(egress_err_info_flags));
 }
 
 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			send_err_status_flags,
-			ARRAY_SIZE(send_err_status_flags));
+			   send_err_status_flags,
+			   ARRAY_SIZE(send_err_status_flags));
 }
 
 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -5309,7 +5394,7 @@
 	 * report or record it.
 	 */
 	dd_dev_info(dd, "CCE Error: %s\n",
-		cce_err_status_string(buf, sizeof(buf), reg));
+		    cce_err_status_string(buf, sizeof(buf), reg));
 
 	if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) &&
 	    is_ax(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
@@ -5339,14 +5424,14 @@
 	u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
 
 	if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
-		ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
+	    ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
 		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
-		set_link_down_reason(ppd,
-		  OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
-			OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
+		set_link_down_reason(
+		ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
+		OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
 		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
 	}
-	dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
+	dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt;
 
 	mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
 }
@@ -5372,7 +5457,7 @@
 	int i = 0;
 
 	dd_dev_info(dd, "Receive Error: %s\n",
-		rxe_err_status_string(buf, sizeof(buf), reg));
+		    rxe_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_RXE_FREEZE_ERR) {
 		int flags = 0;
@@ -5399,7 +5484,7 @@
 	int i = 0;
 
 	dd_dev_info(dd, "Misc Error: %s",
-		misc_err_status_string(buf, sizeof(buf), reg));
+		    misc_err_status_string(buf, sizeof(buf), reg));
 	for (i = 0; i < NUM_MISC_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
 			incr_cntr64(&dd->misc_err_status_cnt[i]);
@@ -5412,7 +5497,7 @@
 	int i = 0;
 
 	dd_dev_info(dd, "PIO Error: %s\n",
-		pio_err_status_string(buf, sizeof(buf), reg));
+		    pio_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_PIO_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
@@ -5429,7 +5514,7 @@
 	int i = 0;
 
 	dd_dev_info(dd, "SDMA Error: %s\n",
-		sdma_err_status_string(buf, sizeof(buf), reg));
+		    sdma_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_SDMA_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
@@ -5440,12 +5525,14 @@
 	}
 }
 
+static inline void __count_port_discards(struct hfi1_pportdata *ppd)
+{
+	incr_cntr64(&ppd->port_xmit_discards);
+}
+
 static void count_port_inactive(struct hfi1_devdata *dd)
 {
-	struct hfi1_pportdata *ppd = dd->pport;
-
-	if (ppd->port_xmit_discards < ~(u64)0)
-		ppd->port_xmit_discards++;
+	__count_port_discards(dd->pport);
 }
 
 /*
@@ -5457,7 +5544,8 @@
  * egress error if more than one packet fails the same integrity check
  * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
  */
-static void handle_send_egress_err_info(struct hfi1_devdata *dd)
+static void handle_send_egress_err_info(struct hfi1_devdata *dd,
+					int vl)
 {
 	struct hfi1_pportdata *ppd = dd->pport;
 	u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
@@ -5468,14 +5556,44 @@
 	write_csr(dd, SEND_EGRESS_ERR_INFO, info);
 
 	dd_dev_info(dd,
-		"Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
-		info, egress_err_info_string(buf, sizeof(buf), info), src);
+		    "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
+		    info, egress_err_info_string(buf, sizeof(buf), info), src);
 
 	/* Eventually add other counters for each bit */
+	if (info & PORT_DISCARD_EGRESS_ERRS) {
+		int weight, i;
 
-	if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
-		if (ppd->port_xmit_discards < ~(u64)0)
-			ppd->port_xmit_discards++;
+		/*
+		 * Count all applicable bits as individual errors and
+		 * attribute them to the packet that triggered this handler.
+		 * This may not be completely accurate due to limitations
+		 * on the available hardware error information.  There is
+		 * a single information register and any number of error
+		 * packets may have occurred and contributed to it before
+		 * this routine is called.  This means that:
+		 * a) If multiple packets with the same error occur before
+		 *    this routine is called, earlier packets are missed.
+		 *    There is only a single bit for each error type.
+		 * b) Errors may not be attributed to the correct VL.
+		 *    The driver is attributing all bits in the info register
+		 *    to the packet that triggered this call, but bits
+		 *    could be an accumulation of different packets with
+		 *    different VLs.
+		 * c) A single error packet may have multiple counts attached
+		 *    to it.  There is no way for the driver to know if
+		 *    multiple bits set in the info register are due to a
+		 *    single packet or multiple packets.  The driver assumes
+		 *    multiple packets.
+		 */
+		weight = hweight64(info & PORT_DISCARD_EGRESS_ERRS);
+		for (i = 0; i < weight; i++) {
+			__count_port_discards(ppd);
+			if (vl >= 0 && vl < TXE_NUM_DATA_VL)
+				incr_cntr64(&ppd->port_xmit_discards_vl[vl]);
+			else if (vl == 15)
+				incr_cntr64(&ppd->port_xmit_discards_vl
+					    [C_VL_15]);
+		}
 	}
 }
 
@@ -5493,12 +5611,71 @@
  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
  * register. Does it represent a 'disallowed packet' error?
  */
-static inline int disallowed_pkt_err(u64 posn)
+static inline int disallowed_pkt_err(int posn)
 {
 	return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
 		posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
 }
 
+/*
+ * Input value is a bit position of one of the SDMA engine disallowed
+ * packet errors.  Return which engine.  Use of this must be guarded by
+ * disallowed_pkt_err().
+ */
+static inline int disallowed_pkt_engine(int posn)
+{
+	return posn - SEES(TX_SDMA0_DISALLOWED_PACKET);
+}
+
+/*
+ * Translate an SDMA engine to a VL.  Return -1 if the tranlation cannot
+ * be done.
+ */
+static int engine_to_vl(struct hfi1_devdata *dd, int engine)
+{
+	struct sdma_vl_map *m;
+	int vl;
+
+	/* range check */
+	if (engine < 0 || engine >= TXE_NUM_SDMA_ENGINES)
+		return -1;
+
+	rcu_read_lock();
+	m = rcu_dereference(dd->sdma_map);
+	vl = m->engine_to_vl[engine];
+	rcu_read_unlock();
+
+	return vl;
+}
+
+/*
+ * Translate the send context (sofware index) into a VL.  Return -1 if the
+ * translation cannot be done.
+ */
+static int sc_to_vl(struct hfi1_devdata *dd, int sw_index)
+{
+	struct send_context_info *sci;
+	struct send_context *sc;
+	int i;
+
+	sci = &dd->send_contexts[sw_index];
+
+	/* there is no information for user (PSM) and ack contexts */
+	if (sci->type != SC_KERNEL)
+		return -1;
+
+	sc = sci->sc;
+	if (!sc)
+		return -1;
+	if (dd->vld[15].sc == sc)
+		return 15;
+	for (i = 0; i < num_vls; i++)
+		if (dd->vld[i].sc == sc)
+			return i;
+
+	return -1;
+}
+
 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 {
 	u64 reg_copy = reg, handled = 0;
@@ -5507,34 +5684,34 @@
 
 	if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
-	if (is_ax(dd) && (reg &
-		    SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
-		    && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
+	else if (is_ax(dd) &&
+		 (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) &&
+		 (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
 		start_freeze_handling(dd->pport, 0);
 
 	while (reg_copy) {
 		int posn = fls64(reg_copy);
-		/*
-		 * fls64() returns a 1-based offset, but we generally
-		 * want 0-based offsets.
-		 */
+		/* fls64() returns a 1-based offset, we want it zero based */
 		int shift = posn - 1;
+		u64 mask = 1ULL << shift;
 
 		if (port_inactive_err(shift)) {
 			count_port_inactive(dd);
-			handled |= (1ULL << shift);
+			handled |= mask;
 		} else if (disallowed_pkt_err(shift)) {
-			handle_send_egress_err_info(dd);
-			handled |= (1ULL << shift);
+			int vl = engine_to_vl(dd, disallowed_pkt_engine(shift));
+
+			handle_send_egress_err_info(dd, vl);
+			handled |= mask;
 		}
-		clear_bit(shift, (unsigned long *)&reg_copy);
+		reg_copy &= ~mask;
 	}
 
 	reg &= ~handled;
 
 	if (reg)
 		dd_dev_info(dd, "Egress Error: %s\n",
-			egress_err_status_string(buf, sizeof(buf), reg));
+			    egress_err_status_string(buf, sizeof(buf), reg));
 
 	for (i = 0; i < NUM_SEND_EGRESS_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
@@ -5548,7 +5725,7 @@
 	int i = 0;
 
 	dd_dev_info(dd, "Send Error: %s\n",
-		send_err_status_string(buf, sizeof(buf), reg));
+		    send_err_status_string(buf, sizeof(buf), reg));
 
 	for (i = 0; i < NUM_SEND_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
@@ -5594,7 +5771,7 @@
 			u64 mask;
 
 			dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
-				eri->desc, reg);
+				   eri->desc, reg);
 			/*
 			 * Read-modify-write so any other masked bits
 			 * remain masked.
@@ -5618,14 +5795,15 @@
 		interrupt_clear_down(dd, 0, eri);
 	} else {
 		dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
-			source);
+			   source);
 	}
 }
 
 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
+			   sc_err_status_flags,
+			   ARRAY_SIZE(sc_err_status_flags));
 }
 
 /*
@@ -5650,15 +5828,15 @@
 	sw_index = dd->hw_to_sw[hw_context];
 	if (sw_index >= dd->num_send_contexts) {
 		dd_dev_err(dd,
-			"out of range sw index %u for send context %u\n",
-			sw_index, hw_context);
+			   "out of range sw index %u for send context %u\n",
+			   sw_index, hw_context);
 		return;
 	}
 	sci = &dd->send_contexts[sw_index];
 	sc = sci->sc;
 	if (!sc) {
 		dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
-			sw_index, hw_context);
+			   sw_index, hw_context);
 		return;
 	}
 
@@ -5668,10 +5846,11 @@
 	status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
 
 	dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
-		send_context_err_status_string(flags, sizeof(flags), status));
+		    send_context_err_status_string(flags, sizeof(flags),
+						   status));
 
 	if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
-		handle_send_egress_err_info(dd);
+		handle_send_egress_err_info(dd, sc_to_vl(dd, sw_index));
 
 	/*
 	 * Automatically restart halted kernel contexts out of interrupt
@@ -5704,6 +5883,7 @@
 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
 		   sde->this_idx, source, (unsigned long long)status);
 #endif
+	sde->err_cnt++;
 	sdma_engine_error(sde, status);
 
 	/*
@@ -5752,23 +5932,22 @@
 		interrupt_clear_down(dd, 0, eri);
 	else
 		dd_dev_info(dd,
-			"%s: Unimplemented/reserved interrupt %d\n",
-			__func__, source);
+			    "%s: Unimplemented/reserved interrupt %d\n",
+			    __func__, source);
 }
 
 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
 {
-	/* source is always zero */
+	/* src_ctx is always zero */
 	struct hfi1_pportdata *ppd = dd->pport;
 	unsigned long flags;
 	u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
 
 	if (reg & QSFP_HFI0_MODPRST_N) {
-
-		dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
-				__func__);
-
 		if (!qsfp_mod_present(ppd)) {
+			dd_dev_info(dd, "%s: QSFP module removed\n",
+				    __func__);
+
 			ppd->driver_link_ready = 0;
 			/*
 			 * Cable removed, reset all our information about the
@@ -5781,14 +5960,23 @@
 			 * an interrupt when a cable is inserted
 			 */
 			ppd->qsfp_info.cache_valid = 0;
-			ppd->qsfp_info.qsfp_interrupt_functional = 0;
+			ppd->qsfp_info.reset_needed = 0;
+			ppd->qsfp_info.limiting_active = 0;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-						flags);
-			write_csr(dd,
-					dd->hfi1_id ?
-						ASIC_QSFP2_INVERT :
-						ASIC_QSFP1_INVERT,
-				qsfp_int_mgmt);
+					       flags);
+			/* Invert the ModPresent pin now to detect plug-in */
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+				  ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+
+			if ((ppd->offline_disabled_reason >
+			  HFI1_ODR_MASK(
+			  OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED)) ||
+			  (ppd->offline_disabled_reason ==
+			  HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
+				ppd->offline_disabled_reason =
+				HFI1_ODR_MASK(
+				OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+
 			if (ppd->host_link_state == HLS_DN_POLL) {
 				/*
 				 * The link is still in POLL. This means
@@ -5799,28 +5987,33 @@
 				queue_work(ppd->hfi1_wq, &ppd->link_down_work);
 			}
 		} else {
+			dd_dev_info(dd, "%s: QSFP module inserted\n",
+				    __func__);
+
 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 			ppd->qsfp_info.cache_valid = 0;
 			ppd->qsfp_info.cache_refresh_required = 1;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-						flags);
+					       flags);
 
+			/*
+			 * Stop inversion of ModPresent pin to detect
+			 * removal of the cable
+			 */
 			qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
-			write_csr(dd,
-					dd->hfi1_id ?
-						ASIC_QSFP2_INVERT :
-						ASIC_QSFP1_INVERT,
-				qsfp_int_mgmt);
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+				  ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+
+			ppd->offline_disabled_reason =
+				HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
 		}
 	}
 
 	if (reg & QSFP_HFI0_INT_N) {
-
-		dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
-				__func__);
+		dd_dev_info(dd, "%s: Interrupt received from QSFP module\n",
+			    __func__);
 		spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 		ppd->qsfp_info.check_interrupt_flags = 1;
-		ppd->qsfp_info.qsfp_interrupt_functional = 1;
 		spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
 	}
 
@@ -5834,11 +6027,11 @@
 	int ret;
 
 	ret = do_8051_command(dd, HCMD_MISC,
-		(u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
-		NULL);
+			      (u64)HCMD_MISC_REQUEST_LCB_ACCESS <<
+			      LOAD_DATA_FIELD_ID_SHIFT, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "%s: command failed with error %d\n",
-			__func__, ret);
+			   __func__, ret);
 	}
 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
 }
@@ -5848,11 +6041,11 @@
 	int ret;
 
 	ret = do_8051_command(dd, HCMD_MISC,
-		(u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
-		NULL);
+			      (u64)HCMD_MISC_GRANT_LCB_ACCESS <<
+			      LOAD_DATA_FIELD_ID_SHIFT, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "%s: command failed with error %d\n",
-			__func__, ret);
+			   __func__, ret);
 	}
 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
 }
@@ -5864,8 +6057,8 @@
 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
 {
 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
-				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
-				| DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK |
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
 }
 
 /*
@@ -5875,7 +6068,7 @@
 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
 {
 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
-				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
 }
 
 /*
@@ -5909,7 +6102,7 @@
 	/* this access is valid only when the link is up */
 	if ((ppd->host_link_state & HLS_UP) == 0) {
 		dd_dev_info(dd, "%s: link state %s not up\n",
-			__func__, link_state_name(ppd->host_link_state));
+			    __func__, link_state_name(ppd->host_link_state));
 		ret = -EBUSY;
 		goto done;
 	}
@@ -5918,8 +6111,8 @@
 		ret = request_host_lcb_access(dd);
 		if (ret) {
 			dd_dev_err(dd,
-				"%s: unable to acquire LCB access, err %d\n",
-				__func__, ret);
+				   "%s: unable to acquire LCB access, err %d\n",
+				   __func__, ret);
 			goto done;
 		}
 		set_host_lcb_access(dd);
@@ -5956,7 +6149,7 @@
 
 	if (dd->lcb_access_count == 0) {
 		dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
-			__func__);
+			   __func__);
 		goto done;
 	}
 
@@ -5965,8 +6158,8 @@
 		ret = request_8051_lcb_access(dd);
 		if (ret) {
 			dd_dev_err(dd,
-				"%s: unable to release LCB access, err %d\n",
-				__func__, ret);
+				   "%s: unable to release LCB access, err %d\n",
+				   __func__, ret);
 			/* restore host access if the grant didn't work */
 			set_host_lcb_access(dd);
 			goto done;
@@ -5998,19 +6191,26 @@
 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
 {
 	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
-		DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
-		| (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
-		| (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
+		  DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK |
+		  (u64)return_code <<
+		  DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT |
+		  (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
 }
 
 /*
- * Handle requests from the 8051.
+ * Handle host requests from the 8051.
+ *
+ * This is a work-queue function outside of the interrupt.
  */
-static void handle_8051_request(struct hfi1_devdata *dd)
+void handle_8051_request(struct work_struct *work)
 {
+	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+							dc_host_req_work);
+	struct hfi1_devdata *dd = ppd->dd;
 	u64 reg;
-	u16 data;
-	u8 type;
+	u16 data = 0;
+	u8 type, i, lanes, *cache = ppd->qsfp_info.cache;
+	u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
 
 	reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
 	if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
@@ -6031,12 +6231,46 @@
 	case HREQ_READ_CONFIG:
 	case HREQ_SET_TX_EQ_ABS:
 	case HREQ_SET_TX_EQ_REL:
-	case HREQ_ENABLE:
 		dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
-			type);
+			    type);
 		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
 		break;
 
+	case HREQ_ENABLE:
+		lanes = data & 0xF;
+		for (i = 0; lanes; lanes >>= 1, i++) {
+			if (!(lanes & 1))
+				continue;
+			if (data & 0x200) {
+				/* enable TX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x80)
+					cdr_ctrl_byte |= (1 << (i + 4));
+			} else {
+				/* disable TX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x80)
+					cdr_ctrl_byte &= ~(1 << (i + 4));
+			}
+
+			if (data & 0x800) {
+				/* enable RX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x40)
+					cdr_ctrl_byte |= (1 << i);
+			} else {
+				/* disable RX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x40)
+					cdr_ctrl_byte &= ~(1 << i);
+			}
+		}
+		one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+			       &cdr_ctrl_byte, 1);
+		hreq_response(dd, HREQ_SUCCESS, data);
+		refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+		break;
+
 	case HREQ_CONFIG_DONE:
 		hreq_response(dd, HREQ_SUCCESS, 0);
 		break;
@@ -6056,11 +6290,11 @@
 				u8 vau, u16 total, u16 shared)
 {
 	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-		((u64)total
-			<< SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
-		| ((u64)shared
-			<< SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
-		| ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+		  ((u64)total <<
+		   SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
+		  ((u64)shared <<
+		   SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
+		  ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
 }
 
 /*
@@ -6097,7 +6331,7 @@
 
 	/* remove all previous VL credit limits */
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+		write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
 	write_global_credit(dd, 0, 0, 0);
 	/* reset the CM block */
@@ -6139,15 +6373,14 @@
 	write_csr(dd, DC_LCB_CFG_RUN, 0);
 	/* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
-		1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
+		  1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
 	/* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
 	dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
 	reg = read_csr(dd, DCC_CFG_RESET);
-	write_csr(dd, DCC_CFG_RESET,
-		reg
-		| (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
-		| (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
-	(void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
+	write_csr(dd, DCC_CFG_RESET, reg |
+		  (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
+		  (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
+	(void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
 	if (!abort) {
 		udelay(1);    /* must hold for the longer of 16cclks or 20ns */
 		write_csr(dd, DCC_CFG_RESET, reg);
@@ -6176,14 +6409,18 @@
 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
 	/* Shutdown the LCB */
 	lcb_shutdown(dd, 1);
-	/* Going to OFFLINE would have causes the 8051 to put the
+	/*
+	 * Going to OFFLINE would have causes the 8051 to put the
 	 * SerDes into reset already. Just need to shut down the 8051,
-	 * itself. */
+	 * itself.
+	 */
 	write_csr(dd, DC_DC8051_CFG_RST, 0x1);
 }
 
-/* Calling this after the DC has been brought out of reset should not
- * do any damage. */
+/*
+ * Calling this after the DC has been brought out of reset should not
+ * do any damage.
+ */
 static void dc_start(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
@@ -6199,7 +6436,7 @@
 	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
 	if (ret) {
 		dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
-			__func__);
+			   __func__);
 	}
 	/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
 	write_csr(dd, DCC_CFG_RESET, 0x10);
@@ -6292,7 +6529,7 @@
 	write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
 	/* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
 	write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
-		DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+		  DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
 }
 
@@ -6309,8 +6546,10 @@
 	u64 msg;
 	int ret;
 
-	/* msg is bytes 1-4 of the 40-bit idle message - the command code
-	   is stripped off */
+	/*
+	 * msg is bytes 1-4 of the 40-bit idle message - the command code
+	 * is stripped off
+	 */
 	ret = read_idle_sma(dd, &msg);
 	if (ret)
 		return;
@@ -6336,8 +6575,8 @@
 		 *
 		 * Can activate the node.  Discard otherwise.
 		 */
-		if (ppd->host_link_state == HLS_UP_ARMED
-					&& ppd->is_active_optimize_enabled) {
+		if (ppd->host_link_state == HLS_UP_ARMED &&
+		    ppd->is_active_optimize_enabled) {
 			ppd->neighbor_normal = 1;
 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
 			if (ret)
@@ -6349,8 +6588,8 @@
 		break;
 	default:
 		dd_dev_err(dd,
-			"%s: received unexpected SMA idle message 0x%llx\n",
-			__func__, msg);
+			   "%s: received unexpected SMA idle message 0x%llx\n",
+			   __func__, msg);
 		break;
 	}
 }
@@ -6442,10 +6681,9 @@
 
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
-				freeze ? "" : "un",
-				reg & ALL_FROZE,
-				freeze ? ALL_FROZE : 0ull);
+				   "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
+				   freeze ? "" : "un", reg & ALL_FROZE,
+				   freeze ? ALL_FROZE : 0ull);
 			return;
 		}
 		usleep_range(80, 120);
@@ -6475,11 +6713,17 @@
  */
 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
 {
+	u32 rcvmask;
 	int i;
 
 	/* enable all kernel contexts */
-	for (i = 0; i < dd->n_krcv_queues; i++)
-		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
+	for (i = 0; i < dd->n_krcv_queues; i++) {
+		rcvmask = HFI1_RCVCTRL_CTXT_ENB;
+		/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
+		rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
+		hfi1_rcvctrl(dd, rcvmask, i);
+	}
 
 	/* enable port */
 	add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -6564,7 +6808,7 @@
 void handle_link_up(struct work_struct *work)
 {
 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
-								link_up_work);
+						  link_up_work);
 	set_link_state(ppd, HLS_UP_INIT);
 
 	/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
@@ -6583,17 +6827,20 @@
 	if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
 		/* oops - current speed is not enabled, bounce */
 		dd_dev_err(ppd->dd,
-			"Link speed active 0x%x is outside enabled 0x%x, downing link\n",
-			ppd->link_speed_active, ppd->link_speed_enabled);
+			   "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
+			   ppd->link_speed_active, ppd->link_speed_enabled);
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
-			OPA_LINKDOWN_REASON_SPEED_POLICY);
+				     OPA_LINKDOWN_REASON_SPEED_POLICY);
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	}
 }
 
-/* Several pieces of LNI information were cached for SMA in ppd.
- * Reset these on link down */
+/*
+ * Several pieces of LNI information were cached for SMA in ppd.
+ * Reset these on link down
+ */
 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
 {
 	ppd->neighbor_guid = 0;
@@ -6613,7 +6860,13 @@
 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
 								link_down_work);
 
-	/* go offline first, then deal with reasons */
+	if ((ppd->host_link_state &
+	     (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) &&
+	     ppd->port_type == PORT_TYPE_FIXED)
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED);
+
+	/* Go offline first, then deal with reading/writing through 8051 */
 	set_link_state(ppd, HLS_DN_OFFLINE);
 
 	lcl_reason = 0;
@@ -6633,12 +6886,16 @@
 	/* disable the port */
 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
 
-	/* If there is no cable attached, turn the DC off. Otherwise,
-	 * start the link bring up. */
-	if (!qsfp_mod_present(ppd))
+	/*
+	 * If there is no cable attached, turn the DC off. Otherwise,
+	 * start the link bring up.
+	 */
+	if (!qsfp_mod_present(ppd)) {
 		dc_shutdown(ppd->dd);
-	else
+	} else {
+		tune_serdes(ppd);
 		start_link(ppd);
+	}
 }
 
 void handle_link_bounce(struct work_struct *work)
@@ -6651,10 +6908,11 @@
 	 */
 	if (ppd->host_link_state & HLS_UP) {
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	} else {
 		dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
-			__func__, link_state_name(ppd->host_link_state));
+			    __func__, link_state_name(ppd->host_link_state));
 	}
 }
 
@@ -6751,7 +7009,7 @@
 	case 3: return OPA_LINK_WIDTH_3X;
 	default:
 		dd_dev_info(dd, "%s: invalid width %d, using 4\n",
-			__func__, width);
+			    __func__, width);
 		/* fall through */
 	case 4: return OPA_LINK_WIDTH_4X;
 	}
@@ -6763,6 +7021,7 @@
 static const u8 bit_counts[16] = {
 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
 };
+
 static inline u8 nibble_to_count(u8 nibble)
 {
 	return bit_counts[nibble & 0xf];
@@ -6788,7 +7047,7 @@
 
 	/* read the active lanes */
 	read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
-				&rx_polarity_inversion, &max_rate);
+			 &rx_polarity_inversion, &max_rate);
 	read_local_lni(dd, &enable_lane_rx);
 
 	/* convert to counts */
@@ -6800,8 +7059,8 @@
 	 * handle_verify_cap().  The ASIC 8051 firmware does not correctly
 	 * set the max_rate field in handle_verify_cap until v0.19.
 	 */
-	if ((dd->icode == ICODE_RTL_SILICON)
-				&& (dd->dc8051_ver < dc8051_ver(0, 19))) {
+	if ((dd->icode == ICODE_RTL_SILICON) &&
+	    (dd->dc8051_ver < dc8051_ver(0, 19))) {
 		/* max_rate: 0 = 12.5G, 1 = 25G */
 		switch (max_rate) {
 		case 0:
@@ -6809,8 +7068,8 @@
 			break;
 		default:
 			dd_dev_err(dd,
-				"%s: unexpected max rate %d, using 25Gb\n",
-				__func__, (int)max_rate);
+				   "%s: unexpected max rate %d, using 25Gb\n",
+				   __func__, (int)max_rate);
 			/* fall through */
 		case 1:
 			dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
@@ -6819,8 +7078,8 @@
 	}
 
 	dd_dev_info(dd,
-		"Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
-		enable_lane_tx, tx, enable_lane_rx, rx);
+		    "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
+		    enable_lane_tx, tx, enable_lane_rx, rx);
 	*tx_width = link_width_to_bits(dd, tx);
 	*rx_width = link_width_to_bits(dd, rx);
 }
@@ -6923,13 +7182,8 @@
 	 */
 
 	read_vc_remote_phy(dd, &power_management, &continious);
-	read_vc_remote_fabric(
-		dd,
-		&vau,
-		&z,
-		&vcu,
-		&vl15buf,
-		&partner_supported_crc);
+	read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
+			      &partner_supported_crc);
 	read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
 	read_remote_device_id(dd, &device_id, &device_rev);
 	/*
@@ -6940,19 +7194,16 @@
 	/* print the active widths */
 	get_link_widths(dd, &active_tx, &active_rx);
 	dd_dev_info(dd,
-		"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
-		(int)power_management, (int)continious);
+		    "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
+		    (int)power_management, (int)continious);
 	dd_dev_info(dd,
-		"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
-		(int)vau,
-		(int)z,
-		(int)vcu,
-		(int)vl15buf,
-		(int)partner_supported_crc);
+		    "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
+		    (int)vau, (int)z, (int)vcu, (int)vl15buf,
+		    (int)partner_supported_crc);
 	dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
-		(u32)remote_tx_rate, (u32)link_widths);
+		    (u32)remote_tx_rate, (u32)link_widths);
 	dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
-		(u32)device_id, (u32)device_rev);
+		    (u32)device_id, (u32)device_rev);
 	/*
 	 * The peer vAU value just read is the peer receiver value.  HFI does
 	 * not support a transmit vAU of 0 (AU == 8).  We advertised that
@@ -6987,10 +7238,10 @@
 	reg = read_csr(dd, SEND_CM_CTRL);
 	if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
 		write_csr(dd, SEND_CM_CTRL,
-			reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+			  reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
 	} else {
 		write_csr(dd, SEND_CM_CTRL,
-			reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+			  reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
 	}
 
 	ppd->link_speed_active = 0;	/* invalid value */
@@ -7015,7 +7266,7 @@
 	}
 	if (ppd->link_speed_active == 0) {
 		dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
-			__func__, (int)remote_tx_rate);
+			   __func__, (int)remote_tx_rate);
 		ppd->link_speed_active = OPA_LINK_SPEED_25G;
 	}
 
@@ -7071,9 +7322,9 @@
 		read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
 		DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
 	dd_dev_info(dd,
-		"Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
-		ppd->neighbor_guid, ppd->neighbor_type,
-		ppd->mgmt_allowed, ppd->neighbor_fm_security);
+		    "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
+		    ppd->neighbor_guid, ppd->neighbor_type,
+		    ppd->mgmt_allowed, ppd->neighbor_fm_security);
 	if (ppd->mgmt_allowed)
 		add_full_mgmt_pkey(ppd);
 
@@ -7127,28 +7378,27 @@
 
 		/* bounce if not at starting active width */
 		if ((ppd->link_width_active !=
-					ppd->link_width_downgrade_tx_active)
-				|| (ppd->link_width_active !=
-					ppd->link_width_downgrade_rx_active)) {
+		     ppd->link_width_downgrade_tx_active) ||
+		    (ppd->link_width_active !=
+		     ppd->link_width_downgrade_rx_active)) {
 			dd_dev_err(ppd->dd,
-				"Link downgrade is disabled and link has downgraded, downing link\n");
+				   "Link downgrade is disabled and link has downgraded, downing link\n");
 			dd_dev_err(ppd->dd,
-				"  original 0x%x, tx active 0x%x, rx active 0x%x\n",
-				ppd->link_width_active,
-				ppd->link_width_downgrade_tx_active,
-				ppd->link_width_downgrade_rx_active);
+				   "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
+				   ppd->link_width_active,
+				   ppd->link_width_downgrade_tx_active,
+				   ppd->link_width_downgrade_rx_active);
 			do_bounce = 1;
 		}
-	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
-		|| (lwde & ppd->link_width_downgrade_rx_active) == 0) {
+	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 ||
+		   (lwde & ppd->link_width_downgrade_rx_active) == 0) {
 		/* Tx or Rx is outside the enabled policy */
 		dd_dev_err(ppd->dd,
-			"Link is outside of downgrade allowed, downing link\n");
+			   "Link is outside of downgrade allowed, downing link\n");
 		dd_dev_err(ppd->dd,
-			"  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
-			lwde,
-			ppd->link_width_downgrade_tx_active,
-			ppd->link_width_downgrade_rx_active);
+			   "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
+			   lwde, ppd->link_width_downgrade_tx_active,
+			   ppd->link_width_downgrade_rx_active);
 		do_bounce = 1;
 	}
 
@@ -7157,8 +7407,9 @@
 
 	if (do_bounce) {
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
-		  OPA_LINKDOWN_REASON_WIDTH_POLICY);
+				     OPA_LINKDOWN_REASON_WIDTH_POLICY);
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	}
 }
@@ -7239,9 +7490,10 @@
 			    & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
 				queue_link_down = 1;
 				dd_dev_info(dd, "Link error: %s\n",
-					dc8051_info_err_string(buf,
-						sizeof(buf),
-						err & FAILED_LNI));
+					    dc8051_info_err_string(buf,
+								   sizeof(buf),
+								   err &
+								   FAILED_LNI));
 			}
 			err &= ~(u64)FAILED_LNI;
 		}
@@ -7253,7 +7505,8 @@
 		if (err) {
 			/* report remaining errors, but do not do anything */
 			dd_dev_err(dd, "8051 info error: %s\n",
-				dc8051_info_err_string(buf, sizeof(buf), err));
+				   dc8051_info_err_string(buf, sizeof(buf),
+							  err));
 		}
 
 		/*
@@ -7281,7 +7534,7 @@
 			host_msg &= ~(u64)LINKUP_ACHIEVED;
 		}
 		if (host_msg & EXT_DEVICE_CFG_REQ) {
-			handle_8051_request(dd);
+			queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work);
 			host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
 		}
 		if (host_msg & VERIFY_CAP_FRAME) {
@@ -7306,8 +7559,9 @@
 		if (host_msg) {
 			/* report remaining messages, but do not do anything */
 			dd_dev_info(dd, "8051 info host message: %s\n",
-				dc8051_info_host_msg_string(buf, sizeof(buf),
-					host_msg));
+				    dc8051_info_host_msg_string(buf,
+								sizeof(buf),
+								host_msg));
 		}
 
 		reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
@@ -7320,25 +7574,27 @@
 		 */
 		dd_dev_err(dd, "Lost 8051 heartbeat\n");
 		write_csr(dd, DC_DC8051_ERR_EN,
-			read_csr(dd, DC_DC8051_ERR_EN)
-			  & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
+			  read_csr(dd, DC_DC8051_ERR_EN) &
+			  ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
 
 		reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
 	}
 	if (reg) {
 		/* report the error, but do not do anything */
 		dd_dev_err(dd, "8051 error: %s\n",
-			dc8051_err_string(buf, sizeof(buf), reg));
+			   dc8051_err_string(buf, sizeof(buf), reg));
 	}
 
 	if (queue_link_down) {
-		/* if the link is already going down or disabled, do not
-		 * queue another */
-		if ((ppd->host_link_state
-				    & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
-				|| ppd->link_enabled == 0) {
+		/*
+		 * if the link is already going down or disabled, do not
+		 * queue another
+		 */
+		if ((ppd->host_link_state &
+		    (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) ||
+		    ppd->link_enabled == 0) {
 			dd_dev_info(dd, "%s: not queuing link down\n",
-				__func__);
+				    __func__);
 		} else {
 			queue_work(ppd->hfi1_wq, &ppd->link_down_work);
 		}
@@ -7480,8 +7736,10 @@
 			/* set status bit */
 			dd->err_info_rcvport.status_and_code |=
 				OPA_EI_STATUS_SMASK;
-			/* save first 2 flits in the packet that caused
-			 * the error */
+			/*
+			 * save first 2 flits in the packet that caused
+			 * the error
+			 */
 			 dd->err_info_rcvport.packet_flit1 = hdr0;
 			 dd->err_info_rcvport.packet_flit2 = hdr1;
 		}
@@ -7514,7 +7772,7 @@
 		/* just report this */
 		dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
 		dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
-			hdr0, hdr1);
+			    hdr0, hdr1);
 
 		reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
 	}
@@ -7533,7 +7791,7 @@
 	/* report any remaining errors */
 	if (reg)
 		dd_dev_info(dd, "DCC Error: %s\n",
-			dcc_err_string(buf, sizeof(buf), reg));
+			    dcc_err_string(buf, sizeof(buf), reg));
 
 	if (lcl_reason == 0)
 		lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
@@ -7550,7 +7808,7 @@
 	char buf[96];
 
 	dd_dev_info(dd, "LCB Error: %s\n",
-		lcb_err_string(buf, sizeof(buf), reg));
+		    lcb_err_string(buf, sizeof(buf), reg));
 }
 
 /*
@@ -7640,7 +7898,7 @@
 		err_detail = "out of range";
 	}
 	dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
-		err_detail, source);
+		   err_detail, source);
 }
 
 /*
@@ -7666,7 +7924,7 @@
 		err_detail = "out of range";
 	}
 	dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
-		err_detail, source);
+		   err_detail, source);
 }
 
 /*
@@ -7677,12 +7935,14 @@
 	char name[64];
 
 	dd_dev_err(dd, "unexpected %s interrupt\n",
-				is_reserved_name(name, sizeof(name), source));
+		   is_reserved_name(name, sizeof(name), source));
 }
 
 static const struct is_table is_table[] = {
-/* start		     end
-				name func		interrupt func */
+/*
+ * start		 end
+ *				name func		interrupt func
+ */
 { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
 				is_misc_err_name,	is_misc_err_int },
 { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
@@ -7753,7 +8013,7 @@
 
 	/* phase 2: call the appropriate handler */
 	for_each_set_bit(bit, (unsigned long *)&regs[0],
-						CCE_NUM_INT_CSRS*64) {
+			 CCE_NUM_INT_CSRS * 64) {
 		is_interrupt(dd, bit);
 	}
 
@@ -7776,27 +8036,27 @@
 
 	/* This read_csr is really bad in the hot path */
 	status = read_csr(dd,
-			CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
-			& sde->imask;
+			  CCE_INT_STATUS + (8 * (IS_SDMA_START / 64)))
+			  & sde->imask;
 	if (likely(status)) {
 		/* clear the interrupt(s) */
 		write_csr(dd,
-			CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
-			status);
+			  CCE_INT_CLEAR + (8 * (IS_SDMA_START / 64)),
+			  status);
 
 		/* handle the interrupt(s) */
 		sdma_engine_interrupt(sde, status);
 	} else
 		dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
-			sde->this_idx);
+			   sde->this_idx);
 
 	return IRQ_HANDLED;
 }
 
 /*
- * Clear the receive interrupt, forcing the write and making sure
- * we have data from the chip, pushing everything in front of it
- * back to the host.
+ * Clear the receive interrupt.  Use a read of the interrupt clear CSR
+ * to insure that the write completed.  This does NOT guarantee that
+ * queued DMA writes to memory from the chip are pushed.
  */
 static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
 {
@@ -7810,27 +8070,45 @@
 }
 
 /* force the receive interrupt */
-static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
+void force_recv_intr(struct hfi1_ctxtdata *rcd)
 {
 	write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
 }
 
-/* return non-zero if a packet is present */
+/*
+ * Return non-zero if a packet is present.
+ *
+ * This routine is called when rechecking for packets after the RcvAvail
+ * interrupt has been cleared down.  First, do a quick check of memory for
+ * a packet present.  If not found, use an expensive CSR read of the context
+ * tail to determine the actual tail.  The CSR read is necessary because there
+ * is no method to push pending DMAs to memory other than an interrupt and we
+ * are trying to determine if we need to force an interrupt.
+ */
 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
 {
-	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
-		return (rcd->seq_cnt ==
-				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
+	u32 tail;
+	int present;
 
-	/* else is RDMA rtail */
-	return (rcd->head != get_rcvhdrtail(rcd));
+	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
+		present = (rcd->seq_cnt ==
+				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
+	else /* is RDMA rtail */
+		present = (rcd->head != get_rcvhdrtail(rcd));
+
+	if (present)
+		return 1;
+
+	/* fall back to a CSR read, correct indpendent of DMA_RTAIL */
+	tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
+	return rcd->head != tail;
 }
 
 /*
  * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
  * This routine will try to handle packets immediately (latency), but if
  * it finds too many, it will invoke the thread handler (bandwitdh).  The
- * chip receive interupt is *not* cleared down until this or the thread (if
+ * chip receive interrupt is *not* cleared down until this or the thread (if
  * invoked) is finished.  The intent is to avoid extra interrupts while we
  * are processing packets anyway.
  */
@@ -7843,6 +8121,7 @@
 
 	trace_hfi1_receive_interrupt(dd, rcd->ctxt);
 	this_cpu_inc(*dd->int_counter);
+	aspm_ctx_disable(rcd);
 
 	/* receive interrupt remains blocked while processing packets */
 	disposition = rcd->do_interrupt(rcd, 0);
@@ -7909,7 +8188,7 @@
 				& DC_DC8051_STS_CUR_STATE_PORT_MASK;
 }
 
-static u32 read_logical_state(struct hfi1_devdata *dd)
+u32 read_logical_state(struct hfi1_devdata *dd)
 {
 	u64 reg;
 
@@ -8157,8 +8436,8 @@
 	return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
 }
 
-static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
-			    u8 lane_id, u32 config_data)
+int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+		     u8 lane_id, u32 config_data)
 {
 	u64 data;
 	int ret;
@@ -8169,8 +8448,8 @@
 	ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd,
-			"load 8051 config: field id %d, lane %d, err %d\n",
-			(int)field_id, (int)lane_id, ret);
+			   "load 8051 config: field id %d, lane %d, err %d\n",
+			   (int)field_id, (int)lane_id, ret);
 	}
 	return ret;
 }
@@ -8180,8 +8459,8 @@
  * set the result, even on error.
  * Return 0 on success, -errno on failure
  */
-static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
-			    u32 *result)
+int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
+		     u32 *result)
 {
 	u64 big_data;
 	u32 addr;
@@ -8207,7 +8486,7 @@
 	} else {
 		*result = 0;
 		dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
-			__func__, lane_id, field_id);
+			   __func__, lane_id, field_id);
 	}
 
 	return ret;
@@ -8244,7 +8523,7 @@
 	u32 frame;
 
 	read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
-				&frame);
+			 &frame);
 	*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
 	*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@ -8326,7 +8605,7 @@
 	u32 frame;
 
 	read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
-				&frame);
+			 &frame);
 	*remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
 				& REMOTE_TX_RATE_MASK;
 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@ -8366,7 +8645,7 @@
 	*link_quality = 0;
 	if (dd->pport->host_link_state & HLS_UP) {
 		ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
-					&frame);
+				       &frame);
 		if (ret == 0)
 			*link_quality = (frame >> LINK_QUALITY_SHIFT)
 						& LINK_QUALITY_MASK;
@@ -8426,10 +8705,9 @@
 	for (lane = 0; lane < 4; lane++) {
 		ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
 		if (ret) {
-			dd_dev_err(
-				dd,
-				"Unable to read lane %d firmware details\n",
-				lane);
+			dd_dev_err(dd,
+				   "Unable to read lane %d firmware details\n",
+				   lane);
 			continue;
 		}
 		version = (frame >> SPICO_ROM_VERSION_SHIFT)
@@ -8437,8 +8715,8 @@
 		prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
 					& SPICO_ROM_PROD_ID_MASK;
 		dd_dev_info(dd,
-			"Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
-			lane, version, prod_id);
+			    "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
+			    lane, version, prod_id);
 	}
 }
 
@@ -8451,11 +8729,10 @@
 {
 	int ret;
 
-	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
-		type, data_out);
+	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG, type, data_out);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "read idle message: type %d, err %d\n",
-			(u32)type, ret);
+			   (u32)type, ret);
 		return -EINVAL;
 	}
 	dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
@@ -8472,8 +8749,8 @@
  */
 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
 {
-	return read_idle_message(dd,
-			(u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
+	return read_idle_message(dd, (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT,
+				 data);
 }
 
 /*
@@ -8489,7 +8766,7 @@
 	ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
-			data, ret);
+			   data, ret);
 		return -EINVAL;
 	}
 	return 0;
@@ -8504,8 +8781,8 @@
 {
 	u64 data;
 
-	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
-		| ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
+	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT) |
+		((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
 	return send_idle_message(dd, data);
 }
 
@@ -8527,7 +8804,7 @@
 		/* LCB_CFG_LOOPBACK.VAL = 2 */
 		/* LCB_CFG_LANE_WIDTH.VAL = 0 */
 		write_csr(dd, DC_LCB_CFG_LOOPBACK,
-			IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
+			  IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
 		write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
 	}
 
@@ -8539,25 +8816,24 @@
 	if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
 		/* LCB_CFG_RUN.EN = 1 */
 		write_csr(dd, DC_LCB_CFG_RUN,
-			1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+			  1ull << DC_LCB_CFG_RUN_EN_SHIFT);
 
 		/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
 		timeout = jiffies + msecs_to_jiffies(10);
 		while (1) {
-			reg = read_csr(dd,
-				DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+			reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
 			if (reg)
 				break;
 			if (time_after(jiffies, timeout)) {
 				dd_dev_err(dd,
-					"timeout waiting for LINK_TRANSFER_ACTIVE\n");
+					   "timeout waiting for LINK_TRANSFER_ACTIVE\n");
 				return -ETIMEDOUT;
 			}
 			udelay(2);
 		}
 
 		write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
-			1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
+			  1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
 	}
 
 	if (!loopback) {
@@ -8569,10 +8845,9 @@
 		 * done with LCB set up before resuming.
 		 */
 		dd_dev_err(dd,
-			"Pausing for peer to be finished with LCB set up\n");
+			   "Pausing for peer to be finished with LCB set up\n");
 		msleep(5000);
-		dd_dev_err(dd,
-			"Continuing with quick linkup\n");
+		dd_dev_err(dd, "Continuing with quick linkup\n");
 	}
 
 	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
@@ -8586,8 +8861,8 @@
 	ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd,
-			"%s: set physical link state to quick LinkUp failed with return %d\n",
-			__func__, ret);
+			   "%s: set physical link state to quick LinkUp failed with return %d\n",
+			   __func__, ret);
 
 		set_host_lcb_access(dd);
 		write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
@@ -8612,8 +8887,8 @@
 	if (ret == HCMD_SUCCESS)
 		return 0;
 	dd_dev_err(dd,
-		"Set physical link state to SerDes Loopback failed with return %d\n",
-		ret);
+		   "Set physical link state to SerDes Loopback failed with return %d\n",
+		   ret);
 	if (ret >= 0)
 		ret = -EINVAL;
 	return ret;
@@ -8628,7 +8903,7 @@
 
 	/* all loopbacks should disable self GUID check */
 	write_csr(dd, DC_DC8051_CFG_MODE,
-		(read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
+		  (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
 
 	/*
 	 * The simulator has only one loopback option - LCB.  Switch
@@ -8636,10 +8911,9 @@
 	 *
 	 * Accept all valid loopback values.
 	 */
-	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
-		&& (loopback == LOOPBACK_SERDES
-			|| loopback == LOOPBACK_LCB
-			|| loopback == LOOPBACK_CABLE)) {
+	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR) &&
+	    (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
+	     loopback == LOOPBACK_CABLE)) {
 		loopback = LOOPBACK_LCB;
 		quick_linkup = 1;
 		return 0;
@@ -8660,7 +8934,7 @@
 		/* not supported in emulation due to emulation RTL changes */
 		if (dd->icode == ICODE_FPGA_EMULATION) {
 			dd_dev_err(dd,
-				"LCB loopback not supported in emulation\n");
+				   "LCB loopback not supported in emulation\n");
 			return -EINVAL;
 		}
 		return 0;
@@ -8687,10 +8961,10 @@
 		u16 from;
 		u16 to;
 	} opa_link_xlate[] = {
-		{ OPA_LINK_WIDTH_1X, 1 << (1-1)  },
-		{ OPA_LINK_WIDTH_2X, 1 << (2-1)  },
-		{ OPA_LINK_WIDTH_3X, 1 << (3-1)  },
-		{ OPA_LINK_WIDTH_4X, 1 << (4-1)  },
+		{ OPA_LINK_WIDTH_1X, 1 << (1 - 1)  },
+		{ OPA_LINK_WIDTH_2X, 1 << (2 - 1)  },
+		{ OPA_LINK_WIDTH_3X, 1 << (3 - 1)  },
+		{ OPA_LINK_WIDTH_4X, 1 << (4 - 1)  },
 	};
 
 	for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
@@ -8716,7 +8990,7 @@
 
 	/* set the local tx rate - need to read-modify-write */
 	ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
-		&rx_polarity_inversion, &ppd->local_tx_rate);
+			       &rx_polarity_inversion, &ppd->local_tx_rate);
 	if (ret)
 		goto set_local_link_attributes_fail;
 
@@ -8737,15 +9011,16 @@
 
 	enable_lane_tx = 0xF; /* enable all four lanes */
 	ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
-		     rx_polarity_inversion, ppd->local_tx_rate);
+				rx_polarity_inversion, ppd->local_tx_rate);
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
 	/*
 	 * DC supports continuous updates.
 	 */
-	ret = write_vc_local_phy(dd, 0 /* no power management */,
-				     1 /* continuous updates */);
+	ret = write_vc_local_phy(dd,
+				 0 /* no power management */,
+				 1 /* continuous updates */);
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
@@ -8756,7 +9031,8 @@
 		goto set_local_link_attributes_fail;
 
 	ret = write_vc_local_link_width(dd, 0, 0,
-		     opa_to_vc_link_widths(ppd->link_width_enabled));
+					opa_to_vc_link_widths(
+						ppd->link_width_enabled));
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
@@ -8767,8 +9043,8 @@
 
 set_local_link_attributes_fail:
 	dd_dev_err(dd,
-		"Failed to set local link attributes, return 0x%x\n",
-		ret);
+		   "Failed to set local link attributes, return 0x%x\n",
+		   ret);
 	return ret;
 }
 
@@ -8781,54 +9057,101 @@
 {
 	if (!ppd->link_enabled) {
 		dd_dev_info(ppd->dd,
-			"%s: stopping link start because link is disabled\n",
-			__func__);
+			    "%s: stopping link start because link is disabled\n",
+			    __func__);
 		return 0;
 	}
 	if (!ppd->driver_link_ready) {
 		dd_dev_info(ppd->dd,
-			"%s: stopping link start because driver is not ready\n",
-			__func__);
+			    "%s: stopping link start because driver is not ready\n",
+			    __func__);
 		return 0;
 	}
 
 	if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
-			loopback == LOOPBACK_LCB ||
-			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+	    loopback == LOOPBACK_LCB ||
+	    ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
 		return set_link_state(ppd, HLS_DN_POLL);
 
 	dd_dev_info(ppd->dd,
-		"%s: stopping link start because no cable is present\n",
-		__func__);
+		    "%s: stopping link start because no cable is present\n",
+		    __func__);
 	return -EAGAIN;
 }
 
-static void reset_qsfp(struct hfi1_pportdata *ppd)
+static void wait_for_qsfp_init(struct hfi1_pportdata *ppd)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u64 mask;
+	unsigned long timeout;
+
+	/*
+	 * Check for QSFP interrupt for t_init (SFF 8679)
+	 */
+	timeout = jiffies + msecs_to_jiffies(2000);
+	while (1) {
+		mask = read_csr(dd, dd->hfi1_id ?
+				ASIC_QSFP2_IN : ASIC_QSFP1_IN);
+		if (!(mask & QSFP_HFI0_INT_N)) {
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR :
+				  ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N);
+			break;
+		}
+		if (time_after(jiffies, timeout)) {
+			dd_dev_info(dd, "%s: No IntN detected, reset complete\n",
+				    __func__);
+			break;
+		}
+		udelay(2);
+	}
+}
+
+static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u64 mask;
+
+	mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK);
+	if (enable)
+		mask |= (u64)QSFP_HFI0_INT_N;
+	else
+		mask &= ~(u64)QSFP_HFI0_INT_N;
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
+}
+
+void reset_qsfp(struct hfi1_pportdata *ppd)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	u64 mask, qsfp_mask;
 
+	/* Disable INT_N from triggering QSFP interrupts */
+	set_qsfp_int_n(ppd, 0);
+
+	/* Reset the QSFP */
 	mask = (u64)QSFP_HFI0_RESET_N;
-	qsfp_mask = read_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
+	qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
 	qsfp_mask |= mask;
-	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
-		qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask);
 
 	qsfp_mask = read_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
+			     dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
 	qsfp_mask &= ~mask;
 	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
-		qsfp_mask);
+		  dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
 
 	udelay(10);
 
 	qsfp_mask |= mask;
 	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
-		qsfp_mask);
+		  dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
+
+	wait_for_qsfp_init(ppd);
+
+	/*
+	 * Allow INT_N to trigger the QSFP interrupt to watch
+	 * for alarms and warnings
+	 */
+	set_qsfp_int_n(ppd, 1);
 }
 
 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@ -8837,102 +9160,86 @@
 	struct hfi1_devdata *dd = ppd->dd;
 
 	if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
-		(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP cable on fire\n",
-			__func__);
+	    (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
+		dd_dev_info(dd, "%s: QSFP cable on fire\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
-		(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP cable temperature too low\n",
-			__func__);
+	    (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
+		dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
-		(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP supply voltage too high\n",
-			__func__);
+	    (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
+		dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
-		(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP supply voltage too low\n",
-			__func__);
+	    (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
+		dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
+			    __func__);
 
 	/* Byte 2 is vendor specific */
 
 	if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 1/2 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 1/2 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 3/4 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 3/4 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
-		(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 bias too high\n",
-			__func__);
+	    (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
-		(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 bias too low\n",
-			__func__);
+	    (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
-		(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 bias too high\n",
-			__func__);
+	    (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
-		(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 bias too low\n",
-			__func__);
+	    (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
+			    __func__);
 
 	/* Bytes 9-10 and 11-12 are reserved */
 	/* Bytes 13-15 are vendor specific */
@@ -8940,35 +9247,8 @@
 	return 0;
 }
 
-static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
-{
-	refresh_qsfp_cache(ppd, &ppd->qsfp_info);
-
-	return 0;
-}
-
-static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
-{
-	struct hfi1_devdata *dd = ppd->dd;
-	u8 qsfp_interrupt_status = 0;
-
-	if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
-		!= 1) {
-		dd_dev_info(dd,
-			"%s: Failed to read status of QSFP module\n",
-			__func__);
-		return -EIO;
-	}
-
-	/* We don't care about alarms & warnings with a non-functional INT_N */
-	if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
-		do_pre_lni_host_behaviors(ppd);
-
-	return 0;
-}
-
 /* This routine will only be scheduled if the QSFP module is present */
-static void qsfp_event(struct work_struct *work)
+void qsfp_event(struct work_struct *work)
 {
 	struct qsfp_data *qd;
 	struct hfi1_pportdata *ppd;
@@ -8990,76 +9270,75 @@
 	dc_start(dd);
 
 	if (qd->cache_refresh_required) {
-		msleep(3000);
-		reset_qsfp(ppd);
+		set_qsfp_int_n(ppd, 0);
 
-		/* Check for QSFP interrupt after t_init (SFF 8679)
-		 * + extra
+		wait_for_qsfp_init(ppd);
+
+		/*
+		 * Allow INT_N to trigger the QSFP interrupt to watch
+		 * for alarms and warnings
 		 */
-		msleep(3000);
-		if (!qd->qsfp_interrupt_functional) {
-			if (do_qsfp_intr_fallback(ppd) < 0)
-				dd_dev_info(dd, "%s: QSFP fallback failed\n",
-					__func__);
-			ppd->driver_link_ready = 1;
-			start_link(ppd);
-		}
+		set_qsfp_int_n(ppd, 1);
+
+		tune_serdes(ppd);
+
+		start_link(ppd);
 	}
 
 	if (qd->check_interrupt_flags) {
 		u8 qsfp_interrupt_status[16] = {0,};
 
-		if (qsfp_read(ppd, dd->hfi1_id, 6,
-			      &qsfp_interrupt_status[0], 16) != 16) {
+		if (one_qsfp_read(ppd, dd->hfi1_id, 6,
+				  &qsfp_interrupt_status[0], 16) != 16) {
 			dd_dev_info(dd,
-				"%s: Failed to read status of QSFP module\n",
-				__func__);
+				    "%s: Failed to read status of QSFP module\n",
+				    __func__);
 		} else {
 			unsigned long flags;
-			u8 data_status;
 
+			handle_qsfp_error_conditions(
+					ppd, qsfp_interrupt_status);
 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 			ppd->qsfp_info.check_interrupt_flags = 0;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-								flags);
-
-			if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
-				 != 1) {
-				dd_dev_info(dd,
-				"%s: Failed to read status of QSFP module\n",
-					__func__);
-			}
-			if (!(data_status & QSFP_DATA_NOT_READY)) {
-				do_pre_lni_host_behaviors(ppd);
-				start_link(ppd);
-			} else
-				handle_qsfp_error_conditions(ppd,
-						qsfp_interrupt_status);
+					       flags);
 		}
 	}
 }
 
-void init_qsfp(struct hfi1_pportdata *ppd)
+static void init_qsfp_int(struct hfi1_devdata *dd)
 {
-	struct hfi1_devdata *dd = ppd->dd;
-	u64 qsfp_mask;
+	struct hfi1_pportdata *ppd = dd->pport;
+	u64 qsfp_mask, cce_int_mask;
+	const int qsfp1_int_smask = QSFP1_INT % 64;
+	const int qsfp2_int_smask = QSFP2_INT % 64;
 
-	if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
-			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
-		ppd->driver_link_ready = 1;
-		return;
+	/*
+	 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
+	 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
+	 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
+	 * the index of the appropriate CSR in the CCEIntMask CSR array
+	 */
+	cce_int_mask = read_csr(dd, CCE_INT_MASK +
+				(8 * (QSFP1_INT / 64)));
+	if (dd->hfi1_id) {
+		cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
+		write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
+			  cce_int_mask);
+	} else {
+		cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
+		write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
+			  cce_int_mask);
 	}
 
-	ppd->qsfp_info.ppd = ppd;
-	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
-
 	qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
 	/* Clear current status to avoid spurious interrupts */
-	write_csr(dd,
-			dd->hfi1_id ?
-				ASIC_QSFP2_CLEAR :
-				ASIC_QSFP1_CLEAR,
-		qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR,
+		  qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
+		  qsfp_mask);
+
+	set_qsfp_int_n(ppd, 0);
 
 	/* Handle active low nature of INT_N and MODPRST_N pins */
 	if (qsfp_mod_present(ppd))
@@ -9067,29 +9346,6 @@
 	write_csr(dd,
 		  dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
 		  qsfp_mask);
-
-	/* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
-	qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
-	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
-		qsfp_mask);
-
-	if (qsfp_mod_present(ppd)) {
-		msleep(3000);
-		reset_qsfp(ppd);
-
-		/* Check for QSFP interrupt after t_init (SFF 8679)
-		 * + extra
-		 */
-		msleep(3000);
-		if (!ppd->qsfp_info.qsfp_interrupt_functional) {
-			if (do_qsfp_intr_fallback(ppd) < 0)
-				dd_dev_info(dd,
-					"%s: QSFP fallback failed\n",
-					__func__);
-			ppd->driver_link_ready = 1;
-		}
-	}
 }
 
 /*
@@ -9097,6 +9353,10 @@
  */
 static void init_lcb(struct hfi1_devdata *dd)
 {
+	/* simulator does not correctly handle LCB cclk loopback, skip */
+	if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+		return;
+
 	/* the DC has been reset earlier in the driver load */
 
 	/* set LCB for cclk loopback on the port */
@@ -9125,8 +9385,6 @@
 		ppd->guid = guid;
 	}
 
-	/* the link defaults to enabled */
-	ppd->link_enabled = 1;
 	/* Set linkinit_reason on power up per OPA spec */
 	ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
 
@@ -9139,6 +9397,12 @@
 			return ret;
 	}
 
+	/* tune the SERDES to a ballpark setting for
+	 * optimal signal and bit error rate
+	 * Needs to be done before starting the link
+	 */
+	tune_serdes(ppd);
+
 	return start_link(ppd);
 }
 
@@ -9156,8 +9420,10 @@
 	ppd->driver_link_ready = 0;
 	ppd->link_enabled = 0;
 
+	ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
 	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
-	  OPA_LINKDOWN_REASON_SMA_DISABLED);
+			     OPA_LINKDOWN_REASON_SMA_DISABLED);
 	set_link_state(ppd, HLS_DN_OFFLINE);
 
 	/* disable the port */
@@ -9171,14 +9437,14 @@
 
 	ppd = (struct hfi1_pportdata *)(dd + 1);
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
-		ppd->ibport_data.rc_acks = NULL;
-		ppd->ibport_data.rc_qacks = NULL;
-		ppd->ibport_data.rc_acks = alloc_percpu(u64);
-		ppd->ibport_data.rc_qacks = alloc_percpu(u64);
-		ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
-		if ((ppd->ibport_data.rc_acks == NULL) ||
-		    (ppd->ibport_data.rc_delayed_comp == NULL) ||
-		    (ppd->ibport_data.rc_qacks == NULL))
+		ppd->ibport_data.rvp.rc_acks = NULL;
+		ppd->ibport_data.rvp.rc_qacks = NULL;
+		ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+		ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+		ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+		if (!ppd->ibport_data.rvp.rc_acks ||
+		    !ppd->ibport_data.rvp.rc_delayed_comp ||
+		    !ppd->ibport_data.rvp.rc_qacks)
 			return -ENOMEM;
 	}
 
@@ -9213,8 +9479,8 @@
 		pa = 0;
 	} else if (type > PT_INVALID) {
 		dd_dev_err(dd,
-			"unexpected receive array type %u for index %u, not handled\n",
-			type, index);
+			   "unexpected receive array type %u for index %u, not handled\n",
+			   type, index);
 		goto done;
 	}
 
@@ -9429,12 +9695,15 @@
 	/* all kernel receive contexts have the same hdrqentsize */
 	for (i = 0; i < ppd->vls_supported; i++) {
 		sc_set_cr_threshold(dd->vld[i].sc,
-			sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
-				dd->rcd[0]->rcvhdrqentsize));
+				    sc_mtu_to_threshold(dd->vld[i].sc,
+							dd->vld[i].mtu,
+							dd->rcd[0]->
+							rcvhdrqentsize));
 	}
 	sc_set_cr_threshold(dd->vld[15].sc,
-		sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
-			dd->rcd[0]->rcvhdrqentsize));
+			    sc_mtu_to_threshold(dd->vld[15].sc,
+						dd->vld[15].mtu,
+						dd->rcd[0]->rcvhdrqentsize));
 
 	/* Adjust maximum MTU for the port in DC */
 	dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
@@ -9460,7 +9729,7 @@
 	c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
 		| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
 	c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
-			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
+			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
 	      ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
 			<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
 	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
@@ -9495,8 +9764,8 @@
 			break;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"timeout waiting for phy link state 0x%x, current state is 0x%x\n",
-				state, curr_state);
+				   "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
+				   state, curr_state);
 			return -ETIMEDOUT;
 		}
 		usleep_range(1950, 2050); /* sleep 2ms-ish */
@@ -9539,17 +9808,18 @@
 
 	if (do_transition) {
 		ret = set_physical_link_state(dd,
-			PLS_OFFLINE | (rem_reason << 8));
+					      (rem_reason << 8) | PLS_OFFLINE);
 
 		if (ret != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to Offline link state, return %d\n",
-				ret);
+				   "Failed to transition to Offline link state, return %d\n",
+				   ret);
 			return -EINVAL;
 		}
-		if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
+		if (ppd->offline_disabled_reason ==
+				HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
 			ppd->offline_disabled_reason =
-			OPA_LINKDOWN_REASON_TRANSIENT;
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
 	}
 
 	if (do_wait) {
@@ -9570,6 +9840,22 @@
 	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
 	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
 
+	if (ppd->port_type == PORT_TYPE_QSFP &&
+	    ppd->qsfp_info.limiting_active &&
+	    qsfp_mod_present(ppd)) {
+		int ret;
+
+		ret = acquire_chip_resource(dd, qsfp_resource(dd), QSFP_WAIT);
+		if (ret == 0) {
+			set_qsfp_tx(ppd, 0);
+			release_chip_resource(dd, qsfp_resource(dd));
+		} else {
+			/* not fatal, but should warn */
+			dd_dev_err(dd,
+				   "Unable to acquire lock to turn off QSFP TX\n");
+		}
+	}
+
 	/*
 	 * The LNI has a mandatory wait time after the physical state
 	 * moves to Offline.Quiet.  The wait time may be different
@@ -9582,7 +9868,7 @@
 	ret = wait_fm_ready(dd, 7000);
 	if (ret) {
 		dd_dev_err(dd,
-			"After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
+			   "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
 		/* state is really offline, so make it so */
 		ppd->host_link_state = HLS_DN_OFFLINE;
 		return ret;
@@ -9605,8 +9891,8 @@
 		read_last_local_state(dd, &last_local_state);
 		read_last_remote_state(dd, &last_remote_state);
 		dd_dev_err(dd,
-			"LNI failure last states: local 0x%08x, remote 0x%08x\n",
-			last_local_state, last_remote_state);
+			   "LNI failure last states: local 0x%08x, remote 0x%08x\n",
+			   last_local_state, last_remote_state);
 	}
 
 	/* the active link width (downgrade) is 0 on link down */
@@ -9754,14 +10040,14 @@
 		state = dd->link_default;
 
 	/* interpret poll -> poll as a link bounce */
-	poll_bounce = ppd->host_link_state == HLS_DN_POLL
-				&& state == HLS_DN_POLL;
+	poll_bounce = ppd->host_link_state == HLS_DN_POLL &&
+		      state == HLS_DN_POLL;
 
 	dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
-		link_state_name(ppd->host_link_state),
-		link_state_name(orig_new_state),
-		poll_bounce ? "(bounce) " : "",
-		link_state_reason_name(ppd, state));
+		    link_state_name(ppd->host_link_state),
+		    link_state_name(orig_new_state),
+		    poll_bounce ? "(bounce) " : "",
+		    link_state_reason_name(ppd, state));
 
 	was_up = !!(ppd->host_link_state & HLS_UP);
 
@@ -9782,8 +10068,8 @@
 
 	switch (state) {
 	case HLS_UP_INIT:
-		if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
-			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
+		if (ppd->host_link_state == HLS_DN_POLL &&
+		    (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
 			/*
 			 * Quick link up jumps from polling to here.
 			 *
@@ -9791,7 +10077,7 @@
 			 * simulator jumps from polling to link up.
 			 * Accept that here.
 			 */
-			/* OK */;
+			/* OK */
 		} else if (ppd->host_link_state != HLS_GOING_UP) {
 			goto unexpected;
 		}
@@ -9802,8 +10088,8 @@
 			/* logical state didn't change, stay at going_up */
 			ppd->host_link_state = HLS_GOING_UP;
 			dd_dev_err(dd,
-				"%s: logical state did not change to INIT\n",
-				__func__);
+				   "%s: logical state did not change to INIT\n",
+				   __func__);
 		} else {
 			/* clear old transient LINKINIT_REASON code */
 			if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
@@ -9827,8 +10113,8 @@
 			/* logical state didn't change, stay at init */
 			ppd->host_link_state = HLS_UP_INIT;
 			dd_dev_err(dd,
-				"%s: logical state did not change to ARMED\n",
-				__func__);
+				   "%s: logical state did not change to ARMED\n",
+				   __func__);
 		}
 		/*
 		 * The simulator does not currently implement SMA messages,
@@ -9849,15 +10135,14 @@
 			/* logical state didn't change, stay at armed */
 			ppd->host_link_state = HLS_UP_ARMED;
 			dd_dev_err(dd,
-				"%s: logical state did not change to ACTIVE\n",
-				__func__);
+				   "%s: logical state did not change to ACTIVE\n",
+				   __func__);
 		} else {
-
 			/* tell all engines to go running */
 			sdma_all_running(dd);
 
 			/* Signal the IB layer that the port has went active */
-			event.device = &dd->verbs_dev.ibdev;
+			event.device = &dd->verbs_dev.rdi.ibdev;
 			event.element.port_num = ppd->port;
 			event.event = IB_EVENT_PORT_ACTIVE;
 		}
@@ -9884,6 +10169,7 @@
 				ppd->link_enabled = 1;
 		}
 
+		set_all_slowpath(ppd->dd);
 		ret = set_local_link_attributes(ppd);
 		if (ret)
 			break;
@@ -9898,12 +10184,13 @@
 			ret1 = set_physical_link_state(dd, PLS_POLLING);
 			if (ret1 != HCMD_SUCCESS) {
 				dd_dev_err(dd,
-					"Failed to transition to Polling link state, return 0x%x\n",
-					ret1);
+					   "Failed to transition to Polling link state, return 0x%x\n",
+					   ret1);
 				ret = -EINVAL;
 			}
 		}
-		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
 		/*
 		 * If an error occurred above, go back to offline.  The
 		 * caller may reschedule another attempt.
@@ -9928,8 +10215,8 @@
 		ret1 = set_physical_link_state(dd, PLS_DISABLED);
 		if (ret1 != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to Disabled link state, return 0x%x\n",
-				ret1);
+				   "Failed to transition to Disabled link state, return 0x%x\n",
+				   ret1);
 			ret = -EINVAL;
 			break;
 		}
@@ -9957,8 +10244,8 @@
 		ret1 = set_physical_link_state(dd, PLS_LINKUP);
 		if (ret1 != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to link up state, return 0x%x\n",
-				ret1);
+				   "Failed to transition to link up state, return 0x%x\n",
+				   ret1);
 			ret = -EINVAL;
 			break;
 		}
@@ -9969,7 +10256,7 @@
 	case HLS_LINK_COOLDOWN:		/* transient within goto_offline() */
 	default:
 		dd_dev_info(dd, "%s: state 0x%x: not supported\n",
-			__func__, state);
+			    __func__, state);
 		ret = -EINVAL;
 		break;
 	}
@@ -9989,8 +10276,8 @@
 
 unexpected:
 	dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
-		__func__, link_state_name(ppd->host_link_state),
-		link_state_name(state));
+		   __func__, link_state_name(ppd->host_link_state),
+		   link_state_name(state));
 	ret = -EINVAL;
 
 done:
@@ -10016,7 +10303,7 @@
 		 * The VL Arbitrator high limit is sent in units of 4k
 		 * bytes, while HFI stores it in units of 64 bytes.
 		 */
-		val *= 4096/64;
+		val *= 4096 / 64;
 		reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
 			<< SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
 		write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
@@ -10031,12 +10318,6 @@
 			ppd->vls_operational = val;
 			if (!ppd->port)
 				ret = -EINVAL;
-			else
-				ret = sdma_map_init(
-					ppd->dd,
-					ppd->port - 1,
-					val,
-					NULL);
 		}
 		break;
 	/*
@@ -10084,8 +10365,8 @@
 	default:
 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
 			dd_dev_info(ppd->dd,
-			  "%s: which %s, val 0x%x: not implemented\n",
-			  __func__, ib_cfg_name(which), val);
+				    "%s: which %s, val 0x%x: not implemented\n",
+				    __func__, ib_cfg_name(which), val);
 		break;
 	}
 	return ret;
@@ -10152,6 +10433,7 @@
 {
 	return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
 }
+
 /* end functions related to vl arbitration table caching */
 
 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
@@ -10239,7 +10521,7 @@
 
 	/* OPA and HFI have a 1-1 mapping */
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
+		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8 * i), &bc->vl[i]);
 
 	/* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
 	read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
@@ -10293,41 +10575,41 @@
 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
 {
 	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
-		DC_SC_VL_VAL(15_0,
-		0, dp->vlnt[0] & 0xf,
-		1, dp->vlnt[1] & 0xf,
-		2, dp->vlnt[2] & 0xf,
-		3, dp->vlnt[3] & 0xf,
-		4, dp->vlnt[4] & 0xf,
-		5, dp->vlnt[5] & 0xf,
-		6, dp->vlnt[6] & 0xf,
-		7, dp->vlnt[7] & 0xf,
-		8, dp->vlnt[8] & 0xf,
-		9, dp->vlnt[9] & 0xf,
-		10, dp->vlnt[10] & 0xf,
-		11, dp->vlnt[11] & 0xf,
-		12, dp->vlnt[12] & 0xf,
-		13, dp->vlnt[13] & 0xf,
-		14, dp->vlnt[14] & 0xf,
-		15, dp->vlnt[15] & 0xf));
+		  DC_SC_VL_VAL(15_0,
+			       0, dp->vlnt[0] & 0xf,
+			       1, dp->vlnt[1] & 0xf,
+			       2, dp->vlnt[2] & 0xf,
+			       3, dp->vlnt[3] & 0xf,
+			       4, dp->vlnt[4] & 0xf,
+			       5, dp->vlnt[5] & 0xf,
+			       6, dp->vlnt[6] & 0xf,
+			       7, dp->vlnt[7] & 0xf,
+			       8, dp->vlnt[8] & 0xf,
+			       9, dp->vlnt[9] & 0xf,
+			       10, dp->vlnt[10] & 0xf,
+			       11, dp->vlnt[11] & 0xf,
+			       12, dp->vlnt[12] & 0xf,
+			       13, dp->vlnt[13] & 0xf,
+			       14, dp->vlnt[14] & 0xf,
+			       15, dp->vlnt[15] & 0xf));
 	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
-		DC_SC_VL_VAL(31_16,
-		16, dp->vlnt[16] & 0xf,
-		17, dp->vlnt[17] & 0xf,
-		18, dp->vlnt[18] & 0xf,
-		19, dp->vlnt[19] & 0xf,
-		20, dp->vlnt[20] & 0xf,
-		21, dp->vlnt[21] & 0xf,
-		22, dp->vlnt[22] & 0xf,
-		23, dp->vlnt[23] & 0xf,
-		24, dp->vlnt[24] & 0xf,
-		25, dp->vlnt[25] & 0xf,
-		26, dp->vlnt[26] & 0xf,
-		27, dp->vlnt[27] & 0xf,
-		28, dp->vlnt[28] & 0xf,
-		29, dp->vlnt[29] & 0xf,
-		30, dp->vlnt[30] & 0xf,
-		31, dp->vlnt[31] & 0xf));
+		  DC_SC_VL_VAL(31_16,
+			       16, dp->vlnt[16] & 0xf,
+			       17, dp->vlnt[17] & 0xf,
+			       18, dp->vlnt[18] & 0xf,
+			       19, dp->vlnt[19] & 0xf,
+			       20, dp->vlnt[20] & 0xf,
+			       21, dp->vlnt[21] & 0xf,
+			       22, dp->vlnt[22] & 0xf,
+			       23, dp->vlnt[23] & 0xf,
+			       24, dp->vlnt[24] & 0xf,
+			       25, dp->vlnt[25] & 0xf,
+			       26, dp->vlnt[26] & 0xf,
+			       27, dp->vlnt[27] & 0xf,
+			       28, dp->vlnt[28] & 0xf,
+			       29, dp->vlnt[29] & 0xf,
+			       30, dp->vlnt[30] & 0xf,
+			       31, dp->vlnt[31] & 0xf));
 }
 
 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
@@ -10335,7 +10617,7 @@
 {
 	if (limit != 0)
 		dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
-			what, (int)limit, idx);
+			    what, (int)limit, idx);
 }
 
 /* change only the shared limit portion of SendCmGLobalCredit */
@@ -10413,14 +10695,14 @@
 	}
 
 	dd_dev_err(dd,
-		"%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
-		which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
+		   "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
+		   which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
 	/*
 	 * If this occurs, it is likely there was a credit loss on the link.
 	 * The only recovery from that is a link bounce.
 	 */
 	dd_dev_err(dd,
-		"Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
+		   "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
 }
 
 /*
@@ -10447,13 +10729,15 @@
  * raise = if the new limit is higher than the current value (may be changed
  *	earlier in the algorithm), set the new limit to the new value
  */
-static int set_buffer_control(struct hfi1_devdata *dd,
-			      struct buffer_control *new_bc)
+int set_buffer_control(struct hfi1_pportdata *ppd,
+		       struct buffer_control *new_bc)
 {
+	struct hfi1_devdata *dd = ppd->dd;
 	u64 changing_mask, ld_mask, stat_mask;
 	int change_count;
 	int i, use_all_mask;
 	int this_shared_changing;
+	int vl_count = 0, ret;
 	/*
 	 * A0: add the variable any_shared_limit_changing below and in the
 	 * algorithm above.  If removing A0 support, it can be removed.
@@ -10478,7 +10762,6 @@
 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
 #define NUM_USABLE_VLS 16	/* look at VL15 and less */
 
-
 	/* find the new total credits, do sanity check on unused VLs */
 	for (i = 0; i < OPA_MAX_VLS; i++) {
 		if (valid_vl(i)) {
@@ -10486,9 +10769,9 @@
 			continue;
 		}
 		nonzero_msg(dd, i, "dedicated",
-			be16_to_cpu(new_bc->vl[i].dedicated));
+			    be16_to_cpu(new_bc->vl[i].dedicated));
 		nonzero_msg(dd, i, "shared",
-			be16_to_cpu(new_bc->vl[i].shared));
+			    be16_to_cpu(new_bc->vl[i].shared));
 		new_bc->vl[i].dedicated = 0;
 		new_bc->vl[i].shared = 0;
 	}
@@ -10502,8 +10785,10 @@
 	 */
 	memset(changing, 0, sizeof(changing));
 	memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
-	/* NOTE: Assumes that the individual VL bits are adjacent and in
-	   increasing order */
+	/*
+	 * NOTE: Assumes that the individual VL bits are adjacent and in
+	 * increasing order
+	 */
 	stat_mask =
 		SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
 	changing_mask = 0;
@@ -10517,8 +10802,8 @@
 						!= cur_bc.vl[i].shared;
 		if (this_shared_changing)
 			any_shared_limit_changing = 1;
-		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
-				|| this_shared_changing) {
+		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated ||
+		    this_shared_changing) {
 			changing[i] = 1;
 			changing_mask |= stat_mask;
 			change_count++;
@@ -10557,7 +10842,7 @@
 	}
 
 	wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
-		"shared");
+				 "shared");
 
 	if (change_count > 0) {
 		for (i = 0; i < NUM_USABLE_VLS; i++) {
@@ -10566,7 +10851,8 @@
 
 			if (lowering_dedicated[i]) {
 				set_vl_dedicated(dd, i,
-					be16_to_cpu(new_bc->vl[i].dedicated));
+						 be16_to_cpu(new_bc->
+							     vl[i].dedicated));
 				cur_bc.vl[i].dedicated =
 						new_bc->vl[i].dedicated;
 			}
@@ -10582,7 +10868,8 @@
 			if (be16_to_cpu(new_bc->vl[i].dedicated) >
 					be16_to_cpu(cur_bc.vl[i].dedicated))
 				set_vl_dedicated(dd, i,
-					be16_to_cpu(new_bc->vl[i].dedicated));
+						 be16_to_cpu(new_bc->
+							     vl[i].dedicated));
 		}
 	}
 
@@ -10598,13 +10885,35 @@
 
 	/* finally raise the global shared */
 	if (be16_to_cpu(new_bc->overall_shared_limit) >
-			be16_to_cpu(cur_bc.overall_shared_limit))
+	    be16_to_cpu(cur_bc.overall_shared_limit))
 		set_global_shared(dd,
-			be16_to_cpu(new_bc->overall_shared_limit));
+				  be16_to_cpu(new_bc->overall_shared_limit));
 
 	/* bracket the credit change with a total adjustment */
 	if (new_total < cur_total)
 		set_global_limit(dd, new_total);
+
+	/*
+	 * Determine the actual number of operational VLS using the number of
+	 * dedicated and shared credits for each VL.
+	 */
+	if (change_count > 0) {
+		for (i = 0; i < TXE_NUM_DATA_VL; i++)
+			if (be16_to_cpu(new_bc->vl[i].dedicated) > 0 ||
+			    be16_to_cpu(new_bc->vl[i].shared) > 0)
+				vl_count++;
+		ppd->actual_vls_operational = vl_count;
+		ret = sdma_map_init(dd, ppd->port - 1, vl_count ?
+				    ppd->actual_vls_operational :
+				    ppd->vls_operational,
+				    NULL);
+		if (ret == 0)
+			ret = pio_map_init(dd, ppd->port - 1, vl_count ?
+					   ppd->actual_vls_operational :
+					   ppd->vls_operational, NULL);
+		if (ret)
+			return ret;
+	}
 	return 0;
 }
 
@@ -10696,7 +11005,7 @@
 				     VL_ARB_LOW_PRIO_TABLE_SIZE, t);
 		break;
 	case FM_TBL_BUFFER_CONTROL:
-		ret = set_buffer_control(ppd->dd, t);
+		ret = set_buffer_control(ppd, t);
 		break;
 	case FM_TBL_SC2VLNT:
 		set_sc2vlnt(ppd->dd, t);
@@ -10846,10 +11155,13 @@
 	}
 
 	rcd->rcvavail_timeout = timeout;
-	/* timeout cannot be larger than rcv_intr_timeout_csr which has already
-	   been verified to be in range */
+	/*
+	 * timeout cannot be larger than rcv_intr_timeout_csr which has already
+	 * been verified to be in range
+	 */
 	write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
-		(u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
+			(u64)timeout <<
+			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
 }
 
 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
@@ -10915,16 +11227,16 @@
 static u32 encoded_size(u32 size)
 {
 	switch (size) {
-	case   4*1024: return 0x1;
-	case   8*1024: return 0x2;
-	case  16*1024: return 0x3;
-	case  32*1024: return 0x4;
-	case  64*1024: return 0x5;
-	case 128*1024: return 0x6;
-	case 256*1024: return 0x7;
-	case 512*1024: return 0x8;
-	case   1*1024*1024: return 0x9;
-	case   2*1024*1024: return 0xa;
+	case   4 * 1024: return 0x1;
+	case   8 * 1024: return 0x2;
+	case  16 * 1024: return 0x3;
+	case  32 * 1024: return 0x4;
+	case  64 * 1024: return 0x5;
+	case 128 * 1024: return 0x6;
+	case 256 * 1024: return 0x7;
+	case 512 * 1024: return 0x8;
+	case   1 * 1024 * 1024: return 0x9;
+	case   2 * 1024 * 1024: return 0xa;
 	}
 	return 0x1;	/* if invalid, go with the minimum size */
 }
@@ -10943,8 +11255,8 @@
 
 	rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
 	/* if the context already enabled, don't do the extra steps */
-	if ((op & HFI1_RCVCTRL_CTXT_ENB)
-			&& !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
+	if ((op & HFI1_RCVCTRL_CTXT_ENB) &&
+	    !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
 		/* reset the tail and hdr addresses, and sequence count */
 		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
 				rcd->rcvhdrq_phys);
@@ -11018,6 +11330,7 @@
 		if (dd->rcvhdrtail_dummy_physaddr) {
 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
 					dd->rcvhdrtail_dummy_physaddr);
+			/* Enabling RcvCtxtCtrl.TailUpd is intentional. */
 			rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
 		}
 
@@ -11029,15 +11342,20 @@
 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
 	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
-	if (op & HFI1_RCVCTRL_TAILUPD_DIS)
-		rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+	if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
+		/* See comment on RcvCtxtCtrl.TailUpd above */
+		if (!(op & HFI1_RCVCTRL_CTXT_DIS))
+			rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+	}
 	if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
 		rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
 	if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
 		rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
 	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
-		/* In one-packet-per-eager mode, the size comes from
-		   the RcvArray entry. */
+		/*
+		 * In one-packet-per-eager mode, the size comes from
+		 * the RcvArray entry.
+		 */
 		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
 		rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
 	}
@@ -11056,19 +11374,19 @@
 	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
 
 	/* work around sticky RcvCtxtStatus.BlockedRHQFull */
-	if (did_enable
-	    && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
+	if (did_enable &&
+	    (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
 		reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
 		if (reg != 0) {
 			dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
-				ctxt, reg);
+				    ctxt, reg);
 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
 			reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
 			dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
-				ctxt, reg, reg == 0 ? "not" : "still");
+				    ctxt, reg, reg == 0 ? "not" : "still");
 		}
 	}
 
@@ -11079,7 +11397,7 @@
 		 */
 		/* set interrupt timeout */
 		write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
-			(u64)rcd->rcvavail_timeout <<
+				(u64)rcd->rcvavail_timeout <<
 				RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
 
 		/* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
@@ -11097,28 +11415,19 @@
 				dd->rcvhdrtail_dummy_physaddr);
 }
 
-u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
-		    u64 **cntrp)
+u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
 {
 	int ret;
 	u64 val = 0;
 
 	if (namep) {
 		ret = dd->cntrnameslen;
-		if (pos != 0) {
-			dd_dev_err(dd, "read_cntrs does not support indexing");
-			return 0;
-		}
 		*namep = dd->cntrnames;
 	} else {
 		const struct cntr_entry *entry;
 		int i, j;
 
 		ret = (dd->ndevcntrs) * sizeof(u64);
-		if (pos != 0) {
-			dd_dev_err(dd, "read_cntrs does not support indexing");
-			return 0;
-		}
 
 		/* Get the start of the block of counters */
 		*cntrp = dd->cntrs;
@@ -11147,6 +11456,20 @@
 						dd->cntrs[entry->offset + j] =
 									    val;
 					}
+				} else if (entry->flags & CNTR_SDMA) {
+					hfi1_cdbg(CNTR,
+						  "\t Per SDMA Engine\n");
+					for (j = 0; j < dd->chip_sdma_engines;
+					     j++) {
+						val =
+						entry->rw_cntr(entry, dd, j,
+							       CNTR_MODE_R, 0);
+						hfi1_cdbg(CNTR,
+							  "\t\tRead 0x%llx for %d\n",
+							  val, j);
+						dd->cntrs[entry->offset + j] =
+									val;
+					}
 				} else {
 					val = entry->rw_cntr(entry, dd,
 							CNTR_INVALID_VL,
@@ -11163,30 +11486,19 @@
 /*
  * Used by sysfs to create files for hfi stats to read
  */
-u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
-			char **namep, u64 **cntrp)
+u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp)
 {
 	int ret;
 	u64 val = 0;
 
 	if (namep) {
-		ret = dd->portcntrnameslen;
-		if (pos != 0) {
-			dd_dev_err(dd, "index not supported");
-			return 0;
-		}
-		*namep = dd->portcntrnames;
+		ret = ppd->dd->portcntrnameslen;
+		*namep = ppd->dd->portcntrnames;
 	} else {
 		const struct cntr_entry *entry;
-		struct hfi1_pportdata *ppd;
 		int i, j;
 
-		ret = (dd->nportcntrs) * sizeof(u64);
-		if (pos != 0) {
-			dd_dev_err(dd, "indexing not supported");
-			return 0;
-		}
-		ppd = (struct hfi1_pportdata *)(dd + 1 + port);
+		ret = ppd->dd->nportcntrs * sizeof(u64);
 		*cntrp = ppd->cntrs;
 
 		for (i = 0; i < PORT_CNTR_LAST; i++) {
@@ -11235,14 +11547,14 @@
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
 		kfree(ppd->cntrs);
 		kfree(ppd->scntrs);
-		free_percpu(ppd->ibport_data.rc_acks);
-		free_percpu(ppd->ibport_data.rc_qacks);
-		free_percpu(ppd->ibport_data.rc_delayed_comp);
+		free_percpu(ppd->ibport_data.rvp.rc_acks);
+		free_percpu(ppd->ibport_data.rvp.rc_qacks);
+		free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
 		ppd->cntrs = NULL;
 		ppd->scntrs = NULL;
-		ppd->ibport_data.rc_acks = NULL;
-		ppd->ibport_data.rc_qacks = NULL;
-		ppd->ibport_data.rc_delayed_comp = NULL;
+		ppd->ibport_data.rvp.rc_acks = NULL;
+		ppd->ibport_data.rvp.rc_qacks = NULL;
+		ppd->ibport_data.rvp.rc_delayed_comp = NULL;
 	}
 	kfree(dd->portcntrnames);
 	dd->portcntrnames = NULL;
@@ -11510,11 +11822,13 @@
 #define C_MAX_NAME 13 /* 12 chars + one for /0 */
 static int init_cntrs(struct hfi1_devdata *dd)
 {
-	int i, rcv_ctxts, index, j;
+	int i, rcv_ctxts, j;
 	size_t sz;
 	char *p;
 	char name[C_MAX_NAME];
 	struct hfi1_pportdata *ppd;
+	const char *bit_type_32 = ",32";
+	const int bit_type_32_sz = strlen(bit_type_32);
 
 	/* set up the stats timer; the add_timer is done at the end */
 	setup_timer(&dd->synth_stats_timer, update_synth_timer,
@@ -11527,49 +11841,57 @@
 	/* size names and determine how many we have*/
 	dd->ndevcntrs = 0;
 	sz = 0;
-	index = 0;
 
 	for (i = 0; i < DEV_CNTR_LAST; i++) {
-		hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
 			hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
 			continue;
 		}
 
 		if (dev_cntrs[i].flags & CNTR_VL) {
-			hfi1_dbg_early("\tProcessing VL cntr\n");
-			dev_cntrs[i].offset = index;
+			dev_cntrs[i].offset = dd->ndevcntrs;
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					dev_cntrs[i].name,
-					vl_from_idx(j));
+					 dev_cntrs[i].name, vl_from_idx(j));
 				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (dev_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
 				sz++;
-				hfi1_dbg_early("\t\t%s\n", name);
 				dd->ndevcntrs++;
-				index++;
+			}
+		} else if (dev_cntrs[i].flags & CNTR_SDMA) {
+			dev_cntrs[i].offset = dd->ndevcntrs;
+			for (j = 0; j < dd->chip_sdma_engines; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name, j);
+				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (dev_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
+				sz++;
+				dd->ndevcntrs++;
 			}
 		} else {
-			/* +1 for newline  */
+			/* +1 for newline. */
 			sz += strlen(dev_cntrs[i].name) + 1;
+			/* Add ",32" for 32-bit counters */
+			if (dev_cntrs[i].flags & CNTR_32BIT)
+				sz += bit_type_32_sz;
+			dev_cntrs[i].offset = dd->ndevcntrs;
 			dd->ndevcntrs++;
-			dev_cntrs[i].offset = index;
-			index++;
-			hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
 		}
 	}
 
 	/* allocate space for the counter values */
-	dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+	dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
 	if (!dd->cntrs)
 		goto bail;
 
-	dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+	dd->scntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
 	if (!dd->scntrs)
 		goto bail;
 
-
 	/* allocate space for the counter names */
 	dd->cntrnameslen = sz;
 	dd->cntrnames = kmalloc(sz, GFP_KERNEL);
@@ -11577,27 +11899,51 @@
 		goto bail;
 
 	/* fill in the names */
-	for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
+	for (p = dd->cntrnames, i = 0; i < DEV_CNTR_LAST; i++) {
 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
 			/* Nothing */
-		} else {
-			if (dev_cntrs[i].flags & CNTR_VL) {
-				for (j = 0; j < C_VL_COUNT; j++) {
-					memset(name, '\0', C_MAX_NAME);
-					snprintf(name, C_MAX_NAME, "%s%d",
-						dev_cntrs[i].name,
-						vl_from_idx(j));
-					memcpy(p, name, strlen(name));
-					p += strlen(name);
-					*p++ = '\n';
+		} else if (dev_cntrs[i].flags & CNTR_VL) {
+			for (j = 0; j < C_VL_COUNT; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name,
+					 vl_from_idx(j));
+				memcpy(p, name, strlen(name));
+				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (dev_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
 				}
-			} else {
-				memcpy(p, dev_cntrs[i].name,
-				       strlen(dev_cntrs[i].name));
-				p += strlen(dev_cntrs[i].name);
+
 				*p++ = '\n';
 			}
-			index++;
+		} else if (dev_cntrs[i].flags & CNTR_SDMA) {
+			for (j = 0; j < dd->chip_sdma_engines; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name, j);
+				memcpy(p, name, strlen(name));
+				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (dev_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
+				}
+
+				*p++ = '\n';
+			}
+		} else {
+			memcpy(p, dev_cntrs[i].name, strlen(dev_cntrs[i].name));
+			p += strlen(dev_cntrs[i].name);
+
+			/* Counter is 32 bits */
+			if (dev_cntrs[i].flags & CNTR_32BIT) {
+				memcpy(p, bit_type_32, bit_type_32_sz);
+				p += bit_type_32_sz;
+			}
+
+			*p++ = '\n';
 		}
 	}
 
@@ -11620,31 +11966,31 @@
 	sz = 0;
 	dd->nportcntrs = 0;
 	for (i = 0; i < PORT_CNTR_LAST; i++) {
-		hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
 		if (port_cntrs[i].flags & CNTR_DISABLED) {
 			hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
 			continue;
 		}
 
 		if (port_cntrs[i].flags & CNTR_VL) {
-			hfi1_dbg_early("\tProcessing VL cntr\n");
 			port_cntrs[i].offset = dd->nportcntrs;
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					port_cntrs[i].name,
-					vl_from_idx(j));
+					 port_cntrs[i].name, vl_from_idx(j));
 				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (port_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
 				sz++;
-				hfi1_dbg_early("\t\t%s\n", name);
 				dd->nportcntrs++;
 			}
 		} else {
-			/* +1 for newline  */
+			/* +1 for newline */
 			sz += strlen(port_cntrs[i].name) + 1;
+			/* Add ",32" for 32-bit counters */
+			if (port_cntrs[i].flags & CNTR_32BIT)
+				sz += bit_type_32_sz;
 			port_cntrs[i].offset = dd->nportcntrs;
 			dd->nportcntrs++;
-			hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
 		}
 	}
 
@@ -11661,18 +12007,30 @@
 
 		if (port_cntrs[i].flags & CNTR_VL) {
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					port_cntrs[i].name,
-					vl_from_idx(j));
+					 port_cntrs[i].name, vl_from_idx(j));
 				memcpy(p, name, strlen(name));
 				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (port_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
+				}
+
 				*p++ = '\n';
 			}
 		} else {
 			memcpy(p, port_cntrs[i].name,
 			       strlen(port_cntrs[i].name));
 			p += strlen(port_cntrs[i].name);
+
+			/* Counter is 32 bits */
+			if (port_cntrs[i].flags & CNTR_32BIT) {
+				memcpy(p, bit_type_32, bit_type_32_sz);
+				p += bit_type_32_sz;
+			}
+
 			*p++ = '\n';
 		}
 	}
@@ -11700,14 +12058,13 @@
 	return -ENOMEM;
 }
 
-
 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
 {
 	switch (chip_lstate) {
 	default:
 		dd_dev_err(dd,
-			 "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
-			 chip_lstate);
+			   "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+			   chip_lstate);
 		/* fall through */
 	case LSTATE_DOWN:
 		return IB_PORT_DOWN;
@@ -11726,7 +12083,7 @@
 	switch (chip_pstate & 0xf0) {
 	default:
 		dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
-			chip_pstate);
+			   chip_pstate);
 		/* fall through */
 	case PLS_DISABLED:
 		return IB_PORTPHYSSTATE_DISABLED;
@@ -11792,7 +12149,7 @@
 	new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
 	if (new_state != ppd->lstate) {
 		dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
-			opa_lstate_name(new_state), new_state);
+			    opa_lstate_name(new_state), new_state);
 		ppd->lstate = new_state;
 	}
 	/*
@@ -11851,18 +12208,17 @@
 
 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
 {
-	static u32 remembered_state = 0xff;
 	u32 pstate;
 	u32 ib_pstate;
 
 	pstate = read_physical_state(ppd->dd);
 	ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
-	if (remembered_state != ib_pstate) {
+	if (ppd->last_pstate != ib_pstate) {
 		dd_dev_info(ppd->dd,
-			"%s: physical state changed to %s (0x%x), phy 0x%x\n",
-			__func__, opa_pstate_name(ib_pstate), ib_pstate,
-			pstate);
-		remembered_state = ib_pstate;
+			    "%s: physical state changed to %s (0x%x), phy 0x%x\n",
+			    __func__, opa_pstate_name(ib_pstate), ib_pstate,
+			    pstate);
+		ppd->last_pstate = ib_pstate;
 	}
 	return ib_pstate;
 }
@@ -11906,7 +12262,7 @@
 
 int hfi1_init_ctxt(struct send_context *sc)
 {
-	if (sc != NULL) {
+	if (sc) {
 		struct hfi1_devdata *dd = sc->dd;
 		u64 reg;
 		u8 set = (sc->type == SC_USER ?
@@ -11963,34 +12319,14 @@
 	 * In HFI, the mask needs to be 1 to allow interrupts.
 	 */
 	if (enable) {
-		u64 cce_int_mask;
-		const int qsfp1_int_smask = QSFP1_INT % 64;
-		const int qsfp2_int_smask = QSFP2_INT % 64;
-
 		/* enable all interrupts */
 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-			write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
+			write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0);
 
-		/*
-		 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
-		 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
-		 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
-		 * the index of the appropriate CSR in the CCEIntMask CSR array
-		 */
-		cce_int_mask = read_csr(dd, CCE_INT_MASK +
-						(8*(QSFP1_INT/64)));
-		if (dd->hfi1_id) {
-			cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
-			write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
-					cce_int_mask);
-		} else {
-			cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
-			write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
-					cce_int_mask);
-		}
+		init_qsfp_int(dd);
 	} else {
 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-			write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+			write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
 	}
 }
 
@@ -12002,7 +12338,7 @@
 	int i;
 
 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-		write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
+		write_csr(dd, CCE_INT_CLEAR + (8 * i), ~(u64)0);
 
 	write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
 	write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
@@ -12037,10 +12373,9 @@
 		struct hfi1_msix_entry *me = dd->msix_entries;
 
 		for (i = 0; i < dd->num_msix_entries; i++, me++) {
-			if (me->arg == NULL) /* => no irq, no affinity */
-				break;
-			irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
-					NULL);
+			if (!me->arg) /* => no irq, no affinity */
+				continue;
+			hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
 			free_irq(me->msix.vector, me->arg);
 		}
 	} else {
@@ -12061,8 +12396,6 @@
 	}
 
 	/* clean structures */
-	for (i = 0; i < dd->num_msix_entries; i++)
-		free_cpumask_var(dd->msix_entries[i].mask);
 	kfree(dd->msix_entries);
 	dd->msix_entries = NULL;
 	dd->num_msix_entries = 0;
@@ -12085,10 +12418,10 @@
 	/* direct the chip source to the given MSI-X interrupt */
 	m = isrc / 8;
 	n = isrc % 8;
-	reg = read_csr(dd, CCE_INT_MAP + (8*m));
-	reg &= ~((u64)0xff << (8*n));
-	reg |= ((u64)msix_intr & 0xff) << (8*n);
-	write_csr(dd, CCE_INT_MAP + (8*m), reg);
+	reg = read_csr(dd, CCE_INT_MAP + (8 * m));
+	reg &= ~((u64)0xff << (8 * n));
+	reg |= ((u64)msix_intr & 0xff) << (8 * n);
+	write_csr(dd, CCE_INT_MAP + (8 * m), reg);
 }
 
 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
@@ -12101,12 +12434,12 @@
 	 *	SDMAProgress
 	 *	SDMAIdle
 	 */
-	remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
-	remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
-	remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
+	remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
+	remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
+	remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
 }
 
 static int request_intx_irq(struct hfi1_devdata *dd)
@@ -12116,10 +12449,10 @@
 	snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d",
 		 dd->unit);
 	ret = request_irq(dd->pcidev->irq, general_interrupt,
-				  IRQF_SHARED, dd->intx_name, dd);
+			  IRQF_SHARED, dd->intx_name, dd);
 	if (ret)
 		dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
-				ret);
+			   ret);
 	else
 		dd->requested_intx_irq = 1;
 	return ret;
@@ -12127,70 +12460,20 @@
 
 static int request_msix_irqs(struct hfi1_devdata *dd)
 {
-	const struct cpumask *local_mask;
-	cpumask_var_t def, rcv;
-	bool def_ret, rcv_ret;
 	int first_general, last_general;
 	int first_sdma, last_sdma;
 	int first_rx, last_rx;
-	int first_cpu, curr_cpu;
-	int rcv_cpu, sdma_cpu;
-	int i, ret = 0, possible;
-	int ht;
+	int i, ret = 0;
 
 	/* calculate the ranges we are going to use */
 	first_general = 0;
-	first_sdma = last_general = first_general + 1;
-	first_rx = last_sdma = first_sdma + dd->num_sdma;
+	last_general = first_general + 1;
+	first_sdma = last_general;
+	last_sdma = first_sdma + dd->num_sdma;
+	first_rx = last_sdma;
 	last_rx = first_rx + dd->n_krcv_queues;
 
 	/*
-	 * Interrupt affinity.
-	 *
-	 * non-rcv avail gets a default mask that
-	 * starts as possible cpus with threads reset
-	 * and each rcv avail reset.
-	 *
-	 * rcv avail gets node relative 1 wrapping back
-	 * to the node relative 1 as necessary.
-	 *
-	 */
-	local_mask = cpumask_of_pcibus(dd->pcidev->bus);
-	/* if first cpu is invalid, use NUMA 0 */
-	if (cpumask_first(local_mask) >= nr_cpu_ids)
-		local_mask = topology_core_cpumask(0);
-
-	def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
-	rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
-	if (!def_ret || !rcv_ret)
-		goto bail;
-	/* use local mask as default */
-	cpumask_copy(def, local_mask);
-	possible = cpumask_weight(def);
-	/* disarm threads from default */
-	ht = cpumask_weight(
-			topology_sibling_cpumask(cpumask_first(local_mask)));
-	for (i = possible/ht; i < possible; i++)
-		cpumask_clear_cpu(i, def);
-	/* def now has full cores on chosen node*/
-	first_cpu = cpumask_first(def);
-	if (nr_cpu_ids >= first_cpu)
-		first_cpu++;
-	curr_cpu = first_cpu;
-
-	/*  One context is reserved as control context */
-	for (i = first_cpu; i < dd->n_krcv_queues + first_cpu - 1; i++) {
-		cpumask_clear_cpu(curr_cpu, def);
-		cpumask_set_cpu(curr_cpu, rcv);
-		curr_cpu = cpumask_next(curr_cpu, def);
-		if (curr_cpu >= nr_cpu_ids)
-			break;
-	}
-	/* def mask has non-rcv, rcv has recv mask */
-	rcv_cpu = cpumask_first(rcv);
-	sdma_cpu = cpumask_first(def);
-
-	/*
 	 * Sanity check - the code expects all SDMA chip source
 	 * interrupts to be in the same CSR, starting at bit 0.  Verify
 	 * that this is true by checking the bit location of the start.
@@ -12215,6 +12498,7 @@
 			snprintf(me->name, sizeof(me->name),
 				 DRIVER_NAME "_%d", dd->unit);
 			err_info = "general";
+			me->type = IRQ_GENERAL;
 		} else if (first_sdma <= i && i < last_sdma) {
 			idx = i - first_sdma;
 			sde = &dd->per_sdma[idx];
@@ -12224,6 +12508,7 @@
 				 DRIVER_NAME "_%d sdma%d", dd->unit, idx);
 			err_info = "sdma";
 			remap_sdma_interrupts(dd, idx, i);
+			me->type = IRQ_SDMA;
 		} else if (first_rx <= i && i < last_rx) {
 			idx = i - first_rx;
 			rcd = dd->rcd[idx];
@@ -12234,9 +12519,9 @@
 			 * Set the interrupt register and mask for this
 			 * context's interrupt.
 			 */
-			rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
+			rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
 			rcd->imask = ((u64)1) <<
-					((IS_RCVAVAIL_START+idx) % 64);
+					((IS_RCVAVAIL_START + idx) % 64);
 			handler = receive_context_interrupt;
 			thread = receive_context_thread;
 			arg = rcd;
@@ -12244,25 +12529,27 @@
 				 DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
 			err_info = "receive context";
 			remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+			me->type = IRQ_RCVCTXT;
 		} else {
 			/* not in our expected range - complain, then
-			   ignore it */
+			 * ignore it
+			 */
 			dd_dev_err(dd,
-				"Unexpected extra MSI-X interrupt %d\n", i);
+				   "Unexpected extra MSI-X interrupt %d\n", i);
 			continue;
 		}
 		/* no argument, no interrupt */
-		if (arg == NULL)
+		if (!arg)
 			continue;
 		/* make sure the name is terminated */
-		me->name[sizeof(me->name)-1] = 0;
+		me->name[sizeof(me->name) - 1] = 0;
 
 		ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
-						me->name, arg);
+					   me->name, arg);
 		if (ret) {
 			dd_dev_err(dd,
-				"unable to allocate %s interrupt, vector %d, index %d, err %d\n",
-				 err_info, me->msix.vector, idx, ret);
+				   "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
+				   err_info, me->msix.vector, idx, ret);
 			return ret;
 		}
 		/*
@@ -12271,52 +12558,13 @@
 		 */
 		me->arg = arg;
 
-		if (!zalloc_cpumask_var(
-			&dd->msix_entries[i].mask,
-			GFP_KERNEL))
-			goto bail;
-		if (handler == sdma_interrupt) {
-			dd_dev_info(dd, "sdma engine %d cpu %d\n",
-				sde->this_idx, sdma_cpu);
-			sde->cpu = sdma_cpu;
-			cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
-			sdma_cpu = cpumask_next(sdma_cpu, def);
-			if (sdma_cpu >= nr_cpu_ids)
-				sdma_cpu = cpumask_first(def);
-		} else if (handler == receive_context_interrupt) {
-			dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt,
-				    (rcd->ctxt == HFI1_CTRL_CTXT) ?
-					    cpumask_first(def) : rcv_cpu);
-			if (rcd->ctxt == HFI1_CTRL_CTXT) {
-				/* map to first default */
-				cpumask_set_cpu(cpumask_first(def),
-						dd->msix_entries[i].mask);
-			} else {
-				cpumask_set_cpu(rcv_cpu,
-						dd->msix_entries[i].mask);
-				rcv_cpu = cpumask_next(rcv_cpu, rcv);
-				if (rcv_cpu >= nr_cpu_ids)
-					rcv_cpu = cpumask_first(rcv);
-			}
-		} else {
-			/* otherwise first def */
-			dd_dev_info(dd, "%s cpu %d\n",
-				err_info, cpumask_first(def));
-			cpumask_set_cpu(
-				cpumask_first(def), dd->msix_entries[i].mask);
-		}
-		irq_set_affinity_hint(
-			dd->msix_entries[i].msix.vector,
-			dd->msix_entries[i].mask);
+		ret = hfi1_get_irq_affinity(dd, me);
+		if (ret)
+			dd_dev_err(dd,
+				   "unable to pin IRQ %d\n", ret);
 	}
 
-out:
-	free_cpumask_var(def);
-	free_cpumask_var(rcv);
 	return ret;
-bail:
-	ret = -ENOMEM;
-	goto  out;
 }
 
 /*
@@ -12333,7 +12581,7 @@
 
 	/* all chip interrupts map to MSI-X 0 */
 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
-		write_csr(dd, CCE_INT_MAP + (8*i), 0);
+		write_csr(dd, CCE_INT_MAP + (8 * i), 0);
 }
 
 static int set_up_interrupts(struct hfi1_devdata *dd)
@@ -12442,7 +12690,7 @@
 		 */
 		num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1;
 	else
-		num_kernel_contexts = num_online_nodes();
+		num_kernel_contexts = num_online_nodes() + 1;
 	num_kernel_contexts =
 		max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
 	/*
@@ -12483,13 +12731,14 @@
 	dd->num_rcv_contexts = total_contexts;
 	dd->n_krcv_queues = num_kernel_contexts;
 	dd->first_user_ctxt = num_kernel_contexts;
+	dd->num_user_contexts = num_user_contexts;
 	dd->freectxts = num_user_contexts;
 	dd_dev_info(dd,
-		"rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
-		(int)dd->chip_rcv_contexts,
-		(int)dd->num_rcv_contexts,
-		(int)dd->n_krcv_queues,
-		(int)dd->num_rcv_contexts - dd->n_krcv_queues);
+		    "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
+		    (int)dd->chip_rcv_contexts,
+		    (int)dd->num_rcv_contexts,
+		    (int)dd->n_krcv_queues,
+		    (int)dd->num_rcv_contexts - dd->n_krcv_queues);
 
 	/*
 	 * Receive array allocation:
@@ -12515,8 +12764,8 @@
 		dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
 			dd->rcv_entries.group_size;
 		dd_dev_info(dd,
-		   "RcvArray group count too high, change to %u\n",
-		   dd->rcv_entries.ngroups);
+			    "RcvArray group count too high, change to %u\n",
+			    dd->rcv_entries.ngroups);
 		dd->rcv_entries.nctxt_extra = 0;
 	}
 	/*
@@ -12582,7 +12831,7 @@
 
 	/* CceIntMap */
 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
-		write_csr(dd, CCE_INT_MAP+(8*i), 0);
+		write_csr(dd, CCE_INT_MAP + (8 * i), 0);
 
 	/* SendCtxtCreditReturnAddr */
 	for (i = 0; i < dd->chip_send_contexts; i++)
@@ -12590,8 +12839,10 @@
 
 	/* PIO Send buffers */
 	/* SDMA Send buffers */
-	/* These are not normally read, and (presently) have no method
-	   to be read, so are not pre-initialized */
+	/*
+	 * These are not normally read, and (presently) have no method
+	 * to be read, so are not pre-initialized
+	 */
 
 	/* RcvHdrAddr */
 	/* RcvHdrTailAddr */
@@ -12600,13 +12851,13 @@
 		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
 		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
-			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
+			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j), 0);
 	}
 
 	/* RcvArray */
 	for (i = 0; i < dd->chip_rcv_array_count; i++)
-		write_csr(dd, RCV_ARRAY + (8*i),
-					RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+		write_csr(dd, RCV_ARRAY + (8 * i),
+			  RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
 
 	/* RcvQPMapTable */
 	for (i = 0; i < 32; i++)
@@ -12638,8 +12889,8 @@
 			return;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
-				status_bits, reg & status_bits);
+				   "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
+				   status_bits, reg & status_bits);
 			return;
 		}
 		udelay(1);
@@ -12671,7 +12922,7 @@
 	for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
 		write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
 		write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
-					CCE_MSIX_TABLE_UPPER_RESETCSR);
+			  CCE_MSIX_TABLE_UPPER_RESETCSR);
 	}
 	for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
 		/* CCE_MSIX_PBA read-only */
@@ -12691,91 +12942,6 @@
 		write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
 }
 
-/* set ASIC CSRs to chip reset defaults */
-static void reset_asic_csrs(struct hfi1_devdata *dd)
-{
-	int i;
-
-	/*
-	 * If the HFIs are shared between separate nodes or VMs,
-	 * then more will need to be done here.  One idea is a module
-	 * parameter that returns early, letting the first power-on or
-	 * a known first load do the reset and blocking all others.
-	 */
-
-	if (!(dd->flags & HFI1_DO_INIT_ASIC))
-		return;
-
-	if (dd->icode != ICODE_FPGA_EMULATION) {
-		/* emulation does not have an SBus - leave these alone */
-		/*
-		 * All writes to ASIC_CFG_SBUS_REQUEST do something.
-		 * Notes:
-		 * o The reset is not zero if aimed at the core.  See the
-		 *   SBus documentation for details.
-		 * o If the SBus firmware has been updated (e.g. by the BIOS),
-		 *   will the reset revert that?
-		 */
-		/* ASIC_CFG_SBUS_REQUEST leave alone */
-		write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
-	}
-	/* ASIC_SBUS_RESULT read-only */
-	write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
-	for (i = 0; i < ASIC_NUM_SCRATCH; i++)
-		write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
-	write_csr(dd, ASIC_CFG_MUTEX, 0);	/* this will clear it */
-
-	/* We might want to retain this state across FLR if we ever use it */
-	write_csr(dd, ASIC_CFG_DRV_STR, 0);
-
-	/* ASIC_CFG_THERM_POLL_EN leave alone */
-	/* ASIC_STS_THERM read-only */
-	/* ASIC_CFG_RESET leave alone */
-
-	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
-	/* ASIC_PCIE_SD_HOST_STATUS read-only */
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
-	/* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
-	/* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
-	/* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
-	for (i = 0; i < 16; i++)
-		write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
-
-	/* ASIC_GPIO_IN read-only */
-	write_csr(dd, ASIC_GPIO_OE, 0);
-	write_csr(dd, ASIC_GPIO_INVERT, 0);
-	write_csr(dd, ASIC_GPIO_OUT, 0);
-	write_csr(dd, ASIC_GPIO_MASK, 0);
-	/* ASIC_GPIO_STATUS read-only */
-	write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
-	/* ASIC_GPIO_FORCE leave alone */
-
-	/* ASIC_QSFP1_IN read-only */
-	write_csr(dd, ASIC_QSFP1_OE, 0);
-	write_csr(dd, ASIC_QSFP1_INVERT, 0);
-	write_csr(dd, ASIC_QSFP1_OUT, 0);
-	write_csr(dd, ASIC_QSFP1_MASK, 0);
-	/* ASIC_QSFP1_STATUS read-only */
-	write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
-	/* ASIC_QSFP1_FORCE leave alone */
-
-	/* ASIC_QSFP2_IN read-only */
-	write_csr(dd, ASIC_QSFP2_OE, 0);
-	write_csr(dd, ASIC_QSFP2_INVERT, 0);
-	write_csr(dd, ASIC_QSFP2_OUT, 0);
-	write_csr(dd, ASIC_QSFP2_MASK, 0);
-	/* ASIC_QSFP2_STATUS read-only */
-	write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
-	/* ASIC_QSFP2_FORCE leave alone */
-
-	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
-	/* this also writes a NOP command, clearing paging mode */
-	write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
-	write_csr(dd, ASIC_EEP_DATA, 0);
-}
-
 /* set MISC CSRs to chip reset defaults */
 static void reset_misc_csrs(struct hfi1_devdata *dd)
 {
@@ -12786,8 +12952,10 @@
 		write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
 		write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
 	}
-	/* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
-	   only be written 128-byte chunks */
+	/*
+	 * MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
+	 * only be written 128-byte chunks
+	 */
 	/* init RSA engine to clear lingering errors */
 	write_csr(dd, MISC_CFG_RSA_CMD, 1);
 	write_csr(dd, MISC_CFG_RSA_MU, 0);
@@ -12843,18 +13011,17 @@
 	write_csr(dd, SEND_ERR_CLEAR, ~0ull);
 	/* SEND_ERR_FORCE read-only */
 	for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
-		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
+		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
 	for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
-		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
-	for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
-		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
+		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
+	for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++)
+		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
 	for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
-		write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
+		write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
 	for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
-		write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
+		write_csr(dd, SEND_COUNTER_ARRAY64 + (8 * i), 0);
 	write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
-	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-					SEND_CM_GLOBAL_CREDIT_RESETCSR);
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, SEND_CM_GLOBAL_CREDIT_RESETCSR);
 	/* SEND_CM_CREDIT_USED_STATUS read-only */
 	write_csr(dd, SEND_CM_TIMER_CTRL, 0);
 	write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
@@ -12862,7 +13029,7 @@
 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+		write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
 	/* SEND_CM_CREDIT_USED_VL read-only */
 	/* SEND_CM_CREDIT_USED_VL15 read-only */
@@ -12948,8 +13115,8 @@
 		 */
 		if (count++ > 500) {
 			dd_dev_err(dd,
-				"%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
-				__func__, reg);
+				   "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
+				   __func__, reg);
 			break;
 		}
 		udelay(2); /* do not busy-wait the CSR */
@@ -12978,8 +13145,8 @@
 		/* give up after 100us - slowest possible at 33MHz is 73us */
 		if (count++ > 50) {
 			dd_dev_err(dd,
-				"%s: RcvStatus.RxRbufInit not set, continuing\n",
-				__func__);
+				   "%s: RcvStatus.RxRbufInit not set, continuing\n",
+				   __func__);
 			break;
 		}
 	}
@@ -13005,7 +13172,7 @@
 	write_csr(dd, RCV_VL15, 0);
 	/* this is a clear-down */
 	write_csr(dd, RCV_ERR_INFO,
-			RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
+		  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
 	/* RCV_ERR_STATUS read-only */
 	write_csr(dd, RCV_ERR_MASK, 0);
 	write_csr(dd, RCV_ERR_CLEAR, ~0ull);
@@ -13051,8 +13218,8 @@
 		write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
 		/* RCV_EGR_OFFSET_TAIL read-only */
 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
-			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
-				0);
+			write_uctxt_csr(dd, i,
+					RCV_TID_FLOW_TABLE + (8 * j), 0);
 		}
 	}
 }
@@ -13154,7 +13321,7 @@
 		write_csr(dd, RCV_CTXT_CTRL, 0);
 	/* mask all interrupt sources */
 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-		write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+		write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
 
 	/*
 	 * DC Reset: do a full DC reset before the register clear.
@@ -13163,7 +13330,7 @@
 	 * across the clear.
 	 */
 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
-	(void) read_csr(dd, CCE_DC_CTRL);
+	(void)read_csr(dd, CCE_DC_CTRL);
 
 	if (use_flr) {
 		/*
@@ -13184,22 +13351,19 @@
 			hfi1_pcie_flr(dd);
 			restore_pci_variables(dd);
 		}
-
-		reset_asic_csrs(dd);
 	} else {
 		dd_dev_info(dd, "Resetting CSRs with writes\n");
 		reset_cce_csrs(dd);
 		reset_txe_csrs(dd);
 		reset_rxe_csrs(dd);
-		reset_asic_csrs(dd);
 		reset_misc_csrs(dd);
 	}
 	/* clear the DC reset */
 	write_csr(dd, CCE_DC_CTRL, 0);
 
 	/* Set the LED off */
-	if (is_ax(dd))
-		setextled(dd, 0);
+	setextled(dd, 0);
+
 	/*
 	 * Clear the QSFP reset.
 	 * An FLR enforces a 0 on all out pins. The driver does not touch
@@ -13212,6 +13376,7 @@
 	 */
 	write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
 	write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
+	init_chip_resources(dd);
 }
 
 static void init_early_variables(struct hfi1_devdata *dd)
@@ -13252,12 +13417,12 @@
 		kdeth_qp = DEFAULT_KDETH_QP;
 
 	write_csr(dd, SEND_BTH_QP,
-			(kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
-				<< SEND_BTH_QP_KDETH_QP_SHIFT);
+		  (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+		  SEND_BTH_QP_KDETH_QP_SHIFT);
 
 	write_csr(dd, RCV_BTH_QP,
-			(kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
-				<< RCV_BTH_QP_KDETH_QP_SHIFT);
+		  (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+		  RCV_BTH_QP_KDETH_QP_SHIFT);
 }
 
 /**
@@ -13382,22 +13547,21 @@
 		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
 	/* add rule0 */
 	write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
-		RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
-			<< RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
-		2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
+		  RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK <<
+		  RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
+		  2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
 	write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
-		LRH_BTH_MATCH_OFFSET
-			<< RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
-		LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
-		LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
-		((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
-		QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
-		((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
+		  LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
+		  LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
+		  LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
+		  ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
+		  QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
+		  ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
 	write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
-		LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
-		LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
-		LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
-		LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
+		  LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
+		  LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
+		  LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
+		  LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
 	/* Enable RSM */
 	add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
 	kfree(rsmmap);
@@ -13415,9 +13579,8 @@
 	/* enable all receive errors */
 	write_csr(dd, RCV_ERR_MASK, ~0ull);
 	/* setup QPN map table - start where VL15 context leaves off */
-	init_qos(
-		dd,
-		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
+	init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ?
+		 MIN_KERNEL_KCTXTS : 0);
 	/*
 	 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
 	 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
@@ -13454,36 +13617,33 @@
 			       u32 csr0to3, u32 csr4to7)
 {
 	write_csr(dd, csr0to3,
-		   0ull <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
-		|  1ull <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
-		|  2ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
-		|  4ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
+		  0ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT |
+		  1ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT |
+		  2ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT |
+		  4ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
 	write_csr(dd, csr4to7,
-		   8ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
-		| 16ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
-		| 32ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
-		| 64ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
-
+		  8ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT |
+		  16ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT |
+		  32ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT |
+		  64ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
 }
 
 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
 {
 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
-					SEND_CM_LOCAL_AU_TABLE4_TO7);
+			   SEND_CM_LOCAL_AU_TABLE4_TO7);
 }
 
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
 {
 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
-					SEND_CM_REMOTE_AU_TABLE4_TO7);
+			   SEND_CM_REMOTE_AU_TABLE4_TO7);
 }
 
 static void init_txe(struct hfi1_devdata *dd)
@@ -13586,9 +13746,9 @@
 	int ret = 0;
 	u64 reg;
 
-	if (ctxt < dd->num_rcv_contexts)
+	if (ctxt < dd->num_rcv_contexts) {
 		rcd = dd->rcd[ctxt];
-	else {
+	} else {
 		ret = -EINVAL;
 		goto done;
 	}
@@ -13614,9 +13774,9 @@
 	int ret = 0;
 	u64 reg;
 
-	if (ctxt < dd->num_rcv_contexts)
+	if (ctxt < dd->num_rcv_contexts) {
 		rcd = dd->rcd[ctxt];
-	else {
+	} else {
 		ret = -EINVAL;
 		goto done;
 	}
@@ -13639,24 +13799,26 @@
  */
 void hfi1_start_cleanup(struct hfi1_devdata *dd)
 {
+	aspm_exit(dd);
 	free_cntrs(dd);
 	free_rcverr(dd);
 	clean_up_interrupts(dd);
+	finish_chip_resources(dd);
 }
 
 #define HFI_BASE_GUID(dev) \
 	((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
 
 /*
- * Certain chip functions need to be initialized only once per asic
- * instead of per-device. This function finds the peer device and
- * checks whether that chip initialization needs to be done by this
- * device.
+ * Information can be shared between the two HFIs on the same ASIC
+ * in the same OS.  This function finds the peer device and sets
+ * up a shared structure.
  */
-static void asic_should_init(struct hfi1_devdata *dd)
+static int init_asic_data(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
 	struct hfi1_devdata *tmp, *peer = NULL;
+	int ret = 0;
 
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 	/* Find our peer device */
@@ -13668,13 +13830,21 @@
 		}
 	}
 
-	/*
-	 * "Claim" the ASIC for initialization if it hasn't been
-	 " "claimed" yet.
-	 */
-	if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
-		dd->flags |= HFI1_DO_INIT_ASIC;
+	if (peer) {
+		dd->asic_data = peer->asic_data;
+	} else {
+		dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
+		if (!dd->asic_data) {
+			ret = -ENOMEM;
+			goto done;
+		}
+		mutex_init(&dd->asic_data->asic_resource_mutex);
+	}
+	dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
+
+done:
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+	return ret;
 }
 
 /*
@@ -13694,7 +13864,7 @@
 	ret = read_hfi1_efi_var(dd, "description", &size,
 				(void **)&dd->boardname);
 	if (ret) {
-		dd_dev_err(dd, "Board description not found\n");
+		dd_dev_info(dd, "Board description not found\n");
 		/* use generic description */
 		dd->boardname = kstrdup(generic, GFP_KERNEL);
 		if (!dd->boardname)
@@ -13703,6 +13873,50 @@
 	return 0;
 }
 
+/*
+ * Check the interrupt registers to make sure that they are mapped correctly.
+ * It is intended to help user identify any mismapping by VMM when the driver
+ * is running in a VM. This function should only be called before interrupt
+ * is set up properly.
+ *
+ * Return 0 on success, -EINVAL on failure.
+ */
+static int check_int_registers(struct hfi1_devdata *dd)
+{
+	u64 reg;
+	u64 all_bits = ~(u64)0;
+	u64 mask;
+
+	/* Clear CceIntMask[0] to avoid raising any interrupts */
+	mask = read_csr(dd, CCE_INT_MASK);
+	write_csr(dd, CCE_INT_MASK, 0ull);
+	reg = read_csr(dd, CCE_INT_MASK);
+	if (reg)
+		goto err_exit;
+
+	/* Clear all interrupt status bits */
+	write_csr(dd, CCE_INT_CLEAR, all_bits);
+	reg = read_csr(dd, CCE_INT_STATUS);
+	if (reg)
+		goto err_exit;
+
+	/* Set all interrupt status bits */
+	write_csr(dd, CCE_INT_FORCE, all_bits);
+	reg = read_csr(dd, CCE_INT_STATUS);
+	if (reg != all_bits)
+		goto err_exit;
+
+	/* Restore the interrupt mask */
+	write_csr(dd, CCE_INT_CLEAR, all_bits);
+	write_csr(dd, CCE_INT_MASK, mask);
+
+	return 0;
+err_exit:
+	write_csr(dd, CCE_INT_MASK, mask);
+	dd_dev_err(dd, "Interrupt registers not properly mapped by VMM\n");
+	return -EINVAL;
+}
+
 /**
  * Allocate and initialize the device structure for the hfi.
  * @dev: the pci_dev for hfi1_ib device
@@ -13727,9 +13941,10 @@
 		"RTL FPGA emulation",
 		"Functional simulator"
 	};
+	struct pci_dev *parent = pdev->bus->self;
 
-	dd = hfi1_alloc_devdata(pdev,
-		NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
+	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+				sizeof(struct hfi1_pportdata));
 	if (IS_ERR(dd))
 		goto bail;
 	ppd = dd->pport;
@@ -13750,8 +13965,8 @@
 		/* link width active is 0 when link is down */
 		/* link width downgrade active is 0 when link is down */
 
-		if (num_vls < HFI1_MIN_VLS_SUPPORTED
-			|| num_vls > HFI1_MAX_VLS_SUPPORTED) {
+		if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
+		    num_vls > HFI1_MAX_VLS_SUPPORTED) {
 			hfi1_early_err(&pdev->dev,
 				       "Invalid num_vls %u, using %u VLs\n",
 				    num_vls, HFI1_MAX_VLS_SUPPORTED);
@@ -13759,6 +13974,7 @@
 		}
 		ppd->vls_supported = num_vls;
 		ppd->vls_operational = ppd->vls_supported;
+		ppd->actual_vls_operational = ppd->vls_supported;
 		/* Set the default MTU. */
 		for (vl = 0; vl < num_vls; vl++)
 			dd->vld[vl].mtu = hfi1_max_mtu;
@@ -13778,6 +13994,7 @@
 		/* start in offline */
 		ppd->host_link_state = HLS_DN_OFFLINE;
 		init_vl_arb_caches(ppd);
+		ppd->last_pstate = 0xff; /* invalid value */
 	}
 
 	dd->link_default = HLS_DN_POLL;
@@ -13803,8 +14020,21 @@
 	dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
 
-	/* obtain the hardware ID - NOT related to unit, which is a
-	   software enumeration */
+	/*
+	 * Check interrupt registers mapping if the driver has no access to
+	 * the upstream component. In this case, it is likely that the driver
+	 * is running in a VM.
+	 */
+	if (!parent) {
+		ret = check_int_registers(dd);
+		if (ret)
+			goto bail_cleanup;
+	}
+
+	/*
+	 * obtain the hardware ID - NOT related to unit, which is a
+	 * software enumeration
+	 */
 	reg = read_csr(dd, CCE_REVISION2);
 	dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
 					& CCE_REVISION2_HFI_ID_MASK;
@@ -13812,8 +14042,8 @@
 	dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
 	dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
 	dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
-		dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
-		(int)dd->irev);
+		    dd->icode < ARRAY_SIZE(inames) ?
+		    inames[dd->icode] : "unknown", (int)dd->irev);
 
 	/* speeds the hardware can support */
 	dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
@@ -13842,6 +14072,7 @@
 			   num_vls, dd->chip_sdma_engines);
 		num_vls = dd->chip_sdma_engines;
 		ppd->vls_supported = dd->chip_sdma_engines;
+		ppd->vls_operational = ppd->vls_supported;
 	}
 
 	/*
@@ -13863,8 +14094,10 @@
 	/* needs to be done before we look for the peer device */
 	read_guid(dd);
 
-	/* should this device init the ASIC block? */
-	asic_should_init(dd);
+	/* set up shared ASIC data with peer device */
+	ret = init_asic_data(dd);
+	if (ret)
+		goto bail_cleanup;
 
 	/* obtain chip sizes, reset chip CSRs */
 	init_chip(dd);
@@ -13874,6 +14107,9 @@
 	if (ret)
 		goto bail_cleanup;
 
+	/* Needs to be called before hfi1_firmware_init */
+	get_platform_config(dd);
+
 	/* read in firmware */
 	ret = hfi1_firmware_init(dd);
 	if (ret)
@@ -13925,6 +14161,10 @@
 	/* set up KDETH QP prefix in both RX and TX CSRs */
 	init_kdeth_qp(dd);
 
+	ret = hfi1_dev_affinity_init(dd);
+	if (ret)
+		goto bail_cleanup;
+
 	/* send contexts must be set up before receive contexts */
 	ret = init_send_contexts(dd);
 	if (ret)
@@ -14022,7 +14262,6 @@
 	return (u16)delta_cycles;
 }
 
-
 /**
  * create_pbc - build a pbc for transmission
  * @flags: special case flags or-ed in built pbc
@@ -14078,10 +14317,15 @@
 	int ret = 0;
 
 	if (dd->icode != ICODE_RTL_SILICON ||
-	    !(dd->flags & HFI1_DO_INIT_ASIC))
+	    check_chip_resource(dd, CR_THERM_INIT, NULL))
 		return ret;
 
-	acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		THERM_FAILURE(dd, ret, "Acquire SBus");
+		return ret;
+	}
+
 	dd_dev_info(dd, "Initializing thermal sensor\n");
 	/* Disable polling of thermal readings */
 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x0);
@@ -14128,8 +14372,14 @@
 
 	/* Enable polling of thermal readings */
 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
+
+	/* Set initialized flag */
+	ret = acquire_chip_resource(dd, CR_THERM_INIT, 0);
+	if (ret)
+		THERM_FAILURE(dd, ret, "Unable to set thermal init flag");
+
 done:
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 	return ret;
 }
 
@@ -14144,7 +14394,7 @@
 	dd_dev_emerg(dd,
 		     "Critical temperature reached! Forcing device into freeze mode!\n");
 	dd->flags |= HFI1_FORCED_FREEZE;
-	start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
+	start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
 	/*
 	 * Shut DC down as much and as quickly as possible.
 	 *
@@ -14158,8 +14408,8 @@
 	 */
 	ppd->driver_link_ready = 0;
 	ppd->link_enabled = 0;
-	set_physical_link_state(dd, PLS_OFFLINE |
-				(OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
+	set_physical_link_state(dd, (OPA_LINKDOWN_REASON_SMA_DISABLED << 8) |
+				PLS_OFFLINE);
 	/*
 	 * Step 2: Shutdown LCB and 8051
 	 *         After shutdown, do not restore DC_CFG_RESET value.

diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h
index 5b375dd..4f3b878 100644
--- a/drivers/staging/rdma/hfi1/chip.h
+++ b/drivers/staging/rdma/hfi1/chip.h

@@ -1,14 +1,13 @@
 #ifndef _CHIP_H
 #define _CHIP_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -79,8 +76,10 @@
 #define PIO_CMASK 0x7ff	/* counter mask for free and fill counters */
 #define MAX_EAGER_ENTRIES    2048	/* max receive eager entries */
 #define MAX_TID_PAIR_ENTRIES 1024	/* max receive expected pairs */
-/* Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed
-   at 64 bytes for all generation one devices */
+/*
+ * Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed
+ * at 64 bytes for all generation one devices
+ */
 #define CM_VAU 3
 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
 #define CM_GLOBAL_CREDITS 0x940
@@ -93,15 +92,15 @@
 #define TXE_PIO_SEND (TXE + TXE_PIO_SEND_OFFSET)
 
 /* PBC flags */
-#define PBC_INTR		(1ull << 31)
+#define PBC_INTR		BIT_ULL(31)
 #define PBC_DC_INFO_SHIFT	(30)
-#define PBC_DC_INFO		(1ull << PBC_DC_INFO_SHIFT)
-#define PBC_TEST_EBP	(1ull << 29)
-#define PBC_PACKET_BYPASS	(1ull << 28)
-#define PBC_CREDIT_RETURN	(1ull << 25)
-#define PBC_INSERT_BYPASS_ICRC (1ull << 24)
-#define PBC_TEST_BAD_ICRC	(1ull << 23)
-#define PBC_FECN		(1ull << 22)
+#define PBC_DC_INFO		BIT_ULL(PBC_DC_INFO_SHIFT)
+#define PBC_TEST_EBP		BIT_ULL(29)
+#define PBC_PACKET_BYPASS	BIT_ULL(28)
+#define PBC_CREDIT_RETURN	BIT_ULL(25)
+#define PBC_INSERT_BYPASS_ICRC	BIT_ULL(24)
+#define PBC_TEST_BAD_ICRC	BIT_ULL(23)
+#define PBC_FECN		BIT_ULL(22)
 
 /* PbcInsertHcrc field settings */
 #define PBC_IHCRC_LKDETH 0x0	/* insert @ local KDETH offset */
@@ -212,7 +211,7 @@
 #define PLS_CONFIGPHY_DEBOUCE		   0x40
 #define PLS_CONFIGPHY_ESTCOMM		   0x41
 #define PLS_CONFIGPHY_ESTCOMM_TXRX_HUNT	   0x42
-#define PLS_CONFIGPHY_ESTcOMM_LOCAL_COMPLETE   0x43
+#define PLS_CONFIGPHY_ESTCOMM_LOCAL_COMPLETE   0x43
 #define PLS_CONFIGPHY_OPTEQ			   0x44
 #define PLS_CONFIGPHY_OPTEQ_OPTIMIZING	   0x44
 #define PLS_CONFIGPHY_OPTEQ_LOCAL_COMPLETE	   0x45
@@ -242,36 +241,37 @@
 #define HCMD_SUCCESS 2
 
 /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR - error flags */
-#define SPICO_ROM_FAILED		    (1 <<  0)
-#define UNKNOWN_FRAME		    (1 <<  1)
-#define TARGET_BER_NOT_MET		    (1 <<  2)
-#define FAILED_SERDES_INTERNAL_LOOPBACK (1 <<  3)
-#define FAILED_SERDES_INIT		    (1 <<  4)
-#define FAILED_LNI_POLLING		    (1 <<  5)
-#define FAILED_LNI_DEBOUNCE		    (1 <<  6)
-#define FAILED_LNI_ESTBCOMM		    (1 <<  7)
-#define FAILED_LNI_OPTEQ		    (1 <<  8)
-#define FAILED_LNI_VERIFY_CAP1	    (1 <<  9)
-#define FAILED_LNI_VERIFY_CAP2	    (1 << 10)
-#define FAILED_LNI_CONFIGLT		    (1 << 11)
+#define SPICO_ROM_FAILED		BIT(0)
+#define UNKNOWN_FRAME			BIT(1)
+#define TARGET_BER_NOT_MET		BIT(2)
+#define FAILED_SERDES_INTERNAL_LOOPBACK	BIT(3)
+#define FAILED_SERDES_INIT		BIT(4)
+#define FAILED_LNI_POLLING		BIT(5)
+#define FAILED_LNI_DEBOUNCE		BIT(6)
+#define FAILED_LNI_ESTBCOMM		BIT(7)
+#define FAILED_LNI_OPTEQ		BIT(8)
+#define FAILED_LNI_VERIFY_CAP1		BIT(9)
+#define FAILED_LNI_VERIFY_CAP2		BIT(10)
+#define FAILED_LNI_CONFIGLT		BIT(11)
+#define HOST_HANDSHAKE_TIMEOUT		BIT(12)
 
 #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
 			| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
 			| FAILED_LNI_VERIFY_CAP1 \
 			| FAILED_LNI_VERIFY_CAP2 \
-			| FAILED_LNI_CONFIGLT)
+			| FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
 
 /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
-#define HOST_REQ_DONE	   (1 << 0)
-#define BC_PWR_MGM_MSG	   (1 << 1)
-#define BC_SMA_MSG		   (1 << 2)
-#define BC_BCC_UNKOWN_MSG	   (1 << 3)
-#define BC_IDLE_UNKNOWN_MSG	   (1 << 4)
-#define EXT_DEVICE_CFG_REQ	   (1 << 5)
-#define VERIFY_CAP_FRAME	   (1 << 6)
-#define LINKUP_ACHIEVED	   (1 << 7)
-#define LINK_GOING_DOWN	   (1 << 8)
-#define LINK_WIDTH_DOWNGRADED  (1 << 9)
+#define HOST_REQ_DONE		BIT(0)
+#define BC_PWR_MGM_MSG		BIT(1)
+#define BC_SMA_MSG		BIT(2)
+#define BC_BCC_UNKNOWN_MSG	BIT(3)
+#define BC_IDLE_UNKNOWN_MSG	BIT(4)
+#define EXT_DEVICE_CFG_REQ	BIT(5)
+#define VERIFY_CAP_FRAME	BIT(6)
+#define LINKUP_ACHIEVED		BIT(7)
+#define LINK_GOING_DOWN		BIT(8)
+#define LINK_WIDTH_DOWNGRADED	BIT(9)
 
 /* DC_DC8051_CFG_EXT_DEV_1.REQ_TYPE - 8051 host requests */
 #define HREQ_LOAD_CONFIG	0x01
@@ -335,14 +335,14 @@
  * the CSR fields hold multiples of this value.
  */
 #define RCV_SHIFT 3
-#define RCV_INCREMENT (1 << RCV_SHIFT)
+#define RCV_INCREMENT BIT(RCV_SHIFT)
 
 /*
  * Receive header queue entry increment - the CSR holds multiples of
  * this value.
  */
 #define HDRQ_SIZE_SHIFT 5
-#define HDRQ_INCREMENT (1 << HDRQ_SIZE_SHIFT)
+#define HDRQ_INCREMENT BIT(HDRQ_SIZE_SHIFT)
 
 /*
  * Freeze handling flags
@@ -371,6 +371,9 @@
 #define NUM_LANE_FIELDS    0x8
 
 /* 8051 general register Field IDs */
+#define LINK_OPTIMIZATION_SETTINGS   0x00
+#define LINK_TUNING_PARAMETERS	     0x02
+#define DC_HOST_COMM_SETTINGS	     0x03
 #define TX_SETTINGS		     0x06
 #define VERIFY_CAP_LOCAL_PHY	     0x07
 #define VERIFY_CAP_LOCAL_FABRIC	     0x08
@@ -387,6 +390,10 @@
 #define LINK_QUALITY_INFO            0x14
 #define REMOTE_DEVICE_ID	     0x15
 
+/* 8051 lane specific register field IDs */
+#define TX_EQ_SETTINGS		0x00
+#define CHANNEL_LOSS_SETTINGS	0x05
+
 /* Lane ID for general configuration registers */
 #define GENERAL_CONFIG 4
 
@@ -511,8 +518,10 @@
 #define LCB_CRC_48B			0x2	/* 48b CRC */
 #define LCB_CRC_12B_16B_PER_LANE	0x3	/* 12b-16b per lane CRC */
 
-/* the following enum is (almost) a copy/paste of the definition
- * in the OPA spec, section 20.2.2.6.8 (PortInfo) */
+/*
+ * the following enum is (almost) a copy/paste of the definition
+ * in the OPA spec, section 20.2.2.6.8 (PortInfo)
+ */
 enum {
 	PORT_LTP_CRC_MODE_NONE = 0,
 	PORT_LTP_CRC_MODE_14 = 1, /* 14-bit LTP CRC mode (optional) */
@@ -614,6 +623,8 @@
 #define NUM_PCIE_SERDES 16	/* number of PCIe serdes on the SBus */
 extern const u8 pcie_serdes_broadcast[];
 extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES];
+extern uint platform_config_load;
+
 /* SBus commands */
 #define RESET_SBUS_RECEIVER 0x20
 #define WRITE_SBUS_RECEIVER 0x21
@@ -629,6 +640,42 @@
 void dispose_firmware(void);
 int acquire_hw_mutex(struct hfi1_devdata *dd);
 void release_hw_mutex(struct hfi1_devdata *dd);
+
+/*
+ * Bitmask of dynamic access for ASIC block chip resources.  Each HFI has its
+ * own range of bits for the resource so it can clear its own bits on
+ * starting and exiting.  If either HFI has the resource bit set, the
+ * resource is in use.  The separate bit ranges are:
+ *	HFI0 bits  7:0
+ *	HFI1 bits 15:8
+ */
+#define CR_SBUS  0x01	/* SBUS, THERM, and PCIE registers */
+#define CR_EPROM 0x02	/* EEP, GPIO registers */
+#define CR_I2C1  0x04	/* QSFP1_OE register */
+#define CR_I2C2  0x08	/* QSFP2_OE register */
+#define CR_DYN_SHIFT 8	/* dynamic flag shift */
+#define CR_DYN_MASK  ((1ull << CR_DYN_SHIFT) - 1)
+
+/*
+ * Bitmask of static ASIC states these are outside of the dynamic ASIC
+ * block chip resources above.  These are to be set once and never cleared.
+ * Must be holding the SBus dynamic flag when setting.
+ */
+#define CR_THERM_INIT	0x010000
+
+int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait);
+void release_chip_resource(struct hfi1_devdata *dd, u32 resource);
+bool check_chip_resource(struct hfi1_devdata *dd, u32 resource,
+			 const char *func);
+void init_chip_resources(struct hfi1_devdata *dd);
+void finish_chip_resources(struct hfi1_devdata *dd);
+
+/* ms wait time for access to an SBus resoure */
+#define SBUS_TIMEOUT 4000 /* long enough for a FW download and SBR */
+
+/* ms wait time for a qsfp (i2c) chain to become available */
+#define QSFP_WAIT 20000 /* long enough for FW update to the F4 uc */
+
 void fabric_serdes_reset(struct hfi1_devdata *dd);
 int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
 
@@ -644,13 +691,17 @@
 void handle_freeze(struct work_struct *work);
 void handle_link_up(struct work_struct *work);
 void handle_link_down(struct work_struct *work);
+void handle_8051_request(struct work_struct *work);
 void handle_link_downgrade(struct work_struct *work);
 void handle_link_bounce(struct work_struct *work);
 void handle_sma_message(struct work_struct *work);
+void reset_qsfp(struct hfi1_pportdata *ppd);
+void qsfp_event(struct work_struct *work);
 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
 int send_idle_sma(struct hfi1_devdata *dd, u64 message);
+int load_8051_config(struct hfi1_devdata *, u8, u8, u32);
+int read_8051_config(struct hfi1_devdata *, u8, u8, u32 *);
 int start_link(struct hfi1_pportdata *ppd);
-void init_qsfp(struct hfi1_pportdata *ppd);
 int bringup_serdes(struct hfi1_pportdata *ppd);
 void set_intr_state(struct hfi1_devdata *dd, u32 enable);
 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
@@ -690,6 +741,8 @@
 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data);
 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl);
 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data);
+u32 read_logical_state(struct hfi1_devdata *dd);
+void force_recv_intr(struct hfi1_ctxtdata *rcd);
 
 /* Per VL indexes */
 enum {
@@ -785,8 +838,14 @@
 	C_SW_CPU_RCV_LIM,
 	C_SW_VTX_WAIT,
 	C_SW_PIO_WAIT,
+	C_SW_PIO_DRAIN,
 	C_SW_KMEM_WAIT,
 	C_SW_SEND_SCHED,
+	C_SDMA_DESC_FETCHED_CNT,
+	C_SDMA_INT_CNT,
+	C_SDMA_ERR_CNT,
+	C_SDMA_IDLE_INT_CNT,
+	C_SDMA_PROGRESS_INT_CNT,
 /* MISC_ERR_STATUS */
 	C_MISC_PLL_LOCK_FAIL_ERR,
 	C_MISC_MBIST_FAIL_ERR,
@@ -1275,10 +1334,8 @@
 		  u32 type, unsigned long pa, u16 order);
 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
-u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
-		    u64 **cntrp);
-u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
-			char **namep, u64 **cntrp);
+u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
+u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);

diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h
index 014d7a6..770f05c 100644
--- a/drivers/staging/rdma/hfi1/chip_registers.h
+++ b/drivers/staging/rdma/hfi1/chip_registers.h

@@ -2,14 +2,13 @@
 #define DEF_CHIP_REG
 
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -21,8 +20,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -1281,6 +1278,9 @@
 #define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SHIFT 0
 #define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SMASK 0xFFFFull
 #define PCIE_CFG_REG_PL2 (PCIE + 0x000000000708)
+#define PCIE_CFG_REG_PL3 (PCIE + 0x00000000070C)
+#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT 27
+#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK 0x38000000
 #define PCIE_CFG_REG_PL102 (PCIE + 0x000000000898)
 #define PCIE_CFG_REG_PL102_GEN3_EQ_POST_CURSOR_PSET_SHIFT 12
 #define PCIE_CFG_REG_PL102_GEN3_EQ_CURSOR_PSET_SHIFT 6
@@ -1301,5 +1301,6 @@
 #define CCE_INT_BLOCKED (CCE + 0x000000110C00)
 #define SEND_DMA_IDLE_CNT (TXE + 0x000000200040)
 #define SEND_DMA_DESC_FETCHED_CNT (TXE + 0x000000200058)
+#define CCE_MSIX_PBA_OFFSET 0X0110000
 
 #endif          /* DEF_CHIP_REG */

diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h
index 5dd9272..e9b6bb3 100644
--- a/drivers/staging/rdma/hfi1/common.h
+++ b/drivers/staging/rdma/hfi1/common.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -341,19 +338,16 @@
 #define FULL_MGMT_P_KEY      0xFFFF
 
 #define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_PERMISSIVE_LID 0xFFFF
 #define HFI1_AETH_CREDIT_SHIFT 24
 #define HFI1_AETH_CREDIT_MASK 0x1F
 #define HFI1_AETH_CREDIT_INVAL 0x1F
 #define HFI1_MSN_MASK 0xFFFFFF
-#define HFI1_QPN_MASK 0xFFFFFF
 #define HFI1_FECN_SHIFT 31
 #define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK (1 << HFI1_FECN_SHIFT)
+#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
 #define HFI1_BECN_SHIFT 30
 #define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK (1 << HFI1_BECN_SHIFT)
-#define HFI1_MULTICAST_LID_BASE 0xC000
+#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
 
 static inline __u64 rhf_to_cpu(const __le32 *rbuf)
 {

diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c
index acd2269..dbab9d9 100644
--- a/drivers/staging/rdma/hfi1/debugfs.c
+++ b/drivers/staging/rdma/hfi1/debugfs.c

@@ -1,13 +1,12 @@
 #ifdef CONFIG_DEBUG_FS
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -19,8 +18,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,6 +49,7 @@
 #include <linux/seq_file.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <linux/module.h>
 
 #include "hfi.h"
 #include "debugfs.h"
@@ -71,6 +69,7 @@
 	.stop  = _##name##_seq_stop, \
 	.show  = _##name##_seq_show \
 }
+
 #define DEBUGFS_SEQ_FILE_OPEN(name) \
 static int _##name##_open(struct inode *inode, struct file *s) \
 { \
@@ -102,7 +101,6 @@
 		pr_warn("create of %s failed\n", name); \
 } while (0)
 
-
 #define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
 	DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
 
@@ -127,7 +125,6 @@
 	return pos;
 }
 
-
 static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
 __releases(RCU)
 {
@@ -151,8 +148,8 @@
 	if (!n_packets && !n_bytes)
 		return SEQ_SKIP;
 	seq_printf(s, "%02llx %llu/%llu\n", i,
-		(unsigned long long) n_packets,
-		(unsigned long long) n_bytes);
+		   (unsigned long long)n_packets,
+		   (unsigned long long)n_bytes);
 
 	return 0;
 }
@@ -247,7 +244,7 @@
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
-				   loff_t *pos)
+				loff_t *pos)
 {
 	struct qp_iter *iter = iter_ptr;
 
@@ -308,7 +305,6 @@
 	return pos;
 }
 
-
 static void _sdes_seq_stop(struct seq_file *s, void *v)
 __releases(RCU)
 {
@@ -341,7 +337,7 @@
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	avail = hfi1_read_cntrs(dd, *ppos, NULL, &counters);
+	avail = hfi1_read_cntrs(dd, NULL, &counters);
 	rval =  simple_read_from_buffer(buf, count, ppos, counters, avail);
 	rcu_read_unlock();
 	return rval;
@@ -358,7 +354,7 @@
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	avail = hfi1_read_cntrs(dd, *ppos, &names, NULL);
+	avail = hfi1_read_cntrs(dd, &names, NULL);
 	rval =  simple_read_from_buffer(buf, count, ppos, names, avail);
 	rcu_read_unlock();
 	return rval;
@@ -385,8 +381,7 @@
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	/* port number n/a here since names are constant */
-	avail = hfi1_read_portcntrs(dd, *ppos, 0, &names, NULL);
+	avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
 	rval = simple_read_from_buffer(buf, count, ppos, names, avail);
 	rcu_read_unlock();
 	return rval;
@@ -394,28 +389,150 @@
 
 /* read the per-port counters */
 static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
-				size_t count, loff_t *ppos)
+				      size_t count, loff_t *ppos)
 {
 	u64 *counters;
 	size_t avail;
-	struct hfi1_devdata *dd;
 	struct hfi1_pportdata *ppd;
 	ssize_t rval;
 
 	rcu_read_lock();
 	ppd = private2ppd(file);
-	dd = ppd->dd;
-	avail = hfi1_read_portcntrs(dd, *ppos, ppd->port - 1, NULL, &counters);
+	avail = hfi1_read_portcntrs(ppd, NULL, &counters);
 	rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
 	rcu_read_unlock();
 	return rval;
 }
 
+static void check_dyn_flag(u64 scratch0, char *p, int size, int *used,
+			   int this_hfi, int hfi, u32 flag, const char *what)
+{
+	u32 mask;
+
+	mask = flag << (hfi ? CR_DYN_SHIFT : 0);
+	if (scratch0 & mask) {
+		*used += scnprintf(p + *used, size - *used,
+				   "  0x%08x - HFI%d %s in use, %s device\n",
+				   mask, hfi, what,
+				   this_hfi == hfi ? "this" : "other");
+	}
+}
+
+static ssize_t asic_flags_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	u64 scratch0;
+	char *tmp;
+	int ret = 0;
+	int size;
+	int used;
+	int i;
+
+	rcu_read_lock();
+	ppd = private2ppd(file);
+	dd = ppd->dd;
+	size = PAGE_SIZE;
+	used = 0;
+	tmp = kmalloc(size, GFP_KERNEL);
+	if (!tmp) {
+		rcu_read_unlock();
+		return -ENOMEM;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	used += scnprintf(tmp + used, size - used,
+			  "Resource flags: 0x%016llx\n", scratch0);
+
+	/* check permanent flag */
+	if (scratch0 & CR_THERM_INIT) {
+		used += scnprintf(tmp + used, size - used,
+				  "  0x%08x - thermal monitoring initialized\n",
+				  (u32)CR_THERM_INIT);
+	}
+
+	/* check each dynamic flag on each HFI */
+	for (i = 0; i < 2; i++) {
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_SBUS, "SBus");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_EPROM, "EPROM");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_I2C1, "i2c chain 1");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_I2C2, "i2c chain 2");
+	}
+	used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
+
+	ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
+	rcu_read_unlock();
+	kfree(tmp);
+	return ret;
+}
+
+static ssize_t asic_flags_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	char *buff;
+	int ret;
+	unsigned long long value;
+	u64 scratch0;
+	u64 clear;
+
+	rcu_read_lock();
+	ppd = private2ppd(file);
+	dd = ppd->dd;
+
+	buff = kmalloc(count + 1, GFP_KERNEL);
+	if (!buff) {
+		ret = -ENOMEM;
+		goto do_return;
+	}
+
+	ret = copy_from_user(buff, buf, count);
+	if (ret > 0) {
+		ret = -EFAULT;
+		goto do_free;
+	}
+
+	/* zero terminate and read the expected integer */
+	buff[count] = 0;
+	ret = kstrtoull(buff, 0, &value);
+	if (ret)
+		goto do_free;
+	clear = value;
+
+	/* obtain exclusive access */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+	acquire_hw_mutex(dd);
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	scratch0 &= ~clear;
+	write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+	/* force write to be visible to other HFI on another OS */
+	(void)read_csr(dd, ASIC_CFG_SCRATCH);
+
+	release_hw_mutex(dd);
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+
+	/* return the number of bytes written */
+	ret = count;
+
+ do_free:
+	kfree(buff);
+ do_return:
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
  * read the per-port QSFP data for ppd
  */
 static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
-			   size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	struct hfi1_pportdata *ppd;
 	char *tmp;
@@ -439,7 +556,7 @@
 
 /* Do an i2c write operation on the chain for the given HFI. */
 static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos, u32 target)
+				   size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -451,6 +568,16 @@
 	rcu_read_lock();
 	ppd = private2ppd(file);
 
+	/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
+	i2c_addr = (*ppos >> 16) & 0xffff;
+	offset = *ppos & 0xffff;
+
+	/* explicitly reject invalid address 0 to catch cp and cat */
+	if (i2c_addr == 0) {
+		ret = -EINVAL;
+		goto _return;
+	}
+
 	buff = kmalloc(count, GFP_KERNEL);
 	if (!buff) {
 		ret = -ENOMEM;
@@ -463,9 +590,6 @@
 		goto _free;
 	}
 
-	i2c_addr = (*ppos >> 16) & 0xff;
-	offset = *ppos & 0xffff;
-
 	total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count);
 	if (total_written < 0) {
 		ret = total_written;
@@ -485,21 +609,21 @@
 
 /* Do an i2c write operation on chain for HFI 0. */
 static ssize_t i2c1_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_write(file, buf, count, ppos, 0);
 }
 
 /* Do an i2c write operation on chain for HFI 1. */
 static ssize_t i2c2_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_write(file, buf, count, ppos, 1);
 }
 
 /* Do an i2c read operation on the chain for the given HFI. */
 static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos, u32 target)
+				  size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -511,15 +635,22 @@
 	rcu_read_lock();
 	ppd = private2ppd(file);
 
+	/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
+	i2c_addr = (*ppos >> 16) & 0xffff;
+	offset = *ppos & 0xffff;
+
+	/* explicitly reject invalid address 0 to catch cp and cat */
+	if (i2c_addr == 0) {
+		ret = -EINVAL;
+		goto _return;
+	}
+
 	buff = kmalloc(count, GFP_KERNEL);
 	if (!buff) {
 		ret = -ENOMEM;
 		goto _return;
 	}
 
-	i2c_addr = (*ppos >> 16) & 0xff;
-	offset = *ppos & 0xffff;
-
 	total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
 	if (total_read < 0) {
 		ret = total_read;
@@ -545,21 +676,21 @@
 
 /* Do an i2c read operation on chain for HFI 0. */
 static ssize_t i2c1_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_read(file, buf, count, ppos, 0);
 }
 
 /* Do an i2c read operation on chain for HFI 1. */
 static ssize_t i2c2_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_read(file, buf, count, ppos, 1);
 }
 
 /* Do a QSFP write operation on the i2c chain for the given HFI. */
 static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos, u32 target)
+				    size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -605,21 +736,21 @@
 
 /* Do a QSFP write operation on i2c chain for HFI 0. */
 static ssize_t qsfp1_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				   size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_write(file, buf, count, ppos, 0);
 }
 
 /* Do a QSFP write operation on i2c chain for HFI 1. */
 static ssize_t qsfp2_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				   size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_write(file, buf, count, ppos, 1);
 }
 
 /* Do a QSFP read operation on the i2c chain for the given HFI. */
 static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos, u32 target)
+				   size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -665,18 +796,116 @@
 
 /* Do a QSFP read operation on i2c chain for HFI 0. */
 static ssize_t qsfp1_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_read(file, buf, count, ppos, 0);
 }
 
 /* Do a QSFP read operation on i2c chain for HFI 1. */
 static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_read(file, buf, count, ppos, 1);
 }
 
+static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	ppd = private2ppd(fp);
+
+	ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
+	if (ret) /* failed - release the module */
+		module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static int i2c1_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_open(in, fp, 0);
+}
+
+static int i2c2_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_open(in, fp, 1);
+}
+
+static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+
+	ppd = private2ppd(fp);
+
+	release_chip_resource(ppd->dd, i2c_target(target));
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static int i2c1_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_release(in, fp, 0);
+}
+
+static int i2c2_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_release(in, fp, 1);
+}
+
+static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	ppd = private2ppd(fp);
+
+	ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
+	if (ret) /* failed - release the module */
+		module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static int qsfp1_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_open(in, fp, 0);
+}
+
+static int qsfp2_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_open(in, fp, 1);
+}
+
+static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+
+	ppd = private2ppd(fp);
+
+	release_chip_resource(ppd->dd, i2c_target(target));
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static int qsfp1_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_release(in, fp, 0);
+}
+
+static int qsfp2_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_release(in, fp, 1);
+}
+
 #define DEBUGFS_OPS(nm, readroutine, writeroutine)	\
 { \
 	.name = nm, \
@@ -687,6 +916,18 @@
 	}, \
 }
 
+#define DEBUGFS_XOPS(nm, readf, writef, openf, releasef) \
+{ \
+	.name = nm, \
+	.ops = { \
+		.read = readf, \
+		.write = writef, \
+		.llseek = generic_file_llseek, \
+		.open = openf, \
+		.release = releasef \
+	}, \
+}
+
 static const struct counter_info cntr_ops[] = {
 	DEBUGFS_OPS("counter_names", dev_names_read, NULL),
 	DEBUGFS_OPS("counters", dev_counters_read, NULL),
@@ -695,11 +936,16 @@
 
 static const struct counter_info port_cntr_ops[] = {
 	DEBUGFS_OPS("port%dcounters", portcntrs_debugfs_read, NULL),
-	DEBUGFS_OPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write),
-	DEBUGFS_OPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write),
+	DEBUGFS_XOPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write,
+		     i2c1_debugfs_open, i2c1_debugfs_release),
+	DEBUGFS_XOPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write,
+		     i2c2_debugfs_open, i2c2_debugfs_release),
 	DEBUGFS_OPS("qsfp_dump%d", qsfp_debugfs_dump, NULL),
-	DEBUGFS_OPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write),
-	DEBUGFS_OPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write),
+	DEBUGFS_XOPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write,
+		     qsfp1_debugfs_open, qsfp1_debugfs_release),
+	DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
+		     qsfp2_debugfs_open, qsfp2_debugfs_release),
+	DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
 };
 
 void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
@@ -747,8 +993,8 @@
 					    ibd->hfi1_ibdev_dbg,
 					    ppd,
 					    &port_cntr_ops[i].ops,
-					    port_cntr_ops[i].ops.write == NULL ?
-					    S_IRUGO : S_IRUGO|S_IWUSR);
+					    !port_cntr_ops[i].ops.write ?
+					    S_IRUGO : S_IRUGO | S_IWUSR);
 		}
 }
 

diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/staging/rdma/hfi1/debugfs.h
index 92d6fe1..b6fb681 100644
--- a/drivers/staging/rdma/hfi1/debugfs.h
+++ b/drivers/staging/rdma/hfi1/debugfs.h

@@ -1,14 +1,13 @@
 #ifndef _HFI1_DEBUGFS_H
 #define _HFI1_DEBUGFS_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:

diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c
index 58472e5..c05c39d 100644
--- a/drivers/staging/rdma/hfi1/device.c
+++ b/drivers/staging/rdma/hfi1/device.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:

diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/staging/rdma/hfi1/device.h
index 2850ff7..5bb3e83 100644
--- a/drivers/staging/rdma/hfi1/device.h
+++ b/drivers/staging/rdma/hfi1/device.h

@@ -1,14 +1,13 @@
 #ifndef _HFI1_DEVICE_H
 #define _HFI1_DEVICE_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:

diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c
index e41159f..c5b520b 100644
--- a/drivers/staging/rdma/hfi1/diag.c
+++ b/drivers/staging/rdma/hfi1/diag.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -70,6 +67,7 @@
 #include "hfi.h"
 #include "device.h"
 #include "common.h"
+#include "verbs_txreq.h"
 #include "trace.h"
 
 #undef pr_fmt
@@ -80,15 +78,15 @@
 /* Snoop option mask */
 #define SNOOP_DROP_SEND		BIT(0)
 #define SNOOP_USE_METADATA	BIT(1)
+#define SNOOP_SET_VL0TOVL15     BIT(2)
 
 static u8 snoop_flags;
 
 /*
  * Extract packet length from LRH header.
- * Why & 0x7FF? Because len is only 11 bits in case it wasn't 0'd we throw the
- * bogus bits away. This is in Dwords so multiply by 4 to get size in bytes
+ * This is in Dwords so multiply by 4 to get size in bytes
  */
-#define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2)
+#define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2)
 
 enum hfi1_filter_status {
 	HFI1_FILTER_HIT,
@@ -860,7 +858,7 @@
 			vl = sc4;
 		} else {
 			sl = (byte_two >> 4) & 0xf;
-			ibp = to_iport(&dd->verbs_dev.ibdev, 1);
+			ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1);
 			sc5 = ibp->sl_to_sc[sl];
 			vl = sc_to_vlt(dd, sc5);
 			if (vl != sc4) {
@@ -966,6 +964,65 @@
 	return ret;
 }
 
+/**
+ * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others
+ * @ppd : ptr to hfi1 port data
+ * @value : options from user space
+ *
+ * Assumes the rest of the CM credit registers are zero from a
+ * previous global or credit reset.
+ * Leave shared count at zero for both global and all vls.
+ * In snoop mode ideally we don't use shared credits
+ * Reserve 8.5k for VL15
+ * If total credits less than 8.5kbytes return error.
+ * Divide the rest of the credits across VL0 to VL7 and if
+ * each of these levels has less than 34 credits (at least 2048 + 128 bytes)
+ * return with an error.
+ * The credit registers will be reset to zero on link negotiation or link up
+ * so this function should be activated from user space only if the port has
+ * gone past link negotiation and link up.
+ *
+ * Return -- 0 if successful else error condition
+ *
+ */
+static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
+					   int value)
+{
+#define  OPA_MIN_PER_VL_CREDITS  34  /* 2048 + 128 bytes */
+	struct buffer_control t;
+	int i;
+	struct hfi1_devdata *dd = ppd->dd;
+	u16  total_credits = (value >> 16) & 0xffff;
+	u16  vl15_credits = dd->vl15_init / 2;
+	u16  per_vl_credits;
+	__be16 be_per_vl_credits;
+
+	if (!(ppd->host_link_state & HLS_UP))
+		goto err_exit;
+	if (total_credits  <  vl15_credits)
+		goto err_exit;
+
+	per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL;
+
+	if (per_vl_credits < OPA_MIN_PER_VL_CREDITS)
+		goto err_exit;
+
+	memset(&t, 0, sizeof(t));
+	be_per_vl_credits = cpu_to_be16(per_vl_credits);
+
+	for (i = 0; i < TXE_NUM_DATA_VL; i++)
+		t.vl[i].dedicated = be_per_vl_credits;
+
+	t.vl[15].dedicated  = cpu_to_be16(vl15_credits);
+	return set_buffer_control(ppd, &t);
+
+err_exit:
+	snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d",
+		  ppd->host_link_state, total_credits, vl15_credits);
+
+	return -EINVAL;
+}
+
 static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 {
 	struct hfi1_devdata *dd;
@@ -1192,6 +1249,10 @@
 			snoop_flags |= SNOOP_DROP_SEND;
 		if (value & SNOOP_USE_METADATA)
 			snoop_flags |= SNOOP_USE_METADATA;
+		if (value & (SNOOP_SET_VL0TOVL15)) {
+			ppd = &dd->pport[0];  /* first port will do */
+			ret = hfi1_assign_snoop_link_credits(ppd, value);
+		}
 		break;
 	default:
 		return -ENOTTY;
@@ -1603,7 +1664,7 @@
 /*
  * Handle snooping and capturing packets when sdma is being used.
  */
-int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc)
 {
 	pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n");
@@ -1616,20 +1677,19 @@
  * bypass packets. The only way to send a bypass packet currently is to use the
  * diagpkt interface. When that interface is enable snoop/capture is not.
  */
-int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 dwords = (len + 3) >> 2;
 	u32 plen = hdrwords + dwords + 2; /* includes pbc */
 	struct hfi1_pportdata *ppd = ps->ppd;
 	struct snoop_packet *s_packet = NULL;
-	u32 *hdr = (u32 *)&ahdr->ibh;
+	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
 	u32 length = 0;
-	struct hfi1_sge_state temp_ss;
+	struct rvt_sge_state temp_ss;
 	void *data = NULL;
 	void *data_start = NULL;
 	int ret;
@@ -1638,7 +1698,7 @@
 	struct capture_md md;
 	u32 vl;
 	u32 hdr_len = hdrwords << 2;
-	u32 tlen = HFI1_GET_PKT_LEN(&ahdr->ibh);
+	u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr);
 
 	md.u.pbc = 0;
 
@@ -1665,7 +1725,7 @@
 		md.port = 1;
 		md.dir = PKT_DIR_EGRESS;
 		if (likely(pbc == 0)) {
-			vl = be16_to_cpu(ahdr->ibh.lrh[0]) >> 12;
+			vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12;
 			md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
 		} else {
 			md.u.pbc = 0;
@@ -1727,7 +1787,7 @@
 		ret = HFI1_FILTER_HIT;
 	} else {
 		ret = ppd->dd->hfi1_snoop.filter_callback(
-					&ahdr->ibh,
+					&ps->s_txreq->phdr.hdr,
 					NULL,
 					ppd->dd->hfi1_snoop.filter_value);
 	}
@@ -1759,9 +1819,16 @@
 				spin_unlock_irqrestore(&qp->s_lock, flags);
 			} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 				spin_lock_irqsave(&qp->s_lock, flags);
-				hfi1_rc_send_complete(qp, &ahdr->ibh);
+				hfi1_rc_send_complete(qp,
+						      &ps->s_txreq->phdr.hdr);
 				spin_unlock_irqrestore(&qp->s_lock, flags);
 			}
+
+			/*
+			 * If snoop is dropping the packet we need to put the
+			 * txreq back because no one else will.
+			 */
+			hfi1_put_txreq(ps->s_txreq);
 			return 0;
 		}
 		break;

diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/staging/rdma/hfi1/dma.c
index e03bd73..7e8dab8 100644
--- a/drivers/staging/rdma/hfi1/dma.c
+++ b/drivers/staging/rdma/hfi1/dma.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,7 +49,7 @@
 
 #include "verbs.h"
 
-#define BAD_DMA_ADDRESS ((u64) 0)
+#define BAD_DMA_ADDRESS ((u64)0)
 
 /*
  * The following functions implement driver specific replacements
@@ -74,7 +71,7 @@
 	if (WARN_ON(!valid_dma_direction(direction)))
 		return BAD_DMA_ADDRESS;
 
-	return (u64) cpu_addr;
+	return (u64)cpu_addr;
 }
 
 static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
@@ -95,7 +92,7 @@
 	if (offset + size > PAGE_SIZE)
 		return BAD_DMA_ADDRESS;
 
-	addr = (u64) page_address(page);
+	addr = (u64)page_address(page);
 	if (addr)
 		addr += offset;
 
@@ -120,7 +117,7 @@
 		return BAD_DMA_ADDRESS;
 
 	for_each_sg(sgl, sg, nents, i) {
-		addr = (u64) page_address(sg_page(sg));
+		addr = (u64)page_address(sg_page(sg));
 		if (!addr) {
 			ret = 0;
 			break;
@@ -161,14 +158,14 @@
 	if (p)
 		addr = page_address(p);
 	if (dma_handle)
-		*dma_handle = (u64) addr;
+		*dma_handle = (u64)addr;
 	return addr;
 }
 
 static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
 				   void *cpu_addr, u64 dma_handle)
 {
-	free_pages((unsigned long) cpu_addr, get_order(size));
+	free_pages((unsigned long)cpu_addr, get_order(size));
 }
 
 struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {

diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c
index ee50bbf..34511e5 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/staging/rdma/hfi1/driver.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -56,6 +53,7 @@
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/prefetch.h>
+#include <rdma/ib_verbs.h>
 
 #include "hfi.h"
 #include "trace.h"
@@ -162,6 +160,22 @@
 	return iname;
 }
 
+const char *get_card_name(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+	struct hfi1_devdata *dd = container_of(ibdev,
+					       struct hfi1_devdata, verbs_dev);
+	return get_unit_name(dd->unit);
+}
+
+struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+	struct hfi1_devdata *dd = container_of(ibdev,
+					       struct hfi1_devdata, verbs_dev);
+	return dd->pcidev;
+}
+
 /*
  * Return count of units with at least one port ACTIVE.
  */
@@ -265,6 +279,8 @@
 	u32 rte = rhf_rcv_type_err(packet->rhf);
 	int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
+	struct hfi1_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 
 	if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
 		return;
@@ -283,9 +299,9 @@
 			goto drop;
 
 		/* Check for GRH */
-		if (lnh == HFI1_LRH_BTH)
+		if (lnh == HFI1_LRH_BTH) {
 			ohdr = &hdr->u.oth;
-		else if (lnh == HFI1_LRH_GRH) {
+		} else if (lnh == HFI1_LRH_GRH) {
 			u32 vtf;
 
 			ohdr = &hdr->u.l.oth;
@@ -295,17 +311,17 @@
 			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 				goto drop;
 			rcv_flags |= HFI1_HAS_GRH;
-		} else
+		} else {
 			goto drop;
-
+		}
 		/* Get the destination QP number. */
-		qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
-		if (lid < HFI1_MULTICAST_LID_BASE) {
-			struct hfi1_qp *qp;
+		qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+		if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+			struct rvt_qp *qp;
 			unsigned long flags;
 
 			rcu_read_lock();
-			qp = hfi1_lookup_qpn(ibp, qp_num);
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 			if (!qp) {
 				rcu_read_unlock();
 				goto drop;
@@ -318,9 +334,9 @@
 			spin_lock_irqsave(&qp->r_lock, flags);
 
 			/* Check for valid receive state. */
-			if (!(ib_hfi1_state_ops[qp->state] &
-			      HFI1_PROCESS_RECV_OK)) {
-				ibp->n_pkt_drops++;
+			if (!(ib_rvt_state_ops[qp->state] &
+			      RVT_PROCESS_RECV_OK)) {
+				ibp->rvp.n_pkt_drops++;
 			}
 
 			switch (qp->ibqp.qp_type) {
@@ -352,7 +368,7 @@
 		if (rhf_use_egr_bfr(packet->rhf))
 			ebuf = packet->ebuf;
 
-		if (ebuf == NULL)
+		if (!ebuf)
 			goto drop; /* this should never happen */
 
 		if (lnh == HFI1_LRH_BTH)
@@ -370,7 +386,7 @@
 			 * Only in pre-B0 h/w is the CNP_OPCODE handled
 			 * via this code path.
 			 */
-			struct hfi1_qp *qp = NULL;
+			struct rvt_qp *qp = NULL;
 			u32 lqpn, rqpn;
 			u16 rlid;
 			u8 svc_type, sl, sc5;
@@ -380,10 +396,10 @@
 				sc5 |= 0x10;
 			sl = ibp->sc_to_sl[sc5];
 
-			lqpn = be32_to_cpu(bth[1]) & HFI1_QPN_MASK;
+			lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
 			rcu_read_lock();
-			qp = hfi1_lookup_qpn(ibp, lqpn);
-			if (qp == NULL) {
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
+			if (!qp) {
 				rcu_read_unlock();
 				goto drop;
 			}
@@ -419,9 +435,8 @@
 }
 
 static inline void init_packet(struct hfi1_ctxtdata *rcd,
-			      struct hfi1_packet *packet)
+			       struct hfi1_packet *packet)
 {
-
 	packet->rsize = rcd->rcvhdrqentsize; /* words */
 	packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
 	packet->rcd = rcd;
@@ -434,12 +449,7 @@
 	packet->rcv_flags = 0;
 }
 
-#ifndef CONFIG_PRESCAN_RXQ
-static void prescan_rxq(struct hfi1_packet *packet) {}
-#else /* !CONFIG_PRESCAN_RXQ */
-static int prescan_receive_queue;
-
-static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
+static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
 			struct hfi1_other_headers *ohdr,
 			u64 rhf, u32 bth1, struct ib_grh *grh)
 {
@@ -453,7 +463,7 @@
 	case IB_QPT_GSI:
 	case IB_QPT_UD:
 		rlid = be16_to_cpu(hdr->lrh[3]);
-		rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+		rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 		svc_type = IB_CC_SVCTYPE_UD;
 		break;
 	case IB_QPT_UC:
@@ -483,7 +493,7 @@
 
 	if (bth1 & HFI1_BECN_SMASK) {
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-		u32 lqpn = bth1 & HFI1_QPN_MASK;
+		u32 lqpn = bth1 & RVT_QPN_MASK;
 		u8 sl = ibp->sc_to_sl[sc5];
 
 		process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
@@ -562,26 +572,31 @@
  * containing Excplicit Congestion Notifications (FECNs, or BECNs).
  * When an ECN is found, process the Congestion Notification, and toggle
  * it off.
+ * This is declared as a macro to allow quick checking of the port to avoid
+ * the overhead of a function call if not enabled.
  */
-static void prescan_rxq(struct hfi1_packet *packet)
+#define prescan_rxq(rcd, packet) \
+	do { \
+		if (rcd->ppd->cc_prescan) \
+			__prescan_rxq(packet); \
+	} while (0)
+static void __prescan_rxq(struct hfi1_packet *packet)
 {
 	struct hfi1_ctxtdata *rcd = packet->rcd;
 	struct ps_mdata mdata;
 
-	if (!prescan_receive_queue)
-		return;
-
 	init_ps_mdata(&mdata, packet);
 
 	while (1) {
 		struct hfi1_devdata *dd = rcd->dd;
 		struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
-		__le32 *rhf_addr = (__le32 *) rcd->rcvhdrq + mdata.ps_head +
+		__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
 					 dd->rhf_offset;
-		struct hfi1_qp *qp;
+		struct rvt_qp *qp;
 		struct hfi1_ib_header *hdr;
 		struct hfi1_other_headers *ohdr;
 		struct ib_grh *grh = NULL;
+		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 		u64 rhf = rhf_to_cpu(rhf_addr);
 		u32 etype = rhf_rcv_type(rhf), qpn, bth1;
 		int is_ecn = 0;
@@ -600,25 +615,25 @@
 			hfi1_get_msgheader(dd, rhf_addr);
 		lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
-		if (lnh == HFI1_LRH_BTH)
+		if (lnh == HFI1_LRH_BTH) {
 			ohdr = &hdr->u.oth;
-		else if (lnh == HFI1_LRH_GRH) {
+		} else if (lnh == HFI1_LRH_GRH) {
 			ohdr = &hdr->u.l.oth;
 			grh = &hdr->u.l.grh;
-		} else
+		} else {
 			goto next; /* just in case */
-
+		}
 		bth1 = be32_to_cpu(ohdr->bth[1]);
 		is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
 
 		if (!is_ecn)
 			goto next;
 
-		qpn = bth1 & HFI1_QPN_MASK;
+		qpn = bth1 & RVT_QPN_MASK;
 		rcu_read_lock();
-		qp = hfi1_lookup_qpn(ibp, qpn);
+		qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
 
-		if (qp == NULL) {
+		if (!qp) {
 			rcu_read_unlock();
 			goto next;
 		}
@@ -633,7 +648,6 @@
 		update_ps_mdata(&mdata, rcd);
 	}
 }
-#endif /* CONFIG_PRESCAN_RXQ */
 
 static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 {
@@ -683,8 +697,9 @@
 		 * The +2 is the size of the RHF.
 		 */
 		prefetch_range(packet->ebuf,
-			packet->tlen - ((packet->rcd->rcvhdrqentsize -
-				  (rhf_hdrq_offset(packet->rhf)+2)) * 4));
+			       packet->tlen - ((packet->rcd->rcvhdrqentsize -
+					       (rhf_hdrq_offset(packet->rhf)
+						+ 2)) * 4));
 	}
 
 	/*
@@ -712,7 +727,7 @@
 		}
 	}
 
-	packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff +
+	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
 				      packet->rcd->dd->rhf_offset;
 	packet->rhf = rhf_to_cpu(packet->rhf_addr);
 
@@ -737,7 +752,6 @@
 
 static inline void finish_packet(struct hfi1_packet *packet)
 {
-
 	/*
 	 * Nothing we need to free for the packet.
 	 *
@@ -746,14 +760,12 @@
 	 */
 	update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
 		       packet->etail, rcv_intr_dynamic, packet->numpkt);
-
 }
 
 static inline void process_rcv_qp_work(struct hfi1_packet *packet)
 {
-
 	struct hfi1_ctxtdata *rcd;
-	struct hfi1_qp *qp, *nqp;
+	struct rvt_qp *qp, *nqp;
 
 	rcd = packet->rcd;
 	rcd->head = packet->rhqoff;
@@ -764,17 +776,17 @@
 	 */
 	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
 		list_del_init(&qp->rspwait);
-		if (qp->r_flags & HFI1_R_RSP_DEFERED_ACK) {
-			qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+		if (qp->r_flags & RVT_R_RSP_NAK) {
+			qp->r_flags &= ~RVT_R_RSP_NAK;
 			hfi1_send_rc_ack(rcd, qp, 0);
 		}
-		if (qp->r_flags & HFI1_R_RSP_SEND) {
+		if (qp->r_flags & RVT_R_RSP_SEND) {
 			unsigned long flags;
 
-			qp->r_flags &= ~HFI1_R_RSP_SEND;
+			qp->r_flags &= ~RVT_R_RSP_SEND;
 			spin_lock_irqsave(&qp->s_lock, flags);
-			if (ib_hfi1_state_ops[qp->state] &
-					HFI1_PROCESS_OR_FLUSH_SEND)
+			if (ib_rvt_state_ops[qp->state] &
+					RVT_PROCESS_OR_FLUSH_SEND)
 				hfi1_schedule_send(qp);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}
@@ -799,7 +811,7 @@
 		goto bail;
 	}
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
 		last = process_rcv_packet(&packet, thread);
@@ -830,7 +842,7 @@
 	}
 	smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
 		last = process_rcv_packet(&packet, thread);
@@ -862,6 +874,37 @@
 			&handle_receive_interrupt_dma_rtail;
 }
 
+void set_all_slowpath(struct hfi1_devdata *dd)
+{
+	int i;
+
+	/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
+	for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+		dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+}
+
+static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
+				      struct hfi1_packet packet,
+				      struct hfi1_devdata *dd)
+{
+	struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
+	struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
+							     packet.rhf_addr);
+
+	if (hdr2sc(hdr, packet.rhf) != 0xf) {
+		int hwstate = read_logical_state(dd);
+
+		if (hwstate != LSTATE_ACTIVE) {
+			dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
+			return 0;
+		}
+
+		queue_work(rcd->ppd->hfi1_wq, lsaw);
+		return 1;
+	}
+	return 0;
+}
+
 /*
  * handle_receive_interrupt - receive a packet
  * @rcd: the context
@@ -910,17 +953,17 @@
 		}
 	}
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
-
-		if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet,
-			DROP_PACKET_OFF) == DROP_PACKET_ON)) {
+		if (unlikely(dd->do_drop &&
+			     atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+			     DROP_PACKET_ON)) {
 			dd->do_drop = 0;
 
 			/* On to the next packet */
 			packet.rhqoff += packet.rsize;
-			packet.rhf_addr = (__le32 *) rcd->rcvhdrq +
+			packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
 					  packet.rhqoff +
 					  dd->rhf_offset;
 			packet.rhf = rhf_to_cpu(packet.rhf_addr);
@@ -929,6 +972,11 @@
 			last = skip_rcv_packet(&packet, thread);
 			skip_pkt = 0;
 		} else {
+			/* Auto activate link on non-SC15 packet receive */
+			if (unlikely(rcd->ppd->host_link_state ==
+				     HLS_UP_ARMED) &&
+			    set_armed_to_active(rcd, packet, dd))
+				goto bail;
 			last = process_rcv_packet(&packet, thread);
 		}
 
@@ -940,8 +988,7 @@
 			if (seq != rcd->seq_cnt)
 				last = RCV_PKT_DONE;
 			if (needset) {
-				dd_dev_info(dd,
-					"Switching to NO_DMA_RTAIL\n");
+				dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
 				set_all_nodma_rtail(dd);
 				needset = 0;
 			}
@@ -984,6 +1031,42 @@
 }
 
 /*
+ * We may discover in the interrupt that the hardware link state has
+ * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
+ * and we need to update the driver's notion of the link state.  We cannot
+ * run set_link_state from interrupt context, so we queue this function on
+ * a workqueue.
+ *
+ * We delay the regular interrupt processing until after the state changes
+ * so that the link will be in the correct state by the time any application
+ * we wake up attempts to send a reply to any message it received.
+ * (Subsequent receive interrupts may possibly force the wakeup before we
+ * update the link state.)
+ *
+ * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
+ * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
+ * so we're safe from use-after-free of the rcd.
+ */
+void receive_interrupt_work(struct work_struct *work)
+{
+	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+						  linkstate_active_work);
+	struct hfi1_devdata *dd = ppd->dd;
+	int i;
+
+	/* Received non-SC15 packet implies neighbor_normal */
+	ppd->neighbor_normal = 1;
+	set_link_state(ppd, HLS_UP_ACTIVE);
+
+	/*
+	 * Interrupt all kernel contexts that could have had an
+	 * interrupt during auto activation.
+	 */
+	for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+		force_recv_intr(dd->rcd[i]);
+}
+
+/*
  * Convert a given MTU size to the on-wire MAD packet enumeration.
  * Return -1 if the size is invalid.
  */
@@ -1037,9 +1120,9 @@
 	ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);
 
 	mutex_lock(&ppd->hls_lock);
-	if (ppd->host_link_state == HLS_UP_INIT
-			|| ppd->host_link_state == HLS_UP_ARMED
-			|| ppd->host_link_state == HLS_UP_ACTIVE)
+	if (ppd->host_link_state == HLS_UP_INIT ||
+	    ppd->host_link_state == HLS_UP_ARMED ||
+	    ppd->host_link_state == HLS_UP_ACTIVE)
 		is_up = 1;
 
 	drain = !is_ax(dd) && is_up;
@@ -1082,79 +1165,80 @@
 	return 0;
 }
 
-/*
- * Following deal with the "obviously simple" task of overriding the state
- * of the LEDs, which normally indicate link physical and logical status.
- * The complications arise in dealing with different hardware mappings
- * and the board-dependent routine being called from interrupts.
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
+void shutdown_led_override(struct hfi1_pportdata *ppd)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	if (atomic_read(&ppd->led_override_timer_active)) {
+		del_timer_sync(&ppd->led_override_timer);
+		atomic_set(&ppd->led_override_timer_active, 0);
+		/* Ensure the atomic_set is visible to all CPUs */
+		smp_wmb();
+	}
+
+	/* Hand control of the LED to the DC for normal operation */
+	write_csr(dd, DCC_CFG_LED_CNTRL, 0);
+}
 
 static void run_led_override(unsigned long opaque)
 {
 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
 	struct hfi1_devdata *dd = ppd->dd;
-	int timeoff;
-	int ph_idx;
+	unsigned long timeout;
+	int phase_idx;
 
 	if (!(dd->flags & HFI1_INITTED))
 		return;
 
-	ph_idx = ppd->led_override_phase++ & 1;
-	ppd->led_override = ppd->led_override_vals[ph_idx];
-	timeoff = ppd->led_override_timeoff;
+	phase_idx = ppd->led_override_phase & 1;
 
-	/*
-	 * don't re-fire the timer if user asked for it to be off; we let
-	 * it fire one more time after they turn it off to simplify
-	 */
-	if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
-		mod_timer(&ppd->led_override_timer, jiffies + timeoff);
+	setextled(dd, phase_idx);
+
+	timeout = ppd->led_override_vals[phase_idx];
+
+	/* Set up for next phase */
+	ppd->led_override_phase = !ppd->led_override_phase;
+
+	mod_timer(&ppd->led_override_timer, jiffies + timeout);
 }
 
-void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val)
+/*
+ * To have the LED blink in a particular pattern, provide timeon and timeoff
+ * in milliseconds.
+ * To turn off custom blinking and return to normal operation, use
+ * shutdown_led_override()
+ */
+void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+			     unsigned int timeoff)
 {
-	struct hfi1_devdata *dd = ppd->dd;
-	int timeoff, freq;
-
-	if (!(dd->flags & HFI1_INITTED))
+	if (!(ppd->dd->flags & HFI1_INITTED))
 		return;
 
-	/* First check if we are blinking. If not, use 1HZ polling */
-	timeoff = HZ;
-	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+	/* Convert to jiffies for direct use in timer */
+	ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
+	ppd->led_override_vals[1] = msecs_to_jiffies(timeon);
 
-	if (freq) {
-		/* For blink, set each phase from one nybble of val */
-		ppd->led_override_vals[0] = val & 0xF;
-		ppd->led_override_vals[1] = (val >> 4) & 0xF;
-		timeoff = (HZ << 4)/freq;
-	} else {
-		/* Non-blink set both phases the same. */
-		ppd->led_override_vals[0] = val & 0xF;
-		ppd->led_override_vals[1] = val & 0xF;
-	}
-	ppd->led_override_timeoff = timeoff;
+	/* Arbitrarily start from LED on phase */
+	ppd->led_override_phase = 1;
 
 	/*
 	 * If the timer has not already been started, do so. Use a "quick"
-	 * timeout so the function will be called soon, to look at our request.
+	 * timeout so the handler will be called soon to look at our request.
 	 */
-	if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
-		/* Need to start timer */
+	if (!timer_pending(&ppd->led_override_timer)) {
 		setup_timer(&ppd->led_override_timer, run_led_override,
-				(unsigned long)ppd);
-
+			    (unsigned long)ppd);
 		ppd->led_override_timer.expires = jiffies + 1;
 		add_timer(&ppd->led_override_timer);
-	} else {
-		if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
-			mod_timer(&ppd->led_override_timer, jiffies + 1);
-		atomic_dec(&ppd->led_override_timer_active);
+		atomic_set(&ppd->led_override_timer_active, 1);
+		/* Ensure the atomic_set is visible to all CPUs */
+		smp_wmb();
 	}
 }
 
@@ -1184,8 +1268,8 @@
 
 	if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
 		dd_dev_info(dd,
-			"Invalid unit number %u or not initialized or not present\n",
-			unit);
+			    "Invalid unit number %u or not initialized or not present\n",
+			    unit);
 		ret = -ENXIO;
 		goto bail;
 	}
@@ -1203,14 +1287,8 @@
 
 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 		ppd = dd->pport + pidx;
-		if (atomic_read(&ppd->led_override_timer_active)) {
-			/* Need to stop LED timer, _then_ shut off LEDs */
-			del_timer_sync(&ppd->led_override_timer);
-			atomic_set(&ppd->led_override_timer_active, 0);
-		}
 
-		/* Shut off LEDs after we are sure timer is not running */
-		ppd->led_override = LED_OVER_BOTH_OFF;
+		shutdown_led_override(ppd);
 	}
 	if (dd->flags & HFI1_HAS_SEND_DMA)
 		sdma_exit(dd);
@@ -1221,11 +1299,11 @@
 
 	if (ret)
 		dd_dev_err(dd,
-			"Reinitialize unit %u after reset failed with %d\n",
-			unit, ret);
+			   "Reinitialize unit %u after reset failed with %d\n",
+			   unit, ret);
 	else
 		dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
-			unit);
+			    unit);
 
 bail:
 	return ret;
@@ -1282,7 +1360,7 @@
 		handle_eflags(packet);
 
 	dd_dev_err(packet->rcd->dd,
-	   "Bypass packets are not supported in normal operation. Dropping\n");
+		   "Bypass packets are not supported in normal operation. Dropping\n");
 	return RHF_RCV_CONTINUE;
 }
 
@@ -1320,6 +1398,6 @@
 int process_receive_invalid(struct hfi1_packet *packet)
 {
 	dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
-		rhf_rcv_type(packet->rhf));
+		   rhf_rcv_type(packet->rhf));
 	return RHF_RCV_CONTINUE;
 }

diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/staging/rdma/hfi1/efivar.c
index 47dfe25..106349f 100644
--- a/drivers/staging/rdma/hfi1/efivar.c
+++ b/drivers/staging/rdma/hfi1/efivar.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:

diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/staging/rdma/hfi1/efivar.h
index 0707062..94e9e70 100644
--- a/drivers/staging/rdma/hfi1/efivar.h
+++ b/drivers/staging/rdma/hfi1/efivar.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:

diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c
index fb620c9..bd87715 100644
--- a/drivers/staging/rdma/hfi1/eprom.c
+++ b/drivers/staging/rdma/hfi1/eprom.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -99,17 +96,17 @@
 
 /* sleep length while waiting for controller */
 #define WAIT_SLEEP_US 100	/* must be larger than 5 (see usage) */
-#define COUNT_DELAY_SEC(n) ((n) * (1000000/WAIT_SLEEP_US))
+#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US))
 
 /* GPIO pins */
-#define EPROM_WP_N (1ull << 14)	/* EPROM write line */
+#define EPROM_WP_N BIT_ULL(14)	/* EPROM write line */
 
 /*
- * Use the EP mutex to guard against other callers from within the driver.
- * Also covers usage of eprom_available.
+ * How long to wait for the EPROM to become available, in ms.
+ * The spec 32 Mb EPROM takes around 40s to erase then write.
+ * Double it for safety.
  */
-static DEFINE_MUTEX(eprom_mutex);
-static int eprom_available;	/* default: not available */
+#define EPROM_TIMEOUT 80000 /* ms */
 
 /*
  * Turn on external enable line that allows writing on the flash.
@@ -117,11 +114,9 @@
 static void write_enable(struct hfi1_devdata *dd)
 {
 	/* raise signal */
-	write_csr(dd, ASIC_GPIO_OUT,
-		read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
 	/* raise enable */
-	write_csr(dd, ASIC_GPIO_OE,
-		read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
 }
 
 /*
@@ -130,11 +125,9 @@
 static void write_disable(struct hfi1_devdata *dd)
 {
 	/* lower signal */
-	write_csr(dd, ASIC_GPIO_OUT,
-		read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
 	/* lower enable */
-	write_csr(dd, ASIC_GPIO_OE,
-		read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
 }
 
 /*
@@ -212,8 +205,8 @@
 	/* check the end points for the minimum erase */
 	if ((start & MASK_4KB) || (end & MASK_4KB)) {
 		dd_dev_err(dd,
-			"%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
-			__func__, start, end);
+			   "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
+			   __func__, start, end);
 		return -EINVAL;
 	}
 
@@ -256,7 +249,7 @@
 	int i;
 
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
-	for (i = 0; i < EP_PAGE_SIZE/sizeof(u32); i++)
+	for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++)
 		result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
 }
@@ -267,7 +260,7 @@
 static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 {
 	u32 offset;
-	u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
+	u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
 	int ret = 0;
 
 	/* reject anything not on an EPROM page boundary */
@@ -277,7 +270,7 @@
 	for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
 		read_page(dd, start + offset, buffer);
 		if (copy_to_user((void __user *)(addr + offset),
-						buffer, EP_PAGE_SIZE)) {
+				 buffer, EP_PAGE_SIZE)) {
 			ret = -EFAULT;
 			goto done;
 		}
@@ -298,7 +291,7 @@
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
 	write_csr(dd, ASIC_EEP_DATA, data[0]);
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset));
-	for (i = 1; i < EP_PAGE_SIZE/sizeof(u32); i++)
+	for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++)
 		write_csr(dd, ASIC_EEP_DATA, data[i]);
 	/* will close the open page */
 	return wait_for_not_busy(dd);
@@ -310,7 +303,7 @@
 static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 {
 	u32 offset;
-	u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
+	u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
 	int ret = 0;
 
 	/* reject anything not on an EPROM page boundary */
@@ -321,7 +314,7 @@
 
 	for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
 		if (copy_from_user(buffer, (void __user *)(addr + offset),
-						EP_PAGE_SIZE)) {
+				   EP_PAGE_SIZE)) {
 			ret = -EFAULT;
 			goto done;
 		}
@@ -353,44 +346,42 @@
  *
  * Return 0 on success, -ERRNO on error
  */
-int handle_eprom_command(const struct hfi1_cmd *cmd)
+int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd)
 {
 	struct hfi1_devdata *dd;
 	u32 dev_id;
 	u32 rlen;	/* range length */
 	u32 rstart;	/* range start */
+	int i_minor;
 	int ret = 0;
 
 	/*
-	 * The EPROM is per-device, so use unit 0 as that will always
-	 * exist.
+	 * Map the device file to device data using the relative minor.
+	 * The device file minor number is the unit number + 1.  0 is
+	 * the generic device file - reject it.
 	 */
-	dd = hfi1_lookup(0);
+	i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
+	if (i_minor <= 0)
+		return -EINVAL;
+	dd = hfi1_lookup(i_minor - 1);
 	if (!dd) {
-		pr_err("%s: cannot find unit 0!\n", __func__);
+		pr_err("%s: cannot find unit %d!\n", __func__, i_minor);
 		return -EINVAL;
 	}
 
-	/* lock against other callers touching the ASIC block */
-	mutex_lock(&eprom_mutex);
+	/* some devices do not have an EPROM */
+	if (!dd->eprom_available)
+		return -EOPNOTSUPP;
 
-	/* some platforms do not have an EPROM */
-	if (!eprom_available) {
-		ret = -ENOSYS;
-		goto done_asic;
-	}
-
-	/* lock against the other HFI on another OS */
-	ret = acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
 	if (ret) {
-		dd_dev_err(dd,
-			"%s: unable to acquire hw mutex, no EPROM support\n",
-			__func__);
+		dd_dev_err(dd, "%s: unable to acquire EPROM resource\n",
+			   __func__);
 		goto done_asic;
 	}
 
 	dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n",
-		__func__, cmd->type, cmd->len, cmd->addr);
+		    __func__, cmd->type, cmd->len, cmd->addr);
 
 	switch (cmd->type) {
 	case HFI1_CMD_EP_INFO:
@@ -401,7 +392,7 @@
 		dev_id = read_device_id(dd);
 		/* addr points to a u32 user buffer */
 		if (copy_to_user((void __user *)cmd->addr, &dev_id,
-								sizeof(u32)))
+				 sizeof(u32)))
 			ret = -EFAULT;
 		break;
 
@@ -429,14 +420,13 @@
 
 	default:
 		dd_dev_err(dd, "%s: unexpected command %d\n",
-			__func__, cmd->type);
+			   __func__, cmd->type);
 		ret = -EINVAL;
 		break;
 	}
 
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_EPROM);
 done_asic:
-	mutex_unlock(&eprom_mutex);
 	return ret;
 }
 
@@ -447,44 +437,35 @@
 {
 	int ret = 0;
 
-	/* only the discrete chip has an EPROM, nothing to do */
+	/* only the discrete chip has an EPROM */
 	if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
 		return 0;
 
-	/* lock against other callers */
-	mutex_lock(&eprom_mutex);
-	if (eprom_available)	/* already initialized */
-		goto done_asic;
-
 	/*
-	 * Lock against the other HFI on another OS - the mutex above
-	 * would have caught anything in this driver.  It is OK if
-	 * both OSes reset the EPROM - as long as they don't do it at
-	 * the same time.
+	 * It is OK if both HFIs reset the EPROM as long as they don't
+	 * do it at the same time.
 	 */
-	ret = acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
 	if (ret) {
 		dd_dev_err(dd,
-			"%s: unable to acquire hw mutex, no EPROM support\n",
-			__func__);
+			   "%s: unable to acquire EPROM resource, no EPROM support\n",
+			   __func__);
 		goto done_asic;
 	}
 
 	/* reset EPROM to be sure it is in a good state */
 
 	/* set reset */
-	write_csr(dd, ASIC_EEP_CTL_STAT,
-					ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
+	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
 	/* clear reset, set speed */
 	write_csr(dd, ASIC_EEP_CTL_STAT,
-			EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
+		  EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
 
 	/* wake the device with command "release powerdown NoID" */
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
 
-	eprom_available = 1;
-	release_hw_mutex(dd);
+	dd->eprom_available = true;
+	release_chip_resource(dd, CR_EPROM);
 done_asic:
-	mutex_unlock(&eprom_mutex);
 	return ret;
 }

diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/staging/rdma/hfi1/eprom.h
index 64a6427..d41f0b1 100644
--- a/drivers/staging/rdma/hfi1/eprom.h
+++ b/drivers/staging/rdma/hfi1/eprom.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,4 +49,4 @@
 struct hfi1_devdata;
 
 int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(const struct hfi1_cmd *cmd);
+int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);

diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index 8b911e8..8396dc5 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -60,6 +57,8 @@
 #include "user_sdma.h"
 #include "user_exp_rcv.h"
 #include "eprom.h"
+#include "aspm.h"
+#include "mmu_rb.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -96,9 +95,6 @@
 static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
 static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
 static int vma_fault(struct vm_area_struct *, struct vm_fault *);
-static int exp_tid_setup(struct file *, struct hfi1_tid_info *);
-static int exp_tid_free(struct file *, struct hfi1_tid_info *);
-static void unlock_exp_tids(struct hfi1_ctxtdata *);
 
 static const struct file_operations hfi1_file_ops = {
 	.owner = THIS_MODULE,
@@ -164,7 +160,6 @@
 #define dbg(fmt, ...)				\
 	pr_info(fmt, ##__VA_ARGS__)
 
-
 static inline int is_valid_mmap(u64 token)
 {
 	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
@@ -188,6 +183,7 @@
 	struct hfi1_cmd cmd;
 	struct hfi1_user_info uinfo;
 	struct hfi1_tid_info tinfo;
+	unsigned long addr;
 	ssize_t consumed = 0, copy = 0, ret = 0;
 	void *dest = NULL;
 	__u64 user_val = 0;
@@ -219,6 +215,7 @@
 		break;
 	case HFI1_CMD_TID_UPDATE:
 	case HFI1_CMD_TID_FREE:
+	case HFI1_CMD_TID_INVAL_READ:
 		copy = sizeof(tinfo);
 		dest = &tinfo;
 		break;
@@ -294,9 +291,8 @@
 			sc_return_credits(uctxt->sc);
 		break;
 	case HFI1_CMD_TID_UPDATE:
-		ret = exp_tid_setup(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
 		if (!ret) {
-			unsigned long addr;
 			/*
 			 * Copy the number of tidlist entries we used
 			 * and the length of the buffer we registered.
@@ -311,8 +307,25 @@
 				ret = -EFAULT;
 		}
 		break;
+	case HFI1_CMD_TID_INVAL_READ:
+		ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+		if (ret)
+			break;
+		addr = (unsigned long)cmd.addr +
+			offsetof(struct hfi1_tid_info, tidcnt);
+		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+				 sizeof(tinfo.tidcnt)))
+			ret = -EFAULT;
+		break;
 	case HFI1_CMD_TID_FREE:
-		ret = exp_tid_free(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+		if (ret)
+			break;
+		addr = (unsigned long)cmd.addr +
+			offsetof(struct hfi1_tid_info, tidcnt);
+		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+				 sizeof(tinfo.tidcnt)))
+			ret = -EFAULT;
 		break;
 	case HFI1_CMD_RECV_CTRL:
 		ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val);
@@ -373,8 +386,10 @@
 				break;
 			}
 			if (dd->flags & HFI1_FORCED_FREEZE) {
-				/* Don't allow context reset if we are into
-				 * forced freeze */
+				/*
+				 * Don't allow context reset if we are into
+				 * forced freeze
+				 */
 				ret = -ENODEV;
 				break;
 			}
@@ -382,8 +397,9 @@
 			ret = sc_enable(sc);
 			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
 				     uctxt->ctxt);
-		} else
+		} else {
 			ret = sc_restart(sc);
+		}
 		if (!ret)
 			sc_return_credits(sc);
 		break;
@@ -393,7 +409,7 @@
 	case HFI1_CMD_EP_ERASE_RANGE:
 	case HFI1_CMD_EP_READ_RANGE:
 	case HFI1_CMD_EP_WRITE_RANGE:
-		ret = handle_eprom_command(&cmd);
+		ret = handle_eprom_command(fp, &cmd);
 		break;
 	}
 
@@ -732,6 +748,9 @@
 	/* drain user sdma queue */
 	hfi1_user_sdma_free_queues(fdata);
 
+	/* release the cpu */
+	hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
+
 	/*
 	 * Clear any left over, unhandled events so the next process that
 	 * gets this context doesn't get confused.
@@ -755,6 +774,7 @@
 	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
 		     HFI1_RCVCTRL_TIDFLOW_DIS |
 		     HFI1_RCVCTRL_INTRAVAIL_DIS |
+		     HFI1_RCVCTRL_TAILUPD_DIS |
 		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
 		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
 		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
@@ -777,14 +797,12 @@
 	uctxt->pionowait = 0;
 	uctxt->event_flags = 0;
 
-	hfi1_clear_tids(uctxt);
+	hfi1_user_exp_rcv_free(fdata);
 	hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
 
-	if (uctxt->tid_pg_list)
-		unlock_exp_tids(uctxt);
-
 	hfi1_stats.sps_ctxts--;
-	dd->freectxts++;
+	if (++dd->freectxts == dd->num_user_contexts)
+		aspm_enable_all(dd);
 	mutex_unlock(&hfi1_mutex);
 	hfi1_free_ctxtdata(dd, uctxt);
 done:
@@ -826,8 +844,16 @@
 
 	mutex_lock(&hfi1_mutex);
 	/* First, lets check if we need to setup a shared context? */
-	if (uinfo->subctxt_cnt)
+	if (uinfo->subctxt_cnt) {
+		struct hfi1_filedata *fd = fp->private_data;
+
 		ret = find_shared_ctxt(fp, uinfo);
+		if (ret < 0)
+			goto done_unlock;
+		if (ret)
+			fd->rec_cpu_num = hfi1_get_proc_affinity(
+				fd->uctxt->dd, fd->uctxt->numa_id);
+	}
 
 	/*
 	 * We execute the following block if we couldn't find a
@@ -837,6 +863,7 @@
 		i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
 		ret = get_user_context(fp, uinfo, i_minor - 1, alg);
 	}
+done_unlock:
 	mutex_unlock(&hfi1_mutex);
 done:
 	return ret;
@@ -962,7 +989,7 @@
 	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt;
 	unsigned ctxt;
-	int ret;
+	int ret, numa;
 
 	if (dd->flags & HFI1_FROZEN) {
 		/*
@@ -982,17 +1009,26 @@
 	if (ctxt == dd->num_rcv_contexts)
 		return -EBUSY;
 
-	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt);
+	fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
+	if (fd->rec_cpu_num != -1)
+		numa = cpu_to_node(fd->rec_cpu_num);
+	else
+		numa = numa_node_id();
+	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
 	if (!uctxt) {
 		dd_dev_err(dd,
 			   "Unable to allocate ctxtdata memory, failing open\n");
 		return -ENOMEM;
 	}
+	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
+		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
+		  uctxt->numa_id);
+
 	/*
 	 * Allocate and enable a PIO send context.
 	 */
 	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
-			     uctxt->numa_id);
+			     uctxt->dd->node);
 	if (!uctxt->sc)
 		return -ENOMEM;
 
@@ -1026,7 +1062,12 @@
 	INIT_LIST_HEAD(&uctxt->sdma_queues);
 	spin_lock_init(&uctxt->sdma_qlock);
 	hfi1_stats.sps_ctxts++;
-	dd->freectxts--;
+	/*
+	 * Disable ASPM when there are open user/PSM contexts to avoid
+	 * issues with ASPM L1 exit latency
+	 */
+	if (dd->freectxts-- == dd->num_user_contexts)
+		aspm_disable_all(dd);
 	fd->uctxt = uctxt;
 
 	return 0;
@@ -1035,22 +1076,19 @@
 static int init_subctxts(struct hfi1_ctxtdata *uctxt,
 			 const struct hfi1_user_info *uinfo)
 {
-	int ret = 0;
 	unsigned num_subctxts;
 
 	num_subctxts = uinfo->subctxt_cnt;
-	if (num_subctxts > HFI1_MAX_SHARED_CTXTS) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
+		return -EINVAL;
 
 	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
 	uctxt->subctxt_id = uinfo->subctxt_id;
 	uctxt->active_slaves = 1;
 	uctxt->redirect_seq_cnt = 1;
 	set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
-bail:
-	return ret;
+
+	return 0;
 }
 
 static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
@@ -1105,10 +1143,10 @@
 	 * has done it.
 	 */
 	if (fd->subctxt) {
-		ret = wait_event_interruptible(uctxt->wait,
-			!test_bit(HFI1_CTXT_MASTER_UNINIT,
-			&uctxt->event_flags));
-		goto done;
+		ret = wait_event_interruptible(uctxt->wait, !test_bit(
+					       HFI1_CTXT_MASTER_UNINIT,
+					       &uctxt->event_flags));
+		goto expected;
 	}
 
 	/* initialize poll variables... */
@@ -1146,8 +1184,16 @@
 		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
 	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
 		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+	/*
+	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
+	 * We can't rely on the correct value to be set from prior
+	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
+	 * for both cases.
+	 */
 	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
 		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+	else
+		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
 	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
 
 	/* Notify any waiting slaves */
@@ -1155,8 +1201,18 @@
 		clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
 		wake_up(&uctxt->wait);
 	}
-	ret = 0;
 
+expected:
+	/*
+	 * Expected receive has to be setup for all processes (including
+	 * shared contexts). However, it has to be done after the master
+	 * context has been fully configured as it depends on the
+	 * eager/expected split of the RcvArray entries.
+	 * Setting it up here ensures that the subcontexts will be waiting
+	 * (due to the above wait_event_interruptible() until the master
+	 * is setup.
+	 */
+	ret = hfi1_user_exp_rcv_init(fp);
 done:
 	return ret;
 }
@@ -1226,46 +1282,6 @@
 			if (ret)
 				goto done;
 		}
-		/* Setup Expected Rcv memories */
-		uctxt->tid_pg_list = vzalloc(uctxt->expected_count *
-					     sizeof(struct page **));
-		if (!uctxt->tid_pg_list) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		uctxt->physshadow = vzalloc(uctxt->expected_count *
-					    sizeof(*uctxt->physshadow));
-		if (!uctxt->physshadow) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		/* allocate expected TID map and initialize the cursor */
-		atomic_set(&uctxt->tidcursor, 0);
-		uctxt->numtidgroups = uctxt->expected_count /
-			dd->rcv_entries.group_size;
-		uctxt->tidmapcnt = uctxt->numtidgroups / BITS_PER_LONG +
-			!!(uctxt->numtidgroups % BITS_PER_LONG);
-		uctxt->tidusemap = kzalloc_node(uctxt->tidmapcnt *
-						sizeof(*uctxt->tidusemap),
-						GFP_KERNEL, uctxt->numa_id);
-		if (!uctxt->tidusemap) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		/*
-		 * In case that the number of groups is not a multiple of
-		 * 64 (the number of groups in a tidusemap element), mark
-		 * the extra ones as used. This will effectively make them
-		 * permanently used and should never be assigned. Otherwise,
-		 * the code which checks how many free groups we have will
-		 * get completely confused about the state of the bits.
-		 */
-		if (uctxt->numtidgroups % BITS_PER_LONG)
-			uctxt->tidusemap[uctxt->tidmapcnt - 1] =
-				~((1ULL << (uctxt->numtidgroups %
-					    BITS_PER_LONG)) - 1);
-		trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0,
-				       uctxt->tidusemap, uctxt->tidmapcnt);
 	}
 	ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
 	if (ret)
@@ -1391,8 +1407,9 @@
 		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
 		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
 		pollflag = 0;
-	} else
+	} else {
 		pollflag = POLLIN | POLLRDNORM;
+	}
 	spin_unlock_irq(&dd->uctxt_lock);
 
 	return pollflag;
@@ -1470,8 +1487,9 @@
 		if (uctxt->rcvhdrtail_kvaddr)
 			clear_rcvhdrtail(uctxt);
 		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
-	} else
+	} else {
 		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
+	}
 	hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
 	/* always; new head should be equal to new tail; see above */
 bail:
@@ -1504,367 +1522,6 @@
 	return 0;
 }
 
-#define num_user_pages(vaddr, len)					\
-	(1 + (((((unsigned long)(vaddr) +				\
-		 (unsigned long)(len) - 1) & PAGE_MASK) -		\
-	       ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
-
-/**
- * tzcnt - count the number of trailing zeros in a 64bit value
- * @value: the value to be examined
- *
- * Returns the number of trailing least significant zeros in the
- * the input value. If the value is zero, return the number of
- * bits of the value.
- */
-static inline u8 tzcnt(u64 value)
-{
-	return value ? __builtin_ctzl(value) : sizeof(value) * 8;
-}
-
-static inline unsigned num_free_groups(unsigned long map, u16 *start)
-{
-	unsigned free;
-	u16 bitidx = *start;
-
-	if (bitidx >= BITS_PER_LONG)
-		return 0;
-	/* "Turn off" any bits set before our bit index */
-	map &= ~((1ULL << bitidx) - 1);
-	free = tzcnt(map) - bitidx;
-	while (!free && bitidx < BITS_PER_LONG) {
-		/* Zero out the last set bit so we look at the rest */
-		map &= ~(1ULL << bitidx);
-		/*
-		 * Account for the previously checked bits and advance
-		 * the bit index. We don't have to check for bitidx
-		 * getting bigger than BITS_PER_LONG here as it would
-		 * mean extra instructions that we don't need. If it
-		 * did happen, it would push free to a negative value
-		 * which will break the loop.
-		 */
-		free = tzcnt(map) - ++bitidx;
-	}
-	*start = bitidx;
-	return free;
-}
-
-static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo)
-{
-	int ret = 0;
-	struct hfi1_filedata *fd = fp->private_data;
-	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned tid, mapped = 0, npages, ngroups, exp_groups,
-		tidpairs = uctxt->expected_count / 2;
-	struct page **pages;
-	unsigned long vaddr, tidmap[uctxt->tidmapcnt];
-	dma_addr_t *phys;
-	u32 tidlist[tidpairs], pairidx = 0, tidcursor;
-	u16 useidx, idx, bitidx, tidcnt = 0;
-
-	vaddr = tinfo->vaddr;
-
-	if (offset_in_page(vaddr)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	npages = num_user_pages(vaddr, tinfo->length);
-	if (!npages) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
-		       npages * PAGE_SIZE)) {
-		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
-			   (void *)vaddr, npages);
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	memset(tidmap, 0, sizeof(tidmap[0]) * uctxt->tidmapcnt);
-	memset(tidlist, 0, sizeof(tidlist[0]) * tidpairs);
-
-	exp_groups = uctxt->expected_count / dd->rcv_entries.group_size;
-	/* which group set do we look at first? */
-	tidcursor = atomic_read(&uctxt->tidcursor);
-	useidx = (tidcursor >> 16) & 0xffff;
-	bitidx = tidcursor & 0xffff;
-
-	/*
-	 * Keep going until we've mapped all pages or we've exhausted all
-	 * RcvArray entries.
-	 * This iterates over the number of tidmaps + 1
-	 * (idx <= uctxt->tidmapcnt) so we check the bitmap which we
-	 * started from one more time for any free bits before the
-	 * starting point bit.
-	 */
-	for (mapped = 0, idx = 0;
-	     mapped < npages && idx <= uctxt->tidmapcnt;) {
-		u64 i, offset = 0;
-		unsigned free, pinned, pmapped = 0, bits_used;
-		u16 grp;
-
-		/*
-		 * "Reserve" the needed group bits under lock so other
-		 * processes can't step in the middle of it. Once
-		 * reserved, we don't need the lock anymore since we
-		 * are guaranteed the groups.
-		 */
-		spin_lock(&uctxt->exp_lock);
-		if (uctxt->tidusemap[useidx] == -1ULL ||
-		    bitidx >= BITS_PER_LONG) {
-			/* no free groups in the set, use the next */
-			useidx = (useidx + 1) % uctxt->tidmapcnt;
-			idx++;
-			bitidx = 0;
-			spin_unlock(&uctxt->exp_lock);
-			continue;
-		}
-		ngroups = ((npages - mapped) / dd->rcv_entries.group_size) +
-			!!((npages - mapped) % dd->rcv_entries.group_size);
-
-		/*
-		 * If we've gotten here, the current set of groups does have
-		 * one or more free groups.
-		 */
-		free = num_free_groups(uctxt->tidusemap[useidx], &bitidx);
-		if (!free) {
-			/*
-			 * Despite the check above, free could still come back
-			 * as 0 because we don't check the entire bitmap but
-			 * we start from bitidx.
-			 */
-			spin_unlock(&uctxt->exp_lock);
-			continue;
-		}
-		bits_used = min(free, ngroups);
-		tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx;
-		uctxt->tidusemap[useidx] |= tidmap[useidx];
-		spin_unlock(&uctxt->exp_lock);
-
-		/*
-		 * At this point, we know where in the map we have free bits.
-		 * properly offset into the various "shadow" arrays and compute
-		 * the RcvArray entry index.
-		 */
-		offset = ((useidx * BITS_PER_LONG) + bitidx) *
-			dd->rcv_entries.group_size;
-		pages = uctxt->tid_pg_list + offset;
-		phys = uctxt->physshadow + offset;
-		tid = uctxt->expected_base + offset;
-
-		/* Calculate how many pages we can pin based on free bits */
-		pinned = min((bits_used * dd->rcv_entries.group_size),
-			     (npages - mapped));
-		/*
-		 * Now that we know how many free RcvArray entries we have,
-		 * we can pin that many user pages.
-		 */
-		ret = hfi1_acquire_user_pages(vaddr + (mapped * PAGE_SIZE),
-					      pinned, true, pages);
-		if (ret) {
-			/*
-			 * We can't continue because the pages array won't be
-			 * initialized. This should never happen,
-			 * unless perhaps the user has mpin'ed the pages
-			 * themselves.
-			 */
-			dd_dev_info(dd,
-				    "Failed to lock addr %p, %u pages: errno %d\n",
-				    (void *) vaddr, pinned, -ret);
-			/*
-			 * Let go of the bits that we reserved since we are not
-			 * going to use them.
-			 */
-			spin_lock(&uctxt->exp_lock);
-			uctxt->tidusemap[useidx] &=
-				~(((1ULL << bits_used) - 1) << bitidx);
-			spin_unlock(&uctxt->exp_lock);
-			goto done;
-		}
-		/*
-		 * How many groups do we need based on how many pages we have
-		 * pinned?
-		 */
-		ngroups = (pinned / dd->rcv_entries.group_size) +
-			!!(pinned % dd->rcv_entries.group_size);
-		/*
-		 * Keep programming RcvArray entries for all the <ngroups> free
-		 * groups.
-		 */
-		for (i = 0, grp = 0; grp < ngroups; i++, grp++) {
-			unsigned j;
-			u32 pair_size = 0, tidsize;
-			/*
-			 * This inner loop will program an entire group or the
-			 * array of pinned pages (which ever limit is hit
-			 * first).
-			 */
-			for (j = 0; j < dd->rcv_entries.group_size &&
-				     pmapped < pinned; j++, pmapped++, tid++) {
-				tidsize = PAGE_SIZE;
-				phys[pmapped] = hfi1_map_page(dd->pcidev,
-						   pages[pmapped], 0,
-						   tidsize, PCI_DMA_FROMDEVICE);
-				trace_hfi1_exp_rcv_set(uctxt->ctxt,
-						       fd->subctxt,
-						       tid, vaddr,
-						       phys[pmapped],
-						       pages[pmapped]);
-				/*
-				 * Each RcvArray entry is programmed with one
-				 * page * worth of memory. This will handle
-				 * the 8K MTU as well as anything smaller
-				 * due to the fact that both entries in the
-				 * RcvTidPair are programmed with a page.
-				 * PSM currently does not handle anything
-				 * bigger than 8K MTU, so should we even worry
-				 * about 10K here?
-				 */
-				hfi1_put_tid(dd, tid, PT_EXPECTED,
-					     phys[pmapped],
-					     ilog2(tidsize >> PAGE_SHIFT) + 1);
-				pair_size += tidsize >> PAGE_SHIFT;
-				EXP_TID_RESET(tidlist[pairidx], LEN, pair_size);
-				if (!(tid % 2)) {
-					tidlist[pairidx] |=
-					   EXP_TID_SET(IDX,
-						(tid - uctxt->expected_base)
-						       / 2);
-					tidlist[pairidx] |=
-						EXP_TID_SET(CTRL, 1);
-					tidcnt++;
-				} else {
-					tidlist[pairidx] |=
-						EXP_TID_SET(CTRL, 2);
-					pair_size = 0;
-					pairidx++;
-				}
-			}
-			/*
-			 * We've programmed the entire group (or as much of the
-			 * group as we'll use. Now, it's time to push it out...
-			 */
-			flush_wc();
-		}
-		mapped += pinned;
-		atomic_set(&uctxt->tidcursor,
-			   (((useidx & 0xffffff) << 16) |
-			    ((bitidx + bits_used) & 0xffffff)));
-	}
-	trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, uctxt->tidusemap,
-			       uctxt->tidmapcnt);
-
-done:
-	/* If we've mapped anything, copy relevant info to user */
-	if (mapped) {
-		if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
-				 tidlist, sizeof(tidlist[0]) * tidcnt)) {
-			ret = -EFAULT;
-			goto done;
-		}
-		/* copy TID info to user */
-		if (copy_to_user((void __user *)(unsigned long)tinfo->tidmap,
-				 tidmap, sizeof(tidmap[0]) * uctxt->tidmapcnt))
-			ret = -EFAULT;
-	}
-bail:
-	/*
-	 * Calculate mapped length. New Exp TID protocol does not "unwind" and
-	 * report an error if it can't map the entire buffer. It just reports
-	 * the length that was mapped.
-	 */
-	tinfo->length = mapped * PAGE_SIZE;
-	tinfo->tidcnt = tidcnt;
-	return ret;
-}
-
-static int exp_tid_free(struct file *fp, struct hfi1_tid_info *tinfo)
-{
-	struct hfi1_filedata *fd = fp->private_data;
-	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned long tidmap[uctxt->tidmapcnt];
-	struct page **pages;
-	dma_addr_t *phys;
-	u16 idx, bitidx, tid;
-	int ret = 0;
-
-	if (copy_from_user(&tidmap, (void __user *)(unsigned long)
-			   tinfo->tidmap,
-			   sizeof(tidmap[0]) * uctxt->tidmapcnt)) {
-		ret = -EFAULT;
-		goto done;
-	}
-	for (idx = 0; idx < uctxt->tidmapcnt; idx++) {
-		unsigned long map;
-
-		bitidx = 0;
-		if (!tidmap[idx])
-			continue;
-		map = tidmap[idx];
-		while ((bitidx = tzcnt(map)) < BITS_PER_LONG) {
-			int i, pcount = 0;
-			struct page *pshadow[dd->rcv_entries.group_size];
-			unsigned offset = ((idx * BITS_PER_LONG) + bitidx) *
-				dd->rcv_entries.group_size;
-
-			pages = uctxt->tid_pg_list + offset;
-			phys = uctxt->physshadow + offset;
-			tid = uctxt->expected_base + offset;
-			for (i = 0; i < dd->rcv_entries.group_size;
-			     i++, tid++) {
-				if (pages[i]) {
-					hfi1_put_tid(dd, tid, PT_INVALID,
-						      0, 0);
-					trace_hfi1_exp_rcv_free(uctxt->ctxt,
-								fd->subctxt,
-								tid, phys[i],
-								pages[i]);
-					pci_unmap_page(dd->pcidev, phys[i],
-					      PAGE_SIZE, PCI_DMA_FROMDEVICE);
-					pshadow[pcount] = pages[i];
-					pages[i] = NULL;
-					pcount++;
-					phys[i] = 0;
-				}
-			}
-			flush_wc();
-			hfi1_release_user_pages(pshadow, pcount, true);
-			clear_bit(bitidx, &uctxt->tidusemap[idx]);
-			map &= ~(1ULL<<bitidx);
-		}
-	}
-	trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 1, uctxt->tidusemap,
-			       uctxt->tidmapcnt);
-done:
-	return ret;
-}
-
-static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt)
-{
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned tid;
-
-	dd_dev_info(dd, "ctxt %u unlocking any locked expTID pages\n",
-		    uctxt->ctxt);
-	for (tid = 0; tid < uctxt->expected_count; tid++) {
-		struct page *p = uctxt->tid_pg_list[tid];
-		dma_addr_t phys;
-
-		if (!p)
-			continue;
-
-		phys = uctxt->physshadow[tid];
-		uctxt->physshadow[tid] = 0;
-		uctxt->tid_pg_list[tid] = NULL;
-		pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-		hfi1_release_user_pages(&p, 1, true);
-	}
-}
-
 static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
 			 u16 pkey)
 {
@@ -1933,10 +1590,9 @@
 	return filp->f_pos;
 }
 
-
 /* NOTE: assumes unsigned long is 8 bytes */
 static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
-			loff_t *f_pos)
+		       loff_t *f_pos)
 {
 	struct hfi1_devdata *dd = filp->private_data;
 	void __iomem *base = dd->kregbase;
@@ -1972,12 +1628,12 @@
 		 * them.  These registers are defined as having a read value
 		 * of 0.
 		 */
-		else if (csr_off == ASIC_GPIO_CLEAR
-				|| csr_off == ASIC_GPIO_FORCE
-				|| csr_off == ASIC_QSFP1_CLEAR
-				|| csr_off == ASIC_QSFP1_FORCE
-				|| csr_off == ASIC_QSFP2_CLEAR
-				|| csr_off == ASIC_QSFP2_FORCE)
+		else if (csr_off == ASIC_GPIO_CLEAR ||
+			 csr_off == ASIC_GPIO_FORCE ||
+			 csr_off == ASIC_QSFP1_CLEAR ||
+			 csr_off == ASIC_QSFP1_FORCE ||
+			 csr_off == ASIC_QSFP2_CLEAR ||
+			 csr_off == ASIC_QSFP2_FORCE)
 			data = 0;
 		else if (csr_off >= barlen) {
 			/*

diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c
index 28ae42f..3040162 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/staging/rdma/hfi1/firmware.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -77,7 +74,13 @@
 static uint fw_fabric_serdes_load = 1;
 static uint fw_pcie_serdes_load = 1;
 static uint fw_sbus_load = 1;
-static uint platform_config_load = 1;
+
+/*
+ * Access required in platform.c
+ * Maintains state of whether the platform config was fetched via the
+ * fallback option
+ */
+uint platform_config_load;
 
 /* Firmware file names get set in hfi1_firmware_init() based on the above */
 static char *fw_8051_name;
@@ -107,6 +110,7 @@
 	u32 exponent_size;	/* in DWORDs */
 	u32 reserved[22];
 };
+
 /* expected field values */
 #define CSS_MODULE_TYPE	   0x00000006
 #define CSS_HEADER_LEN	   0x000000a1
@@ -166,6 +170,7 @@
 	FW_FINAL,
 	FW_ERR
 };
+
 static enum fw_state fw_state = FW_EMPTY;
 static int fw_err;
 static struct firmware_details fw_8051;
@@ -193,7 +198,7 @@
 #define RSA_ENGINE_TIMEOUT 100 /* ms */
 
 /* hardware mutex timeout, in ms */
-#define HM_TIMEOUT 4000 /* 4 s */
+#define HM_TIMEOUT 10 /* ms */
 
 /* 8051 memory access timeout, in us */
 #define DC8051_ACCESS_TIMEOUT 100 /* us */
@@ -233,6 +238,8 @@
 
 /* forwards */
 static void dispose_one_firmware(struct firmware_details *fdet);
+static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
+				       struct firmware_details *fdet);
 
 /*
  * Read a single 64-bit value from 8051 data memory.
@@ -372,8 +379,8 @@
 		return 0;
 
 	dd_dev_err(dd,
-		"invalid firmware header field %s: expected 0x%x, actual 0x%x\n",
-		what, expected, actual);
+		   "invalid firmware header field %s: expected 0x%x, actual 0x%x\n",
+		   what, expected, actual);
 	return 1;
 }
 
@@ -383,19 +390,19 @@
 static int verify_css_header(struct hfi1_devdata *dd, struct css_header *css)
 {
 	/* verify CSS header fields (most sizes are in DW, so add /4) */
-	if (invalid_header(dd, "module_type", css->module_type, CSS_MODULE_TYPE)
-			|| invalid_header(dd, "header_len", css->header_len,
-					(sizeof(struct firmware_file)/4))
-			|| invalid_header(dd, "header_version",
-					css->header_version, CSS_HEADER_VERSION)
-			|| invalid_header(dd, "module_vendor",
-					css->module_vendor, CSS_MODULE_VENDOR)
-			|| invalid_header(dd, "key_size",
-					css->key_size, KEY_SIZE/4)
-			|| invalid_header(dd, "modulus_size",
-					css->modulus_size, KEY_SIZE/4)
-			|| invalid_header(dd, "exponent_size",
-					css->exponent_size, EXPONENT_SIZE/4)) {
+	if (invalid_header(dd, "module_type", css->module_type,
+			   CSS_MODULE_TYPE) ||
+	    invalid_header(dd, "header_len", css->header_len,
+			   (sizeof(struct firmware_file) / 4)) ||
+	    invalid_header(dd, "header_version", css->header_version,
+			   CSS_HEADER_VERSION) ||
+	    invalid_header(dd, "module_vendor", css->module_vendor,
+			   CSS_MODULE_VENDOR) ||
+	    invalid_header(dd, "key_size", css->key_size, KEY_SIZE / 4) ||
+	    invalid_header(dd, "modulus_size", css->modulus_size,
+			   KEY_SIZE / 4) ||
+	    invalid_header(dd, "exponent_size", css->exponent_size,
+			   EXPONENT_SIZE / 4)) {
 		return -EINVAL;
 	}
 	return 0;
@@ -410,8 +417,8 @@
 	/* make sure we have some payload */
 	if (prefix_size >= file_size) {
 		dd_dev_err(dd,
-			"firmware \"%s\", size %ld, must be larger than %ld bytes\n",
-			name, file_size, prefix_size);
+			   "firmware \"%s\", size %ld, must be larger than %ld bytes\n",
+			   name, file_size, prefix_size);
 		return -EINVAL;
 	}
 
@@ -433,8 +440,8 @@
 
 	ret = request_firmware(&fdet->fw, name, &dd->pcidev->dev);
 	if (ret) {
-		dd_dev_err(dd, "cannot find firmware \"%s\", err %d\n",
-			   name, ret);
+		dd_dev_warn(dd, "cannot find firmware \"%s\", err %d\n",
+			    name, ret);
 		return ret;
 	}
 
@@ -480,14 +487,14 @@
 	ret = verify_css_header(dd, css);
 	if (ret) {
 		dd_dev_info(dd, "Invalid CSS header for \"%s\"\n", name);
-	} else if ((css->size*4) == fdet->fw->size) {
+	} else if ((css->size * 4) == fdet->fw->size) {
 		/* non-augmented firmware file */
 		struct firmware_file *ff = (struct firmware_file *)
 							fdet->fw->data;
 
 		/* make sure there are bytes in the payload */
 		ret = payload_check(dd, name, fdet->fw->size,
-						sizeof(struct firmware_file));
+				    sizeof(struct firmware_file));
 		if (ret == 0) {
 			fdet->css_header = css;
 			fdet->modulus = ff->modulus;
@@ -505,14 +512,14 @@
 			dd_dev_err(dd, "driver is unable to validate firmware without r2 and mu (not in firmware file)\n");
 			ret = -EINVAL;
 		}
-	} else if ((css->size*4) + AUGMENT_SIZE == fdet->fw->size) {
+	} else if ((css->size * 4) + AUGMENT_SIZE == fdet->fw->size) {
 		/* augmented firmware file */
 		struct augmented_firmware_file *aff =
 			(struct augmented_firmware_file *)fdet->fw->data;
 
 		/* make sure there are bytes in the payload */
 		ret = payload_check(dd, name, fdet->fw->size,
-					sizeof(struct augmented_firmware_file));
+				    sizeof(struct augmented_firmware_file));
 		if (ret == 0) {
 			fdet->css_header = css;
 			fdet->modulus = aff->modulus;
@@ -527,9 +534,10 @@
 	} else {
 		/* css->size check failed */
 		dd_dev_err(dd,
-			"invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n",
-			fdet->fw->size/4, (fdet->fw->size - AUGMENT_SIZE)/4,
-			css->size);
+			   "invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n",
+			   fdet->fw->size / 4,
+			   (fdet->fw->size - AUGMENT_SIZE) / 4,
+			   css->size);
 
 		ret = -EINVAL;
 	}
@@ -572,7 +580,7 @@
 		 * We tried the original and it failed.  Move to the
 		 * alternate.
 		 */
-		dd_dev_info(dd, "using alternate firmware names\n");
+		dd_dev_warn(dd, "using alternate firmware names\n");
 		/*
 		 * Let others run.  Some systems, when missing firmware, does
 		 * something that holds for 30 seconds.  If we do that twice
@@ -593,19 +601,6 @@
 		fw_pcie_serdes_name = ALT_FW_PCIE_NAME;
 	}
 
-	if (fw_8051_load) {
-		err = obtain_one_firmware(dd, fw_8051_name, &fw_8051);
-		if (err)
-			goto done;
-	}
-
-	if (fw_fabric_serdes_load) {
-		err = obtain_one_firmware(dd, fw_fabric_serdes_name,
-			&fw_fabric);
-		if (err)
-			goto done;
-	}
-
 	if (fw_sbus_load) {
 		err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus);
 		if (err)
@@ -618,19 +613,34 @@
 			goto done;
 	}
 
+	if (fw_fabric_serdes_load) {
+		err = obtain_one_firmware(dd, fw_fabric_serdes_name,
+					  &fw_fabric);
+		if (err)
+			goto done;
+	}
+
+	if (fw_8051_load) {
+		err = obtain_one_firmware(dd, fw_8051_name, &fw_8051);
+		if (err)
+			goto done;
+	}
+
 done:
 	if (err) {
 		/* oops, had problems obtaining a firmware */
-		if (fw_state == FW_EMPTY) {
-			/* retry with alternate */
+		if (fw_state == FW_EMPTY && dd->icode == ICODE_RTL_SILICON) {
+			/* retry with alternate (RTL only) */
 			fw_state = FW_TRY;
 			goto retry;
 		}
+		dd_dev_err(dd, "unable to obtain working firmware\n");
 		fw_state = FW_ERR;
 		fw_err = -ENOENT;
 	} else {
 		/* success */
-		if (fw_state == FW_EMPTY)
+		if (fw_state == FW_EMPTY &&
+		    dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
 			fw_state = FW_TRY;	/* may retry later */
 		else
 			fw_state = FW_FINAL;	/* cannot try again */
@@ -673,10 +683,15 @@
 	}
 	/* not in FW_TRY state */
 
-	if (fw_state == FW_FINAL)
+	if (fw_state == FW_FINAL) {
+		if (platform_config) {
+			dd->platform_config.data = platform_config->data;
+			dd->platform_config.size = platform_config->size;
+		}
 		goto done;	/* already acquired */
-	else if (fw_state == FW_ERR)
+	} else if (fw_state == FW_ERR) {
 		goto done;	/* already tried and failed */
+	}
 	/* fw_state is FW_EMPTY */
 
 	/* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */
@@ -685,9 +700,13 @@
 	if (platform_config_load) {
 		platform_config = NULL;
 		err = request_firmware(&platform_config, platform_config_name,
-						&dd->pcidev->dev);
-		if (err)
+				       &dd->pcidev->dev);
+		if (err) {
 			platform_config = NULL;
+			goto done;
+		}
+		dd->platform_config.data = platform_config->data;
+		dd->platform_config.size = platform_config->size;
 	}
 
 done:
@@ -761,7 +780,7 @@
 static void write_rsa_data(struct hfi1_devdata *dd, int what,
 			   const u8 *data, int nbytes)
 {
-	int qw_size = nbytes/8;
+	int qw_size = nbytes / 8;
 	int i;
 
 	if (((unsigned long)data & 0x7) == 0) {
@@ -769,14 +788,14 @@
 		u64 *ptr = (u64 *)data;
 
 		for (i = 0; i < qw_size; i++, ptr++)
-			write_csr(dd, what + (8*i), *ptr);
+			write_csr(dd, what + (8 * i), *ptr);
 	} else {
 		/* not aligned */
 		for (i = 0; i < qw_size; i++, data += 8) {
 			u64 value;
 
 			memcpy(&value, data, 8);
-			write_csr(dd, what + (8*i), value);
+			write_csr(dd, what + (8 * i), value);
 		}
 	}
 }
@@ -789,7 +808,7 @@
 				    const u8 *data, int nbytes)
 {
 	u64 *ptr = (u64 *)data;
-	int qw_size = nbytes/8;
+	int qw_size = nbytes / 8;
 
 	for (; qw_size > 0; qw_size--, ptr++)
 		write_csr(dd, what, *ptr);
@@ -822,7 +841,7 @@
 			     >> MISC_CFG_FW_CTRL_RSA_STATUS_SHIFT;
 	if (status != RSA_STATUS_IDLE) {
 		dd_dev_err(dd, "%s security engine not idle - giving up\n",
-			who);
+			   who);
 		return -EBUSY;
 	}
 
@@ -859,7 +878,7 @@
 		if (status == RSA_STATUS_IDLE) {
 			/* should not happen */
 			dd_dev_err(dd, "%s firmware security bad idle state\n",
-				who);
+				   who);
 			ret = -EINVAL;
 			break;
 		} else if (status == RSA_STATUS_DONE) {
@@ -893,19 +912,20 @@
 	 * is not keeping the error high.
 	 */
 	write_csr(dd, MISC_ERR_CLEAR,
-			MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK
-			| MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK);
+		  MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK |
+		  MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK);
 	/*
-	 * All that is left are the current errors.  Print failure details,
-	 * if any.
+	 * All that is left are the current errors.  Print warnings on
+	 * authorization failure details, if any.  Firmware authorization
+	 * can be retried, so these are only warnings.
 	 */
 	reg = read_csr(dd, MISC_ERR_STATUS);
 	if (ret) {
 		if (reg & MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK)
-			dd_dev_err(dd, "%s firmware authorization failed\n",
-				who);
+			dd_dev_warn(dd, "%s firmware authorization failed\n",
+				    who);
 		if (reg & MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK)
-			dd_dev_err(dd, "%s firmware key mismatch\n", who);
+			dd_dev_warn(dd, "%s firmware key mismatch\n", who);
 	}
 
 	return ret;
@@ -922,7 +942,8 @@
 	write_rsa_data(dd, MISC_CFG_RSA_MU, fdet->mu, MU_SIZE);
 	/* Security variables d.  Write the header */
 	write_streamed_rsa_data(dd, MISC_CFG_SHA_PRELOAD,
-			(u8 *)fdet->css_header, sizeof(struct css_header));
+				(u8 *)fdet->css_header,
+				sizeof(struct css_header));
 }
 
 /* return the 8051 firmware state */
@@ -1002,7 +1023,7 @@
 
 	/* Firmware load steps 3-5 */
 	ret = write_8051(dd, 1/*code*/, 0, fdet->firmware_ptr,
-							fdet->firmware_len);
+			 fdet->firmware_len);
 	if (ret)
 		return ret;
 
@@ -1029,13 +1050,13 @@
 	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
 	if (ret) { /* timed out */
 		dd_dev_err(dd, "8051 start timeout, current state 0x%x\n",
-			get_firmware_state(dd));
+			   get_firmware_state(dd));
 		return -ETIMEDOUT;
 	}
 
 	read_misc_status(dd, &ver_a, &ver_b);
 	dd_dev_info(dd, "8051 firmware version %d.%d\n",
-		(int)ver_b, (int)ver_a);
+		    (int)ver_b, (int)ver_a);
 	dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
 
 	return 0;
@@ -1050,11 +1071,11 @@
 		  u8 receiver_addr, u8 data_addr, u8 command, u32 data_in)
 {
 	write_csr(dd, ASIC_CFG_SBUS_REQUEST,
-		((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT)
-		| ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT)
-		| ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT)
-		| ((u64)receiver_addr
-			<< ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
+		  ((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT) |
+		  ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT) |
+		  ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT) |
+		  ((u64)receiver_addr <<
+		   ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
 }
 
 /*
@@ -1072,14 +1093,14 @@
 		return;
 
 	dd_dev_info(dd, "Turning off spicos:%s%s\n",
-		flags & SPICO_SBUS ? " SBus" : "",
-		flags & SPICO_FABRIC ? " fabric" : "");
+		    flags & SPICO_SBUS ? " SBus" : "",
+		    flags & SPICO_FABRIC ? " fabric" : "");
 
 	write_csr(dd, MISC_CFG_FW_CTRL, ENABLE_SPICO_SMASK);
 	/* disable SBus spico */
 	if (flags & SPICO_SBUS)
 		sbus_request(dd, SBUS_MASTER_BROADCAST, 0x01,
-			WRITE_SBUS_RECEIVER, 0x00000040);
+			     WRITE_SBUS_RECEIVER, 0x00000040);
 
 	/* disable the fabric serdes spicos */
 	if (flags & SPICO_FABRIC)
@@ -1089,29 +1110,60 @@
 }
 
 /*
- *  Reset all of the fabric serdes for our HFI.
+ * Reset all of the fabric serdes for this HFI in preparation to take the
+ * link to Polling.
+ *
+ * To do a reset, we need to write to to the serdes registers.  Unfortunately,
+ * the fabric serdes download to the other HFI on the ASIC will have turned
+ * off the firmware validation on this HFI.  This means we can't write to the
+ * registers to reset the serdes.  Work around this by performing a complete
+ * re-download and validation of the fabric serdes firmware.  This, as a
+ * by-product, will reset the serdes.  NOTE: the re-download requires that
+ * the 8051 be in the Offline state.  I.e. not actively trying to use the
+ * serdes.  This routine is called at the point where the link is Offline and
+ * is getting ready to go to Polling.
  */
 void fabric_serdes_reset(struct hfi1_devdata *dd)
 {
-	u8 ra;
+	int ret;
 
-	if (dd->icode != ICODE_RTL_SILICON) /* only for RTL */
+	if (!fw_fabric_serdes_load)
 		return;
 
-	ra = fabric_serdes_broadcast[dd->hfi1_id];
-
-	acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		dd_dev_err(dd,
+			   "Cannot acquire SBus resource to reset fabric SerDes - perhaps you should reboot\n");
+		return;
+	}
 	set_sbus_fast_mode(dd);
-	/* place SerDes in reset and disable SPICO */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011);
-	/* wait 100 refclk cycles @ 156.25MHz => 640ns */
-	udelay(1);
-	/* remove SerDes reset */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010);
-	/* turn SPICO enable on */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002);
+
+	if (is_ax(dd)) {
+		/* A0 serdes do not work with a re-download */
+		u8 ra = fabric_serdes_broadcast[dd->hfi1_id];
+
+		/* place SerDes in reset and disable SPICO */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011);
+		/* wait 100 refclk cycles @ 156.25MHz => 640ns */
+		udelay(1);
+		/* remove SerDes reset */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010);
+		/* turn SPICO enable on */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002);
+	} else {
+		turn_off_spicos(dd, SPICO_FABRIC);
+		/*
+		 * No need for firmware retry - what to download has already
+		 * been decided.
+		 * No need to pay attention to the load return - the only
+		 * failure is a validation failure, which has already been
+		 * checked by the initial download.
+		 */
+		(void)load_fabric_serdes_firmware(dd, &fw_fabric);
+	}
+
 	clear_sbus_fast_mode(dd);
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 }
 
 /* Access to the SBus in this routine should probably be serialized */
@@ -1120,6 +1172,9 @@
 {
 	u64 reg, count = 0;
 
+	/* make sure fast mode is clear */
+	clear_sbus_fast_mode(dd);
+
 	sbus_request(dd, receiver_addr, data_addr, command, data_in);
 	write_csr(dd, ASIC_CFG_SBUS_EXECUTE,
 		  ASIC_CFG_SBUS_EXECUTE_EXECUTE_SMASK);
@@ -1177,7 +1232,7 @@
 	/* step 5: download SerDes machine code */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x0a, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 6: IMEM override off */
 	sbus_request(dd, ra, 0x00, WRITE_SBUS_RECEIVER, 0x00000000);
@@ -1216,7 +1271,7 @@
 	/* step 5: download the SBus Master machine code */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x14, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 6: set IMEM_CNTL_EN off */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000040);
@@ -1249,19 +1304,23 @@
 	/* step 3: enable XDMEM access */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000d40);
 	/* step 4: load firmware into SBus Master XDMEM */
-	/* NOTE: the dmem address, write_en, and wdata are all pre-packed,
-	   we only need to pick up the bytes and write them */
+	/*
+	 * NOTE: the dmem address, write_en, and wdata are all pre-packed,
+	 * we only need to pick up the bytes and write them
+	 */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x04, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 5: disable XDMEM access */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000140);
 	/* step 6: allow SBus Spico to run */
 	sbus_request(dd, ra, 0x05, WRITE_SBUS_RECEIVER, 0x00000000);
 
-	/* steps 7-11: run RSA, if it succeeds, firmware is available to
-	   be swapped */
+	/*
+	 * steps 7-11: run RSA, if it succeeds, firmware is available to
+	 * be swapped
+	 */
 	return run_rsa(dd, "PCIe serdes", fdet->signature);
 }
 
@@ -1285,7 +1344,7 @@
 		 *	23:16	BROADCAST_GROUP_2 (default 0xff)
 		 */
 		sbus_request(dd, addrs[count], 0xfd, WRITE_SBUS_RECEIVER,
-				(u32)bg1 << 4 | (u32)bg2 << 16);
+			     (u32)bg1 << 4 | (u32)bg2 << 16);
 	}
 }
 
@@ -1310,8 +1369,8 @@
 
 	/* timed out */
 	dd_dev_err(dd,
-		"Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n",
-		(u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up");
+		   "Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n",
+		   (u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up");
 
 	if (try == 0) {
 		/* break mutex and retry */
@@ -1328,10 +1387,197 @@
 	write_csr(dd, ASIC_CFG_MUTEX, 0);
 }
 
+/* return the given resource bit(s) as a mask for the given HFI */
+static inline u64 resource_mask(u32 hfi1_id, u32 resource)
+{
+	return ((u64)resource) << (hfi1_id ? CR_DYN_SHIFT : 0);
+}
+
+static void fail_mutex_acquire_message(struct hfi1_devdata *dd,
+				       const char *func)
+{
+	dd_dev_err(dd,
+		   "%s: hardware mutex stuck - suggest rebooting the machine\n",
+		   func);
+}
+
+/*
+ * Acquire access to a chip resource.
+ *
+ * Return 0 on success, -EBUSY if resource busy, -EIO if mutex acquire failed.
+ */
+static int __acquire_chip_resource(struct hfi1_devdata *dd, u32 resource)
+{
+	u64 scratch0, all_bits, my_bit;
+	int ret;
+
+	if (resource & CR_DYN_MASK) {
+		/* a dynamic resource is in use if either HFI has set the bit */
+		all_bits = resource_mask(0, resource) |
+						resource_mask(1, resource);
+		my_bit = resource_mask(dd->hfi1_id, resource);
+	} else {
+		/* non-dynamic resources are not split between HFIs */
+		all_bits = resource;
+		my_bit = resource;
+	}
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	ret = acquire_hw_mutex(dd);
+	if (ret) {
+		fail_mutex_acquire_message(dd, __func__);
+		ret = -EIO;
+		goto done;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if (scratch0 & all_bits) {
+		ret = -EBUSY;
+	} else {
+		write_csr(dd, ASIC_CFG_SCRATCH, scratch0 | my_bit);
+		/* force write to be visible to other HFI on another OS */
+		(void)read_csr(dd, ASIC_CFG_SCRATCH);
+	}
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+	return ret;
+}
+
+/*
+ * Acquire access to a chip resource, wait up to mswait milliseconds for
+ * the resource to become available.
+ *
+ * Return 0 on success, -EBUSY if busy (even after wait), -EIO if mutex
+ * acquire failed.
+ */
+int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait)
+{
+	unsigned long timeout;
+	int ret;
+
+	timeout = jiffies + msecs_to_jiffies(mswait);
+	while (1) {
+		ret = __acquire_chip_resource(dd, resource);
+		if (ret != -EBUSY)
+			return ret;
+		/* resource is busy, check our timeout */
+		if (time_after_eq(jiffies, timeout))
+			return -EBUSY;
+		usleep_range(80, 120);	/* arbitrary delay */
+	}
+}
+
+/*
+ * Release access to a chip resource
+ */
+void release_chip_resource(struct hfi1_devdata *dd, u32 resource)
+{
+	u64 scratch0, bit;
+
+	/* only dynamic resources should ever be cleared */
+	if (!(resource & CR_DYN_MASK)) {
+		dd_dev_err(dd, "%s: invalid resource 0x%x\n", __func__,
+			   resource);
+		return;
+	}
+	bit = resource_mask(dd->hfi1_id, resource);
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	if (acquire_hw_mutex(dd)) {
+		fail_mutex_acquire_message(dd, __func__);
+		goto done;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if ((scratch0 & bit) != 0) {
+		scratch0 &= ~bit;
+		write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+		/* force write to be visible to other HFI on another OS */
+		(void)read_csr(dd, ASIC_CFG_SCRATCH);
+	} else {
+		dd_dev_warn(dd, "%s: id %d, resource 0x%x: bit not set\n",
+			    __func__, dd->hfi1_id, resource);
+	}
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+}
+
+/*
+ * Return true if resource is set, false otherwise.  Print a warning
+ * if not set and a function is supplied.
+ */
+bool check_chip_resource(struct hfi1_devdata *dd, u32 resource,
+			 const char *func)
+{
+	u64 scratch0, bit;
+
+	if (resource & CR_DYN_MASK)
+		bit = resource_mask(dd->hfi1_id, resource);
+	else
+		bit = resource;
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if ((scratch0 & bit) == 0) {
+		if (func)
+			dd_dev_warn(dd,
+				    "%s: id %d, resource 0x%x, not acquired!\n",
+				    func, dd->hfi1_id, resource);
+		return false;
+	}
+	return true;
+}
+
+static void clear_chip_resources(struct hfi1_devdata *dd, const char *func)
+{
+	u64 scratch0;
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	if (acquire_hw_mutex(dd)) {
+		fail_mutex_acquire_message(dd, func);
+		goto done;
+	}
+
+	/* clear all dynamic access bits for this HFI */
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	scratch0 &= ~resource_mask(dd->hfi1_id, CR_DYN_MASK);
+	write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+	/* force write to be visible to other HFI on another OS */
+	(void)read_csr(dd, ASIC_CFG_SCRATCH);
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+}
+
+void init_chip_resources(struct hfi1_devdata *dd)
+{
+	/* clear any holds left by us */
+	clear_chip_resources(dd, __func__);
+}
+
+void finish_chip_resources(struct hfi1_devdata *dd)
+{
+	/* clear any holds left by us */
+	clear_chip_resources(dd, __func__);
+}
+
 void set_sbus_fast_mode(struct hfi1_devdata *dd)
 {
 	write_csr(dd, ASIC_CFG_SBUS_EXECUTE,
-				ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK);
+		  ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK);
 }
 
 void clear_sbus_fast_mode(struct hfi1_devdata *dd)
@@ -1354,23 +1600,23 @@
 	int ret;
 
 	if (fw_fabric_serdes_load) {
-		ret = acquire_hw_mutex(dd);
+		ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
 		if (ret)
 			return ret;
 
 		set_sbus_fast_mode(dd);
 
 		set_serdes_broadcast(dd, all_fabric_serdes_broadcast,
-				fabric_serdes_broadcast[dd->hfi1_id],
-				fabric_serdes_addrs[dd->hfi1_id],
-				NUM_FABRIC_SERDES);
+				     fabric_serdes_broadcast[dd->hfi1_id],
+				     fabric_serdes_addrs[dd->hfi1_id],
+				     NUM_FABRIC_SERDES);
 		turn_off_spicos(dd, SPICO_FABRIC);
 		do {
 			ret = load_fabric_serdes_firmware(dd, &fw_fabric);
 		} while (retry_firmware(dd, ret));
 
 		clear_sbus_fast_mode(dd);
-		release_hw_mutex(dd);
+		release_chip_resource(dd, CR_SBUS);
 		if (ret)
 			return ret;
 	}
@@ -1419,18 +1665,57 @@
 	return obtain_firmware(dd);
 }
 
+/*
+ * This function is a helper function for parse_platform_config(...) and
+ * does not check for validity of the platform configuration cache
+ * (because we know it is invalid as we are building up the cache).
+ * As such, this should not be called from anywhere other than
+ * parse_platform_config
+ */
+static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table)
+{
+	u32 meta_ver, meta_ver_meta, ver_start, ver_len, mask;
+	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
+
+	if (!system_table)
+		return -EINVAL;
+
+	meta_ver_meta =
+	*(pcfgcache->config_tables[PLATFORM_CONFIG_SYSTEM_TABLE].table_metadata
+	+ SYSTEM_TABLE_META_VERSION);
+
+	mask = ((1 << METADATA_TABLE_FIELD_START_LEN_BITS) - 1);
+	ver_start = meta_ver_meta & mask;
+
+	meta_ver_meta >>= METADATA_TABLE_FIELD_LEN_SHIFT;
+
+	mask = ((1 << METADATA_TABLE_FIELD_LEN_LEN_BITS) - 1);
+	ver_len = meta_ver_meta & mask;
+
+	ver_start /= 8;
+	meta_ver = *((u8 *)system_table + ver_start) & ((1 << ver_len) - 1);
+
+	if (meta_ver < 5) {
+		dd_dev_info(
+			dd, "%s:Please update platform config\n", __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 int parse_platform_config(struct hfi1_devdata *dd)
 {
 	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
 	u32 *ptr = NULL;
-	u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0;
+	u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0;
 	u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
+	int ret = -EINVAL; /* assume failure */
 
-	if (platform_config == NULL) {
+	if (!dd->platform_config.data) {
 		dd_dev_info(dd, "%s: Missing config file\n", __func__);
 		goto bail;
 	}
-	ptr = (u32 *)platform_config->data;
+	ptr = (u32 *)dd->platform_config.data;
 
 	magic_num = *ptr;
 	ptr++;
@@ -1439,12 +1724,32 @@
 		goto bail;
 	}
 
-	while (ptr < (u32 *)(platform_config->data + platform_config->size)) {
+	/* Field is file size in DWORDs */
+	file_length = (*ptr) * 4;
+	ptr++;
+
+	if (file_length > dd->platform_config.size) {
+		dd_dev_info(dd, "%s:File claims to be larger than read size\n",
+			    __func__);
+		goto bail;
+	} else if (file_length < dd->platform_config.size) {
+		dd_dev_info(dd,
+			    "%s:File claims to be smaller than read size, continuing\n",
+			    __func__);
+	}
+	/* exactly equal, perfection */
+
+	/*
+	 * In both cases where we proceed, using the self-reported file length
+	 * is the safer option
+	 */
+	while (ptr < (u32 *)(dd->platform_config.data + file_length)) {
 		header1 = *ptr;
 		header2 = *(ptr + 1);
 		if (header1 != ~header2) {
 			dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
-				__func__, (ptr - (u32 *)platform_config->data));
+				    __func__, (ptr - (u32 *)
+					       dd->platform_config.data));
 			goto bail;
 		}
 
@@ -1467,6 +1772,9 @@
 			case PLATFORM_CONFIG_SYSTEM_TABLE:
 				pcfgcache->config_tables[table_type].num_table =
 									1;
+				ret = check_meta_version(dd, ptr);
+				if (ret)
+					goto bail;
 				break;
 			case PLATFORM_CONFIG_PORT_TABLE:
 				pcfgcache->config_tables[table_type].num_table =
@@ -1484,9 +1792,10 @@
 				break;
 			default:
 				dd_dev_info(dd,
-				      "%s: Unknown data table %d, offset %ld\n",
-					__func__, table_type,
-				       (ptr - (u32 *)platform_config->data));
+					    "%s: Unknown data table %d, offset %ld\n",
+					    __func__, table_type,
+					    (ptr - (u32 *)
+					     dd->platform_config.data));
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table = ptr;
@@ -1507,9 +1816,10 @@
 				break;
 			default:
 				dd_dev_info(dd,
-				  "%s: Unknown metadata table %d, offset %ld\n",
-				  __func__, table_type,
-				  (ptr - (u32 *)platform_config->data));
+					    "%s: Unknown meta table %d, offset %ld\n",
+					    __func__, table_type,
+					    (ptr -
+					     (u32 *)dd->platform_config.data));
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table_metadata =
@@ -1518,14 +1828,16 @@
 
 		/* Calculate and check table crc */
 		crc = crc32_le(~(u32)0, (unsigned char const *)ptr,
-				(table_length_dwords * 4));
+			       (table_length_dwords * 4));
 		crc ^= ~(u32)0;
 
 		/* Jump the table */
 		ptr += table_length_dwords;
 		if (crc != *ptr) {
 			dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
-				__func__, (ptr - (u32 *)platform_config->data));
+				    __func__, (ptr -
+					       (u32 *)
+					       dd->platform_config.data));
 			goto bail;
 		}
 		/* Jump the CRC DWORD */
@@ -1536,11 +1848,12 @@
 	return 0;
 bail:
 	memset(pcfgcache, 0, sizeof(struct platform_config_cache));
-	return -EINVAL;
+	return ret;
 }
 
 static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
-		int field, u32 *field_len_bits, u32 *field_start_bits)
+					  int field, u32 *field_len_bits,
+					  u32 *field_start_bits)
 {
 	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
 	u32 *src_ptr = NULL;
@@ -1600,8 +1913,9 @@
  * @len: length of memory pointed by @data in bytes.
  */
 int get_platform_config_field(struct hfi1_devdata *dd,
-			enum platform_config_table_type_encoding table_type,
-			int table_index, int field_index, u32 *data, u32 len)
+			      enum platform_config_table_type_encoding
+			      table_type, int table_index, int field_index,
+			      u32 *data, u32 len)
 {
 	int ret = 0, wlen = 0, seek = 0;
 	u32 field_len_bits = 0, field_start_bits = 0, *src_ptr = NULL;
@@ -1613,7 +1927,8 @@
 		return -EINVAL;
 
 	ret = get_platform_fw_field_metadata(dd, table_type, field_index,
-					&field_len_bits, &field_start_bits);
+					     &field_len_bits,
+					     &field_start_bits);
 	if (ret)
 		return -EINVAL;
 
@@ -1629,19 +1944,21 @@
 			if (len < field_len_bits)
 				return -EINVAL;
 
-			seek = field_start_bits/8;
-			wlen = field_len_bits/8;
+			seek = field_start_bits / 8;
+			wlen = field_len_bits / 8;
 
 			src_ptr = (u32 *)((u8 *)src_ptr + seek);
 
-			/* We expect the field to be byte aligned and whole byte
-			 * lengths if we are here */
+			/*
+			 * We expect the field to be byte aligned and whole byte
+			 * lengths if we are here
+			 */
 			memcpy(data, src_ptr, wlen);
 			return 0;
 		}
 		break;
 	case PLATFORM_CONFIG_PORT_TABLE:
-		/* Port table is 4 DWORDS in META_VERSION 0 */
+		/* Port table is 4 DWORDS */
 		src_ptr = dd->hfi1_id ?
 			pcfgcache->config_tables[table_type].table + 4 :
 			pcfgcache->config_tables[table_type].table;
@@ -1669,7 +1986,7 @@
 	if (!src_ptr || len < field_len_bits)
 		return -EINVAL;
 
-	src_ptr += (field_start_bits/32);
+	src_ptr += (field_start_bits / 32);
 	*data = (*src_ptr >> (field_start_bits % 32)) &
 			((1 << field_len_bits) - 1);
 
@@ -1680,7 +1997,7 @@
  * Download the firmware needed for the Gen3 PCIe SerDes.  An update
  * to the SBus firmware is needed before updating the PCIe firmware.
  *
- * Note: caller must be holding the HW mutex.
+ * Note: caller must be holding the SBus resource.
  */
 int load_pcie_firmware(struct hfi1_devdata *dd)
 {
@@ -1701,9 +2018,9 @@
 	if (fw_pcie_serdes_load) {
 		dd_dev_info(dd, "Setting PCIe SerDes broadcast\n");
 		set_serdes_broadcast(dd, all_pcie_serdes_broadcast,
-					pcie_serdes_broadcast[dd->hfi1_id],
-					pcie_serdes_addrs[dd->hfi1_id],
-					NUM_PCIE_SERDES);
+				     pcie_serdes_broadcast[dd->hfi1_id],
+				     pcie_serdes_addrs[dd->hfi1_id],
+				     NUM_PCIE_SERDES);
 		do {
 			ret = load_pcie_serdes_firmware(dd, &fw_pcie);
 		} while (retry_firmware(dd, ret));
@@ -1724,9 +2041,9 @@
 {
 	/* Take the DC out of reset to get a valid GUID value */
 	write_csr(dd, CCE_DC_CTRL, 0);
-	(void) read_csr(dd, CCE_DC_CTRL);
+	(void)read_csr(dd, CCE_DC_CTRL);
 
 	dd->base_guid = read_csr(dd, DC_DC8051_CFG_LOCAL_GUID);
 	dd_dev_info(dd, "GUID %llx",
-		(unsigned long long)dd->base_guid);
+		    (unsigned long long)dd->base_guid);
 }

diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h
index d4826a9..16cbdc4 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/staging/rdma/hfi1/hfi.h

@@ -1,14 +1,13 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -65,6 +62,7 @@
 #include <linux/cdev.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
 
 #include "chip_registers.h"
 #include "common.h"
@@ -73,7 +71,8 @@
 #include "chip.h"
 #include "mad.h"
 #include "qsfp.h"
-#include "platform_config.h"
+#include "platform.h"
+#include "affinity.h"
 
 /* bumped 1 from s/w major version of TrueScale */
 #define HFI1_CHIP_VERS_MAJ 3U
@@ -98,6 +97,8 @@
 #define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap))
 #define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \
 			HFI1_CAP_MISC_MASK)
+/* Offline Disabled Reason is 4-bits */
+#define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON)
 
 /*
  * Control context is always 0 and handles the error packets.
@@ -177,6 +178,11 @@
 	} *rcvtids;
 };
 
+struct exp_tid_set {
+	struct list_head list;
+	u32 count;
+};
+
 struct hfi1_ctxtdata {
 	/* shadow the ctxt's RcvCtrl register */
 	u64 rcvctrl;
@@ -233,20 +239,13 @@
 	u32 expected_count;
 	/* index of first expected TID entry. */
 	u32 expected_base;
-	/* cursor into the exp group sets */
-	atomic_t tidcursor;
-	/* number of exp TID groups assigned to the ctxt */
-	u16 numtidgroups;
-	/* size of exp TID group fields in tidusemap */
-	u16 tidmapcnt;
-	/* exp TID group usage bitfield array */
-	unsigned long *tidusemap;
-	/* pinned pages for exp sends, allocated at open */
-	struct page **tid_pg_list;
-	/* dma handles for exp tid pages */
-	dma_addr_t *physshadow;
+
+	struct exp_tid_set tid_group_list;
+	struct exp_tid_set tid_used_list;
+	struct exp_tid_set tid_full_list;
+
 	/* lock protecting all Expected TID data */
-	spinlock_t exp_lock;
+	struct mutex exp_lock;
 	/* number of pio bufs for this ctxt (all procs, if shared) */
 	u32 piocnt;
 	/* first pio buffer for this ctxt */
@@ -311,8 +310,24 @@
 	 */
 	struct task_struct *progress;
 	struct list_head sdma_queues;
+	/* protect sdma queues */
 	spinlock_t sdma_qlock;
 
+	/* Is ASPM interrupt supported for this context */
+	bool aspm_intr_supported;
+	/* ASPM state (enabled/disabled) for this context */
+	bool aspm_enabled;
+	/* Timer for re-enabling ASPM if interrupt activity quietens down */
+	struct timer_list aspm_timer;
+	/* Lock to serialize between intr, timer intr and user threads */
+	spinlock_t aspm_lock;
+	/* Is ASPM processing enabled for this context (in intr context) */
+	bool aspm_intr_enable;
+	/* Last interrupt timestamp */
+	ktime_t aspm_ts_last_intr;
+	/* Last timestamp at which we scheduled a timer for this context */
+	ktime_t aspm_ts_timer_sched;
+
 	/*
 	 * The interrupt handler for a particular receive context can vary
 	 * throughout it's lifetime. This is not a lock protected data member so
@@ -335,7 +350,7 @@
 	void *hdr;
 	struct hfi1_ctxtdata *rcd;
 	__le32 *rhf_addr;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	struct hfi1_other_headers *ohdr;
 	u64 rhf;
 	u32 maxcnt;
@@ -363,6 +378,7 @@
 	int mode_flag;
 	struct cdev cdev;
 	struct device *class_dev;
+	/* protect snoop data */
 	spinlock_t snoop_lock;
 	struct list_head queue;
 	wait_queue_head_t waitq;
@@ -375,7 +391,7 @@
 #define HFI1_PORT_SNOOP_MODE     1U
 #define HFI1_PORT_CAPTURE_MODE   2U
 
-struct hfi1_sge_state;
+struct rvt_sge_state;
 
 /*
  * Get/Set IB link-level config parameters for f_get/set_ib_cfg()
@@ -424,17 +440,17 @@
 #define __HLS_GOING_OFFLINE_BP  9
 #define __HLS_LINK_COOLDOWN_BP 10
 
-#define HLS_UP_INIT	  (1 << __HLS_UP_INIT_BP)
-#define HLS_UP_ARMED	  (1 << __HLS_UP_ARMED_BP)
-#define HLS_UP_ACTIVE	  (1 << __HLS_UP_ACTIVE_BP)
-#define HLS_DN_DOWNDEF	  (1 << __HLS_DN_DOWNDEF_BP) /* link down default */
-#define HLS_DN_POLL	  (1 << __HLS_DN_POLL_BP)
-#define HLS_DN_DISABLE	  (1 << __HLS_DN_DISABLE_BP)
-#define HLS_DN_OFFLINE	  (1 << __HLS_DN_OFFLINE_BP)
-#define HLS_VERIFY_CAP	  (1 << __HLS_VERIFY_CAP_BP)
-#define HLS_GOING_UP	  (1 << __HLS_GOING_UP_BP)
-#define HLS_GOING_OFFLINE (1 << __HLS_GOING_OFFLINE_BP)
-#define HLS_LINK_COOLDOWN (1 << __HLS_LINK_COOLDOWN_BP)
+#define HLS_UP_INIT	  BIT(__HLS_UP_INIT_BP)
+#define HLS_UP_ARMED	  BIT(__HLS_UP_ARMED_BP)
+#define HLS_UP_ACTIVE	  BIT(__HLS_UP_ACTIVE_BP)
+#define HLS_DN_DOWNDEF	  BIT(__HLS_DN_DOWNDEF_BP) /* link down default */
+#define HLS_DN_POLL	  BIT(__HLS_DN_POLL_BP)
+#define HLS_DN_DISABLE	  BIT(__HLS_DN_DISABLE_BP)
+#define HLS_DN_OFFLINE	  BIT(__HLS_DN_OFFLINE_BP)
+#define HLS_VERIFY_CAP	  BIT(__HLS_VERIFY_CAP_BP)
+#define HLS_GOING_UP	  BIT(__HLS_GOING_UP_BP)
+#define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP)
+#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
 
 #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
 
@@ -490,6 +506,7 @@
 #define CNTR_DISABLED		0x2 /* Disable this counter */
 #define CNTR_32BIT		0x4 /* Simulate 64 bits for this counter */
 #define CNTR_VL			0x8 /* Per VL counter */
+#define CNTR_SDMA              0x10
 #define CNTR_INVALID_VL		-1  /* Specifies invalid VL */
 #define CNTR_MODE_W		0x0
 #define CNTR_MODE_R		0x1
@@ -512,10 +529,11 @@
 
 #define MAX_NAME_SIZE 64
 struct hfi1_msix_entry {
+	enum irq_type type;
 	struct msix_entry msix;
 	void *arg;
 	char name[MAX_NAME_SIZE];
-	cpumask_var_t mask;
+	cpumask_t mask;
 };
 
 /* per-SL CCA information */
@@ -542,6 +560,7 @@
 };
 
 struct vl_arb_cache {
+	/* protect vl arb cache */
 	spinlock_t lock;
 	struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE];
 };
@@ -561,7 +580,8 @@
 	struct kobject sl2sc_kobj;
 	struct kobject vl2mtu_kobj;
 
-	/* QSFP support */
+	/* PHY support */
+	u32 port_type;
 	struct qsfp_data qsfp_info;
 
 	/* GUID for this interface, in host order */
@@ -586,6 +606,7 @@
 	struct work_struct link_vc_work;
 	struct work_struct link_up_work;
 	struct work_struct link_down_work;
+	struct work_struct dc_host_req_work;
 	struct work_struct sma_message_work;
 	struct work_struct freeze_work;
 	struct work_struct link_downgrade_work;
@@ -623,6 +644,7 @@
 	u16 link_speed_active;
 	u8 vls_supported;
 	u8 vls_operational;
+	u8 actual_vls_operational;
 	/* LID mask control */
 	u8 lmc;
 	/* Rx Polarity inversion (compensate for ~tx on partner) */
@@ -642,19 +664,23 @@
 	u8 link_enabled;	/* link enabled? */
 	u8 linkinit_reason;
 	u8 local_tx_rate;	/* rate given to 8051 firmware */
+	u8 last_pstate;		/* info only */
 
 	/* placeholders for IB MAD packet settings */
 	u8 overrun_threshold;
 	u8 phy_error_threshold;
 
-	/* used to override LED behavior */
-	u8 led_override;  /* Substituted for normal value, if non-zero */
-	u16 led_override_timeoff; /* delta to next timer event */
-	u8 led_override_vals[2]; /* Alternates per blink-frame */
-	u8 led_override_phase; /* Just counts, LSB picks from vals[] */
+	/* Used to override LED behavior for things like maintenance beaconing*/
+	/*
+	 * Alternates per phase of blink
+	 * [0] holds LED off duration, [1] holds LED on duration
+	 */
+	unsigned long led_override_vals[2];
+	u8 led_override_phase; /* LSB picks from vals[] */
 	atomic_t led_override_timer_active;
 	/* Used to flash LEDs in override mode */
 	struct timer_list led_override_timer;
+
 	u32 sm_trap_qp;
 	u32 sa_qp;
 
@@ -689,10 +715,12 @@
 	/* CA's max number of 64 entry units in the congestion control table */
 	u8 cc_max_table_entries;
 
-	/* begin congestion log related entries
-	 * cc_log_lock protects all congestion log related data */
+	/*
+	 * begin congestion log related entries
+	 * cc_log_lock protects all congestion log related data
+	 */
 	spinlock_t cc_log_lock ____cacheline_aligned_in_smp;
-	u8 threshold_cong_event_map[OPA_MAX_SLS/8];
+	u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
 	u16 threshold_event_counter;
 	struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS];
 	int cc_log_idx; /* index for logging events */
@@ -705,8 +733,9 @@
 	u64 *cntrs;
 	/* port relative synthetic counter buffer */
 	u64 *scntrs;
-	/* we synthesize port_xmit_discards from several egress errors */
+	/* port_xmit_discards are synthesized from different egress errors */
 	u64 port_xmit_discards;
+	u64 port_xmit_discards_vl[C_VL_COUNT];
 	u64 port_xmit_constraint_errors;
 	u64 port_rcv_constraint_errors;
 	/* count of 'link_err' interrupts from DC */
@@ -728,6 +757,9 @@
 	u8 remote_link_down_reason;
 	/* Error events that will cause a port bounce. */
 	u32 port_error_action;
+	struct work_struct linkstate_active_work;
+	/* Does this port need to prescan for FECNs */
+	bool cc_prescan;
 };
 
 typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
@@ -773,6 +805,12 @@
 	u8 triggers;      /* temperature triggers */
 };
 
+/* common data between shared ASIC HFIs */
+struct hfi1_asic_data {
+	struct hfi1_devdata *dds[2];	/* back pointers */
+	struct mutex asic_resource_mutex;
+};
+
 /* device data struct now contains only "general per-device" info.
  * fields related to a physical IB port are in a hfi1_pportdata struct.
  */
@@ -782,6 +820,7 @@
 #define BOARD_VERS_MAX 96 /* how long the version string can be */
 #define SERIAL_MAX 16 /* length of the serial number */
 
+typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
 struct hfi1_devdata {
 	struct hfi1_ibdev verbs_dev;     /* must be first */
 	struct list_head list;
@@ -811,6 +850,12 @@
 	spinlock_t sc_lock;
 	/* Per VL data. Enough for all VLs but not all elements are set/used. */
 	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
+	/* lock for pio_map */
+	spinlock_t pio_map_lock;
+	/* array of kernel send contexts */
+	struct send_context **kernel_send_context;
+	/* array of vl maps */
+	struct pio_vl_map __rcu *pio_map;
 	/* seqlock for sc2vl */
 	seqlock_t sc2vl_lock;
 	u64 sc2vl[4];
@@ -841,6 +886,8 @@
 	wait_queue_head_t		  sdma_unfreeze_wq;
 	atomic_t			  sdma_unfreeze_count;
 
+	/* common data between shared ASIC HFIs in this OS */
+	struct hfi1_asic_data *asic_data;
 
 	/* hfi1_pportdata, points to array of (physical) port-specific
 	 * data structs, indexed by pidx (0..n-1)
@@ -873,10 +920,11 @@
 	/* reset value */
 	u64 z_int_counter;
 	u64 z_rcv_limit;
+	u64 z_send_schedule;
 	/* percpu int_counter */
 	u64 __percpu *int_counter;
 	u64 __percpu *rcv_limit;
-
+	u64 __percpu *send_schedule;
 	/* number of receive contexts in use by the driver */
 	u32 num_rcv_contexts;
 	/* number of pio send contexts in use by the driver */
@@ -885,6 +933,8 @@
 	 * number of ctxts available for PSM open
 	 */
 	u32 freectxts;
+	/* total number of available user/PSM contexts */
+	u32 num_user_contexts;
 	/* base receive interrupt timeout, in CSR units */
 	u32 rcv_intr_timeout_csr;
 
@@ -996,9 +1046,8 @@
 	u16 irev;	/* implementation revision */
 	u16 dc8051_ver; /* 8051 firmware version */
 
+	struct platform_config platform_config;
 	struct platform_config_cache pcfg_cache;
-	/* control high-level access to qsfp */
-	struct mutex qsfp_i2c_mutex;
 
 	struct diag_client *diag_client;
 	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
@@ -1008,8 +1057,6 @@
 	u16 psxmitwait_check_rate;
 	/* high volume overflow errors deferred to tasklet */
 	struct tasklet_struct error_tasklet;
-	/* per device cq worker */
-	struct kthread_worker *worker;
 
 	/* MSI-X information */
 	struct hfi1_msix_entry *msix_entries;
@@ -1090,10 +1137,8 @@
 	 * Handlers for outgoing data so that snoop/capture does not
 	 * have to have its hooks in the send path
 	 */
-	int (*process_pio_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
-				u64 pbc);
-	int (*process_dma_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
-				u64 pbc);
+	send_routine process_pio_send;
+	send_routine process_dma_send;
 	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
 				u64 pbc, const void *from, size_t count);
 
@@ -1105,7 +1150,6 @@
 	struct timer_list rcverr_timer;
 	u32 rcv_ovfl_cnt;
 
-	int assigned_node_id;
 	wait_queue_head_t event_queue;
 
 	/* Save the enabled LCB error bits */
@@ -1115,6 +1159,16 @@
 	/* receive context tail dummy address */
 	__le64 *rcvhdrtail_dummy_kvaddr;
 	dma_addr_t rcvhdrtail_dummy_physaddr;
+
+	bool eprom_available;	/* true if EPROM is available for this device */
+	bool aspm_supported;	/* Does HW support ASPM */
+	bool aspm_enabled;	/* ASPM state: enabled/disabled */
+	/* Serialize ASPM enable/disable between multiple verbs contexts */
+	spinlock_t aspm_lock;
+	/* Number of verbs contexts which have disabled ASPM */
+	atomic_t aspm_disabled_cnt;
+
+	struct hfi1_affinity *affinity;
 };
 
 /* 8051 firmware version helper */
@@ -1125,6 +1179,9 @@
 #define PT_EAGER    1
 #define PT_INVALID  2
 
+struct tid_rb_node;
+struct mmu_rb_node;
+
 /* Private data for file operations */
 struct hfi1_filedata {
 	struct hfi1_ctxtdata *uctxt;
@@ -1133,6 +1190,16 @@
 	struct hfi1_user_sdma_pkt_q *pq;
 	/* for cpu affinity; -1 if none */
 	int rec_cpu_num;
+	u32 tid_n_pinned;
+	struct rb_root tid_rb_root;
+	struct tid_rb_node **entry_to_rb;
+	spinlock_t tid_lock; /* protect tid_[limit,used] counters */
+	u32 tid_limit;
+	u32 tid_used;
+	u32 *invalid_tids;
+	u32 invalid_tid_idx;
+	/* protect invalid_tids array and invalid_tid_idx */
+	spinlock_t invalid_lock;
 };
 
 extern struct list_head hfi1_dev_list;
@@ -1156,7 +1223,7 @@
 int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
 int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
 int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32);
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
 void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
 			 struct hfi1_devdata *, u8, u8);
 void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
@@ -1164,6 +1231,7 @@
 int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+void set_all_slowpath(struct hfi1_devdata *dd);
 
 /* receive packet handler dispositions */
 #define RCV_PKT_OK      0x0 /* keep going */
@@ -1184,6 +1252,15 @@
 	return ppd->lstate; /* use the cached value */
 }
 
+void receive_interrupt_work(struct work_struct *work);
+
+/* extract service channel from header and rhf */
+static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+{
+	return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
+	       ((!!(rhf & RHF_DC_INFO_MASK)) << 4);
+}
+
 static inline u16 generate_jkey(kuid_t uid)
 {
 	return from_kuid(current_user_ns(), uid) & 0xffff;
@@ -1253,7 +1330,7 @@
 void set_link_ipg(struct hfi1_pportdata *ppd);
 void process_becn(struct hfi1_pportdata *ppd, u8 sl,  u16 rlid, u32 lqpn,
 		  u32 rqpn, u8 svc_type);
-void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
+void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
 		u32 pkey, u32 slid, u32 dlid, u8 sc5,
 		const struct ib_grh *old_grh);
 
@@ -1424,6 +1501,7 @@
 		mtu == 1024 || mtu == 2048 ||
 		mtu == 4096;
 }
+
 static inline int valid_opa_max_mtu(unsigned int mtu)
 {
 	return mtu >= 2048 &&
@@ -1445,12 +1523,13 @@
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
 int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc);
-int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc);
 void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
 			   u64 pbc, const void *from, size_t count);
+int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
 
 static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
 {
@@ -1472,6 +1551,11 @@
 	return container_of(ibp, struct hfi1_pportdata, ibport_data);
 }
 
+static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
+{
+	return container_of(rdi, struct hfi1_ibdev, rdi);
+}
+
 static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1515,12 +1599,10 @@
 #define HFI1_HAS_SDMA_TIMEOUT  0x8
 #define HFI1_HAS_SEND_DMA      0x10   /* Supports Send DMA */
 #define HFI1_FORCED_FREEZE     0x80   /* driver forced freeze mode */
-#define HFI1_DO_INIT_ASIC      0x100  /* This device will init the ASIC */
 
 /* IB dword length mask in PBC (lower 11 bits); same for all chips */
 #define HFI1_PBC_LENGTH_MASK                     ((1 << 11) - 1)
 
-
 /* ctxt_flag bit offsets */
 		/* context has been setup */
 #define HFI1_CTXT_SETUP_DONE 1
@@ -1538,14 +1620,10 @@
 void cc_state_reclaim(struct rcu_head *rcu);
 struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
 
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define HFI1_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define HFI1_LED_LOG 2  /* Logical (link) YELLOW LED */
-void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val);
+/* LED beaconing functions */
+void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+			     unsigned int timeoff);
+void shutdown_led_override(struct hfi1_pportdata *ppd);
 
 #define HFI1_CREDIT_RETURN_RATE (100)
 
@@ -1587,12 +1665,13 @@
  */
 #define DEFAULT_RCVHDR_ENTSIZE 32
 
+bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32);
 int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **);
-void hfi1_release_user_pages(struct page **, size_t, bool);
+void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool);
 
 static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
 {
-	*((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL;
+	*((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
 }
 
 static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -1601,7 +1680,7 @@
 	 * volatile because it's a DMA target from the chip, routine is
 	 * inlined, and don't want register caching or reordering.
 	 */
-	return (u32) le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+	return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
 }
 
 /*
@@ -1633,12 +1712,13 @@
 int do_pcie_gen3_transition(struct hfi1_devdata *dd);
 int parse_platform_config(struct hfi1_devdata *dd);
 int get_platform_config_field(struct hfi1_devdata *dd,
-			enum platform_config_table_type_encoding table_type,
-			int table_index, int field_index, u32 *data, u32 len);
+			      enum platform_config_table_type_encoding
+			      table_type, int table_index, int field_index,
+			      u32 *data, u32 len);
 
-dma_addr_t hfi1_map_page(struct pci_dev *, struct page *, unsigned long,
-			 size_t, int);
 const char *get_unit_name(int unit);
+const char *get_card_name(struct rvt_dev_info *rdi);
+struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
 
 /*
  * Flush write combining store buffers (if present) and perform a write
@@ -1659,7 +1739,7 @@
 
 extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
 
-void update_sge(struct hfi1_sge_state *ss, u32 length);
+void update_sge(struct rvt_sge_state *ss, u32 length);
 
 /* global module parameter variables */
 extern unsigned int hfi1_max_mtu;
@@ -1667,7 +1747,7 @@
 extern unsigned int user_credit_return_threshold;
 extern int num_user_contexts;
 extern unsigned n_krcvqs;
-extern u8 krcvqs[];
+extern uint krcvqs[];
 extern int krcvqsset;
 extern uint kdeth_qp;
 extern uint loopback;
@@ -1829,13 +1909,14 @@
 
 	dd->z_int_counter = get_all_cpu_total(dd->int_counter);
 	dd->z_rcv_limit = get_all_cpu_total(dd->rcv_limit);
+	dd->z_send_schedule = get_all_cpu_total(dd->send_schedule);
 
 	ppd = (struct hfi1_pportdata *)(dd + 1);
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
-		ppd->ibport_data.z_rc_acks =
-			get_all_cpu_total(ppd->ibport_data.rc_acks);
-		ppd->ibport_data.z_rc_qacks =
-			get_all_cpu_total(ppd->ibport_data.rc_qacks);
+		ppd->ibport_data.rvp.z_rc_acks =
+			get_all_cpu_total(ppd->ibport_data.rvp.rc_acks);
+		ppd->ibport_data.rvp.z_rc_qacks =
+			get_all_cpu_total(ppd->ibport_data.rvp.rc_qacks);
 	}
 }
 
@@ -1848,6 +1929,18 @@
 		write_csr(dd, DCC_CFG_LED_CNTRL, 0x10);
 }
 
+/* return the i2c resource given the target */
+static inline u32 i2c_target(u32 target)
+{
+	return target ? CR_I2C2 : CR_I2C1;
+}
+
+/* return the i2c chain chip resource that this HFI uses for QSFP */
+static inline u32 qsfp_resource(struct hfi1_devdata *dd)
+{
+	return i2c_target(dd->hfi1_id);
+}
+
 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
 
 #endif                          /* _HFI1_KERNEL_H */

diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c
index 02df291..cfcdc16 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -56,6 +53,7 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
+#include <rdma/rdma_vt.h>
 
 #include "hfi.h"
 #include "device.h"
@@ -65,6 +63,7 @@
 #include "sdma.h"
 #include "debugfs.h"
 #include "verbs.h"
+#include "aspm.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -75,6 +74,7 @@
 #define HFI1_MIN_USER_CTXT_BUFCNT 7
 
 #define HFI1_MIN_HDRQ_EGRBUF_CNT 2
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
 #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
 
@@ -87,9 +87,9 @@
 MODULE_PARM_DESC(
 	num_user_contexts, "Set max number of user contexts to use");
 
-u8 krcvqs[RXE_NUM_DATA_VL];
+uint krcvqs[RXE_NUM_DATA_VL];
 int krcvqsset;
-module_param_array(krcvqs, byte, &krcvqsset, S_IRUGO);
+module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
 MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
 
 /* computed based on above array */
@@ -128,16 +128,12 @@
 {
 	unsigned i;
 	int ret;
-	int local_node_id = pcibus_to_node(dd->pcidev->bus);
 
 	/* Control context has to be always 0 */
 	BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
 
-	if (local_node_id < 0)
-		local_node_id = numa_node_id();
-	dd->assigned_node_id = local_node_id;
-
-	dd->rcd = kcalloc(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL);
+	dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd),
+			       GFP_KERNEL, dd->node);
 	if (!dd->rcd)
 		goto nomem;
 
@@ -147,10 +143,10 @@
 		struct hfi1_ctxtdata *rcd;
 
 		ppd = dd->pport + (i % dd->num_pports);
-		rcd = hfi1_create_ctxtdata(ppd, i);
+		rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
 		if (!rcd) {
 			dd_dev_err(dd,
-				"Unable to allocate kernel receive context, failing\n");
+				   "Unable to allocate kernel receive context, failing\n");
 			goto nomem;
 		}
 		/*
@@ -171,7 +167,7 @@
 		rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
 		if (!rcd->sc) {
 			dd_dev_err(dd,
-				"Unable to allocate kernel send context, failing\n");
+				   "Unable to allocate kernel send context, failing\n");
 			dd->rcd[rcd->ctxt] = NULL;
 			hfi1_free_ctxtdata(dd, rcd);
 			goto nomem;
@@ -189,6 +185,12 @@
 		}
 	}
 
+	/*
+	 * Initialize aspm, to be done after gen3 transition and setting up
+	 * contexts and before enabling interrupts
+	 */
+	aspm_init(dd);
+
 	return 0;
 nomem:
 	ret = -ENOMEM;
@@ -201,7 +203,8 @@
 /*
  * Common code for user and kernel context setup.
  */
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+					   int numa)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	struct hfi1_ctxtdata *rcd;
@@ -224,10 +227,10 @@
 		rcd->cnt = 1;
 		rcd->ctxt = ctxt;
 		dd->rcd[ctxt] = rcd;
-		rcd->numa_id = numa_node_id();
+		rcd->numa_id = numa;
 		rcd->rcv_array_groups = dd->rcv_entries.ngroups;
 
-		spin_lock_init(&rcd->exp_lock);
+		mutex_init(&rcd->exp_lock);
 
 		/*
 		 * Calculate the context's RcvArray entry starting point.
@@ -260,7 +263,7 @@
 		/* Validate and initialize Rcv Hdr Q variables */
 		if (rcvhdrcnt % HDRQ_INCREMENT) {
 			dd_dev_err(dd,
-				   "ctxt%u: header queue count %d must be divisible by %d\n",
+				   "ctxt%u: header queue count %d must be divisible by %lu\n",
 				   rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
 			goto bail;
 		}
@@ -379,7 +382,7 @@
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL)
+	if (!cc_state)
 		/*
 		 * This should _never_ happen - rcu_read_lock() is held,
 		 * and set_link_ipg() should not be called if cc_state
@@ -431,7 +434,7 @@
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return HRTIMER_NORESTART;
 	}
@@ -493,14 +496,19 @@
 	INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
 	INIT_WORK(&ppd->link_up_work, handle_link_up);
 	INIT_WORK(&ppd->link_down_work, handle_link_down);
+	INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
 	INIT_WORK(&ppd->freeze_work, handle_freeze);
 	INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
 	INIT_WORK(&ppd->sma_message_work, handle_sma_message);
 	INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+	INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
+	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
+
 	mutex_init(&ppd->hls_lock);
 	spin_lock_init(&ppd->sdma_alllock);
 	spin_lock_init(&ppd->qsfp_info.qsfp_lock);
 
+	ppd->qsfp_info.ppd = ppd;
 	ppd->sm_trap_qp = 0x0;
 	ppd->sa_qp = 0x1;
 
@@ -582,8 +590,8 @@
 	 * Enable kernel ctxts' receive and receive interrupt.
 	 * Other ctxts done as user opens and initializes them.
 	 */
-	rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
 	for (i = 0; i < dd->first_user_ctxt; ++i) {
+		rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
 		rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
 			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
 		if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR))
@@ -729,7 +737,7 @@
 			lastfail = hfi1_setup_eagerbufs(rcd);
 		if (lastfail)
 			dd_dev_err(dd,
-				"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
+				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
 	}
 	if (lastfail)
 		ret = lastfail;
@@ -762,7 +770,6 @@
 	/* enable chip even if we have an error, so we can debug cause */
 	enable_chip(dd);
 
-	ret = hfi1_cq_init(dd);
 done:
 	/*
 	 * Set status even if port serdes is not initialized
@@ -779,20 +786,15 @@
 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 			ppd = dd->pport + pidx;
 
-			/* initialize the qsfp if it exists
-			 * Requires interrupts to be enabled so we are notified
-			 * when the QSFP completes reset, and has
-			 * to be done before bringing up the SERDES
+			/*
+			 * start the serdes - must be after interrupts are
+			 * enabled so we are notified when the link goes up
 			 */
-			init_qsfp(ppd);
-
-			/* start the serdes - must be after interrupts are
-			   enabled so we are notified when the link goes up */
 			lastfail = bringup_serdes(ppd);
 			if (lastfail)
 				dd_dev_info(dd,
-					"Failed to bring up port %u\n",
-					ppd->port);
+					    "Failed to bring up port %u\n",
+					    ppd->port);
 
 			/*
 			 * Set status even if port serdes is not initialized
@@ -904,6 +906,8 @@
 		/* disable the send device */
 		pio_send_control(dd, PSC_GLOBAL_DISABLE);
 
+		shutdown_led_override(ppd);
+
 		/*
 		 * Clear SerdesEnable.
 		 * We can't count on interrupts since we are stopping.
@@ -961,17 +965,33 @@
 	kfree(rcd->egrbufs.buffers);
 
 	sc_free(rcd->sc);
-	vfree(rcd->physshadow);
-	vfree(rcd->tid_pg_list);
 	vfree(rcd->user_event_mask);
 	vfree(rcd->subctxt_uregbase);
 	vfree(rcd->subctxt_rcvegrbuf);
 	vfree(rcd->subctxt_rcvhdr_base);
-	kfree(rcd->tidusemap);
 	kfree(rcd->opstats);
 	kfree(rcd);
 }
 
+/*
+ * Release our hold on the shared asic data.  If we are the last one,
+ * free the structure.  Must be holding hfi1_devs_lock.
+ */
+static void release_asic_data(struct hfi1_devdata *dd)
+{
+	int other;
+
+	if (!dd->asic_data)
+		return;
+	dd->asic_data->dds[dd->hfi1_id] = NULL;
+	other = dd->hfi1_id ? 0 : 1;
+	if (!dd->asic_data->dds[other]) {
+		/* we are the last holder, free it */
+		kfree(dd->asic_data);
+	}
+	dd->asic_data = NULL;
+}
+
 void hfi1_free_devdata(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
@@ -979,12 +999,15 @@
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 	idr_remove(&hfi1_unit_table, dd->unit);
 	list_del(&dd->list);
+	release_asic_data(dd);
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+	free_platform_config(dd);
 	rcu_barrier(); /* wait for rcu callbacks to complete */
 	free_percpu(dd->int_counter);
 	free_percpu(dd->rcv_limit);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	hfi1_dev_affinity_free(dd);
+	free_percpu(dd->send_schedule);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 }
 
 /*
@@ -999,19 +1022,19 @@
 {
 	unsigned long flags;
 	struct hfi1_devdata *dd;
-	int ret;
+	int ret, nports;
 
-	dd = (struct hfi1_devdata *)ib_alloc_device(sizeof(*dd) + extra);
+	/* extra is * number of ports */
+	nports = extra / sizeof(struct hfi1_pportdata);
+
+	dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+						     nports);
 	if (!dd)
 		return ERR_PTR(-ENOMEM);
-	/* extra is * number of ports */
-	dd->num_pports = extra / sizeof(struct hfi1_pportdata);
+	dd->num_pports = nports;
 	dd->pport = (struct hfi1_pportdata *)(dd + 1);
 
 	INIT_LIST_HEAD(&dd->list);
-	dd->node = dev_to_node(&pdev->dev);
-	if (dd->node < 0)
-		dd->node = 0;
 	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 
@@ -1041,9 +1064,9 @@
 	spin_lock_init(&dd->sc_init_lock);
 	spin_lock_init(&dd->dc8051_lock);
 	spin_lock_init(&dd->dc8051_memlock);
-	mutex_init(&dd->qsfp_i2c_mutex);
 	seqlock_init(&dd->sc2vl_lock);
 	spin_lock_init(&dd->sde_map_lock);
+	spin_lock_init(&dd->pio_map_lock);
 	init_waitqueue_head(&dd->event_queue);
 
 	dd->int_counter = alloc_percpu(u64);
@@ -1062,6 +1085,14 @@
 		goto bail;
 	}
 
+	dd->send_schedule = alloc_percpu(u64);
+	if (!dd->send_schedule) {
+		ret = -ENOMEM;
+		hfi1_early_err(&pdev->dev,
+			       "Could not allocate per-cpu int_counter\n");
+		goto bail;
+	}
+
 	if (!hfi1_cpulist_count) {
 		u32 count = num_online_cpus();
 
@@ -1074,13 +1105,12 @@
 			&pdev->dev,
 			"Could not alloc cpulist info, cpu affinity might be wrong\n");
 	}
-	hfi1_dbg_ibdev_init(&dd->verbs_dev);
 	return dd;
 
 bail:
 	if (!list_empty(&dd->list))
 		list_del_init(&dd->list);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 	return ERR_PTR(ret);
 }
 
@@ -1173,8 +1203,10 @@
 		user_credit_return_threshold = 100;
 
 	compute_krcvqs();
-	/* sanitize receive interrupt count, time must wait until after
-	   the hardware type is known */
+	/*
+	 * sanitize receive interrupt count, time must wait until after
+	 * the hardware type is known
+	 */
 	if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK)
 		rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK;
 	/* reject invalid combinations */
@@ -1209,6 +1241,9 @@
 	idr_init(&hfi1_unit_table);
 
 	hfi1_dbg_init();
+	ret = hfi1_wss_init();
+	if (ret < 0)
+		goto bail_wss;
 	ret = pci_register_driver(&hfi1_pci_driver);
 	if (ret < 0) {
 		pr_err("Unable to register driver: error %d\n", -ret);
@@ -1217,6 +1252,8 @@
 	goto bail; /* all OK */
 
 bail_dev:
+	hfi1_wss_exit();
+bail_wss:
 	hfi1_dbg_exit();
 	idr_destroy(&hfi1_unit_table);
 	dev_cleanup();
@@ -1232,6 +1269,7 @@
 static void __exit hfi1_mod_cleanup(void)
 {
 	pci_unregister_driver(&hfi1_pci_driver);
+	hfi1_wss_exit();
 	hfi1_dbg_exit();
 	hfi1_cpulist_count = 0;
 	kfree(hfi1_cpulist);
@@ -1303,16 +1341,18 @@
 		}
 	}
 	kfree(tmp);
+	free_pio_map(dd);
 	/* must follow rcv context free - need to remove rcv's hooks */
 	for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++)
 		sc_free(dd->send_contexts[ctxt].sc);
 	dd->num_send_contexts = 0;
 	kfree(dd->send_contexts);
 	dd->send_contexts = NULL;
+	kfree(dd->hw_to_sw);
+	dd->hw_to_sw = NULL;
 	kfree(dd->boardname);
 	vfree(dd->events);
 	vfree(dd->status);
-	hfi1_cq_exit(dd);
 }
 
 /*
@@ -1346,6 +1386,13 @@
 		ret = -EINVAL;
 		goto bail;
 	}
+	if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+		hfi1_early_err(&pdev->dev,
+			       "Receive header queue count cannot be greater than %u\n",
+			       HFI1_MAX_HDRQ_EGRBUF_CNT);
+		ret = -EINVAL;
+		goto bail;
+	}
 	/* use the encoding function as a sanitization check */
 	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
 		hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@ -1422,8 +1469,11 @@
 	 * we still create devices, so diags, etc. can be used
 	 * to determine cause of problem.
 	 */
-	if (!initfail && !ret)
+	if (!initfail && !ret) {
 		dd->flags |= HFI1_INITTED;
+		/* create debufs files after init and ib register */
+		hfi1_dbg_ibdev_init(&dd->verbs_dev);
+	}
 
 	j = hfi1_device_create(dd);
 	if (j)
@@ -1464,6 +1514,8 @@
 {
 	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
 
+	/* close debugfs files before ib unregister */
+	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
 	/* unregister from IB core */
 	hfi1_unregister_ib_device(dd);
 
@@ -1516,18 +1568,11 @@
 
 		if (!rcd->rcvhdrq) {
 			dd_dev_err(dd,
-				"attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
-				amt, rcd->ctxt);
+				   "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
+				   amt, rcd->ctxt);
 			goto bail;
 		}
 
-		/* Event mask is per device now and is in hfi1_devdata */
-		/*if (rcd->ctxt >= dd->first_user_ctxt) {
-			rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
-			if (!rcd->user_event_mask)
-				goto bail_free_hdrq;
-				}*/
-
 		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
 			rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
 				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
@@ -1568,8 +1613,8 @@
 
 bail_free:
 	dd_dev_err(dd,
-		"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
-		rcd->ctxt);
+		   "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
+		   rcd->ctxt);
 	vfree(rcd->user_event_mask);
 	rcd->user_event_mask = NULL;
 	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
@@ -1659,7 +1704,7 @@
 			if (rcd->egrbufs.rcvtid_size == round_mtu ||
 			    !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
 				dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
-					rcd->ctxt);
+					   rcd->ctxt);
 				goto bail_rcvegrbuf_phys;
 			}
 
@@ -1694,8 +1739,9 @@
 				     rcd->egrbufs.buffers[j].len)) {
 					j++;
 					offset = 0;
-				} else
+				} else {
 					offset += new_size;
+				}
 			}
 			rcd->egrbufs.rcvtid_size = new_size;
 		}
@@ -1708,7 +1754,6 @@
 		  rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size,
 		  rcd->egrbufs.size);
 
-
 	/*
 	 * Set the contexts rcv array head update threshold to the closest
 	 * power of 2 (so we can use a mask instead of modulo) below half
@@ -1742,14 +1787,14 @@
 
 	for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
 		hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
-			      rcd->egrbufs.rcvtids[idx].phys, order);
+			     rcd->egrbufs.rcvtids[idx].phys, order);
 		cond_resched();
 	}
 	goto bail;
 
 bail_rcvegrbuf_phys:
 	for (idx = 0; idx < rcd->egrbufs.alloced &&
-		     rcd->egrbufs.buffers[idx].addr;
+	     rcd->egrbufs.buffers[idx].addr;
 	     idx++) {
 		dma_free_coherent(&dd->pcidev->dev,
 				  rcd->egrbufs.buffers[idx].len,

diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c
index 426582b..65348d1 100644
--- a/drivers/staging/rdma/hfi1/intr.c
+++ b/drivers/staging/rdma/hfi1/intr.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -98,7 +95,7 @@
 	 */
 	if (!(dd->flags & HFI1_INITTED))
 		return;
-	event.device = &dd->verbs_dev.ibdev;
+	event.device = &dd->verbs_dev.rdi.ibdev;
 	event.element.port_num = ppd->port;
 	event.event = ev;
 	ib_dispatch_event(&event);
@@ -131,28 +128,26 @@
 		 * NOTE: This uses this device's vAU, vCU, and vl15_init for
 		 * the remote values.  Both sides must be using the values.
 		 */
-		if (quick_linkup
-			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+		if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
 			set_up_vl15(dd, dd->vau, dd->vl15_init);
 			assign_remote_cm_au_table(dd, dd->vcu);
 			ppd->neighbor_guid =
-				read_csr(dd,
-					DC_DC8051_STS_REMOTE_GUID);
+				read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
 			ppd->neighbor_type =
 				read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
 					DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
 			ppd->neighbor_port_number =
 				read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
-					DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
-			dd_dev_info(dd,
-				"Neighbor GUID: %llx Neighbor type %d\n",
-				ppd->neighbor_guid,
-				ppd->neighbor_type);
+					 DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+			dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
+				    ppd->neighbor_guid,
+				    ppd->neighbor_type);
 		}
 
 		/* physical link went up */
 		ppd->linkup = 1;
-		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
 
 		/* link widths are not available until the link is fully up */
 		get_linkup_link_widths(ppd);
@@ -165,7 +160,7 @@
 		reset_link_credits(dd);
 
 		/* freeze after a link down to guarantee a clean egress */
-		start_freeze_handling(ppd, FREEZE_SELF|FREEZE_LINK_DOWN);
+		start_freeze_handling(ppd, FREEZE_SELF | FREEZE_LINK_DOWN);
 
 		ev = IB_EVENT_PORT_ERR;
 
@@ -177,8 +172,6 @@
 		/* notify IB of the link change */
 		signal_ib_event(ppd, ev);
 	}
-
-
 }
 
 /*

diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h
index e8ba560..2ec6ef3 100644
--- a/drivers/staging/rdma/hfi1/iowait.h
+++ b/drivers/staging/rdma/hfi1/iowait.h

@@ -1,14 +1,13 @@
 #ifndef _HFI1_IOWAIT_H
 #define _HFI1_IOWAIT_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,6 +51,8 @@
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 
+#include "sdma_txreq.h"
+
 /*
  * typedef (*restart_t)() - restart callback
  * @work: pointer to work structure
@@ -67,9 +66,11 @@
  * @list: used to add/insert into QP/PQ wait lists
  * @tx_head: overflow list of sdma_txreq's
  * @sleep: no space callback
- * @wakeup: space callback
+ * @wakeup: space callback wakeup
+ * @sdma_drained: sdma count drained
  * @iowork: workqueue overhead
  * @wait_dma: wait for sdma_busy == 0
+ * @wait_pio: wait for pio_busy == 0
  * @sdma_busy: # of packets in flight
  * @count: total number of descriptors in tx_head'ed list
  * @tx_limit: limit for overflow queuing
@@ -101,9 +102,12 @@
 		struct sdma_txreq *tx,
 		unsigned seq);
 	void (*wakeup)(struct iowait *wait, int reason);
+	void (*sdma_drained)(struct iowait *wait);
 	struct work_struct iowork;
 	wait_queue_head_t wait_dma;
+	wait_queue_head_t wait_pio;
 	atomic_t sdma_busy;
+	atomic_t pio_busy;
 	u32 count;
 	u32 tx_limit;
 	u32 tx_count;
@@ -117,7 +121,7 @@
  * @tx_limit: limit for overflow queuing
  * @func: restart function for workqueue
  * @sleep: sleep function for no space
- * @wakeup: wakeup function for no space
+ * @resume: wakeup function for no space
  *
  * This function initializes the iowait
  * structure embedded in the QP or PQ.
@@ -133,17 +137,21 @@
 		struct iowait *wait,
 		struct sdma_txreq *tx,
 		unsigned seq),
-	void (*wakeup)(struct iowait *wait, int reason))
+	void (*wakeup)(struct iowait *wait, int reason),
+	void (*sdma_drained)(struct iowait *wait))
 {
 	wait->count = 0;
 	INIT_LIST_HEAD(&wait->list);
 	INIT_LIST_HEAD(&wait->tx_head);
 	INIT_WORK(&wait->iowork, func);
 	init_waitqueue_head(&wait->wait_dma);
+	init_waitqueue_head(&wait->wait_pio);
 	atomic_set(&wait->sdma_busy, 0);
+	atomic_set(&wait->pio_busy, 0);
 	wait->tx_limit = tx_limit;
 	wait->sleep = sleep;
 	wait->wakeup = wakeup;
+	wait->sdma_drained = sdma_drained;
 }
 
 /**
@@ -174,6 +182,88 @@
 }
 
 /**
+ * iowait_sdma_pending() - return sdma pending count
+ *
+ * @wait: iowait structure
+ *
+ */
+static inline int iowait_sdma_pending(struct iowait *wait)
+{
+	return atomic_read(&wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_inc - note sdma io pending
+ * @wait: iowait structure
+ */
+static inline void iowait_sdma_inc(struct iowait *wait)
+{
+	atomic_inc(&wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_add - add count to pending
+ * @wait: iowait structure
+ */
+static inline void iowait_sdma_add(struct iowait *wait, int count)
+{
+	atomic_add(count, &wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_dec - note sdma complete
+ * @wait: iowait structure
+ */
+static inline int iowait_sdma_dec(struct iowait *wait)
+{
+	return atomic_dec_and_test(&wait->sdma_busy);
+}
+
+/**
+ * iowait_pio_drain() - wait for pios to drain
+ *
+ * @wait: iowait structure
+ *
+ * This will delay until the iowait pios have
+ * completed.
+ */
+static inline void iowait_pio_drain(struct iowait *wait)
+{
+	wait_event_timeout(wait->wait_pio,
+			   !atomic_read(&wait->pio_busy),
+			   HZ);
+}
+
+/**
+ * iowait_pio_pending() - return pio pending count
+ *
+ * @wait: iowait structure
+ *
+ */
+static inline int iowait_pio_pending(struct iowait *wait)
+{
+	return atomic_read(&wait->pio_busy);
+}
+
+/**
+ * iowait_pio_inc - note pio pending
+ * @wait: iowait structure
+ */
+static inline void iowait_pio_inc(struct iowait *wait)
+{
+	atomic_inc(&wait->pio_busy);
+}
+
+/**
+ * iowait_sdma_dec - note pio complete
+ * @wait: iowait structure
+ */
+static inline int iowait_pio_dec(struct iowait *wait)
+{
+	return atomic_dec_and_test(&wait->pio_busy);
+}
+
+/**
  * iowait_drain_wakeup() - trigger iowait_drain() waiter
  *
  * @wait: iowait structure
@@ -183,6 +273,28 @@
 static inline void iowait_drain_wakeup(struct iowait *wait)
 {
 	wake_up(&wait->wait_dma);
+	wake_up(&wait->wait_pio);
+	if (wait->sdma_drained)
+		wait->sdma_drained(wait);
+}
+
+/**
+ * iowait_get_txhead() - get packet off of iowait list
+ *
+ * @wait wait struture
+ */
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+{
+	struct sdma_txreq *tx = NULL;
+
+	if (!list_empty(&wait->tx_head)) {
+		tx = list_first_entry(
+			&wait->tx_head,
+			struct sdma_txreq,
+			list);
+		list_del_init(&tx->list);
+	}
+	return tx;
 }
 
 #endif

diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c
deleted file mode 100644
index e34f093..0000000
--- a/drivers/staging/rdma/hfi1/keys.c
+++ /dev/null

@@ -1,356 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include "hfi.h"
-
-/**
- * hfi1_alloc_lkey - allocate an lkey
- * @mr: memory region that this lkey protects
- * @dma_region: 0->normal key, 1->restricted DMA key
- *
- * Returns 0 if successful, otherwise returns -errno.
- *
- * Increments mr reference count as required.
- *
- * Sets the lkey field mr for non-dma regions.
- *
- */
-
-int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region)
-{
-	unsigned long flags;
-	u32 r;
-	u32 n;
-	int ret = 0;
-	struct hfi1_ibdev *dev = to_idev(mr->pd->device);
-	struct hfi1_lkey_table *rkt = &dev->lk_table;
-
-	hfi1_get_mr(mr);
-	spin_lock_irqsave(&rkt->lock, flags);
-
-	/* special case for dma_mr lkey == 0 */
-	if (dma_region) {
-		struct hfi1_mregion *tmr;
-
-		tmr = rcu_access_pointer(dev->dma_mr);
-		if (!tmr) {
-			rcu_assign_pointer(dev->dma_mr, mr);
-			mr->lkey_published = 1;
-		} else {
-			hfi1_put_mr(mr);
-		}
-		goto success;
-	}
-
-	/* Find the next available LKEY */
-	r = rkt->next;
-	n = r;
-	for (;;) {
-		if (!rcu_access_pointer(rkt->table[r]))
-			break;
-		r = (r + 1) & (rkt->max - 1);
-		if (r == n)
-			goto bail;
-	}
-	rkt->next = (r + 1) & (rkt->max - 1);
-	/*
-	 * Make sure lkey is never zero which is reserved to indicate an
-	 * unrestricted LKEY.
-	 */
-	rkt->gen++;
-	/*
-	 * bits are capped in verbs.c to ensure enough bits for
-	 * generation number
-	 */
-	mr->lkey = (r << (32 - hfi1_lkey_table_size)) |
-		((((1 << (24 - hfi1_lkey_table_size)) - 1) & rkt->gen)
-		 << 8);
-	if (mr->lkey == 0) {
-		mr->lkey = 1 << 8;
-		rkt->gen++;
-	}
-	rcu_assign_pointer(rkt->table[r], mr);
-	mr->lkey_published = 1;
-success:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-out:
-	return ret;
-bail:
-	hfi1_put_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = -ENOMEM;
-	goto out;
-}
-
-/**
- * hfi1_free_lkey - free an lkey
- * @mr: mr to free from tables
- */
-void hfi1_free_lkey(struct hfi1_mregion *mr)
-{
-	unsigned long flags;
-	u32 lkey = mr->lkey;
-	u32 r;
-	struct hfi1_ibdev *dev = to_idev(mr->pd->device);
-	struct hfi1_lkey_table *rkt = &dev->lk_table;
-	int freed = 0;
-
-	spin_lock_irqsave(&rkt->lock, flags);
-	if (!mr->lkey_published)
-		goto out;
-	if (lkey == 0)
-		RCU_INIT_POINTER(dev->dma_mr, NULL);
-	else {
-		r = lkey >> (32 - hfi1_lkey_table_size);
-		RCU_INIT_POINTER(rkt->table[r], NULL);
-	}
-	mr->lkey_published = 0;
-	freed++;
-out:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	if (freed) {
-		synchronize_rcu();
-		hfi1_put_mr(mr);
-	}
-}
-
-/**
- * hfi1_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @pd: protection domain
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
-		 struct hfi1_sge *isge, struct ib_sge *sge, int acc)
-{
-	struct hfi1_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use LKEY == zero for kernel virtual addresses
-	 * (see hfi1_get_dma_mr and dma.c).
-	 */
-	rcu_read_lock();
-	if (sge->lkey == 0) {
-		struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		atomic_inc(&mr->refcount);
-		rcu_read_unlock();
-
-		isge->mr = mr;
-		isge->vaddr = (void *) sge->addr;
-		isge->length = sge->length;
-		isge->sge_length = sge->length;
-		isge->m = 0;
-		isge->n = 0;
-		goto ok;
-	}
-	mr = rcu_dereference(
-		rkt->table[(sge->lkey >> (32 - hfi1_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
-		goto bail;
-
-	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
-		goto bail;
-	atomic_inc(&mr->refcount);
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off / HFI1_SEGSZ;
-		n = entries_spanned_by_off % HFI1_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= HFI1_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	isge->mr = mr;
-	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	isge->length = mr->map[m]->segs[n].length - off;
-	isge->sge_length = sge->length;
-	isge->m = m;
-	isge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}
-
-/**
- * hfi1_rkey_ok - check the IB virtual address, length, and RKEY
- * @qp: qp for validation
- * @sge: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- *
- * increments the reference count upon success
- */
-int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
-		 u32 len, u64 vaddr, u32 rkey, int acc)
-{
-	struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct hfi1_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use RKEY == zero for kernel virtual addresses
-	 * (see hfi1_get_dma_mr and dma.c).
-	 */
-	rcu_read_lock();
-	if (rkey == 0) {
-		struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
-		struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		atomic_inc(&mr->refcount);
-		rcu_read_unlock();
-
-		sge->mr = mr;
-		sge->vaddr = (void *) vaddr;
-		sge->length = len;
-		sge->sge_length = len;
-		sge->m = 0;
-		sge->n = 0;
-		goto ok;
-	}
-
-	mr = rcu_dereference(
-		rkt->table[(rkey >> (32 - hfi1_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
-		goto bail;
-
-	off = vaddr - mr->iova;
-	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
-		     (mr->access_flags & acc) == 0))
-		goto bail;
-	atomic_inc(&mr->refcount);
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off / HFI1_SEGSZ;
-		n = entries_spanned_by_off % HFI1_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= HFI1_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	sge->mr = mr;
-	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	sge->length = mr->map[m]->segs[n].length - off;
-	sge->sge_length = len;
-	sge->m = m;
-	sge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}

diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c
index 77700b8..d1e7f4d 100644
--- a/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/staging/rdma/hfi1/mad.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -55,6 +52,7 @@
 #include "hfi.h"
 #include "mad.h"
 #include "trace.h"
+#include "qp.h"
 
 /* the reset value from the FM is supposed to be 0xffff, handle both */
 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -91,7 +89,7 @@
 	int pkey_idx;
 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		return;
 
@@ -100,7 +98,8 @@
 		return;
 
 	/* o14-2 */
-	if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+	if (ibp->rvp.trap_timeout && time_before(jiffies,
+						 ibp->rvp.trap_timeout))
 		return;
 
 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
@@ -121,42 +120,43 @@
 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 	smp->class_version = OPA_SMI_CLASS_VERSION;
 	smp->method = IB_MGMT_METHOD_TRAP;
-	ibp->tid++;
-	smp->tid = cpu_to_be64(ibp->tid);
+	ibp->rvp.tid++;
+	smp->tid = cpu_to_be64(ibp->rvp.tid);
 	smp->attr_id = IB_SMP_ATTR_NOTICE;
 	/* o14-1: smp->mkey = 0; */
 	memcpy(smp->route.lid.data, data, len);
 
-	spin_lock_irqsave(&ibp->lock, flags);
-	if (!ibp->sm_ah) {
-		if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	if (!ibp->rvp.sm_ah) {
+		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
 			struct ib_ah *ah;
 
-			ah = hfi1_create_qp0_ah(ibp, ibp->sm_lid);
-			if (IS_ERR(ah))
+			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
+			if (IS_ERR(ah)) {
 				ret = PTR_ERR(ah);
-			else {
+			} else {
 				send_buf->ah = ah;
-				ibp->sm_ah = to_iah(ah);
+				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
 				ret = 0;
 			}
-		} else
+		} else {
 			ret = -EINVAL;
+		}
 	} else {
-		send_buf->ah = &ibp->sm_ah->ibah;
+		send_buf->ah = &ibp->rvp.sm_ah->ibah;
 		ret = 0;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	if (!ret)
 		ret = ib_post_send_mad(send_buf, NULL);
 	if (!ret) {
 		/* 4.096 usec. */
-		timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
-		ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
 	} else {
 		ib_free_send_mad(send_buf);
-		ibp->trap_timeout = 0;
+		ibp->rvp.trap_timeout = 0;
 	}
 }
 
@@ -174,10 +174,10 @@
 	memset(&data, 0, sizeof(data));
 
 	if (trap_num == OPA_TRAP_BAD_P_KEY)
-		ibp->pkey_violations++;
+		ibp->rvp.pkey_violations++;
 	else
-		ibp->qkey_violations++;
-	ibp->n_pkt_drops++;
+		ibp->rvp.qkey_violations++;
+	ibp->rvp.n_pkt_drops++;
 
 	/* Send violation trap */
 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@ -233,9 +233,12 @@
 /*
  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
  */
-void hfi1_cap_mask_chg(struct hfi1_ibport *ibp)
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
 {
 	struct opa_mad_notice_attr data;
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
 	u32 lid = ppd_from_ibp(ibp)->lid;
 
 	memset(&data, 0, sizeof(data));
@@ -245,7 +248,7 @@
 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
 	data.issuer_lid = cpu_to_be32(lid);
 	data.ntc_144.lid = data.issuer_lid;
-	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
+	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
 
 	send_trap(ibp, &data, sizeof(data));
 }
@@ -407,37 +410,38 @@
 	int ret = 0;
 
 	/* Is the mkey in the process of expiring? */
-	if (ibp->mkey_lease_timeout &&
-	    time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+	if (ibp->rvp.mkey_lease_timeout &&
+	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
 		/* Clear timeout and mkey protection field. */
-		ibp->mkey_lease_timeout = 0;
-		ibp->mkeyprot = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
+		ibp->rvp.mkeyprot = 0;
 	}
 
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->mkey == 0 ||
-	    ibp->mkey == mkey)
+	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
+	    ibp->rvp.mkey == mkey)
 		valid_mkey = 1;
 
 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
-	if (valid_mkey && ibp->mkey_lease_timeout &&
+	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
 	    (mad->method == IB_MGMT_METHOD_GET ||
 	     mad->method == IB_MGMT_METHOD_SET ||
 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
-		ibp->mkey_lease_timeout = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
 
 	if (!valid_mkey) {
 		switch (mad->method) {
 		case IB_MGMT_METHOD_GET:
 			/* Bad mkey not a violation below level 2 */
-			if (ibp->mkeyprot < 2)
+			if (ibp->rvp.mkeyprot < 2)
 				break;
 		case IB_MGMT_METHOD_SET:
 		case IB_MGMT_METHOD_TRAP_REPRESS:
-			if (ibp->mkey_violations != 0xFFFF)
-				++ibp->mkey_violations;
-			if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
-				ibp->mkey_lease_timeout = jiffies +
-					ibp->mkey_lease_period * HZ;
+			if (ibp->rvp.mkey_violations != 0xFFFF)
+				++ibp->rvp.mkey_violations;
+			if (!ibp->rvp.mkey_lease_timeout &&
+			    ibp->rvp.mkey_lease_period)
+				ibp->rvp.mkey_lease_timeout = jiffies +
+					ibp->rvp.mkey_lease_period * HZ;
 			/* Generate a trap notice. */
 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
 				 hop_cnt);
@@ -501,16 +505,6 @@
 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
 }
 
-static u8 __opa_porttype(struct hfi1_pportdata *ppd)
-{
-	if (qsfp_mod_present(ppd)) {
-		if (ppd->qsfp_info.cache_valid)
-			return OPA_PORT_TYPE_STANDARD;
-		return OPA_PORT_TYPE_DISCONNECTED;
-	}
-	return OPA_PORT_TYPE_UNKNOWN;
-}
-
 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
@@ -522,6 +516,7 @@
 	struct opa_port_info *pi = (struct opa_port_info *)data;
 	u8 mtu;
 	u8 credit_rate;
+	u8 is_beaconing_active;
 	u32 state;
 	u32 num_ports = OPA_AM_NPORT(am);
 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -538,8 +533,8 @@
 	ppd = dd->pport + (port - 1);
 	ibp = &ppd->ibport_data;
 
-	if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
-		ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -548,14 +543,14 @@
 
 	/* Only return the mkey if the protection field allows it. */
 	if (!(smp->method == IB_MGMT_METHOD_GET &&
-	      ibp->mkey != smp->mkey &&
-	      ibp->mkeyprot == 1))
-		pi->mkey = ibp->mkey;
+	      ibp->rvp.mkey != smp->mkey &&
+	      ibp->rvp.mkeyprot == 1))
+		pi->mkey = ibp->rvp.mkey;
 
-	pi->subnet_prefix = ibp->gid_prefix;
-	pi->sm_lid = cpu_to_be32(ibp->sm_lid);
-	pi->ib_cap_mask = cpu_to_be32(ibp->port_cap_flags);
-	pi->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+	pi->subnet_prefix = ibp->rvp.gid_prefix;
+	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
+	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
 
@@ -581,38 +576,45 @@
 	if (start_of_sm_config && (state == IB_PORT_INIT))
 		ppd->is_sm_config_started = 1;
 
-	pi->port_phys_conf = __opa_porttype(ppd) & 0xf;
+	pi->port_phys_conf = (ppd->port_type & 0xf);
 
 #if PI_LED_ENABLE_SUP
 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
 	pi->port_states.ledenable_offlinereason |=
 		ppd->is_sm_config_started << 5;
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
 	pi->port_states.ledenable_offlinereason |=
-		ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+		ppd->offline_disabled_reason;
 #else
 	pi->port_states.offline_reason = ppd->neighbor_normal << 4;
 	pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
-	pi->port_states.offline_reason |= ppd->offline_disabled_reason &
-						OPA_PI_MASK_OFFLINE_REASON;
+	pi->port_states.offline_reason |= ppd->offline_disabled_reason;
 #endif /* PI_LED_ENABLE_SUP */
 
 	pi->port_states.portphysstate_portstate =
 		(hfi1_ibphys_portstate(ppd) << 4) | state;
 
-	pi->mkeyprotect_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
 
 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
 	for (i = 0; i < ppd->vls_supported; i++) {
 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
 		if ((i % 2) == 0)
-			pi->neigh_mtu.pvlx_to_mtu[i/2] |= (mtu << 4);
+			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
 		else
-			pi->neigh_mtu.pvlx_to_mtu[i/2] |= mtu;
+			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
 	}
 	/* don't forget VL 15 */
 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
-	pi->neigh_mtu.pvlx_to_mtu[15/2] |= mtu;
-	pi->smsl = ibp->sm_sl & OPA_PI_MASK_SMSL;
+	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
+	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
 	pi->partenforce_filterraw |=
 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
@@ -620,17 +622,17 @@
 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
-	pi->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
-	pi->pkey_violations = cpu_to_be16(ibp->pkey_violations);
-	pi->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
 
 	pi->vl.cap = ppd->vls_supported;
-	pi->vl.high_limit = cpu_to_be16(ibp->vl_high_limit);
+	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
 
-	pi->clientrereg_subnettimeout = ibp->subnet_timeout;
+	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
 
 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
 					  OPA_PORT_LINK_MODE_OPA << 5 |
@@ -701,8 +703,10 @@
 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
 
-	/* this counter is 16 bits wide, but the replay_depth.wire
-	 * variable is only 8 bits */
+	/*
+	 * this counter is 16 bits wide, but the replay_depth.wire
+	 * variable is only 8 bits
+	 */
 	if (tmp > 0xff)
 		tmp = 0xff;
 	pi->replay_depth.wire = tmp;
@@ -749,7 +753,7 @@
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	n_blocks_avail = (u16) (npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
 
 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
 
@@ -763,7 +767,7 @@
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	p = (__be16 *) data;
+	p = (__be16 *)data;
 	q = (u16 *)data;
 	/* get the real pkeys if we are requesting the first block */
 	if (start_block == 0) {
@@ -772,9 +776,9 @@
 			p[i] = cpu_to_be16(q[i]);
 		if (resp_len)
 			*resp_len += size;
-	} else
+	} else {
 		smp->status |= IB_SMP_INVALID_FIELD;
-
+	}
 	return reply((struct ib_mad_hdr *)smp);
 }
 
@@ -901,8 +905,8 @@
 	u32 logical_old = driver_logical_state(ppd);
 	int ret, logical_allowed, physical_allowed;
 
-	logical_allowed = ret =
-		logical_transition_allowed(logical_old, logical_new);
+	ret = logical_transition_allowed(logical_old, logical_new);
+	logical_allowed = ret;
 
 	if (ret == HFI_TRANSITION_DISALLOWED ||
 	    ret == HFI_TRANSITION_UNDEFINED) {
@@ -912,8 +916,8 @@
 		return ret;
 	}
 
-	physical_allowed = ret =
-		physical_transition_allowed(physical_old, physical_new);
+	ret = physical_transition_allowed(physical_old, physical_new);
+	physical_allowed = ret;
 
 	if (ret == HFI_TRANSITION_DISALLOWED ||
 	    ret == HFI_TRANSITION_UNDEFINED) {
@@ -928,6 +932,14 @@
 		return HFI_TRANSITION_IGNORED;
 
 	/*
+	 * A change request of Physical Port State from
+	 * 'Offline' to 'Polling' should be ignored.
+	 */
+	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
+	    (physical_new == IB_PORTPHYSSTATE_POLLING))
+		return HFI_TRANSITION_IGNORED;
+
+	/*
 	 * Either physical_allowed or logical_allowed is
 	 * HFI_TRANSITION_ALLOWED.
 	 */
@@ -972,16 +984,15 @@
 			break;
 		/* FALLTHROUGH */
 	case IB_PORT_DOWN:
-		if (phys_state == IB_PORTPHYSSTATE_NOP)
+		if (phys_state == IB_PORTPHYSSTATE_NOP) {
 			link_state = HLS_DN_DOWNDEF;
-		else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
+		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
 			link_state = HLS_DN_POLL;
-			set_link_down_reason(ppd,
-			     OPA_LINKDOWN_REASON_FM_BOUNCE, 0,
-			     OPA_LINKDOWN_REASON_FM_BOUNCE);
-		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED)
+			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
+					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
+		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
 			link_state = HLS_DN_DISABLE;
-		else {
+		} else {
 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
 				phys_state);
 			smp->status |= IB_SMP_INVALID_FIELD;
@@ -991,11 +1002,11 @@
 		set_link_state(ppd, link_state);
 		if (link_state == HLS_DN_DISABLE &&
 		    (ppd->offline_disabled_reason >
-		     OPA_LINKDOWN_REASON_SMA_DISABLED ||
+		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
 		     ppd->offline_disabled_reason ==
-		     OPA_LINKDOWN_REASON_NONE))
+		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
 			ppd->offline_disabled_reason =
-			OPA_LINKDOWN_REASON_SMA_DISABLED;
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
 		/*
 		 * Don't send a reply if the response would be sent
 		 * through the disabled port.
@@ -1091,13 +1102,13 @@
 
 	ls_old = driver_lstate(ppd);
 
-	ibp->mkey = pi->mkey;
-	ibp->gid_prefix = pi->subnet_prefix;
-	ibp->mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
+	ibp->rvp.mkey = pi->mkey;
+	ibp->rvp.gid_prefix = pi->subnet_prefix;
+	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
 
 	/* Must be a valid unicast LID address. */
 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
-	     lid >= HFI1_MULTICAST_LID_BASE) {
+	    lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
 			lid);
@@ -1130,23 +1141,23 @@
 
 	/* Must be a valid unicast LID address. */
 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
-	     smlid >= HFI1_MULTICAST_LID_BASE) {
+	    smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
-	} else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
-		spin_lock_irqsave(&ibp->lock, flags);
-		if (ibp->sm_ah) {
-			if (smlid != ibp->sm_lid)
-				ibp->sm_ah->attr.dlid = smlid;
-			if (msl != ibp->sm_sl)
-				ibp->sm_ah->attr.sl = msl;
+		spin_lock_irqsave(&ibp->rvp.lock, flags);
+		if (ibp->rvp.sm_ah) {
+			if (smlid != ibp->rvp.sm_lid)
+				ibp->rvp.sm_ah->attr.dlid = smlid;
+			if (msl != ibp->rvp.sm_sl)
+				ibp->rvp.sm_ah->attr.sl = msl;
 		}
-		spin_unlock_irqrestore(&ibp->lock, flags);
-		if (smlid != ibp->sm_lid)
-			ibp->sm_lid = smlid;
-		if (msl != ibp->sm_sl)
-			ibp->sm_sl = msl;
+		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+		if (smlid != ibp->rvp.sm_lid)
+			ibp->rvp.sm_lid = smlid;
+		if (msl != ibp->rvp.sm_sl)
+			ibp->rvp.sm_sl = msl;
 		event.event = IB_EVENT_SM_CHANGE;
 		ib_dispatch_event(&event);
 	}
@@ -1167,8 +1178,8 @@
 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
 	lwe = be16_to_cpu(pi->link_width.enabled);
 	if (lwe) {
-		if (lwe == OPA_LINK_WIDTH_RESET
-				|| lwe == OPA_LINK_WIDTH_RESET_OLD)
+		if (lwe == OPA_LINK_WIDTH_RESET ||
+		    lwe == OPA_LINK_WIDTH_RESET_OLD)
 			set_link_width_enabled(ppd, ppd->link_width_supported);
 		else if ((lwe & ~ppd->link_width_supported) == 0)
 			set_link_width_enabled(ppd, lwe);
@@ -1177,19 +1188,21 @@
 	}
 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
 	/* LWD.E is always applied - 0 means "disabled" */
-	if (lwe == OPA_LINK_WIDTH_RESET
-			|| lwe == OPA_LINK_WIDTH_RESET_OLD) {
+	if (lwe == OPA_LINK_WIDTH_RESET ||
+	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
 		set_link_width_downgrade_enabled(ppd,
-				ppd->link_width_downgrade_supported);
+						 ppd->
+						 link_width_downgrade_supported
+						 );
 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
 		/* only set and apply if something changed */
 		if (lwe != ppd->link_width_downgrade_enabled) {
 			set_link_width_downgrade_enabled(ppd, lwe);
 			call_link_downgrade_policy = 1;
 		}
-	} else
+	} else {
 		smp->status |= IB_SMP_INVALID_FIELD;
-
+	}
 	lse = be16_to_cpu(pi->link_speed.enabled);
 	if (lse) {
 		if (lse & be16_to_cpu(pi->link_speed.supported))
@@ -1198,22 +1211,24 @@
 			smp->status |= IB_SMP_INVALID_FIELD;
 	}
 
-	ibp->mkeyprot = (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
-	ibp->vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
+	ibp->rvp.mkeyprot =
+		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
+	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
-				    ibp->vl_high_limit);
+				    ibp->rvp.vl_high_limit);
 
-	if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
-		ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 	for (i = 0; i < ppd->vls_supported; i++) {
 		if ((i % 2) == 0)
-			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i/2] >> 4)
-					  & 0xF);
+			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
+					   4) & 0xF);
 		else
-			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i/2] & 0xF);
+			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
+					  0xF);
 		if (mtu == 0xffff) {
 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
 				mtu,
@@ -1223,8 +1238,8 @@
 		}
 		if (dd->vld[i].mtu != mtu) {
 			dd_dev_info(dd,
-				"MTU change on vl %d from %d to %d\n",
-				i, dd->vld[i].mtu, mtu);
+				    "MTU change on vl %d from %d to %d\n",
+				    i, dd->vld[i].mtu, mtu);
 			dd->vld[i].mtu = mtu;
 			call_set_mtu++;
 		}
@@ -1232,13 +1247,13 @@
 	/* As per OPAV1 spec: VL15 must support and be configured
 	 * for operation with a 2048 or larger MTU.
 	 */
-	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15/2] & 0xF);
+	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
 	if (mtu < 2048 || mtu == 0xffff)
 		mtu = 2048;
 	if (dd->vld[15].mtu != mtu) {
 		dd_dev_info(dd,
-			"MTU change on vl 15 from %d to %d\n",
-			dd->vld[15].mtu, mtu);
+			    "MTU change on vl 15 from %d to %d\n",
+			    dd->vld[15].mtu, mtu);
 		dd->vld[15].mtu = mtu;
 		call_set_mtu++;
 	}
@@ -1254,21 +1269,21 @@
 			smp->status |= IB_SMP_INVALID_FIELD;
 		} else {
 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
-						vls) == -EINVAL)
+					    vls) == -EINVAL)
 				smp->status |= IB_SMP_INVALID_FIELD;
 		}
 	}
 
 	if (pi->mkey_violations == 0)
-		ibp->mkey_violations = 0;
+		ibp->rvp.mkey_violations = 0;
 
 	if (pi->pkey_violations == 0)
-		ibp->pkey_violations = 0;
+		ibp->rvp.pkey_violations = 0;
 
 	if (pi->qkey_violations == 0)
-		ibp->qkey_violations = 0;
+		ibp->rvp.qkey_violations = 0;
 
-	ibp->subnet_timeout =
+	ibp->rvp.subnet_timeout =
 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
 
 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
@@ -1388,7 +1403,7 @@
 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
 
 		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.ibdev;
+		event.device = &dd->verbs_dev.rdi.ibdev;
 		event.element.port_num = port;
 		ib_dispatch_event(&event);
 	}
@@ -1402,7 +1417,7 @@
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u32 n_blocks_sent = OPA_AM_NBLK(am);
 	u32 start_block = am & 0x7ff;
-	u16 *p = (u16 *) data;
+	u16 *p = (u16 *)data;
 	__be16 *q = (__be16 *)data;
 	int i;
 	u16 n_blocks_avail;
@@ -1415,7 +1430,7 @@
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	n_blocks_avail = (u16)(npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
 
 	if (start_block + n_blocks_sent > n_blocks_avail ||
 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
@@ -1514,14 +1529,22 @@
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	u8 *p = data;
 	int i;
+	u8 sc;
 
 	if (am) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++)
-		ibp->sl_to_sc[i] = *p++;
+	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
+		sc = *p++;
+		if (ibp->sl_to_sc[i] != sc) {
+			ibp->sl_to_sc[i] = sc;
+
+			/* Put all stale qps into error state */
+			hfi1_error_port_qps(ibp, i);
+		}
+	}
 
 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
 }
@@ -1574,7 +1597,7 @@
 {
 	u32 n_blocks = OPA_AM_NBLK(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	size_t size = 4 * sizeof(u64);
 
 	if (n_blocks != 1) {
@@ -1597,7 +1620,7 @@
 	u32 n_blocks = OPA_AM_NBLK(am);
 	int async_update = OPA_AM_ASYNC(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	struct hfi1_pportdata *ppd;
 	int lstate;
 
@@ -1609,8 +1632,10 @@
 	/* IB numbers ports from 1, hw from 0 */
 	ppd = dd->pport + (port - 1);
 	lstate = driver_lstate(ppd);
-	/* it's known that async_update is 0 by this point, but include
-	 * the explicit check for clarity */
+	/*
+	 * it's known that async_update is 0 by this point, but include
+	 * the explicit check for clarity
+	 */
 	if (!async_update &&
 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1629,7 +1654,7 @@
 	u32 n_blocks = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	int size;
 
 	if (n_blocks != 1) {
@@ -1654,7 +1679,7 @@
 	u32 n_blocks = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	int lstate;
 
 	if (n_blocks != 1) {
@@ -1687,7 +1712,7 @@
 	u32 lstate;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
-	struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
 
 	if (nports != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1707,12 +1732,11 @@
 	psi->port_states.ledenable_offlinereason |=
 		ppd->is_sm_config_started << 5;
 	psi->port_states.ledenable_offlinereason |=
-		ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+		ppd->offline_disabled_reason;
 #else
 	psi->port_states.offline_reason = ppd->neighbor_normal << 4;
 	psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
-	psi->port_states.offline_reason |= ppd->offline_disabled_reason &
-				OPA_PI_MASK_OFFLINE_REASON;
+	psi->port_states.offline_reason |= ppd->offline_disabled_reason;
 #endif /* PI_LED_ENABLE_SUP */
 
 	psi->port_states.portphysstate_portstate =
@@ -1737,7 +1761,7 @@
 	u8 ls_new, ps_new;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
-	struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
 	int ret, invalid = 0;
 
 	if (nports != 1) {
@@ -1782,14 +1806,16 @@
 	u32 len = OPA_AM_CI_LEN(am) + 1;
 	int ret;
 
-#define __CI_PAGE_SIZE (1 << 7) /* 128 bytes */
+#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
 
-	/* check that addr is within spec, and
-	 * addr and (addr + len - 1) are on the same "page" */
+	/*
+	 * check that addr is within spec, and
+	 * addr and (addr + len - 1) are on the same "page"
+	 */
 	if (addr >= 4096 ||
-		(__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
+	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -1823,7 +1849,7 @@
 	u32 num_ports = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	struct buffer_control *p = (struct buffer_control *) data;
+	struct buffer_control *p = (struct buffer_control *)data;
 	int size;
 
 	if (num_ports != 1) {
@@ -1846,7 +1872,7 @@
 	u32 num_ports = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	struct buffer_control *p = (struct buffer_control *) data;
+	struct buffer_control *p = (struct buffer_control *)data;
 
 	if (num_ports != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1919,13 +1945,15 @@
 
 	switch (section) {
 	case OPA_VLARB_LOW_ELEMENTS:
-		(void) fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
+		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
 		break;
 	case OPA_VLARB_HIGH_ELEMENTS:
-		(void) fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
 		break;
-	/* neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
-	 * can be changed from the default values */
+	/*
+	 * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
+	 * can be changed from the default values
+	 */
 	case OPA_VLARB_PREEMPT_ELEMENTS:
 		/* FALLTHROUGH */
 	case OPA_VLARB_PREEMPT_MATRIX:
@@ -2137,8 +2165,10 @@
 };
 
 struct opa_port_error_counters64_msg {
-	/* Request contains first two fields, response contains the
-	 * whole magilla */
+	/*
+	 * Request contains first two fields, response contains the
+	 * whole magilla
+	 */
 	__be64 port_select_mask[4];
 	__be32 vl_select_mask;
 
@@ -2172,7 +2202,6 @@
 	__be32 error_info_select_mask;
 	__be32 reserved1;
 	struct _port_ei {
-
 		u8 port_number;
 		u8 reserved2[7];
 
@@ -2251,7 +2280,7 @@
 };
 
 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
-				struct ib_device *ibdev, u32 *resp_len)
+				     struct ib_device *ibdev, u32 *resp_len)
 {
 	struct opa_class_port_info *p =
 		(struct opa_class_port_info *)pmp->data;
@@ -2299,9 +2328,9 @@
 	}
 }
 
-
 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	struct opa_port_status_req *req =
 		(struct opa_port_status_req *)pmp->data;
@@ -2326,8 +2355,8 @@
 		return reply((struct ib_mad_hdr *)pmp);
 	}
 
-	if (nports != 1 || (port_num && port_num != port)
-	    || num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
+	if (nports != 1 || (port_num && port_num != port) ||
+	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2357,7 +2386,7 @@
 					 CNTR_INVALID_VL));
 	rsp->port_multicast_xmit_pkts =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
-					CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	rsp->port_multicast_rcv_pkts =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
 					  CNTR_INVALID_VL));
@@ -2386,7 +2415,7 @@
 	}
 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-					CNTR_INVALID_VL);
+				   CNTR_INVALID_VL);
 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
 		/* overflow/wrapped */
 		rsp->link_error_recovery = cpu_to_be32(~0);
@@ -2401,13 +2430,13 @@
 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
 					  CNTR_INVALID_VL));
 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
-					  CNTR_INVALID_VL));
+						      CNTR_INVALID_VL));
 
 	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
 
-	vlinfo = &(rsp->vls[0]);
+	vlinfo = &rsp->vls[0];
 	vfi = 0;
 	/* The vl_select_mask has been checked above, and we know
 	 * that it contains only entries which represent valid VLs.
@@ -2423,27 +2452,27 @@
 
 		rsp->vls[vfi].port_vl_rcv_pkts =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_data =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_pkts =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_wait =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_fecn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_becn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		vlinfo++;
 		vfi++;
@@ -2473,7 +2502,7 @@
 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
 						CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	/* local link integrity must be right-shifted by the lli resolution */
 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
 	tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
@@ -2483,10 +2512,10 @@
 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
 	error_counter_summary += (tmp >> res_ler);
 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	/* ppd->link_downed is a 32-bit value */
 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
 						CNTR_INVALID_VL);
@@ -2512,7 +2541,7 @@
 						 idx_from_vl(vl));
 			if (tmp < sum_vl_xmit_wait) {
 				/* we wrapped */
-				sum_vl_xmit_wait = (u64) ~0;
+				sum_vl_xmit_wait = (u64)~0;
 				break;
 			}
 			sum_vl_xmit_wait = tmp;
@@ -2522,8 +2551,30 @@
 	}
 }
 
+static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
+				   struct _port_dctrs *rsp)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
+						CNTR_INVALID_VL));
+	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
+						CNTR_INVALID_VL));
+	rsp->port_multicast_xmit_pkts =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
+					  CNTR_INVALID_VL));
+	rsp->port_multicast_rcv_pkts =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
+					  CNTR_INVALID_VL));
+}
+
 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				    struct ib_device *ibdev,
+				    u8 port, u32 *resp_len)
 {
 	struct opa_port_data_counters_msg *req =
 		(struct opa_port_data_counters_msg *)pmp->data;
@@ -2590,39 +2641,19 @@
 	 */
 	hfi1_read_link_quality(dd, &lq);
 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
+	pma_get_opa_port_dctrs(ibdev, rsp);
 
-	/* rsp->sw_port_congestion is 0 for HFIs */
-	/* rsp->port_xmit_time_cong is 0 for HFIs */
-	/* rsp->port_xmit_wasted_bw ??? */
-	/* rsp->port_xmit_wait_data ??? */
-	/* rsp->port_mark_fecn is 0 for HFIs */
-
-	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
-						CNTR_INVALID_VL));
-	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
-						CNTR_INVALID_VL));
-	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_multicast_xmit_pkts =
-		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_multicast_rcv_pkts =
-		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
-						CNTR_INVALID_VL));
 	rsp->port_xmit_wait =
 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
 	rsp->port_rcv_fecn =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
 	rsp->port_rcv_becn =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
-
 	rsp->port_error_counter_summary =
 		cpu_to_be64(get_error_counter_summary(ibdev, port,
 						      res_lli, res_ler));
 
-	vlinfo = &(rsp->vls[0]);
+	vlinfo = &rsp->vls[0];
 	vfi = 0;
 	/* The vl_select_mask has been checked above, and we know
 	 * that it contains only entries which represent valid VLs.
@@ -2630,44 +2661,45 @@
 	 * any additional checks for vl.
 	 */
 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-		 8 * sizeof(req->vl_select_mask)) {
+			 8 * sizeof(req->vl_select_mask)) {
 		memset(vlinfo, 0, sizeof(*vlinfo));
 
 		rsp->vls[vfi].port_vl_xmit_data =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_data =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_pkts =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_pkts =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_wait =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_fecn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 		rsp->vls[vfi].port_vl_rcv_becn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
 		/* rsp->port_vl_xmit_wasted_bw ??? */
 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
-		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait */
+		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
+		 */
 		/*rsp->vls[vfi].port_vl_mark_fecn =
-			cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
-				+ offset));
-		*/
+		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
+		 *		+ offset));
+		 */
 		vlinfo++;
 		vfi++;
 	}
@@ -2680,12 +2712,88 @@
 	return reply((struct ib_mad_hdr *)pmp);
 }
 
+static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
+				       struct ib_device *ibdev, u8 port)
+{
+	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
+						pmp->data;
+	struct _port_dctrs rsp;
+
+	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+		goto bail;
+	}
+
+	memset(&rsp, 0, sizeof(rsp));
+	pma_get_opa_port_dctrs(ibdev, &rsp);
+
+	p->port_xmit_data = rsp.port_xmit_data;
+	p->port_rcv_data = rsp.port_rcv_data;
+	p->port_xmit_packets = rsp.port_xmit_pkts;
+	p->port_rcv_packets = rsp.port_rcv_pkts;
+	p->port_unicast_xmit_packets = 0;
+	p->port_unicast_rcv_packets =  0;
+	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
+	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
+
+bail:
+	return reply((struct ib_mad_hdr *)pmp);
+}
+
+static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
+				   struct _port_ectrs *rsp, u8 port)
+{
+	u64 tmp, tmp2;
+	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+	struct hfi1_ibport *ibp = to_iport(ibdev, port);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
+	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
+					CNTR_INVALID_VL);
+	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
+		/* overflow/wrapped */
+		rsp->link_error_recovery = cpu_to_be32(~0);
+	} else {
+		rsp->link_error_recovery = cpu_to_be32(tmp2);
+	}
+
+	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_errors =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
+	rsp->port_rcv_remote_physical_errors =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
+					  CNTR_INVALID_VL));
+	rsp->port_rcv_switch_relay_errors = 0;
+	rsp->port_xmit_discards =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
+					   CNTR_INVALID_VL));
+	rsp->port_xmit_constraint_errors =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
+					   CNTR_INVALID_VL));
+	rsp->port_rcv_constraint_errors =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
+					   CNTR_INVALID_VL));
+	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
+	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
+	if (tmp2 < tmp) {
+		/* overflow/wrapped */
+		rsp->local_link_integrity_errors = cpu_to_be64(~0);
+	} else {
+		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
+	}
+	rsp->excessive_buffer_overruns =
+		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+}
+
 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	size_t response_data_size;
 	struct _port_ectrs *rsp;
-	unsigned long port_num;
+	u8 port_num;
 	struct opa_port_error_counters64_msg *req;
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u32 num_ports;
@@ -2695,7 +2803,7 @@
 	struct hfi1_pportdata *ppd;
 	struct _vls_ectrs *vlinfo;
 	unsigned long vl;
-	u64 port_mask, tmp, tmp2;
+	u64 port_mask, tmp;
 	u32 vl_select_mask;
 	int vfi;
 
@@ -2724,9 +2832,9 @@
 	 */
 	port_mask = be64_to_cpu(req->port_select_mask[3]);
 	port_num = find_first_bit((unsigned long *)&port_mask,
-					sizeof(port_mask));
+				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2737,46 +2845,18 @@
 	ppd = ppd_from_ibp(ibp);
 
 	memset(rsp, 0, sizeof(*rsp));
-	rsp->port_number = (u8)port_num;
+	rsp->port_number = port_num;
 
-	rsp->port_rcv_constraint_errors =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
-					   CNTR_INVALID_VL));
-	/* port_rcv_switch_relay_errors is 0 for HFIs */
-	rsp->port_xmit_discards =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
-						CNTR_INVALID_VL));
+	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
+
 	rsp->port_rcv_remote_physical_errors =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
-						CNTR_INVALID_VL));
-	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
-	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
-	if (tmp2 < tmp) {
-		/* overflow/wrapped */
-		rsp->local_link_integrity_errors = cpu_to_be64(~0);
-	} else {
-		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
-	}
-	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
-	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-					CNTR_INVALID_VL);
-	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
-		/* overflow/wrapped */
-		rsp->link_error_recovery = cpu_to_be32(~0);
-	} else {
-		rsp->link_error_recovery = cpu_to_be32(tmp2);
-	}
-	rsp->port_xmit_constraint_errors =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
-					   CNTR_INVALID_VL));
-	rsp->excessive_buffer_overruns =
-		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	rsp->fm_config_errors =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
-						CNTR_INVALID_VL));
-	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
-						CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
+
 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
 
 	vlinfo = &rsp->vls[0];
@@ -2796,8 +2876,94 @@
 	return reply((struct ib_mad_hdr *)pmp);
 }
 
+static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
+				   struct ib_device *ibdev, u8 port)
+{
+	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+		pmp->data;
+	struct _port_ectrs rsp;
+	u64 temp_link_overrun_errors;
+	u64 temp_64;
+	u32 temp_32;
+
+	memset(&rsp, 0, sizeof(rsp));
+	pma_get_opa_port_ectrs(ibdev, &rsp, port);
+
+	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+		goto bail;
+	}
+
+	p->symbol_error_counter = 0; /* N/A for OPA */
+
+	temp_32 = be32_to_cpu(rsp.link_error_recovery);
+	if (temp_32 > 0xFFUL)
+		p->link_error_recovery_counter = 0xFF;
+	else
+		p->link_error_recovery_counter = (u8)temp_32;
+
+	temp_32 = be32_to_cpu(rsp.link_downed);
+	if (temp_32 > 0xFFUL)
+		p->link_downed_counter = 0xFF;
+	else
+		p->link_downed_counter = (u8)temp_32;
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
+	if (temp_64 > 0xFFFFUL)
+		p->port_rcv_errors = cpu_to_be16(0xFFFF);
+	else
+		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
+	if (temp_64 > 0xFFFFUL)
+		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
+	else
+		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
+	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
+	if (temp_64 > 0xFFFFUL)
+		p->port_xmit_discards = cpu_to_be16(0xFFFF);
+	else
+		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
+	if (temp_64 > 0xFFUL)
+		p->port_xmit_constraint_errors = 0xFF;
+	else
+		p->port_xmit_constraint_errors = (u8)temp_64;
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
+	if (temp_64 > 0xFFUL)
+		p->port_rcv_constraint_errors = 0xFFUL;
+	else
+		p->port_rcv_constraint_errors = (u8)temp_64;
+
+	/* LocalLink: 7:4, BufferOverrun: 3:0 */
+	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
+	if (temp_64 > 0xFUL)
+		temp_64 = 0xFUL;
+
+	temp_link_overrun_errors = temp_64 << 4;
+
+	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
+	if (temp_64 > 0xFUL)
+		temp_64 = 0xFUL;
+	temp_link_overrun_errors |= temp_64;
+
+	p->link_overrun_errors = (u8)temp_link_overrun_errors;
+
+	p->vl15_dropped = 0; /* N/A for OPA */
+
+bail:
+	return reply((struct ib_mad_hdr *)pmp);
+}
+
 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				 struct ib_device *ibdev,
+				 u8 port, u32 *resp_len)
 {
 	size_t response_data_size;
 	struct _port_ei *rsp;
@@ -2805,7 +2971,7 @@
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u64 port_mask;
 	u32 num_ports;
-	unsigned long port_num;
+	u8 port_num;
 	u8 num_pslm;
 	u64 reg;
 
@@ -2838,7 +3004,7 @@
 	port_num = find_first_bit((unsigned long *)&port_mask,
 				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2847,15 +3013,17 @@
 	rsp->port_rcv_ei.status_and_code =
 		dd->err_info_rcvport.status_and_code;
 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
-		&dd->err_info_rcvport.packet_flit1, sizeof(u64));
+	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
-		&dd->err_info_rcvport.packet_flit2, sizeof(u64));
+	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
 
 	/* ExcessiverBufferOverrunInfo */
 	reg = read_csr(dd, RCV_ERR_INFO);
 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
-		/* if the RcvExcessBufferOverrun bit is set, save SC of
-		 * first pkt that encountered an excess buffer overrun */
+		/*
+		 * if the RcvExcessBufferOverrun bit is set, save SC of
+		 * first pkt that encountered an excess buffer overrun
+		 */
 		u8 tmp = (u8)reg;
 
 		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
@@ -2892,7 +3060,8 @@
 }
 
 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	struct opa_clear_port_status *req =
 		(struct opa_clear_port_status *)pmp->data;
@@ -2951,8 +3120,9 @@
 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
 
 	/* Only applicable for switch */
-	/*if (counter_select & CS_PORT_MARK_FECN)
-		write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);*/
+	/* if (counter_select & CS_PORT_MARK_FECN)
+	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
+	 */
 
 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
@@ -2975,7 +3145,7 @@
 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-						CNTR_INVALID_VL, 0);
+			       CNTR_INVALID_VL, 0);
 	}
 
 	if (counter_select & CS_PORT_RCV_ERRORS)
@@ -2997,7 +3167,6 @@
 
 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
 			 8 * sizeof(vl_select_mask)) {
-
 		if (counter_select & CS_PORT_XMIT_DATA)
 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
 
@@ -3026,9 +3195,9 @@
 		if (counter_select & CS_PORT_RCV_BUBBLE)
 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
 
-		/*if (counter_select & CS_PORT_MARK_FECN)
-		     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
-		*/
+		/* if (counter_select & CS_PORT_MARK_FECN)
+		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
+		 */
 		/* port_vl_xmit_discards ??? */
 	}
 
@@ -3039,14 +3208,15 @@
 }
 
 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				 struct ib_device *ibdev,
+				 u8 port, u32 *resp_len)
 {
 	struct _port_ei *rsp;
 	struct opa_port_error_info_msg *req;
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u64 port_mask;
 	u32 num_ports;
-	unsigned long port_num;
+	u8 port_num;
 	u8 num_pslm;
 	u32 error_info_select;
 
@@ -3071,7 +3241,7 @@
 	port_num = find_first_bit((unsigned long *)&port_mask,
 				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -3085,8 +3255,10 @@
 
 	/* ExcessiverBufferOverrunInfo */
 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
-		/* status bit is essentially kept in the h/w - bit 5 of
-		 * RCV_ERR_INFO */
+		/*
+		 * status bit is essentially kept in the h/w - bit 5 of
+		 * RCV_ERR_INFO
+		 */
 		write_csr(dd, RCV_ERR_INFO,
 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
 
@@ -3138,13 +3310,12 @@
 }
 
 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
-					     u8 *data,
-					     struct ib_device *ibdev,
-					     u8 port, u32 *resp_len)
+				       u8 *data, struct ib_device *ibdev,
+				       u8 port, u32 *resp_len)
 {
 	int i;
 	struct opa_congestion_setting_attr *p =
-		(struct opa_congestion_setting_attr *) data;
+		(struct opa_congestion_setting_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct opa_congestion_setting_entry_shadow *entries;
@@ -3154,7 +3325,7 @@
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -3183,7 +3354,7 @@
 				       u32 *resp_len)
 {
 	struct opa_congestion_setting_attr *p =
-		(struct opa_congestion_setting_attr *) data;
+		(struct opa_congestion_setting_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct opa_congestion_setting_entry_shadow *entries;
@@ -3245,7 +3416,7 @@
 			continue;
 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
-			&cce->rqpn, 3);
+		       &cce->rqpn, 3);
 		cong_log->events[i].sl_svc_type_cn_entry =
 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
 		cong_log->events[i].remote_lid_cn_entry =
@@ -3275,7 +3446,7 @@
 				   u32 *resp_len)
 {
 	struct ib_cc_table_attr *cc_table_attr =
-		(struct ib_cc_table_attr *) data;
+		(struct ib_cc_table_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	u32 start_block = OPA_AM_START_BLK(am);
@@ -3296,7 +3467,7 @@
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -3316,7 +3487,7 @@
 	rcu_read_unlock();
 
 	if (resp_len)
-		*resp_len += sizeof(u16)*(IB_CCT_ENTRIES * n_blocks + 1);
+		*resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
 
 	return reply((struct ib_mad_hdr *)smp);
 }
@@ -3332,7 +3503,7 @@
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
 {
-	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *) data;
+	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	u32 start_block = OPA_AM_START_BLK(am);
@@ -3362,14 +3533,14 @@
 	}
 
 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
-	if (new_cc_state == NULL)
+	if (!new_cc_state)
 		goto getit;
 
 	spin_lock(&ppd->cc_state_lock);
 
 	old_cc_state = get_cc_state(ppd);
 
-	if (old_cc_state == NULL) {
+	if (!old_cc_state) {
 		spin_unlock(&ppd->cc_state_lock);
 		kfree(new_cc_state);
 		return reply((struct ib_mad_hdr *)smp);
@@ -3409,26 +3580,31 @@
 };
 
 #define OPA_LED_SHIFT	31
-#define OPA_LED_MASK	(1 << OPA_LED_SHIFT)
+#define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
 
 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct opa_led_info *p = (struct opa_led_info *) data;
+	struct hfi1_pportdata *ppd = dd->pport;
+	struct opa_led_info *p = (struct opa_led_info *)data;
 	u32 nport = OPA_AM_NPORT(am);
-	u64 reg;
+	u32 is_beaconing_active;
 
 	if (nport != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	reg = read_csr(dd, DCC_CFG_LED_CNTRL);
-	if ((reg & DCC_CFG_LED_CNTRL_LED_CNTRL_SMASK) &&
-		((reg & DCC_CFG_LED_CNTRL_LED_SW_BLINK_RATE_SMASK) == 0xf))
-			p->rsvd_led_mask = cpu_to_be32(OPA_LED_MASK);
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
 
 	if (resp_len)
 		*resp_len += sizeof(struct opa_led_info);
@@ -3441,7 +3617,7 @@
 				   u32 *resp_len)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct opa_led_info *p = (struct opa_led_info *) data;
+	struct opa_led_info *p = (struct opa_led_info *)data;
 	u32 nport = OPA_AM_NPORT(am);
 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
 
@@ -3450,7 +3626,10 @@
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	setextled(dd, on);
+	if (on)
+		hfi1_start_led_override(dd->pport, 2000, 1500);
+	else
+		shutdown_led_override(dd->pport);
 
 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
 }
@@ -3493,7 +3672,7 @@
 		break;
 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-					       resp_len);
+						resp_len);
 		break;
 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
@@ -3532,9 +3711,9 @@
 					      resp_len);
 		break;
 	case IB_SMP_ATTR_SM_INFO:
-		if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return IB_MAD_RESULT_SUCCESS;
 		/* FALLTHROUGH */
 	default:
@@ -3575,7 +3754,7 @@
 		break;
 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-					       resp_len);
+						resp_len);
 		break;
 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
@@ -3602,9 +3781,9 @@
 					      resp_len);
 		break;
 	case IB_SMP_ATTR_SM_INFO:
-		if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return IB_MAD_RESULT_SUCCESS;
 		/* FALLTHROUGH */
 	default:
@@ -3654,14 +3833,13 @@
 		/* zero the payload for this segment */
 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
 
-		(void) subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
+		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
 					ibdev, port, NULL);
 		if (smp->status & ~IB_SMP_DIRECTION) {
 			set_aggr_error(agg);
 			return reply((struct ib_mad_hdr *)smp);
 		}
 		next_smp += agg_size;
-
 	}
 
 	return reply((struct ib_mad_hdr *)smp);
@@ -3698,14 +3876,13 @@
 			return reply((struct ib_mad_hdr *)smp);
 		}
 
-		(void) subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
+		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
 					ibdev, port, NULL);
 		if (smp->status & ~IB_SMP_DIRECTION) {
 			set_aggr_error(agg);
 			return reply((struct ib_mad_hdr *)smp);
 		}
 		next_smp += agg_size;
-
 	}
 
 	return reply((struct ib_mad_hdr *)smp);
@@ -3823,7 +4000,7 @@
 	if (smp->class_version != OPA_SMI_CLASS_VERSION) {
 		smp->status |= IB_SMP_UNSUP_VERSION;
 		ret = reply((struct ib_mad_hdr *)smp);
-		goto bail;
+		return ret;
 	}
 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
@@ -3843,13 +4020,13 @@
 		     smp->method == IB_MGMT_METHOD_SET) &&
 		    port_num && port_num <= ibdev->phys_port_cnt &&
 		    port != port_num)
-			(void) check_mkey(to_iport(ibdev, port_num),
+			(void)check_mkey(to_iport(ibdev, port_num),
 					  (struct ib_mad_hdr *)smp, 0,
 					  smp->mkey, smp->route.dr.dr_slid,
 					  smp->route.dr.return_path,
 					  smp->hop_cnt);
 		ret = IB_MAD_RESULT_FAILURE;
-		goto bail;
+		return ret;
 	}
 
 	*resp_len = opa_get_smp_header_size(smp);
@@ -3861,23 +4038,25 @@
 			clear_opa_smp_data(smp);
 			ret = subn_get_opa_sma(attr_id, smp, am, data,
 					       ibdev, port, resp_len);
-			goto bail;
+			break;
 		case OPA_ATTRIB_ID_AGGREGATE:
 			ret = subn_get_opa_aggregate(smp, ibdev, port,
 						     resp_len);
-			goto bail;
+			break;
 		}
+		break;
 	case IB_MGMT_METHOD_SET:
 		switch (attr_id) {
 		default:
 			ret = subn_set_opa_sma(attr_id, smp, am, data,
 					       ibdev, port, resp_len);
-			goto bail;
+			break;
 		case OPA_ATTRIB_ID_AGGREGATE:
 			ret = subn_set_opa_aggregate(smp, ibdev, port,
 						     resp_len);
-			goto bail;
+			break;
 		}
+		break;
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_REPORT:
 	case IB_MGMT_METHOD_REPORT_RESP:
@@ -3888,13 +4067,13 @@
 		 * Just tell the caller to process it normally.
 		 */
 		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
+		break;
 	default:
 		smp->status |= IB_SMP_UNSUP_METHOD;
 		ret = reply((struct ib_mad_hdr *)smp);
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -3910,7 +4089,7 @@
 	if (smp->class_version != 1) {
 		smp->status |= IB_SMP_UNSUP_VERSION;
 		ret = reply((struct ib_mad_hdr *)smp);
-		goto bail;
+		return ret;
 	}
 
 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
@@ -3931,13 +4110,13 @@
 		     smp->method == IB_MGMT_METHOD_SET) &&
 		    port_num && port_num <= ibdev->phys_port_cnt &&
 		    port != port_num)
-			(void) check_mkey(to_iport(ibdev, port_num),
-					  (struct ib_mad_hdr *)smp, 0,
-					  smp->mkey,
-					  (__force __be32)smp->dr_slid,
-					  smp->return_path, smp->hop_cnt);
+			(void)check_mkey(to_iport(ibdev, port_num),
+					 (struct ib_mad_hdr *)smp, 0,
+					 smp->mkey,
+					 (__force __be32)smp->dr_slid,
+					 smp->return_path, smp->hop_cnt);
 		ret = IB_MAD_RESULT_FAILURE;
-		goto bail;
+		return ret;
 	}
 
 	switch (smp->method) {
@@ -3945,15 +4124,77 @@
 		switch (smp->attr_id) {
 		case IB_SMP_ATTR_NODE_INFO:
 			ret = subn_get_nodeinfo(smp, ibdev, port);
-			goto bail;
+			break;
 		default:
 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)smp);
-			goto bail;
+			break;
 		}
+		break;
 	}
 
-bail:
+	return ret;
+}
+
+static int process_perf(struct ib_device *ibdev, u8 port,
+			const struct ib_mad *in_mad,
+			struct ib_mad *out_mad)
+{
+	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
+	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
+						&pmp->data;
+	int ret = IB_MAD_RESULT_FAILURE;
+
+	*out_mad = *in_mad;
+	if (pmp->mad_hdr.class_version != 1) {
+		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
+		ret = reply((struct ib_mad_hdr *)pmp);
+		return ret;
+	}
+
+	switch (pmp->mad_hdr.method) {
+	case IB_MGMT_METHOD_GET:
+		switch (pmp->mad_hdr.attr_id) {
+		case IB_PMA_PORT_COUNTERS:
+			ret = pma_get_ib_portcounters(pmp, ibdev, port);
+			break;
+		case IB_PMA_PORT_COUNTERS_EXT:
+			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
+			break;
+		case IB_PMA_CLASS_PORT_INFO:
+			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+			ret = reply((struct ib_mad_hdr *)pmp);
+			break;
+		default:
+			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+			ret = reply((struct ib_mad_hdr *)pmp);
+			break;
+		}
+		break;
+
+	case IB_MGMT_METHOD_SET:
+		if (pmp->mad_hdr.attr_id) {
+			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+			ret = reply((struct ib_mad_hdr *)pmp);
+		}
+		break;
+
+	case IB_MGMT_METHOD_TRAP:
+	case IB_MGMT_METHOD_GET_RESP:
+		/*
+		 * The ib_mad module will call us to process responses
+		 * before checking for other consumers.
+		 * Just tell the caller to process it normally.
+		 */
+		ret = IB_MAD_RESULT_SUCCESS;
+		break;
+
+	default:
+		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
+		ret = reply((struct ib_mad_hdr *)pmp);
+		break;
+	}
+
 	return ret;
 }
 
@@ -3978,44 +4219,46 @@
 		switch (pmp->mad_hdr.attr_id) {
 		case IB_PMA_CLASS_PORT_INFO:
 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
-			goto bail;
+			break;
 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						       resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						    resp_len);
+			break;
 		default:
 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)pmp);
-			goto bail;
+			break;
 		}
+		break;
 
 	case IB_MGMT_METHOD_SET:
 		switch (pmp->mad_hdr.attr_id) {
 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						    resp_len);
+			break;
 		default:
 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)pmp);
-			goto bail;
+			break;
 		}
+		break;
 
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_GET_RESP:
@@ -4025,14 +4268,14 @@
 		 * Just tell the caller to process it normally.
 		 */
 		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
+		break;
 
 	default:
 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
 		ret = reply((struct ib_mad_hdr *)pmp);
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -4097,12 +4340,15 @@
 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
-		goto bail;
+		break;
+	case IB_MGMT_CLASS_PERF_MGMT:
+		ret = process_perf(ibdev, port, in_mad, out_mad);
+		break;
 	default:
 		ret = IB_MAD_RESULT_SUCCESS;
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -4154,66 +4400,3 @@
 
 	return IB_MAD_RESULT_FAILURE;
 }
-
-static void send_handler(struct ib_mad_agent *agent,
-			 struct ib_mad_send_wc *mad_send_wc)
-{
-	ib_free_send_mad(mad_send_wc->send_buf);
-}
-
-int hfi1_create_agents(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct hfi1_ibport *ibp;
-	int p;
-	int ret;
-
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
-					      NULL, 0, send_handler,
-					      NULL, NULL, 0);
-		if (IS_ERR(agent)) {
-			ret = PTR_ERR(agent);
-			goto err;
-		}
-
-		ibp->send_agent = agent;
-	}
-
-	return 0;
-
-err:
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-	}
-
-	return ret;
-}
-
-void hfi1_free_agents(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct hfi1_ibport *ibp;
-	int p;
-
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-		if (ibp->sm_ah) {
-			ib_destroy_ah(&ibp->sm_ah->ibah);
-			ibp->sm_ah = NULL;
-		}
-	}
-}

diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/staging/rdma/hfi1/mad.h
index f031775..55ee086 100644
--- a/drivers/staging/rdma/hfi1/mad.h
+++ b/drivers/staging/rdma/hfi1/mad.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,8 +48,10 @@
 #define _HFI1_MAD_H
 
 #include <rdma/ib_pma.h>
-#define USE_PI_LED_ENABLE	1 /* use led enabled bit in struct
-				   * opa_port_states, if available */
+#define USE_PI_LED_ENABLE	1 /*
+				   * use led enabled bit in struct
+				   * opa_port_states, if available
+				   */
 #include <rdma/opa_smi.h>
 #include <rdma/opa_port_info.h>
 #ifndef PI_LED_ENABLE_SUP
@@ -235,7 +234,6 @@
 #define IB_CC_SVCTYPE_RD 0x2
 #define IB_CC_SVCTYPE_UD 0x3
 
-
 /*
  * There should be an equivalent IB #define for the following, but
  * I cannot find it.
@@ -267,7 +265,7 @@
 	u8 congestion_flags;
 	__be16 threshold_event_counter;
 	__be32 current_time_stamp;
-	u8 threshold_cong_event_map[OPA_MAX_SLS/8];
+	u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
 	struct opa_hfi1_cong_log_event events[OPA_CONG_LOG_ELEMS];
 } __packed;
 

diff --git a/drivers/staging/rdma/hfi1/mmap.c b/drivers/staging/rdma/hfi1/mmap.c
deleted file mode 100644
index 5173b1c..0000000
--- a/drivers/staging/rdma/hfi1/mmap.c
+++ /dev/null

@@ -1,192 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "verbs.h"
-
-/**
- * hfi1_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct hfi1_mmap_info
- */
-void hfi1_release_mmap_info(struct kref *ref)
-{
-	struct hfi1_mmap_info *ip =
-		container_of(ref, struct hfi1_mmap_info, ref);
-	struct hfi1_ibdev *dev = to_idev(ip->context->device);
-
-	spin_lock_irq(&dev->pending_lock);
-	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
-
-	vfree(ip->obj);
-	kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void hfi1_vma_open(struct vm_area_struct *vma)
-{
-	struct hfi1_mmap_info *ip = vma->vm_private_data;
-
-	kref_get(&ip->ref);
-}
-
-static void hfi1_vma_close(struct vm_area_struct *vma)
-{
-	struct hfi1_mmap_info *ip = vma->vm_private_data;
-
-	kref_put(&ip->ref, hfi1_release_mmap_info);
-}
-
-static struct vm_operations_struct hfi1_vm_ops = {
-	.open =     hfi1_vma_open,
-	.close =    hfi1_vma_close,
-};
-
-/**
- * hfi1_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	struct hfi1_ibdev *dev = to_idev(context->device);
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long size = vma->vm_end - vma->vm_start;
-	struct hfi1_mmap_info *ip, *pp;
-	int ret = -EINVAL;
-
-	/*
-	 * Search the device's list of objects waiting for a mmap call.
-	 * Normally, this list is very short since a call to create a
-	 * CQ, QP, or SRQ is soon followed by a call to mmap().
-	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-				 pending_mmaps) {
-		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
-			continue;
-		/* Don't allow a mmap larger than the object. */
-		if (size > ip->size)
-			break;
-
-		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-
-		ret = remap_vmalloc_range(vma, ip->obj, 0);
-		if (ret)
-			goto done;
-		vma->vm_ops = &hfi1_vm_ops;
-		vma->vm_private_data = ip;
-		hfi1_vma_open(vma);
-		goto done;
-	}
-	spin_unlock_irq(&dev->pending_lock);
-done:
-	return ret;
-}
-
-/*
- * Allocate information for hfi1_mmap
- */
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
-					     u32 size,
-					     struct ib_ucontext *context,
-					     void *obj) {
-	struct hfi1_mmap_info *ip;
-
-	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
-	if (!ip)
-		goto bail;
-
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	INIT_LIST_HEAD(&ip->pending_mmaps);
-	ip->size = size;
-	ip->context = context;
-	ip->obj = obj;
-	kref_init(&ip->ref);
-
-bail:
-	return ip;
-}
-
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
-			   u32 size, void *obj)
-{
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	ip->size = size;
-	ip->obj = obj;
-}

diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c
new file mode 100644
index 0000000..c7ad016
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.c

@@ -0,0 +1,292 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/list.h>
+#include <linux/mmu_notifier.h>
+#include <linux/interval_tree_generic.h>
+
+#include "mmu_rb.h"
+#include "trace.h"
+
+struct mmu_rb_handler {
+	struct list_head list;
+	struct mmu_notifier mn;
+	struct rb_root *root;
+	spinlock_t lock;        /* protect the RB tree */
+	struct mmu_rb_ops *ops;
+};
+
+static LIST_HEAD(mmu_rb_handlers);
+static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */
+
+static unsigned long mmu_node_start(struct mmu_rb_node *);
+static unsigned long mmu_node_last(struct mmu_rb_node *);
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *);
+static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
+				     unsigned long);
+static inline void mmu_notifier_range_start(struct mmu_notifier *,
+					    struct mm_struct *,
+					    unsigned long, unsigned long);
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
+					unsigned long, unsigned long);
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
+					   unsigned long, unsigned long);
+
+static struct mmu_notifier_ops mn_opts = {
+	.invalidate_page = mmu_notifier_page,
+	.invalidate_range_start = mmu_notifier_range_start,
+};
+
+INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
+		     mmu_node_start, mmu_node_last, static, __mmu_int_rb);
+
+static unsigned long mmu_node_start(struct mmu_rb_node *node)
+{
+	return node->addr & PAGE_MASK;
+}
+
+static unsigned long mmu_node_last(struct mmu_rb_node *node)
+{
+	return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1;
+}
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
+{
+	struct mmu_rb_handler *handlr;
+	unsigned long flags;
+
+	if (!ops->invalidate)
+		return -EINVAL;
+
+	handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
+	if (!handlr)
+		return -ENOMEM;
+
+	handlr->root = root;
+	handlr->ops = ops;
+	INIT_HLIST_NODE(&handlr->mn.hlist);
+	spin_lock_init(&handlr->lock);
+	handlr->mn.ops = &mn_opts;
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_add_tail(&handlr->list, &mmu_rb_handlers);
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+
+	return mmu_notifier_register(&handlr->mn, current->mm);
+}
+
+void hfi1_mmu_rb_unregister(struct rb_root *root)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	unsigned long flags;
+
+	if (!handler)
+		return;
+
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_del(&handler->list);
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+
+	if (!RB_EMPTY_ROOT(root)) {
+		struct rb_node *node;
+		struct mmu_rb_node *rbnode;
+
+		while ((node = rb_first(root))) {
+			rbnode = rb_entry(node, struct mmu_rb_node, node);
+			rb_erase(node, root);
+			if (handler->ops->remove)
+				handler->ops->remove(root, rbnode, false);
+		}
+	}
+
+	if (current->mm)
+		mmu_notifier_unregister(&handler->mn, current->mm);
+	kfree(handler);
+}
+
+int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	struct mmu_rb_node *node;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!handler)
+		return -EINVAL;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
+		  mnode->len);
+	node = __mmu_rb_search(handler, mnode->addr, mnode->len);
+	if (node) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+	__mmu_int_rb_insert(mnode, root);
+
+	if (handler->ops->insert) {
+		ret = handler->ops->insert(root, mnode);
+		if (ret)
+			__mmu_int_rb_remove(mnode, root);
+	}
+unlock:
+	spin_unlock_irqrestore(&handler->lock, flags);
+	return ret;
+}
+
+/* Caller must host handler lock */
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
+					   unsigned long addr,
+					   unsigned long len)
+{
+	struct mmu_rb_node *node = NULL;
+
+	hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
+	if (!handler->ops->filter) {
+		node = __mmu_int_rb_iter_first(handler->root, addr,
+					       (addr + len) - 1);
+	} else {
+		for (node = __mmu_int_rb_iter_first(handler->root, addr,
+						    (addr + len) - 1);
+		     node;
+		     node = __mmu_int_rb_iter_next(node, addr,
+						   (addr + len) - 1)) {
+			if (handler->ops->filter(node, addr, len))
+				return node;
+		}
+	}
+	return node;
+}
+
+static void __mmu_rb_remove(struct mmu_rb_handler *handler,
+			    struct mmu_rb_node *node, bool arg)
+{
+	/* Validity of handler and node pointers has been checked by caller. */
+	hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
+		  node->len);
+	__mmu_int_rb_remove(node, handler->root);
+	if (handler->ops->remove)
+		handler->ops->remove(handler->root, node, arg);
+}
+
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
+				       unsigned long len)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	struct mmu_rb_node *node;
+	unsigned long flags;
+
+	if (!handler)
+		return ERR_PTR(-EINVAL);
+
+	spin_lock_irqsave(&handler->lock, flags);
+	node = __mmu_rb_search(handler, addr, len);
+	spin_unlock_irqrestore(&handler->lock, flags);
+
+	return node;
+}
+
+void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	unsigned long flags;
+
+	if (!handler || !node)
+		return;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	__mmu_rb_remove(handler, node, false);
+	spin_unlock_irqrestore(&handler->lock, flags);
+}
+
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
+{
+	struct mmu_rb_handler *handler;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_for_each_entry(handler, &mmu_rb_handlers, list) {
+		if (handler->root == root)
+			goto unlock;
+	}
+	handler = NULL;
+unlock:
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+	return handler;
+}
+
+static inline void mmu_notifier_page(struct mmu_notifier *mn,
+				     struct mm_struct *mm, unsigned long addr)
+{
+	mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
+}
+
+static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
+					    struct mm_struct *mm,
+					    unsigned long start,
+					    unsigned long end)
+{
+	mmu_notifier_mem_invalidate(mn, start, end);
+}
+
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
+					unsigned long start, unsigned long end)
+{
+	struct mmu_rb_handler *handler =
+		container_of(mn, struct mmu_rb_handler, mn);
+	struct rb_root *root = handler->root;
+	struct mmu_rb_node *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	for (node = __mmu_int_rb_iter_first(root, start, end - 1); node;
+	     node = __mmu_int_rb_iter_next(node, start, end - 1)) {
+		hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
+			  node->addr, node->len);
+		if (handler->ops->invalidate(root, node))
+			__mmu_rb_remove(handler, node, true);
+	}
+	spin_unlock_irqrestore(&handler->lock, flags);
+}

diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h
new file mode 100644
index 0000000..f8523fd
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.h

@@ -0,0 +1,73 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_MMU_RB_H
+#define _HFI1_MMU_RB_H
+
+#include "hfi.h"
+
+struct mmu_rb_node {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long __last;
+	struct rb_node node;
+};
+
+struct mmu_rb_ops {
+	bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
+	int (*insert)(struct rb_root *, struct mmu_rb_node *);
+	void (*remove)(struct rb_root *, struct mmu_rb_node *, bool);
+	int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
+};
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops);
+void hfi1_mmu_rb_unregister(struct rb_root *);
+int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
+				       unsigned long);
+
+#endif /* _HFI1_MMU_RB_H */

diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
deleted file mode 100644
index 3825321..0000000
--- a/drivers/staging/rdma/hfi1/mr.c
+++ /dev/null

@@ -1,473 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_smi.h>
-
-#include "hfi.h"
-
-/* Fast memory region */
-struct hfi1_fmr {
-	struct ib_fmr ibfmr;
-	struct hfi1_mregion mr;        /* must be last */
-};
-
-static inline struct hfi1_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct hfi1_fmr, ibfmr);
-}
-
-static int init_mregion(struct hfi1_mregion *mr, struct ib_pd *pd,
-			int count)
-{
-	int m, i = 0;
-	int rval = 0;
-
-	m = DIV_ROUND_UP(count, HFI1_SEGSZ);
-	for (; i < m; i++) {
-		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
-		if (!mr->map[i])
-			goto bail;
-	}
-	mr->mapsz = m;
-	init_completion(&mr->comp);
-	/* count returning the ptr to user */
-	atomic_set(&mr->refcount, 1);
-	mr->pd = pd;
-	mr->max_segs = count;
-out:
-	return rval;
-bail:
-	while (i)
-		kfree(mr->map[--i]);
-	rval = -ENOMEM;
-	goto out;
-}
-
-static void deinit_mregion(struct hfi1_mregion *mr)
-{
-	int i = mr->mapsz;
-
-	mr->mapsz = 0;
-	while (i)
-		kfree(mr->map[--i]);
-}
-
-
-/**
- * hfi1_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see dma.c).
- */
-struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct hfi1_mr *mr = NULL;
-	struct ib_mr *ret;
-	int rval;
-
-	if (to_ipd(pd)->user) {
-		ret = ERR_PTR(-EPERM);
-		goto bail;
-	}
-
-	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	rval = init_mregion(&mr->mr, pd, 0);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail;
-	}
-
-
-	rval = hfi1_alloc_lkey(&mr->mr, 1);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail_mregion;
-	}
-
-	mr->mr.access_flags = acc;
-	ret = &mr->ibmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	goto done;
-}
-
-static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
-{
-	struct hfi1_mr *mr;
-	int rval = -ENOMEM;
-	int m;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = DIV_ROUND_UP(count, HFI1_SEGSZ);
-	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
-	if (!mr)
-		goto bail;
-
-	rval = init_mregion(&mr->mr, pd, count);
-	if (rval)
-		goto bail;
-
-	rval = hfi1_alloc_lkey(&mr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	mr->ibmr.lkey = mr->mr.lkey;
-	mr->ibmr.rkey = mr->mr.lkey;
-done:
-	return mr;
-
-bail_mregion:
-	deinit_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	mr = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * hfi1_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt_addr, int mr_access_flags,
-			       struct ib_udata *udata)
-{
-	struct hfi1_mr *mr;
-	struct ib_umem *umem;
-	struct scatterlist *sg;
-	int n, m, entry;
-	struct ib_mr *ret;
-
-	if (length == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
-	if (IS_ERR(umem))
-		return (void *) umem;
-
-	n = umem->nmap;
-
-	mr = alloc_mr(n, pd);
-	if (IS_ERR(mr)) {
-		ret = (struct ib_mr *)mr;
-		ib_umem_release(umem);
-		goto bail;
-	}
-
-	mr->mr.user_base = start;
-	mr->mr.iova = virt_addr;
-	mr->mr.length = length;
-	mr->mr.offset = ib_umem_offset(umem);
-	mr->mr.access_flags = mr_access_flags;
-	mr->umem = umem;
-
-	if (is_power_of_2(umem->page_size))
-		mr->mr.page_shift = ilog2(umem->page_size);
-	m = 0;
-	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		void *vaddr;
-
-		vaddr = page_address(sg_page(sg));
-		if (!vaddr) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		mr->mr.map[m]->segs[n].vaddr = vaddr;
-		mr->mr.map[m]->segs[n].length = umem->page_size;
-		n++;
-		if (n == HFI1_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-/**
- * hfi1_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by hfi1_get_dma_mr()
- * or hfi1_reg_user_mr().
- */
-int hfi1_dereg_mr(struct ib_mr *ibmr)
-{
-	struct hfi1_mr *mr = to_imr(ibmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	hfi1_free_lkey(&mr->mr);
-
-	hfi1_put_mr(&mr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&mr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		dd_dev_err(
-			dd_from_ibdev(mr->mr.pd->device),
-			"hfi1_dereg_mr timeout mr %p pd %p refcount %u\n",
-			mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
-		hfi1_get_mr(&mr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_mregion(&mr->mr);
-	if (mr->umem)
-		ib_umem_release(mr->umem);
-	kfree(mr);
-out:
-	return ret;
-}
-
-/*
- * Allocate a memory region usable with the
- * IB_WR_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- * FIXME: IB_WR_REG_MR is not supported
- */
-struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
-			    enum ib_mr_type mr_type,
-			    u32 max_num_sg)
-{
-	struct hfi1_mr *mr;
-
-	if (mr_type != IB_MR_TYPE_MEM_REG)
-		return ERR_PTR(-EINVAL);
-
-	mr = alloc_mr(max_num_sg, pd);
-	if (IS_ERR(mr))
-		return (struct ib_mr *)mr;
-
-	return &mr->ibmr;
-}
-
-/**
- * hfi1_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr)
-{
-	struct hfi1_fmr *fmr;
-	int m;
-	struct ib_fmr *ret;
-	int rval = -ENOMEM;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = DIV_ROUND_UP(fmr_attr->max_pages, HFI1_SEGSZ);
-	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
-	if (!fmr)
-		goto bail;
-
-	rval = init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
-	if (rval)
-		goto bail;
-
-	/*
-	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-	 * rkey.
-	 */
-	rval = hfi1_alloc_lkey(&fmr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	fmr->ibfmr.rkey = fmr->mr.lkey;
-	fmr->ibfmr.lkey = fmr->mr.lkey;
-	/*
-	 * Resources are allocated but no valid mapping (RKEY can't be
-	 * used).
-	 */
-	fmr->mr.access_flags = mr_access_flags;
-	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->mr.page_shift = fmr_attr->page_shift;
-
-	ret = &fmr->ibfmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_mregion(&fmr->mr);
-bail:
-	kfree(fmr);
-	ret = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * hfi1_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		      int list_len, u64 iova)
-{
-	struct hfi1_fmr *fmr = to_ifmr(ibfmr);
-	struct hfi1_lkey_table *rkt;
-	unsigned long flags;
-	int m, n, i;
-	u32 ps;
-	int ret;
-
-	i = atomic_read(&fmr->mr.refcount);
-	if (i > 2)
-		return -EBUSY;
-
-	if (list_len > fmr->mr.max_segs) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	rkt = &to_idev(ibfmr->device)->lk_table;
-	spin_lock_irqsave(&rkt->lock, flags);
-	fmr->mr.user_base = iova;
-	fmr->mr.iova = iova;
-	ps = 1 << fmr->mr.page_shift;
-	fmr->mr.length = list_len * ps;
-	m = 0;
-	n = 0;
-	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-		fmr->mr.map[m]->segs[n].length = ps;
-		if (++n == HFI1_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * hfi1_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int hfi1_unmap_fmr(struct list_head *fmr_list)
-{
-	struct hfi1_fmr *fmr;
-	struct hfi1_lkey_table *rkt;
-	unsigned long flags;
-
-	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-		rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-		spin_lock_irqsave(&rkt->lock, flags);
-		fmr->mr.user_base = 0;
-		fmr->mr.iova = 0;
-		fmr->mr.length = 0;
-		spin_unlock_irqrestore(&rkt->lock, flags);
-	}
-	return 0;
-}
-
-/**
- * hfi1_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int hfi1_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-	struct hfi1_fmr *fmr = to_ifmr(ibfmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	hfi1_free_lkey(&fmr->mr);
-	hfi1_put_mr(&fmr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&fmr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		hfi1_get_mr(&fmr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_mregion(&fmr->mr);
-	kfree(fmr);
-out:
-	return ret;
-}

diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/staging/rdma/hfi1/opa_compat.h
index f64eec1..6ef3c1c 100644
--- a/drivers/staging/rdma/hfi1/opa_compat.h
+++ b/drivers/staging/rdma/hfi1/opa_compat.h

@@ -1,14 +1,13 @@
 #ifndef _LINUX_H
 #define _LINUX_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -111,19 +108,4 @@
 	/* values 12-15 are reserved/ignored */
 };
 
-/* OPA_PORT_TYPE_* definitions - these belong in opa_port_info.h */
-#define OPA_PORT_TYPE_UNKNOWN          0
-#define OPA_PORT_TYPE_DISCONNECTED     1
-/* port is not currently usable, CableInfo not available */
-#define OPA_PORT_TYPE_FIXED            2
-/* A fixed backplane port in a director class switch. All OPA ASICS */
-#define OPA_PORT_TYPE_VARIABLE         3
-/* A backplane port in a blade system, possibly mixed configuration */
-#define OPA_PORT_TYPE_STANDARD         4
-/* implies a SFF-8636 defined format for CableInfo (QSFP) */
-#define OPA_PORT_TYPE_SI_PHOTONICS      5
-/* A silicon photonics module implies TBD defined format for CableInfo
- * as defined by Intel SFO group */
-/* 6 - 15 are reserved */
-
 #endif /* _LINUX_H */

diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c
index 47ca631..0bac21e 100644
--- a/drivers/staging/rdma/hfi1/pcie.c
+++ b/drivers/staging/rdma/hfi1/pcie.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -57,6 +54,7 @@
 
 #include "hfi.h"
 #include "chip_registers.h"
+#include "aspm.h"
 
 /* link speed vector for Gen3 speed - not in Linux headers */
 #define GEN1_SPEED_VECTOR 0x1
@@ -122,8 +120,9 @@
 			goto bail;
 		}
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-	} else
+	} else {
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	}
 	if (ret) {
 		hfi1_early_err(&pdev->dev,
 			       "Unable to set DMA consistent mask: %d\n", ret);
@@ -131,13 +130,7 @@
 	}
 
 	pci_set_master(pdev);
-	ret = pci_enable_pcie_error_reporting(pdev);
-	if (ret) {
-		hfi1_early_err(&pdev->dev,
-			       "Unable to enable pcie error reporting: %d\n",
-			      ret);
-		ret = 0;
-	}
+	(void)pci_enable_pcie_error_reporting(pdev);
 	goto done;
 
 bail:
@@ -222,10 +215,9 @@
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
-							&dd->pcie_devctl2);
+				  &dd->pcie_devctl2);
 	pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
-	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-							&dd->pci_lnkctl3);
+	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3);
 	pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
 
 	return 0;
@@ -238,7 +230,7 @@
  */
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
 {
-	u64 __iomem *base = (void __iomem *) dd->kregbase;
+	u64 __iomem *base = (void __iomem *)dd->kregbase;
 
 	dd->flags &= ~HFI1_PRESENT;
 	dd->kregbase = NULL;
@@ -274,7 +266,7 @@
 
 clear:
 	pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL,
-						PCI_EXP_DEVCTL_BCR_FLR);
+				 PCI_EXP_DEVCTL_BCR_FLR);
 	/* PCIe spec requires the function to be back within 100ms */
 	msleep(100);
 }
@@ -287,9 +279,11 @@
 	struct msix_entry *msix_entry;
 	int i;
 
-	/* We can't pass hfi1_msix_entry array to msix_setup
+	/*
+	 * We can't pass hfi1_msix_entry array to msix_setup
 	 * so use a dummy msix_entry array and copy the allocated
-	 * irq back to the hfi1_msix_entry array. */
+	 * irq back to the hfi1_msix_entry array.
+	 */
 	msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
 	if (!msix_entry) {
 		ret = -ENOMEM;
@@ -319,7 +313,6 @@
 		   nvec, ret);
 	*msixcnt = 0;
 	hfi1_enable_intx(dd->pcidev);
-
 }
 
 /* return the PCIe link speed from the given link status */
@@ -367,6 +360,7 @@
 int pcie_speeds(struct hfi1_devdata *dd)
 {
 	u32 linkcap;
+	struct pci_dev *parent = dd->pcidev->bus->self;
 
 	if (!pci_is_pcie(dd->pcidev)) {
 		dd_dev_err(dd, "Can't find PCI Express capability!\n");
@@ -379,15 +373,15 @@
 	pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
 	if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
 		dd_dev_info(dd,
-			"This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
-			linkcap & PCI_EXP_LNKCAP_SLS);
+			    "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
+			    linkcap & PCI_EXP_LNKCAP_SLS);
 		dd->link_gen3_capable = 0;
 	}
 
 	/*
 	 * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
 	 */
-	if (dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+	if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
 		dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
 		dd->link_gen3_capable = 0;
 	}
@@ -395,9 +389,7 @@
 	/* obtain the link width and current speed */
 	update_lbus_info(dd);
 
-	/* check against expected pcie width and complain if "wrong" */
-	if (dd->lbus_width < 16)
-		dd_dev_err(dd, "PCIe width %u (x16 HFI)\n", dd->lbus_width);
+	dd_dev_info(dd, "%s\n", dd->lbus_info);
 
 	return 0;
 }
@@ -436,23 +428,18 @@
 void restore_pci_variables(struct hfi1_devdata *dd)
 {
 	pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
-	pci_write_config_dword(dd->pcidev,
-				PCI_BASE_ADDRESS_0, dd->pcibar0);
-	pci_write_config_dword(dd->pcidev,
-				PCI_BASE_ADDRESS_1, dd->pcibar1);
-	pci_write_config_dword(dd->pcidev,
-				PCI_ROM_ADDRESS, dd->pci_rom);
+	pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0);
+	pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1);
+	pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
-							dd->pcie_devctl2);
+				   dd->pcie_devctl2);
 	pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
-	pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-							dd->pci_lnkctl3);
+	pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3);
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
 }
 
-
 /*
  * BIOS may not set PCIe bus-utilization parameters for best performance.
  * Check and optionally adjust them to maximize our throughput.
@@ -461,6 +448,10 @@
 module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
 MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
 
+uint aspm_mode = ASPM_MODE_DISABLED;
+module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
+
 static void tune_pcie_caps(struct hfi1_devdata *dd)
 {
 	struct pci_dev *parent;
@@ -479,6 +470,12 @@
 	}
 	/* Find out supported and configured values for parent (root) */
 	parent = dd->pcidev->bus->self;
+	/*
+	 * The driver cannot perform the tuning if it does not have
+	 * access to the upstream component.
+	 */
+	if (!parent)
+		return;
 	if (!pci_is_root_bus(parent->bus)) {
 		dd_dev_info(dd, "Parent not root\n");
 		return;
@@ -532,6 +529,7 @@
 		pcie_set_readrq(dd->pcidev, ep_mrrs);
 	}
 }
+
 /* End of PCIe capability tuning */
 
 /*
@@ -746,21 +744,22 @@
 		c0 = fs - (eq[i][PREC] / div) - (eq[i][POST] / div);
 		c_plus1 = eq[i][POST] / div;
 		pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
-			eq_value(c_minus1, c0, c_plus1));
+				       eq_value(c_minus1, c0, c_plus1));
 		/* check if these coefficients violate EQ rules */
 		pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105,
-								&violation);
+				      &violation);
 		if (violation
 		    & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
 			if (hit_error == 0) {
 				dd_dev_err(dd,
-					"Gen3 EQ Table Coefficient rule violations\n");
+					   "Gen3 EQ Table Coefficient rule violations\n");
 				dd_dev_err(dd, "         prec   attn   post\n");
 			}
 			dd_dev_err(dd, "   p%02d:   %02x     %02x     %02x\n",
-				i, (u32)eq[i][0], (u32)eq[i][1], (u32)eq[i][2]);
+				   i, (u32)eq[i][0], (u32)eq[i][1],
+				   (u32)eq[i][2]);
 			dd_dev_err(dd, "            %02x     %02x     %02x\n",
-				(u32)c_minus1, (u32)c0, (u32)c_plus1);
+				   (u32)c_minus1, (u32)c0, (u32)c_plus1);
 			hit_error = 1;
 		}
 	}
@@ -772,7 +771,7 @@
 /*
  * Steps to be done after the PCIe firmware is downloaded and
  * before the SBR for the Pcie Gen3.
- * The hardware mutex is already being held.
+ * The SBus resource is already being held.
  */
 static void pcie_post_steps(struct hfi1_devdata *dd)
 {
@@ -815,8 +814,8 @@
 	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
 		if (pdev != dev) {
 			dd_dev_err(dd,
-				"%s: another device is on the same bus\n",
-				__func__);
+				   "%s: another device is on the same bus\n",
+				   __func__);
 			return -ENOTTY;
 		}
 
@@ -840,8 +839,8 @@
 				   u16 code, u16 data)
 {
 	write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (index * 8),
-	    (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT)
-	    |((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
+		  (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) |
+		   ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
 }
 
 /*
@@ -851,14 +850,13 @@
 {
 	u64 reg;
 
-	reg = (((u64)1 << dd->hfi1_id)
-			<< ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT)
-		| ((u64)pcie_serdes_broadcast[dd->hfi1_id]
-			<< ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT
-		| ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK
-		| ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK)
-			<< ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT
-		);
+	reg = (((u64)1 << dd->hfi1_id) <<
+	       ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT) |
+	      ((u64)pcie_serdes_broadcast[dd->hfi1_id] <<
+	       ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT |
+	       ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK |
+	       ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK) <<
+	       ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT);
 	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, reg);
 	/* read back to push the write */
 	read_csr(dd, ASIC_PCIE_SD_HOST_CMD);
@@ -946,7 +944,7 @@
  */
 int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 {
-	struct pci_dev *parent;
+	struct pci_dev *parent = dd->pcidev->bus->self;
 	u64 fw_ctrl;
 	u64 reg, therm;
 	u32 reg32, fs, lf;
@@ -955,8 +953,7 @@
 	int do_retry, retry_count = 0;
 	uint default_pset;
 	u16 target_vector, target_speed;
-	u16 lnkctl, lnkctl2, vendor;
-	u8 nsbr = 1;
+	u16 lnkctl2, vendor;
 	u8 div;
 	const u8 (*eq)[3];
 	int return_error = 0;
@@ -983,17 +980,21 @@
 	/* if already at target speed, done (unless forced) */
 	if (dd->lbus_speed == target_speed) {
 		dd_dev_info(dd, "%s: PCIe already at gen%d, %s\n", __func__,
-			pcie_target,
-			pcie_force ? "re-doing anyway" : "skipping");
+			    pcie_target,
+			    pcie_force ? "re-doing anyway" : "skipping");
 		if (!pcie_force)
 			return 0;
 	}
 
 	/*
-	 * A0 needs an additional SBR
+	 * The driver cannot do the transition if it has no access to the
+	 * upstream component
 	 */
-	if (is_ax(dd))
-		nsbr++;
+	if (!parent) {
+		dd_dev_info(dd, "%s: No upstream, Can't do gen3 transition\n",
+			    __func__);
+		return 0;
+	}
 
 	/*
 	 * Do the Gen3 transition.  Steps are those of the PCIe Gen3
@@ -1009,10 +1010,13 @@
 		goto done_no_mutex;
 	}
 
-	/* hold the HW mutex across the firmware download and SBR */
-	ret = acquire_hw_mutex(dd);
-	if (ret)
+	/* hold the SBus resource across the firmware download and SBR */
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		dd_dev_err(dd, "%s: unable to acquire SBus resource\n",
+			   __func__);
 		return ret;
+	}
 
 	/* make sure thermal polling is not causing interrupts */
 	therm = read_csr(dd, ASIC_CFG_THERM_POLL_EN);
@@ -1030,8 +1034,11 @@
 	/* step 4: download PCIe Gen3 SerDes firmware */
 	dd_dev_info(dd, "%s: downloading firmware\n", __func__);
 	ret = load_pcie_firmware(dd);
-	if (ret)
+	if (ret) {
+		/* do not proceed if the firmware cannot be downloaded */
+		return_error = 1;
 		goto done;
+	}
 
 	/* step 5: set up device parameter settings */
 	dd_dev_info(dd, "%s: setting PCIe registers\n", __func__);
@@ -1091,8 +1098,10 @@
 		default_pset = DEFAULT_MCP_PSET;
 	}
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101,
-		(fs << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT)
-		| (lf << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
+			       (fs <<
+				PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT) |
+			       (lf <<
+				PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
 	ret = load_eq_table(dd, eq, fs, div);
 	if (ret)
 		goto done;
@@ -1106,15 +1115,15 @@
 		pcie_pset = default_pset;
 	if (pcie_pset > 10) {	/* valid range is 0-10, inclusive */
 		dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n",
-			__func__, pcie_pset, default_pset);
+			   __func__, pcie_pset, default_pset);
 		pcie_pset = default_pset;
 	}
 	dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pcie_pset);
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106,
-		((1 << pcie_pset)
-			<< PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT)
-		| PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK
-		| PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
+			       ((1 << pcie_pset) <<
+			PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) |
+			PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK |
+			PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
 
 	/*
 	 * step 5b: Do post firmware download steps via SBus
@@ -1142,11 +1151,12 @@
 	 */
 	write_xmt_margin(dd, __func__);
 
-	/* step 5e: disable active state power management (ASPM) */
+	/*
+	 * step 5e: disable active state power management (ASPM). It
+	 * will be enabled if required later
+	 */
 	dd_dev_info(dd, "%s: clearing ASPM\n", __func__);
-	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &lnkctl);
-	lnkctl &= ~PCI_EXP_LNKCTL_ASPMC;
-	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, lnkctl);
+	aspm_hw_disable_l1(dd);
 
 	/*
 	 * step 5f: clear DirectSpeedChange
@@ -1165,16 +1175,15 @@
 	 * that it is Gen3 capable earlier.
 	 */
 	dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
-	parent = dd->pcidev->bus->self;
 	pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
 	dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	/* only write to parent if target is not as high as ours */
 	if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) {
 		lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
 		lnkctl2 |= target_vector;
 		dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
-			(u32)lnkctl2);
+			    (u32)lnkctl2);
 		pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2);
 	} else {
 		dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
@@ -1183,17 +1192,17 @@
 	dd_dev_info(dd, "%s: setting target link speed\n", __func__);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
 	dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
 	lnkctl2 |= target_vector;
 	dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
 
 	/* step 5h: arm gasket logic */
 	/* hold DC in reset across the SBR */
 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
-	(void) read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
+	(void)read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
 	/* save firmware control across the SBR */
 	fw_ctrl = read_csr(dd, MISC_CFG_FW_CTRL);
 
@@ -1224,8 +1233,8 @@
 	ret = pci_read_config_word(dd->pcidev, PCI_VENDOR_ID, &vendor);
 	if (ret) {
 		dd_dev_info(dd,
-			"%s: read of VendorID failed after SBR, err %d\n",
-			__func__, ret);
+			    "%s: read of VendorID failed after SBR, err %d\n",
+			    __func__, ret);
 		return_error = 1;
 		goto done;
 	}
@@ -1265,8 +1274,7 @@
 	write_csr(dd, CCE_DC_CTRL, 0);
 
 	/* Set the LED off */
-	if (is_ax(dd))
-		setextled(dd, 0);
+	setextled(dd, 0);
 
 	/* check for any per-lane errors */
 	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
@@ -1277,8 +1285,8 @@
 			& ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_STS_MASK;
 	if ((status & (1 << dd->hfi1_id)) == 0) {
 		dd_dev_err(dd,
-			"%s: gasket status 0x%x, expecting 0x%x\n",
-			__func__, status, 1 << dd->hfi1_id);
+			   "%s: gasket status 0x%x, expecting 0x%x\n",
+			   __func__, status, 1 << dd->hfi1_id);
 		ret = -EIO;
 		goto done;
 	}
@@ -1295,13 +1303,13 @@
 	/* update our link information cache */
 	update_lbus_info(dd);
 	dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
-		dd->lbus_info);
+		    dd->lbus_info);
 
 	if (dd->lbus_speed != target_speed) { /* not target */
 		/* maybe retry */
 		do_retry = retry_count < pcie_retry;
 		dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
-			pcie_target, do_retry ? ", retrying" : "");
+			   pcie_target, do_retry ? ", retrying" : "");
 		retry_count++;
 		if (do_retry) {
 			msleep(100); /* allow time to settle */
@@ -1317,7 +1325,7 @@
 		dd_dev_info(dd, "%s: Re-enable therm polling\n",
 			    __func__);
 	}
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 done_no_mutex:
 	/* return no error if it is OK to be at current speed */
 	if (ret && !return_error) {

diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c
index b51a441..c6849ce 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/staging/rdma/hfi1/pio.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -101,7 +98,7 @@
 	/* Fall through */
 	case PSC_DATA_VL_ENABLE:
 		/* Disallow sending on VLs not enabled */
-		mask = (((~0ull)<<num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK)<<
+		mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
 				SEND_CTRL_UNSUPPORTED_VL_SHIFT;
 		reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
 		break;
@@ -130,7 +127,7 @@
 	if (write) {
 		write_csr(dd, SEND_CTRL, reg);
 		if (flush)
-			(void) read_csr(dd, SEND_CTRL); /* flush write */
+			(void)read_csr(dd, SEND_CTRL); /* flush write */
 	}
 
 	spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
@@ -177,8 +174,10 @@
 
 /* memory pool information, used when calculating final sizes */
 struct mem_pool_info {
-	int centipercent;	/* 100th of 1% of memory to use, -1 if blocks
-				   already set */
+	int centipercent;	/*
+				 * 100th of 1% of memory to use, -1 if blocks
+				 * already set
+				 */
 	int count;		/* count of contexts in the pool */
 	int blocks;		/* block size of the pool */
 	int size;		/* context size, in blocks */
@@ -312,7 +311,7 @@
 		if (i == SC_ACK) {
 			count = dd->n_krcv_queues;
 		} else if (i == SC_KERNEL) {
-			count = num_vls + 1 /* VL15 */;
+			count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */;
 		} else if (count == SCC_PER_CPU) {
 			count = dd->num_rcv_contexts - dd->n_krcv_queues;
 		} else if (count < 0) {
@@ -509,7 +508,7 @@
 	sci = &dd->send_contexts[sw_index];
 	if (!sci->allocated) {
 		dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n",
-			__func__, sw_index, hw_context);
+			   __func__, sw_index, hw_context);
 	}
 	sci->allocated = 0;
 	dd->hw_to_sw[hw_context] = INVALID_SCI;
@@ -625,7 +624,7 @@
 				& SC(CREDIT_CTRL_THRESHOLD_MASK))
 			   << SC(CREDIT_CTRL_THRESHOLD_SHIFT));
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 
 		/* force a credit return on change to avoid a possible stall */
 		force_return = 1;
@@ -700,7 +699,7 @@
 	if (dd->flags & HFI1_FROZEN)
 		return NULL;
 
-	sc = kzalloc_node(sizeof(struct send_context), GFP_KERNEL, numa);
+	sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa);
 	if (!sc)
 		return NULL;
 
@@ -763,9 +762,9 @@
 
 	/* set the default partition key */
 	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY),
-		(DEFAULT_PKEY &
-			SC(CHECK_PARTITION_KEY_VALUE_MASK))
-		    << SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
+			(SC(CHECK_PARTITION_KEY_VALUE_MASK) &
+			 DEFAULT_PKEY) <<
+			SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
 
 	/* per context type checks */
 	if (type == SC_USER) {
@@ -778,8 +777,8 @@
 
 	/* set the send context check opcode mask and value */
 	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE),
-		((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
-		((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
+			((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
+			((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
 
 	/* set up credit return */
 	reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
@@ -797,7 +796,7 @@
 		thresh = sc_percent_to_threshold(sc, 50);
 	} else if (type == SC_USER) {
 		thresh = sc_percent_to_threshold(sc,
-				user_credit_return_threshold);
+						 user_credit_return_threshold);
 	} else { /* kernel */
 		thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
 	}
@@ -852,7 +851,6 @@
 		  sc->credit_ctrl,
 		  thresh);
 
-
 	return sc;
 }
 
@@ -971,11 +969,11 @@
 		if (loop > 500) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd,
-				"%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
-				__func__, sc->sw_index,
-				sc->hw_context, (u32)reg);
+				   "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
+				   __func__, sc->sw_index,
+				   sc->hw_context, (u32)reg);
 			queue_work(dd->pport->hfi1_wq,
-				&dd->pport->link_bounce_work);
+				   &dd->pport->link_bounce_work);
 			break;
 		}
 		loop++;
@@ -1021,7 +1019,7 @@
 		return -EINVAL;
 
 	dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index,
-		sc->hw_context);
+		    sc->hw_context);
 
 	/*
 	 * Step 1: Wait for the context to actually halt.
@@ -1036,7 +1034,7 @@
 			break;
 		if (loop > 100) {
 			dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n",
-				__func__, sc->sw_index, sc->hw_context);
+				   __func__, sc->sw_index, sc->hw_context);
 			return -ETIME;
 		}
 		loop++;
@@ -1062,9 +1060,9 @@
 				break;
 			if (loop > 100) {
 				dd_dev_err(dd,
-					"%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
-					__func__, sc->sw_index,
-					sc->hw_context, count);
+					   "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
+					   __func__, sc->sw_index,
+					   sc->hw_context, count);
 			}
 			loop++;
 			udelay(1);
@@ -1177,18 +1175,18 @@
 	if (ret == -EIO) {
 		/* clear the error */
 		write_csr(dd, SEND_PIO_ERR_CLEAR,
-			SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
+			  SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
 	}
 
 	/* reset init all */
 	write_csr(dd, SEND_PIO_INIT_CTXT,
-			SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
+		  SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
 	udelay(2);
 	ret = pio_init_wait_progress(dd);
 	if (ret < 0) {
 		dd_dev_err(dd,
-			"PIO send context init %s while initializing all PIO blocks\n",
-			ret == -ETIMEDOUT ? "is stuck" : "had an error");
+			   "PIO send context init %s while initializing all PIO blocks\n",
+			   ret == -ETIMEDOUT ? "is stuck" : "had an error");
 	}
 }
 
@@ -1236,8 +1234,7 @@
 	 */
 	reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS));
 	if (reg)
-		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR),
-			reg);
+		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg);
 
 	/*
 	 * The HW PIO initialization engine can handle only one init
@@ -1295,7 +1292,7 @@
 
 	/* a 0->1 transition schedules a credit return */
 	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE),
-		SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
+			SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
 	/*
 	 * Ensure that the write is flushed and the credit return is
 	 * scheduled. We care more about the 0 -> 1 transition.
@@ -1321,7 +1318,7 @@
 		return;
 
 	dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
-			__func__, sc->sw_index, sc->hw_context);
+		    __func__, sc->sw_index, sc->hw_context);
 }
 
 /*
@@ -1346,7 +1343,7 @@
 	wake_up(&sc->halt_wait);
 }
 
-#define BLOCK_DWORDS (PIO_BLOCK_SIZE/sizeof(u32))
+#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32))
 #define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS)
 
 /*
@@ -1430,8 +1427,10 @@
 	next = head + 1;
 	if (next >= sc->sr_size)
 		next = 0;
-	/* update the head - must be last! - the releaser can look at fields
-	   in pbuf once we move the head */
+	/*
+	 * update the head - must be last! - the releaser can look at fields
+	 * in pbuf once we move the head
+	 */
 	smp_wmb();
 	sc->sr_head = next;
 	spin_unlock_irqrestore(&sc->alloc_lock, flags);
@@ -1469,7 +1468,7 @@
 	if (sc->credit_intr_count == 0) {
 		sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 	}
 	sc->credit_intr_count++;
 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
@@ -1491,7 +1490,7 @@
 	if (sc->credit_intr_count == 0) {
 		sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 	}
 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
 }
@@ -1526,8 +1525,9 @@
 	struct hfi1_devdata *dd = sc->dd;
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
 	struct list_head *list;
-	struct hfi1_qp *qps[PIO_WAIT_BATCH_SIZE];
-	struct hfi1_qp *qp;
+	struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
+	struct rvt_qp *qp;
+	struct hfi1_qp_priv *priv;
 	unsigned long flags;
 	unsigned i, n = 0;
 
@@ -1545,24 +1545,28 @@
 		struct iowait *wait;
 
 		if (n == ARRAY_SIZE(qps))
-			goto full;
+			break;
 		wait = list_first_entry(list, struct iowait, list);
-		qp = container_of(wait, struct hfi1_qp, s_iowait);
-		list_del_init(&qp->s_iowait.list);
+		qp = iowait_to_qp(wait);
+		priv = qp->priv;
+		list_del_init(&priv->s_iowait.list);
 		/* refcount held until actual wake up */
 		qps[n++] = qp;
 	}
 	/*
-	 * Counting: only call wantpiobuf_intr() if there were waiters and they
-	 * are now all gone.
+	 * If there had been waiters and there are more
+	 * insure that we redo the force to avoid a potential hang.
 	 */
-	if (n)
+	if (n) {
 		hfi1_sc_wantpiobuf_intr(sc, 0);
-full:
+		if (!list_empty(list))
+			hfi1_sc_wantpiobuf_intr(sc, 1);
+	}
 	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
 
 	for (i = 0; i < n; i++)
-		hfi1_qp_wakeup(qps[i], HFI1_S_WAIT_PIO);
+		hfi1_qp_wakeup(qps[i],
+			       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
 }
 
 /* translate a send credit update to a bit code of reasons */
@@ -1661,7 +1665,7 @@
 	sw_index = dd->hw_to_sw[hw_context];
 	if (unlikely(sw_index >= dd->num_send_contexts)) {
 		dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n",
-			__func__, hw_context, sw_index);
+			   __func__, hw_context, sw_index);
 		goto done;
 	}
 	sc = dd->send_contexts[sw_index].sc;
@@ -1674,8 +1678,8 @@
 		sw_index = dd->hw_to_sw[gc];
 		if (unlikely(sw_index >= dd->num_send_contexts)) {
 			dd_dev_err(dd,
-				"%s: invalid hw (%u) to sw (%u) mapping\n",
-				__func__, hw_context, sw_index);
+				   "%s: invalid hw (%u) to sw (%u) mapping\n",
+				   __func__, hw_context, sw_index);
 			continue;
 		}
 		sc_release_update(dd->send_contexts[sw_index].sc);
@@ -1684,11 +1688,217 @@
 	spin_unlock(&dd->sc_lock);
 }
 
+/*
+ * pio_select_send_context_vl() - select send context
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns a send context based on the selector and a vl.
+ * The mapping fields are protected by RCU
+ */
+struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
+						u32 selector, u8 vl)
+{
+	struct pio_vl_map *m;
+	struct pio_map_elem *e;
+	struct send_context *rval;
+
+	/*
+	 * NOTE This should only happen if SC->VL changed after the initial
+	 * checks on the QP/AH
+	 * Default will return VL0's send context below
+	 */
+	if (unlikely(vl >= num_vls)) {
+		rval = NULL;
+		goto done;
+	}
+
+	rcu_read_lock();
+	m = rcu_dereference(dd->pio_map);
+	if (unlikely(!m)) {
+		rcu_read_unlock();
+		return dd->vld[0].sc;
+	}
+	e = m->map[vl & m->mask];
+	rval = e->ksc[selector & e->mask];
+	rcu_read_unlock();
+
+done:
+	rval = !rval ? dd->vld[0].sc : rval;
+	return rval;
+}
+
+/*
+ * pio_select_send_context_sc() - select send context
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @sc5: the 5 bit sc
+ *
+ * This function returns an send context based on the selector and an sc
+ */
+struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
+						u32 selector, u8 sc5)
+{
+	u8 vl = sc_to_vlt(dd, sc5);
+
+	return pio_select_send_context_vl(dd, selector, vl);
+}
+
+/*
+ * Free the indicated map struct
+ */
+static void pio_map_free(struct pio_vl_map *m)
+{
+	int i;
+
+	for (i = 0; m && i < m->actual_vls; i++)
+		kfree(m->map[i]);
+	kfree(m);
+}
+
+/*
+ * Handle RCU callback
+ */
+static void pio_map_rcu_callback(struct rcu_head *list)
+{
+	struct pio_vl_map *m = container_of(list, struct pio_vl_map, list);
+
+	pio_map_free(m);
+}
+
+/*
+ * pio_map_init - called when #vls change
+ * @dd: hfi1_devdata
+ * @port: port number
+ * @num_vls: number of vls
+ * @vl_scontexts: per vl send context mapping (optional)
+ *
+ * This routine changes the mapping based on the number of vls.
+ *
+ * vl_scontexts is used to specify a non-uniform vl/send context
+ * loading. NULL implies auto computing the loading and giving each
+ * VL an uniform distribution of send contexts per VL.
+ *
+ * The auto algorithm computers the sc_per_vl and the number of extra
+ * send contexts. Any extra send contexts are added from the last VL
+ * on down
+ *
+ * rcu locking is used here to control access to the mapping fields.
+ *
+ * If either the num_vls or num_send_contexts are non-power of 2, the
+ * array sizes in the struct pio_vl_map and the struct pio_map_elem are
+ * rounded up to the next highest power of 2 and the first entry is
+ * reused in a round robin fashion.
+ *
+ * If an error occurs the map change is not done and the mapping is not
+ * chaged.
+ *
+ */
+int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
+{
+	int i, j;
+	int extra, sc_per_vl;
+	int scontext = 1;
+	int num_kernel_send_contexts = 0;
+	u8 lvl_scontexts[OPA_MAX_VLS];
+	struct pio_vl_map *oldmap, *newmap;
+
+	if (!vl_scontexts) {
+		/* send context 0 reserved for VL15 */
+		for (i = 1; i < dd->num_send_contexts; i++)
+			if (dd->send_contexts[i].type == SC_KERNEL)
+				num_kernel_send_contexts++;
+		/* truncate divide */
+		sc_per_vl = num_kernel_send_contexts / num_vls;
+		/* extras */
+		extra = num_kernel_send_contexts % num_vls;
+		vl_scontexts = lvl_scontexts;
+		/* add extras from last vl down */
+		for (i = num_vls - 1; i >= 0; i--, extra--)
+			vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
+	}
+	/* build new map */
+	newmap = kzalloc(sizeof(*newmap) +
+			 roundup_pow_of_two(num_vls) *
+			 sizeof(struct pio_map_elem *),
+			 GFP_KERNEL);
+	if (!newmap)
+		goto bail;
+	newmap->actual_vls = num_vls;
+	newmap->vls = roundup_pow_of_two(num_vls);
+	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
+	for (i = 0; i < newmap->vls; i++) {
+		/* save for wrap around */
+		int first_scontext = scontext;
+
+		if (i < newmap->actual_vls) {
+			int sz = roundup_pow_of_two(vl_scontexts[i]);
+
+			/* only allocate once */
+			newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
+						 sz * sizeof(struct
+							     send_context *),
+						 GFP_KERNEL);
+			if (!newmap->map[i])
+				goto bail;
+			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
+			/* assign send contexts */
+			for (j = 0; j < sz; j++) {
+				if (dd->kernel_send_context[scontext])
+					newmap->map[i]->ksc[j] =
+					dd->kernel_send_context[scontext];
+				if (++scontext >= first_scontext +
+						  vl_scontexts[i])
+					/* wrap back to first send context */
+					scontext = first_scontext;
+			}
+		} else {
+			/* just re-use entry without allocating */
+			newmap->map[i] = newmap->map[i % num_vls];
+		}
+		scontext = first_scontext + vl_scontexts[i];
+	}
+	/* newmap in hand, save old map */
+	spin_lock_irq(&dd->pio_map_lock);
+	oldmap = rcu_dereference_protected(dd->pio_map,
+					   lockdep_is_held(&dd->pio_map_lock));
+
+	/* publish newmap */
+	rcu_assign_pointer(dd->pio_map, newmap);
+
+	spin_unlock_irq(&dd->pio_map_lock);
+	/* success, free any old map after grace period */
+	if (oldmap)
+		call_rcu(&oldmap->list, pio_map_rcu_callback);
+	return 0;
+bail:
+	/* free any partial allocation */
+	pio_map_free(newmap);
+	return -ENOMEM;
+}
+
+void free_pio_map(struct hfi1_devdata *dd)
+{
+	/* Free PIO map if allocated */
+	if (rcu_access_pointer(dd->pio_map)) {
+		spin_lock_irq(&dd->pio_map_lock);
+		pio_map_free(rcu_access_pointer(dd->pio_map));
+		RCU_INIT_POINTER(dd->pio_map, NULL);
+		spin_unlock_irq(&dd->pio_map_lock);
+		synchronize_rcu();
+	}
+	kfree(dd->kernel_send_context);
+	dd->kernel_send_context = NULL;
+}
+
 int init_pervl_scs(struct hfi1_devdata *dd)
 {
 	int i;
-	u64 mask, all_vl_mask = (u64) 0x80ff; /* VLs 0-7, 15 */
+	u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */
+	u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */
 	u32 ctxt;
+	struct hfi1_pportdata *ppd = dd->pport;
 
 	dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
 				  dd->rcd[0]->rcvhdrqentsize, dd->node);
@@ -1696,6 +1906,12 @@
 		goto nomem;
 	hfi1_init_ctxt(dd->vld[15].sc);
 	dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
+
+	dd->kernel_send_context = kmalloc_node(dd->num_send_contexts *
+					sizeof(struct send_context *),
+					GFP_KERNEL, dd->node);
+	dd->kernel_send_context[0] = dd->vld[15].sc;
+
 	for (i = 0; i < num_vls; i++) {
 		/*
 		 * Since this function does not deal with a specific
@@ -1708,12 +1924,19 @@
 					 dd->rcd[0]->rcvhdrqentsize, dd->node);
 		if (!dd->vld[i].sc)
 			goto nomem;
-
+		dd->kernel_send_context[i + 1] = dd->vld[i].sc;
 		hfi1_init_ctxt(dd->vld[i].sc);
-
 		/* non VL15 start with the max MTU */
 		dd->vld[i].mtu = hfi1_max_mtu;
 	}
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
+		dd->kernel_send_context[i + 1] =
+		sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node);
+		if (!dd->kernel_send_context[i + 1])
+			goto nomem;
+		hfi1_init_ctxt(dd->kernel_send_context[i + 1]);
+	}
+
 	sc_enable(dd->vld[15].sc);
 	ctxt = dd->vld[15].sc->hw_context;
 	mask = all_vl_mask & ~(1LL << 15);
@@ -1721,17 +1944,29 @@
 	dd_dev_info(dd,
 		    "Using send context %u(%u) for VL15\n",
 		    dd->vld[15].sc->sw_index, ctxt);
+
 	for (i = 0; i < num_vls; i++) {
 		sc_enable(dd->vld[i].sc);
 		ctxt = dd->vld[i].sc->hw_context;
-		mask = all_vl_mask & ~(1LL << i);
+		mask = all_vl_mask & ~(data_vls_mask);
 		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
 	}
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
+		sc_enable(dd->kernel_send_context[i + 1]);
+		ctxt = dd->kernel_send_context[i + 1]->hw_context;
+		mask = all_vl_mask & ~(data_vls_mask);
+		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
+	}
+
+	if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
+		goto nomem;
 	return 0;
 nomem:
 	sc_free(dd->vld[15].sc);
 	for (i = 0; i < num_vls; i++)
 		sc_free(dd->vld[i].sc);
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
+		sc_free(dd->kernel_send_context[i + 1]);
 	return -ENOMEM;
 }
 
@@ -1769,11 +2004,11 @@
 					bytes,
 					&dd->cr_base[i].pa,
 					GFP_KERNEL);
-		if (dd->cr_base[i].va == NULL) {
+		if (!dd->cr_base[i].va) {
 			set_dev_node(&dd->pcidev->dev, dd->node);
 			dd_dev_err(dd,
-				"Unable to allocate credit return DMA range for NUMA %d\n",
-				i);
+				   "Unable to allocate credit return DMA range for NUMA %d\n",
+				   i);
 			ret = -ENOMEM;
 			goto done;
 		}
@@ -1797,10 +2032,10 @@
 	for (i = 0; i < num_numa; i++) {
 		if (dd->cr_base[i].va) {
 			dma_free_coherent(&dd->pcidev->dev,
-				TXE_NUM_CONTEXTS
-					* sizeof(struct credit_return),
-				dd->cr_base[i].va,
-				dd->cr_base[i].pa);
+					  TXE_NUM_CONTEXTS *
+					  sizeof(struct credit_return),
+					  dd->cr_base[i].va,
+					  dd->cr_base[i].pa);
 		}
 	}
 	kfree(dd->cr_base);

diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h
index 53d3e0a..0026976 100644
--- a/drivers/staging/rdma/hfi1/pio.h
+++ b/drivers/staging/rdma/hfi1/pio.h

@@ -1,14 +1,13 @@
 #ifndef _PIO_H
 #define _PIO_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -50,7 +47,6 @@
  *
  */
 
-
 /* send context types */
 #define SC_KERNEL 0
 #define SC_ACK    1
@@ -106,6 +102,7 @@
 	struct hfi1_devdata *dd;		/* device */
 	void __iomem *base_addr;	/* start of PIO memory */
 	union pio_shadow_ring *sr;	/* shadow ring */
+
 	volatile __le64 *hw_free;	/* HW free counter */
 	struct work_struct halt_work;	/* halted context work queue entry */
 	unsigned long flags;		/* flags */
@@ -165,6 +162,112 @@
 	short int count;
 };
 
+/*
+ * The diagram below details the relationship of the mapping structures
+ *
+ * Since the mapping now allows for non-uniform send contexts per vl, the
+ * number of send contexts for a vl is either the vl_scontexts[vl] or
+ * a computation based on num_kernel_send_contexts/num_vls:
+ *
+ * For example:
+ * nactual = vl_scontexts ? vl_scontexts[vl] : num_kernel_send_contexts/num_vls
+ *
+ * n = roundup to next highest power of 2 using nactual
+ *
+ * In the case where there are num_kernel_send_contexts/num_vls doesn't divide
+ * evenly, the extras are added from the last vl downward.
+ *
+ * For the case where n > nactual, the send contexts are assigned
+ * in a round robin fashion wrapping back to the first send context
+ * for a particular vl.
+ *
+ *               dd->pio_map
+ *                    |                                   pio_map_elem[0]
+ *                    |                                +--------------------+
+ *                    v                                |       mask         |
+ *               pio_vl_map                            |--------------------|
+ *      +--------------------------+                   | ksc[0] -> sc 1     |
+ *      |    list (RCU)            |                   |--------------------|
+ *      |--------------------------|                 ->| ksc[1] -> sc 2     |
+ *      |    mask                  |              --/  |--------------------|
+ *      |--------------------------|            -/     |        *           |
+ *      |    actual_vls (max 8)    |          -/       |--------------------|
+ *      |--------------------------|       --/         | ksc[n] -> sc n     |
+ *      |    vls (max 8)           |     -/            +--------------------+
+ *      |--------------------------|  --/
+ *      |    map[0]                |-/
+ *      |--------------------------|                   +--------------------+
+ *      |    map[1]                |---                |       mask         |
+ *      |--------------------------|   \----           |--------------------|
+ *      |           *              |        \--        | ksc[0] -> sc 1+n   |
+ *      |           *              |           \----   |--------------------|
+ *      |           *              |                \->| ksc[1] -> sc 2+n   |
+ *      |--------------------------|                   |--------------------|
+ *      |   map[vls - 1]           |-                  |         *          |
+ *      +--------------------------+ \-                |--------------------|
+ *                                     \-              | ksc[m] -> sc m+n   |
+ *                                       \             +--------------------+
+ *                                        \-
+ *                                          \
+ *                                           \-        +--------------------+
+ *                                             \-      |       mask         |
+ *                                               \     |--------------------|
+ *                                                \-   | ksc[0] -> sc 1+m+n |
+ *                                                  \- |--------------------|
+ *                                                    >| ksc[1] -> sc 2+m+n |
+ *                                                     |--------------------|
+ *                                                     |         *          |
+ *                                                     |--------------------|
+ *                                                     | ksc[o] -> sc o+m+n |
+ *                                                     +--------------------+
+ *
+ */
+
+/* Initial number of send contexts per VL */
+#define INIT_SC_PER_VL 2
+
+/*
+ * struct pio_map_elem - mapping for a vl
+ * @mask - selector mask
+ * @ksc - array of kernel send contexts for this vl
+ *
+ * The mask is used to "mod" the selector to
+ * produce index into the trailing array of
+ * kscs
+ */
+struct pio_map_elem {
+	u32 mask;
+	struct send_context *ksc[0];
+};
+
+/*
+ * struct pio_vl_map - mapping for a vl
+ * @list - rcu head for free callback
+ * @mask - vl mask to "mod" the vl to produce an index to map array
+ * @actual_vls - number of vls
+ * @vls - numbers of vls rounded to next power of 2
+ * @map - array of pio_map_elem entries
+ *
+ * This is the parent mapping structure. The trailing members of the
+ * struct point to pio_map_elem entries, which in turn point to an
+ * array of kscs for that vl.
+ */
+struct pio_vl_map {
+	struct rcu_head list;
+	u32 mask;
+	u8 actual_vls;
+	u8 vls;
+	struct pio_map_elem *map[0];
+};
+
+int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
+		 u8 *vl_scontexts);
+void free_pio_map(struct hfi1_devdata *dd);
+struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
+						u32 selector, u8 vl);
+struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
+						u32 selector, u8 sc5);
+
 /* send context functions */
 int init_credit_return(struct hfi1_devdata *dd);
 void free_credit_return(struct hfi1_devdata *dd);
@@ -183,7 +286,7 @@
 void sc_drop(struct send_context *sc);
 void sc_stop(struct send_context *sc, int bit);
 struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
-			pio_release_cb cb, void *arg);
+				pio_release_cb cb, void *arg);
 void sc_release_update(struct send_context *sc);
 void sc_return_credits(struct send_context *sc);
 void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
@@ -212,12 +315,11 @@
 void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl);
 void pio_send_control(struct hfi1_devdata *dd, int op);
 
-
 /* PIO copy routines */
 void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 	      const void *from, size_t count);
 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
-					const void *from, size_t nbytes);
+			const void *from, size_t nbytes);
 void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
 void seg_pio_copy_end(struct pio_buf *pbuf);
 

diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/staging/rdma/hfi1/pio_copy.c
index 64bef6c2..8c25e1b 100644
--- a/drivers/staging/rdma/hfi1/pio_copy.c
+++ b/drivers/staging/rdma/hfi1/pio_copy.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,9 +49,9 @@
 
 /* additive distance between non-SOP and SOP space */
 #define SOP_DISTANCE (TXE_PIO_SIZE / 2)
-#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE-1)
+#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
 /* number of QUADWORDs in a block */
-#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE/sizeof(u64))
+#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
 
 /**
  * pio_copy - copy data block to MMIO space
@@ -83,11 +80,13 @@
 	dest += sizeof(u64);
 
 	/* calculate where the QWORD data ends - in SOP=1 space */
-	dend = dest + ((count>>1) * sizeof(u64));
+	dend = dest + ((count >> 1) * sizeof(u64));
 
 	if (dend < send) {
-		/* all QWORD data is within the SOP block, does *not*
-		   reach the end of the SOP block */
+		/*
+		 * all QWORD data is within the SOP block, does *not*
+		 * reach the end of the SOP block
+		 */
 
 		while (dest < dend) {
 			writeq(*(u64 *)from, dest);
@@ -152,8 +151,10 @@
 		writeq(val.val64, dest);
 		dest += sizeof(u64);
 	}
-	/* fill in rest of block, no need to check pbuf->end
-	   as we only wrap on a block boundary */
+	/*
+	 * fill in rest of block, no need to check pbuf->end
+	 * as we only wrap on a block boundary
+	 */
 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
 		writeq(0, dest);
 		dest += sizeof(u64);
@@ -177,7 +178,7 @@
  * "zero" shift - bit shift used to zero out upper bytes.  Input is
  * the count of LSB bytes to preserve.
  */
-#define zshift(x) (8 * (8-(x)))
+#define zshift(x) (8 * (8 - (x)))
 
 /*
  * "merge" shift - bit shift used to merge with carry bytes.  Input is
@@ -196,7 +197,7 @@
  * o nbytes must not span a QW boundary
  */
 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-							unsigned int nbytes)
+				  unsigned int nbytes)
 {
 	unsigned long off;
 
@@ -223,7 +224,7 @@
  * o nbytes may span a QW boundary
  */
 static inline void read_extra_bytes(struct pio_buf *pbuf,
-					const void *from, unsigned int nbytes)
+				    const void *from, unsigned int nbytes)
 {
 	unsigned long off = (unsigned long)from & 0x7;
 	unsigned int room, xbytes;
@@ -244,7 +245,7 @@
 		pbuf->carry.val64 |= (((*(u64 *)from)
 					>> mshift(off))
 					<< zshift(xbytes))
-					>> zshift(xbytes+pbuf->carry_bytes);
+					>> zshift(xbytes + pbuf->carry_bytes);
 		off = 0;
 		pbuf->carry_bytes += xbytes;
 		nbytes -= xbytes;
@@ -362,7 +363,7 @@
  * o from may _not_ be u64 aligned.
  */
 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-							unsigned int nbytes)
+				  unsigned int nbytes)
 {
 	jcopy(&pbuf->carry.val8[0], from, nbytes);
 	pbuf->carry_bytes = nbytes;
@@ -377,7 +378,7 @@
  * o nbytes may span a QW boundary
  */
 static inline void read_extra_bytes(struct pio_buf *pbuf,
-					const void *from, unsigned int nbytes)
+				    const void *from, unsigned int nbytes)
 {
 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
 	pbuf->carry_bytes += nbytes;
@@ -411,7 +412,7 @@
 
 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
 	writeq(pbuf->carry.val64, dest);
-	jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes);
+	jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
 }
 
 /*
@@ -433,7 +434,7 @@
 		u64 zero = 0;
 
 		jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
-						8 - pbuf->carry_bytes);
+		      8 - pbuf->carry_bytes);
 		writeq(pbuf->carry.val64, dest);
 		return 1;
 	}
@@ -453,7 +454,7 @@
  * @nbytes: bytes to copy
  */
 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
-				const void *from, size_t nbytes)
+			const void *from, size_t nbytes)
 {
 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
 	void __iomem *send = dest + PIO_BLOCK_SIZE;
@@ -463,11 +464,13 @@
 	dest += sizeof(u64);
 
 	/* calculate where the QWORD data ends - in SOP=1 space */
-	dend = dest + ((nbytes>>3) * sizeof(u64));
+	dend = dest + ((nbytes >> 3) * sizeof(u64));
 
 	if (dend < send) {
-		/* all QWORD data is within the SOP block, does *not*
-		   reach the end of the SOP block */
+		/*
+		 * all QWORD data is within the SOP block, does *not*
+		 * reach the end of the SOP block
+		 */
 
 		while (dest < dend) {
 			writeq(*(u64 *)from, dest);
@@ -562,8 +565,10 @@
 		void __iomem *send;		/* SOP end */
 		void __iomem *xend;
 
-		/* calculate the end of data or end of block, whichever
-		   comes first */
+		/*
+		 * calculate the end of data or end of block, whichever
+		 * comes first
+		 */
 		send = pbuf->start + PIO_BLOCK_SIZE;
 		xend = min(send, dend);
 
@@ -639,13 +644,13 @@
  * Must handle nbytes < 8.
  */
 static void mid_copy_straight(struct pio_buf *pbuf,
-						const void *from, size_t nbytes)
+			      const void *from, size_t nbytes)
 {
 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
 	void __iomem *dend;			/* 8-byte data end */
 
 	/* calculate 8-byte data end */
-	dend = dest + ((nbytes>>3) * sizeof(u64));
+	dend = dest + ((nbytes >> 3) * sizeof(u64));
 
 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
 		/*
@@ -656,8 +661,10 @@
 		void __iomem *send;		/* SOP end */
 		void __iomem *xend;
 
-		/* calculate the end of data or end of block, whichever
-		   comes first */
+		/*
+		 * calculate the end of data or end of block, whichever
+		 * comes first
+		 */
 		send = pbuf->start + PIO_BLOCK_SIZE;
 		xend = min(send, dend);
 
@@ -713,7 +720,7 @@
 	/* we know carry_bytes was zero on entry to this routine */
 	read_low_bytes(pbuf, from, nbytes & 0x7);
 
-	pbuf->qw_written += nbytes>>3;
+	pbuf->qw_written += nbytes >> 3;
 }
 
 /*

diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c
new file mode 100644
index 0000000..0a1d074
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/platform.c

@@ -0,0 +1,893 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "efivar.h"
+
+void get_platform_config(struct hfi1_devdata *dd)
+{
+	int ret = 0;
+	unsigned long size = 0;
+	u8 *temp_platform_config = NULL;
+
+	ret = read_hfi1_efi_var(dd, "configuration", &size,
+				(void **)&temp_platform_config);
+	if (ret) {
+		dd_dev_info(dd,
+			    "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+			    __func__);
+		/* fall back to request firmware */
+		platform_config_load = 1;
+		goto bail;
+	}
+
+	dd->platform_config.data = temp_platform_config;
+	dd->platform_config.size = size;
+
+bail:
+	/* exit */;
+}
+
+void free_platform_config(struct hfi1_devdata *dd)
+{
+	if (!platform_config_load) {
+		/*
+		 * was loaded from EFI, release memory
+		 * allocated by read_efi_var
+		 */
+		kfree(dd->platform_config.data);
+	}
+	/*
+	 * else do nothing, dispose_firmware will release
+	 * struct firmware platform_config on driver exit
+	 */
+}
+
+int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
+{
+	u8 tx_ctrl_byte = on ? 0x0 : 0xF;
+	int ret = 0;
+
+	ret = qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_TX_CTRL_BYTE_OFFS,
+			 &tx_ctrl_byte, 1);
+	/* we expected 1, so consider 0 an error */
+	if (ret == 0)
+		ret = -EIO;
+	else if (ret == 1)
+		ret = 0;
+	return ret;
+}
+
+static int qual_power(struct hfi1_pportdata *ppd)
+{
+	u32 cable_power_class = 0, power_class_max = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+	int ret = 0;
+
+	ret = get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_SYSTEM_TABLE, 0,
+		SYSTEM_TABLE_QSFP_POWER_CLASS_MAX, &power_class_max, 4);
+	if (ret)
+		return ret;
+
+	if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
+		cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
+	else
+		cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+
+	if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1))
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
+	else if (cable_power_class > 4 && cable_power_class > (power_class_max))
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
+	/*
+	 * cable_power_class will never have value 4 as this simply
+	 * means the high power settings are unused
+	 */
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Port disabled due to system power restrictions\n",
+			__func__);
+		ret = -EPERM;
+	}
+	return ret;
+}
+
+static int qual_bitrate(struct hfi1_pportdata *ppd)
+{
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G) &&
+	    cache[QSFP_NOM_BIT_RATE_250_OFFS] < 0x64)
+		ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY);
+
+	if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G) &&
+	    cache[QSFP_NOM_BIT_RATE_100_OFFS] < 0x7D)
+		ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY);
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Cable failed bitrate check, disabling port\n",
+			__func__);
+		return -EPERM;
+	}
+	return 0;
+}
+
+static int set_qsfp_high_power(struct hfi1_pportdata *ppd)
+{
+	u8 cable_power_class = 0, power_ctrl_byte = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+	int ret;
+
+	if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
+		cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
+	else
+		cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+
+	if (cable_power_class) {
+		power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS];
+
+		power_ctrl_byte |= 1;
+		power_ctrl_byte &= ~(0x2);
+
+		ret = qsfp_write(ppd, ppd->dd->hfi1_id,
+				 QSFP_PWR_CTRL_BYTE_OFFS,
+				 &power_ctrl_byte, 1);
+		if (ret != 1)
+			return -EIO;
+
+		if (cable_power_class > 3) {
+			/* > power class 4*/
+			power_ctrl_byte |= (1 << 2);
+			ret = qsfp_write(ppd, ppd->dd->hfi1_id,
+					 QSFP_PWR_CTRL_BYTE_OFFS,
+					 &power_ctrl_byte, 1);
+			if (ret != 1)
+				return -EIO;
+		}
+
+		/* SFF 8679 rev 1.7 LPMode Deassert time */
+		msleep(300);
+	}
+	return 0;
+}
+
+static void apply_rx_cdr(struct hfi1_pportdata *ppd,
+			 u32 rx_preset_index,
+			 u8 *cdr_ctrl_byte)
+{
+	u32 rx_preset;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) &&
+	      (cache[QSFP_CDR_INFO_OFFS] & 0x40)))
+		return;
+
+	/* rx_preset preset to zero to catch error */
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
+		&rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: RX_CDR_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR,
+		&rx_preset, 4);
+
+	/* Expand cdr setting to all 4 lanes */
+	rx_preset = (rx_preset | (rx_preset << 1) |
+			(rx_preset << 2) | (rx_preset << 3));
+
+	if (rx_preset) {
+		*cdr_ctrl_byte |= rx_preset;
+	} else {
+		*cdr_ctrl_byte &= rx_preset;
+		/* Preserve current TX CDR status */
+		*cdr_ctrl_byte |= (cache[QSFP_CDR_CTRL_BYTE_OFFS] & 0xF0);
+	}
+}
+
+static void apply_tx_cdr(struct hfi1_pportdata *ppd,
+			 u32 tx_preset_index,
+			 u8 *ctr_ctrl_byte)
+{
+	u32 tx_preset;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) &&
+	      (cache[QSFP_CDR_INFO_OFFS] & 0x80)))
+		return;
+
+	get_platform_config_field(
+		ppd->dd,
+		PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
+		TX_PRESET_TABLE_QSFP_TX_CDR_APPLY, &tx_preset, 4);
+
+	if (!tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX_CDR_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd,
+		PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index,
+		TX_PRESET_TABLE_QSFP_TX_CDR, &tx_preset, 4);
+
+	/* Expand cdr setting to all 4 lanes */
+	tx_preset = (tx_preset | (tx_preset << 1) |
+			(tx_preset << 2) | (tx_preset << 3));
+
+	if (tx_preset)
+		*ctr_ctrl_byte |= (tx_preset << 4);
+	else
+		/* Preserve current/determined RX CDR status */
+		*ctr_ctrl_byte &= ((tx_preset << 4) | 0xF);
+}
+
+static void apply_cdr_settings(
+		struct hfi1_pportdata *ppd, u32 rx_preset_index,
+		u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
+
+	apply_rx_cdr(ppd, rx_preset_index, &cdr_ctrl_byte);
+
+	apply_tx_cdr(ppd, tx_preset_index, &cdr_ctrl_byte);
+
+	qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+		   &cdr_ctrl_byte, 1);
+}
+
+static void apply_tx_eq_auto(struct hfi1_pportdata *ppd)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u8 tx_eq;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x8))
+		return;
+	/* Disable adaptive TX EQ if present */
+	tx_eq = cache[(128 * 3) + 241];
+	tx_eq &= 0xF0;
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 241, &tx_eq, 1);
+}
+
+static void apply_tx_eq_prog(struct hfi1_pportdata *ppd, u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u32 tx_preset;
+	u8 tx_eq;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x4))
+		return;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ_APPLY,
+		&tx_preset, 4);
+	if (!tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX_EQ_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+			tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ,
+			&tx_preset, 4);
+
+	if (((cache[(128 * 3) + 224] & 0xF0) >> 4) < tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX EQ %x unsupported\n",
+			__func__, tx_preset);
+
+		dd_dev_info(
+			ppd->dd,
+			"%s: Applying EQ %x\n",
+			__func__, cache[608] & 0xF0);
+
+		tx_preset = (cache[608] & 0xF0) >> 4;
+	}
+
+	tx_eq = tx_preset | (tx_preset << 4);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 234, &tx_eq, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 235, &tx_eq, 1);
+}
+
+static void apply_rx_eq_emp(struct hfi1_pportdata *ppd, u32 rx_preset_index)
+{
+	u32 rx_preset;
+	u8 rx_eq, *cache = ppd->qsfp_info.cache;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x2))
+		return;
+	get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+			rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP_APPLY,
+			&rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: RX_EMP_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP,
+		&rx_preset, 4);
+
+	if ((cache[(128 * 3) + 224] & 0xF) < rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Requested RX EMP %x\n",
+			__func__, rx_preset);
+
+		dd_dev_info(
+			ppd->dd,
+			"%s: Applying supported EMP %x\n",
+			__func__, cache[608] & 0xF);
+
+		rx_preset = cache[608] & 0xF;
+	}
+
+	rx_eq = rx_preset | (rx_preset << 4);
+
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 236, &rx_eq, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 237, &rx_eq, 1);
+}
+
+static void apply_eq_settings(struct hfi1_pportdata *ppd,
+			      u32 rx_preset_index, u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+
+	/* no point going on w/o a page 3 */
+	if (cache[2] & 4) {
+		dd_dev_info(ppd->dd,
+			    "%s: Upper page 03 not present\n",
+			    __func__);
+		return;
+	}
+
+	apply_tx_eq_auto(ppd);
+
+	apply_tx_eq_prog(ppd, tx_preset_index);
+
+	apply_rx_eq_emp(ppd, rx_preset_index);
+}
+
+static void apply_rx_amplitude_settings(
+		struct hfi1_pportdata *ppd, u32 rx_preset_index,
+		u32 tx_preset_index)
+{
+	u32 rx_preset;
+	u8 rx_amp = 0, i = 0, preferred = 0, *cache = ppd->qsfp_info.cache;
+
+	/* no point going on w/o a page 3 */
+	if (cache[2] & 4) {
+		dd_dev_info(ppd->dd,
+			    "%s: Upper page 03 not present\n",
+			    __func__);
+		return;
+	}
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x1)) {
+		dd_dev_info(ppd->dd,
+			    "%s: RX_AMP_APPLY is set to disabled\n",
+			    __func__);
+		return;
+	}
+
+	get_platform_config_field(ppd->dd,
+				  PLATFORM_CONFIG_RX_PRESET_TABLE,
+				  rx_preset_index,
+				  RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
+				  &rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(ppd->dd,
+			    "%s: RX_AMP_APPLY is set to disabled\n",
+			    __func__);
+		return;
+	}
+	get_platform_config_field(ppd->dd,
+				  PLATFORM_CONFIG_RX_PRESET_TABLE,
+				  rx_preset_index,
+				  RX_PRESET_TABLE_QSFP_RX_AMP,
+				  &rx_preset, 4);
+
+	dd_dev_info(ppd->dd,
+		    "%s: Requested RX AMP %x\n",
+		    __func__,
+		    rx_preset);
+
+	for (i = 0; i < 4; i++) {
+		if (cache[(128 * 3) + 225] & (1 << i)) {
+			preferred = i;
+			if (preferred == rx_preset)
+				break;
+		}
+	}
+
+	/*
+	 * Verify that preferred RX amplitude is not just a
+	 * fall through of the default
+	 */
+	if (!preferred && !(cache[(128 * 3) + 225] & 0x1)) {
+		dd_dev_info(ppd->dd, "No supported RX AMP, not applying\n");
+		return;
+	}
+
+	dd_dev_info(ppd->dd,
+		    "%s: Applying RX AMP %x\n", __func__, preferred);
+
+	rx_amp = preferred | (preferred << 4);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 238, &rx_amp, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 239, &rx_amp, 1);
+}
+
+#define OPA_INVALID_INDEX 0xFFF
+
+static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
+			   u32 config_data, const char *message)
+{
+	u8 i;
+	int ret = HCMD_SUCCESS;
+
+	for (i = 0; i < 4; i++) {
+		ret = load_8051_config(ppd->dd, field_id, i, config_data);
+		if (ret != HCMD_SUCCESS) {
+			dd_dev_err(
+				ppd->dd,
+				"%s: %s for lane %u failed\n",
+				message, __func__, i);
+		}
+	}
+}
+
+static void apply_tunings(
+		struct hfi1_pportdata *ppd, u32 tx_preset_index,
+		u8 tuning_method, u32 total_atten, u8 limiting_active)
+{
+	int ret = 0;
+	u32 config_data = 0, tx_preset = 0;
+	u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	/* Enable external device config if channel is limiting active */
+	read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
+			 GENERAL_CONFIG, &config_data);
+	config_data |= limiting_active;
+	ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
+			       GENERAL_CONFIG, config_data);
+	if (ret != HCMD_SUCCESS)
+		dd_dev_err(
+			ppd->dd,
+			"%s: Failed to set enable external device config\n",
+			__func__);
+
+	config_data = 0; /* re-init  */
+	/* Pass tuning method to 8051 */
+	read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
+			 &config_data);
+	config_data |= tuning_method;
+	ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
+			       config_data);
+	if (ret != HCMD_SUCCESS)
+		dd_dev_err(ppd->dd, "%s: Failed to set tuning method\n",
+			   __func__);
+
+	/* Set same channel loss for both TX and RX */
+	config_data = 0 | (total_atten << 16) | (total_atten << 24);
+	apply_tx_lanes(ppd, CHANNEL_LOSS_SETTINGS, config_data,
+		       "Setting channel loss");
+
+	/* Inform 8051 of cable capabilities */
+	if (ppd->qsfp_info.cache_valid) {
+		external_device_config =
+			((cache[QSFP_MOD_PWR_OFFS] & 0x4) << 3) |
+			((cache[QSFP_MOD_PWR_OFFS] & 0x8) << 2) |
+			((cache[QSFP_EQ_INFO_OFFS] & 0x2) << 1) |
+			(cache[QSFP_EQ_INFO_OFFS] & 0x4);
+		ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
+				       GENERAL_CONFIG, &config_data);
+		/* Clear, then set the external device config field */
+		config_data &= ~(0xFF << 24);
+		config_data |= (external_device_config << 24);
+		ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
+				       GENERAL_CONFIG, config_data);
+		if (ret != HCMD_SUCCESS)
+			dd_dev_info(ppd->dd,
+				    "%s: Failed set ext device config params\n",
+				    __func__);
+	}
+
+	if (tx_preset_index == OPA_INVALID_INDEX) {
+		if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
+			dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
+				    __func__);
+		return;
+	}
+
+	/* Following for limiting active channels only */
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
+		TX_PRESET_TABLE_PRECUR, &tx_preset, 4);
+	precur = tx_preset;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_ATTN, &tx_preset, 4);
+	attn = tx_preset;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
+	postcur = tx_preset;
+
+	config_data = precur | (attn << 8) | (postcur << 16);
+
+	apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
+		       "Applying TX settings");
+}
+
+static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
+			    u32 *ptr_rx_preset, u32 *ptr_total_atten)
+{
+	int ret;
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
+	if (ret) {
+		dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
+			   __func__, (int)ppd->dd->hfi1_id);
+		return ret;
+	}
+
+	ppd->qsfp_info.limiting_active = 1;
+
+	ret = set_qsfp_tx(ppd, 0);
+	if (ret)
+		goto bail_unlock;
+
+	ret = qual_power(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	ret = qual_bitrate(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	if (ppd->qsfp_info.reset_needed) {
+		reset_qsfp(ppd);
+		ppd->qsfp_info.reset_needed = 0;
+		refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+	} else {
+		ppd->qsfp_info.reset_needed = 1;
+	}
+
+	ret = set_qsfp_high_power(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	if (cache[QSFP_EQ_INFO_OFFS] & 0x4) {
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
+			ptr_tx_preset, 4);
+		if (ret) {
+			*ptr_tx_preset = OPA_INVALID_INDEX;
+			goto bail_unlock;
+		}
+	} else {
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
+			ptr_tx_preset, 4);
+		if (ret) {
+			*ptr_tx_preset = OPA_INVALID_INDEX;
+			goto bail_unlock;
+		}
+	}
+
+	ret = get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+		PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4);
+	if (ret) {
+		*ptr_rx_preset = OPA_INVALID_INDEX;
+		goto bail_unlock;
+	}
+
+	if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G, ptr_total_atten, 4);
+	else if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G))
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_12G, ptr_total_atten, 4);
+
+	apply_cdr_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	apply_eq_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	apply_rx_amplitude_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	ret = set_qsfp_tx(ppd, 1);
+
+bail_unlock:
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+	return ret;
+}
+
+static int tune_qsfp(struct hfi1_pportdata *ppd,
+		     u32 *ptr_tx_preset, u32 *ptr_rx_preset,
+		     u8 *ptr_tuning_method, u32 *ptr_total_atten)
+{
+	u32 cable_atten = 0, remote_atten = 0, platform_atten = 0;
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	int ret = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
+	case 0xA ... 0xB:
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G,
+			&platform_atten, 4);
+		if (ret)
+			return ret;
+
+		if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
+			cable_atten = cache[QSFP_CU_ATTEN_12G_OFFS];
+		else if ((lss & OPA_LINK_SPEED_12_5G) &&
+			 (lse & OPA_LINK_SPEED_12_5G))
+			cable_atten = cache[QSFP_CU_ATTEN_7G_OFFS];
+
+		/* Fallback to configured attenuation if cable memory is bad */
+		if (cable_atten == 0 || cable_atten > 36) {
+			ret = get_platform_config_field(
+				ppd->dd,
+				PLATFORM_CONFIG_SYSTEM_TABLE, 0,
+				SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G,
+				&cable_atten, 4);
+			if (ret)
+				return ret;
+		}
+
+		ret = get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4);
+		if (ret)
+			return ret;
+
+		*ptr_total_atten = platform_atten + cable_atten + remote_atten;
+
+		*ptr_tuning_method = OPA_PASSIVE_TUNING;
+		break;
+	case 0x0 ... 0x9: /* fallthrough */
+	case 0xC: /* fallthrough */
+	case 0xE:
+		ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
+				       ptr_total_atten);
+		if (ret)
+			return ret;
+
+		*ptr_tuning_method = OPA_ACTIVE_TUNING;
+		break;
+	case 0xD: /* fallthrough */
+	case 0xF:
+	default:
+		dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
+			    __func__);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * This function communicates its success or failure via ppd->driver_link_ready
+ * Thus, it depends on its association with start_link(...) which checks
+ * driver_link_ready before proceeding with the link negotiation and
+ * initialization process.
+ */
+void tune_serdes(struct hfi1_pportdata *ppd)
+{
+	int ret = 0;
+	u32 total_atten = 0;
+	u32 remote_atten = 0, platform_atten = 0;
+	u32 rx_preset_index, tx_preset_index;
+	u8 tuning_method = 0, limiting_active = 0;
+	struct hfi1_devdata *dd = ppd->dd;
+
+	rx_preset_index = OPA_INVALID_INDEX;
+	tx_preset_index = OPA_INVALID_INDEX;
+
+	/* the link defaults to enabled */
+	ppd->link_enabled = 1;
+	/* the driver link ready state defaults to not ready */
+	ppd->driver_link_ready = 0;
+	ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
+
+	/* Skip the tuning for testing (loopback != none) and simulations */
+	if (loopback != LOOPBACK_NONE ||
+	    ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+		ppd->driver_link_ready = 1;
+		return;
+	}
+
+	ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+					PORT_TABLE_PORT_TYPE, &ppd->port_type,
+					4);
+	if (ret)
+		ppd->port_type = PORT_TYPE_UNKNOWN;
+
+	switch (ppd->port_type) {
+	case PORT_TYPE_DISCONNECTED:
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
+		dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
+			    __func__);
+		goto bail;
+	case PORT_TYPE_FIXED:
+		/* platform_atten, remote_atten pre-zeroed to catch error */
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G, &platform_atten, 4);
+
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4);
+
+		total_atten = platform_atten + remote_atten;
+
+		tuning_method = OPA_PASSIVE_TUNING;
+		break;
+	case PORT_TYPE_VARIABLE:
+		if (qsfp_mod_present(ppd)) {
+			/*
+			 * platform_atten, remote_atten pre-zeroed to
+			 * catch error
+			 */
+			get_platform_config_field(
+				ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+				PORT_TABLE_LOCAL_ATTEN_25G,
+				&platform_atten, 4);
+
+			get_platform_config_field(
+				ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+				PORT_TABLE_REMOTE_ATTEN_25G,
+				&remote_atten, 4);
+
+			total_atten = platform_atten + remote_atten;
+
+			tuning_method = OPA_PASSIVE_TUNING;
+		} else
+			ppd->offline_disabled_reason =
+			     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG);
+		break;
+	case PORT_TYPE_QSFP:
+		if (qsfp_mod_present(ppd)) {
+			refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+
+			if (ppd->qsfp_info.cache_valid) {
+				ret = tune_qsfp(ppd,
+						&tx_preset_index,
+						&rx_preset_index,
+						&tuning_method,
+						&total_atten);
+
+				/*
+				 * We may have modified the QSFP memory, so
+				 * update the cache to reflect the changes
+				 */
+				refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+				if (ret)
+					goto bail;
+
+				limiting_active =
+						ppd->qsfp_info.limiting_active;
+			} else {
+				dd_dev_err(dd,
+					   "%s: Reading QSFP memory failed\n",
+					   __func__);
+				goto bail;
+			}
+		} else
+			ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(
+				OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+		break;
+	default:
+		dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+		ppd->port_type = PORT_TYPE_UNKNOWN;
+		tuning_method = OPA_UNKNOWN_TUNING;
+		total_atten = 0;
+		limiting_active = 0;
+		tx_preset_index = OPA_INVALID_INDEX;
+		break;
+	}
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
+		apply_tunings(ppd, tx_preset_index, tuning_method,
+			      total_atten, limiting_active);
+
+	if (!ret)
+		ppd->driver_link_ready = 1;
+
+	return;
+bail:
+	ppd->driver_link_ready = 0;
+}

diff --git a/drivers/staging/rdma/hfi1/platform_config.h b/drivers/staging/rdma/hfi1/platform.h
similarity index 88%
rename from drivers/staging/rdma/hfi1/platform_config.h
rename to drivers/staging/rdma/hfi1/platform.h
index 8a94a83..19620cf 100644
--- a/drivers/staging/rdma/hfi1/platform_config.h
+++ b/drivers/staging/rdma/hfi1/platform.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -47,8 +44,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
-#ifndef __PLATFORM_CONFIG_H
-#define __PLATFORM_CONFIG_H
+#ifndef __PLATFORM_H
+#define __PLATFORM_H
 
 #define METADATA_TABLE_FIELD_START_SHIFT		0
 #define METADATA_TABLE_FIELD_START_LEN_BITS		15
@@ -94,17 +91,18 @@
 enum platform_config_port_table_fields {
 	PORT_TABLE_RESERVED,
 	PORT_TABLE_PORT_TYPE,
-	PORT_TABLE_ATTENUATION_12G,
-	PORT_TABLE_ATTENUATION_25G,
+	PORT_TABLE_LOCAL_ATTEN_12G,
+	PORT_TABLE_LOCAL_ATTEN_25G,
 	PORT_TABLE_LINK_SPEED_SUPPORTED,
 	PORT_TABLE_LINK_WIDTH_SUPPORTED,
+	PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
+	PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
 	PORT_TABLE_VL_CAP,
 	PORT_TABLE_MTU_CAP,
 	PORT_TABLE_TX_LANE_ENABLE_MASK,
 	PORT_TABLE_LOCAL_MAX_TIMEOUT,
-	PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
-	PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
-	PORT_TABLE_TX_PRESET_IDX_PASSIVE_CU,
+	PORT_TABLE_REMOTE_ATTEN_12G,
+	PORT_TABLE_REMOTE_ATTEN_25G,
 	PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
 	PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
 	PORT_TABLE_RX_PRESET_IDX,
@@ -115,10 +113,10 @@
 enum platform_config_rx_preset_table_fields {
 	RX_PRESET_TABLE_RESERVED,
 	RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
-	RX_PRESET_TABLE_QSFP_RX_EQ_APPLY,
+	RX_PRESET_TABLE_QSFP_RX_EMP_APPLY,
 	RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
 	RX_PRESET_TABLE_QSFP_RX_CDR,
-	RX_PRESET_TABLE_QSFP_RX_EQ,
+	RX_PRESET_TABLE_QSFP_RX_EMP,
 	RX_PRESET_TABLE_QSFP_RX_AMP,
 	RX_PRESET_TABLE_MAX
 };
@@ -149,6 +147,11 @@
 	VARIABLE_SETTINGS_TABLE_MAX
 };
 
+struct platform_config {
+	size_t size;
+	const u8 *data;
+};
+
 struct platform_config_data {
 	u32 *table;
 	u32 *table_metadata;
@@ -179,9 +182,11 @@
  * fields defined for each table above
  */
 
-/*=====================================================
+/*
+ * =====================================================
  *  System table encodings
- *====================================================*/
+ * =====================================================
+ */
 #define PLATFORM_CONFIG_MAGIC_NUM		0x3d4f5041
 #define PLATFORM_CONFIG_MAGIC_NUMBER_LEN	4
 
@@ -199,12 +204,13 @@
 	QSFP_POWER_CLASS_7
 };
 
-
-/*=====================================================
+/*
+ * ====================================================
  *  Port table encodings
- *==================================================== */
+ * ====================================================
+ */
 enum platform_config_port_type_encoding {
-	PORT_TYPE_RESERVED,
+	PORT_TYPE_UNKNOWN,
 	PORT_TYPE_DISCONNECTED,
 	PORT_TYPE_FIXED,
 	PORT_TYPE_VARIABLE,
@@ -283,4 +289,16 @@
 	LOCAL_MAX_TIMEOUT_1000_S
 };
 
-#endif			/*__PLATFORM_CONFIG_H*/
+enum link_tuning_encoding {
+	OPA_PASSIVE_TUNING,
+	OPA_ACTIVE_TUNING,
+	OPA_UNKNOWN_TUNING
+};
+
+/* platform.c */
+void get_platform_config(struct hfi1_devdata *dd);
+void free_platform_config(struct hfi1_devdata *dd);
+int set_qsfp_tx(struct hfi1_pportdata *ppd, int on);
+void tune_serdes(struct hfi1_pportdata *ppd);
+
+#endif			/*__PLATFORM_H*/

diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index ce03681..29a5ad2 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,31 +51,32 @@
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/seq_file.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
 
 #include "hfi.h"
 #include "qp.h"
 #include "trace.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 
-#define BITS_PER_PAGE           (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
-
-static unsigned int hfi1_qp_table_size = 256;
+unsigned int hfi1_qp_table_size = 256;
 module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
 
-static void flush_tx_list(struct hfi1_qp *qp);
+static void flush_tx_list(struct rvt_qp *qp);
 static int iowait_sleep(
 	struct sdma_engine *sde,
 	struct iowait *wait,
 	struct sdma_txreq *stx,
 	unsigned seq);
 static void iowait_wakeup(struct iowait *wait, int reason);
+static void iowait_sdma_drained(struct iowait *wait);
+static void qp_pio_drain(struct rvt_qp *qp);
 
-static inline unsigned mk_qpn(struct hfi1_qpn_table *qpt,
-			      struct qpn_map *map, unsigned off)
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
 {
-	return (map - qpt->map) * BITS_PER_PAGE + off;
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 }
 
 /*
@@ -118,437 +116,15 @@
 	32768                   /* 1E */
 };
 
-static void get_map_page(struct hfi1_qpn_table *qpt, struct qpn_map *map)
+static void flush_tx_list(struct rvt_qp *qp)
 {
-	unsigned long page = get_zeroed_page(GFP_KERNEL);
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	/*
-	 * Free the page if someone raced with us installing it.
-	 */
-
-	spin_lock(&qpt->lock);
-	if (map->page)
-		free_page(page);
-	else
-		map->page = (void *)page;
-	spin_unlock(&qpt->lock);
-}
-
-/*
- * Allocate the next available QPN or
- * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
- */
-static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt,
-		     enum ib_qp_type type, u8 port)
-{
-	u32 i, offset, max_scan, qpn;
-	struct qpn_map *map;
-	u32 ret;
-
-	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
-		unsigned n;
-
-		ret = type == IB_QPT_GSI;
-		n = 1 << (ret + 2 * (port - 1));
-		spin_lock(&qpt->lock);
-		if (qpt->flags & n)
-			ret = -EINVAL;
-		else
-			qpt->flags |= n;
-		spin_unlock(&qpt->lock);
-		goto bail;
-	}
-
-	qpn = qpt->last + qpt->incr;
-	if (qpn >= QPN_MAX)
-		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
-	/* offset carries bit 0 */
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpn / BITS_PER_PAGE];
-	max_scan = qpt->nmaps - !offset;
-	for (i = 0;;) {
-		if (unlikely(!map->page)) {
-			get_map_page(qpt, map);
-			if (unlikely(!map->page))
-				break;
-		}
-		do {
-			if (!test_and_set_bit(offset, map->page)) {
-				qpt->last = qpn;
-				ret = qpn;
-				goto bail;
-			}
-			offset += qpt->incr;
-			/*
-			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
-			 * That is OK.   It gets re-assigned below
-			 */
-			qpn = mk_qpn(qpt, map, offset);
-		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
-		/*
-		 * In order to keep the number of pages allocated to a
-		 * minimum, we scan the all existing pages before increasing
-		 * the size of the bitmap table.
-		 */
-		if (++i > max_scan) {
-			if (qpt->nmaps == QPNMAP_ENTRIES)
-				break;
-			map = &qpt->map[qpt->nmaps++];
-			/* start at incr with current bit 0 */
-			offset = qpt->incr | (offset & 1);
-		} else if (map < &qpt->map[qpt->nmaps]) {
-			++map;
-			/* start at incr with current bit 0 */
-			offset = qpt->incr | (offset & 1);
-		} else {
-			map = &qpt->map[0];
-			/* wrap to first map page, invert bit 0 */
-			offset = qpt->incr | ((offset & 1) ^ 1);
-		}
-		/* there can be no bits at shift and below */
-		WARN_ON(offset & (dd->qos_shift - 1));
-		qpn = mk_qpn(qpt, map, offset);
-	}
-
-	ret = -ENOMEM;
-
-bail:
-	return ret;
-}
-
-static void free_qpn(struct hfi1_qpn_table *qpt, u32 qpn)
-{
-	struct qpn_map *map;
-
-	map = qpt->map + qpn / BITS_PER_PAGE;
-	if (map->page)
-		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-}
-
-/*
- * Put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static void insert_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned long flags;
-
-	atomic_inc(&qp->refcount);
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-
-	if (qp->ibqp.qp_num <= 1) {
-		rcu_assign_pointer(ibp->qp[qp->ibqp.qp_num], qp);
-	} else {
-		u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
-
-		qp->next = dev->qp_dev->qp_table[n];
-		rcu_assign_pointer(dev->qp_dev->qp_table[n], qp);
-		trace_hfi1_qpinsert(qp, n);
-	}
-
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-}
-
-/*
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void remove_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
-	unsigned long flags;
-	int removed = 1;
-
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-
-	if (rcu_dereference_protected(ibp->qp[0],
-			lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp[0], NULL);
-	} else if (rcu_dereference_protected(ibp->qp[1],
-			lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp[1], NULL);
-	} else {
-		struct hfi1_qp *q;
-		struct hfi1_qp __rcu **qpp;
-
-		removed = 0;
-		qpp = &dev->qp_dev->qp_table[n];
-		for (; (q = rcu_dereference_protected(*qpp,
-				lockdep_is_held(&dev->qp_dev->qpt_lock)))
-					!= NULL;
-				qpp = &q->next)
-			if (q == qp) {
-				RCU_INIT_POINTER(*qpp,
-				 rcu_dereference_protected(qp->next,
-				 lockdep_is_held(&dev->qp_dev->qpt_lock)));
-				removed = 1;
-				trace_hfi1_qpremove(qp, n);
-				break;
-			}
-	}
-
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-	if (removed) {
-		synchronize_rcu();
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-}
-
-/**
- * free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
- */
-static unsigned free_all_qps(struct hfi1_devdata *dd)
-{
-	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	unsigned long flags;
-	struct hfi1_qp *qp;
-	unsigned n, qp_inuse = 0;
-
-	for (n = 0; n < dd->num_pports; n++) {
-		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;
-
-		if (!hfi1_mcast_tree_empty(ibp))
-			qp_inuse++;
-		rcu_read_lock();
-		if (rcu_dereference(ibp->qp[0]))
-			qp_inuse++;
-		if (rcu_dereference(ibp->qp[1]))
-			qp_inuse++;
-		rcu_read_unlock();
-	}
-
-	if (!dev->qp_dev)
-		goto bail;
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-	for (n = 0; n < dev->qp_dev->qp_table_size; n++) {
-		qp = rcu_dereference_protected(dev->qp_dev->qp_table[n],
-			lockdep_is_held(&dev->qp_dev->qpt_lock));
-		RCU_INIT_POINTER(dev->qp_dev->qp_table[n], NULL);
-
-		for (; qp; qp = rcu_dereference_protected(qp->next,
-				lockdep_is_held(&dev->qp_dev->qpt_lock)))
-			qp_inuse++;
-	}
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-	synchronize_rcu();
-bail:
-	return qp_inuse;
-}
-
-/**
- * reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type)
-{
-	qp->remote_qpn = 0;
-	qp->qkey = 0;
-	qp->qp_access_flags = 0;
-	iowait_init(
-		&qp->s_iowait,
-		1,
-		hfi1_do_send,
-		iowait_sleep,
-		iowait_wakeup);
-	qp->s_flags &= HFI1_S_SIGNAL_REQ_WR;
-	qp->s_hdrwords = 0;
-	qp->s_wqe = NULL;
-	qp->s_draining = 0;
-	qp->s_next_psn = 0;
-	qp->s_last_psn = 0;
-	qp->s_sending_psn = 0;
-	qp->s_sending_hpsn = 0;
-	qp->s_psn = 0;
-	qp->r_psn = 0;
-	qp->r_msn = 0;
-	if (type == IB_QPT_RC) {
-		qp->s_state = IB_OPCODE_RC_SEND_LAST;
-		qp->r_state = IB_OPCODE_RC_SEND_LAST;
-	} else {
-		qp->s_state = IB_OPCODE_UC_SEND_LAST;
-		qp->r_state = IB_OPCODE_UC_SEND_LAST;
-	}
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_nak_state = 0;
-	qp->r_adefered = 0;
-	qp->r_aflags = 0;
-	qp->r_flags = 0;
-	qp->s_head = 0;
-	qp->s_tail = 0;
-	qp->s_cur = 0;
-	qp->s_acked = 0;
-	qp->s_last = 0;
-	qp->s_ssn = 1;
-	qp->s_lsn = 0;
-	clear_ahg(qp);
-	qp->s_mig_state = IB_MIG_MIGRATED;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-	qp->r_head_ack_queue = 0;
-	qp->s_tail_ack_queue = 0;
-	qp->s_num_rd_atomic = 0;
-	if (qp->r_rq.wq) {
-		qp->r_rq.wq->head = 0;
-		qp->r_rq.wq->tail = 0;
-	}
-	qp->r_sge.num_sge = 0;
-}
-
-static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
-{
-	unsigned n;
-
-	if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
-		hfi1_put_ss(&qp->s_rdma_read_sge);
-
-	hfi1_put_ss(&qp->r_sge);
-
-	if (clr_sends) {
-		while (qp->s_last != qp->s_head) {
-			struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-			unsigned i;
-
-			for (i = 0; i < wqe->wr.num_sge; i++) {
-				struct hfi1_sge *sge = &wqe->sg_list[i];
-
-				hfi1_put_mr(sge->mr);
-			}
-			if (qp->ibqp.qp_type == IB_QPT_UD ||
-			    qp->ibqp.qp_type == IB_QPT_SMI ||
-			    qp->ibqp.qp_type == IB_QPT_GSI)
-				atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
-			if (++qp->s_last >= qp->s_size)
-				qp->s_last = 0;
-		}
-		if (qp->s_rdma_mr) {
-			hfi1_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-	}
-
-	if (qp->ibqp.qp_type != IB_QPT_RC)
-		return;
-
-	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
-		struct hfi1_ack_entry *e = &qp->s_ack_queue[n];
-
-		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
-		    e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
-			e->rdma_sge.mr = NULL;
-		}
-	}
-}
-
-/**
- * hfi1_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err)
-{
-	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	int ret = 0;
-
-	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
-		goto bail;
-
-	qp->state = IB_QPS_ERR;
-
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
-
-	if (qp->s_flags & HFI1_S_ANY_WAIT_SEND)
-		qp->s_flags &= ~HFI1_S_ANY_WAIT_SEND;
-
-	write_seqlock(&dev->iowait_lock);
-	if (!list_empty(&qp->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) {
-		qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
-		list_del_init(&qp->s_iowait.list);
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-	write_sequnlock(&dev->iowait_lock);
-
-	if (!(qp->s_flags & HFI1_S_BUSY)) {
-		qp->s_hdrwords = 0;
-		if (qp->s_rdma_mr) {
-			hfi1_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-		flush_tx_list(qp);
-	}
-
-	/* Schedule the sending tasklet to drain the send work queue. */
-	if (qp->s_last != qp->s_head)
-		hfi1_schedule_send(qp);
-
-	clear_mr_refs(qp, 0);
-
-	memset(&wc, 0, sizeof(wc));
-	wc.qp = &qp->ibqp;
-	wc.opcode = IB_WC_RECV;
-
-	if (test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) {
-		wc.wr_id = qp->r_wr_id;
-		wc.status = err;
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	}
-	wc.status = IB_WC_WR_FLUSH_ERR;
-
-	if (qp->r_rq.wq) {
-		struct hfi1_rwq *wq;
-		u32 head;
-		u32 tail;
-
-		spin_lock(&qp->r_rq.lock);
-
-		/* sanity check pointers before trusting them */
-		wq = qp->r_rq.wq;
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		while (tail != head) {
-			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-			if (++tail >= qp->r_rq.size)
-				tail = 0;
-			hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-		}
-		wq->tail = tail;
-
-		spin_unlock(&qp->r_rq.lock);
-	} else if (qp->ibqp.event_handler)
-		ret = 1;
-
-bail:
-	return ret;
-}
-
-static void flush_tx_list(struct hfi1_qp *qp)
-{
-	while (!list_empty(&qp->s_iowait.tx_head)) {
+	while (!list_empty(&priv->s_iowait.tx_head)) {
 		struct sdma_txreq *tx;
 
 		tx = list_first_entry(
-			&qp->s_iowait.tx_head,
+			&priv->s_iowait.tx_head,
 			struct sdma_txreq,
 			list);
 		list_del_init(&tx->list);
@@ -557,14 +133,15 @@
 	}
 }
 
-static void flush_iowait(struct hfi1_qp *qp)
+static void flush_iowait(struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 	unsigned long flags;
 
 	write_seqlock_irqsave(&dev->iowait_lock, flags);
-	if (!list_empty(&qp->s_iowait.list)) {
-		list_del_init(&qp->s_iowait.list);
+	if (!list_empty(&priv->s_iowait.list)) {
+		list_del_init(&priv->s_iowait.list);
 		if (atomic_dec_and_test(&qp->refcount))
 			wake_up(&qp->wait);
 	}
@@ -597,362 +174,106 @@
 	return ib_mtu_enum_to_int(mtu);
 }
 
-
-/**
- * hfi1_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_udata *udata)
+int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			 int attr_mask, struct ib_udata *udata)
 {
+	struct ib_qp *ibqp = &qp->ibqp;
 	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
-	struct ib_event ev;
-	int lastwqe = 0;
-	int mig = 0;
-	int ret;
-	u32 pmtu = 0; /* for gcc warning only */
 	struct hfi1_devdata *dd = dd_from_dev(dev);
-
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ?
-		attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-		goto inval;
+	u8 sc;
 
 	if (attr_mask & IB_QP_AV) {
-		u8 sc;
-
-		if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
-			goto inval;
-		if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr))
-			goto inval;
 		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
+		if (sc == 0xf)
+			return -EINVAL;
+
 		if (!qp_to_sdma_engine(qp, sc) &&
 		    dd->flags & HFI1_HAS_SEND_DMA)
-			goto inval;
+			return -EINVAL;
+
+		if (!qp_to_send_context(qp, sc))
+			return -EINVAL;
 	}
 
 	if (attr_mask & IB_QP_ALT_PATH) {
-		u8 sc;
-
-		if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
-			goto inval;
-		if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
-			goto inval;
-		if (attr->alt_pkey_index >= hfi1_get_npkeys(dd))
-			goto inval;
 		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
+		if (sc == 0xf)
+			return -EINVAL;
+
 		if (!qp_to_sdma_engine(qp, sc) &&
 		    dd->flags & HFI1_HAS_SEND_DMA)
-			goto inval;
+			return -EINVAL;
+
+		if (!qp_to_send_context(qp, sc))
+			return -EINVAL;
 	}
 
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= hfi1_get_npkeys(dd))
-			goto inval;
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		if (attr->min_rnr_timer > 31)
-			goto inval;
-
-	if (attr_mask & IB_QP_PORT)
-		if (qp->ibqp.qp_type == IB_QPT_SMI ||
-		    qp->ibqp.qp_type == IB_QPT_GSI ||
-		    attr->port_num == 0 ||
-		    attr->port_num > ibqp->device->phys_port_cnt)
-			goto inval;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		if (attr->dest_qp_num > HFI1_QPN_MASK)
-			goto inval;
-
-	if (attr_mask & IB_QP_RETRY_CNT)
-		if (attr->retry_cnt > 7)
-			goto inval;
-
-	if (attr_mask & IB_QP_RNR_RETRY)
-		if (attr->rnr_retry > 7)
-			goto inval;
-
-	/*
-	 * Don't allow invalid path_mtu values.  OK to set greater
-	 * than the active mtu (or even the max_cap, if we have tuned
-	 * that to a small mtu.  We'll set qp->path_mtu
-	 * to the lesser of requested attribute mtu and active,
-	 * for packetizing messages.
-	 * Note that the QP port has to be set in INIT and MTU in RTR.
-	 */
-	if (attr_mask & IB_QP_PATH_MTU) {
-		int mtu, pidx = qp->port_num - 1;
-
-		dd = dd_from_dev(dev);
-		mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu);
-		if (mtu == -1)
-			goto inval;
-
-		if (mtu > dd->pport[pidx].ibmtu)
-			pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
-		else
-			pmtu = attr->path_mtu;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		if (attr->path_mig_state == IB_MIG_REARM) {
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				goto inval;
-			if (new_state != IB_QPS_RTS)
-				goto inval;
-		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
-			if (qp->s_mig_state == IB_MIG_REARM)
-				goto inval;
-			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
-				goto inval;
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				mig = 1;
-		} else
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC)
-			goto inval;
-
-	switch (new_state) {
-	case IB_QPS_RESET:
-		if (qp->state != IB_QPS_RESET) {
-			qp->state = IB_QPS_RESET;
-			flush_iowait(qp);
-			qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
-			spin_unlock(&qp->s_lock);
-			spin_unlock_irq(&qp->r_lock);
-			/* Stop the sending work queue and retry timer */
-			cancel_work_sync(&qp->s_iowait.iowork);
-			del_timer_sync(&qp->s_timer);
-			iowait_sdma_drain(&qp->s_iowait);
-			flush_tx_list(qp);
-			remove_qp(dev, qp);
-			wait_event(qp->wait, !atomic_read(&qp->refcount));
-			spin_lock_irq(&qp->r_lock);
-			spin_lock(&qp->s_lock);
-			clear_mr_refs(qp, 1);
-			clear_ahg(qp);
-			reset_qp(qp, ibqp->qp_type);
-		}
-		break;
-
-	case IB_QPS_RTR:
-		/* Allow event to re-trigger if QP set to RTR more than once */
-		qp->r_flags &= ~HFI1_R_COMM_EST;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_SQD:
-		qp->s_draining = qp->s_last != qp->s_cur;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_SQE:
-		if (qp->ibqp.qp_type == IB_QPT_RC)
-			goto inval;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_ERR:
-		lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
-		break;
-
-	default:
-		qp->state = new_state;
-		break;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		qp->s_pkey_index = attr->pkey_index;
-
-	if (attr_mask & IB_QP_PORT)
-		qp->port_num = attr->port_num;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		qp->remote_qpn = attr->dest_qp_num;
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK;
-		qp->s_psn = qp->s_next_psn;
-		qp->s_sending_psn = qp->s_next_psn;
-		qp->s_last_psn = qp->s_next_psn - 1;
-		qp->s_sending_hpsn = qp->s_last_psn;
-	}
-
-	if (attr_mask & IB_QP_RQ_PSN)
-		qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK;
-
-	if (attr_mask & IB_QP_ACCESS_FLAGS)
-		qp->qp_access_flags = attr->qp_access_flags;
-
-	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
-		qp->s_srate = attr->ah_attr.static_rate;
-		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
-		qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
-		qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		qp->alt_ah_attr = attr->alt_ah_attr;
-		qp->s_alt_pkey_index = attr->alt_pkey_index;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		qp->s_mig_state = attr->path_mig_state;
-		if (mig) {
-			qp->remote_ah_attr = qp->alt_ah_attr;
-			qp->port_num = qp->alt_ah_attr.port_num;
-			qp->s_pkey_index = qp->s_alt_pkey_index;
-			qp->s_flags |= HFI1_S_AHG_CLEAR;
-			qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
-			qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-		}
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU) {
-		struct hfi1_ibport *ibp;
-		u8 sc, vl;
-		u32 mtu;
-
-		dd = dd_from_dev(dev);
-		ibp = &dd->pport[qp->port_num - 1].ibport_data;
-
-		sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
-		vl = sc_to_vlt(dd, sc);
-
-		mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu);
-		if (vl < PER_VL_SEND_CONTEXTS)
-			mtu = min_t(u32, mtu, dd->vld[vl].mtu);
-		pmtu = mtu_to_enum(mtu, OPA_MTU_8192);
-
-		qp->path_mtu = pmtu;
-		qp->pmtu = mtu;
-	}
-
-	if (attr_mask & IB_QP_RETRY_CNT) {
-		qp->s_retry_cnt = attr->retry_cnt;
-		qp->s_retry = attr->retry_cnt;
-	}
-
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp->s_rnr_retry_cnt = attr->rnr_retry;
-		qp->s_rnr_retry = attr->rnr_retry;
-	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-	if (attr_mask & IB_QP_TIMEOUT) {
-		qp->timeout = attr->timeout;
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		qp->qkey = attr->qkey;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-		insert_qp(dev, qp);
-
-	if (lastwqe) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	if (mig) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_PATH_MIG;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	ret = 0;
-	goto bail;
-
-inval:
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-	ret = -EINVAL;
-
-bail:
-	return ret;
+	return 0;
 }
 
-int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_qp_init_attr *init_attr)
+void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+		    int attr_mask, struct ib_udata *udata)
 {
-	struct hfi1_qp *qp = to_iqp(ibqp);
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	attr->qp_state = qp->state;
-	attr->cur_qp_state = attr->qp_state;
-	attr->path_mtu = qp->path_mtu;
-	attr->path_mig_state = qp->s_mig_state;
-	attr->qkey = qp->qkey;
-	attr->rq_psn = mask_psn(qp->r_psn);
-	attr->sq_psn = mask_psn(qp->s_next_psn);
-	attr->dest_qp_num = qp->remote_qpn;
-	attr->qp_access_flags = qp->qp_access_flags;
-	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-	attr->cap.max_send_sge = qp->s_max_sge;
-	attr->cap.max_recv_sge = qp->r_rq.max_sge;
-	attr->cap.max_inline_data = 0;
-	attr->ah_attr = qp->remote_ah_attr;
-	attr->alt_ah_attr = qp->alt_ah_attr;
-	attr->pkey_index = qp->s_pkey_index;
-	attr->alt_pkey_index = qp->s_alt_pkey_index;
-	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = qp->s_draining;
-	attr->max_rd_atomic = qp->s_max_rd_atomic;
-	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-	attr->min_rnr_timer = qp->r_min_rnr_timer;
-	attr->port_num = qp->port_num;
-	attr->timeout = qp->timeout;
-	attr->retry_cnt = qp->s_retry_cnt;
-	attr->rnr_retry = qp->s_rnr_retry_cnt;
-	attr->alt_port_num = qp->alt_ah_attr.port_num;
-	attr->alt_timeout = qp->alt_timeout;
+	if (attr_mask & IB_QP_AV) {
+		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
+		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
+	}
 
-	init_attr->event_handler = qp->ibqp.event_handler;
-	init_attr->qp_context = qp->ibqp.qp_context;
-	init_attr->send_cq = qp->ibqp.send_cq;
-	init_attr->recv_cq = qp->ibqp.recv_cq;
-	init_attr->srq = qp->ibqp.srq;
-	init_attr->cap = attr->cap;
-	if (qp->s_flags & HFI1_S_SIGNAL_REQ_WR)
-		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	else
-		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->qp_type = qp->ibqp.qp_type;
-	init_attr->port_num = qp->port_num;
-	return 0;
+	if (attr_mask & IB_QP_PATH_MIG_STATE &&
+	    attr->path_mig_state == IB_MIG_MIGRATED &&
+	    qp->s_mig_state == IB_MIG_ARMED) {
+		qp->s_flags |= RVT_S_AHG_CLEAR;
+		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
+		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
+	}
+}
+
+/**
+ * hfi1_check_send_wqe - validate wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wqe.  This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 0 on success, -EINVAL on failure
+ *
+ */
+int hfi1_check_send_wqe(struct rvt_qp *qp,
+			struct rvt_swqe *wqe)
+{
+	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct rvt_ah *ah;
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		if (wqe->length > 0x80000000U)
+			return -EINVAL;
+		break;
+	case IB_QPT_SMI:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		break;
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+			return -EINVAL;
+	default:
+		break;
+	}
+	return wqe->length <= piothreshold;
 }
 
 /**
@@ -961,7 +282,7 @@
  *
  * Returns the AETH.
  */
-__be32 hfi1_compute_aeth(struct hfi1_qp *qp)
+__be32 hfi1_compute_aeth(struct rvt_qp *qp)
 {
 	u32 aeth = qp->r_msn & HFI1_MSN_MASK;
 
@@ -974,7 +295,7 @@
 	} else {
 		u32 min, max, x;
 		u32 credits;
-		struct hfi1_rwq *wq = qp->r_rq.wq;
+		struct rvt_rwq *wq = qp->r_rq.wq;
 		u32 head;
 		u32 tail;
 
@@ -1004,12 +325,13 @@
 			x = (min + max) / 2;
 			if (credit_table[x] == credits)
 				break;
-			if (credit_table[x] > credits)
+			if (credit_table[x] > credits) {
 				max = x;
-			else if (min == x)
-				break;
-			else
+			} else {
+				if (min == x)
+					break;
 				min = x;
+			}
 		}
 		aeth |= x << HFI1_AETH_CREDIT_SHIFT;
 	}
@@ -1017,348 +339,58 @@
 }
 
 /**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
+ * _hfi1_schedule_send - schedule progress
+ * @qp: the QP
  *
- * Returns the queue pair on success, otherwise returns an errno.
+ * This schedules qp progress w/o regard to the s_flags.
  *
- * Called by the ib_create_qp() core verbs function.
+ * It is only used in the post send, which doesn't hold
+ * the s_lock.
  */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
-			     struct ib_qp_init_attr *init_attr,
-			     struct ib_udata *udata)
+void _hfi1_schedule_send(struct rvt_qp *qp)
 {
-	struct hfi1_qp *qp;
-	int err;
-	struct hfi1_swqe *swq = NULL;
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ibport *ibp =
+		to_iport(qp->ibqp.device, qp->port_num);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+
+	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+			priv->s_sde ?
+			priv->s_sde->cpu :
+			cpumask_first(cpumask_of_node(dd->node)));
+}
+
+static void qp_pio_drain(struct rvt_qp *qp)
+{
 	struct hfi1_ibdev *dev;
-	struct hfi1_devdata *dd;
-	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	if (init_attr->cap.max_send_sge > hfi1_max_sges ||
-	    init_attr->cap.max_send_wr > hfi1_max_qp_wrs ||
-	    init_attr->create_flags) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
+	if (!priv->s_sendcontext)
+		return;
+	dev = to_idev(qp->ibqp.device);
+	while (iowait_pio_pending(&priv->s_iowait)) {
+		write_seqlock_irq(&dev->iowait_lock);
+		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
+		write_sequnlock_irq(&dev->iowait_lock);
+		iowait_pio_drain(&priv->s_iowait);
+		write_seqlock_irq(&dev->iowait_lock);
+		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
+		write_sequnlock_irq(&dev->iowait_lock);
 	}
-
-	/* Check receive queue parameters if no SRQ is specified. */
-	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > hfi1_max_sges ||
-		    init_attr->cap.max_recv_wr > hfi1_max_qp_wrs) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		if (init_attr->cap.max_send_sge +
-		    init_attr->cap.max_send_wr +
-		    init_attr->cap.max_recv_sge +
-		    init_attr->cap.max_recv_wr == 0) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	}
-
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		if (init_attr->port_num == 0 ||
-		    init_attr->port_num > ibpd->device->phys_port_cnt) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	case IB_QPT_UC:
-	case IB_QPT_RC:
-	case IB_QPT_UD:
-		sz = sizeof(struct hfi1_sge) *
-			init_attr->cap.max_send_sge +
-			sizeof(struct hfi1_swqe);
-		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
-		if (swq == NULL) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail;
-		}
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
-		if (init_attr->srq) {
-			struct hfi1_srq *srq = to_isrq(init_attr->srq);
-
-			if (srq->rq.max_sge > 1)
-				sg_list_sz = sizeof(*qp->r_sg_list) *
-					(srq->rq.max_sge - 1);
-		} else if (init_attr->cap.max_recv_sge > 1)
-			sg_list_sz = sizeof(*qp->r_sg_list) *
-				(init_attr->cap.max_recv_sge - 1);
-		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
-		if (!qp) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_swq;
-		}
-		RCU_INIT_POINTER(qp->next, NULL);
-		qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
-		if (!qp->s_hdr) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_qp;
-		}
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-		if (init_attr->srq)
-			sz = 0;
-		else {
-			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-				sizeof(struct hfi1_rwqe);
-			qp->r_rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) +
-						   qp->r_rq.size * sz);
-			if (!qp->r_rq.wq) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
-			}
-		}
-
-		/*
-		 * ib_create_qp() will initialize qp->ibqp
-		 * except for qp->ibqp.qp_num.
-		 */
-		spin_lock_init(&qp->r_lock);
-		spin_lock_init(&qp->s_lock);
-		spin_lock_init(&qp->r_rq.lock);
-		atomic_set(&qp->refcount, 0);
-		init_waitqueue_head(&qp->wait);
-		init_timer(&qp->s_timer);
-		qp->s_timer.data = (unsigned long)qp;
-		INIT_LIST_HEAD(&qp->rspwait);
-		qp->state = IB_QPS_RESET;
-		qp->s_wq = swq;
-		qp->s_size = init_attr->cap.max_send_wr + 1;
-		qp->s_max_sge = init_attr->cap.max_send_sge;
-		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = HFI1_S_SIGNAL_REQ_WR;
-		dev = to_idev(ibpd->device);
-		dd = dd_from_dev(dev);
-		err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type,
-				init_attr->port_num);
-		if (err < 0) {
-			ret = ERR_PTR(err);
-			vfree(qp->r_rq.wq);
-			goto bail_qp;
-		}
-		qp->ibqp.qp_num = err;
-		qp->port_num = init_attr->port_num;
-		reset_qp(qp, init_attr->qp_type);
-
-		break;
-
-	default:
-		/* Don't support raw QPs */
-		ret = ERR_PTR(-ENOSYS);
-		goto bail;
-	}
-
-	init_attr->cap.max_inline_data = 0;
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See hfi1_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		if (!qp->r_rq.wq) {
-			__u64 offset = 0;
-
-			err = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		} else {
-			u32 s = sizeof(struct hfi1_rwq) + qp->r_rq.size * sz;
-
-			qp->ip = hfi1_create_mmap_info(dev, s,
-						      ibpd->uobject->context,
-						      qp->r_rq.wq);
-			if (!qp->ip) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_ip;
-			}
-
-			err = ib_copy_to_udata(udata, &(qp->ip->offset),
-					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		}
-	}
-
-	spin_lock(&dev->n_qps_lock);
-	if (dev->n_qps_allocated == hfi1_max_qps) {
-		spin_unlock(&dev->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_qps_allocated++;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &qp->ibqp;
-
-	/*
-	 * We have our QP and its good, now keep track of what types of opcodes
-	 * can be processed on this QP. We do this by keeping track of what the
-	 * 3 high order bits of the opcode are.
-	 */
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-	case IB_QPT_UD:
-		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	case IB_QPT_RC:
-		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	case IB_QPT_UC:
-		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	default:
-		ret = ERR_PTR(-EINVAL);
-		goto bail_ip;
-	}
-
-	goto bail;
-
-bail_ip:
-	if (qp->ip)
-		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
-bail_qp:
-	kfree(qp->s_hdr);
-	kfree(qp);
-bail_swq:
-	vfree(swq);
-bail:
-	return ret;
 }
 
 /**
- * hfi1_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
+ * hfi1_schedule_send - schedule progress
+ * @qp: the QP
  *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
+ * This schedules qp progress and caller should hold
+ * the s_lock.
  */
-int hfi1_destroy_qp(struct ib_qp *ibqp)
+void hfi1_schedule_send(struct rvt_qp *qp)
 {
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-
-	/* Make sure HW and driver activity is stopped. */
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-		flush_iowait(qp);
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
-		spin_unlock(&qp->s_lock);
-		spin_unlock_irq(&qp->r_lock);
-		cancel_work_sync(&qp->s_iowait.iowork);
-		del_timer_sync(&qp->s_timer);
-		iowait_sdma_drain(&qp->s_iowait);
-		flush_tx_list(qp);
-		remove_qp(dev, qp);
-		wait_event(qp->wait, !atomic_read(&qp->refcount));
-		spin_lock_irq(&qp->r_lock);
-		spin_lock(&qp->s_lock);
-		clear_mr_refs(qp, 1);
-		clear_ahg(qp);
-	}
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	/* all user's cleaned up, mark it available */
-	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip)
-		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	vfree(qp->s_wq);
-	kfree(qp->s_hdr);
-	kfree(qp);
-	return 0;
-}
-
-/**
- * init_qpn_table - initialize the QP number table for a device
- * @qpt: the QPN table
- */
-static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt)
-{
-	u32 offset, qpn, i;
-	struct qpn_map *map;
-	int ret = 0;
-
-	spin_lock_init(&qpt->lock);
-
-	qpt->last = 0;
-	qpt->incr = 1 << dd->qos_shift;
-
-	/* insure we don't assign QPs from KDETH 64K window */
-	qpn = kdeth_qp << 16;
-	qpt->nmaps = qpn / BITS_PER_PAGE;
-	/* This should always be zero */
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpt->nmaps];
-	dd_dev_info(dd, "Reserving QPNs for KDETH window from 0x%x to 0x%x\n",
-		qpn, qpn + 65535);
-	for (i = 0; i < 65536; i++) {
-		if (!map->page) {
-			get_map_page(qpt, map);
-			if (!map->page) {
-				ret = -ENOMEM;
-				break;
-			}
-		}
-		set_bit(offset, map->page);
-		offset++;
-		if (offset == BITS_PER_PAGE) {
-			/* next page */
-			qpt->nmaps++;
-			map++;
-			offset = 0;
-		}
-	}
-	return ret;
-}
-
-/**
- * free_qpn_table - free the QP number table for a device
- * @qpt: the QPN table
- */
-static void free_qpn_table(struct hfi1_qpn_table *qpt)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
-		free_page((unsigned long) qpt->map[i].page);
+	if (hfi1_send_ok(qp))
+		_hfi1_schedule_send(qp);
 }
 
 /**
@@ -1368,7 +400,7 @@
  *
  * The QP s_lock should be held.
  */
-void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
+void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
 {
 	u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
 
@@ -1378,27 +410,27 @@
 	 * honor the credit field.
 	 */
 	if (credit == HFI1_AETH_CREDIT_INVAL) {
-		if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
-			qp->s_flags |= HFI1_S_UNLIMITED_CREDIT;
-			if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
+		if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+			qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				hfi1_schedule_send(qp);
 			}
 		}
-	} else if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
+	} else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
 		/* Compute new LSN (i.e., MSN + credit) */
 		credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
 		if (cmp_msn(credit, qp->s_lsn) > 0) {
 			qp->s_lsn = credit;
-			if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				hfi1_schedule_send(qp);
 			}
 		}
 	}
 }
 
-void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag)
+void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
 {
 	unsigned long flags;
 
@@ -1421,16 +453,17 @@
 	unsigned seq)
 {
 	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
+	struct hfi1_qp_priv *priv;
 	unsigned long flags;
 	int ret = 0;
 	struct hfi1_ibdev *dev;
 
 	qp = tx->qp;
+	priv = qp->priv;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
-
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		/*
 		 * If we couldn't queue the DMA request, save the info
 		 * and try again later rather than destroying the
@@ -1442,18 +475,18 @@
 		write_seqlock(&dev->iowait_lock);
 		if (sdma_progress(sde, seq, stx))
 			goto eagain;
-		if (list_empty(&qp->s_iowait.list)) {
+		if (list_empty(&priv->s_iowait.list)) {
 			struct hfi1_ibport *ibp =
 				to_iport(qp->ibqp.device, qp->port_num);
 
-			ibp->n_dmawait++;
-			qp->s_flags |= HFI1_S_WAIT_DMA_DESC;
-			list_add_tail(&qp->s_iowait.list, &sde->dmawait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC);
+			ibp->rvp.n_dmawait++;
+			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
 			atomic_inc(&qp->refcount);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		ret = -EBUSY;
 	} else {
@@ -1470,61 +503,25 @@
 
 static void iowait_wakeup(struct iowait *wait, int reason)
 {
-	struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
+	struct rvt_qp *qp = iowait_to_qp(wait);
 
 	WARN_ON(reason != SDMA_AVAIL_REASON);
-	hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC);
+	hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
 }
 
-int hfi1_qp_init(struct hfi1_ibdev *dev)
+static void iowait_sdma_drained(struct iowait *wait)
 {
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	int i;
-	int ret = -ENOMEM;
+	struct rvt_qp *qp = iowait_to_qp(wait);
 
-	/* allocate parent object */
-	dev->qp_dev = kzalloc(sizeof(*dev->qp_dev), GFP_KERNEL);
-	if (!dev->qp_dev)
-		goto nomem;
-	/* allocate hash table */
-	dev->qp_dev->qp_table_size = hfi1_qp_table_size;
-	dev->qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size);
-	dev->qp_dev->qp_table =
-		kmalloc(dev->qp_dev->qp_table_size *
-				sizeof(*dev->qp_dev->qp_table),
-			GFP_KERNEL);
-	if (!dev->qp_dev->qp_table)
-		goto nomem;
-	for (i = 0; i < dev->qp_dev->qp_table_size; i++)
-		RCU_INIT_POINTER(dev->qp_dev->qp_table[i], NULL);
-	spin_lock_init(&dev->qp_dev->qpt_lock);
-	/* initialize qpn map */
-	ret = init_qpn_table(dd, &dev->qp_dev->qpn_table);
-	if (ret)
-		goto nomem;
-	return ret;
-nomem:
-	if (dev->qp_dev) {
-		kfree(dev->qp_dev->qp_table);
-		free_qpn_table(&dev->qp_dev->qpn_table);
-		kfree(dev->qp_dev);
-	}
-	return ret;
-}
-
-void hfi1_qp_exit(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	u32 qps_inuse;
-
-	qps_inuse = free_all_qps(dd);
-	if (qps_inuse)
-		dd_dev_err(dd, "QP memory leak! %u still in use\n",
-			   qps_inuse);
-	if (dev->qp_dev) {
-		kfree(dev->qp_dev->qp_table);
-		free_qpn_table(&dev->qp_dev->qpn_table);
-		kfree(dev->qp_dev);
+	/*
+	 * This happens when the send engine notes
+	 * a QP in the error state and cannot
+	 * do the flush work until that QP's
+	 * sdma work has finished.
+	 */
+	if (qp->s_flags & RVT_S_WAIT_DMA) {
+		qp->s_flags &= ~RVT_S_WAIT_DMA;
+		hfi1_schedule_send(qp);
 	}
 }
 
@@ -1537,7 +534,7 @@
  * Return:
  * A send engine for the qp or NULL for SMI type qp.
  */
-struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
+struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct sdma_engine *sde;
@@ -1554,9 +551,33 @@
 	return sde;
 }
 
+/*
+ * qp_to_send_context - map a qp to a send context
+ * @qp: the QP
+ * @sc5: the 5 bit sc
+ *
+ * Return:
+ * A send context for the qp
+ */
+struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_SMI:
+		/* SMA packets to VL15 */
+		return dd->vld[15].sc;
+	default:
+		break;
+	}
+
+	return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
+					  sc5);
+}
+
 struct qp_iter {
 	struct hfi1_ibdev *dev;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	int specials;
 	int n;
 };
@@ -1570,7 +591,7 @@
 		return NULL;
 
 	iter->dev = dev;
-	iter->specials = dev->ibdev.phys_port_cnt * 2;
+	iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
 	if (qp_iter_next(iter)) {
 		kfree(iter);
 		return NULL;
@@ -1584,8 +605,8 @@
 	struct hfi1_ibdev *dev = iter->dev;
 	int n = iter->n;
 	int ret = 1;
-	struct hfi1_qp *pqp = iter->qp;
-	struct hfi1_qp *qp;
+	struct rvt_qp *pqp = iter->qp;
+	struct rvt_qp *qp;
 
 	/*
 	 * The approach is to consider the special qps
@@ -1597,11 +618,11 @@
 	 *
 	 * n = 0..iter->specials is the special qp indices
 	 *
-	 * n = iter->specials..dev->qp_dev->qp_table_size+iter->specials are
+	 * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are
 	 * the potential hash bucket entries
 	 *
 	 */
-	for (; n <  dev->qp_dev->qp_table_size + iter->specials; n++) {
+	for (; n <  dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
 		if (pqp) {
 			qp = rcu_dereference(pqp->next);
 		} else {
@@ -1610,17 +631,17 @@
 				struct hfi1_ibport *ibp;
 				int pidx;
 
-				pidx = n % dev->ibdev.phys_port_cnt;
+				pidx = n % dev->rdi.ibdev.phys_port_cnt;
 				ppd = &dd_from_dev(dev)->pport[pidx];
 				ibp = &ppd->ibport_data;
 
 				if (!(n & 1))
-					qp = rcu_dereference(ibp->qp[0]);
+					qp = rcu_dereference(ibp->rvp.qp[0]);
 				else
-					qp = rcu_dereference(ibp->qp[1]);
+					qp = rcu_dereference(ibp->rvp.qp[1]);
 			} else {
 				qp = rcu_dereference(
-					dev->qp_dev->qp_table[
+					dev->rdi.qp_dev->qp_table[
 						(n - iter->specials)]);
 			}
 		}
@@ -1638,7 +659,7 @@
 	"SMI", "GSI", "RC", "UC", "UD",
 };
 
-static int qp_idle(struct hfi1_qp *qp)
+static int qp_idle(struct rvt_qp *qp)
 {
 	return
 		qp->s_last == qp->s_acked &&
@@ -1649,14 +670,17 @@
 
 void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 {
-	struct hfi1_swqe *wqe;
-	struct hfi1_qp *qp = iter->qp;
+	struct rvt_swqe *wqe;
+	struct rvt_qp *qp = iter->qp;
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct sdma_engine *sde;
+	struct send_context *send_context;
 
-	sde = qp_to_sdma_engine(qp, qp->s_sc);
-	wqe = get_swqe_ptr(qp, qp->s_last);
+	sde = qp_to_sdma_engine(qp, priv->s_sc);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
+	send_context = qp_to_send_context(qp, priv->s_sc);
 	seq_printf(s,
-		   "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n",
+		   "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
 		   iter->n,
 		   qp_idle(qp) ? "I" : "B",
 		   qp->ibqp.qp_num,
@@ -1666,8 +690,9 @@
 		   wqe ? wqe->wr.opcode : 0,
 		   qp->s_hdrwords,
 		   qp->s_flags,
-		   atomic_read(&qp->s_iowait.sdma_busy),
-		   !list_empty(&qp->s_iowait.list),
+		   iowait_sdma_pending(&priv->s_iowait),
+		   iowait_pio_pending(&priv->s_iowait),
+		   !list_empty(&priv->s_iowait.list),
 		   qp->timeout,
 		   wqe ? wqe->ssn : 0,
 		   qp->s_lsn,
@@ -1676,20 +701,26 @@
 		   qp->s_sending_psn, qp->s_sending_hpsn,
 		   qp->s_last, qp->s_acked, qp->s_cur,
 		   qp->s_tail, qp->s_head, qp->s_size,
+		   qp->s_avail,
 		   qp->remote_qpn,
 		   qp->remote_ah_attr.dlid,
 		   qp->remote_ah_attr.sl,
 		   qp->pmtu,
+		   qp->s_retry,
 		   qp->s_retry_cnt,
-		   qp->timeout,
 		   qp->s_rnr_retry_cnt,
 		   sde,
-		   sde ? sde->this_idx : 0);
+		   sde ? sde->this_idx : 0,
+		   send_context,
+		   send_context ? send_context->sw_index : 0,
+		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
+		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
+		   qp->pid);
 }
 
-void qp_comm_est(struct hfi1_qp *qp)
+void qp_comm_est(struct rvt_qp *qp)
 {
-	qp->r_flags |= HFI1_R_COMM_EST;
+	qp->r_flags |= RVT_R_COMM_EST;
 	if (qp->ibqp.event_handler) {
 		struct ib_event ev;
 
@@ -1700,24 +731,241 @@
 	}
 }
 
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		    gfp_t gfp)
+{
+	struct hfi1_qp_priv *priv;
+
+	priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	priv->owner = qp;
+
+	priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node);
+	if (!priv->s_hdr) {
+		kfree(priv);
+		return ERR_PTR(-ENOMEM);
+	}
+	setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
+	qp->s_timer.function = hfi1_rc_timeout;
+	return priv;
+}
+
+void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	kfree(priv->s_hdr);
+	kfree(priv);
+}
+
+unsigned free_all_qps(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	int n;
+	unsigned qp_inuse = 0;
+
+	for (n = 0; n < dd->num_pports; n++) {
+		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;
+
+		rcu_read_lock();
+		if (rcu_dereference(ibp->rvp.qp[0]))
+			qp_inuse++;
+		if (rcu_dereference(ibp->rvp.qp[1]))
+			qp_inuse++;
+		rcu_read_unlock();
+	}
+
+	return qp_inuse;
+}
+
+void flush_qp_waiters(struct rvt_qp *qp)
+{
+	flush_iowait(qp);
+	hfi1_stop_rc_timers(qp);
+}
+
+void stop_send_queue(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	cancel_work_sync(&priv->s_iowait.iowork);
+	hfi1_del_timers_sync(qp);
+}
+
+void quiesce_qp(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	iowait_sdma_drain(&priv->s_iowait);
+	qp_pio_drain(qp);
+	flush_tx_list(qp);
+}
+
+void notify_qp_reset(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	iowait_init(
+		&priv->s_iowait,
+		1,
+		_hfi1_do_send,
+		iowait_sleep,
+		iowait_wakeup,
+		iowait_sdma_drained);
+	priv->r_adefered = 0;
+	clear_ahg(qp);
+}
+
 /*
  * Switch to alternate path.
  * The QP s_lock should be held and interrupts disabled.
  */
-void hfi1_migrate_qp(struct hfi1_qp *qp)
+void hfi1_migrate_qp(struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct ib_event ev;
 
 	qp->s_mig_state = IB_MIG_MIGRATED;
 	qp->remote_ah_attr = qp->alt_ah_attr;
 	qp->port_num = qp->alt_ah_attr.port_num;
 	qp->s_pkey_index = qp->s_alt_pkey_index;
-	qp->s_flags |= HFI1_S_AHG_CLEAR;
-	qp->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
-	qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
+	qp->s_flags |= RVT_S_AHG_CLEAR;
+	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
+	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
 
 	ev.device = qp->ibqp.device;
 	ev.element.qp = &qp->ibqp;
 	ev.event = IB_EVENT_PATH_MIG;
 	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
 }
+
+int mtu_to_path_mtu(u32 mtu)
+{
+	return mtu_to_enum(mtu, OPA_MTU_8192);
+}
+
+u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
+{
+	u32 mtu;
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	struct hfi1_ibport *ibp;
+	u8 sc, vl;
+
+	ibp = &dd->pport[qp->port_num - 1].ibport_data;
+	sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+	vl = sc_to_vlt(dd, sc);
+
+	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
+	if (vl < PER_VL_SEND_CONTEXTS)
+		mtu = min_t(u32, mtu, dd->vld[vl].mtu);
+	return mtu;
+}
+
+int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		       struct ib_qp_attr *attr)
+{
+	int mtu, pidx = qp->port_num - 1;
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
+	if (mtu == -1)
+		return -1; /* values less than 0 are error */
+
+	if (mtu > dd->pport[pidx].ibmtu)
+		return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
+	else
+		return attr->path_mtu;
+}
+
+void notify_error_qp(struct rvt_qp *qp)
+{
+	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	write_seqlock(&dev->iowait_lock);
+	if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+		list_del_init(&priv->s_iowait.list);
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+	write_sequnlock(&dev->iowait_lock);
+
+	if (!(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_hdrwords = 0;
+		if (qp->s_rdma_mr) {
+			rvt_put_mr(qp->s_rdma_mr);
+			qp->s_rdma_mr = NULL;
+		}
+		flush_tx_list(qp);
+	}
+}
+
+/**
+ * hfi1_error_port_qps - put a port's RC/UC qps into error state
+ * @ibp: the ibport.
+ * @sl: the service level.
+ *
+ * This function places all RC/UC qps with a given service level into error
+ * state. It is generally called to force upper lay apps to abandon stale qps
+ * after an sl->sc mapping change.
+ */
+void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
+{
+	struct rvt_qp *qp = NULL;
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
+	int n;
+	int lastwqe;
+	struct ib_event ev;
+
+	rcu_read_lock();
+
+	/* Deal only with RC/UC qps that use the given SL. */
+	for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
+		for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
+			qp = rcu_dereference(qp->next)) {
+			if (qp->port_num == ppd->port &&
+			    (qp->ibqp.qp_type == IB_QPT_UC ||
+			     qp->ibqp.qp_type == IB_QPT_RC) &&
+			    qp->remote_ah_attr.sl == sl &&
+			    (ib_rvt_state_ops[qp->state] &
+			     RVT_POST_SEND_OK)) {
+				spin_lock_irq(&qp->r_lock);
+				spin_lock(&qp->s_hlock);
+				spin_lock(&qp->s_lock);
+				lastwqe = rvt_error_qp(qp,
+						       IB_WC_WR_FLUSH_ERR);
+				spin_unlock(&qp->s_lock);
+				spin_unlock(&qp->s_hlock);
+				spin_unlock_irq(&qp->r_lock);
+				if (lastwqe) {
+					ev.device = qp->ibqp.device;
+					ev.element.qp = &qp->ibqp;
+					ev.event =
+						IB_EVENT_QP_LAST_WQE_REACHED;
+					qp->ibqp.event_handler(&ev,
+						qp->ibqp.qp_context);
+				}
+			}
+		}
+	}
+
+	rcu_read_unlock();
+}

diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h
index 62a94c5..e7bc8d6 100644
--- a/drivers/staging/rdma/hfi1/qp.h
+++ b/drivers/staging/rdma/hfi1/qp.h

@@ -1,14 +1,13 @@
 #ifndef _QP_H
 #define _QP_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,119 +48,33 @@
  */
 
 #include <linux/hash.h>
+#include <rdma/rdmavt_qp.h>
 #include "verbs.h"
 #include "sdma.h"
 
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+extern unsigned int hfi1_qp_table_size;
 
 /*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
+ * free_ahg - clear ahg from QP
  */
-struct qpn_map {
-	void *page;
-};
-
-struct hfi1_qpn_table {
-	spinlock_t lock; /* protect changes in this struct */
-	unsigned flags;         /* flags for QP0/1 allocated for each port */
-	u32 last;               /* last QP number allocated */
-	u32 nmaps;              /* size of the map table */
-	u16 limit;
-	u8  incr;
-	/* bit map of free QP numbers other than 0/1 */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct hfi1_qp_ibdev {
-	u32 qp_table_size;
-	u32 qp_table_bits;
-	struct hfi1_qp __rcu **qp_table;
-	spinlock_t qpt_lock;
-	struct hfi1_qpn_table qpn_table;
-};
-
-static inline u32 qpn_hash(struct hfi1_qp_ibdev *dev, u32 qpn)
+static inline void clear_ahg(struct rvt_qp *qp)
 {
-	return hash_32(qpn, dev->qp_table_bits);
-}
+	struct hfi1_qp_priv *priv = qp->priv;
 
-/**
- * hfi1_lookup_qpn - return the QP with the given QPN
- * @ibp: the ibport
- * @qpn: the QP number to look up
- *
- * The caller must hold the rcu_read_lock(), and keep the lock until
- * the returned qp is no longer in use.
- */
-static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp,
-				u32 qpn) __must_hold(RCU)
-{
-	struct hfi1_qp *qp = NULL;
-
-	if (unlikely(qpn <= 1)) {
-		qp = rcu_dereference(ibp->qp[qpn]);
-	} else {
-		struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
-		u32 n = qpn_hash(dev->qp_dev, qpn);
-
-		for (qp = rcu_dereference(dev->qp_dev->qp_table[n]); qp;
-			qp = rcu_dereference(qp->next))
-			if (qp->ibqp.qp_num == qpn)
-				break;
-	}
-	return qp;
-}
-
-/**
- * clear_ahg - reset ahg status in qp
- * @qp - qp pointer
- */
-static inline void clear_ahg(struct hfi1_qp *qp)
-{
-	qp->s_hdr->ahgcount = 0;
-	qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
-	if (qp->s_sde && qp->s_ahgidx >= 0)
-		sdma_ahg_free(qp->s_sde, qp->s_ahgidx);
+	priv->s_hdr->ahgcount = 0;
+	qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
+	if (priv->s_sde && qp->s_ahgidx >= 0)
+		sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
 	qp->s_ahgidx = -1;
 }
 
 /**
- * hfi1_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err);
-
-/**
- * hfi1_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_udata *udata);
-
-int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_qp_init_attr *init_attr);
-
-/**
  * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
  * @qp: the queue pair to compute the AETH for
  *
  * Returns the AETH.
  */
-__be32 hfi1_compute_aeth(struct hfi1_qp *qp);
+__be32 hfi1_compute_aeth(struct rvt_qp *qp);
 
 /**
  * hfi1_create_qp - create a queue pair for a device
@@ -179,45 +90,23 @@
 			     struct ib_qp_init_attr *init_attr,
 			     struct ib_udata *udata);
 /**
- * hfi1_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int hfi1_destroy_qp(struct ib_qp *ibqp);
-
-/**
  * hfi1_get_credit - flush the send work queue of a QP
  * @qp: the qp who's send work queue to flush
  * @aeth: the Acknowledge Extended Transport Header
  *
  * The QP s_lock should be held.
  */
-void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth);
-
-/**
- * hfi1_qp_init - allocate QP tables
- * @dev: a pointer to the hfi1_ibdev
- */
-int hfi1_qp_init(struct hfi1_ibdev *dev);
-
-/**
- * hfi1_qp_exit - free the QP related structures
- * @dev: a pointer to the hfi1_ibdev
- */
-void hfi1_qp_exit(struct hfi1_ibdev *dev);
+void hfi1_get_credit(struct rvt_qp *qp, u32 aeth);
 
 /**
  * hfi1_qp_wakeup - wake up on the indicated event
  * @qp: the QP
  * @flag: flag the qp on which the qp is stalled
  */
-void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag);
+void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag);
 
-struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5);
+struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5);
+struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
 
 struct qp_iter;
 
@@ -244,43 +133,28 @@
  * qp_comm_est - handle trap with QP established
  * @qp: the QP
  */
-void qp_comm_est(struct hfi1_qp *qp);
+void qp_comm_est(struct rvt_qp *qp);
 
-/**
- * _hfi1_schedule_send - schedule progress
- * @qp: the QP
- *
- * This schedules qp progress w/o regard to the s_flags.
- *
- * It is only used in the post send, which doesn't hold
- * the s_lock.
+void _hfi1_schedule_send(struct rvt_qp *qp);
+void hfi1_schedule_send(struct rvt_qp *qp);
+
+void hfi1_migrate_qp(struct rvt_qp *qp);
+
+/*
+ * Functions provided by hfi1 driver for rdmavt to use
  */
-static inline void _hfi1_schedule_send(struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp =
-		to_iport(qp->ibqp.device, qp->port_num);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-
-	iowait_schedule(&qp->s_iowait, ppd->hfi1_wq,
-			qp->s_sde ?
-			qp->s_sde->cpu :
-			cpumask_first(cpumask_of_node(dd->assigned_node_id)));
-}
-
-/**
- * hfi1_schedule_send - schedule progress
- * @qp: the QP
- *
- * This schedules qp progress and caller should hold
- * the s_lock.
- */
-static inline void hfi1_schedule_send(struct hfi1_qp *qp)
-{
-	if (hfi1_send_ok(qp))
-		_hfi1_schedule_send(qp);
-}
-
-void hfi1_migrate_qp(struct hfi1_qp *qp);
-
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		    gfp_t gfp);
+void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+unsigned free_all_qps(struct rvt_dev_info *rdi);
+void notify_qp_reset(struct rvt_qp *qp);
+int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		       struct ib_qp_attr *attr);
+void flush_qp_waiters(struct rvt_qp *qp);
+void notify_error_qp(struct rvt_qp *qp);
+void stop_send_queue(struct rvt_qp *qp);
+void quiesce_qp(struct rvt_qp *qp);
+u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
+int mtu_to_path_mtu(u32 mtu);
+void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
 #endif /* _QP_H */

diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c
index 6326a91..9ed1963 100644
--- a/drivers/staging/rdma/hfi1/qsfp.c
+++ b/drivers/staging/rdma/hfi1/qsfp.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -62,7 +59,7 @@
 #define I2C_MAX_RETRY 4
 
 /*
- * Unlocked i2c write.  Must hold dd->qsfp_i2c_mutex.
+ * Raw i2c write.  No set-up or lock checking.
  */
 static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 		       int offset, void *bp, int len)
@@ -71,14 +68,6 @@
 	int ret, cnt;
 	u8 *buff = bp;
 
-	/* Make sure TWSI bus is in sane state. */
-	ret = hfi1_twsi_reset(dd, target);
-	if (ret) {
-		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C interface Reset for write failed\n");
-		return -EIO;
-	}
-
 	cnt = 0;
 	while (cnt < len) {
 		int wlen = len - cnt;
@@ -99,48 +88,45 @@
 	return cnt;
 }
 
+/*
+ * Caller must hold the i2c chain resource.
+ */
 int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
 	      void *bp, int len)
 {
-	struct hfi1_devdata *dd = ppd->dd;
 	int ret;
 
-	ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
-	if (!ret) {
-		ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len);
-		mutex_unlock(&dd->qsfp_i2c_mutex);
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "I2C chain %d write interface reset failed\n",
+				 target);
+		return ret;
 	}
 
-	return ret;
+	return __i2c_write(ppd, target, i2c_addr, offset, bp, len);
 }
 
 /*
- * Unlocked i2c read.  Must hold dd->qsfp_i2c_mutex.
+ * Raw i2c read.  No set-up or lock checking.
  */
 static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 		      int offset, void *bp, int len)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	int ret, cnt, pass = 0;
-	int stuck = 0;
-	u8 *buff = bp;
-
-	/* Make sure TWSI bus is in sane state. */
-	ret = hfi1_twsi_reset(dd, target);
-	if (ret) {
-		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C interface Reset for read failed\n");
-		ret = -EIO;
-		stuck = 1;
-		goto exit;
-	}
+	int orig_offset = offset;
 
 	cnt = 0;
 	while (cnt < len) {
 		int rlen = len - cnt;
 
 		ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset,
-				       buff + cnt, rlen);
+				       bp + cnt, rlen);
 		/* Some QSFP's fail first try. Retry as experiment */
 		if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY)
 			continue;
@@ -156,14 +142,11 @@
 	ret = cnt;
 
 exit:
-	if (stuck)
-		dd_dev_err(dd, "I2C interface bus stuck non-idle\n");
-
-	if (pass >= I2C_MAX_RETRY && ret)
+	if (ret < 0) {
 		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C failed even retrying\n");
-	else if (pass)
-		hfi1_dev_porterr(dd, ppd->port, "I2C retries: %d\n", pass);
+				 "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n",
+				 target, i2c_addr, orig_offset, len);
+	}
 
 	/* Must wait min 20us between qsfp i2c transactions */
 	udelay(20);
@@ -171,21 +154,35 @@
 	return ret;
 }
 
+/*
+ * Caller must hold the i2c chain resource.
+ */
 int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
 	     void *bp, int len)
 {
-	struct hfi1_devdata *dd = ppd->dd;
 	int ret;
 
-	ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
-	if (!ret) {
-		ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len);
-		mutex_unlock(&dd->qsfp_i2c_mutex);
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "I2C chain %d read interface reset failed\n",
+				 target);
+		return ret;
 	}
 
-	return ret;
+	return __i2c_read(ppd, target, i2c_addr, offset, bp, len);
 }
 
+/*
+ * Write page n, offset m of QSFP memory as defined by SFF 8636
+ * by writing @addr = ((256 * n) + m)
+ *
+ * Caller must hold the i2c chain resource.
+ */
 int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	       int len)
 {
@@ -195,50 +192,81 @@
 	int ret;
 	u8 page;
 
-	ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
-	if (ret)
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "QSFP chain %d write interface reset failed\n",
+				 target);
 		return ret;
+	}
 
 	while (count < len) {
 		/*
-		 * Set the qsfp page based on a zero-based addresss
+		 * Set the qsfp page based on a zero-based address
 		 * and a page size of QSFP_PAGESIZE bytes.
 		 */
 		page = (u8)(addr / QSFP_PAGESIZE);
 
-		ret = __i2c_write(ppd, target, QSFP_DEV,
-					QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
 		if (ret != 1) {
-			hfi1_dev_porterr(
-			ppd->dd,
-			ppd->port,
-			"can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
+			hfi1_dev_porterr(ppd->dd, ppd->port,
+					 "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
+					 target, ret);
 			ret = -EIO;
 			break;
 		}
 
-		/* truncate write to end of page if crossing page boundary */
 		offset = addr % QSFP_PAGESIZE;
 		nwrite = len - count;
-		if ((offset + nwrite) > QSFP_PAGESIZE)
-			nwrite = QSFP_PAGESIZE - offset;
+		/* truncate write to boundary if crossing boundary */
+		if (((addr % QSFP_RW_BOUNDARY) + nwrite) > QSFP_RW_BOUNDARY)
+			nwrite = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
 
-		ret = __i2c_write(ppd, target, QSFP_DEV, offset, bp + count,
-					nwrite);
-		if (ret <= 0)	/* stop on error or nothing read */
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  offset, bp + count, nwrite);
+		if (ret <= 0)	/* stop on error or nothing written */
 			break;
 
 		count += ret;
 		addr += ret;
 	}
 
-	mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
-
 	if (ret < 0)
 		return ret;
 	return count;
 }
 
+/*
+ * Perform a stand-alone single QSFP write.  Acquire the resource, do the
+ * read, then release the resource.
+ */
+int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		   int len)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u32 resource = qsfp_resource(dd);
+	int ret;
+
+	ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
+	if (ret)
+		return ret;
+	ret = qsfp_write(ppd, target, addr, bp, len);
+	release_chip_resource(dd, resource);
+
+	return ret;
+}
+
+/*
+ * Access page n, offset m of QSFP memory as defined by SFF 8636
+ * by reading @addr = ((256 * n) + m)
+ *
+ * Caller must hold the i2c chain resource.
+ */
 int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	      int len)
 {
@@ -248,9 +276,17 @@
 	int ret;
 	u8 page;
 
-	ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
-	if (ret)
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "QSFP chain %d read interface reset failed\n",
+				 target);
 		return ret;
+	}
 
 	while (count < len) {
 		/*
@@ -258,25 +294,26 @@
 		 * and a page size of QSFP_PAGESIZE bytes.
 		 */
 		page = (u8)(addr / QSFP_PAGESIZE);
-		ret = __i2c_write(ppd, target, QSFP_DEV,
-					QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
 		if (ret != 1) {
-			hfi1_dev_porterr(
-			ppd->dd,
-			ppd->port,
-			"can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
+			hfi1_dev_porterr(ppd->dd, ppd->port,
+					 "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
+					 target, ret);
 			ret = -EIO;
 			break;
 		}
 
-		/* truncate read to end of page if crossing page boundary */
 		offset = addr % QSFP_PAGESIZE;
 		nread = len - count;
-		if ((offset + nread) > QSFP_PAGESIZE)
-			nread = QSFP_PAGESIZE - offset;
+		/* truncate read to boundary if crossing boundary */
+		if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY)
+			nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
 
-		ret = __i2c_read(ppd, target, QSFP_DEV, offset, bp + count,
-					nread);
+		/* QSFPs require a 5-10msec delay after write operations */
+		mdelay(5);
+		ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				 offset, bp + count, nread);
 		if (ret <= 0)	/* stop on error or nothing read */
 			break;
 
@@ -284,17 +321,40 @@
 		addr += ret;
 	}
 
-	mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
-
 	if (ret < 0)
 		return ret;
 	return count;
 }
 
 /*
+ * Perform a stand-alone single QSFP read.  Acquire the resource, do the
+ * read, then release the resource.
+ */
+int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		  int len)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u32 resource = qsfp_resource(dd);
+	int ret;
+
+	ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
+	if (ret)
+		return ret;
+	ret = qsfp_read(ppd, target, addr, bp, len);
+	release_chip_resource(dd, resource);
+
+	return ret;
+}
+
+/*
  * This function caches the QSFP memory range in 128 byte chunks.
  * As an example, the next byte after address 255 is byte 128 from
  * upper page 01H (if existing) rather than byte 0 from lower page 00H.
+ * Access page n, offset m of QSFP memory as defined by SFF 8636
+ * in the cache by reading byte ((128 * n) + m)
+ * The calls to qsfp_{read,write} in this function correctly handle the
+ * address map difference between this mapping and the mapping implemented
+ * by those functions
  */
 int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
 {
@@ -304,79 +364,84 @@
 	u8 *cache = &cp->cache[0];
 
 	/* ensure sane contents on invalid reads, for cable swaps */
-	memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
-	dd_dev_info(ppd->dd, "%s: called\n", __func__);
+	memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
+	spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
+	ppd->qsfp_info.cache_valid = 0;
+	spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
+
 	if (!qsfp_mod_present(ppd)) {
 		ret = -ENODEV;
-		goto bail;
+		goto bail_no_release;
 	}
 
-	ret = qsfp_read(ppd, target, 0, cache, 256);
-	if (ret != 256) {
+	ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
+	if (ret)
+		goto bail_no_release;
+
+	ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE);
+	if (ret != QSFP_PAGESIZE) {
 		dd_dev_info(ppd->dd,
-			"%s: Read of pages 00H failed, expected 256, got %d\n",
-			__func__, ret);
+			    "%s: Page 0 read failed, expected %d, got %d\n",
+			    __func__, QSFP_PAGESIZE, ret);
 		goto bail;
 	}
 
-	if (cache[0] != 0x0C && cache[0] != 0x0D)
-		goto bail;
-
 	/* Is paging enabled? */
 	if (!(cache[2] & 4)) {
-
 		/* Paging enabled, page 03 required */
 		if ((cache[195] & 0xC0) == 0xC0) {
 			/* all */
 			ret = qsfp_read(ppd, target, 384, cache + 256, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 640, cache + 384, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else if ((cache[195] & 0x80) == 0x80) {
 			/* only page 2 and 3 */
 			ret = qsfp_read(ppd, target, 640, cache + 384, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else if ((cache[195] & 0x40) == 0x40) {
 			/* only page 1 and 3 */
 			ret = qsfp_read(ppd, target, 384, cache + 256, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else {
 			/* only page 3 */
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		}
 	}
 
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+
 	spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 	ppd->qsfp_info.cache_valid = 1;
 	ppd->qsfp_info.cache_refresh_required = 0;
@@ -385,7 +450,9 @@
 	return 0;
 
 bail:
-	memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+bail_no_release:
+	memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
 	return ret;
 }
 
@@ -434,7 +501,7 @@
 
 	if (port_num > dd->num_pports || port_num < 1) {
 		dd_dev_info(dd, "%s: Invalid port number %d\n",
-				__func__, port_num);
+			    __func__, port_num);
 		ret = -EINVAL;
 		goto set_zeroes;
 	}
@@ -485,7 +552,6 @@
 	lenstr[1] = '\0';
 
 	if (ppd->qsfp_info.cache_valid) {
-
 		if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
 			sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
 
@@ -529,7 +595,7 @@
 
 			memcpy(bin_buff, &cache[bidx], QSFP_DUMP_CHUNK);
 			for (iidx = 0; iidx < QSFP_DUMP_CHUNK; ++iidx) {
-				sofar += scnprintf(buf + sofar, len-sofar,
+				sofar += scnprintf(buf + sofar, len - sofar,
 					" %02X", bin_buff[iidx]);
 			}
 			sofar += scnprintf(buf + sofar, len - sofar, "\n");

diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h
index d30c2a6..831fe4c 100644
--- a/drivers/staging/rdma/hfi1/qsfp.h
+++ b/drivers/staging/rdma/hfi1/qsfp.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -59,23 +56,28 @@
  * Below are masks for QSFP pins.  Pins are the same for HFI0 and HFI1.
  * _N means asserted low
  */
-#define QSFP_HFI0_I2CCLK    (1 << 0)
-#define QSFP_HFI0_I2CDAT    (1 << 1)
-#define QSFP_HFI0_RESET_N   (1 << 2)
-#define QSFP_HFI0_INT_N	    (1 << 3)
-#define QSFP_HFI0_MODPRST_N (1 << 4)
+#define QSFP_HFI0_I2CCLK    BIT(0)
+#define QSFP_HFI0_I2CDAT    BIT(1)
+#define QSFP_HFI0_RESET_N   BIT(2)
+#define QSFP_HFI0_INT_N	    BIT(3)
+#define QSFP_HFI0_MODPRST_N BIT(4)
 
 /* QSFP is paged at 256 bytes */
 #define QSFP_PAGESIZE 256
+/* Reads/writes cannot cross 128 byte boundaries */
+#define QSFP_RW_BOUNDARY 128
+
+/* number of bytes in i2c offset for QSFP devices */
+#define __QSFP_OFFSET_SIZE 1                           /* num address bytes */
+#define QSFP_OFFSET_SIZE (__QSFP_OFFSET_SIZE << 8)     /* shifted value */
 
 /* Defined fields that Intel requires of qualified cables */
 /* Byte 0 is Identifier, not checked */
 /* Byte 1 is reserved "status MSB" */
-/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */
-/*
- * Rest of first 128 not used, although 127 is reserved for page select
- * if module is not "Flat memory".
- */
+#define QSFP_TX_CTRL_BYTE_OFFS 86
+#define QSFP_PWR_CTRL_BYTE_OFFS 93
+#define QSFP_CDR_CTRL_BYTE_OFFS 98
+
 #define QSFP_PAGE_SELECT_BYTE_OFFS 127
 /* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
 #define QSFP_MOD_ID_OFFS 128
@@ -87,7 +89,8 @@
 /* Byte 130 is Connector type. Not Intel req'd */
 /* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */
 /* Byte 139 is encoding. code 0x01 is 8b10b. Not Intel req'd */
-/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not Intel req'd */
+/* byte 140 is nominal bit-rate, in units of 100Mbits/sec */
+#define QSFP_NOM_BIT_RATE_100_OFFS 140
 /* Byte 141 is Extended Rate Select. Not Intel req'd */
 /* Bytes 142..145 are lengths for various fiber types. Not Intel req'd */
 /* Byte 146 is length for Copper. Units of 1 meter */
@@ -135,11 +138,18 @@
  */
 #define QSFP_ATTEN_OFFS 186
 #define QSFP_ATTEN_LEN 2
-/* Bytes 188,189 are Wavelength tolerance, not Intel req'd */
+/*
+ * Bytes 188,189 are Wavelength tolerance, if optical
+ * If copper, they are attenuation in dB:
+ * Byte 188 is at 12.5 Gb/s, Byte 189 at 25 Gb/s
+ */
+#define QSFP_CU_ATTEN_7G_OFFS 188
+#define QSFP_CU_ATTEN_12G_OFFS 189
 /* Byte 190 is Max Case Temp. Not Intel req'd */
 /* Byte 191 is LSB of sum of bytes 128..190. Not Intel req'd */
 #define QSFP_CC_OFFS 191
-/* Bytes 192..195 are Options implemented in qsfp. Not Intel req'd */
+#define QSFP_EQ_INFO_OFFS 193
+#define QSFP_CDR_INFO_OFFS 194
 /* Bytes 196..211 are Serial Number, String */
 #define QSFP_SN_OFFS 196
 #define QSFP_SN_LEN 16
@@ -150,6 +160,8 @@
 #define QSFP_LOT_OFFS 218
 #define QSFP_LOT_LEN 2
 /* Bytes 220, 221 indicate monitoring options, Not Intel req'd */
+/* Byte 222 indicates nominal bitrate in units of 250Mbits/sec */
+#define QSFP_NOM_BIT_RATE_250_OFFS 222
 /* Byte 223 is LSB of sum of bytes 192..222 */
 #define QSFP_CC_EXT_OFFS 223
 
@@ -191,6 +203,7 @@
  */
 
 #define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
+#define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4)
 #define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
 #define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
 
@@ -198,10 +211,12 @@
 	/* Helps to find our way */
 	struct hfi1_pportdata *ppd;
 	struct work_struct qsfp_work;
-	u8 cache[QSFP_MAX_NUM_PAGES*128];
+	u8 cache[QSFP_MAX_NUM_PAGES * 128];
+	/* protect qsfp data */
 	spinlock_t qsfp_lock;
 	u8 check_interrupt_flags;
-	u8 qsfp_interrupt_functional;
+	u8 reset_needed;
+	u8 limiting_active;
 	u8 cache_valid;
 	u8 cache_refresh_required;
 };
@@ -220,3 +235,7 @@
 	       int len);
 int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	      int len);
+int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		   int len);
+int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		  int len);

diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index 6f4a155..0d7e101 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -49,18 +46,151 @@
  */
 
 #include <linux/io.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
 
 #include "hfi.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 #include "trace.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
 
-static void rc_timeout(unsigned long arg);
+/**
+ * hfi1_add_retry_timer - add/start a retry timer
+ * @qp - the QP
+ *
+ * add a retry timer on the QP
+ */
+static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
 
-static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe,
+	qp->s_flags |= RVT_S_TIMER;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	qp->s_timer.expires = jiffies + qp->timeout_jiffies +
+			      rdi->busy_jiffies;
+	add_timer(&qp->s_timer);
+}
+
+/**
+ * hfi1_add_rnr_timer - add/start an rnr timer
+ * @qp - the QP
+ * @to - timeout in usecs
+ *
+ * add an rnr timer on the QP
+ */
+void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	qp->s_flags |= RVT_S_WAIT_RNR;
+	qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+	add_timer(&priv->s_rnr_timer);
+}
+
+/**
+ * hfi1_mod_retry_timer - mod a retry timer
+ * @qp - the QP
+ *
+ * Modify a potentially already running retry
+ * timer
+ */
+static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	qp->s_flags |= RVT_S_TIMER;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
+		  rdi->busy_jiffies);
+}
+
+/**
+ * hfi1_stop_retry_timer - stop a retry timer
+ * @qp - the QP
+ *
+ * stop a retry timer and return if the timer
+ * had been pending.
+ */
+static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
+{
+	int rval = 0;
+
+	/* Remove QP from retry */
+	if (qp->s_flags & RVT_S_TIMER) {
+		qp->s_flags &= ~RVT_S_TIMER;
+		rval = del_timer(&qp->s_timer);
+	}
+	return rval;
+}
+
+/**
+ * hfi1_stop_rc_timers - stop all timers
+ * @qp - the QP
+ *
+ * stop any pending timers
+ */
+void hfi1_stop_rc_timers(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	/* Remove QP from all timers */
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+		del_timer(&qp->s_timer);
+		del_timer(&priv->s_rnr_timer);
+	}
+}
+
+/**
+ * hfi1_stop_rnr_timer - stop an rnr timer
+ * @qp - the QP
+ *
+ * stop an rnr timer and return if the timer
+ * had been pending.
+ */
+static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
+{
+	int rval = 0;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	/* Remove QP from rnr timer */
+	if (qp->s_flags & RVT_S_WAIT_RNR) {
+		qp->s_flags &= ~RVT_S_WAIT_RNR;
+		rval = del_timer(&priv->s_rnr_timer);
+	}
+	return rval;
+}
+
+/**
+ * hfi1_del_timers_sync - wait for any timeout routines to exit
+ * @qp - the QP
+ */
+void hfi1_del_timers_sync(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	del_timer_sync(&qp->s_timer);
+	del_timer_sync(&priv->s_rnr_timer);
+}
+
+/* only opcode mask for adaptive pio */
+const u32 rc_only_opcode =
+	BIT(OP(SEND_ONLY) & 0x1f) |
+	BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
+	BIT(OP(ACKNOWLEDGE & 0x1f)) |
+	BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
+	BIT(OP(COMPARE_SWAP & 0x1f)) |
+	BIT(OP(FETCH_ADD & 0x1f));
+
+static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 		       u32 psn, u32 pmtu)
 {
 	u32 len;
@@ -74,38 +204,32 @@
 	return wqe->length - len;
 }
 
-static void start_timer(struct hfi1_qp *qp)
-{
-	qp->s_flags |= HFI1_S_TIMER;
-	qp->s_timer.function = rc_timeout;
-	/* 4.096 usec. * (1 << qp->timeout) */
-	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
-	add_timer(&qp->s_timer);
-}
-
 /**
  * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
  * @dev: the device for this QP
  * @qp: a pointer to the QP
  * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
+ * @ps: the xmit packet state
  *
  * Return 1 if constructed; otherwise, return 0.
  * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
-		       struct hfi1_other_headers *ohdr, u32 pmtu)
+static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
+		       struct hfi1_other_headers *ohdr,
+		       struct hfi1_pkt_state *ps)
 {
-	struct hfi1_ack_entry *e;
+	struct rvt_ack_entry *e;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
 	u32 bth2;
 	int middle = 0;
+	u32 pmtu = qp->pmtu;
+	struct hfi1_qp_priv *priv = qp->priv;
 
 	/* Don't send an ACK if we aren't supposed to. */
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto bail;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -116,7 +240,7 @@
 	case OP(RDMA_READ_RESPONSE_ONLY):
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 		if (e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		/* FALLTHROUGH */
@@ -133,7 +257,7 @@
 	case OP(ACKNOWLEDGE):
 		/* Check for no next entry in the queue. */
 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-			if (qp->s_flags & HFI1_S_ACK_PENDING)
+			if (qp->s_flags & RVT_S_ACK_PENDING)
 				goto normal;
 			goto bail;
 		}
@@ -152,9 +276,9 @@
 				goto bail;
 			}
 			/* Copy SGE state in case we need to resend */
-			qp->s_rdma_mr = e->rdma_sge.mr;
-			if (qp->s_rdma_mr)
-				hfi1_get_mr(qp->s_rdma_mr);
+			ps->s_txreq->mr = e->rdma_sge.mr;
+			if (ps->s_txreq->mr)
+				rvt_get_mr(ps->s_txreq->mr);
 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
 			qp->s_ack_rdma_sge.num_sge = 1;
 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -191,9 +315,9 @@
 		/* FALLTHROUGH */
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
-		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
-		if (qp->s_rdma_mr)
-			hfi1_get_mr(qp->s_rdma_mr);
+		ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
+		if (ps->s_txreq->mr)
+			rvt_get_mr(ps->s_txreq->mr);
 		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu) {
 			len = pmtu;
@@ -218,7 +342,7 @@
 		 * (see above).
 		 */
 		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~HFI1_S_ACK_PENDING;
+		qp->s_flags &= ~RVT_S_ACK_PENDING;
 		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
 			ohdr->u.aeth =
@@ -234,20 +358,23 @@
 	}
 	qp->s_rdma_ack_cnt++;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_size = len;
-	hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle);
+	hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
 	return 1;
 
 bail:
 	qp->s_ack_state = OP(ACKNOWLEDGE);
 	/*
 	 * Ensure s_rdma_ack_cnt changes are committed prior to resetting
-	 * HFI1_S_RESP_PENDING
+	 * RVT_S_RESP_PENDING
 	 */
 	smp_wmb();
-	qp->s_flags &= ~(HFI1_S_RESP_PENDING
-				| HFI1_S_ACK_PENDING
-				| HFI1_S_AHG_VALID);
+	qp->s_flags &= ~(RVT_S_RESP_PENDING
+				| RVT_S_ACK_PENDING
+				| RVT_S_AHG_VALID);
 	return 0;
 }
 
@@ -255,14 +382,17 @@
  * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
  *
+ * Assumes s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_rc_req(struct hfi1_qp *qp)
+int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_sge_state *ss;
-	struct hfi1_swqe *wqe;
+	struct rvt_sge_state *ss;
+	struct rvt_swqe *wqe;
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	u32 hwords = 5;
 	u32 len;
@@ -270,51 +400,48 @@
 	u32 bth2;
 	u32 pmtu = qp->pmtu;
 	char newreq;
-	unsigned long flags;
-	int ret = 0;
 	int middle = 0;
 	int delta;
 
-	ohdr = &qp->s_hdr->ibh.u.oth;
-	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout re-sends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ohdr = &ps->s_txreq->phdr.hdr.u.oth;
+	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 
 	/* Sending responses has higher priority over sending requests. */
-	if ((qp->s_flags & HFI1_S_RESP_PENDING) &&
-	    make_rc_ack(dev, qp, ohdr, pmtu))
-		goto done;
+	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
+	    make_rc_ack(dev, qp, ohdr, ps))
+		return 1;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
 		clear_ahg(qp);
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
-		goto done;
+		goto done_free_tx;
 	}
 
-	if (qp->s_flags & (HFI1_S_WAIT_RNR | HFI1_S_WAIT_ACK))
+	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
 		goto bail;
 
 	if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 		if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
-			qp->s_flags |= HFI1_S_WAIT_PSN;
+			qp->s_flags |= RVT_S_WAIT_PSN;
 			goto bail;
 		}
 		qp->s_sending_psn = qp->s_psn;
@@ -322,10 +449,10 @@
 	}
 
 	/* Send a request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	switch (qp->s_state) {
 	default:
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/*
 		 * Resend an old request or start a new one.
@@ -347,11 +474,11 @@
 			 */
 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 			    qp->s_num_rd_atomic) {
-				qp->s_flags |= HFI1_S_WAIT_FENCE;
+				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 			}
-			wqe->psn = qp->s_next_psn;
 			newreq = 1;
+			qp->s_psn = wqe->psn;
 		}
 		/*
 		 * Note that we have to be careful not to modify the
@@ -365,21 +492,19 @@
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_SEND)
+			if (wqe->wr.opcode == IB_WR_SEND) {
 				qp->s_state = OP(SEND_ONLY);
-			else {
+			} else {
 				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the BTH */
 				ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -393,14 +518,14 @@
 			break;
 
 		case IB_WR_RDMA_WRITE:
-			if (newreq && !(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 				qp->s_lsn++;
 			/* FALLTHROUGH */
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
 			ohdr->u.rc.reth.vaddr =
@@ -409,16 +534,14 @@
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after RETH */
@@ -440,19 +563,12 @@
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= HFI1_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
 			}
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->rdma_wr.remote_addr);
@@ -477,13 +593,12 @@
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= HFI1_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
 			}
 			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
@@ -526,11 +641,8 @@
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
-		else {
+		else
 			qp->s_psn++;
-			if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
@@ -550,8 +662,6 @@
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = mask_psn(qp->s_psn++);
-		if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -559,9 +669,9 @@
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_SEND)
+		if (wqe->wr.opcode == IB_WR_SEND) {
 			qp->s_state = OP(SEND_LAST);
-		else {
+		} else {
 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -592,8 +702,6 @@
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = mask_psn(qp->s_psn++);
-		if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -601,9 +709,9 @@
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
+		} else {
 			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -648,13 +756,14 @@
 	delta = delta_psn(bth2, wqe->psn);
 	if (delta && delta % HFI1_PSN_CREDIT == 0)
 		bth2 |= IB_BTH_REQ_ACK;
-	if (qp->s_flags & HFI1_S_SEND_ONE) {
-		qp->s_flags &= ~HFI1_S_SEND_ONE;
-		qp->s_flags |= HFI1_S_WAIT_ACK;
+	if (qp->s_flags & RVT_S_SEND_ONE) {
+		qp->s_flags &= ~RVT_S_SEND_ONE;
+		qp->s_flags |= RVT_S_WAIT_ACK;
 		bth2 |= IB_BTH_REQ_ACK;
 	}
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_sge = ss;
 	qp->s_cur_size = len;
 	hfi1_make_ruc_header(
@@ -662,16 +771,25 @@
 		ohdr,
 		bth0 | (qp->s_state << 24),
 		bth2,
-		middle);
-done:
-	ret = 1;
-	goto unlock;
+		middle,
+		ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+	return 1;
+
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /**
@@ -682,7 +800,7 @@
  * Note that RDMA reads and atomics are handled in the
  * send side QP state and tasklet.
  */
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
+void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
 		      int is_fecn)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -700,7 +818,7 @@
 	unsigned long flags;
 
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-	if (qp->s_flags & HFI1_S_RESP_PENDING)
+	if (qp->s_flags & RVT_S_RESP_PENDING)
 		goto queue_ack;
 
 	/* Ensure s_rdma_ack_cnt changes are committed */
@@ -763,7 +881,7 @@
 		goto queue_ack;
 	}
 
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
+	trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
 
 	/* write the pbc and data */
 	ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
@@ -771,13 +889,13 @@
 	return;
 
 queue_ack:
-	this_cpu_inc(*ibp->rc_qacks);
+	this_cpu_inc(*ibp->rvp.rc_qacks);
 	spin_lock_irqsave(&qp->s_lock, flags);
-	qp->s_flags |= HFI1_S_ACK_PENDING | HFI1_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 	qp->s_nak_state = qp->r_nak_state;
 	qp->s_ack_psn = qp->r_ack_psn;
 	if (is_fecn)
-		qp->s_flags |= HFI1_S_ECN;
+		qp->s_flags |= RVT_S_ECN;
 
 	/* Schedule the send tasklet. */
 	hfi1_schedule_send(qp);
@@ -793,10 +911,10 @@
  * for the given QP.
  * Called at interrupt level with the QP s_lock held.
  */
-static void reset_psn(struct hfi1_qp *qp, u32 psn)
+static void reset_psn(struct rvt_qp *qp, u32 psn)
 {
 	u32 n = qp->s_acked;
-	struct hfi1_swqe *wqe = get_swqe_ptr(qp, n);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 	u32 opcode;
 
 	qp->s_cur = n;
@@ -819,7 +937,7 @@
 			n = 0;
 		if (n == qp->s_tail)
 			break;
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		diff = cmp_psn(psn, wqe->psn);
 		if (diff < 0)
 			break;
@@ -865,23 +983,23 @@
 done:
 	qp->s_psn = psn;
 	/*
-	 * Set HFI1_S_WAIT_PSN as rc_complete() may start the timer
+	 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
 	 * asynchronously before the send tasklet can get scheduled.
 	 * Doing it in hfi1_make_rc_req() is too late.
 	 */
 	if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	    (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
-		qp->s_flags |= HFI1_S_WAIT_PSN;
-	qp->s_flags &= ~HFI1_S_AHG_VALID;
+		qp->s_flags |= RVT_S_WAIT_PSN;
+	qp->s_flags &= ~RVT_S_AHG_VALID;
 }
 
 /*
  * Back up requester to resend the last un-ACKed request.
  * The QP r_lock and s_lock should be held and interrupts disabled.
  */
-static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait)
+static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 {
-	struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	struct hfi1_ibport *ibp;
 
 	if (qp->s_retry == 0) {
@@ -890,42 +1008,44 @@
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
 			hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-			hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			return;
-		} else /* need to handle delayed completion */
+		} else { /* need to handle delayed completion */
 			return;
-	} else
+		}
+	} else {
 		qp->s_retry--;
+	}
 
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
-		ibp->n_rc_resends++;
+		ibp->rvp.n_rc_resends++;
 	else
-		ibp->n_rc_resends += delta_psn(qp->s_psn, psn);
+		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
 
-	qp->s_flags &= ~(HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR |
-			 HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_PSN |
-			 HFI1_S_WAIT_ACK);
+	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
+			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
+			 RVT_S_WAIT_ACK);
 	if (wait)
-		qp->s_flags |= HFI1_S_SEND_ONE;
+		qp->s_flags |= RVT_S_SEND_ONE;
 	reset_psn(qp, psn);
 }
 
 /*
  * This is called from s_timer for missing responses.
  */
-static void rc_timeout(unsigned long arg)
+void hfi1_rc_timeout(unsigned long arg)
 {
-	struct hfi1_qp *qp = (struct hfi1_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	struct hfi1_ibport *ibp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->r_lock, flags);
 	spin_lock(&qp->s_lock);
-	if (qp->s_flags & HFI1_S_TIMER) {
+	if (qp->s_flags & RVT_S_TIMER) {
 		ibp = to_iport(qp->ibqp.device, qp->port_num);
-		ibp->n_rc_timeouts++;
-		qp->s_flags &= ~HFI1_S_TIMER;
+		ibp->rvp.n_rc_timeouts++;
+		qp->s_flags &= ~RVT_S_TIMER;
 		del_timer(&qp->s_timer);
 		trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1);
 		restart_rc(qp, qp->s_last_psn + 1, 1);
@@ -940,15 +1060,12 @@
  */
 void hfi1_rc_rnr_retry(unsigned long arg)
 {
-	struct hfi1_qp *qp = (struct hfi1_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (qp->s_flags & HFI1_S_WAIT_RNR) {
-		qp->s_flags &= ~HFI1_S_WAIT_RNR;
-		del_timer(&qp->s_timer);
-		hfi1_schedule_send(qp);
-	}
+	hfi1_stop_rnr_timer(qp);
+	hfi1_schedule_send(qp);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
@@ -956,14 +1073,14 @@
  * Set qp->s_sending_psn to the next PSN after the given one.
  * This would be psn+1 except when RDMA reads are present.
  */
-static void reset_sending_psn(struct hfi1_qp *qp, u32 psn)
+static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	u32 n = qp->s_last;
 
 	/* Find the work request corresponding to the given PSN. */
 	for (;;) {
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		if (cmp_psn(psn, wqe->lpsn) <= 0) {
 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
 				qp->s_sending_psn = wqe->lpsn + 1;
@@ -981,16 +1098,16 @@
 /*
  * This should be called with the QP s_lock held and interrupts disabled.
  */
-void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
 {
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct ib_wc wc;
 	unsigned i;
 	u32 opcode;
 	u32 psn;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
 	/* Find out where the BTH is */
@@ -1016,22 +1133,30 @@
 	 */
 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
 	    !(qp->s_flags &
-		(HFI1_S_TIMER | HFI1_S_WAIT_RNR | HFI1_S_WAIT_PSN)) &&
-		(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK))
-		start_timer(qp);
+		(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+		(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+		hfi1_add_retry_timer(qp);
 
 	while (qp->s_last != qp->s_acked) {
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		u32 s_last;
+
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct hfi1_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
-			hfi1_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1039,26 +1164,24 @@
 			wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	}
 	/*
 	 * If we were waiting for sends to complete before re-sending,
 	 * and they are now complete, restart sending.
 	 */
 	trace_hfi1_rc_sendcomplete(qp, psn);
-	if (qp->s_flags & HFI1_S_WAIT_PSN &&
+	if (qp->s_flags & RVT_S_WAIT_PSN &&
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		qp->s_flags &= ~HFI1_S_WAIT_PSN;
+		qp->s_flags &= ~RVT_S_WAIT_PSN;
 		qp->s_sending_psn = qp->s_psn;
 		qp->s_sending_hpsn = qp->s_psn - 1;
 		hfi1_schedule_send(qp);
 	}
 }
 
-static inline void update_last_psn(struct hfi1_qp *qp, u32 psn)
+static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
 {
 	qp->s_last_psn = psn;
 }
@@ -1068,9 +1191,9 @@
  * This is similar to hfi1_send_complete but has to check to be sure
  * that the SGEs are not being referenced if the SWQE is being resent.
  */
-static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
-					  struct hfi1_swqe *wqe,
-					  struct hfi1_ibport *ibp)
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+					 struct rvt_swqe *wqe,
+					 struct hfi1_ibport *ibp)
 {
 	struct ib_wc wc;
 	unsigned i;
@@ -1082,13 +1205,21 @@
 	 */
 	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct hfi1_sge *sge = &wqe->sg_list[i];
+		u32 s_last;
 
-			hfi1_put_mr(sge->mr);
+		for (i = 0; i < wqe->wr.num_sge; i++) {
+			struct rvt_sge *sge = &wqe->sg_list[i];
+
+			rvt_put_mr(sge->mr);
 		}
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1096,14 +1227,12 @@
 			wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	} else {
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
-		this_cpu_inc(*ibp->rc_delayed_comp);
+		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
 		/*
 		 * If send progress not running attempt to progress
 		 * SDMA queue.
@@ -1131,7 +1260,7 @@
 		if (++qp->s_cur >= qp->s_size)
 			qp->s_cur = 0;
 		qp->s_acked = qp->s_cur;
-		wqe = get_swqe_ptr(qp, qp->s_cur);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 		if (qp->s_acked != qp->s_tail) {
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
@@ -1141,7 +1270,7 @@
 			qp->s_acked = 0;
 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
 			qp->s_draining = 0;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	}
 	return wqe;
 }
@@ -1157,21 +1286,16 @@
  * May be called at interrupt level, with the QP s_lock held.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
+static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		     u64 val, struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_ibport *ibp;
 	enum ib_wc_status status;
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
 	int diff;
-
-	/* Remove QP from retry timer */
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
+	unsigned long to;
 
 	/*
 	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
@@ -1182,7 +1306,7 @@
 	ack_psn = psn;
 	if (aeth >> 29)
 		ack_psn--;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 
 	/*
@@ -1200,7 +1324,7 @@
 		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
 		    diff == 0) {
 			ret = 1;
-			goto bail;
+			goto bail_stop;
 		}
 		/*
 		 * If this request is a RDMA read or atomic, and the ACK is
@@ -1217,11 +1341,11 @@
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
 			/* Retry this request. */
-			if (!(qp->r_flags & HFI1_R_RDMAR_SEQ)) {
-				qp->r_flags |= HFI1_R_RDMAR_SEQ;
+			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+				qp->r_flags |= RVT_R_RDMAR_SEQ;
 				restart_rc(qp, qp->s_last_psn + 1, 0);
 				if (list_empty(&qp->rspwait)) {
-					qp->r_flags |= HFI1_R_RSP_SEND;
+					qp->r_flags |= RVT_R_RSP_SEND;
 					atomic_inc(&qp->refcount);
 					list_add_tail(&qp->rspwait,
 						      &rcd->qp_wait_list);
@@ -1231,7 +1355,7 @@
 			 * No need to process the ACK/NAK since we are
 			 * restarting an earlier request.
 			 */
-			goto bail;
+			goto bail_stop;
 		}
 		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
@@ -1244,14 +1368,14 @@
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 			qp->s_num_rd_atomic--;
 			/* Restart sending task if fence is complete */
-			if ((qp->s_flags & HFI1_S_WAIT_FENCE) &&
+			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
 			    !qp->s_num_rd_atomic) {
-				qp->s_flags &= ~(HFI1_S_WAIT_FENCE |
-						 HFI1_S_WAIT_ACK);
+				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+						 RVT_S_WAIT_ACK);
 				hfi1_schedule_send(qp);
-			} else if (qp->s_flags & HFI1_S_WAIT_RDMAR) {
-				qp->s_flags &= ~(HFI1_S_WAIT_RDMAR |
-						 HFI1_S_WAIT_ACK);
+			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
+						 RVT_S_WAIT_ACK);
 				hfi1_schedule_send(qp);
 			}
 		}
@@ -1262,40 +1386,43 @@
 
 	switch (aeth >> 29) {
 	case 0:         /* ACK */
-		this_cpu_inc(*ibp->rc_acks);
+		this_cpu_inc(*ibp->rvp.rc_acks);
 		if (qp->s_acked != qp->s_tail) {
 			/*
 			 * We are expecting more ACKs so
-			 * reset the re-transmit timer.
+			 * mod the retry timer.
 			 */
-			start_timer(qp);
+			hfi1_mod_retry_timer(qp);
 			/*
 			 * We can stop re-sending the earlier packets and
 			 * continue with the next packet the receiver wants.
 			 */
 			if (cmp_psn(qp->s_psn, psn) <= 0)
 				reset_psn(qp, psn + 1);
-		} else if (cmp_psn(qp->s_psn, psn) <= 0) {
-			qp->s_state = OP(SEND_LAST);
-			qp->s_psn = psn + 1;
+		} else {
+			/* No more acks - kill all timers */
+			hfi1_stop_rc_timers(qp);
+			if (cmp_psn(qp->s_psn, psn) <= 0) {
+				qp->s_state = OP(SEND_LAST);
+				qp->s_psn = psn + 1;
+			}
 		}
-		if (qp->s_flags & HFI1_S_WAIT_ACK) {
-			qp->s_flags &= ~HFI1_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			hfi1_schedule_send(qp);
 		}
 		hfi1_get_credit(qp, aeth);
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
 		qp->s_retry = qp->s_retry_cnt;
 		update_last_psn(qp, psn);
-		ret = 1;
-		goto bail;
+		return 1;
 
 	case 1:         /* RNR NAK */
-		ibp->n_rnr_naks++;
+		ibp->rvp.n_rnr_naks++;
 		if (qp->s_acked == qp->s_tail)
-			goto bail;
-		if (qp->s_flags & HFI1_S_WAIT_RNR)
-			goto bail;
+			goto bail_stop;
+		if (qp->s_flags & RVT_S_WAIT_RNR)
+			goto bail_stop;
 		if (qp->s_rnr_retry == 0) {
 			status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
@@ -1306,28 +1433,27 @@
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 
-		ibp->n_rc_resends += delta_psn(qp->s_psn, psn);
+		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
 
 		reset_psn(qp, psn);
 
-		qp->s_flags &= ~(HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_ACK);
-		qp->s_flags |= HFI1_S_WAIT_RNR;
-		qp->s_timer.function = hfi1_rc_rnr_retry;
-		qp->s_timer.expires = jiffies + usecs_to_jiffies(
+		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
+		hfi1_stop_rc_timers(qp);
+		to =
 			ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) &
-					   HFI1_AETH_CREDIT_MASK]);
-		add_timer(&qp->s_timer);
-		goto bail;
+					   HFI1_AETH_CREDIT_MASK];
+		hfi1_add_rnr_timer(qp, to);
+		return 0;
 
 	case 3:         /* NAK */
 		if (qp->s_acked == qp->s_tail)
-			goto bail;
+			goto bail_stop;
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 		switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) &
 			HFI1_AETH_CREDIT_MASK) {
 		case 0: /* PSN sequence error */
-			ibp->n_seq_naks++;
+			ibp->rvp.n_seq_naks++;
 			/*
 			 * Back up to the responder's expected PSN.
 			 * Note that we might get a NAK in the middle of an
@@ -1340,21 +1466,21 @@
 
 		case 1: /* Invalid Request */
 			status = IB_WC_REM_INV_REQ_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 2: /* Remote Access Error */
 			status = IB_WC_REM_ACCESS_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 3: /* Remote Operation Error */
 			status = IB_WC_REM_OP_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 class_b:
 			if (qp->s_last == qp->s_acked) {
 				hfi1_send_complete(qp, wqe, status);
-				hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			}
 			break;
 
@@ -1364,15 +1490,16 @@
 		}
 		qp->s_retry = qp->s_retry_cnt;
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-		goto bail;
+		goto bail_stop;
 
 	default:                /* 2: reserved */
 reserved:
 		/* Ignore reserved NAK codes. */
-		goto bail;
+		goto bail_stop;
 	}
-
-bail:
+	return ret;
+bail_stop:
+	hfi1_stop_rc_timers(qp);
 	return ret;
 }
 
@@ -1380,18 +1507,15 @@
  * We have seen an out of sequence RDMA read middle or last packet.
  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  */
-static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
+static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
 			 struct hfi1_ctxtdata *rcd)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
+	hfi1_stop_rc_timers(qp);
 
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 
 	while (cmp_psn(psn, wqe->lpsn) > 0) {
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
@@ -1401,11 +1525,11 @@
 		wqe = do_rc_completion(qp, wqe, ibp);
 	}
 
-	ibp->n_rdma_seq++;
-	qp->r_flags |= HFI1_R_RDMAR_SEQ;
+	ibp->rvp.n_rdma_seq++;
+	qp->r_flags |= RVT_R_RDMAR_SEQ;
 	restart_rc(qp, qp->s_last_psn + 1, 0);
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= HFI1_R_RSP_SEND;
+		qp->r_flags |= RVT_R_RSP_SEND;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -1429,11 +1553,11 @@
  */
 static void rc_rcv_resp(struct hfi1_ibport *ibp,
 			struct hfi1_other_headers *ohdr,
-			void *data, u32 tlen, struct hfi1_qp *qp,
+			void *data, u32 tlen, struct rvt_qp *qp,
 			u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
 			struct hfi1_ctxtdata *rcd)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	enum ib_wc_status status;
 	unsigned long flags;
 	int diff;
@@ -1446,7 +1570,8 @@
 	trace_hfi1_rc_ack(qp, psn);
 
 	/* Ignore invalid responses. */
-	if (cmp_psn(psn, qp->s_next_psn) >= 0)
+	smp_read_barrier_depends(); /* see post_one_send */
+	if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
 		goto ack_done;
 
 	/* Ignore duplicate responses. */
@@ -1465,15 +1590,15 @@
 	 * Skip everything other than the PSN we expect, if we are waiting
 	 * for a reply to a restarted RDMA read or atomic op.
 	 */
-	if (qp->r_flags & HFI1_R_RDMAR_SEQ) {
+	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
 		if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
 			goto ack_done;
-		qp->r_flags &= ~HFI1_R_RDMAR_SEQ;
+		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
 	}
 
 	if (unlikely(qp->s_acked == qp->s_tail))
 		goto ack_done;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	status = IB_WC_SUCCESS;
 
 	switch (opcode) {
@@ -1484,14 +1609,15 @@
 		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
 			__be32 *p = ohdr->u.at.atomic_ack_eth;
 
-			val = ((u64) be32_to_cpu(p[0]) << 32) |
+			val = ((u64)be32_to_cpu(p[0]) << 32) |
 				be32_to_cpu(p[1]);
-		} else
+		} else {
 			val = 0;
+		}
 		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
 		/*
@@ -1519,10 +1645,10 @@
 		 * We got a response so update the timeout.
 		 * 4.096 usec. * (1 << qp->timeout)
 		 */
-		qp->s_flags |= HFI1_S_TIMER;
+		qp->s_flags |= RVT_S_TIMER;
 		mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
-		if (qp->s_flags & HFI1_S_WAIT_ACK) {
-			qp->s_flags &= ~HFI1_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			hfi1_schedule_send(qp);
 		}
 
@@ -1536,7 +1662,7 @@
 		qp->s_rdma_read_len -= pmtu;
 		update_last_psn(qp, psn);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
-		hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+		hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
 		goto bail;
 
 	case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1556,7 +1682,7 @@
 		 * have to be careful to copy the data to the right
 		 * location.
 		 */
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
 						  wqe, psn, pmtu);
 		goto read_last;
@@ -1580,9 +1706,9 @@
 		if (unlikely(tlen != qp->s_rdma_read_len))
 			goto ack_len_err;
 		aeth = be32_to_cpu(ohdr->u.aeth);
-		hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+		hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
 		WARN_ON(qp->s_rdma_read_sge.num_sge);
-		(void) do_rc_ack(qp, aeth, psn,
+		(void)do_rc_ack(qp, aeth, psn,
 				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
 		goto ack_done;
 	}
@@ -1600,7 +1726,7 @@
 ack_err:
 	if (qp->s_last == qp->s_acked) {
 		hfi1_send_complete(qp, wqe, status);
-		hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
 ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1609,22 +1735,24 @@
 }
 
 static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
-				  struct hfi1_qp *qp)
+				  struct rvt_qp *qp)
 {
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= HFI1_R_RSP_DEFERED_ACK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 }
 
-static inline void rc_cancel_ack(struct hfi1_qp *qp)
+static inline void rc_cancel_ack(struct rvt_qp *qp)
 {
-	qp->r_adefered = 0;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	priv->r_adefered = 0;
 	if (list_empty(&qp->rspwait))
 		return;
 	list_del_init(&qp->rspwait);
-	qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+	qp->r_flags &= ~RVT_R_RSP_NAK;
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
 }
@@ -1645,11 +1773,11 @@
  * schedule a response to be sent.
  */
 static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
-			struct hfi1_qp *qp, u32 opcode, u32 psn, int diff,
-			struct hfi1_ctxtdata *rcd)
+				 struct rvt_qp *qp, u32 opcode, u32 psn,
+				 int diff, struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	struct hfi1_ack_entry *e;
+	struct rvt_ack_entry *e;
 	unsigned long flags;
 	u8 i, prev;
 	int old_req;
@@ -1662,7 +1790,7 @@
 		 * Don't queue the NAK if we already sent one.
 		 */
 		if (!qp->r_nak_state) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
@@ -1694,7 +1822,7 @@
 	 */
 	e = NULL;
 	old_req = 1;
-	ibp->n_rc_dupreq++;
+	ibp->rvp.n_rc_dupreq++;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -1747,7 +1875,7 @@
 		if (unlikely(offset + len != e->rdma_sge.sge_length))
 			goto unlock_done;
 		if (e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		if (len != 0) {
@@ -1755,8 +1883,8 @@
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
-			ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
-					  IB_ACCESS_REMOTE_READ);
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+					 IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto unlock_done;
 		} else {
@@ -1778,7 +1906,7 @@
 		 * or the send tasklet is already backed up to send an
 		 * earlier entry, we can ignore this request.
 		 */
-		if (!e || e->opcode != (u8) opcode || old_req)
+		if (!e || e->opcode != (u8)opcode || old_req)
 			goto unlock_done;
 		qp->s_tail_ack_queue = prev;
 		break;
@@ -1810,7 +1938,7 @@
 		break;
 	}
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags |= HFI1_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_RESP_PENDING;
 	qp->r_nak_state = 0;
 	hfi1_schedule_send(qp);
 
@@ -1823,13 +1951,13 @@
 	return 0;
 }
 
-void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err)
+void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
 {
 	unsigned long flags;
 	int lastwqe;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	lastwqe = hfi1_error_qp(qp, err);
+	lastwqe = rvt_error_qp(qp, err);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
 	if (lastwqe) {
@@ -1842,7 +1970,7 @@
 	}
 }
 
-static inline void update_ack_queue(struct hfi1_qp *qp, unsigned n)
+static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
 {
 	unsigned next;
 
@@ -1864,14 +1992,14 @@
 
 	spin_lock_irqsave(&ppd->cc_log_lock, flags);
 
-	ppd->threshold_cong_event_map[sl/8] |= 1 << (sl % 8);
+	ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
 	ppd->threshold_event_counter++;
 
 	cc_event = &ppd->cc_events[ppd->cc_log_idx++];
 	if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
 		ppd->cc_log_idx = 0;
-	cc_event->lqpn = lqpn & HFI1_QPN_MASK;
-	cc_event->rqpn = rqpn & HFI1_QPN_MASK;
+	cc_event->lqpn = lqpn & RVT_QPN_MASK;
+	cc_event->rqpn = rqpn & RVT_QPN_MASK;
 	cc_event->sl = sl;
 	cc_event->svc_type = svc_type;
 	cc_event->rlid = rlid;
@@ -1897,7 +2025,7 @@
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL)
+	if (!cc_state)
 		return;
 
 	/*
@@ -1957,7 +2085,7 @@
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_other_headers *ohdr = packet->ohdr;
@@ -1972,6 +2100,7 @@
 	unsigned long flags;
 	u32 bth1;
 	int ret, is_fecn = 0;
+	int copy_last = 0;
 
 	bth0 = be32_to_cpu(ohdr->bth[0]);
 	if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
@@ -2054,13 +2183,13 @@
 		break;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
 		qp_comm_est(qp);
 
 	/* OK, process the packet. */
 	switch (opcode) {
 	case OP(SEND_FIRST):
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2076,12 +2205,12 @@
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto nack_inv;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
 		/* consume RWQE */
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2090,7 +2219,7 @@
 
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2104,8 +2233,10 @@
 		wc.ex.imm_data = ohdr->u.imm_data;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		goto send_last;
-	case OP(SEND_LAST):
 	case OP(RDMA_WRITE_LAST):
+		copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+		/* fall through */
+	case OP(SEND_LAST):
 no_immediate_data:
 		wc.wc_flags = 0;
 		wc.ex.imm_data = 0;
@@ -2121,10 +2252,10 @@
 		wc.byte_len = tlen + qp->r_rcv_len;
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto nack_inv;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
+		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
-		if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 			break;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -2154,12 +2285,14 @@
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			      (bth0 & IB_BTH_SOLICITED) != 0);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+			     (bth0 & IB_BTH_SOLICITED) != 0);
 		break;
 
-	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_ONLY):
+		copy_last = 1;
+		/* fall through */
+	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 			goto nack_inv;
@@ -2174,8 +2307,8 @@
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
-					  rkey, IB_ACCESS_REMOTE_WRITE);
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+					 rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto nack_acc;
 			qp->r_sge.num_sge = 1;
@@ -2190,7 +2323,7 @@
 			goto send_middle;
 		else if (opcode == OP(RDMA_WRITE_ONLY))
 			goto no_immediate_data;
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2200,7 +2333,7 @@
 		goto send_last;
 
 	case OP(RDMA_READ_REQUEST): {
-		struct hfi1_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u32 len;
 		u8 next;
 
@@ -2218,7 +2351,7 @@
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		reth = &ohdr->u.rc.reth;
@@ -2229,8 +2362,8 @@
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr,
-					  rkey, IB_ACCESS_REMOTE_READ);
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+					 rkey, IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto nack_acc_unlck;
 			/*
@@ -2261,7 +2394,7 @@
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= HFI1_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		hfi1_schedule_send(qp);
 
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2273,7 +2406,7 @@
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
 		struct ib_atomic_eth *ateth;
-		struct hfi1_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u64 vaddr;
 		atomic64_t *maddr;
 		u64 sdata;
@@ -2293,29 +2426,29 @@
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		ateth = &ohdr->u.atomic_eth;
-		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+		vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
 			be32_to_cpu(ateth->vaddr[1]);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					   vaddr, rkey,
-					   IB_ACCESS_REMOTE_ATOMIC)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  vaddr, rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc_unlck;
 		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
 		sdata = be64_to_cpu(ateth->swap_data);
 		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+			(u64)atomic64_add_return(sdata, maddr) - sdata :
+			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
 				      be64_to_cpu(ateth->compare_data),
 				      sdata);
-		hfi1_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		e->opcode = opcode;
 		e->sent = 0;
@@ -2328,7 +2461,7 @@
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= HFI1_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		hfi1_schedule_send(qp);
 
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2347,11 +2480,13 @@
 	qp->r_nak_state = 0;
 	/* Send an ACK if requested or required. */
 	if (psn & IB_BTH_REQ_ACK) {
+		struct hfi1_qp_priv *priv = qp->priv;
+
 		if (packet->numpkt == 0) {
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
-		if (qp->r_adefered >= HFI1_PSN_CREDIT) {
+		if (priv->r_adefered >= HFI1_PSN_CREDIT) {
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
@@ -2359,13 +2494,13 @@
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
-		qp->r_adefered++;
+		priv->r_adefered++;
 		rc_defered_ack(rcd, qp);
 	}
 	return;
 
 rnr_nak:
-	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+	qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue RNR NAK for later */
 	rc_defered_ack(rcd, qp);
@@ -2403,7 +2538,7 @@
 	struct hfi1_ctxtdata *rcd,
 	struct hfi1_ib_header *hdr,
 	u32 rcv_flags,
-	struct hfi1_qp *qp)
+	struct rvt_qp *qp)
 {
 	int has_grh = rcv_flags & HFI1_HAS_GRH;
 	struct hfi1_other_headers *ohdr;
@@ -2428,7 +2563,7 @@
 	if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
 		diff = delta_psn(psn, qp->r_psn);
 		if (!qp->r_nak_state && diff >= 0) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;

diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c
index 4a91975..08813cd 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/staging/rdma/hfi1/ruc.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,7 +50,8 @@
 #include "hfi.h"
 #include "mad.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
+#include "trace.h"
 
 /*
  * Convert the AETH RNR timeout code into the number of microseconds.
@@ -97,16 +95,16 @@
  * Validate a RWQE and fill in the SGE state.
  * Return 1 if OK.
  */
-static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
+static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
 {
 	int i, j, ret;
 	struct ib_wc wc;
-	struct hfi1_lkey_table *rkt;
-	struct hfi1_pd *pd;
-	struct hfi1_sge_state *ss;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_sge_state *ss;
 
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
 	ss = &qp->r_sge;
 	ss->sg_list = qp->r_sg_list;
 	qp->r_len = 0;
@@ -114,8 +112,8 @@
 		if (wqe->sg_list[i].length == 0)
 			continue;
 		/* Check LKEY */
-		if (!hfi1_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-				  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
 		j++;
@@ -127,9 +125,9 @@
 
 bad_lkey:
 	while (j) {
-		struct hfi1_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
 
-		hfi1_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	ss->num_sge = 0;
 	memset(&wc, 0, sizeof(wc));
@@ -138,14 +136,14 @@
 	wc.opcode = IB_WC_RECV;
 	wc.qp = &qp->ibqp;
 	/* Signal solicited completion event. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 	ret = 0;
 bail:
 	return ret;
 }
 
 /**
- * hfi1_get_rwqe - copy the next RWQE into the QP's RWQE
+ * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
  * @qp: the QP
  * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
  *
@@ -154,19 +152,19 @@
  *
  * Can be called from interrupt level.
  */
-int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
+int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
-	struct hfi1_rq *rq;
-	struct hfi1_rwq *wq;
-	struct hfi1_srq *srq;
-	struct hfi1_rwqe *wqe;
+	struct rvt_rq *rq;
+	struct rvt_rwq *wq;
+	struct rvt_srq *srq;
+	struct rvt_rwqe *wqe;
 	void (*handler)(struct ib_event *, void *);
 	u32 tail;
 	int ret;
 
 	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
+		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
 		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
@@ -176,7 +174,7 @@
 	}
 
 	spin_lock_irqsave(&rq->lock, flags);
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 		ret = 0;
 		goto unlock;
 	}
@@ -192,7 +190,7 @@
 	}
 	/* Make sure entry is read after head index is read. */
 	smp_rmb();
-	wqe = get_rwqe_ptr(rq, tail);
+	wqe = rvt_get_rwqe_ptr(rq, tail);
 	/*
 	 * Even though we update the tail index in memory, the verbs
 	 * consumer is not supposed to post more entries until a
@@ -208,7 +206,7 @@
 	qp->r_wr_id = wqe->wr_id;
 
 	ret = 1;
-	set_bit(HFI1_R_WRID_VALID, &qp->r_aflags);
+	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
 	if (handler) {
 		u32 n;
 
@@ -265,7 +263,7 @@
  * The s_lock will be acquired around the hfi1_migrate_qp() call.
  */
 int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
-		       int has_grh, struct hfi1_qp *qp, u32 bth0)
+		       int has_grh, struct rvt_qp *qp, u32 bth0)
 {
 	__be64 guid;
 	unsigned long flags;
@@ -279,11 +277,13 @@
 			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
 				goto err;
 			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+				    guid))
 				goto err;
-			if (!gid_ok(&hdr->u.l.grh.sgid,
-			    qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
-			    qp->alt_ah_attr.grh.dgid.global.interface_id))
+			if (!gid_ok(
+				&hdr->u.l.grh.sgid,
+				qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
+				qp->alt_ah_attr.grh.dgid.global.interface_id))
 				goto err;
 		}
 		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
@@ -312,11 +312,13 @@
 				goto err;
 			guid = get_sguid(ibp,
 					 qp->remote_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+				    guid))
 				goto err;
-			if (!gid_ok(&hdr->u.l.grh.sgid,
-			    qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
-			    qp->remote_ah_attr.grh.dgid.global.interface_id))
+			if (!gid_ok(
+			     &hdr->u.l.grh.sgid,
+			     qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
+			     qp->remote_ah_attr.grh.dgid.global.interface_id))
 				goto err;
 		}
 		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
@@ -355,12 +357,12 @@
  * receive interrupts since this is a connected protocol and all packets
  * will pass through here.
  */
-static void ruc_loopback(struct hfi1_qp *sqp)
+static void ruc_loopback(struct rvt_qp *sqp)
 {
 	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct hfi1_qp *qp;
-	struct hfi1_swqe *wqe;
-	struct hfi1_sge *sge;
+	struct rvt_qp *qp;
+	struct rvt_swqe *wqe;
+	struct rvt_sge *sge;
 	unsigned long flags;
 	struct ib_wc wc;
 	u64 sdata;
@@ -368,6 +370,8 @@
 	enum ib_wc_status send_status;
 	int release;
 	int ret;
+	int copy_last = 0;
+	u32 to;
 
 	rcu_read_lock();
 
@@ -375,25 +379,27 @@
 	 * Note that we check the responder QP state after
 	 * checking the requester's state.
 	 */
-	qp = hfi1_lookup_qpn(ibp, sqp->remote_qpn);
+	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
+			    sqp->remote_qpn);
 
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
 	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT)) ||
-	    !(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		goto unlock;
 
-	sqp->s_flags |= HFI1_S_BUSY;
+	sqp->s_flags |= RVT_S_BUSY;
 
 again:
-	if (sqp->s_last == sqp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 		goto clr_busy;
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
+	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
 
 	/* Return if it is not OK to start a new work request. */
-	if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[sqp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
 			goto clr_busy;
 		/* We are in the error state, flush the work request. */
 		send_status = IB_WC_WR_FLUSH_ERR;
@@ -411,9 +417,9 @@
 	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 
-	if (!qp || !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) ||
+	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
 	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		/*
 		 * For RC, the requester would timeout and retry so
 		 * shortcut the timeouts and just signal too many retries.
@@ -439,7 +445,7 @@
 		wc.ex.imm_data = wqe->wr.ex.imm_data;
 		/* FALLTHROUGH */
 	case IB_WR_SEND:
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto op_err;
 		if (!ret)
@@ -451,21 +457,24 @@
 			goto inv_err;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto op_err;
 		if (!ret)
 			goto rnr_nak;
-		/* FALLTHROUGH */
+		/* skip copy_last set and qp_access_flags recheck */
+		goto do_write;
 	case IB_WR_RDMA_WRITE:
+		copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 			goto inv_err;
+do_write:
 		if (wqe->length == 0)
 			break;
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
-					   wqe->rdma_wr.remote_addr,
-					   wqe->rdma_wr.rkey,
-					   IB_ACCESS_REMOTE_WRITE)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
+					  IB_ACCESS_REMOTE_WRITE)))
 			goto acc_err;
 		qp->r_sge.sg_list = NULL;
 		qp->r_sge.num_sge = 1;
@@ -475,10 +484,10 @@
 	case IB_WR_RDMA_READ:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 			goto inv_err;
-		if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
-					   wqe->rdma_wr.remote_addr,
-					   wqe->rdma_wr.rkey,
-					   IB_ACCESS_REMOTE_READ)))
+		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
+					  IB_ACCESS_REMOTE_READ)))
 			goto acc_err;
 		release = 0;
 		sqp->s_sge.sg_list = NULL;
@@ -493,20 +502,20 @@
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 			goto inv_err;
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					   wqe->atomic_wr.remote_addr,
-					   wqe->atomic_wr.rkey,
-					   IB_ACCESS_REMOTE_ATOMIC)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  wqe->atomic_wr.remote_addr,
+					  wqe->atomic_wr.rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto acc_err;
 		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
 		sdata = wqe->atomic_wr.compare_add;
-		*(u64 *) sqp->s_sge.sge.vaddr =
+		*(u64 *)sqp->s_sge.sge.vaddr =
 			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+			(u64)atomic64_add_return(sdata, maddr) - sdata :
+			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
 				      sdata, wqe->atomic_wr.swap);
-		hfi1_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		goto send_comp;
 
@@ -524,17 +533,17 @@
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release);
+		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (!release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--sqp->s_sge.num_sge)
 				*sge = *sqp->s_sge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -547,9 +556,9 @@
 		sqp->s_len -= len;
 	}
 	if (release)
-		hfi1_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto send_comp;
 
 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -565,12 +574,12 @@
 	wc.sl = qp->remote_ah_attr.sl;
 	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      wqe->wr.send_flags & IB_SEND_SOLICITED);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
 flush_send:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 	hfi1_send_complete(sqp, wqe, send_status);
@@ -580,7 +589,7 @@
 	/* Handle RNR NAK */
 	if (qp->ibqp.qp_type == IB_QPT_UC)
 		goto send_comp;
-	ibp->n_rnr_naks++;
+	ibp->rvp.n_rnr_naks++;
 	/*
 	 * Note: we don't need the s_lock held since the BUSY flag
 	 * makes this single threaded.
@@ -592,13 +601,10 @@
 	if (sqp->s_rnr_retry_cnt < 7)
 		sqp->s_rnr_retry--;
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
 		goto clr_busy;
-	sqp->s_flags |= HFI1_S_WAIT_RNR;
-	sqp->s_timer.function = hfi1_rc_rnr_retry;
-	sqp->s_timer.expires = jiffies +
-		usecs_to_jiffies(ib_hfi1_rnr_table[qp->r_min_rnr_timer]);
-	add_timer(&sqp->s_timer);
+	to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
+	hfi1_add_rnr_timer(sqp, to);
 	goto clr_busy;
 
 op_err:
@@ -622,9 +628,9 @@
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	hfi1_send_complete(sqp, wqe, send_status);
 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = hfi1_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
 
-		sqp->s_flags &= ~HFI1_S_BUSY;
+		sqp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		if (lastwqe) {
 			struct ib_event ev;
@@ -637,7 +643,7 @@
 		goto done;
 	}
 clr_busy:
-	sqp->s_flags &= ~HFI1_S_BUSY;
+	sqp->s_flags &= ~RVT_S_BUSY;
 unlock:
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 done:
@@ -666,7 +672,7 @@
 	hdr->next_hdr = IB_GRH_NEXT_HDR;
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
-	hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
 	hdr->sgid.global.interface_id =
 		grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
 		ibp->guids[grh->sgid_index - 1] :
@@ -690,29 +696,31 @@
  * Subsequent middles use the copied entry, editing the
  * PSN with 1 or 2 edits.
  */
-static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
+static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
 {
-	if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
 		clear_ahg(qp);
-	if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
+	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
 		/* first middle that needs copy  */
 		if (qp->s_ahgidx < 0)
-			qp->s_ahgidx = sdma_ahg_alloc(qp->s_sde);
+			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
 		if (qp->s_ahgidx >= 0) {
 			qp->s_ahgpsn = npsn;
-			qp->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
+			priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
 			/* save to protect a change in another thread */
-			qp->s_hdr->sde = qp->s_sde;
-			qp->s_hdr->ahgidx = qp->s_ahgidx;
-			qp->s_flags |= HFI1_S_AHG_VALID;
+			priv->s_hdr->sde = priv->s_sde;
+			priv->s_hdr->ahgidx = qp->s_ahgidx;
+			qp->s_flags |= RVT_S_AHG_VALID;
 		}
 	} else {
 		/* subsequent middle after valid */
 		if (qp->s_ahgidx >= 0) {
-			qp->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
-			qp->s_hdr->ahgidx = qp->s_ahgidx;
-			qp->s_hdr->ahgcount++;
-			qp->s_hdr->ahgdesc[0] =
+			priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
+			priv->s_hdr->ahgidx = qp->s_ahgidx;
+			priv->s_hdr->ahgcount++;
+			priv->s_hdr->ahgdesc[0] =
 				sdma_build_ahg_descriptor(
 					(__force u16)cpu_to_be16((u16)npsn),
 					BTH2_OFFSET,
@@ -720,8 +728,8 @@
 					16);
 			if ((npsn & 0xffff0000) !=
 					(qp->s_ahgpsn & 0xffff0000)) {
-				qp->s_hdr->ahgcount++;
-				qp->s_hdr->ahgdesc[1] =
+				priv->s_hdr->ahgcount++;
+				priv->s_hdr->ahgdesc[1] =
 					sdma_build_ahg_descriptor(
 						(__force u16)cpu_to_be16(
 							(u16)(npsn >> 16)),
@@ -733,10 +741,12 @@
 	}
 }
 
-void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
-			  u32 bth0, u32 bth2, int middle)
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+			  u32 bth0, u32 bth2, int middle,
+			  struct hfi1_pkt_state *ps)
 {
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ibport *ibp = ps->ibp;
 	u16 lrh0;
 	u32 nwords;
 	u32 extra_bytes;
@@ -747,13 +757,14 @@
 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
 	lrh0 = HFI1_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
-					       &qp->remote_ah_attr.grh,
-					       qp->s_hdrwords, nwords);
+		qp->s_hdrwords += hfi1_make_grh(ibp,
+						&ps->s_txreq->phdr.hdr.u.l.grh,
+						&qp->remote_ah_attr.grh,
+						qp->s_hdrwords, nwords);
 		lrh0 = HFI1_LRH_GRH;
 		middle = 0;
 	}
-	lrh0 |= (qp->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+	lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
 	/*
 	 * reset s_hdr/AHG fields
 	 *
@@ -765,10 +776,10 @@
 	 * build_ahg() will modify as appropriate
 	 * to use the AHG feature.
 	 */
-	qp->s_hdr->tx_flags = 0;
-	qp->s_hdr->ahgcount = 0;
-	qp->s_hdr->ahgidx = 0;
-	qp->s_hdr->sde = NULL;
+	priv->s_hdr->tx_flags = 0;
+	priv->s_hdr->ahgcount = 0;
+	priv->s_hdr->ahgidx = 0;
+	priv->s_hdr->sde = NULL;
 	if (qp->s_mig_state == IB_MIG_MIGRATED)
 		bth0 |= IB_BTH_MIG_REQ;
 	else
@@ -776,19 +787,19 @@
 	if (middle)
 		build_ahg(qp, bth2);
 	else
-		qp->s_flags &= ~HFI1_S_AHG_VALID;
-	qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr->ibh.lrh[2] =
+		qp->s_flags &= ~RVT_S_AHG_VALID;
+	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
+	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	ps->s_txreq->phdr.hdr.lrh[2] =
 		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr->ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+	ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
 				       qp->remote_ah_attr.src_path_bits);
 	bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	bth1 = qp->remote_qpn;
-	if (qp->s_flags & HFI1_S_ECN) {
-		qp->s_flags &= ~HFI1_S_ECN;
+	if (qp->s_flags & RVT_S_ECN) {
+		qp->s_flags &= ~RVT_S_ECN;
 		/* we recently received a FECN, so return a BECN */
 		bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
 	}
@@ -799,6 +810,14 @@
 /* when sending, force a reschedule every one of these periods */
 #define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
 
+void _hfi1_do_send(struct work_struct *work)
+{
+	struct iowait *wait = container_of(work, struct iowait, iowork);
+	struct rvt_qp *qp = iowait_to_qp(wait);
+
+	hfi1_do_send(qp);
+}
+
 /**
  * hfi1_do_send - perform a send on a QP
  * @work: contains a pointer to the QP
@@ -807,34 +826,45 @@
  * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
  * Otherwise, two threads could send packets out of order.
  */
-void hfi1_do_send(struct work_struct *work)
+void hfi1_do_send(struct rvt_qp *qp)
 {
-	struct iowait *wait = container_of(work, struct iowait, iowork);
-	struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
 	struct hfi1_pkt_state ps;
-	int (*make_req)(struct hfi1_qp *qp);
+	struct hfi1_qp_priv *priv = qp->priv;
+	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 	unsigned long flags;
 	unsigned long timeout;
+	unsigned long timeout_int;
+	int cpu;
 
 	ps.dev = to_idev(qp->ibqp.device);
 	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ps.ppd = ppd_from_ibp(ps.ibp);
 
-	if ((qp->ibqp.qp_type == IB_QPT_RC ||
-	     qp->ibqp.qp_type == IB_QPT_UC) &&
-	    !loopback &&
-	    (qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) ==
-	    ps.ppd->lid) {
-		ruc_loopback(qp);
-		return;
-	}
-
-	if (qp->ibqp.qp_type == IB_QPT_RC)
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+		if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
+								) - 1)) ==
+				 ps.ppd->lid)) {
+			ruc_loopback(qp);
+			return;
+		}
 		make_req = hfi1_make_rc_req;
-	else if (qp->ibqp.qp_type == IB_QPT_UC)
+		timeout_int = (qp->timeout_jiffies);
+		break;
+	case IB_QPT_UC:
+		if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
+								) - 1)) ==
+				 ps.ppd->lid)) {
+			ruc_loopback(qp);
+			return;
+		}
 		make_req = hfi1_make_uc_req;
-	else
+		timeout_int = SEND_RESCHED_TIMEOUT;
+		break;
+	default:
 		make_req = hfi1_make_ud_req;
+		timeout_int = SEND_RESCHED_TIMEOUT;
+	}
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -844,57 +874,83 @@
 		return;
 	}
 
-	qp->s_flags |= HFI1_S_BUSY;
+	qp->s_flags |= RVT_S_BUSY;
 
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	timeout = jiffies + SEND_RESCHED_TIMEOUT;
+	timeout = jiffies + (timeout_int) / 8;
+	cpu = priv->s_sde ? priv->s_sde->cpu :
+			cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+	/* insure a pre-built packet is handled  */
+	ps.s_txreq = get_waiting_verbs_txreq(qp);
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (qp->s_hdrwords != 0) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			/*
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
 			if (hfi1_verbs_send(qp, &ps))
-				break;
+				return;
 			/* Record that s_hdr is empty. */
 			qp->s_hdrwords = 0;
+			/* allow other tasks to run */
+			if (unlikely(time_after(jiffies, timeout))) {
+				if (workqueue_congested(cpu,
+							ps.ppd->hfi1_wq)) {
+					spin_lock_irqsave(&qp->s_lock, flags);
+					qp->s_flags &= ~RVT_S_BUSY;
+					hfi1_schedule_send(qp);
+					spin_unlock_irqrestore(&qp->s_lock,
+							       flags);
+					this_cpu_inc(
+						*ps.ppd->dd->send_schedule);
+					return;
+				}
+				if (!irqs_disabled()) {
+					cond_resched();
+					this_cpu_inc(
+					   *ps.ppd->dd->send_schedule);
+				}
+				timeout = jiffies + (timeout_int) / 8;
+			}
+			spin_lock_irqsave(&qp->s_lock, flags);
 		}
+	} while (make_req(qp, &ps));
 
-		/* allow other tasks to run */
-		if (unlikely(time_after(jiffies, timeout))) {
-			cond_resched();
-			ps.ppd->dd->verbs_dev.n_send_schedule++;
-			timeout = jiffies + SEND_RESCHED_TIMEOUT;
-		}
-	} while (make_req(qp));
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /*
  * This should be called with s_lock held.
  */
-void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
+void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 			enum ib_wc_status status)
 {
 	u32 old_last, last;
 	unsigned i;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
+	last = qp->s_last;
+	old_last = last;
+	if (++last >= qp->s_size)
+		last = 0;
+	qp->s_last = last;
+	/* See post_send() */
+	barrier();
 	for (i = 0; i < wqe->wr.num_sge; i++) {
-		struct hfi1_sge *sge = &wqe->sg_list[i];
+		struct rvt_sge *sge = &wqe->sg_list[i];
 
-		hfi1_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 
 	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
 	    status != IB_WC_SUCCESS) {
 		struct ib_wc wc;
@@ -906,15 +962,10 @@
 		wc.qp = &qp->ibqp;
 		if (status == IB_WC_SUCCESS)
 			wc.byte_len = wqe->length;
-		hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
-			      status != IB_WC_SUCCESS);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
+			     status != IB_WC_SUCCESS);
 	}
 
-	last = qp->s_last;
-	old_last = last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
 	if (qp->s_cur == old_last)

diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c
index 9a15f1f..abb8ebc 100644
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/staging/rdma/hfi1/sdma.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -112,10 +109,10 @@
 	| SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))
 
 /* sdma_sendctrl operations */
-#define SDMA_SENDCTRL_OP_ENABLE    (1U << 0)
-#define SDMA_SENDCTRL_OP_INTENABLE (1U << 1)
-#define SDMA_SENDCTRL_OP_HALT      (1U << 2)
-#define SDMA_SENDCTRL_OP_CLEANUP   (1U << 3)
+#define SDMA_SENDCTRL_OP_ENABLE    BIT(0)
+#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
+#define SDMA_SENDCTRL_OP_HALT      BIT(2)
+#define SDMA_SENDCTRL_OP_CLEANUP   BIT(3)
 
 /* handle long defines */
 #define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
@@ -325,9 +322,9 @@
 		if (lcnt++ > 500) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
-				  __func__, sde->this_idx, (u32)reg);
+				   __func__, sde->this_idx, (u32)reg);
 			queue_work(dd->pport->hfi1_wq,
-				&dd->pport->link_bounce_work);
+				   &dd->pport->link_bounce_work);
 			break;
 		}
 		udelay(1);
@@ -361,6 +358,28 @@
 	write_sde_csr(sde, SD(DESC_CNT), reg);
 }
 
+static inline void complete_tx(struct sdma_engine *sde,
+			       struct sdma_txreq *tx,
+			       int res)
+{
+	/* protect against complete modifying */
+	struct iowait *wait = tx->wait;
+	callback_t complete = tx->complete;
+
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+	trace_hfi1_sdma_out_sn(sde, tx->sn);
+	if (WARN_ON_ONCE(sde->head_sn != tx->sn))
+		dd_dev_err(sde->dd, "expected %llu got %llu\n",
+			   sde->head_sn, tx->sn);
+	sde->head_sn++;
+#endif
+	sdma_txclean(sde->dd, tx);
+	if (complete)
+		(*complete)(tx, res);
+	if (iowait_sdma_dec(wait) && wait)
+		iowait_drain_wakeup(wait);
+}
+
 /*
  * Complete all the sdma requests with a SDMA_TXREQ_S_ABORTED status
  *
@@ -395,27 +414,8 @@
 	}
 	spin_unlock_irqrestore(&sde->flushlist_lock, flags);
 	/* flush from flush list */
-	list_for_each_entry_safe(txp, txp_next, &flushlist, list) {
-		int drained = 0;
-		/* protect against complete modifying */
-		struct iowait *wait = txp->wait;
-
-		list_del_init(&txp->list);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-		trace_hfi1_sdma_out_sn(sde, txp->sn);
-		if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-			dd_dev_err(sde->dd, "expected %llu got %llu\n",
-				sde->head_sn, txp->sn);
-		sde->head_sn++;
-#endif
-		sdma_txclean(sde->dd, txp);
-		if (wait)
-			drained = atomic_dec_and_test(&wait->sdma_busy);
-		if (txp->complete)
-			(*txp->complete)(txp, SDMA_TXREQ_S_ABORTED, drained);
-		if (wait && drained)
-			iowait_drain_wakeup(wait);
-	}
+	list_for_each_entry_safe(txp, txp_next, &flushlist, list)
+		complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
 }
 
 /*
@@ -455,8 +455,8 @@
 			break;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(sde->dd,
-				"SDMA engine %d - timeout waiting for engine to halt\n",
-				sde->this_idx);
+				   "SDMA engine %d - timeout waiting for engine to halt\n",
+				   sde->this_idx);
 			/*
 			 * Continue anyway.  This could happen if there was
 			 * an uncorrectable error in the wrong spot.
@@ -472,7 +472,6 @@
 static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
 {
 	if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
-
 		unsigned index;
 		struct hfi1_devdata *dd = sde->dd;
 
@@ -531,7 +530,7 @@
 
 static void sdma_hw_clean_up_task(unsigned long opaque)
 {
-	struct sdma_engine *sde = (struct sdma_engine *) opaque;
+	struct sdma_engine *sde = (struct sdma_engine *)opaque;
 	u64 statuscsr;
 
 	while (1) {
@@ -577,31 +576,10 @@
 		head = ++sde->descq_head & sde->sdma_mask;
 		/* if now past this txp's descs, do the callback */
 		if (txp && txp->next_descq_idx == head) {
-			int drained = 0;
-			/* protect against complete modifying */
-			struct iowait *wait = txp->wait;
-
 			/* remove from list */
 			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
-			if (wait)
-				drained = atomic_dec_and_test(&wait->sdma_busy);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-			trace_hfi1_sdma_out_sn(sde, txp->sn);
-			if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-				dd_dev_err(sde->dd, "expected %llu got %llu\n",
-					sde->head_sn, txp->sn);
-			sde->head_sn++;
-#endif
-			sdma_txclean(sde->dd, txp);
+			complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
 			trace_hfi1_sdma_progress(sde, head, tail, txp);
-			if (txp->complete)
-				(*txp->complete)(
-					txp,
-					SDMA_TXREQ_S_ABORTED,
-					drained);
-			if (wait && drained)
-				iowait_drain_wakeup(wait);
-			/* see if there is another txp */
 			txp = get_txhead(sde);
 		}
 		progress++;
@@ -612,7 +590,7 @@
 
 static void sdma_sw_clean_up_task(unsigned long opaque)
 {
-	struct sdma_engine *sde = (struct sdma_engine *) opaque;
+	struct sdma_engine *sde = (struct sdma_engine *)opaque;
 	unsigned long flags;
 
 	spin_lock_irqsave(&sde->tail_lock, flags);
@@ -627,7 +605,6 @@
 	 *   descq are ours to play with.
 	 */
 
-
 	/*
 	 * In the error clean up sequence, software clean must be called
 	 * before the hardware clean so we can use the hardware head in
@@ -676,7 +653,7 @@
 }
 
 static void sdma_set_state(struct sdma_engine *sde,
-	enum sdma_states next_state)
+			   enum sdma_states next_state)
 {
 	struct sdma_state *ss = &sde->state;
 	const struct sdma_set_state_action *action = sdma_action_table;
@@ -692,8 +669,8 @@
 	ss->previous_op = ss->current_op;
 	ss->current_state = next_state;
 
-	if (ss->previous_state != sdma_state_s99_running
-		&& next_state == sdma_state_s99_running)
+	if (ss->previous_state != sdma_state_s99_running &&
+	    next_state == sdma_state_s99_running)
 		sdma_flush(sde);
 
 	if (action[next_state].op_enable)
@@ -890,6 +867,9 @@
 	newmap->actual_vls = num_vls;
 	newmap->vls = roundup_pow_of_two(num_vls);
 	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
+	/* initialize back-map */
+	for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
+		newmap->engine_to_vl[i] = -1;
 	for (i = 0; i < newmap->vls; i++) {
 		/* save for wrap around */
 		int first_engine = engine;
@@ -913,6 +893,9 @@
 					/* wrap back to first engine */
 					engine = first_engine;
 			}
+			/* assign back-map */
+			for (j = 0; j < vl_engines[i]; j++)
+				newmap->engine_to_vl[first_engine + j] = i;
 		} else {
 			/* just re-use entry without allocating */
 			newmap->map[i] = newmap->map[i % num_vls];
@@ -922,7 +905,7 @@
 	/* newmap in hand, save old map */
 	spin_lock_irq(&dd->sde_map_lock);
 	oldmap = rcu_dereference_protected(dd->sdma_map,
-			lockdep_is_held(&dd->sde_map_lock));
+					   lockdep_is_held(&dd->sde_map_lock));
 
 	/* publish newmap */
 	rcu_assign_pointer(dd->sdma_map, newmap);
@@ -983,7 +966,7 @@
 		sde->tx_ring = NULL;
 	}
 	spin_lock_irq(&dd->sde_map_lock);
-	kfree(rcu_access_pointer(dd->sdma_map));
+	sdma_map_free(rcu_access_pointer(dd->sdma_map));
 	RCU_INIT_POINTER(dd->sdma_map, NULL);
 	spin_unlock_irq(&dd->sde_map_lock);
 	synchronize_rcu();
@@ -1020,19 +1003,19 @@
 		return 0;
 	}
 	if (mod_num_sdma &&
-		/* can't exceed chip support */
-		mod_num_sdma <= dd->chip_sdma_engines &&
-		/* count must be >= vls */
-		mod_num_sdma >= num_vls)
+	    /* can't exceed chip support */
+	    mod_num_sdma <= dd->chip_sdma_engines &&
+	    /* count must be >= vls */
+	    mod_num_sdma >= num_vls)
 		num_engines = mod_num_sdma;
 
 	dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
 	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
 	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
-		dd->chip_sdma_mem_size);
+		    dd->chip_sdma_mem_size);
 
 	per_sdma_credits =
-		dd->chip_sdma_mem_size/(num_engines * SDMA_BLOCK_SIZE);
+		dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);
 
 	/* set up freeze waitqueue */
 	init_waitqueue_head(&dd->sdma_unfreeze_wq);
@@ -1040,7 +1023,7 @@
 
 	descq_cnt = sdma_get_descq_cnt();
 	dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
-		num_engines, descq_cnt);
+		    num_engines, descq_cnt);
 
 	/* alloc memory for array of send engines */
 	dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
@@ -1061,18 +1044,18 @@
 		sde->desc_avail = sdma_descq_freecnt(sde);
 		sde->sdma_shift = ilog2(descq_cnt);
 		sde->sdma_mask = (1 << sde->sdma_shift) - 1;
-		sde->descq_full_count = 0;
 
-		/* Create a mask for all 3 chip interrupt sources */
-		sde->imask = (u64)1 << (0*TXE_NUM_SDMA_ENGINES + this_idx)
-			| (u64)1 << (1*TXE_NUM_SDMA_ENGINES + this_idx)
-			| (u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx);
-		/* Create a mask specifically for sdma_idle */
-		sde->idle_mask =
-			(u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx);
-		/* Create a mask specifically for sdma_progress */
-		sde->progress_mask =
-			(u64)1 << (TXE_NUM_SDMA_ENGINES + this_idx);
+		/* Create a mask specifically for each interrupt source */
+		sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
+					   this_idx);
+		sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
+						this_idx);
+		sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
+					    this_idx);
+		/* Create a combined mask to cover all 3 interrupt sources */
+		sde->imask = sde->int_mask | sde->progress_mask |
+			     sde->idle_mask;
+
 		spin_lock_init(&sde->tail_lock);
 		seqlock_init(&sde->head_lock);
 		spin_lock_init(&sde->senddmactrl_lock);
@@ -1100,10 +1083,10 @@
 				SDMA_DESC1_INT_REQ_FLAG;
 
 		tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
-			(unsigned long)sde);
+			     (unsigned long)sde);
 
 		tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
-			(unsigned long)sde);
+			     (unsigned long)sde);
 		INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
 		INIT_WORK(&sde->flush_worker, sdma_field_flush);
 
@@ -1251,11 +1234,10 @@
 
 	for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
 			++this_idx) {
-
 		sde = &dd->per_sdma[this_idx];
 		if (!list_empty(&sde->dmawait))
 			dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
-				sde->this_idx);
+				   sde->this_idx);
 		sdma_process_event(sde, sdma_event_e00_go_hw_down);
 
 		del_timer_sync(&sde->err_progress_check_timer);
@@ -1358,8 +1340,8 @@
 	use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
 					(dd->flags & HFI1_HAS_SDMA_TIMEOUT);
 	hwhead = use_dmahead ?
-		(u16) le64_to_cpu(*sde->head_dma) :
-		(u16) read_sde_csr(sde, SD(HEAD));
+		(u16)le64_to_cpu(*sde->head_dma) :
+		(u16)read_sde_csr(sde, SD(HEAD));
 
 	if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
 		u16 cnt;
@@ -1385,9 +1367,9 @@
 
 		if (unlikely(!sane)) {
 			dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
-				sde->this_idx,
-				use_dmahead ? "dma" : "kreg",
-				hwhead, swhead, swtail, cnt);
+				   sde->this_idx,
+				   use_dmahead ? "dma" : "kreg",
+				   hwhead, swhead, swtail, cnt);
 			if (use_dmahead) {
 				/* try one more time, using csr */
 				use_dmahead = 0;
@@ -1464,7 +1446,7 @@
 {
 	struct sdma_txreq *txp = NULL;
 	int progress = 0;
-	u16 hwhead, swhead, swtail;
+	u16 hwhead, swhead;
 	int idle_check_done = 0;
 
 	hwhead = sdma_gethead(sde);
@@ -1485,29 +1467,9 @@
 
 		/* if now past this txp's descs, do the callback */
 		if (txp && txp->next_descq_idx == swhead) {
-			int drained = 0;
-			/* protect against complete modifying */
-			struct iowait *wait = txp->wait;
-
 			/* remove from list */
 			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
-			if (wait)
-				drained = atomic_dec_and_test(&wait->sdma_busy);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-			trace_hfi1_sdma_out_sn(sde, txp->sn);
-			if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-				dd_dev_err(sde->dd, "expected %llu got %llu\n",
-					sde->head_sn, txp->sn);
-			sde->head_sn++;
-#endif
-			sdma_txclean(sde->dd, txp);
-			if (txp->complete)
-				(*txp->complete)(
-					txp,
-					SDMA_TXREQ_S_OK,
-					drained);
-			if (wait && drained)
-				iowait_drain_wakeup(wait);
+			complete_tx(sde, txp, SDMA_TXREQ_S_OK);
 			/* see if there is another txp */
 			txp = get_txhead(sde);
 		}
@@ -1525,6 +1487,8 @@
 	 * of sdma_make_progress(..) which is ensured by idle_check_done flag
 	 */
 	if ((status & sde->idle_mask) && !idle_check_done) {
+		u16 swtail;
+
 		swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
 		if (swtail != hwhead) {
 			hwhead = (u16)read_sde_csr(sde, SD(HEAD));
@@ -1552,6 +1516,12 @@
 	trace_hfi1_sdma_engine_interrupt(sde, status);
 	write_seqlock(&sde->head_lock);
 	sdma_set_desc_cnt(sde, sdma_desct_intr);
+	if (status & sde->idle_mask)
+		sde->idle_int_cnt++;
+	else if (status & sde->progress_mask)
+		sde->progress_int_cnt++;
+	else if (status & sde->int_mask)
+		sde->sdma_int_cnt++;
 	sdma_make_progress(sde, status);
 	write_sequnlock(&sde->head_lock);
 }
@@ -1577,10 +1547,10 @@
 		__sdma_process_event(sde, sdma_event_e60_hw_halted);
 	if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
 		dd_dev_err(sde->dd,
-			"SDMA (%u) engine error: 0x%llx state %s\n",
-			sde->this_idx,
-			(unsigned long long)status,
-			sdma_state_names[sde->state.current_state]);
+			   "SDMA (%u) engine error: 0x%llx state %s\n",
+			   sde->this_idx,
+			   (unsigned long long)status,
+			   sdma_state_names[sde->state.current_state]);
 		dump_sdma_state(sde);
 	}
 	write_sequnlock(&sde->head_lock);
@@ -1624,8 +1594,8 @@
 
 	if (op & SDMA_SENDCTRL_OP_CLEANUP)
 		write_sde_csr(sde, SD(CTRL),
-			sde->p_senddmactrl |
-			SD(CTRL_SDMA_CLEANUP_SMASK));
+			      sde->p_senddmactrl |
+			      SD(CTRL_SDMA_CLEANUP_SMASK));
 	else
 		write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);
 
@@ -1649,12 +1619,10 @@
 	 * generation counter.
 	 */
 	write_sde_csr(sde, SD(LEN_GEN),
-		(sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT)
-	);
+		      (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
 	write_sde_csr(sde, SD(LEN_GEN),
-		((sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT))
-		| (4ULL << SD(LEN_GEN_GENERATION_SHIFT))
-	);
+		      ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
+		      (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
 }
 
 static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
@@ -1714,7 +1682,6 @@
 	write_sde_csr(sde, SD(CHECK_ENABLE), reg);
 }
 
-
 static void init_sdma_regs(
 	struct sdma_engine *sde,
 	u32 credits,
@@ -1735,17 +1702,16 @@
 	write_sde_csr(sde, SD(DESC_CNT), 0);
 	write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
 	write_sde_csr(sde, SD(MEMORY),
-		((u64)credits <<
-			SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
-		((u64)(credits * sde->this_idx) <<
-			SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
+		      ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
+		      ((u64)(credits * sde->this_idx) <<
+		       SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
 	write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
 	set_sdma_integrity(sde);
 	opmask = OPCODE_CHECK_MASK_DISABLED;
 	opval = OPCODE_CHECK_VAL_DISABLED;
 	write_sde_csr(sde, SD(CHECK_OPCODE),
-		(opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
-		(opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
+		      (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
+		      (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
 }
 
 #ifdef CONFIG_SDMA_VERBOSITY
@@ -1824,12 +1790,9 @@
 	descq = sde->descq;
 
 	dd_dev_err(sde->dd,
-		"SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
-		sde->this_idx,
-		head,
-		tail,
-		cnt,
-		!list_empty(&sde->flushlist));
+		   "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
+		   sde->this_idx, head, tail, cnt,
+		   !list_empty(&sde->flushlist));
 
 	/* print info for each entry in the descriptor queue */
 	while (head != tail) {
@@ -1850,20 +1813,23 @@
 		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
 			& SDMA_DESC0_BYTE_COUNT_MASK;
 		dd_dev_err(sde->dd,
-			"SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
-			 head, flags, addr, gen, len);
+			   "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
+			   head, flags, addr, gen, len);
 		dd_dev_err(sde->dd,
-			"\tdesc0:0x%016llx desc1 0x%016llx\n",
-			 desc[0], desc[1]);
+			   "\tdesc0:0x%016llx desc1 0x%016llx\n",
+			   desc[0], desc[1]);
 		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
 			dd_dev_err(sde->dd,
-				"\taidx: %u amode: %u alen: %u\n",
-				(u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK)
-					>> SDMA_DESC1_HEADER_INDEX_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK)
-					>> SDMA_DESC1_HEADER_MODE_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_DWS_SMASK)
-					>> SDMA_DESC1_HEADER_DWS_SHIFT));
+				   "\taidx: %u amode: %u alen: %u\n",
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
+					SDMA_DESC1_HEADER_INDEX_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_MODE_SMASK) >>
+					SDMA_DESC1_HEADER_MODE_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_DWS_SMASK) >>
+					SDMA_DESC1_HEADER_DWS_SHIFT));
 		head++;
 		head &= sde->sdma_mask;
 	}
@@ -1890,29 +1856,26 @@
 	head = sde->descq_head & sde->sdma_mask;
 	tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
 	seq_printf(s, SDE_FMT, sde->this_idx,
-		sde->cpu,
-		sdma_state_name(sde->state.current_state),
-		(unsigned long long)read_sde_csr(sde, SD(CTRL)),
-		(unsigned long long)read_sde_csr(sde, SD(STATUS)),
-		(unsigned long long)read_sde_csr(sde,
-			SD(ENG_ERR_STATUS)),
-		(unsigned long long)read_sde_csr(sde, SD(TAIL)),
-		tail,
-		(unsigned long long)read_sde_csr(sde, SD(HEAD)),
-		head,
-		(unsigned long long)le64_to_cpu(*sde->head_dma),
-		(unsigned long long)read_sde_csr(sde, SD(MEMORY)),
-		(unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
-		(unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
-		(unsigned long long)sde->last_status,
-		(unsigned long long)sde->ahg_bits,
-		sde->tx_tail,
-		sde->tx_head,
-		sde->descq_tail,
-		sde->descq_head,
+		   sde->cpu,
+		   sdma_state_name(sde->state.current_state),
+		   (unsigned long long)read_sde_csr(sde, SD(CTRL)),
+		   (unsigned long long)read_sde_csr(sde, SD(STATUS)),
+		   (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
+		   (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
+		   (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
+		   (unsigned long long)le64_to_cpu(*sde->head_dma),
+		   (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
+		   (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
+		   (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
+		   (unsigned long long)sde->last_status,
+		   (unsigned long long)sde->ahg_bits,
+		   sde->tx_tail,
+		   sde->tx_head,
+		   sde->descq_tail,
+		   sde->descq_head,
 		   !list_empty(&sde->flushlist),
-		sde->descq_full_count,
-		(unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
+		   sde->descq_full_count,
+		   (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
 
 	/* print info for each entry in the descriptor queue */
 	while (head != tail) {
@@ -1933,14 +1896,16 @@
 		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
 			& SDMA_DESC0_BYTE_COUNT_MASK;
 		seq_printf(s,
-			"\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
-			head, flags, addr, gen, len);
+			   "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
+			   head, flags, addr, gen, len);
 		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
 			seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
-				(u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK)
-					>> SDMA_DESC1_HEADER_INDEX_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK)
-					>> SDMA_DESC1_HEADER_MODE_SHIFT));
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
+					SDMA_DESC1_HEADER_INDEX_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_MODE_SMASK) >>
+					SDMA_DESC1_HEADER_MODE_SHIFT));
 		head = (head + 1) & sde->sdma_mask;
 	}
 }
@@ -2041,8 +2006,9 @@
 		ret = wait->sleep(sde, wait, tx, seq);
 		if (ret == -EAGAIN)
 			sde->desc_avail = sdma_descq_freecnt(sde);
-	} else
+	} else {
 		ret = -EBUSY;
+	}
 	return ret;
 }
 
@@ -2080,14 +2046,14 @@
 		goto nodesc;
 	tail = submit_tx(sde, tx);
 	if (wait)
-		atomic_inc(&wait->sdma_busy);
+		iowait_sdma_inc(wait);
 	sdma_update_tail(sde, tail);
 unlock:
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
 	return ret;
 unlock_noconn:
 	if (wait)
-		atomic_inc(&wait->sdma_busy);
+		iowait_sdma_inc(wait);
 	tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 	tx->sn = sde->tail_sn++;
@@ -2132,13 +2098,12 @@
  * side locking.
  *
  * Return:
- * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring
- * (wait == NULL)
+ * > 0 - Success (value is number of sdma_txreq's submitted),
+ * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
-int sdma_send_txlist(struct sdma_engine *sde,
-		    struct iowait *wait,
-		    struct list_head *tx_list)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
+		     struct list_head *tx_list)
 {
 	struct sdma_txreq *tx, *tx_next;
 	int ret = 0;
@@ -2169,18 +2134,18 @@
 	}
 update_tail:
 	if (wait)
-		atomic_add(count, &wait->sdma_busy);
+		iowait_sdma_add(wait, count);
 	if (tail != INVALID_TAIL)
 		sdma_update_tail(sde, tail);
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
-	return ret;
+	return ret == 0 ? count : ret;
 unlock_noconn:
 	spin_lock(&sde->flushlist_lock);
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
 		tx->wait = wait;
 		list_del_init(&tx->list);
 		if (wait)
-			atomic_inc(&wait->sdma_busy);
+			iowait_sdma_inc(wait);
 		tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 		tx->sn = sde->tail_sn++;
@@ -2206,8 +2171,7 @@
 	goto update_tail;
 }
 
-static void sdma_process_event(struct sdma_engine *sde,
-	enum sdma_events event)
+static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
 {
 	unsigned long flags;
 
@@ -2224,7 +2188,7 @@
 }
 
 static void __sdma_process_event(struct sdma_engine *sde,
-	enum sdma_events event)
+				 enum sdma_events event)
 {
 	struct sdma_state *ss = &sde->state;
 	int need_progress = 0;
@@ -2247,14 +2211,15 @@
 			 * of link up, then we need to start up.
 			 * This can happen when hw down is requested while
 			 * bringing the link up with traffic active on
-			 * 7220, e.g. */
+			 * 7220, e.g.
+			 */
 			ss->go_s99_running = 1;
 			/* fall through and start dma engine */
 		case sdma_event_e10_go_hw_start:
 			/* This reference means the state machine is started */
 			sdma_get(&sde->state);
 			sdma_set_state(sde,
-				sdma_state_s10_hw_start_up_halt_wait);
+				       sdma_state_s10_hw_start_up_halt_wait);
 			break;
 		case sdma_event_e15_hw_halt_done:
 			break;
@@ -2292,7 +2257,7 @@
 			break;
 		case sdma_event_e15_hw_halt_done:
 			sdma_set_state(sde,
-				sdma_state_s15_hw_start_up_clean_wait);
+				       sdma_state_s15_hw_start_up_clean_wait);
 			sdma_start_hw_clean_up(sde);
 			break;
 		case sdma_event_e25_hw_clean_up_done:
@@ -2767,7 +2732,7 @@
  * This function calls _extend_sdma_tx_descs to extend or allocate
  * coalesce buffer. If there is a allocated coalesce buffer, it will
  * copy the input packet data into the coalesce buffer. It also adds
- * coalesce buffer descriptor once whe whole packet is received.
+ * coalesce buffer descriptor once when whole packet is received.
  *
  * Return:
  * <0 - error
@@ -3030,7 +2995,8 @@
 	 * continuing.
 	 */
 	ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
-				atomic_read(&dd->sdma_unfreeze_count) <= 0);
+				       atomic_read(&dd->sdma_unfreeze_count) <=
+				       0);
 	/* interrupted or count is negative, then unloading - just exit */
 	if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
 		return;
@@ -3047,7 +3013,7 @@
 	 * software clean will read engine CSRs, so must be completed before
 	 * the next step, which will clear the engine CSRs.
 	 */
-	(void) wait_event_interruptible(dd->sdma_unfreeze_wq,
+	(void)wait_event_interruptible(dd->sdma_unfreeze_wq,
 				atomic_read(&dd->sdma_unfreeze_count) <= 0);
 	/* no need to check results - done no matter what */
 }
@@ -3067,7 +3033,7 @@
 	/* tell all engines start freeze clean up */
 	for (i = 0; i < dd->num_sdma; i++)
 		sdma_process_event(&dd->per_sdma[i],
-					sdma_event_e82_hw_unfreeze);
+				   sdma_event_e82_hw_unfreeze);
 }
 
 /**
@@ -3081,5 +3047,6 @@
 	trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
 	/* assume we have selected a good cpu */
 	write_csr(sde->dd,
-		  CCE_INT_FORCE + (8*(IS_SDMA_START/64)), sde->progress_mask);
+		  CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
+		  sde->progress_mask);
 }

diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h
index da89e64..8f50c99 100644
--- a/drivers/staging/rdma/hfi1/sdma.h
+++ b/drivers/staging/rdma/hfi1/sdma.h

@@ -1,14 +1,13 @@
 #ifndef _HFI1_SDMA_H
 #define _HFI1_SDMA_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -58,15 +55,13 @@
 
 #include "hfi.h"
 #include "verbs.h"
+#include "sdma_txreq.h"
 
-/* increased for AHG */
-#define NUM_DESC 6
 /* Hardware limit */
 #define MAX_DESC 64
 /* Hardware limit for SDMA packet size */
 #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
 
-
 #define SDMA_TXREQ_S_OK        0
 #define SDMA_TXREQ_S_SENDERROR 1
 #define SDMA_TXREQ_S_ABORTED   2
@@ -109,8 +104,8 @@
 /*
  * Bits defined in the send DMA descriptor.
  */
-#define SDMA_DESC0_FIRST_DESC_FLAG      (1ULL << 63)
-#define SDMA_DESC0_LAST_DESC_FLAG       (1ULL << 62)
+#define SDMA_DESC0_FIRST_DESC_FLAG      BIT_ULL(63)
+#define SDMA_DESC0_LAST_DESC_FLAG       BIT_ULL(62)
 #define SDMA_DESC0_BYTE_COUNT_SHIFT     48
 #define SDMA_DESC0_BYTE_COUNT_WIDTH     14
 #define SDMA_DESC0_BYTE_COUNT_MASK \
@@ -154,8 +149,8 @@
 	((1ULL << SDMA_DESC1_GENERATION_WIDTH) - 1)
 #define SDMA_DESC1_GENERATION_SMASK \
 	(SDMA_DESC1_GENERATION_MASK << SDMA_DESC1_GENERATION_SHIFT)
-#define SDMA_DESC1_INT_REQ_FLAG         (1ULL << 1)
-#define SDMA_DESC1_HEAD_TO_HOST_FLAG    (1ULL << 0)
+#define SDMA_DESC1_INT_REQ_FLAG         BIT_ULL(1)
+#define SDMA_DESC1_HEAD_TO_HOST_FLAG    BIT_ULL(0)
 
 enum sdma_states {
 	sdma_state_s00_hw_down,
@@ -311,83 +306,6 @@
 	__le64 qw[2];
 };
 
-/*
- * struct sdma_desc - canonical fragment descriptor
- *
- * This is the descriptor carried in the tx request
- * corresponding to each fragment.
- *
- */
-struct sdma_desc {
-	/* private:  don't use directly */
-	u64 qw[2];
-};
-
-struct sdma_txreq;
-typedef void (*callback_t)(struct sdma_txreq *, int, int);
-
-/**
- * struct sdma_txreq - the sdma_txreq structure (one per packet)
- * @list: for use by user and by queuing for wait
- *
- * This is the representation of a packet which consists of some
- * number of fragments.   Storage is provided to within the structure.
- * for all fragments.
- *
- * The storage for the descriptors are automatically extended as needed
- * when the currently allocation is exceeded.
- *
- * The user (Verbs or PSM) may overload this structure with fields
- * specific to their use by putting this struct first in their struct.
- * The method of allocation of the overloaded structure is user dependent
- *
- * The list is the only public field in the structure.
- *
- */
-
-struct sdma_txreq {
-	struct list_head list;
-	/* private: */
-	struct sdma_desc *descp;
-	/* private: */
-	void *coalesce_buf;
-	/* private: */
-	u16 coalesce_idx;
-	/* private: */
-	struct iowait *wait;
-	/* private: */
-	callback_t                  complete;
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-	u64 sn;
-#endif
-	/* private: - used in coalesce/pad processing */
-	u16                         packet_len;
-	/* private: - down-counted to trigger last */
-	u16                         tlen;
-	/* private: flags */
-	u16                         flags;
-	/* private: */
-	u16                         num_desc;
-	/* private: */
-	u16                         desc_limit;
-	/* private: */
-	u16                         next_descq_idx;
-	/* private: */
-	struct sdma_desc descs[NUM_DESC];
-};
-
-struct verbs_txreq {
-	struct hfi1_pio_header	phdr;
-	struct sdma_txreq       txreq;
-	struct hfi1_qp           *qp;
-	struct hfi1_swqe         *wqe;
-	struct hfi1_mregion	*mr;
-	struct hfi1_sge_state    *ss;
-	struct sdma_engine     *sde;
-	u16                     hdr_dwords;
-	u16                     hdr_inx;
-};
-
 /**
  * struct sdma_engine - Data pertaining to each SDMA engine.
  * @dd: a back-pointer to the device data
@@ -409,6 +327,7 @@
 	u64 imask;			/* clear interrupt mask */
 	u64 idle_mask;
 	u64 progress_mask;
+	u64 int_mask;
 	/* private: */
 	volatile __le64      *head_dma; /* DMA'ed by chip */
 	/* private: */
@@ -465,6 +384,12 @@
 	u16                   tx_head;
 	/* private: */
 	u64                   last_status;
+	/* private */
+	u64                     err_cnt;
+	/* private */
+	u64                     sdma_int_cnt;
+	u64                     idle_int_cnt;
+	u64                     progress_int_cnt;
 
 	/* private: */
 	struct list_head      dmawait;
@@ -484,12 +409,12 @@
 	u32                   progress_check_head;
 	/* private: */
 	struct work_struct flush_worker;
+	/* protect flush list */
 	spinlock_t flushlist_lock;
 	/* private: */
 	struct list_head flushlist;
 };
 
-
 int sdma_init(struct hfi1_devdata *dd, u8 port);
 void sdma_start(struct hfi1_devdata *dd);
 void sdma_exit(struct hfi1_devdata *dd);
@@ -535,7 +460,6 @@
 	return engine->state.current_state == sdma_state_s99_running;
 }
 
-
 /**
  * sdma_running() - state suitability test
  * @engine: sdma engine
@@ -565,7 +489,6 @@
 	u32 *ahg,
 	u8 ahg_hlen);
 
-
 /**
  * sdma_txinit_ahg() - initialize an sdma_txreq struct with AHG
  * @tx: tx request to initialize
@@ -626,7 +549,7 @@
 	u8 num_ahg,
 	u32 *ahg,
 	u8 ahg_hlen,
-	void (*cb)(struct sdma_txreq *, int, int))
+	void (*cb)(struct sdma_txreq *, int))
 {
 	if (tlen == 0)
 		return -ENODATA;
@@ -640,7 +563,8 @@
 	tx->complete = cb;
 	tx->coalesce_buf = NULL;
 	tx->wait = NULL;
-	tx->tlen = tx->packet_len = tlen;
+	tx->packet_len = tlen;
+	tx->tlen = tx->packet_len;
 	tx->descs[0].qw[0] = SDMA_DESC0_FIRST_DESC_FLAG;
 	tx->descs[0].qw[1] = 0;
 	if (flags & SDMA_TXREQ_F_AHG_COPY)
@@ -689,7 +613,7 @@
 	struct sdma_txreq *tx,
 	u16 flags,
 	u16 tlen,
-	void (*cb)(struct sdma_txreq *, int, int))
+	void (*cb)(struct sdma_txreq *, int))
 {
 	return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb);
 }
@@ -753,7 +677,7 @@
 		dd->default_desc1;
 	if (tx->flags & SDMA_TXREQ_F_URGENT)
 		tx->descp[tx->num_desc].qw[1] |=
-			(SDMA_DESC1_HEAD_TO_HOST_FLAG|
+			(SDMA_DESC1_HEAD_TO_HOST_FLAG |
 			 SDMA_DESC1_INT_REQ_FLAG);
 }
 
@@ -1080,6 +1004,7 @@
 
 /**
  * struct sdma_map_el - mapping for a vl
+ * @engine_to_vl - map of an engine to a vl
  * @list - rcu head for free callback
  * @mask - vl mask to "mod" the vl to produce an index to map array
  * @actual_vls - number of vls
@@ -1091,6 +1016,7 @@
  * in turn point to an array of sde's for that vl.
  */
 struct sdma_vl_map {
+	s8 engine_to_vl[TXE_NUM_SDMA_ENGINES];
 	struct rcu_head list;
 	u32 mask;
 	u8 actual_vls;

diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/staging/rdma/hfi1/sdma_txreq.h
new file mode 100644
index 0000000..bf7d777
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/sdma_txreq.h

@@ -0,0 +1,135 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HFI1_SDMA_TXREQ_H
+#define HFI1_SDMA_TXREQ_H
+
+/* increased for AHG */
+#define NUM_DESC 6
+
+/*
+ * struct sdma_desc - canonical fragment descriptor
+ *
+ * This is the descriptor carried in the tx request
+ * corresponding to each fragment.
+ *
+ */
+struct sdma_desc {
+	/* private:  don't use directly */
+	u64 qw[2];
+};
+
+/**
+ * struct sdma_txreq - the sdma_txreq structure (one per packet)
+ * @list: for use by user and by queuing for wait
+ *
+ * This is the representation of a packet which consists of some
+ * number of fragments.   Storage is provided to within the structure.
+ * for all fragments.
+ *
+ * The storage for the descriptors are automatically extended as needed
+ * when the currently allocation is exceeded.
+ *
+ * The user (Verbs or PSM) may overload this structure with fields
+ * specific to their use by putting this struct first in their struct.
+ * The method of allocation of the overloaded structure is user dependent
+ *
+ * The list is the only public field in the structure.
+ *
+ */
+
+#define SDMA_TXREQ_S_OK        0
+#define SDMA_TXREQ_S_SENDERROR 1
+#define SDMA_TXREQ_S_ABORTED   2
+#define SDMA_TXREQ_S_SHUTDOWN  3
+
+/* flags bits */
+#define SDMA_TXREQ_F_URGENT       0x0001
+#define SDMA_TXREQ_F_AHG_COPY     0x0002
+#define SDMA_TXREQ_F_USE_AHG      0x0004
+
+struct sdma_txreq;
+typedef void (*callback_t)(struct sdma_txreq *, int);
+
+struct iowait;
+struct sdma_txreq {
+	struct list_head list;
+	/* private: */
+	struct sdma_desc *descp;
+	/* private: */
+	void *coalesce_buf;
+	/* private: */
+	struct iowait *wait;
+	/* private: */
+	callback_t                  complete;
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+	u64 sn;
+#endif
+	/* private: - used in coalesce/pad processing */
+	u16                         packet_len;
+	/* private: - down-counted to trigger last */
+	u16                         tlen;
+	/* private: */
+	u16                         num_desc;
+	/* private: */
+	u16                         desc_limit;
+	/* private: */
+	u16                         next_descq_idx;
+	/* private: */
+	u16 coalesce_idx;
+	/* private: flags */
+	u16                         flags;
+	/* private: */
+	struct sdma_desc descs[NUM_DESC];
+};
+
+static inline int sdma_txreq_built(struct sdma_txreq *tx)
+{
+	return tx->num_desc;
+}
+
+#endif                          /* HFI1_SDMA_TXREQ_H */

diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c
index 1dd6727..c7f1271 100644
--- a/drivers/staging/rdma/hfi1/sysfs.c
+++ b/drivers/staging/rdma/hfi1/sysfs.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,7 +50,6 @@
 #include "mad.h"
 #include "trace.h"
 
-
 /*
  * Start of per-port congestion control structures and support code
  */
@@ -62,8 +58,8 @@
  * Congestion control table size followed by table entries
  */
 static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *bin_attr,
-		char *buf, loff_t pos, size_t count)
+				 struct bin_attribute *bin_attr,
+				 char *buf, loff_t pos, size_t count)
 {
 	int ret;
 	struct hfi1_pportdata *ppd =
@@ -84,7 +80,7 @@
 
 	rcu_read_lock();
 	cc_state = get_cc_state(ppd);
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return -EINVAL;
 	}
@@ -99,10 +95,6 @@
 	/* nothing to do since memory is freed by hfi1_free_devdata() */
 }
 
-static struct kobj_type port_cc_ktype = {
-	.release = port_release,
-};
-
 static struct bin_attribute cc_table_bin_attr = {
 	.attr = {.name = "cc_table_bin", .mode = 0444},
 	.read = read_cc_table_bin,
@@ -115,8 +107,8 @@
  * trigger threshold and the minimum injection rate delay.
  */
 static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *bin_attr,
-		char *buf, loff_t pos, size_t count)
+				   struct bin_attribute *bin_attr,
+				   char *buf, loff_t pos, size_t count)
 {
 	int ret;
 	struct hfi1_pportdata *ppd =
@@ -135,7 +127,7 @@
 
 	rcu_read_lock();
 	cc_state = get_cc_state(ppd);
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return -EINVAL;
 	}
@@ -151,6 +143,68 @@
 	.size = PAGE_SIZE,
 };
 
+struct hfi1_port_attr {
+	struct attribute attr;
+	ssize_t	(*show)(struct hfi1_pportdata *, char *);
+	ssize_t	(*store)(struct hfi1_pportdata *, const char *, size_t);
+};
+
+static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf)
+{
+	return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
+}
+
+static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
+				size_t count)
+{
+	if (!memcmp(buf, "on", 2))
+		ppd->cc_prescan = true;
+	else if (!memcmp(buf, "off", 3))
+		ppd->cc_prescan = false;
+
+	return count;
+}
+
+static struct hfi1_port_attr cc_prescan_attr =
+		__ATTR(cc_prescan, 0600, cc_prescan_show, cc_prescan_store);
+
+static ssize_t cc_attr_show(struct kobject *kobj, struct attribute *attr,
+			    char *buf)
+{
+	struct hfi1_port_attr *port_attr =
+		container_of(attr, struct hfi1_port_attr, attr);
+	struct hfi1_pportdata *ppd =
+		container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+
+	return port_attr->show(ppd, buf);
+}
+
+static ssize_t cc_attr_store(struct kobject *kobj, struct attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct hfi1_port_attr *port_attr =
+		container_of(attr, struct hfi1_port_attr, attr);
+	struct hfi1_pportdata *ppd =
+		container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+
+	return port_attr->store(ppd, buf, count);
+}
+
+static const struct sysfs_ops port_cc_sysfs_ops = {
+	.show = cc_attr_show,
+	.store = cc_attr_store
+};
+
+static struct attribute *port_cc_default_attributes[] = {
+	&cc_prescan_attr.attr
+};
+
+static struct kobj_type port_cc_ktype = {
+	.release = port_release,
+	.sysfs_ops = &port_cc_sysfs_ops,
+	.default_attrs = port_cc_default_attributes
+};
+
 /* Start sc2vl */
 #define HFI1_SC2VL_ATTR(N)				    \
 	static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
@@ -196,7 +250,6 @@
 HFI1_SC2VL_ATTR(30);
 HFI1_SC2VL_ATTR(31);
 
-
 static struct attribute *sc2vl_default_attributes[] = {
 	&hfi1_sc2vl_attr_0.attr,
 	&hfi1_sc2vl_attr_1.attr,
@@ -302,7 +355,6 @@
 HFI1_SL2SC_ATTR(30);
 HFI1_SL2SC_ATTR(31);
 
-
 static struct attribute *sl2sc_default_attributes[] = {
 	&hfi1_sl2sc_attr_0.attr,
 	&hfi1_sl2sc_attr_1.attr,
@@ -435,7 +487,6 @@
 	.default_attrs = vl2mtu_default_attributes
 };
 
-
 /* end of per-port file structures and support code */
 
 /*
@@ -446,7 +497,7 @@
 			char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
@@ -455,7 +506,7 @@
 			char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -470,19 +521,18 @@
 				 struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
 }
 
-
 static ssize_t show_nctxts(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/*
@@ -497,10 +547,10 @@
 }
 
 static ssize_t show_nfreectxts(struct device *device,
-			   struct device_attribute *attr, char *buf)
+			       struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of free user ports (contexts) available. */
@@ -511,11 +561,10 @@
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
-
 }
 
 static ssize_t store_chip_reset(struct device *device,
@@ -523,7 +572,7 @@
 				size_t count)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -552,7 +601,7 @@
 			      struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	struct hfi1_temp temp;
 	int ret;
@@ -608,8 +657,8 @@
 
 	if (!port_num || port_num > dd->num_pports) {
 		dd_dev_err(dd,
-			"Skipping infiniband class with invalid port %u\n",
-			port_num);
+			   "Skipping infiniband class with invalid port %u\n",
+			   port_num);
 		return -ENODEV;
 	}
 	ppd = &dd->pport[port_num - 1];
@@ -644,39 +693,36 @@
 	}
 	kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
 
-
 	ret = kobject_init_and_add(&ppd->pport_cc_kobj, &port_cc_ktype,
 				   kobj, "CCMgtA");
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_vl2mtu;
 	}
 
 	kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
 
-	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
-				&cc_setting_bin_attr);
+	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_cc;
 	}
 
-	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
-				&cc_table_bin_attr);
+	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_table_bin_attr);
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_cc_entry_bin;
 	}
 
 	dd_dev_info(dd,
-		"IB%u: Congestion Control Agent enabled for port %d\n",
-		dd->unit, port_num);
+		    "IB%u: Congestion Control Agent enabled for port %d\n",
+		    dd->unit, port_num);
 
 	return 0;
 
@@ -700,7 +746,7 @@
  */
 int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 {
-	struct ib_device *dev = &dd->verbs_dev.ibdev;
+	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {

diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c
index 10122e8..8b62fef 100644
--- a/drivers/staging/rdma/hfi1/trace.c
+++ b/drivers/staging/rdma/hfi1/trace.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -109,17 +106,17 @@
 	case OP(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
 	case OP(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
 		trace_seq_printf(p, IMM_PRN,
-			be32_to_cpu(eh->imm_data));
+				 be32_to_cpu(eh->imm_data));
 		break;
 	/* reth + imm */
 	case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 	case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 		trace_seq_printf(p, RETH_PRN " " IMM_PRN,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->rc.reth.vaddr),
-			be32_to_cpu(eh->rc.reth.rkey),
-			be32_to_cpu(eh->rc.reth.length),
-			be32_to_cpu(eh->rc.imm_data));
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->rc.reth.vaddr),
+				 be32_to_cpu(eh->rc.reth.rkey),
+				 be32_to_cpu(eh->rc.reth.length),
+				 be32_to_cpu(eh->rc.imm_data));
 		break;
 	/* reth */
 	case OP(RC, RDMA_READ_REQUEST):
@@ -128,10 +125,10 @@
 	case OP(RC, RDMA_WRITE_ONLY):
 	case OP(UC, RDMA_WRITE_ONLY):
 		trace_seq_printf(p, RETH_PRN,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->rc.reth.vaddr),
-			be32_to_cpu(eh->rc.reth.rkey),
-			be32_to_cpu(eh->rc.reth.length));
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->rc.reth.vaddr),
+				 be32_to_cpu(eh->rc.reth.rkey),
+				 be32_to_cpu(eh->rc.reth.length));
 		break;
 	case OP(RC, RDMA_READ_RESPONSE_FIRST):
 	case OP(RC, RDMA_READ_RESPONSE_LAST):
@@ -154,19 +151,20 @@
 	case OP(RC, COMPARE_SWAP):
 	case OP(RC, FETCH_ADD):
 		trace_seq_printf(p, ATOMICETH_PRN,
-			(unsigned long long)ib_u64_get(eh->atomic_eth.vaddr),
-			eh->atomic_eth.rkey,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->atomic_eth.swap_data),
-			(unsigned long long) ib_u64_get(
+				 (unsigned long long)ib_u64_get(
+				 eh->atomic_eth.vaddr),
+				 eh->atomic_eth.rkey,
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->atomic_eth.swap_data),
+				 (unsigned long long)ib_u64_get(
 				 (__be32 *)&eh->atomic_eth.compare_data));
 		break;
 	/* deth */
 	case OP(UD, SEND_ONLY):
 	case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
 		trace_seq_printf(p, DETH_PRN,
-			be32_to_cpu(eh->ud.deth[0]),
-			be32_to_cpu(eh->ud.deth[1]) & HFI1_QPN_MASK);
+				 be32_to_cpu(eh->ud.deth[0]),
+				 be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK);
 		break;
 	}
 	trace_seq_putc(p, 0);
@@ -187,12 +185,12 @@
 	trace_seq_printf(p, "%s", flags);
 	if (desc0 & SDMA_DESC0_FIRST_DESC_FLAG)
 		trace_seq_printf(p, " amode:%u aidx:%u alen:%u",
-			(u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT)
-				& SDMA_DESC1_HEADER_MODE_MASK),
-			(u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT)
-				& SDMA_DESC1_HEADER_INDEX_MASK),
-			(u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT)
-				& SDMA_DESC1_HEADER_DWS_MASK));
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT) &
+				      SDMA_DESC1_HEADER_MODE_MASK),
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT) &
+				      SDMA_DESC1_HEADER_INDEX_MASK),
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT) &
+				      SDMA_DESC1_HEADER_DWS_MASK));
 	return ret;
 }
 
@@ -234,3 +232,4 @@
 __hfi1_trace_fn(FIRMWARE);
 __hfi1_trace_fn(RCVCTRL);
 __hfi1_trace_fn(TID);
+__hfi1_trace_fn(MMU);

diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h
index 86c12eb..963dc94 100644
--- a/drivers/staging/rdma/hfi1/trace.h
+++ b/drivers/staging/rdma/hfi1/trace.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -76,304 +73,295 @@
 #define TRACE_SYSTEM hfi1_rx
 
 TRACE_EVENT(hfi1_rcvhdr,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 u64 eflags,
-		 u32 ctxt,
-		 u32 etype,
-		 u32 hlen,
-		 u32 tlen,
-		 u32 updegr,
-		 u32 etail),
-	TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u64, eflags)
-		__field(u32, ctxt)
-		__field(u32, etype)
-		__field(u32, hlen)
-		__field(u32, tlen)
-		__field(u32, updegr)
-		__field(u32, etail)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->eflags = eflags;
-		__entry->ctxt = ctxt;
-		__entry->etype = etype;
-		__entry->hlen = hlen;
-		__entry->tlen = tlen;
-		__entry->updegr = updegr;
-		__entry->etail = etail;
-	),
-	TP_printk(
-"[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
-		__get_str(dev),
-		__entry->ctxt,
-		__entry->eflags,
-		__entry->etype, show_packettype(__entry->etype),
-		__entry->hlen,
-		__entry->tlen,
-		__entry->updegr,
-		__entry->etail
-	)
+	    TP_PROTO(struct hfi1_devdata *dd,
+		     u64 eflags,
+		     u32 ctxt,
+		     u32 etype,
+		     u32 hlen,
+		     u32 tlen,
+		     u32 updegr,
+		     u32 etail
+		     ),
+	    TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u64, eflags)
+			     __field(u32, ctxt)
+			     __field(u32, etype)
+			     __field(u32, hlen)
+			     __field(u32, tlen)
+			     __field(u32, updegr)
+			     __field(u32, etail)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->eflags = eflags;
+			   __entry->ctxt = ctxt;
+			   __entry->etype = etype;
+			   __entry->hlen = hlen;
+			   __entry->tlen = tlen;
+			   __entry->updegr = updegr;
+			   __entry->etail = etail;
+			   ),
+	    TP_printk(
+		      "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->eflags,
+		      __entry->etype, show_packettype(__entry->etype),
+		      __entry->hlen,
+		      __entry->tlen,
+		      __entry->updegr,
+		      __entry->etail
+		      )
 );
 
 TRACE_EVENT(hfi1_receive_interrupt,
-	TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
-	TP_ARGS(dd, ctxt),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u32, ctxt)
-		__field(u8, slow_path)
-		__field(u8, dma_rtail)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->ctxt = ctxt;
-		if (dd->rcd[ctxt]->do_interrupt ==
-		    &handle_receive_interrupt) {
-			__entry->slow_path = 1;
-			__entry->dma_rtail = 0xFF;
-		} else if (dd->rcd[ctxt]->do_interrupt ==
-			&handle_receive_interrupt_dma_rtail){
-			__entry->dma_rtail = 1;
-			__entry->slow_path = 0;
-		} else if (dd->rcd[ctxt]->do_interrupt ==
-			 &handle_receive_interrupt_nodma_rtail) {
-			__entry->dma_rtail = 0;
-			__entry->slow_path = 0;
-		}
-	),
-	TP_printk(
-		"[%s] ctxt %d SlowPath: %d DmaRtail: %d",
-		__get_str(dev),
-		__entry->ctxt,
-		__entry->slow_path,
-		__entry->dma_rtail
-	)
+	    TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
+	    TP_ARGS(dd, ctxt),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u32, ctxt)
+			     __field(u8, slow_path)
+			     __field(u8, dma_rtail)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->ctxt = ctxt;
+			   if (dd->rcd[ctxt]->do_interrupt ==
+			       &handle_receive_interrupt) {
+				__entry->slow_path = 1;
+				__entry->dma_rtail = 0xFF;
+			   } else if (dd->rcd[ctxt]->do_interrupt ==
+				      &handle_receive_interrupt_dma_rtail){
+				__entry->dma_rtail = 1;
+				__entry->slow_path = 0;
+			   } else if (dd->rcd[ctxt]->do_interrupt ==
+				      &handle_receive_interrupt_nodma_rtail) {
+				__entry->dma_rtail = 0;
+				__entry->slow_path = 0;
+			   }
+			   ),
+	    TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->slow_path,
+		      __entry->dma_rtail
+		      )
 );
 
-const char *print_u64_array(struct trace_seq *, u64 *, int);
-
-TRACE_EVENT(hfi1_exp_tid_map,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, int dir,
-		     unsigned long *maps, u16 count),
-	    TP_ARGS(ctxt, subctxt, dir, maps, count),
+TRACE_EVENT(hfi1_exp_tid_reg,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr,
+		     u32 npages, unsigned long va, unsigned long pa,
+		     dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(int, dir)
-		    __field(u16, count)
-		    __dynamic_array(unsigned long, maps, sizeof(*maps) * count)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(unsigned long, va)
+		    __field(unsigned long, pa)
+		    __field(dma_addr_t, dma)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->dir = dir;
-		    __entry->count = count;
-		    memcpy(__get_dynamic_array(maps), maps,
-			   sizeof(*maps) * count);
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->va = va;
+		    __entry->pa = pa;
+		    __entry->dma = dma;
 		    ),
-	    TP_printk("[%3u:%02u] %s tidmaps %s",
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      (__entry->dir ? ">" : "<"),
-		      print_u64_array(p, __get_dynamic_array(maps),
-				      __entry->count)
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->pa,
+		      __entry->va,
+		      __entry->dma
 		    )
 	);
 
-TRACE_EVENT(hfi1_exp_rcv_set,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid,
-		     unsigned long vaddr, u64 phys_addr, void *page),
-	    TP_ARGS(ctxt, subctxt, tid, vaddr, phys_addr, page),
+TRACE_EVENT(hfi1_exp_tid_unreg,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages,
+		     unsigned long va, unsigned long pa, dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(u32, tid)
-		    __field(unsigned long, vaddr)
-		    __field(u64, phys_addr)
-		    __field(void *, page)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(unsigned long, va)
+		    __field(unsigned long, pa)
+		    __field(dma_addr_t, dma)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->tid = tid;
-		    __entry->vaddr = vaddr;
-		    __entry->phys_addr = phys_addr;
-		    __entry->page = page;
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->va = va;
+		    __entry->pa = pa;
+		    __entry->dma = dma;
 		    ),
-	    TP_printk("[%u:%u] TID %u, vaddrs 0x%lx, physaddr 0x%llx, pgp %p",
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      __entry->tid,
-		      __entry->vaddr,
-		      __entry->phys_addr,
-		      __entry->page
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->pa,
+		      __entry->va,
+		      __entry->dma
 		    )
 	);
 
-TRACE_EVENT(hfi1_exp_rcv_free,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid,
-		     unsigned long phys, void *page),
-	    TP_ARGS(ctxt, subctxt, tid, phys, page),
+TRACE_EVENT(hfi1_exp_tid_inval,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr,
+		     u32 npages, dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(u32, tid)
-		    __field(unsigned long, phys)
-		    __field(void *, page)
+		    __field(unsigned long, va)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(dma_addr_t, dma)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->tid = tid;
-		    __entry->phys = phys;
-		    __entry->page = page;
+		    __entry->va = va;
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->dma = dma;
 		    ),
-	    TP_printk("[%u:%u] freeing TID %u, 0x%lx, pgp %p",
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      __entry->tid,
-		      __entry->phys,
-		      __entry->page
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->va,
+		      __entry->dma
 		    )
 	);
+
+TRACE_EVENT(hfi1_mmu_invalidate,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, const char *type,
+		     unsigned long start, unsigned long end),
+	    TP_ARGS(ctxt, subctxt, type, start, end),
+	    TP_STRUCT__entry(
+		    __field(unsigned, ctxt)
+		    __field(u16, subctxt)
+		    __string(type, type)
+		    __field(unsigned long, start)
+		    __field(unsigned long, end)
+		    ),
+	    TP_fast_assign(
+		    __entry->ctxt = ctxt;
+		    __entry->subctxt = subctxt;
+		    __assign_str(type, type);
+		    __entry->start = start;
+		    __entry->end = end;
+		    ),
+	    TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx",
+		      __entry->ctxt,
+		      __entry->subctxt,
+		      __get_str(type),
+		      __entry->start,
+		      __entry->end
+		    )
+	);
+
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_tx
 
 TRACE_EVENT(hfi1_piofree,
-	TP_PROTO(struct send_context *sc, int extra),
-	TP_ARGS(sc, extra),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sc->dd)
-		__field(u32, sw_index)
-		__field(u32, hw_context)
-		__field(int, extra)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sc->dd);
-		__entry->sw_index = sc->sw_index;
-		__entry->hw_context = sc->hw_context;
-		__entry->extra = extra;
-	),
-	TP_printk(
-		"[%s] ctxt %u(%u) extra %d",
-		__get_str(dev),
-		__entry->sw_index,
-		__entry->hw_context,
-		__entry->extra
-	)
+	    TP_PROTO(struct send_context *sc, int extra),
+	    TP_ARGS(sc, extra),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+			     __field(u32, sw_index)
+			     __field(u32, hw_context)
+			     __field(int, extra)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+			   __entry->sw_index = sc->sw_index;
+			   __entry->hw_context = sc->hw_context;
+			   __entry->extra = extra;
+			   ),
+	    TP_printk("[%s] ctxt %u(%u) extra %d",
+		      __get_str(dev),
+		      __entry->sw_index,
+		      __entry->hw_context,
+		      __entry->extra
+		      )
 );
 
 TRACE_EVENT(hfi1_wantpiointr,
-	TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
-	TP_ARGS(sc, needint, credit_ctrl),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sc->dd)
-		__field(u32, sw_index)
-		__field(u32, hw_context)
-		__field(u32, needint)
-		__field(u64, credit_ctrl)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sc->dd);
-		__entry->sw_index = sc->sw_index;
-		__entry->hw_context = sc->hw_context;
-		__entry->needint = needint;
-		__entry->credit_ctrl = credit_ctrl;
-	),
-	TP_printk(
-		"[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
-		__get_str(dev),
-		__entry->sw_index,
-		__entry->hw_context,
-		__entry->needint,
-		(unsigned long long)__entry->credit_ctrl
-	)
+	    TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
+	    TP_ARGS(sc, needint, credit_ctrl),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+			     __field(u32, sw_index)
+			     __field(u32, hw_context)
+			     __field(u32, needint)
+			     __field(u64, credit_ctrl)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+			   __entry->sw_index = sc->sw_index;
+			   __entry->hw_context = sc->hw_context;
+			   __entry->needint = needint;
+			   __entry->credit_ctrl = credit_ctrl;
+			   ),
+	    TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
+		      __get_str(dev),
+		      __entry->sw_index,
+		      __entry->hw_context,
+		      __entry->needint,
+		      (unsigned long long)__entry->credit_ctrl
+		       )
 );
 
 DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 flags),
-	TP_ARGS(qp, flags),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, flags)
-		__field(u32, s_flags)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->flags = flags;
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->s_flags = qp->s_flags;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->flags,
-		__entry->s_flags
-	)
+		    TP_PROTO(struct rvt_qp *qp, u32 flags),
+		    TP_ARGS(qp, flags),
+		    TP_STRUCT__entry(
+			    DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+			    __field(u32, qpn)
+			    __field(u32, flags)
+			    __field(u32, s_flags)
+			    ),
+		    TP_fast_assign(
+			    DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+			    __entry->flags = flags;
+			    __entry->qpn = qp->ibqp.qp_num;
+			    __entry->s_flags = qp->s_flags;
+			    ),
+		    TP_printk(
+			    "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
+			    __get_str(dev),
+			    __entry->qpn,
+			    __entry->flags,
+			    __entry->s_flags
+			    )
 );
 
 DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup,
-	     TP_PROTO(struct hfi1_qp *qp, u32 flags),
+	     TP_PROTO(struct rvt_qp *qp, u32 flags),
 	     TP_ARGS(qp, flags));
 
 DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep,
-	     TP_PROTO(struct hfi1_qp *qp, u32 flags),
+	     TP_PROTO(struct rvt_qp *qp, u32 flags),
 	     TP_ARGS(qp, flags));
 
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_qphash
-DECLARE_EVENT_CLASS(hfi1_qphash_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, bucket)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->bucket = bucket;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x bucket %u",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->bucket
-	)
-);
-
-DEFINE_EVENT(hfi1_qphash_template, hfi1_qpinsert,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket));
-
-DEFINE_EVENT(hfi1_qphash_template, hfi1_qpremove,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket));
-
-#undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_ibhdrs
 
 u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
-const char *parse_everbs_hdrs(
-	struct trace_seq *p,
-	u8 opcode,
-	void *ehdrs);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
 
 #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
 
-const char *parse_sdma_flags(
-	struct trace_seq *p,
-	u64 desc0, u64 desc1);
+const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
 
 #define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1)
 
-
 #define lrh_name(lrh) { HFI1_##lrh, #lrh }
 #define show_lnh(lrh)                    \
 __print_symbolic(lrh,                    \
@@ -420,7 +408,6 @@
 	ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE),       \
 	ib_opcode_name(CNP))
 
-
 #define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
 #define BTH_PRN \
 	"op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
@@ -428,124 +415,130 @@
 #define EHDR_PRN "%s"
 
 DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 struct hfi1_ib_header *hdr),
-	TP_ARGS(dd, hdr),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		/* LRH */
-		__field(u8, vl)
-		__field(u8, lver)
-		__field(u8, sl)
-		__field(u8, lnh)
-		__field(u16, dlid)
-		__field(u16, len)
-		__field(u16, slid)
-		/* BTH */
-		__field(u8, opcode)
-		__field(u8, se)
-		__field(u8, m)
-		__field(u8, pad)
-		__field(u8, tver)
-		__field(u16, pkey)
-		__field(u8, f)
-		__field(u8, b)
-		__field(u32, qpn)
-		__field(u8, a)
-		__field(u32, psn)
-		/* extended headers */
-		__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
-	),
-	TP_fast_assign(
-		struct hfi1_other_headers *ohdr;
+		    TP_PROTO(struct hfi1_devdata *dd,
+			     struct hfi1_ib_header *hdr),
+		    TP_ARGS(dd, hdr),
+		    TP_STRUCT__entry(
+			    DD_DEV_ENTRY(dd)
+			    /* LRH */
+			    __field(u8, vl)
+			    __field(u8, lver)
+			    __field(u8, sl)
+			    __field(u8, lnh)
+			    __field(u16, dlid)
+			    __field(u16, len)
+			    __field(u16, slid)
+			    /* BTH */
+			    __field(u8, opcode)
+			    __field(u8, se)
+			    __field(u8, m)
+			    __field(u8, pad)
+			    __field(u8, tver)
+			    __field(u16, pkey)
+			    __field(u8, f)
+			    __field(u8, b)
+			    __field(u32, qpn)
+			    __field(u8, a)
+			    __field(u32, psn)
+			    /* extended headers */
+			    __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+			    ),
+		    TP_fast_assign(
+			   struct hfi1_other_headers *ohdr;
 
-		DD_DEV_ASSIGN(dd);
-		/* LRH */
-		__entry->vl =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
-		__entry->lver =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
-		__entry->sl =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
-		__entry->lnh =
-			(u8)(be16_to_cpu(hdr->lrh[0]) & 3);
-		__entry->dlid =
-			be16_to_cpu(hdr->lrh[1]);
-		/* allow for larger len */
-		__entry->len =
-			be16_to_cpu(hdr->lrh[2]);
-		__entry->slid =
-			be16_to_cpu(hdr->lrh[3]);
-		/* BTH */
-		if (__entry->lnh == HFI1_LRH_BTH)
-			ohdr = &hdr->u.oth;
-		else
-			ohdr = &hdr->u.l.oth;
-		__entry->opcode =
-			(be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
-		__entry->se =
-			(be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
-		__entry->m =
-			 (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
-		__entry->pad =
-			(be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		__entry->tver =
-			(be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
-		__entry->pkey =
-			be32_to_cpu(ohdr->bth[0]) & 0xffff;
-		__entry->f =
-			(be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT)
-			& HFI1_FECN_MASK;
-		__entry->b =
-			(be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT)
-			& HFI1_BECN_MASK;
-		__entry->qpn =
-			be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
-		__entry->a =
-			(be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
-		/* allow for larger PSN */
-		__entry->psn =
-			be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
-		/* extended headers */
-		 memcpy(
-			__get_dynamic_array(ehdrs),
-			&ohdr->u,
-			ibhdr_exhdr_len(hdr));
-	),
-	TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
-		__get_str(dev),
-		/* LRH */
-		__entry->vl,
-		__entry->lver,
-		__entry->sl,
-		__entry->lnh, show_lnh(__entry->lnh),
-		__entry->dlid,
-		__entry->len,
-		__entry->slid,
-		/* BTH */
-		__entry->opcode, show_ib_opcode(__entry->opcode),
-		__entry->se,
-		__entry->m,
-		__entry->pad,
-		__entry->tver,
-		__entry->pkey,
-		__entry->f,
-		__entry->b,
-		__entry->qpn,
-		__entry->a,
-		__entry->psn,
-		/* extended headers */
-		__parse_ib_ehdrs(
-			__entry->opcode,
-			(void *)__get_dynamic_array(ehdrs))
-	)
+			   DD_DEV_ASSIGN(dd);
+			   /* LRH */
+			   __entry->vl =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
+			   __entry->lver =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
+			   __entry->sl =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+			   __entry->lnh =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
+			   __entry->dlid =
+			   be16_to_cpu(hdr->lrh[1]);
+			   /* allow for larger len */
+			   __entry->len =
+			   be16_to_cpu(hdr->lrh[2]);
+			   __entry->slid =
+			   be16_to_cpu(hdr->lrh[3]);
+			   /* BTH */
+			   if (__entry->lnh == HFI1_LRH_BTH)
+				ohdr = &hdr->u.oth;
+			   else
+				ohdr = &hdr->u.l.oth;
+			  __entry->opcode =
+			  (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+			  __entry->se =
+			  (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
+			  __entry->m =
+			  (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
+			  __entry->pad =
+			  (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+			  __entry->tver =
+			  (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
+			  __entry->pkey =
+			  be32_to_cpu(ohdr->bth[0]) & 0xffff;
+			  __entry->f =
+			  (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
+			  HFI1_FECN_MASK;
+			  __entry->b =
+			  (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
+			  HFI1_BECN_MASK;
+			  __entry->qpn =
+			  be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+			  __entry->a =
+			  (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
+			  /* allow for larger PSN */
+			  __entry->psn =
+			  be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+			  /* extended headers */
+			  memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+				 ibhdr_exhdr_len(hdr));
+			 ),
+		    TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
+			      __get_str(dev),
+			      /* LRH */
+			      __entry->vl,
+			      __entry->lver,
+			      __entry->sl,
+			      __entry->lnh, show_lnh(__entry->lnh),
+			      __entry->dlid,
+			      __entry->len,
+			      __entry->slid,
+			      /* BTH */
+			      __entry->opcode, show_ib_opcode(__entry->opcode),
+			      __entry->se,
+			      __entry->m,
+			      __entry->pad,
+			      __entry->tver,
+			      __entry->pkey,
+			      __entry->f,
+			      __entry->b,
+			      __entry->qpn,
+			      __entry->a,
+			      __entry->psn,
+			      /* extended headers */
+			      __parse_ib_ehdrs(
+					__entry->opcode,
+					(void *)__get_dynamic_array(ehdrs))
+			     )
 );
 
 DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
 	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
-DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr,
+DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
+	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
+	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
 	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
@@ -556,15 +549,14 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_snoop
 
-
 TRACE_EVENT(snoop_capture,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 int hdr_len,
-		 struct hfi1_ib_header *hdr,
-		 int data_len,
-		 void *data),
-	TP_ARGS(dd, hdr_len, hdr, data_len, data),
-	TP_STRUCT__entry(
+	    TP_PROTO(struct hfi1_devdata *dd,
+		     int hdr_len,
+		     struct hfi1_ib_header *hdr,
+		     int data_len,
+		     void *data),
+	    TP_ARGS(dd, hdr_len, hdr, data_len, data),
+	    TP_STRUCT__entry(
 		DD_DEV_ENTRY(dd)
 		__field(u16, slid)
 		__field(u16, dlid)
@@ -577,8 +569,8 @@
 		__field(u8, lnh)
 		__dynamic_array(u8, raw_hdr, hdr_len)
 		__dynamic_array(u8, raw_pkt, data_len)
-	),
-	TP_fast_assign(
+		),
+	    TP_fast_assign(
 		struct hfi1_other_headers *ohdr;
 
 		__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
@@ -589,7 +581,7 @@
 		DD_DEV_ASSIGN(dd);
 		__entry->slid = be16_to_cpu(hdr->lrh[3]);
 		__entry->dlid = be16_to_cpu(hdr->lrh[1]);
-		__entry->qpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
+		__entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		__entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
 		__entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
 		__entry->pkey =	be32_to_cpu(ohdr->bth[0]) & 0xffff;
@@ -597,8 +589,9 @@
 		__entry->data_len = data_len;
 		memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
 		memcpy(__get_dynamic_array(raw_pkt), data, data_len);
-	),
-	TP_printk("[%s] " SNOOP_PRN,
+		),
+	    TP_printk(
+		"[%s] " SNOOP_PRN,
 		__get_str(dev),
 		__entry->slid,
 		__entry->dlid,
@@ -609,7 +602,7 @@
 		__entry->pkey,
 		__entry->hdr_len,
 		__entry->data_len
-	)
+		)
 );
 
 #undef TRACE_SYSTEM
@@ -621,41 +614,39 @@
 TRACE_EVENT(hfi1_uctxtdata,
 	    TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
 	    TP_ARGS(dd, uctxt),
-	    TP_STRUCT__entry(
-		    DD_DEV_ENTRY(dd)
-		    __field(unsigned, ctxt)
-		    __field(u32, credits)
-		    __field(u64, hw_free)
-		    __field(u64, piobase)
-		    __field(u16, rcvhdrq_cnt)
-		    __field(u64, rcvhdrq_phys)
-		    __field(u32, eager_cnt)
-		    __field(u64, rcvegr_phys)
-		    ),
-	    TP_fast_assign(
-		    DD_DEV_ASSIGN(dd);
-		    __entry->ctxt = uctxt->ctxt;
-		    __entry->credits = uctxt->sc->credits;
-		    __entry->hw_free = (u64)uctxt->sc->hw_free;
-		    __entry->piobase = (u64)uctxt->sc->base_addr;
-		    __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
-		    __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
-		    __entry->eager_cnt = uctxt->egrbufs.alloced;
-		    __entry->rcvegr_phys = uctxt->egrbufs.rcvtids[0].phys;
-		    ),
-	    TP_printk(
-		    "[%s] ctxt %u " UCTXT_FMT,
-		    __get_str(dev),
-		    __entry->ctxt,
-		    __entry->credits,
-		    __entry->hw_free,
-		    __entry->piobase,
-		    __entry->rcvhdrq_cnt,
-		    __entry->rcvhdrq_phys,
-		    __entry->eager_cnt,
-		    __entry->rcvegr_phys
-		    )
-	);
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(unsigned, ctxt)
+			     __field(u32, credits)
+			     __field(u64, hw_free)
+			     __field(u64, piobase)
+			     __field(u16, rcvhdrq_cnt)
+			     __field(u64, rcvhdrq_phys)
+			     __field(u32, eager_cnt)
+			     __field(u64, rcvegr_phys)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->ctxt = uctxt->ctxt;
+			   __entry->credits = uctxt->sc->credits;
+			   __entry->hw_free = (u64)uctxt->sc->hw_free;
+			   __entry->piobase = (u64)uctxt->sc->base_addr;
+			   __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+			   __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+			   __entry->eager_cnt = uctxt->egrbufs.alloced;
+			   __entry->rcvegr_phys =
+			   uctxt->egrbufs.rcvtids[0].phys;
+			   ),
+	    TP_printk("[%s] ctxt %u " UCTXT_FMT,
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->credits,
+		      __entry->hw_free,
+		      __entry->piobase,
+		      __entry->rcvhdrq_cnt,
+		      __entry->rcvhdrq_phys,
+		      __entry->eager_cnt,
+		      __entry->rcvegr_phys
+		      )
+);
 
 #define CINFO_FMT \
 	"egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
@@ -663,38 +654,35 @@
 	    TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt,
 		     struct hfi1_ctxt_info cinfo),
 	    TP_ARGS(dd, ctxt, subctxt, cinfo),
-	    TP_STRUCT__entry(
-		    DD_DEV_ENTRY(dd)
-		    __field(unsigned, ctxt)
-		    __field(unsigned, subctxt)
-		    __field(u16, egrtids)
-		    __field(u16, rcvhdrq_cnt)
-		    __field(u16, rcvhdrq_size)
-		    __field(u16, sdma_ring_size)
-		    __field(u32, rcvegr_size)
-		    ),
-	    TP_fast_assign(
-		    DD_DEV_ASSIGN(dd);
-		    __entry->ctxt = ctxt;
-		    __entry->subctxt = subctxt;
-		    __entry->egrtids = cinfo.egrtids;
-		    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
-		    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
-		    __entry->sdma_ring_size = cinfo.sdma_ring_size;
-		    __entry->rcvegr_size = cinfo.rcvegr_size;
-		    ),
-	    TP_printk(
-		    "[%s] ctxt %u:%u " CINFO_FMT,
-		    __get_str(dev),
-		    __entry->ctxt,
-		    __entry->subctxt,
-		    __entry->egrtids,
-		    __entry->rcvegr_size,
-		    __entry->rcvhdrq_cnt,
-		    __entry->rcvhdrq_size,
-		    __entry->sdma_ring_size
-		    )
-	);
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(unsigned, ctxt)
+			     __field(unsigned, subctxt)
+			     __field(u16, egrtids)
+			     __field(u16, rcvhdrq_cnt)
+			     __field(u16, rcvhdrq_size)
+			     __field(u16, sdma_ring_size)
+			     __field(u32, rcvegr_size)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			    __entry->ctxt = ctxt;
+			    __entry->subctxt = subctxt;
+			    __entry->egrtids = cinfo.egrtids;
+			    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
+			    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
+			    __entry->sdma_ring_size = cinfo.sdma_ring_size;
+			    __entry->rcvegr_size = cinfo.rcvegr_size;
+			    ),
+	    TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->subctxt,
+		      __entry->egrtids,
+		      __entry->rcvegr_size,
+		      __entry->rcvhdrq_cnt,
+		      __entry->rcvhdrq_size,
+		      __entry->sdma_ring_size
+		      )
+);
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_sma
@@ -708,52 +696,48 @@
 	)
 
 DECLARE_EVENT_CLASS(hfi1_bct_template,
-	TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
-	TP_ARGS(dd, bc),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__dynamic_array(u8, bct, sizeof(*bc))
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		memcpy(
-			__get_dynamic_array(bct),
-			bc,
-			sizeof(*bc));
-	),
-	TP_printk(BCT_FORMAT,
-		BCT(overall_shared_limit),
+		    TP_PROTO(struct hfi1_devdata *dd,
+			     struct buffer_control *bc),
+		    TP_ARGS(dd, bc),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+				     __dynamic_array(u8, bct, sizeof(*bc))
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(dd);
+				   memcpy(__get_dynamic_array(bct), bc,
+					  sizeof(*bc));
+				   ),
+		    TP_printk(BCT_FORMAT,
+			      BCT(overall_shared_limit),
 
-		BCT(vl[0].dedicated),
-		BCT(vl[0].shared),
+			      BCT(vl[0].dedicated),
+			      BCT(vl[0].shared),
 
-		BCT(vl[1].dedicated),
-		BCT(vl[1].shared),
+			      BCT(vl[1].dedicated),
+			      BCT(vl[1].shared),
 
-		BCT(vl[2].dedicated),
-		BCT(vl[2].shared),
+			      BCT(vl[2].dedicated),
+			      BCT(vl[2].shared),
 
-		BCT(vl[3].dedicated),
-		BCT(vl[3].shared),
+			      BCT(vl[3].dedicated),
+			      BCT(vl[3].shared),
 
-		BCT(vl[4].dedicated),
-		BCT(vl[4].shared),
+			      BCT(vl[4].dedicated),
+			      BCT(vl[4].shared),
 
-		BCT(vl[5].dedicated),
-		BCT(vl[5].shared),
+			      BCT(vl[5].dedicated),
+			      BCT(vl[5].shared),
 
-		BCT(vl[6].dedicated),
-		BCT(vl[6].shared),
+			      BCT(vl[6].dedicated),
+			      BCT(vl[6].shared),
 
-		BCT(vl[7].dedicated),
-		BCT(vl[7].shared),
+			      BCT(vl[7].dedicated),
+			      BCT(vl[7].shared),
 
-		BCT(vl[15].dedicated),
-		BCT(vl[15].shared)
-	)
+			      BCT(vl[15].dedicated),
+			      BCT(vl[15].shared)
+			      )
 );
 
-
 DEFINE_EVENT(hfi1_bct_template, bct_set,
 	     TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
 	     TP_ARGS(dd, bc));
@@ -766,252 +750,209 @@
 #define TRACE_SYSTEM hfi1_sdma
 
 TRACE_EVENT(hfi1_sdma_descriptor,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 desc0,
-		u64 desc1,
-		u16 e,
-		void *descp),
+	    TP_PROTO(struct sdma_engine *sde,
+		     u64 desc0,
+		     u64 desc1,
+		     u16 e,
+		     void *descp),
 	TP_ARGS(sde, desc0, desc1, e, descp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(void *, descp)
-		__field(u64, desc0)
-		__field(u64, desc1)
-		__field(u16, e)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->desc0 = desc0;
-		__entry->desc1 = desc1;
-		__entry->idx = sde->this_idx;
-		__entry->descp = descp;
-		__entry->e = e;
-	),
+	TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			 __field(void *, descp)
+			 __field(u64, desc0)
+			 __field(u64, desc1)
+			 __field(u16, e)
+			 __field(u8, idx)
+			 ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __entry->desc0 = desc0;
+		       __entry->desc1 = desc1;
+		       __entry->idx = sde->this_idx;
+		       __entry->descp = descp;
+		       __entry->e = e;
+		       ),
 	TP_printk(
-		"[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
-		__get_str(dev),
-		__entry->idx,
-		__parse_sdma_flags(__entry->desc0, __entry->desc1),
-		(__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT)
-			& SDMA_DESC0_PHY_ADDR_MASK,
-		(u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT)
-			& SDMA_DESC1_GENERATION_MASK),
-		(u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT)
-			& SDMA_DESC0_BYTE_COUNT_MASK),
-		__entry->desc0,
-		__entry->desc1,
-		__entry->descp,
-		__entry->e
-	)
+		  "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
+		  __get_str(dev),
+		  __entry->idx,
+		  __parse_sdma_flags(__entry->desc0, __entry->desc1),
+		  (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) &
+		  SDMA_DESC0_PHY_ADDR_MASK,
+		  (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) &
+		       SDMA_DESC1_GENERATION_MASK),
+		  (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) &
+			SDMA_DESC0_BYTE_COUNT_MASK),
+		  __entry->desc0,
+		  __entry->desc1,
+		  __entry->descp,
+		  __entry->e
+		  )
 );
 
 TRACE_EVENT(hfi1_sdma_engine_select,
-	TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
-	TP_ARGS(dd, sel, vl, idx),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u32, sel)
-		__field(u8, vl)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->sel = sel;
-		__entry->vl = vl;
-		__entry->idx = idx;
-	),
-	TP_printk(
-		"[%s] selecting SDE %u sel 0x%x vl %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sel,
-		__entry->vl
-	)
+	    TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
+	    TP_ARGS(dd, sel, vl, idx),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u32, sel)
+			     __field(u8, vl)
+			     __field(u8, idx)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->sel = sel;
+			   __entry->vl = vl;
+			   __entry->idx = idx;
+			   ),
+	    TP_printk("[%s] selecting SDE %u sel 0x%x vl %u",
+		      __get_str(dev),
+		      __entry->idx,
+		      __entry->sel,
+		      __entry->vl
+		      )
 );
 
 DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, status)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->status = status;
-		__entry->idx = sde->this_idx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) status %llx",
-		__get_str(dev),
-		__entry->idx,
-		(unsigned long long)__entry->status
-	)
+		    TP_PROTO(struct sdma_engine *sde, u64 status),
+		    TP_ARGS(sde, status),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(u64, status)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->status = status;
+				   __entry->idx = sde->this_idx;
+				   ),
+		    TP_printk("[%s] SDE(%u) status %llx",
+			      __get_str(dev),
+			      __entry->idx,
+			      (unsigned long long)__entry->status
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status)
+	     TP_PROTO(struct sdma_engine *sde, u64 status),
+	     TP_ARGS(sde, status)
 );
 
 DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status)
+	     TP_PROTO(struct sdma_engine *sde, u64 status),
+	     TP_ARGS(sde, status)
 );
 
 DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	),
-	TP_ARGS(sde, aidx),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(int, aidx)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->idx = sde->this_idx;
-		__entry->aidx = aidx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) aidx %d",
-		__get_str(dev),
-		__entry->idx,
-		__entry->aidx
-	)
+		    TP_PROTO(struct sdma_engine *sde, int aidx),
+		    TP_ARGS(sde, aidx),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(int, aidx)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->idx = sde->this_idx;
+				   __entry->aidx = aidx;
+				   ),
+		    TP_printk("[%s] SDE(%u) aidx %d",
+			      __get_str(dev),
+			      __entry->idx,
+			      __entry->aidx
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, int aidx),
 	     TP_ARGS(sde, aidx));
 
 DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, int aidx),
 	     TP_ARGS(sde, aidx));
 
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 TRACE_EVENT(hfi1_sdma_progress,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u16 hwhead,
-		u16 swhead,
-		struct sdma_txreq *txp
-	),
-	TP_ARGS(sde, hwhead, swhead, txp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, sn)
-		__field(u16, hwhead)
-		__field(u16, swhead)
-		__field(u16, txnext)
-		__field(u16, tx_tail)
-		__field(u16, tx_head)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->hwhead = hwhead;
-		__entry->swhead = swhead;
-		__entry->tx_tail = sde->tx_tail;
-		__entry->tx_head = sde->tx_head;
-		__entry->txnext = txp ? txp->next_descq_idx : ~0;
-		__entry->idx = sde->this_idx;
-		__entry->sn = txp ? txp->sn : ~0;
-	),
-	TP_printk(
-		"[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sn,
-		__entry->hwhead,
-		__entry->swhead,
-		__entry->txnext,
-		__entry->tx_head,
-		__entry->tx_tail
-	)
+	    TP_PROTO(struct sdma_engine *sde,
+		     u16 hwhead,
+		     u16 swhead,
+		     struct sdma_txreq *txp
+		     ),
+	    TP_ARGS(sde, hwhead, swhead, txp),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			     __field(u64, sn)
+			     __field(u16, hwhead)
+			     __field(u16, swhead)
+			     __field(u16, txnext)
+			     __field(u16, tx_tail)
+			     __field(u16, tx_head)
+			     __field(u8, idx)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+			   __entry->hwhead = hwhead;
+			   __entry->swhead = swhead;
+			   __entry->tx_tail = sde->tx_tail;
+			   __entry->tx_head = sde->tx_head;
+			   __entry->txnext = txp ? txp->next_descq_idx : ~0;
+			   __entry->idx = sde->this_idx;
+			   __entry->sn = txp ? txp->sn : ~0;
+			   ),
+	    TP_printk(
+		      "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+		      __get_str(dev),
+		      __entry->idx,
+		      __entry->sn,
+		      __entry->hwhead,
+		      __entry->swhead,
+		      __entry->txnext,
+		      __entry->tx_head,
+		      __entry->tx_tail
+		      )
 );
 #else
 TRACE_EVENT(hfi1_sdma_progress,
-	    TP_PROTO(
-		struct sdma_engine *sde,
-		u16 hwhead,
-		u16 swhead,
-		struct sdma_txreq *txp
+	    TP_PROTO(struct sdma_engine *sde,
+		     u16 hwhead, u16 swhead,
+		     struct sdma_txreq *txp
 	    ),
 	TP_ARGS(sde, hwhead, swhead, txp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u16, hwhead)
-		__field(u16, swhead)
-		__field(u16, txnext)
-		__field(u16, tx_tail)
-		__field(u16, tx_head)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->hwhead = hwhead;
-		__entry->swhead = swhead;
-		__entry->tx_tail = sde->tx_tail;
-		__entry->tx_head = sde->tx_head;
-		__entry->txnext = txp ? txp->next_descq_idx : ~0;
-		__entry->idx = sde->this_idx;
-	),
+	TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			 __field(u16, hwhead)
+			 __field(u16, swhead)
+			 __field(u16, txnext)
+			 __field(u16, tx_tail)
+			 __field(u16, tx_head)
+			 __field(u8, idx)
+			 ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __entry->hwhead = hwhead;
+		       __entry->swhead = swhead;
+		       __entry->tx_tail = sde->tx_tail;
+		       __entry->tx_head = sde->tx_head;
+		       __entry->txnext = txp ? txp->next_descq_idx : ~0;
+		       __entry->idx = sde->this_idx;
+		       ),
 	TP_printk(
-		"[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->hwhead,
-		__entry->swhead,
-		__entry->txnext,
-		__entry->tx_head,
-		__entry->tx_tail
-	)
+		  "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+		  __get_str(dev),
+		  __entry->idx,
+		  __entry->hwhead,
+		  __entry->swhead,
+		  __entry->txnext,
+		  __entry->tx_head,
+		  __entry->tx_tail
+		  )
 );
 #endif
 
 DECLARE_EVENT_CLASS(hfi1_sdma_sn,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 sn
-	),
-	TP_ARGS(sde, sn),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, sn)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->sn = sn;
-		__entry->idx = sde->this_idx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) sn %llu",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sn
-	)
+		    TP_PROTO(struct sdma_engine *sde, u64 sn),
+		    TP_ARGS(sde, sn),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(u64, sn)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->sn = sn;
+				   __entry->idx = sde->this_idx;
+				   ),
+		    TP_printk("[%s] SDE(%u) sn %llu",
+			      __get_str(dev),
+			      __entry->idx,
+			      __entry->sn
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
@@ -1023,10 +964,7 @@
 );
 
 DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		u64 sn
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, u64 sn),
 	     TP_ARGS(sde, sn)
 );
 
@@ -1227,88 +1165,85 @@
 	);
 
 TRACE_EVENT(hfi1_sdma_state,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		const char *cstate,
-		const char *nstate
-	),
-	TP_ARGS(sde, cstate, nstate),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__string(curstate, cstate)
-		__string(newstate, nstate)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__assign_str(curstate, cstate);
-		__assign_str(newstate, nstate);
-	),
+	    TP_PROTO(struct sdma_engine *sde,
+		     const char *cstate,
+		     const char *nstate
+		     ),
+	    TP_ARGS(sde, cstate, nstate),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			     __string(curstate, cstate)
+			     __string(newstate, nstate)
+			     ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __assign_str(curstate, cstate);
+		       __assign_str(newstate, nstate);
+		       ),
 	TP_printk("[%s] current state %s new state %s",
-		__get_str(dev),
-		__get_str(curstate),
-		__get_str(newstate)
-	)
+		  __get_str(dev),
+		  __get_str(curstate),
+		  __get_str(newstate)
+		  )
 );
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_rc
 
 DECLARE_EVENT_CLASS(hfi1_rc_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 psn),
-	TP_ARGS(qp, psn),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, s_flags)
-		__field(u32, psn)
-		__field(u32, s_psn)
-		__field(u32, s_next_psn)
-		__field(u32, s_sending_psn)
-		__field(u32, s_sending_hpsn)
-		__field(u32, r_psn)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->s_flags = qp->s_flags;
-		__entry->psn = psn;
-		__entry->s_psn = qp->s_psn;
-		__entry->s_next_psn = qp->s_next_psn;
-		__entry->s_sending_psn = qp->s_sending_psn;
-		__entry->s_sending_hpsn = qp->s_sending_hpsn;
-		__entry->r_psn = qp->r_psn;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->s_flags,
-		__entry->psn,
-		__entry->s_psn,
-		__entry->s_next_psn,
-		__entry->s_sending_psn,
-		__entry->s_sending_hpsn,
-		__entry->r_psn
-	)
+		    TP_PROTO(struct rvt_qp *qp, u32 psn),
+		    TP_ARGS(qp, psn),
+		    TP_STRUCT__entry(
+			DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+			__field(u32, qpn)
+			__field(u32, s_flags)
+			__field(u32, psn)
+			__field(u32, s_psn)
+			__field(u32, s_next_psn)
+			__field(u32, s_sending_psn)
+			__field(u32, s_sending_hpsn)
+			__field(u32, r_psn)
+			),
+		    TP_fast_assign(
+			DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+			__entry->qpn = qp->ibqp.qp_num;
+			__entry->s_flags = qp->s_flags;
+			__entry->psn = psn;
+			__entry->s_psn = qp->s_psn;
+			__entry->s_next_psn = qp->s_next_psn;
+			__entry->s_sending_psn = qp->s_sending_psn;
+			__entry->s_sending_hpsn = qp->s_sending_hpsn;
+			__entry->r_psn = qp->r_psn;
+			),
+		    TP_printk(
+			"[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+			__get_str(dev),
+			__entry->qpn,
+			__entry->s_flags,
+			__entry->psn,
+			__entry->s_psn,
+			__entry->s_next_psn,
+			__entry->s_sending_psn,
+			__entry->s_sending_hpsn,
+			__entry->r_psn
+			)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
@@ -1316,21 +1251,20 @@
 #define TRACE_SYSTEM hfi1_misc
 
 TRACE_EVENT(hfi1_interrupt,
-	TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
-		 int src),
-	TP_ARGS(dd, is_entry, src),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__array(char, buf, 64)
-		__field(int, src)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd)
-		is_entry->is_name(__entry->buf, 64, src - is_entry->start);
-		__entry->src = src;
-	),
-	TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
-		  __entry->src)
+	    TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
+		     int src),
+	    TP_ARGS(dd, is_entry, src),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __array(char, buf, 64)
+			     __field(int, src)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd)
+			   is_entry->is_name(__entry->buf, 64,
+					     src - is_entry->start);
+			   __entry->src = src;
+			   ),
+	    TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
+		      __entry->src)
 );
 
 /*
@@ -1345,21 +1279,21 @@
 #define MAX_MSG_LEN 512
 
 DECLARE_EVENT_CLASS(hfi1_trace_template,
-	TP_PROTO(const char *function, struct va_format *vaf),
-	TP_ARGS(function, vaf),
-	TP_STRUCT__entry(
-		__string(function, function)
-		__dynamic_array(char, msg, MAX_MSG_LEN)
-	),
-	TP_fast_assign(
-		__assign_str(function, function);
-		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
-		     MAX_MSG_LEN, vaf->fmt,
-		     *vaf->va) >= MAX_MSG_LEN);
-	),
-	TP_printk("(%s) %s",
-		  __get_str(function),
-		  __get_str(msg))
+		    TP_PROTO(const char *function, struct va_format *vaf),
+		    TP_ARGS(function, vaf),
+		    TP_STRUCT__entry(__string(function, function)
+				     __dynamic_array(char, msg, MAX_MSG_LEN)
+				     ),
+		    TP_fast_assign(__assign_str(function, function);
+				   WARN_ON_ONCE(vsnprintf
+						(__get_dynamic_array(msg),
+						 MAX_MSG_LEN, vaf->fmt,
+						 *vaf->va) >=
+						MAX_MSG_LEN);
+				   ),
+		    TP_printk("(%s) %s",
+			      __get_str(function),
+			      __get_str(msg))
 );
 
 /*
@@ -1406,6 +1340,7 @@
 __hfi1_trace_def(FIRMWARE);
 __hfi1_trace_def(RCVCTRL);
 __hfi1_trace_def(TID);
+__hfi1_trace_def(MMU);
 
 #define hfi1_cdbg(which, fmt, ...) \
 	__hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)

diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c
index ea54fd2..e82e52a 100644
--- a/drivers/staging/rdma/hfi1/twsi.c
+++ b/drivers/staging/rdma/hfi1/twsi.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -119,9 +116,9 @@
 	 * Allow for slow slaves by simple
 	 * delay for falling edge, sampling on rise.
 	 */
-	if (!bit)
+	if (!bit) {
 		udelay(2);
-	else {
+	} else {
 		int rise_usec;
 
 		for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
@@ -131,11 +128,24 @@
 		}
 		if (rise_usec <= 0)
 			dd_dev_err(dd, "SCL interface stuck low > %d uSec\n",
-				    SCL_WAIT_USEC);
+				   SCL_WAIT_USEC);
 	}
 	i2c_wait_for_writes(dd, target);
 }
 
+static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait)
+{
+	u32 read_val, mask;
+
+	mask = QSFP_HFI0_I2CCLK;
+	/* SCL is meant to be bare-drain, so never set "OUT", just DIR */
+	hfi1_gpio_mod(dd, target, 0, 0, mask);
+	read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
+	if (wait)
+		i2c_wait_for_writes(dd, target);
+	return (read_val & mask) >> GPIO_SCL_NUM;
+}
+
 static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit)
 {
 	u32 mask;
@@ -274,13 +284,12 @@
 /**
  * hfi1_twsi_reset - reset I2C communication
  * @dd: the hfi1_ib device
+ * returns 0 if ok, -EIO on error
  */
-
 int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
 {
 	int clock_cycles_left = 9;
-	int was_high = 0;
-	u32 pins, mask;
+	u32 mask;
 
 	/* Both SCL and SDA should be high. If not, there
 	 * is something wrong.
@@ -294,43 +303,23 @@
 	 */
 	hfi1_gpio_mod(dd, target, 0, 0, mask);
 
-	/*
-	 * Clock nine times to get all listeners into a sane state.
-	 * If SDA does not go high at any point, we are wedged.
-	 * One vendor recommends then issuing START followed by STOP.
-	 * we cannot use our "normal" functions to do that, because
-	 * if SCL drops between them, another vendor's part will
-	 * wedge, dropping SDA and keeping it low forever, at the end of
-	 * the next transaction (even if it was not the device addressed).
-	 * So our START and STOP take place with SCL held high.
+	/* Check if SCL is low, if it is low then we have a slave device
+	 * misbehaving and there is not much we can do.
+	 */
+	if (!scl_in(dd, target, 0))
+		return -EIO;
+
+	/* Check if SDA is low, if it is low then we have to clock SDA
+	 * up to 9 times for the device to release the bus
 	 */
 	while (clock_cycles_left--) {
+		if (sda_in(dd, target, 0))
+			return 0;
 		scl_out(dd, target, 0);
 		scl_out(dd, target, 1);
-		/* Note if SDA is high, but keep clocking to sync slave */
-		was_high |= sda_in(dd, target, 0);
 	}
 
-	if (was_high) {
-		/*
-		 * We saw a high, which we hope means the slave is sync'd.
-		 * Issue START, STOP, pause for T_BUF.
-		 */
-
-		pins = hfi1_gpio_mod(dd, target, 0, 0, 0);
-		if ((pins & mask) != mask)
-			dd_dev_err(dd, "GPIO pins not at rest: %d\n",
-				    pins & mask);
-		/* Drop SDA to issue START */
-		udelay(1); /* Guarantee .6 uSec setup */
-		sda_out(dd, target, 0);
-		udelay(1); /* Guarantee .6 uSec hold */
-		/* At this point, SCL is high, SDA low. Raise SDA for STOP */
-		sda_out(dd, target, 1);
-		udelay(TWSI_BUF_WAIT_USEC);
-	}
-
-	return !was_high;
+	return -EIO;
 }
 
 #define HFI1_TWSI_START 0x100
@@ -365,17 +354,25 @@
  * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
  * which responded to all TWSI device codes, interpreting them as
  * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a one-byte "address" which selects
+ * this driver, the device is followed by a N-byte "address" which selects
  * the "register" or "offset" within the device from which data should
  * be read.
  */
 int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     void *buffer, int len)
 {
-	int ret;
 	u8 *bp = buffer;
+	int ret = 1;
+	int i;
+	int offset_size;
 
-	ret = 1;
+	/* obtain the offset size, strip it from the device address */
+	offset_size = (dev >> 8) & 0xff;
+	dev &= 0xff;
+
+	/* allow at most a 2 byte offset */
+	if (offset_size > 2)
+		goto bail;
 
 	if (dev == HFI1_TWSI_NO_DEV) {
 		/* legacy not-really-I2C */
@@ -383,34 +380,29 @@
 		ret = twsi_wr(dd, target, addr, HFI1_TWSI_START);
 	} else {
 		/* Actual I2C */
-		ret = twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START);
-		if (ret) {
-			stop_cmd(dd, target);
-			ret = 1;
-			goto bail;
-		}
-		/*
-		 * SFF spec claims we do _not_ stop after the addr
-		 * but simply issue a start with the "read" dev-addr.
-		 * Since we are implicitly waiting for ACK here,
-		 * we need t_buf (nominally 20uSec) before that start,
-		 * and cannot rely on the delay built in to the STOP
-		 */
-		ret = twsi_wr(dd, target, addr, 0);
-		udelay(TWSI_BUF_WAIT_USEC);
+		if (offset_size) {
+			ret = twsi_wr(dd, target,
+				      dev | WRITE_CMD, HFI1_TWSI_START);
+			if (ret) {
+				stop_cmd(dd, target);
+				goto bail;
+			}
 
-		if (ret) {
-			dd_dev_err(dd,
-				"Failed to write interface read addr %02X\n",
-				addr);
-			ret = 1;
-			goto bail;
+			for (i = 0; i < offset_size; i++) {
+				ret = twsi_wr(dd, target,
+					      (addr >> (i * 8)) & 0xff, 0);
+				udelay(TWSI_BUF_WAIT_USEC);
+				if (ret) {
+					dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
+						   i, addr);
+					goto bail;
+				}
+			}
 		}
 		ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START);
 	}
 	if (ret) {
 		stop_cmd(dd, target);
-		ret = 1;
 		goto bail;
 	}
 
@@ -442,76 +434,55 @@
  * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
  * which responded to all TWSI device codes, interpreting them as
  * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a one-byte "address" which selects
+ * this driver, the device is followed by a N-byte "address" which selects
  * the "register" or "offset" within the device to which data should
  * be written.
  */
 int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     const void *buffer, int len)
 {
-	int sub_len;
 	const u8 *bp = buffer;
-	int max_wait_time, i;
 	int ret = 1;
+	int i;
+	int offset_size;
 
-	while (len > 0) {
-		if (dev == HFI1_TWSI_NO_DEV) {
-			if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
-				    HFI1_TWSI_START)) {
-				goto failed_write;
-			}
-		} else {
-			/* Real I2C */
-			if (twsi_wr(dd, target,
-				    dev | WRITE_CMD, HFI1_TWSI_START))
-				goto failed_write;
-			ret = twsi_wr(dd, target, addr, 0);
-			if (ret) {
-				dd_dev_err(dd,
-					"Failed to write interface write addr %02X\n",
-					addr);
-				goto failed_write;
-			}
+	/* obtain the offset size, strip it from the device address */
+	offset_size = (dev >> 8) & 0xff;
+	dev &= 0xff;
+
+	/* allow at most a 2 byte offset */
+	if (offset_size > 2)
+		goto bail;
+
+	if (dev == HFI1_TWSI_NO_DEV) {
+		if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
+			    HFI1_TWSI_START)) {
+			goto failed_write;
 		}
-
-		sub_len = min(len, 4);
-		addr += sub_len;
-		len -= sub_len;
-
-		for (i = 0; i < sub_len; i++)
-			if (twsi_wr(dd, target, *bp++, 0))
-				goto failed_write;
-
-		stop_cmd(dd, target);
-
-		/*
-		 * Wait for write complete by waiting for a successful
-		 * read (the chip replies with a zero after the write
-		 * cmd completes, and before it writes to the eeprom.
-		 * The startcmd for the read will fail the ack until
-		 * the writes have completed.   We do this inline to avoid
-		 * the debug prints that are in the real read routine
-		 * if the startcmd fails.
-		 * We also use the proper device address, so it doesn't matter
-		 * whether we have real eeprom_dev. Legacy likes any address.
-		 */
-		max_wait_time = 100;
-		while (twsi_wr(dd, target,
-			       dev | READ_CMD, HFI1_TWSI_START)) {
-			stop_cmd(dd, target);
-			if (!--max_wait_time)
-				goto failed_write;
-		}
-		/* now read (and ignore) the resulting byte */
-		rd_byte(dd, target, 1);
+	} else {
+		/* Real I2C */
+		if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START))
+			goto failed_write;
 	}
 
+	for (i = 0; i < offset_size; i++) {
+		ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0);
+		udelay(TWSI_BUF_WAIT_USEC);
+		if (ret) {
+			dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
+				   i, addr);
+			goto bail;
+		}
+	}
+
+	for (i = 0; i < len; i++)
+		if (twsi_wr(dd, target, *bp++, 0))
+			goto failed_write;
+
 	ret = 0;
-	goto bail;
 
 failed_write:
 	stop_cmd(dd, target);
-	ret = 1;
 
 bail:
 	return ret;

diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/staging/rdma/hfi1/twsi.h
index 5907e02..5b8a5b5 100644
--- a/drivers/staging/rdma/hfi1/twsi.h
+++ b/drivers/staging/rdma/hfi1/twsi.h

@@ -1,14 +1,13 @@
 #ifndef _TWSI_H
 #define _TWSI_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,8 +51,9 @@
 
 struct hfi1_devdata;
 
-/* Bit position of SDA pin in ASIC_QSFP* registers  */
+/* Bit position of SDA/SCL pins in ASIC_QSFP* registers  */
 #define  GPIO_SDA_NUM 1
+#define  GPIO_SCL_NUM 0
 
 /* these functions must be called with qsfp_lock held */
 int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target);
@@ -64,5 +62,4 @@
 int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     const void *buffer, int len);
 
-
 #endif /* _TWSI_H */

diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c
index 4f2a788..df773d4 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/staging/rdma/hfi1/uc.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -49,71 +46,82 @@
  */
 
 #include "hfi.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 #include "qp.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_UC_##x
 
+/* only opcode mask for adaptive pio */
+const u32 uc_only_opcode =
+	BIT(OP(SEND_ONLY) & 0x1f) |
+	BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+
 /**
  * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
  *
+ * Assume s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_uc_req(struct hfi1_qp *qp)
+int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 hwords = 5;
 	u32 bth0 = 0;
 	u32 len;
 	u32 pmtu = qp->pmtu;
-	int ret = 0;
 	int middle = 0;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
 		clear_ahg(qp);
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
+		goto done_free_tx;
 	}
 
-	ohdr = &qp->s_hdr->ibh.u.oth;
+	ohdr = &ps->s_txreq->phdr.hdr.u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 
 	/* Get the next send request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	qp->s_wqe = NULL;
 	switch (qp->s_state) {
 	default:
-		if (!(ib_hfi1_state_ops[qp->state] &
-		    HFI1_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] &
+		    RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head) {
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_cur == ACCESS_ONCE(qp->s_head)) {
 			clear_ahg(qp);
 			goto bail;
 		}
 		/*
 		 * Start a new request.
 		 */
-		wqe->psn = qp->s_next_psn;
-		qp->s_psn = qp->s_next_psn;
+		qp->s_psn = wqe->psn;
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
 		qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -128,9 +136,9 @@
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_SEND)
+			if (wqe->wr.opcode == IB_WR_SEND) {
 				qp->s_state = OP(SEND_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(SEND_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the BTH */
@@ -157,9 +165,9 @@
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the RETH */
@@ -188,9 +196,9 @@
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_SEND)
+		if (wqe->wr.opcode == IB_WR_SEND) {
 			qp->s_state = OP(SEND_LAST);
-		else {
+		} else {
 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -213,9 +221,9 @@
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
+		} else {
 			qp->s_state =
 				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
@@ -231,19 +239,28 @@
 	}
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_sge = &qp->s_sge;
 	qp->s_cur_size = len;
 	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
-			     mask_psn(qp->s_next_psn++), middle);
-done:
-	ret = 1;
-	goto unlock;
+			     mask_psn(qp->s_psn++), middle, ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+	return 1;
+
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /**
@@ -266,7 +283,7 @@
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	struct hfi1_other_headers *ohdr = packet->ohdr;
 	u32 bth0, opcode;
 	u32 hdrsize = packet->hlen;
@@ -291,14 +308,14 @@
 			u16 rlid = be16_to_cpu(hdr->lrh[3]);
 			u8 sl, sc5;
 
-			lqpn = bth1 & HFI1_QPN_MASK;
+			lqpn = bth1 & RVT_QPN_MASK;
 			rqpn = qp->remote_qpn;
 
 			sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
 			sl = ibp->sc_to_sl[sc5];
 
 			process_becn(ppd, sl, rlid, lqpn, rqpn,
-					IB_CC_SVCTYPE_UC);
+				     IB_CC_SVCTYPE_UC);
 		}
 
 		if (bth1 & HFI1_FECN_SMASK) {
@@ -331,10 +348,11 @@
 inv:
 		if (qp->r_state == OP(SEND_FIRST) ||
 		    qp->r_state == OP(SEND_MIDDLE)) {
-			set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
+			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 			qp->r_sge.num_sge = 0;
-		} else
-			hfi1_put_ss(&qp->r_sge);
+		} else {
+			rvt_put_ss(&qp->r_sge);
+		}
 		qp->r_state = OP(SEND_LAST);
 		switch (opcode) {
 		case OP(SEND_FIRST):
@@ -381,7 +399,7 @@
 		goto inv;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
 		qp_comm_est(qp);
 
 	/* OK, process the packet. */
@@ -390,10 +408,10 @@
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
 send_first:
-		if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
 			qp->r_sge = qp->s_rdma_read_sge;
-		else {
-			ret = hfi1_get_rwqe(qp, 0);
+		} else {
+			ret = hfi1_rvt_get_rwqe(qp, 0);
 			if (ret < 0)
 				goto op_err;
 			if (!ret)
@@ -417,7 +435,7 @@
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto rewind;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 0);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
 		break;
 
 	case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -442,8 +460,8 @@
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto rewind;
 		wc.opcode = IB_WC_RECV;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 0);
-		hfi1_put_ss(&qp->s_rdma_read_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
+		rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -468,9 +486,9 @@
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			      (ohdr->bth[0] &
-				cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+			     (ohdr->bth[0] &
+			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
 
 	case OP(RDMA_WRITE_FIRST):
@@ -491,8 +509,8 @@
 			int ok;
 
 			/* Check rkey */
-			ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
-					  vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto drop;
 			qp->r_sge.num_sge = 1;
@@ -503,9 +521,9 @@
 			qp->r_sge.sge.length = 0;
 			qp->r_sge.sge.sge_length = 0;
 		}
-		if (opcode == OP(RDMA_WRITE_ONLY))
+		if (opcode == OP(RDMA_WRITE_ONLY)) {
 			goto rdma_last;
-		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
+		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
 			wc.ex.imm_data = ohdr->u.rc.imm_data;
 			goto rdma_last_imm;
 		}
@@ -517,7 +535,7 @@
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto drop;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -535,10 +553,10 @@
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
-			hfi1_put_ss(&qp->s_rdma_read_sge);
-		else {
-			ret = hfi1_get_rwqe(qp, 1);
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
+			rvt_put_ss(&qp->s_rdma_read_sge);
+		} else {
+			ret = hfi1_rvt_get_rwqe(qp, 1);
 			if (ret < 0)
 				goto op_err;
 			if (!ret)
@@ -546,8 +564,8 @@
 		}
 		wc.byte_len = qp->r_len;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+		rvt_put_ss(&qp->r_sge);
 		goto last_imm;
 
 	case OP(RDMA_WRITE_LAST):
@@ -562,8 +580,8 @@
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+		rvt_put_ss(&qp->r_sge);
 		break;
 
 	default:
@@ -575,14 +593,12 @@
 	return;
 
 rewind:
-	set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
+	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 	qp->r_sge.num_sge = 0;
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return;
 
 op_err:
 	hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-	return;
-
 }

diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index 25e6053..ae8a70f 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,6 +50,7 @@
 
 #include "hfi.h"
 #include "mad.h"
+#include "verbs_txreq.h"
 #include "qp.h"
 
 /**
@@ -65,24 +63,25 @@
  * Note that the receive interrupt handler may be calling hfi1_ud_rcv()
  * while this is being called.
  */
-static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
+static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 {
 	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
 	struct hfi1_pportdata *ppd;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	struct ib_ah_attr *ah_attr;
 	unsigned long flags;
-	struct hfi1_sge_state ssge;
-	struct hfi1_sge *sge;
+	struct rvt_sge_state ssge;
+	struct rvt_sge *sge;
 	struct ib_wc wc;
 	u32 length;
 	enum ib_qp_type sqptype, dqptype;
 
 	rcu_read_lock();
 
-	qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
+	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
+			    swqe->ud_wr.remote_qpn);
 	if (!qp) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		rcu_read_unlock();
 		return;
 	}
@@ -93,12 +92,12 @@
 			IB_QPT_UD : qp->ibqp.qp_type;
 
 	if (dqptype != sqptype ||
-	    !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto drop;
 	}
 
-	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
+	ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
 	ppd = ppd_from_ibp(ibp);
 
 	if (qp->ibqp.qp_num > 1) {
@@ -161,35 +160,36 @@
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & HFI1_R_REUSE_SGE)
-		qp->r_flags &= ~HFI1_R_REUSE_SGE;
-	else {
+	if (qp->r_flags & RVT_R_REUSE_SGE) {
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
+	} else {
 		int ret;
 
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0) {
 			hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
 			goto bail_unlock;
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			goto bail_unlock;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= HFI1_R_REUSE_SGE;
-		ibp->n_pkt_drops++;
+		qp->r_flags |= RVT_R_REUSE_SGE;
+		ibp->rvp.n_pkt_drops++;
 		goto bail_unlock;
 	}
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
-			      sizeof(struct ib_grh), 1);
+			      sizeof(struct ib_grh), 1, 0);
 		wc.wc_flags |= IB_WC_GRH;
-	} else
+	} else {
 		hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+	}
 	ssge.sg_list = swqe->sg_list + 1;
 	ssge.sge = *swqe->sg_list;
 	ssge.num_sge = swqe->wr.num_sge;
@@ -202,7 +202,7 @@
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
@@ -210,7 +210,7 @@
 			if (--ssge.num_sge)
 				*sge = *ssge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -222,8 +222,8 @@
 		}
 		length -= len;
 	}
-	hfi1_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto bail_unlock;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -242,14 +242,14 @@
 	wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
 	/* Check for loopback when the port lid is not set */
 	if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
-		wc.slid = HFI1_PERMISSIVE_LID;
+		wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
 	wc.sl = ah_attr->sl;
 	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      swqe->wr.send_flags & IB_SEND_SOLICITED);
-	ibp->n_loop_pkts++;
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     swqe->wr.send_flags & IB_SEND_SOLICITED);
+	ibp->rvp.n_loop_pkts++;
 bail_unlock:
 	spin_unlock_irqrestore(&qp->r_lock, flags);
 drop:
@@ -260,47 +260,53 @@
  * hfi1_make_ud_req - construct a UD request packet
  * @qp: the QP
  *
+ * Assume s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_ud_req(struct hfi1_qp *qp)
+int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct hfi1_pportdata *ppd;
 	struct hfi1_ibport *ibp;
-	struct hfi1_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 bth0;
 	u16 lrh0;
 	u16 lid;
-	int ret = 0;
 	int next_cur;
 	u8 sc5;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
+		goto done_free_tx;
 	}
 
-	if (qp->s_cur == qp->s_head)
+	/* see post_one_send() */
+	smp_read_barrier_depends();
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	next_cur = qp->s_cur + 1;
 	if (next_cur >= qp->s_size)
 		next_cur = 0;
@@ -308,13 +314,15 @@
 	/* Construct the header. */
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ppd = ppd_from_ibp(ibp);
-	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-	if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
-	    ah_attr->dlid == HFI1_PERMISSIVE_LID) {
+	ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+	if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+	    ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
-		if (unlikely(!loopback && (lid == ppd->lid ||
-		    (lid == HFI1_PERMISSIVE_LID &&
-		     qp->ibqp.qp_type == IB_QPT_GSI)))) {
+		if (unlikely(!loopback &&
+			     (lid == ppd->lid ||
+			      (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
+			      qp->ibqp.qp_type == IB_QPT_GSI)))) {
+			unsigned long flags;
 			/*
 			 * If DMAs are in progress, we can't generate
 			 * a completion for the loopback packet since
@@ -322,16 +330,17 @@
 			 * Instead of waiting, we could queue a
 			 * zero length descriptor so we get a callback.
 			 */
-			if (atomic_read(&qp->s_iowait.sdma_busy)) {
-				qp->s_flags |= HFI1_S_WAIT_DMA;
+			if (iowait_sdma_pending(&priv->s_iowait)) {
+				qp->s_flags |= RVT_S_WAIT_DMA;
 				goto bail;
 			}
 			qp->s_cur = next_cur;
+			local_irq_save(flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, flags);
 			hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
-			goto done;
+			goto done_free_tx;
 		}
 	}
 
@@ -353,11 +362,12 @@
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		/* Header size in 32-bit words. */
-		qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
-					       &ah_attr->grh,
-					       qp->s_hdrwords, nwords);
+		qp->s_hdrwords += hfi1_make_grh(ibp,
+						&ps->s_txreq->phdr.hdr.u.l.grh,
+						&ah_attr->grh,
+						qp->s_hdrwords, nwords);
 		lrh0 = HFI1_LRH_GRH;
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 		/*
 		 * Don't worry about sending to locally attached multicast
 		 * QPs.  It is unspecified by the spec. what happens.
@@ -365,37 +375,42 @@
 	} else {
 		/* Header size in 32-bit words. */
 		lrh0 = HFI1_LRH_BTH;
-		ohdr = &qp->s_hdr->ibh.u.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.oth;
 	}
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		qp->s_hdrwords++;
 		ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
 		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-	} else
+	} else {
 		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+	}
 	sc5 = ibp->sl_to_sc[ah_attr->sl];
 	lrh0 |= (ah_attr->sl & 0xf) << 4;
 	if (qp->ibqp.qp_type == IB_QPT_SMI) {
 		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
-		qp->s_sc = 0xf;
+		priv->s_sc = 0xf;
 	} else {
 		lrh0 |= (sc5 & 0xf) << 12;
-		qp->s_sc = sc5;
+		priv->s_sc = sc5;
 	}
-	qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-	qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->ibh.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-	qp->s_hdr->ibh.lrh[2] =
+	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+	ps->s_txreq->sde = priv->s_sde;
+	priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
+	ps->s_txreq->psc = priv->s_sendcontext;
+	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
+	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
+	ps->s_txreq->phdr.hdr.lrh[2] =
 		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE))
-		qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
-	else {
+	if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+		ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
+	} else {
 		lid = ppd->lid;
 		if (lid) {
 			lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
-			qp->s_hdr->ibh.lrh[3] = cpu_to_be16(lid);
-		} else
-			qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
+			ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
+		} else {
+			ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
+		}
 	}
 	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 		bth0 |= IB_BTH_SOLICITED;
@@ -406,7 +421,7 @@
 		bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
+	ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
@@ -415,20 +430,28 @@
 					 qp->qkey : wqe->ud_wr.remote_qkey);
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 	/* disarm any ahg */
-	qp->s_hdr->ahgcount = 0;
-	qp->s_hdr->ahgidx = 0;
-	qp->s_hdr->tx_flags = 0;
-	qp->s_hdr->sde = NULL;
+	priv->s_hdr->ahgcount = 0;
+	priv->s_hdr->ahgidx = 0;
+	priv->s_hdr->tx_flags = 0;
+	priv->s_hdr->sde = NULL;
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
 
-done:
-	ret = 1;
-	goto unlock;
+	return 1;
+
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /*
@@ -476,7 +499,7 @@
 	return -1;
 }
 
-void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
+void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
 		u32 pkey, u32 slid, u32 dlid, u8 sc5,
 		const struct ib_grh *old_grh)
 {
@@ -550,7 +573,7 @@
  * opa_smp_check() returns 0 if all checks succeed, 1 otherwise.
  */
 static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
-			 struct hfi1_qp *qp, u16 slid, struct opa_smp *smp)
+			 struct rvt_qp *qp, u16 slid, struct opa_smp *smp)
 {
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
@@ -607,7 +630,7 @@
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_GET_RESP:
 	case IB_MGMT_METHOD_REPORT_RESP:
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return 0;
 		if (pkey == FULL_MGMT_P_KEY) {
 			smp->status |= IB_SMP_UNSUP_METHOD;
@@ -624,7 +647,6 @@
 	return 0;
 }
 
-
 /**
  * hfi1_ud_rcv - receive an incoming UD packet
  * @ibp: the port the packet came in on
@@ -654,7 +676,7 @@
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	bool has_grh = rcv_flags & HFI1_HAS_GRH;
 	bool sc4_bit = has_sc4_bit(packet);
 	u8 sc;
@@ -663,10 +685,10 @@
 	struct ib_grh *grh = NULL;
 
 	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 	dlid = be16_to_cpu(hdr->lrh[1]);
-	is_mcast = (dlid > HFI1_MULTICAST_LID_BASE) &&
-			(dlid != HFI1_PERMISSIVE_LID);
+	is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+			(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
 	bth1 = be32_to_cpu(ohdr->bth[1]);
 	if (unlikely(bth1 & HFI1_BECN_SMASK)) {
 		/*
@@ -674,7 +696,7 @@
 		 * error path.
 		 */
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-		u32 lqpn =  be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
+		u32 lqpn =  be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		u8 sl, sc5;
 
 		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
@@ -750,7 +772,6 @@
 			mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
 			if (mgmt_pkey_idx < 0)
 				goto drop;
-
 		}
 		if (unlikely(qkey != qp->qkey)) {
 			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
@@ -788,7 +809,6 @@
 		mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
 		if (mgmt_pkey_idx < 0)
 			goto drop;
-
 	}
 
 	if (qp->ibqp.qp_num > 1 &&
@@ -799,8 +819,9 @@
 	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
 		wc.ex.imm_data = 0;
 		wc.wc_flags = 0;
-	} else
+	} else {
 		goto drop;
+	}
 
 	/*
 	 * A GRH is expected to precede the data even if not
@@ -811,36 +832,38 @@
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & HFI1_R_REUSE_SGE)
-		qp->r_flags &= ~HFI1_R_REUSE_SGE;
-	else {
+	if (qp->r_flags & RVT_R_REUSE_SGE) {
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
+	} else {
 		int ret;
 
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0) {
 			hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
 			return;
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			return;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= HFI1_R_REUSE_SGE;
+		qp->r_flags |= RVT_R_REUSE_SGE;
 		goto drop;
 	}
 	if (has_grh) {
 		hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-			      sizeof(struct ib_grh), 1);
+			      sizeof(struct ib_grh), 1, 0);
 		wc.wc_flags |= IB_WC_GRH;
-	} else
+	} else {
 		hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
-	hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
-	hfi1_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	}
+	hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+		      1, 0);
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		return;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -862,8 +885,9 @@
 			}
 		}
 		wc.pkey_index = (unsigned)mgmt_pkey_idx;
-	} else
+	} else {
 		wc.pkey_index = 0;
+	}
 
 	wc.slid = be16_to_cpu(hdr->lrh[3]);
 	sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
@@ -873,15 +897,15 @@
 	/*
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
 	 */
-	wc.dlid_path_bits = dlid >= HFI1_MULTICAST_LID_BASE ? 0 :
+	wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
 		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      (ohdr->bth[0] &
-			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     (ohdr->bth[0] &
+		      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }

diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
new file mode 100644
index 0000000..0861e09
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c

@@ -0,0 +1,1044 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <asm/page.h>
+
+#include "user_exp_rcv.h"
+#include "trace.h"
+#include "mmu_rb.h"
+
+struct tid_group {
+	struct list_head list;
+	unsigned base;
+	u8 size;
+	u8 used;
+	u8 map;
+};
+
+struct tid_rb_node {
+	struct mmu_rb_node mmu;
+	unsigned long phys;
+	struct tid_group *grp;
+	u32 rcventry;
+	dma_addr_t dma_addr;
+	bool freed;
+	unsigned npages;
+	struct page *pages[0];
+};
+
+struct tid_pageset {
+	u16 idx;
+	u16 count;
+};
+
+#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
+
+#define num_user_pages(vaddr, len)				       \
+	(1 + (((((unsigned long)(vaddr) +			       \
+		 (unsigned long)(len) - 1) & PAGE_MASK) -	       \
+	       ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
+
+static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
+			    struct rb_root *);
+static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
+static int set_rcvarray_entry(struct file *, unsigned long, u32,
+			      struct tid_group *, struct page **, unsigned);
+static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
+			    struct tid_pageset *, unsigned, u16, struct page **,
+			    u32 *, unsigned *, unsigned *);
+static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
+static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);
+
+static struct mmu_rb_ops tid_rb_ops = {
+	.insert = mmu_rb_insert,
+	.remove = mmu_rb_remove,
+	.invalidate = mmu_rb_invalidate
+};
+
+static inline u32 rcventry2tidinfo(u32 rcventry)
+{
+	u32 pair = rcventry & ~0x1;
+
+	return EXP_TID_SET(IDX, pair >> 1) |
+		EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+}
+
+static inline void exp_tid_group_init(struct exp_tid_set *set)
+{
+	INIT_LIST_HEAD(&set->list);
+	set->count = 0;
+}
+
+static inline void tid_group_remove(struct tid_group *grp,
+				    struct exp_tid_set *set)
+{
+	list_del_init(&grp->list);
+	set->count--;
+}
+
+static inline void tid_group_add_tail(struct tid_group *grp,
+				      struct exp_tid_set *set)
+{
+	list_add_tail(&grp->list, &set->list);
+	set->count++;
+}
+
+static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
+{
+	struct tid_group *grp =
+		list_first_entry(&set->list, struct tid_group, list);
+	list_del_init(&grp->list);
+	set->count--;
+	return grp;
+}
+
+static inline void tid_group_move(struct tid_group *group,
+				  struct exp_tid_set *s1,
+				  struct exp_tid_set *s2)
+{
+	tid_group_remove(group, s1);
+	tid_group_add_tail(group, s2);
+}
+
+/*
+ * Initialize context and file private data needed for Expected
+ * receive caching. This needs to be done after the context has
+ * been configured with the eager/expected RcvEntry counts.
+ */
+int hfi1_user_exp_rcv_init(struct file *fp)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	unsigned tidbase;
+	int i, ret = 0;
+
+	spin_lock_init(&fd->tid_lock);
+	spin_lock_init(&fd->invalid_lock);
+	fd->tid_rb_root = RB_ROOT;
+
+	if (!uctxt->subctxt_cnt || !fd->subctxt) {
+		exp_tid_group_init(&uctxt->tid_group_list);
+		exp_tid_group_init(&uctxt->tid_used_list);
+		exp_tid_group_init(&uctxt->tid_full_list);
+
+		tidbase = uctxt->expected_base;
+		for (i = 0; i < uctxt->expected_count /
+			     dd->rcv_entries.group_size; i++) {
+			struct tid_group *grp;
+
+			grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+			if (!grp) {
+				/*
+				 * If we fail here, the groups already
+				 * allocated will be freed by the close
+				 * call.
+				 */
+				ret = -ENOMEM;
+				goto done;
+			}
+			grp->size = dd->rcv_entries.group_size;
+			grp->base = tidbase;
+			tid_group_add_tail(grp, &uctxt->tid_group_list);
+			tidbase += dd->rcv_entries.group_size;
+		}
+	}
+
+	fd->entry_to_rb = kcalloc(uctxt->expected_count,
+				     sizeof(struct rb_node *),
+				     GFP_KERNEL);
+	if (!fd->entry_to_rb)
+		return -ENOMEM;
+
+	if (!HFI1_CAP_IS_USET(TID_UNMAP)) {
+		fd->invalid_tid_idx = 0;
+		fd->invalid_tids = kzalloc(uctxt->expected_count *
+					   sizeof(u32), GFP_KERNEL);
+		if (!fd->invalid_tids) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		/*
+		 * Register MMU notifier callbacks. If the registration
+		 * fails, continue but turn off the TID caching for
+		 * all user contexts.
+		 */
+		ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
+		if (ret) {
+			dd_dev_info(dd,
+				    "Failed MMU notifier registration %d\n",
+				    ret);
+			HFI1_CAP_USET(TID_UNMAP);
+			ret = 0;
+		}
+	}
+
+	/*
+	 * PSM does not have a good way to separate, count, and
+	 * effectively enforce a limit on RcvArray entries used by
+	 * subctxts (when context sharing is used) when TID caching
+	 * is enabled. To help with that, we calculate a per-process
+	 * RcvArray entry share and enforce that.
+	 * If TID caching is not in use, PSM deals with usage on its
+	 * own. In that case, we allow any subctxt to take all of the
+	 * entries.
+	 *
+	 * Make sure that we set the tid counts only after successful
+	 * init.
+	 */
+	spin_lock(&fd->tid_lock);
+	if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) {
+		u16 remainder;
+
+		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
+		remainder = uctxt->expected_count % uctxt->subctxt_cnt;
+		if (remainder && fd->subctxt < remainder)
+			fd->tid_limit++;
+	} else {
+		fd->tid_limit = uctxt->expected_count;
+	}
+	spin_unlock(&fd->tid_lock);
+done:
+	return ret;
+}
+
+int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct tid_group *grp, *gptr;
+
+	/*
+	 * The notifier would have been removed when the process'es mm
+	 * was freed.
+	 */
+	if (!HFI1_CAP_IS_USET(TID_UNMAP))
+		hfi1_mmu_rb_unregister(&fd->tid_rb_root);
+
+	kfree(fd->invalid_tids);
+
+	if (!uctxt->cnt) {
+		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+			unlock_exp_tids(uctxt, &uctxt->tid_full_list,
+					&fd->tid_rb_root);
+		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+			unlock_exp_tids(uctxt, &uctxt->tid_used_list,
+					&fd->tid_rb_root);
+		list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+					 list) {
+			list_del_init(&grp->list);
+			kfree(grp);
+		}
+		hfi1_clear_tids(uctxt);
+	}
+
+	kfree(fd->entry_to_rb);
+	return 0;
+}
+
+/*
+ * Write an "empty" RcvArray entry.
+ * This function exists so the TID registaration code can use it
+ * to write to unused/unneeded entries and still take advantage
+ * of the WC performance improvements. The HFI will ignore this
+ * write to the RcvArray entry.
+ */
+static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+{
+	/*
+	 * Doing the WC fill writes only makes sense if the device is
+	 * present and the RcvArray has been mapped as WC memory.
+	 */
+	if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
+		writeq(0, dd->rcvarray_wc + (index * 8));
+}
+
+/*
+ * RcvArray entry allocation for Expected Receives is done by the
+ * following algorithm:
+ *
+ * The context keeps 3 lists of groups of RcvArray entries:
+ *   1. List of empty groups - tid_group_list
+ *      This list is created during user context creation and
+ *      contains elements which describe sets (of 8) of empty
+ *      RcvArray entries.
+ *   2. List of partially used groups - tid_used_list
+ *      This list contains sets of RcvArray entries which are
+ *      not completely used up. Another mapping request could
+ *      use some of all of the remaining entries.
+ *   3. List of full groups - tid_full_list
+ *      This is the list where sets that are completely used
+ *      up go.
+ *
+ * An attempt to optimize the usage of RcvArray entries is
+ * made by finding all sets of physically contiguous pages in a
+ * user's buffer.
+ * These physically contiguous sets are further split into
+ * sizes supported by the receive engine of the HFI. The
+ * resulting sets of pages are stored in struct tid_pageset,
+ * which describes the sets as:
+ *    * .count - number of pages in this set
+ *    * .idx - starting index into struct page ** array
+ *                    of this set
+ *
+ * From this point on, the algorithm deals with the page sets
+ * described above. The number of pagesets is divided by the
+ * RcvArray group size to produce the number of full groups
+ * needed.
+ *
+ * Groups from the 3 lists are manipulated using the following
+ * rules:
+ *   1. For each set of 8 pagesets, a complete group from
+ *      tid_group_list is taken, programmed, and moved to
+ *      the tid_full_list list.
+ *   2. For all remaining pagesets:
+ *      2.1 If the tid_used_list is empty and the tid_group_list
+ *          is empty, stop processing pageset and return only
+ *          what has been programmed up to this point.
+ *      2.2 If the tid_used_list is empty and the tid_group_list
+ *          is not empty, move a group from tid_group_list to
+ *          tid_used_list.
+ *      2.3 For each group is tid_used_group, program as much as
+ *          can fit into the group. If the group becomes fully
+ *          used, move it to tid_full_list.
+ */
+int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	int ret = 0, need_group = 0, pinned;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
+		tididx = 0, mapped, mapped_pages = 0;
+	unsigned long vaddr = tinfo->vaddr;
+	struct page **pages = NULL;
+	u32 *tidlist = NULL;
+	struct tid_pageset *pagesets = NULL;
+
+	/* Get the number of pages the user buffer spans */
+	npages = num_user_pages(vaddr, tinfo->length);
+	if (!npages)
+		return -EINVAL;
+
+	if (npages > uctxt->expected_count) {
+		dd_dev_err(dd, "Expected buffer too big\n");
+		return -EINVAL;
+	}
+
+	/* Verify that access is OK for the user buffer */
+	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
+		       npages * PAGE_SIZE)) {
+		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
+			   (void *)vaddr, npages);
+		return -EFAULT;
+	}
+
+	pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets),
+			   GFP_KERNEL);
+	if (!pagesets)
+		return -ENOMEM;
+
+	/* Allocate the array of struct page pointers needed for pinning */
+	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	/*
+	 * Pin all the pages of the user buffer. If we can't pin all the
+	 * pages, accept the amount pinned so far and program only that.
+	 * User space knows how to deal with partially programmed buffers.
+	 */
+	if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages))
+		return -ENOMEM;
+	pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
+	if (pinned <= 0) {
+		ret = pinned;
+		goto bail;
+	}
+	fd->tid_n_pinned += npages;
+
+	/* Find sets of physically contiguous pages */
+	npagesets = find_phys_blocks(pages, pinned, pagesets);
+
+	/*
+	 * We don't need to access this under a lock since tid_used is per
+	 * process and the same process cannot be in hfi1_user_exp_rcv_clear()
+	 * and hfi1_user_exp_rcv_setup() at the same time.
+	 */
+	spin_lock(&fd->tid_lock);
+	if (fd->tid_used + npagesets > fd->tid_limit)
+		pageset_count = fd->tid_limit - fd->tid_used;
+	else
+		pageset_count = npagesets;
+	spin_unlock(&fd->tid_lock);
+
+	if (!pageset_count)
+		goto bail;
+
+	ngroups = pageset_count / dd->rcv_entries.group_size;
+	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
+	if (!tidlist) {
+		ret = -ENOMEM;
+		goto nomem;
+	}
+
+	tididx = 0;
+
+	/*
+	 * From this point on, we are going to be using shared (between master
+	 * and subcontexts) context resources. We need to take the lock.
+	 */
+	mutex_lock(&uctxt->exp_lock);
+	/*
+	 * The first step is to program the RcvArray entries which are complete
+	 * groups.
+	 */
+	while (ngroups && uctxt->tid_group_list.count) {
+		struct tid_group *grp =
+			tid_group_pop(&uctxt->tid_group_list);
+
+		ret = program_rcvarray(fp, vaddr, grp, pagesets,
+				       pageidx, dd->rcv_entries.group_size,
+				       pages, tidlist, &tididx, &mapped);
+		/*
+		 * If there was a failure to program the RcvArray
+		 * entries for the entire group, reset the grp fields
+		 * and add the grp back to the free group list.
+		 */
+		if (ret <= 0) {
+			tid_group_add_tail(grp, &uctxt->tid_group_list);
+			hfi1_cdbg(TID,
+				  "Failed to program RcvArray group %d", ret);
+			goto unlock;
+		}
+
+		tid_group_add_tail(grp, &uctxt->tid_full_list);
+		ngroups--;
+		pageidx += ret;
+		mapped_pages += mapped;
+	}
+
+	while (pageidx < pageset_count) {
+		struct tid_group *grp, *ptr;
+		/*
+		 * If we don't have any partially used tid groups, check
+		 * if we have empty groups. If so, take one from there and
+		 * put in the partially used list.
+		 */
+		if (!uctxt->tid_used_list.count || need_group) {
+			if (!uctxt->tid_group_list.count)
+				goto unlock;
+
+			grp = tid_group_pop(&uctxt->tid_group_list);
+			tid_group_add_tail(grp, &uctxt->tid_used_list);
+			need_group = 0;
+		}
+		/*
+		 * There is an optimization opportunity here - instead of
+		 * fitting as many page sets as we can, check for a group
+		 * later on in the list that could fit all of them.
+		 */
+		list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
+					 list) {
+			unsigned use = min_t(unsigned, pageset_count - pageidx,
+					     grp->size - grp->used);
+
+			ret = program_rcvarray(fp, vaddr, grp, pagesets,
+					       pageidx, use, pages, tidlist,
+					       &tididx, &mapped);
+			if (ret < 0) {
+				hfi1_cdbg(TID,
+					  "Failed to program RcvArray entries %d",
+					  ret);
+				ret = -EFAULT;
+				goto unlock;
+			} else if (ret > 0) {
+				if (grp->used == grp->size)
+					tid_group_move(grp,
+						       &uctxt->tid_used_list,
+						       &uctxt->tid_full_list);
+				pageidx += ret;
+				mapped_pages += mapped;
+				need_group = 0;
+				/* Check if we are done so we break out early */
+				if (pageidx >= pageset_count)
+					break;
+			} else if (WARN_ON(ret == 0)) {
+				/*
+				 * If ret is 0, we did not program any entries
+				 * into this group, which can only happen if
+				 * we've screwed up the accounting somewhere.
+				 * Warn and try to continue.
+				 */
+				need_group = 1;
+			}
+		}
+	}
+unlock:
+	mutex_unlock(&uctxt->exp_lock);
+nomem:
+	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
+		  mapped_pages, ret);
+	if (tididx) {
+		spin_lock(&fd->tid_lock);
+		fd->tid_used += tididx;
+		spin_unlock(&fd->tid_lock);
+		tinfo->tidcnt = tididx;
+		tinfo->length = mapped_pages * PAGE_SIZE;
+
+		if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
+				 tidlist, sizeof(tidlist[0]) * tididx)) {
+			/*
+			 * On failure to copy to the user level, we need to undo
+			 * everything done so far so we don't leak resources.
+			 */
+			tinfo->tidlist = (unsigned long)&tidlist;
+			hfi1_user_exp_rcv_clear(fp, tinfo);
+			tinfo->tidlist = 0;
+			ret = -EFAULT;
+			goto bail;
+		}
+	}
+
+	/*
+	 * If not everything was mapped (due to insufficient RcvArray entries,
+	 * for example), unpin all unmapped pages so we can pin them nex time.
+	 */
+	if (mapped_pages != pinned) {
+		hfi1_release_user_pages(current->mm, &pages[mapped_pages],
+					pinned - mapped_pages,
+					false);
+		fd->tid_n_pinned -= pinned - mapped_pages;
+	}
+bail:
+	kfree(pagesets);
+	kfree(pages);
+	kfree(tidlist);
+	return ret > 0 ? 0 : ret;
+}
+
+int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	int ret = 0;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	u32 *tidinfo;
+	unsigned tididx;
+
+	tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
+	if (!tidinfo)
+		return -ENOMEM;
+
+	if (copy_from_user(tidinfo, (void __user *)(unsigned long)
+			   tinfo->tidlist, sizeof(tidinfo[0]) *
+			   tinfo->tidcnt)) {
+		ret = -EFAULT;
+		goto done;
+	}
+
+	mutex_lock(&uctxt->exp_lock);
+	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
+		ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
+		if (ret) {
+			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
+				  ret);
+			break;
+		}
+	}
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= tididx;
+	spin_unlock(&fd->tid_lock);
+	tinfo->tidcnt = tididx;
+	mutex_unlock(&uctxt->exp_lock);
+done:
+	kfree(tidinfo);
+	return ret;
+}
+
+int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	unsigned long *ev = uctxt->dd->events +
+		(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+		  HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
+	u32 *array;
+	int ret = 0;
+
+	if (!fd->invalid_tids)
+		return -EINVAL;
+
+	/*
+	 * copy_to_user() can sleep, which will leave the invalid_lock
+	 * locked and cause the MMU notifier to be blocked on the lock
+	 * for a long time.
+	 * Copy the data to a local buffer so we can release the lock.
+	 */
+	array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL);
+	if (!array)
+		return -EFAULT;
+
+	spin_lock(&fd->invalid_lock);
+	if (fd->invalid_tid_idx) {
+		memcpy(array, fd->invalid_tids, sizeof(*array) *
+		       fd->invalid_tid_idx);
+		memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) *
+		       fd->invalid_tid_idx);
+		tinfo->tidcnt = fd->invalid_tid_idx;
+		fd->invalid_tid_idx = 0;
+		/*
+		 * Reset the user flag while still holding the lock.
+		 * Otherwise, PSM can miss events.
+		 */
+		clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
+	} else {
+		tinfo->tidcnt = 0;
+	}
+	spin_unlock(&fd->invalid_lock);
+
+	if (tinfo->tidcnt) {
+		if (copy_to_user((void __user *)tinfo->tidlist,
+				 array, sizeof(*array) * tinfo->tidcnt))
+			ret = -EFAULT;
+	}
+	kfree(array);
+
+	return ret;
+}
+
+static u32 find_phys_blocks(struct page **pages, unsigned npages,
+			    struct tid_pageset *list)
+{
+	unsigned pagecount, pageidx, setcount = 0, i;
+	unsigned long pfn, this_pfn;
+
+	if (!npages)
+		return 0;
+
+	/*
+	 * Look for sets of physically contiguous pages in the user buffer.
+	 * This will allow us to optimize Expected RcvArray entry usage by
+	 * using the bigger supported sizes.
+	 */
+	pfn = page_to_pfn(pages[0]);
+	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
+		this_pfn = i < npages ? page_to_pfn(pages[i]) : 0;
+
+		/*
+		 * If the pfn's are not sequential, pages are not physically
+		 * contiguous.
+		 */
+		if (this_pfn != ++pfn) {
+			/*
+			 * At this point we have to loop over the set of
+			 * physically contiguous pages and break them down it
+			 * sizes supported by the HW.
+			 * There are two main constraints:
+			 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
+			 *        If the total set size is bigger than that
+			 *        program only a MAX_EXPECTED_BUFFER chunk.
+			 *     2. The buffer size has to be a power of two. If
+			 *        it is not, round down to the closes power of
+			 *        2 and program that size.
+			 */
+			while (pagecount) {
+				int maxpages = pagecount;
+				u32 bufsize = pagecount * PAGE_SIZE;
+
+				if (bufsize > MAX_EXPECTED_BUFFER)
+					maxpages =
+						MAX_EXPECTED_BUFFER >>
+						PAGE_SHIFT;
+				else if (!is_power_of_2(bufsize))
+					maxpages =
+						rounddown_pow_of_two(bufsize) >>
+						PAGE_SHIFT;
+
+				list[setcount].idx = pageidx;
+				list[setcount].count = maxpages;
+				pagecount -= maxpages;
+				pageidx += maxpages;
+				setcount++;
+			}
+			pageidx = i;
+			pagecount = 1;
+			pfn = this_pfn;
+		} else {
+			pagecount++;
+		}
+	}
+	return setcount;
+}
+
+/**
+ * program_rcvarray() - program an RcvArray group with receive buffers
+ * @fp: file pointer
+ * @vaddr: starting user virtual address
+ * @grp: RcvArray group
+ * @sets: array of struct tid_pageset holding information on physically
+ *        contiguous chunks from the user buffer
+ * @start: starting index into sets array
+ * @count: number of struct tid_pageset's to program
+ * @pages: an array of struct page * for the user buffer
+ * @tidlist: the array of u32 elements when the information about the
+ *           programmed RcvArray entries is to be encoded.
+ * @tididx: starting offset into tidlist
+ * @pmapped: (output parameter) number of pages programmed into the RcvArray
+ *           entries.
+ *
+ * This function will program up to 'count' number of RcvArray entries from the
+ * group 'grp'. To make best use of write-combining writes, the function will
+ * perform writes to the unused RcvArray entries which will be ignored by the
+ * HW. Each RcvArray entry will be programmed with a physically contiguous
+ * buffer chunk from the user's virtual buffer.
+ *
+ * Return:
+ * -EINVAL if the requested count is larger than the size of the group,
+ * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
+ * number of RcvArray entries programmed.
+ */
+static int program_rcvarray(struct file *fp, unsigned long vaddr,
+			    struct tid_group *grp,
+			    struct tid_pageset *sets,
+			    unsigned start, u16 count, struct page **pages,
+			    u32 *tidlist, unsigned *tididx, unsigned *pmapped)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	u16 idx;
+	u32 tidinfo = 0, rcventry, useidx = 0;
+	int mapped = 0;
+
+	/* Count should never be larger than the group size */
+	if (count > grp->size)
+		return -EINVAL;
+
+	/* Find the first unused entry in the group */
+	for (idx = 0; idx < grp->size; idx++) {
+		if (!(grp->map & (1 << idx))) {
+			useidx = idx;
+			break;
+		}
+		rcv_array_wc_fill(dd, grp->base + idx);
+	}
+
+	idx = 0;
+	while (idx < count) {
+		u16 npages, pageidx, setidx = start + idx;
+		int ret = 0;
+
+		/*
+		 * If this entry in the group is used, move to the next one.
+		 * If we go past the end of the group, exit the loop.
+		 */
+		if (useidx >= grp->size) {
+			break;
+		} else if (grp->map & (1 << useidx)) {
+			rcv_array_wc_fill(dd, grp->base + useidx);
+			useidx++;
+			continue;
+		}
+
+		rcventry = grp->base + useidx;
+		npages = sets[setidx].count;
+		pageidx = sets[setidx].idx;
+
+		ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
+					 rcventry, grp, pages + pageidx,
+					 npages);
+		if (ret)
+			return ret;
+		mapped += npages;
+
+		tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
+			EXP_TID_SET(LEN, npages);
+		tidlist[(*tididx)++] = tidinfo;
+		grp->used++;
+		grp->map |= 1 << useidx++;
+		idx++;
+	}
+
+	/* Fill the rest of the group with "blank" writes */
+	for (; useidx < grp->size; useidx++)
+		rcv_array_wc_fill(dd, grp->base + useidx);
+	*pmapped = mapped;
+	return idx;
+}
+
+static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+			      u32 rcventry, struct tid_group *grp,
+			      struct page **pages, unsigned npages)
+{
+	int ret;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct tid_rb_node *node;
+	struct hfi1_devdata *dd = uctxt->dd;
+	struct rb_root *root = &fd->tid_rb_root;
+	dma_addr_t phys;
+
+	/*
+	 * Allocate the node first so we can handle a potential
+	 * failure before we've programmed anything.
+	 */
+	node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
+		       GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	phys = pci_map_single(dd->pcidev,
+			      __va(page_to_phys(pages[0])),
+			      npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+	if (dma_mapping_error(&dd->pcidev->dev, phys)) {
+		dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
+			   phys);
+		kfree(node);
+		return -EFAULT;
+	}
+
+	node->mmu.addr = vaddr;
+	node->mmu.len = npages * PAGE_SIZE;
+	node->phys = page_to_phys(pages[0]);
+	node->npages = npages;
+	node->rcventry = rcventry;
+	node->dma_addr = phys;
+	node->grp = grp;
+	node->freed = false;
+	memcpy(node->pages, pages, sizeof(struct page *) * npages);
+
+	if (HFI1_CAP_IS_USET(TID_UNMAP))
+		ret = mmu_rb_insert(root, &node->mmu);
+	else
+		ret = hfi1_mmu_rb_insert(root, &node->mmu);
+
+	if (ret) {
+		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+			  node->rcventry, node->mmu.addr, node->phys, ret);
+		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+				 PCI_DMA_FROMDEVICE);
+		kfree(node);
+		return -EFAULT;
+	}
+	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
+	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
+			       node->mmu.addr, node->phys, phys);
+	return 0;
+}
+
+static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
+			      struct tid_group **grp)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	struct tid_rb_node *node;
+	u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
+	u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
+
+	if (tididx >= uctxt->expected_count) {
+		dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
+			   tididx, uctxt->ctxt);
+		return -EINVAL;
+	}
+
+	if (tidctrl == 0x3)
+		return -EINVAL;
+
+	rcventry = tididx + (tidctrl - 1);
+
+	node = fd->entry_to_rb[rcventry];
+	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
+		return -EBADF;
+	if (HFI1_CAP_IS_USET(TID_UNMAP))
+		mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
+	else
+		hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
+
+	if (grp)
+		*grp = node->grp;
+	clear_tid_node(fd, fd->subctxt, node);
+	return 0;
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
+			   struct tid_rb_node *node)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+
+	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
+				 node->npages, node->mmu.addr, node->phys,
+				 node->dma_addr);
+
+	hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
+	/*
+	 * Make sure device has seen the write before we unpin the
+	 * pages.
+	 */
+	flush_wc();
+
+	pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
+			 PCI_DMA_FROMDEVICE);
+	hfi1_release_user_pages(current->mm, node->pages, node->npages, true);
+	fd->tid_n_pinned -= node->npages;
+
+	node->grp->used--;
+	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
+
+	if (node->grp->used == node->grp->size - 1)
+		tid_group_move(node->grp, &uctxt->tid_full_list,
+			       &uctxt->tid_used_list);
+	else if (!node->grp->used)
+		tid_group_move(node->grp, &uctxt->tid_used_list,
+			       &uctxt->tid_group_list);
+	kfree(node);
+}
+
+static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
+			    struct exp_tid_set *set, struct rb_root *root)
+{
+	struct tid_group *grp, *ptr;
+	struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata,
+						tid_rb_root);
+	int i;
+
+	list_for_each_entry_safe(grp, ptr, &set->list, list) {
+		list_del_init(&grp->list);
+
+		for (i = 0; i < grp->size; i++) {
+			if (grp->map & (1 << i)) {
+				u16 rcventry = grp->base + i;
+				struct tid_rb_node *node;
+
+				node = fd->entry_to_rb[rcventry -
+							  uctxt->expected_base];
+				if (!node || node->rcventry != rcventry)
+					continue;
+				if (HFI1_CAP_IS_USET(TID_UNMAP))
+					mmu_rb_remove(&fd->tid_rb_root,
+						      &node->mmu, false);
+				else
+					hfi1_mmu_rb_remove(&fd->tid_rb_root,
+							   &node->mmu);
+				clear_tid_node(fd, -1, node);
+			}
+		}
+	}
+}
+
+static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
+	struct tid_rb_node *node =
+		container_of(mnode, struct tid_rb_node, mmu);
+
+	if (node->freed)
+		return 0;
+
+	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+				 node->rcventry, node->npages, node->dma_addr);
+	node->freed = true;
+
+	spin_lock(&fdata->invalid_lock);
+	if (fdata->invalid_tid_idx < uctxt->expected_count) {
+		fdata->invalid_tids[fdata->invalid_tid_idx] =
+			rcventry2tidinfo(node->rcventry - uctxt->expected_base);
+		fdata->invalid_tids[fdata->invalid_tid_idx] |=
+			EXP_TID_SET(LEN, node->npages);
+		if (!fdata->invalid_tid_idx) {
+			unsigned long *ev;
+
+			/*
+			 * hfi1_set_uevent_bits() sets a user event flag
+			 * for all processes. Because calling into the
+			 * driver to process TID cache invalidations is
+			 * expensive and TID cache invalidations are
+			 * handled on a per-process basis, we can
+			 * optimize this to set the flag only for the
+			 * process in question.
+			 */
+			ev = uctxt->dd->events +
+				(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+				  HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+			set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
+		}
+		fdata->invalid_tid_idx++;
+	}
+	spin_unlock(&fdata->invalid_lock);
+	return 0;
+}
+
+static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct tid_rb_node *tnode =
+		container_of(node, struct tid_rb_node, mmu);
+	u32 base = fdata->uctxt->expected_base;
+
+	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
+	return 0;
+}
+
+static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
+			  bool notifier)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct tid_rb_node *tnode =
+		container_of(node, struct tid_rb_node, mmu);
+	u32 base = fdata->uctxt->expected_base;
+
+	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
+}

diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/staging/rdma/hfi1/user_exp_rcv.h
index 4f4876e..9bc8d9f 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.h
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.h

@@ -1,14 +1,13 @@
 #ifndef _HFI1_USER_EXP_RCV_H
 #define _HFI1_USER_EXP_RCV_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -50,6 +47,8 @@
  *
  */
 
+#include "hfi.h"
+
 #define EXP_TID_TIDLEN_MASK   0x7FFULL
 #define EXP_TID_TIDLEN_SHIFT  0
 #define EXP_TID_TIDCTRL_MASK  0x3ULL
@@ -71,4 +70,10 @@
 		(tid) |= EXP_TID_SET(field, (value));			\
 	} while (0)
 
+int hfi1_user_exp_rcv_init(struct file *);
+int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
+int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
+int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
+int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+
 #endif /* _HFI1_USER_EXP_RCV_H */

diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c
index 8ebfe9e..88e10b5f 100644
--- a/drivers/staging/rdma/hfi1/user_pages.c
+++ b/drivers/staging/rdma/hfi1/user_pages.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,32 +48,62 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/device.h>
+#include <linux/module.h>
 
 #include "hfi.h"
 
-/**
- * hfi1_map_page - a safety wrapper around pci_map_page()
+static unsigned long cache_size = 256;
+module_param(cache_size, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
+
+/*
+ * Determine whether the caller can pin pages.
+ *
+ * This function should be used in the implementation of buffer caches.
+ * The cache implementation should call this function prior to attempting
+ * to pin buffer pages in order to determine whether they should do so.
+ * The function computes cache limits based on the configured ulimit and
+ * cache size. Use of this function is especially important for caches
+ * which are not limited in any other way (e.g. by HW resources) and, thus,
+ * could keeping caching buffers.
  *
  */
-dma_addr_t hfi1_map_page(struct pci_dev *hwdev, struct page *page,
-			 unsigned long offset, size_t size, int direction)
+bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages)
 {
-	return pci_map_page(hwdev, page, offset, size, direction);
-}
-
-int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
-			    struct page **pages)
-{
-	unsigned long pinned, lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
+		size = (cache_size * (1UL << 20)); /* convert to bytes */
+	unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
 	bool can_lock = capable(CAP_IPC_LOCK);
-	int ret;
+
+	/*
+	 * Calculate per-cache size. The calculation below uses only a quarter
+	 * of the available per-context limit. This leaves space for other
+	 * pinning. Should we worry about shared ctxts?
+	 */
+	cache_limit = (ulimit / usr_ctxts) / 4;
+
+	/* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
+	if (ulimit != (-1UL) && size > cache_limit)
+		size = cache_limit;
+
+	/* Convert to number of pages */
+	size = DIV_ROUND_UP(size, PAGE_SIZE);
 
 	down_read(&current->mm->mmap_sem);
 	pinned = current->mm->pinned_vm;
 	up_read(&current->mm->mmap_sem);
 
-	if (pinned + npages > lock_limit && !can_lock)
-		return -ENOMEM;
+	/* First, check the absolute limit against all pinned pages. */
+	if (pinned + npages >= ulimit && !can_lock)
+		return false;
+
+	return ((nlocked + npages) <= size) || can_lock;
+}
+
+int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
+			    struct page **pages)
+{
+	int ret;
 
 	ret = get_user_pages_fast(vaddr, npages, writable, pages);
 	if (ret < 0)
@@ -89,7 +116,8 @@
 	return ret;
 }
 
-void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty)
+void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
+			     size_t npages, bool dirty)
 {
 	size_t i;
 
@@ -99,9 +127,9 @@
 		put_page(p[i]);
 	}
 
-	if (current->mm) { /* during close after signal, mm can be NULL */
-		down_write(&current->mm->mmap_sem);
-		current->mm->pinned_vm -= npages;
-		up_write(&current->mm->mmap_sem);
+	if (mm) { /* during close after signal, mm can be NULL */
+		down_write(&mm->mmap_sem);
+		mm->pinned_vm -= npages;
+		up_write(&mm->mmap_sem);
 	}
 }

diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c
index 9d4f5d6..ab6b6a4 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -70,6 +67,7 @@
 #include "verbs.h"  /* for the headers */
 #include "common.h" /* for struct hfi1_tid_info */
 #include "trace.h"
+#include "mmu_rb.h"
 
 static uint hfi1_sdma_comp_ring_size = 128;
 module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
@@ -146,7 +144,6 @@
 
 /* Last packet in the request */
 #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
-#define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0)
 
 #define SDMA_REQ_IN_USE     0
 #define SDMA_REQ_FOR_THREAD 1
@@ -170,16 +167,28 @@
 #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
 
 struct user_sdma_iovec {
+	struct list_head list;
 	struct iovec iov;
 	/* number of pages in this vector */
 	unsigned npages;
 	/* array of pinned pages for this vector */
 	struct page **pages;
-	/* offset into the virtual address space of the vector at
-	 * which we last left off. */
+	/*
+	 * offset into the virtual address space of the vector at
+	 * which we last left off.
+	 */
 	u64 offset;
 };
 
+struct sdma_mmu_node {
+	struct mmu_rb_node rb;
+	struct list_head list;
+	struct hfi1_user_sdma_pkt_q *pq;
+	atomic_t refcount;
+	struct page **pages;
+	unsigned npages;
+};
+
 struct user_sdma_request {
 	struct sdma_req_info info;
 	struct hfi1_user_sdma_pkt_q *pq;
@@ -213,15 +222,6 @@
 	 */
 	u8 omfactor;
 	/*
-	 * pointer to the user's mm_struct. We are going to
-	 * get a reference to it so it doesn't get freed
-	 * since we might not be in process context when we
-	 * are processing the iov's.
-	 * Using this mm_struct, we can get vma based on the
-	 * iov's address (find_vma()).
-	 */
-	struct mm_struct *user_mm;
-	/*
 	 * We copy the iovs for this request (based on
 	 * info.iovcnt). These are only the data vectors
 	 */
@@ -238,13 +238,12 @@
 	u16 tididx;
 	u32 sent;
 	u64 seqnum;
+	u64 seqcomp;
+	u64 seqsubmitted;
 	struct list_head txps;
-	spinlock_t txcmp_lock;  /* protect txcmp list */
-	struct list_head txcmp;
 	unsigned long flags;
 	/* status of the last txreq completed */
 	int status;
-	struct work_struct worker;
 };
 
 /*
@@ -259,11 +258,6 @@
 	struct sdma_txreq txreq;
 	struct list_head list;
 	struct user_sdma_request *req;
-	struct {
-		struct user_sdma_iovec *vec;
-		u8 flags;
-	} iovecs[3];
-	int idx;
 	u16 flags;
 	unsigned busycount;
 	u64 seqnum;
@@ -279,21 +273,21 @@
 
 static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
 static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
-static void user_sdma_delayed_completion(struct work_struct *);
-static void user_sdma_free_request(struct user_sdma_request *);
+static void user_sdma_txreq_cb(struct sdma_txreq *, int);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
+static void user_sdma_free_request(struct user_sdma_request *, bool);
 static int pin_vector_pages(struct user_sdma_request *,
 			    struct user_sdma_iovec *);
-static void unpin_vector_pages(struct user_sdma_request *,
-			       struct user_sdma_iovec *);
+static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
 static int check_header_template(struct user_sdma_request *,
 				 struct hfi1_pkt_header *, u32, u32);
 static int set_txreq_header(struct user_sdma_request *,
 			    struct user_sdma_txreq *, u32);
 static int set_txreq_header_ahg(struct user_sdma_request *,
 				struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct user_sdma_request *,
-					enum hfi1_sdma_comp_state, int);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
+				  struct hfi1_user_sdma_comp_q *,
+				  u16, enum hfi1_sdma_comp_state, int);
 static inline u32 set_pkt_bth_psn(__be32, u8, u32);
 static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
 
@@ -303,6 +297,17 @@
 	struct sdma_txreq *,
 	unsigned seq);
 static void activate_packet_queue(struct iowait *, int);
+static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
+static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+
+static struct mmu_rb_ops sdma_rb_ops = {
+	.filter = sdma_rb_filter,
+	.insert = sdma_rb_insert,
+	.remove = sdma_rb_remove,
+	.invalidate = sdma_rb_invalidate
+};
 
 static int defer_packet_queue(
 	struct sdma_engine *sde,
@@ -380,7 +385,7 @@
 		goto pq_nomem;
 
 	memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
-	pq->reqs = kmalloc(memsize, GFP_KERNEL);
+	pq->reqs = kzalloc(memsize, GFP_KERNEL);
 	if (!pq->reqs)
 		goto pq_reqs_nomem;
 
@@ -392,9 +397,12 @@
 	pq->state = SDMA_PKT_Q_INACTIVE;
 	atomic_set(&pq->n_reqs, 0);
 	init_waitqueue_head(&pq->wait);
+	pq->sdma_rb_root = RB_ROOT;
+	INIT_LIST_HEAD(&pq->evict);
+	spin_lock_init(&pq->evict_lock);
 
 	iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
-		    activate_packet_queue);
+		    activate_packet_queue, NULL);
 	pq->reqidx = 0;
 	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
 		 fd->subctxt);
@@ -421,6 +429,12 @@
 	cq->nentries = hfi1_sdma_comp_ring_size;
 	fd->cq = cq;
 
+	ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops);
+	if (ret) {
+		dd_dev_err(dd, "Failed to register with MMU %d", ret);
+		goto done;
+	}
+
 	spin_lock_irqsave(&uctxt->sdma_qlock, flags);
 	list_add(&pq->list, &uctxt->sdma_queues);
 	spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
@@ -450,6 +464,7 @@
 	hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
 		  uctxt->ctxt, fd->subctxt);
 	pq = fd->pq;
+	hfi1_mmu_rb_unregister(&pq->sdma_rb_root);
 	if (pq) {
 		spin_lock_irqsave(&uctxt->sdma_qlock, flags);
 		if (!list_empty(&pq->list))
@@ -476,7 +491,7 @@
 int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 				   unsigned long dim, unsigned long *count)
 {
-	int ret = 0, i = 0, sent;
+	int ret = 0, i = 0;
 	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
@@ -502,9 +517,11 @@
 			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
 		return -EFAULT;
 	}
+
 	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
 				     (u16 *)&info);
-	if (cq->comps[info.comp_idx].status == QUEUED) {
+	if (cq->comps[info.comp_idx].status == QUEUED ||
+	    test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
 		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
 			  dd->unit, uctxt->ctxt, fd->subctxt,
 			  info.comp_idx);
@@ -531,10 +548,7 @@
 	req->cq = cq;
 	req->status = -1;
 	INIT_LIST_HEAD(&req->txps);
-	INIT_LIST_HEAD(&req->txcmp);
-	INIT_WORK(&req->worker, user_sdma_delayed_completion);
 
-	spin_lock_init(&req->txcmp_lock);
 	memcpy(&req->info, &info, sizeof(info));
 
 	if (req_opcode(info.ctrl) == EXPECTED)
@@ -593,8 +607,10 @@
 	}
 
 	req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
-	/* Calculate the initial TID offset based on the values of
-	   KDETH.OFFSET and KDETH.OM that are passed in. */
+	/*
+	 * Calculate the initial TID offset based on the values of
+	 * KDETH.OFFSET and KDETH.OM that are passed in.
+	 */
 	req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
 		(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
 		 KDETH_OM_LARGE : KDETH_OM_SMALL);
@@ -603,8 +619,13 @@
 
 	/* Save all the IO vector structures */
 	while (i < req->data_iovs) {
+		INIT_LIST_HEAD(&req->iovs[i].list);
 		memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
-		req->iovs[i].offset = 0;
+		ret = pin_vector_pages(req, &req->iovs[i]);
+		if (ret) {
+			req->status = ret;
+			goto free_req;
+		}
 		req->data_len += req->iovs[i++].iov.iov_len;
 	}
 	SDMA_DBG(req, "total data length %u", req->data_len);
@@ -668,52 +689,59 @@
 		}
 	}
 
-	set_comp_state(req, QUEUED, 0);
-	/* Send the first N packets in the request to buy us some time */
-	sent = user_sdma_send_pkts(req, pcount);
-	if (unlikely(sent < 0)) {
-		if (sent != -EBUSY) {
-			req->status = sent;
-			set_comp_state(req, ERROR, req->status);
-			return sent;
-		} else
-			sent = 0;
-	}
+	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
 	atomic_inc(&pq->n_reqs);
-	xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+	/* Send the first N packets in the request to buy us some time */
+	ret = user_sdma_send_pkts(req, pcount);
+	if (unlikely(ret < 0 && ret != -EBUSY)) {
+		req->status = ret;
+		goto free_req;
+	}
 
-	if (sent < req->info.npkts) {
-		/*
-		 * This is a somewhat blocking send implementation.
-		 * The driver will block the caller until all packets of the
-		 * request have been submitted to the SDMA engine. However, it
-		 * will not wait for send completions.
-		 */
-		while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
-			ret = user_sdma_send_pkts(req, pcount);
-			if (ret < 0) {
-				if (ret != -EBUSY) {
-					req->status = ret;
-					return ret;
-				}
-				wait_event_interruptible_timeout(
-					pq->busy.wait_dma,
-					(pq->state == SDMA_PKT_Q_ACTIVE),
-					msecs_to_jiffies(
-						SDMA_IOWAIT_TIMEOUT));
+	/*
+	 * It is possible that the SDMA engine would have processed all the
+	 * submitted packets by the time we get here. Therefore, only set
+	 * packet queue state to ACTIVE if there are still uncompleted
+	 * requests.
+	 */
+	if (atomic_read(&pq->n_reqs))
+		xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+
+	/*
+	 * This is a somewhat blocking send implementation.
+	 * The driver will block the caller until all packets of the
+	 * request have been submitted to the SDMA engine. However, it
+	 * will not wait for send completions.
+	 */
+	while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+		ret = user_sdma_send_pkts(req, pcount);
+		if (ret < 0) {
+			if (ret != -EBUSY) {
+				req->status = ret;
+				set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+				if (ACCESS_ONCE(req->seqcomp) ==
+				    req->seqsubmitted - 1)
+					goto free_req;
+				return ret;
 			}
+			wait_event_interruptible_timeout(
+				pq->busy.wait_dma,
+				(pq->state == SDMA_PKT_Q_ACTIVE),
+				msecs_to_jiffies(
+					SDMA_IOWAIT_TIMEOUT));
 		}
-
 	}
 	*count += idx;
 	return 0;
 free_req:
-	user_sdma_free_request(req);
+	user_sdma_free_request(req, true);
+	pq_update(pq);
+	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
 	return ret;
 }
 
 static inline u32 compute_data_length(struct user_sdma_request *req,
-					    struct user_sdma_txreq *tx)
+				      struct user_sdma_txreq *tx)
 {
 	/*
 	 * Determine the proper size of the packet data.
@@ -731,8 +759,10 @@
 	} else if (req_opcode(req->info.ctrl) == EXPECTED) {
 		u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
 			PAGE_SIZE;
-		/* Get the data length based on the remaining space in the
-		 * TID pair. */
+		/*
+		 * Get the data length based on the remaining space in the
+		 * TID pair.
+		 */
 		len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
 		/* If we've filled up the TID pair, move to the next one. */
 		if (unlikely(!len) && ++req->tididx < req->n_tids &&
@@ -742,12 +772,15 @@
 			req->tidoffset = 0;
 			len = min_t(u32, tidlen, req->info.fragsize);
 		}
-		/* Since the TID pairs map entire pages, make sure that we
+		/*
+		 * Since the TID pairs map entire pages, make sure that we
 		 * are not going to try to send more data that we have
-		 * remaining. */
+		 * remaining.
+		 */
 		len = min(len, req->data_len - req->sent);
-	} else
+	} else {
 		len = min(req->data_len - req->sent, (u32)req->info.fragsize);
+	}
 	SDMA_DBG(req, "Data Length = %u", len);
 	return len;
 }
@@ -810,9 +843,7 @@
 		tx->flags = 0;
 		tx->req = req;
 		tx->busycount = 0;
-		tx->idx = -1;
 		INIT_LIST_HEAD(&tx->list);
-		memset(tx->iovecs, 0, sizeof(tx->iovecs));
 
 		if (req->seqnum == req->info.npkts - 1)
 			tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
@@ -833,18 +864,6 @@
 				WARN_ON(iovec->offset);
 			}
 
-			/*
-			 * This request might include only a header and no user
-			 * data, so pin pages only if there is data and it the
-			 * pages have not been pinned already.
-			 */
-			if (unlikely(!iovec->pages && iovec->iov.iov_len)) {
-				ret = pin_vector_pages(req, iovec);
-				if (ret)
-					goto free_tx;
-			}
-
-			tx->iovecs[++tx->idx].vec = iovec;
 			datalen = compute_data_length(req, tx);
 			if (!datalen) {
 				SDMA_DBG(req,
@@ -934,16 +953,8 @@
 					      iovec->pages[pageidx],
 					      offset, len);
 			if (ret) {
-				int i;
-
 				SDMA_DBG(req, "SDMA txreq add page failed %d\n",
 					 ret);
-				/* Mark all assigned vectors as complete so they
-				 * are unpinned in the callback. */
-				for (i = tx->idx; i >= 0; i--) {
-					tx->iovecs[i].flags |=
-						TXREQ_FLAGS_IOVEC_LAST_PKT;
-				}
 				goto free_txreq;
 			}
 			iov_offset += len;
@@ -951,19 +962,10 @@
 			data_sent += len;
 			if (unlikely(queued < datalen &&
 				     pageidx == iovec->npages &&
-				     req->iov_idx < req->data_iovs - 1 &&
-				     tx->idx < ARRAY_SIZE(tx->iovecs))) {
+				     req->iov_idx < req->data_iovs - 1)) {
 				iovec->offset += iov_offset;
-				tx->iovecs[tx->idx].flags |=
-					TXREQ_FLAGS_IOVEC_LAST_PKT;
 				iovec = &req->iovs[++req->iov_idx];
-				if (!iovec->pages) {
-					ret = pin_vector_pages(req, iovec);
-					if (ret)
-						goto free_txreq;
-				}
 				iov_offset = 0;
-				tx->iovecs[++tx->idx].vec = iovec;
 			}
 		}
 		/*
@@ -974,28 +976,21 @@
 		if (req_opcode(req->info.ctrl) == EXPECTED)
 			req->tidoffset += datalen;
 		req->sent += data_sent;
-		if (req->data_len) {
-			tx->iovecs[tx->idx].vec->offset += iov_offset;
-			/* If we've reached the end of the io vector, mark it
-			 * so the callback can unpin the pages and free it. */
-			if (tx->iovecs[tx->idx].vec->offset ==
-			    tx->iovecs[tx->idx].vec->iov.iov_len)
-				tx->iovecs[tx->idx].flags |=
-					TXREQ_FLAGS_IOVEC_LAST_PKT;
-		}
-
+		if (req->data_len)
+			iovec->offset += iov_offset;
+		list_add_tail(&tx->txreq.list, &req->txps);
 		/*
 		 * It is important to increment this here as it is used to
 		 * generate the BTH.PSN and, therefore, can't be bulk-updated
 		 * outside of the loop.
 		 */
 		tx->seqnum = req->seqnum++;
-		list_add_tail(&tx->txreq.list, &req->txps);
 		npkts++;
 	}
 dosend:
 	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
-	if (list_empty(&req->txps))
+	if (list_empty(&req->txps)) {
+		req->seqsubmitted = req->seqnum;
 		if (req->seqnum == req->info.npkts) {
 			set_bit(SDMA_REQ_SEND_DONE, &req->flags);
 			/*
@@ -1007,6 +1002,10 @@
 			if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
 				sdma_ahg_free(req->sde, req->ahg_idx);
 		}
+	} else if (ret > 0) {
+		req->seqsubmitted += ret;
+		ret = 0;
+	}
 	return ret;
 
 free_txreq:
@@ -1021,7 +1020,7 @@
  */
 static inline int num_user_pages(const struct iovec *iov)
 {
-	const unsigned long addr  = (unsigned long) iov->iov_base;
+	const unsigned long addr  = (unsigned long)iov->iov_base;
 	const unsigned long len   = iov->iov_len;
 	const unsigned long spage = addr & PAGE_MASK;
 	const unsigned long epage = (addr + len - 1) & PAGE_MASK;
@@ -1029,64 +1028,129 @@
 	return 1 + ((epage - spage) >> PAGE_SHIFT);
 }
 
-static int pin_vector_pages(struct user_sdma_request *req,
-			    struct user_sdma_iovec *iovec) {
-	int pinned, npages;
+/* Caller must hold pq->evict_lock */
+static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+{
+	u32 cleared = 0;
+	struct sdma_mmu_node *node, *ptr;
 
-	npages = num_user_pages(&iovec->iov);
-	iovec->pages = kcalloc(npages, sizeof(*iovec->pages), GFP_KERNEL);
-	if (!iovec->pages) {
-		SDMA_DBG(req, "Failed page array alloc");
-		return -ENOMEM;
+	list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
+		/* Make sure that no one is still using the node. */
+		if (!atomic_read(&node->refcount)) {
+			/*
+			 * Need to use the page count now as the remove callback
+			 * will free the node.
+			 */
+			cleared += node->npages;
+			spin_unlock(&pq->evict_lock);
+			hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+			spin_lock(&pq->evict_lock);
+			if (cleared >= npages)
+				break;
+		}
 	}
-
-	/*
-	 * Get a reference to the process's mm so we can use it when
-	 * unpinning the io vectors.
-	 */
-	req->pq->user_mm = get_task_mm(current);
-
-	pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base,
-					 npages, 0, iovec->pages);
-
-	if (pinned < 0)
-		return pinned;
-
-	iovec->npages = pinned;
-	if (pinned != npages) {
-		SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages);
-		unpin_vector_pages(req, iovec);
-		return -EFAULT;
-	}
-	return 0;
+	return cleared;
 }
 
-static void unpin_vector_pages(struct user_sdma_request *req,
-			       struct user_sdma_iovec *iovec)
-{
-	/*
-	 * Unpinning is done through the workqueue so use the
-	 * process's mm if we have a reference to it.
-	 */
-	if ((current->flags & PF_KTHREAD) && req->pq->user_mm)
-		use_mm(req->pq->user_mm);
+static int pin_vector_pages(struct user_sdma_request *req,
+			    struct user_sdma_iovec *iovec) {
+	int ret = 0, pinned, npages, cleared;
+	struct page **pages;
+	struct hfi1_user_sdma_pkt_q *pq = req->pq;
+	struct sdma_mmu_node *node = NULL;
+	struct mmu_rb_node *rb_node;
 
-	hfi1_release_user_pages(iovec->pages, iovec->npages, 0);
+	rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
+				     (unsigned long)iovec->iov.iov_base,
+				     iovec->iov.iov_len);
+	if (rb_node)
+		node = container_of(rb_node, struct sdma_mmu_node, rb);
 
-	/*
-	 * Unuse the user's mm (see above) and release the
-	 * reference to it.
-	 */
-	if (req->pq->user_mm) {
-		if (current->flags & PF_KTHREAD)
-			unuse_mm(req->pq->user_mm);
-		mmput(req->pq->user_mm);
+	if (!node) {
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node)
+			return -ENOMEM;
+
+		node->rb.addr = (unsigned long)iovec->iov.iov_base;
+		node->rb.len = iovec->iov.iov_len;
+		node->pq = pq;
+		atomic_set(&node->refcount, 0);
+		INIT_LIST_HEAD(&node->list);
 	}
 
-	kfree(iovec->pages);
-	iovec->pages = NULL;
-	iovec->npages = 0;
-	iovec->offset = 0;
+	npages = num_user_pages(&iovec->iov);
+	if (node->npages < npages) {
+		pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+		if (!pages) {
+			SDMA_DBG(req, "Failed page array alloc");
+			ret = -ENOMEM;
+			goto bail;
+		}
+		memcpy(pages, node->pages, node->npages * sizeof(*pages));
+
+		npages -= node->npages;
+retry:
+		if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
+			spin_lock(&pq->evict_lock);
+			cleared = sdma_cache_evict(pq, npages);
+			spin_unlock(&pq->evict_lock);
+			if (cleared >= npages)
+				goto retry;
+		}
+		pinned = hfi1_acquire_user_pages(
+			((unsigned long)iovec->iov.iov_base +
+			 (node->npages * PAGE_SIZE)), npages, 0,
+			pages + node->npages);
+		if (pinned < 0) {
+			kfree(pages);
+			ret = pinned;
+			goto bail;
+		}
+		if (pinned != npages) {
+			unpin_vector_pages(current->mm, pages, pinned);
+			ret = -EFAULT;
+			goto bail;
+		}
+		kfree(node->pages);
+		node->pages = pages;
+		node->npages += pinned;
+		npages = node->npages;
+		spin_lock(&pq->evict_lock);
+		if (!rb_node)
+			list_add(&node->list, &pq->evict);
+		else
+			list_move(&node->list, &pq->evict);
+		pq->n_locked += pinned;
+		spin_unlock(&pq->evict_lock);
+	}
+	iovec->pages = node->pages;
+	iovec->npages = npages;
+
+	if (!rb_node) {
+		ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+		if (ret) {
+			spin_lock(&pq->evict_lock);
+			list_del(&node->list);
+			pq->n_locked -= node->npages;
+			spin_unlock(&pq->evict_lock);
+			ret = 0;
+			goto bail;
+		}
+	} else {
+		atomic_inc(&node->refcount);
+	}
+	return 0;
+bail:
+	if (!rb_node)
+		kfree(node);
+	return ret;
+}
+
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+			       unsigned npages)
+{
+	hfi1_release_user_pages(mm, pages, npages, 0);
+	kfree(pages);
 }
 
 static int check_header_template(struct user_sdma_request *req,
@@ -1209,7 +1273,6 @@
 		if (ret)
 			return ret;
 		goto done;
-
 	}
 
 	hdr->bth[2] = cpu_to_be32(
@@ -1219,7 +1282,7 @@
 
 	/* Set ACK request on last packet */
 	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
-		hdr->bth[2] |= cpu_to_be32(1UL<<31);
+		hdr->bth[2] |= cpu_to_be32(1UL << 31);
 
 	/* Set the new offset */
 	hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
@@ -1233,8 +1296,10 @@
 		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
 					 PAGE_SIZE)) {
 			req->tidoffset = 0;
-			/* Since we don't copy all the TIDs, all at once,
-			 * we have to check again. */
+			/*
+			 * Since we don't copy all the TIDs, all at once,
+			 * we have to check again.
+			 */
 			if (++req->tididx > req->n_tids - 1 ||
 			    !req->tids[req->tididx]) {
 				return -EINVAL;
@@ -1315,8 +1380,10 @@
 		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
 					 PAGE_SIZE)) {
 			req->tidoffset = 0;
-			/* Since we don't copy all the TIDs, all at once,
-			 * we have to check again. */
+			/*
+			 * Since we don't copy all the TIDs, all at once,
+			 * we have to check again.
+			 */
 			if (++req->tididx > req->n_tids - 1 ||
 			    !req->tids[req->tididx]) {
 				return -EINVAL;
@@ -1340,8 +1407,9 @@
 								INTR) >> 16);
 			val &= cpu_to_le16(~(1U << 13));
 			AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
-		} else
+		} else {
 			AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+		}
 	}
 
 	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
@@ -1356,113 +1424,62 @@
  * tx request have been processed by the DMA engine. Called in
  * interrupt context.
  */
-static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
-			       int drain)
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 {
 	struct user_sdma_txreq *tx =
 		container_of(txreq, struct user_sdma_txreq, txreq);
 	struct user_sdma_request *req;
-	bool defer;
-	int i;
+	struct hfi1_user_sdma_pkt_q *pq;
+	struct hfi1_user_sdma_comp_q *cq;
+	u16 idx;
 
 	if (!tx->req)
 		return;
 
 	req = tx->req;
-	/*
-	 * If this is the callback for the last packet of the request,
-	 * queue up the request for clean up.
-	 */
-	defer = (tx->seqnum == req->info.npkts - 1);
+	pq = req->pq;
+	cq = req->cq;
 
-	/*
-	 * If we have any io vectors associated with this txreq,
-	 * check whether they need to be 'freed'. We can't free them
-	 * here because the unpin function needs to be able to sleep.
-	 */
-	for (i = tx->idx; i >= 0; i--) {
-		if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) {
-			defer = true;
-			break;
-		}
-	}
-
-	req->status = status;
 	if (status != SDMA_TXREQ_S_OK) {
 		SDMA_DBG(req, "SDMA completion with error %d",
 			 status);
 		set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
-		defer = true;
 	}
 
-	/*
-	 * Defer the clean up of the iovectors and the request until later
-	 * so it can be done outside of interrupt context.
-	 */
-	if (defer) {
-		spin_lock(&req->txcmp_lock);
-		list_add_tail(&tx->list, &req->txcmp);
-		spin_unlock(&req->txcmp_lock);
-		schedule_work(&req->worker);
+	req->seqcomp = tx->seqnum;
+	kmem_cache_free(pq->txreq_cache, tx);
+	tx = NULL;
+
+	idx = req->info.comp_idx;
+	if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
+		if (req->seqcomp == req->info.npkts - 1) {
+			req->status = 0;
+			user_sdma_free_request(req, false);
+			pq_update(pq);
+			set_comp_state(pq, cq, idx, COMPLETE, 0);
+		}
 	} else {
-		kmem_cache_free(req->pq->txreq_cache, tx);
+		if (status != SDMA_TXREQ_S_OK)
+			req->status = status;
+		if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
+		    (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
+		     test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+			user_sdma_free_request(req, false);
+			pq_update(pq);
+			set_comp_state(pq, cq, idx, ERROR, req->status);
+		}
 	}
 }
 
-static void user_sdma_delayed_completion(struct work_struct *work)
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
 {
-	struct user_sdma_request *req =
-		container_of(work, struct user_sdma_request, worker);
-	struct hfi1_user_sdma_pkt_q *pq = req->pq;
-	struct user_sdma_txreq *tx = NULL;
-	unsigned long flags;
-	u64 seqnum;
-	int i;
-
-	while (1) {
-		spin_lock_irqsave(&req->txcmp_lock, flags);
-		if (!list_empty(&req->txcmp)) {
-			tx = list_first_entry(&req->txcmp,
-					      struct user_sdma_txreq, list);
-			list_del(&tx->list);
-		}
-		spin_unlock_irqrestore(&req->txcmp_lock, flags);
-		if (!tx)
-			break;
-
-		for (i = tx->idx; i >= 0; i--)
-			if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT)
-				unpin_vector_pages(req, tx->iovecs[i].vec);
-
-		seqnum = tx->seqnum;
-		kmem_cache_free(pq->txreq_cache, tx);
-		tx = NULL;
-
-		if (req->status != SDMA_TXREQ_S_OK) {
-			if (seqnum == ACCESS_ONCE(req->seqnum) &&
-			    test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
-				atomic_dec(&pq->n_reqs);
-				set_comp_state(req, ERROR, req->status);
-				user_sdma_free_request(req);
-				break;
-			}
-		} else {
-			if (seqnum == req->info.npkts - 1) {
-				atomic_dec(&pq->n_reqs);
-				set_comp_state(req, COMPLETE, 0);
-				user_sdma_free_request(req);
-				break;
-			}
-		}
-	}
-
-	if (!atomic_read(&pq->n_reqs)) {
+	if (atomic_dec_and_test(&pq->n_reqs)) {
 		xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
 		wake_up(&pq->wait);
 	}
 }
 
-static void user_sdma_free_request(struct user_sdma_request *req)
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 {
 	if (!list_empty(&req->txps)) {
 		struct sdma_txreq *t, *p;
@@ -1476,25 +1493,87 @@
 		}
 	}
 	if (req->data_iovs) {
+		struct sdma_mmu_node *node;
+		struct mmu_rb_node *mnode;
 		int i;
 
-		for (i = 0; i < req->data_iovs; i++)
-			if (req->iovs[i].npages && req->iovs[i].pages)
-				unpin_vector_pages(req, &req->iovs[i]);
+		for (i = 0; i < req->data_iovs; i++) {
+			mnode = hfi1_mmu_rb_search(
+				&req->pq->sdma_rb_root,
+				(unsigned long)req->iovs[i].iov.iov_base,
+				req->iovs[i].iov.iov_len);
+			if (!mnode)
+				continue;
+
+			node = container_of(mnode, struct sdma_mmu_node, rb);
+			if (unpin)
+				hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
+						   &node->rb);
+			else
+				atomic_dec(&node->refcount);
+		}
 	}
 	kfree(req->tids);
 	clear_bit(SDMA_REQ_IN_USE, &req->flags);
 }
 
-static inline void set_comp_state(struct user_sdma_request *req,
-					enum hfi1_sdma_comp_state state,
-					int ret)
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+				  struct hfi1_user_sdma_comp_q *cq,
+				  u16 idx, enum hfi1_sdma_comp_state state,
+				  int ret)
 {
-	SDMA_DBG(req, "Setting completion status %u %d", state, ret);
-	req->cq->comps[req->info.comp_idx].status = state;
+	hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
+		  pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
+	cq->comps[idx].status = state;
 	if (state == ERROR)
-		req->cq->comps[req->info.comp_idx].errcode = -ret;
-	trace_hfi1_sdma_user_completion(req->pq->dd, req->pq->ctxt,
-					req->pq->subctxt, req->info.comp_idx,
-					state, ret);
+		cq->comps[idx].errcode = -ret;
+	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
+					idx, state, ret);
+}
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+			   unsigned long len)
+{
+	return (bool)(node->addr == addr);
+}
+
+static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	atomic_inc(&node->refcount);
+	return 0;
+}
+
+static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
+			   bool notifier)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	spin_lock(&node->pq->evict_lock);
+	list_del(&node->list);
+	node->pq->n_locked -= node->npages;
+	spin_unlock(&node->pq->evict_lock);
+
+	unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
+			   node->npages);
+	/*
+	 * If called by the MMU notifier, we have to adjust the pinned
+	 * page count ourselves.
+	 */
+	if (notifier)
+		current->mm->pinned_vm -= node->npages;
+	kfree(node);
+}
+
+static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	if (!atomic_read(&node->refcount))
+		return 1;
+	return 0;
 }

diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h
index 0afa285..b9240e3 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.h
+++ b/drivers/staging/rdma/hfi1/user_sdma.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -69,7 +66,11 @@
 	struct iowait busy;
 	unsigned state;
 	wait_queue_head_t wait;
-	struct mm_struct *user_mm;
+	unsigned long unpinned;
+	struct rb_root sdma_rb_root;
+	u32 n_locked;
+	struct list_head evict;
+	spinlock_t evict_lock; /* protect evict and n_locked */
 };
 
 struct hfi1_user_sdma_comp_q {

diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index 1761686..89f2aad 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -63,9 +60,9 @@
 #include "device.h"
 #include "trace.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 
-unsigned int hfi1_lkey_table_size = 16;
+static unsigned int hfi1_lkey_table_size = 16;
 module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
 		   S_IRUGO);
 MODULE_PARM_DESC(lkey_table_size,
@@ -124,45 +121,181 @@
 module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
 
+unsigned short piothreshold = 256;
+module_param(piothreshold, ushort, S_IRUGO);
+MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
+
+#define COPY_CACHELESS 1
+#define COPY_ADAPTIVE  2
+static unsigned int sge_copy_mode;
+module_param(sge_copy_mode, uint, S_IRUGO);
+MODULE_PARM_DESC(sge_copy_mode,
+		 "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");
+
 static void verbs_sdma_complete(
 	struct sdma_txreq *cookie,
-	int status,
-	int drained);
+	int status);
+
+static int pio_wait(struct rvt_qp *qp,
+		    struct send_context *sc,
+		    struct hfi1_pkt_state *ps,
+		    u32 flag);
 
 /* Length of buffer to create verbs txreq cache name */
 #define TXREQ_NAME_LEN 24
 
-/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; hfi1_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_hfi1_state_ops[IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = 0,
-	[IB_QPS_INIT] = HFI1_POST_RECV_OK,
-	[IB_QPS_RTR] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK,
-	[IB_QPS_RTS] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK |
-	    HFI1_PROCESS_NEXT_SEND_OK,
-	[IB_QPS_SQD] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK,
-	[IB_QPS_SQE] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
-	[IB_QPS_ERR] = HFI1_POST_RECV_OK | HFI1_FLUSH_RECV |
-	    HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
+static uint wss_threshold;
+module_param(wss_threshold, uint, S_IRUGO);
+MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
+static uint wss_clean_period = 256;
+module_param(wss_clean_period, uint, S_IRUGO);
+MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
+
+/* memory working set size */
+struct hfi1_wss {
+	unsigned long *entries;
+	atomic_t total_count;
+	atomic_t clean_counter;
+	atomic_t clean_entry;
+
+	int threshold;
+	int num_entries;
+	long pages_mask;
 };
 
-struct hfi1_ucontext {
-	struct ib_ucontext ibucontext;
-};
+static struct hfi1_wss wss;
 
-static inline struct hfi1_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
+int hfi1_wss_init(void)
 {
-	return container_of(ibucontext, struct hfi1_ucontext, ibucontext);
+	long llc_size;
+	long llc_bits;
+	long table_size;
+	long table_bits;
+
+	/* check for a valid percent range - default to 80 if none or invalid */
+	if (wss_threshold < 1 || wss_threshold > 100)
+		wss_threshold = 80;
+	/* reject a wildly large period */
+	if (wss_clean_period > 1000000)
+		wss_clean_period = 256;
+	/* reject a zero period */
+	if (wss_clean_period == 0)
+		wss_clean_period = 1;
+
+	/*
+	 * Calculate the table size - the next power of 2 larger than the
+	 * LLC size.  LLC size is in KiB.
+	 */
+	llc_size = wss_llc_size() * 1024;
+	table_size = roundup_pow_of_two(llc_size);
+
+	/* one bit per page in rounded up table */
+	llc_bits = llc_size / PAGE_SIZE;
+	table_bits = table_size / PAGE_SIZE;
+	wss.pages_mask = table_bits - 1;
+	wss.num_entries = table_bits / BITS_PER_LONG;
+
+	wss.threshold = (llc_bits * wss_threshold) / 100;
+	if (wss.threshold == 0)
+		wss.threshold = 1;
+
+	atomic_set(&wss.clean_counter, wss_clean_period);
+
+	wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
+			      GFP_KERNEL);
+	if (!wss.entries) {
+		hfi1_wss_exit();
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
-static inline void _hfi1_schedule_send(struct hfi1_qp *qp);
+void hfi1_wss_exit(void)
+{
+	/* coded to handle partially initialized and repeat callers */
+	kfree(wss.entries);
+	wss.entries = NULL;
+}
+
+/*
+ * Advance the clean counter.  When the clean period has expired,
+ * clean an entry.
+ *
+ * This is implemented in atomics to avoid locking.  Because multiple
+ * variables are involved, it can be racy which can lead to slightly
+ * inaccurate information.  Since this is only a heuristic, this is
+ * OK.  Any innaccuracies will clean themselves out as the counter
+ * advances.  That said, it is unlikely the entry clean operation will
+ * race - the next possible racer will not start until the next clean
+ * period.
+ *
+ * The clean counter is implemented as a decrement to zero.  When zero
+ * is reached an entry is cleaned.
+ */
+static void wss_advance_clean_counter(void)
+{
+	int entry;
+	int weight;
+	unsigned long bits;
+
+	/* become the cleaner if we decrement the counter to zero */
+	if (atomic_dec_and_test(&wss.clean_counter)) {
+		/*
+		 * Set, not add, the clean period.  This avoids an issue
+		 * where the counter could decrement below the clean period.
+		 * Doing a set can result in lost decrements, slowing the
+		 * clean advance.  Since this a heuristic, this possible
+		 * slowdown is OK.
+		 *
+		 * An alternative is to loop, advancing the counter by a
+		 * clean period until the result is > 0. However, this could
+		 * lead to several threads keeping another in the clean loop.
+		 * This could be mitigated by limiting the number of times
+		 * we stay in the loop.
+		 */
+		atomic_set(&wss.clean_counter, wss_clean_period);
+
+		/*
+		 * Uniquely grab the entry to clean and move to next.
+		 * The current entry is always the lower bits of
+		 * wss.clean_entry.  The table size, wss.num_entries,
+		 * is always a power-of-2.
+		 */
+		entry = (atomic_inc_return(&wss.clean_entry) - 1)
+			& (wss.num_entries - 1);
+
+		/* clear the entry and count the bits */
+		bits = xchg(&wss.entries[entry], 0);
+		weight = hweight64((u64)bits);
+		/* only adjust the contended total count if needed */
+		if (weight)
+			atomic_sub(weight, &wss.total_count);
+	}
+}
+
+/*
+ * Insert the given address into the working set array.
+ */
+static void wss_insert(void *address)
+{
+	u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
+	u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
+	u32 nr = page & (BITS_PER_LONG - 1);
+
+	if (!test_and_set_bit(nr, &wss.entries[entry]))
+		atomic_inc(&wss.total_count);
+
+	wss_advance_clean_counter();
+}
+
+/*
+ * Is the working set larger than the threshold?
+ */
+static inline int wss_exceeds_threshold(void)
+{
+	return atomic_read(&wss.total_count) >= wss.threshold;
+}
 
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
@@ -274,14 +407,47 @@
  * @ss: the SGE state
  * @data: the data to copy
  * @length: the length of the data
+ * @copy_last: do a separate copy of the last 8 bytes
  */
 void hfi1_copy_sge(
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	void *data, u32 length,
-	int release)
+	int release,
+	int copy_last)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
+	int in_last = 0;
+	int i;
+	int cacheless_copy = 0;
 
+	if (sge_copy_mode == COPY_CACHELESS) {
+		cacheless_copy = length >= PAGE_SIZE;
+	} else if (sge_copy_mode == COPY_ADAPTIVE) {
+		if (length >= PAGE_SIZE) {
+			/*
+			 * NOTE: this *assumes*:
+			 * o The first vaddr is the dest.
+			 * o If multiple pages, then vaddr is sequential.
+			 */
+			wss_insert(sge->vaddr);
+			if (length >= (2 * PAGE_SIZE))
+				wss_insert(sge->vaddr + PAGE_SIZE);
+
+			cacheless_copy = wss_exceeds_threshold();
+		} else {
+			wss_advance_clean_counter();
+		}
+	}
+	if (copy_last) {
+		if (length > 8) {
+			length -= 8;
+		} else {
+			copy_last = 0;
+			in_last = 1;
+		}
+	}
+
+again:
 	while (length) {
 		u32 len = sge->length;
 
@@ -290,17 +456,25 @@
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		memcpy(sge->vaddr, data, len);
+		if (unlikely(in_last)) {
+			/* enforce byte transfer ordering */
+			for (i = 0; i < len; i++)
+				((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
+		} else if (cacheless_copy) {
+			cacheless_memcpy(sge->vaddr, data, len);
+		} else {
+			memcpy(sge->vaddr, data, len);
+		}
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -313,6 +487,13 @@
 		data += len;
 		length -= len;
 	}
+
+	if (copy_last) {
+		copy_last = 0;
+		in_last = 1;
+		length = 8;
+		goto again;
+	}
 }
 
 /**
@@ -320,9 +501,9 @@
  * @ss: the SGE state
  * @length: the number of bytes to skip
  */
-void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
+void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -337,11 +518,11 @@
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -355,231 +536,6 @@
 	}
 }
 
-/**
- * post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
-{
-	struct hfi1_swqe *wqe;
-	u32 next;
-	int i;
-	int j;
-	int acc;
-	struct hfi1_lkey_table *rkt;
-	struct hfi1_pd *pd;
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct hfi1_pportdata *ppd;
-	struct hfi1_ibport *ibp;
-
-	/* IB spec says that num_sge == 0 is OK. */
-	if (unlikely(wr->num_sge > qp->s_max_sge))
-		return -EINVAL;
-
-	ppd = &dd->pport[qp->port_num - 1];
-	ibp = &ppd->ibport_data;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-			return -EINVAL;
-	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
-		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
-		if (wr->opcode != IB_WR_SEND &&
-		    wr->opcode != IB_WR_SEND_WITH_IMM)
-			return -EINVAL;
-		/* Check UD destination address PD */
-		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-			return -EINVAL;
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-		return -EINVAL;
-	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
-		return -EINVAL;
-	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-		return -EINVAL;
-
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last)
-		return -ENOMEM;
-
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.pd);
-	wqe = get_swqe_ptr(qp, qp->s_head);
-
-
-	if (qp->ibqp.qp_type != IB_QPT_UC &&
-	    qp->ibqp.qp_type != IB_QPT_RC)
-		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		 wr->opcode == IB_WR_RDMA_WRITE ||
-		 wr->opcode == IB_WR_RDMA_READ)
-		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-	else
-		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-	wqe->length = 0;
-	j = 0;
-	if (wr->num_sge) {
-		acc = wr->opcode >= IB_WR_RDMA_READ ?
-			IB_ACCESS_LOCAL_WRITE : 0;
-		for (i = 0; i < wr->num_sge; i++) {
-			u32 length = wr->sg_list[i].length;
-			int ok;
-
-			if (length == 0)
-				continue;
-			ok = hfi1_lkey_ok(rkt, pd, &wqe->sg_list[j],
-					  &wr->sg_list[i], acc);
-			if (!ok)
-				goto bail_inval_free;
-			wqe->length += length;
-			j++;
-		}
-		wqe->wr.num_sge = j;
-	}
-	if (qp->ibqp.qp_type == IB_QPT_UC ||
-	    qp->ibqp.qp_type == IB_QPT_RC) {
-		if (wqe->length > 0x80000000U)
-			goto bail_inval_free;
-	} else {
-		struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah);
-
-		atomic_inc(&ah->refcount);
-	}
-	wqe->ssn = qp->s_ssn++;
-	qp->s_head = next;
-
-	return 0;
-
-bail_inval_free:
-	/* release mr holds */
-	while (j) {
-		struct hfi1_sge *sge = &wqe->sg_list[--j];
-
-		hfi1_put_mr(sge->mr);
-	}
-	return -EINVAL;
-}
-
-/**
- * post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	int err = 0;
-	int call_send;
-	unsigned long flags;
-	unsigned nreq = 0;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_hfi1_state_ops[qp->state] & HFI1_POST_SEND_OK))) {
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		return -EINVAL;
-	}
-
-	/* sq empty and not list -> call send */
-	call_send = qp->s_head == qp->s_last && !wr->next;
-
-	for (; wr; wr = wr->next) {
-		err = post_one_send(qp, wr);
-		if (unlikely(err)) {
-			*bad_wr = wr;
-			goto bail;
-		}
-		nreq++;
-	}
-bail:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	if (nreq && !call_send)
-		_hfi1_schedule_send(qp);
-	if (nreq && call_send)
-		hfi1_do_send(&qp->s_iowait.iowork);
-	return err;
-}
-
-/**
- * post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			struct ib_recv_wr **bad_wr)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_rwq *wq = qp->r_rq.wq;
-	unsigned long flags;
-	int ret;
-
-	/* Check that state is OK to post receive. */
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_POST_RECV_OK) || !wq) {
-		*bad_wr = wr;
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	for (; wr; wr = wr->next) {
-		struct hfi1_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = wq->head + 1;
-		if (next >= qp->r_rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
 /*
  * Make sure the QP is ready and able to accept the given opcode.
  */
@@ -587,18 +543,17 @@
 {
 	struct hfi1_ibport *ibp;
 
-	if (!(ib_hfi1_state_ops[packet->qp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
 		goto dropit;
 	if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
 	    (opcode == IB_OPCODE_CNP))
 		return 1;
 dropit:
 	ibp = &packet->rcd->ppd->ibport_data;
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return 0;
 }
 
-
 /**
  * hfi1_ib_rcv - process an incoming packet
  * @packet: data packet information
@@ -614,6 +569,7 @@
 	u32 tlen = packet->tlen;
 	struct hfi1_pportdata *ppd = rcd->ppd;
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
+	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
 	unsigned long flags;
 	u32 qp_num;
 	int lnh;
@@ -622,9 +578,9 @@
 
 	/* Check for GRH */
 	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
-	if (lnh == HFI1_LRH_BTH)
+	if (lnh == HFI1_LRH_BTH) {
 		packet->ohdr = &hdr->u.oth;
-	else if (lnh == HFI1_LRH_GRH) {
+	} else if (lnh == HFI1_LRH_GRH) {
 		u32 vtf;
 
 		packet->ohdr = &hdr->u.l.oth;
@@ -634,8 +590,9 @@
 		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 			goto drop;
 		packet->rcv_flags |= HFI1_HAS_GRH;
-	} else
+	} else {
 		goto drop;
+	}
 
 	trace_input_ibhdr(rcd->dd, hdr);
 
@@ -643,17 +600,17 @@
 	inc_opstats(tlen, &rcd->opstats->stats[opcode]);
 
 	/* Get the destination QP number. */
-	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & HFI1_QPN_MASK;
+	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
 	lid = be16_to_cpu(hdr->lrh[1]);
-	if (unlikely((lid >= HFI1_MULTICAST_LID_BASE) &&
-	    (lid != HFI1_PERMISSIVE_LID))) {
-		struct hfi1_mcast *mcast;
-		struct hfi1_mcast_qp *p;
+	if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+		     (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+		struct rvt_mcast *mcast;
+		struct rvt_mcast_qp *p;
 
 		if (lnh != HFI1_LRH_GRH)
 			goto drop;
-		mcast = hfi1_mcast_find(ibp, &hdr->u.l.grh.dgid);
-		if (mcast == NULL)
+		mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+		if (!mcast)
 			goto drop;
 		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
 			packet->qp = p->qp;
@@ -663,14 +620,14 @@
 			spin_unlock_irqrestore(&packet->qp->r_lock, flags);
 		}
 		/*
-		 * Notify hfi1_multicast_detach() if it is waiting for us
+		 * Notify rvt_multicast_detach() if it is waiting for us
 		 * to finish.
 		 */
 		if (atomic_dec_return(&mcast->refcount) <= 1)
 			wake_up(&mcast->wait);
 	} else {
 		rcu_read_lock();
-		packet->qp = hfi1_lookup_qpn(ibp, qp_num);
+		packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 		if (!packet->qp) {
 			rcu_read_unlock();
 			goto drop;
@@ -684,7 +641,7 @@
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
 
 /*
@@ -695,15 +652,17 @@
 {
 	struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
 	struct list_head *list = &dev->memwait;
-	struct hfi1_qp *qp = NULL;
+	struct rvt_qp *qp = NULL;
 	struct iowait *wait;
 	unsigned long flags;
+	struct hfi1_qp_priv *priv;
 
 	write_seqlock_irqsave(&dev->iowait_lock, flags);
 	if (!list_empty(list)) {
 		wait = list_first_entry(list, struct iowait, list);
-		qp = container_of(wait, struct hfi1_qp, s_iowait);
-		list_del_init(&qp->s_iowait.list);
+		qp = iowait_to_qp(wait);
+		priv = qp->priv;
+		list_del_init(&priv->s_iowait.list);
 		/* refcount held until actual wake up */
 		if (!list_empty(list))
 			mod_timer(&dev->mem_timer, jiffies + 1);
@@ -711,12 +670,12 @@
 	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
 
 	if (qp)
-		hfi1_qp_wakeup(qp, HFI1_S_WAIT_KMEM);
+		hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
 }
 
-void update_sge(struct hfi1_sge_state *ss, u32 length)
+void update_sge(struct rvt_sge_state *ss, u32 length)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	sge->vaddr += length;
 	sge->length -= length;
@@ -725,7 +684,7 @@
 		if (--ss->num_sge)
 			*sge = *ss->sg_list++;
 	} else if (sge->length == 0 && sge->mr->lkey) {
-		if (++sge->n >= HFI1_SEGSZ) {
+		if (++sge->n >= RVT_SEGSZ) {
 			if (++sge->m >= sge->mr->mapsz)
 				return;
 			sge->n = 0;
@@ -735,143 +694,55 @@
 	}
 }
 
-static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
-						struct hfi1_qp *qp)
-{
-	struct verbs_txreq *tx;
-	unsigned long flags;
-
-	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
-	if (!tx) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		write_seqlock(&dev->iowait_lock);
-		if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK &&
-		    list_empty(&qp->s_iowait.list)) {
-			dev->n_txwait++;
-			qp->s_flags |= HFI1_S_WAIT_TX;
-			list_add_tail(&qp->s_iowait.list, &dev->txwait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TX);
-			atomic_inc(&qp->refcount);
-		}
-		qp->s_flags &= ~HFI1_S_BUSY;
-		write_sequnlock(&dev->iowait_lock);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		tx = ERR_PTR(-EBUSY);
-	}
-	return tx;
-}
-
-static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
-					    struct hfi1_qp *qp)
-{
-	struct verbs_txreq *tx;
-
-	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
-	if (!tx) {
-		/* call slow path to get the lock */
-		tx =  __get_txreq(dev, qp);
-		if (IS_ERR(tx))
-			return tx;
-	}
-	tx->qp = qp;
-	return tx;
-}
-
-void hfi1_put_txreq(struct verbs_txreq *tx)
-{
-	struct hfi1_ibdev *dev;
-	struct hfi1_qp *qp;
-	unsigned long flags;
-	unsigned int seq;
-
-	qp = tx->qp;
-	dev = to_idev(qp->ibqp.device);
-
-	if (tx->mr) {
-		hfi1_put_mr(tx->mr);
-		tx->mr = NULL;
-	}
-	sdma_txclean(dd_from_dev(dev), &tx->txreq);
-
-	/* Free verbs_txreq and return to slab cache */
-	kmem_cache_free(dev->verbs_txreq_cache, tx);
-
-	do {
-		seq = read_seqbegin(&dev->iowait_lock);
-		if (!list_empty(&dev->txwait)) {
-			struct iowait *wait;
-
-			write_seqlock_irqsave(&dev->iowait_lock, flags);
-			/* Wake up first QP wanting a free struct */
-			wait = list_first_entry(&dev->txwait, struct iowait,
-						list);
-			qp = container_of(wait, struct hfi1_qp, s_iowait);
-			list_del_init(&qp->s_iowait.list);
-			/* refcount held until actual wake up */
-			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
-			hfi1_qp_wakeup(qp, HFI1_S_WAIT_TX);
-			break;
-		}
-	} while (read_seqretry(&dev->iowait_lock, seq));
-}
-
 /*
  * This is called with progress side lock held.
  */
 /* New API */
 static void verbs_sdma_complete(
 	struct sdma_txreq *cookie,
-	int status,
-	int drained)
+	int status)
 {
 	struct verbs_txreq *tx =
 		container_of(cookie, struct verbs_txreq, txreq);
-	struct hfi1_qp *qp = tx->qp;
+	struct rvt_qp *qp = tx->qp;
 
 	spin_lock(&qp->s_lock);
-	if (tx->wqe)
+	if (tx->wqe) {
 		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
-	else if (qp->ibqp.qp_type == IB_QPT_RC) {
+	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		struct hfi1_ib_header *hdr;
 
 		hdr = &tx->phdr.hdr;
 		hfi1_rc_send_complete(qp, hdr);
 	}
-	if (drained) {
-		/*
-		 * This happens when the send engine notes
-		 * a QP in the error state and cannot
-		 * do the flush work until that QP's
-		 * sdma work has finished.
-		 */
-		if (qp->s_flags & HFI1_S_WAIT_DMA) {
-			qp->s_flags &= ~HFI1_S_WAIT_DMA;
-			hfi1_schedule_send(qp);
-		}
-	}
 	spin_unlock(&qp->s_lock);
 
 	hfi1_put_txreq(tx);
 }
 
-static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
+static int wait_kmem(struct hfi1_ibdev *dev,
+		     struct rvt_qp *qp,
+		     struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	unsigned long flags;
 	int ret = 0;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
-		if (list_empty(&qp->s_iowait.list)) {
+		list_add_tail(&ps->s_txreq->txreq.list,
+			      &priv->s_iowait.tx_head);
+		if (list_empty(&priv->s_iowait.list)) {
 			if (list_empty(&dev->memwait))
 				mod_timer(&dev->mem_timer, jiffies + 1);
-			qp->s_flags |= HFI1_S_WAIT_KMEM;
-			list_add_tail(&qp->s_iowait.list, &dev->memwait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_KMEM);
+			qp->s_flags |= RVT_S_WAIT_KMEM;
+			list_add_tail(&priv->s_iowait.list, &dev->memwait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
 			atomic_inc(&qp->refcount);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -884,14 +755,14 @@
  *
  * Add failures will revert the sge cursor
  */
-static int build_verbs_ulp_payload(
+static noinline int build_verbs_ulp_payload(
 	struct sdma_engine *sde,
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	u32 length,
 	struct verbs_txreq *tx)
 {
-	struct hfi1_sge *sg_list = ss->sg_list;
-	struct hfi1_sge sge = ss->sge;
+	struct rvt_sge *sg_list = ss->sg_list;
+	struct rvt_sge sge = ss->sge;
 	u8 num_sge = ss->num_sge;
 	u32 len;
 	int ret = 0;
@@ -928,23 +799,21 @@
  * NOTE: DMA mapping is held in the tx until completed in the ring or
  *       the tx desc is freed without having been submitted to the ring
  *
- * This routine insures the following all the helper routine
- * calls succeed.
+ * This routine ensures all the helper routine calls succeed.
  */
 /* New API */
 static int build_verbs_tx_desc(
 	struct sdma_engine *sde,
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	u32 length,
 	struct verbs_txreq *tx,
 	struct ahg_ib_header *ahdr,
 	u64 pbc)
 {
 	int ret = 0;
-	struct hfi1_pio_header *phdr;
+	struct hfi1_pio_header *phdr = &tx->phdr;
 	u16 hdrbytes = tx->hdr_dwords << 2;
 
-	phdr = &tx->phdr;
 	if (!ahdr->ahgcount) {
 		ret = sdma_txinit_ahg(
 			&tx->txreq,
@@ -958,29 +827,14 @@
 		if (ret)
 			goto bail_txadd;
 		phdr->pbc = cpu_to_le64(pbc);
-		memcpy(&phdr->hdr, &ahdr->ibh, hdrbytes - sizeof(phdr->pbc));
-		/* add the header */
 		ret = sdma_txadd_kvaddr(
 			sde->dd,
 			&tx->txreq,
-			&tx->phdr,
-			tx->hdr_dwords << 2);
+			phdr,
+			hdrbytes);
 		if (ret)
 			goto bail_txadd;
 	} else {
-		struct hfi1_other_headers *sohdr = &ahdr->ibh.u.oth;
-		struct hfi1_other_headers *dohdr = &phdr->hdr.u.oth;
-
-		/* needed in rc_send_complete() */
-		phdr->hdr.lrh[0] = ahdr->ibh.lrh[0];
-		if ((be16_to_cpu(phdr->hdr.lrh[0]) & 3) == HFI1_LRH_GRH) {
-			sohdr = &ahdr->ibh.u.l.oth;
-			dohdr = &phdr->hdr.u.l.oth;
-		}
-		/* opcode */
-		dohdr->bth[0] = sohdr->bth[0];
-		/* PSN/ACK  */
-		dohdr->bth[2] = sohdr->bth[2];
 		ret = sdma_txinit_ahg(
 			&tx->txreq,
 			ahdr->tx_flags,
@@ -1001,80 +855,75 @@
 	return ret;
 }
 
-int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct ahg_ib_header *ahdr = priv->s_hdr;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
 	struct hfi1_ibdev *dev = ps->dev;
 	struct hfi1_pportdata *ppd = ps->ppd;
 	struct verbs_txreq *tx;
-	struct sdma_txreq *stx;
 	u64 pbc_flags = 0;
-	u8 sc5 = qp->s_sc;
+	u8 sc5 = priv->s_sc;
+
 	int ret;
 
-	if (!list_empty(&qp->s_iowait.tx_head)) {
-		stx = list_first_entry(
-			&qp->s_iowait.tx_head,
-			struct sdma_txreq,
-			list);
-		list_del_init(&stx->list);
-		tx = container_of(stx, struct verbs_txreq, txreq);
-		ret = sdma_send_txreq(tx->sde, &qp->s_iowait, stx);
-		if (unlikely(ret == -ECOMM))
+	tx = ps->s_txreq;
+	if (!sdma_txreq_built(&tx->txreq)) {
+		if (likely(pbc == 0)) {
+			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+			/* No vl15 here */
+			/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
+			pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+
+			pbc = create_pbc(ppd,
+					 pbc_flags,
+					 qp->srate_mbps,
+					 vl,
+					 plen);
+		}
+		tx->wqe = qp->s_wqe;
+		ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
+		if (unlikely(ret))
+			goto bail_build;
+	}
+	ret =  sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
+	if (unlikely(ret < 0)) {
+		if (ret == -ECOMM)
 			goto bail_ecomm;
 		return ret;
 	}
-
-	tx = get_txreq(dev, qp);
-	if (IS_ERR(tx))
-		goto bail_tx;
-
-	tx->sde = qp->s_sde;
-
-	if (likely(pbc == 0)) {
-		u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
-		/* No vl15 here */
-		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
-
-		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
-	}
-	tx->wqe = qp->s_wqe;
-	tx->mr = qp->s_rdma_mr;
-	if (qp->s_rdma_mr)
-		qp->s_rdma_mr = NULL;
-	tx->hdr_dwords = hdrwords + 2;
-	ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
-	if (unlikely(ret))
-		goto bail_build;
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
-	ret =  sdma_send_txreq(tx->sde, &qp->s_iowait, &tx->txreq);
-	if (unlikely(ret == -ECOMM))
-		goto bail_ecomm;
+	trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+				&ps->s_txreq->phdr.hdr);
 	return ret;
 
 bail_ecomm:
 	/* The current one got "sent" */
 	return 0;
 bail_build:
-	/* kmalloc or mapping fail */
-	hfi1_put_txreq(tx);
-	return wait_kmem(dev, qp);
-bail_tx:
-	return PTR_ERR(tx);
+	ret = wait_kmem(dev, qp, ps);
+	if (!ret) {
+		/* free txreq - bad state */
+		hfi1_put_txreq(ps->s_txreq);
+		ps->s_txreq = NULL;
+	}
+	return ret;
 }
 
 /*
  * If we are now in the error state, return zero to flush the
  * send work request.
  */
-static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc)
+static int pio_wait(struct rvt_qp *qp,
+		    struct send_context *sc,
+		    struct hfi1_pkt_state *ps,
+		    u32 flag)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_devdata *dd = sc->dd;
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
 	unsigned long flags;
@@ -1087,74 +936,89 @@
 	 * enabling the PIO avail interrupt.
 	 */
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
-		if (list_empty(&qp->s_iowait.list)) {
+		list_add_tail(&ps->s_txreq->txreq.list,
+			      &priv->s_iowait.tx_head);
+		if (list_empty(&priv->s_iowait.list)) {
 			struct hfi1_ibdev *dev = &dd->verbs_dev;
 			int was_empty;
 
+			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
+			dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
 			dev->n_piowait++;
-			qp->s_flags |= HFI1_S_WAIT_PIO;
+			qp->s_flags |= flag;
 			was_empty = list_empty(&sc->piowait);
-			list_add_tail(&qp->s_iowait.list, &sc->piowait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_PIO);
+			list_add_tail(&priv->s_iowait.list, &sc->piowait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
 			atomic_inc(&qp->refcount);
 			/* counting: only call wantpiobuf_intr if first user */
 			if (was_empty)
 				hfi1_sc_wantpiobuf_intr(sc, 1);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
-struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5)
+static void verbs_pio_complete(void *arg, int code)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct hfi1_pportdata *ppd = dd->pport + (qp->port_num - 1);
-	u8 vl;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	vl = sc_to_vlt(dd, sc5);
-	if (vl >= ppd->vls_supported && vl != 15)
-		return NULL;
-	return dd->vld[vl].sc;
+	if (iowait_pio_dec(&priv->s_iowait))
+		iowait_drain_wakeup(&priv->s_iowait);
 }
 
-int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 dwords = (len + 3) >> 2;
 	u32 plen = hdrwords + dwords + 2; /* includes pbc */
 	struct hfi1_pportdata *ppd = ps->ppd;
-	u32 *hdr = (u32 *)&ahdr->ibh;
+	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
 	u64 pbc_flags = 0;
-	u32 sc5;
+	u8 sc5;
 	unsigned long flags = 0;
 	struct send_context *sc;
 	struct pio_buf *pbuf;
 	int wc_status = IB_WC_SUCCESS;
+	int ret = 0;
+	pio_release_cb cb = NULL;
+
+	/* only RC/UC use complete */
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		cb = verbs_pio_complete;
+		break;
+	default:
+		break;
+	}
 
 	/* vl15 special case taken care of in ud.c */
-	sc5 = qp->s_sc;
-	sc = qp_to_send_context(qp, sc5);
+	sc5 = priv->s_sc;
+	sc = ps->s_txreq->psc;
 
-	if (!sc)
-		return -EINVAL;
 	if (likely(pbc == 0)) {
-		u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
 		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
 		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
 		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
 	}
-	pbuf = sc_buffer_alloc(sc, plen, NULL, NULL);
-	if (unlikely(pbuf == NULL)) {
+	if (cb)
+		iowait_pio_inc(&priv->s_iowait);
+	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
+	if (unlikely(!pbuf)) {
+		if (cb)
+			verbs_pio_complete(qp, 0);
 		if (ppd->host_link_state != HLS_UP_ACTIVE) {
 			/*
 			 * If we have filled the PIO buffers to capacity and are
@@ -1174,7 +1038,12 @@
 			 * so lets continue to queue the request.
 			 */
 			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
-			return no_bufs_available(qp, sc);
+			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
+			if (!ret)
+				/* txreq not queued - free */
+				goto bail;
+			/* tx consumed in wait */
+			return ret;
 		}
 	}
 
@@ -1182,7 +1051,7 @@
 		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
 	} else {
 		if (ss) {
-			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords*4);
+			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4);
 			while (len) {
 				void *addr = ss->sge.vaddr;
 				u32 slen = ss->sge.length;
@@ -1197,12 +1066,8 @@
 		}
 	}
 
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
-
-	if (qp->s_rdma_mr) {
-		hfi1_put_mr(qp->s_rdma_mr);
-		qp->s_rdma_mr = NULL;
-	}
+	trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+			       &ps->s_txreq->phdr.hdr);
 
 pio_bail:
 	if (qp->s_wqe) {
@@ -1211,10 +1076,15 @@
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		spin_lock_irqsave(&qp->s_lock, flags);
-		hfi1_rc_send_complete(qp, &ahdr->ibh);
+		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 	}
-	return 0;
+
+	ret = 0;
+
+bail:
+	hfi1_put_txreq(ps->s_txreq);
+	return ret;
 }
 
 /*
@@ -1247,13 +1117,14 @@
  */
 static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
 				    struct hfi1_ib_header *hdr,
-				    struct hfi1_qp *qp)
+				    struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
 	struct hfi1_devdata *dd;
 	int i = 0;
 	u16 pkey;
-	u8 lnh, sc5 = qp->s_sc;
+	u8 lnh, sc5 = priv->s_sc;
 
 	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
 		return 0;
@@ -1271,14 +1142,14 @@
 	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
 		goto bad;
 
-
 	/* Is the pkey = 0x0, or 0x8000? */
 	if ((pkey & PKEY_LOW_15_MASK) == 0)
 		goto bad;
 
 	/* The most likely matching pkey has index qp->s_pkey_index */
 	if (unlikely(!egress_pkey_matches_entry(pkey,
-					ppd->pkeys[qp->s_pkey_index]))) {
+						ppd->pkeys
+						[qp->s_pkey_index]))) {
 		/* no match - try the entire table */
 		for (; i < MAX_PKEY_VALUES; i++) {
 			if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
@@ -1302,31 +1173,65 @@
 }
 
 /**
+ * get_send_routine - choose an egress routine
+ *
+ * Choose an egress routine based on QP type
+ * and size
+ */
+static inline send_routine get_send_routine(struct rvt_qp *qp,
+					    struct verbs_txreq *tx)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ib_header *h = &tx->phdr.hdr;
+
+	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
+		return dd->process_pio_send;
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_SMI:
+		return dd->process_pio_send;
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		break;
+	case IB_QPT_RC:
+		if (piothreshold &&
+		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+		    (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
+		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
+		    !sdma_txreq_built(&tx->txreq))
+			return dd->process_pio_send;
+		break;
+	case IB_QPT_UC:
+		if (piothreshold &&
+		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+		    (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
+		    !sdma_txreq_built(&tx->txreq))
+			return dd->process_pio_send;
+		break;
+	default:
+		break;
+	}
+	return dd->process_dma_send;
+}
+
+/**
  * hfi1_verbs_send - send a packet
  * @qp: the QP to send on
  * @ps: the state of the packet to send
  *
  * Return zero if packet is sent or queued OK.
- * Return non-zero and clear qp->s_flags HFI1_S_BUSY otherwise.
+ * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
  */
-int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
+int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
+	send_routine sr;
 	int ret;
-	int pio = 0;
-	unsigned long flags = 0;
 
-	/*
-	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
-	 * can defer SDMA restart until link goes ACTIVE without
-	 * worrying about just how we got there.
-	 */
-	if ((qp->ibqp.qp_type == IB_QPT_SMI) ||
-	    !(dd->flags & HFI1_HAS_SEND_DMA))
-		pio = 1;
-
-	ret = egress_pkey_check(dd->pport, &ahdr->ibh, qp);
+	sr = get_send_routine(qp, ps->s_txreq);
+	ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp);
 	if (unlikely(ret)) {
 		/*
 		 * The value we are returning here does not get propagated to
@@ -1336,7 +1241,9 @@
 		 * mechanism for handling the errors. So for SDMA we can just
 		 * return.
 		 */
-		if (pio) {
+		if (sr == dd->process_pio_send) {
+			unsigned long flags;
+
 			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
 				  __func__);
 			spin_lock_irqsave(&qp->s_lock, flags);
@@ -1345,71 +1252,57 @@
 		}
 		return -EINVAL;
 	}
-
-	if (pio) {
-		ret = dd->process_pio_send(qp, ps, 0);
-	} else {
-#ifdef CONFIG_SDMA_VERBOSITY
-		dd_dev_err(dd, "CONFIG SDMA %s:%d %s()\n",
-			   slashstrip(__FILE__), __LINE__, __func__);
-		dd_dev_err(dd, "SDMA hdrwords = %u, len = %u\n", qp->s_hdrwords,
-			   qp->s_cur_size);
-#endif
-		ret = dd->process_dma_send(qp, ps, 0);
-	}
-
-	return ret;
+	if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait))
+		return pio_wait(qp,
+				ps->s_txreq->psc,
+				ps,
+				RVT_S_WAIT_PIO_DRAIN);
+	return sr(qp, ps, 0);
 }
 
-static int query_device(struct ib_device *ibdev,
-			struct ib_device_attr *props,
-			struct ib_udata *uhw)
+/**
+ * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
+ * @dd: the device data structure
+ */
+static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct hfi1_ibdev *dev = to_idev(ibdev);
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-	memset(props, 0, sizeof(*props));
+	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
 
-	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-
-	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id =
-		dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
-	props->vendor_part_id = dd->pcidev->device;
-	props->hw_ver = dd->minrev;
-	props->sys_image_guid = ib_hfi1_sys_image_guid;
-	props->max_mr_size = ~0ULL;
-	props->max_qp = hfi1_max_qps;
-	props->max_qp_wr = hfi1_max_qp_wrs;
-	props->max_sge = hfi1_max_sges;
-	props->max_sge_rd = hfi1_max_sges;
-	props->max_cq = hfi1_max_cqs;
-	props->max_ah = hfi1_max_ahs;
-	props->max_cqe = hfi1_max_cqes;
-	props->max_mr = dev->lk_table.max;
-	props->max_fmr = dev->lk_table.max;
-	props->max_map_per_fmr = 32767;
-	props->max_pd = hfi1_max_pds;
-	props->max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
-	props->max_qp_init_rd_atom = 255;
-	/* props->max_res_rd_atom */
-	props->max_srq = hfi1_max_srqs;
-	props->max_srq_wr = hfi1_max_srq_wrs;
-	props->max_srq_sge = hfi1_max_srq_sges;
-	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_GLOB;
-	props->max_pkeys = hfi1_get_npkeys(dd);
-	props->max_mcast_grp = hfi1_max_mcast_grps;
-	props->max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-		props->max_mcast_grp;
-
-	return 0;
+	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+			IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+			IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+			IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+	rdi->dparms.props.page_size_cap = PAGE_SIZE;
+	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
+	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
+	rdi->dparms.props.hw_ver = dd->minrev;
+	rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
+	rdi->dparms.props.max_mr_size = ~0ULL;
+	rdi->dparms.props.max_qp = hfi1_max_qps;
+	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
+	rdi->dparms.props.max_sge = hfi1_max_sges;
+	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
+	rdi->dparms.props.max_cq = hfi1_max_cqs;
+	rdi->dparms.props.max_ah = hfi1_max_ahs;
+	rdi->dparms.props.max_cqe = hfi1_max_cqes;
+	rdi->dparms.props.max_mr = rdi->lkey_table.max;
+	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
+	rdi->dparms.props.max_map_per_fmr = 32767;
+	rdi->dparms.props.max_pd = hfi1_max_pds;
+	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
+	rdi->dparms.props.max_qp_init_rd_atom = 255;
+	rdi->dparms.props.max_srq = hfi1_max_srqs;
+	rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
+	rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
+	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
+	rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
+	rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
+	rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
+	rdi->dparms.props.max_total_mcast_qp_attach =
+					rdi->dparms.props.max_mcast_qp_attach *
+					rdi->dparms.props.max_mcast_grp;
 }
 
 static inline u16 opa_speed_to_ib(u16 in)
@@ -1443,33 +1336,24 @@
 	}
 }
 
-static int query_port(struct ib_device *ibdev, u8 port,
+static int query_port(struct rvt_dev_info *rdi, u8 port_num,
 		      struct ib_port_attr *props)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct hfi1_ibport *ibp = to_iport(ibdev, port);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
 	u16 lid = ppd->lid;
 
-	memset(props, 0, sizeof(*props));
 	props->lid = lid ? lid : 0;
 	props->lmc = ppd->lmc;
-	props->sm_lid = ibp->sm_lid;
-	props->sm_sl = ibp->sm_sl;
 	/* OPA logical states match IB logical states */
 	props->state = driver_lstate(ppd);
 	props->phys_state = hfi1_ibphys_portstate(ppd);
-	props->port_cap_flags = ibp->port_cap_flags;
 	props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
-	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = hfi1_get_npkeys(dd);
-	props->bad_pkey_cntr = ibp->pkey_violations;
-	props->qkey_viol_cntr = ibp->qkey_violations;
 	props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
 	/* see rate_show() in ib core/sysfs.c */
 	props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
 	props->max_vl_num = ppd->vls_supported;
-	props->init_type_reply = 0;
 
 	/* Once we are a "first class" citizen and have added the OPA MTUs to
 	 * the core we can advertise the larger MTU enum to the ULPs, for now
@@ -1483,27 +1367,6 @@
 				      4096 : hfi1_max_mtu), IB_MTU_4096);
 	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
 		mtu_to_enum(ppd->ibmtu, IB_MTU_2048);
-	props->subnet_timeout = ibp->subnet_timeout;
-
-	return 0;
-}
-
-static int port_immutable(struct ib_device *ibdev, u8 port_num,
-			  struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	memset(immutable, 0, sizeof(*immutable));
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
-	immutable->max_mad_size = OPA_MGMT_MAD_SIZE;
 
 	return 0;
 }
@@ -1547,102 +1410,31 @@
 	return ret;
 }
 
-static int modify_port(struct ib_device *ibdev, u8 port,
-		       int port_modify_mask, struct ib_port_modify *props)
+static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
 {
-	struct hfi1_ibport *ibp = to_iport(ibdev, port);
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
+	int ret;
+
+	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
+			     OPA_LINKDOWN_REASON_UNKNOWN);
+	ret = set_link_state(ppd, HLS_DN_DOWNDEF);
+	return ret;
+}
+
+static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+			    int guid_index, __be64 *guid)
+{
+	struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	int ret = 0;
 
-	ibp->port_cap_flags |= props->set_port_cap_mask;
-	ibp->port_cap_flags &= ~props->clr_port_cap_mask;
-	if (props->set_port_cap_mask || props->clr_port_cap_mask)
-		hfi1_cap_mask_chg(ibp);
-	if (port_modify_mask & IB_PORT_SHUTDOWN) {
-		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
-		  OPA_LINKDOWN_REASON_UNKNOWN);
-		ret = set_link_state(ppd, HLS_DN_DOWNDEF);
-	}
-	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		ibp->qkey_violations = 0;
-	return ret;
-}
-
-static int query_gid(struct ib_device *ibdev, u8 port,
-		     int index, union ib_gid *gid)
-{
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	int ret = 0;
-
-	if (!port || port > dd->num_pports)
-		ret = -EINVAL;
-	else {
-		struct hfi1_ibport *ibp = to_iport(ibdev, port);
-		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-
-		gid->global.subnet_prefix = ibp->gid_prefix;
-		if (index == 0)
-			gid->global.interface_id = cpu_to_be64(ppd->guid);
-		else if (index < HFI1_GUIDS_PER_PORT)
-			gid->global.interface_id = ibp->guids[index - 1];
-		else
-			ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static struct ib_pd *alloc_pd(struct ib_device *ibdev,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata)
-{
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-	struct hfi1_pd *pd;
-	struct ib_pd *ret;
-
-	/*
-	 * This is actually totally arbitrary.  Some correctness tests
-	 * assume there's a maximum number of PDs that can be allocated.
-	 * We don't actually have this limit, but we fail the test if
-	 * we allow allocations of more than we report for this value.
-	 */
-
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock(&dev->n_pds_lock);
-	if (dev->n_pds_allocated == hfi1_max_pds) {
-		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_pds_allocated++;
-	spin_unlock(&dev->n_pds_lock);
-
-	/* ib_alloc_pd() will initialize pd->ibpd. */
-	pd->user = udata != NULL;
-
-	ret = &pd->ibpd;
-
-bail:
-	return ret;
-}
-
-static int dealloc_pd(struct ib_pd *ibpd)
-{
-	struct hfi1_pd *pd = to_ipd(ibpd);
-	struct hfi1_ibdev *dev = to_idev(ibpd->device);
-
-	spin_lock(&dev->n_pds_lock);
-	dev->n_pds_allocated--;
-	spin_unlock(&dev->n_pds_lock);
-
-	kfree(pd);
+	if (guid_index == 0)
+		*guid = cpu_to_be64(ppd->guid);
+	else if (guid_index < HFI1_GUIDS_PER_PORT)
+		*guid = ibp->guids[guid_index - 1];
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -1657,101 +1449,57 @@
 	return ibp->sl_to_sc[ah->sl];
 }
 
-int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
 {
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
 	struct hfi1_devdata *dd;
 	u8 sc5;
 
-	/* A multicast address requires a GRH (see ch. 8.4.1). */
-	if (ah_attr->dlid >= HFI1_MULTICAST_LID_BASE &&
-	    ah_attr->dlid != HFI1_PERMISSIVE_LID &&
-	    !(ah_attr->ah_flags & IB_AH_GRH))
-		goto bail;
-	if ((ah_attr->ah_flags & IB_AH_GRH) &&
-	    ah_attr->grh.sgid_index >= HFI1_GUIDS_PER_PORT)
-		goto bail;
-	if (ah_attr->dlid == 0)
-		goto bail;
-	if (ah_attr->port_num < 1 ||
-	    ah_attr->port_num > ibdev->phys_port_cnt)
-		goto bail;
-	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
-	    ib_rate_to_mbps(ah_attr->static_rate) < 0)
-		goto bail;
-	if (ah_attr->sl >= OPA_MAX_SLS)
-		goto bail;
 	/* test the mapping for validity */
 	ibp = to_iport(ibdev, ah_attr->port_num);
 	ppd = ppd_from_ibp(ibp);
 	sc5 = ibp->sl_to_sc[ah_attr->sl];
 	dd = dd_from_ppd(ppd);
 	if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
-		goto bail;
+		return -EINVAL;
 	return 0;
-bail:
-	return -EINVAL;
 }
 
-/**
- * create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *create_ah(struct ib_pd *pd,
-			       struct ib_ah_attr *ah_attr)
+static void hfi1_notify_new_ah(struct ib_device *ibdev,
+			       struct ib_ah_attr *ah_attr,
+			       struct rvt_ah *ah)
 {
-	struct hfi1_ah *ah;
-	struct ib_ah *ret;
-	struct hfi1_ibdev *dev = to_idev(pd->device);
-	unsigned long flags;
+	struct hfi1_ibport *ibp;
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	u8 sc5;
 
-	if (hfi1_check_ah(pd->device, ah_attr)) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
+	/*
+	 * Do not trust reading anything from rvt_ah at this point as it is not
+	 * done being setup. We can however modify things which we need to set.
+	 */
 
-	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
-	if (!ah) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	if (dev->n_ahs_allocated == hfi1_max_ahs) {
-		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-		kfree(ah);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_ahs_allocated++;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	/* ib_create_ah() will initialize ah->ibah. */
-	ah->attr = *ah_attr;
-	atomic_set(&ah->refcount, 0);
-
-	ret = &ah->ibah;
-
-bail:
-	return ret;
+	ibp = to_iport(ibdev, ah_attr->port_num);
+	ppd = ppd_from_ibp(ibp);
+	sc5 = ibp->sl_to_sc[ah->attr.sl];
+	dd = dd_from_ppd(ppd);
+	ah->vl = sc_to_vlt(dd, sc5);
+	if (ah->vl < num_vls || ah->vl == 15)
+		ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
 }
 
 struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
 {
 	struct ib_ah_attr attr;
 	struct ib_ah *ah = ERR_PTR(-EINVAL);
-	struct hfi1_qp *qp0;
+	struct rvt_qp *qp0;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.dlid = dlid;
 	attr.port_num = ppd_from_ibp(ibp)->port;
 	rcu_read_lock();
-	qp0 = rcu_dereference(ibp->qp[0]);
+	qp0 = rcu_dereference(ibp->rvp.qp[0]);
 	if (qp0)
 		ah = ib_create_ah(qp0->ibqp.pd, &attr);
 	rcu_read_unlock();
@@ -1759,51 +1507,6 @@
 }
 
 /**
- * destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int destroy_ah(struct ib_ah *ibah)
-{
-	struct hfi1_ibdev *dev = to_idev(ibah->device);
-	struct hfi1_ah *ah = to_iah(ibah);
-	unsigned long flags;
-
-	if (atomic_read(&ah->refcount) != 0)
-		return -EBUSY;
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	dev->n_ahs_allocated--;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	kfree(ah);
-
-	return 0;
-}
-
-static int modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct hfi1_ah *ah = to_iah(ibah);
-
-	if (hfi1_check_ah(ibah->device, ah_attr))
-		return -EINVAL;
-
-	ah->attr = *ah_attr;
-
-	return 0;
-}
-
-static int query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct hfi1_ah *ah = to_iah(ibah);
-
-	*ah_attr = ah->attr;
-
-	return 0;
-}
-
-/**
  * hfi1_get_npkeys - return the size of the PKEY table for context 0
  * @dd: the hfi1_ib device
  */
@@ -1812,54 +1515,6 @@
 	return ARRAY_SIZE(dd->pport[0].pkeys);
 }
 
-static int query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-		      u16 *pkey)
-{
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	int ret;
-
-	if (index >= hfi1_get_npkeys(dd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*pkey = hfi1_get_pkey(to_iport(ibdev, port), index);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the driver
- */
-
-static struct ib_ucontext *alloc_ucontext(struct ib_device *ibdev,
-					  struct ib_udata *udata)
-{
-	struct hfi1_ucontext *context;
-	struct ib_ucontext *ret;
-
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	ret = &context->ibucontext;
-
-bail:
-	return ret;
-}
-
-static int dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(to_iucontext(context));
-	return 0;
-}
-
 static void init_ibport(struct hfi1_pportdata *ppd)
 {
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -1871,28 +1526,21 @@
 		ibp->sc_to_sl[i] = i;
 	}
 
-	spin_lock_init(&ibp->lock);
+	spin_lock_init(&ibp->rvp.lock);
 	/* Set the prefix to the default value (see ch. 4.1.1) */
-	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
-	ibp->sm_lid = 0;
+	ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
+	ibp->rvp.sm_lid = 0;
 	/* Below should only set bits defined in OPA PortInfo.CapabilityMask */
-	ibp->port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
+	ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
 		IB_PORT_CAP_MASK_NOTICE_SUP;
-	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+	ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+	ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+	ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+	ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+	ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
 
-	RCU_INIT_POINTER(ibp->qp[0], NULL);
-	RCU_INIT_POINTER(ibp->qp[1], NULL);
-}
-
-static void verbs_txreq_kmem_cache_ctor(void *obj)
-{
-	struct verbs_txreq *tx = obj;
-
-	memset(tx, 0, sizeof(*tx));
+	RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
 }
 
 /**
@@ -1903,74 +1551,26 @@
 int hfi1_register_ib_device(struct hfi1_devdata *dd)
 {
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
+	struct ib_device *ibdev = &dev->rdi.ibdev;
 	struct hfi1_pportdata *ppd = dd->pport;
-	unsigned i, lk_tab_size;
+	unsigned i;
 	int ret;
 	size_t lcpysz = IB_DEVICE_NAME_MAX;
-	u16 descq_cnt;
-	char buf[TXREQ_NAME_LEN];
-
-	ret = hfi1_qp_init(dev);
-	if (ret)
-		goto err_qp_init;
-
 
 	for (i = 0; i < dd->num_pports; i++)
 		init_ibport(ppd + i);
 
 	/* Only need to initialize non-zero fields. */
-	spin_lock_init(&dev->n_pds_lock);
-	spin_lock_init(&dev->n_ahs_lock);
-	spin_lock_init(&dev->n_cqs_lock);
-	spin_lock_init(&dev->n_qps_lock);
-	spin_lock_init(&dev->n_srqs_lock);
-	spin_lock_init(&dev->n_mcast_grps_lock);
+
 	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
 
-	/*
-	 * The top hfi1_lkey_table_size bits are used to index the
-	 * table.  The lower 8 bits can be owned by the user (copied from
-	 * the LKEY).  The remaining bits act as a generation number or tag.
-	 */
-	spin_lock_init(&dev->lk_table.lock);
-	dev->lk_table.max = 1 << hfi1_lkey_table_size;
-	/* ensure generation is at least 4 bits (keys.c) */
-	if (hfi1_lkey_table_size > MAX_LKEY_TABLE_BITS) {
-		dd_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
-			      hfi1_lkey_table_size, MAX_LKEY_TABLE_BITS);
-		hfi1_lkey_table_size = MAX_LKEY_TABLE_BITS;
-	}
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	dev->lk_table.table = (struct hfi1_mregion __rcu **)
-		vmalloc(lk_tab_size);
-	if (dev->lk_table.table == NULL) {
-		ret = -ENOMEM;
-		goto err_lk;
-	}
-	RCU_INIT_POINTER(dev->dma_mr, NULL);
-	for (i = 0; i < dev->lk_table.max; i++)
-		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
-	INIT_LIST_HEAD(&dev->pending_mmaps);
-	spin_lock_init(&dev->pending_lock);
 	seqlock_init(&dev->iowait_lock);
-	dev->mmap_offset = PAGE_SIZE;
-	spin_lock_init(&dev->mmap_offset_lock);
 	INIT_LIST_HEAD(&dev->txwait);
 	INIT_LIST_HEAD(&dev->memwait);
 
-	descq_cnt = sdma_get_descq_cnt();
-
-	snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit);
-	/* SLAB_HWCACHE_ALIGN for AHG */
-	dev->verbs_txreq_cache = kmem_cache_create(buf,
-						   sizeof(struct verbs_txreq),
-						   0, SLAB_HWCACHE_ALIGN,
-						   verbs_txreq_kmem_cache_ctor);
-	if (!dev->verbs_txreq_cache) {
-		ret = -ENOMEM;
+	ret = verbs_txreq_init(dev);
+	if (ret)
 		goto err_verbs_txreq;
-	}
 
 	/*
 	 * The system image GUID is supposed to be the same for all
@@ -1983,142 +1583,119 @@
 	strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
 	ibdev->owner = THIS_MODULE;
 	ibdev->node_guid = cpu_to_be64(ppd->guid);
-	ibdev->uverbs_abi_ver = HFI1_UVERBS_ABI_VERSION;
-	ibdev->uverbs_cmd_mask =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	ibdev->node_type = RDMA_NODE_IB_CA;
 	ibdev->phys_port_cnt = dd->num_pports;
-	ibdev->num_comp_vectors = 1;
 	ibdev->dma_device = &dd->pcidev->dev;
-	ibdev->query_device = query_device;
 	ibdev->modify_device = modify_device;
-	ibdev->query_port = query_port;
-	ibdev->modify_port = modify_port;
-	ibdev->query_pkey = query_pkey;
-	ibdev->query_gid = query_gid;
-	ibdev->alloc_ucontext = alloc_ucontext;
-	ibdev->dealloc_ucontext = dealloc_ucontext;
-	ibdev->alloc_pd = alloc_pd;
-	ibdev->dealloc_pd = dealloc_pd;
-	ibdev->create_ah = create_ah;
-	ibdev->destroy_ah = destroy_ah;
-	ibdev->modify_ah = modify_ah;
-	ibdev->query_ah = query_ah;
-	ibdev->create_srq = hfi1_create_srq;
-	ibdev->modify_srq = hfi1_modify_srq;
-	ibdev->query_srq = hfi1_query_srq;
-	ibdev->destroy_srq = hfi1_destroy_srq;
-	ibdev->create_qp = hfi1_create_qp;
-	ibdev->modify_qp = hfi1_modify_qp;
-	ibdev->query_qp = hfi1_query_qp;
-	ibdev->destroy_qp = hfi1_destroy_qp;
-	ibdev->post_send = post_send;
-	ibdev->post_recv = post_receive;
-	ibdev->post_srq_recv = hfi1_post_srq_receive;
-	ibdev->create_cq = hfi1_create_cq;
-	ibdev->destroy_cq = hfi1_destroy_cq;
-	ibdev->resize_cq = hfi1_resize_cq;
-	ibdev->poll_cq = hfi1_poll_cq;
-	ibdev->req_notify_cq = hfi1_req_notify_cq;
-	ibdev->get_dma_mr = hfi1_get_dma_mr;
-	ibdev->reg_user_mr = hfi1_reg_user_mr;
-	ibdev->dereg_mr = hfi1_dereg_mr;
-	ibdev->alloc_mr = hfi1_alloc_mr;
-	ibdev->alloc_fmr = hfi1_alloc_fmr;
-	ibdev->map_phys_fmr = hfi1_map_phys_fmr;
-	ibdev->unmap_fmr = hfi1_unmap_fmr;
-	ibdev->dealloc_fmr = hfi1_dealloc_fmr;
-	ibdev->attach_mcast = hfi1_multicast_attach;
-	ibdev->detach_mcast = hfi1_multicast_detach;
+
+	/* keep process mad in the driver */
 	ibdev->process_mad = hfi1_process_mad;
-	ibdev->mmap = hfi1_mmap;
-	ibdev->dma_ops = &hfi1_dma_mapping_ops;
-	ibdev->get_port_immutable = port_immutable;
 
 	strncpy(ibdev->node_desc, init_utsname()->nodename,
 		sizeof(ibdev->node_desc));
 
-	ret = ib_register_device(ibdev, hfi1_create_port_files);
-	if (ret)
-		goto err_reg;
+	/*
+	 * Fill in rvt info object.
+	 */
+	dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
+	dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
+	dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
+	dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
+	dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
+	dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be;
+	dd->verbs_dev.rdi.driver_f.query_port_state = query_port;
+	dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port;
+	dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg;
+	/*
+	 * Fill in rvt info device attributes.
+	 */
+	hfi1_fill_device_attr(dd);
 
-	ret = hfi1_create_agents(dev);
+	/* queue pair */
+	dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size;
+	dd->verbs_dev.rdi.dparms.qpn_start = 0;
+	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
+	dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
+	dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
+	dd->verbs_dev.rdi.dparms.qpn_res_end =
+	dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+	dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
+	dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
+	dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
+	dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK;
+	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
+	dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
+
+	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+	dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
+	dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
+	dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
+	dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
+	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters;
+	dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue;
+	dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
+	dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
+	dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
+	dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+
+	/* completeion queue */
+	snprintf(dd->verbs_dev.rdi.dparms.cq_name,
+		 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
+		 "hfi1_cq%d", dd->unit);
+	dd->verbs_dev.rdi.dparms.node = dd->node;
+
+	/* misc settings */
+	dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */
+	dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
+	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
+	dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+
+	ppd = dd->pport;
+	for (i = 0; i < dd->num_pports; i++, ppd++)
+		rvt_init_port(&dd->verbs_dev.rdi,
+			      &ppd->ibport_data.rvp,
+			      i,
+			      ppd->pkeys);
+
+	ret = rvt_register_device(&dd->verbs_dev.rdi);
 	if (ret)
-		goto err_agents;
+		goto err_verbs_txreq;
 
 	ret = hfi1_verbs_register_sysfs(dd);
 	if (ret)
 		goto err_class;
 
-	goto bail;
+	return ret;
 
 err_class:
-	hfi1_free_agents(dev);
-err_agents:
-	ib_unregister_device(ibdev);
-err_reg:
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 err_verbs_txreq:
-	kmem_cache_destroy(dev->verbs_txreq_cache);
-	vfree(dev->lk_table.table);
-err_lk:
-	hfi1_qp_exit(dev);
-err_qp_init:
+	verbs_txreq_exit(dev);
 	dd_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-bail:
 	return ret;
 }
 
 void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
 {
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
 
 	hfi1_verbs_unregister_sysfs(dd);
 
-	hfi1_free_agents(dev);
-
-	ib_unregister_device(ibdev);
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 
 	if (!list_empty(&dev->txwait))
 		dd_dev_err(dd, "txwait list not empty!\n");
 	if (!list_empty(&dev->memwait))
 		dd_dev_err(dd, "memwait list not empty!\n");
-	if (dev->dma_mr)
-		dd_dev_err(dd, "DMA MR not NULL!\n");
 
-	hfi1_qp_exit(dev);
 	del_timer_sync(&dev->mem_timer);
-	kmem_cache_destroy(dev->verbs_txreq_cache);
-	vfree(dev->lk_table.table);
+	verbs_txreq_exit(dev);
 }
 
 void hfi1_cnp_rcv(struct hfi1_packet *packet)
@@ -2126,7 +1703,7 @@
 	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_ib_header *hdr = packet->hdr;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	u32 lqpn, rqpn = 0;
 	u16 rlid = 0;
 	u8 sl, sc5, sc4_bit, svc_type;
@@ -2149,7 +1726,7 @@
 		svc_type = IB_CC_SVCTYPE_UD;
 		break;
 	default:
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		return;
 	}
 

diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 286e468..6c4670f 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -59,9 +56,13 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/completion.h>
+#include <linux/slab.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_mad.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
+#include <rdma/rdmavt_cq.h>
 
 struct hfi1_ctxtdata;
 struct hfi1_pportdata;
@@ -79,12 +80,6 @@
  */
 #define HFI1_UVERBS_ABI_VERSION       2
 
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
-
 #define IB_SEQ_NAK	(3 << 29)
 
 /* AETH NAK opcode values */
@@ -95,17 +90,6 @@
 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
 #define IB_NAK_INVALID_RD_REQUEST       0x64
 
-/* Flags for checking QP state (see ib_hfi1_state_ops[]) */
-#define HFI1_POST_SEND_OK                0x01
-#define HFI1_POST_RECV_OK                0x02
-#define HFI1_PROCESS_RECV_OK             0x04
-#define HFI1_PROCESS_SEND_OK             0x08
-#define HFI1_PROCESS_NEXT_SEND_OK        0x10
-#define HFI1_FLUSH_SEND			0x20
-#define HFI1_FLUSH_RECV			0x40
-#define HFI1_PROCESS_OR_FLUSH_SEND \
-	(HFI1_PROCESS_SEND_OK | HFI1_FLUSH_SEND)
-
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE       0x00
 #define IB_PMA_SAMPLE_STATUS_STARTED    0x01
@@ -208,341 +192,18 @@
 } __packed;
 
 /*
- * used for force cacheline alignment for AHG
+ * hfi1 specific data structures that will be hidden from rvt after the queue
+ * pair is made common
  */
-struct tx_pio_header {
-	struct hfi1_pio_header phdr;
-} ____cacheline_aligned;
-
-/*
- * There is one struct hfi1_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct hfi1_mcast_qp.
- */
-struct hfi1_mcast_qp {
-	struct list_head list;
-	struct hfi1_qp *qp;
-};
-
-struct hfi1_mcast {
-	struct rb_node rb_node;
-	union ib_gid mgid;
-	struct list_head qp_list;
-	wait_queue_head_t wait;
-	atomic_t refcount;
-	int n_attached;
-};
-
-/* Protection domain */
-struct hfi1_pd {
-	struct ib_pd ibpd;
-	int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct hfi1_ah {
-	struct ib_ah ibah;
-	struct ib_ah_attr attr;
-	atomic_t refcount;
-};
-
-/*
- * This structure is used by hfi1_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct hfi1_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct hfi1_cq_wc {
-	u32 head;               /* index of next entry to fill */
-	u32 tail;               /* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
-};
-
-/*
- * The completion queue structure.
- */
-struct hfi1_cq {
-	struct ib_cq ibcq;
-	struct kthread_work comptask;
-	struct hfi1_devdata *dd;
-	spinlock_t lock; /* protect changes in this struct */
-	u8 notify;
-	u8 triggered;
-	struct hfi1_cq_wc *queue;
-	struct hfi1_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * Used by the verbs layer.
- */
-struct hfi1_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of hfi1_segs that fit in a page. */
-#define HFI1_SEGSZ     (PAGE_SIZE / sizeof(struct hfi1_seg))
-
-struct hfi1_segarray {
-	struct hfi1_seg segs[HFI1_SEGSZ];
-};
-
-struct hfi1_mregion {
-	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-	u64 user_base;          /* User's address for this region */
-	u64 iova;               /* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;             /* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;           /* number of hfi1_segs in all the arrays */
-	u32 mapsz;              /* size of the map array */
-	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
-	u8  lkey_published;     /* in global table */
-	struct completion comp; /* complete when refcount goes to zero */
-	atomic_t refcount;
-	struct hfi1_segarray *map[0];    /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct hfi1_sge {
-	struct hfi1_mregion *mr;
-	void *vaddr;            /* kernel virtual address of segment */
-	u32 sge_length;         /* length of the SGE */
-	u32 length;             /* remaining length of the segment */
-	u16 m;                  /* current index: mr->map[m] */
-	u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct hfi1_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	struct hfi1_mregion mr;  /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct hfi1_swqe {
-	union {
-		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_rdma_wr rdma_wr;
-		struct ib_atomic_wr atomic_wr;
-		struct ib_ud_wr ud_wr;
-	};
-	u32 psn;                /* first packet sequence number */
-	u32 lpsn;               /* last packet sequence number */
-	u32 ssn;                /* send sequence number */
-	u32 length;             /* total length of data in sg_list */
-	struct hfi1_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct hfi1_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct hfi1_rwq {
-	u32 head;               /* new work requests posted to the head */
-	u32 tail;               /* receives pull requests from here. */
-	struct hfi1_rwqe wq[0];
-};
-
-struct hfi1_rq {
-	struct hfi1_rwq *wq;
-	u32 size;               /* size of RWQE array */
-	u8 max_sge;
-	/* protect changes in this struct */
-	spinlock_t lock ____cacheline_aligned_in_smp;
-};
-
-struct hfi1_srq {
-	struct ib_srq ibsrq;
-	struct hfi1_rq rq;
-	struct hfi1_mmap_info *ip;
-	/* send signal when number of RWQEs < limit */
-	u32 limit;
-};
-
-struct hfi1_sge_state {
-	struct hfi1_sge *sg_list;      /* next SGE to be used if any */
-	struct hfi1_sge sge;   /* progress state for the current SGE */
-	u32 total_len;
-	u8 num_sge;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct hfi1_ack_entry {
-	u8 opcode;
-	u8 sent;
-	u32 psn;
-	u32 lpsn;
-	union {
-		struct hfi1_sge rdma_sge;
-		u64 atomic_data;
-	};
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct hfi1_qp {
-	struct ib_qp ibqp;
-	/* read mostly fields above and below */
-	struct ib_ah_attr remote_ah_attr;
-	struct ib_ah_attr alt_ah_attr;
-	struct hfi1_qp __rcu *next;           /* link list for QPN hash table */
-	struct hfi1_swqe *s_wq;  /* send work queue */
-	struct hfi1_mmap_info *ip;
-	struct ahg_ib_header *s_hdr;     /* next packet header to send */
-	/* sc for UC/RC QPs - based on ah for UD */
-	u8 s_sc;
-	unsigned long timeout_jiffies;  /* computed from timeout */
-
-	enum ib_mtu path_mtu;
-	int srate_mbps;		/* s_srate (below) converted to Mbit/s */
-	u32 remote_qpn;
-	u32 pmtu;		/* decoded from path_mtu */
-	u32 qkey;               /* QKEY for this QP (for UD or RD) */
-	u32 s_size;             /* send work queue size */
-	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-	u32 s_ahgpsn;           /* set to the psn in the copy of the header */
-
-	u8 state;               /* QP state */
-	u8 allowed_ops;		/* high order bits of allowed opcodes */
-	u8 qp_access_flags;
-	u8 alt_timeout;         /* Alternate path timeout for this QP */
-	u8 timeout;             /* Timeout for this QP */
-	u8 s_srate;
-	u8 s_mig_state;
-	u8 port_num;
-	u8 s_pkey_index;        /* PKEY index to use */
-	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
-	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-	u8 s_retry_cnt;         /* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-	u8 s_max_sge;           /* size of s_wq->sg_list */
-	u8 s_draining;
-
-	/* start of read/write fields */
-	atomic_t refcount ____cacheline_aligned_in_smp;
-	wait_queue_head_t wait;
-
-
-	struct hfi1_ack_entry s_ack_queue[HFI1_MAX_RDMA_ATOMIC + 1]
-		____cacheline_aligned_in_smp;
-	struct hfi1_sge_state s_rdma_read_sge;
-
-	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
-	unsigned long r_aflags;
-	u64 r_wr_id;            /* ID for current receive WQE */
-	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-	u32 r_len;              /* total length of r_sge */
-	u32 r_rcv_len;          /* receive data len processed */
-	u32 r_psn;              /* expected rcv packet sequence number */
-	u32 r_msn;              /* message sequence number */
-
-	u8 r_adefered;         /* number of acks defered */
-	u8 r_state;             /* opcode of last packet received */
-	u8 r_flags;
-	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-
-	struct list_head rspwait;       /* link for waiting to respond */
-
-	struct hfi1_sge_state r_sge;     /* current receive data */
-	struct hfi1_rq r_rq;             /* receive work queue */
-
-	spinlock_t s_lock ____cacheline_aligned_in_smp;
-	struct hfi1_sge_state *s_cur_sge;
-	u32 s_flags;
-	struct hfi1_swqe *s_wqe;
-	struct hfi1_sge_state s_sge;     /* current send request data */
-	struct hfi1_mregion *s_rdma_mr;
-	struct sdma_engine *s_sde; /* current sde */
-	u32 s_cur_size;         /* size of send packet in bytes */
-	u32 s_len;              /* total length of s_sge */
-	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-	u32 s_next_psn;         /* PSN for next request */
-	u32 s_last_psn;         /* last response PSN processed */
-	u32 s_sending_psn;      /* lowest PSN that is being sent */
-	u32 s_sending_hpsn;     /* highest PSN that is being sent */
-	u32 s_psn;              /* current packet sequence number */
-	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-	u32 s_head;             /* new entries added here */
-	u32 s_tail;             /* next entry to process */
-	u32 s_cur;              /* current work queue entry */
-	u32 s_acked;            /* last un-ACK'ed entry */
-	u32 s_last;             /* last completed entry */
-	u32 s_ssn;              /* SSN of tail entry */
-	u32 s_lsn;              /* limit sequence number (credit) */
-	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-	u16 s_rdma_ack_cnt;
-	s8 s_ahgidx;
-	u8 s_state;             /* opcode of last packet sent */
-	u8 s_ack_state;         /* opcode of packet to ACK */
-	u8 s_nak_state;         /* non-zero if NAK is pending */
-	u8 r_nak_state;         /* non-zero if NAK is pending */
-	u8 s_retry;             /* requester retry counter */
-	u8 s_rnr_retry;         /* requester RNR retry counter */
-	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-
-	struct hfi1_sge_state s_ack_rdma_sge;
-	struct timer_list s_timer;
-
+struct hfi1_qp_priv {
+	struct ahg_ib_header *s_hdr;              /* next header to send */
+	struct sdma_engine *s_sde;                /* current sde */
+	struct send_context *s_sendcontext;       /* current sendcontext */
+	u8 s_sc;		                  /* SC[0..4] for next packet */
+	u8 r_adefered;                            /* number of acks defered */
 	struct iowait s_iowait;
-
-	struct hfi1_sge r_sg_list[0] /* verified SGEs */
-		____cacheline_aligned_in_smp;
+	struct timer_list s_rnr_timer;
+	struct rvt_qp *owner;
 };
 
 /*
@@ -553,123 +214,11 @@
 	struct hfi1_ibdev *dev;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
+	struct verbs_txreq *s_txreq;
 };
 
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define HFI1_R_WRID_VALID        0
-#define HFI1_R_REWIND_SGE        1
-
-/*
- * Bit definitions for r_flags.
- */
-#define HFI1_R_REUSE_SGE       0x01
-#define HFI1_R_RDMAR_SEQ       0x02
-/* defer ack until end of interrupt session */
-#define HFI1_R_RSP_DEFERED_ACK 0x04
-/* relay ack to send engine */
-#define HFI1_R_RSP_SEND        0x08
-#define HFI1_R_COMM_EST        0x10
-
-/*
- * Bit definitions for s_flags.
- *
- * HFI1_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
- * HFI1_S_BUSY - send tasklet is processing the QP
- * HFI1_S_TIMER - the RC retry timer is active
- * HFI1_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
- * HFI1_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
- *                         before processing the next SWQE
- * HFI1_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
- *                         before processing the next SWQE
- * HFI1_S_WAIT_RNR - waiting for RNR timeout
- * HFI1_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * HFI1_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                  next send completion entry not via send DMA
- * HFI1_S_WAIT_PIO - waiting for a send buffer to be available
- * HFI1_S_WAIT_TX - waiting for a struct verbs_txreq to be available
- * HFI1_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
- * HFI1_S_WAIT_KMEM - waiting for kernel memory to be available
- * HFI1_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
- * HFI1_S_WAIT_ACK - waiting for an ACK packet before sending more requests
- * HFI1_S_SEND_ONE - send one packet, request ACK, then wait for ACK
- * HFI1_S_ECN - a BECN was queued to the send engine
- */
-#define HFI1_S_SIGNAL_REQ_WR	0x0001
-#define HFI1_S_BUSY		0x0002
-#define HFI1_S_TIMER		0x0004
-#define HFI1_S_RESP_PENDING	0x0008
-#define HFI1_S_ACK_PENDING	0x0010
-#define HFI1_S_WAIT_FENCE	0x0020
-#define HFI1_S_WAIT_RDMAR	0x0040
-#define HFI1_S_WAIT_RNR		0x0080
-#define HFI1_S_WAIT_SSN_CREDIT	0x0100
-#define HFI1_S_WAIT_DMA		0x0200
-#define HFI1_S_WAIT_PIO		0x0400
-#define HFI1_S_WAIT_TX		0x0800
-#define HFI1_S_WAIT_DMA_DESC	0x1000
-#define HFI1_S_WAIT_KMEM		0x2000
-#define HFI1_S_WAIT_PSN		0x4000
-#define HFI1_S_WAIT_ACK		0x8000
-#define HFI1_S_SEND_ONE		0x10000
-#define HFI1_S_UNLIMITED_CREDIT	0x20000
-#define HFI1_S_AHG_VALID		0x40000
-#define HFI1_S_AHG_CLEAR		0x80000
-#define HFI1_S_ECN		0x100000
-
-/*
- * Wait flags that would prevent any packet type from being sent.
- */
-#define HFI1_S_ANY_WAIT_IO (HFI1_S_WAIT_PIO | HFI1_S_WAIT_TX | \
-	HFI1_S_WAIT_DMA_DESC | HFI1_S_WAIT_KMEM)
-
-/*
- * Wait flags that would prevent send work requests from making progress.
- */
-#define HFI1_S_ANY_WAIT_SEND (HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR | \
-	HFI1_S_WAIT_RNR | HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_DMA | \
-	HFI1_S_WAIT_PSN | HFI1_S_WAIT_ACK)
-
-#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | HFI1_S_ANY_WAIT_SEND)
-
 #define HFI1_PSN_CREDIT  16
 
-/*
- * Since struct hfi1_swqe is not a fixed size, we can't simply index into
- * struct hfi1_qp.s_wq.  This function does the array index computation.
- */
-static inline struct hfi1_swqe *get_swqe_ptr(struct hfi1_qp *qp,
-					     unsigned n)
-{
-	return (struct hfi1_swqe *)((char *)qp->s_wq +
-				     (sizeof(struct hfi1_swqe) +
-				      qp->s_max_sge *
-				      sizeof(struct hfi1_sge)) * n);
-}
-
-/*
- * Since struct hfi1_rwqe is not a fixed size, we can't simply index into
- * struct hfi1_rwq.wq.  This function does the array index computation.
- */
-static inline struct hfi1_rwqe *get_rwqe_ptr(struct hfi1_rq *rq, unsigned n)
-{
-	return (struct hfi1_rwqe *)
-		((char *) rq->wq->wq +
-		 (sizeof(struct hfi1_rwqe) +
-		  rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-#define MAX_LKEY_TABLE_BITS 23
-
-struct hfi1_lkey_table {
-	spinlock_t lock; /* protect changes in this struct */
-	u32 next;               /* next unused index (speeds search) */
-	u32 gen;                /* generation count */
-	u32 max;                /* size of the table */
-	struct hfi1_mregion __rcu **table;
-};
-
 struct hfi1_opcode_stats {
 	u64 n_packets;          /* number of packets */
 	u64 n_bytes;            /* total number of bytes */
@@ -690,75 +239,20 @@
 }
 
 struct hfi1_ibport {
-	struct hfi1_qp __rcu *qp[2];
-	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
-	struct hfi1_ah *sm_ah;
-	struct hfi1_ah *smi_ah;
-	struct rb_root mcast_tree;
-	spinlock_t lock;		/* protect changes in this struct */
+	struct rvt_qp __rcu *qp[2];
+	struct rvt_ibport rvp;
 
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-	unsigned long trap_timeout;
-	__be64 gid_prefix;      /* in network order */
-	__be64 mkey;
 	__be64 guids[HFI1_GUIDS_PER_PORT	- 1];	/* writable GUIDs */
-	u64 tid;		/* TID for traps */
-	u64 n_rc_resends;
-	u64 n_seq_naks;
-	u64 n_rdma_seq;
-	u64 n_rnr_naks;
-	u64 n_other_naks;
-	u64 n_loop_pkts;
-	u64 n_pkt_drops;
-	u64 n_vl15_dropped;
-	u64 n_rc_timeouts;
-	u64 n_dmawait;
-	u64 n_unaligned;
-	u64 n_rc_dupreq;
-	u64 n_rc_seqnak;
 
-	/* Hot-path per CPU counters to avoid cacheline trading to update */
-	u64 z_rc_acks;
-	u64 z_rc_qacks;
-	u64 z_rc_delayed_comp;
-	u64 __percpu *rc_acks;
-	u64 __percpu *rc_qacks;
-	u64 __percpu *rc_delayed_comp;
-
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 pkey_violations;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 sm_lid;
-	u16 repress_traps;
-	u8 sm_sl;
-	u8 mkeyprot;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
 	/* the first 16 entries are sl_to_vl for !OPA */
 	u8 sl_to_sc[32];
 	u8 sc_to_sl[32];
 };
 
-
-struct hfi1_qp_ibdev;
 struct hfi1_ibdev {
-	struct ib_device ibdev;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock; /* protect mmap_offset */
-	u32 mmap_offset;
-	struct hfi1_mregion __rcu *dma_mr;
-
-	struct hfi1_qp_ibdev *qp_dev;
+	struct rvt_dev_info rdi; /* Must be first */
 
 	/* QP numbers are shared by all IB ports */
-	struct hfi1_lkey_table lk_table;
 	/* protect wait lists */
 	seqlock_t iowait_lock;
 	struct list_head txwait;        /* list for wait verbs_txreq */
@@ -767,26 +261,11 @@
 	struct kmem_cache *verbs_txreq_cache;
 	struct timer_list mem_timer;
 
-	/* other waiters */
-	spinlock_t pending_lock;
-
 	u64 n_piowait;
+	u64 n_piodrain;
 	u64 n_txwait;
 	u64 n_kmem_wait;
-	u64 n_send_schedule;
 
-	u32 n_pds_allocated;    /* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;    /* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;    /* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;    /* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
 #ifdef CONFIG_DEBUG_FS
 	/* per HFI debugfs */
 	struct dentry *hfi1_ibdev_dbg;
@@ -795,66 +274,31 @@
 #endif
 };
 
-struct hfi1_verbs_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-	u32 vl15_dropped;
-};
-
-static inline struct hfi1_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct hfi1_mr, ibmr);
-}
-
-static inline struct hfi1_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct hfi1_pd, ibpd);
-}
-
-static inline struct hfi1_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct hfi1_ah, ibah);
-}
-
-static inline struct hfi1_cq *to_icq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct hfi1_cq, ibcq);
-}
-
-static inline struct hfi1_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct hfi1_srq, ibsrq);
-}
-
-static inline struct hfi1_qp *to_iqp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct hfi1_qp, ibqp);
-}
-
 static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
 {
-	return container_of(ibdev, struct hfi1_ibdev, ibdev);
+	struct rvt_dev_info *rdi;
+
+	rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
+	return container_of(rdi, struct hfi1_ibdev, rdi);
+}
+
+static inline struct rvt_qp *iowait_to_qp(struct  iowait *s_iowait)
+{
+	struct hfi1_qp_priv *priv;
+
+	priv = container_of(s_iowait, struct hfi1_qp_priv, s_iowait);
+	return priv->owner;
 }
 
 /*
  * Send if not busy or waiting for I/O and either
  * a RC response is pending or we can process send work requests.
  */
-static inline int hfi1_send_ok(struct hfi1_qp *qp)
+static inline int hfi1_send_ok(struct rvt_qp *qp)
 {
-	return !(qp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
-		(qp->s_hdrwords || (qp->s_flags & HFI1_S_RESP_PENDING) ||
-		 !(qp->s_flags & HFI1_S_ANY_WAIT_SEND));
+	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
+		(qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
+		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
 /*
@@ -862,7 +306,7 @@
  */
 void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 		    u32 qp1, u32 qp2, u16 lid1, u16 lid2);
-void hfi1_cap_mask_chg(struct hfi1_ibport *ibp);
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
 void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
@@ -870,8 +314,6 @@
 		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
 		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
 		     u16 *out_mad_pkey_index);
-int hfi1_create_agents(struct hfi1_ibdev *dev);
-void hfi1_free_agents(struct hfi1_ibdev *dev);
 
 /*
  * The PSN_MASK and PSN_SHIFT allow for
@@ -901,7 +343,7 @@
  */
 static inline int cmp_msn(u32 a, u32 b)
 {
-	return (((int) a) - ((int) b)) << 8;
+	return (((int)a) - ((int)b)) << 8;
 }
 
 /*
@@ -910,7 +352,7 @@
  */
 static inline int cmp_psn(u32 a, u32 b)
 {
-	return (((int) a) - ((int) b)) << PSN_SHIFT;
+	return (((int)a) - ((int)b)) << PSN_SHIFT;
 }
 
 /*
@@ -929,23 +371,15 @@
 	return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
 }
 
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid);
-
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp);
-
 struct verbs_txreq;
 void hfi1_put_txreq(struct verbs_txreq *tx);
 
-int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps);
+int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-void hfi1_copy_sge(struct hfi1_sge_state *ss, void *data, u32 length,
-		   int release);
+void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
+		   int release, int copy_last);
 
-void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release);
+void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
 
 void hfi1_cnp_rcv(struct hfi1_packet *packet);
 
@@ -957,147 +391,75 @@
 	struct hfi1_ctxtdata *rcd,
 	struct hfi1_ib_header *hdr,
 	u32 rcv_flags,
-	struct hfi1_qp *qp);
+	struct rvt_qp *qp);
 
 u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 
-int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
-
 struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
 
 void hfi1_rc_rnr_retry(unsigned long arg);
+void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
+void hfi1_rc_timeout(unsigned long arg);
+void hfi1_del_timers_sync(struct rvt_qp *qp);
+void hfi1_stop_rc_timers(struct rvt_qp *qp);
 
-void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
 
-void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err);
+void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 
 void hfi1_ud_rcv(struct hfi1_packet *packet);
 
 int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
 
-int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region);
+int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);
 
-void hfi1_free_lkey(struct hfi1_mregion *mr);
+void hfi1_migrate_qp(struct rvt_qp *qp);
 
-int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
-		 struct hfi1_sge *isge, struct ib_sge *sge, int acc);
+int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			 int attr_mask, struct ib_udata *udata);
 
-int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
-		 u32 len, u64 vaddr, u32 rkey, int acc);
+void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+		    int attr_mask, struct ib_udata *udata);
 
-int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
+int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
 
-struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
-			       struct ib_srq_init_attr *srq_init_attr,
-			       struct ib_udata *udata);
+extern const u32 rc_only_opcode;
+extern const u32 uc_only_opcode;
 
-int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask,
-		    struct ib_udata *udata);
-
-int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int hfi1_destroy_srq(struct ib_srq *ibsrq);
-
-int hfi1_cq_init(struct hfi1_devdata *dd);
-
-void hfi1_cq_exit(struct hfi1_devdata *dd);
-
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int sig);
-
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *hfi1_create_cq(
-	struct ib_device *ibdev,
-	const struct ib_cq_init_attr *attr,
-	struct ib_ucontext *context,
-	struct ib_udata *udata);
-
-int hfi1_destroy_cq(struct ib_cq *ibcq);
-
-int hfi1_req_notify_cq(
-	struct ib_cq *ibcq,
-	enum ib_cq_notify_flags notify_flags);
-
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt_addr, int mr_access_flags,
-			       struct ib_udata *udata);
-
-int hfi1_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
-			    enum ib_mr_type mr_type,
-			    u32 max_entries);
-
-struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr);
-
-int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		      int list_len, u64 iova);
-
-int hfi1_unmap_fmr(struct list_head *fmr_list);
-
-int hfi1_dealloc_fmr(struct ib_fmr *ibfmr);
-
-static inline void hfi1_get_mr(struct hfi1_mregion *mr)
+static inline u8 get_opcode(struct hfi1_ib_header *h)
 {
-	atomic_inc(&mr->refcount);
+	u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
+
+	if (lnh == IB_LNH_IBA_LOCAL)
+		return be32_to_cpu(h->u.oth.bth[0]) >> 24;
+	else
+		return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
 }
 
-static inline void hfi1_put_mr(struct hfi1_mregion *mr)
-{
-	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		complete(&mr->comp);
-}
-
-static inline void hfi1_put_ss(struct hfi1_sge_state *ss)
-{
-	while (ss->num_sge) {
-		hfi1_put_mr(ss->sge.mr);
-		if (--ss->num_sge)
-			ss->sge = *ss->sg_list++;
-	}
-}
-
-void hfi1_release_mmap_info(struct kref *ref);
-
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, u32 size,
-					     struct ib_ucontext *context,
-					     void *obj);
-
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
-			   u32 size, void *obj);
-
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only);
-
 int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
-		       int has_grh, struct hfi1_qp *qp, u32 bth0);
+		       int has_grh, struct rvt_qp *qp, u32 bth0);
 
 u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
 		  struct ib_global_route *grh, u32 hwords, u32 nwords);
 
-void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
-			  u32 bth0, u32 bth2, int middle);
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+			  u32 bth0, u32 bth2, int middle,
+			  struct hfi1_pkt_state *ps);
 
-void hfi1_do_send(struct work_struct *work);
+void _hfi1_do_send(struct work_struct *work);
 
-void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
+void hfi1_do_send(struct rvt_qp *qp);
+
+void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 			enum ib_wc_status status);
 
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct hfi1_qp *qp, int is_fecn);
+void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn);
 
-int hfi1_make_rc_req(struct hfi1_qp *qp);
+int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-int hfi1_make_uc_req(struct hfi1_qp *qp);
+int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-int hfi1_make_ud_req(struct hfi1_qp *qp);
+int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
 int hfi1_register_ib_device(struct hfi1_devdata *);
 
@@ -1107,24 +469,42 @@
 
 unsigned hfi1_get_npkeys(struct hfi1_devdata *);
 
-int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc);
 
-int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc);
 
-struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5);
+int hfi1_wss_init(void);
+void hfi1_wss_exit(void);
+
+/* platform specific: return the lowest level cache (llc) size, in KiB */
+static inline int wss_llc_size(void)
+{
+	/* assume that the boot CPU value is universal for all CPUs */
+	return boot_cpu_data.x86_cache_size;
+}
+
+/* platform specific: cacheless copy */
+static inline void cacheless_memcpy(void *dst, void *src, size_t n)
+{
+	/*
+	 * Use the only available X64 cacheless copy.  Add a __user cast
+	 * to quiet sparse.  The src agument is already in the kernel so
+	 * there are no security issues.  The extra fault recovery machinery
+	 * is not invoked.
+	 */
+	__copy_user_nocache(dst, (void __user *)src, n, 0);
+}
 
 extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
 
 extern const u8 hdr_len_by_opcode[];
 
-extern const int ib_hfi1_state_ops[];
+extern const int ib_rvt_state_ops[];
 
 extern __be64 ib_hfi1_sys_image_guid;    /* in network order */
 
-extern unsigned int hfi1_lkey_table_size;
-
 extern unsigned int hfi1_max_cqes;
 
 extern unsigned int hfi1_max_cqs;
@@ -1145,8 +525,8 @@
 
 extern unsigned int hfi1_max_srq_wrs;
 
-extern const u32 ib_hfi1_rnr_table[];
+extern unsigned short piothreshold;
 
-extern struct ib_dma_mapping_ops hfi1_dma_mapping_ops;
+extern const u32 ib_hfi1_rnr_table[];
 
 #endif                          /* HFI1_VERBS_H */

diff --git a/drivers/staging/rdma/hfi1/verbs_mcast.c b/drivers/staging/rdma/hfi1/verbs_mcast.c
deleted file mode 100644
index afc6b4c..0000000
--- a/drivers/staging/rdma/hfi1/verbs_mcast.c
+++ /dev/null

@@ -1,385 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/rculist.h>
-
-#include "hfi.h"
-
-/**
- * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct hfi1_mcast_qp *mcast_qp_alloc(struct hfi1_qp *qp)
-{
-	struct hfi1_mcast_qp *mqp;
-
-	mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
-	if (!mqp)
-		goto bail;
-
-	mqp->qp = qp;
-	atomic_inc(&qp->refcount);
-
-bail:
-	return mqp;
-}
-
-static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
-{
-	struct hfi1_qp *qp = mqp->qp;
-
-	/* Notify hfi1_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-
-	kfree(mqp);
-}
-
-/**
- * mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
-{
-	struct hfi1_mcast *mcast;
-
-	mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
-	if (!mcast)
-		goto bail;
-
-	mcast->mgid = *mgid;
-	INIT_LIST_HEAD(&mcast->qp_list);
-	init_waitqueue_head(&mcast->wait);
-	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
-
-bail:
-	return mcast;
-}
-
-static void mcast_free(struct hfi1_mcast *mcast)
-{
-	struct hfi1_mcast_qp *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		mcast_qp_free(p);
-
-	kfree(mcast);
-}
-
-/**
- * hfi1_mcast_find - search the global table for the given multicast GID
- * @ibp: the IB port structure
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid)
-{
-	struct rb_node *n;
-	unsigned long flags;
-	struct hfi1_mcast *mcast;
-
-	spin_lock_irqsave(&ibp->lock, flags);
-	n = ibp->mcast_tree.rb_node;
-	while (n) {
-		int ret;
-
-		mcast = rb_entry(n, struct hfi1_mcast, rb_node);
-
-		ret = memcmp(mgid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else {
-			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&ibp->lock, flags);
-			goto bail;
-		}
-	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
-}
-
-/**
- * mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
-		     struct hfi1_mcast *mcast, struct hfi1_mcast_qp *mqp)
-{
-	struct rb_node **n = &ibp->mcast_tree.rb_node;
-	struct rb_node *pn = NULL;
-	int ret;
-
-	spin_lock_irq(&ibp->lock);
-
-	while (*n) {
-		struct hfi1_mcast *tmcast;
-		struct hfi1_mcast_qp *p;
-
-		pn = *n;
-		tmcast = rb_entry(pn, struct hfi1_mcast, rb_node);
-
-		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0) {
-			n = &pn->rb_left;
-			continue;
-		}
-		if (ret > 0) {
-			n = &pn->rb_right;
-			continue;
-		}
-
-		/* Search the QP list to see if this is already there. */
-		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-			if (p->qp == mqp->qp) {
-				ret = ESRCH;
-				goto bail;
-			}
-		}
-		if (tmcast->n_attached == hfi1_max_mcast_qp_attached) {
-			ret = ENOMEM;
-			goto bail;
-		}
-
-		tmcast->n_attached++;
-
-		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-		ret = EEXIST;
-		goto bail;
-	}
-
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == hfi1_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
-		ret = ENOMEM;
-		goto bail;
-	}
-
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
-
-	mcast->n_attached++;
-
-	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-	atomic_inc(&mcast->refcount);
-	rb_link_node(&mcast->rb_node, pn, n);
-	rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
-
-	ret = 0;
-
-bail:
-	spin_unlock_irq(&ibp->lock);
-
-	return ret;
-}
-
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_ibport *ibp;
-	struct hfi1_mcast *mcast;
-	struct hfi1_mcast_qp *mqp;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Allocate data structures since its better to do this outside of
-	 * spin locks and it will most likely be needed.
-	 */
-	mcast = mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	ibp = to_iport(ibqp->device, qp->port_num);
-	switch (mcast_add(dev, ibp, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: OK to attach the same QP twice. */
-		mcast_qp_free(mqp);
-		mcast_free(mcast);
-		break;
-
-	case EEXIST:            /* The mcast wasn't used */
-		mcast_free(mcast);
-		break;
-
-	case ENOMEM:
-		/* Exceeded the maximum number of mcast groups. */
-		mcast_qp_free(mqp);
-		mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-
-	default:
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num);
-	struct hfi1_mcast *mcast = NULL;
-	struct hfi1_mcast_qp *p, *tmp;
-	struct rb_node *n;
-	int last = 0;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irq(&ibp->lock);
-
-	/* Find the GID in the mcast table. */
-	n = ibp->mcast_tree.rb_node;
-	while (1) {
-		if (n == NULL) {
-			spin_unlock_irq(&ibp->lock);
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		mcast = rb_entry(n, struct hfi1_mcast, rb_node);
-		ret = memcmp(gid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else
-			break;
-	}
-
-	/* Search the QP list. */
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-		if (p->qp != qp)
-			continue;
-		/*
-		 * We found it, so remove it, but don't poison the forward
-		 * link until we are sure there are no list walkers.
-		 */
-		list_del_rcu(&p->list);
-		mcast->n_attached--;
-
-		/* If this was the last attached QP, remove the GID too. */
-		if (list_empty(&mcast->qp_list)) {
-			rb_erase(&mcast->rb_node, &ibp->mcast_tree);
-			last = 1;
-		}
-		break;
-	}
-
-	spin_unlock_irq(&ibp->lock);
-
-	if (p) {
-		/*
-		 * Wait for any list walkers to finish before freeing the
-		 * list element.
-		 */
-		wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-		mcast_qp_free(p);
-	}
-	if (last) {
-		atomic_dec(&mcast->refcount);
-		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp)
-{
-	return ibp->mcast_tree.rb_node == NULL;
-}

diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/staging/rdma/hfi1/verbs_txreq.c
new file mode 100644
index 0000000..bc95c41
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.c

@@ -0,0 +1,149 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "verbs_txreq.h"
+#include "qp.h"
+#include "trace.h"
+
+#define TXREQ_LEN 24
+
+void hfi1_put_txreq(struct verbs_txreq *tx)
+{
+	struct hfi1_ibdev *dev;
+	struct rvt_qp *qp;
+	unsigned long flags;
+	unsigned int seq;
+	struct hfi1_qp_priv *priv;
+
+	qp = tx->qp;
+	dev = to_idev(qp->ibqp.device);
+
+	if (tx->mr)
+		rvt_put_mr(tx->mr);
+
+	sdma_txclean(dd_from_dev(dev), &tx->txreq);
+
+	/* Free verbs_txreq and return to slab cache */
+	kmem_cache_free(dev->verbs_txreq_cache, tx);
+
+	do {
+		seq = read_seqbegin(&dev->iowait_lock);
+		if (!list_empty(&dev->txwait)) {
+			struct iowait *wait;
+
+			write_seqlock_irqsave(&dev->iowait_lock, flags);
+			wait = list_first_entry(&dev->txwait, struct iowait,
+						list);
+			qp = iowait_to_qp(wait);
+			priv = qp->priv;
+			list_del_init(&priv->s_iowait.list);
+			/* refcount held until actual wake up */
+			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+			hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
+			break;
+		}
+	} while (read_seqretry(&dev->iowait_lock, seq));
+}
+
+struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+				struct rvt_qp *qp)
+{
+	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	write_seqlock(&dev->iowait_lock);
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		struct hfi1_qp_priv *priv;
+
+		tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+		if (tx)
+			goto out;
+		priv = qp->priv;
+		if (list_empty(&priv->s_iowait.list)) {
+			dev->n_txwait++;
+			qp->s_flags |= RVT_S_WAIT_TX;
+			list_add_tail(&priv->s_iowait.list, &dev->txwait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
+			atomic_inc(&qp->refcount);
+		}
+		qp->s_flags &= ~RVT_S_BUSY;
+	}
+out:
+	write_sequnlock(&dev->iowait_lock);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return tx;
+}
+
+static void verbs_txreq_kmem_cache_ctor(void *obj)
+{
+	struct verbs_txreq *tx = (struct verbs_txreq *)obj;
+
+	memset(tx, 0, sizeof(*tx));
+}
+
+int verbs_txreq_init(struct hfi1_ibdev *dev)
+{
+	char buf[TXREQ_LEN];
+	struct hfi1_devdata *dd = dd_from_dev(dev);
+
+	snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit);
+	dev->verbs_txreq_cache = kmem_cache_create(buf,
+						   sizeof(struct verbs_txreq),
+						   0, SLAB_HWCACHE_ALIGN,
+						   verbs_txreq_kmem_cache_ctor);
+	if (!dev->verbs_txreq_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void verbs_txreq_exit(struct hfi1_ibdev *dev)
+{
+	kmem_cache_destroy(dev->verbs_txreq_cache);
+	dev->verbs_txreq_cache = NULL;
+}

diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h
new file mode 100644
index 0000000..1cf69b2
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.h

@@ -0,0 +1,116 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HFI1_VERBS_TXREQ_H
+#define HFI1_VERBS_TXREQ_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "verbs.h"
+#include "sdma_txreq.h"
+#include "iowait.h"
+
+struct verbs_txreq {
+	struct hfi1_pio_header	phdr;
+	struct sdma_txreq       txreq;
+	struct rvt_qp           *qp;
+	struct rvt_swqe         *wqe;
+	struct rvt_mregion	*mr;
+	struct rvt_sge_state    *ss;
+	struct sdma_engine     *sde;
+	struct send_context     *psc;
+	u16                     hdr_dwords;
+};
+
+struct hfi1_ibdev;
+struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+				struct rvt_qp *qp);
+
+static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
+					    struct rvt_qp *qp)
+{
+	struct verbs_txreq *tx;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+	if (unlikely(!tx)) {
+		/* call slow path to get the lock */
+		tx = __get_txreq(dev, qp);
+		if (IS_ERR(tx))
+			return tx;
+	}
+	tx->qp = qp;
+	tx->mr = NULL;
+	tx->sde = priv->s_sde;
+	tx->psc = priv->s_sendcontext;
+	/* so that we can test if the sdma decriptors are there */
+	tx->txreq.num_desc = 0;
+	return tx;
+}
+
+static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
+{
+	return &tx->txreq;
+}
+
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+{
+	struct sdma_txreq *stx;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	stx = iowait_get_txhead(&priv->s_iowait);
+	if (stx)
+		return container_of(stx, struct verbs_txreq, txreq);
+	return NULL;
+}
+
+void hfi1_put_txreq(struct verbs_txreq *tx);
+int verbs_txreq_init(struct hfi1_ibdev *dev);
+void verbs_txreq_exit(struct hfi1_ibdev *dev);
+
+#endif                         /* HFI1_VERBS_TXREQ_H */

diff --git a/drivers/staging/rtl8712/TODO b/drivers/staging/rtl8712/TODO
index d8dfe5b..847c8c4 100644
--- a/drivers/staging/rtl8712/TODO
+++ b/drivers/staging/rtl8712/TODO

@@ -4,10 +4,10 @@
 - switch to use MAC80211
 - checkpatch.pl fixes - only a few remain
 
-Please send any patches to Greg Kroah-Hartman <greg@kroah.com>,
-Larry Finger <Larry.Finger@lwfinger.net> and
-Florian Schilhabel <florian.c.schilhabel@googlemail.com>.
+A replacement for this driver with MAC80211 support is available
+at https://github.com/chunkeey/rtl8192su
 
-
-
-
+Please send any patches to Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
+Larry Finger <Larry.Finger@lwfinger.net>,
+Florian Schilhabel <florian.c.schilhabel@googlemail.com> and
+Linux Driver Project Developer List <driverdev-devel@linuxdriverproject.org>.

diff --git a/drivers/staging/rtl8723au/TODO b/drivers/staging/rtl8723au/TODO
index f5d57d3..42b86e4 100644
--- a/drivers/staging/rtl8723au/TODO
+++ b/drivers/staging/rtl8723au/TODO

@@ -9,5 +9,8 @@
 - merge Realtek's bugfixes and new features into the driver
 - switch to use MAC80211
 
+A mac80211 driver for this hardware already was integrated at
+drivers/net/wireless/realtek/rtl8xxxu/
+
 Please send any patches to Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
 Jes Sorensen <Jes.Sorensen@redhat.com>, and Larry Finger <Larry.Finger@lwfinger.net>.

diff --git a/drivers/staging/ste_rmi4/Kconfig b/drivers/staging/ste_rmi4/Kconfig
deleted file mode 100644
index e867950..0000000
--- a/drivers/staging/ste_rmi4/Kconfig
+++ /dev/null

@@ -1,9 +0,0 @@
-config TOUCHSCREEN_SYNAPTICS_I2C_RMI4
-	tristate "Synaptics i2c rmi4 touchscreen"
-	depends on I2C && INPUT
-	help
-	  Say Y here if you have a Synaptics RMI4 and
-	  want to enable support for the built-in touchscreen.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called synaptics_rmi4_ts.

diff --git a/drivers/staging/ste_rmi4/Makefile b/drivers/staging/ste_rmi4/Makefile
deleted file mode 100644
index 6cce2ed..0000000
--- a/drivers/staging/ste_rmi4/Makefile
+++ /dev/null

@@ -1,4 +0,0 @@
-#
-# Makefile for the RMI4 touchscreen driver.
-#
-obj-$(CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4) += synaptics_i2c_rmi4.o

diff --git a/drivers/staging/ste_rmi4/TODO b/drivers/staging/ste_rmi4/TODO
deleted file mode 100644
index 9be2437..0000000
--- a/drivers/staging/ste_rmi4/TODO
+++ /dev/null

@@ -1,7 +0,0 @@
-TODO
-----
-
-Wait for the official upstream synaptics rmi4 clearpad drivers as promised over the past few months
-Merge any device support needed from this driver into it
-Delete this driver
-

diff --git a/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.c b/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.c
deleted file mode 100644
index 774958a..0000000
--- a/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.c
+++ /dev/null

@@ -1,1137 +0,0 @@
-/**
- *
- * Synaptics Register Mapped Interface (RMI4) I2C Physical Layer Driver.
- * Copyright (c) 2007-2010, Synaptics Incorporated
- *
- * Author: Js HA <js.ha@stericsson.com> for ST-Ericsson
- * Author: Naveen Kumar G <naveen.gaddipati@stericsson.com> for ST-Ericsson
- * Copyright 2010 (c) ST-Ericsson AB
- */
-/*
- * This file is licensed under the GPL2 license.
- *
- *#############################################################################
- * GPL
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- *#############################################################################
- */
-
-#include <linux/input.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/interrupt.h>
-#include <linux/regulator/consumer.h>
-#include <linux/module.h>
-#include <linux/input/mt.h>
-#include "synaptics_i2c_rmi4.h"
-
-/* TODO: for multiple device support will need a per-device mutex */
-#define DRIVER_NAME "synaptics_rmi4_i2c"
-
-#define MAX_ERROR_REPORT	6
-#define MAX_TOUCH_MAJOR		15
-#define MAX_RETRY_COUNT		5
-#define STD_QUERY_LEN		21
-#define PAGE_LEN		2
-#define DATA_BUF_LEN		32
-#define BUF_LEN			37
-#define QUERY_LEN		9
-#define DATA_LEN		12
-#define HAS_TAP			0x01
-#define HAS_PALMDETECT		0x01
-#define HAS_ROTATE		0x02
-#define HAS_TAPANDHOLD		0x02
-#define HAS_DOUBLETAP		0x04
-#define HAS_EARLYTAP		0x08
-#define HAS_RELEASE		0x08
-#define HAS_FLICK		0x10
-#define HAS_PRESS		0x20
-#define HAS_PINCH		0x40
-
-#define MASK_16BIT		0xFFFF
-#define MASK_8BIT		0xFF
-#define MASK_7BIT		0x7F
-#define MASK_5BIT		0x1F
-#define MASK_4BIT		0x0F
-#define MASK_3BIT		0x07
-#define MASK_2BIT		0x03
-#define TOUCHPAD_CTRL_INTR	0x8
-#define PDT_START_SCAN_LOCATION (0x00E9)
-#define PDT_END_SCAN_LOCATION	(0x000A)
-#define PDT_ENTRY_SIZE		(0x0006)
-#define SYNAPTICS_RMI4_TOUCHPAD_FUNC_NUM	(0x11)
-#define SYNAPTICS_RMI4_DEVICE_CONTROL_FUNC_NUM	(0x01)
-
-/**
- * struct synaptics_rmi4_fn_desc - contains the function descriptor information
- * @query_base_addr: base address for query
- * @cmd_base_addr: base address for command
- * @ctrl_base_addr: base address for control
- * @data_base_addr: base address for data
- * @intr_src_count: count for the interrupt source
- * @fn_number: function number
- *
- * This structure is used to gives the function descriptor information
- * of the particular functionality.
- */
-struct synaptics_rmi4_fn_desc {
-	unsigned char	query_base_addr;
-	unsigned char	cmd_base_addr;
-	unsigned char	ctrl_base_addr;
-	unsigned char	data_base_addr;
-	unsigned char	intr_src_count;
-	unsigned char	fn_number;
-};
-
-/**
- * struct synaptics_rmi4_fn - contains the function information
- * @fn_number: function number
- * @num_of_data_sources: number of data sources
- * @num_of_data_points: number of fingers touched
- * @size_of_data_register_block: data register block size
- * @index_to_intr_reg: index for interrupt register
- * @intr_mask: interrupt mask value
- * @fn_desc: variable for function descriptor structure
- * @link: linked list for function descriptors
- *
- * This structure gives information about the number of data sources and
- * the number of data registers associated with the function.
- */
-struct synaptics_rmi4_fn {
-	unsigned char		fn_number;
-	unsigned char		num_of_data_sources;
-	unsigned char		num_of_data_points;
-	unsigned char		size_of_data_register_block;
-	unsigned char		index_to_intr_reg;
-	unsigned char		intr_mask;
-	struct synaptics_rmi4_fn_desc	fn_desc;
-	struct list_head	link;
-};
-
-/**
- * struct synaptics_rmi4_device_info - contains the rmi4 device information
- * @version_major: protocol major version number
- * @version_minor: protocol minor version number
- * @manufacturer_id: manufacturer identification byte
- * @product_props: product properties information
- * @product_info: product info array
- * @date_code: device manufacture date
- * @tester_id: tester id array
- * @serial_number: serial number for that device
- * @product_id_string: product id for the device
- * @support_fn_list: linked list for device information
- *
- * This structure gives information about the number of data sources and
- * the number of data registers associated with the function.
- */
-struct synaptics_rmi4_device_info {
-	unsigned int		version_major;
-	unsigned int		version_minor;
-	unsigned char		manufacturer_id;
-	unsigned char		product_props;
-	unsigned char		product_info[2];
-	unsigned char		date_code[3];
-	unsigned short		tester_id;
-	unsigned short		serial_number;
-	unsigned char		product_id_string[11];
-	struct list_head	support_fn_list;
-};
-
-/**
- * struct synaptics_rmi4_data - contains the rmi4 device data
- * @rmi4_mod_info: structure variable for rmi4 device info
- * @input_dev: pointer for input device
- * @i2c_client: pointer for i2c client
- * @board: constant pointer for touch platform data
- * @fn_list_mutex: mutex for function list
- * @rmi4_page_mutex: mutex for rmi4 page
- * @current_page: variable for integer
- * @number_of_interrupt_register: interrupt registers count
- * @fn01_ctrl_base_addr: control base address for fn01
- * @fn01_query_base_addr: query base address for fn01
- * @fn01_data_base_addr: data base address for fn01
- * @sensor_max_x: sensor maximum x value
- * @sensor_max_y: sensor maximum y value
- * @regulator: pointer to the regulator structure
- * @wait: wait queue structure variable
- * @touch_stopped: flag to stop the thread function
- * @fingers_supported: maximum supported fingers
- *
- * This structure gives the device data information.
- */
-struct synaptics_rmi4_data {
-	struct synaptics_rmi4_device_info rmi4_mod_info;
-	struct input_dev	*input_dev;
-	struct i2c_client	*i2c_client;
-	const struct synaptics_rmi4_platform_data *board;
-	struct mutex		fn_list_mutex;
-	struct mutex		rmi4_page_mutex;
-	int			current_page;
-	unsigned int		number_of_interrupt_register;
-	unsigned short		fn01_ctrl_base_addr;
-	unsigned short		fn01_query_base_addr;
-	unsigned short		fn01_data_base_addr;
-	int			sensor_max_x;
-	int			sensor_max_y;
-	struct regulator	*regulator;
-	wait_queue_head_t	wait;
-	bool			touch_stopped;
-	unsigned char		fingers_supported;
-};
-
-/**
- * synaptics_rmi4_set_page() - sets the page
- * @pdata: pointer to synaptics_rmi4_data structure
- * @address: set the address of the page
- *
- * This function is used to set the page and returns integer.
- */
-static int synaptics_rmi4_set_page(struct synaptics_rmi4_data *pdata,
-					unsigned int address)
-{
-	unsigned char	txbuf[PAGE_LEN];
-	int		retval;
-	unsigned int	page;
-	struct i2c_client *i2c = pdata->i2c_client;
-
-	page	= ((address >> 8) & MASK_8BIT);
-	if (page != pdata->current_page) {
-		txbuf[0]	= MASK_8BIT;
-		txbuf[1]	= page;
-		retval	= i2c_master_send(i2c, txbuf, PAGE_LEN);
-		if (retval != PAGE_LEN)
-			dev_err(&i2c->dev, "failed:%d\n", retval);
-		else
-			pdata->current_page = page;
-	} else
-		retval = PAGE_LEN;
-	return retval;
-}
-/**
- * synaptics_rmi4_i2c_block_read() - read the block of data
- * @pdata: pointer to synaptics_rmi4_data structure
- * @address: read the block of data from this offset
- * @valp: pointer to a buffer containing the data to be read
- * @size: number of bytes to read
- *
- * This function is to read the block of data and returns integer.
- */
-static int synaptics_rmi4_i2c_block_read(struct synaptics_rmi4_data *pdata,
-						unsigned short address,
-						unsigned char *valp, int size)
-{
-	int retval = 0;
-	int retry_count = 0;
-	int index;
-	struct i2c_client *i2c = pdata->i2c_client;
-
-	mutex_lock(&(pdata->rmi4_page_mutex));
-	retval = synaptics_rmi4_set_page(pdata, address);
-	if (retval != PAGE_LEN)
-		goto exit;
-	index = address & MASK_8BIT;
-retry:
-	retval = i2c_smbus_read_i2c_block_data(i2c, index, size, valp);
-	if (retval != size) {
-		if (++retry_count == MAX_RETRY_COUNT)
-			dev_err(&i2c->dev,
-				"%s:address 0x%04x size %d failed:%d\n",
-					__func__, address, size, retval);
-		else {
-			synaptics_rmi4_set_page(pdata, address);
-			goto retry;
-		}
-	}
-exit:
-	mutex_unlock(&(pdata->rmi4_page_mutex));
-	return retval;
-}
-
-/**
- * synaptics_rmi4_i2c_byte_write() - write the single byte data
- * @pdata: pointer to synaptics_rmi4_data structure
- * @address: write the block of data from this offset
- * @data: data to be write
- *
- * This function is to write the single byte data and returns integer.
- */
-static int synaptics_rmi4_i2c_byte_write(struct synaptics_rmi4_data *pdata,
-						unsigned short address,
-						unsigned char data)
-{
-	unsigned char txbuf[2];
-	int retval = 0;
-	struct i2c_client *i2c = pdata->i2c_client;
-
-	/* Can't have anyone else changing the page behind our backs */
-	mutex_lock(&(pdata->rmi4_page_mutex));
-
-	retval = synaptics_rmi4_set_page(pdata, address);
-	if (retval != PAGE_LEN)
-		goto exit;
-	txbuf[0]	= address & MASK_8BIT;
-	txbuf[1]	= data;
-	retval		= i2c_master_send(pdata->i2c_client, txbuf, 2);
-	/* Add in retry on writes only in certain error return values */
-	if (retval != 2) {
-		dev_err(&i2c->dev, "failed:%d\n", retval);
-		retval = -EIO;
-	} else
-		retval = 1;
-exit:
-	mutex_unlock(&(pdata->rmi4_page_mutex));
-	return retval;
-}
-
-/**
- * synpatics_rmi4_touchpad_report() - reports for the rmi4 touchpad device
- * @pdata: pointer to synaptics_rmi4_data structure
- * @rfi: pointer to synaptics_rmi4_fn structure
- *
- * This function calls to reports for the rmi4 touchpad device
- */
-static int synpatics_rmi4_touchpad_report(struct synaptics_rmi4_data *pdata,
-						struct synaptics_rmi4_fn *rfi)
-{
-	/* number of touch points - fingers down in this case */
-	int	touch_count = 0;
-	int	finger;
-	int	finger_registers;
-	int	reg;
-	int	finger_shift;
-	int	finger_status;
-	int	retval;
-	int	x, y;
-	int	wx, wy;
-	unsigned short	data_base_addr;
-	unsigned short	data_offset;
-	unsigned char	data_reg_blk_size;
-	unsigned char	values[2];
-	unsigned char	data[DATA_LEN];
-	unsigned char	fingers_supported = pdata->fingers_supported;
-	struct	i2c_client *client = pdata->i2c_client;
-	struct	input_dev *input_dev = pdata->input_dev;
-
-	/* get 2D sensor finger data */
-	/*
-	 * First get the finger status field - the size of the finger status
-	 * field is determined by the number of finger supporte - 2 bits per
-	 * finger, so the number of registers to read is:
-	 * registerCount = ceil(numberOfFingers/4).
-	 * Read the required number of registers and check each 2 bit field to
-	 * determine if a finger is down:
-	 *	00 = finger not present,
-	 *	01 = finger present and data accurate,
-	 *	10 = finger present but data may not be accurate,
-	 *	11 = reserved for product use.
-	 */
-	finger_registers	= (fingers_supported + 3) / 4;
-	data_base_addr		= rfi->fn_desc.data_base_addr;
-	retval = synaptics_rmi4_i2c_block_read(pdata, data_base_addr, values,
-							finger_registers);
-	if (retval != finger_registers) {
-		dev_err(&client->dev, "%s:read status registers failed\n",
-								__func__);
-		return 0;
-	}
-	/*
-	 * For each finger present, read the proper number of registers
-	 * to get absolute data.
-	 */
-	data_reg_blk_size = rfi->size_of_data_register_block;
-	for (finger = 0; finger < fingers_supported; finger++) {
-		/* determine which data byte the finger status is in */
-		reg = finger / 4;
-		/* bit shift to get finger's status */
-		finger_shift	= (finger % 4) * 2;
-		finger_status	= (values[reg] >> finger_shift) & 3;
-		/*
-		 * if finger status indicates a finger is present then
-		 * read the finger data and report it
-		 */
-		input_mt_slot(input_dev, finger);
-		input_mt_report_slot_state(input_dev, MT_TOOL_FINGER,
-							finger_status != 0);
-
-		if (finger_status) {
-			/* Read the finger data */
-			data_offset = data_base_addr +
-					((finger * data_reg_blk_size) +
-					finger_registers);
-			retval = synaptics_rmi4_i2c_block_read(pdata,
-						data_offset, data,
-						data_reg_blk_size);
-			if (retval != data_reg_blk_size) {
-				dev_err(&client->dev, "%s:read data failed\n",
-								__func__);
-				return 0;
-			}
-			x = (data[0] << 4) | (data[2] & MASK_4BIT);
-			y = (data[1] << 4) | ((data[2] >> 4) & MASK_4BIT);
-			wy = (data[3] >> 4) & MASK_4BIT;
-			wx = (data[3] & MASK_4BIT);
-
-			if (pdata->board->x_flip)
-				x = pdata->sensor_max_x - x;
-			if (pdata->board->y_flip)
-				y = pdata->sensor_max_y - y;
-
-			input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR,
-								max(wx, wy));
-			input_report_abs(input_dev, ABS_MT_POSITION_X, x);
-			input_report_abs(input_dev, ABS_MT_POSITION_Y, y);
-
-			/* number of active touch points */
-			touch_count++;
-		}
-	}
-
-	/* sync after groups of events */
-	input_mt_sync_frame(input_dev);
-	input_sync(input_dev);
-	/* return the number of touch points */
-	return touch_count;
-}
-
-/**
- * synaptics_rmi4_report_device() - reports the rmi4 device
- * @pdata: pointer to synaptics_rmi4_data structure
- * @rfi: pointer to synaptics_rmi4_fn
- *
- * This function is used to call the report function of the rmi4 device.
- */
-static int synaptics_rmi4_report_device(struct synaptics_rmi4_data *pdata,
-					struct synaptics_rmi4_fn *rfi)
-{
-	int touch = 0;
-	struct	i2c_client *client = pdata->i2c_client;
-	static int num_error_reports;
-
-	if (rfi->fn_number != SYNAPTICS_RMI4_TOUCHPAD_FUNC_NUM) {
-		num_error_reports++;
-		if (num_error_reports < MAX_ERROR_REPORT)
-			dev_err(&client->dev, "%s:report not supported\n",
-								__func__);
-	} else
-		touch = synpatics_rmi4_touchpad_report(pdata, rfi);
-	return touch;
-}
-/**
- * synaptics_rmi4_sensor_report() - reports to input subsystem
- * @pdata: pointer to synaptics_rmi4_data structure
- *
- * This function is used to reads in all data sources and reports
- * them to the input subsystem.
- */
-static int synaptics_rmi4_sensor_report(struct synaptics_rmi4_data *pdata)
-{
-	unsigned char	intr_status[4];
-	/* number of touch points - fingers or buttons */
-	int touch = 0;
-	unsigned int retval;
-	struct synaptics_rmi4_fn		*rfi;
-	struct synaptics_rmi4_device_info	*rmi;
-	struct	i2c_client *client = pdata->i2c_client;
-
-	/*
-	 * Get the interrupt status from the function $01
-	 * control register+1 to find which source(s) were interrupting
-	 * so we can read the data from the source(s) (2D sensor, buttons..)
-	 */
-	retval = synaptics_rmi4_i2c_block_read(pdata,
-					pdata->fn01_data_base_addr + 1,
-					intr_status,
-					pdata->number_of_interrupt_register);
-	if (retval != pdata->number_of_interrupt_register) {
-		dev_err(&client->dev,
-				"could not read interrupt status registers\n");
-		return 0;
-	}
-	/*
-	 * check each function that has data sources and if the interrupt for
-	 * that triggered then call that RMI4 functions report() function to
-	 * gather data and report it to the input subsystem
-	 */
-	rmi = &(pdata->rmi4_mod_info);
-	list_for_each_entry(rfi, &rmi->support_fn_list, link) {
-		if (rfi->num_of_data_sources) {
-			if (intr_status[rfi->index_to_intr_reg] &
-							rfi->intr_mask)
-				touch = synaptics_rmi4_report_device(pdata,
-									rfi);
-		}
-	}
-	/* return the number of touch points */
-	return touch;
-}
-
-/**
- * synaptics_rmi4_irq() - thread function for rmi4 attention line
- * @irq: irq value
- * @data: void pointer
- *
- * This function is interrupt thread function. It just notifies the
- * application layer that attention is required.
- */
-static irqreturn_t synaptics_rmi4_irq(int irq, void *data)
-{
-	struct synaptics_rmi4_data *pdata = data;
-	int touch_count;
-
-	do {
-		touch_count = synaptics_rmi4_sensor_report(pdata);
-		if (touch_count)
-			wait_event_timeout(pdata->wait, pdata->touch_stopped,
-							msecs_to_jiffies(1));
-		else
-			break;
-	} while (!pdata->touch_stopped);
-	return IRQ_HANDLED;
-}
-
-/**
- * synpatics_rmi4_touchpad_detect() - detects the rmi4 touchpad device
- * @pdata: pointer to synaptics_rmi4_data structure
- * @rfi: pointer to synaptics_rmi4_fn structure
- * @fd: pointer to synaptics_rmi4_fn_desc structure
- * @interruptcount: count the number of interrupts
- *
- * This function calls to detects the rmi4 touchpad device
- */
-static int synpatics_rmi4_touchpad_detect(struct synaptics_rmi4_data *pdata,
-					struct synaptics_rmi4_fn *rfi,
-					struct synaptics_rmi4_fn_desc *fd,
-					unsigned int interruptcount)
-{
-	unsigned char	queries[QUERY_LEN];
-	unsigned short	intr_offset;
-	unsigned char	abs_data_size;
-	unsigned char	abs_data_blk_size;
-	unsigned char	egr_0, egr_1;
-	unsigned int	all_data_blk_size;
-	int	has_pinch, has_flick, has_tap;
-	int	has_tapandhold, has_doubletap;
-	int	has_earlytap, has_press;
-	int	has_palmdetect, has_rotate;
-	int	has_rel;
-	int	i;
-	int	retval;
-	struct	i2c_client *client = pdata->i2c_client;
-
-	rfi->fn_desc.query_base_addr	= fd->query_base_addr;
-	rfi->fn_desc.data_base_addr	= fd->data_base_addr;
-	rfi->fn_desc.intr_src_count	= fd->intr_src_count;
-	rfi->fn_desc.fn_number		= fd->fn_number;
-	rfi->fn_number			= fd->fn_number;
-	rfi->num_of_data_sources	= fd->intr_src_count;
-	rfi->fn_desc.ctrl_base_addr	= fd->ctrl_base_addr;
-	rfi->fn_desc.cmd_base_addr	= fd->cmd_base_addr;
-
-	/*
-	 * need to get number of fingers supported, data size, etc.
-	 * to be used when getting data since the number of registers to
-	 * read depends on the number of fingers supported and data size.
-	 */
-	retval = synaptics_rmi4_i2c_block_read(pdata, fd->query_base_addr,
-							queries,
-							sizeof(queries));
-	if (retval != sizeof(queries)) {
-		dev_err(&client->dev, "%s:read function query registers\n",
-							__func__);
-		return retval;
-	}
-	/*
-	 * 2D data sources have only 3 bits for the number of fingers
-	 * supported - so the encoding is a bit weird.
-	 */
-	if ((queries[1] & MASK_3BIT) <= 4)
-		/* add 1 since zero based */
-		rfi->num_of_data_points = (queries[1] & MASK_3BIT) + 1;
-	else {
-		/*
-		 * a value of 5 is up to 10 fingers - 6 and 7 are reserved
-		 * (shouldn't get these i int retval;n a normal 2D source).
-		 */
-		if ((queries[1] & MASK_3BIT) == 5)
-			rfi->num_of_data_points = 10;
-	}
-	pdata->fingers_supported = rfi->num_of_data_points;
-	/* Need to get interrupt info for handling interrupts */
-	rfi->index_to_intr_reg = (interruptcount + 7) / 8;
-	if (rfi->index_to_intr_reg != 0)
-		rfi->index_to_intr_reg -= 1;
-	/*
-	 * loop through interrupts for each source in fn $11
-	 * and or in a bit to the interrupt mask for each.
-	 */
-	intr_offset = interruptcount % 8;
-	rfi->intr_mask = 0;
-	for (i = intr_offset;
-		i < ((fd->intr_src_count & MASK_3BIT) + intr_offset); i++)
-		rfi->intr_mask |= 1 << i;
-
-	/* Size of just the absolute data for one finger */
-	abs_data_size	= queries[5] & MASK_2BIT;
-	/* One each for X and Y, one for LSB for X & Y, one for W, one for Z */
-	abs_data_blk_size = 3 + (2 * (abs_data_size == 0 ? 1 : 0));
-	rfi->size_of_data_register_block = abs_data_blk_size;
-
-	/*
-	 * need to determine the size of data to read - this depends on
-	 * conditions such as whether Relative data is reported and if Gesture
-	 * data is reported.
-	 */
-	egr_0 = queries[7];
-	egr_1 = queries[8];
-
-	/*
-	 * Get info about what EGR data is supported, whether it has
-	 * Relative data supported, etc.
-	 */
-	has_pinch	= egr_0 & HAS_PINCH;
-	has_flick	= egr_0 & HAS_FLICK;
-	has_tap		= egr_0 & HAS_TAP;
-	has_earlytap	= egr_0 & HAS_EARLYTAP;
-	has_press	= egr_0 & HAS_PRESS;
-	has_rotate	= egr_1 & HAS_ROTATE;
-	has_rel		= queries[1] & HAS_RELEASE;
-	has_tapandhold	= egr_0 & HAS_TAPANDHOLD;
-	has_doubletap	= egr_0 & HAS_DOUBLETAP;
-	has_palmdetect	= egr_1 & HAS_PALMDETECT;
-
-	/*
-	 * Size of all data including finger status, absolute data for each
-	 * finger, relative data and EGR data
-	 */
-	all_data_blk_size =
-		/* finger status, four fingers per register */
-		((rfi->num_of_data_points + 3) / 4) +
-		/* absolute data, per finger times number of fingers */
-		(abs_data_blk_size * rfi->num_of_data_points) +
-		/*
-		 * two relative registers (if relative is being reported)
-		 */
-		2 * has_rel +
-		/*
-		 * F11_2D_data8 is only present if the egr_0
-		 * register is non-zero.
-		 */
-		!!(egr_0) +
-		/*
-		 * F11_2D_data9 is only present if either egr_0 or
-		 * egr_1 registers are non-zero.
-		 */
-		(egr_0 || egr_1) +
-		/*
-		 * F11_2D_data10 is only present if EGR_PINCH or EGR_FLICK of
-		 * egr_0 reports as 1.
-		 */
-		!!(has_pinch | has_flick) +
-		/*
-		 * F11_2D_data11 and F11_2D_data12 are only present if
-		 * EGR_FLICK of egr_0 reports as 1.
-		 */
-		2 * !!(has_flick);
-	return retval;
-}
-
-/**
- * synaptics_rmi4_touchpad_config() - configures the rmi4 touchpad device
- * @pdata: pointer to synaptics_rmi4_data structure
- * @rfi: pointer to synaptics_rmi4_fn structure
- *
- * This function calls to configures the rmi4 touchpad device
- */
-static int synaptics_rmi4_touchpad_config(struct synaptics_rmi4_data *pdata,
-						struct synaptics_rmi4_fn *rfi)
-{
-	/*
-	 * For the data source - print info and do any
-	 * source specific configuration.
-	 */
-	unsigned char data[BUF_LEN];
-	int retval = 0;
-	struct	i2c_client *client = pdata->i2c_client;
-
-	/* Get and print some info about the data source... */
-	/* To Query 2D devices we need to read from the address obtained
-	 * from the function descriptor stored in the RMI function info.
-	 */
-	retval = synaptics_rmi4_i2c_block_read(pdata,
-						rfi->fn_desc.query_base_addr,
-						data, QUERY_LEN);
-	if (retval != QUERY_LEN)
-		dev_err(&client->dev, "%s:read query registers failed\n",
-								__func__);
-	else {
-		retval = synaptics_rmi4_i2c_block_read(pdata,
-						rfi->fn_desc.ctrl_base_addr,
-						data, DATA_BUF_LEN);
-		if (retval != DATA_BUF_LEN) {
-			dev_err(&client->dev,
-				"%s:read control registers failed\n",
-								__func__);
-			return retval;
-		}
-		/* Store these for use later*/
-		pdata->sensor_max_x = ((data[6] & MASK_8BIT) << 0) |
-						((data[7] & MASK_4BIT) << 8);
-		pdata->sensor_max_y = ((data[8] & MASK_5BIT) << 0) |
-						((data[9] & MASK_4BIT) << 8);
-	}
-	return retval;
-}
-
-/**
- * synaptics_rmi4_i2c_query_device() - query the rmi4 device
- * @pdata: pointer to synaptics_rmi4_data structure
- *
- * This function is used to query the rmi4 device.
- */
-static int synaptics_rmi4_i2c_query_device(struct synaptics_rmi4_data *pdata)
-{
-	int i;
-	int retval;
-	unsigned char std_queries[STD_QUERY_LEN];
-	unsigned char intr_count = 0;
-	int data_sources = 0;
-	unsigned int ctrl_offset;
-	struct synaptics_rmi4_fn *rfi;
-	struct synaptics_rmi4_fn_desc	rmi_fd;
-	struct synaptics_rmi4_device_info *rmi;
-	struct	i2c_client *client = pdata->i2c_client;
-
-	/*
-	 * init the physical drivers RMI module
-	 * info list of functions
-	 */
-	INIT_LIST_HEAD(&pdata->rmi4_mod_info.support_fn_list);
-
-	/*
-	 * Read the Page Descriptor Table to determine what functions
-	 * are present
-	 */
-	for (i = PDT_START_SCAN_LOCATION; i > PDT_END_SCAN_LOCATION;
-						i -= PDT_ENTRY_SIZE) {
-		retval = synaptics_rmi4_i2c_block_read(pdata, i,
-						(unsigned char *)&rmi_fd,
-						sizeof(rmi_fd));
-		if (retval != sizeof(rmi_fd)) {
-			/* failed to read next PDT entry */
-			dev_err(&client->dev, "%s: read error\n", __func__);
-			return -EIO;
-		}
-		rfi = NULL;
-		if (rmi_fd.fn_number) {
-			switch (rmi_fd.fn_number & MASK_8BIT) {
-			case SYNAPTICS_RMI4_DEVICE_CONTROL_FUNC_NUM:
-				pdata->fn01_query_base_addr =
-						rmi_fd.query_base_addr;
-				pdata->fn01_ctrl_base_addr =
-						rmi_fd.ctrl_base_addr;
-				pdata->fn01_data_base_addr =
-						rmi_fd.data_base_addr;
-				break;
-			case SYNAPTICS_RMI4_TOUCHPAD_FUNC_NUM:
-				if (rmi_fd.intr_src_count) {
-					rfi = kmalloc(sizeof(*rfi),
-						      GFP_KERNEL);
-					if (!rfi)
-						return -ENOMEM;
-					retval = synpatics_rmi4_touchpad_detect
-								(pdata,	rfi,
-								&rmi_fd,
-								intr_count);
-					if (retval < 0) {
-						kfree(rfi);
-						return retval;
-					}
-				}
-				break;
-			}
-			/* interrupt count for next iteration */
-			intr_count += (rmi_fd.intr_src_count & MASK_3BIT);
-			/*
-			 * We only want to add functions to the list
-			 * that have data associated with them.
-			 */
-			if (rfi && rmi_fd.intr_src_count) {
-				/* link this function info to the RMI module */
-				mutex_lock(&(pdata->fn_list_mutex));
-				list_add_tail(&rfi->link,
-					&pdata->rmi4_mod_info.support_fn_list);
-				mutex_unlock(&(pdata->fn_list_mutex));
-			}
-		} else {
-			/*
-			 * A zero in the function number
-			 * signals the end of the PDT
-			 */
-			dev_dbg(&client->dev,
-				"%s:end of PDT\n", __func__);
-			break;
-		}
-	}
-	/*
-	 * calculate the interrupt register count - used in the
-	 * ISR to read the correct number of interrupt registers
-	 */
-	pdata->number_of_interrupt_register = (intr_count + 7) / 8;
-	/*
-	 * Function $01 will be used to query the product properties,
-	 * and product ID  so we had to read the PDT above first to get
-	 * the Fn $01 query address and prior to filling in the product
-	 * info. NOTE: Even an unflashed device will still have FN $01.
-	 */
-
-	/* Load up the standard queries and get the RMI4 module info */
-	retval = synaptics_rmi4_i2c_block_read(pdata,
-					pdata->fn01_query_base_addr,
-					std_queries,
-					sizeof(std_queries));
-	if (retval != sizeof(std_queries)) {
-		dev_err(&client->dev, "%s:Failed reading queries\n",
-							__func__);
-		 return -EIO;
-	}
-
-	/* Currently supported RMI version is 4.0 */
-	pdata->rmi4_mod_info.version_major	= 4;
-	pdata->rmi4_mod_info.version_minor	= 0;
-	/*
-	 * get manufacturer id, product_props, product info,
-	 * date code, tester id, serial num and product id (name)
-	 */
-	pdata->rmi4_mod_info.manufacturer_id	= std_queries[0];
-	pdata->rmi4_mod_info.product_props	= std_queries[1];
-	pdata->rmi4_mod_info.product_info[0]	= std_queries[2];
-	pdata->rmi4_mod_info.product_info[1]	= std_queries[3];
-	/* year - 2001-2032 */
-	pdata->rmi4_mod_info.date_code[0]	= std_queries[4] & MASK_5BIT;
-	/* month - 1-12 */
-	pdata->rmi4_mod_info.date_code[1]	= std_queries[5] & MASK_4BIT;
-	/* day - 1-31 */
-	pdata->rmi4_mod_info.date_code[2]	= std_queries[6] & MASK_5BIT;
-	pdata->rmi4_mod_info.tester_id = ((std_queries[7] & MASK_7BIT) << 8) |
-						(std_queries[8] & MASK_7BIT);
-	pdata->rmi4_mod_info.serial_number =
-		((std_queries[9] & MASK_7BIT) << 8) |
-				(std_queries[10] & MASK_7BIT);
-	memcpy(pdata->rmi4_mod_info.product_id_string, &std_queries[11], 10);
-
-	/* Check if this is a Synaptics device - report if not. */
-	if (pdata->rmi4_mod_info.manufacturer_id != 1)
-		dev_err(&client->dev, "non-Synaptics mfg id:%d\n",
-			pdata->rmi4_mod_info.manufacturer_id);
-
-	list_for_each_entry(rfi, &pdata->rmi4_mod_info.support_fn_list, link)
-		data_sources += rfi->num_of_data_sources;
-	if (data_sources) {
-		rmi = &(pdata->rmi4_mod_info);
-		list_for_each_entry(rfi, &rmi->support_fn_list, link) {
-			if (rfi->num_of_data_sources) {
-				if (rfi->fn_number ==
-					SYNAPTICS_RMI4_TOUCHPAD_FUNC_NUM) {
-					retval = synaptics_rmi4_touchpad_config
-								(pdata, rfi);
-					if (retval < 0)
-						return retval;
-				} else
-					dev_err(&client->dev,
-						"%s:fn_number not supported\n",
-								__func__);
-				/*
-				 * Turn on interrupts for this
-				 * function's data sources.
-				 */
-				ctrl_offset = pdata->fn01_ctrl_base_addr + 1 +
-							rfi->index_to_intr_reg;
-				retval = synaptics_rmi4_i2c_byte_write(pdata,
-							ctrl_offset,
-							rfi->intr_mask);
-				if (retval < 0)
-					return retval;
-			}
-		}
-	}
-	return 0;
-}
-
-/*
- * Descriptor structure.
- * Describes the number of i2c devices on the bus that speak RMI.
- */
-static const struct synaptics_rmi4_platform_data synaptics_rmi4_platformdata = {
-	.irq_type       = (IRQF_TRIGGER_FALLING | IRQF_SHARED),
-	.x_flip		= false,
-	.y_flip		= true,
-};
-
-/**
- * synaptics_rmi4_probe() - Initialze the i2c-client touchscreen driver
- * @i2c: i2c client structure pointer
- * @id:i2c device id pointer
- *
- * This function will allocate and initialize the instance
- * data and request the irq and set the instance data as the clients
- * platform data then register the physical driver which will do a scan of
- * the rmi4 Physical Device Table and enumerate any rmi4 functions that
- * have data sources associated with them.
- */
-static int synaptics_rmi4_probe
-	(struct i2c_client *client, const struct i2c_device_id *dev_id)
-{
-	int retval;
-	unsigned char intr_status[4];
-	struct synaptics_rmi4_data *rmi4_data;
-	const struct synaptics_rmi4_platform_data *platformdata =
-						client->dev.platform_data;
-
-	if (!i2c_check_functionality(client->adapter,
-					I2C_FUNC_SMBUS_BYTE_DATA)) {
-		dev_err(&client->dev, "i2c smbus byte data not supported\n");
-		return -EIO;
-	}
-
-	if (!platformdata)
-		platformdata = &synaptics_rmi4_platformdata;
-
-	/* Allocate and initialize the instance data for this client */
-	rmi4_data = kcalloc(2, sizeof(struct synaptics_rmi4_data),
-			    GFP_KERNEL);
-	if (!rmi4_data)
-		return -ENOMEM;
-
-	rmi4_data->input_dev = input_allocate_device();
-	if (!rmi4_data->input_dev) {
-		retval = -ENOMEM;
-		goto err_input;
-	}
-
-	rmi4_data->regulator = regulator_get(&client->dev, "vdd");
-	if (IS_ERR(rmi4_data->regulator)) {
-		dev_err(&client->dev, "%s:get regulator failed\n",
-							__func__);
-		retval = PTR_ERR(rmi4_data->regulator);
-		goto err_get_regulator;
-	}
-	retval = regulator_enable(rmi4_data->regulator);
-	if (retval < 0) {
-		dev_err(&client->dev, "%s:regulator enable failed\n",
-							__func__);
-		goto err_regulator_enable;
-	}
-	init_waitqueue_head(&rmi4_data->wait);
-	/*
-	 * Copy i2c_client pointer into RTID's i2c_client pointer for
-	 * later use in rmi4_read, rmi4_write, etc.
-	 */
-	rmi4_data->i2c_client		= client;
-	/* So we set the page correctly the first time */
-	rmi4_data->current_page		= MASK_16BIT;
-	rmi4_data->board		= platformdata;
-	rmi4_data->touch_stopped	= false;
-
-	/* init the mutexes for maintain the lists */
-	mutex_init(&(rmi4_data->fn_list_mutex));
-	mutex_init(&(rmi4_data->rmi4_page_mutex));
-
-	/*
-	 * Register physical driver - this will call the detect function that
-	 * will then scan the device and determine the supported
-	 * rmi4 functions.
-	 */
-	retval = synaptics_rmi4_i2c_query_device(rmi4_data);
-	if (retval) {
-		dev_err(&client->dev, "%s: rmi4 query device failed\n",
-							__func__);
-		goto err_query_dev;
-	}
-
-	/* Store the instance data in the i2c_client */
-	i2c_set_clientdata(client, rmi4_data);
-
-	/*initialize the input device parameters */
-	rmi4_data->input_dev->name	= DRIVER_NAME;
-	rmi4_data->input_dev->phys	= "Synaptics_Clearpad";
-	rmi4_data->input_dev->id.bustype = BUS_I2C;
-	rmi4_data->input_dev->dev.parent = &client->dev;
-	input_set_drvdata(rmi4_data->input_dev, rmi4_data);
-
-	/* Initialize the function handlers for rmi4 */
-	set_bit(EV_SYN, rmi4_data->input_dev->evbit);
-	set_bit(EV_KEY, rmi4_data->input_dev->evbit);
-	set_bit(EV_ABS, rmi4_data->input_dev->evbit);
-
-	input_set_abs_params(rmi4_data->input_dev, ABS_MT_POSITION_X, 0,
-					rmi4_data->sensor_max_x, 0, 0);
-	input_set_abs_params(rmi4_data->input_dev, ABS_MT_POSITION_Y, 0,
-					rmi4_data->sensor_max_y, 0, 0);
-	input_set_abs_params(rmi4_data->input_dev, ABS_MT_TOUCH_MAJOR, 0,
-						MAX_TOUCH_MAJOR, 0, 0);
-	input_mt_init_slots(rmi4_data->input_dev,
-				rmi4_data->fingers_supported, 0);
-
-	/* Clear interrupts */
-	synaptics_rmi4_i2c_block_read(rmi4_data,
-			rmi4_data->fn01_data_base_addr + 1, intr_status,
-				rmi4_data->number_of_interrupt_register);
-	retval = request_threaded_irq(client->irq, NULL,
-					synaptics_rmi4_irq,
-					platformdata->irq_type,
-					DRIVER_NAME, rmi4_data);
-	if (retval) {
-		dev_err(&client->dev, "Unable to get attn irq %d\n",
-			client->irq);
-		goto err_query_dev;
-	}
-
-	retval = input_register_device(rmi4_data->input_dev);
-	if (retval) {
-		dev_err(&client->dev, "%s:input register failed\n", __func__);
-		goto err_free_irq;
-	}
-
-	return retval;
-
-err_free_irq:
-	free_irq(client->irq, rmi4_data);
-err_query_dev:
-	regulator_disable(rmi4_data->regulator);
-err_regulator_enable:
-	regulator_put(rmi4_data->regulator);
-err_get_regulator:
-	input_free_device(rmi4_data->input_dev);
-	rmi4_data->input_dev = NULL;
-err_input:
-	kfree(rmi4_data);
-
-	return retval;
-}
-/**
- * synaptics_rmi4_remove() - Removes the i2c-client touchscreen driver
- * @client: i2c client structure pointer
- *
- * This function uses to remove the i2c-client
- * touchscreen driver and returns integer.
- */
-static int synaptics_rmi4_remove(struct i2c_client *client)
-{
-	struct synaptics_rmi4_data *rmi4_data = i2c_get_clientdata(client);
-
-	rmi4_data->touch_stopped = true;
-	wake_up(&rmi4_data->wait);
-	free_irq(client->irq, rmi4_data);
-	input_unregister_device(rmi4_data->input_dev);
-	regulator_disable(rmi4_data->regulator);
-	regulator_put(rmi4_data->regulator);
-	kfree(rmi4_data);
-
-	return 0;
-}
-
-/**
- * synaptics_rmi4_suspend() - suspend the touch screen controller
- * @dev: pointer to device structure
- *
- * This function is used to suspend the
- * touch panel controller and returns integer
- */
-static int __maybe_unused synaptics_rmi4_suspend(struct device *dev)
-{
-	/* Touch sleep mode */
-	int retval;
-	unsigned char intr_status;
-	struct synaptics_rmi4_data *rmi4_data = dev_get_drvdata(dev);
-
-	rmi4_data->touch_stopped = true;
-	disable_irq(rmi4_data->i2c_client->irq);
-
-	retval = synaptics_rmi4_i2c_block_read(rmi4_data,
-				rmi4_data->fn01_data_base_addr + 1,
-				&intr_status,
-				rmi4_data->number_of_interrupt_register);
-	if (retval < 0)
-		return retval;
-
-	retval = synaptics_rmi4_i2c_byte_write(rmi4_data,
-					rmi4_data->fn01_ctrl_base_addr + 1,
-					(intr_status & ~TOUCHPAD_CTRL_INTR));
-	if (retval < 0)
-		return retval;
-
-	regulator_disable(rmi4_data->regulator);
-
-	return 0;
-}
-/**
- * synaptics_rmi4_resume() - resume the touch screen controller
- * @dev: pointer to device structure
- *
- * This function is used to resume the touch panel
- * controller and returns integer.
- */
-static int __maybe_unused synaptics_rmi4_resume(struct device *dev)
-{
-	int retval;
-	unsigned char intr_status;
-	struct synaptics_rmi4_data *rmi4_data = dev_get_drvdata(dev);
-
-	retval = regulator_enable(rmi4_data->regulator);
-	if (retval) {
-		dev_err(dev, "Regulator enable failed (%d)\n", retval);
-		return retval;
-	}
-
-	enable_irq(rmi4_data->i2c_client->irq);
-	rmi4_data->touch_stopped = false;
-
-	retval = synaptics_rmi4_i2c_block_read(rmi4_data,
-				rmi4_data->fn01_data_base_addr + 1,
-				&intr_status,
-				rmi4_data->number_of_interrupt_register);
-	if (retval < 0)
-		return retval;
-
-	retval = synaptics_rmi4_i2c_byte_write(rmi4_data,
-					rmi4_data->fn01_ctrl_base_addr + 1,
-					(intr_status | TOUCHPAD_CTRL_INTR));
-	if (retval < 0)
-		return retval;
-
-	return 0;
-}
-
-static SIMPLE_DEV_PM_OPS(synaptics_rmi4_dev_pm_ops, synaptics_rmi4_suspend,
-			 synaptics_rmi4_resume);
-
-static const struct i2c_device_id synaptics_rmi4_id_table[] = {
-	{ DRIVER_NAME, 0 },
-	{ },
-};
-MODULE_DEVICE_TABLE(i2c, synaptics_rmi4_id_table);
-
-static struct i2c_driver synaptics_rmi4_driver = {
-	.driver = {
-		.name	=	DRIVER_NAME,
-		.pm	=	&synaptics_rmi4_dev_pm_ops,
-	},
-	.probe		=	synaptics_rmi4_probe,
-	.remove		=	synaptics_rmi4_remove,
-	.id_table	=	synaptics_rmi4_id_table,
-};
-
-module_i2c_driver(synaptics_rmi4_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("naveen.gaddipati@stericsson.com, js.ha@stericsson.com");
-MODULE_DESCRIPTION("synaptics rmi4 i2c touch Driver");

diff --git a/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.h b/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.h
deleted file mode 100644
index 8c9166b..0000000
--- a/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.h
+++ /dev/null

@@ -1,46 +0,0 @@
-/**
- *
- * Synaptics Register Mapped Interface (RMI4) I2C Physical Layer Driver.
- * Copyright (c) 2007-2010, Synaptics Incorporated
- *
- * Author: Js HA <js.ha@stericsson.com> for ST-Ericsson
- * Author: Naveen Kumar G <naveen.gaddipati@stericsson.com> for ST-Ericsson
- * Copyright 2010 (c) ST-Ericsson AB
- */
-/*
- * This file is licensed under the GPL2 license.
- *
- *#############################################################################
- * GPL
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- *#############################################################################
- */
-
-#ifndef _SYNAPTICS_RMI4_H_INCLUDED_
-#define _SYNAPTICS_RMI4_H_INCLUDED_
-
-/**
- * struct synaptics_rmi4_platform_data - contains the rmi4 platform data
- * @irq_number: irq number
- * @irq_type: irq type
- * @x flip: x flip flag
- * @y flip: y flip flag
- *
- * This structure gives platform data for rmi4.
- */
-struct synaptics_rmi4_platform_data {
-	int irq_type;
-	bool x_flip;
-	bool y_flip;
-};
-
-#endif

diff --git a/drivers/staging/wilc1000/coreconfigurator.c b/drivers/staging/wilc1000/coreconfigurator.c
index 2c4ae1f..4b51c0a 100644
--- a/drivers/staging/wilc1000/coreconfigurator.c
+++ b/drivers/staging/wilc1000/coreconfigurator.c

@@ -338,8 +338,10 @@
 
 		if (ies_len > 0) {
 			network_info->ies = kmemdup(ies, ies_len, GFP_KERNEL);
-			if (!network_info->ies)
+			if (!network_info->ies) {
+				kfree(network_info);
 				return -ENOMEM;
+			}
 		}
 		network_info->ies_len = ies_len;
 	}
@@ -373,8 +375,10 @@
 					    AID_LEN);
 
 		connect_resp_info->ies = kmemdup(ies, ies_len, GFP_KERNEL);
-		if (!connect_resp_info->ies)
+		if (!connect_resp_info->ies) {
+			kfree(connect_resp_info);
 			return -ENOMEM;
+		}
 
 		connect_resp_info->ies_len = ies_len;
 	}

diff --git a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
index b76622d..448a5c8 100644
--- a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
+++ b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c

@@ -2170,6 +2170,13 @@
 	int ret;
 	struct wilc_priv *priv = wiphy_priv(wiphy);
 	struct wilc_vif *vif = netdev_priv(priv->dev);
+	struct wilc *wl;
+
+	wl = vif->wilc;
+
+	/* If firmware is not started, return. */
+	if (!wl->initialized)
+		return -EIO;
 
 	ret = wilc_get_tx_power(vif, (u8 *)dbm);
 	if (ret)

diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index a24443b..97e5b69 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c

@@ -779,14 +779,6 @@
 	return 0;
 }
 
-static void lio_target_cleanup_nodeacl( struct se_node_acl *se_nacl)
-{
-	struct iscsi_node_acl *acl = container_of(se_nacl,
-			struct iscsi_node_acl, se_node_acl);
-
-	configfs_remove_default_groups(&acl->se_node_acl.acl_fabric_stat_group);
-}
-
 /* End items for lio_target_acl_cit */
 
 /* Start items for lio_target_tpg_attrib_cit */
@@ -1247,6 +1239,16 @@
 	if (IS_ERR(tiqn))
 		return ERR_CAST(tiqn);
 
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated Node:"
+			" %s\n", name);
+	return &tiqn->tiqn_wwn;
+}
+
+static void lio_target_add_wwn_groups(struct se_wwn *wwn)
+{
+	struct iscsi_tiqn *tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+
 	config_group_init_type_name(&tiqn->tiqn_stat_grps.iscsi_instance_group,
 			"iscsi_instance", &iscsi_stat_instance_cit);
 	configfs_add_default_group(&tiqn->tiqn_stat_grps.iscsi_instance_group,
@@ -1271,12 +1273,6 @@
 			"iscsi_logout_stats", &iscsi_stat_logout_cit);
 	configfs_add_default_group(&tiqn->tiqn_stat_grps.iscsi_logout_stats_group,
 			&tiqn->tiqn_wwn.fabric_stat_group);
-
-
-	pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
-	pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated Node:"
-			" %s\n", name);
-	return &tiqn->tiqn_wwn;
 }
 
 static void lio_target_call_coredeltiqn(
@@ -1284,8 +1280,6 @@
 {
 	struct iscsi_tiqn *tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
 
-	configfs_remove_default_groups(&tiqn->tiqn_wwn.fabric_stat_group);
-
 	pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s\n",
 			tiqn->tiqn);
 	iscsit_del_tiqn(tiqn);
@@ -1660,12 +1654,12 @@
 	.aborted_task			= lio_aborted_task,
 	.fabric_make_wwn		= lio_target_call_coreaddtiqn,
 	.fabric_drop_wwn		= lio_target_call_coredeltiqn,
+	.add_wwn_groups			= lio_target_add_wwn_groups,
 	.fabric_make_tpg		= lio_target_tiqn_addtpg,
 	.fabric_drop_tpg		= lio_target_tiqn_deltpg,
 	.fabric_make_np			= lio_target_call_addnptotpg,
 	.fabric_drop_np			= lio_target_call_delnpfromtpg,
 	.fabric_init_nodeacl		= lio_target_init_nodeacl,
-	.fabric_cleanup_nodeacl		= lio_target_cleanup_nodeacl,
 
 	.tfc_discovery_attrs		= lio_target_discovery_auth_attrs,
 	.tfc_wwn_attrs			= lio_target_wwn_attrs,

diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 1bd5c72..31a096a 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c

@@ -338,10 +338,8 @@
 {
 	struct se_node_acl *se_nacl = container_of(to_config_group(item),
 			struct se_node_acl, acl_group);
-	struct target_fabric_configfs *tf = se_nacl->se_tpg->se_tpg_wwn->wwn_tf;
 
-	if (tf->tf_ops->fabric_cleanup_nodeacl)
-		tf->tf_ops->fabric_cleanup_nodeacl(se_nacl);
+	configfs_remove_default_groups(&se_nacl->acl_fabric_stat_group);
 	core_tpg_del_initiator_node_acl(se_nacl);
 }
 
@@ -383,14 +381,6 @@
 	if (IS_ERR(se_nacl))
 		return ERR_CAST(se_nacl);
 
-	if (tf->tf_ops->fabric_init_nodeacl) {
-		int ret = tf->tf_ops->fabric_init_nodeacl(se_nacl, name);
-		if (ret) {
-			core_tpg_del_initiator_node_acl(se_nacl);
-			return ERR_PTR(ret);
-		}
-	}
-
 	config_group_init_type_name(&se_nacl->acl_group, name,
 			&tf->tf_tpg_nacl_base_cit);
 
@@ -414,6 +404,15 @@
 	configfs_add_default_group(&se_nacl->acl_fabric_stat_group,
 			&se_nacl->acl_group);
 
+	if (tf->tf_ops->fabric_init_nodeacl) {
+		int ret = tf->tf_ops->fabric_init_nodeacl(se_nacl, name);
+		if (ret) {
+			configfs_remove_default_groups(&se_nacl->acl_fabric_stat_group);
+			core_tpg_del_initiator_node_acl(se_nacl);
+			return ERR_PTR(ret);
+		}
+	}
+
 	return &se_nacl->acl_group;
 }
 
@@ -892,6 +891,7 @@
 				struct se_wwn, wwn_group);
 	struct target_fabric_configfs *tf = wwn->wwn_tf;
 
+	configfs_remove_default_groups(&wwn->fabric_stat_group);
 	tf->tf_ops->fabric_drop_wwn(wwn);
 }
 
@@ -945,6 +945,8 @@
 			&tf->tf_wwn_fabric_stats_cit);
 	configfs_add_default_group(&wwn->fabric_stat_group, &wwn->wwn_group);
 
+	if (tf->tf_ops->add_wwn_groups)
+		tf->tf_ops->add_wwn_groups(wwn);
 	return &wwn->wwn_group;
 }
 

diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 7c92c09..c37eedc 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig

@@ -178,6 +178,7 @@
 config HISI_THERMAL
 	tristate "Hisilicon thermal driver"
 	depends on (ARCH_HISI && CPU_THERMAL && OF) || COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  Enable this to plug hisilicon's thermal sensor driver into the Linux
 	  thermal framework. cpufreq is used as the cooling device to throttle
@@ -197,6 +198,7 @@
 config SPEAR_THERMAL
 	tristate "SPEAr thermal sensor driver"
 	depends on PLAT_SPEAR || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on OF
 	help
 	  Enable this to plug the SPEAr thermal sensor driver into the Linux
@@ -206,6 +208,7 @@
 	tristate "Rockchip thermal driver"
 	depends on ARCH_ROCKCHIP || COMPILE_TEST
 	depends on RESET_CONTROLLER
+	depends on HAS_IOMEM
 	help
 	  Rockchip thermal driver provides support for Temperature sensor
 	  ADC (TS-ADC) found on Rockchip SoCs. It supports one critical
@@ -214,7 +217,7 @@
 
 config RCAR_THERMAL
 	tristate "Renesas R-Car thermal driver"
-	depends on ARCH_SHMOBILE || COMPILE_TEST
+	depends on ARCH_RENESAS || COMPILE_TEST
 	depends on HAS_IOMEM
 	help
 	  Enable this to plug the R-Car thermal sensor driver into the Linux
@@ -223,6 +226,7 @@
 config KIRKWOOD_THERMAL
 	tristate "Temperature sensor on Marvell Kirkwood SoCs"
 	depends on MACH_KIRKWOOD || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on OF
 	help
 	  Support for the Kirkwood thermal sensor driver into the Linux thermal
@@ -231,6 +235,7 @@
 config DOVE_THERMAL
 	tristate "Temperature sensor on Marvell Dove SoCs"
 	depends on ARCH_DOVE || MACH_DOVE || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on OF
 	help
 	  Support for the Dove thermal sensor driver in the Linux thermal
@@ -249,6 +254,7 @@
 config ARMADA_THERMAL
 	tristate "Armada 370/XP thermal management"
 	depends on ARCH_MVEBU || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on OF
 	help
 	  Enable this option if you want to have support for thermal management
@@ -266,7 +272,8 @@
 
 config DB8500_CPUFREQ_COOLING
 	tristate "DB8500 cpufreq cooling"
-	depends on ARCH_U8500
+	depends on ARCH_U8500 || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on CPU_THERMAL
 	default y
 	help
@@ -365,8 +372,18 @@
 	  Thermal reporting device will provide temperature reading,
 	  programmable trip points and other information.
 
+config MTK_THERMAL
+	tristate "Temperature sensor driver for mediatek SoCs"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	depends on HAS_IOMEM
+	default y
+	help
+	  Enable this option if you want to have support for thermal management
+	  controller present in Mediatek SoCs
+
 menu "Texas Instruments thermal drivers"
 depends on ARCH_HAS_BANDGAP || COMPILE_TEST
+depends on HAS_IOMEM
 source "drivers/thermal/ti-soc-thermal/Kconfig"
 endmenu
 

diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index cfae6a6..8e9cbc3 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile

@@ -48,3 +48,4 @@
 obj-$(CONFIG_ST_THERMAL)	+= st/
 obj-$(CONFIG_TEGRA_SOCTHERM)	+= tegra_soctherm.o
 obj-$(CONFIG_HISI_THERMAL)     += hisi_thermal.o
+obj-$(CONFIG_MTK_THERMAL)	+= mtk_thermal.o

diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c
index 00d81af..6a6ec1c 100644
--- a/drivers/thermal/intel_pch_thermal.c
+++ b/drivers/thermal/intel_pch_thermal.c

@@ -24,6 +24,7 @@
 
 /* Intel PCH thermal Device IDs */
 #define PCH_THERMAL_DID_WPT	0x9CA4 /* Wildcat Point */
+#define PCH_THERMAL_DID_SKL	0x9D31 /* Skylake PCH */
 
 /* Wildcat Point-LP  PCH Thermal registers */
 #define WPT_TEMP	0x0000	/* Temperature */
@@ -201,6 +202,10 @@
 		ptd->ops = &pch_dev_ops_wpt;
 		dev_name = "pch_wildcat_point";
 		break;
+	case PCH_THERMAL_DID_SKL:
+		ptd->ops = &pch_dev_ops_wpt;
+		dev_name = "pch_skylake";
+		break;
 	default:
 		dev_err(&pdev->dev, "unknown pch thermal device\n");
 		return -ENODEV;
@@ -266,6 +271,7 @@
 
 static struct pci_device_id intel_pch_thermal_id[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);

diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c
new file mode 100644
index 0000000..3d93b1c
--- /dev/null
+++ b/drivers/thermal/mtk_thermal.c

@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2015 MediaTek Inc.
+ * Author: Hanyi Wu <hanyi.wu@mediatek.com>
+ *         Sascha Hauer <s.hauer@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/thermal.h>
+#include <linux/reset.h>
+#include <linux/types.h>
+#include <linux/nvmem-consumer.h>
+
+/* AUXADC Registers */
+#define AUXADC_CON0_V		0x000
+#define AUXADC_CON1_V		0x004
+#define AUXADC_CON1_SET_V	0x008
+#define AUXADC_CON1_CLR_V	0x00c
+#define AUXADC_CON2_V		0x010
+#define AUXADC_DATA(channel)	(0x14 + (channel) * 4)
+#define AUXADC_MISC_V		0x094
+
+#define AUXADC_CON1_CHANNEL(x)	BIT(x)
+
+#define APMIXED_SYS_TS_CON1	0x604
+
+/* Thermal Controller Registers */
+#define TEMP_MONCTL0		0x000
+#define TEMP_MONCTL1		0x004
+#define TEMP_MONCTL2		0x008
+#define TEMP_MONIDET0		0x014
+#define TEMP_MONIDET1		0x018
+#define TEMP_MSRCTL0		0x038
+#define TEMP_AHBPOLL		0x040
+#define TEMP_AHBTO		0x044
+#define TEMP_ADCPNP0		0x048
+#define TEMP_ADCPNP1		0x04c
+#define TEMP_ADCPNP2		0x050
+#define TEMP_ADCPNP3		0x0b4
+
+#define TEMP_ADCMUX		0x054
+#define TEMP_ADCEN		0x060
+#define TEMP_PNPMUXADDR		0x064
+#define TEMP_ADCMUXADDR		0x068
+#define TEMP_ADCENADDR		0x074
+#define TEMP_ADCVALIDADDR	0x078
+#define TEMP_ADCVOLTADDR	0x07c
+#define TEMP_RDCTRL		0x080
+#define TEMP_ADCVALIDMASK	0x084
+#define TEMP_ADCVOLTAGESHIFT	0x088
+#define TEMP_ADCWRITECTRL	0x08c
+#define TEMP_MSR0		0x090
+#define TEMP_MSR1		0x094
+#define TEMP_MSR2		0x098
+#define TEMP_MSR3		0x0B8
+
+#define TEMP_SPARE0		0x0f0
+
+#define PTPCORESEL		0x400
+
+#define TEMP_MONCTL1_PERIOD_UNIT(x)	((x) & 0x3ff)
+
+#define TEMP_MONCTL2_FILTER_INTERVAL(x)	(((x) & 0x3ff) << 16)
+#define TEMP_MONCTL2_SENSOR_INTERVAL(x)	((x) & 0x3ff)
+
+#define TEMP_AHBPOLL_ADC_POLL_INTERVAL(x)	(x)
+
+#define TEMP_ADCWRITECTRL_ADC_PNP_WRITE		BIT(0)
+#define TEMP_ADCWRITECTRL_ADC_MUX_WRITE		BIT(1)
+
+#define TEMP_ADCVALIDMASK_VALID_HIGH		BIT(5)
+#define TEMP_ADCVALIDMASK_VALID_POS(bit)	(bit)
+
+#define MT8173_TS1	0
+#define MT8173_TS2	1
+#define MT8173_TS3	2
+#define MT8173_TS4	3
+#define MT8173_TSABB	4
+
+/* AUXADC channel 11 is used for the temperature sensors */
+#define MT8173_TEMP_AUXADC_CHANNEL	11
+
+/* The total number of temperature sensors in the MT8173 */
+#define MT8173_NUM_SENSORS		5
+
+/* The number of banks in the MT8173 */
+#define MT8173_NUM_ZONES		4
+
+/* The number of sensing points per bank */
+#define MT8173_NUM_SENSORS_PER_ZONE	4
+
+/* Layout of the fuses providing the calibration data */
+#define MT8173_CALIB_BUF0_VALID		BIT(0)
+#define MT8173_CALIB_BUF1_ADC_GE(x)	(((x) >> 22) & 0x3ff)
+#define MT8173_CALIB_BUF0_VTS_TS1(x)	(((x) >> 17) & 0x1ff)
+#define MT8173_CALIB_BUF0_VTS_TS2(x)	(((x) >> 8) & 0x1ff)
+#define MT8173_CALIB_BUF1_VTS_TS3(x)	(((x) >> 0) & 0x1ff)
+#define MT8173_CALIB_BUF2_VTS_TS4(x)	(((x) >> 23) & 0x1ff)
+#define MT8173_CALIB_BUF2_VTS_TSABB(x)	(((x) >> 14) & 0x1ff)
+#define MT8173_CALIB_BUF0_DEGC_CALI(x)	(((x) >> 1) & 0x3f)
+#define MT8173_CALIB_BUF0_O_SLOPE(x)	(((x) >> 26) & 0x3f)
+
+#define THERMAL_NAME    "mtk-thermal"
+
+struct mtk_thermal;
+
+struct mtk_thermal_bank {
+	struct mtk_thermal *mt;
+	int id;
+};
+
+struct mtk_thermal {
+	struct device *dev;
+	void __iomem *thermal_base;
+
+	struct clk *clk_peri_therm;
+	struct clk *clk_auxadc;
+
+	struct mtk_thermal_bank banks[MT8173_NUM_ZONES];
+
+	/* lock: for getting and putting banks */
+	struct mutex lock;
+
+	/* Calibration values */
+	s32 adc_ge;
+	s32 degc_cali;
+	s32 o_slope;
+	s32 vts[MT8173_NUM_SENSORS];
+
+	struct thermal_zone_device *tzd;
+};
+
+struct mtk_thermal_bank_cfg {
+	unsigned int num_sensors;
+	unsigned int sensors[MT8173_NUM_SENSORS_PER_ZONE];
+};
+
+static const int sensor_mux_values[MT8173_NUM_SENSORS] = { 0, 1, 2, 3, 16 };
+
+/*
+ * The MT8173 thermal controller has four banks. Each bank can read up to
+ * four temperature sensors simultaneously. The MT8173 has a total of 5
+ * temperature sensors. We use each bank to measure a certain area of the
+ * SoC. Since TS2 is located centrally in the SoC it is influenced by multiple
+ * areas, hence is used in different banks.
+ *
+ * The thermal core only gets the maximum temperature of all banks, so
+ * the bank concept wouldn't be necessary here. However, the SVS (Smart
+ * Voltage Scaling) unit makes its decisions based on the same bank
+ * data, and this indeed needs the temperatures of the individual banks
+ * for making better decisions.
+ */
+static const struct mtk_thermal_bank_cfg bank_data[] = {
+	{
+		.num_sensors = 2,
+		.sensors = { MT8173_TS2, MT8173_TS3 },
+	}, {
+		.num_sensors = 2,
+		.sensors = { MT8173_TS2, MT8173_TS4 },
+	}, {
+		.num_sensors = 3,
+		.sensors = { MT8173_TS1, MT8173_TS2, MT8173_TSABB },
+	}, {
+		.num_sensors = 1,
+		.sensors = { MT8173_TS2 },
+	},
+};
+
+struct mtk_thermal_sense_point {
+	int msr;
+	int adcpnp;
+};
+
+static const struct mtk_thermal_sense_point
+		sensing_points[MT8173_NUM_SENSORS_PER_ZONE] = {
+	{
+		.msr = TEMP_MSR0,
+		.adcpnp = TEMP_ADCPNP0,
+	}, {
+		.msr = TEMP_MSR1,
+		.adcpnp = TEMP_ADCPNP1,
+	}, {
+		.msr = TEMP_MSR2,
+		.adcpnp = TEMP_ADCPNP2,
+	}, {
+		.msr = TEMP_MSR3,
+		.adcpnp = TEMP_ADCPNP3,
+	},
+};
+
+/**
+ * raw_to_mcelsius - convert a raw ADC value to mcelsius
+ * @mt:		The thermal controller
+ * @raw:	raw ADC value
+ *
+ * This converts the raw ADC value to mcelsius using the SoC specific
+ * calibration constants
+ */
+static int raw_to_mcelsius(struct mtk_thermal *mt, int sensno, s32 raw)
+{
+	s32 tmp;
+
+	raw &= 0xfff;
+
+	tmp = 203450520 << 3;
+	tmp /= 165 + mt->o_slope;
+	tmp /= 10000 + mt->adc_ge;
+	tmp *= raw - mt->vts[sensno] - 3350;
+	tmp >>= 3;
+
+	return mt->degc_cali * 500 - tmp;
+}
+
+/**
+ * mtk_thermal_get_bank - get bank
+ * @bank:	The bank
+ *
+ * The bank registers are banked, we have to select a bank in the
+ * PTPCORESEL register to access it.
+ */
+static void mtk_thermal_get_bank(struct mtk_thermal_bank *bank)
+{
+	struct mtk_thermal *mt = bank->mt;
+	u32 val;
+
+	mutex_lock(&mt->lock);
+
+	val = readl(mt->thermal_base + PTPCORESEL);
+	val &= ~0xf;
+	val |= bank->id;
+	writel(val, mt->thermal_base + PTPCORESEL);
+}
+
+/**
+ * mtk_thermal_put_bank - release bank
+ * @bank:	The bank
+ *
+ * release a bank previously taken with mtk_thermal_get_bank,
+ */
+static void mtk_thermal_put_bank(struct mtk_thermal_bank *bank)
+{
+	struct mtk_thermal *mt = bank->mt;
+
+	mutex_unlock(&mt->lock);
+}
+
+/**
+ * mtk_thermal_bank_temperature - get the temperature of a bank
+ * @bank:	The bank
+ *
+ * The temperature of a bank is considered the maximum temperature of
+ * the sensors associated to the bank.
+ */
+static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank)
+{
+	struct mtk_thermal *mt = bank->mt;
+	int i, temp = INT_MIN, max = INT_MIN;
+	u32 raw;
+
+	for (i = 0; i < bank_data[bank->id].num_sensors; i++) {
+		raw = readl(mt->thermal_base + sensing_points[i].msr);
+
+		temp = raw_to_mcelsius(mt, bank_data[bank->id].sensors[i], raw);
+
+		/*
+		 * The first read of a sensor often contains very high bogus
+		 * temperature value. Filter these out so that the system does
+		 * not immediately shut down.
+		 */
+		if (temp > 200000)
+			temp = 0;
+
+		if (temp > max)
+			max = temp;
+	}
+
+	return max;
+}
+
+static int mtk_read_temp(void *data, int *temperature)
+{
+	struct mtk_thermal *mt = data;
+	int i;
+	int tempmax = INT_MIN;
+
+	for (i = 0; i < MT8173_NUM_ZONES; i++) {
+		struct mtk_thermal_bank *bank = &mt->banks[i];
+
+		mtk_thermal_get_bank(bank);
+
+		tempmax = max(tempmax, mtk_thermal_bank_temperature(bank));
+
+		mtk_thermal_put_bank(bank);
+	}
+
+	*temperature = tempmax;
+
+	return 0;
+}
+
+static const struct thermal_zone_of_device_ops mtk_thermal_ops = {
+	.get_temp = mtk_read_temp,
+};
+
+static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num,
+				  u32 apmixed_phys_base, u32 auxadc_phys_base)
+{
+	struct mtk_thermal_bank *bank = &mt->banks[num];
+	const struct mtk_thermal_bank_cfg *cfg = &bank_data[num];
+	int i;
+
+	bank->id = num;
+	bank->mt = mt;
+
+	mtk_thermal_get_bank(bank);
+
+	/* bus clock 66M counting unit is 12 * 15.15ns * 256 = 46.540us */
+	writel(TEMP_MONCTL1_PERIOD_UNIT(12), mt->thermal_base + TEMP_MONCTL1);
+
+	/*
+	 * filt interval is 1 * 46.540us = 46.54us,
+	 * sen interval is 429 * 46.540us = 19.96ms
+	 */
+	writel(TEMP_MONCTL2_FILTER_INTERVAL(1) |
+			TEMP_MONCTL2_SENSOR_INTERVAL(429),
+			mt->thermal_base + TEMP_MONCTL2);
+
+	/* poll is set to 10u */
+	writel(TEMP_AHBPOLL_ADC_POLL_INTERVAL(768),
+	       mt->thermal_base + TEMP_AHBPOLL);
+
+	/* temperature sampling control, 1 sample */
+	writel(0x0, mt->thermal_base + TEMP_MSRCTL0);
+
+	/* exceed this polling time, IRQ would be inserted */
+	writel(0xffffffff, mt->thermal_base + TEMP_AHBTO);
+
+	/* number of interrupts per event, 1 is enough */
+	writel(0x0, mt->thermal_base + TEMP_MONIDET0);
+	writel(0x0, mt->thermal_base + TEMP_MONIDET1);
+
+	/*
+	 * The MT8173 thermal controller does not have its own ADC. Instead it
+	 * uses AHB bus accesses to control the AUXADC. To do this the thermal
+	 * controller has to be programmed with the physical addresses of the
+	 * AUXADC registers and with the various bit positions in the AUXADC.
+	 * Also the thermal controller controls a mux in the APMIXEDSYS register
+	 * space.
+	 */
+
+	/*
+	 * this value will be stored to TEMP_PNPMUXADDR (TEMP_SPARE0)
+	 * automatically by hw
+	 */
+	writel(BIT(MT8173_TEMP_AUXADC_CHANNEL), mt->thermal_base + TEMP_ADCMUX);
+
+	/* AHB address for auxadc mux selection */
+	writel(auxadc_phys_base + AUXADC_CON1_CLR_V,
+	       mt->thermal_base + TEMP_ADCMUXADDR);
+
+	/* AHB address for pnp sensor mux selection */
+	writel(apmixed_phys_base + APMIXED_SYS_TS_CON1,
+	       mt->thermal_base + TEMP_PNPMUXADDR);
+
+	/* AHB value for auxadc enable */
+	writel(BIT(MT8173_TEMP_AUXADC_CHANNEL), mt->thermal_base + TEMP_ADCEN);
+
+	/* AHB address for auxadc enable (channel 0 immediate mode selected) */
+	writel(auxadc_phys_base + AUXADC_CON1_SET_V,
+	       mt->thermal_base + TEMP_ADCENADDR);
+
+	/* AHB address for auxadc valid bit */
+	writel(auxadc_phys_base + AUXADC_DATA(MT8173_TEMP_AUXADC_CHANNEL),
+	       mt->thermal_base + TEMP_ADCVALIDADDR);
+
+	/* AHB address for auxadc voltage output */
+	writel(auxadc_phys_base + AUXADC_DATA(MT8173_TEMP_AUXADC_CHANNEL),
+	       mt->thermal_base + TEMP_ADCVOLTADDR);
+
+	/* read valid & voltage are at the same register */
+	writel(0x0, mt->thermal_base + TEMP_RDCTRL);
+
+	/* indicate where the valid bit is */
+	writel(TEMP_ADCVALIDMASK_VALID_HIGH | TEMP_ADCVALIDMASK_VALID_POS(12),
+	       mt->thermal_base + TEMP_ADCVALIDMASK);
+
+	/* no shift */
+	writel(0x0, mt->thermal_base + TEMP_ADCVOLTAGESHIFT);
+
+	/* enable auxadc mux write transaction */
+	writel(TEMP_ADCWRITECTRL_ADC_MUX_WRITE,
+	       mt->thermal_base + TEMP_ADCWRITECTRL);
+
+	for (i = 0; i < cfg->num_sensors; i++)
+		writel(sensor_mux_values[cfg->sensors[i]],
+		       mt->thermal_base + sensing_points[i].adcpnp);
+
+	writel((1 << cfg->num_sensors) - 1, mt->thermal_base + TEMP_MONCTL0);
+
+	writel(TEMP_ADCWRITECTRL_ADC_PNP_WRITE |
+	       TEMP_ADCWRITECTRL_ADC_MUX_WRITE,
+	       mt->thermal_base + TEMP_ADCWRITECTRL);
+
+	mtk_thermal_put_bank(bank);
+}
+
+static u64 of_get_phys_base(struct device_node *np)
+{
+	u64 size64;
+	const __be32 *regaddr_p;
+
+	regaddr_p = of_get_address(np, 0, &size64, NULL);
+	if (!regaddr_p)
+		return OF_BAD_ADDR;
+
+	return of_translate_address(np, regaddr_p);
+}
+
+static int mtk_thermal_get_calibration_data(struct device *dev,
+					    struct mtk_thermal *mt)
+{
+	struct nvmem_cell *cell;
+	u32 *buf;
+	size_t len;
+	int i, ret = 0;
+
+	/* Start with default values */
+	mt->adc_ge = 512;
+	for (i = 0; i < MT8173_NUM_SENSORS; i++)
+		mt->vts[i] = 260;
+	mt->degc_cali = 40;
+	mt->o_slope = 0;
+
+	cell = nvmem_cell_get(dev, "calibration-data");
+	if (IS_ERR(cell)) {
+		if (PTR_ERR(cell) == -EPROBE_DEFER)
+			return PTR_ERR(cell);
+		return 0;
+	}
+
+	buf = (u32 *)nvmem_cell_read(cell, &len);
+
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	if (len < 3 * sizeof(u32)) {
+		dev_warn(dev, "invalid calibration data\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (buf[0] & MT8173_CALIB_BUF0_VALID) {
+		mt->adc_ge = MT8173_CALIB_BUF1_ADC_GE(buf[1]);
+		mt->vts[MT8173_TS1] = MT8173_CALIB_BUF0_VTS_TS1(buf[0]);
+		mt->vts[MT8173_TS2] = MT8173_CALIB_BUF0_VTS_TS2(buf[0]);
+		mt->vts[MT8173_TS3] = MT8173_CALIB_BUF1_VTS_TS3(buf[1]);
+		mt->vts[MT8173_TS4] = MT8173_CALIB_BUF2_VTS_TS4(buf[2]);
+		mt->vts[MT8173_TSABB] = MT8173_CALIB_BUF2_VTS_TSABB(buf[2]);
+		mt->degc_cali = MT8173_CALIB_BUF0_DEGC_CALI(buf[0]);
+		mt->o_slope = MT8173_CALIB_BUF0_O_SLOPE(buf[0]);
+	} else {
+		dev_info(dev, "Device not calibrated, using default calibration values\n");
+	}
+
+out:
+	kfree(buf);
+
+	return ret;
+}
+
+static int mtk_thermal_probe(struct platform_device *pdev)
+{
+	int ret, i;
+	struct device_node *auxadc, *apmixedsys, *np = pdev->dev.of_node;
+	struct mtk_thermal *mt;
+	struct resource *res;
+	u64 auxadc_phys_base, apmixed_phys_base;
+
+	mt = devm_kzalloc(&pdev->dev, sizeof(*mt), GFP_KERNEL);
+	if (!mt)
+		return -ENOMEM;
+
+	mt->clk_peri_therm = devm_clk_get(&pdev->dev, "therm");
+	if (IS_ERR(mt->clk_peri_therm))
+		return PTR_ERR(mt->clk_peri_therm);
+
+	mt->clk_auxadc = devm_clk_get(&pdev->dev, "auxadc");
+	if (IS_ERR(mt->clk_auxadc))
+		return PTR_ERR(mt->clk_auxadc);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mt->thermal_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mt->thermal_base))
+		return PTR_ERR(mt->thermal_base);
+
+	ret = mtk_thermal_get_calibration_data(&pdev->dev, mt);
+	if (ret)
+		return ret;
+
+	mutex_init(&mt->lock);
+
+	mt->dev = &pdev->dev;
+
+	auxadc = of_parse_phandle(np, "mediatek,auxadc", 0);
+	if (!auxadc) {
+		dev_err(&pdev->dev, "missing auxadc node\n");
+		return -ENODEV;
+	}
+
+	auxadc_phys_base = of_get_phys_base(auxadc);
+
+	of_node_put(auxadc);
+
+	if (auxadc_phys_base == OF_BAD_ADDR) {
+		dev_err(&pdev->dev, "Can't get auxadc phys address\n");
+		return -EINVAL;
+	}
+
+	apmixedsys = of_parse_phandle(np, "mediatek,apmixedsys", 0);
+	if (!apmixedsys) {
+		dev_err(&pdev->dev, "missing apmixedsys node\n");
+		return -ENODEV;
+	}
+
+	apmixed_phys_base = of_get_phys_base(apmixedsys);
+
+	of_node_put(apmixedsys);
+
+	if (apmixed_phys_base == OF_BAD_ADDR) {
+		dev_err(&pdev->dev, "Can't get auxadc phys address\n");
+		return -EINVAL;
+	}
+
+	ret = clk_prepare_enable(mt->clk_auxadc);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't enable auxadc clk: %d\n", ret);
+		return ret;
+	}
+
+	ret = device_reset(&pdev->dev);
+	if (ret)
+		goto err_disable_clk_auxadc;
+
+	ret = clk_prepare_enable(mt->clk_peri_therm);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't enable peri clk: %d\n", ret);
+		goto err_disable_clk_auxadc;
+	}
+
+	for (i = 0; i < MT8173_NUM_ZONES; i++)
+		mtk_thermal_init_bank(mt, i, apmixed_phys_base,
+				      auxadc_phys_base);
+
+	platform_set_drvdata(pdev, mt);
+
+	mt->tzd = thermal_zone_of_sensor_register(&pdev->dev, 0, mt,
+				&mtk_thermal_ops);
+	if (IS_ERR(mt->tzd))
+		goto err_register;
+
+	return 0;
+
+err_register:
+	clk_disable_unprepare(mt->clk_peri_therm);
+
+err_disable_clk_auxadc:
+	clk_disable_unprepare(mt->clk_auxadc);
+
+	return ret;
+}
+
+static int mtk_thermal_remove(struct platform_device *pdev)
+{
+	struct mtk_thermal *mt = platform_get_drvdata(pdev);
+
+	thermal_zone_of_sensor_unregister(&pdev->dev, mt->tzd);
+
+	clk_disable_unprepare(mt->clk_peri_therm);
+	clk_disable_unprepare(mt->clk_auxadc);
+
+	return 0;
+}
+
+static const struct of_device_id mtk_thermal_of_match[] = {
+	{
+		.compatible = "mediatek,mt8173-thermal",
+	}, {
+	},
+};
+
+static struct platform_driver mtk_thermal_driver = {
+	.probe = mtk_thermal_probe,
+	.remove = mtk_thermal_remove,
+	.driver = {
+		.name = THERMAL_NAME,
+		.of_match_table = mtk_thermal_of_match,
+	},
+};
+
+module_platform_driver(mtk_thermal_driver);
+
+MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de");
+MODULE_AUTHOR("Hanyi Wu <hanyi.wu@mediatek.com>");
+MODULE_DESCRIPTION("Mediatek thermal driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 9043f8f..49ac23d 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c

@@ -555,6 +555,87 @@
 }
 EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister);
 
+static void devm_thermal_zone_of_sensor_release(struct device *dev, void *res)
+{
+	thermal_zone_of_sensor_unregister(dev,
+					  *(struct thermal_zone_device **)res);
+}
+
+static int devm_thermal_zone_of_sensor_match(struct device *dev, void *res,
+					     void *data)
+{
+	struct thermal_zone_device **r = res;
+
+	if (WARN_ON(!r || !*r))
+		return 0;
+
+	return *r == data;
+}
+
+/**
+ * devm_thermal_zone_of_sensor_register - Resource managed version of
+ *				thermal_zone_of_sensor_register()
+ * @dev: a valid struct device pointer of a sensor device. Must contain
+ *       a valid .of_node, for the sensor node.
+ * @sensor_id: a sensor identifier, in case the sensor IP has more
+ *	       than one sensors
+ * @data: a private pointer (owned by the caller) that will be passed
+ *	  back, when a temperature reading is needed.
+ * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp.
+ *
+ * Refer thermal_zone_of_sensor_register() for more details.
+ *
+ * Return: On success returns a valid struct thermal_zone_device,
+ * otherwise, it returns a corresponding ERR_PTR(). Caller must
+ * check the return value with help of IS_ERR() helper.
+ * Registered hermal_zone_device device will automatically be
+ * released when device is unbounded.
+ */
+struct thermal_zone_device *devm_thermal_zone_of_sensor_register(
+	struct device *dev, int sensor_id,
+	void *data, const struct thermal_zone_of_device_ops *ops)
+{
+	struct thermal_zone_device **ptr, *tzd;
+
+	ptr = devres_alloc(devm_thermal_zone_of_sensor_release, sizeof(*ptr),
+			   GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	tzd = thermal_zone_of_sensor_register(dev, sensor_id, data, ops);
+	if (IS_ERR(tzd)) {
+		devres_free(ptr);
+		return tzd;
+	}
+
+	*ptr = tzd;
+	devres_add(dev, ptr);
+
+	return tzd;
+}
+EXPORT_SYMBOL_GPL(devm_thermal_zone_of_sensor_register);
+
+/**
+ * devm_thermal_zone_of_sensor_unregister - Resource managed version of
+ *				thermal_zone_of_sensor_unregister().
+ * @dev: Device for which which resource was allocated.
+ * @tzd: a pointer to struct thermal_zone_device where the sensor is registered.
+ *
+ * This function removes the sensor callbacks and private data from the
+ * thermal zone device registered with devm_thermal_zone_of_sensor_register()
+ * API. It will also silent the zone by remove the .get_temp() and .get_trend()
+ * thermal zone device callbacks.
+ * Normally this function will not need to be called and the resource
+ * management code will ensure that the resource is freed.
+ */
+void devm_thermal_zone_of_sensor_unregister(struct device *dev,
+					    struct thermal_zone_device *tzd)
+{
+	WARN_ON(devres_release(dev, devm_thermal_zone_of_sensor_release,
+			       devm_thermal_zone_of_sensor_match, tzd));
+}
+EXPORT_SYMBOL_GPL(devm_thermal_zone_of_sensor_unregister);
+
 /***   functions parsing device tree nodes   ***/
 
 /**

diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 0e735ac..82daba0 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c

@@ -430,8 +430,7 @@
 	struct rcar_thermal_priv *priv;
 	struct device *dev = &pdev->dev;
 	struct resource *res, *irq;
-	const struct of_device_id *of_id = of_match_device(rcar_thermal_dt_ids, dev);
-	unsigned long of_data = (unsigned long)of_id->data;
+	unsigned long of_data = (unsigned long)of_device_get_match_data(dev);
 	int mres = 0;
 	int i;
 	int ret = -ENODEV;

diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index b58e3fb..233a564 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c

@@ -58,8 +58,8 @@
 
 /**
  * The conversion table has the adc value and temperature.
- * ADC_DECREMENT: the adc value is of diminishing.(e.g. v2_code_table)
- * ADC_INCREMENT: the adc value is incremental.(e.g. v3_code_table)
+ * ADC_DECREMENT: the adc value is of diminishing.(e.g. rk3288_code_table)
+ * ADC_INCREMENT: the adc value is incremental.(e.g. rk3368_code_table)
  */
 enum adc_sort_mode {
 	ADC_DECREMENT = 0,
@@ -135,7 +135,13 @@
 	enum tshut_polarity tshut_polarity;
 };
 
-/* TSADC Sensor info define: */
+/**
+ * TSADC Sensor Register description:
+ *
+ * TSADCV2_* are used for RK3288 SoCs, the other chips can reuse it.
+ * TSADCV3_* are used for newer SoCs than RK3288. (e.g: RK3228, RK3399)
+ *
+ */
 #define TSADCV2_AUTO_CON			0x04
 #define TSADCV2_INT_EN				0x08
 #define TSADCV2_INT_PD				0x0c
@@ -149,13 +155,20 @@
 #define TSADCV2_AUTO_EN				BIT(0)
 #define TSADCV2_AUTO_SRC_EN(chn)		BIT(4 + (chn))
 #define TSADCV2_AUTO_TSHUT_POLARITY_HIGH	BIT(8)
+/**
+ * TSADCV1_AUTO_Q_SEL_EN:
+ * whether select (1024 - tsadc_q) as output
+ * 1'b0:use tsadc_q as output(temperature-code is rising sequence)
+ * 1'b1:use(1024 - tsadc_q) as output (temperature-code is falling sequence)
+ */
+#define TSADCV3_AUTO_Q_SEL_EN			BIT(1)
 
 #define TSADCV2_INT_SRC_EN(chn)			BIT(chn)
 #define TSADCV2_SHUT_2GPIO_SRC_EN(chn)		BIT(4 + (chn))
 #define TSADCV2_SHUT_2CRU_SRC_EN(chn)		BIT(8 + (chn))
 
-#define TSADCV1_INT_PD_CLEAR_MASK		~BIT(16)
 #define TSADCV2_INT_PD_CLEAR_MASK		~BIT(8)
+#define TSADCV3_INT_PD_CLEAR_MASK		~BIT(16)
 
 #define TSADCV2_DATA_MASK			0xfff
 #define TSADCV3_DATA_MASK			0x3ff
@@ -177,45 +190,46 @@
  * linearly interpolated.
  * Code to Temperature mapping should be updated based on sillcon results.
  */
-static const struct tsadc_table v1_code_table[] = {
-	{TSADCV3_DATA_MASK, -40000},
-	{436, -40000},
-	{431, -35000},
-	{426, -30000},
-	{421, -25000},
-	{416, -20000},
-	{411, -15000},
-	{406, -10000},
-	{401, -5000},
-	{395, 0},
-	{390, 5000},
-	{385, 10000},
-	{380, 15000},
-	{375, 20000},
-	{370, 25000},
-	{364, 30000},
-	{359, 35000},
-	{354, 40000},
-	{349, 45000},
-	{343, 50000},
-	{338, 55000},
-	{333, 60000},
-	{328, 65000},
-	{322, 70000},
-	{317, 75000},
-	{312, 80000},
-	{307, 85000},
-	{301, 90000},
-	{296, 95000},
-	{291, 100000},
-	{286, 105000},
-	{280, 110000},
-	{275, 115000},
-	{270, 120000},
-	{264, 125000},
+static const struct tsadc_table rk3228_code_table[] = {
+	{0, -40000},
+	{588, -40000},
+	{593, -35000},
+	{598, -30000},
+	{603, -25000},
+	{608, -20000},
+	{613, -15000},
+	{618, -10000},
+	{623, -5000},
+	{629, 0},
+	{634, 5000},
+	{639, 10000},
+	{644, 15000},
+	{649, 20000},
+	{654, 25000},
+	{660, 30000},
+	{665, 35000},
+	{670, 40000},
+	{675, 45000},
+	{681, 50000},
+	{686, 55000},
+	{691, 60000},
+	{696, 65000},
+	{702, 70000},
+	{707, 75000},
+	{712, 80000},
+	{717, 85000},
+	{723, 90000},
+	{728, 95000},
+	{733, 100000},
+	{738, 105000},
+	{744, 110000},
+	{749, 115000},
+	{754, 120000},
+	{760, 125000},
+	{TSADCV2_DATA_MASK, 125000},
 };
 
-static const struct tsadc_table v2_code_table[] = {
+static const struct tsadc_table rk3288_code_table[] = {
 	{TSADCV2_DATA_MASK, -40000},
 	{3800, -40000},
 	{3792, -35000},
@@ -253,7 +267,7 @@
 	{3421, 125000},
 };
 
-static const struct tsadc_table v3_code_table[] = {
+static const struct tsadc_table rk3368_code_table[] = {
 	{0, -40000},
 	{106, -40000},
 	{108, -35000},
@@ -292,42 +306,43 @@
 	{TSADCV3_DATA_MASK, 125000},
 };
 
-static const struct tsadc_table v4_code_table[] = {
-	{TSADCV3_DATA_MASK, -40000},
-	{431, -40000},
-	{426, -35000},
-	{421, -30000},
-	{415, -25000},
-	{410, -20000},
-	{405, -15000},
-	{399, -10000},
-	{394, -5000},
-	{389, 0},
-	{383, 5000},
-	{378, 10000},
-	{373, 15000},
-	{367, 20000},
-	{362, 25000},
-	{357, 30000},
-	{351, 35000},
-	{346, 40000},
-	{340, 45000},
-	{335, 50000},
-	{330, 55000},
-	{324, 60000},
-	{319, 65000},
-	{313, 70000},
-	{308, 75000},
-	{302, 80000},
-	{297, 85000},
-	{291, 90000},
-	{286, 95000},
-	{281, 100000},
-	{275, 105000},
-	{270, 110000},
-	{264, 115000},
-	{259, 120000},
-	{253, 125000},
+static const struct tsadc_table rk3399_code_table[] = {
+	{0, -40000},
+	{593, -40000},
+	{598, -35000},
+	{603, -30000},
+	{609, -25000},
+	{614, -20000},
+	{619, -15000},
+	{625, -10000},
+	{630, -5000},
+	{635, 0},
+	{641, 5000},
+	{646, 10000},
+	{651, 15000},
+	{657, 20000},
+	{662, 25000},
+	{667, 30000},
+	{673, 35000},
+	{678, 40000},
+	{684, 45000},
+	{689, 50000},
+	{694, 55000},
+	{700, 60000},
+	{705, 65000},
+	{711, 70000},
+	{716, 75000},
+	{722, 80000},
+	{727, 85000},
+	{733, 90000},
+	{738, 95000},
+	{743, 100000},
+	{749, 105000},
+	{754, 110000},
+	{760, 115000},
+	{765, 120000},
+	{771, 125000},
+	{TSADCV3_DATA_MASK, 125000},
 };
 
 static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
@@ -411,7 +426,7 @@
 	 * temperature between 2 table entries is linear and interpolate
 	 * to produce less granular result.
 	 */
-	num = table.id[mid].temp - v2_code_table[mid - 1].temp;
+	num = table.id[mid].temp - table.id[mid - 1].temp;
 	num *= abs(table.id[mid - 1].code - code);
 	denom = abs(table.id[mid - 1].code - table.id[mid].code);
 	*temp = table.id[mid - 1].temp + (num / denom);
@@ -453,14 +468,6 @@
 		       regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
 }
 
-static void rk_tsadcv1_irq_ack(void __iomem *regs)
-{
-	u32 val;
-
-	val = readl_relaxed(regs + TSADCV2_INT_PD);
-	writel_relaxed(val & TSADCV1_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
-}
-
 static void rk_tsadcv2_irq_ack(void __iomem *regs)
 {
 	u32 val;
@@ -469,6 +476,14 @@
 	writel_relaxed(val & TSADCV2_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
 }
 
+static void rk_tsadcv3_irq_ack(void __iomem *regs)
+{
+	u32 val;
+
+	val = readl_relaxed(regs + TSADCV2_INT_PD);
+	writel_relaxed(val & TSADCV3_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+}
+
 static void rk_tsadcv2_control(void __iomem *regs, bool enable)
 {
 	u32 val;
@@ -482,6 +497,25 @@
 	writel_relaxed(val, regs + TSADCV2_AUTO_CON);
 }
 
+/**
+ * @rk_tsadcv3_control:
+ * TSADC controller works at auto mode, and some SoCs need set the tsadc_q_sel
+ * bit on TSADCV2_AUTO_CON[1]. The (1024 - tsadc_q) as output adc value if
+ * setting this bit to enable.
+ */
+static void rk_tsadcv3_control(void __iomem *regs, bool enable)
+{
+	u32 val;
+
+	val = readl_relaxed(regs + TSADCV2_AUTO_CON);
+	if (enable)
+		val |= TSADCV2_AUTO_EN | TSADCV3_AUTO_Q_SEL_EN;
+	else
+		val &= ~TSADCV2_AUTO_EN;
+
+	writel_relaxed(val, regs + TSADCV2_AUTO_CON);
+}
+
 static int rk_tsadcv2_get_temp(struct chip_tsadc_table table,
 			       int chn, void __iomem *regs, int *temp)
 {
@@ -531,17 +565,17 @@
 	.tshut_temp = 95000,
 
 	.initialize = rk_tsadcv2_initialize,
-	.irq_ack = rk_tsadcv1_irq_ack,
-	.control = rk_tsadcv2_control,
+	.irq_ack = rk_tsadcv3_irq_ack,
+	.control = rk_tsadcv3_control,
 	.get_temp = rk_tsadcv2_get_temp,
 	.set_tshut_temp = rk_tsadcv2_tshut_temp,
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
 
 	.table = {
-		.id = v1_code_table,
-		.length = ARRAY_SIZE(v1_code_table),
+		.id = rk3228_code_table,
+		.length = ARRAY_SIZE(rk3228_code_table),
 		.data_mask = TSADCV3_DATA_MASK,
-		.mode = ADC_DECREMENT,
+		.mode = ADC_INCREMENT,
 	},
 };
 
@@ -562,8 +596,8 @@
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
 
 	.table = {
-		.id = v2_code_table,
-		.length = ARRAY_SIZE(v2_code_table),
+		.id = rk3288_code_table,
+		.length = ARRAY_SIZE(rk3288_code_table),
 		.data_mask = TSADCV2_DATA_MASK,
 		.mode = ADC_DECREMENT,
 	},
@@ -586,8 +620,8 @@
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
 
 	.table = {
-		.id = v3_code_table,
-		.length = ARRAY_SIZE(v3_code_table),
+		.id = rk3368_code_table,
+		.length = ARRAY_SIZE(rk3368_code_table),
 		.data_mask = TSADCV3_DATA_MASK,
 		.mode = ADC_INCREMENT,
 	},
@@ -603,17 +637,17 @@
 	.tshut_temp = 95000,
 
 	.initialize = rk_tsadcv2_initialize,
-	.irq_ack = rk_tsadcv1_irq_ack,
-	.control = rk_tsadcv2_control,
+	.irq_ack = rk_tsadcv3_irq_ack,
+	.control = rk_tsadcv3_control,
 	.get_temp = rk_tsadcv2_get_temp,
 	.set_tshut_temp = rk_tsadcv2_tshut_temp,
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
 
 	.table = {
-		.id = v4_code_table,
-		.length = ARRAY_SIZE(v4_code_table),
+		.id = rk3399_code_table,
+		.length = ARRAY_SIZE(rk3399_code_table),
 		.data_mask = TSADCV3_DATA_MASK,
-		.mode = ADC_DECREMENT,
+		.mode = ADC_INCREMENT,
 	},
 };
 
@@ -693,15 +727,14 @@
 			 thermal->chip->tshut_temp);
 		thermal->tshut_temp = thermal->chip->tshut_temp;
 	} else {
+		if (shut_temp > INT_MAX) {
+			dev_err(dev, "Invalid tshut temperature specified: %d\n",
+				shut_temp);
+			return -ERANGE;
+		}
 		thermal->tshut_temp = shut_temp;
 	}
 
-	if (thermal->tshut_temp > INT_MAX) {
-		dev_err(dev, "Invalid tshut temperature specified: %d\n",
-			thermal->tshut_temp);
-		return -ERANGE;
-	}
-
 	if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) {
 		dev_warn(dev,
 			 "Missing tshut mode property, using default (%s)\n",

diff --git a/drivers/thermal/samsung/Kconfig b/drivers/thermal/samsung/Kconfig
index e0da386..222e644 100644
--- a/drivers/thermal/samsung/Kconfig
+++ b/drivers/thermal/samsung/Kconfig

@@ -1,6 +1,7 @@
 config EXYNOS_THERMAL
 	tristate "Exynos thermal management unit driver"
 	depends on THERMAL_OF
+	depends on HAS_IOMEM
 	help
 	  If you say yes here you get support for the TMU (Thermal Management
 	  Unit) driver for SAMSUNG EXYNOS series of SoCs. This driver initialises

diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index fa61eff..f3ce94e 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c

@@ -184,6 +184,7 @@
  * @temp_error2: fused value of the second point trim.
  * @regulator: pointer to the TMU regulator structure.
  * @reg_conf: pointer to structure to register with core thermal.
+ * @ntrip: number of supported trip points.
  * @tmu_initialize: SoC specific TMU initialization method
  * @tmu_control: SoC specific TMU control method
  * @tmu_read: SoC specific TMU temperature read method
@@ -203,6 +204,7 @@
 	u16 temp_error1, temp_error2;
 	struct regulator *regulator;
 	struct thermal_zone_device *tzd;
+	unsigned int ntrip;
 
 	int (*tmu_initialize)(struct platform_device *pdev);
 	void (*tmu_control)(struct platform_device *pdev, bool on);
@@ -346,6 +348,14 @@
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	int ret;
 
+	if (of_thermal_get_ntrips(data->tzd) > data->ntrip) {
+		dev_info(&pdev->dev,
+			 "More trip points than supported by this TMU.\n");
+		dev_info(&pdev->dev,
+			 "%d trip points should be configured in polling mode.\n",
+			 (of_thermal_get_ntrips(data->tzd) - data->ntrip));
+	}
+
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
 	if (!IS_ERR(data->clk_sec))
@@ -1210,6 +1220,7 @@
 		data->tmu_control = exynos4210_tmu_control;
 		data->tmu_read = exynos4210_tmu_read;
 		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		data->ntrip = 4;
 		break;
 	case SOC_ARCH_EXYNOS3250:
 	case SOC_ARCH_EXYNOS4412:
@@ -1222,6 +1233,7 @@
 		data->tmu_read = exynos4412_tmu_read;
 		data->tmu_set_emulation = exynos4412_tmu_set_emulation;
 		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		data->ntrip = 4;
 		break;
 	case SOC_ARCH_EXYNOS5433:
 		data->tmu_initialize = exynos5433_tmu_initialize;
@@ -1229,6 +1241,7 @@
 		data->tmu_read = exynos4412_tmu_read;
 		data->tmu_set_emulation = exynos4412_tmu_set_emulation;
 		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		data->ntrip = 8;
 		break;
 	case SOC_ARCH_EXYNOS5440:
 		data->tmu_initialize = exynos5440_tmu_initialize;
@@ -1236,6 +1249,7 @@
 		data->tmu_read = exynos5440_tmu_read;
 		data->tmu_set_emulation = exynos5440_tmu_set_emulation;
 		data->tmu_clear_irqs = exynos5440_tmu_clear_irqs;
+		data->ntrip = 4;
 		break;
 	case SOC_ARCH_EXYNOS7:
 		data->tmu_initialize = exynos7_tmu_initialize;
@@ -1243,6 +1257,7 @@
 		data->tmu_read = exynos7_tmu_read;
 		data->tmu_set_emulation = exynos4412_tmu_set_emulation;
 		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		data->ntrip = 8;
 		break;
 	default:
 		dev_err(&pdev->dev, "Platform not supported\n");
@@ -1295,7 +1310,7 @@
 	 * TODO: Add regulator as an SOC feature, so that regulator enable
 	 * is a compulsory call.
 	 */
-	data->regulator = devm_regulator_get(&pdev->dev, "vtmu");
+	data->regulator = devm_regulator_get_optional(&pdev->dev, "vtmu");
 	if (!IS_ERR(data->regulator)) {
 		ret = regulator_enable(data->regulator);
 		if (ret) {
@@ -1303,6 +1318,8 @@
 			return ret;
 		}
 	} else {
+		if (PTR_ERR(data->regulator) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
 		dev_info(&pdev->dev, "Regulator node (vtmu) not found\n");
 	}
 

diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c
index 74ea576..1369752 100644
--- a/drivers/thermal/tegra_soctherm.c
+++ b/drivers/thermal/tegra_soctherm.c

@@ -57,7 +57,7 @@
 #define READBACK_VALUE_MASK			0xff00
 #define READBACK_VALUE_SHIFT			8
 #define READBACK_ADD_HALF			BIT(7)
-#define READBACK_NEGATE				BIT(1)
+#define READBACK_NEGATE				BIT(0)
 
 #define FUSE_TSENSOR8_CALIB			0x180
 #define FUSE_SPARE_REALIGNMENT_REG_0		0x1fc

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index a0a8fd1..d4b5465 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c

@@ -454,6 +454,10 @@
 {
 	enum thermal_trip_type type;
 
+	/* Ignore disabled trip points */
+	if (test_bit(trip, &tz->trips_disabled))
+		return;
+
 	tz->ops->get_trip_type(tz, trip, &type);
 
 	if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
@@ -1800,6 +1804,7 @@
 {
 	struct thermal_zone_device *tz;
 	enum thermal_trip_type trip_type;
+	int trip_temp;
 	int result;
 	int count;
 	int passive = 0;
@@ -1871,9 +1876,15 @@
 		goto unregister;
 
 	for (count = 0; count < trips; count++) {
-		tz->ops->get_trip_type(tz, count, &trip_type);
+		if (tz->ops->get_trip_type(tz, count, &trip_type))
+			set_bit(count, &tz->trips_disabled);
 		if (trip_type == THERMAL_TRIP_PASSIVE)
 			passive = 1;
+		if (tz->ops->get_trip_temp(tz, count, &trip_temp))
+			set_bit(count, &tz->trips_disabled);
+		/* Check for bogus trip points */
+		if (trip_temp == 0)
+			set_bit(count, &tz->trips_disabled);
 	}
 
 	if (!passive) {

diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
index 1e34a1e..06ea976 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c

@@ -1265,7 +1265,7 @@
 int ti_bandgap_probe(struct platform_device *pdev)
 {
 	struct ti_bandgap *bgp;
-	int clk_rate, ret = 0, i;
+	int clk_rate, ret, i;
 
 	bgp = ti_bandgap_build(pdev);
 	if (IS_ERR(bgp)) {
@@ -1288,16 +1288,14 @@
 	}
 
 	bgp->fclock = clk_get(NULL, bgp->conf->fclock_name);
-	ret = IS_ERR(bgp->fclock);
-	if (ret) {
+	if (IS_ERR(bgp->fclock)) {
 		dev_err(&pdev->dev, "failed to request fclock reference\n");
 		ret = PTR_ERR(bgp->fclock);
 		goto free_irqs;
 	}
 
 	bgp->div_clk = clk_get(NULL, bgp->conf->div_ck_name);
-	ret = IS_ERR(bgp->div_clk);
-	if (ret) {
+	if (IS_ERR(bgp->div_clk)) {
 		dev_err(&pdev->dev, "failed to request div_ts_ck clock ref\n");
 		ret = PTR_ERR(bgp->div_clk);
 		goto free_irqs;
@@ -1314,7 +1312,7 @@
 		 * may not be accurate
 		 */
 		val = ti_bandgap_readl(bgp, tsr->bgap_efuse);
-		if (ret || !val)
+		if (!val)
 			dev_info(&pdev->dev,
 				 "Non-trimmed BGAP, Temp not accurate\n");
 	}

diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c
index 9c9f741..9743353 100644
--- a/drivers/usb/dwc3/dwc3-of-simple.c
+++ b/drivers/usb/dwc3/dwc3-of-simple.c

@@ -42,6 +42,7 @@
 	struct device		*dev = &pdev->dev;
 	struct device_node	*np = dev->of_node;
 
+	unsigned int		count;
 	int			ret;
 	int			i;
 
@@ -49,11 +50,11 @@
 	if (!simple)
 		return -ENOMEM;
 
-	ret = of_clk_get_parent_count(np);
-	if (ret < 0)
-		return ret;
+	count = of_clk_get_parent_count(np);
+	if (!count)
+		return -ENOENT;
 
-	simple->num_clocks = ret;
+	simple->num_clocks = count;
 
 	simple->clks = devm_kcalloc(dev, simple->num_clocks,
 			sizeof(struct clk *), GFP_KERNEL);

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 8cfce10..e21ca2bd 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c

@@ -1147,8 +1147,8 @@
 	ffs->sb              = sb;
 	data->ffs_data       = NULL;
 	sb->s_fs_info        = ffs;
-	sb->s_blocksize      = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize      = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic          = FUNCTIONFS_MAGIC;
 	sb->s_op             = &ffs_sb_operations;
 	sb->s_time_gran      = 1;

diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 5cdaf01..e64479f 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c

@@ -1954,8 +1954,8 @@
 		return -ENODEV;
 
 	/* superblock */
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = GADGETFS_MAGIC;
 	sb->s_op = &gadget_fs_operations;
 	sb->s_time_gran = 1;

diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index dba5136..9090186 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c

@@ -123,7 +123,7 @@
 		unsigned int max_sectors = 64;
 
 		if (us->fflags & US_FL_MAX_SECTORS_MIN)
-			max_sectors = PAGE_CACHE_SIZE >> 9;
+			max_sectors = PAGE_SIZE >> 9;
 		if (queue_max_hw_sectors(sdev->request_queue) > max_sectors)
 			blk_queue_max_hw_sectors(sdev->request_queue,
 					      max_sectors);

diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c
index 71a923e..3b1ca44 100644
--- a/drivers/video/fbdev/pvr2fb.c
+++ b/drivers/video/fbdev/pvr2fb.c

@@ -735,7 +735,7 @@
 
 out_unmap:
 	for (i = 0; i < nr_pages; i++)
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 
 	kfree(pages);
 

diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index 48ccf6d..e9cf199 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c

@@ -174,7 +174,7 @@
 struct simplefb_par {
 	u32 palette[PSEUDO_PALETTE_SIZE];
 #if defined CONFIG_OF && defined CONFIG_COMMON_CLK
-	int clk_count;
+	unsigned int clk_count;
 	struct clk **clks;
 #endif
 #if defined CONFIG_OF && defined CONFIG_REGULATOR
@@ -213,7 +213,7 @@
 		return 0;
 
 	par->clk_count = of_clk_get_parent_count(np);
-	if (par->clk_count <= 0)
+	if (!par->clk_count)
 		return 0;
 
 	par->clks = kcalloc(par->clk_count, sizeof(struct clk *), GFP_KERNEL);

diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index f6f28cc..e76bd91 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c

@@ -17,6 +17,7 @@
  *
  */
 
+#include <linux/delay.h>
 #define VIRTIO_PCI_NO_LEGACY
 #include "virtio_pci_common.h"
 
@@ -271,9 +272,13 @@
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* 0 status means a reset. */
 	vp_iowrite8(0, &vp_dev->common->device_status);
-	/* Flush out the status write, and flush in device writes,
-	 * including MSI-X interrupts, if any. */
-	vp_ioread8(&vp_dev->common->device_status);
+	/* After writing 0 to device_status, the driver MUST wait for a read of
+	 * device_status to return 0 before reinitializing the device.
+	 * This will flush out the status write, and flush in device writes,
+	 * including MSI-X interrupts, if any.
+	 */
+	while (vp_ioread8(&vp_dev->common->device_status))
+		msleep(1);
 	/* Flush pending VQ/configuration callbacks. */
 	vp_synchronize_vectors(vdev);
 }

diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 8fc284c..8f89bd8 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c

@@ -484,7 +484,7 @@
 	static int die_nmi_called;
 
 	if (!hpwdt_nmi_decoding)
-		goto out;
+		return NMI_DONE;
 
 	spin_lock_irqsave(&rom_lock, rom_pl);
 	if (!die_nmi_called && !is_icru && !is_uefi)
@@ -497,11 +497,11 @@
 
 	if (!is_icru && !is_uefi) {
 		if (cmn_regs.u1.ral == 0) {
-			panic("An NMI occurred, "
-				"but unable to determine source.\n");
+			nmi_panic(regs, "An NMI occurred, but unable to determine source.\n");
+			return NMI_HANDLED;
 		}
 	}
-	panic("An NMI occurred. Depending on your system the reason "
+	nmi_panic(regs, "An NMI occurred. Depending on your system the reason "
 		"for the NMI is logged in any one of the following "
 		"resources:\n"
 		"1. Integrated Management Log (IML)\n"
@@ -509,8 +509,7 @@
 		"3. OA Forward Progress Log\n"
 		"4. iLO Event Log");
 
-out:
-	return NMI_DONE;
+	return NMI_HANDLED;
 }
 #endif /* CONFIG_HPWDT_NMI_DECODING */
 

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 488017a..cb7138c 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c

@@ -484,9 +484,19 @@
 	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
 	int rc = 0;
 
-	irq_move_irq(data);
+	if (!VALID_EVTCHN(evtchn))
+		return;
 
-	if (VALID_EVTCHN(evtchn))
+	if (unlikely(irqd_is_setaffinity_pending(data))) {
+		int masked = test_and_set_mask(evtchn);
+
+		clear_evtchn(evtchn);
+
+		irq_move_masked_irq(data);
+
+		if (!masked)
+			unmask_evtchn(evtchn);
+	} else
 		clear_evtchn(evtchn);
 
 	if (pirq_needs_eoi(data->irq)) {
@@ -1357,9 +1367,19 @@
 {
 	int evtchn = evtchn_from_irq(data->irq);
 
-	irq_move_irq(data);
+	if (!VALID_EVTCHN(evtchn))
+		return;
 
-	if (VALID_EVTCHN(evtchn))
+	if (unlikely(irqd_is_setaffinity_pending(data))) {
+		int masked = test_and_set_mask(evtchn);
+
+		clear_evtchn(evtchn);
+
+		irq_move_masked_irq(data);
+
+		if (!masked)
+			unmask_evtchn(evtchn);
+	} else
 		clear_evtchn(evtchn);
 }
 

diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index e9e0437..ac9225e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c

@@ -153,7 +153,7 @@
 	 * If called with zero offset, we should release
 	 * the private state assocated with the page
 	 */
-	if (offset == 0 && length == PAGE_CACHE_SIZE)
+	if (offset == 0 && length == PAGE_SIZE)
 		v9fs_fscache_invalidate_page(page);
 }
 
@@ -166,10 +166,10 @@
 	struct bio_vec bvec;
 	int err, len;
 
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
+	if (page->index == size >> PAGE_SHIFT)
+		len = size & ~PAGE_MASK;
 	else
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 
 	bvec.bv_page = page;
 	bvec.bv_offset = 0;
@@ -271,7 +271,7 @@
 	int retval = 0;
 	struct page *page;
 	struct v9fs_inode *v9inode;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	struct inode *inode = mapping->host;
 
 
@@ -288,11 +288,11 @@
 	if (PageUptodate(page))
 		goto out;
 
-	if (len == PAGE_CACHE_SIZE)
+	if (len == PAGE_SIZE)
 		goto out;
 
 	retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
-	page_cache_release(page);
+	put_page(page);
 	if (!retval)
 		goto start;
 out:
@@ -313,7 +313,7 @@
 		/*
 		 * zero out the rest of the area
 		 */
-		unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+		unsigned from = pos & (PAGE_SIZE - 1);
 
 		zero_user(page, from + copied, len - copied);
 		flush_dcache_page(page);
@@ -331,7 +331,7 @@
 	}
 	set_page_dirty(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return copied;
 }

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index eadc894..b84c291 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c

@@ -421,8 +421,8 @@
 		struct inode *inode = file_inode(file);
 		loff_t i_size;
 		unsigned long pg_start, pg_end;
-		pg_start = origin >> PAGE_CACHE_SHIFT;
-		pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT;
+		pg_start = origin >> PAGE_SHIFT;
+		pg_end = (origin + retval - 1) >> PAGE_SHIFT;
 		if (inode->i_mapping && inode->i_mapping->nrpages)
 			invalidate_inode_pages2_range(inode->i_mapping,
 						      pg_start, pg_end);

diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bf495ce..de3ed86 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c

@@ -87,7 +87,7 @@
 		sb->s_op = &v9fs_super_ops;
 	sb->s_bdi = &v9ses->bdi;
 	if (v9ses->cache)
-		sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
+		sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE;
 
 	sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
 	if (!v9ses->cache)

diff --git a/fs/Kconfig b/fs/Kconfig
index 9d75767..6725f59 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig

@@ -209,6 +209,7 @@
 
 if MISC_FILESYSTEMS
 
+source "fs/orangefs/Kconfig"
 source "fs/adfs/Kconfig"
 source "fs/affs/Kconfig"
 source "fs/ecryptfs/Kconfig"

diff --git a/fs/Makefile b/fs/Makefile
index 252c968..85b6e13 100644
--- a/fs/Makefile
+++ b/fs/Makefile

@@ -106,6 +106,7 @@
 obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_FUSE_FS)		+= fuse/
 obj-$(CONFIG_OVERLAY_FS)	+= overlayfs/
+obj-$(CONFIG_ORANGEFS_FS)       += orangefs/
 obj-$(CONFIG_UDF_FS)		+= udf/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_OMFS_FS)		+= omfs/

diff --git a/fs/affs/file.c b/fs/affs/file.c
index 22fc7c8..0cde550 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c

@@ -510,9 +510,9 @@
 
 	pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
 		 page->index, to);
-	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(to > PAGE_SIZE);
 	bsize = AFFS_SB(sb)->s_data_blksize;
-	tmp = page->index << PAGE_CACHE_SHIFT;
+	tmp = page->index << PAGE_SHIFT;
 	bidx = tmp / bsize;
 	boff = tmp % bsize;
 
@@ -613,10 +613,10 @@
 	int err;
 
 	pr_debug("%s(%lu, %ld)\n", __func__, inode->i_ino, page->index);
-	to = PAGE_CACHE_SIZE;
-	if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) {
-		to = inode->i_size & ~PAGE_CACHE_MASK;
-		memset(page_address(page) + to, 0, PAGE_CACHE_SIZE - to);
+	to = PAGE_SIZE;
+	if (((page->index + 1) << PAGE_SHIFT) > inode->i_size) {
+		to = inode->i_size & ~PAGE_MASK;
+		memset(page_address(page) + to, 0, PAGE_SIZE - to);
 	}
 
 	err = affs_do_readpage_ofs(page, to);
@@ -646,7 +646,7 @@
 			return err;
 	}
 
-	index = pos >> PAGE_CACHE_SHIFT;
+	index = pos >> PAGE_SHIFT;
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
@@ -656,10 +656,10 @@
 		return 0;
 
 	/* XXX: inefficient but safe in the face of short writes */
-	err = affs_do_readpage_ofs(page, PAGE_CACHE_SIZE);
+	err = affs_do_readpage_ofs(page, PAGE_SIZE);
 	if (err) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return err;
 }
@@ -677,7 +677,7 @@
 	u32 tmp;
 	int written;
 
-	from = pos & (PAGE_CACHE_SIZE - 1);
+	from = pos & (PAGE_SIZE - 1);
 	to = pos + len;
 	/*
 	 * XXX: not sure if this can handle short copies (len < copied), but
@@ -692,7 +692,7 @@
 
 	bh = NULL;
 	written = 0;
-	tmp = (page->index << PAGE_CACHE_SHIFT) + from;
+	tmp = (page->index << PAGE_SHIFT) + from;
 	bidx = tmp / bsize;
 	boff = tmp % bsize;
 	if (boff) {
@@ -788,13 +788,13 @@
 
 done:
 	affs_brelse(bh);
-	tmp = (page->index << PAGE_CACHE_SHIFT) + from;
+	tmp = (page->index << PAGE_SHIFT) + from;
 	if (tmp > inode->i_size)
 		inode->i_size = AFFS_I(inode)->mmu_private = tmp;
 
 err_first_bh:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return written;
 

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e10e1778..5fda2bc 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c

@@ -181,7 +181,7 @@
 static inline void afs_dir_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 /*

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 999bc3c..6344aee 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c

@@ -164,7 +164,7 @@
 		_debug("cache said ENOBUFS");
 	default:
 	go_on:
-		offset = page->index << PAGE_CACHE_SHIFT;
+		offset = page->index << PAGE_SHIFT;
 		len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE);
 
 		/* read the contents of the file from the server into the
@@ -319,7 +319,7 @@
 	BUG_ON(!PageLocked(page));
 
 	/* we clean up only if the entire page is being invalidated */
-	if (offset == 0 && length == PAGE_CACHE_SIZE) {
+	if (offset == 0 && length == PAGE_SIZE) {
 #ifdef CONFIG_AFS_FSCACHE
 		if (PageFsCache(page)) {
 			struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index ccd0b21..81dd075 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c

@@ -93,7 +93,7 @@
 
 	kunmap(page);
 out_free:
-	page_cache_release(page);
+	put_page(page);
 out:
 	_leave(" = %d", ret);
 	return ret;
@@ -189,7 +189,7 @@
 		buf = kmap_atomic(page);
 		memcpy(devname, buf, size);
 		kunmap_atomic(buf);
-		page_cache_release(page);
+		put_page(page);
 		page = NULL;
 	}
 
@@ -211,7 +211,7 @@
 	return mnt;
 
 error:
-	page_cache_release(page);
+	put_page(page);
 error_no_page:
 	free_page((unsigned long) options);
 error_no_options:

diff --git a/fs/afs/super.c b/fs/afs/super.c
index 81afefe..fbdb022 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c

@@ -315,8 +315,8 @@
 	_enter("");
 
 	/* fill in the superblock */
-	sb->s_blocksize		= PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits	= PAGE_CACHE_SHIFT;
+	sb->s_blocksize		= PAGE_SIZE;
+	sb->s_blocksize_bits	= PAGE_SHIFT;
 	sb->s_magic		= AFS_FS_MAGIC;
 	sb->s_op		= &afs_super_ops;
 	sb->s_bdi		= &as->volume->bdi;

diff --git a/fs/afs/write.c b/fs/afs/write.c
index dfef94f..65de439 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c

@@ -93,10 +93,10 @@
 	_enter(",,%llu", (unsigned long long)pos);
 
 	i_size = i_size_read(&vnode->vfs_inode);
-	if (pos + PAGE_CACHE_SIZE > i_size)
+	if (pos + PAGE_SIZE > i_size)
 		len = i_size - pos;
 	else
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 
 	ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
 	if (ret < 0) {
@@ -123,9 +123,9 @@
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 	struct page *page;
 	struct key *key = file->private_data;
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned from = pos & (PAGE_SIZE - 1);
 	unsigned to = from + len;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	int ret;
 
 	_enter("{%x:%u},{%lx},%u,%u",
@@ -151,8 +151,8 @@
 	*pagep = page;
 	/* page won't leak in error case: it eventually gets cleaned off LRU */
 
-	if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
-		ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
+	if (!PageUptodate(page) && len != PAGE_SIZE) {
+		ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
 		if (ret < 0) {
 			kfree(candidate);
 			_leave(" = %d [prep]", ret);
@@ -266,7 +266,7 @@
 	if (PageDirty(page))
 		_debug("dirtied");
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return copied;
 }
@@ -480,7 +480,7 @@
 
 		if (page->index > end) {
 			*_next = index;
-			page_cache_release(page);
+			put_page(page);
 			_leave(" = 0 [%lx]", *_next);
 			return 0;
 		}
@@ -494,7 +494,7 @@
 
 		if (page->mapping != mapping) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			continue;
 		}
 
@@ -515,7 +515,7 @@
 
 		ret = afs_write_back_from_locked_page(wb, page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		if (ret < 0) {
 			_leave(" = %d", ret);
 			return ret;
@@ -551,13 +551,13 @@
 						    &next);
 		mapping->writeback_index = next;
 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
-		end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
+		end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT);
 		ret = afs_writepages_region(mapping, wbc, 0, end, &next);
 		if (wbc->nr_to_write > 0)
 			mapping->writeback_index = next;
 	} else {
-		start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		start = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		ret = afs_writepages_region(mapping, wbc, start, end, &next);
 	}
 

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7d914c6..81381cc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c

@@ -2292,7 +2292,7 @@
 				void *kaddr = kmap(page);
 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
 				kunmap(page);
-				page_cache_release(page);
+				put_page(page);
 			} else
 				stop = !dump_skip(cprm, PAGE_SIZE);
 			if (stop)

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index b1adb92..083ea2bc 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c

@@ -1533,7 +1533,7 @@
 				void *kaddr = kmap(page);
 				res = dump_emit(cprm, kaddr, PAGE_SIZE);
 				kunmap(page);
-				page_cache_release(page);
+				put_page(page);
 			} else {
 				res = dump_skip(cprm, PAGE_SIZE);
 			}

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3172c4e..20a2c02 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c

@@ -331,7 +331,7 @@
 	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return ret;
 }
@@ -1149,7 +1149,7 @@
 	inode_lock(bdev->bd_inode);
 	i_size_write(bdev->bd_inode, size);
 	inode_unlock(bdev->bd_inode);
-	while (bsize < PAGE_CACHE_SIZE) {
+	while (bsize < PAGE_SIZE) {
 		if (size & bsize)
 			break;
 		bsize <<= 1;

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index e34a71b..516e19d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c

@@ -757,7 +757,7 @@
 			BUG_ON(NULL == l);
 
 			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
-			if (ret < (int)PAGE_CACHE_SIZE) {
+			if (ret < (int)PAGE_SIZE) {
 				printk(KERN_INFO
 				       "btrfsic: read @logical %llu failed!\n",
 				       tmp_next_block_ctx.start);
@@ -1231,15 +1231,15 @@
 	size_t offset_in_page;
 	char *kaddr;
 	char *dst = (char *)dstv;
-	size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = block_ctx->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + offset) >> PAGE_SHIFT;
 
 	WARN_ON(offset + len > block_ctx->len);
-	offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
+	offset_in_page = (start_offset + offset) & (PAGE_SIZE - 1);
 
 	while (len > 0) {
-		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
-		BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE));
+		cur = min(len, ((size_t)PAGE_SIZE - offset_in_page));
+		BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE));
 		kaddr = block_ctx->datav[i];
 		memcpy(dst, kaddr + offset_in_page, cur);
 
@@ -1605,8 +1605,8 @@
 
 		BUG_ON(!block_ctx->datav);
 		BUG_ON(!block_ctx->pagev);
-		num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
-			    PAGE_CACHE_SHIFT;
+		num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
+			    PAGE_SHIFT;
 		while (num_pages > 0) {
 			num_pages--;
 			if (block_ctx->datav[num_pages]) {
@@ -1637,15 +1637,15 @@
 	BUG_ON(block_ctx->datav);
 	BUG_ON(block_ctx->pagev);
 	BUG_ON(block_ctx->mem_to_free);
-	if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
+	if (block_ctx->dev_bytenr & ((u64)PAGE_SIZE - 1)) {
 		printk(KERN_INFO
 		       "btrfsic: read_block() with unaligned bytenr %llu\n",
 		       block_ctx->dev_bytenr);
 		return -1;
 	}
 
-	num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
-		    PAGE_CACHE_SHIFT;
+	num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
+		    PAGE_SHIFT;
 	block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
 					  sizeof(*block_ctx->pagev)) *
 					 num_pages, GFP_NOFS);
@@ -1676,8 +1676,8 @@
 
 		for (j = i; j < num_pages; j++) {
 			ret = bio_add_page(bio, block_ctx->pagev[j],
-					   PAGE_CACHE_SIZE, 0);
-			if (PAGE_CACHE_SIZE != ret)
+					   PAGE_SIZE, 0);
+			if (PAGE_SIZE != ret)
 				break;
 		}
 		if (j == i) {
@@ -1693,7 +1693,7 @@
 			return -1;
 		}
 		bio_put(bio);
-		dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
+		dev_bytenr += (j - i) * PAGE_SIZE;
 		i = j;
 	}
 	for (i = 0; i < num_pages; i++) {
@@ -1769,9 +1769,9 @@
 	u32 crc = ~(u32)0;
 	unsigned int i;
 
-	if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
+	if (num_pages * PAGE_SIZE < state->metablock_size)
 		return 1; /* not metadata */
-	num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
+	num_pages = state->metablock_size >> PAGE_SHIFT;
 	h = (struct btrfs_header *)datav[0];
 
 	if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1779,8 +1779,8 @@
 
 	for (i = 0; i < num_pages; i++) {
 		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
-		size_t sublen = i ? PAGE_CACHE_SIZE :
-				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
+		size_t sublen = i ? PAGE_SIZE :
+				    (PAGE_SIZE - BTRFS_CSUM_SIZE);
 
 		crc = btrfs_crc32c(crc, data, sublen);
 	}
@@ -1826,14 +1826,14 @@
 		if (block->is_superblock) {
 			bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
 						    mapped_datav[0]);
-			if (num_pages * PAGE_CACHE_SIZE <
+			if (num_pages * PAGE_SIZE <
 			    BTRFS_SUPER_INFO_SIZE) {
 				printk(KERN_INFO
 				       "btrfsic: cannot work with too short bios!\n");
 				return;
 			}
 			is_metadata = 1;
-			BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
+			BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_SIZE - 1));
 			processed_len = BTRFS_SUPER_INFO_SIZE;
 			if (state->print_mask &
 			    BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
@@ -1844,7 +1844,7 @@
 		}
 		if (is_metadata) {
 			if (!block->is_superblock) {
-				if (num_pages * PAGE_CACHE_SIZE <
+				if (num_pages * PAGE_SIZE <
 				    state->metablock_size) {
 					printk(KERN_INFO
 					       "btrfsic: cannot work with too short bios!\n");
@@ -1880,7 +1880,7 @@
 			}
 			block->logical_bytenr = bytenr;
 		} else {
-			if (num_pages * PAGE_CACHE_SIZE <
+			if (num_pages * PAGE_SIZE <
 			    state->datablock_size) {
 				printk(KERN_INFO
 				       "btrfsic: cannot work with too short bios!\n");
@@ -2013,7 +2013,7 @@
 			block->logical_bytenr = bytenr;
 			block->is_metadata = 1;
 			if (block->is_superblock) {
-				BUG_ON(PAGE_CACHE_SIZE !=
+				BUG_ON(PAGE_SIZE !=
 				       BTRFS_SUPER_INFO_SIZE);
 				ret = btrfsic_process_written_superblock(
 						state,
@@ -2172,8 +2172,8 @@
 continue_loop:
 	BUG_ON(!processed_len);
 	dev_bytenr += processed_len;
-	mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
-	num_pages -= processed_len >> PAGE_CACHE_SHIFT;
+	mapped_datav += processed_len >> PAGE_SHIFT;
+	num_pages -= processed_len >> PAGE_SHIFT;
 	goto again;
 }
 
@@ -2954,7 +2954,7 @@
 			goto leave;
 		cur_bytenr = dev_bytenr;
 		for (i = 0; i < bio->bi_vcnt; i++) {
-			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
+			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_SIZE);
 			mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
 			if (!mapped_datav[i]) {
 				while (i > 0) {
@@ -3037,16 +3037,16 @@
 	struct list_head *dev_head = &fs_devices->devices;
 	struct btrfs_device *device;
 
-	if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
+	if (root->nodesize & ((u64)PAGE_SIZE - 1)) {
 		printk(KERN_INFO
-		       "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
-		       root->nodesize, PAGE_CACHE_SIZE);
+		       "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n",
+		       root->nodesize, PAGE_SIZE);
 		return -1;
 	}
-	if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
+	if (root->sectorsize & ((u64)PAGE_SIZE - 1)) {
 		printk(KERN_INFO
-		       "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
-		       root->sectorsize, PAGE_CACHE_SIZE);
+		       "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n",
+		       root->sectorsize, PAGE_SIZE);
 		return -1;
 	}
 	state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 3346cd8..ff61a41 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c

@@ -119,7 +119,7 @@
 		csum = ~(u32)0;
 
 		kaddr = kmap_atomic(page);
-		csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE);
+		csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE);
 		btrfs_csum_final(csum, (char *)&csum);
 		kunmap_atomic(kaddr);
 
@@ -190,7 +190,7 @@
 	for (index = 0; index < cb->nr_pages; index++) {
 		page = cb->compressed_pages[index];
 		page->mapping = NULL;
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	/* do io completion on the original bio */
@@ -224,8 +224,8 @@
 static noinline void end_compressed_writeback(struct inode *inode,
 					      const struct compressed_bio *cb)
 {
-	unsigned long index = cb->start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_CACHE_SHIFT;
+	unsigned long index = cb->start >> PAGE_SHIFT;
+	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
 	struct page *pages[16];
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
@@ -247,7 +247,7 @@
 			if (cb->errors)
 				SetPageError(pages[i]);
 			end_page_writeback(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 		nr_pages -= ret;
 		index += ret;
@@ -304,7 +304,7 @@
 	for (index = 0; index < cb->nr_pages; index++) {
 		page = cb->compressed_pages[index];
 		page->mapping = NULL;
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	/* finally free the cb struct */
@@ -341,7 +341,7 @@
 	int ret;
 	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
+	WARN_ON(start & ((u64)PAGE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
 	if (!cb)
 		return -ENOMEM;
@@ -374,14 +374,14 @@
 		page->mapping = inode->i_mapping;
 		if (bio->bi_iter.bi_size)
 			ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
-							   PAGE_CACHE_SIZE,
+							   PAGE_SIZE,
 							   bio, 0);
 		else
 			ret = 0;
 
 		page->mapping = NULL;
-		if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
-		    PAGE_CACHE_SIZE) {
+		if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+		    PAGE_SIZE) {
 			bio_get(bio);
 
 			/*
@@ -410,15 +410,15 @@
 			BUG_ON(!bio);
 			bio->bi_private = cb;
 			bio->bi_end_io = end_compressed_bio_write;
-			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+			bio_add_page(bio, page, PAGE_SIZE, 0);
 		}
-		if (bytes_left < PAGE_CACHE_SIZE) {
+		if (bytes_left < PAGE_SIZE) {
 			btrfs_info(BTRFS_I(inode)->root->fs_info,
 					"bytes left %lu compress len %lu nr %lu",
 			       bytes_left, cb->compressed_len, cb->nr_pages);
 		}
-		bytes_left -= PAGE_CACHE_SIZE;
-		first_byte += PAGE_CACHE_SIZE;
+		bytes_left -= PAGE_SIZE;
+		first_byte += PAGE_SIZE;
 		cond_resched();
 	}
 	bio_get(bio);
@@ -457,17 +457,17 @@
 	int misses = 0;
 
 	page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
-	last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
+	last_offset = (page_offset(page) + PAGE_SIZE);
 	em_tree = &BTRFS_I(inode)->extent_tree;
 	tree = &BTRFS_I(inode)->io_tree;
 
 	if (isize == 0)
 		return 0;
 
-	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
 
 	while (last_offset < compressed_end) {
-		pg_index = last_offset >> PAGE_CACHE_SHIFT;
+		pg_index = last_offset >> PAGE_SHIFT;
 
 		if (pg_index > end_index)
 			break;
@@ -488,11 +488,11 @@
 			break;
 
 		if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
-			page_cache_release(page);
+			put_page(page);
 			goto next;
 		}
 
-		end = last_offset + PAGE_CACHE_SIZE - 1;
+		end = last_offset + PAGE_SIZE - 1;
 		/*
 		 * at this point, we have a locked page in the page cache
 		 * for these bytes in the file.  But, we have to make
@@ -502,27 +502,27 @@
 		lock_extent(tree, last_offset, end);
 		read_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, last_offset,
-					   PAGE_CACHE_SIZE);
+					   PAGE_SIZE);
 		read_unlock(&em_tree->lock);
 
 		if (!em || last_offset < em->start ||
-		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
+		    (last_offset + PAGE_SIZE > extent_map_end(em)) ||
 		    (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
 			free_extent_map(em);
 			unlock_extent(tree, last_offset, end);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			break;
 		}
 		free_extent_map(em);
 
 		if (page->index == end_index) {
 			char *userpage;
-			size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
+			size_t zero_offset = isize & (PAGE_SIZE - 1);
 
 			if (zero_offset) {
 				int zeros;
-				zeros = PAGE_CACHE_SIZE - zero_offset;
+				zeros = PAGE_SIZE - zero_offset;
 				userpage = kmap_atomic(page);
 				memset(userpage + zero_offset, 0, zeros);
 				flush_dcache_page(page);
@@ -531,19 +531,19 @@
 		}
 
 		ret = bio_add_page(cb->orig_bio, page,
-				   PAGE_CACHE_SIZE, 0);
+				   PAGE_SIZE, 0);
 
-		if (ret == PAGE_CACHE_SIZE) {
+		if (ret == PAGE_SIZE) {
 			nr_pages++;
-			page_cache_release(page);
+			put_page(page);
 		} else {
 			unlock_extent(tree, last_offset, end);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			break;
 		}
 next:
-		last_offset += PAGE_CACHE_SIZE;
+		last_offset += PAGE_SIZE;
 	}
 	return 0;
 }
@@ -567,7 +567,7 @@
 	struct extent_map_tree *em_tree;
 	struct compressed_bio *cb;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+	unsigned long uncompressed_len = bio->bi_vcnt * PAGE_SIZE;
 	unsigned long compressed_len;
 	unsigned long nr_pages;
 	unsigned long pg_index;
@@ -589,7 +589,7 @@
 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree,
 				   page_offset(bio->bi_io_vec->bv_page),
-				   PAGE_CACHE_SIZE);
+				   PAGE_SIZE);
 	read_unlock(&em_tree->lock);
 	if (!em)
 		return -EIO;
@@ -617,7 +617,7 @@
 	cb->compress_type = extent_compress_type(bio_flags);
 	cb->orig_bio = bio;
 
-	nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
+	nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
 	cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
 				       GFP_NOFS);
 	if (!cb->compressed_pages)
@@ -640,7 +640,7 @@
 	add_ra_bio_pages(inode, em_start + em_len, cb);
 
 	/* include any pages we added in add_ra-bio_pages */
-	uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+	uncompressed_len = bio->bi_vcnt * PAGE_SIZE;
 	cb->len = uncompressed_len;
 
 	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
@@ -653,18 +653,18 @@
 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
 		page = cb->compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
-		page->index = em_start >> PAGE_CACHE_SHIFT;
+		page->index = em_start >> PAGE_SHIFT;
 
 		if (comp_bio->bi_iter.bi_size)
 			ret = tree->ops->merge_bio_hook(READ, page, 0,
-							PAGE_CACHE_SIZE,
+							PAGE_SIZE,
 							comp_bio, 0);
 		else
 			ret = 0;
 
 		page->mapping = NULL;
-		if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
-		    PAGE_CACHE_SIZE) {
+		if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+		    PAGE_SIZE) {
 			bio_get(comp_bio);
 
 			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio,
@@ -702,9 +702,9 @@
 			comp_bio->bi_private = cb;
 			comp_bio->bi_end_io = end_compressed_bio_read;
 
-			bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
+			bio_add_page(comp_bio, page, PAGE_SIZE, 0);
 		}
-		cur_disk_byte += PAGE_CACHE_SIZE;
+		cur_disk_byte += PAGE_SIZE;
 	}
 	bio_get(comp_bio);
 
@@ -1013,8 +1013,8 @@
 
 	/* copy bytes from the working buffer into the pages */
 	while (working_bytes > 0) {
-		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
-			    PAGE_CACHE_SIZE - buf_offset);
+		bytes = min(PAGE_SIZE - *pg_offset,
+			    PAGE_SIZE - buf_offset);
 		bytes = min(bytes, working_bytes);
 		kaddr = kmap_atomic(page_out);
 		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
@@ -1027,7 +1027,7 @@
 		current_buf_start += bytes;
 
 		/* check if we need to pick another page */
-		if (*pg_offset == PAGE_CACHE_SIZE) {
+		if (*pg_offset == PAGE_SIZE) {
 			(*pg_index)++;
 			if (*pg_index >= vcnt)
 				return 0;

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4b02591..4e47849 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c

@@ -25,7 +25,6 @@
 #include <linux/buffer_head.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/slab.h>
 #include <linux/migrate.h>
 #include <linux/ratelimit.h>
@@ -303,7 +302,7 @@
 		err = map_private_extent_buffer(buf, offset, 32,
 					&kaddr, &map_start, &map_len);
 		if (err)
-			return 1;
+			return err;
 		cur_len = min(len, map_len - (offset - map_start));
 		crc = btrfs_csum_data(kaddr + offset - map_start,
 				      crc, cur_len);
@@ -313,7 +312,7 @@
 	if (csum_size > sizeof(inline_result)) {
 		result = kzalloc(csum_size, GFP_NOFS);
 		if (!result)
-			return 1;
+			return -ENOMEM;
 	} else {
 		result = (char *)&inline_result;
 	}
@@ -334,7 +333,7 @@
 				val, found, btrfs_header_level(buf));
 			if (result != (char *)&inline_result)
 				kfree(result);
-			return 1;
+			return -EUCLEAN;
 		}
 	} else {
 		write_extent_buffer(buf, result, 0, csum_size);
@@ -513,11 +512,21 @@
 	eb = (struct extent_buffer *)page->private;
 	if (page != eb->pages[0])
 		return 0;
+
 	found_start = btrfs_header_bytenr(eb);
-	if (WARN_ON(found_start != start || !PageUptodate(page)))
-		return 0;
-	csum_tree_block(fs_info, eb, 0);
-	return 0;
+	/*
+	 * Please do not consolidate these warnings into a single if.
+	 * It is useful to know what went wrong.
+	 */
+	if (WARN_ON(found_start != start))
+		return -EUCLEAN;
+	if (WARN_ON(!PageUptodate(page)))
+		return -EUCLEAN;
+
+	ASSERT(memcmp_extent_buffer(eb, fs_info->fsid,
+			btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
+
+	return csum_tree_block(fs_info, eb, 0);
 }
 
 static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
@@ -661,10 +670,8 @@
 				       eb, found_level);
 
 	ret = csum_tree_block(fs_info, eb, 1);
-	if (ret) {
-		ret = -EIO;
+	if (ret)
 		goto err;
-	}
 
 	/*
 	 * If this is a leaf block and it is corrupt, set the corrupt bit so
@@ -1055,7 +1062,7 @@
 			   (unsigned long long)page_offset(page));
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
-		page_cache_release(page);
+		put_page(page);
 	}
 }
 
@@ -1757,7 +1764,7 @@
 	if (err)
 		return err;
 
-	bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
+	bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
 	bdi->congested_fn	= btrfs_congested_fn;
 	bdi->congested_data	= info;
 	bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
@@ -1831,7 +1838,7 @@
 		 */
 		btrfs_delete_unused_bgs(root->fs_info);
 sleep:
-		if (!try_to_freeze() && !again) {
+		if (!again) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (!kthread_should_stop())
 				schedule();
@@ -1921,14 +1928,12 @@
 		if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
 				      &root->fs_info->fs_state)))
 			btrfs_cleanup_transaction(root);
-		if (!try_to_freeze()) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (!kthread_should_stop() &&
-			    (!btrfs_transaction_blocked(root->fs_info) ||
-			     cannot_commit))
-				schedule_timeout(delay);
-			__set_current_state(TASK_RUNNING);
-		}
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!kthread_should_stop() &&
+				(!btrfs_transaction_blocked(root->fs_info) ||
+				 cannot_commit))
+			schedule_timeout(delay);
+		__set_current_state(TASK_RUNNING);
 	} while (!kthread_should_stop());
 	return 0;
 }
@@ -2537,7 +2542,7 @@
 		err = ret;
 		goto fail_bdi;
 	}
-	fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+	fs_info->dirty_metadata_batch = PAGE_SIZE *
 					(1 + ilog2(nr_cpu_ids));
 
 	ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
@@ -2782,7 +2787,7 @@
 	 * flag our filesystem as having big metadata blocks if
 	 * they are bigger than the page size
 	 */
-	if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) {
+	if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
 		if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
 			printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
 		features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
@@ -2832,7 +2837,7 @@
 
 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
-				    SZ_4M / PAGE_CACHE_SIZE);
+				    SZ_4M / PAGE_SIZE);
 
 	tree_root->nodesize = nodesize;
 	tree_root->sectorsize = sectorsize;
@@ -4071,9 +4076,9 @@
 		ret = -EINVAL;
 	}
 	/* Only PAGE SIZE is supported yet */
-	if (sectorsize != PAGE_CACHE_SIZE) {
+	if (sectorsize != PAGE_SIZE) {
 		printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
-				sectorsize, PAGE_CACHE_SIZE);
+				sectorsize, PAGE_SIZE);
 		ret = -EINVAL;
 	}
 	if (!is_power_of_2(nodesize) || nodesize < sectorsize ||

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 53e1297..ce114ba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c

@@ -3452,7 +3452,7 @@
 		num_pages = 1;
 
 	num_pages *= 16;
-	num_pages *= PAGE_CACHE_SIZE;
+	num_pages *= PAGE_SIZE;
 
 	ret = btrfs_check_data_free_space(inode, 0, num_pages);
 	if (ret)
@@ -4639,7 +4639,7 @@
 	loops = 0;
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
-		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
+		nr_pages = max_reclaim >> PAGE_SHIFT;
 		btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
 		/*
 		 * We need to wait for the async pages to actually start before

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 76a0c85..d247fc0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c

@@ -1363,23 +1363,23 @@
 
 void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
 {
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	struct page *page;
 
 	while (index <= end_index) {
 		page = find_get_page(inode->i_mapping, index);
 		BUG_ON(!page); /* Pages should be in the extent_io_tree */
 		clear_page_dirty_for_io(page);
-		page_cache_release(page);
+		put_page(page);
 		index++;
 	}
 }
 
 void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
 {
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	struct page *page;
 
 	while (index <= end_index) {
@@ -1387,7 +1387,7 @@
 		BUG_ON(!page); /* Pages should be in the extent_io_tree */
 		__set_page_dirty_nobuffers(page);
 		account_page_redirty(page);
-		page_cache_release(page);
+		put_page(page);
 		index++;
 	}
 }
@@ -1397,15 +1397,15 @@
  */
 static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 {
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	struct page *page;
 
 	while (index <= end_index) {
 		page = find_get_page(tree->mapping, index);
 		BUG_ON(!page); /* Pages should be in the extent_io_tree */
 		set_page_writeback(page);
-		page_cache_release(page);
+		put_page(page);
 		index++;
 	}
 }
@@ -1556,8 +1556,8 @@
 {
 	int ret;
 	struct page *pages[16];
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
 
@@ -1571,7 +1571,7 @@
 		for (i = 0; i < ret; i++) {
 			if (pages[i] != locked_page)
 				unlock_page(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 		nr_pages -= ret;
 		index += ret;
@@ -1584,9 +1584,9 @@
 					u64 delalloc_start,
 					u64 delalloc_end)
 {
-	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+	unsigned long index = delalloc_start >> PAGE_SHIFT;
 	unsigned long start_index = index;
-	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = delalloc_end >> PAGE_SHIFT;
 	unsigned long pages_locked = 0;
 	struct page *pages[16];
 	unsigned long nrpages;
@@ -1619,11 +1619,11 @@
 				    pages[i]->mapping != inode->i_mapping) {
 					ret = -EAGAIN;
 					unlock_page(pages[i]);
-					page_cache_release(pages[i]);
+					put_page(pages[i]);
 					goto done;
 				}
 			}
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 			pages_locked++;
 		}
 		nrpages -= ret;
@@ -1636,7 +1636,7 @@
 		__unlock_for_delalloc(inode, locked_page,
 			      delalloc_start,
 			      ((u64)(start_index + pages_locked - 1)) <<
-			      PAGE_CACHE_SHIFT);
+			      PAGE_SHIFT);
 	}
 	return ret;
 }
@@ -1696,7 +1696,7 @@
 		free_extent_state(cached_state);
 		cached_state = NULL;
 		if (!loops) {
-			max_bytes = PAGE_CACHE_SIZE;
+			max_bytes = PAGE_SIZE;
 			loops = 1;
 			goto again;
 		} else {
@@ -1735,8 +1735,8 @@
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	int ret;
 	struct page *pages[16];
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
 
@@ -1757,7 +1757,7 @@
 				SetPagePrivate2(pages[i]);
 
 			if (pages[i] == locked_page) {
-				page_cache_release(pages[i]);
+				put_page(pages[i]);
 				continue;
 			}
 			if (page_ops & PAGE_CLEAR_DIRTY)
@@ -1770,7 +1770,7 @@
 				end_page_writeback(pages[i]);
 			if (page_ops & PAGE_UNLOCK)
 				unlock_page(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 		nr_pages -= ret;
 		index += ret;
@@ -1961,7 +1961,7 @@
 static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
 {
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
 		SetPageUptodate(page);
 }
@@ -2071,11 +2071,11 @@
 		struct page *p = eb->pages[i];
 
 		ret = repair_io_failure(root->fs_info->btree_inode, start,
-					PAGE_CACHE_SIZE, start, p,
+					PAGE_SIZE, start, p,
 					start - page_offset(p), mirror_num);
 		if (ret)
 			break;
-		start += PAGE_CACHE_SIZE;
+		start += PAGE_SIZE;
 	}
 
 	return ret;
@@ -2466,8 +2466,8 @@
 		 * advance bv_offset and adjust bv_len to compensate.
 		 * Print a warning for nonzero offsets, and an error
 		 * if they don't add up to a full page.  */
-		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
-			if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+		if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
+			if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
 				btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
 				   "partial page write in btrfs with offset %u and length %u",
 					bvec->bv_offset, bvec->bv_len);
@@ -2541,8 +2541,8 @@
 		 * advance bv_offset and adjust bv_len to compensate.
 		 * Print a warning for nonzero offsets, and an error
 		 * if they don't add up to a full page.  */
-		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
-			if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+		if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
+			if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
 				btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
 				   "partial page read in btrfs with offset %u and length %u",
 					bvec->bv_offset, bvec->bv_len);
@@ -2598,13 +2598,13 @@
 readpage_ok:
 		if (likely(uptodate)) {
 			loff_t i_size = i_size_read(inode);
-			pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+			pgoff_t end_index = i_size >> PAGE_SHIFT;
 			unsigned off;
 
 			/* Zero out the end if this page straddles i_size */
-			off = i_size & (PAGE_CACHE_SIZE-1);
+			off = i_size & (PAGE_SIZE-1);
 			if (page->index == end_index && off)
-				zero_user_segment(page, off, PAGE_CACHE_SIZE);
+				zero_user_segment(page, off, PAGE_SIZE);
 			SetPageUptodate(page);
 		} else {
 			ClearPageUptodate(page);
@@ -2768,7 +2768,7 @@
 	struct bio *bio;
 	int contig = 0;
 	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
-	size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
+	size_t page_size = min_t(size_t, size, PAGE_SIZE);
 
 	if (bio_ret && *bio_ret) {
 		bio = *bio_ret;
@@ -2821,7 +2821,7 @@
 {
 	if (!PagePrivate(page)) {
 		SetPagePrivate(page);
-		page_cache_get(page);
+		get_page(page);
 		set_page_private(page, (unsigned long)eb);
 	} else {
 		WARN_ON(page->private != (unsigned long)eb);
@@ -2832,7 +2832,7 @@
 {
 	if (!PagePrivate(page)) {
 		SetPagePrivate(page);
-		page_cache_get(page);
+		get_page(page);
 		set_page_private(page, EXTENT_PAGE_PRIVATE);
 	}
 }
@@ -2880,7 +2880,7 @@
 {
 	struct inode *inode = page->mapping->host;
 	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = start + PAGE_SIZE - 1;
 	u64 end;
 	u64 cur = start;
 	u64 extent_offset;
@@ -2909,12 +2909,12 @@
 		}
 	}
 
-	if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
+	if (page->index == last_byte >> PAGE_SHIFT) {
 		char *userpage;
-		size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
+		size_t zero_offset = last_byte & (PAGE_SIZE - 1);
 
 		if (zero_offset) {
-			iosize = PAGE_CACHE_SIZE - zero_offset;
+			iosize = PAGE_SIZE - zero_offset;
 			userpage = kmap_atomic(page);
 			memset(userpage + zero_offset, 0, iosize);
 			flush_dcache_page(page);
@@ -2922,14 +2922,14 @@
 		}
 	}
 	while (cur <= end) {
-		unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+		unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1;
 		bool force_bio_submit = false;
 
 		if (cur >= last_byte) {
 			char *userpage;
 			struct extent_state *cached = NULL;
 
-			iosize = PAGE_CACHE_SIZE - pg_offset;
+			iosize = PAGE_SIZE - pg_offset;
 			userpage = kmap_atomic(page);
 			memset(userpage + pg_offset, 0, iosize);
 			flush_dcache_page(page);
@@ -3112,7 +3112,7 @@
 	for (index = 0; index < nr_pages; index++) {
 		__do_readpage(tree, pages[index], get_extent, em_cached, bio,
 			      mirror_num, bio_flags, rw, prev_em_start);
-		page_cache_release(pages[index]);
+		put_page(pages[index]);
 	}
 }
 
@@ -3134,10 +3134,10 @@
 		page_start = page_offset(pages[index]);
 		if (!end) {
 			start = page_start;
-			end = start + PAGE_CACHE_SIZE - 1;
+			end = start + PAGE_SIZE - 1;
 			first_index = index;
 		} else if (end + 1 == page_start) {
-			end += PAGE_CACHE_SIZE;
+			end += PAGE_SIZE;
 		} else {
 			__do_contiguous_readpages(tree, &pages[first_index],
 						  index - first_index, start,
@@ -3145,7 +3145,7 @@
 						  bio, mirror_num, bio_flags,
 						  rw, prev_em_start);
 			start = page_start;
-			end = start + PAGE_CACHE_SIZE - 1;
+			end = start + PAGE_SIZE - 1;
 			first_index = index;
 		}
 	}
@@ -3167,13 +3167,13 @@
 	struct inode *inode = page->mapping->host;
 	struct btrfs_ordered_extent *ordered;
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	int ret;
 
 	while (1) {
 		lock_extent(tree, start, end);
 		ordered = btrfs_lookup_ordered_range(inode, start,
-						PAGE_CACHE_SIZE);
+						PAGE_SIZE);
 		if (!ordered)
 			break;
 		unlock_extent(tree, start, end);
@@ -3227,7 +3227,7 @@
 			      unsigned long *nr_written)
 {
 	struct extent_io_tree *tree = epd->tree;
-	u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = delalloc_start + PAGE_SIZE - 1;
 	u64 nr_delalloc;
 	u64 delalloc_to_write = 0;
 	u64 delalloc_end = 0;
@@ -3264,13 +3264,11 @@
 			goto done;
 		}
 		/*
-		 * delalloc_end is already one less than the total
-		 * length, so we don't subtract one from
-		 * PAGE_CACHE_SIZE
+		 * delalloc_end is already one less than the total length, so
+		 * we don't subtract one from PAGE_SIZE
 		 */
 		delalloc_to_write += (delalloc_end - delalloc_start +
-				      PAGE_CACHE_SIZE) >>
-				      PAGE_CACHE_SHIFT;
+				      PAGE_SIZE) >> PAGE_SHIFT;
 		delalloc_start = delalloc_end + 1;
 	}
 	if (wbc->nr_to_write < delalloc_to_write) {
@@ -3319,7 +3317,7 @@
 {
 	struct extent_io_tree *tree = epd->tree;
 	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = start + PAGE_SIZE - 1;
 	u64 end;
 	u64 cur = start;
 	u64 extent_offset;
@@ -3434,7 +3432,7 @@
 		if (ret) {
 			SetPageError(page);
 		} else {
-			unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
+			unsigned long max_nr = (i_size >> PAGE_SHIFT) + 1;
 
 			set_range_writeback(tree, cur, cur + iosize - 1);
 			if (!PageWriteback(page)) {
@@ -3477,12 +3475,12 @@
 	struct inode *inode = page->mapping->host;
 	struct extent_page_data *epd = data;
 	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = start + PAGE_SIZE - 1;
 	int ret;
 	int nr = 0;
 	size_t pg_offset = 0;
 	loff_t i_size = i_size_read(inode);
-	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = i_size >> PAGE_SHIFT;
 	int write_flags;
 	unsigned long nr_written = 0;
 
@@ -3497,10 +3495,10 @@
 
 	ClearPageError(page);
 
-	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+	pg_offset = i_size & (PAGE_SIZE - 1);
 	if (page->index > end_index ||
 	   (page->index == end_index && !pg_offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
 		unlock_page(page);
 		return 0;
 	}
@@ -3510,7 +3508,7 @@
 
 		userpage = kmap_atomic(page);
 		memset(userpage + pg_offset, 0,
-		       PAGE_CACHE_SIZE - pg_offset);
+		       PAGE_SIZE - pg_offset);
 		kunmap_atomic(userpage);
 		flush_dcache_page(page);
 	}
@@ -3748,7 +3746,7 @@
 		clear_page_dirty_for_io(p);
 		set_page_writeback(p);
 		ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
-					 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
+					 PAGE_SIZE, 0, bdev, &epd->bio,
 					 -1, end_bio_extent_buffer_writepage,
 					 0, epd->bio_flags, bio_flags, false);
 		epd->bio_flags = bio_flags;
@@ -3760,7 +3758,7 @@
 			ret = -EIO;
 			break;
 		}
-		offset += PAGE_CACHE_SIZE;
+		offset += PAGE_SIZE;
 		update_nr_written(p, wbc, 1);
 		unlock_page(p);
 	}
@@ -3804,8 +3802,8 @@
 		index = mapping->writeback_index; /* Start from prev offset */
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		index = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		scanned = 1;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
@@ -3948,8 +3946,8 @@
 		index = mapping->writeback_index; /* Start from prev offset */
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		index = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		scanned = 1;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
@@ -4083,8 +4081,8 @@
 	int ret = 0;
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
-		PAGE_CACHE_SHIFT;
+	unsigned long nr_pages = (end - start + PAGE_SIZE) >>
+		PAGE_SHIFT;
 
 	struct extent_page_data epd = {
 		.bio = NULL,
@@ -4102,18 +4100,18 @@
 	};
 
 	while (start <= end) {
-		page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+		page = find_get_page(mapping, start >> PAGE_SHIFT);
 		if (clear_page_dirty_for_io(page))
 			ret = __extent_writepage(page, &wbc_writepages, &epd);
 		else {
 			if (tree->ops && tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, start,
-						 start + PAGE_CACHE_SIZE - 1,
+						 start + PAGE_SIZE - 1,
 						 NULL, 1);
 			unlock_page(page);
 		}
-		page_cache_release(page);
-		start += PAGE_CACHE_SIZE;
+		put_page(page);
+		start += PAGE_SIZE;
 	}
 
 	flush_epd_write_bio(&epd);
@@ -4163,7 +4161,7 @@
 		list_del(&page->lru);
 		if (add_to_page_cache_lru(page, mapping,
 					page->index, GFP_NOFS)) {
-			page_cache_release(page);
+			put_page(page);
 			continue;
 		}
 
@@ -4197,7 +4195,7 @@
 {
 	struct extent_state *cached_state = NULL;
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	size_t blocksize = page->mapping->host->i_sb->s_blocksize;
 
 	start += ALIGN(offset, blocksize);
@@ -4223,7 +4221,7 @@
 				    struct page *page, gfp_t mask)
 {
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 	int ret = 1;
 
 	if (test_range_bit(tree, start, end,
@@ -4262,7 +4260,7 @@
 {
 	struct extent_map *em;
 	u64 start = page_offset(page);
-	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 end = start + PAGE_SIZE - 1;
 
 	if (gfpflags_allow_blocking(mask) &&
 	    page->mapping->host->i_size > SZ_16M) {
@@ -4587,14 +4585,14 @@
 			ClearPagePrivate(page);
 			set_page_private(page, 0);
 			/* One for the page private */
-			page_cache_release(page);
+			put_page(page);
 		}
 
 		if (mapped)
 			spin_unlock(&page->mapping->private_lock);
 
 		/* One for when we alloced the page */
-		page_cache_release(page);
+		put_page(page);
 	} while (index != 0);
 }
 
@@ -4779,7 +4777,7 @@
 
 	rcu_read_lock();
 	eb = radix_tree_lookup(&fs_info->buffer_radix,
-			       start >> PAGE_CACHE_SHIFT);
+			       start >> PAGE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
 		/*
@@ -4829,7 +4827,7 @@
 		goto free_eb;
 	spin_lock(&fs_info->buffer_lock);
 	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> PAGE_CACHE_SHIFT, eb);
+				start >> PAGE_SHIFT, eb);
 	spin_unlock(&fs_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
@@ -4862,7 +4860,7 @@
 	unsigned long len = fs_info->tree_root->nodesize;
 	unsigned long num_pages = num_extent_pages(start, len);
 	unsigned long i;
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
 	struct extent_buffer *eb;
 	struct extent_buffer *exists = NULL;
 	struct page *p;
@@ -4896,7 +4894,7 @@
 			if (atomic_inc_not_zero(&exists->refs)) {
 				spin_unlock(&mapping->private_lock);
 				unlock_page(p);
-				page_cache_release(p);
+				put_page(p);
 				mark_extent_buffer_accessed(exists, p);
 				goto free_eb;
 			}
@@ -4908,7 +4906,7 @@
 			 */
 			ClearPagePrivate(p);
 			WARN_ON(PageDirty(p));
-			page_cache_release(p);
+			put_page(p);
 		}
 		attach_extent_buffer_page(eb, p);
 		spin_unlock(&mapping->private_lock);
@@ -4931,7 +4929,7 @@
 
 	spin_lock(&fs_info->buffer_lock);
 	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> PAGE_CACHE_SHIFT, eb);
+				start >> PAGE_SHIFT, eb);
 	spin_unlock(&fs_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
@@ -4994,7 +4992,7 @@
 
 			spin_lock(&fs_info->buffer_lock);
 			radix_tree_delete(&fs_info->buffer_radix,
-					  eb->start >> PAGE_CACHE_SHIFT);
+					  eb->start >> PAGE_SHIFT);
 			spin_unlock(&fs_info->buffer_lock);
 		} else {
 			spin_unlock(&eb->refs_lock);
@@ -5168,8 +5166,8 @@
 
 	if (start) {
 		WARN_ON(start < eb->start);
-		start_i = (start >> PAGE_CACHE_SHIFT) -
-			(eb->start >> PAGE_CACHE_SHIFT);
+		start_i = (start >> PAGE_SHIFT) -
+			(eb->start >> PAGE_SHIFT);
 	} else {
 		start_i = 0;
 	}
@@ -5252,18 +5250,18 @@
 	struct page *page;
 	char *kaddr;
 	char *dst = (char *)dstv;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
 
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 
 	while (len > 0) {
 		page = eb->pages[i];
 
-		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		cur = min(len, (PAGE_SIZE - offset));
 		kaddr = page_address(page);
 		memcpy(dst, kaddr + offset, cur);
 
@@ -5283,19 +5281,19 @@
 	struct page *page;
 	char *kaddr;
 	char __user *dst = (char __user *)dstv;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	int ret = 0;
 
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
 
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 
 	while (len > 0) {
 		page = eb->pages[i];
 
-		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		cur = min(len, (PAGE_SIZE - offset));
 		kaddr = page_address(page);
 		if (copy_to_user(dst, kaddr + offset, cur)) {
 			ret = -EFAULT;
@@ -5316,13 +5314,13 @@
 			       unsigned long *map_start,
 			       unsigned long *map_len)
 {
-	size_t offset = start & (PAGE_CACHE_SIZE - 1);
+	size_t offset = start & (PAGE_SIZE - 1);
 	char *kaddr;
 	struct page *p;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	unsigned long end_i = (start_offset + start + min_len - 1) >>
-		PAGE_CACHE_SHIFT;
+		PAGE_SHIFT;
 
 	if (i != end_i)
 		return -EINVAL;
@@ -5332,7 +5330,7 @@
 		*map_start = 0;
 	} else {
 		offset = 0;
-		*map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
+		*map_start = ((u64)i << PAGE_SHIFT) - start_offset;
 	}
 
 	if (start + min_len > eb->len) {
@@ -5345,7 +5343,7 @@
 	p = eb->pages[i];
 	kaddr = page_address(p);
 	*map = kaddr + offset;
-	*map_len = PAGE_CACHE_SIZE - offset;
+	*map_len = PAGE_SIZE - offset;
 	return 0;
 }
 
@@ -5358,19 +5356,19 @@
 	struct page *page;
 	char *kaddr;
 	char *ptr = (char *)ptrv;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	int ret = 0;
 
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
 
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 
 	while (len > 0) {
 		page = eb->pages[i];
 
-		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		cur = min(len, (PAGE_SIZE - offset));
 
 		kaddr = page_address(page);
 		ret = memcmp(ptr, kaddr + offset, cur);
@@ -5393,19 +5391,19 @@
 	struct page *page;
 	char *kaddr;
 	char *src = (char *)srcv;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
 
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 
 	while (len > 0) {
 		page = eb->pages[i];
 		WARN_ON(!PageUptodate(page));
 
-		cur = min(len, PAGE_CACHE_SIZE - offset);
+		cur = min(len, PAGE_SIZE - offset);
 		kaddr = page_address(page);
 		memcpy(kaddr + offset, src, cur);
 
@@ -5423,19 +5421,19 @@
 	size_t offset;
 	struct page *page;
 	char *kaddr;
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
 
-	offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+	offset = (start_offset + start) & (PAGE_SIZE - 1);
 
 	while (len > 0) {
 		page = eb->pages[i];
 		WARN_ON(!PageUptodate(page));
 
-		cur = min(len, PAGE_CACHE_SIZE - offset);
+		cur = min(len, PAGE_SIZE - offset);
 		kaddr = page_address(page);
 		memset(kaddr + offset, c, cur);
 
@@ -5454,19 +5452,19 @@
 	size_t offset;
 	struct page *page;
 	char *kaddr;
-	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
-	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+	unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
 
 	WARN_ON(src->len != dst_len);
 
 	offset = (start_offset + dst_offset) &
-		(PAGE_CACHE_SIZE - 1);
+		(PAGE_SIZE - 1);
 
 	while (len > 0) {
 		page = dst->pages[i];
 		WARN_ON(!PageUptodate(page));
 
-		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
+		cur = min(len, (unsigned long)(PAGE_SIZE - offset));
 
 		kaddr = page_address(page);
 		read_extent_buffer(src, kaddr + offset, src_offset, cur);
@@ -5508,7 +5506,7 @@
 				    unsigned long *page_index,
 				    size_t *page_offset)
 {
-	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
 	size_t byte_offset = BIT_BYTE(nr);
 	size_t offset;
 
@@ -5519,8 +5517,8 @@
 	 */
 	offset = start_offset + start + byte_offset;
 
-	*page_index = offset >> PAGE_CACHE_SHIFT;
-	*page_offset = offset & (PAGE_CACHE_SIZE - 1);
+	*page_index = offset >> PAGE_SHIFT;
+	*page_offset = offset & (PAGE_SIZE - 1);
 }
 
 /**
@@ -5572,7 +5570,7 @@
 		len -= bits_to_set;
 		bits_to_set = BITS_PER_BYTE;
 		mask_to_set = ~0U;
-		if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+		if (++offset >= PAGE_SIZE && len > 0) {
 			offset = 0;
 			page = eb->pages[++i];
 			WARN_ON(!PageUptodate(page));
@@ -5614,7 +5612,7 @@
 		len -= bits_to_clear;
 		bits_to_clear = BITS_PER_BYTE;
 		mask_to_clear = ~0U;
-		if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+		if (++offset >= PAGE_SIZE && len > 0) {
 			offset = 0;
 			page = eb->pages[++i];
 			WARN_ON(!PageUptodate(page));
@@ -5661,7 +5659,7 @@
 	size_t cur;
 	size_t dst_off_in_page;
 	size_t src_off_in_page;
-	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
 	unsigned long dst_i;
 	unsigned long src_i;
 
@@ -5680,17 +5678,17 @@
 
 	while (len > 0) {
 		dst_off_in_page = (start_offset + dst_offset) &
-			(PAGE_CACHE_SIZE - 1);
+			(PAGE_SIZE - 1);
 		src_off_in_page = (start_offset + src_offset) &
-			(PAGE_CACHE_SIZE - 1);
+			(PAGE_SIZE - 1);
 
-		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
-		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
+		dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
+		src_i = (start_offset + src_offset) >> PAGE_SHIFT;
 
-		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
+		cur = min(len, (unsigned long)(PAGE_SIZE -
 					       src_off_in_page));
 		cur = min_t(unsigned long, cur,
-			(unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
+			(unsigned long)(PAGE_SIZE - dst_off_in_page));
 
 		copy_pages(dst->pages[dst_i], dst->pages[src_i],
 			   dst_off_in_page, src_off_in_page, cur);
@@ -5709,7 +5707,7 @@
 	size_t src_off_in_page;
 	unsigned long dst_end = dst_offset + len - 1;
 	unsigned long src_end = src_offset + len - 1;
-	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
 	unsigned long dst_i;
 	unsigned long src_i;
 
@@ -5728,13 +5726,13 @@
 		return;
 	}
 	while (len > 0) {
-		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
-		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
+		dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
+		src_i = (start_offset + src_end) >> PAGE_SHIFT;
 
 		dst_off_in_page = (start_offset + dst_end) &
-			(PAGE_CACHE_SIZE - 1);
+			(PAGE_SIZE - 1);
 		src_off_in_page = (start_offset + src_end) &
-			(PAGE_CACHE_SIZE - 1);
+			(PAGE_SIZE - 1);
 
 		cur = min_t(unsigned long, len, src_off_in_page + 1);
 		cur = min(cur, dst_off_in_page + 1);

diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5dbf92e..b5e0ade 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h

@@ -120,7 +120,7 @@
 };
 
 #define INLINE_EXTENT_BUFFER_PAGES 16
-#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
+#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
 struct extent_buffer {
 	u64 start;
 	unsigned long len;
@@ -365,8 +365,8 @@
 
 static inline unsigned long num_extent_pages(u64 start, u64 len)
 {
-	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-		(start >> PAGE_CACHE_SHIFT);
+	return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
+		(start >> PAGE_SHIFT);
 }
 
 static inline void extent_buffer_get(struct extent_buffer *eb)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b5baf5b..7a7d6e2 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c

@@ -32,7 +32,7 @@
 				  size) - 1))
 
 #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
-				       PAGE_CACHE_SIZE))
+				       PAGE_SIZE))
 
 #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
 				   sizeof(struct btrfs_ordered_sum)) / \
@@ -203,7 +203,7 @@
 		csum = (u8 *)dst;
 	}
 
-	if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
+	if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
 		path->reada = READA_FORWARD;
 
 	WARN_ON(bio->bi_vcnt <= 0);

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2f40482..fbe2589 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c

@@ -414,11 +414,11 @@
 	size_t copied = 0;
 	size_t total_copied = 0;
 	int pg = 0;
-	int offset = pos & (PAGE_CACHE_SIZE - 1);
+	int offset = pos & (PAGE_SIZE - 1);
 
 	while (write_bytes > 0) {
 		size_t count = min_t(size_t,
-				     PAGE_CACHE_SIZE - offset, write_bytes);
+				     PAGE_SIZE - offset, write_bytes);
 		struct page *page = prepared_pages[pg];
 		/*
 		 * Copy data from userspace to the current page
@@ -448,7 +448,7 @@
 		if (unlikely(copied == 0))
 			break;
 
-		if (copied < PAGE_CACHE_SIZE - offset) {
+		if (copied < PAGE_SIZE - offset) {
 			offset += copied;
 		} else {
 			pg++;
@@ -473,7 +473,7 @@
 		 */
 		ClearPageChecked(pages[i]);
 		unlock_page(pages[i]);
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 	}
 }
 
@@ -1297,7 +1297,7 @@
 {
 	int ret = 0;
 
-	if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
+	if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
 	    !PageUptodate(page)) {
 		ret = btrfs_readpage(NULL, page);
 		if (ret)
@@ -1323,7 +1323,7 @@
 				  size_t write_bytes, bool force_uptodate)
 {
 	int i;
-	unsigned long index = pos >> PAGE_CACHE_SHIFT;
+	unsigned long index = pos >> PAGE_SHIFT;
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 	int err = 0;
 	int faili;
@@ -1345,7 +1345,7 @@
 			err = prepare_uptodate_page(inode, pages[i],
 						    pos + write_bytes, false);
 		if (err) {
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 			if (err == -EAGAIN) {
 				err = 0;
 				goto again;
@@ -1360,7 +1360,7 @@
 fail:
 	while (faili >= 0) {
 		unlock_page(pages[faili]);
-		page_cache_release(pages[faili]);
+		put_page(pages[faili]);
 		faili--;
 	}
 	return err;
@@ -1408,7 +1408,7 @@
 					     cached_state, GFP_NOFS);
 			for (i = 0; i < num_pages; i++) {
 				unlock_page(pages[i]);
-				page_cache_release(pages[i]);
+				put_page(pages[i]);
 			}
 			btrfs_start_ordered_extent(inode, ordered, 1);
 			btrfs_put_ordered_extent(ordered);
@@ -1497,8 +1497,8 @@
 	bool force_page_uptodate = false;
 	bool need_unlock;
 
-	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE),
-			PAGE_CACHE_SIZE / (sizeof(struct page *)));
+	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
+			PAGE_SIZE / (sizeof(struct page *)));
 	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
 	nrptrs = max(nrptrs, 8);
 	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
@@ -1506,13 +1506,13 @@
 		return -ENOMEM;
 
 	while (iov_iter_count(i) > 0) {
-		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+		size_t offset = pos & (PAGE_SIZE - 1);
 		size_t sector_offset;
 		size_t write_bytes = min(iov_iter_count(i),
-					 nrptrs * (size_t)PAGE_CACHE_SIZE -
+					 nrptrs * (size_t)PAGE_SIZE -
 					 offset);
 		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
-						PAGE_CACHE_SIZE);
+						PAGE_SIZE);
 		size_t reserve_bytes;
 		size_t dirty_pages;
 		size_t copied;
@@ -1547,7 +1547,7 @@
 			 * write_bytes, so scale down.
 			 */
 			num_pages = DIV_ROUND_UP(write_bytes + offset,
-						 PAGE_CACHE_SIZE);
+						 PAGE_SIZE);
 			reserve_bytes = round_up(write_bytes + sector_offset,
 					root->sectorsize);
 			goto reserve_metadata;
@@ -1609,7 +1609,7 @@
 		} else {
 			force_page_uptodate = false;
 			dirty_pages = DIV_ROUND_UP(copied + offset,
-						   PAGE_CACHE_SIZE);
+						   PAGE_SIZE);
 		}
 
 		/*
@@ -1641,7 +1641,7 @@
 				u64 __pos;
 
 				__pos = round_down(pos, root->sectorsize) +
-					(dirty_pages << PAGE_CACHE_SHIFT);
+					(dirty_pages << PAGE_SHIFT);
 				btrfs_delalloc_release_space(inode, __pos,
 							     release_bytes);
 			}
@@ -1682,7 +1682,7 @@
 		cond_resched();
 
 		balance_dirty_pages_ratelimited(inode->i_mapping);
-		if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1)
+		if (dirty_pages < (root->nodesize >> PAGE_SHIFT) + 1)
 			btrfs_btree_balance_dirty(root);
 
 		pos += copied;
@@ -1738,8 +1738,8 @@
 		goto out;
 	written += written_buffered;
 	iocb->ki_pos = pos + written_buffered;
-	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
-				 endbyte >> PAGE_CACHE_SHIFT);
+	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
+				 endbyte >> PAGE_SHIFT);
 out:
 	return written ? written : err;
 }

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 8f835bf..5e6062c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c

@@ -29,7 +29,7 @@
 #include "inode-map.h"
 #include "volumes.h"
 
-#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
+#define BITS_PER_BITMAP		(PAGE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG	SZ_32K
 
 struct btrfs_trim_range {
@@ -295,7 +295,7 @@
 		return -ENOMEM;
 
 	file_ra_state_init(ra, inode->i_mapping);
-	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
 
 	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
 
@@ -310,14 +310,14 @@
 	int num_pages;
 	int check_crcs = 0;
 
-	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
+	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 
 	if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
 		check_crcs = 1;
 
 	/* Make sure we can fit our crcs into the first page */
 	if (write && check_crcs &&
-	    (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
+	    (num_pages * sizeof(u32)) >= PAGE_SIZE)
 		return -ENOSPC;
 
 	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
@@ -354,9 +354,9 @@
 	io_ctl->page = io_ctl->pages[io_ctl->index++];
 	io_ctl->cur = page_address(io_ctl->page);
 	io_ctl->orig = io_ctl->cur;
-	io_ctl->size = PAGE_CACHE_SIZE;
+	io_ctl->size = PAGE_SIZE;
 	if (clear)
-		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
+		memset(io_ctl->cur, 0, PAGE_SIZE);
 }
 
 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
@@ -369,7 +369,7 @@
 		if (io_ctl->pages[i]) {
 			ClearPageChecked(io_ctl->pages[i]);
 			unlock_page(io_ctl->pages[i]);
-			page_cache_release(io_ctl->pages[i]);
+			put_page(io_ctl->pages[i]);
 		}
 	}
 }
@@ -475,7 +475,7 @@
 		offset = sizeof(u32) * io_ctl->num_pages;
 
 	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
-			      PAGE_CACHE_SIZE - offset);
+			      PAGE_SIZE - offset);
 	btrfs_csum_final(crc, (char *)&crc);
 	io_ctl_unmap_page(io_ctl);
 	tmp = page_address(io_ctl->pages[0]);
@@ -503,7 +503,7 @@
 
 	io_ctl_map_page(io_ctl, 0);
 	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
-			      PAGE_CACHE_SIZE - offset);
+			      PAGE_SIZE - offset);
 	btrfs_csum_final(crc, (char *)&crc);
 	if (val != crc) {
 		btrfs_err_rl(io_ctl->root->fs_info,
@@ -561,7 +561,7 @@
 		io_ctl_map_page(io_ctl, 0);
 	}
 
-	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
+	memcpy(io_ctl->cur, bitmap, PAGE_SIZE);
 	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
 	if (io_ctl->index < io_ctl->num_pages)
 		io_ctl_map_page(io_ctl, 0);
@@ -621,7 +621,7 @@
 	if (ret)
 		return ret;
 
-	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
+	memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE);
 	io_ctl_unmap_page(io_ctl);
 
 	return 0;
@@ -775,7 +775,7 @@
 		} else {
 			ASSERT(num_bitmaps);
 			num_bitmaps--;
-			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+			e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
 			if (!e->bitmap) {
 				kmem_cache_free(
 					btrfs_free_space_cachep, e);
@@ -1660,7 +1660,7 @@
 	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
 	 * we add more bitmaps.
 	 */
-	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
+	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE;
 
 	if (bitmap_bytes >= max_bytes) {
 		ctl->extents_thresh = 0;
@@ -2111,7 +2111,7 @@
 		}
 
 		/* allocate the bitmap */
-		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+		info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
 		spin_lock(&ctl->tree_lock);
 		if (!info->bitmap) {
 			ret = -ENOMEM;
@@ -3580,7 +3580,7 @@
 	}
 
 	if (!map) {
-		map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+		map = kzalloc(PAGE_SIZE, GFP_NOFS);
 		if (!map) {
 			kmem_cache_free(btrfs_free_space_cachep, info);
 			return -ENOMEM;

diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 1f0ec19..70107f7 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c

@@ -283,7 +283,7 @@
 }
 
 #define INIT_THRESHOLD	((SZ_32K / 2) / sizeof(struct btrfs_free_space))
-#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
+#define INODES_PER_BITMAP (PAGE_SIZE * 8)
 
 /*
  * The goal is to keep the memory used by the free_ino tree won't
@@ -317,7 +317,7 @@
 	}
 
 	ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
-				PAGE_CACHE_SIZE / sizeof(*info);
+				PAGE_SIZE / sizeof(*info);
 }
 
 /*
@@ -481,12 +481,12 @@
 
 	spin_lock(&ctl->tree_lock);
 	prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
-	prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
-	prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
+	prealloc = ALIGN(prealloc, PAGE_SIZE);
+	prealloc += ctl->total_bitmaps * PAGE_SIZE;
 	spin_unlock(&ctl->tree_lock);
 
 	/* Just to make sure we have enough space */
-	prealloc += 8 * PAGE_CACHE_SIZE;
+	prealloc += 8 * PAGE_SIZE;
 
 	ret = btrfs_delalloc_reserve_space(inode, 0, prealloc);
 	if (ret)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 41a5688..2aaba58 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c

@@ -194,7 +194,7 @@
 		while (compressed_size > 0) {
 			cpage = compressed_pages[i];
 			cur_size = min_t(unsigned long, compressed_size,
-				       PAGE_CACHE_SIZE);
+				       PAGE_SIZE);
 
 			kaddr = kmap_atomic(cpage);
 			write_extent_buffer(leaf, kaddr, ptr, cur_size);
@@ -208,13 +208,13 @@
 						  compress_type);
 	} else {
 		page = find_get_page(inode->i_mapping,
-				     start >> PAGE_CACHE_SHIFT);
+				     start >> PAGE_SHIFT);
 		btrfs_set_file_extent_compression(leaf, ei, 0);
 		kaddr = kmap_atomic(page);
-		offset = start & (PAGE_CACHE_SIZE - 1);
+		offset = start & (PAGE_SIZE - 1);
 		write_extent_buffer(leaf, kaddr + offset, ptr, size);
 		kunmap_atomic(kaddr);
-		page_cache_release(page);
+		put_page(page);
 	}
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
@@ -322,7 +322,7 @@
 	 * And at reserve time, it's always aligned to page size, so
 	 * just free one page here.
 	 */
-	btrfs_qgroup_free_data(inode, 0, PAGE_CACHE_SIZE);
+	btrfs_qgroup_free_data(inode, 0, PAGE_SIZE);
 	btrfs_free_path(path);
 	btrfs_end_transaction(trans, root);
 	return ret;
@@ -435,8 +435,8 @@
 	actual_end = min_t(u64, isize, end + 1);
 again:
 	will_compress = 0;
-	nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
-	nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_CACHE_SIZE);
+	nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
+	nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_SIZE);
 
 	/*
 	 * we don't want to send crud past the end of i_size through
@@ -514,7 +514,7 @@
 
 		if (!ret) {
 			unsigned long offset = total_compressed &
-				(PAGE_CACHE_SIZE - 1);
+				(PAGE_SIZE - 1);
 			struct page *page = pages[nr_pages_ret - 1];
 			char *kaddr;
 
@@ -524,7 +524,7 @@
 			if (offset) {
 				kaddr = kmap_atomic(page);
 				memset(kaddr + offset, 0,
-				       PAGE_CACHE_SIZE - offset);
+				       PAGE_SIZE - offset);
 				kunmap_atomic(kaddr);
 			}
 			will_compress = 1;
@@ -580,7 +580,7 @@
 		 * one last check to make sure the compression is really a
 		 * win, compare the page count read with the blocks on disk
 		 */
-		total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
+		total_in = ALIGN(total_in, PAGE_SIZE);
 		if (total_compressed >= total_in) {
 			will_compress = 0;
 		} else {
@@ -594,7 +594,7 @@
 		 */
 		for (i = 0; i < nr_pages_ret; i++) {
 			WARN_ON(pages[i]->mapping);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 		kfree(pages);
 		pages = NULL;
@@ -650,7 +650,7 @@
 free_pages_out:
 	for (i = 0; i < nr_pages_ret; i++) {
 		WARN_ON(pages[i]->mapping);
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 	}
 	kfree(pages);
 }
@@ -664,7 +664,7 @@
 
 	for (i = 0; i < async_extent->nr_pages; i++) {
 		WARN_ON(async_extent->pages[i]->mapping);
-		page_cache_release(async_extent->pages[i]);
+		put_page(async_extent->pages[i]);
 	}
 	kfree(async_extent->pages);
 	async_extent->nr_pages = 0;
@@ -966,7 +966,7 @@
 				     PAGE_END_WRITEBACK);
 
 			*nr_written = *nr_written +
-			     (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
+			     (end - start + PAGE_SIZE) / PAGE_SIZE;
 			*page_started = 1;
 			goto out;
 		} else if (ret < 0) {
@@ -1106,8 +1106,8 @@
 	async_cow = container_of(work, struct async_cow, work);
 
 	root = async_cow->root;
-	nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
-		PAGE_CACHE_SHIFT;
+	nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
+		PAGE_SHIFT;
 
 	/*
 	 * atomic_sub_return implies a barrier for waitqueue_active
@@ -1164,8 +1164,8 @@
 				async_cow_start, async_cow_submit,
 				async_cow_free);
 
-		nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
-			PAGE_CACHE_SHIFT;
+		nr_pages = (cur_end - start + PAGE_SIZE) >>
+			PAGE_SHIFT;
 		atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
 
 		btrfs_queue_work(root->fs_info->delalloc_workers,
@@ -1960,7 +1960,7 @@
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      struct extent_state **cached_state)
 {
-	WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
+	WARN_ON((end & (PAGE_SIZE - 1)) == 0);
 	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
 				   cached_state, GFP_NOFS);
 }
@@ -1993,7 +1993,7 @@
 
 	inode = page->mapping->host;
 	page_start = page_offset(page);
-	page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
+	page_end = page_offset(page) + PAGE_SIZE - 1;
 
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
 			 &cached_state);
@@ -2003,7 +2003,7 @@
 		goto out;
 
 	ordered = btrfs_lookup_ordered_range(inode, page_start,
-					PAGE_CACHE_SIZE);
+					PAGE_SIZE);
 	if (ordered) {
 		unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
 				     page_end, &cached_state, GFP_NOFS);
@@ -2014,7 +2014,7 @@
 	}
 
 	ret = btrfs_delalloc_reserve_space(inode, page_start,
-					   PAGE_CACHE_SIZE);
+					   PAGE_SIZE);
 	if (ret) {
 		mapping_set_error(page->mapping, ret);
 		end_extent_writepage(page, ret, page_start, page_end);
@@ -2030,7 +2030,7 @@
 			     &cached_state, GFP_NOFS);
 out_page:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	kfree(fixup);
 }
 
@@ -2063,7 +2063,7 @@
 		return -EAGAIN;
 
 	SetPageChecked(page);
-	page_cache_get(page);
+	get_page(page);
 	btrfs_init_work(&fixup->work, btrfs_fixup_helper,
 			btrfs_writepage_fixup_worker, NULL, NULL);
 	fixup->page = page;
@@ -4247,7 +4247,7 @@
 
 	if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
 		loff_t offset = new_size;
-		loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
+		loff_t page_end = ALIGN(offset, PAGE_SIZE);
 
 		/*
 		 * Zero out the remaining of the last page of our inline extent,
@@ -4633,7 +4633,7 @@
 	struct extent_state *cached_state = NULL;
 	char *kaddr;
 	u32 blocksize = root->sectorsize;
-	pgoff_t index = from >> PAGE_CACHE_SHIFT;
+	pgoff_t index = from >> PAGE_SHIFT;
 	unsigned offset = from & (blocksize - 1);
 	struct page *page;
 	gfp_t mask = btrfs_alloc_write_mask(mapping);
@@ -4668,7 +4668,7 @@
 		lock_page(page);
 		if (page->mapping != mapping) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			goto again;
 		}
 		if (!PageUptodate(page)) {
@@ -4686,7 +4686,7 @@
 		unlock_extent_cached(io_tree, block_start, block_end,
 				     &cached_state, GFP_NOFS);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		btrfs_start_ordered_extent(inode, ordered, 1);
 		btrfs_put_ordered_extent(ordered);
 		goto again;
@@ -4728,7 +4728,7 @@
 		btrfs_delalloc_release_space(inode, block_start,
 					     blocksize);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 out:
 	return ret;
 }
@@ -6717,7 +6717,7 @@
 
 	read_extent_buffer(leaf, tmp, ptr, inline_size);
 
-	max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
+	max_size = min_t(unsigned long, PAGE_SIZE, max_size);
 	ret = btrfs_decompress(compress_type, tmp, page,
 			       extent_offset, inline_size, max_size);
 	kfree(tmp);
@@ -6879,8 +6879,8 @@
 
 		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
 		extent_offset = page_offset(page) + pg_offset - extent_start;
-		copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
-				size - extent_offset);
+		copy_size = min_t(u64, PAGE_SIZE - pg_offset,
+				  size - extent_offset);
 		em->start = extent_start + extent_offset;
 		em->len = ALIGN(copy_size, root->sectorsize);
 		em->orig_block_len = em->len;
@@ -6899,9 +6899,9 @@
 				map = kmap(page);
 				read_extent_buffer(leaf, map + pg_offset, ptr,
 						   copy_size);
-				if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
+				if (pg_offset + copy_size < PAGE_SIZE) {
 					memset(map + pg_offset + copy_size, 0,
-					       PAGE_CACHE_SIZE - pg_offset -
+					       PAGE_SIZE - pg_offset -
 					       copy_size);
 				}
 				kunmap(page);
@@ -7336,12 +7336,12 @@
 	int start_idx;
 	int end_idx;
 
-	start_idx = start >> PAGE_CACHE_SHIFT;
+	start_idx = start >> PAGE_SHIFT;
 
 	/*
 	 * end is the last byte in the last page.  end == start is legal
 	 */
-	end_idx = end >> PAGE_CACHE_SHIFT;
+	end_idx = end >> PAGE_SHIFT;
 
 	rcu_read_lock();
 
@@ -7382,7 +7382,7 @@
 		 * include/linux/pagemap.h for details.
 		 */
 		if (unlikely(page != *pagep)) {
-			page_cache_release(page);
+			put_page(page);
 			page = NULL;
 		}
 	}
@@ -7390,7 +7390,7 @@
 	if (page) {
 		if (page->index <= end_idx)
 			found = true;
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	rcu_read_unlock();
@@ -8719,7 +8719,7 @@
 	if (ret == 1) {
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return ret;
 }
@@ -8739,7 +8739,7 @@
 	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
 	u64 page_start = page_offset(page);
-	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+	u64 page_end = page_start + PAGE_SIZE - 1;
 	u64 start;
 	u64 end;
 	int inode_evicting = inode->i_state & I_FREEING;
@@ -8822,7 +8822,7 @@
 	 * 2) Not written to disk
 	 *    This means the reserved space should be freed here.
 	 */
-	btrfs_qgroup_free_data(inode, page_start, PAGE_CACHE_SIZE);
+	btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
 	if (!inode_evicting) {
 		clear_extent_bit(tree, page_start, page_end,
 				 EXTENT_LOCKED | EXTENT_DIRTY |
@@ -8837,7 +8837,7 @@
 	if (PagePrivate(page)) {
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
-		page_cache_release(page);
+		put_page(page);
 	}
 }
 
@@ -8874,11 +8874,11 @@
 	u64 page_end;
 	u64 end;
 
-	reserved_space = PAGE_CACHE_SIZE;
+	reserved_space = PAGE_SIZE;
 
 	sb_start_pagefault(inode->i_sb);
 	page_start = page_offset(page);
-	page_end = page_start + PAGE_CACHE_SIZE - 1;
+	page_end = page_start + PAGE_SIZE - 1;
 	end = page_end;
 
 	/*
@@ -8934,15 +8934,15 @@
 		goto again;
 	}
 
-	if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) {
+	if (page->index == ((size - 1) >> PAGE_SHIFT)) {
 		reserved_space = round_up(size - page_start, root->sectorsize);
-		if (reserved_space < PAGE_CACHE_SIZE) {
+		if (reserved_space < PAGE_SIZE) {
 			end = page_start + reserved_space - 1;
 			spin_lock(&BTRFS_I(inode)->lock);
 			BTRFS_I(inode)->outstanding_extents++;
 			spin_unlock(&BTRFS_I(inode)->lock);
 			btrfs_delalloc_release_space(inode, page_start,
-						PAGE_CACHE_SIZE - reserved_space);
+						PAGE_SIZE - reserved_space);
 		}
 	}
 
@@ -8969,14 +8969,14 @@
 	ret = 0;
 
 	/* page is wholly or partially inside EOF */
-	if (page_start + PAGE_CACHE_SIZE > size)
-		zero_start = size & ~PAGE_CACHE_MASK;
+	if (page_start + PAGE_SIZE > size)
+		zero_start = size & ~PAGE_MASK;
 	else
-		zero_start = PAGE_CACHE_SIZE;
+		zero_start = PAGE_SIZE;
 
-	if (zero_start != PAGE_CACHE_SIZE) {
+	if (zero_start != PAGE_SIZE) {
 		kaddr = kmap(page);
-		memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
+		memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
 		flush_dcache_page(page);
 		kunmap(page);
 	}

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 053e677..94a0c8a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c

@@ -898,7 +898,7 @@
 	u64 end;
 
 	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
+	em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE);
 	read_unlock(&em_tree->lock);
 
 	if (em) {
@@ -988,7 +988,7 @@
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_map *em;
-	u64 len = PAGE_CACHE_SIZE;
+	u64 len = PAGE_SIZE;
 
 	/*
 	 * hopefully we have this extent in the tree already, try without
@@ -1124,15 +1124,15 @@
 	struct extent_io_tree *tree;
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 
-	file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
+	file_end = (isize - 1) >> PAGE_SHIFT;
 	if (!isize || start_index > file_end)
 		return 0;
 
 	page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
 
 	ret = btrfs_delalloc_reserve_space(inode,
-			start_index << PAGE_CACHE_SHIFT,
-			page_cnt << PAGE_CACHE_SHIFT);
+			start_index << PAGE_SHIFT,
+			page_cnt << PAGE_SHIFT);
 	if (ret)
 		return ret;
 	i_done = 0;
@@ -1148,7 +1148,7 @@
 			break;
 
 		page_start = page_offset(page);
-		page_end = page_start + PAGE_CACHE_SIZE - 1;
+		page_end = page_start + PAGE_SIZE - 1;
 		while (1) {
 			lock_extent_bits(tree, page_start, page_end,
 					 &cached_state);
@@ -1169,7 +1169,7 @@
 			 */
 			if (page->mapping != inode->i_mapping) {
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 				goto again;
 			}
 		}
@@ -1179,7 +1179,7 @@
 			lock_page(page);
 			if (!PageUptodate(page)) {
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 				ret = -EIO;
 				break;
 			}
@@ -1187,7 +1187,7 @@
 
 		if (page->mapping != inode->i_mapping) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			goto again;
 		}
 
@@ -1208,7 +1208,7 @@
 		wait_on_page_writeback(pages[i]);
 
 	page_start = page_offset(pages[0]);
-	page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
+	page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE;
 
 	lock_extent_bits(&BTRFS_I(inode)->io_tree,
 			 page_start, page_end - 1, &cached_state);
@@ -1222,8 +1222,8 @@
 		BTRFS_I(inode)->outstanding_extents++;
 		spin_unlock(&BTRFS_I(inode)->lock);
 		btrfs_delalloc_release_space(inode,
-				start_index << PAGE_CACHE_SHIFT,
-				(page_cnt - i_done) << PAGE_CACHE_SHIFT);
+				start_index << PAGE_SHIFT,
+				(page_cnt - i_done) << PAGE_SHIFT);
 	}
 
 
@@ -1240,17 +1240,17 @@
 		set_page_extent_mapped(pages[i]);
 		set_page_dirty(pages[i]);
 		unlock_page(pages[i]);
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 	}
 	return i_done;
 out:
 	for (i = 0; i < i_done; i++) {
 		unlock_page(pages[i]);
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 	}
 	btrfs_delalloc_release_space(inode,
-			start_index << PAGE_CACHE_SHIFT,
-			page_cnt << PAGE_CACHE_SHIFT);
+			start_index << PAGE_SHIFT,
+			page_cnt << PAGE_SHIFT);
 	return ret;
 
 }
@@ -1273,7 +1273,7 @@
 	int defrag_count = 0;
 	int compress_type = BTRFS_COMPRESS_ZLIB;
 	u32 extent_thresh = range->extent_thresh;
-	unsigned long max_cluster = SZ_256K >> PAGE_CACHE_SHIFT;
+	unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
 	unsigned long cluster = max_cluster;
 	u64 new_align = ~((u64)SZ_128K - 1);
 	struct page **pages = NULL;
@@ -1317,9 +1317,9 @@
 	/* find the last page to defrag */
 	if (range->start + range->len > range->start) {
 		last_index = min_t(u64, isize - 1,
-			 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
+			 range->start + range->len - 1) >> PAGE_SHIFT;
 	} else {
-		last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+		last_index = (isize - 1) >> PAGE_SHIFT;
 	}
 
 	if (newer_than) {
@@ -1331,11 +1331,11 @@
 			 * we always align our defrag to help keep
 			 * the extents in the file evenly spaced
 			 */
-			i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
+			i = (newer_off & new_align) >> PAGE_SHIFT;
 		} else
 			goto out_ra;
 	} else {
-		i = range->start >> PAGE_CACHE_SHIFT;
+		i = range->start >> PAGE_SHIFT;
 	}
 	if (!max_to_defrag)
 		max_to_defrag = last_index - i + 1;
@@ -1348,7 +1348,7 @@
 		inode->i_mapping->writeback_index = i;
 
 	while (i <= last_index && defrag_count < max_to_defrag &&
-	       (i < DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE))) {
+	       (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) {
 		/*
 		 * make sure we stop running if someone unmounts
 		 * the FS
@@ -1362,7 +1362,7 @@
 			break;
 		}
 
-		if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
+		if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
 					 extent_thresh, &last_len, &skip,
 					 &defrag_end, range->flags &
 					 BTRFS_DEFRAG_RANGE_COMPRESS)) {
@@ -1371,14 +1371,14 @@
 			 * the should_defrag function tells us how much to skip
 			 * bump our counter by the suggested amount
 			 */
-			next = DIV_ROUND_UP(skip, PAGE_CACHE_SIZE);
+			next = DIV_ROUND_UP(skip, PAGE_SIZE);
 			i = max(i + 1, next);
 			continue;
 		}
 
 		if (!newer_than) {
-			cluster = (PAGE_CACHE_ALIGN(defrag_end) >>
-				   PAGE_CACHE_SHIFT) - i;
+			cluster = (PAGE_ALIGN(defrag_end) >>
+				   PAGE_SHIFT) - i;
 			cluster = min(cluster, max_cluster);
 		} else {
 			cluster = max_cluster;
@@ -1412,20 +1412,20 @@
 				i += ret;
 
 			newer_off = max(newer_off + 1,
-					(u64)i << PAGE_CACHE_SHIFT);
+					(u64)i << PAGE_SHIFT);
 
 			ret = find_new_extents(root, inode, newer_than,
 					       &newer_off, SZ_64K);
 			if (!ret) {
 				range->start = newer_off;
-				i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
+				i = (newer_off & new_align) >> PAGE_SHIFT;
 			} else {
 				break;
 			}
 		} else {
 			if (ret > 0) {
 				i += ret;
-				last_len += ret << PAGE_CACHE_SHIFT;
+				last_len += ret << PAGE_SHIFT;
 			} else {
 				i++;
 				last_len = 0;
@@ -1722,7 +1722,7 @@
 	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
 		readonly = true;
 	if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
-		if (vol_args->size > PAGE_CACHE_SIZE) {
+		if (vol_args->size > PAGE_SIZE) {
 			ret = -EINVAL;
 			goto free_args;
 		}
@@ -2806,12 +2806,12 @@
 		lock_page(page);
 		if (!PageUptodate(page)) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			return ERR_PTR(-EIO);
 		}
 		if (page->mapping != inode->i_mapping) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			return ERR_PTR(-EAGAIN);
 		}
 	}
@@ -2823,7 +2823,7 @@
 			       int num_pages, u64 off)
 {
 	int i;
-	pgoff_t index = off >> PAGE_CACHE_SHIFT;
+	pgoff_t index = off >> PAGE_SHIFT;
 
 	for (i = 0; i < num_pages; i++) {
 again:
@@ -2932,12 +2932,12 @@
 		pg = cmp->src_pages[i];
 		if (pg) {
 			unlock_page(pg);
-			page_cache_release(pg);
+			put_page(pg);
 		}
 		pg = cmp->dst_pages[i];
 		if (pg) {
 			unlock_page(pg);
-			page_cache_release(pg);
+			put_page(pg);
 		}
 	}
 	kfree(cmp->src_pages);
@@ -2949,7 +2949,7 @@
 				  u64 len, struct cmp_pages *cmp)
 {
 	int ret;
-	int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+	int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
 	struct page **src_pgarr, **dst_pgarr;
 
 	/*
@@ -2987,12 +2987,12 @@
 	int ret = 0;
 	int i;
 	struct page *src_page, *dst_page;
-	unsigned int cmp_len = PAGE_CACHE_SIZE;
+	unsigned int cmp_len = PAGE_SIZE;
 	void *addr, *dst_addr;
 
 	i = 0;
 	while (len) {
-		if (len < PAGE_CACHE_SIZE)
+		if (len < PAGE_SIZE)
 			cmp_len = len;
 
 		BUG_ON(i >= cmp->num_pages);
@@ -3191,7 +3191,7 @@
 	if (olen > BTRFS_MAX_DEDUPE_LEN)
 		olen = BTRFS_MAX_DEDUPE_LEN;
 
-	if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
+	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
 		/*
 		 * Btrfs does not support blocksize < page_size. As a
 		 * result, btrfs_cmp_data() won't correctly handle
@@ -3891,8 +3891,8 @@
 	 * data immediately and not the previous data.
 	 */
 	truncate_inode_pages_range(&inode->i_data,
-				round_down(destoff, PAGE_CACHE_SIZE),
-				round_up(destoff + len, PAGE_CACHE_SIZE) - 1);
+				round_down(destoff, PAGE_SIZE),
+				round_up(destoff + len, PAGE_SIZE) - 1);
 out_unlock:
 	if (!same_inode)
 		btrfs_double_inode_unlock(src, inode);
@@ -4124,7 +4124,7 @@
 	/* we generally have at most 6 or so space infos, one for each raid
 	 * level.  So, a whole page should be more than enough for everyone
 	 */
-	if (alloc_size > PAGE_CACHE_SIZE)
+	if (alloc_size > PAGE_SIZE)
 		return -ENOMEM;
 
 	space_args.total_spaces = 0;

diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a2f0513..1adfbe7 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c

@@ -55,8 +55,8 @@
 		return ERR_PTR(-ENOMEM);
 
 	workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
-	workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
-	workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
+	workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_SIZE));
+	workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_SIZE));
 	if (!workspace->mem || !workspace->buf || !workspace->cbuf)
 		goto fail;
 
@@ -116,7 +116,7 @@
 	*total_out = 0;
 	*total_in = 0;
 
-	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+	in_page = find_get_page(mapping, start >> PAGE_SHIFT);
 	data_in = kmap(in_page);
 
 	/*
@@ -133,10 +133,10 @@
 	tot_out = LZO_LEN;
 	pages[0] = out_page;
 	nr_pages = 1;
-	pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+	pg_bytes_left = PAGE_SIZE - LZO_LEN;
 
 	/* compress at most one page of data each time */
-	in_len = min(len, PAGE_CACHE_SIZE);
+	in_len = min(len, PAGE_SIZE);
 	while (tot_in < len) {
 		ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
 				       &out_len, workspace->mem);
@@ -201,7 +201,7 @@
 				cpage_out = kmap(out_page);
 				pages[nr_pages++] = out_page;
 
-				pg_bytes_left = PAGE_CACHE_SIZE;
+				pg_bytes_left = PAGE_SIZE;
 				out_offset = 0;
 			}
 		}
@@ -221,12 +221,12 @@
 
 		bytes_left = len - tot_in;
 		kunmap(in_page);
-		page_cache_release(in_page);
+		put_page(in_page);
 
-		start += PAGE_CACHE_SIZE;
-		in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+		start += PAGE_SIZE;
+		in_page = find_get_page(mapping, start >> PAGE_SHIFT);
 		data_in = kmap(in_page);
-		in_len = min(bytes_left, PAGE_CACHE_SIZE);
+		in_len = min(bytes_left, PAGE_SIZE);
 	}
 
 	if (tot_out > tot_in)
@@ -248,7 +248,7 @@
 
 	if (in_page) {
 		kunmap(in_page);
-		page_cache_release(in_page);
+		put_page(in_page);
 	}
 
 	return ret;
@@ -266,7 +266,7 @@
 	char *data_in;
 	unsigned long page_in_index = 0;
 	unsigned long page_out_index = 0;
-	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE);
+	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
 	unsigned long buf_start;
 	unsigned long buf_offset = 0;
 	unsigned long bytes;
@@ -289,7 +289,7 @@
 	tot_in = LZO_LEN;
 	in_offset = LZO_LEN;
 	tot_len = min_t(size_t, srclen, tot_len);
-	in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+	in_page_bytes_left = PAGE_SIZE - LZO_LEN;
 
 	tot_out = 0;
 	pg_offset = 0;
@@ -345,12 +345,12 @@
 
 				data_in = kmap(pages_in[++page_in_index]);
 
-				in_page_bytes_left = PAGE_CACHE_SIZE;
+				in_page_bytes_left = PAGE_SIZE;
 				in_offset = 0;
 			}
 		}
 
-		out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
+		out_len = lzo1x_worst_compress(PAGE_SIZE);
 		ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
 					    &out_len);
 		if (need_unmap)
@@ -399,7 +399,7 @@
 	in_len = read_compress_length(data_in);
 	data_in += LZO_LEN;
 
-	out_len = PAGE_CACHE_SIZE;
+	out_len = PAGE_SIZE;
 	ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
 	if (ret != LZO_E_OK) {
 		printk(KERN_WARNING "BTRFS: decompress failed!\n");

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5516136..0b7792e 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c

@@ -270,7 +270,7 @@
 		s = kmap(rbio->bio_pages[i]);
 		d = kmap(rbio->stripe_pages[i]);
 
-		memcpy(d, s, PAGE_CACHE_SIZE);
+		memcpy(d, s, PAGE_SIZE);
 
 		kunmap(rbio->bio_pages[i]);
 		kunmap(rbio->stripe_pages[i]);
@@ -962,7 +962,7 @@
  */
 static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
 {
-	return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes;
+	return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
 }
 
 /*
@@ -1078,7 +1078,7 @@
 	u64 disk_start;
 
 	stripe = &rbio->bbio->stripes[stripe_nr];
-	disk_start = stripe->physical + (page_index << PAGE_CACHE_SHIFT);
+	disk_start = stripe->physical + (page_index << PAGE_SHIFT);
 
 	/* if the device is missing, just fail this stripe */
 	if (!stripe->dev->bdev)
@@ -1096,8 +1096,8 @@
 		if (last_end == disk_start && stripe->dev->bdev &&
 		    !last->bi_error &&
 		    last->bi_bdev == stripe->dev->bdev) {
-			ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0);
-			if (ret == PAGE_CACHE_SIZE)
+			ret = bio_add_page(last, page, PAGE_SIZE, 0);
+			if (ret == PAGE_SIZE)
 				return 0;
 		}
 	}
@@ -1111,7 +1111,7 @@
 	bio->bi_bdev = stripe->dev->bdev;
 	bio->bi_iter.bi_sector = disk_start >> 9;
 
-	bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+	bio_add_page(bio, page, PAGE_SIZE, 0);
 	bio_list_add(bio_list, bio);
 	return 0;
 }
@@ -1154,7 +1154,7 @@
 	bio_list_for_each(bio, &rbio->bio_list) {
 		start = (u64)bio->bi_iter.bi_sector << 9;
 		stripe_offset = start - rbio->bbio->raid_map[0];
-		page_index = stripe_offset >> PAGE_CACHE_SHIFT;
+		page_index = stripe_offset >> PAGE_SHIFT;
 
 		for (i = 0; i < bio->bi_vcnt; i++) {
 			p = bio->bi_io_vec[i].bv_page;
@@ -1253,7 +1253,7 @@
 		} else {
 			/* raid5 */
 			memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
-			run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
+			run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
 		}
 
 
@@ -1914,7 +1914,7 @@
 			/* Copy parity block into failed block to start with */
 			memcpy(pointers[faila],
 			       pointers[rbio->nr_data],
-			       PAGE_CACHE_SIZE);
+			       PAGE_SIZE);
 
 			/* rearrange the pointer array */
 			p = pointers[faila];
@@ -1923,7 +1923,7 @@
 			pointers[rbio->nr_data - 1] = p;
 
 			/* xor in the rest */
-			run_xor(pointers, rbio->nr_data - 1, PAGE_CACHE_SIZE);
+			run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE);
 		}
 		/* if we're doing this rebuild as part of an rmw, go through
 		 * and set all of our private rbio pages in the
@@ -2250,7 +2250,7 @@
 	ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
 				rbio->stripe_len * rbio->nr_data);
 	stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
-	index = stripe_offset >> PAGE_CACHE_SHIFT;
+	index = stripe_offset >> PAGE_SHIFT;
 	rbio->bio_pages[index] = page;
 }
 
@@ -2365,14 +2365,14 @@
 		} else {
 			/* raid5 */
 			memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
-			run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
+			run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
 		}
 
 		/* Check scrubbing pairty and repair it */
 		p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
 		parity = kmap(p);
-		if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE))
-			memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE);
+		if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
+			memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE);
 		else
 			/* Parity is right, needn't writeback */
 			bitmap_clear(rbio->dbitmap, pagenr, 1);

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index b8929149..298631ea 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c

@@ -226,7 +226,7 @@
 	/* find extent */
 	spin_lock(&fs_info->reada_lock);
 	re = radix_tree_lookup(&fs_info->reada_tree,
-			       start >> PAGE_CACHE_SHIFT);
+			       start >> PAGE_SHIFT);
 	if (re)
 		re->refcnt++;
 	spin_unlock(&fs_info->reada_lock);
@@ -257,7 +257,7 @@
 	zone = NULL;
 	spin_lock(&fs_info->reada_lock);
 	ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
-				     logical >> PAGE_CACHE_SHIFT, 1);
+				     logical >> PAGE_SHIFT, 1);
 	if (ret == 1 && logical >= zone->start && logical <= zone->end) {
 		kref_get(&zone->refcnt);
 		spin_unlock(&fs_info->reada_lock);
@@ -294,13 +294,13 @@
 
 	spin_lock(&fs_info->reada_lock);
 	ret = radix_tree_insert(&dev->reada_zones,
-				(unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
+				(unsigned long)(zone->end >> PAGE_SHIFT),
 				zone);
 
 	if (ret == -EEXIST) {
 		kfree(zone);
 		ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
-					     logical >> PAGE_CACHE_SHIFT, 1);
+					     logical >> PAGE_SHIFT, 1);
 		if (ret == 1 && logical >= zone->start && logical <= zone->end)
 			kref_get(&zone->refcnt);
 		else
@@ -326,7 +326,7 @@
 	u64 length;
 	int real_stripes;
 	int nzones = 0;
-	unsigned long index = logical >> PAGE_CACHE_SHIFT;
+	unsigned long index = logical >> PAGE_SHIFT;
 	int dev_replace_is_ongoing;
 	int have_zone = 0;
 
@@ -495,7 +495,7 @@
 			     struct reada_extent *re)
 {
 	int i;
-	unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
+	unsigned long index = re->logical >> PAGE_SHIFT;
 
 	spin_lock(&fs_info->reada_lock);
 	if (--re->refcnt) {
@@ -538,7 +538,7 @@
 	struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
 
 	radix_tree_delete(&zone->device->reada_zones,
-			  zone->end >> PAGE_CACHE_SHIFT);
+			  zone->end >> PAGE_SHIFT);
 
 	kfree(zone);
 }
@@ -587,7 +587,7 @@
 static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
 {
 	int i;
-	unsigned long index = zone->end >> PAGE_CACHE_SHIFT;
+	unsigned long index = zone->end >> PAGE_SHIFT;
 
 	for (i = 0; i < zone->ndevs; ++i) {
 		struct reada_zone *peer;
@@ -622,7 +622,7 @@
 					     (void **)&zone, index, 1);
 		if (ret == 0)
 			break;
-		index = (zone->end >> PAGE_CACHE_SHIFT) + 1;
+		index = (zone->end >> PAGE_SHIFT) + 1;
 		if (zone->locked) {
 			if (zone->elems > top_locked_elems) {
 				top_locked_elems = zone->elems;
@@ -673,7 +673,7 @@
 	 * plugging to speed things up
 	 */
 	ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
-				     dev->reada_next >> PAGE_CACHE_SHIFT, 1);
+				     dev->reada_next >> PAGE_SHIFT, 1);
 	if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
 		ret = reada_pick_zone(dev);
 		if (!ret) {
@@ -682,7 +682,7 @@
 		}
 		re = NULL;
 		ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
-					dev->reada_next >> PAGE_CACHE_SHIFT, 1);
+					dev->reada_next >> PAGE_SHIFT, 1);
 	}
 	if (ret == 0) {
 		spin_unlock(&fs_info->reada_lock);
@@ -838,7 +838,7 @@
 				printk(KERN_CONT " curr off %llu",
 					device->reada_next - zone->start);
 			printk(KERN_CONT "\n");
-			index = (zone->end >> PAGE_CACHE_SHIFT) + 1;
+			index = (zone->end >> PAGE_SHIFT) + 1;
 		}
 		cnt = 0;
 		index = 0;
@@ -864,7 +864,7 @@
 				}
 			}
 			printk(KERN_CONT "\n");
-			index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
+			index = (re->logical >> PAGE_SHIFT) + 1;
 			if (++cnt > 15)
 				break;
 		}
@@ -880,7 +880,7 @@
 		if (ret == 0)
 			break;
 		if (!re->scheduled) {
-			index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
+			index = (re->logical >> PAGE_SHIFT) + 1;
 			continue;
 		}
 		printk(KERN_DEBUG
@@ -897,7 +897,7 @@
 			}
 		}
 		printk(KERN_CONT "\n");
-		index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
+		index = (re->logical >> PAGE_SHIFT) + 1;
 	}
 	spin_unlock(&fs_info->reada_lock);
 }

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 2bd0011..3c93968 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c

@@ -3129,10 +3129,10 @@
 	if (ret)
 		goto out;
 
-	index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
-	last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
+	index = (cluster->start - offset) >> PAGE_SHIFT;
+	last_index = (cluster->end - offset) >> PAGE_SHIFT;
 	while (index <= last_index) {
-		ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
+		ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE);
 		if (ret)
 			goto out;
 
@@ -3145,7 +3145,7 @@
 						   mask);
 			if (!page) {
 				btrfs_delalloc_release_metadata(inode,
-							PAGE_CACHE_SIZE);
+							PAGE_SIZE);
 				ret = -ENOMEM;
 				goto out;
 			}
@@ -3162,16 +3162,16 @@
 			lock_page(page);
 			if (!PageUptodate(page)) {
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 				btrfs_delalloc_release_metadata(inode,
-							PAGE_CACHE_SIZE);
+							PAGE_SIZE);
 				ret = -EIO;
 				goto out;
 			}
 		}
 
 		page_start = page_offset(page);
-		page_end = page_start + PAGE_CACHE_SIZE - 1;
+		page_end = page_start + PAGE_SIZE - 1;
 
 		lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
 
@@ -3191,7 +3191,7 @@
 		unlock_extent(&BTRFS_I(inode)->io_tree,
 			      page_start, page_end);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 
 		index++;
 		balance_dirty_pages_ratelimited(inode->i_mapping);

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 39dbdcb..4678f03 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c

@@ -703,7 +703,7 @@
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	index = offset >> PAGE_CACHE_SHIFT;
+	index = offset >> PAGE_SHIFT;
 
 	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
 	if (!page) {
@@ -1636,7 +1636,7 @@
 	if (spage->io_error) {
 		void *mapped_buffer = kmap_atomic(spage->page);
 
-		memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
+		memset(mapped_buffer, 0, PAGE_SIZE);
 		flush_dcache_page(spage->page);
 		kunmap_atomic(mapped_buffer);
 	}
@@ -4294,8 +4294,8 @@
 		goto out;
 	}
 
-	while (len >= PAGE_CACHE_SIZE) {
-		index = offset >> PAGE_CACHE_SHIFT;
+	while (len >= PAGE_SIZE) {
+		index = offset >> PAGE_SHIFT;
 again:
 		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
 		if (!page) {
@@ -4326,7 +4326,7 @@
 			 */
 			if (page->mapping != inode->i_mapping) {
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 				goto again;
 			}
 			if (!PageUptodate(page)) {
@@ -4348,15 +4348,15 @@
 			ret = err;
 next_page:
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 
 		if (ret)
 			break;
 
-		offset += PAGE_CACHE_SIZE;
-		physical_for_dev_replace += PAGE_CACHE_SIZE;
-		nocow_ctx_logical += PAGE_CACHE_SIZE;
-		len -= PAGE_CACHE_SIZE;
+		offset += PAGE_SIZE;
+		physical_for_dev_replace += PAGE_SIZE;
+		nocow_ctx_logical += PAGE_SIZE;
+		len -= PAGE_SIZE;
 	}
 	ret = COPY_COMPLETE;
 out:
@@ -4390,8 +4390,8 @@
 	bio->bi_iter.bi_size = 0;
 	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
 	bio->bi_bdev = dev->bdev;
-	ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
-	if (ret != PAGE_CACHE_SIZE) {
+	ret = bio_add_page(bio, page, PAGE_SIZE, 0);
+	if (ret != PAGE_SIZE) {
 leave_with_eio:
 		bio_put(bio);
 		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 19b7bf4..8d358c5 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c

@@ -4449,9 +4449,9 @@
 	struct page *page;
 	char *addr;
 	struct btrfs_key key;
-	pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+	pgoff_t index = offset >> PAGE_SHIFT;
 	pgoff_t last_index;
-	unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
+	unsigned pg_offset = offset & ~PAGE_MASK;
 	ssize_t ret = 0;
 
 	key.objectid = sctx->cur_ino;
@@ -4471,7 +4471,7 @@
 	if (len == 0)
 		goto out;
 
-	last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+	last_index = (offset + len - 1) >> PAGE_SHIFT;
 
 	/* initial readahead */
 	memset(&sctx->ra, 0, sizeof(struct file_ra_state));
@@ -4481,7 +4481,7 @@
 
 	while (index <= last_index) {
 		unsigned cur_len = min_t(unsigned, len,
-					 PAGE_CACHE_SIZE - pg_offset);
+					 PAGE_SIZE - pg_offset);
 		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
 			ret = -ENOMEM;
@@ -4493,7 +4493,7 @@
 			lock_page(page);
 			if (!PageUptodate(page)) {
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 				ret = -EIO;
 				break;
 			}
@@ -4503,7 +4503,7 @@
 		memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
 		kunmap(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		index++;
 		pg_offset = 0;
 		len -= cur_len;
@@ -4804,7 +4804,7 @@
 		type = btrfs_file_extent_type(leaf, ei);
 		if (type == BTRFS_FILE_EXTENT_INLINE) {
 			ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
-			ext_len = PAGE_CACHE_ALIGN(ext_len);
+			ext_len = PAGE_ALIGN(ext_len);
 		} else {
 			ext_len = btrfs_file_extent_num_bytes(leaf, ei);
 		}
@@ -4886,7 +4886,7 @@
 		 * but there may be items after this page.  Make
 		 * sure to send the whole thing
 		 */
-		len = PAGE_CACHE_ALIGN(len);
+		len = PAGE_ALIGN(len);
 	} else {
 		len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
 	}

diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index b976597..e05619f 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c

@@ -66,7 +66,7 @@
 									\
 	if (token && token->kaddr && token->offset <= offset &&		\
 	    token->eb == eb &&						\
-	   (token->offset + PAGE_CACHE_SIZE >= offset + size)) {	\
+	   (token->offset + PAGE_SIZE >= offset + size)) {	\
 		kaddr = token->kaddr;					\
 		p = kaddr + part_offset - token->offset;		\
 		res = get_unaligned_le##bits(p + off);			\
@@ -104,7 +104,7 @@
 									\
 	if (token && token->kaddr && token->offset <= offset &&		\
 	    token->eb == eb &&						\
-	   (token->offset + PAGE_CACHE_SIZE >= offset + size)) {	\
+	   (token->offset + PAGE_SIZE >= offset + size)) {	\
 		kaddr = token->kaddr;					\
 		p = kaddr + part_offset - token->offset;		\
 		put_unaligned_le##bits(val, p + off);			\

diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 669b582..70948b1 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c

@@ -32,8 +32,8 @@
 {
 	int ret;
 	struct page *pages[16];
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+	unsigned long index = start >> PAGE_SHIFT;
+	unsigned long end_index = end >> PAGE_SHIFT;
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
 	int count = 0;
@@ -49,9 +49,9 @@
 				count++;
 			if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
 				unlock_page(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 			if (flags & PROCESS_RELEASE)
-				page_cache_release(pages[i]);
+				put_page(pages[i]);
 		}
 		nr_pages -= ret;
 		index += ret;
@@ -93,7 +93,7 @@
 	 * everything to make sure our pages don't get evicted and screw up our
 	 * test.
 	 */
-	for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
+	for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) {
 		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
 			test_msg("Failed to allocate test page\n");
@@ -104,7 +104,7 @@
 		if (index) {
 			unlock_page(page);
 		} else {
-			page_cache_get(page);
+			get_page(page);
 			locked_page = page;
 		}
 	}
@@ -129,7 +129,7 @@
 	}
 	unlock_extent(&tmp, start, end);
 	unlock_page(locked_page);
-	page_cache_release(locked_page);
+	put_page(locked_page);
 
 	/*
 	 * Test this scenario
@@ -139,7 +139,7 @@
 	 */
 	test_start = SZ_64M;
 	locked_page = find_lock_page(inode->i_mapping,
-				     test_start >> PAGE_CACHE_SHIFT);
+				     test_start >> PAGE_SHIFT);
 	if (!locked_page) {
 		test_msg("Couldn't find the locked page\n");
 		goto out_bits;
@@ -165,7 +165,7 @@
 	}
 	unlock_extent(&tmp, start, end);
 	/* locked_page was unlocked above */
-	page_cache_release(locked_page);
+	put_page(locked_page);
 
 	/*
 	 * Test this scenario
@@ -174,7 +174,7 @@
 	 */
 	test_start = max_bytes + 4096;
 	locked_page = find_lock_page(inode->i_mapping, test_start >>
-				     PAGE_CACHE_SHIFT);
+				     PAGE_SHIFT);
 	if (!locked_page) {
 		test_msg("Could'nt find the locked page\n");
 		goto out_bits;
@@ -225,13 +225,13 @@
 	 * range we want to find.
 	 */
 	page = find_get_page(inode->i_mapping,
-			     (max_bytes + SZ_1M) >> PAGE_CACHE_SHIFT);
+			     (max_bytes + SZ_1M) >> PAGE_SHIFT);
 	if (!page) {
 		test_msg("Couldn't find our page\n");
 		goto out_bits;
 	}
 	ClearPageDirty(page);
-	page_cache_release(page);
+	put_page(page);
 
 	/* We unlocked it in the previous test */
 	lock_page(locked_page);
@@ -239,7 +239,7 @@
 	end = 0;
 	/*
 	 * Currently if we fail to find dirty pages in the delalloc range we
-	 * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search.  If
+	 * will adjust max_bytes down to PAGE_SIZE and then re-search.  If
 	 * this changes at any point in the future we will need to fix this
 	 * tests expected behavior.
 	 */
@@ -249,9 +249,9 @@
 		test_msg("Didn't find our range\n");
 		goto out_bits;
 	}
-	if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) {
+	if (start != test_start && end != test_start + PAGE_SIZE - 1) {
 		test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
-			 test_start, test_start + PAGE_CACHE_SIZE - 1, start,
+			 test_start, test_start + PAGE_SIZE - 1, start,
 			 end);
 		goto out_bits;
 	}
@@ -265,7 +265,7 @@
 	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
 	if (locked_page)
-		page_cache_release(locked_page);
+		put_page(locked_page);
 	process_page_range(inode, 0, total_dirty - 1,
 			   PROCESS_UNLOCK | PROCESS_RELEASE);
 	iput(inode);
@@ -298,9 +298,9 @@
 		return -EINVAL;
 	}
 
-	bitmap_set(bitmap, (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+	bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
 		   sizeof(long) * BITS_PER_BYTE);
-	extent_buffer_bitmap_set(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0,
+	extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
 				 sizeof(long) * BITS_PER_BYTE);
 	if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
 		test_msg("Setting straddling pages failed\n");
@@ -309,10 +309,10 @@
 
 	bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
 	bitmap_clear(bitmap,
-		     (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+		     (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
 		     sizeof(long) * BITS_PER_BYTE);
 	extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
-	extent_buffer_bitmap_clear(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0,
+	extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
 				   sizeof(long) * BITS_PER_BYTE);
 	if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
 		test_msg("Clearing straddling pages failed\n");
@@ -353,7 +353,7 @@
 
 static int test_eb_bitmaps(void)
 {
-	unsigned long len = PAGE_CACHE_SIZE * 4;
+	unsigned long len = PAGE_SIZE * 4;
 	unsigned long *bitmap;
 	struct extent_buffer *eb;
 	int ret;
@@ -379,7 +379,7 @@
 
 	/* Do it over again with an extent buffer which isn't page-aligned. */
 	free_extent_buffer(eb);
-	eb = __alloc_dummy_extent_buffer(NULL, PAGE_CACHE_SIZE / 2, len);
+	eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len);
 	if (!eb) {
 		test_msg("Couldn't allocate test extent buffer\n");
 		kfree(bitmap);

diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index c9ad97b..5142475 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c

@@ -22,7 +22,7 @@
 #include "../disk-io.h"
 #include "../free-space-cache.h"
 
-#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
+#define BITS_PER_BITMAP		(PAGE_SIZE * 8)
 
 /*
  * This test just does basic sanity checking, making sure we can add an exten

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e2b54d5..bd0f45f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c

@@ -1025,16 +1025,16 @@
 	}
 
 	/* make sure our super fits in the device */
-	if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode))
+	if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
 		goto error_bdev_put;
 
 	/* make sure our super fits in the page */
-	if (sizeof(*disk_super) > PAGE_CACHE_SIZE)
+	if (sizeof(*disk_super) > PAGE_SIZE)
 		goto error_bdev_put;
 
 	/* make sure our super doesn't straddle pages on disk */
-	index = bytenr >> PAGE_CACHE_SHIFT;
-	if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index)
+	index = bytenr >> PAGE_SHIFT;
+	if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
 		goto error_bdev_put;
 
 	/* pull in the page with our super */
@@ -1047,7 +1047,7 @@
 	p = kmap(page);
 
 	/* align our pointer to the offset of the super block */
-	disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
+	disk_super = p + (bytenr & ~PAGE_MASK);
 
 	if (btrfs_super_bytenr(disk_super) != bytenr ||
 	    btrfs_super_magic(disk_super) != BTRFS_MAGIC)
@@ -1075,7 +1075,7 @@
 
 error_unmap:
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 
 error_bdev_put:
 	blkdev_put(bdev, flags);
@@ -6527,7 +6527,7 @@
 	 * but sb spans only this function. Add an explicit SetPageUptodate call
 	 * to silence the warning eg. on PowerPC 64.
 	 */
-	if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
+	if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
 		SetPageUptodate(sb->pages[0]);
 
 	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);

diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 82990b8..88d274e 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c

@@ -59,7 +59,7 @@
 	workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
 			zlib_inflate_workspacesize());
 	workspace->strm.workspace = vmalloc(workspacesize);
-	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+	workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS);
 	if (!workspace->strm.workspace || !workspace->buf)
 		goto fail;
 
@@ -103,7 +103,7 @@
 	workspace->strm.total_in = 0;
 	workspace->strm.total_out = 0;
 
-	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+	in_page = find_get_page(mapping, start >> PAGE_SHIFT);
 	data_in = kmap(in_page);
 
 	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
@@ -117,8 +117,8 @@
 
 	workspace->strm.next_in = data_in;
 	workspace->strm.next_out = cpage_out;
-	workspace->strm.avail_out = PAGE_CACHE_SIZE;
-	workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE);
+	workspace->strm.avail_out = PAGE_SIZE;
+	workspace->strm.avail_in = min(len, PAGE_SIZE);
 
 	while (workspace->strm.total_in < len) {
 		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
@@ -156,7 +156,7 @@
 			cpage_out = kmap(out_page);
 			pages[nr_pages] = out_page;
 			nr_pages++;
-			workspace->strm.avail_out = PAGE_CACHE_SIZE;
+			workspace->strm.avail_out = PAGE_SIZE;
 			workspace->strm.next_out = cpage_out;
 		}
 		/* we're all done */
@@ -170,14 +170,14 @@
 
 			bytes_left = len - workspace->strm.total_in;
 			kunmap(in_page);
-			page_cache_release(in_page);
+			put_page(in_page);
 
-			start += PAGE_CACHE_SIZE;
+			start += PAGE_SIZE;
 			in_page = find_get_page(mapping,
-						start >> PAGE_CACHE_SHIFT);
+						start >> PAGE_SHIFT);
 			data_in = kmap(in_page);
 			workspace->strm.avail_in = min(bytes_left,
-							   PAGE_CACHE_SIZE);
+							   PAGE_SIZE);
 			workspace->strm.next_in = data_in;
 		}
 	}
@@ -205,7 +205,7 @@
 
 	if (in_page) {
 		kunmap(in_page);
-		page_cache_release(in_page);
+		put_page(in_page);
 	}
 	return ret;
 }
@@ -223,18 +223,18 @@
 	size_t total_out = 0;
 	unsigned long page_in_index = 0;
 	unsigned long page_out_index = 0;
-	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE);
+	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
 	unsigned long buf_start;
 	unsigned long pg_offset;
 
 	data_in = kmap(pages_in[page_in_index]);
 	workspace->strm.next_in = data_in;
-	workspace->strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
+	workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
 	workspace->strm.total_in = 0;
 
 	workspace->strm.total_out = 0;
 	workspace->strm.next_out = workspace->buf;
-	workspace->strm.avail_out = PAGE_CACHE_SIZE;
+	workspace->strm.avail_out = PAGE_SIZE;
 	pg_offset = 0;
 
 	/* If it's deflate, and it's got no preset dictionary, then
@@ -274,7 +274,7 @@
 		}
 
 		workspace->strm.next_out = workspace->buf;
-		workspace->strm.avail_out = PAGE_CACHE_SIZE;
+		workspace->strm.avail_out = PAGE_SIZE;
 
 		if (workspace->strm.avail_in == 0) {
 			unsigned long tmp;
@@ -288,7 +288,7 @@
 			workspace->strm.next_in = data_in;
 			tmp = srclen - workspace->strm.total_in;
 			workspace->strm.avail_in = min(tmp,
-							   PAGE_CACHE_SIZE);
+							   PAGE_SIZE);
 		}
 	}
 	if (ret != Z_STREAM_END)
@@ -325,7 +325,7 @@
 	workspace->strm.total_in = 0;
 
 	workspace->strm.next_out = workspace->buf;
-	workspace->strm.avail_out = PAGE_CACHE_SIZE;
+	workspace->strm.avail_out = PAGE_SIZE;
 	workspace->strm.total_out = 0;
 	/* If it's deflate, and it's got no preset dictionary, then
 	   we can tell zlib to skip the adler32 check. */
@@ -368,8 +368,8 @@
 		else
 			buf_offset = 0;
 
-		bytes = min(PAGE_CACHE_SIZE - pg_offset,
-			    PAGE_CACHE_SIZE - buf_offset);
+		bytes = min(PAGE_SIZE - pg_offset,
+			    PAGE_SIZE - buf_offset);
 		bytes = min(bytes, bytes_left);
 
 		kaddr = kmap_atomic(dest_page);
@@ -380,7 +380,7 @@
 		bytes_left -= bytes;
 next:
 		workspace->strm.next_out = workspace->buf;
-		workspace->strm.avail_out = PAGE_CACHE_SIZE;
+		workspace->strm.avail_out = PAGE_SIZE;
 	}
 
 	if (ret != Z_STREAM_END && bytes_left != 0)

diff --git a/fs/buffer.c b/fs/buffer.c
index 33be296..af0d9a8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c

@@ -129,7 +129,7 @@
 {
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
-	page_cache_release(page);
+	put_page(page);
 }
 
 static void buffer_io_error(struct buffer_head *bh, char *msg)
@@ -207,7 +207,7 @@
 	struct page *page;
 	int all_mapped = 1;
 
-	index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
+	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
 	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
 	if (!page)
 		goto out;
@@ -245,7 +245,7 @@
 	}
 out_unlock:
 	spin_unlock(&bd_mapping->private_lock);
-	page_cache_release(page);
+	put_page(page);
 out:
 	return ret;
 }
@@ -1040,7 +1040,7 @@
 	ret = (block < end_block) ? 1 : -ENXIO;
 failed:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return ret;
 }
 
@@ -1533,7 +1533,7 @@
 	/*
 	 * Check for overflow
 	 */
-	BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+	BUG_ON(stop > PAGE_SIZE || stop < length);
 
 	head = page_buffers(page);
 	bh = head;
@@ -1716,7 +1716,7 @@
 	blocksize = bh->b_size;
 	bbits = block_size_bits(blocksize);
 
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
 	last_block = (i_size_read(inode) - 1) >> bbits;
 
 	/*
@@ -1894,7 +1894,7 @@
 int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 		get_block_t *get_block)
 {
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned from = pos & (PAGE_SIZE - 1);
 	unsigned to = from + len;
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
@@ -1904,15 +1904,15 @@
 	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
 
 	BUG_ON(!PageLocked(page));
-	BUG_ON(from > PAGE_CACHE_SIZE);
-	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from > PAGE_SIZE);
+	BUG_ON(to > PAGE_SIZE);
 	BUG_ON(from > to);
 
 	head = create_page_buffers(page, inode, 0);
 	blocksize = head->b_size;
 	bbits = block_size_bits(blocksize);
 
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
 	    block++, block_start=block_end, bh = bh->b_this_page) {
@@ -2020,7 +2020,7 @@
 int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
 		unsigned flags, struct page **pagep, get_block_t *get_block)
 {
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	struct page *page;
 	int status;
 
@@ -2031,7 +2031,7 @@
 	status = __block_write_begin(page, pos, len, get_block);
 	if (unlikely(status)) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		page = NULL;
 	}
 
@@ -2047,7 +2047,7 @@
 	struct inode *inode = mapping->host;
 	unsigned start;
 
-	start = pos & (PAGE_CACHE_SIZE - 1);
+	start = pos & (PAGE_SIZE - 1);
 
 	if (unlikely(copied < len)) {
 		/*
@@ -2099,7 +2099,7 @@
 	}
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (old_size < pos)
 		pagecache_isize_extended(inode, old_size, pos);
@@ -2136,9 +2136,9 @@
 
 	head = page_buffers(page);
 	blocksize = head->b_size;
-	to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
+	to = min_t(unsigned, PAGE_SIZE - from, count);
 	to = from + to;
-	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
+	if (from < blocksize && to > PAGE_SIZE - blocksize)
 		return 0;
 
 	bh = head;
@@ -2181,7 +2181,7 @@
 	blocksize = head->b_size;
 	bbits = block_size_bits(blocksize);
 
-	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
 	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
 	bh = head;
 	nr = 0;
@@ -2295,16 +2295,16 @@
 	unsigned zerofrom, offset, len;
 	int err = 0;
 
-	index = pos >> PAGE_CACHE_SHIFT;
-	offset = pos & ~PAGE_CACHE_MASK;
+	index = pos >> PAGE_SHIFT;
+	offset = pos & ~PAGE_MASK;
 
-	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
-		zerofrom = curpos & ~PAGE_CACHE_MASK;
+	while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
+		zerofrom = curpos & ~PAGE_MASK;
 		if (zerofrom & (blocksize-1)) {
 			*bytes |= (blocksize-1);
 			(*bytes)++;
 		}
-		len = PAGE_CACHE_SIZE - zerofrom;
+		len = PAGE_SIZE - zerofrom;
 
 		err = pagecache_write_begin(file, mapping, curpos, len,
 						AOP_FLAG_UNINTERRUPTIBLE,
@@ -2329,7 +2329,7 @@
 
 	/* page covers the boundary, find the boundary offset */
 	if (index == curidx) {
-		zerofrom = curpos & ~PAGE_CACHE_MASK;
+		zerofrom = curpos & ~PAGE_MASK;
 		/* if we will expand the thing last block will be filled */
 		if (offset <= zerofrom) {
 			goto out;
@@ -2375,7 +2375,7 @@
 	if (err)
 		return err;
 
-	zerofrom = *bytes & ~PAGE_CACHE_MASK;
+	zerofrom = *bytes & ~PAGE_MASK;
 	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
 		*bytes |= (blocksize-1);
 		(*bytes)++;
@@ -2430,10 +2430,10 @@
 	}
 
 	/* page is wholly or partially inside EOF */
-	if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
-		end = size & ~PAGE_CACHE_MASK;
+	if (((page->index + 1) << PAGE_SHIFT) > size)
+		end = size & ~PAGE_MASK;
 	else
-		end = PAGE_CACHE_SIZE;
+		end = PAGE_SIZE;
 
 	ret = __block_write_begin(page, 0, end, get_block);
 	if (!ret)
@@ -2508,8 +2508,8 @@
 	int ret = 0;
 	int is_mapped_to_disk = 1;
 
-	index = pos >> PAGE_CACHE_SHIFT;
-	from = pos & (PAGE_CACHE_SIZE - 1);
+	index = pos >> PAGE_SHIFT;
+	from = pos & (PAGE_SIZE - 1);
 	to = from + len;
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
@@ -2543,7 +2543,7 @@
 		goto out_release;
 	}
 
-	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
 
 	/*
 	 * We loop across all blocks in the page, whether or not they are
@@ -2551,7 +2551,7 @@
 	 * page is fully mapped-to-disk.
 	 */
 	for (block_start = 0, block_in_page = 0, bh = head;
-		  block_start < PAGE_CACHE_SIZE;
+		  block_start < PAGE_SIZE;
 		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
 		int create;
 
@@ -2623,7 +2623,7 @@
 
 out_release:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	*pagep = NULL;
 
 	return ret;
@@ -2653,7 +2653,7 @@
 	}
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	while (head) {
 		bh = head;
@@ -2675,7 +2675,7 @@
 {
 	struct inode * const inode = page->mapping->host;
 	loff_t i_size = i_size_read(inode);
-	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	const pgoff_t end_index = i_size >> PAGE_SHIFT;
 	unsigned offset;
 	int ret;
 
@@ -2684,7 +2684,7 @@
 		goto out;
 
 	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
+	offset = i_size & (PAGE_SIZE-1);
 	if (page->index >= end_index+1 || !offset) {
 		/*
 		 * The page may have dirty, unmapped buffers.  For example,
@@ -2707,7 +2707,7 @@
 	 * the  page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+	zero_user_segment(page, offset, PAGE_SIZE);
 out:
 	ret = mpage_writepage(page, get_block, wbc);
 	if (ret == -EAGAIN)
@@ -2720,8 +2720,8 @@
 int nobh_truncate_page(struct address_space *mapping,
 			loff_t from, get_block_t *get_block)
 {
-	pgoff_t index = from >> PAGE_CACHE_SHIFT;
-	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	pgoff_t index = from >> PAGE_SHIFT;
+	unsigned offset = from & (PAGE_SIZE-1);
 	unsigned blocksize;
 	sector_t iblock;
 	unsigned length, pos;
@@ -2738,7 +2738,7 @@
 		return 0;
 
 	length = blocksize - length;
-	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
 
 	page = grab_cache_page(mapping, index);
 	err = -ENOMEM;
@@ -2748,7 +2748,7 @@
 	if (page_has_buffers(page)) {
 has_buffers:
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		return block_truncate_page(mapping, from, get_block);
 	}
 
@@ -2772,7 +2772,7 @@
 	if (!PageUptodate(page)) {
 		err = mapping->a_ops->readpage(NULL, page);
 		if (err) {
-			page_cache_release(page);
+			put_page(page);
 			goto out;
 		}
 		lock_page(page);
@@ -2789,7 +2789,7 @@
 
 unlock:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 out:
 	return err;
 }
@@ -2798,8 +2798,8 @@
 int block_truncate_page(struct address_space *mapping,
 			loff_t from, get_block_t *get_block)
 {
-	pgoff_t index = from >> PAGE_CACHE_SHIFT;
-	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	pgoff_t index = from >> PAGE_SHIFT;
+	unsigned offset = from & (PAGE_SIZE-1);
 	unsigned blocksize;
 	sector_t iblock;
 	unsigned length, pos;
@@ -2816,7 +2816,7 @@
 		return 0;
 
 	length = blocksize - length;
-	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
 	
 	page = grab_cache_page(mapping, index);
 	err = -ENOMEM;
@@ -2865,7 +2865,7 @@
 
 unlock:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 out:
 	return err;
 }
@@ -2879,7 +2879,7 @@
 {
 	struct inode * const inode = page->mapping->host;
 	loff_t i_size = i_size_read(inode);
-	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	const pgoff_t end_index = i_size >> PAGE_SHIFT;
 	unsigned offset;
 
 	/* Is the page fully inside i_size? */
@@ -2888,14 +2888,14 @@
 					       end_buffer_async_write);
 
 	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
+	offset = i_size & (PAGE_SIZE-1);
 	if (page->index >= end_index+1 || !offset) {
 		/*
 		 * The page may have dirty, unmapped buffers.  For example,
 		 * they may have been added in ext3_writepage().  Make them
 		 * freeable here, so the page does not leak.
 		 */
-		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		do_invalidatepage(page, 0, PAGE_SIZE);
 		unlock_page(page);
 		return 0; /* don't care */
 	}
@@ -2907,7 +2907,7 @@
 	 * the  page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+	zero_user_segment(page, offset, PAGE_SIZE);
 	return __block_write_full_page(inode, page, get_block, wbc,
 							end_buffer_async_write);
 }

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c0f3da3..afbdc41 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c

@@ -194,10 +194,10 @@
 			error = -EIO;
 		}
 
-		page_cache_release(monitor->back_page);
+		put_page(monitor->back_page);
 
 		fscache_end_io(op, monitor->netfs_page, error);
-		page_cache_release(monitor->netfs_page);
+		put_page(monitor->netfs_page);
 		fscache_retrieval_complete(op, 1);
 		fscache_put_retrieval(op);
 		kfree(monitor);
@@ -288,8 +288,8 @@
 	_debug("- monitor add");
 
 	/* install the monitor */
-	page_cache_get(monitor->netfs_page);
-	page_cache_get(backpage);
+	get_page(monitor->netfs_page);
+	get_page(backpage);
 	monitor->back_page = backpage;
 	monitor->monitor.private = backpage;
 	add_page_wait_queue(backpage, &monitor->monitor);
@@ -310,7 +310,7 @@
 	_debug("- present");
 
 	if (newpage) {
-		page_cache_release(newpage);
+		put_page(newpage);
 		newpage = NULL;
 	}
 
@@ -342,7 +342,7 @@
 
 out:
 	if (backpage)
-		page_cache_release(backpage);
+		put_page(backpage);
 	if (monitor) {
 		fscache_put_retrieval(monitor->op);
 		kfree(monitor);
@@ -363,7 +363,7 @@
 	goto out;
 
 nomem_page:
-	page_cache_release(newpage);
+	put_page(newpage);
 nomem_monitor:
 	fscache_put_retrieval(monitor->op);
 	kfree(monitor);
@@ -530,7 +530,7 @@
 					    netpage->index, cachefiles_gfp);
 		if (ret < 0) {
 			if (ret == -EEXIST) {
-				page_cache_release(netpage);
+				put_page(netpage);
 				fscache_retrieval_complete(op, 1);
 				continue;
 			}
@@ -538,10 +538,10 @@
 		}
 
 		/* install a monitor */
-		page_cache_get(netpage);
+		get_page(netpage);
 		monitor->netfs_page = netpage;
 
-		page_cache_get(backpage);
+		get_page(backpage);
 		monitor->back_page = backpage;
 		monitor->monitor.private = backpage;
 		add_page_wait_queue(backpage, &monitor->monitor);
@@ -555,10 +555,10 @@
 			unlock_page(backpage);
 		}
 
-		page_cache_release(backpage);
+		put_page(backpage);
 		backpage = NULL;
 
-		page_cache_release(netpage);
+		put_page(netpage);
 		netpage = NULL;
 		continue;
 
@@ -603,7 +603,7 @@
 					    netpage->index, cachefiles_gfp);
 		if (ret < 0) {
 			if (ret == -EEXIST) {
-				page_cache_release(netpage);
+				put_page(netpage);
 				fscache_retrieval_complete(op, 1);
 				continue;
 			}
@@ -612,14 +612,14 @@
 
 		copy_highpage(netpage, backpage);
 
-		page_cache_release(backpage);
+		put_page(backpage);
 		backpage = NULL;
 
 		fscache_mark_page_cached(op, netpage);
 
 		/* the netpage is unlocked and marked up to date here */
 		fscache_end_io(op, netpage, 0);
-		page_cache_release(netpage);
+		put_page(netpage);
 		netpage = NULL;
 		fscache_retrieval_complete(op, 1);
 		continue;
@@ -632,11 +632,11 @@
 out:
 	/* tidy up */
 	if (newpage)
-		page_cache_release(newpage);
+		put_page(newpage);
 	if (netpage)
-		page_cache_release(netpage);
+		put_page(netpage);
 	if (backpage)
-		page_cache_release(backpage);
+		put_page(backpage);
 	if (monitor) {
 		fscache_put_retrieval(op);
 		kfree(monitor);
@@ -644,7 +644,7 @@
 
 	list_for_each_entry_safe(netpage, _n, list, lru) {
 		list_del(&netpage->lru);
-		page_cache_release(netpage);
+		put_page(netpage);
 		fscache_retrieval_complete(op, 1);
 	}
 

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 19adeb0..4801571 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c

@@ -143,7 +143,7 @@
 	inode = page->mapping->host;
 	ci = ceph_inode(inode);
 
-	if (offset != 0 || length != PAGE_CACHE_SIZE) {
+	if (offset != 0 || length != PAGE_SIZE) {
 		dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
 		     inode, page, page->index, offset, length);
 		return;
@@ -175,8 +175,8 @@
 
 static int ceph_releasepage(struct page *page, gfp_t g)
 {
-	struct inode *inode = page->mapping ? page->mapping->host : NULL;
-	dout("%p releasepage %p idx %lu\n", inode, page, page->index);
+	dout("%p releasepage %p idx %lu\n", page->mapping->host,
+	     page, page->index);
 	WARN_ON(PageDirty(page));
 
 	/* Can we release the page from the cache? */
@@ -197,10 +197,10 @@
 		&ceph_inode_to_client(inode)->client->osdc;
 	int err = 0;
 	u64 off = page_offset(page);
-	u64 len = PAGE_CACHE_SIZE;
+	u64 len = PAGE_SIZE;
 
 	if (off >= i_size_read(inode)) {
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		zero_user_segment(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
 		return 0;
 	}
@@ -212,7 +212,7 @@
 		 */
 		if (off == 0)
 			return -EINVAL;
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		zero_user_segment(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
 		return 0;
 	}
@@ -234,9 +234,9 @@
 		ceph_fscache_readpage_cancel(inode, page);
 		goto out;
 	}
-	if (err < PAGE_CACHE_SIZE)
+	if (err < PAGE_SIZE)
 		/* zero fill remainder of page */
-		zero_user_segment(page, err, PAGE_CACHE_SIZE);
+		zero_user_segment(page, err, PAGE_SIZE);
 	else
 		flush_dcache_page(page);
 
@@ -276,12 +276,12 @@
 	for (i = 0; i < num_pages; i++) {
 		struct page *page = osd_data->pages[i];
 
-		if (rc < 0 && rc != ENOENT)
+		if (rc < 0 && rc != -ENOENT)
 			goto unlock;
-		if (bytes < (int)PAGE_CACHE_SIZE) {
+		if (bytes < (int)PAGE_SIZE) {
 			/* zero (remainder of) page */
 			int s = bytes < 0 ? 0 : bytes;
-			zero_user_segment(page, s, PAGE_CACHE_SIZE);
+			zero_user_segment(page, s, PAGE_SIZE);
 		}
  		dout("finish_read %p uptodate %p idx %lu\n", inode, page,
 		     page->index);
@@ -290,8 +290,8 @@
 		ceph_readpage_to_fscache(inode, page);
 unlock:
 		unlock_page(page);
-		page_cache_release(page);
-		bytes -= PAGE_CACHE_SIZE;
+		put_page(page);
+		bytes -= PAGE_SIZE;
 	}
 	kfree(osd_data->pages);
 }
@@ -336,7 +336,7 @@
 		if (max && nr_pages == max)
 			break;
 	}
-	len = nr_pages << PAGE_CACHE_SHIFT;
+	len = nr_pages << PAGE_SHIFT;
 	dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
 	     off, len);
 	vino = ceph_vino(inode);
@@ -364,7 +364,7 @@
 		if (add_to_page_cache_lru(page, &inode->i_data, page->index,
 					  GFP_KERNEL)) {
 			ceph_fscache_uncache_page(inode, page);
-			page_cache_release(page);
+			put_page(page);
 			dout("start_read %p add_to_page_cache failed %p\n",
 			     inode, page);
 			nr_pages = i;
@@ -415,8 +415,8 @@
 	if (rc == 0)
 		goto out;
 
-	if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
-		max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
+	if (fsc->mount_options->rsize >= PAGE_SIZE)
+		max = (fsc->mount_options->rsize + PAGE_SIZE - 1)
 			>> PAGE_SHIFT;
 
 	dout("readpages %p file %p nr_pages %d max %d\n", inode,
@@ -484,7 +484,7 @@
 	long writeback_stat;
 	u64 truncate_size;
 	u32 truncate_seq;
-	int err = 0, len = PAGE_CACHE_SIZE;
+	int err = 0, len = PAGE_SIZE;
 
 	dout("writepage %p idx %lu\n", page, page->index);
 
@@ -606,71 +606,71 @@
 	struct inode *inode = req->r_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_osd_data *osd_data;
-	unsigned wrote;
 	struct page *page;
-	int num_pages;
-	int i;
+	int num_pages, total_pages = 0;
+	int i, j;
+	int rc = req->r_result;
 	struct ceph_snap_context *snapc = req->r_snapc;
 	struct address_space *mapping = inode->i_mapping;
-	int rc = req->r_result;
-	u64 bytes = req->r_ops[0].extent.length;
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	long writeback_stat;
-	unsigned issued = ceph_caps_issued(ci);
+	bool remove_page;
 
-	osd_data = osd_req_op_extent_osd_data(req, 0);
-	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
-	num_pages = calc_pages_for((u64)osd_data->alignment,
-					(u64)osd_data->length);
-	if (rc >= 0) {
-		/*
-		 * Assume we wrote the pages we originally sent.  The
-		 * osd might reply with fewer pages if our writeback
-		 * raced with a truncation and was adjusted at the osd,
-		 * so don't believe the reply.
-		 */
-		wrote = num_pages;
-	} else {
-		wrote = 0;
+
+	dout("writepages_finish %p rc %d\n", inode, rc);
+	if (rc < 0)
 		mapping_set_error(mapping, rc);
-	}
-	dout("writepages_finish %p rc %d bytes %llu wrote %d (pages)\n",
-	     inode, rc, bytes, wrote);
+
+	/*
+	 * We lost the cache cap, need to truncate the page before
+	 * it is unlocked, otherwise we'd truncate it later in the
+	 * page truncation thread, possibly losing some data that
+	 * raced its way in
+	 */
+	remove_page = !(ceph_caps_issued(ci) &
+			(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO));
 
 	/* clean all pages */
-	for (i = 0; i < num_pages; i++) {
-		page = osd_data->pages[i];
-		BUG_ON(!page);
-		WARN_ON(!PageUptodate(page));
+	for (i = 0; i < req->r_num_ops; i++) {
+		if (req->r_ops[i].op != CEPH_OSD_OP_WRITE)
+			break;
 
-		writeback_stat =
-			atomic_long_dec_return(&fsc->writeback_count);
-		if (writeback_stat <
-		    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
-			clear_bdi_congested(&fsc->backing_dev_info,
-					    BLK_RW_ASYNC);
+		osd_data = osd_req_op_extent_osd_data(req, i);
+		BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
+		num_pages = calc_pages_for((u64)osd_data->alignment,
+					   (u64)osd_data->length);
+		total_pages += num_pages;
+		for (j = 0; j < num_pages; j++) {
+			page = osd_data->pages[j];
+			BUG_ON(!page);
+			WARN_ON(!PageUptodate(page));
 
-		ceph_put_snap_context(page_snap_context(page));
-		page->private = 0;
-		ClearPagePrivate(page);
-		dout("unlocking %d %p\n", i, page);
-		end_page_writeback(page);
+			if (atomic_long_dec_return(&fsc->writeback_count) <
+			     CONGESTION_OFF_THRESH(
+					fsc->mount_options->congestion_kb))
+				clear_bdi_congested(&fsc->backing_dev_info,
+						    BLK_RW_ASYNC);
 
-		/*
-		 * We lost the cache cap, need to truncate the page before
-		 * it is unlocked, otherwise we'd truncate it later in the
-		 * page truncation thread, possibly losing some data that
-		 * raced its way in
-		 */
-		if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
-			generic_error_remove_page(inode->i_mapping, page);
+			ceph_put_snap_context(page_snap_context(page));
+			page->private = 0;
+			ClearPagePrivate(page);
+			dout("unlocking %p\n", page);
+			end_page_writeback(page);
 
-		unlock_page(page);
+			if (remove_page)
+				generic_error_remove_page(inode->i_mapping,
+							  page);
+
+			unlock_page(page);
+		}
+		dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n",
+		     inode, osd_data->length, rc >= 0 ? num_pages : 0);
+
+		ceph_release_pages(osd_data->pages, num_pages);
 	}
-	dout("%p wrote+cleaned %d pages\n", inode, wrote);
-	ceph_put_wrbuffer_cap_refs(ci, num_pages, snapc);
 
-	ceph_release_pages(osd_data->pages, num_pages);
+	ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc);
+
+	osd_data = osd_req_op_extent_osd_data(req, 0);
 	if (osd_data->pages_from_pool)
 		mempool_free(osd_data->pages,
 			     ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
@@ -725,9 +725,9 @@
 	}
 	if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
 		wsize = fsc->mount_options->wsize;
-	if (wsize < PAGE_CACHE_SIZE)
-		wsize = PAGE_CACHE_SIZE;
-	max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
+	if (wsize < PAGE_SIZE)
+		wsize = PAGE_SIZE;
+	max_pages_ever = wsize >> PAGE_SHIFT;
 
 	pagevec_init(&pvec, 0);
 
@@ -737,8 +737,8 @@
 		end = -1;
 		dout(" cyclic, start at %lu\n", start);
 	} else {
-		start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		start = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			range_whole = 1;
 		should_loop = 0;
@@ -778,17 +778,15 @@
 	while (!done && index <= end) {
 		unsigned i;
 		int first;
-		pgoff_t next;
-		int pvec_pages, locked_pages;
-		struct page **pages = NULL;
+		pgoff_t strip_unit_end = 0;
+		int num_ops = 0, op_idx;
+		int pvec_pages, locked_pages = 0;
+		struct page **pages = NULL, **data_pages;
 		mempool_t *pool = NULL;	/* Becomes non-null if mempool used */
 		struct page *page;
 		int want;
-		u64 offset, len;
-		long writeback_stat;
+		u64 offset = 0, len = 0;
 
-		next = 0;
-		locked_pages = 0;
 		max_pages = max_pages_ever;
 
 get_more_pages:
@@ -824,8 +822,8 @@
 				unlock_page(page);
 				break;
 			}
-			if (next && (page->index != next)) {
-				dout("not consecutive %p\n", page);
+			if (strip_unit_end && (page->index > strip_unit_end)) {
+				dout("end of strip unit %p\n", page);
 				unlock_page(page);
 				break;
 			}
@@ -867,36 +865,31 @@
 			/*
 			 * We have something to write.  If this is
 			 * the first locked page this time through,
-			 * allocate an osd request and a page array
-			 * that it will use.
+			 * calculate max possinle write size and
+			 * allocate a page array
 			 */
 			if (locked_pages == 0) {
-				BUG_ON(pages);
+				u64 objnum;
+				u64 objoff;
+
 				/* prepare async write request */
 				offset = (u64)page_offset(page);
 				len = wsize;
-				req = ceph_osdc_new_request(&fsc->client->osdc,
-							&ci->i_layout, vino,
-							offset, &len, 0,
-							do_sync ? 2 : 1,
-							CEPH_OSD_OP_WRITE,
-							CEPH_OSD_FLAG_WRITE |
-							CEPH_OSD_FLAG_ONDISK,
-							snapc, truncate_seq,
-							truncate_size, true);
-				if (IS_ERR(req)) {
-					rc = PTR_ERR(req);
+
+				rc = ceph_calc_file_object_mapping(&ci->i_layout,
+								offset, len,
+								&objnum, &objoff,
+								&len);
+				if (rc < 0) {
 					unlock_page(page);
 					break;
 				}
 
-				if (do_sync)
-					osd_req_op_init(req, 1,
-							CEPH_OSD_OP_STARTSYNC, 0);
+				num_ops = 1 + do_sync;
+				strip_unit_end = page->index +
+					((len - 1) >> PAGE_SHIFT);
 
-				req->r_callback = writepages_finish;
-				req->r_inode = inode;
-
+				BUG_ON(pages);
 				max_pages = calc_pages_for(0, (u64)len);
 				pages = kmalloc(max_pages * sizeof (*pages),
 						GFP_NOFS);
@@ -905,6 +898,20 @@
 					pages = mempool_alloc(pool, GFP_NOFS);
 					BUG_ON(!pages);
 				}
+
+				len = 0;
+			} else if (page->index !=
+				   (offset + len) >> PAGE_SHIFT) {
+				if (num_ops >= (pool ?  CEPH_OSD_SLAB_OPS :
+							CEPH_OSD_MAX_OPS)) {
+					redirty_page_for_writepage(wbc, page);
+					unlock_page(page);
+					break;
+				}
+
+				num_ops++;
+				offset = (u64)page_offset(page);
+				len = 0;
 			}
 
 			/* note position of first page in pvec */
@@ -913,18 +920,16 @@
 			dout("%p will write page %p idx %lu\n",
 			     inode, page, page->index);
 
-			writeback_stat =
-			       atomic_long_inc_return(&fsc->writeback_count);
-			if (writeback_stat > CONGESTION_ON_THRESH(
+			if (atomic_long_inc_return(&fsc->writeback_count) >
+			    CONGESTION_ON_THRESH(
 				    fsc->mount_options->congestion_kb)) {
 				set_bdi_congested(&fsc->backing_dev_info,
 						  BLK_RW_ASYNC);
 			}
 
-			set_page_writeback(page);
 			pages[locked_pages] = page;
 			locked_pages++;
-			next = page->index + 1;
+			len += PAGE_SIZE;
 		}
 
 		/* did we get anything? */
@@ -944,38 +949,119 @@
 			/* shift unused pages over in the pvec...  we
 			 * will need to release them below. */
 			for (j = i; j < pvec_pages; j++) {
-				dout(" pvec leftover page %p\n",
-				     pvec.pages[j]);
+				dout(" pvec leftover page %p\n", pvec.pages[j]);
 				pvec.pages[j-i+first] = pvec.pages[j];
 			}
 			pvec.nr -= i-first;
 		}
 
-		/* Format the osd request message and submit the write */
+new_request:
 		offset = page_offset(pages[0]);
-		len = (u64)locked_pages << PAGE_CACHE_SHIFT;
-		if (snap_size == -1) {
-			len = min(len, (u64)i_size_read(inode) - offset);
-			 /* writepages_finish() clears writeback pages
-			  * according to the data length, so make sure
-			  * data length covers all locked pages */
-			len = max(len, 1 +
-				((u64)(locked_pages - 1) << PAGE_CACHE_SHIFT));
-		} else {
-			len = min(len, snap_size - offset);
+		len = wsize;
+
+		req = ceph_osdc_new_request(&fsc->client->osdc,
+					&ci->i_layout, vino,
+					offset, &len, 0, num_ops,
+					CEPH_OSD_OP_WRITE,
+					CEPH_OSD_FLAG_WRITE |
+					CEPH_OSD_FLAG_ONDISK,
+					snapc, truncate_seq,
+					truncate_size, false);
+		if (IS_ERR(req)) {
+			req = ceph_osdc_new_request(&fsc->client->osdc,
+						&ci->i_layout, vino,
+						offset, &len, 0,
+						min(num_ops,
+						    CEPH_OSD_SLAB_OPS),
+						CEPH_OSD_OP_WRITE,
+						CEPH_OSD_FLAG_WRITE |
+						CEPH_OSD_FLAG_ONDISK,
+						snapc, truncate_seq,
+						truncate_size, true);
+			BUG_ON(IS_ERR(req));
 		}
-		dout("writepages got %d pages at %llu~%llu\n",
-		     locked_pages, offset, len);
+		BUG_ON(len < page_offset(pages[locked_pages - 1]) +
+			     PAGE_SIZE - offset);
 
-		osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
+		req->r_callback = writepages_finish;
+		req->r_inode = inode;
+
+		/* Format the osd request message and submit the write */
+		len = 0;
+		data_pages = pages;
+		op_idx = 0;
+		for (i = 0; i < locked_pages; i++) {
+			u64 cur_offset = page_offset(pages[i]);
+			if (offset + len != cur_offset) {
+				if (op_idx + do_sync + 1 == req->r_num_ops)
+					break;
+				osd_req_op_extent_dup_last(req, op_idx,
+							   cur_offset - offset);
+				dout("writepages got pages at %llu~%llu\n",
+				     offset, len);
+				osd_req_op_extent_osd_data_pages(req, op_idx,
+							data_pages, len, 0,
 							!!pool, false);
+				osd_req_op_extent_update(req, op_idx, len);
 
-		pages = NULL;	/* request message now owns the pages array */
+				len = 0;
+				offset = cur_offset; 
+				data_pages = pages + i;
+				op_idx++;
+			}
+
+			set_page_writeback(pages[i]);
+			len += PAGE_SIZE;
+		}
+
+		if (snap_size != -1) {
+			len = min(len, snap_size - offset);
+		} else if (i == locked_pages) {
+			/* writepages_finish() clears writeback pages
+			 * according to the data length, so make sure
+			 * data length covers all locked pages */
+			u64 min_len = len + 1 - PAGE_SIZE;
+			len = min(len, (u64)i_size_read(inode) - offset);
+			len = max(len, min_len);
+		}
+		dout("writepages got pages at %llu~%llu\n", offset, len);
+
+		osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
+						 0, !!pool, false);
+		osd_req_op_extent_update(req, op_idx, len);
+
+		if (do_sync) {
+			op_idx++;
+			osd_req_op_init(req, op_idx, CEPH_OSD_OP_STARTSYNC, 0);
+		}
+		BUG_ON(op_idx + 1 != req->r_num_ops);
+
 		pool = NULL;
+		if (i < locked_pages) {
+			BUG_ON(num_ops <= req->r_num_ops);
+			num_ops -= req->r_num_ops;
+			num_ops += do_sync;
+			locked_pages -= i;
 
-		/* Update the write op length in case we changed it */
-
-		osd_req_op_extent_update(req, 0, len);
+			/* allocate new pages array for next request */
+			data_pages = pages;
+			pages = kmalloc(locked_pages * sizeof (*pages),
+					GFP_NOFS);
+			if (!pages) {
+				pool = fsc->wb_pagevec_pool;
+				pages = mempool_alloc(pool, GFP_NOFS);
+				BUG_ON(!pages);
+			}
+			memcpy(pages, data_pages + i,
+			       locked_pages * sizeof(*pages));
+			memset(data_pages + i, 0,
+			       locked_pages * sizeof(*pages));
+		} else {
+			BUG_ON(num_ops != req->r_num_ops);
+			index = pages[i - 1]->index + 1;
+			/* request message now owns the pages array */
+			pages = NULL;
+		}
 
 		vino = ceph_vino(inode);
 		ceph_osdc_build_request(req, offset, snapc, vino.snap,
@@ -985,9 +1071,10 @@
 		BUG_ON(rc);
 		req = NULL;
 
-		/* continue? */
-		index = next;
-		wbc->nr_to_write -= locked_pages;
+		wbc->nr_to_write -= i;
+		if (pages)
+			goto new_request;
+
 		if (wbc->nr_to_write <= 0)
 			done = 1;
 
@@ -1048,8 +1135,8 @@
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	loff_t page_off = pos & PAGE_CACHE_MASK;
-	int pos_in_page = pos & ~PAGE_CACHE_MASK;
+	loff_t page_off = pos & PAGE_MASK;
+	int pos_in_page = pos & ~PAGE_MASK;
 	int end_in_page = pos_in_page + len;
 	loff_t i_size;
 	int r;
@@ -1104,7 +1191,7 @@
 	}
 
 	/* full page? */
-	if (pos_in_page == 0 && len == PAGE_CACHE_SIZE)
+	if (pos_in_page == 0 && len == PAGE_SIZE)
 		return 0;
 
 	/* past end of file? */
@@ -1112,12 +1199,12 @@
 
 	if (page_off >= i_size ||
 	    (pos_in_page == 0 && (pos+len) >= i_size &&
-	     end_in_page - pos_in_page != PAGE_CACHE_SIZE)) {
+	     end_in_page - pos_in_page != PAGE_SIZE)) {
 		dout(" zeroing %p 0 - %d and %d - %d\n",
-		     page, pos_in_page, end_in_page, (int)PAGE_CACHE_SIZE);
+		     page, pos_in_page, end_in_page, (int)PAGE_SIZE);
 		zero_user_segments(page,
 				   0, pos_in_page,
-				   end_in_page, PAGE_CACHE_SIZE);
+				   end_in_page, PAGE_SIZE);
 		return 0;
 	}
 
@@ -1141,7 +1228,7 @@
 {
 	struct inode *inode = file_inode(file);
 	struct page *page;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	int r;
 
 	do {
@@ -1155,7 +1242,7 @@
 
 		r = ceph_update_writeable_page(file, pos, len, page);
 		if (r < 0)
-			page_cache_release(page);
+			put_page(page);
 		else
 			*pagep = page;
 	} while (r == -EAGAIN);
@@ -1172,7 +1259,7 @@
 			  struct page *page, void *fsdata)
 {
 	struct inode *inode = file_inode(file);
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned from = pos & (PAGE_SIZE - 1);
 	int check_cap = 0;
 
 	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
@@ -1192,7 +1279,7 @@
 	set_page_dirty(page);
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (check_cap)
 		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
@@ -1235,11 +1322,11 @@
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_file_info *fi = vma->vm_file->private_data;
 	struct page *pinned_page = NULL;
-	loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
+	loff_t off = vmf->pgoff << PAGE_SHIFT;
 	int want, got, ret;
 
 	dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
-	     inode, ceph_vinop(inode), off, (size_t)PAGE_CACHE_SIZE);
+	     inode, ceph_vinop(inode), off, (size_t)PAGE_SIZE);
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
 		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
 	else
@@ -1256,7 +1343,7 @@
 		}
 	}
 	dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
-	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
+	     inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
 
 	if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
 	    ci->i_inline_version == CEPH_INLINE_NONE)
@@ -1265,16 +1352,16 @@
 		ret = -EAGAIN;
 
 	dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
-	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
+	     inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret);
 	if (pinned_page)
-		page_cache_release(pinned_page);
+		put_page(pinned_page);
 	ceph_put_cap_refs(ci, got);
 
 	if (ret != -EAGAIN)
 		return ret;
 
 	/* read inline data */
-	if (off >= PAGE_CACHE_SIZE) {
+	if (off >= PAGE_SIZE) {
 		/* does not support inline data > PAGE_SIZE */
 		ret = VM_FAULT_SIGBUS;
 	} else {
@@ -1291,12 +1378,12 @@
 					 CEPH_STAT_CAP_INLINE_DATA, true);
 		if (ret1 < 0 || off >= i_size_read(inode)) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			ret = VM_FAULT_SIGBUS;
 			goto out;
 		}
-		if (ret1 < PAGE_CACHE_SIZE)
-			zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
+		if (ret1 < PAGE_SIZE)
+			zero_user_segment(page, ret1, PAGE_SIZE);
 		else
 			flush_dcache_page(page);
 		SetPageUptodate(page);
@@ -1305,7 +1392,7 @@
 	}
 out:
 	dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
-	     inode, off, (size_t)PAGE_CACHE_SIZE, ret);
+	     inode, off, (size_t)PAGE_SIZE, ret);
 	return ret;
 }
 
@@ -1343,10 +1430,10 @@
 		}
 	}
 
-	if (off + PAGE_CACHE_SIZE <= size)
-		len = PAGE_CACHE_SIZE;
+	if (off + PAGE_SIZE <= size)
+		len = PAGE_SIZE;
 	else
-		len = size & ~PAGE_CACHE_MASK;
+		len = size & ~PAGE_MASK;
 
 	dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
 	     inode, ceph_vinop(inode), off, len, size);
@@ -1432,7 +1519,7 @@
 			return;
 		if (PageUptodate(page)) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			return;
 		}
 	}
@@ -1447,14 +1534,14 @@
 	}
 
 	if (page != locked_page) {
-		if (len < PAGE_CACHE_SIZE)
-			zero_user_segment(page, len, PAGE_CACHE_SIZE);
+		if (len < PAGE_SIZE)
+			zero_user_segment(page, len, PAGE_SIZE);
 		else
 			flush_dcache_page(page);
 
 		SetPageUptodate(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 }
 
@@ -1491,7 +1578,7 @@
 				from_pagecache = true;
 				lock_page(page);
 			} else {
-				page_cache_release(page);
+				put_page(page);
 				page = NULL;
 			}
 		}
@@ -1499,8 +1586,8 @@
 
 	if (page) {
 		len = i_size_read(inode);
-		if (len > PAGE_CACHE_SIZE)
-			len = PAGE_CACHE_SIZE;
+		if (len > PAGE_SIZE)
+			len = PAGE_SIZE;
 	} else {
 		page = __page_cache_alloc(GFP_NOFS);
 		if (!page) {
@@ -1522,7 +1609,7 @@
 				    ceph_vino(inode), 0, &len, 0, 1,
 				    CEPH_OSD_OP_CREATE,
 				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
-				    ceph_empty_snapc, 0, 0, false);
+				    NULL, 0, 0, false);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto out;
@@ -1540,9 +1627,8 @@
 				    ceph_vino(inode), 0, &len, 1, 3,
 				    CEPH_OSD_OP_WRITE,
 				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
-				    ceph_empty_snapc,
-				    ci->i_truncate_seq, ci->i_truncate_size,
-				    false);
+				    NULL, ci->i_truncate_seq,
+				    ci->i_truncate_size, false);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto out;
@@ -1584,7 +1670,7 @@
 	if (page && page != locked_page) {
 		if (from_pagecache) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 		} else
 			__free_pages(page, 0);
 	}
@@ -1663,8 +1749,7 @@
 		goto out;
 	}
 
-	rd_req = ceph_osdc_alloc_request(&fsc->client->osdc,
-					 ceph_empty_snapc,
+	rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL,
 					 1, false, GFP_NOFS);
 	if (!rd_req) {
 		err = -ENOMEM;
@@ -1678,8 +1763,7 @@
 		 "%llx.00000000", ci->i_vino.ino);
 	rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name);
 
-	wr_req = ceph_osdc_alloc_request(&fsc->client->osdc,
-					 ceph_empty_snapc,
+	wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL,
 					 1, false, GFP_NOFS);
 	if (!wr_req) {
 		err = -ENOMEM;

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6fe0ad2..cfaeef1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c

@@ -991,7 +991,7 @@
 			u32 seq, u64 flush_tid, u64 oldest_flush_tid,
 			u32 issue_seq, u32 mseq, u64 size, u64 max_size,
 			struct timespec *mtime, struct timespec *atime,
-			u64 time_warp_seq,
+			struct timespec *ctime, u64 time_warp_seq,
 			kuid_t uid, kgid_t gid, umode_t mode,
 			u64 xattr_version,
 			struct ceph_buffer *xattrs_buf,
@@ -1042,6 +1042,8 @@
 		ceph_encode_timespec(&fc->mtime, mtime);
 	if (atime)
 		ceph_encode_timespec(&fc->atime, atime);
+	if (ctime)
+		ceph_encode_timespec(&fc->ctime, ctime);
 	fc->time_warp_seq = cpu_to_le32(time_warp_seq);
 
 	fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid));
@@ -1116,7 +1118,7 @@
 	int held, revoking, dropping, keep;
 	u64 seq, issue_seq, mseq, time_warp_seq, follows;
 	u64 size, max_size;
-	struct timespec mtime, atime;
+	struct timespec mtime, atime, ctime;
 	int wake = 0;
 	umode_t mode;
 	kuid_t uid;
@@ -1180,6 +1182,7 @@
 	ci->i_requested_max_size = max_size;
 	mtime = inode->i_mtime;
 	atime = inode->i_atime;
+	ctime = inode->i_ctime;
 	time_warp_seq = ci->i_time_warp_seq;
 	uid = inode->i_uid;
 	gid = inode->i_gid;
@@ -1198,7 +1201,7 @@
 	ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
 		op, keep, want, flushing, seq,
 		flush_tid, oldest_flush_tid, issue_seq, mseq,
-		size, max_size, &mtime, &atime, time_warp_seq,
+		size, max_size, &mtime, &atime, &ctime, time_warp_seq,
 		uid, gid, mode, xattr_version, xattr_blob,
 		follows, inline_data);
 	if (ret < 0) {
@@ -1320,7 +1323,7 @@
 			     capsnap->dirty, 0, capsnap->flush_tid, 0,
 			     0, mseq, capsnap->size, 0,
 			     &capsnap->mtime, &capsnap->atime,
-			     capsnap->time_warp_seq,
+			     &capsnap->ctime, capsnap->time_warp_seq,
 			     capsnap->uid, capsnap->gid, capsnap->mode,
 			     capsnap->xattr_version, capsnap->xattr_blob,
 			     capsnap->follows, capsnap->inline_data);
@@ -2507,7 +2510,7 @@
 					*pinned_page = page;
 					break;
 				}
-				page_cache_release(page);
+				put_page(page);
 			}
 			/*
 			 * drop cap refs first because getattr while

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index fd11fb2..4fb2bbc 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c

@@ -38,7 +38,7 @@
 	if (dentry->d_fsdata)
 		return 0;
 
-	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_KERNEL | __GFP_ZERO);
+	di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
 	if (!di)
 		return -ENOMEM;          /* oh well */
 
@@ -68,23 +68,6 @@
 	return 0;
 }
 
-struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
-{
-	struct inode *inode = NULL;
-
-	if (!dentry)
-		return NULL;
-
-	spin_lock(&dentry->d_lock);
-	if (!IS_ROOT(dentry)) {
-		inode = d_inode(dentry->d_parent);
-		ihold(inode);
-	}
-	spin_unlock(&dentry->d_lock);
-	return inode;
-}
-
-
 /*
  * for readdir, we encode the directory frag and offset within that
  * frag into f_pos.
@@ -146,7 +129,7 @@
 	struct inode *dir = d_inode(parent);
 	struct dentry *dentry, *last = NULL;
 	struct ceph_dentry_info *di;
-	unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry *);
+	unsigned nsize = PAGE_SIZE / sizeof(struct dentry *);
 	int err = 0;
 	loff_t ptr_pos = 0;
 	struct ceph_readdir_cache_control cache_ctl = {};
@@ -171,7 +154,7 @@
 		}
 
 		err = -EAGAIN;
-		pgoff = ptr_pos >> PAGE_CACHE_SHIFT;
+		pgoff = ptr_pos >> PAGE_SHIFT;
 		if (!cache_ctl.page || pgoff != page_index(cache_ctl.page)) {
 			ceph_readdir_cache_release(&cache_ctl);
 			cache_ctl.page = find_lock_page(&dir->i_data, pgoff);
@@ -624,6 +607,7 @@
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
 	int op;
+	int mask;
 	int err;
 
 	dout("lookup %p dentry %p '%pd'\n",
@@ -666,8 +650,12 @@
 		return ERR_CAST(req);
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
-	/* we only need inode linkage */
-	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
+
+	mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
+	if (ceph_security_xattr_wanted(dir))
+		mask |= CEPH_CAP_XATTR_SHARED;
+	req->r_args.getattr.mask = cpu_to_le32(mask);
+
 	req->r_locked_dir = dir;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	err = ceph_handle_snapdir(req, dentry, err);
@@ -1095,6 +1083,7 @@
 static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	int valid = 0;
+	struct dentry *parent;
 	struct inode *dir;
 
 	if (flags & LOOKUP_RCU)
@@ -1103,7 +1092,8 @@
 	dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
 	     dentry, d_inode(dentry), ceph_dentry(dentry)->offset);
 
-	dir = ceph_get_dentry_parent_inode(dentry);
+	parent = dget_parent(dentry);
+	dir = d_inode(parent);
 
 	/* always trust cached snapped dentries, snapdir dentry */
 	if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -1121,13 +1111,48 @@
 			valid = 1;
 	}
 
+	if (!valid) {
+		struct ceph_mds_client *mdsc =
+			ceph_sb_to_client(dir->i_sb)->mdsc;
+		struct ceph_mds_request *req;
+		int op, mask, err;
+
+		op = ceph_snap(dir) == CEPH_SNAPDIR ?
+			CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
+		req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
+		if (!IS_ERR(req)) {
+			req->r_dentry = dget(dentry);
+			req->r_num_caps = 2;
+
+			mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
+			if (ceph_security_xattr_wanted(dir))
+				mask |= CEPH_CAP_XATTR_SHARED;
+			req->r_args.getattr.mask = mask;
+
+			req->r_locked_dir = dir;
+			err = ceph_mdsc_do_request(mdsc, NULL, req);
+			if (err == 0 || err == -ENOENT) {
+				if (dentry == req->r_dentry) {
+					valid = !d_unhashed(dentry);
+				} else {
+					d_invalidate(req->r_dentry);
+					err = -EAGAIN;
+				}
+			}
+			ceph_mdsc_put_request(req);
+			dout("d_revalidate %p lookup result=%d\n",
+			     dentry, err);
+		}
+	}
+
 	dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
 	if (valid) {
 		ceph_dentry_lru_touch(dentry);
 	} else {
 		ceph_dir_clear_complete(dir);
 	}
-	iput(dir);
+
+	dput(parent);
 	return valid;
 }
 

diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 3b31723..6e72c98 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c

@@ -71,12 +71,18 @@
 	inode = ceph_find_inode(sb, vino);
 	if (!inode) {
 		struct ceph_mds_request *req;
+		int mask;
 
 		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
 					       USE_ANY_MDS);
 		if (IS_ERR(req))
 			return ERR_CAST(req);
 
+		mask = CEPH_STAT_CAP_INODE;
+		if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
+			mask |= CEPH_CAP_XATTR_SHARED;
+		req->r_args.getattr.mask = cpu_to_le32(mask);
+
 		req->r_ino1 = vino;
 		req->r_num_caps = 1;
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
@@ -128,6 +134,7 @@
 	struct ceph_mds_request *req;
 	struct inode *inode;
 	struct dentry *dentry;
+	int mask;
 	int err;
 
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
@@ -144,6 +151,12 @@
 			.snap = CEPH_NOSNAP,
 		};
 	}
+
+	mask = CEPH_STAT_CAP_INODE;
+	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
+		mask |= CEPH_CAP_XATTR_SHARED;
+	req->r_args.getattr.mask = cpu_to_le32(mask);
+
 	req->r_num_caps = 1;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	inode = req->r_target_inode;

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index eb9028e..a79f926 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c

@@ -157,7 +157,7 @@
 	case S_IFDIR:
 		dout("init_file %p %p 0%o (regular)\n", inode, file,
 		     inode->i_mode);
-		cf = kmem_cache_alloc(ceph_file_cachep, GFP_KERNEL | __GFP_ZERO);
+		cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
 		if (cf == NULL) {
 			ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
 			return -ENOMEM;
@@ -300,6 +300,7 @@
 	struct ceph_mds_request *req;
 	struct dentry *dn;
 	struct ceph_acls_info acls = {};
+       int mask;
 	int err;
 
 	dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
@@ -335,6 +336,12 @@
 			acls.pagelist = NULL;
 		}
 	}
+
+       mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
+       if (ceph_security_xattr_wanted(dir))
+               mask |= CEPH_CAP_XATTR_SHARED;
+       req->r_args.open.mask = cpu_to_le32(mask);
+
 	req->r_locked_dir = dir;           /* caller holds dir->i_mutex */
 	err = ceph_mdsc_do_request(mdsc,
 				   (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
@@ -459,7 +466,7 @@
 			ret += zlen;
 		}
 
-		didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
+		didpages = (page_align + ret) >> PAGE_SHIFT;
 		pos += ret;
 		read = pos - off;
 		left -= ret;
@@ -725,7 +732,6 @@
 	ret = ceph_osdc_start_request(req->r_osdc, req, false);
 out:
 	if (ret < 0) {
-		BUG_ON(ret == -EOLDSNAPC);
 		req->r_result = ret;
 		ceph_aio_complete_req(req, NULL);
 	}
@@ -783,7 +789,7 @@
 	int num_pages = 0;
 	int flags;
 	int ret;
-	struct timespec mtime = CURRENT_TIME;
+	struct timespec mtime = current_fs_time(inode->i_sb);
 	size_t count = iov_iter_count(iter);
 	loff_t pos = iocb->ki_pos;
 	bool write = iov_iter_rw(iter) == WRITE;
@@ -800,8 +806,8 @@
 
 	if (write) {
 		ret = invalidate_inode_pages2_range(inode->i_mapping,
-					pos >> PAGE_CACHE_SHIFT,
-					(pos + count) >> PAGE_CACHE_SHIFT);
+					pos >> PAGE_SHIFT,
+					(pos + count) >> PAGE_SHIFT);
 		if (ret < 0)
 			dout("invalidate_inode_pages2_range returned %d\n", ret);
 
@@ -866,7 +872,7 @@
 			 * may block.
 			 */
 			truncate_inode_pages_range(inode->i_mapping, pos,
-					(pos+len) | (PAGE_CACHE_SIZE - 1));
+					(pos+len) | (PAGE_SIZE - 1));
 
 			osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
 		}
@@ -949,7 +955,6 @@
 				ret = ceph_osdc_start_request(req->r_osdc,
 							      req, false);
 			if (ret < 0) {
-				BUG_ON(ret == -EOLDSNAPC);
 				req->r_result = ret;
 				ceph_aio_complete_req(req, NULL);
 			}
@@ -988,7 +993,7 @@
 	int flags;
 	int check_caps = 0;
 	int ret;
-	struct timespec mtime = CURRENT_TIME;
+	struct timespec mtime = current_fs_time(inode->i_sb);
 	size_t count = iov_iter_count(from);
 
 	if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -1001,8 +1006,8 @@
 		return ret;
 
 	ret = invalidate_inode_pages2_range(inode->i_mapping,
-					    pos >> PAGE_CACHE_SHIFT,
-					    (pos + count) >> PAGE_CACHE_SHIFT);
+					    pos >> PAGE_SHIFT,
+					    (pos + count) >> PAGE_SHIFT);
 	if (ret < 0)
 		dout("invalidate_inode_pages2_range returned %d\n", ret);
 
@@ -1031,7 +1036,7 @@
 		 * write from beginning of first page,
 		 * regardless of io alignment
 		 */
-		num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
 		pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
 		if (IS_ERR(pages)) {
@@ -1154,7 +1159,7 @@
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
 	if (pinned_page) {
-		page_cache_release(pinned_page);
+		put_page(pinned_page);
 		pinned_page = NULL;
 	}
 	ceph_put_cap_refs(ci, got);
@@ -1183,10 +1188,10 @@
 		if (retry_op == READ_INLINE) {
 			BUG_ON(ret > 0 || read > 0);
 			if (iocb->ki_pos < i_size &&
-			    iocb->ki_pos < PAGE_CACHE_SIZE) {
+			    iocb->ki_pos < PAGE_SIZE) {
 				loff_t end = min_t(loff_t, i_size,
 						   iocb->ki_pos + len);
-				end = min_t(loff_t, end, PAGE_CACHE_SIZE);
+				end = min_t(loff_t, end, PAGE_SIZE);
 				if (statret < end)
 					zero_user_segment(page, statret, end);
 				ret = copy_page_to_iter(page,
@@ -1458,21 +1463,21 @@
 	struct inode *inode, loff_t offset, unsigned size)
 {
 	struct page *page;
-	pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+	pgoff_t index = offset >> PAGE_SHIFT;
 
 	page = find_lock_page(inode->i_mapping, index);
 	if (page) {
 		wait_on_page_writeback(page);
-		zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size);
+		zero_user(page, offset & (PAGE_SIZE - 1), size);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 }
 
 static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
 				      loff_t length)
 {
-	loff_t nearly = round_up(offset, PAGE_CACHE_SIZE);
+	loff_t nearly = round_up(offset, PAGE_SIZE);
 	if (offset < nearly) {
 		loff_t size = nearly - offset;
 		if (length < size)
@@ -1481,8 +1486,8 @@
 		offset += size;
 		length -= size;
 	}
-	if (length >= PAGE_CACHE_SIZE) {
-		loff_t size = round_down(length, PAGE_CACHE_SIZE);
+	if (length >= PAGE_SIZE) {
+		loff_t size = round_down(length, PAGE_SIZE);
 		truncate_pagecache_range(inode, offset, offset + size - 1);
 		offset += size;
 		length -= size;

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e48fd8b..edfade0 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c

@@ -549,6 +549,10 @@
 	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
 	    (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
 		dout("size %lld -> %llu\n", inode->i_size, size);
+		if (size > 0 && S_ISDIR(inode->i_mode)) {
+			pr_err("fill_file_size non-zero size for directory\n");
+			size = 0;
+		}
 		i_size_write(inode, size);
 		inode->i_blocks = (size + (1<<9) - 1) >> 9;
 		ci->i_reported_size = size;
@@ -1261,6 +1265,7 @@
 			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
 			     dn, d_inode(dn), ceph_vinop(d_inode(dn)),
 			     ceph_vinop(in));
+			d_invalidate(dn);
 			have_lease = false;
 		}
 
@@ -1333,7 +1338,7 @@
 {
 	if (ctl->page) {
 		kunmap(ctl->page);
-		page_cache_release(ctl->page);
+		put_page(ctl->page);
 		ctl->page = NULL;
 	}
 }
@@ -1343,21 +1348,26 @@
 			      struct ceph_mds_request *req)
 {
 	struct ceph_inode_info *ci = ceph_inode(dir);
-	unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry*);
+	unsigned nsize = PAGE_SIZE / sizeof(struct dentry*);
 	unsigned idx = ctl->index % nsize;
 	pgoff_t pgoff = ctl->index / nsize;
 
 	if (!ctl->page || pgoff != page_index(ctl->page)) {
 		ceph_readdir_cache_release(ctl);
-		ctl->page  = grab_cache_page(&dir->i_data, pgoff);
+		if (idx == 0)
+			ctl->page = grab_cache_page(&dir->i_data, pgoff);
+		else
+			ctl->page = find_lock_page(&dir->i_data, pgoff);
 		if (!ctl->page) {
 			ctl->index = -1;
-			return -ENOMEM;
+			return idx == 0 ? -ENOMEM : 0;
 		}
 		/* reading/filling the cache are serialized by
 		 * i_mutex, no need to use page lock */
 		unlock_page(ctl->page);
 		ctl->dentries = kmap(ctl->page);
+		if (idx == 0)
+			memset(ctl->dentries, 0, PAGE_SIZE);
 	}
 
 	if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) &&
@@ -1380,7 +1390,7 @@
 	struct qstr dname;
 	struct dentry *dn;
 	struct inode *in;
-	int err = 0, ret, i;
+	int err = 0, skipped = 0, ret, i;
 	struct inode *snapdir = NULL;
 	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
 	struct ceph_dentry_info *di;
@@ -1492,7 +1502,17 @@
 		}
 
 		if (d_really_is_negative(dn)) {
-			struct dentry *realdn = splice_dentry(dn, in);
+			struct dentry *realdn;
+
+			if (ceph_security_xattr_deadlock(in)) {
+				dout(" skip splicing dn %p to inode %p"
+				     " (security xattr deadlock)\n", dn, in);
+				iput(in);
+				skipped++;
+				goto next_item;
+			}
+
+			realdn = splice_dentry(dn, in);
 			if (IS_ERR(realdn)) {
 				err = PTR_ERR(realdn);
 				d_drop(dn);
@@ -1509,7 +1529,7 @@
 				    req->r_session,
 				    req->r_request_started);
 
-		if (err == 0 && cache_ctl.index >= 0) {
+		if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
 			ret = fill_readdir_cache(d_inode(parent), dn,
 						 &cache_ctl, req);
 			if (ret < 0)
@@ -1520,7 +1540,7 @@
 			dput(dn);
 	}
 out:
-	if (err == 0) {
+	if (err == 0 && skipped == 0) {
 		req->r_did_prepopulate = true;
 		req->r_readdir_cache_idx = cache_ctl.index;
 	}
@@ -1950,7 +1970,7 @@
 	if (dirtied) {
 		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
 							   &prealloc_cf);
-		inode->i_ctime = CURRENT_TIME;
+		inode->i_ctime = current_fs_time(inode->i_sb);
 	}
 
 	release &= issued;

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 911d64d..541ead4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c

@@ -1610,7 +1610,7 @@
 	while (!list_empty(&tmp_list)) {
 		if (!msg) {
 			msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE,
-					PAGE_CACHE_SIZE, GFP_NOFS, false);
+					PAGE_SIZE, GFP_NOFS, false);
 			if (!msg)
 				goto out_err;
 			head = msg->front.iov_base;
@@ -1729,7 +1729,7 @@
 	init_completion(&req->r_safe_completion);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 
-	req->r_stamp = CURRENT_TIME;
+	req->r_stamp = current_fs_time(mdsc->fsc->sb);
 
 	req->r_op = op;
 	req->r_direct_mode = mode;
@@ -2540,6 +2540,7 @@
 
 	/* insert trace into our cache */
 	mutex_lock(&req->r_fill_mutex);
+	current->journal_info = req;
 	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
 	if (err == 0) {
 		if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
@@ -2547,6 +2548,7 @@
 			ceph_readdir_prepopulate(req, req->r_session);
 		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
 	}
+	current->journal_info = NULL;
 	mutex_unlock(&req->r_fill_mutex);
 
 	up_read(&mdsc->snap_rwsem);
@@ -3764,7 +3766,6 @@
 	dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
 
 	/* do we need it? */
-	ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
 	mutex_lock(&mdsc->mutex);
 	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
 		dout("handle_map epoch %u <= our %u\n",
@@ -3791,6 +3792,8 @@
 	mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
 
 	__wake_requests(mdsc, &mdsc->waiting_for_map);
+	ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
+			  mdsc->mdsmap->m_epoch);
 
 	mutex_unlock(&mdsc->mutex);
 	schedule_delayed(mdsc);

diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 37712cc..ee69a53 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h

@@ -97,7 +97,7 @@
 /*
  * cap releases are batched and sent to the MDS en masse.
  */
-#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE -			\
+#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE -			\
 				sizeof(struct ceph_mds_cap_release)) /	\
 			       sizeof(struct ceph_mds_cap_item))
 

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4aa7122..9caaa7f 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c

@@ -296,8 +296,6 @@
 }
 
 
-struct ceph_snap_context *ceph_empty_snapc;
-
 /*
  * build the snap context for a given realm.
  */
@@ -987,17 +985,3 @@
 		up_write(&mdsc->snap_rwsem);
 	return;
 }
-
-int __init ceph_snap_init(void)
-{
-	ceph_empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
-	if (!ceph_empty_snapc)
-		return -ENOMEM;
-	ceph_empty_snapc->seq = 1;
-	return 0;
-}
-
-void ceph_snap_exit(void)
-{
-	ceph_put_snap_context(ceph_empty_snapc);
-}

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index ca4d5e8..f12d5e2 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c

@@ -439,8 +439,8 @@
 
 	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
 		seq_puts(m, ",dirstat");
-	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
-		seq_puts(m, ",norbytes");
+	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
+		seq_puts(m, ",rbytes");
 	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
 		seq_puts(m, ",noasyncreaddir");
 	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
@@ -530,7 +530,7 @@
 		goto fail;
 	}
 	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
-	fsc->client->monc.want_mdsmap = 1;
+	ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 0, true);
 
 	fsc->mount_options = fsopt;
 
@@ -560,7 +560,7 @@
 
 	/* set up mempools */
 	err = -ENOMEM;
-	page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT;
+	page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
 	size = sizeof (struct page *) * (page_count ? page_count : 1);
 	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
 	if (!fsc->wb_pagevec_pool)
@@ -793,22 +793,20 @@
 	struct dentry *root;
 	int first = 0;   /* first vfsmount for this super_block */
 
-	dout("mount start\n");
+	dout("mount start %p\n", fsc);
 	mutex_lock(&fsc->client->mount_mutex);
 
-	err = __ceph_open_session(fsc->client, started);
-	if (err < 0)
-		goto out;
+	if (!fsc->sb->s_root) {
+		err = __ceph_open_session(fsc->client, started);
+		if (err < 0)
+			goto out;
 
-	dout("mount opening root\n");
-	root = open_root_dentry(fsc, "", started);
-	if (IS_ERR(root)) {
-		err = PTR_ERR(root);
-		goto out;
-	}
-	if (fsc->sb->s_root) {
-		dput(root);
-	} else {
+		dout("mount opening root\n");
+		root = open_root_dentry(fsc, "", started);
+		if (IS_ERR(root)) {
+			err = PTR_ERR(root);
+			goto out;
+		}
 		fsc->sb->s_root = root;
 		first = 1;
 
@@ -818,6 +816,7 @@
 	}
 
 	if (path[0] == 0) {
+		root = fsc->sb->s_root;
 		dget(root);
 	} else {
 		dout("mount opening base mountpoint\n");
@@ -833,16 +832,14 @@
 	mutex_unlock(&fsc->client->mount_mutex);
 	return root;
 
-out:
-	mutex_unlock(&fsc->client->mount_mutex);
-	return ERR_PTR(err);
-
 fail:
 	if (first) {
 		dput(fsc->sb->s_root);
 		fsc->sb->s_root = NULL;
 	}
-	goto out;
+out:
+	mutex_unlock(&fsc->client->mount_mutex);
+	return ERR_PTR(err);
 }
 
 static int ceph_set_super(struct super_block *s, void *data)
@@ -915,13 +912,13 @@
 	int err;
 
 	/* set ra_pages based on rasize mount option? */
-	if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
+	if (fsc->mount_options->rasize >= PAGE_SIZE)
 		fsc->backing_dev_info.ra_pages =
-			(fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
+			(fsc->mount_options->rasize + PAGE_SIZE - 1)
 			>> PAGE_SHIFT;
 	else
 		fsc->backing_dev_info.ra_pages =
-			VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
+			VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
 
 	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
 			   atomic_long_inc_return(&bdi_seq));
@@ -1042,19 +1039,14 @@
 
 	ceph_flock_init();
 	ceph_xattr_init();
-	ret = ceph_snap_init();
-	if (ret)
-		goto out_xattr;
 	ret = register_filesystem(&ceph_fs_type);
 	if (ret)
-		goto out_snap;
+		goto out_xattr;
 
 	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
 
 	return 0;
 
-out_snap:
-	ceph_snap_exit();
 out_xattr:
 	ceph_xattr_exit();
 	destroy_caches();
@@ -1066,7 +1058,6 @@
 {
 	dout("exit_ceph\n");
 	unregister_filesystem(&ceph_fs_type);
-	ceph_snap_exit();
 	ceph_xattr_exit();
 	destroy_caches();
 }

diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 9c458eb..e705c4d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h

@@ -37,8 +37,7 @@
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 #define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
 
-#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES | \
-				   CEPH_MOUNT_OPT_DCACHE)
+#define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
 
 #define ceph_set_mount_opt(fsc, opt) \
 	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -469,7 +468,7 @@
 #define CEPH_I_POOL_PERM	(1 << 4)  /* pool rd/wr bits are valid */
 #define CEPH_I_POOL_RD		(1 << 5)  /* can read from pool */
 #define CEPH_I_POOL_WR		(1 << 6)  /* can write to pool */
-
+#define CEPH_I_SEC_INITED	(1 << 7)  /* security initialized */
 
 static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
 					   long long release_count,
@@ -721,7 +720,6 @@
 
 
 /* snap.c */
-extern struct ceph_snap_context *ceph_empty_snapc;
 struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
 					       u64 ino);
 extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
@@ -738,8 +736,6 @@
 extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 				  struct ceph_cap_snap *capsnap);
 extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
-extern int ceph_snap_init(void);
-extern void ceph_snap_exit(void);
 
 /*
  * a cap_snap is "pending" if it is still awaiting an in-progress
@@ -808,6 +804,20 @@
 extern void ceph_xattr_exit(void);
 extern const struct xattr_handler *ceph_xattr_handlers[];
 
+#ifdef CONFIG_SECURITY
+extern bool ceph_security_xattr_deadlock(struct inode *in);
+extern bool ceph_security_xattr_wanted(struct inode *in);
+#else
+static inline bool ceph_security_xattr_deadlock(struct inode *in)
+{
+	return false;
+}
+static inline bool ceph_security_xattr_wanted(struct inode *in)
+{
+	return false;
+}
+#endif
+
 /* acl.c */
 struct ceph_acls_info {
 	void *default_acl;
@@ -947,7 +957,6 @@
 extern void ceph_dentry_lru_del(struct dentry *dn);
 extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
 extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
-extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
 extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl);
 
 /*

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 819163d..9410abd 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c

@@ -714,31 +714,62 @@
 	}
 }
 
+static inline int __get_request_mask(struct inode *in) {
+	struct ceph_mds_request *req = current->journal_info;
+	int mask = 0;
+	if (req && req->r_target_inode == in) {
+		if (req->r_op == CEPH_MDS_OP_LOOKUP ||
+		    req->r_op == CEPH_MDS_OP_LOOKUPINO ||
+		    req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
+		    req->r_op == CEPH_MDS_OP_GETATTR) {
+			mask = le32_to_cpu(req->r_args.getattr.mask);
+		} else if (req->r_op == CEPH_MDS_OP_OPEN ||
+			   req->r_op == CEPH_MDS_OP_CREATE) {
+			mask = le32_to_cpu(req->r_args.open.mask);
+		}
+	}
+	return mask;
+}
+
 ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 		      size_t size)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	int err;
 	struct ceph_inode_xattr *xattr;
 	struct ceph_vxattr *vxattr = NULL;
+	int req_mask;
+	int err;
 
 	if (!ceph_is_valid_xattr(name))
 		return -ENODATA;
 
 	/* let's see if a virtual xattr was requested */
 	vxattr = ceph_match_vxattr(inode, name);
-	if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
-		err = vxattr->getxattr_cb(ci, value, size);
+	if (vxattr) {
+		err = -ENODATA;
+		if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
+			err = vxattr->getxattr_cb(ci, value, size);
 		return err;
 	}
 
+	req_mask = __get_request_mask(inode);
+
 	spin_lock(&ci->i_ceph_lock);
 	dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
 	     ci->i_xattrs.version, ci->i_xattrs.index_version);
 
 	if (ci->i_xattrs.version == 0 ||
-	    !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
+	    !((req_mask & CEPH_CAP_XATTR_SHARED) ||
+	      __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
 		spin_unlock(&ci->i_ceph_lock);
+
+		/* security module gets xattr while filling trace */
+		if (current->journal_info != NULL) {
+			pr_warn_ratelimited("sync getxattr %p "
+					    "during filling trace\n", inode);
+			return -EBUSY;
+		}
+
 		/* get xattrs from mds (if we don't already have them) */
 		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 		if (err)
@@ -765,6 +796,9 @@
 
 	memcpy(value, xattr->val, xattr->val_len);
 
+	if (current->journal_info != NULL &&
+	    !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
+		ci->i_ceph_flags |= CEPH_I_SEC_INITED;
 out:
 	spin_unlock(&ci->i_ceph_lock);
 	return err;
@@ -999,7 +1033,7 @@
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
 					       &prealloc_cf);
 		ci->i_xattrs.dirty = true;
-		inode->i_ctime = CURRENT_TIME;
+		inode->i_ctime = current_fs_time(inode->i_sb);
 	}
 
 	spin_unlock(&ci->i_ceph_lock);
@@ -1015,7 +1049,15 @@
 do_sync_unlocked:
 	if (lock_snap_rwsem)
 		up_read(&mdsc->snap_rwsem);
-	err = ceph_sync_setxattr(dentry, name, value, size, flags);
+
+	/* security module set xattr while filling trace */
+	if (current->journal_info != NULL) {
+		pr_warn_ratelimited("sync setxattr %p "
+				    "during filling trace\n", inode);
+		err = -EBUSY;
+	} else {
+		err = ceph_sync_setxattr(dentry, name, value, size, flags);
+	}
 out:
 	ceph_free_cap_flush(prealloc_cf);
 	kfree(newname);
@@ -1136,7 +1178,7 @@
 	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
 				       &prealloc_cf);
 	ci->i_xattrs.dirty = true;
-	inode->i_ctime = CURRENT_TIME;
+	inode->i_ctime = current_fs_time(inode->i_sb);
 	spin_unlock(&ci->i_ceph_lock);
 	if (lock_snap_rwsem)
 		up_read(&mdsc->snap_rwsem);
@@ -1164,3 +1206,25 @@
 
 	return __ceph_removexattr(dentry, name);
 }
+
+#ifdef CONFIG_SECURITY
+bool ceph_security_xattr_wanted(struct inode *in)
+{
+	return in->i_security != NULL;
+}
+
+bool ceph_security_xattr_deadlock(struct inode *in)
+{
+	struct ceph_inode_info *ci;
+	bool ret;
+	if (in->i_security == NULL)
+		return false;
+	ci = ceph_inode(in);
+	spin_lock(&ci->i_ceph_lock);
+	ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
+	      !(ci->i_xattrs.version > 0 &&
+		__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
+	spin_unlock(&ci->i_ceph_lock);
+	return ret;
+}
+#endif

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1d86fc6..8920156 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c

@@ -962,7 +962,7 @@
 	cifs_dbg(FYI, "about to flush pages\n");
 	/* should we flush first and last page first */
 	truncate_inode_pages_range(&target_inode->i_data, destoff,
-				   PAGE_CACHE_ALIGN(destoff + len)-1);
+				   PAGE_ALIGN(destoff + len)-1);
 
 	if (target_tcon->ses->server->ops->duplicate_extents)
 		rc = target_tcon->ses->server->ops->duplicate_extents(xid,

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d21da9f..f2cc0b3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h

@@ -714,7 +714,7 @@
  *
  * Note that this might make for "interesting" allocation problems during
  * writeback however as we have to allocate an array of pointers for the
- * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
+ * pages. A 16M write means ~32kb page array with PAGE_SIZE == 4096.
  *
  * For reads, there is a similar problem as we need to allocate an array
  * of kvecs to handle the receive, though that should only need to be done
@@ -733,7 +733,7 @@
 
 /*
  * The default wsize is 1M. find_get_pages seems to return a maximum of 256
- * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill
+ * pages in a single call. With PAGE_SIZE == 4k, this means we can fill
  * a single wsize request with a single call.
  */
 #define CIFS_DEFAULT_IOSIZE (1024 * 1024)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 76fcb50..a894bf8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c

@@ -1929,17 +1929,17 @@
 
 		wsize = server->ops->wp_retry_size(inode);
 		if (wsize < rest_len) {
-			nr_pages = wsize / PAGE_CACHE_SIZE;
+			nr_pages = wsize / PAGE_SIZE;
 			if (!nr_pages) {
 				rc = -ENOTSUPP;
 				break;
 			}
-			cur_len = nr_pages * PAGE_CACHE_SIZE;
-			tailsz = PAGE_CACHE_SIZE;
+			cur_len = nr_pages * PAGE_SIZE;
+			tailsz = PAGE_SIZE;
 		} else {
-			nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE);
+			nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
 			cur_len = rest_len;
-			tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE;
+			tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
 		}
 
 		wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
@@ -1957,7 +1957,7 @@
 		wdata2->sync_mode = wdata->sync_mode;
 		wdata2->nr_pages = nr_pages;
 		wdata2->offset = page_offset(wdata2->pages[0]);
-		wdata2->pagesz = PAGE_CACHE_SIZE;
+		wdata2->pagesz = PAGE_SIZE;
 		wdata2->tailsz = tailsz;
 		wdata2->bytes = cur_len;
 
@@ -1975,7 +1975,7 @@
 			if (rc != 0 && rc != -EAGAIN) {
 				SetPageError(wdata2->pages[j]);
 				end_page_writeback(wdata2->pages[j]);
-				page_cache_release(wdata2->pages[j]);
+				put_page(wdata2->pages[j]);
 			}
 		}
 
@@ -2018,7 +2018,7 @@
 		else if (wdata->result < 0)
 			SetPageError(page);
 		end_page_writeback(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	if (wdata->result != -EAGAIN)
 		mapping_set_error(inode->i_mapping, wdata->result);

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a763cd3..6f62ac8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c

@@ -3630,7 +3630,7 @@
 	cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info);
 
 	/* tune readahead according to rsize */
-	cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE;
+	cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_SIZE;
 
 remote_path_check:
 #ifdef CONFIG_CIFS_DFS_UPCALL

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ff882ae..c03d074 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c

@@ -1833,7 +1833,7 @@
 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 {
 	struct address_space *mapping = page->mapping;
-	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
 	char *write_data;
 	int rc = -EFAULT;
 	int bytes_written = 0;
@@ -1849,7 +1849,7 @@
 	write_data = kmap(page);
 	write_data += from;
 
-	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
+	if ((to > PAGE_SIZE) || (from > to)) {
 		kunmap(page);
 		return -EIO;
 	}
@@ -1902,7 +1902,7 @@
 	 * find_get_pages_tag seems to return a max of 256 on each
 	 * iteration, so we must call it several times in order to
 	 * fill the array or the wsize is effectively limited to
-	 * 256 * PAGE_CACHE_SIZE.
+	 * 256 * PAGE_SIZE.
 	 */
 	*found_pages = 0;
 	pages = wdata->pages;
@@ -1991,7 +1991,7 @@
 
 	/* put any pages we aren't going to use */
 	for (i = nr_pages; i < found_pages; i++) {
-		page_cache_release(wdata->pages[i]);
+		put_page(wdata->pages[i]);
 		wdata->pages[i] = NULL;
 	}
 
@@ -2009,11 +2009,11 @@
 	wdata->sync_mode = wbc->sync_mode;
 	wdata->nr_pages = nr_pages;
 	wdata->offset = page_offset(wdata->pages[0]);
-	wdata->pagesz = PAGE_CACHE_SIZE;
+	wdata->pagesz = PAGE_SIZE;
 	wdata->tailsz = min(i_size_read(mapping->host) -
 			page_offset(wdata->pages[nr_pages - 1]),
-			(loff_t)PAGE_CACHE_SIZE);
-	wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
+			(loff_t)PAGE_SIZE);
+	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
 
 	if (wdata->cfile != NULL)
 		cifsFileInfo_put(wdata->cfile);
@@ -2047,15 +2047,15 @@
 	 * If wsize is smaller than the page cache size, default to writing
 	 * one page at a time via cifs_writepage
 	 */
-	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
+	if (cifs_sb->wsize < PAGE_SIZE)
 		return generic_writepages(mapping, wbc);
 
 	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		index = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			range_whole = true;
 		scanned = true;
@@ -2071,7 +2071,7 @@
 		if (rc)
 			break;
 
-		tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
+		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
 
 		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
 						  &found_pages);
@@ -2111,7 +2111,7 @@
 				else
 					SetPageError(wdata->pages[i]);
 				end_page_writeback(wdata->pages[i]);
-				page_cache_release(wdata->pages[i]);
+				put_page(wdata->pages[i]);
 			}
 			if (rc != -EAGAIN)
 				mapping_set_error(mapping, rc);
@@ -2154,7 +2154,7 @@
 
 	xid = get_xid();
 /* BB add check for wbc flags */
-	page_cache_get(page);
+	get_page(page);
 	if (!PageUptodate(page))
 		cifs_dbg(FYI, "ppw - page not up to date\n");
 
@@ -2170,7 +2170,7 @@
 	 */
 	set_page_writeback(page);
 retry_write:
-	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
+	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
 	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
 		goto retry_write;
 	else if (rc == -EAGAIN)
@@ -2180,7 +2180,7 @@
 	else
 		SetPageUptodate(page);
 	end_page_writeback(page);
-	page_cache_release(page);
+	put_page(page);
 	free_xid(xid);
 	return rc;
 }
@@ -2214,12 +2214,12 @@
 		if (copied == len)
 			SetPageUptodate(page);
 		ClearPageChecked(page);
-	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
+	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
 		SetPageUptodate(page);
 
 	if (!PageUptodate(page)) {
 		char *page_data;
-		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+		unsigned offset = pos & (PAGE_SIZE - 1);
 		unsigned int xid;
 
 		xid = get_xid();
@@ -2248,7 +2248,7 @@
 	}
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return rc;
 }
@@ -3286,9 +3286,9 @@
 		    (rdata->result == -EAGAIN && got_bytes))
 			cifs_readpage_to_fscache(rdata->mapping->host, page);
 
-		got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
+		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
 
-		page_cache_release(page);
+		put_page(page);
 		rdata->pages[i] = NULL;
 	}
 	kref_put(&rdata->refcount, cifs_readdata_release);
@@ -3307,21 +3307,21 @@
 
 	/* determine the eof that the server (probably) has */
 	eof = CIFS_I(rdata->mapping->host)->server_eof;
-	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
+	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
 	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
 
 	rdata->got_bytes = 0;
-	rdata->tailsz = PAGE_CACHE_SIZE;
+	rdata->tailsz = PAGE_SIZE;
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page = rdata->pages[i];
 
-		if (len >= PAGE_CACHE_SIZE) {
+		if (len >= PAGE_SIZE) {
 			/* enough data to fill the page */
 			iov.iov_base = kmap(page);
-			iov.iov_len = PAGE_CACHE_SIZE;
+			iov.iov_len = PAGE_SIZE;
 			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
 				 i, page->index, iov.iov_base, iov.iov_len);
-			len -= PAGE_CACHE_SIZE;
+			len -= PAGE_SIZE;
 		} else if (len > 0) {
 			/* enough for partial page, fill and zero the rest */
 			iov.iov_base = kmap(page);
@@ -3329,7 +3329,7 @@
 			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
 				 i, page->index, iov.iov_base, iov.iov_len);
 			memset(iov.iov_base + len,
-				'\0', PAGE_CACHE_SIZE - len);
+				'\0', PAGE_SIZE - len);
 			rdata->tailsz = len;
 			len = 0;
 		} else if (page->index > eof_index) {
@@ -3341,12 +3341,12 @@
 			 * to prevent the VFS from repeatedly attempting to
 			 * fill them until the writes are flushed.
 			 */
-			zero_user(page, 0, PAGE_CACHE_SIZE);
+			zero_user(page, 0, PAGE_SIZE);
 			lru_cache_add_file(page);
 			flush_dcache_page(page);
 			SetPageUptodate(page);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			rdata->pages[i] = NULL;
 			rdata->nr_pages--;
 			continue;
@@ -3354,7 +3354,7 @@
 			/* no need to hold page hostage */
 			lru_cache_add_file(page);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			rdata->pages[i] = NULL;
 			rdata->nr_pages--;
 			continue;
@@ -3402,8 +3402,8 @@
 	}
 
 	/* move first page to the tmplist */
-	*offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-	*bytes = PAGE_CACHE_SIZE;
+	*offset = (loff_t)page->index << PAGE_SHIFT;
+	*bytes = PAGE_SIZE;
 	*nr_pages = 1;
 	list_move_tail(&page->lru, tmplist);
 
@@ -3415,7 +3415,7 @@
 			break;
 
 		/* would this page push the read over the rsize? */
-		if (*bytes + PAGE_CACHE_SIZE > rsize)
+		if (*bytes + PAGE_SIZE > rsize)
 			break;
 
 		__SetPageLocked(page);
@@ -3424,7 +3424,7 @@
 			break;
 		}
 		list_move_tail(&page->lru, tmplist);
-		(*bytes) += PAGE_CACHE_SIZE;
+		(*bytes) += PAGE_SIZE;
 		expected_index++;
 		(*nr_pages)++;
 	}
@@ -3493,7 +3493,7 @@
 		 * reach this point however since we set ra_pages to 0 when the
 		 * rsize is smaller than a cache page.
 		 */
-		if (unlikely(rsize < PAGE_CACHE_SIZE)) {
+		if (unlikely(rsize < PAGE_SIZE)) {
 			add_credits_and_wake_if(server, credits, 0);
 			return 0;
 		}
@@ -3512,7 +3512,7 @@
 				list_del(&page->lru);
 				lru_cache_add_file(page);
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 			}
 			rc = -ENOMEM;
 			add_credits_and_wake_if(server, credits, 0);
@@ -3524,7 +3524,7 @@
 		rdata->offset = offset;
 		rdata->bytes = bytes;
 		rdata->pid = pid;
-		rdata->pagesz = PAGE_CACHE_SIZE;
+		rdata->pagesz = PAGE_SIZE;
 		rdata->read_into_pages = cifs_readpages_read_into_pages;
 		rdata->credits = credits;
 
@@ -3542,7 +3542,7 @@
 				page = rdata->pages[i];
 				lru_cache_add_file(page);
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 			}
 			/* Fallback to the readpage in error/reconnect cases */
 			kref_put(&rdata->refcount, cifs_readdata_release);
@@ -3577,7 +3577,7 @@
 	read_data = kmap(page);
 	/* for reads over a certain size could initiate async read ahead */
 
-	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
+	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
 
 	if (rc < 0)
 		goto io_error;
@@ -3587,8 +3587,8 @@
 	file_inode(file)->i_atime =
 		current_fs_time(file_inode(file)->i_sb);
 
-	if (PAGE_CACHE_SIZE > rc)
-		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
+	if (PAGE_SIZE > rc)
+		memset(read_data + rc, 0, PAGE_SIZE - rc);
 
 	flush_dcache_page(page);
 	SetPageUptodate(page);
@@ -3608,7 +3608,7 @@
 
 static int cifs_readpage(struct file *file, struct page *page)
 {
-	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
 	int rc = -EACCES;
 	unsigned int xid;
 
@@ -3679,8 +3679,8 @@
 			struct page **pagep, void **fsdata)
 {
 	int oncethru = 0;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
+	pgoff_t index = pos >> PAGE_SHIFT;
+	loff_t offset = pos & (PAGE_SIZE - 1);
 	loff_t page_start = pos & PAGE_MASK;
 	loff_t i_size;
 	struct page *page;
@@ -3703,7 +3703,7 @@
 	 * the server. If the write is short, we'll end up doing a sync write
 	 * instead.
 	 */
-	if (len == PAGE_CACHE_SIZE)
+	if (len == PAGE_SIZE)
 		goto out;
 
 	/*
@@ -3718,7 +3718,7 @@
 		    (offset == 0 && (pos + len) >= i_size)) {
 			zero_user_segments(page, 0, offset,
 					   offset + len,
-					   PAGE_CACHE_SIZE);
+					   PAGE_SIZE);
 			/*
 			 * PageChecked means that the parts of the page
 			 * to which we're not writing are considered up
@@ -3737,7 +3737,7 @@
 		 * do a sync write instead since PG_uptodate isn't set.
 		 */
 		cifs_readpage_worker(file, page, &page_start);
-		page_cache_release(page);
+		put_page(page);
 		oncethru = 1;
 		goto start;
 	} else {
@@ -3764,7 +3764,7 @@
 {
 	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
 
-	if (offset == 0 && length == PAGE_CACHE_SIZE)
+	if (offset == 0 && length == PAGE_SIZE)
 		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
 }
 
@@ -3772,7 +3772,7 @@
 {
 	int rc = 0;
 	loff_t range_start = page_offset(page);
-	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = 0,

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index aeb26db..5f9ad5c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c

@@ -59,7 +59,7 @@
 
 		/* check if server can support readpages */
 		if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf <
-				PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
+				PAGE_SIZE + MAX_CIFS_HDR_SIZE)
 			inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
 		else
 			inode->i_data.a_ops = &cifs_addr_ops;
@@ -2019,8 +2019,8 @@
 
 static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 {
-	pgoff_t index = from >> PAGE_CACHE_SHIFT;
-	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
+	pgoff_t index = from >> PAGE_SHIFT;
+	unsigned offset = from & (PAGE_SIZE - 1);
 	struct page *page;
 	int rc = 0;
 
@@ -2028,9 +2028,9 @@
 	if (!page)
 		return -ENOMEM;
 
-	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+	zero_user_segment(page, offset, PAGE_SIZE);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return rc;
 }
 

diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index a8f3b58..cfd9132 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c

@@ -71,8 +71,8 @@
 	struct inode *inode;
 	struct dentry *root;
 
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = CONFIGFS_MAGIC;
 	sb->s_op = &configfs_ops;
 	sb->s_time_gran = 1;

diff --git a/fs/coredump.c b/fs/coredump.c
index 9ea87e9..47c32c3 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c

@@ -32,6 +32,9 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
 #include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/path.h>
 #include <linux/timekeeping.h>
 
 #include <asm/uaccess.h>
@@ -649,6 +652,8 @@
 		}
 	} else {
 		struct inode *inode;
+		int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+				 O_LARGEFILE | O_EXCL;
 
 		if (cprm.limit < binfmt->min_coredump)
 			goto fail_unlock;
@@ -687,10 +692,27 @@
 		 * what matters is that at least one of the two processes
 		 * writes its coredump successfully, not which one.
 		 */
-		cprm.file = filp_open(cn.corename,
-				 O_CREAT | 2 | O_NOFOLLOW |
-				 O_LARGEFILE | O_EXCL,
-				 0600);
+		if (need_suid_safe) {
+			/*
+			 * Using user namespaces, normal user tasks can change
+			 * their current->fs->root to point to arbitrary
+			 * directories. Since the intention of the "only dump
+			 * with a fully qualified path" rule is to control where
+			 * coredumps may be placed using root privileges,
+			 * current->fs->root must not be used. Instead, use the
+			 * root directory of init_task.
+			 */
+			struct path root;
+
+			task_lock(&init_task);
+			get_fs_root(init_task.fs, &root);
+			task_unlock(&init_task);
+			cprm.file = file_open_root(root.dentry, root.mnt,
+				cn.corename, open_flags, 0600);
+			path_put(&root);
+		} else {
+			cprm.file = filp_open(cn.corename, open_flags, 0600);
+		}
 		if (IS_ERR(cprm.file))
 			goto fail_unlock;
 

diff --git a/fs/cramfs/README b/fs/cramfs/README
index 445d1c2..9d4e7ea 100644
--- a/fs/cramfs/README
+++ b/fs/cramfs/README

@@ -86,26 +86,26 @@
 
 (Block size in cramfs refers to the size of input data that is
 compressed at a time.  It's intended to be somewhere around
-PAGE_CACHE_SIZE for cramfs_readpage's convenience.)
+PAGE_SIZE for cramfs_readpage's convenience.)
 
 The superblock ought to indicate the block size that the fs was
 written for, since comments in <linux/pagemap.h> indicate that
-PAGE_CACHE_SIZE may grow in future (if I interpret the comment
+PAGE_SIZE may grow in future (if I interpret the comment
 correctly).
 
-Currently, mkcramfs #define's PAGE_CACHE_SIZE as 4096 and uses that
-for blksize, whereas Linux-2.3.39 uses its PAGE_CACHE_SIZE, which in
+Currently, mkcramfs #define's PAGE_SIZE as 4096 and uses that
+for blksize, whereas Linux-2.3.39 uses its PAGE_SIZE, which in
 turn is defined as PAGE_SIZE (which can be as large as 32KB on arm).
 This discrepancy is a bug, though it's not clear which should be
 changed.
 
-One option is to change mkcramfs to take its PAGE_CACHE_SIZE from
+One option is to change mkcramfs to take its PAGE_SIZE from
 <asm/page.h>.  Personally I don't like this option, but it does
 require the least amount of change: just change `#define
-PAGE_CACHE_SIZE (4096)' to `#include <asm/page.h>'.  The disadvantage
+PAGE_SIZE (4096)' to `#include <asm/page.h>'.  The disadvantage
 is that the generated cramfs cannot always be shared between different
 kernels, not even necessarily kernels of the same architecture if
-PAGE_CACHE_SIZE is subject to change between kernel versions
+PAGE_SIZE is subject to change between kernel versions
 (currently possible with arm and ia64).
 
 The remaining options try to make cramfs more sharable.
@@ -126,22 +126,22 @@
   1. Always 4096 bytes.
 
   2. Writer chooses blocksize; kernel adapts but rejects blocksize >
-     PAGE_CACHE_SIZE.
+     PAGE_SIZE.
 
   3. Writer chooses blocksize; kernel adapts even to blocksize >
-     PAGE_CACHE_SIZE.
+     PAGE_SIZE.
 
 It's easy enough to change the kernel to use a smaller value than
-PAGE_CACHE_SIZE: just make cramfs_readpage read multiple blocks.
+PAGE_SIZE: just make cramfs_readpage read multiple blocks.
 
-The cost of option 1 is that kernels with a larger PAGE_CACHE_SIZE
+The cost of option 1 is that kernels with a larger PAGE_SIZE
 value don't get as good compression as they can.
 
 The cost of option 2 relative to option 1 is that the code uses
 variables instead of #define'd constants.  The gain is that people
-with kernels having larger PAGE_CACHE_SIZE can make use of that if
+with kernels having larger PAGE_SIZE can make use of that if
 they don't mind their cramfs being inaccessible to kernels with
-smaller PAGE_CACHE_SIZE values.
+smaller PAGE_SIZE values.
 
 Option 3 is easy to implement if we don't mind being CPU-inefficient:
 e.g. get readpage to decompress to a buffer of size MAX_BLKSIZE (which

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index b862bc2..3a32ddf 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c

@@ -137,7 +137,7 @@
  * page cache and dentry tree anyway..
  *
  * This also acts as a way to guarantee contiguous areas of up to
- * BLKS_PER_BUF*PAGE_CACHE_SIZE, so that the caller doesn't need to
+ * BLKS_PER_BUF*PAGE_SIZE, so that the caller doesn't need to
  * worry about end-of-buffer issues even when decompressing a full
  * page cache.
  */
@@ -152,7 +152,7 @@
  */
 #define BLKS_PER_BUF_SHIFT	(2)
 #define BLKS_PER_BUF		(1 << BLKS_PER_BUF_SHIFT)
-#define BUFFER_SIZE		(BLKS_PER_BUF*PAGE_CACHE_SIZE)
+#define BUFFER_SIZE		(BLKS_PER_BUF*PAGE_SIZE)
 
 static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
 static unsigned buffer_blocknr[READ_BUFFERS];
@@ -173,8 +173,8 @@
 
 	if (!len)
 		return NULL;
-	blocknr = offset >> PAGE_CACHE_SHIFT;
-	offset &= PAGE_CACHE_SIZE - 1;
+	blocknr = offset >> PAGE_SHIFT;
+	offset &= PAGE_SIZE - 1;
 
 	/* Check if an existing buffer already has the data.. */
 	for (i = 0; i < READ_BUFFERS; i++) {
@@ -184,14 +184,14 @@
 			continue;
 		if (blocknr < buffer_blocknr[i])
 			continue;
-		blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_CACHE_SHIFT;
+		blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_SHIFT;
 		blk_offset += offset;
 		if (blk_offset + len > BUFFER_SIZE)
 			continue;
 		return read_buffers[i] + blk_offset;
 	}
 
-	devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT;
+	devsize = mapping->host->i_size >> PAGE_SHIFT;
 
 	/* Ok, read in BLKS_PER_BUF pages completely first. */
 	for (i = 0; i < BLKS_PER_BUF; i++) {
@@ -213,7 +213,7 @@
 			wait_on_page_locked(page);
 			if (!PageUptodate(page)) {
 				/* asynchronous error */
-				page_cache_release(page);
+				put_page(page);
 				pages[i] = NULL;
 			}
 		}
@@ -229,12 +229,12 @@
 		struct page *page = pages[i];
 
 		if (page) {
-			memcpy(data, kmap(page), PAGE_CACHE_SIZE);
+			memcpy(data, kmap(page), PAGE_SIZE);
 			kunmap(page);
-			page_cache_release(page);
+			put_page(page);
 		} else
-			memset(data, 0, PAGE_CACHE_SIZE);
-		data += PAGE_CACHE_SIZE;
+			memset(data, 0, PAGE_SIZE);
+		data += PAGE_SIZE;
 	}
 	return read_buffers[buffer] + offset;
 }
@@ -353,7 +353,7 @@
 	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
 
 	buf->f_type = CRAMFS_MAGIC;
-	buf->f_bsize = PAGE_CACHE_SIZE;
+	buf->f_bsize = PAGE_SIZE;
 	buf->f_blocks = CRAMFS_SB(sb)->blocks;
 	buf->f_bfree = 0;
 	buf->f_bavail = 0;
@@ -496,7 +496,7 @@
 	int bytes_filled;
 	void *pgdata;
 
-	maxblock = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	bytes_filled = 0;
 	pgdata = kmap(page);
 
@@ -516,14 +516,14 @@
 
 		if (compr_len == 0)
 			; /* hole */
-		else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) {
+		else if (unlikely(compr_len > (PAGE_SIZE << 1))) {
 			pr_err("bad compressed blocksize %u\n",
 				compr_len);
 			goto err;
 		} else {
 			mutex_lock(&read_mutex);
 			bytes_filled = cramfs_uncompress_block(pgdata,
-				 PAGE_CACHE_SIZE,
+				 PAGE_SIZE,
 				 cramfs_read(sb, start_offset, compr_len),
 				 compr_len);
 			mutex_unlock(&read_mutex);
@@ -532,7 +532,7 @@
 		}
 	}
 
-	memset(pgdata + bytes_filled, 0, PAGE_CACHE_SIZE - bytes_filled);
+	memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled);
 	flush_dcache_page(page);
 	kunmap(page);
 	SetPageUptodate(page);

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index aed9ccc..7f58045 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c

@@ -170,15 +170,15 @@
 		fscrypt_complete, &ecr);
 
 	BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index));
-	memcpy(xts_tweak, &inode->i_ino, sizeof(index));
+	memcpy(xts_tweak, &index, sizeof(index));
 	memset(&xts_tweak[sizeof(index)], 0,
 			FS_XTS_TWEAK_SIZE - sizeof(index));
 
 	sg_init_table(&dst, 1);
-	sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
+	sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
 	sg_init_table(&src, 1);
-	sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
-	skcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
+	sg_set_page(&src, src_page, PAGE_SIZE, 0);
+	skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE,
 					xts_tweak);
 	if (rw == FS_DECRYPT)
 		res = crypto_skcipher_decrypt(req);
@@ -287,7 +287,7 @@
 	struct bio *bio;
 	int ret, err = 0;
 
-	BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
+	BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
 
 	ctx = fscrypt_get_ctx(inode);
 	if (IS_ERR(ctx))

diff --git a/fs/dax.c b/fs/dax.c
index 90322eb..75ba46d 100644
--- a/fs/dax.c
+++ b/fs/dax.c

@@ -323,7 +323,7 @@
 	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (vmf->pgoff >= size) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -351,7 +351,7 @@
 }
 
 #define NO_SECTOR -1
-#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT))
+#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
 
 static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
 		sector_t sector, bool pmd_entry, bool dirty)
@@ -506,8 +506,8 @@
 	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
 		return 0;
 
-	start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
-	end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
+	start_index = wbc->range_start >> PAGE_SHIFT;
+	end_index = wbc->range_end >> PAGE_SHIFT;
 	pmd_index = DAX_PMD_INDEX(start_index);
 
 	rcu_read_lock();
@@ -642,12 +642,12 @@
 	page = find_get_page(mapping, vmf->pgoff);
 	if (page) {
 		if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
-			page_cache_release(page);
+			put_page(page);
 			return VM_FAULT_RETRY;
 		}
 		if (unlikely(page->mapping != mapping)) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 		size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -711,10 +711,10 @@
 
 	if (page) {
 		unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
-							PAGE_CACHE_SIZE, 0);
+							PAGE_SIZE, 0);
 		delete_from_page_cache(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		page = NULL;
 	}
 
@@ -747,7 +747,7 @@
  unlock_page:
 	if (page) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	goto out;
 }
@@ -1094,7 +1094,7 @@
  * you are truncating a file, the helper function dax_truncate_page() may be
  * more convenient.
  *
- * We work in terms of PAGE_CACHE_SIZE here for commonality with
+ * We work in terms of PAGE_SIZE here for commonality with
  * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
  * took care of disposing of the unnecessary blocks.  Even if the filesystem
  * block size is smaller than PAGE_SIZE, we have to zero the rest of the page
@@ -1104,18 +1104,18 @@
 							get_block_t get_block)
 {
 	struct buffer_head bh;
-	pgoff_t index = from >> PAGE_CACHE_SHIFT;
-	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	pgoff_t index = from >> PAGE_SHIFT;
+	unsigned offset = from & (PAGE_SIZE-1);
 	int err;
 
 	/* Block boundary? Nothing to do */
 	if (!length)
 		return 0;
-	BUG_ON((offset + length) > PAGE_CACHE_SIZE);
+	BUG_ON((offset + length) > PAGE_SIZE);
 
 	memset(&bh, 0, sizeof(bh));
 	bh.b_bdev = inode->i_sb->s_bdev;
-	bh.b_size = PAGE_CACHE_SIZE;
+	bh.b_size = PAGE_SIZE;
 	err = get_block(inode, index, &bh, 0);
 	if (err < 0)
 		return err;
@@ -1123,7 +1123,7 @@
 		struct block_device *bdev = bh.b_bdev;
 		struct blk_dax_ctl dax = {
 			.sector = to_sector(&bh, inode),
-			.size = PAGE_CACHE_SIZE,
+			.size = PAGE_SIZE,
 		};
 
 		if (dax_map_atomic(bdev, &dax) < 0)
@@ -1146,7 +1146,7 @@
  * Similar to block_truncate_page(), this function can be called by a
  * filesystem when it is truncating a DAX file to handle the partial page.
  *
- * We work in terms of PAGE_CACHE_SIZE here for commonality with
+ * We work in terms of PAGE_SIZE here for commonality with
  * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
  * took care of disposing of the unnecessary blocks.  Even if the filesystem
  * block size is smaller than PAGE_SIZE, we have to zero the rest of the page
@@ -1154,7 +1154,7 @@
  */
 int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
 {
-	unsigned length = PAGE_CACHE_ALIGN(from) - from;
+	unsigned length = PAGE_ALIGN(from) - from;
 	return dax_zero_page_range(inode, from, length, get_block);
 }
 EXPORT_SYMBOL_GPL(dax_truncate_page);

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 476f1ec..4720377 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c

@@ -172,7 +172,7 @@
 		 */
 		if (dio->page_errors == 0)
 			dio->page_errors = ret;
-		page_cache_get(page);
+		get_page(page);
 		dio->pages[0] = page;
 		sdio->head = 0;
 		sdio->tail = 1;
@@ -424,7 +424,7 @@
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
 	while (sdio->head < sdio->tail)
-		page_cache_release(dio->pages[sdio->head++]);
+		put_page(dio->pages[sdio->head++]);
 }
 
 /*
@@ -487,7 +487,7 @@
 			if (dio->rw == READ && !PageCompound(page) &&
 					dio->should_dirty)
 				set_page_dirty_lock(page);
-			page_cache_release(page);
+			put_page(page);
 		}
 		err = bio->bi_error;
 		bio_put(bio);
@@ -696,7 +696,7 @@
 		 */
 		if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
 			sdio->pages_in_io--;
-		page_cache_get(sdio->cur_page);
+		get_page(sdio->cur_page);
 		sdio->final_block_in_bio = sdio->cur_page_block +
 			(sdio->cur_page_len >> sdio->blkbits);
 		ret = 0;
@@ -810,13 +810,13 @@
 	 */
 	if (sdio->cur_page) {
 		ret = dio_send_cur_page(dio, sdio, map_bh);
-		page_cache_release(sdio->cur_page);
+		put_page(sdio->cur_page);
 		sdio->cur_page = NULL;
 		if (ret)
 			return ret;
 	}
 
-	page_cache_get(page);		/* It is in dio */
+	get_page(page);		/* It is in dio */
 	sdio->cur_page = page;
 	sdio->cur_page_offset = offset;
 	sdio->cur_page_len = len;
@@ -830,7 +830,7 @@
 	if (sdio->boundary) {
 		ret = dio_send_cur_page(dio, sdio, map_bh);
 		dio_bio_submit(dio, sdio);
-		page_cache_release(sdio->cur_page);
+		put_page(sdio->cur_page);
 		sdio->cur_page = NULL;
 	}
 	return ret;
@@ -947,7 +947,7 @@
 
 				ret = get_more_blocks(dio, sdio, map_bh);
 				if (ret) {
-					page_cache_release(page);
+					put_page(page);
 					goto out;
 				}
 				if (!buffer_mapped(map_bh))
@@ -988,7 +988,7 @@
 
 				/* AKPM: eargh, -ENOTBLK is a hack */
 				if (dio->rw & WRITE) {
-					page_cache_release(page);
+					put_page(page);
 					return -ENOTBLK;
 				}
 
@@ -1001,7 +1001,7 @@
 				if (sdio->block_in_file >=
 						i_size_aligned >> blkbits) {
 					/* We hit eof */
-					page_cache_release(page);
+					put_page(page);
 					goto out;
 				}
 				zero_user(page, from, 1 << blkbits);
@@ -1041,7 +1041,7 @@
 						  sdio->next_block_for_io,
 						  map_bh);
 			if (ret) {
-				page_cache_release(page);
+				put_page(page);
 				goto out;
 			}
 			sdio->next_block_for_io += this_chunk_blocks;
@@ -1057,7 +1057,7 @@
 		}
 
 		/* Drop the ref which was taken in get_user_pages() */
-		page_cache_release(page);
+		put_page(page);
 	}
 out:
 	return ret;
@@ -1281,7 +1281,7 @@
 		ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
 		if (retval == 0)
 			retval = ret2;
-		page_cache_release(sdio.cur_page);
+		put_page(sdio.cur_page);
 		sdio.cur_page = NULL;
 	}
 	if (sdio.bio)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 5191121..1669f62 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c

@@ -343,13 +343,12 @@
 	struct dlm_cluster *cl = NULL;
 	struct dlm_spaces *sps = NULL;
 	struct dlm_comms *cms = NULL;
-	void *gps = NULL;
 
 	cl = kzalloc(sizeof(struct dlm_cluster), GFP_NOFS);
 	sps = kzalloc(sizeof(struct dlm_spaces), GFP_NOFS);
 	cms = kzalloc(sizeof(struct dlm_comms), GFP_NOFS);
 
-	if (!cl || !gps || !sps || !cms)
+	if (!cl || !sps || !cms)
 		goto fail;
 
 	config_group_init_type_name(&cl->group, name, &cluster_type);

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 00640e7..1ab012a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c

@@ -640,7 +640,7 @@
 		con->rx_page = alloc_page(GFP_ATOMIC);
 		if (con->rx_page == NULL)
 			goto out_resched;
-		cbuf_init(&con->cb, PAGE_CACHE_SIZE);
+		cbuf_init(&con->cb, PAGE_SIZE);
 	}
 
 	/*
@@ -657,7 +657,7 @@
 	 * buffer and the start of the currently used section (cb.base)
 	 */
 	if (cbuf_data(&con->cb) >= con->cb.base) {
-		iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
+		iov[0].iov_len = PAGE_SIZE - cbuf_data(&con->cb);
 		iov[1].iov_len = con->cb.base;
 		iov[1].iov_base = page_address(con->rx_page);
 		nvec = 2;
@@ -675,7 +675,7 @@
 	ret = dlm_process_incoming_buffer(con->nodeid,
 					  page_address(con->rx_page),
 					  con->cb.base, con->cb.len,
-					  PAGE_CACHE_SIZE);
+					  PAGE_SIZE);
 	if (ret == -EBADMSG) {
 		log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d",
 			  page_address(con->rx_page), con->cb.base,
@@ -1416,7 +1416,7 @@
 	spin_lock(&con->writequeue_lock);
 	e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
 	if ((&e->list == &con->writequeue) ||
-	    (PAGE_CACHE_SIZE - e->end < len)) {
+	    (PAGE_SIZE - e->end < len)) {
 		e = NULL;
 	} else {
 		offset = e->end;

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 64026e5..d09cb4c 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c

@@ -286,7 +286,7 @@
 		pg = virt_to_page(addr);
 		offset = offset_in_page(addr);
 		sg_set_page(&sg[i], pg, 0, offset);
-		remainder_of_page = PAGE_CACHE_SIZE - offset;
+		remainder_of_page = PAGE_SIZE - offset;
 		if (size >= remainder_of_page) {
 			sg[i].length = remainder_of_page;
 			addr += remainder_of_page;
@@ -400,7 +400,7 @@
 				    struct page *page)
 {
 	return ecryptfs_lower_header_size(crypt_stat) +
-	       ((loff_t)page->index << PAGE_CACHE_SHIFT);
+	       ((loff_t)page->index << PAGE_SHIFT);
 }
 
 /**
@@ -428,7 +428,7 @@
 	size_t extent_size = crypt_stat->extent_size;
 	int rc;
 
-	extent_base = (((loff_t)page_index) * (PAGE_CACHE_SIZE / extent_size));
+	extent_base = (((loff_t)page_index) * (PAGE_SIZE / extent_size));
 	rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
 				(extent_base + extent_offset));
 	if (rc) {
@@ -498,7 +498,7 @@
 	}
 
 	for (extent_offset = 0;
-	     extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
+	     extent_offset < (PAGE_SIZE / crypt_stat->extent_size);
 	     extent_offset++) {
 		rc = crypt_extent(crypt_stat, enc_extent_page, page,
 				  extent_offset, ENCRYPT);
@@ -512,7 +512,7 @@
 	lower_offset = lower_offset_for_page(crypt_stat, page);
 	enc_extent_virt = kmap(enc_extent_page);
 	rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
-				  PAGE_CACHE_SIZE);
+				  PAGE_SIZE);
 	kunmap(enc_extent_page);
 	if (rc < 0) {
 		ecryptfs_printk(KERN_ERR,
@@ -560,7 +560,7 @@
 
 	lower_offset = lower_offset_for_page(crypt_stat, page);
 	page_virt = kmap(page);
-	rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_CACHE_SIZE,
+	rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE,
 				 ecryptfs_inode);
 	kunmap(page);
 	if (rc < 0) {
@@ -571,7 +571,7 @@
 	}
 
 	for (extent_offset = 0;
-	     extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
+	     extent_offset < (PAGE_SIZE / crypt_stat->extent_size);
 	     extent_offset++) {
 		rc = crypt_extent(crypt_stat, page, page,
 				  extent_offset, DECRYPT);
@@ -659,11 +659,11 @@
 	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
 		crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 	else {
-		if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
+		if (PAGE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
 			crypt_stat->metadata_size =
 				ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 		else
-			crypt_stat->metadata_size = PAGE_CACHE_SIZE;
+			crypt_stat->metadata_size = PAGE_SIZE;
 	}
 }
 
@@ -1442,7 +1442,7 @@
 						ECRYPTFS_VALIDATE_HEADER_SIZE);
 	if (rc) {
 		/* metadata is not in the file header, so try xattrs */
-		memset(page_virt, 0, PAGE_CACHE_SIZE);
+		memset(page_virt, 0, PAGE_SIZE);
 		rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
 		if (rc) {
 			printk(KERN_DEBUG "Valid eCryptfs headers not found in "
@@ -1475,7 +1475,7 @@
 	}
 out:
 	if (page_virt) {
-		memset(page_virt, 0, PAGE_CACHE_SIZE);
+		memset(page_virt, 0, PAGE_SIZE);
 		kmem_cache_free(ecryptfs_header_cache, page_virt);
 	}
 	return rc;

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 121114e..224b49e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c

@@ -763,10 +763,10 @@
 	} else { /* ia->ia_size < i_size_read(inode) */
 		/* We're chopping off all the pages down to the page
 		 * in which ia->ia_size is located. Fill in the end of
-		 * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to
-		 * PAGE_CACHE_SIZE with zeros. */
-		size_t num_zeros = (PAGE_CACHE_SIZE
-				    - (ia->ia_size & ~PAGE_CACHE_MASK));
+		 * that page from (ia->ia_size & ~PAGE_MASK) to
+		 * PAGE_SIZE with zeros. */
+		size_t num_zeros = (PAGE_SIZE
+				    - (ia->ia_size & ~PAGE_MASK));
 
 		if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
 			truncate_setsize(inode, ia->ia_size);

diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index c5c84df..3cf1546 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c

@@ -635,8 +635,7 @@
 	if (!s) {
 		printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
 		       "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
-		rc = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 	(*packet_size) = 0;
 	rc = ecryptfs_find_auth_tok_for_sig(
@@ -922,8 +921,7 @@
 	if (!s) {
 		printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
 		       "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
-		rc = -ENOMEM;
-		goto out;
+		return -ENOMEM;
 	}
 	if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
 		printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
@@ -1800,7 +1798,7 @@
 	 * added the our &auth_tok_list */
 	next_packet_is_auth_tok_packet = 1;
 	while (next_packet_is_auth_tok_packet) {
-		size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i);
+		size_t max_packet_size = ((PAGE_SIZE - 8) - i);
 
 		switch (src[i]) {
 		case ECRYPTFS_TAG_3_PACKET_TYPE:

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 8b0b4a7..1698132 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c

@@ -695,12 +695,12 @@
 	{
 		.cache = &ecryptfs_header_cache,
 		.name = "ecryptfs_headers",
-		.size = PAGE_CACHE_SIZE,
+		.size = PAGE_SIZE,
 	},
 	{
 		.cache = &ecryptfs_xattr_cache,
 		.name = "ecryptfs_xattr_cache",
-		.size = PAGE_CACHE_SIZE,
+		.size = PAGE_SIZE,
 	},
 	{
 		.cache = &ecryptfs_key_record_cache,
@@ -818,7 +818,7 @@
 {
 	int rc;
 
-	if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) {
+	if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_SIZE) {
 		rc = -EINVAL;
 		ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is "
 				"larger than the host's page size, and so "
@@ -826,7 +826,7 @@
 				"default eCryptfs extent size is [%u] bytes; "
 				"the page size is [%lu] bytes.\n",
 				ECRYPTFS_DEFAULT_EXTENT_SIZE,
-				(unsigned long)PAGE_CACHE_SIZE);
+				(unsigned long)PAGE_SIZE);
 		goto out;
 	}
 	rc = ecryptfs_init_kmem_caches();

diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 1f58652..e6b1d80 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c

@@ -122,7 +122,7 @@
 				       struct ecryptfs_crypt_stat *crypt_stat)
 {
 	loff_t extent_num_in_page = 0;
-	loff_t num_extents_per_page = (PAGE_CACHE_SIZE
+	loff_t num_extents_per_page = (PAGE_SIZE
 				       / crypt_stat->extent_size);
 	int rc = 0;
 
@@ -138,7 +138,7 @@
 			char *page_virt;
 
 			page_virt = kmap_atomic(page);
-			memset(page_virt, 0, PAGE_CACHE_SIZE);
+			memset(page_virt, 0, PAGE_SIZE);
 			/* TODO: Support more than one header extent */
 			if (view_extent_num == 0) {
 				size_t written;
@@ -164,8 +164,8 @@
 				 - crypt_stat->metadata_size);
 
 			rc = ecryptfs_read_lower_page_segment(
-				page, (lower_offset >> PAGE_CACHE_SHIFT),
-				(lower_offset & ~PAGE_CACHE_MASK),
+				page, (lower_offset >> PAGE_SHIFT),
+				(lower_offset & ~PAGE_MASK),
 				crypt_stat->extent_size, page->mapping->host);
 			if (rc) {
 				printk(KERN_ERR "%s: Error attempting to read "
@@ -198,7 +198,7 @@
 
 	if (!crypt_stat || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
 		rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
-						      PAGE_CACHE_SIZE,
+						      PAGE_SIZE,
 						      page->mapping->host);
 	} else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
 		if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
@@ -215,7 +215,7 @@
 
 		} else {
 			rc = ecryptfs_read_lower_page_segment(
-				page, page->index, 0, PAGE_CACHE_SIZE,
+				page, page->index, 0, PAGE_SIZE,
 				page->mapping->host);
 			if (rc) {
 				printk(KERN_ERR "Error reading page; rc = "
@@ -250,12 +250,12 @@
 	struct inode *inode = page->mapping->host;
 	int end_byte_in_page;
 
-	if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index)
+	if ((i_size_read(inode) / PAGE_SIZE) != page->index)
 		goto out;
-	end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
+	end_byte_in_page = i_size_read(inode) % PAGE_SIZE;
 	if (to > end_byte_in_page)
 		end_byte_in_page = to;
-	zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE);
+	zero_user_segment(page, end_byte_in_page, PAGE_SIZE);
 out:
 	return 0;
 }
@@ -279,7 +279,7 @@
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	struct page *page;
 	loff_t prev_page_end_size;
 	int rc = 0;
@@ -289,14 +289,14 @@
 		return -ENOMEM;
 	*pagep = page;
 
-	prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
+	prev_page_end_size = ((loff_t)index << PAGE_SHIFT);
 	if (!PageUptodate(page)) {
 		struct ecryptfs_crypt_stat *crypt_stat =
 			&ecryptfs_inode_to_private(mapping->host)->crypt_stat;
 
 		if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
 			rc = ecryptfs_read_lower_page_segment(
-				page, index, 0, PAGE_CACHE_SIZE, mapping->host);
+				page, index, 0, PAGE_SIZE, mapping->host);
 			if (rc) {
 				printk(KERN_ERR "%s: Error attempting to read "
 				       "lower page segment; rc = [%d]\n",
@@ -322,7 +322,7 @@
 				SetPageUptodate(page);
 			} else {
 				rc = ecryptfs_read_lower_page_segment(
-					page, index, 0, PAGE_CACHE_SIZE,
+					page, index, 0, PAGE_SIZE,
 					mapping->host);
 				if (rc) {
 					printk(KERN_ERR "%s: Error reading "
@@ -336,9 +336,9 @@
 		} else {
 			if (prev_page_end_size
 			    >= i_size_read(page->mapping->host)) {
-				zero_user(page, 0, PAGE_CACHE_SIZE);
+				zero_user(page, 0, PAGE_SIZE);
 				SetPageUptodate(page);
-			} else if (len < PAGE_CACHE_SIZE) {
+			} else if (len < PAGE_SIZE) {
 				rc = ecryptfs_decrypt_page(page);
 				if (rc) {
 					printk(KERN_ERR "%s: Error decrypting "
@@ -371,11 +371,11 @@
 	 * of page?  Zero it out. */
 	if ((i_size_read(mapping->host) == prev_page_end_size)
 	    && (pos != 0))
-		zero_user(page, 0, PAGE_CACHE_SIZE);
+		zero_user(page, 0, PAGE_SIZE);
 out:
 	if (unlikely(rc)) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		*pagep = NULL;
 	}
 	return rc;
@@ -437,7 +437,7 @@
 	}
 	inode_lock(lower_inode);
 	size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
-					   xattr_virt, PAGE_CACHE_SIZE);
+					   xattr_virt, PAGE_SIZE);
 	if (size < 0)
 		size = 8;
 	put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
@@ -479,8 +479,8 @@
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
 {
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	pgoff_t index = pos >> PAGE_SHIFT;
+	unsigned from = pos & (PAGE_SIZE - 1);
 	unsigned to = from + copied;
 	struct inode *ecryptfs_inode = mapping->host;
 	struct ecryptfs_crypt_stat *crypt_stat =
@@ -500,7 +500,7 @@
 		goto out;
 	}
 	if (!PageUptodate(page)) {
-		if (copied < PAGE_CACHE_SIZE) {
+		if (copied < PAGE_SIZE) {
 			rc = 0;
 			goto out;
 		}
@@ -533,7 +533,7 @@
 		rc = copied;
 out:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return rc;
 }
 

diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 09fe622..158a3a3 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c

@@ -74,7 +74,7 @@
 	loff_t offset;
 	int rc;
 
-	offset = ((((loff_t)page_for_lower->index) << PAGE_CACHE_SHIFT)
+	offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT)
 		  + offset_in_page);
 	virt = kmap(page_for_lower);
 	rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
@@ -123,9 +123,9 @@
 	else
 		pos = offset;
 	while (pos < (offset + size)) {
-		pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT);
-		size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK);
-		size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page);
+		pgoff_t ecryptfs_page_idx = (pos >> PAGE_SHIFT);
+		size_t start_offset_in_page = (pos & ~PAGE_MASK);
+		size_t num_bytes = (PAGE_SIZE - start_offset_in_page);
 		loff_t total_remaining_bytes = ((offset + size) - pos);
 
 		if (fatal_signal_pending(current)) {
@@ -165,7 +165,7 @@
 			 * Fill in zero values to the end of the page */
 			memset(((char *)ecryptfs_page_virt
 				+ start_offset_in_page), 0,
-				PAGE_CACHE_SIZE - start_offset_in_page);
+				PAGE_SIZE - start_offset_in_page);
 		}
 
 		/* pos >= offset, we are now writing the data request */
@@ -186,7 +186,7 @@
 						ecryptfs_page,
 						start_offset_in_page,
 						data_offset);
-		page_cache_release(ecryptfs_page);
+		put_page(ecryptfs_page);
 		if (rc) {
 			printk(KERN_ERR "%s: Error encrypting "
 			       "page; rc = [%d]\n", __func__, rc);
@@ -262,7 +262,7 @@
 	loff_t offset;
 	int rc;
 
-	offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page);
+	offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page);
 	virt = kmap(page_for_ecryptfs);
 	rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
 	if (rc > 0)

diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index dd029d1..553c5d2 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c

@@ -197,8 +197,8 @@
 	efivarfs_sb = sb;
 
 	sb->s_maxbytes          = MAX_LFS_FILESIZE;
-	sb->s_blocksize         = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
+	sb->s_blocksize         = PAGE_SIZE;
+	sb->s_blocksize_bits    = PAGE_SHIFT;
 	sb->s_magic             = EFIVARFS_MAGIC;
 	sb->s_op                = &efivarfs_ops;
 	sb->s_d_op		= &efivarfs_d_ops;

diff --git a/fs/eventfd.c b/fs/eventfd.c
index ed70cf9..1231cd1 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c

@@ -121,8 +121,46 @@
 	u64 count;
 
 	poll_wait(file, &ctx->wqh, wait);
-	smp_rmb();
-	count = ctx->count;
+
+	/*
+	 * All writes to ctx->count occur within ctx->wqh.lock.  This read
+	 * can be done outside ctx->wqh.lock because we know that poll_wait
+	 * takes that lock (through add_wait_queue) if our caller will sleep.
+	 *
+	 * The read _can_ therefore seep into add_wait_queue's critical
+	 * section, but cannot move above it!  add_wait_queue's spin_lock acts
+	 * as an acquire barrier and ensures that the read be ordered properly
+	 * against the writes.  The following CAN happen and is safe:
+	 *
+	 *     poll                               write
+	 *     -----------------                  ------------
+	 *     lock ctx->wqh.lock (in poll_wait)
+	 *     count = ctx->count
+	 *     __add_wait_queue
+	 *     unlock ctx->wqh.lock
+	 *                                        lock ctx->qwh.lock
+	 *                                        ctx->count += n
+	 *                                        if (waitqueue_active)
+	 *                                          wake_up_locked_poll
+	 *                                        unlock ctx->qwh.lock
+	 *     eventfd_poll returns 0
+	 *
+	 * but the following, which would miss a wakeup, cannot happen:
+	 *
+	 *     poll                               write
+	 *     -----------------                  ------------
+	 *     count = ctx->count (INVALID!)
+	 *                                        lock ctx->qwh.lock
+	 *                                        ctx->count += n
+	 *                                        **waitqueue_active is false**
+	 *                                        **no wake_up_locked_poll!**
+	 *                                        unlock ctx->qwh.lock
+	 *     lock ctx->wqh.lock (in poll_wait)
+	 *     __add_wait_queue
+	 *     unlock ctx->wqh.lock
+	 *     eventfd_poll returns 0
+	 */
+	count = READ_ONCE(ctx->count);
 
 	if (count > 0)
 		events |= POLLIN;

diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index e5bb2ab..547b93c 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c

@@ -41,16 +41,16 @@
 static inline void exofs_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr)
 {
 	loff_t last_byte = inode->i_size;
 
-	last_byte -= page_nr << PAGE_CACHE_SHIFT;
-	if (last_byte > PAGE_CACHE_SIZE)
-		last_byte = PAGE_CACHE_SIZE;
+	last_byte -= page_nr << PAGE_SHIFT;
+	if (last_byte > PAGE_SIZE)
+		last_byte = PAGE_SIZE;
 	return last_byte;
 }
 
@@ -85,13 +85,13 @@
 	unsigned chunk_size = exofs_chunk_size(dir);
 	char *kaddr = page_address(page);
 	unsigned offs, rec_len;
-	unsigned limit = PAGE_CACHE_SIZE;
+	unsigned limit = PAGE_SIZE;
 	struct exofs_dir_entry *p;
 	char *error;
 
 	/* if the page is the last one in the directory */
-	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
-		limit = dir->i_size & ~PAGE_CACHE_MASK;
+	if ((dir->i_size >> PAGE_SHIFT) == page->index) {
+		limit = dir->i_size & ~PAGE_MASK;
 		if (limit & (chunk_size - 1))
 			goto Ebadsize;
 		if (!limit)
@@ -138,7 +138,7 @@
 	EXOFS_ERR(
 		"ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
 		"offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
-		dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
 		_LLU(le64_to_cpu(p->inode_no)),
 		rec_len, p->name_len);
 	goto fail;
@@ -147,7 +147,7 @@
 	EXOFS_ERR("ERROR [exofs_check_page]: "
 		"entry in directory(0x%lx) spans the page boundary"
 		"offset=%lu, inode=0x%llx\n",
-		dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
 		_LLU(le64_to_cpu(p->inode_no)));
 fail:
 	SetPageChecked(page);
@@ -237,8 +237,8 @@
 {
 	loff_t pos = ctx->pos;
 	struct inode *inode = file_inode(file);
-	unsigned int offset = pos & ~PAGE_CACHE_MASK;
-	unsigned long n = pos >> PAGE_CACHE_SHIFT;
+	unsigned int offset = pos & ~PAGE_MASK;
+	unsigned long n = pos >> PAGE_SHIFT;
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
 	int need_revalidate = (file->f_version != inode->i_version);
@@ -254,7 +254,7 @@
 		if (IS_ERR(page)) {
 			EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n",
 				  inode->i_ino);
-			ctx->pos += PAGE_CACHE_SIZE - offset;
+			ctx->pos += PAGE_SIZE - offset;
 			return PTR_ERR(page);
 		}
 		kaddr = page_address(page);
@@ -262,7 +262,7 @@
 			if (offset) {
 				offset = exofs_validate_entry(kaddr, offset,
 								chunk_mask);
-				ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset;
+				ctx->pos = (n<<PAGE_SHIFT) + offset;
 			}
 			file->f_version = inode->i_version;
 			need_revalidate = 0;
@@ -449,7 +449,7 @@
 		kaddr = page_address(page);
 		dir_end = kaddr + exofs_last_byte(dir, n);
 		de = (struct exofs_dir_entry *)kaddr;
-		kaddr += PAGE_CACHE_SIZE - reclen;
+		kaddr += PAGE_SIZE - reclen;
 		while ((char *)de <= kaddr) {
 			if ((char *)de == dir_end) {
 				name_len = 0;
@@ -602,7 +602,7 @@
 	kunmap_atomic(kaddr);
 	err = exofs_commit_chunk(page, 0, chunk_size);
 fail:
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 9eaf595..49e1bd0 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c

@@ -317,7 +317,7 @@
 
 	if (!pcol->ios) {
 		int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true,
-					     pcol->pg_first << PAGE_CACHE_SHIFT,
+					     pcol->pg_first << PAGE_SHIFT,
 					     pcol->length, &pcol->ios);
 
 		if (ret)
@@ -383,7 +383,7 @@
 	struct inode *inode = pcol->inode;
 	struct exofs_i_info *oi = exofs_i(inode);
 	loff_t i_size = i_size_read(inode);
-	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	pgoff_t end_index = i_size >> PAGE_SHIFT;
 	size_t len;
 	int ret;
 
@@ -397,9 +397,9 @@
 	pcol->that_locked_page = page;
 
 	if (page->index < end_index)
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 	else if (page->index == end_index)
-		len = i_size & ~PAGE_CACHE_MASK;
+		len = i_size & ~PAGE_MASK;
 	else
 		len = 0;
 
@@ -442,8 +442,8 @@
 			goto fail;
 	}
 
-	if (len != PAGE_CACHE_SIZE)
-		zero_user(page, len, PAGE_CACHE_SIZE - len);
+	if (len != PAGE_SIZE)
+		zero_user(page, len, PAGE_SIZE - len);
 
 	EXOFS_DBGMSG2("    readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
 		     inode->i_ino, page->index, len);
@@ -609,7 +609,7 @@
 
 	if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) {
 		EXOFS_DBGMSG2("index=0x%lx\n", page->index);
-		page_cache_release(page);
+		put_page(page);
 		return;
 	}
 	EXOFS_DBGMSG2("that_locked_page index=0x%lx\n",
@@ -633,7 +633,7 @@
 
 	BUG_ON(pcol->ios);
 	ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false,
-				 pcol->pg_first << PAGE_CACHE_SHIFT,
+				 pcol->pg_first << PAGE_SHIFT,
 				 pcol->length, &pcol->ios);
 	if (unlikely(ret))
 		goto err;
@@ -696,7 +696,7 @@
 	struct inode *inode = pcol->inode;
 	struct exofs_i_info *oi = exofs_i(inode);
 	loff_t i_size = i_size_read(inode);
-	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	pgoff_t end_index = i_size >> PAGE_SHIFT;
 	size_t len;
 	int ret;
 
@@ -708,9 +708,9 @@
 
 	if (page->index < end_index)
 		/* in this case, the page is within the limits of the file */
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 	else {
-		len = i_size & ~PAGE_CACHE_MASK;
+		len = i_size & ~PAGE_MASK;
 
 		if (page->index > end_index || !len) {
 			/* in this case, the page is outside the limits
@@ -790,10 +790,10 @@
 	long start, end, expected_pages;
 	int ret;
 
-	start = wbc->range_start >> PAGE_CACHE_SHIFT;
+	start = wbc->range_start >> PAGE_SHIFT;
 	end = (wbc->range_end == LLONG_MAX) ?
 			start + mapping->nrpages :
-			wbc->range_end >> PAGE_CACHE_SHIFT;
+			wbc->range_end >> PAGE_SHIFT;
 
 	if (start || end)
 		expected_pages = end - start + 1;
@@ -881,15 +881,15 @@
 	}
 
 	 /* read modify write */
-	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
+	if (!PageUptodate(page) && (len != PAGE_SIZE)) {
 		loff_t i_size = i_size_read(mapping->host);
-		pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+		pgoff_t end_index = i_size >> PAGE_SHIFT;
 		size_t rlen;
 
 		if (page->index < end_index)
-			rlen = PAGE_CACHE_SIZE;
+			rlen = PAGE_SIZE;
 		else if (page->index == end_index)
-			rlen = i_size & ~PAGE_CACHE_MASK;
+			rlen = i_size & ~PAGE_MASK;
 		else
 			rlen = 0;
 

diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index c20d77d..622a686 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c

@@ -292,11 +292,11 @@
 out_dir:
 	if (dir_de) {
 		kunmap(dir_page);
-		page_cache_release(dir_page);
+		put_page(dir_page);
 	}
 out_old:
 	kunmap(old_page);
-	page_cache_release(old_page);
+	put_page(old_page);
 out:
 	return err;
 }

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0c6638b..7ff6fcf 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c

@@ -37,7 +37,7 @@
 {
 	unsigned len = le16_to_cpu(dlen);
 
-#if (PAGE_CACHE_SIZE >= 65536)
+#if (PAGE_SIZE >= 65536)
 	if (len == EXT2_MAX_REC_LEN)
 		return 1 << 16;
 #endif
@@ -46,7 +46,7 @@
 
 static inline __le16 ext2_rec_len_to_disk(unsigned len)
 {
-#if (PAGE_CACHE_SIZE >= 65536)
+#if (PAGE_SIZE >= 65536)
 	if (len == (1 << 16))
 		return cpu_to_le16(EXT2_MAX_REC_LEN);
 	else
@@ -67,7 +67,7 @@
 static inline void ext2_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 /*
@@ -79,9 +79,9 @@
 {
 	unsigned last_byte = inode->i_size;
 
-	last_byte -= page_nr << PAGE_CACHE_SHIFT;
-	if (last_byte > PAGE_CACHE_SIZE)
-		last_byte = PAGE_CACHE_SIZE;
+	last_byte -= page_nr << PAGE_SHIFT;
+	if (last_byte > PAGE_SIZE)
+		last_byte = PAGE_SIZE;
 	return last_byte;
 }
 
@@ -118,12 +118,12 @@
 	char *kaddr = page_address(page);
 	u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
 	unsigned offs, rec_len;
-	unsigned limit = PAGE_CACHE_SIZE;
+	unsigned limit = PAGE_SIZE;
 	ext2_dirent *p;
 	char *error;
 
-	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
-		limit = dir->i_size & ~PAGE_CACHE_MASK;
+	if ((dir->i_size >> PAGE_SHIFT) == page->index) {
+		limit = dir->i_size & ~PAGE_MASK;
 		if (limit & (chunk_size - 1))
 			goto Ebadsize;
 		if (!limit)
@@ -176,7 +176,7 @@
 	if (!quiet)
 		ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - "
 			"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
-			dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+			dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
 			(unsigned long) le32_to_cpu(p->inode),
 			rec_len, p->name_len);
 	goto fail;
@@ -186,7 +186,7 @@
 		ext2_error(sb, "ext2_check_page",
 			"entry in directory #%lu spans the page boundary"
 			"offset=%lu, inode=%lu",
-			dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+			dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
 			(unsigned long) le32_to_cpu(p->inode));
 	}
 fail:
@@ -287,8 +287,8 @@
 	loff_t pos = ctx->pos;
 	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
-	unsigned int offset = pos & ~PAGE_CACHE_MASK;
-	unsigned long n = pos >> PAGE_CACHE_SHIFT;
+	unsigned int offset = pos & ~PAGE_MASK;
+	unsigned long n = pos >> PAGE_SHIFT;
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
 	unsigned char *types = NULL;
@@ -309,14 +309,14 @@
 			ext2_error(sb, __func__,
 				   "bad page in #%lu",
 				   inode->i_ino);
-			ctx->pos += PAGE_CACHE_SIZE - offset;
+			ctx->pos += PAGE_SIZE - offset;
 			return PTR_ERR(page);
 		}
 		kaddr = page_address(page);
 		if (unlikely(need_revalidate)) {
 			if (offset) {
 				offset = ext2_validate_entry(kaddr, offset, chunk_mask);
-				ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset;
+				ctx->pos = (n<<PAGE_SHIFT) + offset;
 			}
 			file->f_version = inode->i_version;
 			need_revalidate = 0;
@@ -406,7 +406,7 @@
 		if (++n >= npages)
 			n = 0;
 		/* next page is past the blocks we've got */
-		if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
+		if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) {
 			ext2_error(dir->i_sb, __func__,
 				"dir %lu size %lld exceeds block count %llu",
 				dir->i_ino, dir->i_size,
@@ -511,7 +511,7 @@
 		kaddr = page_address(page);
 		dir_end = kaddr + ext2_last_byte(dir, n);
 		de = (ext2_dirent *)kaddr;
-		kaddr += PAGE_CACHE_SIZE - reclen;
+		kaddr += PAGE_SIZE - reclen;
 		while ((char *)de <= kaddr) {
 			if ((char *)de == dir_end) {
 				/* We hit i_size */
@@ -655,7 +655,7 @@
 	kunmap_atomic(kaddr);
 	err = ext2_commit_chunk(page, 0, chunk_size);
 fail:
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 

diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 7a2be8f..d348439 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c

@@ -398,7 +398,7 @@
 			ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
 		else {
 			kunmap(dir_page);
-			page_cache_release(dir_page);
+			put_page(dir_page);
 		}
 		inode_dec_link_count(old_dir);
 	}
@@ -408,11 +408,11 @@
 out_dir:
 	if (dir_de) {
 		kunmap(dir_page);
-		page_cache_release(dir_page);
+		put_page(dir_page);
 	}
 out_old:
 	kunmap(old_page);
-	page_cache_release(old_page);
+	put_page(old_page);
 out:
 	return err;
 }

diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index ea69ce4..db9ae6e1 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c

@@ -285,10 +285,10 @@
 	       EXT4_XTS_TWEAK_SIZE - sizeof(index));
 
 	sg_init_table(&dst, 1);
-	sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
+	sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
 	sg_init_table(&src, 1);
-	sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
-	skcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
+	sg_set_page(&src, src_page, PAGE_SIZE, 0);
+	skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE,
 				   xts_tweak);
 	if (rw == EXT4_DECRYPT)
 		res = crypto_skcipher_decrypt(req);
@@ -400,7 +400,7 @@
 		 (unsigned long) inode->i_ino, lblk, len);
 #endif
 
-	BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
+	BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
 
 	ctx = ext4_get_crypto_ctx(inode, GFP_NOFS);
 	if (IS_ERR(ctx))

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ebfcb89..4173bfe 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c

@@ -160,13 +160,13 @@
 		err = ext4_map_blocks(NULL, inode, &map, 0);
 		if (err > 0) {
 			pgoff_t index = map.m_pblk >>
-					(PAGE_CACHE_SHIFT - inode->i_blkbits);
+					(PAGE_SHIFT - inode->i_blkbits);
 			if (!ra_has_index(&file->f_ra, index))
 				page_cache_sync_readahead(
 					sb->s_bdev->bd_inode->i_mapping,
 					&file->f_ra, file,
 					index, 1);
-			file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
+			file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
 			bh = ext4_bread(NULL, inode, map.m_lblk, 0);
 			if (IS_ERR(bh)) {
 				err = PTR_ERR(bh);
@@ -290,7 +290,7 @@
 static inline int is_32bit_api(void)
 {
 #ifdef CONFIG_COMPAT
-	return is_compat_task();
+	return in_compat_syscall();
 #else
 	return (BITS_PER_LONG == 32);
 #endif

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6a857af0..349afeb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h

@@ -1984,7 +1984,7 @@
 {
 	unsigned len = le16_to_cpu(dlen);
 
-#if (PAGE_CACHE_SIZE >= 65536)
+#if (PAGE_SIZE >= 65536)
 	if (len == EXT4_MAX_REC_LEN || len == 0)
 		return blocksize;
 	return (len & 65532) | ((len & 3) << 16);
@@ -1997,7 +1997,7 @@
 {
 	if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
 		BUG();
-#if (PAGE_CACHE_SIZE >= 65536)
+#if (PAGE_SIZE >= 65536)
 	if (len < 65536)
 		return cpu_to_le16(len);
 	if (len == blocksize) {

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index edba9fb..fa2208b 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c

@@ -432,8 +432,8 @@
 	lastoff = startoff;
 	endoff = (loff_t)end_blk << blkbits;
 
-	index = startoff >> PAGE_CACHE_SHIFT;
-	end = endoff >> PAGE_CACHE_SHIFT;
+	index = startoff >> PAGE_SHIFT;
+	end = endoff >> PAGE_SHIFT;
 
 	pagevec_init(&pvec, 0);
 	do {

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 7cbdd375..7bc6c85 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c

@@ -482,7 +482,7 @@
 	ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr);
-	zero_user_segment(page, len, PAGE_CACHE_SIZE);
+	zero_user_segment(page, len, PAGE_SIZE);
 	SetPageUptodate(page);
 	brelse(iloc.bh);
 
@@ -507,7 +507,7 @@
 	if (!page->index)
 		ret = ext4_read_inline_page(inode, page);
 	else if (!PageUptodate(page)) {
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		zero_user_segment(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
 	}
 
@@ -595,7 +595,7 @@
 
 	if (ret) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		page = NULL;
 		ext4_orphan_add(handle, inode);
 		up_write(&EXT4_I(inode)->xattr_sem);
@@ -621,7 +621,7 @@
 out:
 	if (page) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	if (sem_held)
 		up_write(&EXT4_I(inode)->xattr_sem);
@@ -690,7 +690,7 @@
 	if (!ext4_has_inline_data(inode)) {
 		ret = 0;
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		goto out_up_read;
 	}
 
@@ -815,7 +815,7 @@
 	if (ret) {
 		up_read(&EXT4_I(inode)->xattr_sem);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		ext4_truncate_failed_write(inode);
 		return ret;
 	}
@@ -829,7 +829,7 @@
 	up_read(&EXT4_I(inode)->xattr_sem);
 	if (page) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return ret;
 }
@@ -919,7 +919,7 @@
 out_release_page:
 	up_read(&EXT4_I(inode)->xattr_sem);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 out_journal:
 	ext4_journal_stop(handle);
 out:
@@ -947,7 +947,7 @@
 		i_size_changed = 1;
 	}
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	/*
 	 * Don't mark the inode dirty under page lock. First, it unnecessarily

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8710174..981a1fc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c

@@ -1055,7 +1055,7 @@
 static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
 				  get_block_t *get_block)
 {
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned from = pos & (PAGE_SIZE - 1);
 	unsigned to = from + len;
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
@@ -1067,15 +1067,15 @@
 	bool decrypt = false;
 
 	BUG_ON(!PageLocked(page));
-	BUG_ON(from > PAGE_CACHE_SIZE);
-	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from > PAGE_SIZE);
+	BUG_ON(to > PAGE_SIZE);
 	BUG_ON(from > to);
 
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
 	head = page_buffers(page);
 	bbits = ilog2(blocksize);
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
 
 	for (bh = head, block_start = 0; bh != head || !block_start;
 	    block++, block_start = block_end, bh = bh->b_this_page) {
@@ -1157,8 +1157,8 @@
 	 * we allocate blocks but write fails for some reason
 	 */
 	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
-	index = pos >> PAGE_CACHE_SHIFT;
-	from = pos & (PAGE_CACHE_SIZE - 1);
+	index = pos >> PAGE_SHIFT;
+	from = pos & (PAGE_SIZE - 1);
 	to = from + len;
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -1186,7 +1186,7 @@
 retry_journal:
 	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
 	if (IS_ERR(handle)) {
-		page_cache_release(page);
+		put_page(page);
 		return PTR_ERR(handle);
 	}
 
@@ -1194,7 +1194,7 @@
 	if (page->mapping != mapping) {
 		/* The page got truncated from under us */
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		ext4_journal_stop(handle);
 		goto retry_grab;
 	}
@@ -1250,7 +1250,7 @@
 		if (ret == -ENOSPC &&
 		    ext4_should_retry_alloc(inode->i_sb, &retries))
 			goto retry_journal;
-		page_cache_release(page);
+		put_page(page);
 		return ret;
 	}
 	*pagep = page;
@@ -1293,7 +1293,7 @@
 		ret = ext4_jbd2_file_inode(handle, inode);
 		if (ret) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			goto errout;
 		}
 	}
@@ -1313,7 +1313,7 @@
 	 */
 	i_size_changed = ext4_update_inode_size(inode, pos + copied);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (old_size < pos)
 		pagecache_isize_extended(inode, old_size, pos);
@@ -1397,7 +1397,7 @@
 	int size_changed = 0;
 
 	trace_ext4_journalled_write_end(inode, pos, len, copied);
-	from = pos & (PAGE_CACHE_SIZE - 1);
+	from = pos & (PAGE_SIZE - 1);
 	to = from + len;
 
 	BUG_ON(!ext4_handle_valid(handle));
@@ -1421,7 +1421,7 @@
 	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 	EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (old_size < pos)
 		pagecache_isize_extended(inode, old_size, pos);
@@ -1535,7 +1535,7 @@
 	int num_clusters;
 	ext4_fsblk_t lblk;
 
-	BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+	BUG_ON(stop > PAGE_SIZE || stop < length);
 
 	head = page_buffers(page);
 	bh = head;
@@ -1551,7 +1551,7 @@
 			clear_buffer_delay(bh);
 		} else if (contiguous_blks) {
 			lblk = page->index <<
-			       (PAGE_CACHE_SHIFT - inode->i_blkbits);
+			       (PAGE_SHIFT - inode->i_blkbits);
 			lblk += (curr_off >> inode->i_blkbits) -
 				contiguous_blks;
 			ext4_es_remove_extent(inode, lblk, contiguous_blks);
@@ -1561,7 +1561,7 @@
 	} while ((bh = bh->b_this_page) != head);
 
 	if (contiguous_blks) {
-		lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
 		lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
 		ext4_es_remove_extent(inode, lblk, contiguous_blks);
 	}
@@ -1570,7 +1570,7 @@
 	 * need to release the reserved space for that cluster. */
 	num_clusters = EXT4_NUM_B2C(sbi, to_release);
 	while (num_clusters > 0) {
-		lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+		lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
 			((num_clusters - 1) << sbi->s_cluster_bits);
 		if (sbi->s_cluster_ratio == 1 ||
 		    !ext4_find_delalloc_cluster(inode, lblk))
@@ -1617,8 +1617,8 @@
 	end   = mpd->next_page - 1;
 	if (invalidate) {
 		ext4_lblk_t start, last;
-		start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-		last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		start = index << (PAGE_SHIFT - inode->i_blkbits);
+		last = end << (PAGE_SHIFT - inode->i_blkbits);
 		ext4_es_remove_extent(inode, start, last - start + 1);
 	}
 
@@ -1634,7 +1634,7 @@
 			BUG_ON(!PageLocked(page));
 			BUG_ON(PageWriteback(page));
 			if (invalidate) {
-				block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+				block_invalidatepage(page, 0, PAGE_SIZE);
 				ClearPageUptodate(page);
 			}
 			unlock_page(page);
@@ -2005,10 +2005,10 @@
 
 	trace_ext4_writepage(page);
 	size = i_size_read(inode);
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
+	if (page->index == size >> PAGE_SHIFT)
+		len = size & ~PAGE_MASK;
 	else
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 
 	page_bufs = page_buffers(page);
 	/*
@@ -2032,7 +2032,7 @@
 				   ext4_bh_delay_or_unwritten)) {
 		redirty_page_for_writepage(wbc, page);
 		if ((current->flags & PF_MEMALLOC) ||
-		    (inode->i_sb->s_blocksize == PAGE_CACHE_SIZE)) {
+		    (inode->i_sb->s_blocksize == PAGE_SIZE)) {
 			/*
 			 * For memory cleaning there's no point in writing only
 			 * some buffers. So just bail out. Warn if we came here
@@ -2074,10 +2074,10 @@
 	int err;
 
 	BUG_ON(page->index != mpd->first_page);
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
+	if (page->index == size >> PAGE_SHIFT)
+		len = size & ~PAGE_MASK;
 	else
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 	clear_page_dirty_for_io(page);
 	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
 	if (!err)
@@ -2211,7 +2211,7 @@
 	int nr_pages, i;
 	struct inode *inode = mpd->inode;
 	struct buffer_head *head, *bh;
-	int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
+	int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
 	pgoff_t start, end;
 	ext4_lblk_t lblk;
 	sector_t pblock;
@@ -2272,7 +2272,7 @@
 			 * supports blocksize < pagesize as we will try to
 			 * convert potentially unmapped parts of inode.
 			 */
-			mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
+			mpd->io_submit.io_end->size += PAGE_SIZE;
 			/* Page fully mapped - let IO run! */
 			err = mpage_submit_page(mpd, page);
 			if (err < 0) {
@@ -2424,7 +2424,7 @@
 	 * Update on-disk size after IO is submitted.  Races with
 	 * truncate are avoided by checking i_size under i_data_sem.
 	 */
-	disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
+	disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
 	if (disksize > EXT4_I(inode)->i_disksize) {
 		int err2;
 		loff_t i_size;
@@ -2560,7 +2560,7 @@
 			mpd->next_page = page->index + 1;
 			/* Add all dirty buffers to mpd */
 			lblk = ((ext4_lblk_t)page->index) <<
-				(PAGE_CACHE_SHIFT - blkbits);
+				(PAGE_SHIFT - blkbits);
 			head = page_buffers(page);
 			err = mpage_process_page_bufs(mpd, head, head, lblk);
 			if (err <= 0)
@@ -2645,7 +2645,7 @@
 		 * We may need to convert up to one extent per block in
 		 * the page and we may dirty the inode.
 		 */
-		rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
+		rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits);
 	}
 
 	/*
@@ -2676,8 +2676,8 @@
 		mpd.first_page = writeback_index;
 		mpd.last_page = -1;
 	} else {
-		mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT;
-		mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT;
+		mpd.first_page = wbc->range_start >> PAGE_SHIFT;
+		mpd.last_page = wbc->range_end >> PAGE_SHIFT;
 	}
 
 	mpd.inode = inode;
@@ -2836,7 +2836,7 @@
 	struct inode *inode = mapping->host;
 	handle_t *handle;
 
-	index = pos >> PAGE_CACHE_SHIFT;
+	index = pos >> PAGE_SHIFT;
 
 	if (ext4_nonda_switch(inode->i_sb)) {
 		*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -2879,7 +2879,7 @@
 	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
 				ext4_da_write_credits(inode, pos, len));
 	if (IS_ERR(handle)) {
-		page_cache_release(page);
+		put_page(page);
 		return PTR_ERR(handle);
 	}
 
@@ -2887,7 +2887,7 @@
 	if (page->mapping != mapping) {
 		/* The page got truncated from under us */
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		ext4_journal_stop(handle);
 		goto retry_grab;
 	}
@@ -2915,7 +2915,7 @@
 		    ext4_should_retry_alloc(inode->i_sb, &retries))
 			goto retry_journal;
 
-		page_cache_release(page);
+		put_page(page);
 		return ret;
 	}
 
@@ -2963,7 +2963,7 @@
 				      len, copied, page, fsdata);
 
 	trace_ext4_da_write_end(inode, pos, len, copied);
-	start = pos & (PAGE_CACHE_SIZE - 1);
+	start = pos & (PAGE_SIZE - 1);
 	end = start + copied - 1;
 
 	/*
@@ -3185,7 +3185,7 @@
 	/*
 	 * If it's a full truncate we just forget about the pending dirtying
 	 */
-	if (offset == 0 && length == PAGE_CACHE_SIZE)
+	if (offset == 0 && length == PAGE_SIZE)
 		ClearPageChecked(page);
 
 	return jbd2_journal_invalidatepage(journal, page, offset, length);
@@ -3554,8 +3554,8 @@
 static int __ext4_block_zero_page_range(handle_t *handle,
 		struct address_space *mapping, loff_t from, loff_t length)
 {
-	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
-	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	ext4_fsblk_t index = from >> PAGE_SHIFT;
+	unsigned offset = from & (PAGE_SIZE-1);
 	unsigned blocksize, pos;
 	ext4_lblk_t iblock;
 	struct inode *inode = mapping->host;
@@ -3563,14 +3563,14 @@
 	struct page *page;
 	int err = 0;
 
-	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+	page = find_or_create_page(mapping, from >> PAGE_SHIFT,
 				   mapping_gfp_constraint(mapping, ~__GFP_FS));
 	if (!page)
 		return -ENOMEM;
 
 	blocksize = inode->i_sb->s_blocksize;
 
-	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
 
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
@@ -3612,7 +3612,7 @@
 		    ext4_encrypted_inode(inode)) {
 			/* We expect the key to be set. */
 			BUG_ON(!ext4_has_encryption_key(inode));
-			BUG_ON(blocksize != PAGE_CACHE_SIZE);
+			BUG_ON(blocksize != PAGE_SIZE);
 			WARN_ON_ONCE(ext4_decrypt(page));
 		}
 	}
@@ -3636,7 +3636,7 @@
 
 unlock:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 
@@ -3651,7 +3651,7 @@
 		struct address_space *mapping, loff_t from, loff_t length)
 {
 	struct inode *inode = mapping->host;
-	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned offset = from & (PAGE_SIZE-1);
 	unsigned blocksize = inode->i_sb->s_blocksize;
 	unsigned max = blocksize - (offset & (blocksize - 1));
 
@@ -3676,7 +3676,7 @@
 static int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from)
 {
-	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned offset = from & (PAGE_SIZE-1);
 	unsigned length;
 	unsigned blocksize;
 	struct inode *inode = mapping->host;
@@ -3814,7 +3814,7 @@
 	 */
 	if (offset + length > inode->i_size) {
 		length = inode->i_size +
-		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+		   PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) -
 		   offset;
 	}
 
@@ -4889,23 +4889,23 @@
 	tid_t commit_tid = 0;
 	int ret;
 
-	offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+	offset = inode->i_size & (PAGE_SIZE - 1);
 	/*
 	 * All buffers in the last page remain valid? Then there's nothing to
-	 * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
+	 * do. We do the check mainly to optimize the common PAGE_SIZE ==
 	 * blocksize case
 	 */
-	if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
+	if (offset > PAGE_SIZE - (1 << inode->i_blkbits))
 		return;
 	while (1) {
 		page = find_lock_page(inode->i_mapping,
-				      inode->i_size >> PAGE_CACHE_SHIFT);
+				      inode->i_size >> PAGE_SHIFT);
 		if (!page)
 			return;
 		ret = __ext4_journalled_invalidatepage(page, offset,
-						PAGE_CACHE_SIZE - offset);
+						PAGE_SIZE - offset);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		if (ret != -EBUSY)
 			return;
 		commit_tid = 0;
@@ -5544,10 +5544,10 @@
 		goto out;
 	}
 
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
+	if (page->index == size >> PAGE_SHIFT)
+		len = size & ~PAGE_MASK;
 	else
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 	/*
 	 * Return if we have all the buffers mapped. This avoids the need to do
 	 * journal_start/journal_stop which can block and take a long time
@@ -5578,7 +5578,7 @@
 	ret = block_page_mkwrite(vma, vmf, get_block);
 	if (!ret && ext4_should_journal_data(inode)) {
 		if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
-			  PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
+			  PAGE_SIZE, NULL, do_journal_get_write_access)) {
 			unlock_page(page);
 			ret = VM_FAULT_SIGBUS;
 			ext4_journal_stop(handle);

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 50e05df..eeeade7 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c

@@ -119,7 +119,7 @@
  *
  *
  * one block each for bitmap and buddy information.  So for each group we
- * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE /
+ * take up 2 blocks. A page can contain blocks_per_page (PAGE_SIZE /
  * blocksize) blocks.  So it can have information regarding groups_per_page
  * which is blocks_per_page/2
  *
@@ -807,7 +807,7 @@
  *
  * one block each for bitmap and buddy information.
  * So for each group we take up 2 blocks. A page can
- * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
+ * contain blocks_per_page (PAGE_SIZE / blocksize)  blocks.
  * So it can have information regarding groups_per_page which
  * is blocks_per_page/2
  *
@@ -839,7 +839,7 @@
 	sb = inode->i_sb;
 	ngroups = ext4_get_groups_count(sb);
 	blocksize = 1 << inode->i_blkbits;
-	blocks_per_page = PAGE_CACHE_SIZE / blocksize;
+	blocks_per_page = PAGE_SIZE / blocksize;
 
 	groups_per_page = blocks_per_page >> 1;
 	if (groups_per_page == 0)
@@ -993,7 +993,7 @@
 	e4b->bd_buddy_page = NULL;
 	e4b->bd_bitmap_page = NULL;
 
-	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
 	/*
 	 * the buddy cache inode stores the block bitmap
 	 * and buddy information in consecutive blocks.
@@ -1028,11 +1028,11 @@
 {
 	if (e4b->bd_bitmap_page) {
 		unlock_page(e4b->bd_bitmap_page);
-		page_cache_release(e4b->bd_bitmap_page);
+		put_page(e4b->bd_bitmap_page);
 	}
 	if (e4b->bd_buddy_page) {
 		unlock_page(e4b->bd_buddy_page);
-		page_cache_release(e4b->bd_buddy_page);
+		put_page(e4b->bd_buddy_page);
 	}
 }
 
@@ -1125,7 +1125,7 @@
 	might_sleep();
 	mb_debug(1, "load group %u\n", group);
 
-	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
 	grp = ext4_get_group_info(sb, group);
 
 	e4b->bd_blkbits = sb->s_blocksize_bits;
@@ -1167,7 +1167,7 @@
 			 * is yet to initialize the same. So
 			 * wait for it to initialize.
 			 */
-			page_cache_release(page);
+			put_page(page);
 		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
@@ -1203,7 +1203,7 @@
 	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
-			page_cache_release(page);
+			put_page(page);
 		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
@@ -1238,11 +1238,11 @@
 
 err:
 	if (page)
-		page_cache_release(page);
+		put_page(page);
 	if (e4b->bd_bitmap_page)
-		page_cache_release(e4b->bd_bitmap_page);
+		put_page(e4b->bd_bitmap_page);
 	if (e4b->bd_buddy_page)
-		page_cache_release(e4b->bd_buddy_page);
+		put_page(e4b->bd_buddy_page);
 	e4b->bd_buddy = NULL;
 	e4b->bd_bitmap = NULL;
 	return ret;
@@ -1257,9 +1257,9 @@
 static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 {
 	if (e4b->bd_bitmap_page)
-		page_cache_release(e4b->bd_bitmap_page);
+		put_page(e4b->bd_bitmap_page);
 	if (e4b->bd_buddy_page)
-		page_cache_release(e4b->bd_buddy_page);
+		put_page(e4b->bd_buddy_page);
 }
 
 
@@ -2833,8 +2833,8 @@
 		/* No more items in the per group rb tree
 		 * balance refcounts from ext4_mb_free_metadata()
 		 */
-		page_cache_release(e4b.bd_buddy_page);
-		page_cache_release(e4b.bd_bitmap_page);
+		put_page(e4b.bd_buddy_page);
+		put_page(e4b.bd_bitmap_page);
 	}
 	ext4_unlock_group(sb, entry->efd_group);
 	kmem_cache_free(ext4_free_data_cachep, entry);
@@ -4385,9 +4385,9 @@
 		ext4_mb_put_pa(ac, ac->ac_sb, pa);
 	}
 	if (ac->ac_bitmap_page)
-		page_cache_release(ac->ac_bitmap_page);
+		put_page(ac->ac_bitmap_page);
 	if (ac->ac_buddy_page)
-		page_cache_release(ac->ac_buddy_page);
+		put_page(ac->ac_buddy_page);
 	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
 		mutex_unlock(&ac->ac_lg->lg_mutex);
 	ext4_mb_collect_stats(ac);
@@ -4599,8 +4599,8 @@
 		 * otherwise we'll refresh it from
 		 * on-disk bitmap and lose not-yet-available
 		 * blocks */
-		page_cache_get(e4b->bd_buddy_page);
-		page_cache_get(e4b->bd_bitmap_page);
+		get_page(e4b->bd_buddy_page);
+		get_page(e4b->bd_bitmap_page);
 	}
 	while (*n) {
 		parent = *n;

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 796ff0e..325cef4 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c

@@ -156,7 +156,7 @@
 	page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
 	if (!page[1]) {
 		unlock_page(page[0]);
-		page_cache_release(page[0]);
+		put_page(page[0]);
 		return -ENOMEM;
 	}
 	/*
@@ -192,7 +192,7 @@
 		create_empty_buffers(page, blocksize, 0);
 
 	head = page_buffers(page);
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits);
 	for (bh = head, block_start = 0; bh != head || !block_start;
 	     block++, block_start = block_end, bh = bh->b_this_page) {
 		block_end = block_start + blocksize;
@@ -268,7 +268,7 @@
 	int i, err2, jblocks, retries = 0;
 	int replaced_count = 0;
 	int from = data_offset_in_page << orig_inode->i_blkbits;
-	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
 	struct super_block *sb = orig_inode->i_sb;
 	struct buffer_head *bh = NULL;
 
@@ -404,9 +404,9 @@
 
 unlock_pages:
 	unlock_page(pagep[0]);
-	page_cache_release(pagep[0]);
+	put_page(pagep[0]);
 	unlock_page(pagep[1]);
-	page_cache_release(pagep[1]);
+	put_page(pagep[1]);
 stop_journal:
 	ext4_journal_stop(handle);
 	if (*err == -ENOSPC &&
@@ -561,7 +561,7 @@
 	struct inode *orig_inode = file_inode(o_filp);
 	struct inode *donor_inode = file_inode(d_filp);
 	struct ext4_ext_path *path = NULL;
-	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+	int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
 	ext4_lblk_t o_end, o_start = orig_blk;
 	ext4_lblk_t d_start = donor_blk;
 	int ret;
@@ -655,9 +655,9 @@
 		if (o_end - o_start < cur_len)
 			cur_len = o_end - o_start;
 
-		orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
+		orig_page_index = o_start >> (PAGE_SHIFT -
 					       orig_inode->i_blkbits);
-		donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
+		donor_page_index = d_start >> (PAGE_SHIFT -
 					       donor_inode->i_blkbits);
 		offset_in_page = o_start % blocks_per_page;
 		if (cur_len > blocks_per_page- offset_in_page)

diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b39d9c7..e4fc8ea 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c

@@ -433,8 +433,8 @@
 	 * the page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	if (len < PAGE_CACHE_SIZE)
-		zero_user_segment(page, len, PAGE_CACHE_SIZE);
+	if (len < PAGE_SIZE)
+		zero_user_segment(page, len, PAGE_SIZE);
 	/*
 	 * In the first loop we prepare and mark buffers to submit. We have to
 	 * mark all buffers in the page before submitting so that

diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index bc7642f..dc54a4b 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c

@@ -23,7 +23,7 @@
  *
  * then this code just gives up and calls the buffer_head-based read function.
  * It does handle a page which has holes at the end - that is a common case:
- * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
+ * the end-of-file on blocksize < PAGE_SIZE setups.
  *
  */
 
@@ -140,7 +140,7 @@
 
 	struct inode *inode = mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
 	const unsigned blocksize = 1 << blkbits;
 	sector_t block_in_file;
 	sector_t last_block;
@@ -173,7 +173,7 @@
 		if (page_has_buffers(page))
 			goto confused;
 
-		block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+		block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
 		last_block = block_in_file + nr_pages * blocks_per_page;
 		last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
 		if (last_block > last_block_in_file)
@@ -217,7 +217,7 @@
 				set_error_page:
 					SetPageError(page);
 					zero_user_segment(page, 0,
-							  PAGE_CACHE_SIZE);
+							  PAGE_SIZE);
 					unlock_page(page);
 					goto next_page;
 				}
@@ -250,7 +250,7 @@
 		}
 		if (first_hole != blocks_per_page) {
 			zero_user_segment(page, first_hole << blkbits,
-					  PAGE_CACHE_SIZE);
+					  PAGE_SIZE);
 			if (first_hole == 0) {
 				SetPageUptodate(page);
 				unlock_page(page);
@@ -319,7 +319,7 @@
 			unlock_page(page);
 	next_page:
 		if (pages)
-			page_cache_release(page);
+			put_page(page);
 	}
 	BUG_ON(pages && !list_empty(pages));
 	if (bio)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index df21361..304c712 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c

@@ -1785,7 +1785,7 @@
 		int blocksize =
 			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
-		if (blocksize < PAGE_CACHE_SIZE) {
+		if (blocksize < PAGE_SIZE) {
 			ext4_msg(sb, KERN_ERR, "can't mount with "
 				 "dioread_nolock if block size != PAGE_SIZE");
 			return 0;
@@ -3809,7 +3809,7 @@
 	}
 
 	if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
-	    (blocksize != PAGE_CACHE_SIZE)) {
+	    (blocksize != PAGE_SIZE)) {
 		ext4_msg(sb, KERN_ERR,
 			 "Unsupported blocksize for fs encryption");
 		goto failed_mount_wq;

diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 6f7ee30..75ed5c2 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c

@@ -80,12 +80,12 @@
 	if (res <= plen)
 		paddr[res] = '\0';
 	if (cpage)
-		page_cache_release(cpage);
+		put_page(cpage);
 	set_delayed_call(done, kfree_link, paddr);
 	return paddr;
 errout:
 	if (cpage)
-		page_cache_release(cpage);
+		put_page(cpage);
 	kfree(paddr);
 	return ERR_PTR(res);
 }

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index e5c762b..53fec08 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c

@@ -223,7 +223,7 @@
 	/* Allocate a new bio */
 	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->rw));
 
-	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 		bio_put(bio);
 		return -EFAULT;
 	}
@@ -265,8 +265,8 @@
 
 	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
 
-	if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) <
-							PAGE_CACHE_SIZE) {
+	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
+							PAGE_SIZE) {
 		__submit_merged_bio(io);
 		goto alloc_new;
 	}
@@ -406,7 +406,7 @@
 	 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
 	 */
 	if (dn.data_blkaddr == NEW_ADDR) {
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		zero_user_segment(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
 		unlock_page(page);
 		return page;
@@ -517,7 +517,7 @@
 		goto got_it;
 
 	if (dn.data_blkaddr == NEW_ADDR) {
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		zero_user_segment(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
 	} else {
 		f2fs_put_page(page, 1);
@@ -530,8 +530,8 @@
 	}
 got_it:
 	if (new_i_size && i_size_read(inode) <
-				((loff_t)(index + 1) << PAGE_CACHE_SHIFT)) {
-		i_size_write(inode, ((loff_t)(index + 1) << PAGE_CACHE_SHIFT));
+				((loff_t)(index + 1) << PAGE_SHIFT)) {
+		i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
 		/* Only the directory inode sets new_i_size */
 		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
 	}
@@ -570,9 +570,9 @@
 	/* update i_size */
 	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
 							dn->ofs_in_node;
-	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
+	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
 		i_size_write(dn->inode,
-				((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
+				((loff_t)(fofs + 1) << PAGE_SHIFT));
 	return 0;
 }
 
@@ -971,7 +971,7 @@
 				goto confused;
 			}
 		} else {
-			zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+			zero_user_segment(page, 0, PAGE_SIZE);
 			SetPageUptodate(page);
 			unlock_page(page);
 			goto next_page;
@@ -1021,7 +1021,7 @@
 		goto next_page;
 set_error_page:
 		SetPageError(page);
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		zero_user_segment(page, 0, PAGE_SIZE);
 		unlock_page(page);
 		goto next_page;
 confused:
@@ -1032,7 +1032,7 @@
 		unlock_page(page);
 next_page:
 		if (pages)
-			page_cache_release(page);
+			put_page(page);
 	}
 	BUG_ON(pages && !list_empty(pages));
 	if (bio)
@@ -1136,7 +1136,7 @@
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	loff_t i_size = i_size_read(inode);
 	const pgoff_t end_index = ((unsigned long long) i_size)
-							>> PAGE_CACHE_SHIFT;
+							>> PAGE_SHIFT;
 	unsigned offset = 0;
 	bool need_balance_fs = false;
 	int err = 0;
@@ -1157,11 +1157,11 @@
 	 * If the offset is out-of-range of file size,
 	 * this page does not have to be written to disk.
 	 */
-	offset = i_size & (PAGE_CACHE_SIZE - 1);
+	offset = i_size & (PAGE_SIZE - 1);
 	if ((page->index >= end_index + 1) || !offset)
 		goto out;
 
-	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+	zero_user_segment(page, offset, PAGE_SIZE);
 write:
 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
 		goto redirty_out;
@@ -1267,8 +1267,8 @@
 			cycled = 0;
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		index = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			range_whole = 1;
 		cycled = 1; /* ignore range_cyclic tests */
@@ -1448,11 +1448,11 @@
 	 * the block addresses when there is no need to fill the page.
 	 */
 	if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) &&
-					len == PAGE_CACHE_SIZE)
+					len == PAGE_SIZE)
 		return 0;
 
 	if (f2fs_has_inline_data(inode) ||
-			(pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+			(pos & PAGE_MASK) >= i_size_read(inode)) {
 		f2fs_lock_op(sbi);
 		locked = true;
 	}
@@ -1513,7 +1513,7 @@
 	struct inode *inode = mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct page *page = NULL;
-	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
+	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
 	bool need_balance = false;
 	block_t blkaddr = NULL_ADDR;
 	int err = 0;
@@ -1561,22 +1561,22 @@
 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
 		f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
 
-	if (len == PAGE_CACHE_SIZE)
+	if (len == PAGE_SIZE)
 		goto out_update;
 	if (PageUptodate(page))
 		goto out_clear;
 
-	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
-		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+	if ((pos & PAGE_MASK) >= i_size_read(inode)) {
+		unsigned start = pos & (PAGE_SIZE - 1);
 		unsigned end = start + len;
 
 		/* Reading beyond i_size is simple: memset to zero */
-		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
+		zero_user_segments(page, 0, start, end, PAGE_SIZE);
 		goto out_update;
 	}
 
 	if (blkaddr == NEW_ADDR) {
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		zero_user_segment(page, 0, PAGE_SIZE);
 	} else {
 		struct f2fs_io_info fio = {
 			.sbi = sbi,
@@ -1688,7 +1688,7 @@
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
 	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
-		(offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))
+		(offset % PAGE_SIZE || length != PAGE_SIZE))
 		return;
 
 	if (PageDirty(page)) {

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 4fb6ef8..f4a61a5 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c

@@ -164,7 +164,7 @@
 
 	/* build curseg */
 	si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
-	si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE;
+	si->base_mem += PAGE_SIZE * NR_CURSEG_TYPE;
 
 	/* build dirty segmap */
 	si->base_mem += sizeof(struct dirty_seglist_info);
@@ -201,9 +201,9 @@
 
 	si->page_mem = 0;
 	npages = NODE_MAPPING(sbi)->nrpages;
-	si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
+	si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
 	npages = META_MAPPING(sbi)->nrpages;
-	si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
+	si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
 }
 
 static int stat_show(struct seq_file *s, void *v)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 80641ad..af81957 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c

@@ -17,8 +17,8 @@
 
 static unsigned long dir_blocks(struct inode *inode)
 {
-	return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
-							>> PAGE_CACHE_SHIFT;
+	return ((unsigned long long) (i_size_read(inode) + PAGE_SIZE - 1))
+							>> PAGE_SHIFT;
 }
 
 static unsigned int dir_buckets(unsigned int level, int dir_level)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bbe2cd1..7a4558d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h

@@ -1294,7 +1294,7 @@
 		f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page));
 		unlock_page(page);
 	}
-	page_cache_release(page);
+	put_page(page);
 }
 
 static inline void f2fs_put_dnode(struct dnode_of_data *dn)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b41c357..443e077 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c

@@ -74,11 +74,11 @@
 		goto mapped;
 
 	/* page is wholly or partially inside EOF */
-	if (((loff_t)(page->index + 1) << PAGE_CACHE_SHIFT) >
+	if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
 						i_size_read(inode)) {
 		unsigned offset;
-		offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
-		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+		offset = i_size_read(inode) & ~PAGE_MASK;
+		zero_user_segment(page, offset, PAGE_SIZE);
 	}
 	set_page_dirty(page);
 	SetPageUptodate(page);
@@ -346,11 +346,11 @@
 		goto found;
 	}
 
-	pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
+	pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
 
 	dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
 
-	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
+	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
 		set_new_dnode(&dn, inode, NULL, NULL, 0);
 		err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
 		if (err && err != -ENOENT) {
@@ -370,7 +370,7 @@
 		/* find data/hole in dnode block */
 		for (; dn.ofs_in_node < end_offset;
 				dn.ofs_in_node++, pgofs++,
-				data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
+				data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
 			block_t blkaddr;
 			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
 
@@ -508,8 +508,8 @@
 static int truncate_partial_data_page(struct inode *inode, u64 from,
 								bool cache_only)
 {
-	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
-	pgoff_t index = from >> PAGE_CACHE_SHIFT;
+	unsigned offset = from & (PAGE_SIZE - 1);
+	pgoff_t index = from >> PAGE_SHIFT;
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
 
@@ -529,7 +529,7 @@
 		return 0;
 truncate_out:
 	f2fs_wait_on_page_writeback(page, DATA, true);
-	zero_user(page, offset, PAGE_CACHE_SIZE - offset);
+	zero_user(page, offset, PAGE_SIZE - offset);
 	if (!cache_only || !f2fs_encrypted_inode(inode) ||
 					!S_ISREG(inode->i_mode))
 		set_page_dirty(page);
@@ -799,11 +799,11 @@
 	if (ret)
 		return ret;
 
-	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
-	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
+	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
 
-	off_start = offset & (PAGE_CACHE_SIZE - 1);
-	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+	off_start = offset & (PAGE_SIZE - 1);
+	off_end = (offset + len) & (PAGE_SIZE - 1);
 
 	if (pg_start == pg_end) {
 		ret = fill_zero(inode, pg_start, off_start,
@@ -813,7 +813,7 @@
 	} else {
 		if (off_start) {
 			ret = fill_zero(inode, pg_start++, off_start,
-						PAGE_CACHE_SIZE - off_start);
+						PAGE_SIZE - off_start);
 			if (ret)
 				return ret;
 		}
@@ -830,8 +830,8 @@
 
 			f2fs_balance_fs(sbi, true);
 
-			blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
-			blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
+			blk_start = (loff_t)pg_start << PAGE_SHIFT;
+			blk_end = (loff_t)pg_end << PAGE_SHIFT;
 			truncate_inode_pages_range(mapping, blk_start,
 					blk_end - 1);
 
@@ -954,8 +954,8 @@
 	if (ret)
 		return ret;
 
-	pg_start = offset >> PAGE_CACHE_SHIFT;
-	pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
+	pg_start = offset >> PAGE_SHIFT;
+	pg_end = (offset + len) >> PAGE_SHIFT;
 
 	/* write out all dirty pages from offset */
 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1006,11 +1006,11 @@
 
 	truncate_pagecache_range(inode, offset, offset + len - 1);
 
-	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
-	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
+	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
 
-	off_start = offset & (PAGE_CACHE_SIZE - 1);
-	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+	off_start = offset & (PAGE_SIZE - 1);
+	off_end = (offset + len) & (PAGE_SIZE - 1);
 
 	if (pg_start == pg_end) {
 		ret = fill_zero(inode, pg_start, off_start,
@@ -1024,12 +1024,12 @@
 	} else {
 		if (off_start) {
 			ret = fill_zero(inode, pg_start++, off_start,
-						PAGE_CACHE_SIZE - off_start);
+						PAGE_SIZE - off_start);
 			if (ret)
 				return ret;
 
 			new_size = max_t(loff_t, new_size,
-					(loff_t)pg_start << PAGE_CACHE_SHIFT);
+					(loff_t)pg_start << PAGE_SHIFT);
 		}
 
 		for (index = pg_start; index < pg_end; index++) {
@@ -1060,7 +1060,7 @@
 			f2fs_unlock_op(sbi);
 
 			new_size = max_t(loff_t, new_size,
-				(loff_t)(index + 1) << PAGE_CACHE_SHIFT);
+				(loff_t)(index + 1) << PAGE_SHIFT);
 		}
 
 		if (off_end) {
@@ -1117,8 +1117,8 @@
 
 	truncate_pagecache(inode, offset);
 
-	pg_start = offset >> PAGE_CACHE_SHIFT;
-	pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
+	pg_start = offset >> PAGE_SHIFT;
+	pg_end = (offset + len) >> PAGE_SHIFT;
 	delta = pg_end - pg_start;
 	nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
 
@@ -1158,11 +1158,11 @@
 
 	f2fs_balance_fs(sbi, true);
 
-	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
-	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
+	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
 
-	off_start = offset & (PAGE_CACHE_SIZE - 1);
-	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+	off_start = offset & (PAGE_SIZE - 1);
+	off_end = (offset + len) & (PAGE_SIZE - 1);
 
 	f2fs_lock_op(sbi);
 
@@ -1180,12 +1180,12 @@
 		if (pg_start == pg_end)
 			new_size = offset + len;
 		else if (index == pg_start && off_start)
-			new_size = (loff_t)(index + 1) << PAGE_CACHE_SHIFT;
+			new_size = (loff_t)(index + 1) << PAGE_SHIFT;
 		else if (index == pg_end)
-			new_size = ((loff_t)index << PAGE_CACHE_SHIFT) +
+			new_size = ((loff_t)index << PAGE_SHIFT) +
 								off_end;
 		else
-			new_size += PAGE_CACHE_SIZE;
+			new_size += PAGE_SIZE;
 	}
 
 	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
@@ -1652,8 +1652,8 @@
 	if (need_inplace_update(inode))
 		return -EINVAL;
 
-	pg_start = range->start >> PAGE_CACHE_SHIFT;
-	pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT;
+	pg_start = range->start >> PAGE_SHIFT;
+	pg_end = (range->start + range->len) >> PAGE_SHIFT;
 
 	f2fs_balance_fs(sbi, true);
 
@@ -1770,7 +1770,7 @@
 out:
 	inode_unlock(inode);
 	if (!err)
-		range->len = (u64)total << PAGE_CACHE_SHIFT;
+		range->len = (u64)total << PAGE_SHIFT;
 	return err;
 }
 

diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 358214e..a2fbe6f 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c

@@ -51,7 +51,7 @@
 
 	f2fs_bug_on(F2FS_P_SB(page), page->index);
 
-	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+	zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
 
 	/* Copy the whole inline data block */
 	src_addr = inline_data_addr(ipage);
@@ -93,7 +93,7 @@
 	}
 
 	if (page->index)
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+		zero_user_segment(page, 0, PAGE_SIZE);
 	else
 		read_inline_data(page, ipage);
 
@@ -375,7 +375,7 @@
 		goto out;
 
 	f2fs_wait_on_page_writeback(page, DATA, true);
-	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+	zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
 
 	dentry_blk = kmap_atomic(page);
 
@@ -405,8 +405,8 @@
 	stat_dec_inline_dir(dir);
 	clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
 
-	if (i_size_read(dir) < PAGE_CACHE_SIZE) {
-		i_size_write(dir, PAGE_CACHE_SIZE);
+	if (i_size_read(dir) < PAGE_SIZE) {
+		i_size_write(dir, PAGE_SIZE);
 		set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
 	}
 

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 7876f10..013e579 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c

@@ -1027,12 +1027,6 @@
 		goto errout;
 	}
 
-	/* this is broken symlink case */
-	if (unlikely(cstr.name[0] == 0)) {
-		res = -ENOENT;
-		goto errout;
-	}
-
 	if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) {
 		/* Symlink data on the disk is corrupted */
 		res = -EIO;
@@ -1046,17 +1040,23 @@
 	if (res < 0)
 		goto errout;
 
+	/* this is broken symlink case */
+	if (unlikely(pstr.name[0] == 0)) {
+		res = -ENOENT;
+		goto errout;
+	}
+
 	paddr = pstr.name;
 
 	/* Null-terminate the name */
 	paddr[res] = '\0';
 
-	page_cache_release(cpage);
+	put_page(cpage);
 	set_delayed_call(done, kfree_link, paddr);
 	return paddr;
 errout:
 	fscrypt_fname_free_buffer(&pstr);
-	page_cache_release(cpage);
+	put_page(cpage);
 	return ERR_PTR(res);
 }
 

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 118321b..1a33de9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c

@@ -46,11 +46,11 @@
 	 */
 	if (type == FREE_NIDS) {
 		mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
-							PAGE_CACHE_SHIFT;
+							PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 	} else if (type == NAT_ENTRIES) {
 		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
-							PAGE_CACHE_SHIFT;
+							PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 	} else if (type == DIRTY_DENTS) {
 		if (sbi->sb->s_bdi->wb.dirty_exceeded)
@@ -62,13 +62,13 @@
 
 		for (i = 0; i <= UPDATE_INO; i++)
 			mem_size += (sbi->im[i].ino_num *
-				sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
+				sizeof(struct ino_entry)) >> PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
 	} else if (type == EXTENT_CACHE) {
 		mem_size = (atomic_read(&sbi->total_ext_tree) *
 				sizeof(struct extent_tree) +
 				atomic_read(&sbi->total_ext_node) *
-				sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
+				sizeof(struct extent_node)) >> PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
 	} else {
 		if (!sbi->sb->s_bdi->wb.dirty_exceeded)
@@ -121,7 +121,7 @@
 
 	src_addr = page_address(src_page);
 	dst_addr = page_address(dst_page);
-	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+	memcpy(dst_addr, src_addr, PAGE_SIZE);
 	set_page_dirty(dst_page);
 	f2fs_put_page(src_page, 1);
 

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 0b30cd2..011942f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c

@@ -591,7 +591,7 @@
 
 	/* truncate meta pages to be used by the recovery */
 	truncate_inode_pages_range(META_MAPPING(sbi),
-			(loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
+			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
 
 	if (err) {
 		truncate_inode_pages_final(NODE_MAPPING(sbi));

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 6f16b39..540669d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c

@@ -885,12 +885,12 @@
 		}
 	}
 
-	sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
+	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
 			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
 	if (valid_sum_count <= sum_in_page)
 		return 1;
 	else if ((valid_sum_count - sum_in_page) <=
-		(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
+		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
 		return 2;
 	return 3;
 }
@@ -909,9 +909,9 @@
 	void *dst = page_address(page);
 
 	if (src)
-		memcpy(dst, src, PAGE_CACHE_SIZE);
+		memcpy(dst, src, PAGE_SIZE);
 	else
-		memset(dst, 0, PAGE_CACHE_SIZE);
+		memset(dst, 0, PAGE_SIZE);
 	set_page_dirty(page);
 	f2fs_put_page(page, 1);
 }
@@ -1596,7 +1596,7 @@
 			s = (struct f2fs_summary *)(kaddr + offset);
 			seg_i->sum_blk->entries[j] = *s;
 			offset += SUMMARY_SIZE;
-			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
+			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
 						SUM_FOOTER_SIZE)
 				continue;
 
@@ -1757,7 +1757,7 @@
 			*summary = seg_i->sum_blk->entries[j];
 			written_size += SUMMARY_SIZE;
 
-			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
+			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
 							SUM_FOOTER_SIZE)
 				continue;
 
@@ -1844,7 +1844,7 @@
 
 	src_addr = page_address(src_page);
 	dst_addr = page_address(dst_page);
-	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+	memcpy(dst_addr, src_addr, PAGE_SIZE);
 
 	set_page_dirty(dst_page);
 	f2fs_put_page(src_page, 1);
@@ -2171,7 +2171,7 @@
 
 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
 		mutex_init(&array[i].curseg_mutex);
-		array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+		array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
 		if (!array[i].sum_blk)
 			return -ENOMEM;
 		init_rwsem(&array[i].journal_rwsem);

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 15bb81f..006f87d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c

@@ -984,9 +984,25 @@
 	return result;
 }
 
-static inline bool sanity_check_area_boundary(struct super_block *sb,
-					struct f2fs_super_block *raw_super)
+static int __f2fs_commit_super(struct buffer_head *bh,
+			struct f2fs_super_block *super)
 {
+	lock_buffer(bh);
+	if (super)
+		memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
+	set_buffer_uptodate(bh);
+	set_buffer_dirty(bh);
+	unlock_buffer(bh);
+
+	/* it's rare case, we can do fua all the time */
+	return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
+}
+
+static inline bool sanity_check_area_boundary(struct super_block *sb,
+					struct buffer_head *bh)
+{
+	struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
+					(bh->b_data + F2FS_SUPER_OFFSET);
 	u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
 	u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
 	u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
@@ -1000,6 +1016,10 @@
 	u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
 	u32 segment_count = le32_to_cpu(raw_super->segment_count);
 	u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+	u64 main_end_blkaddr = main_blkaddr +
+				(segment_count_main << log_blocks_per_seg);
+	u64 seg_end_blkaddr = segment0_blkaddr +
+				(segment_count << log_blocks_per_seg);
 
 	if (segment0_blkaddr != cp_blkaddr) {
 		f2fs_msg(sb, KERN_INFO,
@@ -1044,22 +1064,45 @@
 		return true;
 	}
 
-	if (main_blkaddr + (segment_count_main << log_blocks_per_seg) !=
-		segment0_blkaddr + (segment_count << log_blocks_per_seg)) {
+	if (main_end_blkaddr > seg_end_blkaddr) {
 		f2fs_msg(sb, KERN_INFO,
-			"Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)",
+			"Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
 			main_blkaddr,
-			segment0_blkaddr + (segment_count << log_blocks_per_seg),
+			segment0_blkaddr +
+				(segment_count << log_blocks_per_seg),
 			segment_count_main << log_blocks_per_seg);
 		return true;
-	}
+	} else if (main_end_blkaddr < seg_end_blkaddr) {
+		int err = 0;
+		char *res;
 
+		/* fix in-memory information all the time */
+		raw_super->segment_count = cpu_to_le32((main_end_blkaddr -
+				segment0_blkaddr) >> log_blocks_per_seg);
+
+		if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
+			res = "internally";
+		} else {
+			err = __f2fs_commit_super(bh, NULL);
+			res = err ? "failed" : "done";
+		}
+		f2fs_msg(sb, KERN_INFO,
+			"Fix alignment : %s, start(%u) end(%u) block(%u)",
+			res, main_blkaddr,
+			segment0_blkaddr +
+				(segment_count << log_blocks_per_seg),
+			segment_count_main << log_blocks_per_seg);
+		if (err)
+			return true;
+	}
 	return false;
 }
 
 static int sanity_check_raw_super(struct super_block *sb,
-			struct f2fs_super_block *raw_super)
+				struct buffer_head *bh)
 {
+	struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
+					(bh->b_data + F2FS_SUPER_OFFSET);
 	unsigned int blocksize;
 
 	if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
@@ -1070,10 +1113,10 @@
 	}
 
 	/* Currently, support only 4KB page cache size */
-	if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) {
+	if (F2FS_BLKSIZE != PAGE_SIZE) {
 		f2fs_msg(sb, KERN_INFO,
 			"Invalid page_cache_size (%lu), supports only 4KB\n",
-			PAGE_CACHE_SIZE);
+			PAGE_SIZE);
 		return 1;
 	}
 
@@ -1126,7 +1169,7 @@
 	}
 
 	/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
-	if (sanity_check_area_boundary(sb, raw_super))
+	if (sanity_check_area_boundary(sb, bh))
 		return 1;
 
 	return 0;
@@ -1202,7 +1245,7 @@
 {
 	int block;
 	struct buffer_head *bh;
-	struct f2fs_super_block *super, *buf;
+	struct f2fs_super_block *super;
 	int err = 0;
 
 	super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
@@ -1218,11 +1261,8 @@
 			continue;
 		}
 
-		buf = (struct f2fs_super_block *)
-				(bh->b_data + F2FS_SUPER_OFFSET);
-
 		/* sanity checking of raw super */
-		if (sanity_check_raw_super(sb, buf)) {
+		if (sanity_check_raw_super(sb, bh)) {
 			f2fs_msg(sb, KERN_ERR,
 				"Can't find valid F2FS filesystem in %dth superblock",
 				block + 1);
@@ -1232,7 +1272,8 @@
 		}
 
 		if (!*raw_super) {
-			memcpy(super, buf, sizeof(*super));
+			memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
+							sizeof(*super));
 			*valid_super_block = block;
 			*raw_super = super;
 		}
@@ -1252,42 +1293,29 @@
 	return err;
 }
 
-static int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block)
+int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 {
-	struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi);
 	struct buffer_head *bh;
 	int err;
 
-	bh = sb_getblk(sbi->sb, block);
+	/* write back-up superblock first */
+	bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1);
 	if (!bh)
 		return -EIO;
-
-	lock_buffer(bh);
-	memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
-	set_buffer_uptodate(bh);
-	set_buffer_dirty(bh);
-	unlock_buffer(bh);
-
-	/* it's rare case, we can do fua all the time */
-	err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
+	err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
 	brelse(bh);
 
-	return err;
-}
-
-int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
-{
-	int err;
-
-	/* write back-up superblock first */
-	err = __f2fs_commit_super(sbi, sbi->valid_super_block ? 0 : 1);
-
 	/* if we are in recovery path, skip writing valid superblock */
 	if (recover || err)
 		return err;
 
 	/* write current valid superblock */
-	return __f2fs_commit_super(sbi, sbi->valid_super_block);
+	bh = sb_getblk(sbi->sb, sbi->valid_super_block);
+	if (!bh)
+		return -EIO;
+	err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
+	brelse(bh);
+	return err;
 }
 
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -1442,7 +1470,7 @@
 	seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
 	if (__exist_node_summaries(sbi))
 		sbi->kbytes_written =
-			le64_to_cpu(seg_i->sum_blk->journal.info.kbytes_written);
+			le64_to_cpu(seg_i->journal->info.kbytes_written);
 
 	build_gc_manager(sbi);
 

diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index 182f9ff..3ff1772 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig

@@ -93,8 +93,24 @@
 	  that most of your FAT filesystems use, and can be overridden
 	  with the "iocharset" mount option for FAT filesystems.
 	  Note that "utf8" is not recommended for FAT filesystems.
-	  If unsure, you shouldn't set "utf8" here.
+	  If unsure, you shouldn't set "utf8" here - select the next option
+	  instead if you would like to use UTF-8 encoded file names by default.
 	  See <file:Documentation/filesystems/vfat.txt> for more information.
 
 	  Enable any character sets you need in File Systems/Native Language
 	  Support.
+
+config FAT_DEFAULT_UTF8
+	bool "Enable FAT UTF-8 option by default"
+	depends on VFAT_FS
+	default n
+	help
+	  Set this if you would like to have "utf8" mount option set
+	  by default when mounting FAT filesystems.
+
+	  Even if you say Y here can always disable UTF-8 for
+	  particular mount by adding "utf8=0" to mount options.
+
+	  Say Y if you use UTF-8 encoding for file names, N otherwise.
+
+	  See <file:Documentation/filesystems/vfat.txt> for more information.

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a559905..2262810 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c

@@ -1127,7 +1127,7 @@
 	}
 	opts->name_check = 'n';
 	opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK =  0;
-	opts->utf8 = opts->unicode_xlate = 0;
+	opts->unicode_xlate = 0;
 	opts->numtail = 1;
 	opts->usefree = opts->nocase = 0;
 	opts->tz_set = 0;
@@ -1135,6 +1135,8 @@
 	opts->errors = FAT_ERRORS_RO;
 	*debug = 0;
 
+	opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
+
 	if (!options)
 		goto out;
 

diff --git a/fs/fhandle.c b/fs/fhandle.c
index d59712d..ca3c3dd 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c

@@ -228,7 +228,7 @@
 		path_put(&path);
 		return fd;
 	}
-	file = file_open_root(path.dentry, path.mnt, "", open_flag);
+	file = file_open_root(path.dentry, path.mnt, "", open_flag, 0);
 	if (IS_ERR(file)) {
 		put_unused_fd(fd);
 		retval =  PTR_ERR(file);

diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index cb84f0f..bfc780c 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c

@@ -66,11 +66,11 @@
 vxfs_immed_readpage(struct file *fp, struct page *pp)
 {
 	struct vxfs_inode_info	*vip = VXFS_INO(pp->mapping->host);
-	u_int64_t	offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT;
+	u_int64_t	offset = (u_int64_t)pp->index << PAGE_SHIFT;
 	caddr_t		kaddr;
 
 	kaddr = kmap(pp);
-	memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE);
+	memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_SIZE);
 	kunmap(pp);
 	
 	flush_dcache_page(pp);

diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 1cff72d..a49e0cf 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c

@@ -45,7 +45,7 @@
 /*
  * Number of VxFS blocks per page.
  */
-#define VXFS_BLOCK_PER_PAGE(sbp)  ((PAGE_CACHE_SIZE / (sbp)->s_blocksize))
+#define VXFS_BLOCK_PER_PAGE(sbp)  ((PAGE_SIZE / (sbp)->s_blocksize))
 
 
 static struct dentry *	vxfs_lookup(struct inode *, struct dentry *, unsigned int);
@@ -175,7 +175,7 @@
 	if (de) {
 		ino = de->d_ino;
 		kunmap(pp);
-		page_cache_release(pp);
+		put_page(pp);
 	}
 	
 	return (ino);
@@ -255,8 +255,8 @@
 	nblocks = dir_blocks(ip);
 	pblocks = VXFS_BLOCK_PER_PAGE(sbp);
 
-	page = pos >> PAGE_CACHE_SHIFT;
-	offset = pos & ~PAGE_CACHE_MASK;
+	page = pos >> PAGE_SHIFT;
+	offset = pos & ~PAGE_MASK;
 	block = (u_long)(pos >> sbp->s_blocksize_bits) % pblocks;
 
 	for (; page < npages; page++, block = 0) {
@@ -289,7 +289,7 @@
 					continue;
 
 				offset = (char *)de - kaddr;
-				ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2;
+				ctx->pos = ((page << PAGE_SHIFT) | offset) + 2;
 				if (!dir_emit(ctx, de->d_name, de->d_namelen,
 					de->d_ino, DT_UNKNOWN)) {
 					vxfs_put_page(pp);
@@ -301,6 +301,6 @@
 		vxfs_put_page(pp);
 		offset = 0;
 	}
-	ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2;
+	ctx->pos = ((page << PAGE_SHIFT) | offset) + 2;
 	return 0;
 }

diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 5d318c4..e806694 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c

@@ -50,7 +50,7 @@
 vxfs_put_page(struct page *pp)
 {
 	kunmap(pp);
-	page_cache_release(pp);
+	put_page(pp);
 }
 
 /**

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5c46ed9..592cea5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c

@@ -33,7 +33,7 @@
 /*
  * 4MB minimal write chunk size
  */
-#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))
+#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))
 
 struct wb_completion {
 	atomic_t		cnt;
@@ -281,13 +281,15 @@
 		wb_get(wb);
 		spin_unlock(&inode->i_lock);
 		spin_lock(&wb->list_lock);
-		wb_put(wb);		/* not gonna deref it anymore */
 
 		/* i_wb may have changed inbetween, can't use inode_to_wb() */
-		if (likely(wb == inode->i_wb))
-			return wb;	/* @inode already has ref */
+		if (likely(wb == inode->i_wb)) {
+			wb_put(wb);	/* @inode already has ref */
+			return wb;
+		}
 
 		spin_unlock(&wb->list_lock);
+		wb_put(wb);
 		cpu_relax();
 		spin_lock(&inode->i_lock);
 	}
@@ -1337,10 +1339,10 @@
  * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
  * and does more profound writeback list handling in writeback_sb_inodes().
  */
-static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
-		       struct writeback_control *wbc)
+static int writeback_single_inode(struct inode *inode,
+				  struct writeback_control *wbc)
 {
+	struct bdi_writeback *wb;
 	int ret = 0;
 
 	spin_lock(&inode->i_lock);
@@ -1378,7 +1380,8 @@
 	ret = __writeback_single_inode(inode, wbc);
 
 	wbc_detach_inode(wbc);
-	spin_lock(&wb->list_lock);
+
+	wb = inode_to_wb_and_lock_list(inode);
 	spin_lock(&inode->i_lock);
 	/*
 	 * If inode is clean, remove it from writeback lists. Otherwise don't
@@ -1453,6 +1456,7 @@
 
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
+		struct bdi_writeback *tmp_wb;
 
 		if (inode->i_sb != sb) {
 			if (work->sb) {
@@ -1543,15 +1547,23 @@
 			cond_resched();
 		}
 
-
-		spin_lock(&wb->list_lock);
+		/*
+		 * Requeue @inode if still dirty.  Be careful as @inode may
+		 * have been switched to another wb in the meantime.
+		 */
+		tmp_wb = inode_to_wb_and_lock_list(inode);
 		spin_lock(&inode->i_lock);
 		if (!(inode->i_state & I_DIRTY_ALL))
 			wrote++;
-		requeue_inode(inode, wb, &wbc);
+		requeue_inode(inode, tmp_wb, &wbc);
 		inode_sync_complete(inode);
 		spin_unlock(&inode->i_lock);
 
+		if (unlikely(tmp_wb != wb)) {
+			spin_unlock(&tmp_wb->list_lock);
+			spin_lock(&wb->list_lock);
+		}
+
 		/*
 		 * bail out to wb_writeback() often enough to check
 		 * background threshold and other termination conditions.
@@ -2338,7 +2350,6 @@
  */
 int write_inode_now(struct inode *inode, int sync)
 {
-	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -2350,7 +2361,7 @@
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	return writeback_single_inode(inode, wb, &wbc);
+	return writeback_single_inode(inode, &wbc);
 }
 EXPORT_SYMBOL(write_inode_now);
 
@@ -2367,7 +2378,7 @@
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
+	return writeback_single_inode(inode, wbc);
 }
 EXPORT_SYMBOL(sync_inode);
 

diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 6b35fc4..3078b67 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c

@@ -113,7 +113,7 @@
 
 	wake_up_bit(&cookie->flags, 0);
 	if (xpage)
-		page_cache_release(xpage);
+		put_page(xpage);
 	__fscache_uncache_page(cookie, page);
 	return true;
 
@@ -164,7 +164,7 @@
 	}
 	spin_unlock(&object->lock);
 	if (xpage)
-		page_cache_release(xpage);
+		put_page(xpage);
 }
 
 /*
@@ -884,7 +884,7 @@
 		spin_unlock(&cookie->stores_lock);
 
 		for (i = n - 1; i >= 0; i--)
-			page_cache_release(results[i]);
+			put_page(results[i]);
 	}
 
 	_leave("");
@@ -982,7 +982,7 @@
 
 	radix_tree_tag_set(&cookie->stores, page->index,
 			   FSCACHE_COOKIE_PENDING_TAG);
-	page_cache_get(page);
+	get_page(page);
 
 	/* we only want one writer at a time, but we do need to queue new
 	 * writers after exclusive ops */
@@ -1026,7 +1026,7 @@
 	radix_tree_delete(&cookie->stores, page->index);
 	spin_unlock(&cookie->stores_lock);
 	wake_cookie = __fscache_unuse_cookie(cookie);
-	page_cache_release(page);
+	put_page(page);
 	ret = -ENOBUFS;
 	goto nobufs;
 

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ebb5e37..cbece12 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c

@@ -897,7 +897,7 @@
 		return err;
 	}
 
-	page_cache_get(newpage);
+	get_page(newpage);
 
 	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 		lru_cache_add_file(newpage);
@@ -912,12 +912,12 @@
 
 	if (err) {
 		unlock_page(newpage);
-		page_cache_release(newpage);
+		put_page(newpage);
 		return err;
 	}
 
 	unlock_page(oldpage);
-	page_cache_release(oldpage);
+	put_page(oldpage);
 	cs->len = 0;
 
 	return 0;
@@ -951,7 +951,7 @@
 	fuse_copy_finish(cs);
 
 	buf = cs->pipebufs;
-	page_cache_get(page);
+	get_page(page);
 	buf->page = page;
 	buf->offset = offset;
 	buf->len = count;
@@ -1435,7 +1435,7 @@
 
 out:
 	for (; page_nr < cs.nr_segs; page_nr++)
-		page_cache_release(bufs[page_nr].page);
+		put_page(bufs[page_nr].page);
 
 	kfree(bufs);
 	return ret;
@@ -1632,8 +1632,8 @@
 		goto out_up_killsb;
 
 	mapping = inode->i_mapping;
-	index = outarg.offset >> PAGE_CACHE_SHIFT;
-	offset = outarg.offset & ~PAGE_CACHE_MASK;
+	index = outarg.offset >> PAGE_SHIFT;
+	offset = outarg.offset & ~PAGE_MASK;
 	file_size = i_size_read(inode);
 	end = outarg.offset + outarg.size;
 	if (end > file_size) {
@@ -1652,13 +1652,13 @@
 		if (!page)
 			goto out_iput;
 
-		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
 		err = fuse_copy_page(cs, &page, offset, this_num, 0);
 		if (!err && offset == 0 &&
-		    (this_num == PAGE_CACHE_SIZE || file_size == end))
+		    (this_num == PAGE_SIZE || file_size == end))
 			SetPageUptodate(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 
 		if (err)
 			goto out_iput;
@@ -1697,7 +1697,7 @@
 	size_t total_len = 0;
 	int num_pages;
 
-	offset = outarg->offset & ~PAGE_CACHE_MASK;
+	offset = outarg->offset & ~PAGE_MASK;
 	file_size = i_size_read(inode);
 
 	num = outarg->size;
@@ -1720,7 +1720,7 @@
 	req->page_descs[0].offset = offset;
 	req->end = fuse_retrieve_end;
 
-	index = outarg->offset >> PAGE_CACHE_SHIFT;
+	index = outarg->offset >> PAGE_SHIFT;
 
 	while (num && req->num_pages < num_pages) {
 		struct page *page;
@@ -1730,7 +1730,7 @@
 		if (!page)
 			break;
 
-		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
 		req->pages[req->num_pages] = page;
 		req->page_descs[req->num_pages].length = this_num;
 		req->num_pages++;

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9dde38f..719924d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c

@@ -348,7 +348,7 @@
 		pgoff_t curr_index;
 
 		BUG_ON(req->inode != inode);
-		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+		curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
 		if (idx_from < curr_index + req->num_pages &&
 		    curr_index <= idx_to) {
 			found = true;
@@ -683,11 +683,11 @@
 		 * present there.
 		 */
 		int i;
-		int start_idx = num_read >> PAGE_CACHE_SHIFT;
-		size_t off = num_read & (PAGE_CACHE_SIZE - 1);
+		int start_idx = num_read >> PAGE_SHIFT;
+		size_t off = num_read & (PAGE_SIZE - 1);
 
 		for (i = start_idx; i < req->num_pages; i++) {
-			zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE);
+			zero_user_segment(req->pages[i], off, PAGE_SIZE);
 			off = 0;
 		}
 	} else {
@@ -704,7 +704,7 @@
 	struct fuse_req *req;
 	size_t num_read;
 	loff_t pos = page_offset(page);
-	size_t count = PAGE_CACHE_SIZE;
+	size_t count = PAGE_SIZE;
 	u64 attr_ver;
 	int err;
 
@@ -789,7 +789,7 @@
 		else
 			SetPageError(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	if (req->ff)
 		fuse_file_put(req->ff, false);
@@ -800,7 +800,7 @@
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = ff->fc;
 	loff_t pos = page_offset(req->pages[0]);
-	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+	size_t count = req->num_pages << PAGE_SHIFT;
 
 	req->out.argpages = 1;
 	req->out.page_zeroing = 1;
@@ -836,7 +836,7 @@
 
 	if (req->num_pages &&
 	    (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
-	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
+	     (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
 	     req->pages[req->num_pages - 1]->index + 1 != page->index)) {
 		int nr_alloc = min_t(unsigned, data->nr_pages,
 				     FUSE_MAX_PAGES_PER_REQ);
@@ -858,7 +858,7 @@
 		return -EIO;
 	}
 
-	page_cache_get(page);
+	get_page(page);
 	req->pages[req->num_pages] = page;
 	req->page_descs[req->num_pages].length = PAGE_SIZE;
 	req->num_pages++;
@@ -1003,17 +1003,17 @@
 	for (i = 0; i < req->num_pages; i++) {
 		struct page *page = req->pages[i];
 
-		if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
+		if (!req->out.h.error && !offset && count >= PAGE_SIZE)
 			SetPageUptodate(page);
 
-		if (count > PAGE_CACHE_SIZE - offset)
-			count -= PAGE_CACHE_SIZE - offset;
+		if (count > PAGE_SIZE - offset)
+			count -= PAGE_SIZE - offset;
 		else
 			count = 0;
 		offset = 0;
 
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	return res;
@@ -1024,7 +1024,7 @@
 			       struct iov_iter *ii, loff_t pos)
 {
 	struct fuse_conn *fc = get_fuse_conn(mapping->host);
-	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned offset = pos & (PAGE_SIZE - 1);
 	size_t count = 0;
 	int err;
 
@@ -1034,8 +1034,8 @@
 	do {
 		size_t tmp;
 		struct page *page;
-		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-		size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
+		pgoff_t index = pos >> PAGE_SHIFT;
+		size_t bytes = min_t(size_t, PAGE_SIZE - offset,
 				     iov_iter_count(ii));
 
 		bytes = min_t(size_t, bytes, fc->max_write - count);
@@ -1059,7 +1059,7 @@
 		iov_iter_advance(ii, tmp);
 		if (!tmp) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			bytes = min(bytes, iov_iter_single_seg_count(ii));
 			goto again;
 		}
@@ -1072,7 +1072,7 @@
 		count += tmp;
 		pos += tmp;
 		offset += tmp;
-		if (offset == PAGE_CACHE_SIZE)
+		if (offset == PAGE_SIZE)
 			offset = 0;
 
 		if (!fc->big_writes)
@@ -1086,8 +1086,8 @@
 static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
 {
 	return min_t(unsigned,
-		     ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
-		     (pos >> PAGE_CACHE_SHIFT) + 1,
+		     ((pos + len - 1) >> PAGE_SHIFT) -
+		     (pos >> PAGE_SHIFT) + 1,
 		     FUSE_MAX_PAGES_PER_REQ);
 }
 
@@ -1205,8 +1205,8 @@
 			goto out;
 
 		invalidate_mapping_pages(file->f_mapping,
-					 pos >> PAGE_CACHE_SHIFT,
-					 endbyte >> PAGE_CACHE_SHIFT);
+					 pos >> PAGE_SHIFT,
+					 endbyte >> PAGE_SHIFT);
 
 		written += written_buffered;
 		iocb->ki_pos = pos + written_buffered;
@@ -1315,8 +1315,8 @@
 	size_t nmax = write ? fc->max_write : fc->max_read;
 	loff_t pos = *ppos;
 	size_t count = iov_iter_count(iter);
-	pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
-	pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
+	pgoff_t idx_from = pos >> PAGE_SHIFT;
+	pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
 	ssize_t res = 0;
 	struct fuse_req *req;
 	int err = 0;
@@ -1466,7 +1466,7 @@
 {
 	struct fuse_inode *fi = get_fuse_inode(req->inode);
 	struct fuse_write_in *inarg = &req->misc.write.in;
-	__u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
+	__u64 data_size = req->num_pages * PAGE_SIZE;
 
 	if (!fc->connected)
 		goto out_free;
@@ -1727,7 +1727,7 @@
 	list_del(&new_req->writepages_entry);
 	list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
 		BUG_ON(old_req->inode != new_req->inode);
-		curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+		curr_index = old_req->misc.write.in.offset >> PAGE_SHIFT;
 		if (curr_index <= page->index &&
 		    page->index < curr_index + old_req->num_pages) {
 			found = true;
@@ -1742,7 +1742,7 @@
 	new_req->num_pages = 1;
 	for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
 		BUG_ON(tmp->inode != new_req->inode);
-		curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+		curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
 		if (tmp->num_pages == 1 &&
 		    curr_index == page->index) {
 			old_req = tmp;
@@ -1799,7 +1799,7 @@
 
 	if (req && req->num_pages &&
 	    (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
-	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
+	     (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
 	     data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
 		fuse_writepages_send(data);
 		data->req = NULL;
@@ -1924,7 +1924,7 @@
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
 {
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
 	struct page *page;
 	loff_t fsize;
@@ -1938,15 +1938,15 @@
 
 	fuse_wait_on_page_writeback(mapping->host, page->index);
 
-	if (PageUptodate(page) || len == PAGE_CACHE_SIZE)
+	if (PageUptodate(page) || len == PAGE_SIZE)
 		goto success;
 	/*
 	 * Check if the start this page comes after the end of file, in which
 	 * case the readpage can be optimized away.
 	 */
 	fsize = i_size_read(mapping->host);
-	if (fsize <= (pos & PAGE_CACHE_MASK)) {
-		size_t off = pos & ~PAGE_CACHE_MASK;
+	if (fsize <= (pos & PAGE_MASK)) {
+		size_t off = pos & ~PAGE_MASK;
 		if (off)
 			zero_user_segment(page, 0, off);
 		goto success;
@@ -1960,7 +1960,7 @@
 
 cleanup:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 error:
 	return err;
 }
@@ -1973,16 +1973,16 @@
 
 	if (!PageUptodate(page)) {
 		/* Zero any unwritten bytes at the end of the page */
-		size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK;
+		size_t endoff = (pos + copied) & ~PAGE_MASK;
 		if (endoff)
-			zero_user_segment(page, endoff, PAGE_CACHE_SIZE);
+			zero_user_segment(page, endoff, PAGE_SIZE);
 		SetPageUptodate(page);
 	}
 
 	fuse_write_update_size(inode, pos + copied);
 	set_page_dirty(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return copied;
 }

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4d69d5c..1ce6766 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c

@@ -339,11 +339,11 @@
 
 	fuse_invalidate_attr(inode);
 	if (offset >= 0) {
-		pg_start = offset >> PAGE_CACHE_SHIFT;
+		pg_start = offset >> PAGE_SHIFT;
 		if (len <= 0)
 			pg_end = -1;
 		else
-			pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+			pg_end = (offset + len - 1) >> PAGE_SHIFT;
 		invalidate_inode_pages2_range(inode->i_mapping,
 					      pg_start, pg_end);
 	}
@@ -864,7 +864,7 @@
 		process_init_limits(fc, arg);
 
 		if (arg->minor >= 6) {
-			ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
+			ra_pages = arg->max_readahead / PAGE_SIZE;
 			if (arg->flags & FUSE_ASYNC_READ)
 				fc->async_read = 1;
 			if (!(arg->flags & FUSE_POSIX_LOCKS))
@@ -901,7 +901,7 @@
 			if (arg->time_gran && arg->time_gran <= 1000000000)
 				fc->sb->s_time_gran = arg->time_gran;
 		} else {
-			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+			ra_pages = fc->max_read / PAGE_SIZE;
 			fc->no_lock = 1;
 			fc->no_flock = 1;
 		}
@@ -922,7 +922,7 @@
 
 	arg->major = FUSE_KERNEL_VERSION;
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
-	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
+	arg->max_readahead = fc->bdi.ra_pages * PAGE_SIZE;
 	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
 		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
 		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
@@ -955,7 +955,7 @@
 	int err;
 
 	fc->bdi.name = "fuse";
-	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
 	/* fuse does it's own writeback accounting */
 	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
 
@@ -1053,8 +1053,8 @@
 			goto err;
 #endif
 	} else {
-		sb->s_blocksize = PAGE_CACHE_SIZE;
-		sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+		sb->s_blocksize = PAGE_SIZE;
+		sb->s_blocksize_bits = PAGE_SHIFT;
 	}
 	sb->s_magic = FUSE_SUPER_MAGIC;
 	sb->s_op = &fuse_super_operations;

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index aa016e4..1bbbee9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c

@@ -101,7 +101,7 @@
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	loff_t i_size = i_size_read(inode);
-	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	pgoff_t end_index = i_size >> PAGE_SHIFT;
 	unsigned offset;
 
 	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
@@ -109,9 +109,9 @@
 	if (current->journal_info)
 		goto redirty;
 	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
+	offset = i_size & (PAGE_SIZE-1);
 	if (page->index > end_index || (page->index == end_index && !offset)) {
-		page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
 		goto out;
 	}
 	return 1;
@@ -238,7 +238,7 @@
 {
 	struct inode *inode = mapping->host;
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
+	unsigned nrblocks = nr_pages * (PAGE_SIZE/inode->i_sb->s_blocksize);
 	int i;
 	int ret;
 
@@ -366,8 +366,8 @@
 			cycled = 0;
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		index = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			range_whole = 1;
 		cycled = 1; /* ignore range_cyclic tests */
@@ -458,7 +458,7 @@
 	 * so we need to supply one here. It doesn't happen often.
 	 */
 	if (unlikely(page->index)) {
-		zero_user(page, 0, PAGE_CACHE_SIZE);
+		zero_user(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
 		return 0;
 	}
@@ -471,7 +471,7 @@
 	if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
 		dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
 	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
-	memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
+	memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
 	kunmap_atomic(kaddr);
 	flush_dcache_page(page);
 	brelse(dibh);
@@ -560,8 +560,8 @@
                        unsigned size)
 {
 	struct address_space *mapping = ip->i_inode.i_mapping;
-	unsigned long index = *pos / PAGE_CACHE_SIZE;
-	unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
+	unsigned long index = *pos / PAGE_SIZE;
+	unsigned offset = *pos & (PAGE_SIZE - 1);
 	unsigned copied = 0;
 	unsigned amt;
 	struct page *page;
@@ -569,15 +569,15 @@
 
 	do {
 		amt = size - copied;
-		if (offset + size > PAGE_CACHE_SIZE)
-			amt = PAGE_CACHE_SIZE - offset;
+		if (offset + size > PAGE_SIZE)
+			amt = PAGE_SIZE - offset;
 		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 		p = kmap_atomic(page);
 		memcpy(buf + copied, p + offset, amt);
 		kunmap_atomic(p);
-		page_cache_release(page);
+		put_page(page);
 		copied += amt;
 		index++;
 		offset = 0;
@@ -651,8 +651,8 @@
 	unsigned requested = 0;
 	int alloc_required;
 	int error = 0;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	pgoff_t index = pos >> PAGE_SHIFT;
+	unsigned from = pos & (PAGE_SIZE - 1);
 	struct page *page;
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
@@ -697,7 +697,7 @@
 		rblocks += gfs2_rg_blocks(ip, requested);
 
 	error = gfs2_trans_begin(sdp, rblocks,
-				 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+				 PAGE_SIZE/sdp->sd_sb.sb_bsize);
 	if (error)
 		goto out_trans_fail;
 
@@ -727,7 +727,7 @@
 		return 0;
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	gfs2_trans_end(sdp);
 	if (pos + len > ip->i_inode.i_size)
@@ -827,7 +827,7 @@
 	if (!PageUptodate(page))
 		SetPageUptodate(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (copied) {
 		if (inode->i_size < to)
@@ -877,7 +877,7 @@
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	struct buffer_head *dibh;
-	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned int from = pos & (PAGE_SIZE - 1);
 	unsigned int to = from + len;
 	int ret;
 	struct gfs2_trans *tr = current->journal_info;
@@ -888,7 +888,7 @@
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
 	if (unlikely(ret)) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		goto failed;
 	}
 
@@ -992,7 +992,7 @@
 {
 	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
 	unsigned int stop = offset + length;
-	int partial_page = (offset || length < PAGE_CACHE_SIZE);
+	int partial_page = (offset || length < PAGE_SIZE);
 	struct buffer_head *bh, *head;
 	unsigned long pos = 0;
 
@@ -1082,7 +1082,7 @@
 	 * the first place, mapping->nr_pages will always be zero.
 	 */
 	if (mapping->nrpages) {
-		loff_t lstart = offset & ~(PAGE_CACHE_SIZE - 1);
+		loff_t lstart = offset & ~(PAGE_SIZE - 1);
 		loff_t len = iov_iter_count(iter);
 		loff_t end = PAGE_ALIGN(offset + len) - 1;
 

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0860f0b..24ce1cd 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c

@@ -75,7 +75,7 @@
 			dsize = dibh->b_size - sizeof(struct gfs2_dinode);
 
 		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
-		memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
+		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
 		kunmap(page);
 
 		SetPageUptodate(page);
@@ -98,7 +98,7 @@
 
 	if (release) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	return 0;
@@ -932,8 +932,8 @@
 {
 	struct inode *inode = mapping->host;
 	struct gfs2_inode *ip = GFS2_I(inode);
-	unsigned long index = from >> PAGE_CACHE_SHIFT;
-	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned long index = from >> PAGE_SHIFT;
+	unsigned offset = from & (PAGE_SIZE-1);
 	unsigned blocksize, iblock, length, pos;
 	struct buffer_head *bh;
 	struct page *page;
@@ -945,7 +945,7 @@
 
 	blocksize = inode->i_sb->s_blocksize;
 	length = blocksize - (offset & (blocksize - 1));
-	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
 
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
@@ -989,7 +989,7 @@
 	mark_buffer_dirty(bh);
 unlock:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c9384f9..208efc7 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c

@@ -354,8 +354,8 @@
 {
 	struct inode *inode = page->mapping->host;
 	struct buffer_head bh;
-	unsigned long size = PAGE_CACHE_SIZE;
-	u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	unsigned long size = PAGE_SIZE;
+	u64 lblock = page->index << (PAGE_SHIFT - inode->i_blkbits);
 
 	do {
 		bh.b_state = 0;
@@ -386,7 +386,7 @@
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_alloc_parms ap = { .aflags = 0, };
 	unsigned long last_index;
-	u64 pos = page->index << PAGE_CACHE_SHIFT;
+	u64 pos = page->index << PAGE_SHIFT;
 	unsigned int data_blocks, ind_blocks, rblocks;
 	struct gfs2_holder gh;
 	loff_t size;
@@ -401,7 +401,7 @@
 	if (ret)
 		goto out;
 
-	gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
+	gfs2_size_hint(vma->vm_file, pos, PAGE_SIZE);
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	ret = gfs2_glock_nq(&gh);
@@ -411,7 +411,7 @@
 	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
 	set_bit(GIF_SW_PAGED, &ip->i_flags);
 
-	if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
+	if (!gfs2_write_alloc_required(ip, pos, PAGE_SIZE)) {
 		lock_page(page);
 		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
 			ret = -EAGAIN;
@@ -424,7 +424,7 @@
 	if (ret)
 		goto out_unlock;
 
-	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
+	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
 	ap.target = data_blocks + ind_blocks;
 	ret = gfs2_quota_lock_check(ip, &ap);
 	if (ret)
@@ -447,7 +447,7 @@
 	lock_page(page);
 	ret = -EINVAL;
 	size = i_size_read(inode);
-	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+	last_index = (size - 1) >> PAGE_SHIFT;
 	/* Check page index against inode size */
 	if (size == 0 || (page->index > last_index))
 		goto out_trans_end;
@@ -873,7 +873,7 @@
 			rblocks += data_blocks ? data_blocks : 1;
 
 		error = gfs2_trans_begin(sdp, rblocks,
-					 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+					 PAGE_SIZE/sdp->sd_sb.sb_bsize);
 		if (error)
 			goto out_trans_fail;
 

diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index e137d96..0448524 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c

@@ -124,7 +124,7 @@
 	if (mapping == NULL)
 		mapping = &sdp->sd_aspace;
 
-	shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
+	shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
 	index = blkno >> shift;             /* convert block to page */
 	bufnum = blkno - (index << shift);  /* block buf index within page */
 
@@ -154,7 +154,7 @@
 		map_bh(bh, sdp->sd_vfs, blkno);
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return bh;
 }

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a398913..ce7d69a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c

@@ -701,7 +701,7 @@
 	unsigned to_write = bytes, pg_off = off;
 	int done = 0;
 
-	blk = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift);
+	blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift);
 	boff = off % bsize;
 
 	page = find_or_create_page(mapping, index, GFP_NOFS);
@@ -753,13 +753,13 @@
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return 0;
 
 unlock_out:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return -EIO;
 }
 
@@ -773,13 +773,13 @@
 
 	nbytes = sizeof(struct gfs2_quota);
 
-	pg_beg = loc >> PAGE_CACHE_SHIFT;
-	pg_off = loc % PAGE_CACHE_SIZE;
+	pg_beg = loc >> PAGE_SHIFT;
+	pg_off = loc % PAGE_SIZE;
 
 	/* If the quota straddles a page boundary, split the write in two */
-	if ((pg_off + nbytes) > PAGE_CACHE_SIZE) {
+	if ((pg_off + nbytes) > PAGE_SIZE) {
 		pg_oflow = 1;
-		overflow = (pg_off + nbytes) - PAGE_CACHE_SIZE;
+		overflow = (pg_off + nbytes) - PAGE_SIZE;
 	}
 
 	ptr = qp;

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 07c0265..99a0bda 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c

@@ -918,9 +918,8 @@
 		goto fail;
 
 	rgd->rd_gl->gl_object = rgd;
-	rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_CACHE_MASK;
-	rgd->rd_gl->gl_vm.end = PAGE_CACHE_ALIGN((rgd->rd_addr +
-						  rgd->rd_length) * bsize) - 1;
+	rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
+	rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
 	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
 	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
 	if (rgd->rd_data > sdp->sd_max_rg_data)

diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 221719e..d77d844 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c

@@ -278,14 +278,14 @@
 
 	mapping = tree->inode->i_mapping;
 	off = (loff_t)cnid * tree->node_size;
-	block = off >> PAGE_CACHE_SHIFT;
-	node->page_offset = off & ~PAGE_CACHE_MASK;
+	block = off >> PAGE_SHIFT;
+	node->page_offset = off & ~PAGE_MASK;
 	for (i = 0; i < tree->pages_per_bnode; i++) {
 		page = read_mapping_page(mapping, block++, NULL);
 		if (IS_ERR(page))
 			goto fail;
 		if (PageError(page)) {
-			page_cache_release(page);
+			put_page(page);
 			goto fail;
 		}
 		node->page[i] = page;
@@ -401,7 +401,7 @@
 
 	for (i = 0; i < node->tree->pages_per_bnode; i++)
 		if (node->page[i])
-			page_cache_release(node->page[i]);
+			put_page(node->page[i]);
 	kfree(node);
 }
 
@@ -429,11 +429,11 @@
 
 	pagep = node->page;
 	memset(kmap(*pagep) + node->page_offset, 0,
-	       min((int)PAGE_CACHE_SIZE, (int)tree->node_size));
+	       min((int)PAGE_SIZE, (int)tree->node_size));
 	set_page_dirty(*pagep);
 	kunmap(*pagep);
 	for (i = 1; i < tree->pages_per_bnode; i++) {
-		memset(kmap(*++pagep), 0, PAGE_CACHE_SIZE);
+		memset(kmap(*++pagep), 0, PAGE_SIZE);
 		set_page_dirty(*pagep);
 		kunmap(*pagep);
 	}

diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 1ab19e6..37cdd95 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c

@@ -116,14 +116,14 @@
 	}
 
 	tree->node_size_shift = ffs(size) - 1;
-	tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	tree->pages_per_bnode = (tree->node_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 	return tree;
 
 fail_page:
-	page_cache_release(page);
+	put_page(page);
 free_inode:
 	tree->inode->i_mapping->a_ops = &hfs_aops;
 	iput(tree->inode);
@@ -257,9 +257,9 @@
 	off = off16;
 
 	off += node->page_offset;
-	pagep = node->page + (off >> PAGE_CACHE_SHIFT);
+	pagep = node->page + (off >> PAGE_SHIFT);
 	data = kmap(*pagep);
-	off &= ~PAGE_CACHE_MASK;
+	off &= ~PAGE_MASK;
 	idx = 0;
 
 	for (;;) {
@@ -279,7 +279,7 @@
 					}
 				}
 			}
-			if (++off >= PAGE_CACHE_SIZE) {
+			if (++off >= PAGE_SIZE) {
 				kunmap(*pagep);
 				data = kmap(*++pagep);
 				off = 0;
@@ -302,9 +302,9 @@
 		len = hfs_brec_lenoff(node, 0, &off16);
 		off = off16;
 		off += node->page_offset;
-		pagep = node->page + (off >> PAGE_CACHE_SHIFT);
+		pagep = node->page + (off >> PAGE_SHIFT);
 		data = kmap(*pagep);
-		off &= ~PAGE_CACHE_MASK;
+		off &= ~PAGE_MASK;
 	}
 }
 
@@ -348,9 +348,9 @@
 		len = hfs_brec_lenoff(node, 0, &off);
 	}
 	off += node->page_offset + nidx / 8;
-	page = node->page[off >> PAGE_CACHE_SHIFT];
+	page = node->page[off >> PAGE_SHIFT];
 	data = kmap(page);
-	off &= ~PAGE_CACHE_MASK;
+	off &= ~PAGE_MASK;
 	m = 1 << (~nidx & 7);
 	byte = data[off];
 	if (!(byte & m)) {

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 6686bf3..cb1e5fa 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c

@@ -91,8 +91,8 @@
 	if (!tree)
 		return 0;
 
-	if (tree->node_size >= PAGE_CACHE_SIZE) {
-		nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
+	if (tree->node_size >= PAGE_SIZE) {
+		nidx = page->index >> (tree->node_size_shift - PAGE_SHIFT);
 		spin_lock(&tree->hash_lock);
 		node = hfs_bnode_findhash(tree, nidx);
 		if (!node)
@@ -105,8 +105,8 @@
 		}
 		spin_unlock(&tree->hash_lock);
 	} else {
-		nidx = page->index << (PAGE_CACHE_SHIFT - tree->node_size_shift);
-		i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift);
+		nidx = page->index << (PAGE_SHIFT - tree->node_size_shift);
+		i = 1 << (PAGE_SHIFT - tree->node_size_shift);
 		spin_lock(&tree->hash_lock);
 		do {
 			node = hfs_bnode_findhash(tree, nidx++);

diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index d2954451..c0ae274 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c

@@ -13,7 +13,7 @@
 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
 
-#define PAGE_CACHE_BITS	(PAGE_CACHE_SIZE * 8)
+#define PAGE_CACHE_BITS	(PAGE_SIZE * 8)
 
 int hfsplus_block_allocate(struct super_block *sb, u32 size,
 		u32 offset, u32 *max)

diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 6392466..ce014ce 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c

@@ -24,16 +24,16 @@
 	int l;
 
 	off += node->page_offset;
-	pagep = node->page + (off >> PAGE_CACHE_SHIFT);
-	off &= ~PAGE_CACHE_MASK;
+	pagep = node->page + (off >> PAGE_SHIFT);
+	off &= ~PAGE_MASK;
 
-	l = min_t(int, len, PAGE_CACHE_SIZE - off);
+	l = min_t(int, len, PAGE_SIZE - off);
 	memcpy(buf, kmap(*pagep) + off, l);
 	kunmap(*pagep);
 
 	while ((len -= l) != 0) {
 		buf += l;
-		l = min_t(int, len, PAGE_CACHE_SIZE);
+		l = min_t(int, len, PAGE_SIZE);
 		memcpy(buf, kmap(*++pagep), l);
 		kunmap(*pagep);
 	}
@@ -77,17 +77,17 @@
 	int l;
 
 	off += node->page_offset;
-	pagep = node->page + (off >> PAGE_CACHE_SHIFT);
-	off &= ~PAGE_CACHE_MASK;
+	pagep = node->page + (off >> PAGE_SHIFT);
+	off &= ~PAGE_MASK;
 
-	l = min_t(int, len, PAGE_CACHE_SIZE - off);
+	l = min_t(int, len, PAGE_SIZE - off);
 	memcpy(kmap(*pagep) + off, buf, l);
 	set_page_dirty(*pagep);
 	kunmap(*pagep);
 
 	while ((len -= l) != 0) {
 		buf += l;
-		l = min_t(int, len, PAGE_CACHE_SIZE);
+		l = min_t(int, len, PAGE_SIZE);
 		memcpy(kmap(*++pagep), buf, l);
 		set_page_dirty(*pagep);
 		kunmap(*pagep);
@@ -107,16 +107,16 @@
 	int l;
 
 	off += node->page_offset;
-	pagep = node->page + (off >> PAGE_CACHE_SHIFT);
-	off &= ~PAGE_CACHE_MASK;
+	pagep = node->page + (off >> PAGE_SHIFT);
+	off &= ~PAGE_MASK;
 
-	l = min_t(int, len, PAGE_CACHE_SIZE - off);
+	l = min_t(int, len, PAGE_SIZE - off);
 	memset(kmap(*pagep) + off, 0, l);
 	set_page_dirty(*pagep);
 	kunmap(*pagep);
 
 	while ((len -= l) != 0) {
-		l = min_t(int, len, PAGE_CACHE_SIZE);
+		l = min_t(int, len, PAGE_SIZE);
 		memset(kmap(*++pagep), 0, l);
 		set_page_dirty(*pagep);
 		kunmap(*pagep);
@@ -136,20 +136,20 @@
 	tree = src_node->tree;
 	src += src_node->page_offset;
 	dst += dst_node->page_offset;
-	src_page = src_node->page + (src >> PAGE_CACHE_SHIFT);
-	src &= ~PAGE_CACHE_MASK;
-	dst_page = dst_node->page + (dst >> PAGE_CACHE_SHIFT);
-	dst &= ~PAGE_CACHE_MASK;
+	src_page = src_node->page + (src >> PAGE_SHIFT);
+	src &= ~PAGE_MASK;
+	dst_page = dst_node->page + (dst >> PAGE_SHIFT);
+	dst &= ~PAGE_MASK;
 
 	if (src == dst) {
-		l = min_t(int, len, PAGE_CACHE_SIZE - src);
+		l = min_t(int, len, PAGE_SIZE - src);
 		memcpy(kmap(*dst_page) + src, kmap(*src_page) + src, l);
 		kunmap(*src_page);
 		set_page_dirty(*dst_page);
 		kunmap(*dst_page);
 
 		while ((len -= l) != 0) {
-			l = min_t(int, len, PAGE_CACHE_SIZE);
+			l = min_t(int, len, PAGE_SIZE);
 			memcpy(kmap(*++dst_page), kmap(*++src_page), l);
 			kunmap(*src_page);
 			set_page_dirty(*dst_page);
@@ -161,12 +161,12 @@
 		do {
 			src_ptr = kmap(*src_page) + src;
 			dst_ptr = kmap(*dst_page) + dst;
-			if (PAGE_CACHE_SIZE - src < PAGE_CACHE_SIZE - dst) {
-				l = PAGE_CACHE_SIZE - src;
+			if (PAGE_SIZE - src < PAGE_SIZE - dst) {
+				l = PAGE_SIZE - src;
 				src = 0;
 				dst += l;
 			} else {
-				l = PAGE_CACHE_SIZE - dst;
+				l = PAGE_SIZE - dst;
 				src += l;
 				dst = 0;
 			}
@@ -195,11 +195,11 @@
 	dst += node->page_offset;
 	if (dst > src) {
 		src += len - 1;
-		src_page = node->page + (src >> PAGE_CACHE_SHIFT);
-		src = (src & ~PAGE_CACHE_MASK) + 1;
+		src_page = node->page + (src >> PAGE_SHIFT);
+		src = (src & ~PAGE_MASK) + 1;
 		dst += len - 1;
-		dst_page = node->page + (dst >> PAGE_CACHE_SHIFT);
-		dst = (dst & ~PAGE_CACHE_MASK) + 1;
+		dst_page = node->page + (dst >> PAGE_SHIFT);
+		dst = (dst & ~PAGE_MASK) + 1;
 
 		if (src == dst) {
 			while (src < len) {
@@ -208,7 +208,7 @@
 				set_page_dirty(*dst_page);
 				kunmap(*dst_page);
 				len -= src;
-				src = PAGE_CACHE_SIZE;
+				src = PAGE_SIZE;
 				src_page--;
 				dst_page--;
 			}
@@ -226,32 +226,32 @@
 				dst_ptr = kmap(*dst_page) + dst;
 				if (src < dst) {
 					l = src;
-					src = PAGE_CACHE_SIZE;
+					src = PAGE_SIZE;
 					dst -= l;
 				} else {
 					l = dst;
 					src -= l;
-					dst = PAGE_CACHE_SIZE;
+					dst = PAGE_SIZE;
 				}
 				l = min(len, l);
 				memmove(dst_ptr - l, src_ptr - l, l);
 				kunmap(*src_page);
 				set_page_dirty(*dst_page);
 				kunmap(*dst_page);
-				if (dst == PAGE_CACHE_SIZE)
+				if (dst == PAGE_SIZE)
 					dst_page--;
 				else
 					src_page--;
 			} while ((len -= l));
 		}
 	} else {
-		src_page = node->page + (src >> PAGE_CACHE_SHIFT);
-		src &= ~PAGE_CACHE_MASK;
-		dst_page = node->page + (dst >> PAGE_CACHE_SHIFT);
-		dst &= ~PAGE_CACHE_MASK;
+		src_page = node->page + (src >> PAGE_SHIFT);
+		src &= ~PAGE_MASK;
+		dst_page = node->page + (dst >> PAGE_SHIFT);
+		dst &= ~PAGE_MASK;
 
 		if (src == dst) {
-			l = min_t(int, len, PAGE_CACHE_SIZE - src);
+			l = min_t(int, len, PAGE_SIZE - src);
 			memmove(kmap(*dst_page) + src,
 				kmap(*src_page) + src, l);
 			kunmap(*src_page);
@@ -259,7 +259,7 @@
 			kunmap(*dst_page);
 
 			while ((len -= l) != 0) {
-				l = min_t(int, len, PAGE_CACHE_SIZE);
+				l = min_t(int, len, PAGE_SIZE);
 				memmove(kmap(*++dst_page),
 					kmap(*++src_page), l);
 				kunmap(*src_page);
@@ -272,13 +272,13 @@
 			do {
 				src_ptr = kmap(*src_page) + src;
 				dst_ptr = kmap(*dst_page) + dst;
-				if (PAGE_CACHE_SIZE - src <
-						PAGE_CACHE_SIZE - dst) {
-					l = PAGE_CACHE_SIZE - src;
+				if (PAGE_SIZE - src <
+						PAGE_SIZE - dst) {
+					l = PAGE_SIZE - src;
 					src = 0;
 					dst += l;
 				} else {
-					l = PAGE_CACHE_SIZE - dst;
+					l = PAGE_SIZE - dst;
 					src += l;
 					dst = 0;
 				}
@@ -444,14 +444,14 @@
 
 	mapping = tree->inode->i_mapping;
 	off = (loff_t)cnid << tree->node_size_shift;
-	block = off >> PAGE_CACHE_SHIFT;
-	node->page_offset = off & ~PAGE_CACHE_MASK;
+	block = off >> PAGE_SHIFT;
+	node->page_offset = off & ~PAGE_MASK;
 	for (i = 0; i < tree->pages_per_bnode; block++, i++) {
 		page = read_mapping_page(mapping, block, NULL);
 		if (IS_ERR(page))
 			goto fail;
 		if (PageError(page)) {
-			page_cache_release(page);
+			put_page(page);
 			goto fail;
 		}
 		node->page[i] = page;
@@ -569,7 +569,7 @@
 
 	for (i = 0; i < node->tree->pages_per_bnode; i++)
 		if (node->page[i])
-			page_cache_release(node->page[i]);
+			put_page(node->page[i]);
 	kfree(node);
 }
 
@@ -597,11 +597,11 @@
 
 	pagep = node->page;
 	memset(kmap(*pagep) + node->page_offset, 0,
-	       min_t(int, PAGE_CACHE_SIZE, tree->node_size));
+	       min_t(int, PAGE_SIZE, tree->node_size));
 	set_page_dirty(*pagep);
 	kunmap(*pagep);
 	for (i = 1; i < tree->pages_per_bnode; i++) {
-		memset(kmap(*++pagep), 0, PAGE_CACHE_SIZE);
+		memset(kmap(*++pagep), 0, PAGE_SIZE);
 		set_page_dirty(*pagep);
 		kunmap(*pagep);
 	}

diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 3345c75..d9d1a36 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c

@@ -236,15 +236,15 @@
 	tree->node_size_shift = ffs(size) - 1;
 
 	tree->pages_per_bnode =
-		(tree->node_size + PAGE_CACHE_SIZE - 1) >>
-		PAGE_CACHE_SHIFT;
+		(tree->node_size + PAGE_SIZE - 1) >>
+		PAGE_SHIFT;
 
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 	return tree;
 
  fail_page:
-	page_cache_release(page);
+	put_page(page);
  free_inode:
 	tree->inode->i_mapping->a_ops = &hfsplus_aops;
 	iput(tree->inode);
@@ -380,9 +380,9 @@
 	off = off16;
 
 	off += node->page_offset;
-	pagep = node->page + (off >> PAGE_CACHE_SHIFT);
+	pagep = node->page + (off >> PAGE_SHIFT);
 	data = kmap(*pagep);
-	off &= ~PAGE_CACHE_MASK;
+	off &= ~PAGE_MASK;
 	idx = 0;
 
 	for (;;) {
@@ -403,7 +403,7 @@
 					}
 				}
 			}
-			if (++off >= PAGE_CACHE_SIZE) {
+			if (++off >= PAGE_SIZE) {
 				kunmap(*pagep);
 				data = kmap(*++pagep);
 				off = 0;
@@ -426,9 +426,9 @@
 		len = hfs_brec_lenoff(node, 0, &off16);
 		off = off16;
 		off += node->page_offset;
-		pagep = node->page + (off >> PAGE_CACHE_SHIFT);
+		pagep = node->page + (off >> PAGE_SHIFT);
 		data = kmap(*pagep);
-		off &= ~PAGE_CACHE_MASK;
+		off &= ~PAGE_MASK;
 	}
 }
 
@@ -475,9 +475,9 @@
 		len = hfs_brec_lenoff(node, 0, &off);
 	}
 	off += node->page_offset + nidx / 8;
-	page = node->page[off >> PAGE_CACHE_SHIFT];
+	page = node->page[off >> PAGE_SHIFT];
 	data = kmap(page);
-	off &= ~PAGE_CACHE_MASK;
+	off &= ~PAGE_MASK;
 	m = 1 << (~nidx & 7);
 	byte = data[off];
 	if (!(byte & m)) {

diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 1a6394c..b28f398 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c

@@ -87,9 +87,9 @@
 	}
 	if (!tree)
 		return 0;
-	if (tree->node_size >= PAGE_CACHE_SIZE) {
+	if (tree->node_size >= PAGE_SIZE) {
 		nidx = page->index >>
-			(tree->node_size_shift - PAGE_CACHE_SHIFT);
+			(tree->node_size_shift - PAGE_SHIFT);
 		spin_lock(&tree->hash_lock);
 		node = hfs_bnode_findhash(tree, nidx);
 		if (!node)
@@ -103,8 +103,8 @@
 		spin_unlock(&tree->hash_lock);
 	} else {
 		nidx = page->index <<
-			(PAGE_CACHE_SHIFT - tree->node_size_shift);
-		i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift);
+			(PAGE_SHIFT - tree->node_size_shift);
+		i = 1 << (PAGE_SHIFT - tree->node_size_shift);
 		spin_lock(&tree->hash_lock);
 		do {
 			node = hfs_bnode_findhash(tree, nidx++);

diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 5d54490..c359113 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c

@@ -438,7 +438,7 @@
 	err = -EFBIG;
 	last_fs_block = sbi->total_blocks - 1;
 	last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >>
-			PAGE_CACHE_SHIFT;
+			PAGE_SHIFT;
 
 	if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) ||
 	    (last_fs_page > (pgoff_t)(~0ULL))) {

diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index ab01530..70e445f 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c

@@ -220,7 +220,7 @@
 
 	index = 0;
 	written = 0;
-	for (; written < node_size; index++, written += PAGE_CACHE_SIZE) {
+	for (; written < node_size; index++, written += PAGE_SIZE) {
 		void *kaddr;
 
 		page = read_mapping_page(mapping, index, NULL);
@@ -231,11 +231,11 @@
 
 		kaddr = kmap_atomic(page);
 		memcpy(kaddr, buf + written,
-			min_t(size_t, PAGE_CACHE_SIZE, node_size - written));
+			min_t(size_t, PAGE_SIZE, node_size - written));
 		kunmap_atomic(kaddr);
 
 		set_page_dirty(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	hfsplus_mark_inode_dirty(attr_file, HFSPLUS_I_ATTR_DIRTY);

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index d1abbee..7016653 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c

@@ -410,12 +410,12 @@
 	struct inode *inode = mapping->host;
 	char *buffer;
 	loff_t base = page_offset(page);
-	int count = PAGE_CACHE_SIZE;
-	int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	int count = PAGE_SIZE;
+	int end_index = inode->i_size >> PAGE_SHIFT;
 	int err;
 
 	if (page->index >= end_index)
-		count = inode->i_size & (PAGE_CACHE_SIZE-1);
+		count = inode->i_size & (PAGE_SIZE-1);
 
 	buffer = kmap(page);
 
@@ -447,7 +447,7 @@
 
 	buffer = kmap(page);
 	bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
-			PAGE_CACHE_SIZE);
+			PAGE_SIZE);
 	if (bytes_read < 0) {
 		ClearPageUptodate(page);
 		SetPageError(page);
@@ -455,7 +455,7 @@
 		goto out;
 	}
 
-	memset(buffer + bytes_read, 0, PAGE_CACHE_SIZE - bytes_read);
+	memset(buffer + bytes_read, 0, PAGE_SIZE - bytes_read);
 
 	ClearPageError(page);
 	SetPageUptodate(page);
@@ -471,7 +471,7 @@
 			      loff_t pos, unsigned len, unsigned flags,
 			      struct page **pagep, void **fsdata)
 {
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 
 	*pagep = grab_cache_page_write_begin(mapping, index, flags);
 	if (!*pagep)
@@ -485,14 +485,14 @@
 {
 	struct inode *inode = mapping->host;
 	void *buffer;
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned from = pos & (PAGE_SIZE - 1);
 	int err;
 
 	buffer = kmap(page);
 	err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied);
 	kunmap(page);
 
-	if (!PageUptodate(page) && err == PAGE_CACHE_SIZE)
+	if (!PageUptodate(page) && err == PAGE_SIZE)
 		SetPageUptodate(page);
 
 	/*
@@ -502,7 +502,7 @@
 	if (err > 0 && (pos > inode->i_size))
 		inode->i_size = pos;
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return err;
 }

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e1f465a..4ea71eb 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c

@@ -213,12 +213,12 @@
 	int i, chunksize;
 
 	/* Find which 4k chunk and offset with in that chunk */
-	i = offset >> PAGE_CACHE_SHIFT;
-	offset = offset & ~PAGE_CACHE_MASK;
+	i = offset >> PAGE_SHIFT;
+	offset = offset & ~PAGE_MASK;
 
 	while (size) {
 		size_t n;
-		chunksize = PAGE_CACHE_SIZE;
+		chunksize = PAGE_SIZE;
 		if (offset)
 			chunksize -= offset;
 		if (chunksize > size)
@@ -237,7 +237,7 @@
 /*
  * Support for read() - Find the page attached to f_mapping and copy out the
  * data. Its *very* similar to do_generic_mapping_read(), we can't use that
- * since it has PAGE_CACHE_SIZE assumptions.
+ * since it has PAGE_SIZE assumptions.
  */
 static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -285,7 +285,7 @@
 			 * We have the page, copy it to user space buffer.
 			 */
 			copied = hugetlbfs_read_actor(page, offset, to, nr);
-			page_cache_release(page);
+			put_page(page);
 		}
 		offset += copied;
 		retval += copied;

diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index f311bf0..2e4e834 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c

@@ -26,7 +26,7 @@
 #include "zisofs.h"
 
 /* This should probably be global. */
-static char zisofs_sink_page[PAGE_CACHE_SIZE];
+static char zisofs_sink_page[PAGE_SIZE];
 
 /*
  * This contains the zlib memory allocation and the mutex for the
@@ -70,11 +70,11 @@
 		for ( i = 0 ; i < pcount ; i++ ) {
 			if (!pages[i])
 				continue;
-			memset(page_address(pages[i]), 0, PAGE_CACHE_SIZE);
+			memset(page_address(pages[i]), 0, PAGE_SIZE);
 			flush_dcache_page(pages[i]);
 			SetPageUptodate(pages[i]);
 		}
-		return ((loff_t)pcount) << PAGE_CACHE_SHIFT;
+		return ((loff_t)pcount) << PAGE_SHIFT;
 	}
 
 	/* Because zlib is not thread-safe, do all the I/O at the top. */
@@ -121,11 +121,11 @@
 			if (pages[curpage]) {
 				stream.next_out = page_address(pages[curpage])
 						+ poffset;
-				stream.avail_out = PAGE_CACHE_SIZE - poffset;
+				stream.avail_out = PAGE_SIZE - poffset;
 				poffset = 0;
 			} else {
 				stream.next_out = (void *)&zisofs_sink_page;
-				stream.avail_out = PAGE_CACHE_SIZE;
+				stream.avail_out = PAGE_SIZE;
 			}
 		}
 		if (!stream.avail_in) {
@@ -220,14 +220,14 @@
 	 * pages with the data we have anyway...
 	 */
 	start_off = page_offset(pages[full_page]);
-	end_off = min_t(loff_t, start_off + PAGE_CACHE_SIZE, inode->i_size);
+	end_off = min_t(loff_t, start_off + PAGE_SIZE, inode->i_size);
 
 	cstart_block = start_off >> zisofs_block_shift;
 	cend_block = (end_off + (1 << zisofs_block_shift) - 1)
 			>> zisofs_block_shift;
 
-	WARN_ON(start_off - (full_page << PAGE_CACHE_SHIFT) !=
-		((cstart_block << zisofs_block_shift) & PAGE_CACHE_MASK));
+	WARN_ON(start_off - (full_page << PAGE_SHIFT) !=
+		((cstart_block << zisofs_block_shift) & PAGE_MASK));
 
 	/* Find the pointer to this specific chunk */
 	/* Note: we're not using isonum_731() here because the data is known aligned */
@@ -260,10 +260,10 @@
 		ret = zisofs_uncompress_block(inode, block_start, block_end,
 					      pcount, pages, poffset, &err);
 		poffset += ret;
-		pages += poffset >> PAGE_CACHE_SHIFT;
-		pcount -= poffset >> PAGE_CACHE_SHIFT;
-		full_page -= poffset >> PAGE_CACHE_SHIFT;
-		poffset &= ~PAGE_CACHE_MASK;
+		pages += poffset >> PAGE_SHIFT;
+		pcount -= poffset >> PAGE_SHIFT;
+		full_page -= poffset >> PAGE_SHIFT;
+		poffset &= ~PAGE_MASK;
 
 		if (err) {
 			brelse(bh);
@@ -282,7 +282,7 @@
 
 	if (poffset && *pages) {
 		memset(page_address(*pages) + poffset, 0,
-		       PAGE_CACHE_SIZE - poffset);
+		       PAGE_SIZE - poffset);
 		flush_dcache_page(*pages);
 		SetPageUptodate(*pages);
 	}
@@ -302,12 +302,12 @@
 	int i, pcount, full_page;
 	unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
 	unsigned int zisofs_pages_per_cblock =
-		PAGE_CACHE_SHIFT <= zisofs_block_shift ?
-		(1 << (zisofs_block_shift - PAGE_CACHE_SHIFT)) : 0;
+		PAGE_SHIFT <= zisofs_block_shift ?
+		(1 << (zisofs_block_shift - PAGE_SHIFT)) : 0;
 	struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)];
 	pgoff_t index = page->index, end_index;
 
-	end_index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	end_index = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	/*
 	 * If this page is wholly outside i_size we just return zero;
 	 * do_generic_file_read() will handle this for us
@@ -318,7 +318,7 @@
 		return 0;
 	}
 
-	if (PAGE_CACHE_SHIFT <= zisofs_block_shift) {
+	if (PAGE_SHIFT <= zisofs_block_shift) {
 		/* We have already been given one page, this is the one
 		   we must do. */
 		full_page = index & (zisofs_pages_per_cblock - 1);
@@ -351,7 +351,7 @@
 			kunmap(pages[i]);
 			unlock_page(pages[i]);
 			if (i != full_page)
-				page_cache_release(pages[i]);
+				put_page(pages[i]);
 		}
 	}			
 

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bcd2d41..131dedc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c

@@ -1021,7 +1021,7 @@
 		 * the page with useless information without generating any
 		 * I/O errors.
 		 */
-		if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
+		if (b_off > ((inode->i_size + PAGE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
 			printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
 				__func__, b_off,
 				(unsigned long long)inode->i_size);

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 517f2de..2ad98d6 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c

@@ -81,11 +81,11 @@
 	if (!trylock_page(page))
 		goto nope;
 
-	page_cache_get(page);
+	get_page(page);
 	__brelse(bh);
 	try_to_free_buffers(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return;
 
 nope:

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index de73a95..435f0b2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c

@@ -2221,7 +2221,7 @@
 
 int jbd2_journal_blocks_per_page(struct inode *inode)
 {
-	return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+	return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
 }
 
 /*

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 01e4652d..67c1038 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c

@@ -2263,7 +2263,7 @@
 	struct buffer_head *head, *bh, *next;
 	unsigned int stop = offset + length;
 	unsigned int curr_off = 0;
-	int partial_page = (offset || length < PAGE_CACHE_SIZE);
+	int partial_page = (offset || length < PAGE_SIZE);
 	int may_free = 1;
 	int ret = 0;
 
@@ -2272,7 +2272,7 @@
 	if (!page_has_buffers(page))
 		return 0;
 
-	BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+	BUG_ON(stop > PAGE_SIZE || stop < length);
 
 	/* We will potentially be playing with lists other than just the
 	 * data lists (especially for journaled data mode), so be

diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index 1090eb6..9d26b1b9 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c

@@ -95,15 +95,15 @@
 			   rather than mucking around with actually reading the node
 			   and checking the compression type, which is the real way
 			   to tell a hole node. */
-			if (frag->ofs & (PAGE_CACHE_SIZE-1) && frag_prev(frag)
-					&& frag_prev(frag)->size < PAGE_CACHE_SIZE && frag_prev(frag)->node) {
+			if (frag->ofs & (PAGE_SIZE-1) && frag_prev(frag)
+					&& frag_prev(frag)->size < PAGE_SIZE && frag_prev(frag)->node) {
 				JFFS2_ERROR("REF_PRISTINE node at 0x%08x had a previous non-hole frag in the same page. Tell dwmw2.\n",
 					ref_offset(fn->raw));
 				bitched = 1;
 			}
 
-			if ((frag->ofs+frag->size) & (PAGE_CACHE_SIZE-1) && frag_next(frag)
-					&& frag_next(frag)->size < PAGE_CACHE_SIZE && frag_next(frag)->node) {
+			if ((frag->ofs+frag->size) & (PAGE_SIZE-1) && frag_next(frag)
+					&& frag_next(frag)->size < PAGE_SIZE && frag_next(frag)->node) {
 				JFFS2_ERROR("REF_PRISTINE node at 0x%08x (%08x-%08x) had a following non-hole frag in the same page. Tell dwmw2.\n",
 				       ref_offset(fn->raw), frag->ofs, frag->ofs+frag->size);
 				bitched = 1;

diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index cad86ba..0e62dec 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c

@@ -87,14 +87,15 @@
 	int ret;
 
 	jffs2_dbg(2, "%s(): ino #%lu, page at offset 0x%lx\n",
-		  __func__, inode->i_ino, pg->index << PAGE_CACHE_SHIFT);
+		  __func__, inode->i_ino, pg->index << PAGE_SHIFT);
 
 	BUG_ON(!PageLocked(pg));
 
 	pg_buf = kmap(pg);
 	/* FIXME: Can kmap fail? */
 
-	ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE);
+	ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_SHIFT,
+				     PAGE_SIZE);
 
 	if (ret) {
 		ClearPageUptodate(pg);
@@ -137,8 +138,8 @@
 	struct page *pg;
 	struct inode *inode = mapping->host;
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	uint32_t pageofs = index << PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
+	uint32_t pageofs = index << PAGE_SHIFT;
 	int ret = 0;
 
 	pg = grab_cache_page_write_begin(mapping, index, flags);
@@ -230,7 +231,7 @@
 
 out_page:
 	unlock_page(pg);
-	page_cache_release(pg);
+	put_page(pg);
 	return ret;
 }
 
@@ -245,14 +246,14 @@
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
 	struct jffs2_raw_inode *ri;
-	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned start = pos & (PAGE_SIZE - 1);
 	unsigned end = start + copied;
 	unsigned aligned_start = start & ~3;
 	int ret = 0;
 	uint32_t writtenlen = 0;
 
 	jffs2_dbg(1, "%s(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
-		  __func__, inode->i_ino, pg->index << PAGE_CACHE_SHIFT,
+		  __func__, inode->i_ino, pg->index << PAGE_SHIFT,
 		  start, end, pg->flags);
 
 	/* We need to avoid deadlock with page_cache_read() in
@@ -261,7 +262,7 @@
 	   to re-lock it. */
 	BUG_ON(!PageUptodate(pg));
 
-	if (end == PAGE_CACHE_SIZE) {
+	if (end == PAGE_SIZE) {
 		/* When writing out the end of a page, write out the
 		   _whole_ page. This helps to reduce the number of
 		   nodes in files which have many short writes, like
@@ -275,7 +276,7 @@
 		jffs2_dbg(1, "%s(): Allocation of raw inode failed\n",
 			  __func__);
 		unlock_page(pg);
-		page_cache_release(pg);
+		put_page(pg);
 		return -ENOMEM;
 	}
 
@@ -292,7 +293,7 @@
 	kmap(pg);
 
 	ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start,
-				      (pg->index << PAGE_CACHE_SHIFT) + aligned_start,
+				      (pg->index << PAGE_SHIFT) + aligned_start,
 				      end - aligned_start, &writtenlen);
 
 	kunmap(pg);
@@ -329,6 +330,6 @@
 	jffs2_dbg(1, "%s() returning %d\n",
 		  __func__, writtenlen > 0 ? writtenlen : ret);
 	unlock_page(pg);
-	page_cache_release(pg);
+	put_page(pg);
 	return writtenlen > 0 ? writtenlen : ret;
 }

diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index bead25a..ae2ebb2 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c

@@ -586,8 +586,8 @@
 		goto out_root;
 
 	sb->s_maxbytes = 0xFFFFFFFF;
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = JFFS2_SUPER_MAGIC;
 	if (!(sb->s_flags & MS_RDONLY))
 		jffs2_start_garbage_collect_thread(c);
@@ -685,7 +685,7 @@
 	struct inode *inode = OFNI_EDONI_2SFFJ(f);
 	struct page *pg;
 
-	pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
+	pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT,
 			     (void *)jffs2_do_readpage_unlock, inode);
 	if (IS_ERR(pg))
 		return (void *)pg;
@@ -701,7 +701,7 @@
 	struct page *pg = (void *)*priv;
 
 	kunmap(pg);
-	page_cache_release(pg);
+	put_page(pg);
 }
 
 static int jffs2_flash_setup(struct jffs2_sb_info *c) {

diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 95d5880..9ed0f26 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c

@@ -134,38 +134,60 @@
 	if (mutex_lock_interruptible(&c->alloc_sem))
 		return -EINTR;
 
+
 	for (;;) {
+		/* We can't start doing GC until we've finished checking
+		   the node CRCs etc. */
+		int bucket, want_ino;
+
 		spin_lock(&c->erase_completion_lock);
 		if (!c->unchecked_size)
 			break;
-
-		/* We can't start doing GC yet. We haven't finished checking
-		   the node CRCs etc. Do it now. */
-
-		/* checked_ino is protected by the alloc_sem */
-		if (c->checked_ino > c->highest_ino && xattr) {
-			pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
-				c->unchecked_size);
-			jffs2_dbg_dump_block_lists_nolock(c);
-			spin_unlock(&c->erase_completion_lock);
-			mutex_unlock(&c->alloc_sem);
-			return -ENOSPC;
-		}
-
 		spin_unlock(&c->erase_completion_lock);
 
 		if (!xattr)
 			xattr = jffs2_verify_xattr(c);
 
 		spin_lock(&c->inocache_lock);
+		/* Instead of doing the inodes in numeric order, doing a lookup
+		 * in the hash for each possible number, just walk the hash
+		 * buckets of *existing* inodes. This means that we process
+		 * them out-of-order, but it can be a lot faster if there's
+		 * a sparse inode# space. Which there often is. */
+		want_ino = c->check_ino;
+		for (bucket = c->check_ino % c->inocache_hashsize ; bucket < c->inocache_hashsize; bucket++) {
+			for (ic = c->inocache_list[bucket]; ic; ic = ic->next) {
+				if (ic->ino < want_ino)
+					continue;
 
-		ic = jffs2_get_ino_cache(c, c->checked_ino++);
+				if (ic->state != INO_STATE_CHECKEDABSENT &&
+				    ic->state != INO_STATE_PRESENT)
+					goto got_next; /* with inocache_lock held */
 
-		if (!ic) {
-			spin_unlock(&c->inocache_lock);
-			continue;
+				jffs2_dbg(1, "Skipping ino #%u already checked\n",
+					  ic->ino);
+			}
+			want_ino = 0;
 		}
 
+		/* Point c->check_ino past the end of the last bucket. */
+		c->check_ino = ((c->highest_ino + c->inocache_hashsize + 1) &
+				~c->inocache_hashsize) - 1;
+
+		spin_unlock(&c->inocache_lock);
+
+		pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
+			c->unchecked_size);
+		jffs2_dbg_dump_block_lists_nolock(c);
+		mutex_unlock(&c->alloc_sem);
+		return -ENOSPC;
+
+	got_next:
+		/* For next time round the loop, we want c->checked_ino to indicate
+		 * the *next* one we want to check. And since we're walking the
+		 * buckets rather than doing it sequentially, it's: */
+		c->check_ino = ic->ino + c->inocache_hashsize;
+
 		if (!ic->pino_nlink) {
 			jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
 				  ic->ino);
@@ -176,8 +198,6 @@
 		switch(ic->state) {
 		case INO_STATE_CHECKEDABSENT:
 		case INO_STATE_PRESENT:
-			jffs2_dbg(1, "Skipping ino #%u already checked\n",
-				  ic->ino);
 			spin_unlock(&c->inocache_lock);
 			continue;
 
@@ -196,7 +216,7 @@
 				  ic->ino);
 			/* We need to come back again for the _same_ inode. We've
 			 made no progress in this case, but that should be OK */
-			c->checked_ino--;
+			c->check_ino = ic->ino;
 
 			mutex_unlock(&c->alloc_sem);
 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
@@ -532,7 +552,7 @@
 				goto upnout;
 		}
 		/* We found a datanode. Do the GC */
-		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
+		if((start >> PAGE_SHIFT) < ((end-1) >> PAGE_SHIFT)) {
 			/* It crosses a page boundary. Therefore, it must be a hole. */
 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
 		} else {
@@ -1172,8 +1192,8 @@
 		struct jffs2_node_frag *frag;
 		uint32_t min, max;
 
-		min = start & ~(PAGE_CACHE_SIZE-1);
-		max = min + PAGE_CACHE_SIZE;
+		min = start & ~(PAGE_SIZE-1);
+		max = min + PAGE_SIZE;
 
 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
 
@@ -1331,7 +1351,7 @@
 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
 		datalen = end - offset;
 
-		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
+		writebuf = pg_ptr + (offset & (PAGE_SIZE -1));
 
 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
 

diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 046fee8..778275f 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h

@@ -49,7 +49,7 @@
 	struct mtd_info *mtd;
 
 	uint32_t highest_ino;
-	uint32_t checked_ino;
+	uint32_t check_ino;		/* *NEXT* inode to be checked */
 
 	unsigned int flags;
 

diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 9a5449b..b86c78d 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c

@@ -90,7 +90,7 @@
 
 	/* If the last fragment starts at the RAM page boundary, it is
 	 * REF_PRISTINE irrespective of its size. */
-	if (frag->node && (frag->ofs & (PAGE_CACHE_SIZE - 1)) == 0) {
+	if (frag->node && (frag->ofs & (PAGE_SIZE - 1)) == 0) {
 		dbg_fragtree2("marking the last fragment 0x%08x-0x%08x REF_PRISTINE.\n",
 			frag->ofs, frag->ofs + frag->size);
 		frag->node->raw->flash_offset = ref_offset(frag->node->raw) | REF_PRISTINE;
@@ -237,7 +237,7 @@
 		   If so, both 'this' and the new node get marked REF_NORMAL so
 		   the GC can take a look.
 		*/
-		if (lastend && (lastend-1) >> PAGE_CACHE_SHIFT == newfrag->ofs >> PAGE_CACHE_SHIFT) {
+		if (lastend && (lastend-1) >> PAGE_SHIFT == newfrag->ofs >> PAGE_SHIFT) {
 			if (this->node)
 				mark_ref_normal(this->node->raw);
 			mark_ref_normal(newfrag->node->raw);
@@ -382,7 +382,7 @@
 
 	/* If we now share a page with other nodes, mark either previous
 	   or next node REF_NORMAL, as appropriate.  */
-	if (newfrag->ofs & (PAGE_CACHE_SIZE-1)) {
+	if (newfrag->ofs & (PAGE_SIZE-1)) {
 		struct jffs2_node_frag *prev = frag_prev(newfrag);
 
 		mark_ref_normal(fn->raw);
@@ -391,7 +391,7 @@
 			mark_ref_normal(prev->node->raw);
 	}
 
-	if ((newfrag->ofs+newfrag->size) & (PAGE_CACHE_SIZE-1)) {
+	if ((newfrag->ofs+newfrag->size) & (PAGE_SIZE-1)) {
 		struct jffs2_node_frag *next = frag_next(newfrag);
 
 		if (next) {

diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index b6bd4af..cda0774 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c

@@ -846,8 +846,8 @@
 		return 1;
 
 	if (c->unchecked_size) {
-		jffs2_dbg(1, "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n",
-			  c->unchecked_size, c->checked_ino);
+		jffs2_dbg(1, "jffs2_thread_should_wake(): unchecked_size %d, check_ino #%d\n",
+			  c->unchecked_size, c->check_ino);
 		return 1;
 	}
 

diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 5a3da3f..b25d28a 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c

@@ -1183,22 +1183,20 @@
 
 int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
 {
-	struct nand_ecclayout *oinfo = c->mtd->ecclayout;
-
 	if (!c->mtd->oobsize)
 		return 0;
 
 	/* Cleanmarker is out-of-band, so inline size zero */
 	c->cleanmarker_size = 0;
 
-	if (!oinfo || oinfo->oobavail == 0) {
+	if (c->mtd->oobavail == 0) {
 		pr_err("inconsistent device description\n");
 		return -EINVAL;
 	}
 
 	jffs2_dbg(1, "using OOB on NAND\n");
 
-	c->oobavail = oinfo->oobavail;
+	c->oobavail = c->mtd->oobavail;
 
 	/* Initialise write buffer */
 	init_rwsem(&c->wbuf_sem);

diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index b634de4c..7fb187a 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c

@@ -172,8 +172,8 @@
 	   beginning of a page and runs to the end of the file, or if
 	   it's a hole node, mark it REF_PRISTINE, else REF_NORMAL.
 	*/
-	if ((je32_to_cpu(ri->dsize) >= PAGE_CACHE_SIZE) ||
-	    ( ((je32_to_cpu(ri->offset)&(PAGE_CACHE_SIZE-1))==0) &&
+	if ((je32_to_cpu(ri->dsize) >= PAGE_SIZE) ||
+	    ( ((je32_to_cpu(ri->offset)&(PAGE_SIZE-1))==0) &&
 	      (je32_to_cpu(ri->dsize)+je32_to_cpu(ri->offset) ==  je32_to_cpu(ri->isize)))) {
 		flash_ofs |= REF_PRISTINE;
 	} else {
@@ -366,7 +366,8 @@
 			break;
 		}
 		mutex_lock(&f->sem);
-		datalen = min_t(uint32_t, writelen, PAGE_CACHE_SIZE - (offset & (PAGE_CACHE_SIZE-1)));
+		datalen = min_t(uint32_t, writelen,
+				PAGE_SIZE - (offset & (PAGE_SIZE-1)));
 		cdatalen = min_t(uint32_t, alloclen - sizeof(*ri), datalen);
 
 		comprtype = jffs2_compress(c, f, buf, &comprbuf, &datalen, &cdatalen);

diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index a3eb316..b60e015 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c

@@ -80,7 +80,7 @@
 static struct kmem_cache *metapage_cache;
 static mempool_t *metapage_mempool;
 
-#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
+#define MPS_PER_PAGE (PAGE_SIZE >> L2PSIZE)
 
 #if MPS_PER_PAGE > 1
 
@@ -316,7 +316,7 @@
 	struct metapage *mp;
 	unsigned int offset;
 
-	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+	for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
 		mp = page_to_mp(page, offset);
 		if (mp && test_bit(META_io, &mp->flag)) {
 			if (mp->lsn)
@@ -366,12 +366,12 @@
 	int bad_blocks = 0;
 
 	page_start = (sector_t)page->index <<
-		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		     (PAGE_SHIFT - inode->i_blkbits);
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
 	set_page_writeback(page);
 
-	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+	for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
 		mp = page_to_mp(page, offset);
 
 		if (!mp || !test_bit(META_dirty, &mp->flag))
@@ -416,7 +416,7 @@
 			bio = NULL;
 		} else
 			inc_io(page);
-		xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
+		xlen = (PAGE_SIZE - offset) >> inode->i_blkbits;
 		pblock = metapage_get_blocks(inode, lblock, &xlen);
 		if (!pblock) {
 			printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
@@ -485,7 +485,7 @@
 	struct inode *inode = page->mapping->host;
 	struct bio *bio = NULL;
 	int block_offset;
-	int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
+	int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
 	sector_t page_start;	/* address of page in fs blocks */
 	sector_t pblock;
 	int xlen;
@@ -494,7 +494,7 @@
 
 	BUG_ON(!PageLocked(page));
 	page_start = (sector_t)page->index <<
-		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		     (PAGE_SHIFT - inode->i_blkbits);
 
 	block_offset = 0;
 	while (block_offset < blocks_per_page) {
@@ -542,7 +542,7 @@
 	int ret = 1;
 	int offset;
 
-	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+	for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
 		mp = page_to_mp(page, offset);
 
 		if (!mp)
@@ -568,7 +568,7 @@
 static void metapage_invalidatepage(struct page *page, unsigned int offset,
 				    unsigned int length)
 {
-	BUG_ON(offset || length < PAGE_CACHE_SIZE);
+	BUG_ON(offset || length < PAGE_SIZE);
 
 	BUG_ON(PageWriteback(page));
 
@@ -599,10 +599,10 @@
 		 inode->i_ino, lblock, absolute);
 
 	l2bsize = inode->i_blkbits;
-	l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
+	l2BlocksPerPage = PAGE_SHIFT - l2bsize;
 	page_index = lblock >> l2BlocksPerPage;
 	page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
-	if ((page_offset + size) > PAGE_CACHE_SIZE) {
+	if ((page_offset + size) > PAGE_SIZE) {
 		jfs_err("MetaData crosses page boundary!!");
 		jfs_err("lblock = %lx, size  = %d", lblock, size);
 		dump_stack();
@@ -621,7 +621,7 @@
 		mapping = inode->i_mapping;
 	}
 
-	if (new && (PSIZE == PAGE_CACHE_SIZE)) {
+	if (new && (PSIZE == PAGE_SIZE)) {
 		page = grab_cache_page(mapping, page_index);
 		if (!page) {
 			jfs_err("grab_cache_page failed!");
@@ -693,7 +693,7 @@
 void grab_metapage(struct metapage * mp)
 {
 	jfs_info("grab_metapage: mp = 0x%p", mp);
-	page_cache_get(mp->page);
+	get_page(mp->page);
 	lock_page(mp->page);
 	mp->count++;
 	lock_metapage(mp);
@@ -706,12 +706,12 @@
 	jfs_info("force_metapage: mp = 0x%p", mp);
 	set_bit(META_forcewrite, &mp->flag);
 	clear_bit(META_sync, &mp->flag);
-	page_cache_get(page);
+	get_page(page);
 	lock_page(page);
 	set_page_dirty(page);
 	write_one_page(page, 1);
 	clear_bit(META_forcewrite, &mp->flag);
-	page_cache_release(page);
+	put_page(page);
 }
 
 void hold_metapage(struct metapage *mp)
@@ -726,7 +726,7 @@
 		unlock_page(mp->page);
 		return;
 	}
-	page_cache_get(mp->page);
+	get_page(mp->page);
 	mp->count++;
 	lock_metapage(mp);
 	unlock_page(mp->page);
@@ -746,7 +746,7 @@
 	assert(mp->count);
 	if (--mp->count || mp->nohomeok) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		return;
 	}
 
@@ -764,13 +764,13 @@
 	drop_metapage(page, mp);
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 {
 	sector_t lblock;
-	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
+	int l2BlocksPerPage = PAGE_SHIFT - ip->i_blkbits;
 	int BlocksPerPage = 1 << l2BlocksPerPage;
 	/* All callers are interested in block device's mapping */
 	struct address_space *mapping =
@@ -788,7 +788,7 @@
 		page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
 		if (!page)
 			continue;
-		for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+		for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
 			mp = page_to_mp(page, offset);
 			if (!mp)
 				continue;
@@ -803,7 +803,7 @@
 				remove_from_logsync(mp);
 		}
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 }
 

diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 337e9e5..a869fb4 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h

@@ -106,7 +106,7 @@
 	lock_page(page);
 	if (!mp->nohomeok++) {
 		mark_metapage_dirty(mp);
-		page_cache_get(page);
+		get_page(page);
 		wait_on_page_writeback(page);
 	}
 	unlock_page(page);
@@ -128,7 +128,7 @@
 static inline void _metapage_homeok(struct metapage *mp)
 {
 	if (!--mp->nohomeok)
-		page_cache_release(mp->page);
+		put_page(mp->page);
 }
 
 static inline void metapage_homeok(struct metapage *mp)

diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4f5d85b..78d5991 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c

@@ -596,7 +596,7 @@
 	 * Page cache is indexed by long.
 	 * I would use MAX_LFS_FILESIZE, but it's only half as big
 	 */
-	sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1,
+	sb->s_maxbytes = min(((u64) PAGE_SIZE << 32) - 1,
 			     (u64)sb->s_maxbytes);
 #endif
 	sb->s_time_gran = 1;

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index b67dbcc..f73541f 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c

@@ -138,8 +138,8 @@
 	struct dentry *root;
 
 	info->sb = sb;
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = magic;
 	sb->s_op = &kernfs_sops;
 	sb->s_time_gran = 1;

diff --git a/fs/libfs.c b/fs/libfs.c
index 0ca80b2..f3fa82c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c

@@ -25,7 +25,7 @@
 {
 	struct inode *inode = d_inode(dentry);
 	generic_fillattr(inode, stat);
-	stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
+	stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
 	return 0;
 }
 EXPORT_SYMBOL(simple_getattr);
@@ -33,7 +33,7 @@
 int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	buf->f_type = dentry->d_sb->s_magic;
-	buf->f_bsize = PAGE_CACHE_SIZE;
+	buf->f_bsize = PAGE_SIZE;
 	buf->f_namelen = NAME_MAX;
 	return 0;
 }
@@ -395,7 +395,7 @@
 	struct page *page;
 	pgoff_t index;
 
-	index = pos >> PAGE_CACHE_SHIFT;
+	index = pos >> PAGE_SHIFT;
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
@@ -403,10 +403,10 @@
 
 	*pagep = page;
 
-	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
-		unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	if (!PageUptodate(page) && (len != PAGE_SIZE)) {
+		unsigned from = pos & (PAGE_SIZE - 1);
 
-		zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE);
+		zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
 	}
 	return 0;
 }
@@ -442,7 +442,7 @@
 
 	/* zero the stale part of the page if we did a short copy */
 	if (copied < len) {
-		unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+		unsigned from = pos & (PAGE_SIZE - 1);
 
 		zero_user(page, from + copied, len - copied);
 	}
@@ -458,7 +458,7 @@
 
 	set_page_dirty(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return copied;
 }
@@ -477,8 +477,8 @@
 	struct dentry *dentry;
 	int i;
 
-	s->s_blocksize = PAGE_CACHE_SIZE;
-	s->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	s->s_blocksize = PAGE_SIZE;
+	s->s_blocksize_bits = PAGE_SHIFT;
 	s->s_magic = magic;
 	s->s_op = &simple_super_operations;
 	s->s_time_gran = 1;
@@ -994,12 +994,12 @@
 {
 	u64 last_fs_block = num_blocks - 1;
 	u64 last_fs_page =
-		last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits);
+		last_fs_block >> (PAGE_SHIFT - blocksize_bits);
 
 	if (unlikely(num_blocks == 0))
 		return 0;
 
-	if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT))
+	if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT))
 		return -EINVAL;
 
 	if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||

diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index a709d80..cc26f8f 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c

@@ -64,7 +64,7 @@
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		end_page_writeback(bvec->bv_page);
-		page_cache_release(bvec->bv_page);
+		put_page(bvec->bv_page);
 	}
 	bio_put(bio);
 	if (atomic_dec_and_test(&super->s_pending_writes))

diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 9c50144..b76a62b 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c

@@ -46,9 +46,9 @@
 
 	BUG_ON((ofs >= mtd->size) || (len > mtd->size - ofs));
 	BUG_ON(ofs != (ofs >> super->s_writeshift) << super->s_writeshift);
-	BUG_ON(len > PAGE_CACHE_SIZE);
-	page_start = ofs & PAGE_CACHE_MASK;
-	page_end = PAGE_CACHE_ALIGN(ofs + len) - 1;
+	BUG_ON(len > PAGE_SIZE);
+	page_start = ofs & PAGE_MASK;
+	page_end = PAGE_ALIGN(ofs + len) - 1;
 	ret = mtd_write(mtd, ofs, len, &retlen, buf);
 	if (ret || (retlen != len))
 		return -EIO;
@@ -82,7 +82,7 @@
 		if (!page)
 			continue;
 		memset(page_address(page), 0xFF, PAGE_SIZE);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return 0;
 }
@@ -195,7 +195,7 @@
 		err = loffs_mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
 					page_address(page));
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		if (err)
 			return err;
 	}

diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 542468e..ddbed2b 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c

@@ -183,7 +183,7 @@
 		if (name->len != be16_to_cpu(dd->namelen) ||
 				memcmp(name->name, dd->name, name->len)) {
 			kunmap_atomic(dd);
-			page_cache_release(page);
+			put_page(page);
 			continue;
 		}
 
@@ -238,7 +238,7 @@
 		return PTR_ERR(page);
 	}
 	index = page->index;
-	page_cache_release(page);
+	put_page(page);
 
 	mutex_lock(&super->s_dirop_mutex);
 	logfs_add_transaction(dir, ta);
@@ -316,7 +316,7 @@
 				be16_to_cpu(dd->namelen),
 				be64_to_cpu(dd->ino), dd->type);
 		kunmap(page);
-		page_cache_release(page);
+		put_page(page);
 		if (full)
 			break;
 	}
@@ -349,7 +349,7 @@
 	dd = kmap_atomic(page);
 	ino = be64_to_cpu(dd->ino);
 	kunmap_atomic(dd);
-	page_cache_release(page);
+	put_page(page);
 
 	inode = logfs_iget(dir->i_sb, ino);
 	if (IS_ERR(inode))
@@ -392,7 +392,7 @@
 
 		err = logfs_write_buf(dir, page, WF_LOCK);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		if (!err)
 			grow_dir(dir, index);
 		return err;
@@ -561,7 +561,7 @@
 	map = kmap_atomic(page);
 	memcpy(dd, map, sizeof(*dd));
 	kunmap_atomic(map);
-	page_cache_release(page);
+	put_page(page);
 	return 0;
 }
 

diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 61eaeb1..f01ddfb 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c

@@ -15,21 +15,21 @@
 {
 	struct inode *inode = mapping->host;
 	struct page *page;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
 
-	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
+	if ((len == PAGE_SIZE) || PageUptodate(page))
 		return 0;
-	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
-		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+	if ((pos & PAGE_MASK) >= i_size_read(inode)) {
+		unsigned start = pos & (PAGE_SIZE - 1);
 		unsigned end = start + len;
 
 		/* Reading beyond i_size is simple: memset to zero */
-		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
+		zero_user_segments(page, 0, start, end, PAGE_SIZE);
 		return 0;
 	}
 	return logfs_readpage_nolock(page);
@@ -41,11 +41,11 @@
 {
 	struct inode *inode = mapping->host;
 	pgoff_t index = page->index;
-	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned start = pos & (PAGE_SIZE - 1);
 	unsigned end = start + copied;
 	int ret = 0;
 
-	BUG_ON(PAGE_CACHE_SIZE != inode->i_sb->s_blocksize);
+	BUG_ON(PAGE_SIZE != inode->i_sb->s_blocksize);
 	BUG_ON(page->index > I3_BLOCKS);
 
 	if (copied < len) {
@@ -61,8 +61,8 @@
 	if (copied == 0)
 		goto out; /* FIXME: do we need to update inode? */
 
-	if (i_size_read(inode) < (index << PAGE_CACHE_SHIFT) + end) {
-		i_size_write(inode, (index << PAGE_CACHE_SHIFT) + end);
+	if (i_size_read(inode) < (index << PAGE_SHIFT) + end) {
+		i_size_write(inode, (index << PAGE_SHIFT) + end);
 		mark_inode_dirty_sync(inode);
 	}
 
@@ -75,7 +75,7 @@
 	}
 out:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return ret ? ret : copied;
 }
 
@@ -118,7 +118,7 @@
 {
 	struct inode *inode = page->mapping->host;
 	loff_t i_size = i_size_read(inode);
-	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	pgoff_t end_index = i_size >> PAGE_SHIFT;
 	unsigned offset;
 	u64 bix;
 	level_t level;
@@ -142,7 +142,7 @@
 		return __logfs_writepage(page);
 
 	 /* Is the page fully outside i_size? (truncate in progress) */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
+	offset = i_size & (PAGE_SIZE-1);
 	if (bix > end_index || offset == 0) {
 		unlock_page(page);
 		return 0; /* don't care */
@@ -155,7 +155,7 @@
 	 * the  page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+	zero_user_segment(page, offset, PAGE_SIZE);
 	return __logfs_writepage(page);
 }
 

diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 20973c9..3fb8c6d 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c

@@ -281,7 +281,7 @@
 static void logfs_put_read_page(struct page *page)
 {
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 static void logfs_lock_write_page(struct page *page)
@@ -323,7 +323,7 @@
 			return NULL;
 		err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS);
 		if (unlikely(err)) {
-			page_cache_release(page);
+			put_page(page);
 			if (err == -EEXIST)
 				goto repeat;
 			return NULL;
@@ -342,7 +342,7 @@
 static void logfs_put_write_page(struct page *page)
 {
 	logfs_unlock_write_page(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 static struct page *logfs_get_page(struct inode *inode, u64 bix, level_t level,
@@ -562,7 +562,7 @@
 
 	if (PagePrivate(page)) {
 		ClearPagePrivate(page);
-		page_cache_release(page);
+		put_page(page);
 		set_page_private(page, 0);
 	}
 	__free_block(sb, block);
@@ -655,7 +655,7 @@
 	block->page = page;
 
 	SetPagePrivate(page);
-	page_cache_get(page);
+	get_page(page);
 	set_page_private(page, (unsigned long) block);
 
 	block->ops = &indirect_block_ops;
@@ -709,7 +709,7 @@
 
 static int logfs_read_empty(struct page *page)
 {
-	zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+	zero_user_segment(page, 0, PAGE_SIZE);
 	return 0;
 }
 
@@ -1660,7 +1660,7 @@
 	if (err)
 		return err;
 
-	zero_user_segment(page, size - pageofs, PAGE_CACHE_SIZE);
+	zero_user_segment(page, size - pageofs, PAGE_SIZE);
 	return logfs_segment_write(inode, page, shadow);
 }
 
@@ -1919,7 +1919,7 @@
 	block->page = NULL;
 	if (PagePrivate(page)) {
 		ClearPagePrivate(page);
-		page_cache_release(page);
+		put_page(page);
 		set_page_private(page, 0);
 	}
 }
@@ -1940,7 +1940,7 @@
 
 	if (!PagePrivate(page)) {
 		SetPagePrivate(page);
-		page_cache_get(page);
+		get_page(page);
 		set_page_private(page, (unsigned long) block);
 	}
 
@@ -1971,7 +1971,7 @@
 	logfs_disk_to_inode(di, inode);
 	kunmap_atomic(di);
 	move_page_to_inode(inode, page);
-	page_cache_release(page);
+	put_page(page);
 	return 0;
 }
 

diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index d270e4b..1efd605 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c

@@ -90,9 +90,9 @@
 
 		if (!PagePrivate(page)) {
 			SetPagePrivate(page);
-			page_cache_get(page);
+			get_page(page);
 		}
-		page_cache_release(page);
+		put_page(page);
 
 		buf += copylen;
 		len -= copylen;
@@ -117,9 +117,9 @@
 		memset(page_address(page) + offset, 0xff, len);
 		if (!PagePrivate(page)) {
 			SetPagePrivate(page);
-			page_cache_get(page);
+			get_page(page);
 		}
-		page_cache_release(page);
+		put_page(page);
 	}
 }
 
@@ -129,20 +129,20 @@
 	struct logfs_super *super = logfs_super(sb);
 	u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
 	u32 len = super->s_segsize - area->a_used_bytes;
-	pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
-	pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
+	pgoff_t index = PAGE_ALIGN(ofs) >> PAGE_SHIFT;
+	pgoff_t no_indizes = len >> PAGE_SHIFT;
 	struct page *page;
 
 	while (no_indizes) {
 		page = get_mapping_page(sb, index, 0);
 		BUG_ON(!page); /* FIXME: reserve a pool */
 		SetPageUptodate(page);
-		memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
+		memset(page_address(page), 0xff, PAGE_SIZE);
 		if (!PagePrivate(page)) {
 			SetPagePrivate(page);
-			page_cache_get(page);
+			get_page(page);
 		}
-		page_cache_release(page);
+		put_page(page);
 		index++;
 		no_indizes--;
 	}
@@ -411,7 +411,7 @@
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 		memcpy(buf, page_address(page) + offset, copylen);
-		page_cache_release(page);
+		put_page(page);
 
 		buf += copylen;
 		len -= copylen;
@@ -499,7 +499,7 @@
 
 	if (!PagePrivate(page)) {
 		SetPagePrivate(page);
-		page_cache_get(page);
+		get_page(page);
 		set_page_private(page, (unsigned long) block);
 	}
 	block->ops = &indirect_block_ops;
@@ -554,7 +554,7 @@
 
 	if (PagePrivate(page)) {
 		ClearPagePrivate(page);
-		page_cache_release(page);
+		put_page(page);
 		set_page_private(page, 0);
 	}
 	block->ops = &btree_block_ops;
@@ -723,9 +723,9 @@
 			continue;
 		if (PagePrivate(page)) {
 			ClearPagePrivate(page);
-			page_cache_release(page);
+			put_page(page);
 		}
-		page_cache_release(page);
+		put_page(page);
 	}
 }
 

diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5436029..5751082 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c

@@ -48,7 +48,7 @@
 	if (page == emergency_page)
 		mutex_unlock(&emergency_mutex);
 	else
-		page_cache_release(page);
+		put_page(page);
 }
 
 static void dump_segfile(struct super_block *sb)
@@ -206,7 +206,7 @@
 	logfs_set_segment_erased(sb, segno, ec, 0);
 	logfs_write_ds(sb, ds, segno, ec);
 	err = super->s_devops->write_sb(sb, page);
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 
@@ -366,24 +366,24 @@
 		return NULL;
 	last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
 	if (!last || IS_ERR(last)) {
-		page_cache_release(first);
+		put_page(first);
 		return NULL;
 	}
 
 	if (!logfs_check_ds(page_address(first))) {
-		page_cache_release(last);
+		put_page(last);
 		return first;
 	}
 
 	/* First one didn't work, try the second superblock */
 	if (!logfs_check_ds(page_address(last))) {
-		page_cache_release(first);
+		put_page(first);
 		return last;
 	}
 
 	/* Neither worked, sorry folks */
-	page_cache_release(first);
-	page_cache_release(last);
+	put_page(first);
+	put_page(last);
 	return NULL;
 }
 
@@ -425,7 +425,7 @@
 	super->s_data_levels = ds->ds_data_levels;
 	super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
 		+ super->s_data_levels;
-	page_cache_release(page);
+	put_page(page);
 	return 0;
 }
 

diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index d19ac25..33957c0 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c

@@ -28,7 +28,7 @@
 static inline void dir_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 /*
@@ -38,10 +38,10 @@
 static unsigned
 minix_last_byte(struct inode *inode, unsigned long page_nr)
 {
-	unsigned last_byte = PAGE_CACHE_SIZE;
+	unsigned last_byte = PAGE_SIZE;
 
-	if (page_nr == (inode->i_size >> PAGE_CACHE_SHIFT))
-		last_byte = inode->i_size & (PAGE_CACHE_SIZE - 1);
+	if (page_nr == (inode->i_size >> PAGE_SHIFT))
+		last_byte = inode->i_size & (PAGE_SIZE - 1);
 	return last_byte;
 }
 
@@ -92,8 +92,8 @@
 	if (pos >= inode->i_size)
 		return 0;
 
-	offset = pos & ~PAGE_CACHE_MASK;
-	n = pos >> PAGE_CACHE_SHIFT;
+	offset = pos & ~PAGE_MASK;
+	n = pos >> PAGE_SHIFT;
 
 	for ( ; n < npages; n++, offset = 0) {
 		char *p, *kaddr, *limit;
@@ -229,7 +229,7 @@
 		lock_page(page);
 		kaddr = (char*)page_address(page);
 		dir_end = kaddr + minix_last_byte(dir, n);
-		limit = kaddr + PAGE_CACHE_SIZE - sbi->s_dirsize;
+		limit = kaddr + PAGE_SIZE - sbi->s_dirsize;
 		for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
 			de = (minix_dirent *)p;
 			de3 = (minix3_dirent *)p;
@@ -327,7 +327,7 @@
 	}
 
 	kaddr = kmap_atomic(page);
-	memset(kaddr, 0, PAGE_CACHE_SIZE);
+	memset(kaddr, 0, PAGE_SIZE);
 
 	if (sbi->s_version == MINIX_V3) {
 		minix3_dirent *de3 = (minix3_dirent *)kaddr;
@@ -350,7 +350,7 @@
 
 	err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
 fail:
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 

diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index a795a11..2887d1d 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c

@@ -243,11 +243,11 @@
 out_dir:
 	if (dir_de) {
 		kunmap(dir_page);
-		page_cache_release(dir_page);
+		put_page(dir_page);
 	}
 out_old:
 	kunmap(old_page);
-	page_cache_release(old_page);
+	put_page(old_page);
 out:
 	return err;
 }

diff --git a/fs/mpage.c b/fs/mpage.c
index 6bd9fd9..eedc644 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c

@@ -107,7 +107,7 @@
 		 * don't make any buffers if there is only one buffer on
 		 * the page and the page just needs to be set up to date
 		 */
-		if (inode->i_blkbits == PAGE_CACHE_SHIFT && 
+		if (inode->i_blkbits == PAGE_SHIFT &&
 		    buffer_uptodate(bh)) {
 			SetPageUptodate(page);    
 			return;
@@ -145,7 +145,7 @@
 {
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
 	const unsigned blocksize = 1 << blkbits;
 	sector_t block_in_file;
 	sector_t last_block;
@@ -162,7 +162,7 @@
 	if (page_has_buffers(page))
 		goto confused;
 
-	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
 	last_block = block_in_file + nr_pages * blocks_per_page;
 	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
 	if (last_block > last_block_in_file)
@@ -249,7 +249,7 @@
 	}
 
 	if (first_hole != blocks_per_page) {
-		zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
+		zero_user_segment(page, first_hole << blkbits, PAGE_SIZE);
 		if (first_hole == 0) {
 			SetPageUptodate(page);
 			unlock_page(page);
@@ -331,7 +331,7 @@
  *
  * then this code just gives up and calls the buffer_head-based read function.
  * It does handle a page which has holes at the end - that is a common case:
- * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
+ * the end-of-file on blocksize < PAGE_SIZE setups.
  *
  * BH_Boundary explanation:
  *
@@ -380,7 +380,7 @@
 					&first_logical_block,
 					get_block, gfp);
 		}
-		page_cache_release(page);
+		put_page(page);
 	}
 	BUG_ON(!list_empty(pages));
 	if (bio)
@@ -472,7 +472,7 @@
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	unsigned long end_index;
-	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
 	sector_t last_block;
 	sector_t block_in_file;
 	sector_t blocks[MAX_BUF_PER_PAGE];
@@ -542,7 +542,7 @@
 	 * The page has no buffers: map it to disk
 	 */
 	BUG_ON(!PageUptodate(page));
-	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
 	last_block = (i_size - 1) >> blkbits;
 	map_bh.b_page = page;
 	for (page_block = 0; page_block < blocks_per_page; ) {
@@ -574,7 +574,7 @@
 	first_unmapped = page_block;
 
 page_is_mapped:
-	end_index = i_size >> PAGE_CACHE_SHIFT;
+	end_index = i_size >> PAGE_SHIFT;
 	if (page->index >= end_index) {
 		/*
 		 * The page straddles i_size.  It must be zeroed out on each
@@ -584,11 +584,11 @@
 		 * is zeroed when mapped, and writes to that region are not
 		 * written out to the file."
 		 */
-		unsigned offset = i_size & (PAGE_CACHE_SIZE - 1);
+		unsigned offset = i_size & (PAGE_SIZE - 1);
 
 		if (page->index > end_index || !offset)
 			goto confused;
-		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+		zero_user_segment(page, offset, PAGE_SIZE);
 	}
 
 	/*

diff --git a/fs/namei.c b/fs/namei.c
index 794f81d..1d9ca2d 100644
--- a/fs/namei.c
+++ b/fs/namei.c

@@ -1740,15 +1740,17 @@
 					  nd->flags);
 		if (IS_ERR(path.dentry))
 			return PTR_ERR(path.dentry);
-		if (unlikely(d_is_negative(path.dentry))) {
-			dput(path.dentry);
-			return -ENOENT;
-		}
+
 		path.mnt = nd->path.mnt;
 		err = follow_managed(&path, nd);
 		if (unlikely(err < 0))
 			return err;
 
+		if (unlikely(d_is_negative(path.dentry))) {
+			path_to_nameidata(&path, nd);
+			return -ENOENT;
+		}
+
 		seq = 0;	/* we are already out of RCU mode */
 		inode = d_backing_inode(path.dentry);
 	}

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index b7f8eae..bfdad00 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c

@@ -510,7 +510,7 @@
 			kunmap(ctl.page);
 			SetPageUptodate(ctl.page);
 			unlock_page(ctl.page);
-			page_cache_release(ctl.page);
+			put_page(ctl.page);
 			ctl.page = NULL;
 		}
 		ctl.idx  = 0;
@@ -520,7 +520,7 @@
 	if (ctl.page) {
 		kunmap(ctl.page);
 		unlock_page(ctl.page);
-		page_cache_release(ctl.page);
+		put_page(ctl.page);
 		ctl.page = NULL;
 	}
 	ctl.cache = cache;
@@ -554,14 +554,14 @@
 		kunmap(ctl.page);
 		SetPageUptodate(ctl.page);
 		unlock_page(ctl.page);
-		page_cache_release(ctl.page);
+		put_page(ctl.page);
 	}
 	if (page) {
 		cache->head = ctl.head;
 		kunmap(page);
 		SetPageUptodate(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 out:
 	return result;
@@ -649,7 +649,7 @@
 			kunmap(ctl.page);
 			SetPageUptodate(ctl.page);
 			unlock_page(ctl.page);
-			page_cache_release(ctl.page);
+			put_page(ctl.page);
 		}
 		ctl.cache = NULL;
 		ctl.idx  -= NCP_DIRCACHE_SIZE;

diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 5233fbc..17cfb74 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h

@@ -191,7 +191,7 @@
 	int		eof;
 };
 
-#define NCP_DIRCACHE_SIZE	((int)(PAGE_CACHE_SIZE/sizeof(struct dentry *)))
+#define NCP_DIRCACHE_SIZE	((int)(PAGE_SIZE/sizeof(struct dentry *)))
 union ncp_dir_cache {
 	struct ncp_cache_head	head;
 	struct dentry		*dentry[NCP_DIRCACHE_SIZE];

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index ddd0138..17a42e4 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c

@@ -231,7 +231,7 @@
 	size_t bytes_left = header->args.count;
 	unsigned int pg_offset = header->args.pgbase, pg_len;
 	struct page **pages = header->args.pages;
-	int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+	int pg_index = header->args.pgbase >> PAGE_SHIFT;
 	const bool is_dio = (header->dreq != NULL);
 	struct blk_plug plug;
 	int i;
@@ -263,13 +263,13 @@
 		}
 
 		if (is_dio) {
-			if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
-				pg_len = PAGE_CACHE_SIZE - pg_offset;
+			if (pg_offset + bytes_left > PAGE_SIZE)
+				pg_len = PAGE_SIZE - pg_offset;
 			else
 				pg_len = bytes_left;
 		} else {
 			BUG_ON(pg_offset != 0);
-			pg_len = PAGE_CACHE_SIZE;
+			pg_len = PAGE_SIZE;
 		}
 
 		if (is_hole(&be)) {
@@ -339,9 +339,9 @@
 
 	if (likely(!hdr->pnfs_error)) {
 		struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg);
-		u64 start = hdr->args.offset & (loff_t)PAGE_CACHE_MASK;
+		u64 start = hdr->args.offset & (loff_t)PAGE_MASK;
 		u64 end = (hdr->args.offset + hdr->args.count +
-			PAGE_CACHE_SIZE - 1) & (loff_t)PAGE_CACHE_MASK;
+			PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
 
 		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
 					(end - start) >> SECTOR_SHIFT);
@@ -373,7 +373,7 @@
 	loff_t offset = header->args.offset;
 	size_t count = header->args.count;
 	struct page **pages = header->args.pages;
-	int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+	int pg_index = header->args.pgbase >> PAGE_SHIFT;
 	unsigned int pg_len;
 	struct blk_plug plug;
 	int i;
@@ -392,7 +392,7 @@
 	blk_start_plug(&plug);
 
 	/* we always write out the whole page */
-	offset = offset & (loff_t)PAGE_CACHE_MASK;
+	offset = offset & (loff_t)PAGE_MASK;
 	isect = offset >> SECTOR_SHIFT;
 
 	for (i = pg_index; i < header->page_array.npages; i++) {
@@ -408,7 +408,7 @@
 			extent_length = be.be_length - (isect - be.be_f_offset);
 		}
 
-		pg_len = PAGE_CACHE_SIZE;
+		pg_len = PAGE_SIZE;
 		bio = do_add_page_to_bio(bio, header->page_array.npages - i,
 					 WRITE, isect, pages[i], &map, &be,
 					 bl_end_io_write, par,
@@ -446,8 +446,8 @@
 	kfree(bl);
 }
 
-static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
-						   gfp_t gfp_flags)
+static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
+		gfp_t gfp_flags, bool is_scsi_layout)
 {
 	struct pnfs_block_layout *bl;
 
@@ -460,9 +460,22 @@
 	bl->bl_ext_ro = RB_ROOT;
 	spin_lock_init(&bl->bl_ext_lock);
 
+	bl->bl_scsi_layout = is_scsi_layout;
 	return &bl->bl_layout;
 }
 
+static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
+						   gfp_t gfp_flags)
+{
+	return __bl_alloc_layout_hdr(inode, gfp_flags, false);
+}
+
+static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
+						   gfp_t gfp_flags)
+{
+	return __bl_alloc_layout_hdr(inode, gfp_flags, true);
+}
+
 static void bl_free_lseg(struct pnfs_layout_segment *lseg)
 {
 	dprintk("%s enter\n", __func__);
@@ -743,7 +756,7 @@
 
 static bool
 is_aligned_req(struct nfs_pageio_descriptor *pgio,
-		struct nfs_page *req, unsigned int alignment)
+		struct nfs_page *req, unsigned int alignment, bool is_write)
 {
 	/*
 	 * Always accept buffered writes, higher layers take care of the
@@ -758,7 +771,8 @@
 	if (IS_ALIGNED(req->wb_bytes, alignment))
 		return true;
 
-	if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) {
+	if (is_write &&
+	    (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) {
 		/*
 		 * If the write goes up to the inode size, just write
 		 * the full page.  Data past the inode size is
@@ -775,7 +789,7 @@
 static void
 bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
-	if (!is_aligned_req(pgio, req, SECTOR_SIZE)) {
+	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) {
 		nfs_pageio_reset_read_mds(pgio);
 		return;
 	}
@@ -791,7 +805,7 @@
 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		struct nfs_page *req)
 {
-	if (!is_aligned_req(pgio, req, SECTOR_SIZE))
+	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false))
 		return 0;
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
@@ -806,7 +820,7 @@
 	pgoff_t end;
 
 	/* Optimize common case that writes from 0 to end of file */
-	end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
+	end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 	if (end != inode->i_mapping->nrpages) {
 		rcu_read_lock();
 		end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
@@ -814,9 +828,9 @@
 	}
 
 	if (!end)
-		return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT);
+		return i_size_read(inode) - (idx << PAGE_SHIFT);
 	else
-		return (end - idx) << PAGE_CACHE_SHIFT;
+		return (end - idx) << PAGE_SHIFT;
 }
 
 static void
@@ -824,7 +838,7 @@
 {
 	u64 wb_size;
 
-	if (!is_aligned_req(pgio, req, PAGE_SIZE)) {
+	if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) {
 		nfs_pageio_reset_write_mds(pgio);
 		return;
 	}
@@ -846,7 +860,7 @@
 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		 struct nfs_page *req)
 {
-	if (!is_aligned_req(pgio, req, PAGE_SIZE))
+	if (!is_aligned_req(pgio, req, PAGE_SIZE, true))
 		return 0;
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
@@ -888,22 +902,53 @@
 	.sync				= pnfs_generic_sync,
 };
 
+static struct pnfs_layoutdriver_type scsilayout_type = {
+	.id				= LAYOUT_SCSI,
+	.name				= "LAYOUT_SCSI",
+	.owner				= THIS_MODULE,
+	.flags				= PNFS_LAYOUTRET_ON_SETATTR |
+					  PNFS_READ_WHOLE_PAGE,
+	.read_pagelist			= bl_read_pagelist,
+	.write_pagelist			= bl_write_pagelist,
+	.alloc_layout_hdr		= sl_alloc_layout_hdr,
+	.free_layout_hdr		= bl_free_layout_hdr,
+	.alloc_lseg			= bl_alloc_lseg,
+	.free_lseg			= bl_free_lseg,
+	.return_range			= bl_return_range,
+	.prepare_layoutcommit		= bl_prepare_layoutcommit,
+	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
+	.set_layoutdriver		= bl_set_layoutdriver,
+	.alloc_deviceid_node		= bl_alloc_deviceid_node,
+	.free_deviceid_node		= bl_free_deviceid_node,
+	.pg_read_ops			= &bl_pg_read_ops,
+	.pg_write_ops			= &bl_pg_write_ops,
+	.sync				= pnfs_generic_sync,
+};
+
+
 static int __init nfs4blocklayout_init(void)
 {
 	int ret;
 
 	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
 
-	ret = pnfs_register_layoutdriver(&blocklayout_type);
-	if (ret)
-		goto out;
 	ret = bl_init_pipefs();
 	if (ret)
-		goto out_unregister;
+		goto out;
+
+	ret = pnfs_register_layoutdriver(&blocklayout_type);
+	if (ret)
+		goto out_cleanup_pipe;
+
+	ret = pnfs_register_layoutdriver(&scsilayout_type);
+	if (ret)
+		goto out_unregister_block;
 	return 0;
 
-out_unregister:
+out_unregister_block:
 	pnfs_unregister_layoutdriver(&blocklayout_type);
+out_cleanup_pipe:
+	bl_cleanup_pipefs();
 out:
 	return ret;
 }
@@ -913,8 +958,9 @@
 	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
 	       __func__);
 
-	bl_cleanup_pipefs();
+	pnfs_unregister_layoutdriver(&scsilayout_type);
 	pnfs_unregister_layoutdriver(&blocklayout_type);
+	bl_cleanup_pipefs();
 }
 
 MODULE_ALIAS("nfs-layouttype4-3");

diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index c556640..18e6fd0 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h

@@ -40,8 +40,8 @@
 #include "../pnfs.h"
 #include "../netns.h"
 
-#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
-#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
+#define PAGE_CACHE_SECTORS (PAGE_SIZE >> SECTOR_SHIFT)
+#define PAGE_CACHE_SECTOR_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
 #define SECTOR_SIZE (1 << SECTOR_SHIFT)
 
 struct pnfs_block_dev;
@@ -55,7 +55,6 @@
  */
 #define PNFS_BLOCK_UUID_LEN	128
 
-
 struct pnfs_block_volume {
 	enum pnfs_block_volume_type	type;
 	union {
@@ -82,6 +81,13 @@
 			u32		volumes_count;
 			u32		volumes[PNFS_BLOCK_MAX_DEVICES];
 		} stripe;
+		struct {
+			enum scsi_code_set		code_set;
+			enum scsi_designator_type	designator_type;
+			int				designator_len;
+			u8				designator[256];
+			u64				pr_key;
+		} scsi;
 	};
 };
 
@@ -106,6 +112,9 @@
 	struct block_device		*bdev;
 	u64				disk_offset;
 
+	u64				pr_key;
+	bool				pr_registered;
+
 	bool (*map)(struct pnfs_block_dev *dev, u64 offset,
 			struct pnfs_block_dev_map *map);
 };
@@ -131,6 +140,7 @@
 	struct rb_root		bl_ext_rw;
 	struct rb_root		bl_ext_ro;
 	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
+	bool			bl_scsi_layout;
 };
 
 static inline struct pnfs_block_layout *
@@ -182,6 +192,6 @@
 dev_t bl_resolve_deviceid(struct nfs_server *server,
 		struct pnfs_block_volume *b, gfp_t gfp_mask);
 int __init bl_init_pipefs(void);
-void __exit bl_cleanup_pipefs(void);
+void bl_cleanup_pipefs(void);
 
 #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */

diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index a861bbd..e5b8967 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c

@@ -1,11 +1,12 @@
 /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
  */
 #include <linux/sunrpc/svc.h>
 #include <linux/blkdev.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_xdr.h>
+#include <linux/pr.h>
 
 #include "blocklayout.h"
 
@@ -21,6 +22,17 @@
 			bl_free_device(&dev->children[i]);
 		kfree(dev->children);
 	} else {
+		if (dev->pr_registered) {
+			const struct pr_ops *ops =
+				dev->bdev->bd_disk->fops->pr_ops;
+			int error;
+
+			error = ops->pr_register(dev->bdev, dev->pr_key, 0,
+				false);
+			if (error)
+				pr_err("failed to unregister PR key.\n");
+		}
+
 		if (dev->bdev)
 			blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
 	}
@@ -113,6 +125,24 @@
 		for (i = 0; i < b->stripe.volumes_count; i++)
 			b->stripe.volumes[i] = be32_to_cpup(p++);
 		break;
+	case PNFS_BLOCK_VOLUME_SCSI:
+		p = xdr_inline_decode(xdr, 4 + 4 + 4);
+		if (!p)
+			return -EIO;
+		b->scsi.code_set = be32_to_cpup(p++);
+		b->scsi.designator_type = be32_to_cpup(p++);
+		b->scsi.designator_len = be32_to_cpup(p++);
+		p = xdr_inline_decode(xdr, b->scsi.designator_len);
+		if (!p)
+			return -EIO;
+		if (b->scsi.designator_len > 256)
+			return -EIO;
+		memcpy(&b->scsi.designator, p, b->scsi.designator_len);
+		p = xdr_inline_decode(xdr, 8);
+		if (!p)
+			return -EIO;
+		p = xdr_decode_hyper(p, &b->scsi.pr_key);
+		break;
 	default:
 		dprintk("unknown volume type!\n");
 		return -EIO;
@@ -216,6 +246,116 @@
 	return 0;
 }
 
+static bool
+bl_validate_designator(struct pnfs_block_volume *v)
+{
+	switch (v->scsi.designator_type) {
+	case PS_DESIGNATOR_EUI64:
+		if (v->scsi.code_set != PS_CODE_SET_BINARY)
+			return false;
+
+		if (v->scsi.designator_len != 8 &&
+		    v->scsi.designator_len != 10 &&
+		    v->scsi.designator_len != 16)
+			return false;
+
+		return true;
+	case PS_DESIGNATOR_NAA:
+		if (v->scsi.code_set != PS_CODE_SET_BINARY)
+			return false;
+
+		if (v->scsi.designator_len != 8 &&
+		    v->scsi.designator_len != 16)
+			return false;
+
+		return true;
+	case PS_DESIGNATOR_T10:
+	case PS_DESIGNATOR_NAME:
+		pr_err("pNFS: unsupported designator "
+			"(code set %d, type %d, len %d.\n",
+			v->scsi.code_set,
+			v->scsi.designator_type,
+			v->scsi.designator_len);
+		return false;
+	default:
+		pr_err("pNFS: invalid designator "
+			"(code set %d, type %d, len %d.\n",
+			v->scsi.code_set,
+			v->scsi.designator_type,
+			v->scsi.designator_len);
+		return false;
+	}
+}
+
+static int
+bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
+		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+	struct pnfs_block_volume *v = &volumes[idx];
+	const struct pr_ops *ops;
+	const char *devname;
+	int error;
+
+	if (!bl_validate_designator(v))
+		return -EINVAL;
+
+	switch (v->scsi.designator_len) {
+	case 8:
+		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
+				v->scsi.designator);
+		break;
+	case 12:
+		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
+				v->scsi.designator);
+		break;
+	case 16:
+		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
+				v->scsi.designator);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
+	if (IS_ERR(d->bdev)) {
+		pr_warn("pNFS: failed to open device %s (%ld)\n",
+			devname, PTR_ERR(d->bdev));
+		kfree(devname);
+		return PTR_ERR(d->bdev);
+	}
+
+	kfree(devname);
+
+	d->len = i_size_read(d->bdev->bd_inode);
+	d->map = bl_map_simple;
+	d->pr_key = v->scsi.pr_key;
+
+	pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
+		d->bdev->bd_disk->disk_name, d->pr_key);
+
+	ops = d->bdev->bd_disk->fops->pr_ops;
+	if (!ops) {
+		pr_err("pNFS: block device %s does not support reservations.",
+				d->bdev->bd_disk->disk_name);
+		error = -EINVAL;
+		goto out_blkdev_put;
+	}
+
+	error = ops->pr_register(d->bdev, 0, d->pr_key, true);
+	if (error) {
+		pr_err("pNFS: failed to register key for block device %s.",
+				d->bdev->bd_disk->disk_name);
+		goto out_blkdev_put;
+	}
+
+	d->pr_registered = true;
+	return 0;
+
+out_blkdev_put:
+	blkdev_put(d->bdev, FMODE_READ);
+	return error;
+}
+
 static int
 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
 		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -303,6 +443,8 @@
 		return bl_parse_concat(server, d, volumes, idx, gfp_mask);
 	case PNFS_BLOCK_VOLUME_STRIPE:
 		return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
+	case PNFS_BLOCK_VOLUME_SCSI:
+		return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
 	default:
 		dprintk("unsupported volume type: %d\n", volumes[idx].type);
 		return -EIO;

diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 35ab51c..720b3ff 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
  */
 
 #include <linux/vmalloc.h>
@@ -462,10 +462,12 @@
 	return err;
 }
 
-static size_t ext_tree_layoutupdate_size(size_t count)
+static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
 {
-	return sizeof(__be32) /* number of entries */ +
-		PNFS_BLOCK_EXTENT_SIZE * count;
+	if (bl->bl_scsi_layout)
+		return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
+	else
+		return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
 }
 
 static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
@@ -483,6 +485,23 @@
 	}
 }
 
+static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
+{
+	p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
+			NFS4_DEVICEID4_SIZE);
+	p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+	p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+	p = xdr_encode_hyper(p, 0LL);
+	*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
+	return p;
+}
+
+static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
+{
+	p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+	return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+}
+
 static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
 		size_t buffer_size, size_t *count)
 {
@@ -496,19 +515,16 @@
 			continue;
 
 		(*count)++;
-		if (ext_tree_layoutupdate_size(*count) > buffer_size) {
+		if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
 			/* keep counting.. */
 			ret = -ENOSPC;
 			continue;
 		}
 
-		p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
-				NFS4_DEVICEID4_SIZE);
-		p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
-		p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
-		p = xdr_encode_hyper(p, 0LL);
-		*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
-
+		if (bl->bl_scsi_layout)
+			p = encode_scsi_range(be, p);
+		else
+			p = encode_block_extent(be, p);
 		be->be_tag = EXTENT_COMMITTING;
 	}
 	spin_unlock(&bl->bl_ext_lock);
@@ -537,7 +553,7 @@
 	if (unlikely(ret)) {
 		ext_tree_free_commitdata(arg, buffer_size);
 
-		buffer_size = ext_tree_layoutupdate_size(count);
+		buffer_size = ext_tree_layoutupdate_size(bl, count);
 		count = 0;
 
 		arg->layoutupdate_pages =
@@ -556,7 +572,7 @@
 	}
 
 	*start_p = cpu_to_be32(count);
-	arg->layoutupdate_len = ext_tree_layoutupdate_size(count);
+	arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
 
 	if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
 		void *p = start_p, *end = p + arg->layoutupdate_len;

diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index dbe5839..9fb067a6 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c

@@ -281,7 +281,7 @@
 	return ret;
 }
 
-void __exit bl_cleanup_pipefs(void)
+void bl_cleanup_pipefs(void)
 {
 	rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
 	unregister_pernet_subsys(&nfs4blocklayout_net_ops);

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index ff8195b..5fe1cec 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h

@@ -37,10 +37,11 @@
 	OP_CB_ILLEGAL = 10044,
 };
 
+struct nfs4_slot;
 struct cb_process_state {
 	__be32			drc_status;
 	struct nfs_client	*clp;
-	u32			slotid;
+	struct nfs4_slot	*slot;
 	u32			minorversion;
 	struct net		*net;
 };

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f0939d0..618ced3 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c

@@ -354,47 +354,38 @@
  * a single outstanding callback request at a time.
  */
 static __be32
-validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
+validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
+		const struct cb_sequenceargs * args)
 {
-	struct nfs4_slot *slot;
+	dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n",
+		__func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr);
 
-	dprintk("%s enter. slotid %u seqid %u\n",
-		__func__, args->csa_slotid, args->csa_sequenceid);
-
-	if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
+	if (args->csa_slotid > tbl->server_highest_slotid)
 		return htonl(NFS4ERR_BADSLOT);
 
-	slot = tbl->slots + args->csa_slotid;
-	dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
-
-	/* Normal */
-	if (likely(args->csa_sequenceid == slot->seq_nr + 1))
-		goto out_ok;
-
 	/* Replay */
 	if (args->csa_sequenceid == slot->seq_nr) {
 		dprintk("%s seqid %u is a replay\n",
 			__func__, args->csa_sequenceid);
+		if (nfs4_test_locked_slot(tbl, slot->slot_nr))
+			return htonl(NFS4ERR_DELAY);
 		/* Signal process_op to set this error on next op */
 		if (args->csa_cachethis == 0)
 			return htonl(NFS4ERR_RETRY_UNCACHED_REP);
 
-		/* The ca_maxresponsesize_cached is 0 with no DRC */
-		else if (args->csa_cachethis == 1)
-			return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+		/* Liar! We never allowed you to set csa_cachethis != 0 */
+		return htonl(NFS4ERR_SEQ_FALSE_RETRY);
 	}
 
 	/* Wraparound */
-	if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
-		slot->seq_nr = 1;
-		goto out_ok;
-	}
+	if (unlikely(slot->seq_nr == 0xFFFFFFFFU)) {
+		if (args->csa_sequenceid == 1)
+			return htonl(NFS4_OK);
+	} else if (likely(args->csa_sequenceid == slot->seq_nr + 1))
+		return htonl(NFS4_OK);
 
 	/* Misordered request */
 	return htonl(NFS4ERR_SEQ_MISORDERED);
-out_ok:
-	tbl->highest_used_slotid = args->csa_slotid;
-	return htonl(NFS4_OK);
 }
 
 /*
@@ -473,6 +464,12 @@
 	tbl = &clp->cl_session->bc_slot_table;
 	slot = tbl->slots + args->csa_slotid;
 
+	/* Set up res before grabbing the spinlock */
+	memcpy(&res->csr_sessionid, &args->csa_sessionid,
+	       sizeof(res->csr_sessionid));
+	res->csr_sequenceid = args->csa_sequenceid;
+	res->csr_slotid = args->csa_slotid;
+
 	spin_lock(&tbl->slot_tbl_lock);
 	/* state manager is resetting the session */
 	if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
@@ -485,18 +482,26 @@
 		goto out_unlock;
 	}
 
-	memcpy(&res->csr_sessionid, &args->csa_sessionid,
-	       sizeof(res->csr_sessionid));
-	res->csr_sequenceid = args->csa_sequenceid;
-	res->csr_slotid = args->csa_slotid;
-	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
-	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
-
-	status = validate_seqid(tbl, args);
-	if (status)
+	status = htonl(NFS4ERR_BADSLOT);
+	slot = nfs4_lookup_slot(tbl, args->csa_slotid);
+	if (IS_ERR(slot))
 		goto out_unlock;
 
-	cps->slotid = args->csa_slotid;
+	res->csr_highestslotid = tbl->server_highest_slotid;
+	res->csr_target_highestslotid = tbl->target_highest_slotid;
+
+	status = validate_seqid(tbl, slot, args);
+	if (status)
+		goto out_unlock;
+	if (!nfs4_try_to_lock_slot(tbl, slot)) {
+		status = htonl(NFS4ERR_DELAY);
+		goto out_unlock;
+	}
+	cps->slot = slot;
+
+	/* The ca_maxresponsesize_cached is 0 with no DRC */
+	if (args->csa_cachethis != 0)
+		return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
 
 	/*
 	 * Check for pending referring calls.  If a match is found, a
@@ -513,7 +518,7 @@
 	 * If CB_SEQUENCE returns an error, then the state of the slot
 	 * (sequence ID, cached reply) MUST NOT change.
 	 */
-	slot->seq_nr++;
+	slot->seq_nr = args->csa_sequenceid;
 out_unlock:
 	spin_unlock(&tbl->slot_tbl_lock);
 

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 646cdac..976c906 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c

@@ -752,7 +752,8 @@
 	return htonl(NFS_OK);
 }
 
-static void nfs4_callback_free_slot(struct nfs4_session *session)
+static void nfs4_callback_free_slot(struct nfs4_session *session,
+		struct nfs4_slot *slot)
 {
 	struct nfs4_slot_table *tbl = &session->bc_slot_table;
 
@@ -761,15 +762,17 @@
 	 * Let the state manager know callback processing done.
 	 * A single slot, so highest used slotid is either 0 or -1
 	 */
-	tbl->highest_used_slotid = NFS4_NO_SLOT;
+	nfs4_free_slot(tbl, slot);
 	nfs4_slot_tbl_drain_complete(tbl);
 	spin_unlock(&tbl->slot_tbl_lock);
 }
 
 static void nfs4_cb_free_slot(struct cb_process_state *cps)
 {
-	if (cps->slotid != NFS4_NO_SLOT)
-		nfs4_callback_free_slot(cps->clp->cl_session);
+	if (cps->slot) {
+		nfs4_callback_free_slot(cps->clp->cl_session, cps->slot);
+		cps->slot = NULL;
+	}
 }
 
 #else /* CONFIG_NFS_V4_1 */
@@ -893,7 +896,6 @@
 	struct cb_process_state cps = {
 		.drc_status = 0,
 		.clp = NULL,
-		.slotid = NFS4_NO_SLOT,
 		.net = SVC_NET(rqstp),
 	};
 	unsigned int nops = 0;

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d6d5d2a..0c96528 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c

@@ -736,7 +736,7 @@
 		server->rsize = max_rpc_payload;
 	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
 		server->rsize = NFS_MAX_FILE_IO_SIZE;
-	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->rpages = (server->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
 	server->backing_dev_info.name = "nfs";
 	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
@@ -745,13 +745,13 @@
 		server->wsize = max_rpc_payload;
 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
 		server->wsize = NFS_MAX_FILE_IO_SIZE;
-	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
 
 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
-	if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES)
-		server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES;
+	if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES)
+		server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES;
 	if (server->dtsize > server->rsize)
 		server->dtsize = server->rsize;
 

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a89d32a..33eb817 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c

@@ -707,7 +707,7 @@
 {
 	if (!desc->page->mapping)
 		nfs_readdir_clear_array(desc->page);
-	page_cache_release(desc->page);
+	put_page(desc->page);
 	desc->page = NULL;
 }
 
@@ -1923,7 +1923,7 @@
 		 * add_to_page_cache_lru() grabs an extra page refcount.
 		 * Drop it here to avoid leaking this page later.
 		 */
-		page_cache_release(page);
+		put_page(page);
 	} else
 		__free_page(page);
 

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 7a0cfd3..c93826e 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c

@@ -269,7 +269,7 @@
 {
 	unsigned int i;
 	for (i = 0; i < npages; i++)
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 }
 
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
@@ -1003,7 +1003,7 @@
 		      iov_iter_count(iter));
 
 	pos = iocb->ki_pos;
-	end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
+	end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
 
 	inode_lock(inode);
 
@@ -1013,7 +1013,7 @@
 
 	if (mapping->nrpages) {
 		result = invalidate_inode_pages2_range(mapping,
-					pos >> PAGE_CACHE_SHIFT, end);
+					pos >> PAGE_SHIFT, end);
 		if (result)
 			goto out_unlock;
 	}
@@ -1042,7 +1042,7 @@
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
-					      pos >> PAGE_CACHE_SHIFT, end);
+					      pos >> PAGE_SHIFT, end);
 	}
 
 	inode_unlock(inode);

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 748bb81..be01095 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c

@@ -233,7 +233,7 @@
  * nfs_file_write() that a write error occurred, and hence cause it to
  * fall back to doing a synchronous write.
  */
-int
+static int
 nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -263,9 +263,8 @@
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
 
-static int
+int
 nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	int ret;
@@ -273,13 +272,15 @@
 
 	trace_nfs_fsync_enter(inode);
 
-	nfs_inode_dio_wait(inode);
+	inode_dio_wait(inode);
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
 			break;
 		inode_lock(inode);
 		ret = nfs_file_fsync_commit(file, start, end, datasync);
+		if (!ret)
+			ret = pnfs_sync_inode(inode, !!datasync);
 		inode_unlock(inode);
 		/*
 		 * If nfs_file_fsync_commit detected a server reboot, then
@@ -293,6 +294,7 @@
 	trace_nfs_fsync_exit(inode, ret);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(nfs_file_fsync);
 
 /*
  * Decide whether a read/modify/write cycle may be more efficient
@@ -318,7 +320,7 @@
 			loff_t pos, unsigned len)
 {
 	unsigned int pglen = nfs_page_length(page);
-	unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned int offset = pos & (PAGE_SIZE - 1);
 	unsigned int end = offset + len;
 
 	if (pnfs_ld_read_whole_page(file->f_mapping->host)) {
@@ -349,7 +351,7 @@
 			struct page **pagep, void **fsdata)
 {
 	int ret;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	struct page *page;
 	int once_thru = 0;
 
@@ -368,7 +370,7 @@
 	/*
 	 * Wait for O_DIRECT to complete
 	 */
-	nfs_inode_dio_wait(mapping->host);
+	inode_dio_wait(mapping->host);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
@@ -378,12 +380,12 @@
 	ret = nfs_flush_incompatible(file, page);
 	if (ret) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	} else if (!once_thru &&
 		   nfs_want_read_modify_write(file, page, pos, len)) {
 		once_thru = 1;
 		ret = nfs_readpage(file, page);
-		page_cache_release(page);
+		put_page(page);
 		if (!ret)
 			goto start;
 	}
@@ -394,7 +396,7 @@
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
 {
-	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned offset = pos & (PAGE_SIZE - 1);
 	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	int status;
 
@@ -411,20 +413,20 @@
 
 		if (pglen == 0) {
 			zero_user_segments(page, 0, offset,
-					end, PAGE_CACHE_SIZE);
+					end, PAGE_SIZE);
 			SetPageUptodate(page);
 		} else if (end >= pglen) {
-			zero_user_segment(page, end, PAGE_CACHE_SIZE);
+			zero_user_segment(page, end, PAGE_SIZE);
 			if (offset == 0)
 				SetPageUptodate(page);
 		} else
-			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+			zero_user_segment(page, pglen, PAGE_SIZE);
 	}
 
 	status = nfs_updatepage(file, page, offset, copied);
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (status < 0)
 		return status;
@@ -452,7 +454,7 @@
 	dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n",
 		 page, offset, length);
 
-	if (offset != 0 || length < PAGE_CACHE_SIZE)
+	if (offset != 0 || length < PAGE_SIZE)
 		return;
 	/* Cancel any unstarted writes on this page */
 	nfs_wb_page_cancel(page_file_mapping(page)->host, page);

diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index eb37046..add0e5a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c

@@ -418,6 +418,8 @@
 				pnfs_error_mark_layout_for_return(ino, lseg);
 		} else
 			pnfs_error_mark_layout_for_return(ino, lseg);
+		ds = NULL;
+		goto out;
 	}
 out_update_creds:
 	if (ff_layout_update_mirror_cred(mirror, ds))

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 847b678..738c84a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c

@@ -141,7 +141,7 @@
 
 int nfs_sync_inode(struct inode *inode)
 {
-	nfs_inode_dio_wait(inode);
+	inode_dio_wait(inode);
 	return nfs_wb_all(inode);
 }
 EXPORT_SYMBOL_GPL(nfs_sync_inode);

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9a547aa..f1d1d2c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h

@@ -358,7 +358,7 @@
 int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
 
 /* file.c */
-int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
+int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
 loff_t nfs_file_llseek(struct file *, loff_t, int);
 ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
 ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
@@ -515,10 +515,6 @@
 /* direct.c */
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 			      struct nfs_direct_req *dreq);
-static inline void nfs_inode_dio_wait(struct inode *inode)
-{
-	inode_dio_wait(inode);
-}
 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
@@ -642,11 +638,11 @@
 
 	if (i_size > 0) {
 		pgoff_t page_index = page_file_index(page);
-		pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+		pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
 		if (page_index < end_index)
-			return PAGE_CACHE_SIZE;
+			return PAGE_SIZE;
 		if (page_index == end_index)
-			return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
+			return ((i_size - 1) & ~PAGE_MASK) + 1;
 	}
 	return 0;
 }

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 2a9ff14..d039051 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c

@@ -128,37 +128,6 @@
 	return vfs_fsync(file, 0);
 }
 
-static int
-nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
-	int ret;
-	struct inode *inode = file_inode(file);
-
-	trace_nfs_fsync_enter(inode);
-
-	nfs_inode_dio_wait(inode);
-	do {
-		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-		if (ret != 0)
-			break;
-		inode_lock(inode);
-		ret = nfs_file_fsync_commit(file, start, end, datasync);
-		if (!ret)
-			ret = pnfs_sync_inode(inode, !!datasync);
-		inode_unlock(inode);
-		/*
-		 * If nfs_file_fsync_commit detected a server reboot, then
-		 * resend all dirty pages that might have been covered by
-		 * the NFS_CONTEXT_RESEND_WRITES flag
-		 */
-		start = 0;
-		end = LLONG_MAX;
-	} while (ret == -EAGAIN);
-
-	trace_nfs_fsync_exit(inode, ret);
-	return ret;
-}
-
 #ifdef CONFIG_NFS_V4_2
 static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
 {
@@ -266,7 +235,7 @@
 	.open		= nfs4_file_open,
 	.flush		= nfs4_file_flush,
 	.release	= nfs_file_release,
-	.fsync		= nfs4_file_fsync,
+	.fsync		= nfs_file_fsync,
 	.lock		= nfs_lock,
 	.flock		= nfs_flock,
 	.splice_read	= nfs_file_splice_read,

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 400a70b..327b8c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c

@@ -6783,13 +6783,26 @@
 	return false;
 }
 
+static void
+nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
+{
+}
+
+static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
+	.rpc_call_done =  &nfs4_bind_one_conn_to_session_done,
+};
+
 /*
- * nfs4_proc_bind_conn_to_session()
+ * nfs4_proc_bind_one_conn_to_session()
  *
  * The 4.1 client currently uses the same TCP connection for the
  * fore and backchannel.
  */
-int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+static
+int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt,
+		struct nfs_client *clp,
+		struct rpc_cred *cred)
 {
 	int status;
 	struct nfs41_bind_conn_to_session_args args = {
@@ -6804,6 +6817,14 @@
 		.rpc_resp = &res,
 		.rpc_cred = cred,
 	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.rpc_xprt = xprt,
+		.callback_ops = &nfs4_bind_one_conn_to_session_ops,
+		.rpc_message = &msg,
+		.flags = RPC_TASK_TIMEOUT,
+	};
+	struct rpc_task *task;
 
 	dprintk("--> %s\n", __func__);
 
@@ -6811,7 +6832,16 @@
 	if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
 		args.dir = NFS4_CDFC4_FORE;
 
-	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	/* Do not set the backchannel flag unless this is clnt->cl_xprt */
+	if (xprt != rcu_access_pointer(clnt->cl_xprt))
+		args.dir = NFS4_CDFC4_FORE;
+
+	task = rpc_run_task(&task_setup_data);
+	if (!IS_ERR(task)) {
+		status = task->tk_status;
+		rpc_put_task(task);
+	} else
+		status = PTR_ERR(task);
 	trace_nfs4_bind_conn_to_session(clp, status);
 	if (status == 0) {
 		if (memcmp(res.sessionid.data,
@@ -6838,6 +6868,31 @@
 	return status;
 }
 
+struct rpc_bind_conn_calldata {
+	struct nfs_client *clp;
+	struct rpc_cred *cred;
+};
+
+static int
+nfs4_proc_bind_conn_to_session_callback(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt,
+		void *calldata)
+{
+	struct rpc_bind_conn_calldata *p = calldata;
+
+	return nfs4_proc_bind_one_conn_to_session(clnt, xprt, p->clp, p->cred);
+}
+
+int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+{
+	struct rpc_bind_conn_calldata data = {
+		.clp = clp,
+		.cred = cred,
+	};
+	return rpc_clnt_iterate_for_each_xprt(clp->cl_rpcclient,
+			nfs4_proc_bind_conn_to_session_callback, &data);
+}
+
 /*
  * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map
  * and operations we'd like to see to enable certain features in the allow map
@@ -7320,7 +7375,7 @@
 	args->bc_attrs.max_resp_sz = PAGE_SIZE;
 	args->bc_attrs.max_resp_sz_cached = 0;
 	args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
-	args->bc_attrs.max_reqs = 1;
+	args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS;
 
 	dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
 		"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",

diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index e23366e..332d06e 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c

@@ -135,6 +135,43 @@
 	return ERR_PTR(-ENOMEM);
 }
 
+static void nfs4_lock_slot(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot)
+{
+	u32 slotid = slot->slot_nr;
+
+	__set_bit(slotid, tbl->used_slots);
+	if (slotid > tbl->highest_used_slotid ||
+	    tbl->highest_used_slotid == NFS4_NO_SLOT)
+		tbl->highest_used_slotid = slotid;
+	slot->generation = tbl->generation;
+}
+
+/*
+ * nfs4_try_to_lock_slot - Given a slot try to allocate it
+ *
+ * Note: must be called with the slot_tbl_lock held.
+ */
+bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
+{
+	if (nfs4_test_locked_slot(tbl, slot->slot_nr))
+		return false;
+	nfs4_lock_slot(tbl, slot);
+	return true;
+}
+
+/*
+ * nfs4_lookup_slot - Find a slot but don't allocate it
+ *
+ * Note: must be called with the slot_tbl_lock held.
+ */
+struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid)
+{
+	if (slotid <= tbl->max_slotid)
+		return nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+	return ERR_PTR(-E2BIG);
+}
+
 /*
  * nfs4_alloc_slot - efficiently look for a free slot
  *
@@ -153,18 +190,11 @@
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
 		tbl->max_slotid + 1);
 	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
-	if (slotid > tbl->max_slotid)
-		goto out;
-	ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
-	if (IS_ERR(ret))
-		goto out;
-	__set_bit(slotid, tbl->used_slots);
-	if (slotid > tbl->highest_used_slotid ||
-			tbl->highest_used_slotid == NFS4_NO_SLOT)
-		tbl->highest_used_slotid = slotid;
-	ret->generation = tbl->generation;
-
-out:
+	if (slotid <= tbl->max_slotid) {
+		ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+		if (!IS_ERR(ret))
+			nfs4_lock_slot(tbl, ret);
+	}
 	dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
 		!IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT);

diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index e3ea2c5..5b51298 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h

@@ -77,6 +77,8 @@
 		unsigned int max_reqs, const char *queue);
 extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
 extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
+extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
+extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
 bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
@@ -88,6 +90,12 @@
 	return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
 }
 
+static inline bool nfs4_test_locked_slot(const struct nfs4_slot_table *tbl,
+		u32 slotid)
+{
+	return !!test_bit(slotid, tbl->used_slots);
+}
+
 #if defined(CONFIG_NFS_V4_1)
 extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
 		u32 target_highest_slotid);

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e44412..88474a4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c

@@ -5001,7 +5001,7 @@
 		blocksize = be32_to_cpup(p);
 		maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
 	}
-	maxsize >>= PAGE_CACHE_SHIFT;
+	maxsize >>= PAGE_SHIFT;
 	*pagemod_limit = min_t(u64, maxsize, ULONG_MAX);
 	return 0;
 out_overflow:

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9aebffb..049c1b1 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c

@@ -486,7 +486,7 @@
 	dprintk("%s: index=0x%lx\n", __func__,
 		(page == ZERO_PAGE(0)) ? -1UL : page->index);
 	if (ZERO_PAGE(0) != page)
-		page_cache_release(page);
+		put_page(page);
 	return;
 }
 

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 8ce4f61..1f6db42 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c

@@ -342,7 +342,7 @@
 	 * update_nfs_request below if the region is not locked. */
 	req->wb_page    = page;
 	req->wb_index	= page_file_index(page);
-	page_cache_get(page);
+	get_page(page);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -392,7 +392,7 @@
 	struct nfs_lock_context *l_ctx = req->wb_lock_context;
 
 	if (page != NULL) {
-		page_cache_release(page);
+		put_page(page);
 		req->wb_page = NULL;
 	}
 	if (l_ctx != NULL) {
@@ -904,7 +904,7 @@
 				return false;
 		} else {
 			if (req->wb_pgbase != 0 ||
-			    prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+			    prev->wb_pgbase + prev->wb_bytes != PAGE_SIZE)
 				return false;
 		}
 	}

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2fa483e..89a5ef4 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c

@@ -841,7 +841,7 @@
 
 		i_size = i_size_read(ino);
 
-		lgp->args.minlength = PAGE_CACHE_SIZE;
+		lgp->args.minlength = PAGE_SIZE;
 		if (lgp->args.minlength > range->length)
 			lgp->args.minlength = range->length;
 		if (range->iomode == IOMODE_READ) {
@@ -1618,13 +1618,13 @@
 		spin_unlock(&clp->cl_lock);
 	}
 
-	pg_offset = arg.offset & ~PAGE_CACHE_MASK;
+	pg_offset = arg.offset & ~PAGE_MASK;
 	if (pg_offset) {
 		arg.offset -= pg_offset;
 		arg.length += pg_offset;
 	}
 	if (arg.length != NFS4_MAX_UINT64)
-		arg.length = PAGE_CACHE_ALIGN(arg.length);
+		arg.length = PAGE_ALIGN(arg.length);
 
 	lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
 	atomic_dec(&lo->plh_outstanding);

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 81ac648..4aaed89 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c

@@ -606,12 +606,22 @@
 		dprintk("%s: DS %s: trying address %s\n",
 			__func__, ds->ds_remotestr, da->da_remotestr);
 
-		clp = get_v3_ds_connect(mds_srv->nfs_client,
+		if (!IS_ERR(clp)) {
+			struct xprt_create xprt_args = {
+				.ident = XPRT_TRANSPORT_TCP,
+				.net = clp->cl_net,
+				.dstaddr = (struct sockaddr *)&da->da_addr,
+				.addrlen = da->da_addrlen,
+				.servername = clp->cl_hostname,
+			};
+			/* Add this address as an alias */
+			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+					rpc_clnt_test_and_add_xprt, NULL);
+		} else
+			clp = get_v3_ds_connect(mds_srv->nfs_client,
 					(struct sockaddr *)&da->da_addr,
 					da->da_addrlen, IPPROTO_TCP,
 					timeo, retrans, au_flavor);
-		if (!IS_ERR(clp))
-			break;
 	}
 
 	if (IS_ERR(clp)) {

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index eb31e23..6776d7a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c

@@ -46,7 +46,7 @@
 static
 int nfs_return_empty_page(struct page *page)
 {
-	zero_user(page, 0, PAGE_CACHE_SIZE);
+	zero_user(page, 0, PAGE_SIZE);
 	SetPageUptodate(page);
 	unlock_page(page);
 	return 0;
@@ -118,8 +118,8 @@
 		unlock_page(page);
 		return PTR_ERR(new);
 	}
-	if (len < PAGE_CACHE_SIZE)
-		zero_user_segment(page, len, PAGE_CACHE_SIZE);
+	if (len < PAGE_SIZE)
+		zero_user_segment(page, len, PAGE_SIZE);
 
 	nfs_pageio_init_read(&pgio, inode, false,
 			     &nfs_async_read_completion_ops);
@@ -295,7 +295,7 @@
 	int		error;
 
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
-		page, PAGE_CACHE_SIZE, page_file_index(page));
+		page, PAGE_SIZE, page_file_index(page));
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
 	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
 
@@ -361,8 +361,8 @@
 	if (IS_ERR(new))
 		goto out_error;
 
-	if (len < PAGE_CACHE_SIZE)
-		zero_user_segment(page, len, PAGE_CACHE_SIZE);
+	if (len < PAGE_SIZE)
+		zero_user_segment(page, len, PAGE_SIZE);
 	if (!nfs_pageio_add_request(desc->pgio, new)) {
 		nfs_list_remove_request(new);
 		nfs_readpage_release(new);
@@ -424,8 +424,8 @@
 
 	pgm = &pgio.pg_mirrors[0];
 	NFS_I(inode)->read_io += pgm->pg_bytes_written;
-	npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >>
-		 PAGE_CACHE_SHIFT;
+	npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >>
+		 PAGE_SHIFT;
 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
 read_complete:
 	put_nfs_open_context(desc.ctx);

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5754835..5f4fd53 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c

@@ -150,7 +150,7 @@
 
 	spin_lock(&inode->i_lock);
 	i_size = i_size_read(inode);
-	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+	end_index = (i_size - 1) >> PAGE_SHIFT;
 	if (i_size > 0 && page_file_index(page) < end_index)
 		goto out;
 	end = page_file_offset(page) + ((loff_t)offset+count);
@@ -1942,7 +1942,7 @@
 int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
 {
 	loff_t range_start = page_file_offset(page);
-	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = 0,

diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index a0b77fc..c9f583d 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig

@@ -84,12 +84,30 @@
 	  If unsure, say N.
 
 config NFSD_PNFS
-	bool "NFSv4.1 server support for Parallel NFS (pNFS)"
-	depends on NFSD_V4
+	bool
+
+config NFSD_BLOCKLAYOUT
+	bool "NFSv4.1 server support for pNFS block layouts"
+	depends on NFSD_V4 && BLOCK
+	select NFSD_PNFS
 	help
-	  This option enables support for the parallel NFS features of the
-	  minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
-	  server.
+	  This option enables support for the exporting pNFS block layouts
+	  in the kernel's NFS server. The pNFS block layout enables NFS
+	  clients to directly perform I/O to block devices accesible to both
+	  the server and the clients.  See RFC 5663 for more details.
+
+	  If unsure, say N.
+
+config NFSD_SCSILAYOUT
+	bool "NFSv4.1 server support for pNFS SCSI layouts"
+	depends on NFSD_V4 && BLOCK
+	select NFSD_PNFS
+	help
+	  This option enables support for the exporting pNFS SCSI layouts
+	  in the kernel's NFS server. The pNFS SCSI layout enables NFS
+	  clients to directly perform I/O to SCSI devices accesible to both
+	  the server and the clients.  See draft-ietf-nfsv4-scsi-layout for
+	  more details.
 
 	  If unsure, say N.
 

diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9a6028e..3ae5f3c 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile

@@ -17,4 +17,6 @@
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
 			   nfs4acl.o nfs4callback.o nfs4recover.o
-nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
+nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o

diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index c29d942..e55b524 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c

@@ -1,11 +1,14 @@
 /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
  */
 #include <linux/exportfs.h>
 #include <linux/genhd.h>
 #include <linux/slab.h>
+#include <linux/pr.h>
 
 #include <linux/nfsd/debug.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_common.h>
 
 #include "blocklayoutxdr.h"
 #include "pnfs.h"
@@ -13,37 +16,6 @@
 #define NFSDDBG_FACILITY	NFSDDBG_PNFS
 
 
-static int
-nfsd4_block_get_device_info_simple(struct super_block *sb,
-		struct nfsd4_getdeviceinfo *gdp)
-{
-	struct pnfs_block_deviceaddr *dev;
-	struct pnfs_block_volume *b;
-
-	dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
-		      sizeof(struct pnfs_block_volume), GFP_KERNEL);
-	if (!dev)
-		return -ENOMEM;
-	gdp->gd_device = dev;
-
-	dev->nr_volumes = 1;
-	b = &dev->volumes[0];
-
-	b->type = PNFS_BLOCK_VOLUME_SIMPLE;
-	b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
-	return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
-			&b->simple.offset);
-}
-
-static __be32
-nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
-		struct nfsd4_getdeviceinfo *gdp)
-{
-	if (sb->s_bdev != sb->s_bdev->bd_contains)
-		return nfserr_inval;
-	return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
-}
-
 static __be32
 nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
 		struct nfsd4_layoutget *args)
@@ -141,20 +113,13 @@
 }
 
 static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
-		struct nfsd4_layoutcommit *lcp)
+nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
+		struct iomap *iomaps, int nr_iomaps)
 {
 	loff_t new_size = lcp->lc_last_wr + 1;
 	struct iattr iattr = { .ia_valid = 0 };
-	struct iomap *iomaps;
-	int nr_iomaps;
 	int error;
 
-	nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
-			lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
-	if (nr_iomaps < 0)
-		return nfserrno(nr_iomaps);
-
 	if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
 	    timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
 		lcp->lc_mtime = current_fs_time(inode->i_sb);
@@ -172,6 +137,54 @@
 	return nfserrno(error);
 }
 
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
+static int
+nfsd4_block_get_device_info_simple(struct super_block *sb,
+		struct nfsd4_getdeviceinfo *gdp)
+{
+	struct pnfs_block_deviceaddr *dev;
+	struct pnfs_block_volume *b;
+
+	dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+		      sizeof(struct pnfs_block_volume), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	gdp->gd_device = dev;
+
+	dev->nr_volumes = 1;
+	b = &dev->volumes[0];
+
+	b->type = PNFS_BLOCK_VOLUME_SIMPLE;
+	b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
+	return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
+			&b->simple.offset);
+}
+
+static __be32
+nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
+		struct nfs4_client *clp,
+		struct nfsd4_getdeviceinfo *gdp)
+{
+	if (sb->s_bdev != sb->s_bdev->bd_contains)
+		return nfserr_inval;
+	return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
+}
+
+static __be32
+nfsd4_block_proc_layoutcommit(struct inode *inode,
+		struct nfsd4_layoutcommit *lcp)
+{
+	struct iomap *iomaps;
+	int nr_iomaps;
+
+	nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+			lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+	if (nr_iomaps < 0)
+		return nfserrno(nr_iomaps);
+
+	return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
 const struct nfsd4_layout_ops bl_layout_ops = {
 	/*
 	 * Pretend that we send notification to the client.  This is a blatant
@@ -190,3 +203,206 @@
 	.encode_layoutget	= nfsd4_block_encode_layoutget,
 	.proc_layoutcommit	= nfsd4_block_proc_layoutcommit,
 };
+#endif /* CONFIG_NFSD_BLOCKLAYOUT */
+
+#ifdef CONFIG_NFSD_SCSILAYOUT
+static int nfsd4_scsi_identify_device(struct block_device *bdev,
+		struct pnfs_block_volume *b)
+{
+	struct request_queue *q = bdev->bd_disk->queue;
+	struct request *rq;
+	size_t bufflen = 252, len, id_len;
+	u8 *buf, *d, type, assoc;
+	int error;
+
+	buf = kzalloc(bufflen, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	rq = blk_get_request(q, READ, GFP_KERNEL);
+	if (IS_ERR(rq)) {
+		error = -ENOMEM;
+		goto out_free_buf;
+	}
+	blk_rq_set_block_pc(rq);
+
+	error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
+	if (error)
+		goto out_put_request;
+
+	rq->cmd[0] = INQUIRY;
+	rq->cmd[1] = 1;
+	rq->cmd[2] = 0x83;
+	rq->cmd[3] = bufflen >> 8;
+	rq->cmd[4] = bufflen & 0xff;
+	rq->cmd_len = COMMAND_SIZE(INQUIRY);
+
+	error = blk_execute_rq(rq->q, NULL, rq, 1);
+	if (error) {
+		pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
+			rq->errors);
+		goto out_put_request;
+	}
+
+	len = (buf[2] << 8) + buf[3] + 4;
+	if (len > bufflen) {
+		pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
+			len);
+		goto out_put_request;
+	}
+
+	d = buf + 4;
+	for (d = buf + 4; d < buf + len; d += id_len + 4) {
+		id_len = d[3];
+		type = d[1] & 0xf;
+		assoc = (d[1] >> 4) & 0x3;
+
+		/*
+		 * We only care about a EUI-64 and NAA designator types
+		 * with LU association.
+		 */
+		if (assoc != 0x00)
+			continue;
+		if (type != 0x02 && type != 0x03)
+			continue;
+		if (id_len != 8 && id_len != 12 && id_len != 16)
+			continue;
+
+		b->scsi.code_set = PS_CODE_SET_BINARY;
+		b->scsi.designator_type = type == 0x02 ?
+			PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
+		b->scsi.designator_len = id_len;
+		memcpy(b->scsi.designator, d + 4, id_len);
+
+		/*
+		 * If we found a 8 or 12 byte descriptor continue on to
+		 * see if a 16 byte one is available.  If we find a
+		 * 16 byte descriptor we're done.
+		 */
+		if (id_len == 16)
+			break;
+	}
+
+out_put_request:
+	blk_put_request(rq);
+out_free_buf:
+	kfree(buf);
+	return error;
+}
+
+#define NFSD_MDS_PR_KEY		0x0100000000000000
+
+/*
+ * We use the client ID as a unique key for the reservations.
+ * This allows us to easily fence a client when recalls fail.
+ */
+static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
+{
+	return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
+}
+
+static int
+nfsd4_block_get_device_info_scsi(struct super_block *sb,
+		struct nfs4_client *clp,
+		struct nfsd4_getdeviceinfo *gdp)
+{
+	struct pnfs_block_deviceaddr *dev;
+	struct pnfs_block_volume *b;
+	const struct pr_ops *ops;
+	int error;
+
+	dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+		      sizeof(struct pnfs_block_volume), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	gdp->gd_device = dev;
+
+	dev->nr_volumes = 1;
+	b = &dev->volumes[0];
+
+	b->type = PNFS_BLOCK_VOLUME_SCSI;
+	b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
+
+	error = nfsd4_scsi_identify_device(sb->s_bdev, b);
+	if (error)
+		return error;
+
+	ops = sb->s_bdev->bd_disk->fops->pr_ops;
+	if (!ops) {
+		pr_err("pNFS: device %s does not support PRs.\n",
+			sb->s_id);
+		return -EINVAL;
+	}
+
+	error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
+	if (error) {
+		pr_err("pNFS: failed to register key for device %s.\n",
+			sb->s_id);
+		return -EINVAL;
+	}
+
+	error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
+			PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
+	if (error) {
+		pr_err("pNFS: failed to reserve device %s.\n",
+			sb->s_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static __be32
+nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
+		struct nfs4_client *clp,
+		struct nfsd4_getdeviceinfo *gdp)
+{
+	if (sb->s_bdev != sb->s_bdev->bd_contains)
+		return nfserr_inval;
+	return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
+}
+static __be32
+nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+		struct nfsd4_layoutcommit *lcp)
+{
+	struct iomap *iomaps;
+	int nr_iomaps;
+
+	nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+			lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+	if (nr_iomaps < 0)
+		return nfserrno(nr_iomaps);
+
+	return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
+static void
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+{
+	struct nfs4_client *clp = ls->ls_stid.sc_client;
+	struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+
+	bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
+			nfsd4_scsi_pr_key(clp), 0, true);
+}
+
+const struct nfsd4_layout_ops scsi_layout_ops = {
+	/*
+	 * Pretend that we send notification to the client.  This is a blatant
+	 * lie to force recent Linux clients to cache our device IDs.
+	 * We rarely ever change the device ID, so the harm of leaking deviceids
+	 * for a while isn't too bad.  Unfortunately RFC5661 is a complete mess
+	 * in this regard, but I filed errata 4119 for this a while ago, and
+	 * hopefully the Linux client will eventually start caching deviceids
+	 * without this again.
+	 */
+	.notify_types		=
+			NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
+	.proc_getdeviceinfo	= nfsd4_scsi_proc_getdeviceinfo,
+	.encode_getdeviceinfo	= nfsd4_block_encode_getdeviceinfo,
+	.proc_layoutget		= nfsd4_block_proc_layoutget,
+	.encode_layoutget	= nfsd4_block_encode_layoutget,
+	.proc_layoutcommit	= nfsd4_scsi_proc_layoutcommit,
+	.fence_client		= nfsd4_scsi_fence_client,
+};
+#endif /* CONFIG_NFSD_SCSILAYOUT */

diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 6d834dc..6c3b316 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
  */
 #include <linux/sunrpc/svc.h>
 #include <linux/exportfs.h>
@@ -53,6 +53,18 @@
 		p = xdr_encode_hyper(p, b->simple.offset);
 		p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
 		break;
+	case PNFS_BLOCK_VOLUME_SCSI:
+		len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8;
+		p = xdr_reserve_space(xdr, len);
+		if (!p)
+			return -ETOOSMALL;
+
+		*p++ = cpu_to_be32(b->type);
+		*p++ = cpu_to_be32(b->scsi.code_set);
+		*p++ = cpu_to_be32(b->scsi.designator_type);
+		p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len);
+		p = xdr_encode_hyper(p, b->scsi.pr_key);
+		break;
 	default:
 		return -ENOTSUPP;
 	}
@@ -93,18 +105,22 @@
 		u32 block_size)
 {
 	struct iomap *iomaps;
-	u32 nr_iomaps, expected, i;
+	u32 nr_iomaps, i;
 
 	if (len < sizeof(u32)) {
 		dprintk("%s: extent array too small: %u\n", __func__, len);
 		return -EINVAL;
 	}
+	len -= sizeof(u32);
+	if (len % PNFS_BLOCK_EXTENT_SIZE) {
+		dprintk("%s: extent array invalid: %u\n", __func__, len);
+		return -EINVAL;
+	}
 
 	nr_iomaps = be32_to_cpup(p++);
-	expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
-	if (len != expected) {
+	if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
 		dprintk("%s: extent array size mismatch: %u/%u\n",
-			__func__, len, expected);
+			__func__, len, nr_iomaps);
 		return -EINVAL;
 	}
 
@@ -155,3 +171,54 @@
 	kfree(iomaps);
 	return -EINVAL;
 }
+
+int
+nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+		u32 block_size)
+{
+	struct iomap *iomaps;
+	u32 nr_iomaps, expected, i;
+
+	if (len < sizeof(u32)) {
+		dprintk("%s: extent array too small: %u\n", __func__, len);
+		return -EINVAL;
+	}
+
+	nr_iomaps = be32_to_cpup(p++);
+	expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
+	if (len != expected) {
+		dprintk("%s: extent array size mismatch: %u/%u\n",
+			__func__, len, expected);
+		return -EINVAL;
+	}
+
+	iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+	if (!iomaps) {
+		dprintk("%s: failed to allocate extent array\n", __func__);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_iomaps; i++) {
+		u64 val;
+
+		p = xdr_decode_hyper(p, &val);
+		if (val & (block_size - 1)) {
+			dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
+			goto fail;
+		}
+		iomaps[i].offset = val;
+
+		p = xdr_decode_hyper(p, &val);
+		if (val & (block_size - 1)) {
+			dprintk("%s: unaligned length 0x%llx\n", __func__, val);
+			goto fail;
+		}
+		iomaps[i].length = val;
+	}
+
+	*iomapp = iomaps;
+	return nr_iomaps;
+fail:
+	kfree(iomaps);
+	return -EINVAL;
+}

diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index 6de925f..397bc75 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h

@@ -15,6 +15,11 @@
 	enum pnfs_block_extent_state	es;
 };
 
+struct pnfs_block_range {
+	u64				foff;
+	u64				len;
+};
+
 /*
  * Random upper cap for the uuid length to avoid unbounded allocation.
  * Not actually limited by the protocol.
@@ -29,6 +34,13 @@
 			u32		sig_len;
 			u8		sig[PNFS_BLOCK_UUID_LEN];
 		} simple;
+		struct {
+			enum scsi_code_set		code_set;
+			enum scsi_designator_type	designator_type;
+			int				designator_len;
+			u8				designator[256];
+			u64				pr_key;
+		} scsi;
 	};
 };
 
@@ -43,5 +55,7 @@
 		struct nfsd4_layoutget *lgp);
 int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
 		u32 block_size);
+int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+		u32 block_size);
 
 #endif /* _NFSD_BLOCKLAYOUTXDR_H */

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7..51c3b06 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c

@@ -147,6 +147,7 @@
 {
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
+	unsigned long cnt = min(argp->count, max_blocksize);
 
 	dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
 				SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@
 	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
 	 * + 1 (xdr opaque byte count) = 26
 	 */
-	resp->count = min(argp->count, max_blocksize);
+	resp->count = cnt;
 	svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
 
 	fh_copy(&resp->fh, &argp->fh);
@@ -167,8 +168,8 @@
 				  &resp->count);
 	if (nfserr == 0) {
 		struct inode	*inode = d_inode(resp->fh.fh_dentry);
-
-		resp->eof = (argp->offset + resp->count) >= inode->i_size;
+		resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
+							inode->i_size);
 	}
 
 	RETURN_STATUS(nfserr);

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ce2d010..825c7bc 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c

@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2014 Christoph Hellwig.
  */
+#include <linux/blkdev.h>
 #include <linux/kmod.h>
 #include <linux/file.h>
 #include <linux/jhash.h>
@@ -26,7 +27,12 @@
 static const struct lock_manager_operations nfsd4_layouts_lm_ops;
 
 const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] =  {
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
 	[LAYOUT_BLOCK_VOLUME]	= &bl_layout_ops,
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+	[LAYOUT_SCSI]		= &scsi_layout_ops,
+#endif
 };
 
 /* pNFS device ID to export fsid mapping */
@@ -121,10 +127,24 @@
 	if (!(exp->ex_flags & NFSEXP_PNFS))
 		return;
 
+	/*
+	 * Check if the file system supports exporting a block-like layout.
+	 * If the block device supports reservations prefer the SCSI layout,
+	 * otherwise advertise the block layout.
+	 */
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
 	if (sb->s_export_op->get_uuid &&
 	    sb->s_export_op->map_blocks &&
 	    sb->s_export_op->commit_blocks)
 		exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+	/* overwrite block layout selection if needed */
+	if (sb->s_export_op->map_blocks &&
+	    sb->s_export_op->commit_blocks &&
+	    sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
+		exp->ex_layout_type = LAYOUT_SCSI;
+#endif
 }
 
 static void
@@ -590,8 +610,6 @@
 
 	rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
 
-	trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
 	printk(KERN_WARNING
 		"nfsd: client %s failed to respond to layout recall. "
 		"  Fencing..\n", addr_str);
@@ -626,6 +644,7 @@
 		container_of(cb, struct nfs4_layout_stateid, ls_recall);
 	struct nfsd_net *nn;
 	ktime_t now, cutoff;
+	const struct nfsd4_layout_ops *ops;
 	LIST_HEAD(reaplist);
 
 
@@ -661,7 +680,13 @@
 		/*
 		 * Unknown error or non-responding client, we'll need to fence.
 		 */
-		nfsd4_cb_layout_fail(ls);
+		trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
+
+		ops = nfsd4_layout_ops[ls->ls_layout_type];
+		if (ops->fence_client)
+			ops->fence_client(ls);
+		else
+			nfsd4_cb_layout_fail(ls);
 		return -1;
 	}
 }

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4cba786..de1ff1d 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c

@@ -864,12 +864,10 @@
 nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	      struct nfsd4_secinfo *secinfo)
 {
-	struct svc_fh resfh;
 	struct svc_export *exp;
 	struct dentry *dentry;
 	__be32 err;
 
-	fh_init(&resfh, NFS4_FHSIZE);
 	err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC);
 	if (err)
 		return err;
@@ -878,6 +876,7 @@
 				    &exp, &dentry);
 	if (err)
 		return err;
+	fh_unlock(&cstate->current_fh);
 	if (d_really_is_negative(dentry)) {
 		exp_put(exp);
 		err = nfserr_noent;
@@ -1269,8 +1268,10 @@
 		goto out;
 
 	nfserr = nfs_ok;
-	if (gdp->gd_maxcount != 0)
-		nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
+	if (gdp->gd_maxcount != 0) {
+		nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
+					cstate->session->se_client, gdp);
+	}
 
 	gdp->gd_notify_types &= ops->notify_types;
 out:

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 195fe26..66eaeb1 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c

@@ -1266,6 +1266,7 @@
 	/* XXX: The usermode helper s not working in container yet. */
 	if (net != &init_net) {
 		pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
+		kfree(grace_start);
 		return -EINVAL;
 	}
 

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c484a2b..0462eed 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c

@@ -2408,7 +2408,8 @@
 	default:				/* checked by xdr code */
 		WARN_ON_ONCE(1);
 	case SP4_SSV:
-		return nfserr_encr_alg_unsupp;
+		status = nfserr_encr_alg_unsupp;
+		goto out_nolock;
 	}
 
 	/* Cases below refer to rfc 5661 section 18.35.4: */
@@ -2586,21 +2587,26 @@
 	return nfs_ok;
 }
 
+/*
+ * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now.
+ * These are based on similar macros in linux/sunrpc/msg_prot.h .
+ */
+#define RPC_MAX_HEADER_WITH_AUTH_SYS \
+	(RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK))
+
+#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \
+	(RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK))
+
 #define NFSD_CB_MAX_REQ_SZ	((NFS4_enc_cb_recall_sz + \
-				 RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))
+				 RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32))
 #define NFSD_CB_MAX_RESP_SZ	((NFS4_dec_cb_recall_sz + \
-				 RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32))
+				 RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \
+				 sizeof(__be32))
 
 static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
 {
 	ca->headerpadsz = 0;
 
-	/*
-	 * These RPC_MAX_HEADER macros are overkill, especially since we
-	 * don't even do gss on the backchannel yet.  But this is still
-	 * less than 1k.  Tighten up this estimate in the unlikely event
-	 * it turns out to be a problem for some client:
-	 */
 	if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
 		return nfserr_toosmall;
 	if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
@@ -2710,10 +2716,9 @@
 		goto out_free_conn;
 	}
 	status = nfs_ok;
-	/*
-	 * We do not support RDMA or persistent sessions
-	 */
+	/* Persistent sessions are not supported */
 	cr_ses->flags &= ~SESSION4_PERSIST;
+	/* Upshifting from TCP to RDMA is not supported */
 	cr_ses->flags &= ~SESSION4_RDMA;
 
 	init_session(rqstp, new, conf, cr_ses);

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d6ef095..9df898b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c

@@ -1072,8 +1072,9 @@
 
 	READ_BUF(4);
 	rename->rn_snamelen = be32_to_cpup(p++);
-	READ_BUF(rename->rn_snamelen + 4);
+	READ_BUF(rename->rn_snamelen);
 	SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+	READ_BUF(4);
 	rename->rn_tnamelen = be32_to_cpup(p++);
 	READ_BUF(rename->rn_tnamelen);
 	SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
@@ -1155,13 +1156,14 @@
 	READ_BUF(8);
 	setclientid->se_callback_prog = be32_to_cpup(p++);
 	setclientid->se_callback_netid_len = be32_to_cpup(p++);
-
-	READ_BUF(setclientid->se_callback_netid_len + 4);
+	READ_BUF(setclientid->se_callback_netid_len);
 	SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+	READ_BUF(4);
 	setclientid->se_callback_addr_len = be32_to_cpup(p++);
 
-	READ_BUF(setclientid->se_callback_addr_len + 4);
+	READ_BUF(setclientid->se_callback_addr_len);
 	SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+	READ_BUF(4);
 	setclientid->se_callback_ident = be32_to_cpup(p++);
 
 	DECODE_TAIL;
@@ -1835,8 +1837,9 @@
 
 	READ_BUF(4);
 	argp->taglen = be32_to_cpup(p++);
-	READ_BUF(argp->taglen + 8);
+	READ_BUF(argp->taglen);
 	SAVEMEM(argp->tag, argp->taglen);
+	READ_BUF(8);
 	argp->minorversion = be32_to_cpup(p++);
 	argp->opcnt = be32_to_cpup(p++);
 	max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
@@ -3060,7 +3063,7 @@
 		p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
 						NFS4_MAX_SESSIONID_LEN);
 		*p++ = cpu_to_be32(bcts->dir);
-		/* Sorry, we do not yet support RDMA over 4.1: */
+		/* Upshifting from TCP to RDMA is not supported */
 		*p++ = cpu_to_be32(0);
 	}
 	return nfserr;
@@ -3362,6 +3365,7 @@
 	struct xdr_stream *xdr = &resp->xdr;
 	struct xdr_buf *buf = xdr->buf;
 	u32 eof;
+	long len;
 	int space_left;
 	__be32 nfserr;
 	__be32 *p = xdr->p - 2;
@@ -3370,6 +3374,7 @@
 	if (xdr->end - xdr->p < 1)
 		return nfserr_resource;
 
+	len = maxcount;
 	nfserr = nfsd_splice_read(read->rd_rqstp, file,
 				  read->rd_offset, &maxcount);
 	if (nfserr) {
@@ -3382,8 +3387,8 @@
 		return nfserr;
 	}
 
-	eof = (read->rd_offset + maxcount >=
-	       d_inode(read->rd_fhp->fh_dentry)->i_size);
+	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+				d_inode(read->rd_fhp->fh_dentry)->i_size);
 
 	*(p++) = htonl(eof);
 	*(p++) = htonl(maxcount);
@@ -3453,14 +3458,15 @@
 	}
 	read->rd_vlen = v;
 
+	len = maxcount;
 	nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
 			read->rd_vlen, &maxcount);
 	if (nfserr)
 		return nfserr;
 	xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
-	eof = (read->rd_offset + maxcount >=
-	       d_inode(read->rd_fhp->fh_dentry)->i_size);
+	eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+				d_inode(read->rd_fhp->fh_dentry)->i_size);
 
 	tmp = htonl(eof);
 	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);

diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index d4c4453..7d073b9 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h

@@ -21,6 +21,7 @@
 	u32		notify_types;
 
 	__be32 (*proc_getdeviceinfo)(struct super_block *sb,
+			struct nfs4_client *clp,
 			struct nfsd4_getdeviceinfo *gdevp);
 	__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
 			struct nfsd4_getdeviceinfo *gdevp);
@@ -32,10 +33,17 @@
 
 	__be32 (*proc_layoutcommit)(struct inode *inode,
 			struct nfsd4_layoutcommit *lcp);
+
+	void (*fence_client)(struct nfs4_layout_stateid *ls);
 };
 
 extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
 extern const struct nfsd4_layout_ops bl_layout_ops;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+extern const struct nfsd4_layout_ops scsi_layout_ops;
+#endif
 
 __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *cstate, stateid_t *stateid,

diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c11ba31..2d573ec 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h

@@ -139,4 +139,23 @@
 	       || createmode == NFS4_CREATE_EXCLUSIVE4_1;
 }
 
+static inline bool nfsd_eof_on_read(long requested, long read,
+				loff_t offset, loff_t size)
+{
+	/* We assume a short read means eof: */
+	if (requested > read)
+		return true;
+	/*
+	 * A non-short read might also reach end of file.  The spec
+	 * still requires us to set eof in that case.
+	 *
+	 * Further operations may have modified the file size since
+	 * the read, so the following check is not atomic with the read.
+	 * We've only seen that cause a problem for a client in the case
+	 * where the read returned a count of 0 without setting eof.
+	 * That case was fixed by the addition of the above check.
+	 */
+	return (offset + read >= size);
+}
+
 #endif /* LINUX_NFSD_VFS_H */

diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 27f75bc..a9fb363 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c

@@ -458,7 +458,7 @@
 	struct buffer_head *pbh;
 	__u64 key;
 
-	key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
+	key = page_index(bh->b_page) << (PAGE_SHIFT -
 					 bmap->b_inode->i_blkbits);
 	for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
 		key++;

diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index a35ae35..e0c9daf 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c

@@ -62,7 +62,7 @@
 	set_buffer_uptodate(bh);
 
 	unlock_page(bh->b_page);
-	page_cache_release(bh->b_page);
+	put_page(bh->b_page);
 	return bh;
 }
 
@@ -128,7 +128,7 @@
 
 out_locked:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 
@@ -146,7 +146,7 @@
 	pgoff_t index = page_index(page);
 	int still_dirty;
 
-	page_cache_get(page);
+	get_page(page);
 	lock_page(page);
 	wait_on_page_writeback(page);
 
@@ -154,7 +154,7 @@
 	still_dirty = PageDirty(page);
 	mapping = page->mapping;
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (!still_dirty && mapping)
 		invalidate_inode_pages2_range(mapping, index, index);
@@ -181,7 +181,7 @@
 	obh = ctxt->bh;
 	ctxt->newbh = NULL;
 
-	if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
+	if (inode->i_blkbits == PAGE_SHIFT) {
 		lock_page(obh->b_page);
 		/*
 		 * We cannot call radix_tree_preload for the kernels older

diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 6b8b92b..e08f064 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c

@@ -58,7 +58,7 @@
 static inline void nilfs_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 /*
@@ -69,9 +69,9 @@
 {
 	unsigned last_byte = inode->i_size;
 
-	last_byte -= page_nr << PAGE_CACHE_SHIFT;
-	if (last_byte > PAGE_CACHE_SIZE)
-		last_byte = PAGE_CACHE_SIZE;
+	last_byte -= page_nr << PAGE_SHIFT;
+	if (last_byte > PAGE_SIZE)
+		last_byte = PAGE_SIZE;
 	return last_byte;
 }
 
@@ -109,12 +109,12 @@
 	unsigned chunk_size = nilfs_chunk_size(dir);
 	char *kaddr = page_address(page);
 	unsigned offs, rec_len;
-	unsigned limit = PAGE_CACHE_SIZE;
+	unsigned limit = PAGE_SIZE;
 	struct nilfs_dir_entry *p;
 	char *error;
 
-	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
-		limit = dir->i_size & ~PAGE_CACHE_MASK;
+	if ((dir->i_size >> PAGE_SHIFT) == page->index) {
+		limit = dir->i_size & ~PAGE_MASK;
 		if (limit & (chunk_size - 1))
 			goto Ebadsize;
 		if (!limit)
@@ -161,7 +161,7 @@
 bad_entry:
 	nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - "
 		    "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
-		    dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		    dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
 		    (unsigned long) le64_to_cpu(p->inode),
 		    rec_len, p->name_len);
 	goto fail;
@@ -170,7 +170,7 @@
 	nilfs_error(sb, "nilfs_check_page",
 		    "entry in directory #%lu spans the page boundary"
 		    "offset=%lu, inode=%lu",
-		    dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		    dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
 		    (unsigned long) le64_to_cpu(p->inode));
 fail:
 	SetPageChecked(page);
@@ -256,8 +256,8 @@
 	loff_t pos = ctx->pos;
 	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
-	unsigned int offset = pos & ~PAGE_CACHE_MASK;
-	unsigned long n = pos >> PAGE_CACHE_SHIFT;
+	unsigned int offset = pos & ~PAGE_MASK;
+	unsigned long n = pos >> PAGE_SHIFT;
 	unsigned long npages = dir_pages(inode);
 /*	unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */
 
@@ -272,7 +272,7 @@
 		if (IS_ERR(page)) {
 			nilfs_error(sb, __func__, "bad page in #%lu",
 				    inode->i_ino);
-			ctx->pos += PAGE_CACHE_SIZE - offset;
+			ctx->pos += PAGE_SIZE - offset;
 			return -EIO;
 		}
 		kaddr = page_address(page);
@@ -361,7 +361,7 @@
 		if (++n >= npages)
 			n = 0;
 		/* next page is past the blocks we've got */
-		if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
+		if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) {
 			nilfs_error(dir->i_sb, __func__,
 			       "dir %lu size %lld exceeds block count %llu",
 			       dir->i_ino, dir->i_size,
@@ -401,7 +401,7 @@
 	if (de) {
 		res = le64_to_cpu(de->inode);
 		kunmap(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return res;
 }
@@ -460,7 +460,7 @@
 		kaddr = page_address(page);
 		dir_end = kaddr + nilfs_last_byte(dir, n);
 		de = (struct nilfs_dir_entry *)kaddr;
-		kaddr += PAGE_CACHE_SIZE - reclen;
+		kaddr += PAGE_SIZE - reclen;
 		while ((char *)de <= kaddr) {
 			if ((char *)de == dir_end) {
 				/* We hit i_size */
@@ -603,7 +603,7 @@
 	kunmap_atomic(kaddr);
 	nilfs_commit_chunk(page, mapping, 0, chunk_size);
 fail:
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 

diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 748ca23..0224b78 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c

@@ -115,7 +115,7 @@
 
  failed:
 	unlock_page(bh->b_page);
-	page_cache_release(bh->b_page);
+	put_page(bh->b_page);
 	return err;
 }
 

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 21a1e2e..5346313 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c

@@ -249,7 +249,7 @@
 		if (nr_dirty)
 			nilfs_set_file_dirty(inode, nr_dirty);
 	} else if (ret) {
-		unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
 
 		nilfs_set_file_dirty(inode, nr_dirty);
 	}
@@ -291,7 +291,7 @@
 			   struct page *page, void *fsdata)
 {
 	struct inode *inode = mapping->host;
-	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned start = pos & (PAGE_SIZE - 1);
 	unsigned nr_dirty;
 	int err;
 

diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 1125f40..f6982b9 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c

@@ -110,7 +110,7 @@
 
  failed_bh:
 	unlock_page(bh->b_page);
-	page_cache_release(bh->b_page);
+	put_page(bh->b_page);
 	brelse(bh);
 
  failed_unlock:
@@ -170,7 +170,7 @@
 
  failed_bh:
 	unlock_page(bh->b_page);
-	page_cache_release(bh->b_page);
+	put_page(bh->b_page);
 	brelse(bh);
  failed:
 	return ret;
@@ -363,7 +363,7 @@
 int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
 {
 	pgoff_t index = (pgoff_t)block >>
-		(PAGE_CACHE_SHIFT - inode->i_blkbits);
+		(PAGE_SHIFT - inode->i_blkbits);
 	struct page *page;
 	unsigned long first_block;
 	int ret = 0;
@@ -376,7 +376,7 @@
 	wait_on_page_writeback(page);
 
 	first_block = (unsigned long)index <<
-		(PAGE_CACHE_SHIFT - inode->i_blkbits);
+		(PAGE_SHIFT - inode->i_blkbits);
 	if (page_has_buffers(page)) {
 		struct buffer_head *bh;
 
@@ -385,7 +385,7 @@
 	}
 	still_dirty = PageDirty(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (still_dirty ||
 	    invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
@@ -578,7 +578,7 @@
 	}
 
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return 0;
 }
 
@@ -597,7 +597,7 @@
 			bh_frozen = nilfs_page_get_nth_block(page, n);
 		}
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return bh_frozen;
 }

diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 7ccdb96..151bc19 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c

@@ -431,11 +431,11 @@
 out_dir:
 	if (dir_de) {
 		kunmap(dir_page);
-		page_cache_release(dir_page);
+		put_page(dir_page);
 	}
 out_old:
 	kunmap(old_page);
-	page_cache_release(old_page);
+	put_page(old_page);
 out:
 	nilfs_transaction_abort(old_dir->i_sb);
 	return err;

diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index c20df77..4893915 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c

@@ -50,7 +50,7 @@
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, 1 << blkbits, b_state);
 
-	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
+	first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
 	bh = nilfs_page_get_nth_block(page, block - first_block);
 
 	touch_buffer(bh);
@@ -64,7 +64,7 @@
 				      unsigned long b_state)
 {
 	int blkbits = inode->i_blkbits;
-	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
+	pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
 	struct page *page;
 	struct buffer_head *bh;
 
@@ -75,7 +75,7 @@
 	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
 	if (unlikely(!bh)) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		return NULL;
 	}
 	return bh;
@@ -288,7 +288,7 @@
 		__set_page_dirty_nobuffers(dpage);
 
 		unlock_page(dpage);
-		page_cache_release(dpage);
+		put_page(dpage);
 		unlock_page(page);
 	}
 	pagevec_release(&pvec);
@@ -333,7 +333,7 @@
 			WARN_ON(PageDirty(dpage));
 			nilfs_copy_page(dpage, page, 0);
 			unlock_page(dpage);
-			page_cache_release(dpage);
+			put_page(dpage);
 		} else {
 			struct page *page2;
 
@@ -350,7 +350,7 @@
 			if (unlikely(err < 0)) {
 				WARN_ON(err == -EEXIST);
 				page->mapping = NULL;
-				page_cache_release(page); /* for cache */
+				put_page(page); /* for cache */
 			} else {
 				page->mapping = dmap;
 				dmap->nrpages++;
@@ -523,8 +523,8 @@
 	if (inode->i_mapping->nrpages == 0)
 		return 0;
 
-	index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);
+	nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits);
 
 	pagevec_init(&pvec, 0);
 
@@ -537,7 +537,7 @@
 	if (length > 0 && pvec.pages[0]->index > index)
 		goto out;
 
-	b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	b = pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits);
 	i = 0;
 	do {
 		page = pvec.pages[i];

diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 9b4f205..5afa77f 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c

@@ -544,14 +544,14 @@
 				blocksize, page, NULL);
 
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 
 		(*nr_salvaged_blocks)++;
 		goto next;
 
  failed_page:
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 
  failed_inode:
 		printk(KERN_WARNING

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 3b65ada..4317f72 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c

@@ -2070,7 +2070,7 @@
 			goto failed_to_write;
 
 		if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
-		    nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
+		    nilfs->ns_blocksize_bits != PAGE_SHIFT) {
 			/*
 			 * At this point, we avoid double buffering
 			 * for blocksize < pagesize because page dirty

diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 7521e11..97768a1 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c

@@ -74,7 +74,7 @@
 
 		set_buffer_uptodate(bh);
 
-		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
+		file_ofs = ((s64)page->index << PAGE_SHIFT) +
 				bh_offset(bh);
 		read_lock_irqsave(&ni->size_lock, flags);
 		init_size = ni->initialized_size;
@@ -142,7 +142,7 @@
 		u32 rec_size;
 
 		rec_size = ni->itype.index.block_size;
-		recs = PAGE_CACHE_SIZE / rec_size;
+		recs = PAGE_SIZE / rec_size;
 		/* Should have been verified before we got here... */
 		BUG_ON(!recs);
 		local_irq_save(flags);
@@ -229,7 +229,7 @@
 	 * fully truncated, truncate will throw it away as soon as we unlock
 	 * it so no need to worry what we do with it.
 	 */
-	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+	iblock = (s64)page->index << (PAGE_SHIFT - blocksize_bits);
 	read_lock_irqsave(&ni->size_lock, flags);
 	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
 	init_size = ni->initialized_size;
@@ -412,9 +412,9 @@
 	vi = page->mapping->host;
 	i_size = i_size_read(vi);
 	/* Is the page fully outside i_size? (truncate in progress) */
-	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
-			PAGE_CACHE_SHIFT)) {
-		zero_user(page, 0, PAGE_CACHE_SIZE);
+	if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
+			PAGE_SHIFT)) {
+		zero_user(page, 0, PAGE_SIZE);
 		ntfs_debug("Read outside i_size - truncated?");
 		goto done;
 	}
@@ -463,7 +463,7 @@
 	 * ok to ignore the compressed flag here.
 	 */
 	if (unlikely(page->index > 0)) {
-		zero_user(page, 0, PAGE_CACHE_SIZE);
+		zero_user(page, 0, PAGE_SIZE);
 		goto done;
 	}
 	if (!NInoAttr(ni))
@@ -509,7 +509,7 @@
 			le16_to_cpu(ctx->attr->data.resident.value_offset),
 			attr_len);
 	/* Zero the remainder of the page. */
-	memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+	memset(addr + attr_len, 0, PAGE_SIZE - attr_len);
 	flush_dcache_page(page);
 	kunmap_atomic(addr);
 put_unm_err_out:
@@ -599,7 +599,7 @@
 	/* NOTE: Different naming scheme to ntfs_read_block()! */
 
 	/* The first block in the page. */
-	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+	block = (s64)page->index << (PAGE_SHIFT - blocksize_bits);
 
 	read_lock_irqsave(&ni->size_lock, flags);
 	i_size = i_size_read(vi);
@@ -674,7 +674,7 @@
 				// in the inode.
 				// Again, for each page do:
 				//	__set_page_dirty_buffers();
-				// page_cache_release()
+				// put_page()
 				// We don't need to wait on the writes.
 				// Update iblock.
 			}
@@ -925,7 +925,7 @@
 	ntfs_volume *vol = ni->vol;
 	u8 *kaddr;
 	unsigned int rec_size = ni->itype.index.block_size;
-	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
+	ntfs_inode *locked_nis[PAGE_SIZE / rec_size];
 	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
 	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
 	runlist_element *rl;
@@ -949,7 +949,7 @@
 			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
 	bh_size = vol->sb->s_blocksize;
 	bh_size_bits = vol->sb->s_blocksize_bits;
-	max_bhs = PAGE_CACHE_SIZE / bh_size;
+	max_bhs = PAGE_SIZE / bh_size;
 	BUG_ON(!max_bhs);
 	BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
 
@@ -961,13 +961,13 @@
 	BUG_ON(!bh);
 
 	rec_size_bits = ni->itype.index.block_size_bits;
-	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
+	BUG_ON(!(PAGE_SIZE >> rec_size_bits));
 	bhs_per_rec = rec_size >> bh_size_bits;
 	BUG_ON(!bhs_per_rec);
 
 	/* The first block in the page. */
 	rec_block = block = (sector_t)page->index <<
-			(PAGE_CACHE_SHIFT - bh_size_bits);
+			(PAGE_SHIFT - bh_size_bits);
 
 	/* The first out of bounds block for the data size. */
 	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
@@ -1133,7 +1133,7 @@
 			unsigned long mft_no;
 
 			/* Get the mft record number. */
-			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
+			mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
 					>> rec_size_bits;
 			/* Check whether to write this mft record. */
 			tni = NULL;
@@ -1249,7 +1249,7 @@
 				continue;
 			ofs = bh_offset(tbh);
 			/* Get the mft record number. */
-			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
+			mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
 					>> rec_size_bits;
 			if (mft_no < vol->mftmirr_size)
 				ntfs_sync_mft_mirror(vol, mft_no,
@@ -1300,7 +1300,7 @@
 		 * Set page error if there is only one ntfs record in the page.
 		 * Otherwise we would loose per-record granularity.
 		 */
-		if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
+		if (ni->itype.index.block_size == PAGE_SIZE)
 			SetPageError(page);
 		NVolSetErrors(vol);
 	}
@@ -1308,7 +1308,7 @@
 		ntfs_debug("Page still contains one or more dirty ntfs "
 				"records.  Redirtying the page starting at "
 				"record 0x%lx.", page->index <<
-				(PAGE_CACHE_SHIFT - rec_size_bits));
+				(PAGE_SHIFT - rec_size_bits));
 		redirty_page_for_writepage(wbc, page);
 		unlock_page(page);
 	} else {
@@ -1365,13 +1365,13 @@
 	BUG_ON(!PageLocked(page));
 	i_size = i_size_read(vi);
 	/* Is the page fully outside i_size? (truncate in progress) */
-	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
-			PAGE_CACHE_SHIFT)) {
+	if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
+			PAGE_SHIFT)) {
 		/*
 		 * The page may have dirty, unmapped buffers.  Make them
 		 * freeable here, so the page does not leak.
 		 */
-		block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		block_invalidatepage(page, 0, PAGE_SIZE);
 		unlock_page(page);
 		ntfs_debug("Write outside i_size - truncated?");
 		return 0;
@@ -1414,10 +1414,10 @@
 	/* NInoNonResident() == NInoIndexAllocPresent() */
 	if (NInoNonResident(ni)) {
 		/* We have to zero every time due to mmap-at-end-of-file. */
-		if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
+		if (page->index >= (i_size >> PAGE_SHIFT)) {
 			/* The page straddles i_size. */
-			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
-			zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
+			unsigned int ofs = i_size & ~PAGE_MASK;
+			zero_user_segment(page, ofs, PAGE_SIZE);
 		}
 		/* Handle mst protected attributes. */
 		if (NInoMstProtected(ni))
@@ -1500,7 +1500,7 @@
 			le16_to_cpu(ctx->attr->data.resident.value_offset),
 			addr, attr_len);
 	/* Zero out of bounds area in the page cache page. */
-	memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+	memset(addr + attr_len, 0, PAGE_SIZE - attr_len);
 	kunmap_atomic(addr);
 	flush_dcache_page(page);
 	flush_dcache_mft_record_page(ctx->ntfs_ino);

diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h
index caecc58..820d6ea 100644
--- a/fs/ntfs/aops.h
+++ b/fs/ntfs/aops.h

@@ -40,7 +40,7 @@
 static inline void ntfs_unmap_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 /**
@@ -49,7 +49,7 @@
  * @index:	index into the page cache for @mapping of the page to map
  *
  * Read a page from the page cache of the address space @mapping at position
- * @index, where @index is in units of PAGE_CACHE_SIZE, and not in bytes.
+ * @index, where @index is in units of PAGE_SIZE, and not in bytes.
  *
  * If the page is not in memory it is loaded from disk first using the readpage
  * method defined in the address space operations of @mapping and the page is

diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 250ed5b..44a39a0 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c

@@ -152,7 +152,7 @@
 			if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino !=
 					old_ctx.base_ntfs_ino) {
 				put_this_page = old_ctx.ntfs_ino->page;
-				page_cache_get(put_this_page);
+				get_page(put_this_page);
 			}
 			/*
 			 * Reinitialize the search context so we can lookup the
@@ -275,7 +275,7 @@
 		 * the pieces anyway.
 		 */
 		if (put_this_page)
-			page_cache_release(put_this_page);
+			put_page(put_this_page);
 	}
 	return err;
 }
@@ -1660,7 +1660,7 @@
 		memcpy(kaddr, (u8*)a +
 				le16_to_cpu(a->data.resident.value_offset),
 				attr_size);
-		memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
+		memset(kaddr + attr_size, 0, PAGE_SIZE - attr_size);
 		kunmap_atomic(kaddr);
 		flush_dcache_page(page);
 		SetPageUptodate(page);
@@ -1748,7 +1748,7 @@
 	if (page) {
 		set_page_dirty(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	ntfs_debug("Done.");
 	return 0;
@@ -1835,7 +1835,7 @@
 		ntfs_free(rl);
 page_err_out:
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	if (err == -EINVAL)
 		err = -EIO;
@@ -2513,17 +2513,17 @@
 	BUG_ON(NInoEncrypted(ni));
 	mapping = VFS_I(ni)->i_mapping;
 	/* Work out the starting index and page offset. */
-	idx = ofs >> PAGE_CACHE_SHIFT;
-	start_ofs = ofs & ~PAGE_CACHE_MASK;
+	idx = ofs >> PAGE_SHIFT;
+	start_ofs = ofs & ~PAGE_MASK;
 	/* Work out the ending index and page offset. */
 	end = ofs + cnt;
-	end_ofs = end & ~PAGE_CACHE_MASK;
+	end_ofs = end & ~PAGE_MASK;
 	/* If the end is outside the inode size return -ESPIPE. */
 	if (unlikely(end > i_size_read(VFS_I(ni)))) {
 		ntfs_error(vol->sb, "Request exceeds end of attribute.");
 		return -ESPIPE;
 	}
-	end >>= PAGE_CACHE_SHIFT;
+	end >>= PAGE_SHIFT;
 	/* If there is a first partial page, need to do it the slow way. */
 	if (start_ofs) {
 		page = read_mapping_page(mapping, idx, NULL);
@@ -2536,7 +2536,7 @@
 		 * If the last page is the same as the first page, need to
 		 * limit the write to the end offset.
 		 */
-		size = PAGE_CACHE_SIZE;
+		size = PAGE_SIZE;
 		if (idx == end)
 			size = end_ofs;
 		kaddr = kmap_atomic(page);
@@ -2544,7 +2544,7 @@
 		flush_dcache_page(page);
 		kunmap_atomic(kaddr);
 		set_page_dirty(page);
-		page_cache_release(page);
+		put_page(page);
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
 		if (idx == end)
@@ -2561,7 +2561,7 @@
 			return -ENOMEM;
 		}
 		kaddr = kmap_atomic(page);
-		memset(kaddr, val, PAGE_CACHE_SIZE);
+		memset(kaddr, val, PAGE_SIZE);
 		flush_dcache_page(page);
 		kunmap_atomic(kaddr);
 		/*
@@ -2585,7 +2585,7 @@
 		set_page_dirty(page);
 		/* Finally unlock and release the page. */
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
 	}
@@ -2602,7 +2602,7 @@
 		flush_dcache_page(page);
 		kunmap_atomic(kaddr);
 		set_page_dirty(page);
-		page_cache_release(page);
+		put_page(page);
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
 	}

diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c
index 0809cf8..ec130c5 100644
--- a/fs/ntfs/bitmap.c
+++ b/fs/ntfs/bitmap.c

@@ -67,8 +67,8 @@
 	 * Calculate the indices for the pages containing the first and last
 	 * bits, i.e. @start_bit and @start_bit + @cnt - 1, respectively.
 	 */
-	index = start_bit >> (3 + PAGE_CACHE_SHIFT);
-	end_index = (start_bit + cnt - 1) >> (3 + PAGE_CACHE_SHIFT);
+	index = start_bit >> (3 + PAGE_SHIFT);
+	end_index = (start_bit + cnt - 1) >> (3 + PAGE_SHIFT);
 
 	/* Get the page containing the first bit (@start_bit). */
 	mapping = vi->i_mapping;
@@ -82,7 +82,7 @@
 	kaddr = page_address(page);
 
 	/* Set @pos to the position of the byte containing @start_bit. */
-	pos = (start_bit >> 3) & ~PAGE_CACHE_MASK;
+	pos = (start_bit >> 3) & ~PAGE_MASK;
 
 	/* Calculate the position of @start_bit in the first byte. */
 	bit = start_bit & 7;
@@ -108,7 +108,7 @@
 	 * Depending on @value, modify all remaining whole bytes in the page up
 	 * to @cnt.
 	 */
-	len = min_t(s64, cnt >> 3, PAGE_CACHE_SIZE - pos);
+	len = min_t(s64, cnt >> 3, PAGE_SIZE - pos);
 	memset(kaddr + pos, value ? 0xff : 0, len);
 	cnt -= len << 3;
 
@@ -132,7 +132,7 @@
 		 * Depending on @value, modify all remaining whole bytes in the
 		 * page up to @cnt.
 		 */
-		len = min_t(s64, cnt >> 3, PAGE_CACHE_SIZE);
+		len = min_t(s64, cnt >> 3, PAGE_SIZE);
 		memset(kaddr, value ? 0xff : 0, len);
 		cnt -= len << 3;
 	}

diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index f82498c..f2b5e74 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c

@@ -104,16 +104,12 @@
 	unsigned int kp_ofs;
 
 	ntfs_debug("Zeroing page region outside initialized size.");
-	if (((s64)page->index << PAGE_CACHE_SHIFT) >= initialized_size) {
-		/*
-		 * FIXME: Using clear_page() will become wrong when we get
-		 * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem.
-		 */
+	if (((s64)page->index << PAGE_SHIFT) >= initialized_size) {
 		clear_page(kp);
 		return;
 	}
-	kp_ofs = initialized_size & ~PAGE_CACHE_MASK;
-	memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs);
+	kp_ofs = initialized_size & ~PAGE_MASK;
+	memset(kp + kp_ofs, 0, PAGE_SIZE - kp_ofs);
 	return;
 }
 
@@ -123,7 +119,7 @@
 static inline void handle_bounds_compressed_page(struct page *page,
 		const loff_t i_size, const s64 initialized_size)
 {
-	if ((page->index >= (initialized_size >> PAGE_CACHE_SHIFT)) &&
+	if ((page->index >= (initialized_size >> PAGE_SHIFT)) &&
 			(initialized_size < i_size))
 		zero_partial_compressed_page(page, initialized_size);
 	return;
@@ -160,7 +156,7 @@
  * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was
  * completed during the decompression of the compression block (@cb_start).
  *
- * Warning: This function *REQUIRES* PAGE_CACHE_SIZE >= 4096 or it will blow up
+ * Warning: This function *REQUIRES* PAGE_SIZE >= 4096 or it will blow up
  * unpredicatbly! You have been warned!
  *
  * Note to hackers: This function may not sleep until it has finished accessing
@@ -241,7 +237,7 @@
 				if (di == xpage)
 					*xpage_done = 1;
 				else
-					page_cache_release(dp);
+					put_page(dp);
 				dest_pages[di] = NULL;
 			}
 		}
@@ -274,7 +270,7 @@
 		cb = cb_sb_end;
 
 		/* Advance destination position to next sub-block. */
-		*dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_CACHE_MASK;
+		*dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_MASK;
 		if (!*dest_ofs && (++*dest_index > dest_max_index))
 			goto return_overflow;
 		goto do_next_sb;
@@ -301,7 +297,7 @@
 
 		/* Advance destination position to next sub-block. */
 		*dest_ofs += NTFS_SB_SIZE;
-		if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) {
+		if (!(*dest_ofs &= ~PAGE_MASK)) {
 finalize_page:
 			/*
 			 * First stage: add current page index to array of
@@ -335,7 +331,7 @@
 			*dest_ofs += nr_bytes;
 		}
 		/* We have finished the current sub-block. */
-		if (!(*dest_ofs &= ~PAGE_CACHE_MASK))
+		if (!(*dest_ofs &= ~PAGE_MASK))
 			goto finalize_page;
 		goto do_next_sb;
 	}
@@ -462,7 +458,7 @@
  * have been written to so that we would lose data if we were to just overwrite
  * them with the out-of-date uncompressed data.
  *
- * FIXME: For PAGE_CACHE_SIZE > cb_size we are not doing the Right Thing(TM) at
+ * FIXME: For PAGE_SIZE > cb_size we are not doing the Right Thing(TM) at
  * the end of the file I think. We need to detect this case and zero the out
  * of bounds remainder of the page in question and mark it as handled. At the
  * moment we would just return -EIO on such a page. This bug will only become
@@ -470,7 +466,7 @@
  * clusters so is probably not going to be seen by anyone. Still this should
  * be fixed. (AIA)
  *
- * FIXME: Again for PAGE_CACHE_SIZE > cb_size we are screwing up both in
+ * FIXME: Again for PAGE_SIZE > cb_size we are screwing up both in
  * handling sparse and compressed cbs. (AIA)
  *
  * FIXME: At the moment we don't do any zeroing out in the case that
@@ -497,14 +493,14 @@
 	u64 cb_size_mask = cb_size - 1UL;
 	VCN vcn;
 	LCN lcn;
-	/* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */
-	VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >>
+	/* The first wanted vcn (minimum alignment is PAGE_SIZE). */
+	VCN start_vcn = (((s64)index << PAGE_SHIFT) & ~cb_size_mask) >>
 			vol->cluster_size_bits;
 	/*
 	 * The first vcn after the last wanted vcn (minimum alignment is again
-	 * PAGE_CACHE_SIZE.
+	 * PAGE_SIZE.
 	 */
-	VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1)
+	VCN end_vcn = ((((s64)(index + 1UL) << PAGE_SHIFT) + cb_size - 1)
 			& ~cb_size_mask) >> vol->cluster_size_bits;
 	/* Number of compression blocks (cbs) in the wanted vcn range. */
 	unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits
@@ -515,7 +511,7 @@
 	 * guarantees of start_vcn and end_vcn, no need to round up here.
 	 */
 	unsigned int nr_pages = (end_vcn - start_vcn) <<
-			vol->cluster_size_bits >> PAGE_CACHE_SHIFT;
+			vol->cluster_size_bits >> PAGE_SHIFT;
 	unsigned int xpage, max_page, cur_page, cur_ofs, i;
 	unsigned int cb_clusters, cb_max_ofs;
 	int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0;
@@ -549,7 +545,7 @@
 	 * We have already been given one page, this is the one we must do.
 	 * Once again, the alignment guarantees keep it simple.
 	 */
-	offset = start_vcn << vol->cluster_size_bits >> PAGE_CACHE_SHIFT;
+	offset = start_vcn << vol->cluster_size_bits >> PAGE_SHIFT;
 	xpage = index - offset;
 	pages[xpage] = page;
 	/*
@@ -560,13 +556,13 @@
 	i_size = i_size_read(VFS_I(ni));
 	initialized_size = ni->initialized_size;
 	read_unlock_irqrestore(&ni->size_lock, flags);
-	max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+	max_page = ((i_size + PAGE_SIZE - 1) >> PAGE_SHIFT) -
 			offset;
 	/* Is the page fully outside i_size? (truncate in progress) */
 	if (xpage >= max_page) {
 		kfree(bhs);
 		kfree(pages);
-		zero_user(page, 0, PAGE_CACHE_SIZE);
+		zero_user(page, 0, PAGE_SIZE);
 		ntfs_debug("Compressed read outside i_size - truncated?");
 		SetPageUptodate(page);
 		unlock_page(page);
@@ -591,7 +587,7 @@
 				continue;
 			}
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			pages[i] = NULL;
 		}
 	}
@@ -735,9 +731,9 @@
 	ntfs_debug("Successfully read the compression block.");
 
 	/* The last page and maximum offset within it for the current cb. */
-	cb_max_page = (cur_page << PAGE_CACHE_SHIFT) + cur_ofs + cb_size;
-	cb_max_ofs = cb_max_page & ~PAGE_CACHE_MASK;
-	cb_max_page >>= PAGE_CACHE_SHIFT;
+	cb_max_page = (cur_page << PAGE_SHIFT) + cur_ofs + cb_size;
+	cb_max_ofs = cb_max_page & ~PAGE_MASK;
+	cb_max_page >>= PAGE_SHIFT;
 
 	/* Catch end of file inside a compression block. */
 	if (cb_max_page > max_page)
@@ -753,16 +749,11 @@
 		for (; cur_page < cb_max_page; cur_page++) {
 			page = pages[cur_page];
 			if (page) {
-				/*
-				 * FIXME: Using clear_page() will become wrong
-				 * when we get PAGE_CACHE_SIZE != PAGE_SIZE but
-				 * for now there is no problem.
-				 */
 				if (likely(!cur_ofs))
 					clear_page(page_address(page));
 				else
 					memset(page_address(page) + cur_ofs, 0,
-							PAGE_CACHE_SIZE -
+							PAGE_SIZE -
 							cur_ofs);
 				flush_dcache_page(page);
 				kunmap(page);
@@ -771,10 +762,10 @@
 				if (cur_page == xpage)
 					xpage_done = 1;
 				else
-					page_cache_release(page);
+					put_page(page);
 				pages[cur_page] = NULL;
 			}
-			cb_pos += PAGE_CACHE_SIZE - cur_ofs;
+			cb_pos += PAGE_SIZE - cur_ofs;
 			cur_ofs = 0;
 			if (cb_pos >= cb_end)
 				break;
@@ -807,7 +798,7 @@
 		 * synchronous io for the majority of pages.
 		 * Or if we choose not to do the read-ahead/-behind stuff, we
 		 * could just return block_read_full_page(pages[xpage]) as long
-		 * as PAGE_CACHE_SIZE <= cb_size.
+		 * as PAGE_SIZE <= cb_size.
 		 */
 		if (cb_max_ofs)
 			cb_max_page--;
@@ -816,8 +807,8 @@
 			page = pages[cur_page];
 			if (page)
 				memcpy(page_address(page) + cur_ofs, cb_pos,
-						PAGE_CACHE_SIZE - cur_ofs);
-			cb_pos += PAGE_CACHE_SIZE - cur_ofs;
+						PAGE_SIZE - cur_ofs);
+			cb_pos += PAGE_SIZE - cur_ofs;
 			cur_ofs = 0;
 			if (cb_pos >= cb_end)
 				break;
@@ -850,10 +841,10 @@
 				if (cur2_page == xpage)
 					xpage_done = 1;
 				else
-					page_cache_release(page);
+					put_page(page);
 				pages[cur2_page] = NULL;
 			}
-			cb_pos2 += PAGE_CACHE_SIZE - cur_ofs2;
+			cb_pos2 += PAGE_SIZE - cur_ofs2;
 			cur_ofs2 = 0;
 			if (cb_pos2 >= cb_end)
 				break;
@@ -884,7 +875,7 @@
 					kunmap(page);
 					unlock_page(page);
 					if (prev_cur_page != xpage)
-						page_cache_release(page);
+						put_page(page);
 					pages[prev_cur_page] = NULL;
 				}
 			}
@@ -914,7 +905,7 @@
 			kunmap(page);
 			unlock_page(page);
 			if (cur_page != xpage)
-				page_cache_release(page);
+				put_page(page);
 			pages[cur_page] = NULL;
 		}
 	}
@@ -961,7 +952,7 @@
 			kunmap(page);
 			unlock_page(page);
 			if (i != xpage)
-				page_cache_release(page);
+				put_page(page);
 		}
 	}
 	kfree(pages);

diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index b2eff58..a186135 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c

@@ -315,11 +315,11 @@
 descend_into_child_node:
 	/*
 	 * Convert vcn to index into the index allocation attribute in units
-	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+	 * of PAGE_SIZE and map the page cache page, reading it from
 	 * disk if necessary.
 	 */
 	page = ntfs_map_page(ia_mapping, vcn <<
-			dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
+			dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
 	if (IS_ERR(page)) {
 		ntfs_error(sb, "Failed to map directory index page, error %ld.",
 				-PTR_ERR(page));
@@ -331,9 +331,9 @@
 fast_descend_into_child_node:
 	/* Get to the index allocation block. */
 	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
-			dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
+			dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
 	/* Bounds checks. */
-	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
 		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
 				"inode 0x%lx or driver bug.", dir_ni->mft_no);
 		goto unm_err_out;
@@ -366,7 +366,7 @@
 		goto unm_err_out;
 	}
 	index_end = (u8*)ia + dir_ni->itype.index.block_size;
-	if (index_end > kaddr + PAGE_CACHE_SIZE) {
+	if (index_end > kaddr + PAGE_SIZE) {
 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
 				"0x%lx crosses page boundary. Impossible! "
 				"Cannot access! This is probably a bug in the "
@@ -559,9 +559,9 @@
 			/* If vcn is in the same page cache page as old_vcn we
 			 * recycle the mapped page. */
 			if (old_vcn << vol->cluster_size_bits >>
-					PAGE_CACHE_SHIFT == vcn <<
+					PAGE_SHIFT == vcn <<
 					vol->cluster_size_bits >>
-					PAGE_CACHE_SHIFT)
+					PAGE_SHIFT)
 				goto fast_descend_into_child_node;
 			unlock_page(page);
 			ntfs_unmap_page(page);
@@ -793,11 +793,11 @@
 descend_into_child_node:
 	/*
 	 * Convert vcn to index into the index allocation attribute in units
-	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+	 * of PAGE_SIZE and map the page cache page, reading it from
 	 * disk if necessary.
 	 */
 	page = ntfs_map_page(ia_mapping, vcn <<
-			dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
+			dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
 	if (IS_ERR(page)) {
 		ntfs_error(sb, "Failed to map directory index page, error %ld.",
 				-PTR_ERR(page));
@@ -809,9 +809,9 @@
 fast_descend_into_child_node:
 	/* Get to the index allocation block. */
 	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
-			dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
+			dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
 	/* Bounds checks. */
-	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
 		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
 				"inode 0x%lx or driver bug.", dir_ni->mft_no);
 		goto unm_err_out;
@@ -844,7 +844,7 @@
 		goto unm_err_out;
 	}
 	index_end = (u8*)ia + dir_ni->itype.index.block_size;
-	if (index_end > kaddr + PAGE_CACHE_SIZE) {
+	if (index_end > kaddr + PAGE_SIZE) {
 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
 				"0x%lx crosses page boundary. Impossible! "
 				"Cannot access! This is probably a bug in the "
@@ -968,9 +968,9 @@
 			/* If vcn is in the same page cache page as old_vcn we
 			 * recycle the mapped page. */
 			if (old_vcn << vol->cluster_size_bits >>
-					PAGE_CACHE_SHIFT == vcn <<
+					PAGE_SHIFT == vcn <<
 					vol->cluster_size_bits >>
-					PAGE_CACHE_SHIFT)
+					PAGE_SHIFT)
 				goto fast_descend_into_child_node;
 			unlock_page(page);
 			ntfs_unmap_page(page);
@@ -1246,15 +1246,15 @@
 		goto iput_err_out;
 	}
 	/* Get the starting bit position in the current bitmap page. */
-	cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1);
-	bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1);
+	cur_bmp_pos = bmp_pos & ((PAGE_SIZE * 8) - 1);
+	bmp_pos &= ~(u64)((PAGE_SIZE * 8) - 1);
 get_next_bmp_page:
 	ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx",
-			(unsigned long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT),
+			(unsigned long long)bmp_pos >> (3 + PAGE_SHIFT),
 			(unsigned long long)bmp_pos &
-			(unsigned long long)((PAGE_CACHE_SIZE * 8) - 1));
+			(unsigned long long)((PAGE_SIZE * 8) - 1));
 	bmp_page = ntfs_map_page(bmp_mapping,
-			bmp_pos >> (3 + PAGE_CACHE_SHIFT));
+			bmp_pos >> (3 + PAGE_SHIFT));
 	if (IS_ERR(bmp_page)) {
 		ntfs_error(sb, "Reading index bitmap failed.");
 		err = PTR_ERR(bmp_page);
@@ -1270,9 +1270,9 @@
 		 * If we have reached the end of the bitmap page, get the next
 		 * page, and put away the old one.
 		 */
-		if (unlikely((cur_bmp_pos >> 3) >= PAGE_CACHE_SIZE)) {
+		if (unlikely((cur_bmp_pos >> 3) >= PAGE_SIZE)) {
 			ntfs_unmap_page(bmp_page);
-			bmp_pos += PAGE_CACHE_SIZE * 8;
+			bmp_pos += PAGE_SIZE * 8;
 			cur_bmp_pos = 0;
 			goto get_next_bmp_page;
 		}
@@ -1285,8 +1285,8 @@
 	ntfs_debug("Handling index buffer 0x%llx.",
 			(unsigned long long)bmp_pos + cur_bmp_pos);
 	/* If the current index buffer is in the same page we reuse the page. */
-	if ((prev_ia_pos & (s64)PAGE_CACHE_MASK) !=
-			(ia_pos & (s64)PAGE_CACHE_MASK)) {
+	if ((prev_ia_pos & (s64)PAGE_MASK) !=
+			(ia_pos & (s64)PAGE_MASK)) {
 		prev_ia_pos = ia_pos;
 		if (likely(ia_page != NULL)) {
 			unlock_page(ia_page);
@@ -1296,7 +1296,7 @@
 		 * Map the page cache page containing the current ia_pos,
 		 * reading it from disk if necessary.
 		 */
-		ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT);
+		ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_SHIFT);
 		if (IS_ERR(ia_page)) {
 			ntfs_error(sb, "Reading index allocation data failed.");
 			err = PTR_ERR(ia_page);
@@ -1307,10 +1307,10 @@
 		kaddr = (u8*)page_address(ia_page);
 	}
 	/* Get the current index buffer. */
-	ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK &
-			~(s64)(ndir->itype.index.block_size - 1)));
+	ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_MASK &
+					  ~(s64)(ndir->itype.index.block_size - 1)));
 	/* Bounds checks. */
-	if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE)) {
+	if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE)) {
 		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
 				"inode 0x%lx or driver bug.", vdir->i_ino);
 		goto err_out;
@@ -1348,7 +1348,7 @@
 		goto err_out;
 	}
 	index_end = (u8*)ia + ndir->itype.index.block_size;
-	if (unlikely(index_end > kaddr + PAGE_CACHE_SIZE)) {
+	if (unlikely(index_end > kaddr + PAGE_SIZE)) {
 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
 				"0x%lx crosses page boundary. Impossible! "
 				"Cannot access! This is probably a bug in the "

diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index bed4d42..91117ad 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c

@@ -220,8 +220,8 @@
 		m = NULL;
 	}
 	mapping = vi->i_mapping;
-	index = old_init_size >> PAGE_CACHE_SHIFT;
-	end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	index = old_init_size >> PAGE_SHIFT;
+	end_index = (new_init_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	do {
 		/*
 		 * Read the page.  If the page is not present, this will zero
@@ -233,7 +233,7 @@
 			goto init_err_out;
 		}
 		if (unlikely(PageError(page))) {
-			page_cache_release(page);
+			put_page(page);
 			err = -EIO;
 			goto init_err_out;
 		}
@@ -242,13 +242,13 @@
 		 * enough to make ntfs_writepage() work.
 		 */
 		write_lock_irqsave(&ni->size_lock, flags);
-		ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT;
+		ni->initialized_size = (s64)(index + 1) << PAGE_SHIFT;
 		if (ni->initialized_size > new_init_size)
 			ni->initialized_size = new_init_size;
 		write_unlock_irqrestore(&ni->size_lock, flags);
 		/* Set the page dirty so it gets written out. */
 		set_page_dirty(page);
-		page_cache_release(page);
+		put_page(page);
 		/*
 		 * Play nice with the vm and the rest of the system.  This is
 		 * very much needed as we can potentially be modifying the
@@ -543,7 +543,7 @@
 err_out:
 	while (nr > 0) {
 		unlock_page(pages[--nr]);
-		page_cache_release(pages[nr]);
+		put_page(pages[nr]);
 	}
 	goto out;
 }
@@ -573,7 +573,7 @@
  * only partially being written to.
  *
  * If @nr_pages is greater than one, we are guaranteed that the cluster size is
- * greater than PAGE_CACHE_SIZE, that all pages in @pages are entirely inside
+ * greater than PAGE_SIZE, that all pages in @pages are entirely inside
  * the same cluster and that they are the entirety of that cluster, and that
  * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
  *
@@ -653,7 +653,7 @@
 	u = 0;
 do_next_page:
 	page = pages[u];
-	bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+	bh_pos = (s64)page->index << PAGE_SHIFT;
 	bh = head = page_buffers(page);
 	do {
 		VCN cdelta;
@@ -810,11 +810,11 @@
 					
 				kaddr = kmap_atomic(page);
 				if (bh_pos < pos) {
-					pofs = bh_pos & ~PAGE_CACHE_MASK;
+					pofs = bh_pos & ~PAGE_MASK;
 					memset(kaddr + pofs, 0, pos - bh_pos);
 				}
 				if (bh_end > end) {
-					pofs = end & ~PAGE_CACHE_MASK;
+					pofs = end & ~PAGE_MASK;
 					memset(kaddr + pofs, 0, bh_end - end);
 				}
 				kunmap_atomic(kaddr);
@@ -942,7 +942,7 @@
 		 * unmapped.  This can only happen when the cluster size is
 		 * less than the page cache size.
 		 */
-		if (unlikely(vol->cluster_size < PAGE_CACHE_SIZE)) {
+		if (unlikely(vol->cluster_size < PAGE_SIZE)) {
 			bh_cend = (bh_end + vol->cluster_size - 1) >>
 					vol->cluster_size_bits;
 			if ((bh_cend <= cpos || bh_cpos >= cend)) {
@@ -1208,7 +1208,7 @@
 		wait_on_buffer(bh);
 		if (likely(buffer_uptodate(bh))) {
 			page = bh->b_page;
-			bh_pos = ((s64)page->index << PAGE_CACHE_SHIFT) +
+			bh_pos = ((s64)page->index << PAGE_SHIFT) +
 					bh_offset(bh);
 			/*
 			 * If the buffer overflows the initialized size, need
@@ -1350,7 +1350,7 @@
 		bh = head = page_buffers(page);
 		do {
 			if (u == nr_pages &&
-					((s64)page->index << PAGE_CACHE_SHIFT) +
+					((s64)page->index << PAGE_SHIFT) +
 					bh_offset(bh) >= end)
 				break;
 			if (!buffer_new(bh))
@@ -1422,7 +1422,7 @@
 		bool partial;
 
 		page = pages[u];
-		bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+		bh_pos = (s64)page->index << PAGE_SHIFT;
 		bh = head = page_buffers(page);
 		partial = false;
 		do {
@@ -1639,7 +1639,7 @@
 		if (end < attr_len)
 			memcpy(kaddr + end, kattr + end, attr_len - end);
 		/* Zero the region outside the end of the attribute value. */
-		memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+		memset(kaddr + attr_len, 0, PAGE_SIZE - attr_len);
 		flush_dcache_page(page);
 		SetPageUptodate(page);
 	}
@@ -1706,7 +1706,7 @@
 	unsigned len, copied;
 
 	do {
-		len = PAGE_CACHE_SIZE - ofs;
+		len = PAGE_SIZE - ofs;
 		if (len > bytes)
 			len = bytes;
 		copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
@@ -1724,14 +1724,14 @@
 	return total;
 err:
 	/* Zero the rest of the target like __copy_from_user(). */
-	len = PAGE_CACHE_SIZE - copied;
+	len = PAGE_SIZE - copied;
 	do {
 		if (len > bytes)
 			len = bytes;
 		zero_user(*pages, copied, len);
 		bytes -= len;
 		copied = 0;
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 	} while (++pages < last_page);
 	goto out;
 }
@@ -1787,8 +1787,8 @@
 	 * attributes.
 	 */
 	nr_pages = 1;
-	if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
-		nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
+	if (vol->cluster_size > PAGE_SIZE && NInoNonResident(ni))
+		nr_pages = vol->cluster_size >> PAGE_SHIFT;
 	last_vcn = -1;
 	do {
 		VCN vcn;
@@ -1796,9 +1796,9 @@
 		unsigned ofs, do_pages, u;
 		size_t copied;
 
-		start_idx = idx = pos >> PAGE_CACHE_SHIFT;
-		ofs = pos & ~PAGE_CACHE_MASK;
-		bytes = PAGE_CACHE_SIZE - ofs;
+		start_idx = idx = pos >> PAGE_SHIFT;
+		ofs = pos & ~PAGE_MASK;
+		bytes = PAGE_SIZE - ofs;
 		do_pages = 1;
 		if (nr_pages > 1) {
 			vcn = pos >> vol->cluster_size_bits;
@@ -1832,7 +1832,7 @@
 				if (lcn == LCN_HOLE) {
 					start_idx = (pos & ~(s64)
 							vol->cluster_size_mask)
-							>> PAGE_CACHE_SHIFT;
+							>> PAGE_SHIFT;
 					bytes = vol->cluster_size - (pos &
 							vol->cluster_size_mask);
 					do_pages = nr_pages;
@@ -1871,12 +1871,12 @@
 			if (unlikely(status)) {
 				do {
 					unlock_page(pages[--do_pages]);
-					page_cache_release(pages[do_pages]);
+					put_page(pages[do_pages]);
 				} while (do_pages);
 				break;
 			}
 		}
-		u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
+		u = (pos >> PAGE_SHIFT) - pages[0]->index;
 		copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
 					i, bytes);
 		ntfs_flush_dcache_pages(pages + u, do_pages - u);
@@ -1889,7 +1889,7 @@
 		}
 		do {
 			unlock_page(pages[--do_pages]);
-			page_cache_release(pages[do_pages]);
+			put_page(pages[do_pages]);
 		} while (do_pages);
 		if (unlikely(status < 0))
 			break;
@@ -1921,7 +1921,7 @@
 		}
 	} while (iov_iter_count(i));
 	if (cached_page)
-		page_cache_release(cached_page);
+		put_page(cached_page);
 	ntfs_debug("Done.  Returning %s (written 0x%lx, status %li).",
 			written ? "written" : "status", (unsigned long)written,
 			(long)status);

diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 096c135..0d645f3 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c

@@ -272,11 +272,11 @@
 descend_into_child_node:
 	/*
 	 * Convert vcn to index into the index allocation attribute in units
-	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+	 * of PAGE_SIZE and map the page cache page, reading it from
 	 * disk if necessary.
 	 */
 	page = ntfs_map_page(ia_mapping, vcn <<
-			idx_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
+			idx_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
 	if (IS_ERR(page)) {
 		ntfs_error(sb, "Failed to map index page, error %ld.",
 				-PTR_ERR(page));
@@ -288,9 +288,9 @@
 fast_descend_into_child_node:
 	/* Get to the index allocation block. */
 	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
-			idx_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
+			idx_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
 	/* Bounds checks. */
-	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
 		ntfs_error(sb, "Out of bounds check failed.  Corrupt inode "
 				"0x%lx or driver bug.", idx_ni->mft_no);
 		goto unm_err_out;
@@ -323,7 +323,7 @@
 		goto unm_err_out;
 	}
 	index_end = (u8*)ia + idx_ni->itype.index.block_size;
-	if (index_end > kaddr + PAGE_CACHE_SIZE) {
+	if (index_end > kaddr + PAGE_SIZE) {
 		ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx "
 				"crosses page boundary.  Impossible!  Cannot "
 				"access!  This is probably a bug in the "
@@ -427,9 +427,9 @@
 		 * the mapped page.
 		 */
 		if (old_vcn << vol->cluster_size_bits >>
-				PAGE_CACHE_SHIFT == vcn <<
+				PAGE_SHIFT == vcn <<
 				vol->cluster_size_bits >>
-				PAGE_CACHE_SHIFT)
+				PAGE_SHIFT)
 			goto fast_descend_into_child_node;
 		unlock_page(page);
 		ntfs_unmap_page(page);

diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d284f07..f40972d 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c

@@ -868,12 +868,12 @@
 					ni->itype.index.block_size);
 			goto unm_err_out;
 		}
-		if (ni->itype.index.block_size > PAGE_CACHE_SIZE) {
+		if (ni->itype.index.block_size > PAGE_SIZE) {
 			ntfs_error(vi->i_sb, "Index block size (%u) > "
-					"PAGE_CACHE_SIZE (%ld) is not "
+					"PAGE_SIZE (%ld) is not "
 					"supported.  Sorry.",
 					ni->itype.index.block_size,
-					PAGE_CACHE_SIZE);
+					PAGE_SIZE);
 			err = -EOPNOTSUPP;
 			goto unm_err_out;
 		}
@@ -1585,10 +1585,10 @@
 				"two.", ni->itype.index.block_size);
 		goto unm_err_out;
 	}
-	if (ni->itype.index.block_size > PAGE_CACHE_SIZE) {
-		ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_CACHE_SIZE "
+	if (ni->itype.index.block_size > PAGE_SIZE) {
+		ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE "
 				"(%ld) is not supported.  Sorry.",
-				ni->itype.index.block_size, PAGE_CACHE_SIZE);
+				ni->itype.index.block_size, PAGE_SIZE);
 		err = -EOPNOTSUPP;
 		goto unm_err_out;
 	}

diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 1711b71..27a24a4 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c

@@ -283,15 +283,15 @@
 			ntfs_unmap_page(page);
 		}
 		page = ntfs_map_page(mapping, last_read_pos >>
-				PAGE_CACHE_SHIFT);
+				PAGE_SHIFT);
 		if (IS_ERR(page)) {
 			err = PTR_ERR(page);
 			ntfs_error(vol->sb, "Failed to map page.");
 			goto out;
 		}
-		buf_size = last_read_pos & ~PAGE_CACHE_MASK;
+		buf_size = last_read_pos & ~PAGE_MASK;
 		buf = page_address(page) + buf_size;
-		buf_size = PAGE_CACHE_SIZE - buf_size;
+		buf_size = PAGE_SIZE - buf_size;
 		if (unlikely(last_read_pos + buf_size > i_size))
 			buf_size = i_size - last_read_pos;
 		buf_size <<= 3;

diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index c71de29..9d71213 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c

@@ -381,7 +381,7 @@
 	 * completely inside @rp, just copy it from there.  Otherwise map all
 	 * the required pages and copy the data from them.
 	 */
-	size = PAGE_CACHE_SIZE - (pos & ~PAGE_CACHE_MASK);
+	size = PAGE_SIZE - (pos & ~PAGE_MASK);
 	if (size >= le32_to_cpu(rp->system_page_size)) {
 		memcpy(trp, rp, le32_to_cpu(rp->system_page_size));
 	} else {
@@ -394,8 +394,8 @@
 		/* Copy the remaining data one page at a time. */
 		have_read = size;
 		to_read = le32_to_cpu(rp->system_page_size) - size;
-		idx = (pos + size) >> PAGE_CACHE_SHIFT;
-		BUG_ON((pos + size) & ~PAGE_CACHE_MASK);
+		idx = (pos + size) >> PAGE_SHIFT;
+		BUG_ON((pos + size) & ~PAGE_MASK);
 		do {
 			page = ntfs_map_page(vi->i_mapping, idx);
 			if (IS_ERR(page)) {
@@ -406,7 +406,7 @@
 					err = -EIO;
 				goto err_out;
 			}
-			size = min_t(int, to_read, PAGE_CACHE_SIZE);
+			size = min_t(int, to_read, PAGE_SIZE);
 			memcpy((u8*)trp + have_read, page_address(page), size);
 			ntfs_unmap_page(page);
 			have_read += size;
@@ -509,11 +509,11 @@
 	 * log page size if the page cache size is between the default log page
 	 * size and twice that.
 	 */
-	if (PAGE_CACHE_SIZE >= DefaultLogPageSize && PAGE_CACHE_SIZE <=
+	if (PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <=
 			DefaultLogPageSize * 2)
 		log_page_size = DefaultLogPageSize;
 	else
-		log_page_size = PAGE_CACHE_SIZE;
+		log_page_size = PAGE_SIZE;
 	log_page_mask = log_page_size - 1;
 	/*
 	 * Use ntfs_ffs() instead of ffs() to enable the compiler to
@@ -539,7 +539,7 @@
 	 * to be empty.
 	 */
 	for (pos = 0; pos < size; pos <<= 1) {
-		pgoff_t idx = pos >> PAGE_CACHE_SHIFT;
+		pgoff_t idx = pos >> PAGE_SHIFT;
 		if (!page || page->index != idx) {
 			if (page)
 				ntfs_unmap_page(page);
@@ -550,7 +550,7 @@
 				goto err_out;
 			}
 		}
-		kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK);
+		kaddr = (u8*)page_address(page) + (pos & ~PAGE_MASK);
 		/*
 		 * A non-empty block means the logfile is not empty while an
 		 * empty block after a non-empty block has been encountered

diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 3014a36..37b2501 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c

@@ -61,16 +61,16 @@
 	 * here if the volume was that big...
 	 */
 	index = (u64)ni->mft_no << vol->mft_record_size_bits >>
-			PAGE_CACHE_SHIFT;
-	ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+			PAGE_SHIFT;
+	ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
 
 	i_size = i_size_read(mft_vi);
 	/* The maximum valid index into the page cache for $MFT's data. */
-	end_index = i_size >> PAGE_CACHE_SHIFT;
+	end_index = i_size >> PAGE_SHIFT;
 
 	/* If the wanted index is out of bounds the mft record doesn't exist. */
 	if (unlikely(index >= end_index)) {
-		if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
+		if (index > end_index || (i_size & ~PAGE_MASK) < ofs +
 				vol->mft_record_size) {
 			page = ERR_PTR(-ENOENT);
 			ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, "
@@ -487,7 +487,7 @@
 	}
 	/* Get the page containing the mirror copy of the mft record @m. */
 	page = ntfs_map_page(vol->mftmirr_ino->i_mapping, mft_no >>
-			(PAGE_CACHE_SHIFT - vol->mft_record_size_bits));
+			(PAGE_SHIFT - vol->mft_record_size_bits));
 	if (IS_ERR(page)) {
 		ntfs_error(vol->sb, "Failed to map mft mirror page.");
 		err = PTR_ERR(page);
@@ -497,7 +497,7 @@
 	BUG_ON(!PageUptodate(page));
 	ClearPageUptodate(page);
 	/* Offset of the mft mirror record inside the page. */
-	page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+	page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
 	/* The address in the page of the mirror copy of the mft record @m. */
 	kmirr = page_address(page) + page_ofs;
 	/* Copy the mst protected mft record to the mirror. */
@@ -1178,8 +1178,8 @@
 	for (; pass <= 2;) {
 		/* Cap size to pass_end. */
 		ofs = data_pos >> 3;
-		page_ofs = ofs & ~PAGE_CACHE_MASK;
-		size = PAGE_CACHE_SIZE - page_ofs;
+		page_ofs = ofs & ~PAGE_MASK;
+		size = PAGE_SIZE - page_ofs;
 		ll = ((pass_end + 7) >> 3) - ofs;
 		if (size > ll)
 			size = ll;
@@ -1190,7 +1190,7 @@
 		 */
 		if (size) {
 			page = ntfs_map_page(mftbmp_mapping,
-					ofs >> PAGE_CACHE_SHIFT);
+					ofs >> PAGE_SHIFT);
 			if (IS_ERR(page)) {
 				ntfs_error(vol->sb, "Failed to read mft "
 						"bitmap, aborting.");
@@ -1328,13 +1328,13 @@
 	 */
 	ll = lcn >> 3;
 	page = ntfs_map_page(vol->lcnbmp_ino->i_mapping,
-			ll >> PAGE_CACHE_SHIFT);
+			ll >> PAGE_SHIFT);
 	if (IS_ERR(page)) {
 		up_write(&mftbmp_ni->runlist.lock);
 		ntfs_error(vol->sb, "Failed to read from lcn bitmap.");
 		return PTR_ERR(page);
 	}
-	b = (u8*)page_address(page) + (ll & ~PAGE_CACHE_MASK);
+	b = (u8*)page_address(page) + (ll & ~PAGE_MASK);
 	tb = 1 << (lcn & 7ull);
 	down_write(&vol->lcnbmp_lock);
 	if (*b != 0xff && !(*b & tb)) {
@@ -2103,14 +2103,14 @@
 	 * The index into the page cache and the offset within the page cache
 	 * page of the wanted mft record.
 	 */
-	index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
-	ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+	index = mft_no << vol->mft_record_size_bits >> PAGE_SHIFT;
+	ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
 	/* The maximum valid index into the page cache for $MFT's data. */
 	i_size = i_size_read(mft_vi);
-	end_index = i_size >> PAGE_CACHE_SHIFT;
+	end_index = i_size >> PAGE_SHIFT;
 	if (unlikely(index >= end_index)) {
 		if (unlikely(index > end_index || ofs + vol->mft_record_size >=
-				(i_size & ~PAGE_CACHE_MASK))) {
+				(i_size & ~PAGE_MASK))) {
 			ntfs_error(vol->sb, "Tried to format non-existing mft "
 					"record 0x%llx.", (long long)mft_no);
 			return -ENOENT;
@@ -2515,8 +2515,8 @@
 	 * We now have allocated and initialized the mft record.  Calculate the
 	 * index of and the offset within the page cache page the record is in.
 	 */
-	index = bit << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
-	ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+	index = bit << vol->mft_record_size_bits >> PAGE_SHIFT;
+	ofs = (bit << vol->mft_record_size_bits) & ~PAGE_MASK;
 	/* Read, map, and pin the page containing the mft record. */
 	page = ntfs_map_page(vol->mft_ino->i_mapping, index);
 	if (IS_ERR(page)) {

diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index c581e26..12de47b 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h

@@ -43,7 +43,7 @@
 	NTFS_MAX_NAME_LEN	= 255,
 	NTFS_MAX_ATTR_NAME_LEN	= 255,
 	NTFS_MAX_CLUSTER_SIZE	= 64 * 1024,	/* 64kiB */
-	NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_CACHE_SIZE,
+	NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_SIZE,
 } NTFS_CONSTANTS;
 
 /* Global variables. */

diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 1b38abd..ecb4987 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c

@@ -823,14 +823,14 @@
 	ntfs_debug("vol->mft_record_size_bits = %i (0x%x)",
 			vol->mft_record_size_bits, vol->mft_record_size_bits);
 	/*
-	 * We cannot support mft record sizes above the PAGE_CACHE_SIZE since
+	 * We cannot support mft record sizes above the PAGE_SIZE since
 	 * we store $MFT/$DATA, the table of mft records in the page cache.
 	 */
-	if (vol->mft_record_size > PAGE_CACHE_SIZE) {
+	if (vol->mft_record_size > PAGE_SIZE) {
 		ntfs_error(vol->sb, "Mft record size (%i) exceeds the "
-				"PAGE_CACHE_SIZE on your system (%lu).  "
+				"PAGE_SIZE on your system (%lu).  "
 				"This is not supported.  Sorry.",
-				vol->mft_record_size, PAGE_CACHE_SIZE);
+				vol->mft_record_size, PAGE_SIZE);
 		return false;
 	}
 	/* We cannot support mft record sizes below the sector size. */
@@ -1096,7 +1096,7 @@
 
 	ntfs_debug("Entering.");
 	/* Compare contents of $MFT and $MFTMirr. */
-	mrecs_per_page = PAGE_CACHE_SIZE / vol->mft_record_size;
+	mrecs_per_page = PAGE_SIZE / vol->mft_record_size;
 	BUG_ON(!mrecs_per_page);
 	BUG_ON(!vol->mftmirr_size);
 	mft_page = mirr_page = NULL;
@@ -1615,20 +1615,20 @@
 	if (!vol->attrdef)
 		goto iput_failed;
 	index = 0;
-	max_index = i_size >> PAGE_CACHE_SHIFT;
-	size = PAGE_CACHE_SIZE;
+	max_index = i_size >> PAGE_SHIFT;
+	size = PAGE_SIZE;
 	while (index < max_index) {
 		/* Read the attrdef table and copy it into the linear buffer. */
 read_partial_attrdef_page:
 		page = ntfs_map_page(ino->i_mapping, index);
 		if (IS_ERR(page))
 			goto free_iput_failed;
-		memcpy((u8*)vol->attrdef + (index++ << PAGE_CACHE_SHIFT),
+		memcpy((u8*)vol->attrdef + (index++ << PAGE_SHIFT),
 				page_address(page), size);
 		ntfs_unmap_page(page);
 	};
-	if (size == PAGE_CACHE_SIZE) {
-		size = i_size & ~PAGE_CACHE_MASK;
+	if (size == PAGE_SIZE) {
+		size = i_size & ~PAGE_MASK;
 		if (size)
 			goto read_partial_attrdef_page;
 	}
@@ -1684,20 +1684,20 @@
 	if (!vol->upcase)
 		goto iput_upcase_failed;
 	index = 0;
-	max_index = i_size >> PAGE_CACHE_SHIFT;
-	size = PAGE_CACHE_SIZE;
+	max_index = i_size >> PAGE_SHIFT;
+	size = PAGE_SIZE;
 	while (index < max_index) {
 		/* Read the upcase table and copy it into the linear buffer. */
 read_partial_upcase_page:
 		page = ntfs_map_page(ino->i_mapping, index);
 		if (IS_ERR(page))
 			goto iput_upcase_failed;
-		memcpy((char*)vol->upcase + (index++ << PAGE_CACHE_SHIFT),
+		memcpy((char*)vol->upcase + (index++ << PAGE_SHIFT),
 				page_address(page), size);
 		ntfs_unmap_page(page);
 	};
-	if (size == PAGE_CACHE_SIZE) {
-		size = i_size & ~PAGE_CACHE_MASK;
+	if (size == PAGE_SIZE) {
+		size = i_size & ~PAGE_MASK;
 		if (size)
 			goto read_partial_upcase_page;
 	}
@@ -2471,14 +2471,14 @@
 	down_read(&vol->lcnbmp_lock);
 	/*
 	 * Convert the number of bits into bytes rounded up, then convert into
-	 * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one
+	 * multiples of PAGE_SIZE, rounding up so that if we have one
 	 * full and one partial page max_index = 2.
 	 */
-	max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >>
-			PAGE_CACHE_SHIFT;
-	/* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
+	max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_SIZE - 1) >>
+			PAGE_SHIFT;
+	/* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */
 	ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.",
-			max_index, PAGE_CACHE_SIZE / 4);
+			max_index, PAGE_SIZE / 4);
 	for (index = 0; index < max_index; index++) {
 		unsigned long *kaddr;
 
@@ -2491,7 +2491,7 @@
 		if (IS_ERR(page)) {
 			ntfs_debug("read_mapping_page() error. Skipping "
 					"page (index 0x%lx).", index);
-			nr_free -= PAGE_CACHE_SIZE * 8;
+			nr_free -= PAGE_SIZE * 8;
 			continue;
 		}
 		kaddr = kmap_atomic(page);
@@ -2503,9 +2503,9 @@
 		 * ntfs_readpage().
 		 */
 		nr_free -= bitmap_weight(kaddr,
-					PAGE_CACHE_SIZE * BITS_PER_BYTE);
+					PAGE_SIZE * BITS_PER_BYTE);
 		kunmap_atomic(kaddr);
-		page_cache_release(page);
+		put_page(page);
 	}
 	ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
 	/*
@@ -2547,9 +2547,9 @@
 	pgoff_t index;
 
 	ntfs_debug("Entering.");
-	/* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
+	/* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */
 	ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = "
-			"0x%lx.", max_index, PAGE_CACHE_SIZE / 4);
+			"0x%lx.", max_index, PAGE_SIZE / 4);
 	for (index = 0; index < max_index; index++) {
 		unsigned long *kaddr;
 
@@ -2562,7 +2562,7 @@
 		if (IS_ERR(page)) {
 			ntfs_debug("read_mapping_page() error. Skipping "
 					"page (index 0x%lx).", index);
-			nr_free -= PAGE_CACHE_SIZE * 8;
+			nr_free -= PAGE_SIZE * 8;
 			continue;
 		}
 		kaddr = kmap_atomic(page);
@@ -2574,9 +2574,9 @@
 		 * ntfs_readpage().
 		 */
 		nr_free -= bitmap_weight(kaddr,
-					PAGE_CACHE_SIZE * BITS_PER_BYTE);
+					PAGE_SIZE * BITS_PER_BYTE);
 		kunmap_atomic(kaddr);
-		page_cache_release(page);
+		put_page(page);
 	}
 	ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
 			index - 1);
@@ -2618,17 +2618,17 @@
 	/* Type of filesystem. */
 	sfs->f_type   = NTFS_SB_MAGIC;
 	/* Optimal transfer block size. */
-	sfs->f_bsize  = PAGE_CACHE_SIZE;
+	sfs->f_bsize  = PAGE_SIZE;
 	/*
 	 * Total data blocks in filesystem in units of f_bsize and since
 	 * inodes are also stored in data blocs ($MFT is a file) this is just
 	 * the total clusters.
 	 */
 	sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >>
-				PAGE_CACHE_SHIFT;
+				PAGE_SHIFT;
 	/* Free data blocks in filesystem in units of f_bsize. */
 	size	      = get_nr_free_clusters(vol) << vol->cluster_size_bits >>
-				PAGE_CACHE_SHIFT;
+				PAGE_SHIFT;
 	if (size < 0LL)
 		size = 0LL;
 	/* Free blocks avail to non-superuser, same as above on NTFS. */
@@ -2639,11 +2639,11 @@
 	size = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits;
 	/*
 	 * Convert the maximum number of set bits into bytes rounded up, then
-	 * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we
+	 * convert into multiples of PAGE_SIZE, rounding up so that if we
 	 * have one full and one partial page max_index = 2.
 	 */
 	max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits)
-			+ 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+			+ 7) >> 3) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	read_unlock_irqrestore(&mft_ni->size_lock, flags);
 	/* Number of inodes in filesystem (at this point in time). */
 	sfs->f_files = size;
@@ -2765,15 +2765,15 @@
 	if (!parse_options(vol, (char*)opt))
 		goto err_out_now;
 
-	/* We support sector sizes up to the PAGE_CACHE_SIZE. */
-	if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
+	/* We support sector sizes up to the PAGE_SIZE. */
+	if (bdev_logical_block_size(sb->s_bdev) > PAGE_SIZE) {
 		if (!silent)
 			ntfs_error(sb, "Device has unsupported sector size "
 					"(%i).  The maximum supported sector "
 					"size on this architecture is %lu "
 					"bytes.",
 					bdev_logical_block_size(sb->s_bdev),
-					PAGE_CACHE_SIZE);
+					PAGE_SIZE);
 		goto err_out_now;
 	}
 	/*

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index ce210d4..e27e652 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile

@@ -41,7 +41,8 @@
 	quota_local.o		\
 	quota_global.o		\
 	xattr.o			\
-	acl.o
+	acl.o	\
+	filecheck.o
 
 ocfs2_stackglue-objs := stackglue.o
 ocfs2_stack_o2cb-objs := stack_o2cb.o

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d002579..e361d1a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c

@@ -2516,21 +2516,6 @@
 	struct ocfs2_extent_block *eb;
 	u32 range;
 
-	/*
-	 * In normal tree rotation process, we will never touch the
-	 * tree branch above subtree_index and ocfs2_extend_rotate_transaction
-	 * doesn't reserve the credits for them either.
-	 *
-	 * But we do have a special case here which will update the rightmost
-	 * records for all the bh in the path.
-	 * So we have to allocate extra credits and access them.
-	 */
-	ret = ocfs2_extend_trans(handle, subtree_index);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
 	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (ret) {
 		mlog_errno(ret);
@@ -2956,7 +2941,7 @@
 		     right_path->p_node[subtree_root].bh->b_blocknr,
 		     right_path->p_tree_depth);
 
-		ret = ocfs2_extend_rotate_transaction(handle, subtree_root,
+		ret = ocfs2_extend_rotate_transaction(handle, 0,
 						      orig_credits, left_path);
 		if (ret) {
 			mlog_errno(ret);
@@ -3029,21 +3014,9 @@
 	struct ocfs2_extent_block *eb;
 	struct ocfs2_extent_list *el;
 
-
 	ret = ocfs2_et_sanity_check(et);
 	if (ret)
 		goto out;
-	/*
-	 * There's two ways we handle this depending on
-	 * whether path is the only existing one.
-	 */
-	ret = ocfs2_extend_rotate_transaction(handle, 0,
-					      handle->h_buffer_credits,
-					      path);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
 
 	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
 	if (ret) {
@@ -3641,6 +3614,14 @@
 		 */
 		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
 		    le16_to_cpu(el->l_next_free_rec) == 1) {
+			/* extend credit for ocfs2_remove_rightmost_path */
+			ret = ocfs2_extend_rotate_transaction(handle, 0,
+					handle->h_buffer_credits,
+					right_path);
+			if (ret) {
+				mlog_errno(ret);
+				goto out;
+			}
 
 			ret = ocfs2_remove_rightmost_path(handle, et,
 							  right_path,
@@ -3679,6 +3660,14 @@
 	BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
 
 	if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
+		/* extend credit for ocfs2_remove_rightmost_path */
+		ret = ocfs2_extend_rotate_transaction(handle, 0,
+				handle->h_buffer_credits,
+				path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
 		/*
 		 * The merge code will need to create an empty
 		 * extent to take the place of the newly
@@ -3727,6 +3716,15 @@
 		 */
 		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
 
+		/* extend credit for ocfs2_remove_rightmost_path */
+		ret = ocfs2_extend_rotate_transaction(handle, 0,
+					handle->h_buffer_credits,
+					path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
 		/* The merge left us with an empty extent, remove it. */
 		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		if (ret) {
@@ -3748,6 +3746,15 @@
 			goto out;
 		}
 
+		/* extend credit for ocfs2_remove_rightmost_path */
+		ret = ocfs2_extend_rotate_transaction(handle, 0,
+				handle->h_buffer_credits,
+				path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
 		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		/*
 		 * Error from this last rotate is not critical, so
@@ -3783,6 +3790,16 @@
 		}
 
 		if (ctxt->c_split_covers_rec) {
+			/* extend credit for ocfs2_remove_rightmost_path */
+			ret = ocfs2_extend_rotate_transaction(handle, 0,
+					handle->h_buffer_credits,
+					path);
+			if (ret) {
+				mlog_errno(ret);
+				ret = 0;
+				goto out;
+			}
+
 			/*
 			 * The merge may have left an empty extent in
 			 * our leaf. Try to rotate it away.
@@ -5342,6 +5359,15 @@
 	struct ocfs2_extent_block *eb;
 
 	if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
+		/* extend credit for ocfs2_remove_rightmost_path */
+		ret = ocfs2_extend_rotate_transaction(handle, 0,
+				handle->h_buffer_credits,
+				path);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
 		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
 		if (ret) {
 			mlog_errno(ret);
@@ -5928,16 +5954,6 @@
 
 		ocfs2_journal_dirty(handle, tl_bh);
 
-		/* TODO: Perhaps we can calculate the bulk of the
-		 * credits up front rather than extending like
-		 * this. */
-		status = ocfs2_extend_trans(handle,
-					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
-
 		rec = tl->tl_recs[i];
 		start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
 						    le32_to_cpu(rec.t_start));
@@ -5958,6 +5974,13 @@
 				goto bail;
 			}
 		}
+
+		status = ocfs2_extend_trans(handle,
+				OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
 		i--;
 	}
 
@@ -6016,7 +6039,7 @@
 		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
+	handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
@@ -6079,7 +6102,7 @@
 		if (cancel)
 			cancel_delayed_work(&osb->osb_truncate_log_wq);
 
-		queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
+		queue_delayed_work(osb->ocfs2_wq, &osb->osb_truncate_log_wq,
 				   OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
 	}
 }
@@ -6253,7 +6276,7 @@
 
 	if (tl_inode) {
 		cancel_delayed_work(&osb->osb_truncate_log_wq);
-		flush_workqueue(ocfs2_wq);
+		flush_workqueue(osb->ocfs2_wq);
 
 		status = ocfs2_flush_truncate_log(osb);
 		if (status < 0)
@@ -6648,7 +6671,7 @@
 {
 	int i;
 	struct page *page;
-	unsigned int from, to = PAGE_CACHE_SIZE;
+	unsigned int from, to = PAGE_SIZE;
 	struct super_block *sb = inode->i_sb;
 
 	BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
@@ -6656,21 +6679,21 @@
 	if (numpages == 0)
 		goto out;
 
-	to = PAGE_CACHE_SIZE;
+	to = PAGE_SIZE;
 	for(i = 0; i < numpages; i++) {
 		page = pages[i];
 
-		from = start & (PAGE_CACHE_SIZE - 1);
-		if ((end >> PAGE_CACHE_SHIFT) == page->index)
-			to = end & (PAGE_CACHE_SIZE - 1);
+		from = start & (PAGE_SIZE - 1);
+		if ((end >> PAGE_SHIFT) == page->index)
+			to = end & (PAGE_SIZE - 1);
 
-		BUG_ON(from > PAGE_CACHE_SIZE);
-		BUG_ON(to > PAGE_CACHE_SIZE);
+		BUG_ON(from > PAGE_SIZE);
+		BUG_ON(to > PAGE_SIZE);
 
 		ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
 					 &phys);
 
-		start = (page->index + 1) << PAGE_CACHE_SHIFT;
+		start = (page->index + 1) << PAGE_SHIFT;
 	}
 out:
 	if (pages)
@@ -6689,7 +6712,7 @@
 
 	numpages = 0;
 	last_page_bytes = PAGE_ALIGN(end);
-	index = start >> PAGE_CACHE_SHIFT;
+	index = start >> PAGE_SHIFT;
 	do {
 		pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
 		if (!pages[numpages]) {
@@ -6700,7 +6723,7 @@
 
 		numpages++;
 		index++;
-	} while (index < (last_page_bytes >> PAGE_CACHE_SHIFT));
+	} while (index < (last_page_bytes >> PAGE_SHIFT));
 
 out:
 	if (ret != 0) {
@@ -6927,8 +6950,8 @@
 		 * to do that now.
 		 */
 		if (!ocfs2_sparse_alloc(osb) &&
-		    PAGE_CACHE_SIZE < osb->s_clustersize)
-			end = PAGE_CACHE_SIZE;
+		    PAGE_SIZE < osb->s_clustersize)
+			end = PAGE_SIZE;
 
 		ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
 		if (ret) {
@@ -6948,8 +6971,8 @@
 			goto out_unlock;
 		}
 
-		page_end = PAGE_CACHE_SIZE;
-		if (PAGE_CACHE_SIZE > osb->s_clustersize)
+		page_end = PAGE_SIZE;
+		if (PAGE_SIZE > osb->s_clustersize)
 			page_end = osb->s_clustersize;
 
 		for (i = 0; i < num_pages; i++)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 043110e..ad15773 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c

@@ -234,7 +234,7 @@
 
 	size = i_size_read(inode);
 
-	if (size > PAGE_CACHE_SIZE ||
+	if (size > PAGE_SIZE ||
 	    size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
 		ocfs2_error(inode->i_sb,
 			    "Inode %llu has with inline data has bad size: %Lu\n",
@@ -247,7 +247,7 @@
 	if (size)
 		memcpy(kaddr, di->id2.i_data.id_data, size);
 	/* Clear the remaining part of the page */
-	memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
+	memset(kaddr + size, 0, PAGE_SIZE - size);
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr);
 
@@ -282,7 +282,7 @@
 {
 	struct inode *inode = page->mapping->host;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	loff_t start = (loff_t)page->index << PAGE_SHIFT;
 	int ret, unlock = 1;
 
 	trace_ocfs2_readpage((unsigned long long)oi->ip_blkno,
@@ -385,7 +385,7 @@
 	 * drop out in that case as it's not worth handling here.
 	 */
 	last = list_entry(pages->prev, struct page, lru);
-	start = (loff_t)last->index << PAGE_CACHE_SHIFT;
+	start = (loff_t)last->index << PAGE_SHIFT;
 	if (start >= i_size_read(inode))
 		goto out_unlock;
 
@@ -499,158 +499,6 @@
 	return status;
 }
 
-/*
- * TODO: Make this into a generic get_blocks function.
- *
- * From do_direct_io in direct-io.c:
- *  "So what we do is to permit the ->get_blocks function to populate
- *   bh.b_size with the size of IO which is permitted at this offset and
- *   this i_blkbits."
- *
- * This function is called directly from get_more_blocks in direct-io.c.
- *
- * called like this: dio->get_blocks(dio->inode, fs_startblk,
- * 					fs_count, map_bh, dio->rw == WRITE);
- */
-static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
-				     struct buffer_head *bh_result, int create)
-{
-	int ret;
-	u32 cpos = 0;
-	int alloc_locked = 0;
-	u64 p_blkno, inode_blocks, contig_blocks;
-	unsigned int ext_flags;
-	unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
-	unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
-	unsigned long len = bh_result->b_size;
-	unsigned int clusters_to_alloc = 0, contig_clusters = 0;
-
-	cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock);
-
-	/* This function won't even be called if the request isn't all
-	 * nicely aligned and of the right size, so there's no need
-	 * for us to check any of that. */
-
-	inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
-
-	down_read(&OCFS2_I(inode)->ip_alloc_sem);
-
-	/* This figures out the size of the next contiguous block, and
-	 * our logical offset */
-	ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
-					  &contig_blocks, &ext_flags);
-	up_read(&OCFS2_I(inode)->ip_alloc_sem);
-
-	if (ret) {
-		mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
-		     (unsigned long long)iblock);
-		ret = -EIO;
-		goto bail;
-	}
-
-	/* We should already CoW the refcounted extent in case of create. */
-	BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED));
-
-	/* allocate blocks if no p_blkno is found, and create == 1 */
-	if (!p_blkno && create) {
-		ret = ocfs2_inode_lock(inode, NULL, 1);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto bail;
-		}
-
-		alloc_locked = 1;
-
-		down_write(&OCFS2_I(inode)->ip_alloc_sem);
-
-		/* fill hole, allocate blocks can't be larger than the size
-		 * of the hole */
-		clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len);
-		contig_clusters = ocfs2_clusters_for_blocks(inode->i_sb,
-				contig_blocks);
-		if (clusters_to_alloc > contig_clusters)
-			clusters_to_alloc = contig_clusters;
-
-		/* allocate extent and insert them into the extent tree */
-		ret = ocfs2_extend_allocation(inode, cpos,
-				clusters_to_alloc, 0);
-		if (ret < 0) {
-			up_write(&OCFS2_I(inode)->ip_alloc_sem);
-			mlog_errno(ret);
-			goto bail;
-		}
-
-		ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
-				&contig_blocks, &ext_flags);
-		if (ret < 0) {
-			up_write(&OCFS2_I(inode)->ip_alloc_sem);
-			mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
-					(unsigned long long)iblock);
-			ret = -EIO;
-			goto bail;
-		}
-		set_buffer_new(bh_result);
-		up_write(&OCFS2_I(inode)->ip_alloc_sem);
-	}
-
-	/*
-	 * get_more_blocks() expects us to describe a hole by clearing
-	 * the mapped bit on bh_result().
-	 *
-	 * Consider an unwritten extent as a hole.
-	 */
-	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
-		map_bh(bh_result, inode->i_sb, p_blkno);
-	else
-		clear_buffer_mapped(bh_result);
-
-	/* make sure we don't map more than max_blocks blocks here as
-	   that's all the kernel will handle at this point. */
-	if (max_blocks < contig_blocks)
-		contig_blocks = max_blocks;
-	bh_result->b_size = contig_blocks << blocksize_bits;
-bail:
-	if (alloc_locked)
-		ocfs2_inode_unlock(inode, 1);
-	return ret;
-}
-
-/*
- * ocfs2_dio_end_io is called by the dio core when a dio is finished.  We're
- * particularly interested in the aio/dio case.  We use the rw_lock DLM lock
- * to protect io on one node from truncation on another.
- */
-static int ocfs2_dio_end_io(struct kiocb *iocb,
-			     loff_t offset,
-			     ssize_t bytes,
-			     void *private)
-{
-	struct inode *inode = file_inode(iocb->ki_filp);
-	int level;
-
-	if (bytes <= 0)
-		return 0;
-
-	/* this io's submitter should not have unlocked this before we could */
-	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
-
-	if (ocfs2_iocb_is_unaligned_aio(iocb)) {
-		ocfs2_iocb_clear_unaligned_aio(iocb);
-
-		mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
-	}
-
-	/* Let rw unlock to be done later to protect append direct io write */
-	if (offset + bytes <= i_size_read(inode)) {
-		ocfs2_iocb_clear_rw_locked(iocb);
-
-		level = ocfs2_iocb_rw_locked_level(iocb);
-		ocfs2_rw_unlock(inode, level);
-	}
-
-	return 0;
-}
-
 static int ocfs2_releasepage(struct page *page, gfp_t wait)
 {
 	if (!page_has_buffers(page))
@@ -658,374 +506,17 @@
 	return try_to_free_buffers(page);
 }
 
-static int ocfs2_is_overwrite(struct ocfs2_super *osb,
-		struct inode *inode, loff_t offset)
-{
-	int ret = 0;
-	u32 v_cpos = 0;
-	u32 p_cpos = 0;
-	unsigned int num_clusters = 0;
-	unsigned int ext_flags = 0;
-
-	v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
-	ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
-			&num_clusters, &ext_flags);
-	if (ret < 0) {
-		mlog_errno(ret);
-		return ret;
-	}
-
-	if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN))
-		return 1;
-
-	return 0;
-}
-
-static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
-		struct inode *inode, loff_t offset,
-		u64 zero_len, int cluster_align)
-{
-	u32 p_cpos = 0;
-	u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
-	unsigned int num_clusters = 0;
-	unsigned int ext_flags = 0;
-	int ret = 0;
-
-	if (offset <= i_size_read(inode) || cluster_align)
-		return 0;
-
-	ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
-			&ext_flags);
-	if (ret < 0) {
-		mlog_errno(ret);
-		return ret;
-	}
-
-	if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
-		u64 s = i_size_read(inode);
-		sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) +
-			(do_div(s, osb->s_clustersize) >> 9);
-
-		ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
-				zero_len >> 9, GFP_NOFS, false);
-		if (ret < 0)
-			mlog_errno(ret);
-	}
-
-	return ret;
-}
-
-static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb,
-		struct inode *inode, loff_t offset)
-{
-	u64 zero_start, zero_len, total_zero_len;
-	u32 p_cpos = 0, clusters_to_add;
-	u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
-	unsigned int num_clusters = 0;
-	unsigned int ext_flags = 0;
-	u32 size_div, offset_div;
-	int ret = 0;
-
-	{
-		u64 o = offset;
-		u64 s = i_size_read(inode);
-
-		offset_div = do_div(o, osb->s_clustersize);
-		size_div = do_div(s, osb->s_clustersize);
-	}
-
-	if (offset <= i_size_read(inode))
-		return 0;
-
-	clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) -
-		ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode));
-	total_zero_len = offset - i_size_read(inode);
-	if (clusters_to_add)
-		total_zero_len -= offset_div;
-
-	/* Allocate clusters to fill out holes, and this is only needed
-	 * when we add more than one clusters. Otherwise the cluster will
-	 * be allocated during direct IO */
-	if (clusters_to_add > 1) {
-		ret = ocfs2_extend_allocation(inode,
-				OCFS2_I(inode)->ip_clusters,
-				clusters_to_add - 1, 0);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-	}
-
-	while (total_zero_len) {
-		ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
-				&ext_flags);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
-
-		zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) +
-			size_div;
-		zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) -
-			size_div;
-		zero_len = min(total_zero_len, zero_len);
-
-		if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
-			ret = blkdev_issue_zeroout(osb->sb->s_bdev,
-					zero_start >> 9, zero_len >> 9,
-					GFP_NOFS, false);
-			if (ret < 0) {
-				mlog_errno(ret);
-				goto out;
-			}
-		}
-
-		total_zero_len -= zero_len;
-		v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div);
-
-		/* Only at first iteration can be cluster not aligned.
-		 * So set size_div to 0 for the rest */
-		size_div = 0;
-	}
-
-out:
-	return ret;
-}
-
-static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
-		struct iov_iter *iter,
-		loff_t offset)
-{
-	ssize_t ret = 0;
-	ssize_t written = 0;
-	bool orphaned = false;
-	int is_overwrite = 0;
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file_inode(file)->i_mapping->host;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct buffer_head *di_bh = NULL;
-	size_t count = iter->count;
-	journal_t *journal = osb->journal->j_journal;
-	u64 zero_len_head, zero_len_tail;
-	int cluster_align_head, cluster_align_tail;
-	loff_t final_size = offset + count;
-	int append_write = offset >= i_size_read(inode) ? 1 : 0;
-	unsigned int num_clusters = 0;
-	unsigned int ext_flags = 0;
-
-	{
-		u64 o = offset;
-		u64 s = i_size_read(inode);
-
-		zero_len_head = do_div(o, 1 << osb->s_clustersize_bits);
-		cluster_align_head = !zero_len_head;
-
-		zero_len_tail = osb->s_clustersize -
-			do_div(s, osb->s_clustersize);
-		if ((offset - i_size_read(inode)) < zero_len_tail)
-			zero_len_tail = offset - i_size_read(inode);
-		cluster_align_tail = !zero_len_tail;
-	}
-
-	/*
-	 * when final_size > inode->i_size, inode->i_size will be
-	 * updated after direct write, so add the inode to orphan
-	 * dir first.
-	 */
-	if (final_size > i_size_read(inode)) {
-		ret = ocfs2_add_inode_to_orphan(osb, inode);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
-		orphaned = true;
-	}
-
-	if (append_write) {
-		ret = ocfs2_inode_lock(inode, NULL, 1);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto clean_orphan;
-		}
-
-		/* zeroing out the previously allocated cluster tail
-		 * that but not zeroed */
-		if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
-			down_read(&OCFS2_I(inode)->ip_alloc_sem);
-			ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
-					zero_len_tail, cluster_align_tail);
-			up_read(&OCFS2_I(inode)->ip_alloc_sem);
-		} else {
-			down_write(&OCFS2_I(inode)->ip_alloc_sem);
-			ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
-					offset);
-			up_write(&OCFS2_I(inode)->ip_alloc_sem);
-		}
-		if (ret < 0) {
-			mlog_errno(ret);
-			ocfs2_inode_unlock(inode, 1);
-			goto clean_orphan;
-		}
-
-		is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
-		if (is_overwrite < 0) {
-			mlog_errno(is_overwrite);
-			ret = is_overwrite;
-			ocfs2_inode_unlock(inode, 1);
-			goto clean_orphan;
-		}
-
-		ocfs2_inode_unlock(inode, 1);
-	}
-
-	written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
-				       offset, ocfs2_direct_IO_get_blocks,
-				       ocfs2_dio_end_io, NULL, 0);
-	/* overwrite aio may return -EIOCBQUEUED, and it is not an error */
-	if ((written < 0) && (written != -EIOCBQUEUED)) {
-		loff_t i_size = i_size_read(inode);
-
-		if (offset + count > i_size) {
-			ret = ocfs2_inode_lock(inode, &di_bh, 1);
-			if (ret < 0) {
-				mlog_errno(ret);
-				goto clean_orphan;
-			}
-
-			if (i_size == i_size_read(inode)) {
-				ret = ocfs2_truncate_file(inode, di_bh,
-						i_size);
-				if (ret < 0) {
-					if (ret != -ENOSPC)
-						mlog_errno(ret);
-
-					ocfs2_inode_unlock(inode, 1);
-					brelse(di_bh);
-					di_bh = NULL;
-					goto clean_orphan;
-				}
-			}
-
-			ocfs2_inode_unlock(inode, 1);
-			brelse(di_bh);
-			di_bh = NULL;
-
-			ret = jbd2_journal_force_commit(journal);
-			if (ret < 0)
-				mlog_errno(ret);
-		}
-	} else if (written > 0 && append_write && !is_overwrite &&
-			!cluster_align_head) {
-		/* zeroing out the allocated cluster head */
-		u32 p_cpos = 0;
-		u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
-
-		ret = ocfs2_inode_lock(inode, NULL, 0);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto clean_orphan;
-		}
-
-		ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
-				&num_clusters, &ext_flags);
-		if (ret < 0) {
-			mlog_errno(ret);
-			ocfs2_inode_unlock(inode, 0);
-			goto clean_orphan;
-		}
-
-		BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
-
-		ret = blkdev_issue_zeroout(osb->sb->s_bdev,
-				(u64)p_cpos << (osb->s_clustersize_bits - 9),
-				zero_len_head >> 9, GFP_NOFS, false);
-		if (ret < 0)
-			mlog_errno(ret);
-
-		ocfs2_inode_unlock(inode, 0);
-	}
-
-clean_orphan:
-	if (orphaned) {
-		int tmp_ret;
-		int update_isize = written > 0 ? 1 : 0;
-		loff_t end = update_isize ? offset + written : 0;
-
-		tmp_ret = ocfs2_inode_lock(inode, &di_bh, 1);
-		if (tmp_ret < 0) {
-			ret = tmp_ret;
-			mlog_errno(ret);
-			goto out;
-		}
-
-		tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
-				update_isize, end);
-		if (tmp_ret < 0) {
-			ocfs2_inode_unlock(inode, 1);
-			ret = tmp_ret;
-			mlog_errno(ret);
-			brelse(di_bh);
-			goto out;
-		}
-
-		ocfs2_inode_unlock(inode, 1);
-		brelse(di_bh);
-
-		tmp_ret = jbd2_journal_force_commit(journal);
-		if (tmp_ret < 0) {
-			ret = tmp_ret;
-			mlog_errno(tmp_ret);
-		}
-	}
-
-out:
-	if (ret >= 0)
-		ret = written;
-	return ret;
-}
-
-static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
-			       loff_t offset)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file_inode(file)->i_mapping->host;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	int full_coherency = !(osb->s_mount_opt &
-			OCFS2_MOUNT_COHERENCY_BUFFERED);
-
-	/*
-	 * Fallback to buffered I/O if we see an inode without
-	 * extents.
-	 */
-	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
-		return 0;
-
-	/* Fallback to buffered I/O if we are appending and
-	 * concurrent O_DIRECT writes are allowed.
-	 */
-	if (i_size_read(inode) <= offset && !full_coherency)
-		return 0;
-
-	if (iov_iter_rw(iter) == READ)
-		return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
-					    iter, offset,
-					    ocfs2_direct_IO_get_blocks,
-					    ocfs2_dio_end_io, NULL, 0);
-	else
-		return ocfs2_direct_IO_write(iocb, iter, offset);
-}
-
 static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
 					    u32 cpos,
 					    unsigned int *start,
 					    unsigned int *end)
 {
-	unsigned int cluster_start = 0, cluster_end = PAGE_CACHE_SIZE;
+	unsigned int cluster_start = 0, cluster_end = PAGE_SIZE;
 
-	if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) {
+	if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits)) {
 		unsigned int cpp;
 
-		cpp = 1 << (PAGE_CACHE_SHIFT - osb->s_clustersize_bits);
+		cpp = 1 << (PAGE_SHIFT - osb->s_clustersize_bits);
 
 		cluster_start = cpos % cpp;
 		cluster_start = cluster_start << osb->s_clustersize_bits;
@@ -1193,13 +684,20 @@
 	return ret;
 }
 
-#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE)
+#if (PAGE_SIZE >= OCFS2_MAX_CLUSTERSIZE)
 #define OCFS2_MAX_CTXT_PAGES	1
 #else
-#define OCFS2_MAX_CTXT_PAGES	(OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
+#define OCFS2_MAX_CTXT_PAGES	(OCFS2_MAX_CLUSTERSIZE / PAGE_SIZE)
 #endif
 
-#define OCFS2_MAX_CLUSTERS_PER_PAGE	(PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)
+#define OCFS2_MAX_CLUSTERS_PER_PAGE	(PAGE_SIZE / OCFS2_MIN_CLUSTERSIZE)
+
+struct ocfs2_unwritten_extent {
+	struct list_head	ue_node;
+	struct list_head	ue_ip_node;
+	u32			ue_cpos;
+	u32			ue_phys;
+};
 
 /*
  * Describe the state of a single cluster to be written to.
@@ -1212,7 +710,7 @@
 	 * filled.
 	 */
 	unsigned	c_new;
-	unsigned	c_unwritten;
+	unsigned	c_clear_unwritten;
 	unsigned	c_needs_zero;
 };
 
@@ -1224,6 +722,9 @@
 	/* First cluster allocated in a nonsparse extend */
 	u32				w_first_new_cpos;
 
+	/* Type of caller. Must be one of buffer, mmap, direct.  */
+	ocfs2_write_type_t		w_type;
+
 	struct ocfs2_write_cluster_desc	w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
 
 	/*
@@ -1272,6 +773,8 @@
 	struct buffer_head		*w_di_bh;
 
 	struct ocfs2_cached_dealloc_ctxt w_dealloc;
+
+	struct list_head		w_unwritten_list;
 };
 
 void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
@@ -1282,7 +785,7 @@
 		if (pages[i]) {
 			unlock_page(pages[i]);
 			mark_page_accessed(pages[i]);
-			page_cache_release(pages[i]);
+			put_page(pages[i]);
 		}
 	}
 }
@@ -1305,13 +808,30 @@
 			}
 		}
 		mark_page_accessed(wc->w_target_page);
-		page_cache_release(wc->w_target_page);
+		put_page(wc->w_target_page);
 	}
 	ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
 }
 
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_free_unwritten_list(struct inode *inode,
+				 struct list_head *head)
 {
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_unwritten_extent *ue = NULL, *tmp = NULL;
+
+	list_for_each_entry_safe(ue, tmp, head, ue_node) {
+		list_del(&ue->ue_node);
+		spin_lock(&oi->ip_lock);
+		list_del(&ue->ue_ip_node);
+		spin_unlock(&oi->ip_lock);
+		kfree(ue);
+	}
+}
+
+static void ocfs2_free_write_ctxt(struct inode *inode,
+				  struct ocfs2_write_ctxt *wc)
+{
+	ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list);
 	ocfs2_unlock_pages(wc);
 	brelse(wc->w_di_bh);
 	kfree(wc);
@@ -1319,7 +839,8 @@
 
 static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
 				  struct ocfs2_super *osb, loff_t pos,
-				  unsigned len, struct buffer_head *di_bh)
+				  unsigned len, ocfs2_write_type_t type,
+				  struct buffer_head *di_bh)
 {
 	u32 cend;
 	struct ocfs2_write_ctxt *wc;
@@ -1334,13 +855,15 @@
 	wc->w_clen = cend - wc->w_cpos + 1;
 	get_bh(di_bh);
 	wc->w_di_bh = di_bh;
+	wc->w_type = type;
 
-	if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
+	if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits))
 		wc->w_large_pages = 1;
 	else
 		wc->w_large_pages = 0;
 
 	ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
+	INIT_LIST_HEAD(&wc->w_unwritten_list);
 
 	*wcp = wc;
 
@@ -1397,16 +920,17 @@
 				loff_t user_pos, unsigned user_len)
 {
 	int i;
-	unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
+	unsigned from = user_pos & (PAGE_SIZE - 1),
 		to = user_pos + user_len;
 	struct page *tmppage;
 
-	ocfs2_zero_new_buffers(wc->w_target_page, from, to);
+	if (wc->w_target_page)
+		ocfs2_zero_new_buffers(wc->w_target_page, from, to);
 
 	for(i = 0; i < wc->w_num_pages; i++) {
 		tmppage = wc->w_pages[i];
 
-		if (page_has_buffers(tmppage)) {
+		if (tmppage && page_has_buffers(tmppage)) {
 			if (ocfs2_should_order_data(inode))
 				ocfs2_jbd2_file_inode(wc->w_handle, inode);
 
@@ -1436,7 +960,7 @@
 			(page_offset(page) <= user_pos));
 
 	if (page == wc->w_target_page) {
-		map_from = user_pos & (PAGE_CACHE_SIZE - 1);
+		map_from = user_pos & (PAGE_SIZE - 1);
 		map_to = map_from + user_len;
 
 		if (new)
@@ -1510,7 +1034,7 @@
 	struct inode *inode = mapping->host;
 	loff_t last_byte;
 
-	target_index = user_pos >> PAGE_CACHE_SHIFT;
+	target_index = user_pos >> PAGE_SHIFT;
 
 	/*
 	 * Figure out how many pages we'll be manipulating here. For
@@ -1529,18 +1053,20 @@
 		 */
 		last_byte = max(user_pos + user_len, i_size_read(inode));
 		BUG_ON(last_byte < 1);
-		end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
+		end_index = ((last_byte - 1) >> PAGE_SHIFT) + 1;
 		if ((start + wc->w_num_pages) > end_index)
 			wc->w_num_pages = end_index - start;
 	} else {
 		wc->w_num_pages = 1;
 		start = target_index;
 	}
+	end_index = (user_pos + user_len - 1) >> PAGE_SHIFT;
 
 	for(i = 0; i < wc->w_num_pages; i++) {
 		index = start + i;
 
-		if (index == target_index && mmap_page) {
+		if (index >= target_index && index <= end_index &&
+		    wc->w_type == OCFS2_WRITE_MMAP) {
 			/*
 			 * ocfs2_pagemkwrite() is a little different
 			 * and wants us to directly use the page
@@ -1556,9 +1082,14 @@
 				goto out;
 			}
 
-			page_cache_get(mmap_page);
+			get_page(mmap_page);
 			wc->w_pages[i] = mmap_page;
 			wc->w_target_locked = true;
+		} else if (index >= target_index && index <= end_index &&
+			   wc->w_type == OCFS2_WRITE_DIRECT) {
+			/* Direct write has no mapping page. */
+			wc->w_pages[i] = NULL;
+			continue;
 		} else {
 			wc->w_pages[i] = find_or_create_page(mapping, index,
 							     GFP_NOFS);
@@ -1583,19 +1114,20 @@
  * Prepare a single cluster for write one cluster into the file.
  */
 static int ocfs2_write_cluster(struct address_space *mapping,
-			       u32 phys, unsigned int unwritten,
+			       u32 *phys, unsigned int new,
+			       unsigned int clear_unwritten,
 			       unsigned int should_zero,
 			       struct ocfs2_alloc_context *data_ac,
 			       struct ocfs2_alloc_context *meta_ac,
 			       struct ocfs2_write_ctxt *wc, u32 cpos,
 			       loff_t user_pos, unsigned user_len)
 {
-	int ret, i, new;
-	u64 v_blkno, p_blkno;
+	int ret, i;
+	u64 p_blkno;
 	struct inode *inode = mapping->host;
 	struct ocfs2_extent_tree et;
+	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 
-	new = phys == 0 ? 1 : 0;
 	if (new) {
 		u32 tmp_pos;
 
@@ -1605,9 +1137,9 @@
 		 */
 		tmp_pos = cpos;
 		ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
-					   &tmp_pos, 1, 0, wc->w_di_bh,
-					   wc->w_handle, data_ac,
-					   meta_ac, NULL);
+					   &tmp_pos, 1, !clear_unwritten,
+					   wc->w_di_bh, wc->w_handle,
+					   data_ac, meta_ac, NULL);
 		/*
 		 * This shouldn't happen because we must have already
 		 * calculated the correct meta data allocation required. The
@@ -1624,11 +1156,11 @@
 			mlog_errno(ret);
 			goto out;
 		}
-	} else if (unwritten) {
+	} else if (clear_unwritten) {
 		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
 					      wc->w_di_bh);
 		ret = ocfs2_mark_extent_written(inode, &et,
-						wc->w_handle, cpos, 1, phys,
+						wc->w_handle, cpos, 1, *phys,
 						meta_ac, &wc->w_dealloc);
 		if (ret < 0) {
 			mlog_errno(ret);
@@ -1636,30 +1168,33 @@
 		}
 	}
 
-	if (should_zero)
-		v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
-	else
-		v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
-
 	/*
 	 * The only reason this should fail is due to an inability to
 	 * find the extent added.
 	 */
-	ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
-					  NULL);
+	ret = ocfs2_get_clusters(inode, cpos, phys, NULL, NULL);
 	if (ret < 0) {
 		mlog(ML_ERROR, "Get physical blkno failed for inode %llu, "
-			    "at logical block %llu",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
-			    (unsigned long long)v_blkno);
+			    "at logical cluster %u",
+			    (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
 		goto out;
 	}
 
-	BUG_ON(p_blkno == 0);
+	BUG_ON(*phys == 0);
+
+	p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *phys);
+	if (!should_zero)
+		p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1);
 
 	for(i = 0; i < wc->w_num_pages; i++) {
 		int tmpret;
 
+		/* This is the direct io target page. */
+		if (wc->w_pages[i] == NULL) {
+			p_blkno++;
+			continue;
+		}
+
 		tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
 						      wc->w_pages[i], cpos,
 						      user_pos, user_len,
@@ -1706,8 +1241,9 @@
 		if ((cluster_off + local_len) > osb->s_clustersize)
 			local_len = osb->s_clustersize - cluster_off;
 
-		ret = ocfs2_write_cluster(mapping, desc->c_phys,
-					  desc->c_unwritten,
+		ret = ocfs2_write_cluster(mapping, &desc->c_phys,
+					  desc->c_new,
+					  desc->c_clear_unwritten,
 					  desc->c_needs_zero,
 					  data_ac, meta_ac,
 					  wc, desc->c_cpos, pos, local_len);
@@ -1736,7 +1272,7 @@
 {
 	struct ocfs2_write_cluster_desc *desc;
 
-	wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1);
+	wc->w_target_from = pos & (PAGE_SIZE - 1);
 	wc->w_target_to = wc->w_target_from + len;
 
 	if (alloc == 0)
@@ -1773,11 +1309,71 @@
 							&wc->w_target_to);
 	} else {
 		wc->w_target_from = 0;
-		wc->w_target_to = PAGE_CACHE_SIZE;
+		wc->w_target_to = PAGE_SIZE;
 	}
 }
 
 /*
+ * Check if this extent is marked UNWRITTEN by direct io. If so, we need not to
+ * do the zero work. And should not to clear UNWRITTEN since it will be cleared
+ * by the direct io procedure.
+ * If this is a new extent that allocated by direct io, we should mark it in
+ * the ip_unwritten_list.
+ */
+static int ocfs2_unwritten_check(struct inode *inode,
+				 struct ocfs2_write_ctxt *wc,
+				 struct ocfs2_write_cluster_desc *desc)
+{
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_unwritten_extent *ue = NULL, *new = NULL;
+	int ret = 0;
+
+	if (!desc->c_needs_zero)
+		return 0;
+
+retry:
+	spin_lock(&oi->ip_lock);
+	/* Needs not to zero no metter buffer or direct. The one who is zero
+	 * the cluster is doing zero. And he will clear unwritten after all
+	 * cluster io finished. */
+	list_for_each_entry(ue, &oi->ip_unwritten_list, ue_ip_node) {
+		if (desc->c_cpos == ue->ue_cpos) {
+			BUG_ON(desc->c_new);
+			desc->c_needs_zero = 0;
+			desc->c_clear_unwritten = 0;
+			goto unlock;
+		}
+	}
+
+	if (wc->w_type != OCFS2_WRITE_DIRECT)
+		goto unlock;
+
+	if (new == NULL) {
+		spin_unlock(&oi->ip_lock);
+		new = kmalloc(sizeof(struct ocfs2_unwritten_extent),
+			     GFP_NOFS);
+		if (new == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		goto retry;
+	}
+	/* This direct write will doing zero. */
+	new->ue_cpos = desc->c_cpos;
+	new->ue_phys = desc->c_phys;
+	desc->c_clear_unwritten = 0;
+	list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list);
+	list_add_tail(&new->ue_node, &wc->w_unwritten_list);
+	new = NULL;
+unlock:
+	spin_unlock(&oi->ip_lock);
+out:
+	if (new)
+		kfree(new);
+	return ret;
+}
+
+/*
  * Populate each single-cluster write descriptor in the write context
  * with information about the i/o to be done.
  *
@@ -1852,14 +1448,21 @@
 		if (phys == 0) {
 			desc->c_new = 1;
 			desc->c_needs_zero = 1;
+			desc->c_clear_unwritten = 1;
 			*clusters_to_alloc = *clusters_to_alloc + 1;
 		}
 
 		if (ext_flags & OCFS2_EXT_UNWRITTEN) {
-			desc->c_unwritten = 1;
+			desc->c_clear_unwritten = 1;
 			desc->c_needs_zero = 1;
 		}
 
+		ret = ocfs2_unwritten_check(inode, wc, desc);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+
 		num_clusters--;
 	}
 
@@ -2022,8 +1625,10 @@
 	if (ret)
 		mlog_errno(ret);
 
-	wc->w_first_new_cpos =
-		ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
+	/* There is no wc if this is call from direct. */
+	if (wc)
+		wc->w_first_new_cpos =
+			ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
 
 	return ret;
 }
@@ -2077,9 +1682,8 @@
 	return ret;
 }
 
-int ocfs2_write_begin_nolock(struct file *filp,
-			     struct address_space *mapping,
-			     loff_t pos, unsigned len, unsigned flags,
+int ocfs2_write_begin_nolock(struct address_space *mapping,
+			     loff_t pos, unsigned len, ocfs2_write_type_t type,
 			     struct page **pagep, void **fsdata,
 			     struct buffer_head *di_bh, struct page *mmap_page)
 {
@@ -2096,7 +1700,7 @@
 	int try_free = 1, ret1;
 
 try_again:
-	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
+	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, type, di_bh);
 	if (ret) {
 		mlog_errno(ret);
 		return ret;
@@ -2115,14 +1719,17 @@
 		}
 	}
 
-	if (ocfs2_sparse_alloc(osb))
-		ret = ocfs2_zero_tail(inode, di_bh, pos);
-	else
-		ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
-						   wc);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
+	/* Direct io change i_size late, should not zero tail here. */
+	if (type != OCFS2_WRITE_DIRECT) {
+		if (ocfs2_sparse_alloc(osb))
+			ret = ocfs2_zero_tail(inode, di_bh, pos);
+		else
+			ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
+							   len, wc);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
 	}
 
 	ret = ocfs2_check_range_for_refcount(inode, pos, len);
@@ -2153,7 +1760,7 @@
 			(unsigned long long)OCFS2_I(inode)->ip_blkno,
 			(long long)i_size_read(inode),
 			le32_to_cpu(di->i_clusters),
-			pos, len, flags, mmap_page,
+			pos, len, type, mmap_page,
 			clusters_to_alloc, extents_to_split);
 
 	/*
@@ -2183,17 +1790,17 @@
 
 		credits = ocfs2_calc_extend_credits(inode->i_sb,
 						    &di->id2.i_list);
-
-	}
+	} else if (type == OCFS2_WRITE_DIRECT)
+		/* direct write needs not to start trans if no extents alloc. */
+		goto success;
 
 	/*
 	 * We have to zero sparse allocated clusters, unwritten extent clusters,
 	 * and non-sparse clusters we just extended.  For non-sparse writes,
 	 * we know zeros will only be needed in the first and/or last cluster.
 	 */
-	if (clusters_to_alloc || extents_to_split ||
-	    (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
-			    wc->w_desc[wc->w_clen - 1].c_needs_zero)))
+	if (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
+			   wc->w_desc[wc->w_clen - 1].c_needs_zero))
 		cluster_of_pages = 1;
 	else
 		cluster_of_pages = 0;
@@ -2260,7 +1867,8 @@
 		ocfs2_free_alloc_context(meta_ac);
 
 success:
-	*pagep = wc->w_target_page;
+	if (pagep)
+		*pagep = wc->w_target_page;
 	*fsdata = wc;
 	return 0;
 out_quota:
@@ -2271,7 +1879,7 @@
 	ocfs2_commit_trans(osb, handle);
 
 out:
-	ocfs2_free_write_ctxt(wc);
+	ocfs2_free_write_ctxt(inode, wc);
 
 	if (data_ac) {
 		ocfs2_free_alloc_context(data_ac);
@@ -2323,8 +1931,8 @@
 	 */
 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
-	ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep,
-				       fsdata, di_bh, NULL);
+	ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER,
+				       pagep, fsdata, di_bh, NULL);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_fail;
@@ -2373,7 +1981,7 @@
 			   struct page *page, void *fsdata)
 {
 	int i, ret;
-	unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned from, to, start = pos & (PAGE_SIZE - 1);
 	struct inode *inode = mapping->host;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_write_ctxt *wc = fsdata;
@@ -2381,12 +1989,16 @@
 	handle_t *handle = wc->w_handle;
 	struct page *tmppage;
 
-	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
-			OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret) {
-		copied = ret;
-		mlog_errno(ret);
-		goto out;
+	BUG_ON(!list_empty(&wc->w_unwritten_list));
+
+	if (handle) {
+		ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+				wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			copied = ret;
+			mlog_errno(ret);
+			goto out;
+		}
 	}
 
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
@@ -2394,24 +2006,29 @@
 		goto out_write_size;
 	}
 
-	if (unlikely(copied < len)) {
+	if (unlikely(copied < len) && wc->w_target_page) {
 		if (!PageUptodate(wc->w_target_page))
 			copied = 0;
 
 		ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
 				       start+len);
 	}
-	flush_dcache_page(wc->w_target_page);
+	if (wc->w_target_page)
+		flush_dcache_page(wc->w_target_page);
 
 	for(i = 0; i < wc->w_num_pages; i++) {
 		tmppage = wc->w_pages[i];
 
+		/* This is the direct io target page. */
+		if (tmppage == NULL)
+			continue;
+
 		if (tmppage == wc->w_target_page) {
 			from = wc->w_target_from;
 			to = wc->w_target_to;
 
-			BUG_ON(from > PAGE_CACHE_SIZE ||
-			       to > PAGE_CACHE_SIZE ||
+			BUG_ON(from > PAGE_SIZE ||
+			       to > PAGE_SIZE ||
 			       to < from);
 		} else {
 			/*
@@ -2420,29 +2037,33 @@
 			 * to flush their entire range.
 			 */
 			from = 0;
-			to = PAGE_CACHE_SIZE;
+			to = PAGE_SIZE;
 		}
 
 		if (page_has_buffers(tmppage)) {
-			if (ocfs2_should_order_data(inode))
-				ocfs2_jbd2_file_inode(wc->w_handle, inode);
+			if (handle && ocfs2_should_order_data(inode))
+				ocfs2_jbd2_file_inode(handle, inode);
 			block_commit_write(tmppage, from, to);
 		}
 	}
 
 out_write_size:
-	pos += copied;
-	if (pos > i_size_read(inode)) {
-		i_size_write(inode, pos);
-		mark_inode_dirty(inode);
+	/* Direct io do not update i_size here. */
+	if (wc->w_type != OCFS2_WRITE_DIRECT) {
+		pos += copied;
+		if (pos > i_size_read(inode)) {
+			i_size_write(inode, pos);
+			mark_inode_dirty(inode);
+		}
+		inode->i_blocks = ocfs2_inode_sector_count(inode);
+		di->i_size = cpu_to_le64((u64)i_size_read(inode));
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+		di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+		ocfs2_update_inode_fsync_trans(handle, inode, 1);
 	}
-	inode->i_blocks = ocfs2_inode_sector_count(inode);
-	di->i_size = cpu_to_le64((u64)i_size_read(inode));
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
-	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
-	ocfs2_update_inode_fsync_trans(handle, inode, 1);
-	ocfs2_journal_dirty(handle, wc->w_di_bh);
+	if (handle)
+		ocfs2_journal_dirty(handle, wc->w_di_bh);
 
 out:
 	/* unlock pages before dealloc since it needs acquiring j_trans_barrier
@@ -2452,7 +2073,8 @@
 	 */
 	ocfs2_unlock_pages(wc);
 
-	ocfs2_commit_trans(osb, handle);
+	if (handle)
+		ocfs2_commit_trans(osb, handle);
 
 	ocfs2_run_deallocs(osb, &wc->w_dealloc);
 
@@ -2477,6 +2099,360 @@
 	return ret;
 }
 
+struct ocfs2_dio_write_ctxt {
+	struct list_head	dw_zero_list;
+	unsigned		dw_zero_count;
+	int			dw_orphaned;
+	pid_t			dw_writer_pid;
+};
+
+static struct ocfs2_dio_write_ctxt *
+ocfs2_dio_alloc_write_ctx(struct buffer_head *bh, int *alloc)
+{
+	struct ocfs2_dio_write_ctxt *dwc = NULL;
+
+	if (bh->b_private)
+		return bh->b_private;
+
+	dwc = kmalloc(sizeof(struct ocfs2_dio_write_ctxt), GFP_NOFS);
+	if (dwc == NULL)
+		return NULL;
+	INIT_LIST_HEAD(&dwc->dw_zero_list);
+	dwc->dw_zero_count = 0;
+	dwc->dw_orphaned = 0;
+	dwc->dw_writer_pid = task_pid_nr(current);
+	bh->b_private = dwc;
+	*alloc = 1;
+
+	return dwc;
+}
+
+static void ocfs2_dio_free_write_ctx(struct inode *inode,
+				     struct ocfs2_dio_write_ctxt *dwc)
+{
+	ocfs2_free_unwritten_list(inode, &dwc->dw_zero_list);
+	kfree(dwc);
+}
+
+/*
+ * TODO: Make this into a generic get_blocks function.
+ *
+ * From do_direct_io in direct-io.c:
+ *  "So what we do is to permit the ->get_blocks function to populate
+ *   bh.b_size with the size of IO which is permitted at this offset and
+ *   this i_blkbits."
+ *
+ * This function is called directly from get_more_blocks in direct-io.c.
+ *
+ * called like this: dio->get_blocks(dio->inode, fs_startblk,
+ * 					fs_count, map_bh, dio->rw == WRITE);
+ */
+static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
+			       struct buffer_head *bh_result, int create)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_write_ctxt *wc;
+	struct ocfs2_write_cluster_desc *desc = NULL;
+	struct ocfs2_dio_write_ctxt *dwc = NULL;
+	struct buffer_head *di_bh = NULL;
+	u64 p_blkno;
+	loff_t pos = iblock << inode->i_sb->s_blocksize_bits;
+	unsigned len, total_len = bh_result->b_size;
+	int ret = 0, first_get_block = 0;
+
+	len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
+	len = min(total_len, len);
+
+	mlog(0, "get block of %lu at %llu:%u req %u\n",
+			inode->i_ino, pos, len, total_len);
+
+	/*
+	 * Because we need to change file size in ocfs2_dio_end_io_write(), or
+	 * we may need to add it to orphan dir. So can not fall to fast path
+	 * while file size will be changed.
+	 */
+	if (pos + total_len <= i_size_read(inode)) {
+		down_read(&oi->ip_alloc_sem);
+		/* This is the fast path for re-write. */
+		ret = ocfs2_get_block(inode, iblock, bh_result, create);
+
+		up_read(&oi->ip_alloc_sem);
+
+		if (buffer_mapped(bh_result) &&
+		    !buffer_new(bh_result) &&
+		    ret == 0)
+			goto out;
+
+		/* Clear state set by ocfs2_get_block. */
+		bh_result->b_state = 0;
+	}
+
+	dwc = ocfs2_dio_alloc_write_ctx(bh_result, &first_get_block);
+	if (unlikely(dwc == NULL)) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (ocfs2_clusters_for_bytes(inode->i_sb, pos + total_len) >
+	    ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)) &&
+	    !dwc->dw_orphaned) {
+		/*
+		 * when we are going to alloc extents beyond file size, add the
+		 * inode to orphan dir, so we can recall those spaces when
+		 * system crashed during write.
+		 */
+		ret = ocfs2_add_inode_to_orphan(osb, inode);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto out;
+		}
+		dwc->dw_orphaned = 1;
+	}
+
+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	down_write(&oi->ip_alloc_sem);
+
+	if (first_get_block) {
+		if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+			ret = ocfs2_zero_tail(inode, di_bh, pos);
+		else
+			ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
+							   total_len, NULL);
+		if (ret < 0) {
+			mlog_errno(ret);
+			goto unlock;
+		}
+	}
+
+	ret = ocfs2_write_begin_nolock(inode->i_mapping, pos, len,
+				       OCFS2_WRITE_DIRECT, NULL,
+				       (void **)&wc, di_bh, NULL);
+	if (ret) {
+		mlog_errno(ret);
+		goto unlock;
+	}
+
+	desc = &wc->w_desc[0];
+
+	p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, desc->c_phys);
+	BUG_ON(p_blkno == 0);
+	p_blkno += iblock & (u64)(ocfs2_clusters_to_blocks(inode->i_sb, 1) - 1);
+
+	map_bh(bh_result, inode->i_sb, p_blkno);
+	bh_result->b_size = len;
+	if (desc->c_needs_zero)
+		set_buffer_new(bh_result);
+
+	/* May sleep in end_io. It should not happen in a irq context. So defer
+	 * it to dio work queue. */
+	set_buffer_defer_completion(bh_result);
+
+	if (!list_empty(&wc->w_unwritten_list)) {
+		struct ocfs2_unwritten_extent *ue = NULL;
+
+		ue = list_first_entry(&wc->w_unwritten_list,
+				      struct ocfs2_unwritten_extent,
+				      ue_node);
+		BUG_ON(ue->ue_cpos != desc->c_cpos);
+		/* The physical address may be 0, fill it. */
+		ue->ue_phys = desc->c_phys;
+
+		list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list);
+		dwc->dw_zero_count++;
+	}
+
+	ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
+	BUG_ON(ret != len);
+	ret = 0;
+unlock:
+	up_write(&oi->ip_alloc_sem);
+	ocfs2_inode_unlock(inode, 1);
+	brelse(di_bh);
+out:
+	if (ret < 0)
+		ret = -EIO;
+	return ret;
+}
+
+static void ocfs2_dio_end_io_write(struct inode *inode,
+				   struct ocfs2_dio_write_ctxt *dwc,
+				   loff_t offset,
+				   ssize_t bytes)
+{
+	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_extent_tree et;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_unwritten_extent *ue = NULL;
+	struct buffer_head *di_bh = NULL;
+	struct ocfs2_dinode *di;
+	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_alloc_context *meta_ac = NULL;
+	handle_t *handle = NULL;
+	loff_t end = offset + bytes;
+	int ret = 0, credits = 0, locked = 0;
+
+	ocfs2_init_dealloc_ctxt(&dealloc);
+
+	/* We do clear unwritten, delete orphan, change i_size here. If neither
+	 * of these happen, we can skip all this. */
+	if (list_empty(&dwc->dw_zero_list) &&
+	    end <= i_size_read(inode) &&
+	    !dwc->dw_orphaned)
+		goto out;
+
+	/* ocfs2_file_write_iter will get i_mutex, so we need not lock if we
+	 * are in that context. */
+	if (dwc->dw_writer_pid != task_pid_nr(current)) {
+		mutex_lock(&inode->i_mutex);
+		locked = 1;
+	}
+
+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	down_write(&oi->ip_alloc_sem);
+
+	/* Delete orphan before acquire i_mutex. */
+	if (dwc->dw_orphaned) {
+		BUG_ON(dwc->dw_writer_pid != task_pid_nr(current));
+
+		end = end > i_size_read(inode) ? end : 0;
+
+		ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
+				!!end, end);
+		if (ret < 0)
+			mlog_errno(ret);
+	}
+
+	di = (struct ocfs2_dinode *)di_bh;
+
+	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
+
+	ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
+				    &data_ac, &meta_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto unlock;
+	}
+
+	credits = ocfs2_calc_extend_credits(inode->i_sb, &di->id2.i_list);
+
+	handle = ocfs2_start_trans(osb, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		mlog_errno(ret);
+		goto unlock;
+	}
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto commit;
+	}
+
+	list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+		ret = ocfs2_mark_extent_written(inode, &et, handle,
+						ue->ue_cpos, 1,
+						ue->ue_phys,
+						meta_ac, &dealloc);
+		if (ret < 0) {
+			mlog_errno(ret);
+			break;
+		}
+	}
+
+	if (end > i_size_read(inode)) {
+		ret = ocfs2_set_inode_size(handle, inode, di_bh, end);
+		if (ret < 0)
+			mlog_errno(ret);
+	}
+commit:
+	ocfs2_commit_trans(osb, handle);
+unlock:
+	up_write(&oi->ip_alloc_sem);
+	ocfs2_inode_unlock(inode, 1);
+	brelse(di_bh);
+out:
+	if (data_ac)
+		ocfs2_free_alloc_context(data_ac);
+	if (meta_ac)
+		ocfs2_free_alloc_context(meta_ac);
+	ocfs2_run_deallocs(osb, &dealloc);
+	if (locked)
+		mutex_unlock(&inode->i_mutex);
+	ocfs2_dio_free_write_ctx(inode, dwc);
+}
+
+/*
+ * ocfs2_dio_end_io is called by the dio core when a dio is finished.  We're
+ * particularly interested in the aio/dio case.  We use the rw_lock DLM lock
+ * to protect io on one node from truncation on another.
+ */
+static int ocfs2_dio_end_io(struct kiocb *iocb,
+			    loff_t offset,
+			    ssize_t bytes,
+			    void *private)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	int level;
+
+	if (bytes <= 0)
+		return 0;
+
+	/* this io's submitter should not have unlocked this before we could */
+	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
+
+	if (private)
+		ocfs2_dio_end_io_write(inode, private, offset, bytes);
+
+	ocfs2_iocb_clear_rw_locked(iocb);
+
+	level = ocfs2_iocb_rw_locked_level(iocb);
+	ocfs2_rw_unlock(inode, level);
+	return 0;
+}
+
+static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+			       loff_t offset)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file)->i_mapping->host;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	loff_t end = offset + iter->count;
+	get_block_t *get_block;
+
+	/*
+	 * Fallback to buffered I/O if we see an inode without
+	 * extents.
+	 */
+	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+		return 0;
+
+	/* Fallback to buffered I/O if we do not support append dio. */
+	if (end > i_size_read(inode) && !ocfs2_supports_append_dio(osb))
+		return 0;
+
+	if (iov_iter_rw(iter) == READ)
+		get_block = ocfs2_get_block;
+	else
+		get_block = ocfs2_dio_get_block;
+
+	return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+				    iter, offset, get_block,
+				    ocfs2_dio_end_io, NULL, 0);
+}
+
 const struct address_space_operations ocfs2_aops = {
 	.readpage		= ocfs2_readpage,
 	.readpages		= ocfs2_readpages,

diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 24e496d..b1c9f28 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h

@@ -47,9 +47,14 @@
 			   loff_t pos, unsigned len, unsigned copied,
 			   struct page *page, void *fsdata);
 
-int ocfs2_write_begin_nolock(struct file *filp,
-			     struct address_space *mapping,
-			     loff_t pos, unsigned len, unsigned flags,
+typedef enum {
+	OCFS2_WRITE_BUFFER = 0,
+	OCFS2_WRITE_DIRECT,
+	OCFS2_WRITE_MMAP,
+} ocfs2_write_type_t;
+
+int ocfs2_write_begin_nolock(struct address_space *mapping,
+			     loff_t pos, unsigned len, ocfs2_write_type_t type,
 			     struct page **pagep, void **fsdata,
 			     struct buffer_head *di_bh, struct page *mmap_page);
 
@@ -79,7 +84,6 @@
 enum ocfs2_iocb_lock_bits {
 	OCFS2_IOCB_RW_LOCK = 0,
 	OCFS2_IOCB_RW_LOCK_LEVEL,
-	OCFS2_IOCB_UNALIGNED_IO,
 	OCFS2_IOCB_NUM_LOCKS
 };
 
@@ -88,11 +92,4 @@
 #define ocfs2_iocb_rw_locked_level(iocb) \
 	test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
 
-#define ocfs2_iocb_set_unaligned_aio(iocb) \
-	set_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
-#define ocfs2_iocb_clear_unaligned_aio(iocb) \
-	clear_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
-#define ocfs2_iocb_is_unaligned_aio(iocb) \
-	test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
-
 #endif /* OCFS2_FILE_H */

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index ef6a2ec..1934abb 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c

@@ -417,13 +417,13 @@
 	bio->bi_private = wc;
 	bio->bi_end_io = o2hb_bio_end_io;
 
-	vec_start = (cs << bits) % PAGE_CACHE_SIZE;
+	vec_start = (cs << bits) % PAGE_SIZE;
 	while(cs < max_slots) {
 		current_page = cs / spp;
 		page = reg->hr_slot_data[current_page];
 
-		vec_len = min(PAGE_CACHE_SIZE - vec_start,
-			      (max_slots-cs) * (PAGE_CACHE_SIZE/spp) );
+		vec_len = min(PAGE_SIZE - vec_start,
+			      (max_slots-cs) * (PAGE_SIZE/spp) );
 
 		mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n",
 		     current_page, vec_len, vec_start);
@@ -431,7 +431,7 @@
 		len = bio_add_page(bio, page, vec_len, vec_start);
 		if (len != vec_len) break;
 
-		cs += vec_len / (PAGE_CACHE_SIZE/spp);
+		cs += vec_len / (PAGE_SIZE/spp);
 		vec_start = 0;
 	}
 
@@ -1444,8 +1444,8 @@
 	debugfs_remove(reg->hr_debug_dir);
 	kfree(reg->hr_db_livenodes);
 	kfree(reg->hr_db_regnum);
-	kfree(reg->hr_debug_elapsed_time);
-	kfree(reg->hr_debug_pinned);
+	kfree(reg->hr_db_elapsed_time);
+	kfree(reg->hr_db_pinned);
 
 	spin_lock(&o2hb_live_lock);
 	list_del(&reg->hr_all_item);
@@ -1576,7 +1576,7 @@
 
 static void o2hb_init_region_params(struct o2hb_region *reg)
 {
-	reg->hr_slots_per_page = PAGE_CACHE_SIZE >> reg->hr_block_bits;
+	reg->hr_slots_per_page = PAGE_SIZE >> reg->hr_block_bits;
 	reg->hr_timeout_ms = O2HB_REGION_TIMEOUT_MS;
 
 	mlog(ML_HEARTBEAT, "hr_start_block = %llu, hr_blocks = %u\n",

diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index e36d63f..cdeafb4 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c

@@ -212,6 +212,12 @@
 	if (lock->lksb->flags & DLM_LKSB_PUT_LVB)
 		memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN);
 
+	/*
+	 * Move the lock to the tail because it may be the only lock which has
+	 * an invalid lvb.
+	 */
+	list_move_tail(&lock->list, &res->granted);
+
 	status = DLM_NORMAL;
 	*call_ast = 1;
 	goto unlock_exit;
@@ -262,6 +268,7 @@
 				  struct dlm_lock *lock, int flags, int type)
 {
 	enum dlm_status status;
+	u8 old_owner = res->owner;
 
 	mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
 	     lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -287,6 +294,19 @@
 		status = DLM_DENIED;
 		goto bail;
 	}
+
+	if (lock->ml.type == type && lock->ml.convert_type == LKM_IVMODE) {
+		mlog(0, "last convert request returned DLM_RECOVERING, but "
+		     "owner has already queued and sent ast to me. res %.*s, "
+		     "(cookie=%u:%llu, type=%d, conv=%d)\n",
+		     res->lockname.len, res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+		     lock->ml.type, lock->ml.convert_type);
+		status = DLM_NORMAL;
+		goto bail;
+	}
+
 	res->state |= DLM_LOCK_RES_IN_PROGRESS;
 	/* move lock to local convert queue */
 	/* do not alter lock refcount.  switching lists. */
@@ -316,11 +336,19 @@
 	spin_lock(&res->spinlock);
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 	lock->convert_pending = 0;
-	/* if it failed, move it back to granted queue */
+	/* if it failed, move it back to granted queue.
+	 * if master returns DLM_NORMAL and then down before sending ast,
+	 * it may have already been moved to granted queue, reset to
+	 * DLM_RECOVERING and retry convert */
 	if (status != DLM_NORMAL) {
 		if (status != DLM_NOTQUEUED)
 			dlm_error(status);
 		dlm_revert_pending_convert(res, lock);
+	} else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
+			(old_owner != res->owner)) {
+		mlog(0, "res %.*s is in recovering or has been recovered.\n",
+				res->lockname.len, res->lockname.name);
+		status = DLM_RECOVERING;
 	}
 bail:
 	spin_unlock(&res->spinlock);

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index cd38488..f6b3138 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c

@@ -2083,7 +2083,6 @@
 			dlm_lock_get(lock);
 			if (lock->convert_pending) {
 				/* move converting lock back to granted */
-				BUG_ON(i != DLM_CONVERTING_LIST);
 				mlog(0, "node died with convert pending "
 				     "on %.*s. move back to granted list.\n",
 				     res->lockname.len, res->lockname.name);

diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 03768bb..47b3b2d 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c

@@ -571,8 +571,8 @@
 			    int silent)
 {
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = DLMFS_MAGIC;
 	sb->s_op = &dlmfs_ops;
 	sb->s_root = d_make_root(dlmfs_get_root_inode(sb));

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7cb38fd..5308841 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c

@@ -770,14 +770,14 @@
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
+	unsigned long index = abs_from >> PAGE_SHIFT;
 	handle_t *handle;
 	int ret = 0;
 	unsigned zero_from, zero_to, block_start, block_end;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 
 	BUG_ON(abs_from >= abs_to);
-	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
+	BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
 	BUG_ON(abs_from & (inode->i_blkbits - 1));
 
 	handle = ocfs2_zero_start_ordered_transaction(inode, di_bh);
@@ -794,10 +794,10 @@
 	}
 
 	/* Get the offsets within the page that we want to zero */
-	zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
-	zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+	zero_from = abs_from & (PAGE_SIZE - 1);
+	zero_to = abs_to & (PAGE_SIZE - 1);
 	if (!zero_to)
-		zero_to = PAGE_CACHE_SIZE;
+		zero_to = PAGE_SIZE;
 
 	trace_ocfs2_write_zero_page(
 			(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -851,7 +851,7 @@
 
 out_unlock:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 out_commit_trans:
 	if (handle)
 		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -959,7 +959,7 @@
 	BUG_ON(range_start >= range_end);
 
 	while (zero_pos < range_end) {
-		next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+		next_pos = (zero_pos & PAGE_MASK) + PAGE_SIZE;
 		if (next_pos > range_end)
 			next_pos = range_end;
 		rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
@@ -1381,44 +1381,6 @@
 	return ret;
 }
 
-/*
- * Will look for holes and unwritten extents in the range starting at
- * pos for count bytes (inclusive).
- */
-static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
-				       size_t count)
-{
-	int ret = 0;
-	unsigned int extent_flags;
-	u32 cpos, clusters, extent_len, phys_cpos;
-	struct super_block *sb = inode->i_sb;
-
-	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
-	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
-
-	while (clusters) {
-		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
-					 &extent_flags);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
-
-		if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) {
-			ret = 1;
-			break;
-		}
-
-		if (extent_len > clusters)
-			extent_len = clusters;
-
-		clusters -= extent_len;
-		cpos += extent_len;
-	}
-out:
-	return ret;
-}
-
 static int ocfs2_write_remove_suid(struct inode *inode)
 {
 	int ret;
@@ -2129,18 +2091,12 @@
 
 static int ocfs2_prepare_inode_for_write(struct file *file,
 					 loff_t pos,
-					 size_t count,
-					 int appending,
-					 int *direct_io,
-					 int *has_refcount)
+					 size_t count)
 {
 	int ret = 0, meta_level = 0;
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = d_inode(dentry);
 	loff_t end;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	int full_coherency = !(osb->s_mount_opt &
-		OCFS2_MOUNT_COHERENCY_BUFFERED);
 
 	/*
 	 * We start with a read level meta lock and only jump to an ex
@@ -2189,10 +2145,6 @@
 							       pos,
 							       count,
 							       &meta_level);
-			if (has_refcount)
-				*has_refcount = 1;
-			if (direct_io)
-				*direct_io = 0;
 		}
 
 		if (ret < 0) {
@@ -2200,67 +2152,12 @@
 			goto out_unlock;
 		}
 
-		/*
-		 * Skip the O_DIRECT checks if we don't need
-		 * them.
-		 */
-		if (!direct_io || !(*direct_io))
-			break;
-
-		/*
-		 * There's no sane way to do direct writes to an inode
-		 * with inline data.
-		 */
-		if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
-			*direct_io = 0;
-			break;
-		}
-
-		/*
-		 * Allowing concurrent direct writes means
-		 * i_size changes wouldn't be synchronized, so
-		 * one node could wind up truncating another
-		 * nodes writes.
-		 */
-		if (end > i_size_read(inode) && !full_coherency) {
-			*direct_io = 0;
-			break;
-		}
-
-		/*
-		 * Fallback to old way if the feature bit is not set.
-		 */
-		if (end > i_size_read(inode) &&
-				!ocfs2_supports_append_dio(osb)) {
-			*direct_io = 0;
-			break;
-		}
-
-		/*
-		 * We don't fill holes during direct io, so
-		 * check for them here. If any are found, the
-		 * caller will have to retake some cluster
-		 * locks and initiate the io as buffered.
-		 */
-		ret = ocfs2_check_range_for_holes(inode, pos, count);
-		if (ret == 1) {
-			/*
-			 * Fallback to old way if the feature bit is not set.
-			 * Otherwise try dio first and then complete the rest
-			 * request through buffer io.
-			 */
-			if (!ocfs2_supports_append_dio(osb))
-				*direct_io = 0;
-			ret = 0;
-		} else if (ret < 0)
-			mlog_errno(ret);
 		break;
 	}
 
 out_unlock:
 	trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
-					    pos, appending, count,
-					    direct_io, has_refcount);
+					    pos, count);
 
 	if (meta_level >= 0)
 		ocfs2_inode_unlock(inode, meta_level);
@@ -2272,18 +2169,16 @@
 static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 				    struct iov_iter *from)
 {
-	int direct_io, appending, rw_level;
-	int can_do_direct, has_refcount = 0;
+	int direct_io, rw_level;
 	ssize_t written = 0;
 	ssize_t ret;
-	size_t count = iov_iter_count(from), orig_count;
+	size_t count = iov_iter_count(from);
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int full_coherency = !(osb->s_mount_opt &
 			       OCFS2_MOUNT_COHERENCY_BUFFERED);
-	int unaligned_dio = 0;
-	int dropped_dio = 0;
+	void *saved_ki_complete = NULL;
 	int append_write = ((iocb->ki_pos + count) >=
 			i_size_read(inode) ? 1 : 0);
 
@@ -2296,12 +2191,10 @@
 	if (count == 0)
 		return 0;
 
-	appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
 	direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
 
 	inode_lock(inode);
 
-relock:
 	/*
 	 * Concurrent O_DIRECT writes are allowed with
 	 * mount_option "coherency=buffered".
@@ -2334,7 +2227,6 @@
 		ocfs2_inode_unlock(inode, 1);
 	}
 
-	orig_count = iov_iter_count(from);
 	ret = generic_write_checks(iocb, from);
 	if (ret <= 0) {
 		if (ret)
@@ -2343,41 +2235,18 @@
 	}
 	count = ret;
 
-	can_do_direct = direct_io;
-	ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending,
-					    &can_do_direct, &has_refcount);
+	ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
 	}
 
-	if (direct_io && !is_sync_kiocb(iocb))
-		unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos);
-
-	/*
-	 * We can't complete the direct I/O as requested, fall back to
-	 * buffered I/O.
-	 */
-	if (direct_io && !can_do_direct) {
-		ocfs2_rw_unlock(inode, rw_level);
-
-		rw_level = -1;
-
-		direct_io = 0;
-		iocb->ki_flags &= ~IOCB_DIRECT;
-		iov_iter_reexpand(from, orig_count);
-		dropped_dio = 1;
-		goto relock;
-	}
-
-	if (unaligned_dio) {
+	if (direct_io && !is_sync_kiocb(iocb) &&
+	    ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) {
 		/*
-		 * Wait on previous unaligned aio to complete before
-		 * proceeding.
+		 * Make it a sync io if it's an unaligned aio.
 		 */
-		mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio);
-		/* Mark the iocb as needing an unlock in ocfs2_dio_end_io */
-		ocfs2_iocb_set_unaligned_aio(iocb);
+		saved_ki_complete = xchg(&iocb->ki_complete, NULL);
 	}
 
 	/* communicate with ocfs2_dio_end_io */
@@ -2398,14 +2267,13 @@
 	 */
 	if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
 		rw_level = -1;
-		unaligned_dio = 0;
 	}
 
 	if (unlikely(written <= 0))
-		goto no_sync;
+		goto out;
 
 	if (((file->f_flags & O_DSYNC) && !direct_io) ||
-	    IS_SYNC(inode) || dropped_dio) {
+	    IS_SYNC(inode)) {
 		ret = filemap_fdatawrite_range(file->f_mapping,
 					       iocb->ki_pos - written,
 					       iocb->ki_pos - 1);
@@ -2424,13 +2292,10 @@
 						      iocb->ki_pos - 1);
 	}
 
-no_sync:
-	if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) {
-		ocfs2_iocb_clear_unaligned_aio(iocb);
-		mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
-	}
-
 out:
+	if (saved_ki_complete)
+		xchg(&iocb->ki_complete, saved_ki_complete);
+
 	if (rw_level != -1)
 		ocfs2_rw_unlock(inode, rw_level);
 

diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
new file mode 100644
index 0000000..2cabbcf
--- /dev/null
+++ b/fs/ocfs2/filecheck.c

@@ -0,0 +1,606 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * filecheck.c
+ *
+ * Code which implements online file check.
+ *
+ * Copyright (C) 2016 SuSE.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/sysctl.h>
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "ocfs2_fs.h"
+#include "stackglue.h"
+#include "inode.h"
+
+#include "filecheck.h"
+
+
+/* File check error strings,
+ * must correspond with error number in header file.
+ */
+static const char * const ocfs2_filecheck_errs[] = {
+	"SUCCESS",
+	"FAILED",
+	"INPROGRESS",
+	"READONLY",
+	"INJBD",
+	"INVALIDINO",
+	"BLOCKECC",
+	"BLOCKNO",
+	"VALIDFLAG",
+	"GENERATION",
+	"UNSUPPORTED"
+};
+
+static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock);
+static LIST_HEAD(ocfs2_filecheck_sysfs_list);
+
+struct ocfs2_filecheck {
+	struct list_head fc_head;	/* File check entry list head */
+	spinlock_t fc_lock;
+	unsigned int fc_max;	/* Maximum number of entry in list */
+	unsigned int fc_size;	/* Current entry count in list */
+	unsigned int fc_done;	/* Finished entry count in list */
+};
+
+struct ocfs2_filecheck_sysfs_entry {	/* sysfs entry per mounting */
+	struct list_head fs_list;
+	atomic_t fs_count;
+	struct super_block *fs_sb;
+	struct kset *fs_devicekset;
+	struct kset *fs_fcheckkset;
+	struct ocfs2_filecheck *fs_fcheck;
+};
+
+#define OCFS2_FILECHECK_MAXSIZE		100
+#define OCFS2_FILECHECK_MINSIZE		10
+
+/* File check operation type */
+enum {
+	OCFS2_FILECHECK_TYPE_CHK = 0,	/* Check a file(inode) */
+	OCFS2_FILECHECK_TYPE_FIX,	/* Fix a file(inode) */
+	OCFS2_FILECHECK_TYPE_SET = 100	/* Set entry list maximum size */
+};
+
+struct ocfs2_filecheck_entry {
+	struct list_head fe_list;
+	unsigned long fe_ino;
+	unsigned int fe_type;
+	unsigned int fe_done:1;
+	unsigned int fe_status:31;
+};
+
+struct ocfs2_filecheck_args {
+	unsigned int fa_type;
+	union {
+		unsigned long fa_ino;
+		unsigned int fa_len;
+	};
+};
+
+static const char *
+ocfs2_filecheck_error(int errno)
+{
+	if (!errno)
+		return ocfs2_filecheck_errs[errno];
+
+	BUG_ON(errno < OCFS2_FILECHECK_ERR_START ||
+	       errno > OCFS2_FILECHECK_ERR_END);
+	return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1];
+}
+
+static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    char *buf);
+static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count);
+static struct kobj_attribute ocfs2_attr_filecheck_chk =
+					__ATTR(check, S_IRUSR | S_IWUSR,
+					ocfs2_filecheck_show,
+					ocfs2_filecheck_store);
+static struct kobj_attribute ocfs2_attr_filecheck_fix =
+					__ATTR(fix, S_IRUSR | S_IWUSR,
+					ocfs2_filecheck_show,
+					ocfs2_filecheck_store);
+static struct kobj_attribute ocfs2_attr_filecheck_set =
+					__ATTR(set, S_IRUSR | S_IWUSR,
+					ocfs2_filecheck_show,
+					ocfs2_filecheck_store);
+
+static int ocfs2_filecheck_sysfs_wait(atomic_t *p)
+{
+	schedule();
+	return 0;
+}
+
+static void
+ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+	struct ocfs2_filecheck_entry *p;
+
+	if (!atomic_dec_and_test(&entry->fs_count))
+		wait_on_atomic_t(&entry->fs_count, ocfs2_filecheck_sysfs_wait,
+				 TASK_UNINTERRUPTIBLE);
+
+	spin_lock(&entry->fs_fcheck->fc_lock);
+	while (!list_empty(&entry->fs_fcheck->fc_head)) {
+		p = list_first_entry(&entry->fs_fcheck->fc_head,
+				     struct ocfs2_filecheck_entry, fe_list);
+		list_del(&p->fe_list);
+		BUG_ON(!p->fe_done); /* To free a undone file check entry */
+		kfree(p);
+	}
+	spin_unlock(&entry->fs_fcheck->fc_lock);
+
+	kset_unregister(entry->fs_fcheckkset);
+	kset_unregister(entry->fs_devicekset);
+	kfree(entry->fs_fcheck);
+	kfree(entry);
+}
+
+static void
+ocfs2_filecheck_sysfs_add(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+	spin_lock(&ocfs2_filecheck_sysfs_lock);
+	list_add_tail(&entry->fs_list, &ocfs2_filecheck_sysfs_list);
+	spin_unlock(&ocfs2_filecheck_sysfs_lock);
+}
+
+static int ocfs2_filecheck_sysfs_del(const char *devname)
+{
+	struct ocfs2_filecheck_sysfs_entry *p;
+
+	spin_lock(&ocfs2_filecheck_sysfs_lock);
+	list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
+		if (!strcmp(p->fs_sb->s_id, devname)) {
+			list_del(&p->fs_list);
+			spin_unlock(&ocfs2_filecheck_sysfs_lock);
+			ocfs2_filecheck_sysfs_free(p);
+			return 0;
+		}
+	}
+	spin_unlock(&ocfs2_filecheck_sysfs_lock);
+	return 1;
+}
+
+static void
+ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+	if (atomic_dec_and_test(&entry->fs_count))
+		wake_up_atomic_t(&entry->fs_count);
+}
+
+static struct ocfs2_filecheck_sysfs_entry *
+ocfs2_filecheck_sysfs_get(const char *devname)
+{
+	struct ocfs2_filecheck_sysfs_entry *p = NULL;
+
+	spin_lock(&ocfs2_filecheck_sysfs_lock);
+	list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
+		if (!strcmp(p->fs_sb->s_id, devname)) {
+			atomic_inc(&p->fs_count);
+			spin_unlock(&ocfs2_filecheck_sysfs_lock);
+			return p;
+		}
+	}
+	spin_unlock(&ocfs2_filecheck_sysfs_lock);
+	return NULL;
+}
+
+int ocfs2_filecheck_create_sysfs(struct super_block *sb)
+{
+	int ret = 0;
+	struct kset *device_kset = NULL;
+	struct kset *fcheck_kset = NULL;
+	struct ocfs2_filecheck *fcheck = NULL;
+	struct ocfs2_filecheck_sysfs_entry *entry = NULL;
+	struct attribute **attrs = NULL;
+	struct attribute_group attrgp;
+
+	if (!ocfs2_kset)
+		return -ENOMEM;
+
+	attrs = kmalloc(sizeof(struct attribute *) * 4, GFP_NOFS);
+	if (!attrs) {
+		ret = -ENOMEM;
+		goto error;
+	} else {
+		attrs[0] = &ocfs2_attr_filecheck_chk.attr;
+		attrs[1] = &ocfs2_attr_filecheck_fix.attr;
+		attrs[2] = &ocfs2_attr_filecheck_set.attr;
+		attrs[3] = NULL;
+		memset(&attrgp, 0, sizeof(attrgp));
+		attrgp.attrs = attrs;
+	}
+
+	fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS);
+	if (!fcheck) {
+		ret = -ENOMEM;
+		goto error;
+	} else {
+		INIT_LIST_HEAD(&fcheck->fc_head);
+		spin_lock_init(&fcheck->fc_lock);
+		fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
+		fcheck->fc_size = 0;
+		fcheck->fc_done = 0;
+	}
+
+	if (strlen(sb->s_id) <= 0) {
+		mlog(ML_ERROR,
+		"Cannot get device basename when create filecheck sysfs\n");
+		ret = -ENODEV;
+		goto error;
+	}
+
+	device_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj);
+	if (!device_kset) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	fcheck_kset = kset_create_and_add("filecheck", NULL,
+					  &device_kset->kobj);
+	if (!fcheck_kset) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = sysfs_create_group(&fcheck_kset->kobj, &attrgp);
+	if (ret)
+		goto error;
+
+	entry = kmalloc(sizeof(struct ocfs2_filecheck_sysfs_entry), GFP_NOFS);
+	if (!entry) {
+		ret = -ENOMEM;
+		goto error;
+	} else {
+		atomic_set(&entry->fs_count, 1);
+		entry->fs_sb = sb;
+		entry->fs_devicekset = device_kset;
+		entry->fs_fcheckkset = fcheck_kset;
+		entry->fs_fcheck = fcheck;
+		ocfs2_filecheck_sysfs_add(entry);
+	}
+
+	kfree(attrs);
+	return 0;
+
+error:
+	kfree(attrs);
+	kfree(entry);
+	kfree(fcheck);
+	kset_unregister(fcheck_kset);
+	kset_unregister(device_kset);
+	return ret;
+}
+
+int ocfs2_filecheck_remove_sysfs(struct super_block *sb)
+{
+	return ocfs2_filecheck_sysfs_del(sb->s_id);
+}
+
+static int
+ocfs2_filecheck_erase_entries(struct ocfs2_filecheck_sysfs_entry *ent,
+			      unsigned int count);
+static int
+ocfs2_filecheck_adjust_max(struct ocfs2_filecheck_sysfs_entry *ent,
+			   unsigned int len)
+{
+	int ret;
+
+	if ((len < OCFS2_FILECHECK_MINSIZE) || (len > OCFS2_FILECHECK_MAXSIZE))
+		return -EINVAL;
+
+	spin_lock(&ent->fs_fcheck->fc_lock);
+	if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) {
+		mlog(ML_ERROR,
+		"Cannot set online file check maximum entry number "
+		"to %u due to too many pending entries(%u)\n",
+		len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done);
+		ret = -EBUSY;
+	} else {
+		if (len < ent->fs_fcheck->fc_size)
+			BUG_ON(!ocfs2_filecheck_erase_entries(ent,
+				ent->fs_fcheck->fc_size - len));
+
+		ent->fs_fcheck->fc_max = len;
+		ret = 0;
+	}
+	spin_unlock(&ent->fs_fcheck->fc_lock);
+
+	return ret;
+}
+
+#define OCFS2_FILECHECK_ARGS_LEN	24
+static int
+ocfs2_filecheck_args_get_long(const char *buf, size_t count,
+			      unsigned long *val)
+{
+	char buffer[OCFS2_FILECHECK_ARGS_LEN];
+
+	memcpy(buffer, buf, count);
+	buffer[count] = '\0';
+
+	if (kstrtoul(buffer, 0, val))
+		return 1;
+
+	return 0;
+}
+
+static int
+ocfs2_filecheck_type_parse(const char *name, unsigned int *type)
+{
+	if (!strncmp(name, "fix", 4))
+		*type = OCFS2_FILECHECK_TYPE_FIX;
+	else if (!strncmp(name, "check", 6))
+		*type = OCFS2_FILECHECK_TYPE_CHK;
+	else if (!strncmp(name, "set", 4))
+		*type = OCFS2_FILECHECK_TYPE_SET;
+	else
+		return 1;
+
+	return 0;
+}
+
+static int
+ocfs2_filecheck_args_parse(const char *name, const char *buf, size_t count,
+			   struct ocfs2_filecheck_args *args)
+{
+	unsigned long val = 0;
+	unsigned int type;
+
+	/* too short/long args length */
+	if ((count < 1) || (count >= OCFS2_FILECHECK_ARGS_LEN))
+		return 1;
+
+	if (ocfs2_filecheck_type_parse(name, &type))
+		return 1;
+	if (ocfs2_filecheck_args_get_long(buf, count, &val))
+		return 1;
+
+	if (val <= 0)
+		return 1;
+
+	args->fa_type = type;
+	if (type == OCFS2_FILECHECK_TYPE_SET)
+		args->fa_len = (unsigned int)val;
+	else
+		args->fa_ino = val;
+
+	return 0;
+}
+
+static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    char *buf)
+{
+
+	ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
+	unsigned int type;
+	struct ocfs2_filecheck_entry *p;
+	struct ocfs2_filecheck_sysfs_entry *ent;
+
+	if (ocfs2_filecheck_type_parse(attr->attr.name, &type))
+		return -EINVAL;
+
+	ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
+	if (!ent) {
+		mlog(ML_ERROR,
+		"Cannot get the corresponding entry via device basename %s\n",
+		kobj->name);
+		return -ENODEV;
+	}
+
+	if (type == OCFS2_FILECHECK_TYPE_SET) {
+		spin_lock(&ent->fs_fcheck->fc_lock);
+		total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max);
+		spin_unlock(&ent->fs_fcheck->fc_lock);
+		goto exit;
+	}
+
+	ret = snprintf(buf, remain, "INO\t\tDONE\tERROR\n");
+	total += ret;
+	remain -= ret;
+	spin_lock(&ent->fs_fcheck->fc_lock);
+	list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
+		if (p->fe_type != type)
+			continue;
+
+		ret = snprintf(buf + total, remain, "%lu\t\t%u\t%s\n",
+			       p->fe_ino, p->fe_done,
+			       ocfs2_filecheck_error(p->fe_status));
+		if (ret < 0) {
+			total = ret;
+			break;
+		}
+		if (ret == remain) {
+			/* snprintf() didn't fit */
+			total = -E2BIG;
+			break;
+		}
+		total += ret;
+		remain -= ret;
+	}
+	spin_unlock(&ent->fs_fcheck->fc_lock);
+
+exit:
+	ocfs2_filecheck_sysfs_put(ent);
+	return total;
+}
+
+static int
+ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent)
+{
+	struct ocfs2_filecheck_entry *p;
+
+	list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
+		if (p->fe_done) {
+			list_del(&p->fe_list);
+			kfree(p);
+			ent->fs_fcheck->fc_size--;
+			ent->fs_fcheck->fc_done--;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int
+ocfs2_filecheck_erase_entries(struct ocfs2_filecheck_sysfs_entry *ent,
+			      unsigned int count)
+{
+	unsigned int i = 0;
+	unsigned int ret = 0;
+
+	while (i++ < count) {
+		if (ocfs2_filecheck_erase_entry(ent))
+			ret++;
+		else
+			break;
+	}
+
+	return (ret == count ? 1 : 0);
+}
+
+static void
+ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent,
+			   struct ocfs2_filecheck_entry *entry)
+{
+	entry->fe_done = 1;
+	spin_lock(&ent->fs_fcheck->fc_lock);
+	ent->fs_fcheck->fc_done++;
+	spin_unlock(&ent->fs_fcheck->fc_lock);
+}
+
+static unsigned int
+ocfs2_filecheck_handle(struct super_block *sb,
+		       unsigned long ino, unsigned int flags)
+{
+	unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS;
+	struct inode *inode = NULL;
+	int rc;
+
+	inode = ocfs2_iget(OCFS2_SB(sb), ino, flags, 0);
+	if (IS_ERR(inode)) {
+		rc = (int)(-(long)inode);
+		if (rc >= OCFS2_FILECHECK_ERR_START &&
+		    rc < OCFS2_FILECHECK_ERR_END)
+			ret = rc;
+		else
+			ret = OCFS2_FILECHECK_ERR_FAILED;
+	} else
+		iput(inode);
+
+	return ret;
+}
+
+static void
+ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
+			     struct ocfs2_filecheck_entry *entry)
+{
+	if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK)
+		entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+				entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK);
+	else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX)
+		entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+				entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX);
+	else
+		entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED;
+
+	ocfs2_filecheck_done_entry(ent, entry);
+}
+
+static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct ocfs2_filecheck_args args;
+	struct ocfs2_filecheck_entry *entry;
+	struct ocfs2_filecheck_sysfs_entry *ent;
+	ssize_t ret = 0;
+
+	if (count == 0)
+		return count;
+
+	if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) {
+		mlog(ML_ERROR, "Invalid arguments for online file check\n");
+		return -EINVAL;
+	}
+
+	ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
+	if (!ent) {
+		mlog(ML_ERROR,
+		"Cannot get the corresponding entry via device basename %s\n",
+		kobj->parent->name);
+		return -ENODEV;
+	}
+
+	if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) {
+		ret = ocfs2_filecheck_adjust_max(ent, args.fa_len);
+		goto exit;
+	}
+
+	entry = kmalloc(sizeof(struct ocfs2_filecheck_entry), GFP_NOFS);
+	if (!entry) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	spin_lock(&ent->fs_fcheck->fc_lock);
+	if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
+	    (ent->fs_fcheck->fc_done == 0)) {
+		mlog(ML_ERROR,
+		"Cannot do more file check "
+		"since file check queue(%u) is full now\n",
+		ent->fs_fcheck->fc_max);
+		ret = -EBUSY;
+		kfree(entry);
+	} else {
+		if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
+		    (ent->fs_fcheck->fc_done > 0)) {
+			/* Delete the oldest entry which was done,
+			 * make sure the entry size in list does
+			 * not exceed maximum value
+			 */
+			BUG_ON(!ocfs2_filecheck_erase_entry(ent));
+		}
+
+		entry->fe_ino = args.fa_ino;
+		entry->fe_type = args.fa_type;
+		entry->fe_done = 0;
+		entry->fe_status = OCFS2_FILECHECK_ERR_INPROGRESS;
+		list_add_tail(&entry->fe_list, &ent->fs_fcheck->fc_head);
+		ent->fs_fcheck->fc_size++;
+	}
+	spin_unlock(&ent->fs_fcheck->fc_lock);
+
+	if (!ret)
+		ocfs2_filecheck_handle_entry(ent, entry);
+
+exit:
+	ocfs2_filecheck_sysfs_put(ent);
+	return (!ret ? count : ret);
+}

diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h
new file mode 100644
index 0000000..e5cd002
--- /dev/null
+++ b/fs/ocfs2/filecheck.h

@@ -0,0 +1,49 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * filecheck.h
+ *
+ * Online file check.
+ *
+ * Copyright (C) 2016 SuSE.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+
+#ifndef FILECHECK_H
+#define FILECHECK_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+
+/* File check errno */
+enum {
+	OCFS2_FILECHECK_ERR_SUCCESS = 0,	/* Success */
+	OCFS2_FILECHECK_ERR_FAILED = 1000,	/* Other failure */
+	OCFS2_FILECHECK_ERR_INPROGRESS,		/* In progress */
+	OCFS2_FILECHECK_ERR_READONLY,		/* Read only */
+	OCFS2_FILECHECK_ERR_INJBD,		/* Buffer in jbd */
+	OCFS2_FILECHECK_ERR_INVALIDINO,		/* Invalid ino */
+	OCFS2_FILECHECK_ERR_BLOCKECC,		/* Block ecc */
+	OCFS2_FILECHECK_ERR_BLOCKNO,		/* Block number */
+	OCFS2_FILECHECK_ERR_VALIDFLAG,		/* Inode valid flag */
+	OCFS2_FILECHECK_ERR_GENERATION,		/* Inode generation */
+	OCFS2_FILECHECK_ERR_UNSUPPORTED		/* Unsupported */
+};
+
+#define OCFS2_FILECHECK_ERR_START	OCFS2_FILECHECK_ERR_FAILED
+#define OCFS2_FILECHECK_ERR_END		OCFS2_FILECHECK_ERR_UNSUPPORTED
+
+int ocfs2_filecheck_create_sysfs(struct super_block *sb);
+int ocfs2_filecheck_remove_sysfs(struct super_block *sb);
+
+#endif  /* FILECHECK_H */

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3629444..12f4a9e 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c

@@ -53,6 +53,7 @@
 #include "xattr.h"
 #include "refcounttree.h"
 #include "ocfs2_trace.h"
+#include "filecheck.h"
 
 #include "buffer_head_io.h"
 
@@ -74,6 +75,14 @@
 				    struct inode *inode,
 				    struct buffer_head *fe_bh);
 
+static int ocfs2_filecheck_read_inode_block_full(struct inode *inode,
+						 struct buffer_head **bh,
+						 int flags, int type);
+static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
+						struct buffer_head *bh);
+static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
+					      struct buffer_head *bh);
+
 void ocfs2_set_inode_flags(struct inode *inode)
 {
 	unsigned int flags = OCFS2_I(inode)->ip_attr;
@@ -127,6 +136,7 @@
 struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
 			 int sysfile_type)
 {
+	int rc = 0;
 	struct inode *inode = NULL;
 	struct super_block *sb = osb->sb;
 	struct ocfs2_find_inode_args args;
@@ -161,12 +171,17 @@
 	}
 	trace_ocfs2_iget5_locked(inode->i_state);
 	if (inode->i_state & I_NEW) {
-		ocfs2_read_locked_inode(inode, &args);
+		rc = ocfs2_read_locked_inode(inode, &args);
 		unlock_new_inode(inode);
 	}
 	if (is_bad_inode(inode)) {
 		iput(inode);
-		inode = ERR_PTR(-ESTALE);
+		if ((flags & OCFS2_FI_FLAG_FILECHECK_CHK) ||
+		    (flags & OCFS2_FI_FLAG_FILECHECK_FIX))
+			/* Return OCFS2_FILECHECK_ERR_XXX related errno */
+			inode = ERR_PTR(rc);
+		else
+			inode = ERR_PTR(-ESTALE);
 		goto bail;
 	}
 
@@ -410,7 +425,7 @@
 	struct ocfs2_super *osb;
 	struct ocfs2_dinode *fe;
 	struct buffer_head *bh = NULL;
-	int status, can_lock;
+	int status, can_lock, lock_level = 0;
 	u32 generation = 0;
 
 	status = -EINVAL;
@@ -478,7 +493,7 @@
 			mlog_errno(status);
 			return status;
 		}
-		status = ocfs2_inode_lock(inode, NULL, 0);
+		status = ocfs2_inode_lock(inode, NULL, lock_level);
 		if (status) {
 			make_bad_inode(inode);
 			mlog_errno(status);
@@ -495,16 +510,32 @@
 	}
 
 	if (can_lock) {
-		status = ocfs2_read_inode_block_full(inode, &bh,
-						     OCFS2_BH_IGNORE_CACHE);
+		if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK)
+			status = ocfs2_filecheck_read_inode_block_full(inode,
+						&bh, OCFS2_BH_IGNORE_CACHE, 0);
+		else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX)
+			status = ocfs2_filecheck_read_inode_block_full(inode,
+						&bh, OCFS2_BH_IGNORE_CACHE, 1);
+		else
+			status = ocfs2_read_inode_block_full(inode,
+						&bh, OCFS2_BH_IGNORE_CACHE);
 	} else {
 		status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
 		/*
 		 * If buffer is in jbd, then its checksum may not have been
 		 * computed as yet.
 		 */
-		if (!status && !buffer_jbd(bh))
-			status = ocfs2_validate_inode_block(osb->sb, bh);
+		if (!status && !buffer_jbd(bh)) {
+			if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK)
+				status = ocfs2_filecheck_validate_inode_block(
+								osb->sb, bh);
+			else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX)
+				status = ocfs2_filecheck_repair_inode_block(
+								osb->sb, bh);
+			else
+				status = ocfs2_validate_inode_block(
+								osb->sb, bh);
+		}
 	}
 	if (status < 0) {
 		mlog_errno(status);
@@ -532,11 +563,24 @@
 
 	BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
 
+	if (buffer_dirty(bh) && !buffer_jbd(bh)) {
+		if (can_lock) {
+			ocfs2_inode_unlock(inode, lock_level);
+			lock_level = 1;
+			ocfs2_inode_lock(inode, NULL, lock_level);
+		}
+		status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
 	status = 0;
 
 bail:
 	if (can_lock)
-		ocfs2_inode_unlock(inode, 0);
+		ocfs2_inode_unlock(inode, lock_level);
 
 	if (status < 0)
 		make_bad_inode(inode);
@@ -1126,6 +1170,9 @@
 	mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
 			"Clear inode of %llu, inode has io markers\n",
 			(unsigned long long)oi->ip_blkno);
+	mlog_bug_on_msg(!list_empty(&oi->ip_unwritten_list),
+			"Clear inode of %llu, inode has unwritten extents\n",
+			(unsigned long long)oi->ip_blkno);
 
 	ocfs2_extent_map_trunc(inode, 0);
 
@@ -1397,6 +1444,169 @@
 	return rc;
 }
 
+static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
+						struct buffer_head *bh)
+{
+	int rc = 0;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+	trace_ocfs2_filecheck_validate_inode_block(
+		(unsigned long long)bh->b_blocknr);
+
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * Call ocfs2_validate_meta_ecc() first since it has ecc repair
+	 * function, but we should not return error immediately when ecc
+	 * validation fails, because the reason is quite likely the invalid
+	 * inode number inputed.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
+	if (rc) {
+		mlog(ML_ERROR,
+		     "Filecheck: checksum failed for dinode %llu\n",
+		     (unsigned long long)bh->b_blocknr);
+		rc = -OCFS2_FILECHECK_ERR_BLOCKECC;
+	}
+
+	if (!OCFS2_IS_VALID_DINODE(di)) {
+		mlog(ML_ERROR,
+		     "Filecheck: invalid dinode #%llu: signature = %.*s\n",
+		     (unsigned long long)bh->b_blocknr, 7, di->i_signature);
+		rc = -OCFS2_FILECHECK_ERR_INVALIDINO;
+		goto bail;
+	} else if (rc)
+		goto bail;
+
+	if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+		mlog(ML_ERROR,
+		     "Filecheck: invalid dinode #%llu: i_blkno is %llu\n",
+		     (unsigned long long)bh->b_blocknr,
+		     (unsigned long long)le64_to_cpu(di->i_blkno));
+		rc = -OCFS2_FILECHECK_ERR_BLOCKNO;
+		goto bail;
+	}
+
+	if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+		mlog(ML_ERROR,
+		     "Filecheck: invalid dinode #%llu: OCFS2_VALID_FL "
+		     "not set\n",
+		     (unsigned long long)bh->b_blocknr);
+		rc = -OCFS2_FILECHECK_ERR_VALIDFLAG;
+		goto bail;
+	}
+
+	if (le32_to_cpu(di->i_fs_generation) !=
+	    OCFS2_SB(sb)->fs_generation) {
+		mlog(ML_ERROR,
+		     "Filecheck: invalid dinode #%llu: fs_generation is %u\n",
+		     (unsigned long long)bh->b_blocknr,
+		     le32_to_cpu(di->i_fs_generation));
+		rc = -OCFS2_FILECHECK_ERR_GENERATION;
+		goto bail;
+	}
+
+bail:
+	return rc;
+}
+
+static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
+					      struct buffer_head *bh)
+{
+	int changed = 0;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+	if (!ocfs2_filecheck_validate_inode_block(sb, bh))
+		return 0;
+
+	trace_ocfs2_filecheck_repair_inode_block(
+		(unsigned long long)bh->b_blocknr);
+
+	if (ocfs2_is_hard_readonly(OCFS2_SB(sb)) ||
+	    ocfs2_is_soft_readonly(OCFS2_SB(sb))) {
+		mlog(ML_ERROR,
+		     "Filecheck: cannot repair dinode #%llu "
+		     "on readonly filesystem\n",
+		     (unsigned long long)bh->b_blocknr);
+		return -OCFS2_FILECHECK_ERR_READONLY;
+	}
+
+	if (buffer_jbd(bh)) {
+		mlog(ML_ERROR,
+		     "Filecheck: cannot repair dinode #%llu, "
+		     "its buffer is in jbd\n",
+		     (unsigned long long)bh->b_blocknr);
+		return -OCFS2_FILECHECK_ERR_INJBD;
+	}
+
+	if (!OCFS2_IS_VALID_DINODE(di)) {
+		/* Cannot fix invalid inode block */
+		return -OCFS2_FILECHECK_ERR_INVALIDINO;
+	}
+
+	if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+		/* Cannot just add VALID_FL flag back as a fix,
+		 * need more things to check here.
+		 */
+		return -OCFS2_FILECHECK_ERR_VALIDFLAG;
+	}
+
+	if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+		di->i_blkno = cpu_to_le64(bh->b_blocknr);
+		changed = 1;
+		mlog(ML_ERROR,
+		     "Filecheck: reset dinode #%llu: i_blkno to %llu\n",
+		     (unsigned long long)bh->b_blocknr,
+		     (unsigned long long)le64_to_cpu(di->i_blkno));
+	}
+
+	if (le32_to_cpu(di->i_fs_generation) !=
+	    OCFS2_SB(sb)->fs_generation) {
+		di->i_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
+		changed = 1;
+		mlog(ML_ERROR,
+		     "Filecheck: reset dinode #%llu: fs_generation to %u\n",
+		     (unsigned long long)bh->b_blocknr,
+		     le32_to_cpu(di->i_fs_generation));
+	}
+
+	if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) {
+		ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check);
+		mark_buffer_dirty(bh);
+		mlog(ML_ERROR,
+		     "Filecheck: reset dinode #%llu: compute meta ecc\n",
+		     (unsigned long long)bh->b_blocknr);
+	}
+
+	return 0;
+}
+
+static int
+ocfs2_filecheck_read_inode_block_full(struct inode *inode,
+				      struct buffer_head **bh,
+				      int flags, int type)
+{
+	int rc;
+	struct buffer_head *tmp = *bh;
+
+	if (!type) /* Check inode block */
+		rc = ocfs2_read_blocks(INODE_CACHE(inode),
+				OCFS2_I(inode)->ip_blkno,
+				1, &tmp, flags,
+				ocfs2_filecheck_validate_inode_block);
+	else /* Repair inode block */
+		rc = ocfs2_read_blocks(INODE_CACHE(inode),
+				OCFS2_I(inode)->ip_blkno,
+				1, &tmp, flags,
+				ocfs2_filecheck_repair_inode_block);
+
+	/* If ocfs2_read_blocks() got us a new bh, pass it up. */
+	if (!rc && !*bh)
+		*bh = tmp;
+
+	return rc;
+}
+
 int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
 				int flags)
 {

diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index aac8b86..d8f3fc8 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h

@@ -43,9 +43,6 @@
 	/* protects extended attribute changes on this inode */
 	struct rw_semaphore		ip_xattr_sem;
 
-	/* Number of outstanding AIO's which are not page aligned */
-	struct mutex			ip_unaligned_aio;
-
 	/* These fields are protected by ip_lock */
 	spinlock_t			ip_lock;
 	u32				ip_open_count;
@@ -57,6 +54,9 @@
 	u32				ip_flags; /* see below */
 	u32				ip_attr; /* inode attributes */
 
+	/* Record unwritten extents during direct io. */
+	struct list_head		ip_unwritten_list;
+
 	/* protected by recovery_lock. */
 	struct inode			*ip_next_orphan;
 
@@ -139,6 +139,9 @@
 /* Flags for ocfs2_iget() */
 #define OCFS2_FI_FLAG_SYSFILE		0x1
 #define OCFS2_FI_FLAG_ORPHAN_RECOVERY	0x2
+#define OCFS2_FI_FLAG_FILECHECK_CHK	0x4
+#define OCFS2_FI_FLAG_FILECHECK_FIX	0x8
+
 struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff);
 struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
 			 int sysfile_type);

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 61b833b..e607419 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c

@@ -231,7 +231,7 @@
 	/* At this point, we know that no more recovery threads can be
 	 * launched, so wait for any recovery completion work to
 	 * complete. */
-	flush_workqueue(ocfs2_wq);
+	flush_workqueue(osb->ocfs2_wq);
 
 	/*
 	 * Now that recovery is shut down, and the osb is about to be
@@ -1326,7 +1326,7 @@
 
 	spin_lock(&journal->j_lock);
 	list_add_tail(&item->lri_list, &journal->j_la_cleanups);
-	queue_work(ocfs2_wq, &journal->j_recovery_work);
+	queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
 	spin_unlock(&journal->j_lock);
 }
 
@@ -1968,7 +1968,7 @@
 	mutex_lock(&os->os_lock);
 	ocfs2_queue_orphan_scan(osb);
 	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
-		queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+		queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
 				      ocfs2_orphan_scan_timeout());
 	mutex_unlock(&os->os_lock);
 }
@@ -2008,7 +2008,7 @@
 		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
 	else {
 		atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
-		queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+		queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
 				   ocfs2_orphan_scan_timeout());
 	}
 }

diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 7d62c43..fe0d1f9 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c

@@ -386,7 +386,7 @@
 	struct ocfs2_dinode *alloc = NULL;
 
 	cancel_delayed_work(&osb->la_enable_wq);
-	flush_workqueue(ocfs2_wq);
+	flush_workqueue(osb->ocfs2_wq);
 
 	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
 		goto out;
@@ -1085,7 +1085,7 @@
 		} else {
 			osb->local_alloc_state = OCFS2_LA_DISABLED;
 		}
-		queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
+		queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq,
 				   OCFS2_LA_ENABLE_INTERVAL);
 		goto out_unlock;
 	}

diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 77ebc2b..71545ad 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c

@@ -65,13 +65,13 @@
 	struct inode *inode = file_inode(file);
 	struct address_space *mapping = inode->i_mapping;
 	loff_t pos = page_offset(page);
-	unsigned int len = PAGE_CACHE_SIZE;
+	unsigned int len = PAGE_SIZE;
 	pgoff_t last_index;
 	struct page *locked_page = NULL;
 	void *fsdata;
 	loff_t size = i_size_read(inode);
 
-	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+	last_index = (size - 1) >> PAGE_SHIFT;
 
 	/*
 	 * There are cases that lead to the page no longer bebongs to the
@@ -102,10 +102,10 @@
 	 * because the "write" would invalidate their data.
 	 */
 	if (page->index == last_index)
-		len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
+		len = ((size - 1) & ~PAGE_MASK) + 1;
 
-	ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page,
-				       &fsdata, di_bh, page);
+	ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
+				       &locked_page, &fsdata, di_bh, page);
 	if (ret) {
 		if (ret != -ENOSPC)
 			mlog_errno(ret);

diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7a01262..e63af7d 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h

@@ -464,6 +464,14 @@
 	struct ocfs2_refcount_tree *osb_ref_tree_lru;
 
 	struct mutex system_file_mutex;
+
+	/*
+	 * OCFS2 needs to schedule several different types of work which
+	 * require cluster locking, disk I/O, recovery waits, etc. Since these
+	 * types of work tend to be heavy we avoid using the kernel events
+	 * workqueue and schedule on our own.
+	 */
+	struct workqueue_struct *ocfs2_wq;
 };
 
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
@@ -814,10 +822,10 @@
 	u32 clusters = pg_index;
 	unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
 
-	if (unlikely(PAGE_CACHE_SHIFT > cbits))
-		clusters = pg_index << (PAGE_CACHE_SHIFT - cbits);
-	else if (PAGE_CACHE_SHIFT < cbits)
-		clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT);
+	if (unlikely(PAGE_SHIFT > cbits))
+		clusters = pg_index << (PAGE_SHIFT - cbits);
+	else if (PAGE_SHIFT < cbits)
+		clusters = pg_index >> (cbits - PAGE_SHIFT);
 
 	return clusters;
 }
@@ -831,10 +839,10 @@
 	unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
         pgoff_t index = clusters;
 
-	if (PAGE_CACHE_SHIFT > cbits) {
-		index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits);
-	} else if (PAGE_CACHE_SHIFT < cbits) {
-		index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT);
+	if (PAGE_SHIFT > cbits) {
+		index = (pgoff_t)clusters >> (PAGE_SHIFT - cbits);
+	} else if (PAGE_SHIFT < cbits) {
+		index = (pgoff_t)clusters << (cbits - PAGE_SHIFT);
 	}
 
 	return index;
@@ -845,8 +853,8 @@
 	unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
 	unsigned int pages_per_cluster = 1;
 
-	if (PAGE_CACHE_SHIFT < cbits)
-		pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT);
+	if (PAGE_SHIFT < cbits)
+		pages_per_cluster = 1 << (cbits - PAGE_SHIFT);
 
 	return pages_per_cluster;
 }

diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index a52a2db..f8f5fc5 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h

@@ -1450,28 +1450,20 @@
 
 TRACE_EVENT(ocfs2_prepare_inode_for_write,
 	TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
-		 int appending, unsigned long count,
-		 int *direct_io, int *has_refcount),
-	TP_ARGS(ino, saved_pos, appending, count, direct_io, has_refcount),
+		 unsigned long count),
+	TP_ARGS(ino, saved_pos, count),
 	TP_STRUCT__entry(
 		__field(unsigned long long, ino)
 		__field(unsigned long long, saved_pos)
-		__field(int, appending)
 		__field(unsigned long, count)
-		__field(int, direct_io)
-		__field(int, has_refcount)
 	),
 	TP_fast_assign(
 		__entry->ino = ino;
 		__entry->saved_pos = saved_pos;
-		__entry->appending = appending;
 		__entry->count = count;
-		__entry->direct_io = direct_io ? *direct_io : -1;
-		__entry->has_refcount = has_refcount ? *has_refcount : -1;
 	),
-	TP_printk("%llu %llu %d %lu %d %d", __entry->ino,
-		  __entry->saved_pos, __entry->appending, __entry->count,
-		  __entry->direct_io, __entry->has_refcount)
+	TP_printk("%llu %llu %lu", __entry->ino,
+		  __entry->saved_pos, __entry->count)
 );
 
 DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
@@ -1540,6 +1532,8 @@
 DEFINE_OCFS2_INT_INT_EVENT(ocfs2_check_orphan_recovery_state);
 
 DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_inode_block);
+DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_validate_inode_block);
+DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_repair_inode_block);
 
 TRACE_EVENT(ocfs2_inode_is_valid_to_delete,
 	TP_PROTO(void *task, void *dc_task, unsigned long long ino,

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 91bc674..ab6a6cd 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c

@@ -726,7 +726,7 @@
 		dqgrab(dquot);
 		/* First entry on list -> queue work */
 		if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list))
-			queue_work(ocfs2_wq, &osb->dquot_drop_work);
+			queue_work(osb->ocfs2_wq, &osb->dquot_drop_work);
 		goto out;
 	}
 	status = ocfs2_lock_global_qf(oinfo, 1);
@@ -867,6 +867,10 @@
 	int status = 0;
 
 	trace_ocfs2_get_next_id(from_kqid(&init_user_ns, *qid), type);
+	if (!sb_has_quota_loaded(sb, type)) {
+		status = -ESRCH;
+		goto out;
+	}
 	status = ocfs2_lock_global_qf(info, 0);
 	if (status < 0)
 		goto out;
@@ -878,8 +882,11 @@
 out_global:
 	ocfs2_unlock_global_qf(info, 0);
 out:
-	/* Avoid logging ENOENT since it just means there isn't next ID */
-	if (status && status != -ENOENT)
+	/*
+	 * Avoid logging ENOENT since it just means there isn't next ID and
+	 * ESRCH which means quota isn't enabled for the filesystem.
+	 */
+	if (status && status != -ENOENT && status != -ESRCH)
 		mlog_errno(status);
 	return status;
 }

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3eff031..744d5d9 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c

@@ -2937,16 +2937,16 @@
 		end = i_size_read(inode);
 
 	while (offset < end) {
-		page_index = offset >> PAGE_CACHE_SHIFT;
-		map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
+		page_index = offset >> PAGE_SHIFT;
+		map_end = ((loff_t)page_index + 1) << PAGE_SHIFT;
 		if (map_end > end)
 			map_end = end;
 
 		/* from, to is the offset within the page. */
-		from = offset & (PAGE_CACHE_SIZE - 1);
-		to = PAGE_CACHE_SIZE;
-		if (map_end & (PAGE_CACHE_SIZE - 1))
-			to = map_end & (PAGE_CACHE_SIZE - 1);
+		from = offset & (PAGE_SIZE - 1);
+		to = PAGE_SIZE;
+		if (map_end & (PAGE_SIZE - 1))
+			to = map_end & (PAGE_SIZE - 1);
 
 		page = find_or_create_page(mapping, page_index, GFP_NOFS);
 		if (!page) {
@@ -2956,10 +2956,10 @@
 		}
 
 		/*
-		 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
+		 * In case PAGE_SIZE <= CLUSTER_SIZE, This page
 		 * can't be dirtied before we CoW it out.
 		 */
-		if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
+		if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize)
 			BUG_ON(PageDirty(page));
 
 		if (!PageUptodate(page)) {
@@ -2987,7 +2987,7 @@
 		mark_page_accessed(page);
 unlock:
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		page = NULL;
 		offset = map_end;
 		if (ret)
@@ -3165,8 +3165,8 @@
 	}
 
 	while (offset < end) {
-		page_index = offset >> PAGE_CACHE_SHIFT;
-		map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
+		page_index = offset >> PAGE_SHIFT;
+		map_end = ((loff_t)page_index + 1) << PAGE_SHIFT;
 		if (map_end > end)
 			map_end = end;
 
@@ -3182,7 +3182,7 @@
 			mark_page_accessed(page);
 
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		page = NULL;
 		offset = map_end;
 		if (ret)

diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 576b9a0..18451e0 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c

@@ -196,7 +196,7 @@
 	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
 		blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
 		cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
-		if (cluster > clusters)
+		if (cluster >= clusters)
 			break;
 
 		ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);

diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 5d965e8..13219ed 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c

@@ -629,7 +629,8 @@
 	.attrs = ocfs2_attrs,
 };
 
-static struct kset *ocfs2_kset;
+struct kset *ocfs2_kset;
+EXPORT_SYMBOL_GPL(ocfs2_kset);
 
 static void ocfs2_sysfs_exit(void)
 {

diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 66334a3..f2dce10 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h

@@ -298,4 +298,6 @@
 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
 
+extern struct kset *ocfs2_kset;
+
 #endif  /* STACKGLUE_H */

diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 302854e..d7cae33 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c

@@ -74,17 +74,12 @@
 #include "suballoc.h"
 
 #include "buffer_head_io.h"
+#include "filecheck.h"
 
 static struct kmem_cache *ocfs2_inode_cachep;
 struct kmem_cache *ocfs2_dquot_cachep;
 struct kmem_cache *ocfs2_qf_chunk_cachep;
 
-/* OCFS2 needs to schedule several different types of work which
- * require cluster locking, disk I/O, recovery waits, etc. Since these
- * types of work tend to be heavy we avoid using the kernel events
- * workqueue and schedule on our own. */
-struct workqueue_struct *ocfs2_wq = NULL;
-
 static struct dentry *ocfs2_debugfs_root;
 
 MODULE_AUTHOR("Oracle");
@@ -610,8 +605,8 @@
 	/*
 	 * We might be limited by page cache size.
 	 */
-	if (bytes > PAGE_CACHE_SIZE) {
-		bytes = PAGE_CACHE_SIZE;
+	if (bytes > PAGE_SIZE) {
+		bytes = PAGE_SIZE;
 		trim = 1;
 		/*
 		 * Shift by 31 here so that we don't get larger than
@@ -1205,6 +1200,9 @@
 	/* Start this when the mount is almost sure of being successful */
 	ocfs2_orphan_scan_start(osb);
 
+	/* Create filecheck sysfile /sys/fs/ocfs2/<devname>/filecheck */
+	ocfs2_filecheck_create_sysfs(sb);
+
 	return status;
 
 read_super_error:
@@ -1609,33 +1607,25 @@
 	if (status < 0)
 		goto out2;
 
-	ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
-	if (!ocfs2_wq) {
-		status = -ENOMEM;
-		goto out3;
-	}
-
 	ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
 	if (!ocfs2_debugfs_root) {
 		status = -ENOMEM;
 		mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
-		goto out4;
+		goto out3;
 	}
 
 	ocfs2_set_locking_protocol();
 
 	status = register_quota_format(&ocfs2_quota_format);
 	if (status < 0)
-		goto out4;
+		goto out3;
 	status = register_filesystem(&ocfs2_fs_type);
 	if (!status)
 		return 0;
 
 	unregister_quota_format(&ocfs2_quota_format);
-out4:
-	destroy_workqueue(ocfs2_wq);
-	debugfs_remove(ocfs2_debugfs_root);
 out3:
+	debugfs_remove(ocfs2_debugfs_root);
 	ocfs2_free_mem_caches();
 out2:
 	exit_ocfs2_uptodate_cache();
@@ -1646,11 +1636,6 @@
 
 static void __exit ocfs2_exit(void)
 {
-	if (ocfs2_wq) {
-		flush_workqueue(ocfs2_wq);
-		destroy_workqueue(ocfs2_wq);
-	}
-
 	unregister_quota_format(&ocfs2_quota_format);
 
 	debugfs_remove(ocfs2_debugfs_root);
@@ -1668,6 +1653,7 @@
 
 	ocfs2_sync_blockdev(sb);
 	ocfs2_dismount_volume(sb, 0);
+	ocfs2_filecheck_remove_sysfs(sb);
 }
 
 static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1740,8 +1726,8 @@
 	spin_lock_init(&oi->ip_lock);
 	ocfs2_extent_map_init(&oi->vfs_inode);
 	INIT_LIST_HEAD(&oi->ip_io_markers);
+	INIT_LIST_HEAD(&oi->ip_unwritten_list);
 	oi->ip_dir_start_lookup = 0;
-	mutex_init(&oi->ip_unaligned_aio);
 	init_rwsem(&oi->ip_alloc_sem);
 	init_rwsem(&oi->ip_xattr_sem);
 	mutex_init(&oi->ip_io_mutex);
@@ -2344,6 +2330,12 @@
 	}
 	cleancache_init_shared_fs(sb);
 
+	osb->ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
+	if (!osb->ocfs2_wq) {
+		status = -ENOMEM;
+		mlog_errno(status);
+	}
+
 bail:
 	return status;
 }
@@ -2531,6 +2523,12 @@
 {
 	/* This function assumes that the caller has the main osb resource */
 
+	/* ocfs2_initializer_super have already created this workqueue */
+	if (osb->ocfs2_wq) {
+		flush_workqueue(osb->ocfs2_wq);
+		destroy_workqueue(osb->ocfs2_wq);
+	}
+
 	ocfs2_free_slot_info(osb);
 
 	kfree(osb->osb_orphan_wipes);

diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index b477d0b..b023e4f 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h

@@ -26,8 +26,6 @@
 #ifndef OCFS2_SUPER_H
 #define OCFS2_SUPER_H
 
-extern struct workqueue_struct *ocfs2_wq;
-
 int ocfs2_publish_get_mount_state(struct ocfs2_super *osb,
 				  int node_num);
 

diff --git a/fs/open.c b/fs/open.c
index 55bdc75..17cb6b1 100644
--- a/fs/open.c
+++ b/fs/open.c

@@ -992,14 +992,12 @@
 EXPORT_SYMBOL(filp_open);
 
 struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
-			    const char *filename, int flags)
+			    const char *filename, int flags, umode_t mode)
 {
 	struct open_flags op;
-	int err = build_open_flags(flags, 0, &op);
+	int err = build_open_flags(flags, mode, &op);
 	if (err)
 		return ERR_PTR(err);
-	if (flags & O_CREAT)
-		return ERR_PTR(-EINVAL);
 	return do_file_open_root(dentry, mnt, filename, &op);
 }
 EXPORT_SYMBOL(file_open_root);

diff --git a/fs/orangefs/Kconfig b/fs/orangefs/Kconfig
new file mode 100644
index 0000000..1554c02
--- /dev/null
+++ b/fs/orangefs/Kconfig

@@ -0,0 +1,6 @@
+config ORANGEFS_FS
+	tristate "ORANGEFS (Powered by PVFS) support"
+	select FS_POSIX_ACL
+	help
+	   Orange is a parallel file system designed for use on high end
+	   computing (HEC) systems.

diff --git a/fs/orangefs/Makefile b/fs/orangefs/Makefile
new file mode 100644
index 0000000..a9d6a96
--- /dev/null
+++ b/fs/orangefs/Makefile

@@ -0,0 +1,10 @@
+#
+# Makefile for the ORANGEFS filesystem.
+#
+
+obj-$(CONFIG_ORANGEFS_FS) += orangefs.o
+
+orangefs-objs := acl.o file.o orangefs-cache.o orangefs-utils.o xattr.o \
+		 dcache.o inode.o orangefs-sysfs.o orangefs-mod.o super.o \
+		 devorangefs-req.o namei.o symlink.o dir.o orangefs-bufmap.o \
+		 orangefs-debugfs.o waitqueue.o

diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
new file mode 100644
index 0000000..03f89db
--- /dev/null
+++ b/fs/orangefs/acl.c

@@ -0,0 +1,175 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/fs_struct.h>
+
+struct posix_acl *orangefs_get_acl(struct inode *inode, int type)
+{
+	struct posix_acl *acl;
+	int ret;
+	char *key = NULL, *value = NULL;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		key = ORANGEFS_XATTR_NAME_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		key = ORANGEFS_XATTR_NAME_ACL_DEFAULT;
+		break;
+	default:
+		gossip_err("orangefs_get_acl: bogus value of type %d\n", type);
+		return ERR_PTR(-EINVAL);
+	}
+	/*
+	 * Rather than incurring a network call just to determine the exact
+	 * length of the attribute, I just allocate a max length to save on
+	 * the network call. Conceivably, we could pass NULL to
+	 * orangefs_inode_getxattr() to probe the length of the value, but
+	 * I don't do that for now.
+	 */
+	value = kmalloc(ORANGEFS_MAX_XATTR_VALUELEN, GFP_KERNEL);
+	if (value == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "inode %pU, key %s, type %d\n",
+		     get_khandle_from_ino(inode),
+		     key,
+		     type);
+	ret = orangefs_inode_getxattr(inode,
+				   "",
+				   key,
+				   value,
+				   ORANGEFS_MAX_XATTR_VALUELEN);
+	/* if the key exists, convert it to an in-memory rep */
+	if (ret > 0) {
+		acl = posix_acl_from_xattr(&init_user_ns, value, ret);
+	} else if (ret == -ENODATA || ret == -ENOSYS) {
+		acl = NULL;
+	} else {
+		gossip_err("inode %pU retrieving acl's failed with error %d\n",
+			   get_khandle_from_ino(inode),
+			   ret);
+		acl = ERR_PTR(ret);
+	}
+	/* kfree(NULL) is safe, so don't worry if value ever got used */
+	kfree(value);
+	return acl;
+}
+
+int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	int error = 0;
+	void *value = NULL;
+	size_t size = 0;
+	const char *name = NULL;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = ORANGEFS_XATTR_NAME_ACL_ACCESS;
+		if (acl) {
+			umode_t mode = inode->i_mode;
+			/*
+			 * can we represent this with the traditional file
+			 * mode permission bits?
+			 */
+			error = posix_acl_equiv_mode(acl, &mode);
+			if (error < 0) {
+				gossip_err("%s: posix_acl_equiv_mode err: %d\n",
+					   __func__,
+					   error);
+				return error;
+			}
+
+			if (inode->i_mode != mode)
+				SetModeFlag(orangefs_inode);
+			inode->i_mode = mode;
+			mark_inode_dirty_sync(inode);
+			if (error == 0)
+				acl = NULL;
+		}
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = ORANGEFS_XATTR_NAME_ACL_DEFAULT;
+		break;
+	default:
+		gossip_err("%s: invalid type %d!\n", __func__, type);
+		return -EINVAL;
+	}
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "%s: inode %pU, key %s type %d\n",
+		     __func__, get_khandle_from_ino(inode),
+		     name,
+		     type);
+
+	if (acl) {
+		size = posix_acl_xattr_size(acl->a_count);
+		value = kmalloc(size, GFP_KERNEL);
+		if (!value)
+			return -ENOMEM;
+
+		error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+		if (error < 0)
+			goto out;
+	}
+
+	gossip_debug(GOSSIP_ACL_DEBUG,
+		     "%s: name %s, value %p, size %zd, acl %p\n",
+		     __func__, name, value, size, acl);
+	/*
+	 * Go ahead and set the extended attribute now. NOTE: Suppose acl
+	 * was NULL, then value will be NULL and size will be 0 and that
+	 * will xlate to a removexattr. However, we don't want removexattr
+	 * complain if attributes does not exist.
+	 */
+	error = orangefs_inode_setxattr(inode, "", name, value, size, 0);
+
+out:
+	kfree(value);
+	if (!error)
+		set_cached_acl(inode, type, acl);
+	return error;
+}
+
+int orangefs_init_acl(struct inode *inode, struct inode *dir)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct posix_acl *default_acl, *acl;
+	umode_t mode = inode->i_mode;
+	int error = 0;
+
+	ClearModeFlag(orangefs_inode);
+
+	error = posix_acl_create(dir, &mode, &default_acl, &acl);
+	if (error)
+		return error;
+
+	if (default_acl) {
+		error = orangefs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+		posix_acl_release(default_acl);
+	}
+
+	if (acl) {
+		if (!error)
+			error = orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+		posix_acl_release(acl);
+	}
+
+	/* If mode of the inode was changed, then do a forcible ->setattr */
+	if (mode != inode->i_mode) {
+		SetModeFlag(orangefs_inode);
+		inode->i_mode = mode;
+		orangefs_flush_inode(inode);
+	}
+
+	return error;
+}

diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
new file mode 100644
index 0000000..5dfc4f3
--- /dev/null
+++ b/fs/orangefs/dcache.c

@@ -0,0 +1,138 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Implementation of dentry (directory cache) functions.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+
+/* Returns 1 if dentry can still be trusted, else 0. */
+static int orangefs_revalidate_lookup(struct dentry *dentry)
+{
+	struct dentry *parent_dentry = dget_parent(dentry);
+	struct inode *parent_inode = parent_dentry->d_inode;
+	struct orangefs_inode_s *parent = ORANGEFS_I(parent_inode);
+	struct inode *inode = dentry->d_inode;
+	struct orangefs_kernel_op_s *new_op;
+	int ret = 0;
+	int err = 0;
+
+	gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP);
+	if (!new_op)
+		goto out_put_parent;
+
+	new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
+	new_op->upcall.req.lookup.parent_refn = parent->refn;
+	strncpy(new_op->upcall.req.lookup.d_name,
+		dentry->d_name.name,
+		ORANGEFS_NAME_MAX);
+
+	gossip_debug(GOSSIP_DCACHE_DEBUG,
+		     "%s:%s:%d interrupt flag [%d]\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     get_interruptible_flag(parent_inode));
+
+	err = service_operation(new_op, "orangefs_lookup",
+			get_interruptible_flag(parent_inode));
+
+	/* Positive dentry: reject if error or not the same inode. */
+	if (inode) {
+		if (err) {
+			gossip_debug(GOSSIP_DCACHE_DEBUG,
+			    "%s:%s:%d lookup failure.\n",
+			    __FILE__, __func__, __LINE__);
+			goto out_drop;
+		}
+		if (!match_handle(new_op->downcall.resp.lookup.refn.khandle,
+		    inode)) {
+			gossip_debug(GOSSIP_DCACHE_DEBUG,
+			    "%s:%s:%d no match.\n",
+			    __FILE__, __func__, __LINE__);
+			goto out_drop;
+		}
+
+	/* Negative dentry: reject if success or error other than ENOENT. */
+	} else {
+		gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: negative dentry.\n",
+		    __func__);
+		if (!err || err != -ENOENT) {
+			if (new_op->downcall.status != 0)
+				gossip_debug(GOSSIP_DCACHE_DEBUG,
+				    "%s:%s:%d lookup failure.\n",
+				    __FILE__, __func__, __LINE__);
+			goto out_drop;
+		}
+	}
+
+	ret = 1;
+out_release_op:
+	op_release(new_op);
+out_put_parent:
+	dput(parent_dentry);
+	return ret;
+out_drop:
+	gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n",
+	    __FILE__, __func__, __LINE__);
+	goto out_release_op;
+}
+
+/*
+ * Verify that dentry is valid.
+ *
+ * Should return 1 if dentry can still be trusted, else 0.
+ */
+static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	int ret;
+
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: called on dentry %p.\n",
+		     __func__, dentry);
+
+	/* skip root handle lookups. */
+	if (dentry->d_inode && is_root_handle(dentry->d_inode))
+		return 1;
+
+	/*
+	 * If this passes, the positive dentry still exists or the negative
+	 * dentry still does not exist.
+	 */
+	if (!orangefs_revalidate_lookup(dentry))
+		return 0;
+
+	/* We do not need to continue with negative dentries. */
+	if (!dentry->d_inode)
+		goto out;
+
+	/* Now we must perform a getattr to validate the inode contents. */
+
+	ret = orangefs_inode_check_changed(dentry->d_inode);
+	if (ret < 0) {
+		gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d getattr failure.\n",
+		    __FILE__, __func__, __LINE__);
+		return 0;
+	}
+	if (ret == 0)
+		return 0;
+
+out:
+	gossip_debug(GOSSIP_DCACHE_DEBUG,
+	    "%s: negative dentry or positive dentry and inode valid.\n",
+	    __func__);
+	return 1;
+}
+
+const struct dentry_operations orangefs_dentry_operations = {
+	.d_revalidate = orangefs_d_revalidate,
+};

diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
new file mode 100644
index 0000000..db170be
--- /dev/null
+++ b/fs/orangefs/devorangefs-req.c

@@ -0,0 +1,943 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * Changes by Acxiom Corporation to add protocol version to kernel
+ * communication, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-dev-proto.h"
+#include "orangefs-bufmap.h"
+
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+/* this file implements the /dev/pvfs2-req device node */
+
+static int open_access_count;
+
+#define DUMP_DEVICE_ERROR()                                                   \
+do {                                                                          \
+	gossip_err("*****************************************************\n");\
+	gossip_err("ORANGEFS Device Error:  You cannot open the device file ");  \
+	gossip_err("\n/dev/%s more than once.  Please make sure that\nthere " \
+		   "are no ", ORANGEFS_REQDEVICE_NAME);                          \
+	gossip_err("instances of a program using this device\ncurrently "     \
+		   "running. (You must verify this!)\n");                     \
+	gossip_err("For example, you can use the lsof program as follows:\n");\
+	gossip_err("'lsof | grep %s' (run this as root)\n",                   \
+		   ORANGEFS_REQDEVICE_NAME);                                     \
+	gossip_err("  open_access_count = %d\n", open_access_count);          \
+	gossip_err("*****************************************************\n");\
+} while (0)
+
+static int hash_func(__u64 tag, int table_size)
+{
+	return do_div(tag, (unsigned int)table_size);
+}
+
+static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op)
+{
+	int index = hash_func(op->tag, hash_table_size);
+
+	list_add_tail(&op->list, &htable_ops_in_progress[index]);
+}
+
+/*
+ * find the op with this tag and remove it from the in progress
+ * hash table.
+ */
+static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
+{
+	struct orangefs_kernel_op_s *op, *next;
+	int index;
+
+	index = hash_func(tag, hash_table_size);
+
+	spin_lock(&htable_ops_in_progress_lock);
+	list_for_each_entry_safe(op,
+				 next,
+				 &htable_ops_in_progress[index],
+				 list) {
+		if (op->tag == tag && !op_state_purged(op) &&
+		    !op_state_given_up(op)) {
+			list_del_init(&op->list);
+			spin_unlock(&htable_ops_in_progress_lock);
+			return op;
+		}
+	}
+
+	spin_unlock(&htable_ops_in_progress_lock);
+	return NULL;
+}
+
+/* Returns whether any FS are still pending remounted */
+static int mark_all_pending_mounts(void)
+{
+	int unmounted = 1;
+	struct orangefs_sb_info_s *orangefs_sb = NULL;
+
+	spin_lock(&orangefs_superblocks_lock);
+	list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
+		/* All of these file system require a remount */
+		orangefs_sb->mount_pending = 1;
+		unmounted = 0;
+	}
+	spin_unlock(&orangefs_superblocks_lock);
+	return unmounted;
+}
+
+/*
+ * Determine if a given file system needs to be remounted or not
+ *  Returns -1 on error
+ *           0 if already mounted
+ *           1 if needs remount
+ */
+static int fs_mount_pending(__s32 fsid)
+{
+	int mount_pending = -1;
+	struct orangefs_sb_info_s *orangefs_sb = NULL;
+
+	spin_lock(&orangefs_superblocks_lock);
+	list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
+		if (orangefs_sb->fs_id == fsid) {
+			mount_pending = orangefs_sb->mount_pending;
+			break;
+		}
+	}
+	spin_unlock(&orangefs_superblocks_lock);
+	return mount_pending;
+}
+
+static int orangefs_devreq_open(struct inode *inode, struct file *file)
+{
+	int ret = -EINVAL;
+
+	if (!(file->f_flags & O_NONBLOCK)) {
+		gossip_err("%s: device cannot be opened in blocking mode\n",
+			   __func__);
+		goto out;
+	}
+	ret = -EACCES;
+	gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n");
+	mutex_lock(&devreq_mutex);
+
+	if (open_access_count == 0) {
+		open_access_count = 1;
+		ret = 0;
+	} else {
+		DUMP_DEVICE_ERROR();
+	}
+	mutex_unlock(&devreq_mutex);
+
+out:
+
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "pvfs2-client-core: open device complete (ret = %d)\n",
+		     ret);
+	return ret;
+}
+
+/* Function for read() callers into the device */
+static ssize_t orangefs_devreq_read(struct file *file,
+				 char __user *buf,
+				 size_t count, loff_t *offset)
+{
+	struct orangefs_kernel_op_s *op, *temp;
+	__s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
+	static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
+	struct orangefs_kernel_op_s *cur_op = NULL;
+	unsigned long ret;
+
+	/* We do not support blocking IO. */
+	if (!(file->f_flags & O_NONBLOCK)) {
+		gossip_err("%s: blocking read from client-core.\n",
+			   __func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * The client will do an ioctl to find MAX_DEV_REQ_UPSIZE, then
+	 * always read with that size buffer.
+	 */
+	if (count != MAX_DEV_REQ_UPSIZE) {
+		gossip_err("orangefs: client-core tried to read wrong size\n");
+		return -EINVAL;
+	}
+
+restart:
+	/* Get next op (if any) from top of list. */
+	spin_lock(&orangefs_request_list_lock);
+	list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
+		__s32 fsid;
+		/* This lock is held past the end of the loop when we break. */
+		spin_lock(&op->lock);
+		if (unlikely(op_state_purged(op) || op_state_given_up(op))) {
+			spin_unlock(&op->lock);
+			continue;
+		}
+
+		fsid = fsid_of_op(op);
+		if (fsid != ORANGEFS_FS_ID_NULL) {
+			int ret;
+			/* Skip ops whose filesystem needs to be mounted. */
+			ret = fs_mount_pending(fsid);
+			if (ret == 1) {
+				gossip_debug(GOSSIP_DEV_DEBUG,
+				    "%s: mount pending, skipping op tag "
+				    "%llu %s\n",
+				    __func__,
+				    llu(op->tag),
+				    get_opname_string(op));
+				spin_unlock(&op->lock);
+				continue;
+			/*
+			 * Skip ops whose filesystem we don't know about unless
+			 * it is being mounted.
+			 */
+			/* XXX: is there a better way to detect this? */
+			} else if (ret == -1 &&
+				   !(op->upcall.type ==
+					ORANGEFS_VFS_OP_FS_MOUNT ||
+				     op->upcall.type ==
+					ORANGEFS_VFS_OP_GETATTR)) {
+				gossip_debug(GOSSIP_DEV_DEBUG,
+				    "orangefs: skipping op tag %llu %s\n",
+				    llu(op->tag), get_opname_string(op));
+				gossip_err(
+				    "orangefs: ERROR: fs_mount_pending %d\n",
+				    fsid);
+				spin_unlock(&op->lock);
+				continue;
+			}
+		}
+		/*
+		 * Either this op does not pertain to a filesystem, is mounting
+		 * a filesystem, or pertains to a mounted filesystem. Let it
+		 * through.
+		 */
+		cur_op = op;
+		break;
+	}
+
+	/*
+	 * At this point we either have a valid op and can continue or have not
+	 * found an op and must ask the client to try again later.
+	 */
+	if (!cur_op) {
+		spin_unlock(&orangefs_request_list_lock);
+		return -EAGAIN;
+	}
+
+	gossip_debug(GOSSIP_DEV_DEBUG, "%s: reading op tag %llu %s\n",
+		     __func__,
+		     llu(cur_op->tag),
+		     get_opname_string(cur_op));
+
+	/*
+	 * Such an op should never be on the list in the first place. If so, we
+	 * will abort.
+	 */
+	if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
+		gossip_err("orangefs: ERROR: Current op already queued.\n");
+		list_del_init(&cur_op->list);
+		spin_unlock(&cur_op->lock);
+		spin_unlock(&orangefs_request_list_lock);
+		return -EAGAIN;
+	}
+
+	list_del_init(&cur_op->list);
+	spin_unlock(&orangefs_request_list_lock);
+
+	spin_unlock(&cur_op->lock);
+
+	/* Push the upcall out. */
+	ret = copy_to_user(buf, &proto_ver, sizeof(__s32));
+	if (ret != 0)
+		goto error;
+	ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32));
+	if (ret != 0)
+		goto error;
+	ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64));
+	if (ret != 0)
+		goto error;
+	ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall,
+			   sizeof(struct orangefs_upcall_s));
+	if (ret != 0)
+		goto error;
+
+	spin_lock(&htable_ops_in_progress_lock);
+	spin_lock(&cur_op->lock);
+	if (unlikely(op_state_given_up(cur_op))) {
+		spin_unlock(&cur_op->lock);
+		spin_unlock(&htable_ops_in_progress_lock);
+		complete(&cur_op->waitq);
+		goto restart;
+	}
+
+	/*
+	 * Set the operation to be in progress and move it between lists since
+	 * it has been sent to the client.
+	 */
+	set_op_state_inprogress(cur_op);
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "%s: 1 op:%s: op_state:%d: process:%s:\n",
+		     __func__,
+		     get_opname_string(cur_op),
+		     cur_op->op_state,
+		     current->comm);
+	orangefs_devreq_add_op(cur_op);
+	spin_unlock(&cur_op->lock);
+	spin_unlock(&htable_ops_in_progress_lock);
+
+	/* The client only asks to read one size buffer. */
+	return MAX_DEV_REQ_UPSIZE;
+error:
+	/*
+	 * We were unable to copy the op data to the client. Put the op back in
+	 * list. If client has crashed, the op will be purged later when the
+	 * device is released.
+	 */
+	gossip_err("orangefs: Failed to copy data to user space\n");
+	spin_lock(&orangefs_request_list_lock);
+	spin_lock(&cur_op->lock);
+	if (likely(!op_state_given_up(cur_op))) {
+		set_op_state_waiting(cur_op);
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "%s: 2 op:%s: op_state:%d: process:%s:\n",
+			     __func__,
+			     get_opname_string(cur_op),
+			     cur_op->op_state,
+			     current->comm);
+		list_add(&cur_op->list, &orangefs_request_list);
+		spin_unlock(&cur_op->lock);
+	} else {
+		spin_unlock(&cur_op->lock);
+		complete(&cur_op->waitq);
+	}
+	spin_unlock(&orangefs_request_list_lock);
+	return -EFAULT;
+}
+
+/*
+ * Function for writev() callers into the device.
+ *
+ * Userspace should have written:
+ *  - __u32 version
+ *  - __u32 magic
+ *  - __u64 tag
+ *  - struct orangefs_downcall_s
+ *  - trailer buffer (in the case of READDIR operations)
+ */
+static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
+				      struct iov_iter *iter)
+{
+	ssize_t ret;
+	struct orangefs_kernel_op_s *op = NULL;
+	struct {
+		__u32 version;
+		__u32 magic;
+		__u64 tag;
+	} head;
+	int total = ret = iov_iter_count(iter);
+	int n;
+	int downcall_size = sizeof(struct orangefs_downcall_s);
+	int head_size = sizeof(head);
+
+	gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n",
+		     __func__,
+		     total,
+		     ret);
+
+        if (total < MAX_DEV_REQ_DOWNSIZE) {
+		gossip_err("%s: total:%d: must be at least:%u:\n",
+			   __func__,
+			   total,
+			   (unsigned int) MAX_DEV_REQ_DOWNSIZE);
+		return -EFAULT;
+	}
+     
+	n = copy_from_iter(&head, head_size, iter);
+	if (n < head_size) {
+		gossip_err("%s: failed to copy head.\n", __func__);
+		return -EFAULT;
+	}
+
+	if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) {
+		gossip_err("%s: userspace claims version"
+			   "%d, minimum version required: %d.\n",
+			   __func__,
+			   head.version,
+			   ORANGEFS_MINIMUM_USERSPACE_VERSION);
+		return -EPROTO;
+	}
+
+	if (head.magic != ORANGEFS_DEVREQ_MAGIC) {
+		gossip_err("Error: Device magic number does not match.\n");
+		return -EPROTO;
+	}
+
+	/* remove the op from the in progress hash table */
+	op = orangefs_devreq_remove_op(head.tag);
+	if (!op) {
+		gossip_err("WARNING: No one's waiting for tag %llu\n",
+			   llu(head.tag));
+		return ret;
+	}
+
+	n = copy_from_iter(&op->downcall, downcall_size, iter);
+	if (n != downcall_size) {
+		gossip_err("%s: failed to copy downcall.\n", __func__);
+		goto Efault;
+	}
+
+	if (op->downcall.status)
+		goto wakeup;
+
+	/*
+	 * We've successfully peeled off the head and the downcall. 
+	 * Something has gone awry if total doesn't equal the
+	 * sum of head_size, downcall_size and trailer_size.
+	 */
+	if ((head_size + downcall_size + op->downcall.trailer_size) != total) {
+		gossip_err("%s: funky write, head_size:%d"
+			   ": downcall_size:%d: trailer_size:%lld"
+			   ": total size:%d:\n",
+			   __func__,
+			   head_size,
+			   downcall_size,
+			   op->downcall.trailer_size,
+			   total);
+		goto Efault;
+	}
+
+	/* Only READDIR operations should have trailers. */
+	if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) &&
+	    (op->downcall.trailer_size != 0)) {
+		gossip_err("%s: %x operation with trailer.",
+			   __func__,
+			   op->downcall.type);
+		goto Efault;
+	}
+
+	/* READDIR operations should always have trailers. */
+	if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) &&
+	    (op->downcall.trailer_size == 0)) {
+		gossip_err("%s: %x operation with no trailer.",
+			   __func__,
+			   op->downcall.type);
+		goto Efault;
+	}
+
+	if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
+		goto wakeup;
+
+	op->downcall.trailer_buf =
+		vmalloc(op->downcall.trailer_size);
+	if (op->downcall.trailer_buf == NULL) {
+		gossip_err("%s: failed trailer vmalloc.\n",
+			   __func__);
+		goto Enomem;
+	}
+	memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
+	n = copy_from_iter(op->downcall.trailer_buf,
+			   op->downcall.trailer_size,
+			   iter);
+	if (n != op->downcall.trailer_size) {
+		gossip_err("%s: failed to copy trailer.\n", __func__);
+		vfree(op->downcall.trailer_buf);
+		goto Efault;
+	}
+
+wakeup:
+	/*
+	 * Return to vfs waitqueue, and back to service_operation
+	 * through wait_for_matching_downcall. 
+	 */
+	spin_lock(&op->lock);
+	if (unlikely(op_is_cancel(op))) {
+		spin_unlock(&op->lock);
+		put_cancel(op);
+	} else if (unlikely(op_state_given_up(op))) {
+		spin_unlock(&op->lock);
+		complete(&op->waitq);
+	} else {
+		set_op_state_serviced(op);
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "%s: op:%s: op_state:%d: process:%s:\n",
+			     __func__,
+			     get_opname_string(op),
+			     op->op_state,
+			     current->comm);
+		spin_unlock(&op->lock);
+	}
+	return ret;
+
+Efault:
+	op->downcall.status = -(ORANGEFS_ERROR_BIT | 9);
+	ret = -EFAULT;
+	goto wakeup;
+
+Enomem:
+	op->downcall.status = -(ORANGEFS_ERROR_BIT | 8);
+	ret = -ENOMEM;
+	goto wakeup;
+}
+
+/*
+ * NOTE: gets called when the last reference to this device is dropped.
+ * Using the open_access_count variable, we enforce a reference count
+ * on this file so that it can be opened by only one process at a time.
+ * the devreq_mutex is used to make sure all i/o has completed
+ * before we call orangefs_bufmap_finalize, and similar such tricky
+ * situations
+ */
+static int orangefs_devreq_release(struct inode *inode, struct file *file)
+{
+	int unmounted = 0;
+
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "%s:pvfs2-client-core: exiting, closing device\n",
+		     __func__);
+
+	mutex_lock(&devreq_mutex);
+	orangefs_bufmap_finalize();
+
+	open_access_count = -1;
+
+	unmounted = mark_all_pending_mounts();
+	gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n",
+		     (unmounted ? "UNMOUNTED" : "MOUNTED"));
+
+	purge_waiting_ops();
+	purge_inprogress_ops();
+
+	orangefs_bufmap_run_down();
+
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "pvfs2-client-core: device close complete\n");
+	open_access_count = 0;
+	mutex_unlock(&devreq_mutex);
+	return 0;
+}
+
+int is_daemon_in_service(void)
+{
+	int in_service;
+
+	/*
+	 * What this function does is checks if client-core is alive
+	 * based on the access count we maintain on the device.
+	 */
+	mutex_lock(&devreq_mutex);
+	in_service = open_access_count == 1 ? 0 : -EIO;
+	mutex_unlock(&devreq_mutex);
+	return in_service;
+}
+
+bool __is_daemon_in_service(void)
+{
+	return open_access_count == 1;
+}
+
+static inline long check_ioctl_command(unsigned int command)
+{
+	/* Check for valid ioctl codes */
+	if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) {
+		gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
+			command,
+			_IOC_TYPE(command),
+			ORANGEFS_DEV_MAGIC);
+		return -EINVAL;
+	}
+	/* and valid ioctl commands */
+	if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
+		gossip_err("Invalid ioctl command number [%d >= %d]\n",
+			   _IOC_NR(command), ORANGEFS_DEV_MAXNR);
+		return -ENOIOCTLCMD;
+	}
+	return 0;
+}
+
+static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
+{
+	static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
+	static __s32 max_up_size = MAX_DEV_REQ_UPSIZE;
+	static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE;
+	struct ORANGEFS_dev_map_desc user_desc;
+	int ret = 0;
+	struct dev_mask_info_s mask_info = { 0 };
+	struct dev_mask2_info_s mask2_info = { 0, 0 };
+	int upstream_kmod = 1;
+	struct orangefs_sb_info_s *orangefs_sb;
+
+	/* mtmoore: add locking here */
+
+	switch (command) {
+	case ORANGEFS_DEV_GET_MAGIC:
+		return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
+			-EIO :
+			0);
+	case ORANGEFS_DEV_GET_MAX_UPSIZE:
+		return ((put_user(max_up_size,
+				  (__s32 __user *) arg) == -EFAULT) ?
+					-EIO :
+					0);
+	case ORANGEFS_DEV_GET_MAX_DOWNSIZE:
+		return ((put_user(max_down_size,
+				  (__s32 __user *) arg) == -EFAULT) ?
+					-EIO :
+					0);
+	case ORANGEFS_DEV_MAP:
+		ret = copy_from_user(&user_desc,
+				     (struct ORANGEFS_dev_map_desc __user *)
+				     arg,
+				     sizeof(struct ORANGEFS_dev_map_desc));
+		/* WTF -EIO and not -EFAULT? */
+		return ret ? -EIO : orangefs_bufmap_initialize(&user_desc);
+	case ORANGEFS_DEV_REMOUNT_ALL:
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "%s: got ORANGEFS_DEV_REMOUNT_ALL\n",
+			     __func__);
+
+		/*
+		 * remount all mounted orangefs volumes to regain the lost
+		 * dynamic mount tables (if any) -- NOTE: this is done
+		 * without keeping the superblock list locked due to the
+		 * upcall/downcall waiting.  also, the request mutex is
+		 * used to ensure that no operations will be serviced until
+		 * all of the remounts are serviced (to avoid ops between
+		 * mounts to fail)
+		 */
+		ret = mutex_lock_interruptible(&request_mutex);
+		if (ret < 0)
+			return ret;
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "%s: priority remount in progress\n",
+			     __func__);
+		spin_lock(&orangefs_superblocks_lock);
+		list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
+			/*
+			 * We have to drop the spinlock, so entries can be
+			 * removed.  They can't be freed, though, so we just
+			 * keep the forward pointers and zero the back ones -
+			 * that way we can get to the rest of the list.
+			 */
+			if (!orangefs_sb->list.prev)
+				continue;
+			gossip_debug(GOSSIP_DEV_DEBUG,
+				     "%s: Remounting SB %p\n",
+				     __func__,
+				     orangefs_sb);
+
+			spin_unlock(&orangefs_superblocks_lock);
+			ret = orangefs_remount(orangefs_sb);
+			spin_lock(&orangefs_superblocks_lock);
+			if (ret) {
+				gossip_debug(GOSSIP_DEV_DEBUG,
+					     "SB %p remount failed\n",
+					     orangefs_sb);
+				break;
+			}
+		}
+		spin_unlock(&orangefs_superblocks_lock);
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "%s: priority remount complete\n",
+			     __func__);
+		mutex_unlock(&request_mutex);
+		return ret;
+
+	case ORANGEFS_DEV_UPSTREAM:
+		ret = copy_to_user((void __user *)arg,
+				    &upstream_kmod,
+				    sizeof(upstream_kmod));
+
+		if (ret != 0)
+			return -EIO;
+		else
+			return ret;
+
+	case ORANGEFS_DEV_CLIENT_MASK:
+		ret = copy_from_user(&mask2_info,
+				     (void __user *)arg,
+				     sizeof(struct dev_mask2_info_s));
+
+		if (ret != 0)
+			return -EIO;
+
+		client_debug_mask.mask1 = mask2_info.mask1_value;
+		client_debug_mask.mask2 = mask2_info.mask2_value;
+
+		pr_info("%s: client debug mask has been been received "
+			":%llx: :%llx:\n",
+			__func__,
+			(unsigned long long)client_debug_mask.mask1,
+			(unsigned long long)client_debug_mask.mask2);
+
+		return ret;
+
+	case ORANGEFS_DEV_CLIENT_STRING:
+		ret = copy_from_user(&client_debug_array_string,
+				     (void __user *)arg,
+				     ORANGEFS_MAX_DEBUG_STRING_LEN);
+		/*
+		 * The real client-core makes an effort to ensure
+		 * that actual strings that aren't too long to fit in
+		 * this buffer is what we get here. We're going to use
+		 * string functions on the stuff we got, so we'll make
+		 * this extra effort to try and keep from
+		 * flowing out of this buffer when we use the string
+		 * functions, even if somehow the stuff we end up
+		 * with here is garbage.
+		 */
+		client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
+			'\0';
+		
+		if (ret != 0) {
+			pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
+				__func__);
+			return -EIO;
+		}
+
+		pr_info("%s: client debug array string has been received.\n",
+			__func__);
+
+		if (!help_string_initialized) {
+
+			/* Free the "we don't know yet" default string... */
+			kfree(debug_help_string);
+
+			/* build a proper debug help string */
+			if (orangefs_prepare_debugfs_help_string(0)) {
+				gossip_err("%s: no debug help string \n",
+					   __func__);
+				return -EIO;
+			}
+
+			/* Replace the boilerplate boot-time debug-help file. */
+			debugfs_remove(help_file_dentry);
+
+			help_file_dentry =
+				debugfs_create_file(
+					ORANGEFS_KMOD_DEBUG_HELP_FILE,
+					0444,
+					debug_dir,
+					debug_help_string,
+					&debug_help_fops);
+
+			if (!help_file_dentry) {
+				gossip_err("%s: debugfs_create_file failed for"
+					   " :%s:!\n",
+					   __func__,
+					   ORANGEFS_KMOD_DEBUG_HELP_FILE);
+				return -EIO;
+			}
+		}
+
+		debug_mask_to_string(&client_debug_mask, 1);
+
+		debugfs_remove(client_debug_dentry);
+
+		orangefs_client_debug_init();
+
+		help_string_initialized++;
+
+		return ret;
+
+	case ORANGEFS_DEV_DEBUG:
+		ret = copy_from_user(&mask_info,
+				     (void __user *)arg,
+				     sizeof(mask_info));
+
+		if (ret != 0)
+			return -EIO;
+
+		if (mask_info.mask_type == KERNEL_MASK) {
+			if ((mask_info.mask_value == 0)
+			    && (kernel_mask_set_mod_init)) {
+				/*
+				 * the kernel debug mask was set when the
+				 * kernel module was loaded; don't override
+				 * it if the client-core was started without
+				 * a value for ORANGEFS_KMODMASK.
+				 */
+				return 0;
+			}
+			debug_mask_to_string(&mask_info.mask_value,
+					     mask_info.mask_type);
+			gossip_debug_mask = mask_info.mask_value;
+			pr_info("%s: kernel debug mask has been modified to "
+				":%s: :%llx:\n",
+				__func__,
+				kernel_debug_string,
+				(unsigned long long)gossip_debug_mask);
+		} else if (mask_info.mask_type == CLIENT_MASK) {
+			debug_mask_to_string(&mask_info.mask_value,
+					     mask_info.mask_type);
+			pr_info("%s: client debug mask has been modified to"
+				":%s: :%llx:\n",
+				__func__,
+				client_debug_string,
+				llu(mask_info.mask_value));
+		} else {
+			gossip_lerr("Invalid mask type....\n");
+			return -EINVAL;
+		}
+
+		return ret;
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+	return -ENOIOCTLCMD;
+}
+
+static long orangefs_devreq_ioctl(struct file *file,
+			       unsigned int command, unsigned long arg)
+{
+	long ret;
+
+	/* Check for properly constructed commands */
+	ret = check_ioctl_command(command);
+	if (ret < 0)
+		return (int)ret;
+
+	return (int)dispatch_ioctl_command(command, arg);
+}
+
+#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */
+
+/*  Compat structure for the ORANGEFS_DEV_MAP ioctl */
+struct ORANGEFS_dev_map_desc32 {
+	compat_uptr_t ptr;
+	__s32 total_size;
+	__s32 size;
+	__s32 count;
+};
+
+static unsigned long translate_dev_map26(unsigned long args, long *error)
+{
+	struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args;
+	/*
+	 * Depending on the architecture, allocate some space on the
+	 * user-call-stack based on our expected layout.
+	 */
+	struct ORANGEFS_dev_map_desc __user *p =
+	    compat_alloc_user_space(sizeof(*p));
+	compat_uptr_t addr;
+
+	*error = 0;
+	/* get the ptr from the 32 bit user-space */
+	if (get_user(addr, &p32->ptr))
+		goto err;
+	/* try to put that into a 64-bit layout */
+	if (put_user(compat_ptr(addr), &p->ptr))
+		goto err;
+	/* copy the remaining fields */
+	if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
+		goto err;
+	if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
+		goto err;
+	if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
+		goto err;
+	return (unsigned long)p;
+err:
+	*error = -EFAULT;
+	return 0;
+}
+
+/*
+ * 32 bit user-space apps' ioctl handlers when kernel modules
+ * is compiled as a 64 bit one
+ */
+static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
+				      unsigned long args)
+{
+	long ret;
+	unsigned long arg = args;
+
+	/* Check for properly constructed commands */
+	ret = check_ioctl_command(cmd);
+	if (ret < 0)
+		return ret;
+	if (cmd == ORANGEFS_DEV_MAP) {
+		/*
+		 * convert the arguments to what we expect internally
+		 * in kernel space
+		 */
+		arg = translate_dev_map26(args, &ret);
+		if (ret < 0) {
+			gossip_err("Could not translate dev map\n");
+			return ret;
+		}
+	}
+	/* no other ioctl requires translation */
+	return dispatch_ioctl_command(cmd, arg);
+}
+
+#endif /* CONFIG_COMPAT is in .config */
+
+/* the assigned character device major number */
+static int orangefs_dev_major;
+
+/*
+ * Initialize orangefs device specific state:
+ * Must be called at module load time only
+ */
+int orangefs_dev_init(void)
+{
+	/* register orangefs-req device  */
+	orangefs_dev_major = register_chrdev(0,
+					  ORANGEFS_REQDEVICE_NAME,
+					  &orangefs_devreq_file_operations);
+	if (orangefs_dev_major < 0) {
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "Failed to register /dev/%s (error %d)\n",
+			     ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
+		return orangefs_dev_major;
+	}
+
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "*** /dev/%s character device registered ***\n",
+		     ORANGEFS_REQDEVICE_NAME);
+	gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
+		     ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
+	return 0;
+}
+
+void orangefs_dev_cleanup(void)
+{
+	unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME);
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "*** /dev/%s character device unregistered ***\n",
+		     ORANGEFS_REQDEVICE_NAME);
+}
+
+static unsigned int orangefs_devreq_poll(struct file *file,
+				      struct poll_table_struct *poll_table)
+{
+	int poll_revent_mask = 0;
+
+	poll_wait(file, &orangefs_request_list_waitq, poll_table);
+
+	if (!list_empty(&orangefs_request_list))
+		poll_revent_mask |= POLL_IN;
+	return poll_revent_mask;
+}
+
+const struct file_operations orangefs_devreq_file_operations = {
+	.owner = THIS_MODULE,
+	.read = orangefs_devreq_read,
+	.write_iter = orangefs_devreq_write_iter,
+	.open = orangefs_devreq_open,
+	.release = orangefs_devreq_release,
+	.unlocked_ioctl = orangefs_devreq_ioctl,
+
+#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */
+	.compat_ioctl = orangefs_devreq_compat_ioctl,
+#endif
+	.poll = orangefs_devreq_poll
+};

diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c
new file mode 100644
index 0000000..ba7dec4
--- /dev/null
+++ b/fs/orangefs/dir.c

@@ -0,0 +1,398 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+/*
+ * decode routine used by kmod to deal with the blob sent from
+ * userspace for readdirs. The blob contains zero or more of these
+ * sub-blobs:
+ *   __u32 - represents length of the character string that follows.
+ *   string - between 1 and ORANGEFS_NAME_MAX bytes long.
+ *   padding - (if needed) to cause the __u32 plus the string to be
+ *             eight byte aligned.
+ *   khandle - sizeof(khandle) bytes.
+ */
+static long decode_dirents(char *ptr, size_t size,
+                           struct orangefs_readdir_response_s *readdir)
+{
+	int i;
+	struct orangefs_readdir_response_s *rd =
+		(struct orangefs_readdir_response_s *) ptr;
+	char *buf = ptr;
+	int khandle_size = sizeof(struct orangefs_khandle);
+	size_t offset = offsetof(struct orangefs_readdir_response_s,
+				dirent_array);
+	/* 8 reflects eight byte alignment */
+	int smallest_blob = khandle_size + 8;
+	__u32 len;
+	int aligned_len;
+	int sizeof_u32 = sizeof(__u32);
+	long ret;
+
+	gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
+
+	/* size is = offset on empty dirs, > offset on non-empty dirs... */
+	if (size < offset) {
+		gossip_err("%s: size:%zu: offset:%zu:\n",
+			   __func__,
+			   size,
+			   offset);
+		ret = -EINVAL;
+		goto out;
+	}
+
+        if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
+		gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
+			   __func__,
+			   size,
+			   readdir->orangefs_dirent_outcount);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	readdir->token = rd->token;
+	readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
+	readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
+					sizeof(*readdir->dirent_array),
+					GFP_KERNEL);
+	if (readdir->dirent_array == NULL) {
+		gossip_err("%s: kcalloc failed.\n", __func__);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	buf += offset;
+	size -= offset;
+
+	for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
+		if (size < smallest_blob) {
+			gossip_err("%s: size:%zu: smallest_blob:%d:\n",
+				   __func__,
+				   size,
+				   smallest_blob);
+			ret = -EINVAL;
+			goto free;
+		}
+
+		len = *(__u32 *)buf;
+		if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
+			gossip_err("%s: len:%d:\n", __func__, len);
+			ret = -EINVAL;
+			goto free;
+		}
+
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "%s: size:%zu: len:%d:\n",
+			     __func__,
+			     size,
+			     len);
+
+		readdir->dirent_array[i].d_name = buf + sizeof_u32;
+		readdir->dirent_array[i].d_length = len;
+
+		/*
+		 * Calculate "aligned" length of this string and its
+		 * associated __u32 descriptor.
+		 */
+		aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "%s: aligned_len:%d:\n",
+			     __func__,
+			     aligned_len);
+
+		/*
+		 * The end of the blob should coincide with the end
+		 * of the last sub-blob.
+		 */
+		if (size < aligned_len + khandle_size) {
+			gossip_err("%s: ran off the end of the blob.\n",
+				   __func__);
+			ret = -EINVAL;
+			goto free;
+		}
+		size -= aligned_len + khandle_size;
+
+		buf += aligned_len;
+
+		readdir->dirent_array[i].khandle =
+			*(struct orangefs_khandle *) buf;
+		buf += khandle_size;
+	}
+	ret = buf - ptr;
+	gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
+	goto out;
+
+free:
+	kfree(readdir->dirent_array);
+	readdir->dirent_array = NULL;
+
+out:
+	return ret;
+}
+
+/*
+ * Read directory entries from an instance of an open directory.
+ */
+static int orangefs_readdir(struct file *file, struct dir_context *ctx)
+{
+	int ret = 0;
+	int buffer_index;
+	/*
+	 * ptoken supports Orangefs' distributed directory logic, added
+	 * in 2.9.2.
+	 */
+	__u64 *ptoken = file->private_data;
+	__u64 pos = 0;
+	ino_t ino = 0;
+	struct dentry *dentry = file->f_path.dentry;
+	struct orangefs_kernel_op_s *new_op = NULL;
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
+	int buffer_full = 0;
+	struct orangefs_readdir_response_s readdir_response;
+	void *dents_buf;
+	int i = 0;
+	int len = 0;
+	ino_t current_ino = 0;
+	char *current_entry = NULL;
+	long bytes_decoded;
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "%s: ctx->pos:%lld, ptoken = %llu\n",
+		     __func__,
+		     lld(ctx->pos),
+		     llu(*ptoken));
+
+	pos = (__u64) ctx->pos;
+
+	/* are we done? */
+	if (pos == ORANGEFS_READDIR_END) {
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "Skipping to termination path\n");
+		return 0;
+	}
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "orangefs_readdir called on %s (pos=%llu)\n",
+		     dentry->d_name.name, llu(pos));
+
+	memset(&readdir_response, 0, sizeof(readdir_response));
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
+	if (!new_op)
+		return -ENOMEM;
+
+	/*
+	 * Only the indices are shared. No memory is actually shared, but the
+	 * mechanism is used.
+	 */
+	new_op->uses_shared_memory = 1;
+	new_op->upcall.req.readdir.refn = orangefs_inode->refn;
+	new_op->upcall.req.readdir.max_dirent_count =
+	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
+		     __func__,
+		     &new_op->upcall.req.readdir.refn.khandle);
+
+	new_op->upcall.req.readdir.token = *ptoken;
+
+get_new_buffer_index:
+	buffer_index = orangefs_readdir_index_get();
+	if (buffer_index < 0) {
+		ret = buffer_index;
+		gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
+			    ret);
+		goto out_free_op;
+	}
+	new_op->upcall.req.readdir.buf_index = buffer_index;
+
+	ret = service_operation(new_op,
+				"orangefs_readdir",
+				get_interruptible_flag(dentry->d_inode));
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "Readdir downcall status is %d.  ret:%d\n",
+		     new_op->downcall.status,
+		     ret);
+
+	orangefs_readdir_index_put(buffer_index);
+
+	if (ret == -EAGAIN && op_state_purged(new_op)) {
+		/* Client-core indices are invalid after it restarted. */
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			"%s: Getting new buffer_index for retry of readdir..\n",
+			 __func__);
+		goto get_new_buffer_index;
+	}
+
+	if (ret == -EIO && op_state_purged(new_op)) {
+		gossip_err("%s: Client is down. Aborting readdir call.\n",
+			__func__);
+		goto out_free_op;
+	}
+
+	if (ret < 0 || new_op->downcall.status != 0) {
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "Readdir request failed.  Status:%d\n",
+			     new_op->downcall.status);
+		if (ret >= 0)
+			ret = new_op->downcall.status;
+		goto out_free_op;
+	}
+
+	dents_buf = new_op->downcall.trailer_buf;
+	if (dents_buf == NULL) {
+		gossip_err("Invalid NULL buffer in readdir response\n");
+		ret = -ENOMEM;
+		goto out_free_op;
+	}
+
+	bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
+					&readdir_response);
+	if (bytes_decoded < 0) {
+		ret = bytes_decoded;
+		gossip_err("Could not decode readdir from buffer %d\n", ret);
+		goto out_vfree;
+	}
+
+	if (bytes_decoded != new_op->downcall.trailer_size) {
+		gossip_err("orangefs_readdir: # bytes decoded (%ld) "
+			   "!= trailer size (%ld)\n",
+			   bytes_decoded,
+			   (long)new_op->downcall.trailer_size);
+		ret = -EINVAL;
+		goto out_destroy_handle;
+	}
+
+	/*
+	 *  orangefs doesn't actually store dot and dot-dot, but
+	 *  we need to have them represented.
+	 */
+	if (pos == 0) {
+		ino = get_ino_from_khandle(dentry->d_inode);
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "%s: calling dir_emit of \".\" with pos = %llu\n",
+			     __func__,
+			     llu(pos));
+		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
+		pos += 1;
+	}
+
+	if (pos == 1) {
+		ino = get_parent_ino_from_dentry(dentry);
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
+			     __func__,
+			     llu(pos));
+		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
+		pos += 1;
+	}
+
+	/*
+	 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
+	 * to prevent "finding" dot and dot-dot on any iteration
+	 * other than the first.
+	 */
+	if (ctx->pos == ORANGEFS_ITERATE_NEXT)
+		ctx->pos = 0;
+
+	gossip_debug(GOSSIP_DIR_DEBUG,
+		     "%s: dirent_outcount:%d:\n",
+		     __func__,
+		     readdir_response.orangefs_dirent_outcount);
+	for (i = ctx->pos;
+	     i < readdir_response.orangefs_dirent_outcount;
+	     i++) {
+		len = readdir_response.dirent_array[i].d_length;
+		current_entry = readdir_response.dirent_array[i].d_name;
+		current_ino = orangefs_khandle_to_ino(
+			&readdir_response.dirent_array[i].khandle);
+
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			     "calling dir_emit for %s with len %d"
+			     ", ctx->pos %ld\n",
+			     current_entry,
+			     len,
+			     (unsigned long)ctx->pos);
+		/*
+		 * type is unknown. We don't return object type
+		 * in the dirent_array. This leaves getdents
+		 * clueless about type.
+		 */
+		ret =
+		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
+		if (!ret)
+			break;
+		ctx->pos++;
+		gossip_debug(GOSSIP_DIR_DEBUG,
+			      "%s: ctx->pos:%lld\n",
+			      __func__,
+			      lld(ctx->pos));
+
+	}
+
+	/*
+	 * we ran all the way through the last batch, set up for
+	 * getting another batch...
+	 */
+	if (ret) {
+		*ptoken = readdir_response.token;
+		ctx->pos = ORANGEFS_ITERATE_NEXT;
+	}
+
+	/*
+	 * Did we hit the end of the directory?
+	 */
+	if (readdir_response.token == ORANGEFS_READDIR_END &&
+	    !buffer_full) {
+		gossip_debug(GOSSIP_DIR_DEBUG,
+		"End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
+		ctx->pos = ORANGEFS_READDIR_END;
+	}
+
+out_destroy_handle:
+	/* kfree(NULL) is safe */
+	kfree(readdir_response.dirent_array);
+out_vfree:
+	gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
+	vfree(dents_buf);
+out_free_op:
+	op_release(new_op);
+	gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
+	return ret;
+}
+
+static int orangefs_dir_open(struct inode *inode, struct file *file)
+{
+	__u64 *ptoken;
+
+	file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
+	if (!file->private_data)
+		return -ENOMEM;
+
+	ptoken = file->private_data;
+	*ptoken = ORANGEFS_READDIR_START;
+	return 0;
+}
+
+static int orangefs_dir_release(struct inode *inode, struct file *file)
+{
+	orangefs_flush_inode(inode);
+	kfree(file->private_data);
+	return 0;
+}
+
+/** ORANGEFS implementation of VFS directory operations */
+const struct file_operations orangefs_dir_operations = {
+	.read = generic_read_dir,
+	.iterate = orangefs_readdir,
+	.open = orangefs_dir_open,
+	.release = orangefs_dir_release,
+};

diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h
new file mode 100644
index 0000000..66b9921
--- /dev/null
+++ b/fs/orangefs/downcall.h

@@ -0,0 +1,133 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Definitions of downcalls used in Linux kernel module.
+ */
+
+#ifndef __DOWNCALL_H
+#define __DOWNCALL_H
+
+/*
+ * Sanitized the device-client core interaction
+ * for clean 32-64 bit usage
+ */
+struct orangefs_io_response {
+	__s64 amt_complete;
+};
+
+struct orangefs_lookup_response {
+	struct orangefs_object_kref refn;
+};
+
+struct orangefs_create_response {
+	struct orangefs_object_kref refn;
+};
+
+struct orangefs_symlink_response {
+	struct orangefs_object_kref refn;
+};
+
+struct orangefs_getattr_response {
+	struct ORANGEFS_sys_attr_s attributes;
+	char link_target[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_mkdir_response {
+	struct orangefs_object_kref refn;
+};
+
+/*
+ * duplication of some system interface structures so that I don't have
+ * to allocate extra memory
+ */
+struct orangefs_dirent {
+	char *d_name;
+	int d_length;
+	struct orangefs_khandle khandle;
+};
+
+struct orangefs_statfs_response {
+	__s64 block_size;
+	__s64 blocks_total;
+	__s64 blocks_avail;
+	__s64 files_total;
+	__s64 files_avail;
+};
+
+struct orangefs_fs_mount_response {
+	__s32 fs_id;
+	__s32 id;
+	struct orangefs_khandle root_khandle;
+};
+
+/* the getxattr response is the attribute value */
+struct orangefs_getxattr_response {
+	__s32 val_sz;
+	__s32 __pad1;
+	char val[ORANGEFS_MAX_XATTR_VALUELEN];
+};
+
+/* the listxattr response is an array of attribute names */
+struct orangefs_listxattr_response {
+	__s32 returned_count;
+	__s32 __pad1;
+	__u64 token;
+	char key[ORANGEFS_MAX_XATTR_LISTLEN * ORANGEFS_MAX_XATTR_NAMELEN];
+	__s32 keylen;
+	__s32 __pad2;
+	__s32 lengths[ORANGEFS_MAX_XATTR_LISTLEN];
+};
+
+struct orangefs_param_response {
+	__s64 value;
+};
+
+#define PERF_COUNT_BUF_SIZE 4096
+struct orangefs_perf_count_response {
+	char buffer[PERF_COUNT_BUF_SIZE];
+};
+
+#define FS_KEY_BUF_SIZE 4096
+struct orangefs_fs_key_response {
+	__s32 fs_keylen;
+	__s32 __pad1;
+	char fs_key[FS_KEY_BUF_SIZE];
+};
+
+struct orangefs_downcall_s {
+	__s32 type;
+	__s32 status;
+	/* currently trailer is used only by readdir */
+	__s64 trailer_size;
+	char *trailer_buf;
+
+	union {
+		struct orangefs_io_response io;
+		struct orangefs_lookup_response lookup;
+		struct orangefs_create_response create;
+		struct orangefs_symlink_response sym;
+		struct orangefs_getattr_response getattr;
+		struct orangefs_mkdir_response mkdir;
+		struct orangefs_statfs_response statfs;
+		struct orangefs_fs_mount_response fs_mount;
+		struct orangefs_getxattr_response getxattr;
+		struct orangefs_listxattr_response listxattr;
+		struct orangefs_param_response param;
+		struct orangefs_perf_count_response perf_count;
+		struct orangefs_fs_key_response fs_key;
+	} resp;
+};
+
+struct orangefs_readdir_response_s {
+	__u64 token;
+	__u64 directory_version;
+	__u32 __pad2;
+	__u32 orangefs_dirent_outcount;
+	struct orangefs_dirent *dirent_array;
+};
+
+#endif /* __DOWNCALL_H */

diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
new file mode 100644
index 0000000..ae92795
--- /dev/null
+++ b/fs/orangefs/file.c

@@ -0,0 +1,717 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS file operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+/*
+ * Copy to client-core's address space from the buffers specified
+ * by the iovec upto total_size bytes.
+ * NOTE: the iovector can either contain addresses which
+ *       can futher be kernel-space or user-space addresses.
+ *       or it can pointers to struct page's
+ */
+static int precopy_buffers(int buffer_index,
+			   struct iov_iter *iter,
+			   size_t total_size)
+{
+	int ret = 0;
+	/*
+	 * copy data from application/kernel by pulling it out
+	 * of the iovec.
+	 */
+
+
+	if (total_size) {
+		ret = orangefs_bufmap_copy_from_iovec(iter,
+						      buffer_index,
+						      total_size);
+		if (ret < 0)
+		gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
+			   __func__,
+			   (long)ret);
+	}
+
+	if (ret < 0)
+		gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
+			__func__,
+			(long)ret);
+	return ret;
+}
+
+/*
+ * Copy from client-core's address space to the buffers specified
+ * by the iovec upto total_size bytes.
+ * NOTE: the iovector can either contain addresses which
+ *       can futher be kernel-space or user-space addresses.
+ *       or it can pointers to struct page's
+ */
+static int postcopy_buffers(int buffer_index,
+			    struct iov_iter *iter,
+			    size_t total_size)
+{
+	int ret = 0;
+	/*
+	 * copy data to application/kernel by pushing it out to
+	 * the iovec. NOTE; target buffers can be addresses or
+	 * struct page pointers.
+	 */
+	if (total_size) {
+		ret = orangefs_bufmap_copy_to_iovec(iter,
+						    buffer_index,
+						    total_size);
+		if (ret < 0)
+			gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
+				__func__,
+				(long)ret);
+	}
+	return ret;
+}
+
+/*
+ * Post and wait for the I/O upcall to finish
+ */
+static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
+		loff_t *offset, struct iov_iter *iter,
+		size_t total_size, loff_t readahead_size)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
+	struct orangefs_kernel_op_s *new_op = NULL;
+	struct iov_iter saved = *iter;
+	int buffer_index = -1;
+	ssize_t ret;
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
+	if (!new_op)
+		return -ENOMEM;
+
+	/* synchronous I/O */
+	new_op->upcall.req.io.readahead_size = readahead_size;
+	new_op->upcall.req.io.io_type = type;
+	new_op->upcall.req.io.refn = orangefs_inode->refn;
+
+populate_shared_memory:
+	/* get a shared buffer index */
+	buffer_index = orangefs_bufmap_get();
+	if (buffer_index < 0) {
+		ret = buffer_index;
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s: orangefs_bufmap_get failure (%zd)\n",
+			     __func__, ret);
+		goto out;
+	}
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): GET op %p -> buffer_index %d\n",
+		     __func__,
+		     handle,
+		     new_op,
+		     buffer_index);
+
+	new_op->uses_shared_memory = 1;
+	new_op->upcall.req.io.buf_index = buffer_index;
+	new_op->upcall.req.io.count = total_size;
+	new_op->upcall.req.io.offset = *offset;
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): offset: %llu total_size: %zd\n",
+		     __func__,
+		     handle,
+		     llu(*offset),
+		     total_size);
+	/*
+	 * Stage 1: copy the buffers into client-core's address space
+	 * precopy_buffers only pertains to writes.
+	 */
+	if (type == ORANGEFS_IO_WRITE) {
+		ret = precopy_buffers(buffer_index,
+				      iter,
+				      total_size);
+		if (ret < 0)
+			goto out;
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): Calling post_io_request with tag (%llu)\n",
+		     __func__,
+		     handle,
+		     llu(new_op->tag));
+
+	/* Stage 2: Service the I/O operation */
+	ret = service_operation(new_op,
+				type == ORANGEFS_IO_WRITE ?
+					"file_write" :
+					"file_read",
+				get_interruptible_flag(inode));
+
+	/*
+	 * If service_operation() returns -EAGAIN #and# the operation was
+	 * purged from orangefs_request_list or htable_ops_in_progress, then
+	 * we know that the client was restarted, causing the shared memory
+	 * area to be wiped clean.  To restart a  write operation in this
+	 * case, we must re-copy the data from the user's iovec to a NEW
+	 * shared memory location. To restart a read operation, we must get
+	 * a new shared memory location.
+	 */
+	if (ret == -EAGAIN && op_state_purged(new_op)) {
+		orangefs_bufmap_put(buffer_index);
+		buffer_index = -1;
+		if (type == ORANGEFS_IO_WRITE)
+			*iter = saved;
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s:going to repopulate_shared_memory.\n",
+			     __func__);
+		goto populate_shared_memory;
+	}
+
+	if (ret < 0) {
+		if (ret == -EINTR) {
+			/*
+			 * We can't return EINTR if any data was written,
+			 * it's not POSIX. It is minimally acceptable
+			 * to give a partial write, the way NFS does.
+			 *
+			 * It would be optimal to return all or nothing,
+			 * but if a userspace write is bigger than
+			 * an IO buffer, and the interrupt occurs
+			 * between buffer writes, that would not be
+			 * possible.
+			 */
+			switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) {
+			/*
+			 * If the op was waiting when the interrupt
+			 * occurred, then the client-core did not
+			 * trigger the write.
+			 */
+			case OP_VFS_STATE_WAITING:
+				if (*offset == 0)
+					ret = -EINTR;
+				else
+					ret = 0;
+				break;
+			/* 
+			 * If the op was in progress when the interrupt
+			 * occurred, then the client-core was able to
+			 * trigger the write.
+			 */
+			case OP_VFS_STATE_INPROGR:
+				ret = total_size;
+				break;
+			default:
+				gossip_err("%s: unexpected op state :%d:.\n",
+					   __func__,
+					   new_op->op_state);
+				ret = 0;
+				break;
+			}
+			gossip_debug(GOSSIP_FILE_DEBUG,
+				     "%s: got EINTR, state:%d: %p\n",
+				     __func__,
+				     new_op->op_state,
+				     new_op);
+		} else {
+			gossip_err("%s: error in %s handle %pU, returning %zd\n",
+				__func__,
+				type == ORANGEFS_IO_READ ?
+					"read from" : "write to",
+				handle, ret);
+		}
+		if (orangefs_cancel_op_in_progress(new_op))
+			return ret;
+
+		goto out;
+	}
+
+	/*
+	 * Stage 3: Post copy buffers from client-core's address space
+	 * postcopy_buffers only pertains to reads.
+	 */
+	if (type == ORANGEFS_IO_READ) {
+		ret = postcopy_buffers(buffer_index,
+				       iter,
+				       new_op->downcall.resp.io.amt_complete);
+		if (ret < 0)
+			goto out;
+	}
+	gossip_debug(GOSSIP_FILE_DEBUG,
+	    "%s(%pU): Amount %s, returned by the sys-io call:%d\n",
+	    __func__,
+	    handle,
+	    type == ORANGEFS_IO_READ ?  "read" : "written",
+	    (int)new_op->downcall.resp.io.amt_complete);
+
+	ret = new_op->downcall.resp.io.amt_complete;
+
+out:
+	if (buffer_index >= 0) {
+		orangefs_bufmap_put(buffer_index);
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): PUT buffer_index %d\n",
+			     __func__, handle, buffer_index);
+		buffer_index = -1;
+	}
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * Common entry point for read/write/readv/writev
+ * This function will dispatch it to either the direct I/O
+ * or buffered I/O path depending on the mount options and/or
+ * augmented/extended metadata attached to the file.
+ * Note: File extended attributes override any mount options.
+ */
+static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file,
+		loff_t *offset, struct iov_iter *iter)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
+	size_t count = iov_iter_count(iter);
+	ssize_t total_count = 0;
+	ssize_t ret = -EINVAL;
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
+		__func__,
+		handle,
+		(int)count);
+
+	if (type == ORANGEFS_IO_WRITE) {
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): proceeding with offset : %llu, "
+			     "size %d\n",
+			     __func__,
+			     handle,
+			     llu(*offset),
+			     (int)count);
+	}
+
+	if (count == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	while (iov_iter_count(iter)) {
+		size_t each_count = iov_iter_count(iter);
+		size_t amt_complete;
+
+		/* how much to transfer in this loop iteration */
+		if (each_count > orangefs_bufmap_size_query())
+			each_count = orangefs_bufmap_size_query();
+
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): size of each_count(%d)\n",
+			     __func__,
+			     handle,
+			     (int)each_count);
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): BEFORE wait_for_io: offset is %d\n",
+			     __func__,
+			     handle,
+			     (int)*offset);
+
+		ret = wait_for_direct_io(type, inode, offset, iter,
+				each_count, 0);
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): return from wait_for_io:%d\n",
+			     __func__,
+			     handle,
+			     (int)ret);
+
+		if (ret < 0)
+			goto out;
+
+		*offset += ret;
+		total_count += ret;
+		amt_complete = ret;
+
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): AFTER wait_for_io: offset is %d\n",
+			     __func__,
+			     handle,
+			     (int)*offset);
+
+		/*
+		 * if we got a short I/O operations,
+		 * fall out and return what we got so far
+		 */
+		if (amt_complete < each_count)
+			break;
+	} /*end while */
+
+out:
+	if (total_count > 0)
+		ret = total_count;
+	if (ret > 0) {
+		if (type == ORANGEFS_IO_READ) {
+			file_accessed(file);
+		} else {
+			SetMtimeFlag(orangefs_inode);
+			inode->i_mtime = CURRENT_TIME;
+			mark_inode_dirty_sync(inode);
+		}
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): Value(%d) returned.\n",
+		     __func__,
+		     handle,
+		     (int)ret);
+
+	return ret;
+}
+
+/*
+ * Read data from a specified offset in a file (referenced by inode).
+ * Data may be placed either in a user or kernel buffer.
+ */
+ssize_t orangefs_inode_read(struct inode *inode,
+			    struct iov_iter *iter,
+			    loff_t *offset,
+			    loff_t readahead_size)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	size_t count = iov_iter_count(iter);
+	size_t bufmap_size;
+	ssize_t ret = -EINVAL;
+
+	g_orangefs_stats.reads++;
+
+	bufmap_size = orangefs_bufmap_size_query();
+	if (count > bufmap_size) {
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s: count is too large (%zd/%zd)!\n",
+			     __func__, count, bufmap_size);
+		return -EINVAL;
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU) %zd@%llu\n",
+		     __func__,
+		     &orangefs_inode->refn.khandle,
+		     count,
+		     llu(*offset));
+
+	ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter,
+			count, readahead_size);
+	if (ret > 0)
+		*offset += ret;
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): Value(%zd) returned.\n",
+		     __func__,
+		     &orangefs_inode->refn.khandle,
+		     ret);
+
+	return ret;
+}
+
+static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	loff_t pos = *(&iocb->ki_pos);
+	ssize_t rc = 0;
+
+	BUG_ON(iocb->private);
+
+	gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n");
+
+	g_orangefs_stats.reads++;
+
+	rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter);
+	iocb->ki_pos = pos;
+
+	return rc;
+}
+
+static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	loff_t pos;
+	ssize_t rc;
+
+	BUG_ON(iocb->private);
+
+	gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n");
+
+	mutex_lock(&file->f_mapping->host->i_mutex);
+
+	/* Make sure generic_write_checks sees an up to date inode size. */
+	if (file->f_flags & O_APPEND) {
+		rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
+		if (rc == -ESTALE)
+			rc = -EIO;
+		if (rc) {
+			gossip_err("%s: orangefs_inode_getattr failed, "
+			    "rc:%zd:.\n", __func__, rc);
+			goto out;
+		}
+	}
+
+	if (file->f_pos > i_size_read(file->f_mapping->host))
+		orangefs_i_size_write(file->f_mapping->host, file->f_pos);
+
+	rc = generic_write_checks(iocb, iter);
+
+	if (rc <= 0) {
+		gossip_err("%s: generic_write_checks failed, rc:%zd:.\n",
+			   __func__, rc);
+		goto out;
+	}
+
+	/*
+	 * if we are appending, generic_write_checks would have updated
+	 * pos to the end of the file, so we will wait till now to set
+	 * pos...
+	 */
+	pos = *(&iocb->ki_pos);
+
+	rc = do_readv_writev(ORANGEFS_IO_WRITE,
+			     file,
+			     &pos,
+			     iter);
+	if (rc < 0) {
+		gossip_err("%s: do_readv_writev failed, rc:%zd:.\n",
+			   __func__, rc);
+		goto out;
+	}
+
+	iocb->ki_pos = pos;
+	g_orangefs_stats.writes++;
+
+out:
+
+	mutex_unlock(&file->f_mapping->host->i_mutex);
+	return rc;
+}
+
+/*
+ * Perform a miscellaneous operation on a file.
+ */
+static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int ret = -ENOTTY;
+	__u64 val = 0;
+	unsigned long uval;
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "orangefs_ioctl: called with cmd %d\n",
+		     cmd);
+
+	/*
+	 * we understand some general ioctls on files, such as the immutable
+	 * and append flags
+	 */
+	if (cmd == FS_IOC_GETFLAGS) {
+		val = 0;
+		ret = orangefs_inode_getxattr(file_inode(file),
+					      ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+					      "user.pvfs2.meta_hint",
+					      &val, sizeof(val));
+		if (ret < 0 && ret != -ENODATA)
+			return ret;
+		else if (ret == -ENODATA)
+			val = 0;
+		uval = val;
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n",
+			     (unsigned long long)uval);
+		return put_user(uval, (int __user *)arg);
+	} else if (cmd == FS_IOC_SETFLAGS) {
+		ret = 0;
+		if (get_user(uval, (int __user *)arg))
+			return -EFAULT;
+		/*
+		 * ORANGEFS_MIRROR_FL is set internally when the mirroring mode
+		 * is turned on for a file. The user is not allowed to turn
+		 * on this bit, but the bit is present if the user first gets
+		 * the flags and then updates the flags with some new
+		 * settings. So, we ignore it in the following edit. bligon.
+		 */
+		if ((uval & ~ORANGEFS_MIRROR_FL) &
+		    (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) {
+			gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n");
+			return -EINVAL;
+		}
+		val = uval;
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n",
+			     (unsigned long long)val);
+		ret = orangefs_inode_setxattr(file_inode(file),
+					      ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+					      "user.pvfs2.meta_hint",
+					      &val, sizeof(val), 0);
+	}
+
+	return ret;
+}
+
+/*
+ * Memory map a region of a file.
+ */
+static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "orangefs_file_mmap: called on %s\n",
+		     (file ?
+			(char *)file->f_path.dentry->d_name.name :
+			(char *)"Unknown"));
+
+	/* set the sequential readahead hint */
+	vma->vm_flags |= VM_SEQ_READ;
+	vma->vm_flags &= ~VM_RAND_READ;
+
+	/* Use readonly mmap since we cannot support writable maps. */
+	return generic_file_readonly_mmap(file, vma);
+}
+
+#define mapping_nrpages(idata) ((idata)->nrpages)
+
+/*
+ * Called to notify the module that there are no more references to
+ * this file (i.e. no processes have it open).
+ *
+ * \note Not called when each file is closed.
+ */
+static int orangefs_file_release(struct inode *inode, struct file *file)
+{
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "orangefs_file_release: called on %s\n",
+		     file->f_path.dentry->d_name.name);
+
+	orangefs_flush_inode(inode);
+
+	/*
+	 * remove all associated inode pages from the page cache and mmap
+	 * readahead cache (if any); this forces an expensive refresh of
+	 * data for the next caller of mmap (or 'get_block' accesses)
+	 */
+	if (file->f_path.dentry->d_inode &&
+	    file->f_path.dentry->d_inode->i_mapping &&
+	    mapping_nrpages(&file->f_path.dentry->d_inode->i_data))
+		truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
+				     0);
+	return 0;
+}
+
+/*
+ * Push all data for a specific file onto permanent storage.
+ */
+static int orangefs_fsync(struct file *file,
+		       loff_t start,
+		       loff_t end,
+		       int datasync)
+{
+	int ret = -EINVAL;
+	struct orangefs_inode_s *orangefs_inode =
+		ORANGEFS_I(file->f_path.dentry->d_inode);
+	struct orangefs_kernel_op_s *new_op = NULL;
+
+	/* required call */
+	filemap_write_and_wait_range(file->f_mapping, start, end);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC);
+	if (!new_op)
+		return -ENOMEM;
+	new_op->upcall.req.fsync.refn = orangefs_inode->refn;
+
+	ret = service_operation(new_op,
+			"orangefs_fsync",
+			get_interruptible_flag(file->f_path.dentry->d_inode));
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "orangefs_fsync got return value of %d\n",
+		     ret);
+
+	op_release(new_op);
+
+	orangefs_flush_inode(file->f_path.dentry->d_inode);
+	return ret;
+}
+
+/*
+ * Change the file pointer position for an instance of an open file.
+ *
+ * \note If .llseek is overriden, we must acquire lock as described in
+ *       Documentation/filesystems/Locking.
+ *
+ * Future upgrade could support SEEK_DATA and SEEK_HOLE but would
+ * require much changes to the FS
+ */
+static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	int ret = -EINVAL;
+	struct inode *inode = file_inode(file);
+
+	if (origin == SEEK_END) {
+		/*
+		 * revalidate the inode's file size.
+		 * NOTE: We are only interested in file size here,
+		 * so we set mask accordingly.
+		 */
+		ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
+		if (ret == -ESTALE)
+			ret = -EIO;
+		if (ret) {
+			gossip_debug(GOSSIP_FILE_DEBUG,
+				     "%s:%s:%d calling make bad inode\n",
+				     __FILE__,
+				     __func__,
+				     __LINE__);
+			return ret;
+		}
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "orangefs_file_llseek: offset is %ld | origin is %d"
+		     " | inode size is %lu\n",
+		     (long)offset,
+		     origin,
+		     (unsigned long)i_size_read(inode));
+
+	return generic_file_llseek(file, offset, origin);
+}
+
+/*
+ * Support local locks (locks that only this kernel knows about)
+ * if Orangefs was mounted -o local_lock.
+ */
+static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	int rc = -EINVAL;
+
+	if (ORANGEFS_SB(filp->f_inode->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) {
+		if (cmd == F_GETLK) {
+			rc = 0;
+			posix_test_lock(filp, fl);
+		} else {
+			rc = posix_lock_file(filp, fl, NULL);
+		}
+	}
+
+	return rc;
+}
+
+/** ORANGEFS implementation of VFS file operations */
+const struct file_operations orangefs_file_operations = {
+	.llseek		= orangefs_file_llseek,
+	.read_iter	= orangefs_file_read_iter,
+	.write_iter	= orangefs_file_write_iter,
+	.lock		= orangefs_lock,
+	.unlocked_ioctl	= orangefs_ioctl,
+	.mmap		= orangefs_file_mmap,
+	.open		= generic_file_open,
+	.release	= orangefs_file_release,
+	.fsync		= orangefs_fsync,
+};

diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
new file mode 100644
index 0000000..0166faa
--- /dev/null
+++ b/fs/orangefs/inode.c

@@ -0,0 +1,475 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS inode operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+static int read_one_page(struct page *page)
+{
+	int ret;
+	int max_block;
+	ssize_t bytes_read = 0;
+	struct inode *inode = page->mapping->host;
+	const __u32 blocksize = PAGE_SIZE;	/* inode->i_blksize */
+	const __u32 blockbits = PAGE_SHIFT;	/* inode->i_blkbits */
+	struct iov_iter to;
+	struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};
+
+	iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		    "orangefs_readpage called with page %p\n",
+		     page);
+
+	max_block = ((inode->i_size / blocksize) + 1);
+
+	if (page->index < max_block) {
+		loff_t blockptr_offset = (((loff_t) page->index) << blockbits);
+
+		bytes_read = orangefs_inode_read(inode,
+						 &to,
+						 &blockptr_offset,
+						 inode->i_size);
+	}
+	/* this will only zero remaining unread portions of the page data */
+	iov_iter_zero(~0U, &to);
+	/* takes care of potential aliasing */
+	flush_dcache_page(page);
+	if (bytes_read < 0) {
+		ret = bytes_read;
+		SetPageError(page);
+	} else {
+		SetPageUptodate(page);
+		if (PageError(page))
+			ClearPageError(page);
+		ret = 0;
+	}
+	/* unlock the page after the ->readpage() routine completes */
+	unlock_page(page);
+	return ret;
+}
+
+static int orangefs_readpage(struct file *file, struct page *page)
+{
+	return read_one_page(page);
+}
+
+static int orangefs_readpages(struct file *file,
+			   struct address_space *mapping,
+			   struct list_head *pages,
+			   unsigned nr_pages)
+{
+	int page_idx;
+	int ret;
+
+	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpages called\n");
+
+	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+		struct page *page;
+
+		page = list_entry(pages->prev, struct page, lru);
+		list_del(&page->lru);
+		if (!add_to_page_cache(page,
+				       mapping,
+				       page->index,
+				       GFP_KERNEL)) {
+			ret = read_one_page(page);
+			gossip_debug(GOSSIP_INODE_DEBUG,
+				"failure adding page to cache, read_one_page returned: %d\n",
+				ret);
+	      } else {
+			put_page(page);
+	      }
+	}
+	BUG_ON(!list_empty(pages));
+	return 0;
+}
+
+static void orangefs_invalidatepage(struct page *page,
+				 unsigned int offset,
+				 unsigned int length)
+{
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "orangefs_invalidatepage called on page %p "
+		     "(offset is %u)\n",
+		     page,
+		     offset);
+
+	ClearPageUptodate(page);
+	ClearPageMappedToDisk(page);
+	return;
+
+}
+
+static int orangefs_releasepage(struct page *page, gfp_t foo)
+{
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "orangefs_releasepage called on page %p\n",
+		     page);
+	return 0;
+}
+
+/*
+ * Having a direct_IO entry point in the address_space_operations
+ * struct causes the kernel to allows us to use O_DIRECT on
+ * open. Nothing will ever call this thing, but in the future we
+ * will need to be able to use O_DIRECT on open in order to support
+ * AIO. Modeled after NFS, they do this too.
+ */
+/*
+ * static ssize_t orangefs_direct_IO(int rw,
+ *			struct kiocb *iocb,
+ *			struct iov_iter *iter,
+ *			loff_t offset)
+ *{
+ *	gossip_debug(GOSSIP_INODE_DEBUG,
+ *		     "orangefs_direct_IO: %s\n",
+ *		     iocb->ki_filp->f_path.dentry->d_name.name);
+ *
+ *	return -EINVAL;
+ *}
+ */
+
+struct backing_dev_info orangefs_backing_dev_info = {
+	.name = "orangefs",
+	.ra_pages = 0,
+	.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+};
+
+/** ORANGEFS2 implementation of address space operations */
+const struct address_space_operations orangefs_address_operations = {
+	.readpage = orangefs_readpage,
+	.readpages = orangefs_readpages,
+	.invalidatepage = orangefs_invalidatepage,
+	.releasepage = orangefs_releasepage,
+/*	.direct_IO = orangefs_direct_IO */
+};
+
+static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_kernel_op_s *new_op;
+	loff_t orig_size;
+	int ret = -EINVAL;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
+		     __func__,
+		     get_khandle_from_ino(inode),
+		     &orangefs_inode->refn.khandle,
+		     orangefs_inode->refn.fs_id,
+		     iattr->ia_size);
+
+	/* Ensure that we have a up to date size, so we know if it changed. */
+	ret = orangefs_inode_getattr(inode, 0, 1);
+	if (ret == -ESTALE)
+		ret = -EIO;
+	if (ret) {
+		gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
+		    __func__, ret);
+		return ret;
+	}
+	orig_size = i_size_read(inode);
+
+	truncate_setsize(inode, iattr->ia_size);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.truncate.refn = orangefs_inode->refn;
+	new_op->upcall.req.truncate.size = (__s64) iattr->ia_size;
+
+	ret = service_operation(new_op, __func__,
+				get_interruptible_flag(inode));
+
+	/*
+	 * the truncate has no downcall members to retrieve, but
+	 * the status value tells us if it went through ok or not
+	 */
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "orangefs: orangefs_truncate got return value of %d\n",
+		     ret);
+
+	op_release(new_op);
+
+	if (ret != 0)
+		return ret;
+
+	/*
+	 * Only change the c/mtime if we are changing the size or we are
+	 * explicitly asked to change it.  This handles the semantic difference
+	 * between truncate() and ftruncate() as implemented in the VFS.
+	 *
+	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+	 * special case where we need to update the times despite not having
+	 * these flags set.  For all other operations the VFS set these flags
+	 * explicitly if it wants a timestamp update.
+	 */
+	if (orig_size != i_size_read(inode) &&
+	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
+		iattr->ia_ctime = iattr->ia_mtime =
+			current_fs_time(inode->i_sb);
+		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
+	}
+
+	return ret;
+}
+
+/*
+ * Change attributes of an object referenced by dentry.
+ */
+int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	int ret = -EINVAL;
+	struct inode *inode = dentry->d_inode;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "orangefs_setattr: called on %s\n",
+		     dentry->d_name.name);
+
+	ret = inode_change_ok(inode, iattr);
+	if (ret)
+		goto out;
+
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		ret = orangefs_setattr_size(inode, iattr);
+		if (ret)
+			goto out;
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+
+	ret = orangefs_inode_setattr(inode, iattr);
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "orangefs_setattr: inode_setattr returned %d\n",
+		     ret);
+
+	if (!ret && (iattr->ia_valid & ATTR_MODE))
+		/* change mod on a file that has ACLs */
+		ret = posix_acl_chmod(inode, inode->i_mode);
+
+out:
+	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", ret);
+	return ret;
+}
+
+/*
+ * Obtain attributes of an object given a dentry
+ */
+int orangefs_getattr(struct vfsmount *mnt,
+		  struct dentry *dentry,
+		  struct kstat *kstat)
+{
+	int ret = -ENOENT;
+	struct inode *inode = dentry->d_inode;
+	struct orangefs_inode_s *orangefs_inode = NULL;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "orangefs_getattr: called on %s\n",
+		     dentry->d_name.name);
+
+	ret = orangefs_inode_getattr(inode, 0, 1);
+	if (ret == 0) {
+		generic_fillattr(inode, kstat);
+
+		/* override block size reported to stat */
+		orangefs_inode = ORANGEFS_I(inode);
+		kstat->blksize = orangefs_inode->blksize;
+	}
+	return ret;
+}
+
+int orangefs_permission(struct inode *inode, int mask)
+{
+	int ret;
+
+	if (mask & MAY_NOT_BLOCK)
+		return -ECHILD;
+
+	gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);
+
+	/* Make sure the permission (and other common attrs) are up to date. */
+	ret = orangefs_inode_getattr(inode, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	return generic_permission(inode, mask);
+}
+
+/* ORANGEDS2 implementation of VFS inode operations for files */
+struct inode_operations orangefs_file_inode_operations = {
+	.get_acl = orangefs_get_acl,
+	.set_acl = orangefs_set_acl,
+	.setattr = orangefs_setattr,
+	.getattr = orangefs_getattr,
+	.setxattr = generic_setxattr,
+	.getxattr = generic_getxattr,
+	.listxattr = orangefs_listxattr,
+	.removexattr = generic_removexattr,
+	.permission = orangefs_permission,
+};
+
+static int orangefs_init_iops(struct inode *inode)
+{
+	inode->i_mapping->a_ops = &orangefs_address_operations;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_op = &orangefs_file_inode_operations;
+		inode->i_fop = &orangefs_file_operations;
+		inode->i_blkbits = PAGE_SHIFT;
+		break;
+	case S_IFLNK:
+		inode->i_op = &orangefs_symlink_inode_operations;
+		break;
+	case S_IFDIR:
+		inode->i_op = &orangefs_dir_inode_operations;
+		inode->i_fop = &orangefs_dir_operations;
+		break;
+	default:
+		gossip_debug(GOSSIP_INODE_DEBUG,
+			     "%s: unsupported mode\n",
+			     __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Given a ORANGEFS object identifier (fsid, handle), convert it into a ino_t type
+ * that will be used as a hash-index from where the handle will
+ * be searched for in the VFS hash table of inodes.
+ */
+static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref)
+{
+	if (!ref)
+		return 0;
+	return orangefs_khandle_to_ino(&(ref->khandle));
+}
+
+/*
+ * Called to set up an inode from iget5_locked.
+ */
+static int orangefs_set_inode(struct inode *inode, void *data)
+{
+	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
+	ORANGEFS_I(inode)->refn.fs_id = ref->fs_id;
+	ORANGEFS_I(inode)->refn.khandle = ref->khandle;
+	return 0;
+}
+
+/*
+ * Called to determine if handles match.
+ */
+static int orangefs_test_inode(struct inode *inode, void *data)
+{
+	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
+	struct orangefs_inode_s *orangefs_inode = NULL;
+
+	orangefs_inode = ORANGEFS_I(inode);
+	return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle), &(ref->khandle))
+		&& orangefs_inode->refn.fs_id == ref->fs_id);
+}
+
+/*
+ * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
+ * file handle.
+ *
+ * @sb: the file system super block instance.
+ * @ref: The ORANGEFS object for which we are trying to locate an inode structure.
+ */
+struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref *ref)
+{
+	struct inode *inode = NULL;
+	unsigned long hash;
+	int error;
+
+	hash = orangefs_handle_hash(ref);
+	inode = iget5_locked(sb, hash, orangefs_test_inode, orangefs_set_inode, ref);
+	if (!inode || !(inode->i_state & I_NEW))
+		return inode;
+
+	error = orangefs_inode_getattr(inode, 1, 0);
+	if (error) {
+		iget_failed(inode);
+		return ERR_PTR(error);
+	}
+
+	inode->i_ino = hash;	/* needed for stat etc */
+	orangefs_init_iops(inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
+		     &ref->khandle,
+		     ref->fs_id,
+		     hash,
+		     inode->i_ino);
+
+	return inode;
+}
+
+/*
+ * Allocate an inode for a newly created file and insert it into the inode hash.
+ */
+struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
+		int mode, dev_t dev, struct orangefs_object_kref *ref)
+{
+	unsigned long hash = orangefs_handle_hash(ref);
+	struct inode *inode;
+	int error;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
+		     __func__,
+		     sb,
+		     MAJOR(dev),
+		     MINOR(dev),
+		     mode);
+
+	inode = new_inode(sb);
+	if (!inode)
+		return NULL;
+
+	orangefs_set_inode(inode, ref);
+	inode->i_ino = hash;	/* needed for stat etc */
+
+	error = orangefs_inode_getattr(inode, 1, 0);
+	if (error)
+		goto out_iput;
+
+	orangefs_init_iops(inode);
+
+	inode->i_mode = mode;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_size = PAGE_SIZE;
+	inode->i_rdev = dev;
+
+	error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref);
+	if (error < 0)
+		goto out_iput;
+
+	gossip_debug(GOSSIP_INODE_DEBUG,
+		     "Initializing ACL's for inode %pU\n",
+		     get_khandle_from_ino(inode));
+	orangefs_init_acl(inode, dir);
+	return inode;
+
+out_iput:
+	iput(inode);
+	return ERR_PTR(error);
+}

diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
new file mode 100644
index 0000000..5a60c50
--- /dev/null
+++ b/fs/orangefs/namei.c

@@ -0,0 +1,462 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS namei operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+
+/*
+ * Get a newly allocated inode to go with a negative dentry.
+ */
+static int orangefs_create(struct inode *dir,
+			struct dentry *dentry,
+			umode_t mode,
+			bool exclusive)
+{
+	struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+	struct orangefs_kernel_op_s *new_op;
+	struct inode *inode;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: %s\n",
+		     __func__,
+		     dentry->d_name.name);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_CREATE);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.create.parent_refn = parent->refn;
+
+	fill_default_sys_attrs(new_op->upcall.req.create.attributes,
+			       ORANGEFS_TYPE_METAFILE, mode);
+
+	strncpy(new_op->upcall.req.create.d_name,
+		dentry->d_name.name, ORANGEFS_NAME_MAX);
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: %s: handle:%pU: fsid:%d: new_op:%p: ret:%d:\n",
+		     __func__,
+		     dentry->d_name.name,
+		     &new_op->downcall.resp.create.refn.khandle,
+		     new_op->downcall.resp.create.refn.fs_id,
+		     new_op,
+		     ret);
+
+	if (ret < 0)
+		goto out;
+
+	inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
+				&new_op->downcall.resp.create.refn);
+	if (IS_ERR(inode)) {
+		gossip_err("%s: Failed to allocate inode for file :%s:\n",
+			   __func__,
+			   dentry->d_name.name);
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: Assigned inode :%pU: for file :%s:\n",
+		     __func__,
+		     get_khandle_from_ino(inode),
+		     dentry->d_name.name);
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: dentry instantiated for %s\n",
+		     __func__,
+		     dentry->d_name.name);
+
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+	ret = 0;
+out:
+	op_release(new_op);
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: %s: returning %d\n",
+		     __func__,
+		     dentry->d_name.name,
+		     ret);
+	return ret;
+}
+
+/*
+ * Attempt to resolve an object name (dentry->d_name), parent handle, and
+ * fsid into a handle for the object.
+ */
+static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
+				   unsigned int flags)
+{
+	struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+	struct orangefs_kernel_op_s *new_op;
+	struct inode *inode;
+	struct dentry *res;
+	int ret = -EINVAL;
+
+	/*
+	 * in theory we could skip a lookup here (if the intent is to
+	 * create) in order to avoid a potentially failed lookup, but
+	 * leaving it in can skip a valid lookup and try to create a file
+	 * that already exists (e.g. the vfs already handles checking for
+	 * -EEXIST on O_EXCL opens, which is broken if we skip this lookup
+	 * in the create path)
+	 */
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
+		     __func__, dentry->d_name.name);
+
+	if (dentry->d_name.len > (ORANGEFS_NAME_MAX - 1))
+		return ERR_PTR(-ENAMETOOLONG);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP);
+	if (!new_op)
+		return ERR_PTR(-ENOMEM);
+
+	new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     &parent->refn.khandle);
+	new_op->upcall.req.lookup.parent_refn = parent->refn;
+
+	strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
+		ORANGEFS_NAME_MAX);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: doing lookup on %s under %pU,%d\n",
+		     __func__,
+		     new_op->upcall.req.lookup.d_name,
+		     &new_op->upcall.req.lookup.parent_refn.khandle,
+		     new_op->upcall.req.lookup.parent_refn.fs_id);
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Lookup Got %pU, fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.lookup.refn.khandle,
+		     new_op->downcall.resp.lookup.refn.fs_id,
+		     ret);
+
+	if (ret < 0) {
+		if (ret == -ENOENT) {
+			/*
+			 * if no inode was found, add a negative dentry to
+			 * dcache anyway; if we don't, we don't hold expected
+			 * lookup semantics and we most noticeably break
+			 * during directory renames.
+			 *
+			 * however, if the operation failed or exited, do not
+			 * add the dentry (e.g. in the case that a touch is
+			 * issued on a file that already exists that was
+			 * interrupted during this lookup -- no need to add
+			 * another negative dentry for an existing file)
+			 */
+
+			gossip_debug(GOSSIP_NAME_DEBUG,
+				     "orangefs_lookup: Adding *negative* dentry "
+				     "%p for %s\n",
+				     dentry,
+				     dentry->d_name.name);
+
+			d_add(dentry, NULL);
+			res = NULL;
+			goto out;
+		}
+
+		/* must be a non-recoverable error */
+		res = ERR_PTR(ret);
+		goto out;
+	}
+
+	inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
+	if (IS_ERR(inode)) {
+		gossip_debug(GOSSIP_NAME_DEBUG,
+			"error %ld from iget\n", PTR_ERR(inode));
+		res = ERR_CAST(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s:%s:%d "
+		     "Found good inode [%lu] with count [%d]\n",
+		     __FILE__,
+		     __func__,
+		     __LINE__,
+		     inode->i_ino,
+		     (int)atomic_read(&inode->i_count));
+
+	/* update dentry/inode pair into dcache */
+	res = d_splice_alias(inode, dentry);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Lookup success (inode ct = %d)\n",
+		     (int)atomic_read(&inode->i_count));
+out:
+	op_release(new_op);
+	return res;
+}
+
+/* return 0 on success; non-zero otherwise */
+static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+	struct orangefs_kernel_op_s *new_op;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: called on %s\n"
+		     "  (inode %pU): Parent is %pU | fs_id %d\n",
+		     __func__,
+		     dentry->d_name.name,
+		     get_khandle_from_ino(inode),
+		     &parent->refn.khandle,
+		     parent->refn.fs_id);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_REMOVE);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.remove.parent_refn = parent->refn;
+	strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name,
+		ORANGEFS_NAME_MAX);
+
+	ret = service_operation(new_op, "orangefs_unlink",
+				get_interruptible_flag(inode));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "%s: service_operation returned:%d:\n",
+		     __func__,
+		     ret);
+
+	op_release(new_op);
+
+	if (!ret) {
+		drop_nlink(inode);
+
+		SetMtimeFlag(parent);
+		dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+		mark_inode_dirty_sync(dir);
+	}
+	return ret;
+}
+
+static int orangefs_symlink(struct inode *dir,
+			 struct dentry *dentry,
+			 const char *symname)
+{
+	struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+	struct orangefs_kernel_op_s *new_op;
+	struct inode *inode;
+	int mode = 755;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
+
+	if (!symname)
+		return -EINVAL;
+
+	if (strlen(symname)+1 > ORANGEFS_NAME_MAX)
+		return -ENAMETOOLONG;
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_SYMLINK);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.sym.parent_refn = parent->refn;
+
+	fill_default_sys_attrs(new_op->upcall.req.sym.attributes,
+			       ORANGEFS_TYPE_SYMLINK,
+			       mode);
+
+	strncpy(new_op->upcall.req.sym.entry_name,
+		dentry->d_name.name,
+		ORANGEFS_NAME_MAX);
+	strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX);
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Symlink Got ORANGEFS handle %pU on fsid %d (ret=%d)\n",
+		     &new_op->downcall.resp.sym.refn.khandle,
+		     new_op->downcall.resp.sym.refn.fs_id, ret);
+
+	if (ret < 0) {
+		gossip_debug(GOSSIP_NAME_DEBUG,
+			    "%s: failed with error code %d\n",
+			    __func__, ret);
+		goto out;
+	}
+
+	inode = orangefs_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0,
+				&new_op->downcall.resp.sym.refn);
+	if (IS_ERR(inode)) {
+		gossip_err
+		    ("*** Failed to allocate orangefs symlink inode\n");
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Assigned symlink inode new number of %pU\n",
+		     get_khandle_from_ino(inode));
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Inode (Symlink) %pU -> %s\n",
+		     get_khandle_from_ino(inode),
+		     dentry->d_name.name);
+
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+	ret = 0;
+out:
+	op_release(new_op);
+	return ret;
+}
+
+static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+	struct orangefs_kernel_op_s *new_op;
+	struct inode *inode;
+	int ret;
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_MKDIR);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.mkdir.parent_refn = parent->refn;
+
+	fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes,
+			      ORANGEFS_TYPE_DIRECTORY, mode);
+
+	strncpy(new_op->upcall.req.mkdir.d_name,
+		dentry->d_name.name, ORANGEFS_NAME_MAX);
+
+	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Mkdir Got ORANGEFS handle %pU on fsid %d\n",
+		     &new_op->downcall.resp.mkdir.refn.khandle,
+		     new_op->downcall.resp.mkdir.refn.fs_id);
+
+	if (ret < 0) {
+		gossip_debug(GOSSIP_NAME_DEBUG,
+			     "%s: failed with error code %d\n",
+			     __func__, ret);
+		goto out;
+	}
+
+	inode = orangefs_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0,
+				&new_op->downcall.resp.mkdir.refn);
+	if (IS_ERR(inode)) {
+		gossip_err("*** Failed to allocate orangefs dir inode\n");
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Assigned dir inode new number of %pU\n",
+		     get_khandle_from_ino(inode));
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "Inode (Directory) %pU -> %s\n",
+		     get_khandle_from_ino(inode),
+		     dentry->d_name.name);
+
+	/*
+	 * NOTE: we have no good way to keep nlink consistent for directories
+	 * across clients; keep constant at 1.
+	 */
+	SetMtimeFlag(parent);
+	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+	mark_inode_dirty_sync(dir);
+out:
+	op_release(new_op);
+	return ret;
+}
+
+static int orangefs_rename(struct inode *old_dir,
+			struct dentry *old_dentry,
+			struct inode *new_dir,
+			struct dentry *new_dentry)
+{
+	struct orangefs_kernel_op_s *new_op;
+	int ret;
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "orangefs_rename: called (%s/%s => %s/%s) ct=%d\n",
+		     old_dentry->d_parent->d_name.name,
+		     old_dentry->d_name.name,
+		     new_dentry->d_parent->d_name.name,
+		     new_dentry->d_name.name,
+		     d_count(new_dentry));
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_RENAME);
+	if (!new_op)
+		return -EINVAL;
+
+	new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn;
+	new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn;
+
+	strncpy(new_op->upcall.req.rename.d_old_name,
+		old_dentry->d_name.name,
+		ORANGEFS_NAME_MAX);
+	strncpy(new_op->upcall.req.rename.d_new_name,
+		new_dentry->d_name.name,
+		ORANGEFS_NAME_MAX);
+
+	ret = service_operation(new_op,
+				"orangefs_rename",
+				get_interruptible_flag(old_dentry->d_inode));
+
+	gossip_debug(GOSSIP_NAME_DEBUG,
+		     "orangefs_rename: got downcall status %d\n",
+		     ret);
+
+	if (new_dentry->d_inode)
+		new_dentry->d_inode->i_ctime = CURRENT_TIME;
+
+	op_release(new_op);
+	return ret;
+}
+
+/* ORANGEFS implementation of VFS inode operations for directories */
+struct inode_operations orangefs_dir_inode_operations = {
+	.lookup = orangefs_lookup,
+	.get_acl = orangefs_get_acl,
+	.set_acl = orangefs_set_acl,
+	.create = orangefs_create,
+	.unlink = orangefs_unlink,
+	.symlink = orangefs_symlink,
+	.mkdir = orangefs_mkdir,
+	.rmdir = orangefs_unlink,
+	.rename = orangefs_rename,
+	.setattr = orangefs_setattr,
+	.getattr = orangefs_getattr,
+	.setxattr = generic_setxattr,
+	.getxattr = generic_getxattr,
+	.removexattr = generic_removexattr,
+	.listxattr = orangefs_listxattr,
+	.permission = orangefs_permission,
+};

diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
new file mode 100644
index 0000000..75375e9
--- /dev/null
+++ b/fs/orangefs/orangefs-bufmap.c

@@ -0,0 +1,556 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+struct slot_map {
+	int c;
+	wait_queue_head_t q;
+	int count;
+	unsigned long *map;
+};
+
+static struct slot_map rw_map = {
+	.c = -1,
+	.q = __WAIT_QUEUE_HEAD_INITIALIZER(rw_map.q)
+};
+static struct slot_map readdir_map = {
+	.c = -1,
+	.q = __WAIT_QUEUE_HEAD_INITIALIZER(readdir_map.q)
+};
+
+
+static void install(struct slot_map *m, int count, unsigned long *map)
+{
+	spin_lock(&m->q.lock);
+	m->c = m->count = count;
+	m->map = map;
+	wake_up_all_locked(&m->q);
+	spin_unlock(&m->q.lock);
+}
+
+static void mark_killed(struct slot_map *m)
+{
+	spin_lock(&m->q.lock);
+	m->c -= m->count + 1;
+	spin_unlock(&m->q.lock);
+}
+
+static void run_down(struct slot_map *m)
+{
+	DEFINE_WAIT(wait);
+	spin_lock(&m->q.lock);
+	if (m->c != -1) {
+		for (;;) {
+			if (likely(list_empty(&wait.task_list)))
+				__add_wait_queue_tail(&m->q, &wait);
+			set_current_state(TASK_UNINTERRUPTIBLE);
+
+			if (m->c == -1)
+				break;
+
+			spin_unlock(&m->q.lock);
+			schedule();
+			spin_lock(&m->q.lock);
+		}
+		__remove_wait_queue(&m->q, &wait);
+		__set_current_state(TASK_RUNNING);
+	}
+	m->map = NULL;
+	spin_unlock(&m->q.lock);
+}
+
+static void put(struct slot_map *m, int slot)
+{
+	int v;
+	spin_lock(&m->q.lock);
+	__clear_bit(slot, m->map);
+	v = ++m->c;
+	if (unlikely(v == 1))	/* no free slots -> one free slot */
+		wake_up_locked(&m->q);
+	else if (unlikely(v == -1))	/* finished dying */
+		wake_up_all_locked(&m->q);
+	spin_unlock(&m->q.lock);
+}
+
+static int wait_for_free(struct slot_map *m)
+{
+	long left = slot_timeout_secs * HZ;
+	DEFINE_WAIT(wait);
+
+	do {
+		long n = left, t;
+		if (likely(list_empty(&wait.task_list)))
+			__add_wait_queue_tail_exclusive(&m->q, &wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (m->c > 0)
+			break;
+
+		if (m->c < 0) {
+			/* we are waiting for map to be installed */
+			/* it would better be there soon, or we go away */
+			if (n > ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ)
+				n = ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ;
+		}
+		spin_unlock(&m->q.lock);
+		t = schedule_timeout(n);
+		spin_lock(&m->q.lock);
+		if (unlikely(!t) && n != left && m->c < 0)
+			left = t;
+		else
+			left = t + (left - n);
+		if (unlikely(signal_pending(current)))
+			left = -EINTR;
+	} while (left > 0);
+
+	if (!list_empty(&wait.task_list))
+		list_del(&wait.task_list);
+	else if (left <= 0 && waitqueue_active(&m->q))
+		__wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
+	__set_current_state(TASK_RUNNING);
+
+	if (likely(left > 0))
+		return 0;
+
+	return left < 0 ? -EINTR : -ETIMEDOUT;
+}
+
+static int get(struct slot_map *m)
+{
+	int res = 0;
+	spin_lock(&m->q.lock);
+	if (unlikely(m->c <= 0))
+		res = wait_for_free(m);
+	if (likely(!res)) {
+		m->c--;
+		res = find_first_zero_bit(m->map, m->count);
+		__set_bit(res, m->map);
+	}
+	spin_unlock(&m->q.lock);
+	return res;
+}
+
+/* used to describe mapped buffers */
+struct orangefs_bufmap_desc {
+	void *uaddr;			/* user space address pointer */
+	struct page **page_array;	/* array of mapped pages */
+	int array_count;		/* size of above arrays */
+	struct list_head list_link;
+};
+
+static struct orangefs_bufmap {
+	int desc_size;
+	int desc_shift;
+	int desc_count;
+	int total_size;
+	int page_count;
+
+	struct page **page_array;
+	struct orangefs_bufmap_desc *desc_array;
+
+	/* array to track usage of buffer descriptors */
+	unsigned long *buffer_index_array;
+
+	/* array to track usage of buffer descriptors for readdir */
+#define N DIV_ROUND_UP(ORANGEFS_READDIR_DEFAULT_DESC_COUNT, BITS_PER_LONG)
+	unsigned long readdir_index_array[N];
+#undef N
+} *__orangefs_bufmap;
+
+static DEFINE_SPINLOCK(orangefs_bufmap_lock);
+
+static void
+orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap)
+{
+	int i;
+
+	for (i = 0; i < bufmap->page_count; i++)
+		put_page(bufmap->page_array[i]);
+}
+
+static void
+orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
+{
+	kfree(bufmap->page_array);
+	kfree(bufmap->desc_array);
+	kfree(bufmap->buffer_index_array);
+	kfree(bufmap);
+}
+
+/*
+ * XXX: Can the size and shift change while the caller gives up the 
+ * XXX: lock between calling this and doing something useful?
+ */
+
+int orangefs_bufmap_size_query(void)
+{
+	struct orangefs_bufmap *bufmap;
+	int size = 0;
+	spin_lock(&orangefs_bufmap_lock);
+	bufmap = __orangefs_bufmap;
+	if (bufmap)
+		size = bufmap->desc_size;
+	spin_unlock(&orangefs_bufmap_lock);
+	return size;
+}
+
+int orangefs_bufmap_shift_query(void)
+{
+	struct orangefs_bufmap *bufmap;
+	int shift = 0;
+	spin_lock(&orangefs_bufmap_lock);
+	bufmap = __orangefs_bufmap;
+	if (bufmap)
+		shift = bufmap->desc_shift;
+	spin_unlock(&orangefs_bufmap_lock);
+	return shift;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
+static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
+
+/*
+ * orangefs_get_bufmap_init
+ *
+ * If bufmap_init is 1, then the shared memory system, including the
+ * buffer_index_array, is available.  Otherwise, it is not.
+ *
+ * returns the value of bufmap_init
+ */
+int orangefs_get_bufmap_init(void)
+{
+	return __orangefs_bufmap ? 1 : 0;
+}
+
+
+static struct orangefs_bufmap *
+orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
+{
+	struct orangefs_bufmap *bufmap;
+
+	bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
+	if (!bufmap)
+		goto out;
+
+	bufmap->total_size = user_desc->total_size;
+	bufmap->desc_count = user_desc->count;
+	bufmap->desc_size = user_desc->size;
+	bufmap->desc_shift = ilog2(bufmap->desc_size);
+
+	bufmap->buffer_index_array =
+		kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL);
+	if (!bufmap->buffer_index_array) {
+		gossip_err("orangefs: could not allocate %d buffer indices\n",
+				bufmap->desc_count);
+		goto out_free_bufmap;
+	}
+
+	bufmap->desc_array =
+		kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc),
+			GFP_KERNEL);
+	if (!bufmap->desc_array) {
+		gossip_err("orangefs: could not allocate %d descriptors\n",
+				bufmap->desc_count);
+		goto out_free_index_array;
+	}
+
+	bufmap->page_count = bufmap->total_size / PAGE_SIZE;
+
+	/* allocate storage to track our page mappings */
+	bufmap->page_array =
+		kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!bufmap->page_array)
+		goto out_free_desc_array;
+
+	return bufmap;
+
+out_free_desc_array:
+	kfree(bufmap->desc_array);
+out_free_index_array:
+	kfree(bufmap->buffer_index_array);
+out_free_bufmap:
+	kfree(bufmap);
+out:
+	return NULL;
+}
+
+static int
+orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
+		struct ORANGEFS_dev_map_desc *user_desc)
+{
+	int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
+	int offset = 0, ret, i;
+
+	/* map the pages */
+	ret = get_user_pages_fast((unsigned long)user_desc->ptr,
+			     bufmap->page_count, 1, bufmap->page_array);
+
+	if (ret < 0)
+		return ret;
+
+	if (ret != bufmap->page_count) {
+		gossip_err("orangefs error: asked for %d pages, only got %d.\n",
+				bufmap->page_count, ret);
+
+		for (i = 0; i < ret; i++) {
+			SetPageError(bufmap->page_array[i]);
+			put_page(bufmap->page_array[i]);
+		}
+		return -ENOMEM;
+	}
+
+	/*
+	 * ideally we want to get kernel space pointers for each page, but
+	 * we can't kmap that many pages at once if highmem is being used.
+	 * so instead, we just kmap/kunmap the page address each time the
+	 * kaddr is needed.
+	 */
+	for (i = 0; i < bufmap->page_count; i++)
+		flush_dcache_page(bufmap->page_array[i]);
+
+	/* build a list of available descriptors */
+	for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
+		bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
+		bufmap->desc_array[i].array_count = pages_per_desc;
+		bufmap->desc_array[i].uaddr =
+		    (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
+		offset += pages_per_desc;
+	}
+
+	return 0;
+}
+
+/*
+ * orangefs_bufmap_initialize()
+ *
+ * initializes the mapped buffer interface
+ *
+ * returns 0 on success, -errno on failure
+ */
+int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
+{
+	struct orangefs_bufmap *bufmap;
+	int ret = -EINVAL;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "orangefs_bufmap_initialize: called (ptr ("
+		     "%p) sz (%d) cnt(%d).\n",
+		     user_desc->ptr,
+		     user_desc->size,
+		     user_desc->count);
+
+	/*
+	 * sanity check alignment and size of buffer that caller wants to
+	 * work with
+	 */
+	if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
+	    (unsigned long)user_desc->ptr) {
+		gossip_err("orangefs error: memory alignment (front). %p\n",
+			   user_desc->ptr);
+		goto out;
+	}
+
+	if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
+	    != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
+		gossip_err("orangefs error: memory alignment (back).(%p + %d)\n",
+			   user_desc->ptr,
+			   user_desc->total_size);
+		goto out;
+	}
+
+	if (user_desc->total_size != (user_desc->size * user_desc->count)) {
+		gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n",
+			   user_desc->total_size,
+			   user_desc->size,
+			   user_desc->count);
+		goto out;
+	}
+
+	if ((user_desc->size % PAGE_SIZE) != 0) {
+		gossip_err("orangefs error: bufmap size not page size divisible (%d).\n",
+			   user_desc->size);
+		goto out;
+	}
+
+	ret = -ENOMEM;
+	bufmap = orangefs_bufmap_alloc(user_desc);
+	if (!bufmap)
+		goto out;
+
+	ret = orangefs_bufmap_map(bufmap, user_desc);
+	if (ret)
+		goto out_free_bufmap;
+
+
+	spin_lock(&orangefs_bufmap_lock);
+	if (__orangefs_bufmap) {
+		spin_unlock(&orangefs_bufmap_lock);
+		gossip_err("orangefs: error: bufmap already initialized.\n");
+		ret = -EINVAL;
+		goto out_unmap_bufmap;
+	}
+	__orangefs_bufmap = bufmap;
+	install(&rw_map,
+		bufmap->desc_count,
+		bufmap->buffer_index_array);
+	install(&readdir_map,
+		ORANGEFS_READDIR_DEFAULT_DESC_COUNT,
+		bufmap->readdir_index_array);
+	spin_unlock(&orangefs_bufmap_lock);
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "orangefs_bufmap_initialize: exiting normally\n");
+	return 0;
+
+out_unmap_bufmap:
+	orangefs_bufmap_unmap(bufmap);
+out_free_bufmap:
+	orangefs_bufmap_free(bufmap);
+out:
+	return ret;
+}
+
+/*
+ * orangefs_bufmap_finalize()
+ *
+ * shuts down the mapped buffer interface and releases any resources
+ * associated with it
+ *
+ * no return value
+ */
+void orangefs_bufmap_finalize(void)
+{
+	struct orangefs_bufmap *bufmap = __orangefs_bufmap;
+	if (!bufmap)
+		return;
+	gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n");
+	mark_killed(&rw_map);
+	mark_killed(&readdir_map);
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "orangefs_bufmap_finalize: exiting normally\n");
+}
+
+void orangefs_bufmap_run_down(void)
+{
+	struct orangefs_bufmap *bufmap = __orangefs_bufmap;
+	if (!bufmap)
+		return;
+	run_down(&rw_map);
+	run_down(&readdir_map);
+	spin_lock(&orangefs_bufmap_lock);
+	__orangefs_bufmap = NULL;
+	spin_unlock(&orangefs_bufmap_lock);
+	orangefs_bufmap_unmap(bufmap);
+	orangefs_bufmap_free(bufmap);
+}
+
+/*
+ * orangefs_bufmap_get()
+ *
+ * gets a free mapped buffer descriptor, will sleep until one becomes
+ * available if necessary
+ *
+ * returns slot on success, -errno on failure
+ */
+int orangefs_bufmap_get(void)
+{
+	return get(&rw_map);
+}
+
+/*
+ * orangefs_bufmap_put()
+ *
+ * returns a mapped buffer descriptor to the collection
+ *
+ * no return value
+ */
+void orangefs_bufmap_put(int buffer_index)
+{
+	put(&rw_map, buffer_index);
+}
+
+/*
+ * orangefs_readdir_index_get()
+ *
+ * gets a free descriptor, will sleep until one becomes
+ * available if necessary.
+ * Although the readdir buffers are not mapped into kernel space
+ * we could do that at a later point of time. Regardless, these
+ * indices are used by the client-core.
+ *
+ * returns slot on success, -errno on failure
+ */
+int orangefs_readdir_index_get(void)
+{
+	return get(&readdir_map);
+}
+
+void orangefs_readdir_index_put(int buffer_index)
+{
+	put(&readdir_map, buffer_index);
+}
+
+/*
+ * we've been handed an iovec, we need to copy it to 
+ * the shared memory descriptor at "buffer_index".
+ */
+int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter,
+				int buffer_index,
+				size_t size)
+{
+	struct orangefs_bufmap_desc *to;
+	int i;
+
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "%s: buffer_index:%d: size:%zu:\n",
+		     __func__, buffer_index, size);
+
+	to = &__orangefs_bufmap->desc_array[buffer_index];
+	for (i = 0; size; i++) {
+		struct page *page = to->page_array[i];
+		size_t n = size;
+		if (n > PAGE_SIZE)
+			n = PAGE_SIZE;
+		n = copy_page_from_iter(page, 0, n, iter);
+		if (!n)
+			return -EFAULT;
+		size -= n;
+	}
+	return 0;
+
+}
+
+/*
+ * we've been handed an iovec, we need to fill it from
+ * the shared memory descriptor at "buffer_index".
+ */
+int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter,
+				    int buffer_index,
+				    size_t size)
+{
+	struct orangefs_bufmap_desc *from;
+	int i;
+
+	from = &__orangefs_bufmap->desc_array[buffer_index];
+	gossip_debug(GOSSIP_BUFMAP_DEBUG,
+		     "%s: buffer_index:%d: size:%zu:\n",
+		     __func__, buffer_index, size);
+
+
+	for (i = 0; size; i++) {
+		struct page *page = from->page_array[i];
+		size_t n = size;
+		if (n > PAGE_SIZE)
+			n = PAGE_SIZE;
+		n = copy_page_to_iter(page, 0, n, iter);
+		if (!n)
+			return -EFAULT;
+		size -= n;
+	}
+	return 0;
+}

diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h
new file mode 100644
index 0000000..71f64f4
--- /dev/null
+++ b/fs/orangefs/orangefs-bufmap.h

@@ -0,0 +1,36 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#ifndef __ORANGEFS_BUFMAP_H
+#define __ORANGEFS_BUFMAP_H
+
+int orangefs_bufmap_size_query(void);
+
+int orangefs_bufmap_shift_query(void);
+
+int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc);
+
+void orangefs_bufmap_finalize(void);
+
+void orangefs_bufmap_run_down(void);
+
+int orangefs_bufmap_get(void);
+
+void orangefs_bufmap_put(int buffer_index);
+
+int orangefs_readdir_index_get(void);
+
+void orangefs_readdir_index_put(int buffer_index);
+
+int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter,
+				int buffer_index,
+				size_t size);
+
+int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter,
+			      int buffer_index,
+			      size_t size);
+
+#endif /* __ORANGEFS_BUFMAP_H */

diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c
new file mode 100644
index 0000000..900a2e3
--- /dev/null
+++ b/fs/orangefs/orangefs-cache.c

@@ -0,0 +1,161 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+
+/* tags assigned to kernel upcall operations */
+static __u64 next_tag_value;
+static DEFINE_SPINLOCK(next_tag_value_lock);
+
+/* the orangefs memory caches */
+
+/* a cache for orangefs upcall/downcall operations */
+static struct kmem_cache *op_cache;
+
+int op_cache_initialize(void)
+{
+	op_cache = kmem_cache_create("orangefs_op_cache",
+				     sizeof(struct orangefs_kernel_op_s),
+				     0,
+				     ORANGEFS_CACHE_CREATE_FLAGS,
+				     NULL);
+
+	if (!op_cache) {
+		gossip_err("Cannot create orangefs_op_cache\n");
+		return -ENOMEM;
+	}
+
+	/* initialize our atomic tag counter */
+	spin_lock(&next_tag_value_lock);
+	next_tag_value = 100;
+	spin_unlock(&next_tag_value_lock);
+	return 0;
+}
+
+int op_cache_finalize(void)
+{
+	kmem_cache_destroy(op_cache);
+	return 0;
+}
+
+char *get_opname_string(struct orangefs_kernel_op_s *new_op)
+{
+	if (new_op) {
+		__s32 type = new_op->upcall.type;
+
+		if (type == ORANGEFS_VFS_OP_FILE_IO)
+			return "OP_FILE_IO";
+		else if (type == ORANGEFS_VFS_OP_LOOKUP)
+			return "OP_LOOKUP";
+		else if (type == ORANGEFS_VFS_OP_CREATE)
+			return "OP_CREATE";
+		else if (type == ORANGEFS_VFS_OP_GETATTR)
+			return "OP_GETATTR";
+		else if (type == ORANGEFS_VFS_OP_REMOVE)
+			return "OP_REMOVE";
+		else if (type == ORANGEFS_VFS_OP_MKDIR)
+			return "OP_MKDIR";
+		else if (type == ORANGEFS_VFS_OP_READDIR)
+			return "OP_READDIR";
+		else if (type == ORANGEFS_VFS_OP_READDIRPLUS)
+			return "OP_READDIRPLUS";
+		else if (type == ORANGEFS_VFS_OP_SETATTR)
+			return "OP_SETATTR";
+		else if (type == ORANGEFS_VFS_OP_SYMLINK)
+			return "OP_SYMLINK";
+		else if (type == ORANGEFS_VFS_OP_RENAME)
+			return "OP_RENAME";
+		else if (type == ORANGEFS_VFS_OP_STATFS)
+			return "OP_STATFS";
+		else if (type == ORANGEFS_VFS_OP_TRUNCATE)
+			return "OP_TRUNCATE";
+		else if (type == ORANGEFS_VFS_OP_MMAP_RA_FLUSH)
+			return "OP_MMAP_RA_FLUSH";
+		else if (type == ORANGEFS_VFS_OP_FS_MOUNT)
+			return "OP_FS_MOUNT";
+		else if (type == ORANGEFS_VFS_OP_FS_UMOUNT)
+			return "OP_FS_UMOUNT";
+		else if (type == ORANGEFS_VFS_OP_GETXATTR)
+			return "OP_GETXATTR";
+		else if (type == ORANGEFS_VFS_OP_SETXATTR)
+			return "OP_SETXATTR";
+		else if (type == ORANGEFS_VFS_OP_LISTXATTR)
+			return "OP_LISTXATTR";
+		else if (type == ORANGEFS_VFS_OP_REMOVEXATTR)
+			return "OP_REMOVEXATTR";
+		else if (type == ORANGEFS_VFS_OP_PARAM)
+			return "OP_PARAM";
+		else if (type == ORANGEFS_VFS_OP_PERF_COUNT)
+			return "OP_PERF_COUNT";
+		else if (type == ORANGEFS_VFS_OP_CANCEL)
+			return "OP_CANCEL";
+		else if (type == ORANGEFS_VFS_OP_FSYNC)
+			return "OP_FSYNC";
+		else if (type == ORANGEFS_VFS_OP_FSKEY)
+			return "OP_FSKEY";
+	}
+	return "OP_UNKNOWN?";
+}
+
+void orangefs_new_tag(struct orangefs_kernel_op_s *op)
+{
+	spin_lock(&next_tag_value_lock);
+	op->tag = next_tag_value++;
+	if (next_tag_value == 0)
+		next_tag_value = 100;
+	spin_unlock(&next_tag_value_lock);
+}
+
+struct orangefs_kernel_op_s *op_alloc(__s32 type)
+{
+	struct orangefs_kernel_op_s *new_op = NULL;
+
+	new_op = kmem_cache_zalloc(op_cache, GFP_KERNEL);
+	if (new_op) {
+		INIT_LIST_HEAD(&new_op->list);
+		spin_lock_init(&new_op->lock);
+		init_completion(&new_op->waitq);
+
+		new_op->upcall.type = ORANGEFS_VFS_OP_INVALID;
+		new_op->downcall.type = ORANGEFS_VFS_OP_INVALID;
+		new_op->downcall.status = -1;
+
+		new_op->op_state = OP_VFS_STATE_UNKNOWN;
+
+		/* initialize the op specific tag and upcall credentials */
+		orangefs_new_tag(new_op);
+		new_op->upcall.type = type;
+		new_op->attempts = 0;
+		gossip_debug(GOSSIP_CACHE_DEBUG,
+			     "Alloced OP (%p: %llu %s)\n",
+			     new_op,
+			     llu(new_op->tag),
+			     get_opname_string(new_op));
+
+		new_op->upcall.uid = from_kuid(current_user_ns(),
+					       current_fsuid());
+
+		new_op->upcall.gid = from_kgid(current_user_ns(),
+					       current_fsgid());
+	} else {
+		gossip_err("op_alloc: kmem_cache_zalloc failed!\n");
+	}
+	return new_op;
+}
+
+void op_release(struct orangefs_kernel_op_s *orangefs_op)
+{
+	if (orangefs_op) {
+		gossip_debug(GOSSIP_CACHE_DEBUG,
+			     "Releasing OP (%p: %llu)\n",
+			     orangefs_op,
+			     llu(orangefs_op->tag));
+		kmem_cache_free(op_cache, orangefs_op);
+	} else {
+		gossip_err("NULL pointer in op_release\n");
+	}
+}

diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h
new file mode 100644
index 0000000..387db17
--- /dev/null
+++ b/fs/orangefs/orangefs-debug.h

@@ -0,0 +1,92 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/* This file just defines debugging masks to be used with the gossip
+ * logging utility.  All debugging masks for ORANGEFS are kept here to make
+ * sure we don't have collisions.
+ */
+
+#ifndef __ORANGEFS_DEBUG_H
+#define __ORANGEFS_DEBUG_H
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define	GOSSIP_NO_DEBUG			(__u64)0
+
+#define GOSSIP_SUPER_DEBUG		((__u64)1 << 0)
+#define GOSSIP_INODE_DEBUG		((__u64)1 << 1)
+#define GOSSIP_FILE_DEBUG		((__u64)1 << 2)
+#define GOSSIP_DIR_DEBUG		((__u64)1 << 3)
+#define GOSSIP_UTILS_DEBUG		((__u64)1 << 4)
+#define GOSSIP_WAIT_DEBUG		((__u64)1 << 5)
+#define GOSSIP_ACL_DEBUG		((__u64)1 << 6)
+#define GOSSIP_DCACHE_DEBUG		((__u64)1 << 7)
+#define GOSSIP_DEV_DEBUG		((__u64)1 << 8)
+#define GOSSIP_NAME_DEBUG		((__u64)1 << 9)
+#define GOSSIP_BUFMAP_DEBUG		((__u64)1 << 10)
+#define GOSSIP_CACHE_DEBUG		((__u64)1 << 11)
+#define GOSSIP_DEBUGFS_DEBUG		((__u64)1 << 12)
+#define GOSSIP_XATTR_DEBUG		((__u64)1 << 13)
+#define GOSSIP_INIT_DEBUG		((__u64)1 << 14)
+#define GOSSIP_SYSFS_DEBUG		((__u64)1 << 15)
+
+#define GOSSIP_MAX_NR                 16
+#define GOSSIP_MAX_DEBUG              (((__u64)1 << GOSSIP_MAX_NR) - 1)
+
+/*function prototypes*/
+__u64 ORANGEFS_kmod_eventlog_to_mask(const char *event_logging);
+__u64 ORANGEFS_debug_eventlog_to_mask(const char *event_logging);
+char *ORANGEFS_debug_mask_to_eventlog(__u64 mask);
+char *ORANGEFS_kmod_mask_to_eventlog(__u64 mask);
+
+/* a private internal type */
+struct __keyword_mask_s {
+	const char *keyword;
+	__u64 mask_val;
+};
+
+/*
+ * Map all kmod keywords to kmod debug masks here. Keep this
+ * structure "packed":
+ *
+ *   "all" is always last...
+ *
+ *   keyword     mask_val     index
+ *     foo          1           0
+ *     bar          2           1
+ *     baz          4           2
+ *     qux          8           3
+ *      .           .           .
+ */
+static struct __keyword_mask_s s_kmod_keyword_mask_map[] = {
+	{"super", GOSSIP_SUPER_DEBUG},
+	{"inode", GOSSIP_INODE_DEBUG},
+	{"file", GOSSIP_FILE_DEBUG},
+	{"dir", GOSSIP_DIR_DEBUG},
+	{"utils", GOSSIP_UTILS_DEBUG},
+	{"wait", GOSSIP_WAIT_DEBUG},
+	{"acl", GOSSIP_ACL_DEBUG},
+	{"dcache", GOSSIP_DCACHE_DEBUG},
+	{"dev", GOSSIP_DEV_DEBUG},
+	{"name", GOSSIP_NAME_DEBUG},
+	{"bufmap", GOSSIP_BUFMAP_DEBUG},
+	{"cache", GOSSIP_CACHE_DEBUG},
+	{"debugfs", GOSSIP_DEBUGFS_DEBUG},
+	{"xattr", GOSSIP_XATTR_DEBUG},
+	{"init", GOSSIP_INIT_DEBUG},
+	{"sysfs", GOSSIP_SYSFS_DEBUG},
+	{"none", GOSSIP_NO_DEBUG},
+	{"all", GOSSIP_MAX_DEBUG}
+};
+
+static const int num_kmod_keyword_mask_map = (int)
+	(sizeof(s_kmod_keyword_mask_map) / sizeof(struct __keyword_mask_s));
+
+#endif /* __ORANGEFS_DEBUG_H */

diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
new file mode 100644
index 0000000..19670b8
--- /dev/null
+++ b/fs/orangefs/orangefs-debugfs.c

@@ -0,0 +1,455 @@
+/*
+ * What:		/sys/kernel/debug/orangefs/debug-help
+ * Date:		June 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			List of client and kernel debug keywords.
+ *
+ *
+ * What:		/sys/kernel/debug/orangefs/client-debug
+ * Date:		June 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			Debug setting for "the client", the userspace
+ * 			helper for the kernel module.
+ *
+ *
+ * What:		/sys/kernel/debug/orangefs/kernel-debug
+ * Date:		June 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			Debug setting for the orangefs kernel module.
+ *
+ * 			Any of the keywords, or comma-separated lists
+ * 			of keywords, from debug-help can be catted to
+ * 			client-debug or kernel-debug.
+ *
+ * 			"none", "all" and "verbose" are special keywords
+ * 			for client-debug. Setting client-debug to "all"
+ * 			is kind of like trying to drink water from a
+ * 			fire hose, "verbose" triggers most of the same
+ * 			output except for the constant flow of output
+ * 			from the main wait loop.
+ *
+ * 			"none" and "all" are similar settings for kernel-debug
+ * 			no need for a "verbose".
+ */
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+#include <linux/uaccess.h>
+
+#include "orangefs-debugfs.h"
+#include "protocol.h"
+#include "orangefs-kernel.h"
+
+static int orangefs_debug_disabled = 1;
+
+static int orangefs_debug_help_open(struct inode *, struct file *);
+
+const struct file_operations debug_help_fops = {
+	.open           = orangefs_debug_help_open,
+	.read           = seq_read,
+	.release        = seq_release,
+	.llseek         = seq_lseek,
+};
+
+static void *help_start(struct seq_file *, loff_t *);
+static void *help_next(struct seq_file *, void *, loff_t *);
+static void help_stop(struct seq_file *, void *);
+static int help_show(struct seq_file *, void *);
+
+static const struct seq_operations help_debug_ops = {
+	.start	= help_start,
+	.next	= help_next,
+	.stop	= help_stop,
+	.show	= help_show,
+};
+
+/*
+ * Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and
+ * ORANGEFS_KMOD_DEBUG_FILE.
+ */
+static DEFINE_MUTEX(orangefs_debug_lock);
+
+int orangefs_debug_open(struct inode *, struct file *);
+
+static ssize_t orangefs_debug_read(struct file *,
+				 char __user *,
+				 size_t,
+				 loff_t *);
+
+static ssize_t orangefs_debug_write(struct file *,
+				  const char __user *,
+				  size_t,
+				  loff_t *);
+
+static const struct file_operations kernel_debug_fops = {
+	.open           = orangefs_debug_open,
+	.read           = orangefs_debug_read,
+	.write		= orangefs_debug_write,
+	.llseek         = generic_file_llseek,
+};
+
+/*
+ * initialize kmod debug operations, create orangefs debugfs dir and
+ * ORANGEFS_KMOD_DEBUG_HELP_FILE.
+ */
+int orangefs_debugfs_init(void)
+{
+
+	int rc = -ENOMEM;
+
+	debug_dir = debugfs_create_dir("orangefs", NULL);
+	if (!debug_dir) {
+		pr_info("%s: debugfs_create_dir failed.\n", __func__);
+		goto out;
+	}
+
+	help_file_dentry = debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE,
+				  0444,
+				  debug_dir,
+				  debug_help_string,
+				  &debug_help_fops);
+	if (!help_file_dentry) {
+		pr_info("%s: debugfs_create_file failed.\n", __func__);
+		goto out;
+	}
+
+	orangefs_debug_disabled = 0;
+	rc = 0;
+
+out:
+
+	return rc;
+}
+
+void orangefs_debugfs_cleanup(void)
+{
+	if (debug_dir)
+		debugfs_remove_recursive(debug_dir);
+}
+
+/* open ORANGEFS_KMOD_DEBUG_HELP_FILE */
+static int orangefs_debug_help_open(struct inode *inode, struct file *file)
+{
+	int rc = -ENODEV;
+	int ret;
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+		     "orangefs_debug_help_open: start\n");
+
+	if (orangefs_debug_disabled)
+		goto out;
+
+	ret = seq_open(file, &help_debug_ops);
+	if (ret)
+		goto out;
+
+	((struct seq_file *)(file->private_data))->private = inode->i_private;
+
+	rc = 0;
+
+out:
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+		     "orangefs_debug_help_open: rc:%d:\n",
+		     rc);
+	return rc;
+}
+
+/*
+ * I think start always gets called again after stop. Start
+ * needs to return NULL when it is done. The whole "payload"
+ * in this case is a single (long) string, so by the second
+ * time we get to start (pos = 1), we're done.
+ */
+static void *help_start(struct seq_file *m, loff_t *pos)
+{
+	void *payload = NULL;
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_start: start\n");
+
+	if (*pos == 0)
+		payload = m->private;
+
+	return payload;
+}
+
+static void *help_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");
+
+	return NULL;
+}
+
+static void help_stop(struct seq_file *m, void *p)
+{
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_stop: start\n");
+}
+
+static int help_show(struct seq_file *m, void *v)
+{
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_show: start\n");
+
+	seq_puts(m, v);
+
+	return 0;
+}
+
+/*
+ * initialize the kernel-debug file.
+ */
+int orangefs_kernel_debug_init(void)
+{
+	int rc = -ENOMEM;
+	struct dentry *ret;
+	char *k_buffer = NULL;
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
+
+	k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+	if (!k_buffer)
+		goto out;
+
+	if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
+		strcpy(k_buffer, kernel_debug_string);
+		strcat(k_buffer, "\n");
+	} else {
+		strcpy(k_buffer, "none\n");
+		pr_info("%s: overflow 1!\n", __func__);
+	}
+
+	ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
+				  0444,
+				  debug_dir,
+				  k_buffer,
+				  &kernel_debug_fops);
+	if (!ret) {
+		pr_info("%s: failed to create %s.\n",
+			__func__,
+			ORANGEFS_KMOD_DEBUG_FILE);
+		goto out;
+	}
+
+	rc = 0;
+
+out:
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+	return rc;
+}
+
+/*
+ * initialize the client-debug file.
+ */
+int orangefs_client_debug_init(void)
+{
+
+	int rc = -ENOMEM;
+	char *c_buffer = NULL;
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
+
+	c_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+	if (!c_buffer)
+		goto out;
+
+	if (strlen(client_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
+		strcpy(c_buffer, client_debug_string);
+		strcat(c_buffer, "\n");
+	} else {
+		strcpy(c_buffer, "none\n");
+		pr_info("%s: overflow! 2\n", __func__);
+	}
+
+	client_debug_dentry = debugfs_create_file(ORANGEFS_CLIENT_DEBUG_FILE,
+						  0444,
+						  debug_dir,
+						  c_buffer,
+						  &kernel_debug_fops);
+	if (!client_debug_dentry) {
+		pr_info("%s: failed to create updated %s.\n",
+			__func__,
+			ORANGEFS_CLIENT_DEBUG_FILE);
+		goto out;
+	}
+
+	rc = 0;
+
+out:
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+	return rc;
+}
+
+/* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/
+int orangefs_debug_open(struct inode *inode, struct file *file)
+{
+	int rc = -ENODEV;
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+		     "%s: orangefs_debug_disabled: %d\n",
+		     __func__,
+		     orangefs_debug_disabled);
+
+	if (orangefs_debug_disabled)
+		goto out;
+
+	rc = 0;
+	mutex_lock(&orangefs_debug_lock);
+	file->private_data = inode->i_private;
+	mutex_unlock(&orangefs_debug_lock);
+
+out:
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+		     "orangefs_debug_open: rc: %d\n",
+		     rc);
+	return rc;
+}
+
+static ssize_t orangefs_debug_read(struct file *file,
+				 char __user *ubuf,
+				 size_t count,
+				 loff_t *ppos)
+{
+	char *buf;
+	int sprintf_ret;
+	ssize_t read_ret = -ENOMEM;
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "orangefs_debug_read: start\n");
+
+	buf = kmalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	mutex_lock(&orangefs_debug_lock);
+	sprintf_ret = sprintf(buf, "%s", (char *)file->private_data);
+	mutex_unlock(&orangefs_debug_lock);
+
+	read_ret = simple_read_from_buffer(ubuf, count, ppos, buf, sprintf_ret);
+
+	kfree(buf);
+
+out:
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+		     "orangefs_debug_read: ret: %zu\n",
+		     read_ret);
+
+	return read_ret;
+}
+
+static ssize_t orangefs_debug_write(struct file *file,
+				  const char __user *ubuf,
+				  size_t count,
+				  loff_t *ppos)
+{
+	char *buf;
+	int rc = -EFAULT;
+	size_t silly = 0;
+	char *debug_string;
+	struct orangefs_kernel_op_s *new_op = NULL;
+	struct client_debug_mask c_mask = { NULL, 0, 0 };
+
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+		"orangefs_debug_write: %s\n",
+		file->f_path.dentry->d_name.name);
+
+	/*
+	 * Thwart users who try to jamb a ridiculous number
+	 * of bytes into the debug file...
+	 */
+	if (count > ORANGEFS_MAX_DEBUG_STRING_LEN + 1) {
+		silly = count;
+		count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1;
+	}
+
+	buf = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	if (copy_from_user(buf, ubuf, count - 1)) {
+		gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+			     "%s: copy_from_user failed!\n",
+			     __func__);
+		goto out;
+	}
+
+	/*
+	 * Map the keyword string from userspace into a valid debug mask.
+	 * The mapping process involves mapping the human-inputted string
+	 * into a valid mask, and then rebuilding the string from the
+	 * verified valid mask.
+	 *
+	 * A service operation is required to set a new client-side
+	 * debug mask.
+	 */
+	if (!strcmp(file->f_path.dentry->d_name.name,
+		    ORANGEFS_KMOD_DEBUG_FILE)) {
+		debug_string_to_mask(buf, &gossip_debug_mask, 0);
+		debug_mask_to_string(&gossip_debug_mask, 0);
+		debug_string = kernel_debug_string;
+		gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+			     "New kernel debug string is %s\n",
+			     kernel_debug_string);
+	} else {
+		/* Can't reset client debug mask if client is not running. */
+		if (is_daemon_in_service()) {
+			pr_info("%s: Client not running :%d:\n",
+				__func__,
+				is_daemon_in_service());
+			goto out;
+		}
+
+		debug_string_to_mask(buf, &c_mask, 1);
+		debug_mask_to_string(&c_mask, 1);
+		debug_string = client_debug_string;
+
+		new_op = op_alloc(ORANGEFS_VFS_OP_PARAM);
+		if (!new_op) {
+			pr_info("%s: op_alloc failed!\n", __func__);
+			goto out;
+		}
+
+		new_op->upcall.req.param.op =
+			ORANGEFS_PARAM_REQUEST_OP_TWO_MASK_VALUES;
+		new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_SET;
+		memset(new_op->upcall.req.param.s_value,
+		       0,
+		       ORANGEFS_MAX_DEBUG_STRING_LEN);
+		sprintf(new_op->upcall.req.param.s_value,
+			"%llx %llx\n",
+			c_mask.mask1,
+			c_mask.mask2);
+
+		/* service_operation returns 0 on success... */
+		rc = service_operation(new_op,
+				       "orangefs_param",
+					ORANGEFS_OP_INTERRUPTIBLE);
+
+		if (rc)
+			gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+				     "%s: service_operation failed! rc:%d:\n",
+				     __func__,
+				     rc);
+
+		op_release(new_op);
+	}
+
+	mutex_lock(&orangefs_debug_lock);
+	memset(file->f_inode->i_private, 0, ORANGEFS_MAX_DEBUG_STRING_LEN);
+	sprintf((char *)file->f_inode->i_private, "%s\n", debug_string);
+	mutex_unlock(&orangefs_debug_lock);
+
+	*ppos += count;
+	if (silly)
+		rc = silly;
+	else
+		rc = count;
+
+out:
+	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+		     "orangefs_debug_write: rc: %d\n",
+		     rc);
+	kfree(buf);
+	return rc;
+}

diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h
new file mode 100644
index 0000000..e4828c0
--- /dev/null
+++ b/fs/orangefs/orangefs-debugfs.h

@@ -0,0 +1,3 @@
+int orangefs_debugfs_init(void);
+int orangefs_kernel_debug_init(void);
+void orangefs_debugfs_cleanup(void);

diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h
new file mode 100644
index 0000000..9eac9d9
--- /dev/null
+++ b/fs/orangefs/orangefs-dev-proto.h

@@ -0,0 +1,62 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#ifndef _ORANGEFS_DEV_PROTO_H
+#define _ORANGEFS_DEV_PROTO_H
+
+/*
+ * types and constants shared between user space and kernel space for
+ * device interaction using a common protocol
+ */
+
+/*
+ * valid orangefs kernel operation types
+ */
+#define ORANGEFS_VFS_OP_INVALID           0xFF000000
+#define ORANGEFS_VFS_OP_FILE_IO        0xFF000001
+#define ORANGEFS_VFS_OP_LOOKUP         0xFF000002
+#define ORANGEFS_VFS_OP_CREATE         0xFF000003
+#define ORANGEFS_VFS_OP_GETATTR        0xFF000004
+#define ORANGEFS_VFS_OP_REMOVE         0xFF000005
+#define ORANGEFS_VFS_OP_MKDIR          0xFF000006
+#define ORANGEFS_VFS_OP_READDIR        0xFF000007
+#define ORANGEFS_VFS_OP_SETATTR        0xFF000008
+#define ORANGEFS_VFS_OP_SYMLINK        0xFF000009
+#define ORANGEFS_VFS_OP_RENAME         0xFF00000A
+#define ORANGEFS_VFS_OP_STATFS         0xFF00000B
+#define ORANGEFS_VFS_OP_TRUNCATE       0xFF00000C
+#define ORANGEFS_VFS_OP_MMAP_RA_FLUSH  0xFF00000D
+#define ORANGEFS_VFS_OP_FS_MOUNT       0xFF00000E
+#define ORANGEFS_VFS_OP_FS_UMOUNT      0xFF00000F
+#define ORANGEFS_VFS_OP_GETXATTR       0xFF000010
+#define ORANGEFS_VFS_OP_SETXATTR          0xFF000011
+#define ORANGEFS_VFS_OP_LISTXATTR         0xFF000012
+#define ORANGEFS_VFS_OP_REMOVEXATTR       0xFF000013
+#define ORANGEFS_VFS_OP_PARAM          0xFF000014
+#define ORANGEFS_VFS_OP_PERF_COUNT     0xFF000015
+#define ORANGEFS_VFS_OP_CANCEL            0xFF00EE00
+#define ORANGEFS_VFS_OP_FSYNC          0xFF00EE01
+#define ORANGEFS_VFS_OP_FSKEY             0xFF00EE02
+#define ORANGEFS_VFS_OP_READDIRPLUS       0xFF00EE03
+
+/*
+ * Misc constants. Please retain them as multiples of 8!
+ * Otherwise 32-64 bit interactions will be messed up :)
+ */
+#define ORANGEFS_MAX_DEBUG_STRING_LEN	0x00000400
+#define ORANGEFS_MAX_DEBUG_ARRAY_LEN	0x00000800
+
+/*
+ * The maximum number of directory entries in a single request is 96.
+ * XXX: Why can this not be higher. The client-side code can handle up to 512.
+ * XXX: What happens if we expect more than the client can return?
+ */
+#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 96
+
+#include "upcall.h"
+#include "downcall.h"
+
+#endif

diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
new file mode 100644
index 0000000..a9925e2
--- /dev/null
+++ b/fs/orangefs/orangefs-kernel.h

@@ -0,0 +1,623 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  The ORANGEFS Linux kernel support allows ORANGEFS volumes to be mounted and
+ *  accessed through the Linux VFS (i.e. using standard I/O system calls).
+ *  This support is only needed on clients that wish to mount the file system.
+ *
+ */
+
+/*
+ *  Declarations and macros for the ORANGEFS Linux kernel support.
+ */
+
+#ifndef __ORANGEFSKERNEL_H
+#define __ORANGEFSKERNEL_H
+
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/statfs.h>
+#include <linux/backing-dev.h>
+#include <linux/device.h>
+#include <linux/mpage.h>
+#include <linux/namei.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+
+#include <linux/aio.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/compat.h>
+#include <linux/mount.h>
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+#include <linux/uio.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/wait.h>
+#include <linux/dcache.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/rwsem.h>
+#include <linux/xattr.h>
+#include <linux/exportfs.h>
+
+#include <asm/unaligned.h>
+
+#include "orangefs-dev-proto.h"
+
+#ifdef ORANGEFS_KERNEL_DEBUG
+#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       10
+#else
+#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       20
+#endif
+
+#define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS   30
+
+#define ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS     900	/* 15 minutes */
+
+#define ORANGEFS_REQDEVICE_NAME          "pvfs2-req"
+
+#define ORANGEFS_DEVREQ_MAGIC             0x20030529
+#define ORANGEFS_LINK_MAX                 0x000000FF
+#define ORANGEFS_PURGE_RETRY_COUNT     0x00000005
+#define ORANGEFS_MAX_NUM_OPTIONS          0x00000004
+#define ORANGEFS_MAX_MOUNT_OPT_LEN        0x00000080
+#define ORANGEFS_MAX_FSKEY_LEN            64
+
+#define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) +   \
+sizeof(__u64) + sizeof(struct orangefs_upcall_s))
+#define MAX_DEV_REQ_DOWNSIZE (2 * sizeof(__s32) + \
+sizeof(__u64) + sizeof(struct orangefs_downcall_s))
+
+/*
+ * valid orangefs kernel operation states
+ *
+ * unknown  - op was just initialized
+ * waiting  - op is on request_list (upward bound)
+ * inprogr  - op is in progress (waiting for downcall)
+ * serviced - op has matching downcall; ok
+ * purged   - op has to start a timer since client-core
+ *            exited uncleanly before servicing op
+ * given up - submitter has given up waiting for it
+ */
+enum orangefs_vfs_op_states {
+	OP_VFS_STATE_UNKNOWN = 0,
+	OP_VFS_STATE_WAITING = 1,
+	OP_VFS_STATE_INPROGR = 2,
+	OP_VFS_STATE_SERVICED = 4,
+	OP_VFS_STATE_PURGED = 8,
+	OP_VFS_STATE_GIVEN_UP = 16,
+};
+
+/*
+ * An array of client_debug_mask will be built to hold debug keyword/mask
+ * values fetched from userspace.
+ */
+struct client_debug_mask {
+	char *keyword;
+	__u64 mask1;
+	__u64 mask2;
+};
+
+/*
+ * orangefs kernel memory related flags
+ */
+
+#if ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB))
+#define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
+#else
+#define ORANGEFS_CACHE_CREATE_FLAGS 0
+#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */
+
+/* orangefs xattr and acl related defines */
+#define ORANGEFS_XATTR_INDEX_POSIX_ACL_ACCESS  1
+#define ORANGEFS_XATTR_INDEX_POSIX_ACL_DEFAULT 2
+#define ORANGEFS_XATTR_INDEX_TRUSTED           3
+#define ORANGEFS_XATTR_INDEX_DEFAULT           4
+
+#define ORANGEFS_XATTR_NAME_ACL_ACCESS XATTR_NAME_POSIX_ACL_ACCESS
+#define ORANGEFS_XATTR_NAME_ACL_DEFAULT XATTR_NAME_POSIX_ACL_DEFAULT
+#define ORANGEFS_XATTR_NAME_TRUSTED_PREFIX "trusted."
+#define ORANGEFS_XATTR_NAME_DEFAULT_PREFIX ""
+
+/* these functions are defined in orangefs-utils.c */
+int orangefs_prepare_cdm_array(char *debug_array_string);
+int orangefs_prepare_debugfs_help_string(int);
+
+/* defined in orangefs-debugfs.c */
+int orangefs_client_debug_init(void);
+
+void debug_string_to_mask(char *, void *, int);
+void do_c_mask(int, char *, struct client_debug_mask **);
+void do_k_mask(int, char *, __u64 **);
+
+void debug_mask_to_string(void *, int);
+void do_k_string(void *, int);
+void do_c_string(void *, int);
+int check_amalgam_keyword(void *, int);
+int keyword_is_amalgam(char *);
+
+/*these variables are defined in orangefs-mod.c */
+extern char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+extern char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+extern char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+extern unsigned int kernel_mask_set_mod_init;
+
+extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
+extern const struct xattr_handler *orangefs_xattr_handlers[];
+
+extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
+extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+
+/*
+ * Redefine xtvec structure so that we could move helper functions out of
+ * the define
+ */
+struct xtvec {
+	__kernel_off_t xtv_off;		/* must be off_t */
+	__kernel_size_t xtv_len;	/* must be size_t */
+};
+
+/*
+ * orangefs data structures
+ */
+struct orangefs_kernel_op_s {
+	enum orangefs_vfs_op_states op_state;
+	__u64 tag;
+
+	/*
+	 * Set uses_shared_memory to non zero if this operation uses
+	 * shared memory. If true, then a retry on the op must also
+	 * get a new shared memory buffer and re-populate it.
+	 * Cancels don't care - it only matters for service_operation()
+	 * retry logics and cancels don't go through it anymore. It
+	 * safely stays non-zero when we use it as slot_to_free.
+	 */
+	union {
+		int uses_shared_memory;
+		int slot_to_free;
+	};
+
+	struct orangefs_upcall_s upcall;
+	struct orangefs_downcall_s downcall;
+
+	struct completion waitq;
+	spinlock_t lock;
+
+	int attempts;
+
+	struct list_head list;
+};
+
+#define set_op_state_waiting(op)     ((op)->op_state = OP_VFS_STATE_WAITING)
+#define set_op_state_inprogress(op)  ((op)->op_state = OP_VFS_STATE_INPROGR)
+#define set_op_state_given_up(op)  ((op)->op_state = OP_VFS_STATE_GIVEN_UP)
+static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op)
+{
+	op->op_state = OP_VFS_STATE_SERVICED;
+	complete(&op->waitq);
+}
+
+#define op_state_waiting(op)     ((op)->op_state & OP_VFS_STATE_WAITING)
+#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR)
+#define op_state_serviced(op)    ((op)->op_state & OP_VFS_STATE_SERVICED)
+#define op_state_purged(op)      ((op)->op_state & OP_VFS_STATE_PURGED)
+#define op_state_given_up(op)    ((op)->op_state & OP_VFS_STATE_GIVEN_UP)
+#define op_is_cancel(op)         ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL)
+
+void op_release(struct orangefs_kernel_op_s *op);
+
+extern void orangefs_bufmap_put(int);
+static inline void put_cancel(struct orangefs_kernel_op_s *op)
+{
+	orangefs_bufmap_put(op->slot_to_free);
+	op_release(op);
+}
+
+static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
+{
+	spin_lock(&op->lock);
+	if (unlikely(op_is_cancel(op))) {
+		list_del_init(&op->list);
+		spin_unlock(&op->lock);
+		put_cancel(op);
+	} else {
+		op->op_state |= OP_VFS_STATE_PURGED;
+		complete(&op->waitq);
+		spin_unlock(&op->lock);
+	}
+}
+
+/* per inode private orangefs info */
+struct orangefs_inode_s {
+	struct orangefs_object_kref refn;
+	char link_target[ORANGEFS_NAME_MAX];
+	__s64 blksize;
+	/*
+	 * Reading/Writing Extended attributes need to acquire the appropriate
+	 * reader/writer semaphore on the orangefs_inode_s structure.
+	 */
+	struct rw_semaphore xattr_sem;
+
+	struct inode vfs_inode;
+	sector_t last_failed_block_index_read;
+
+	/*
+	 * State of in-memory attributes not yet flushed to disk associated
+	 * with this object
+	 */
+	unsigned long pinode_flags;
+};
+
+#define P_ATIME_FLAG 0
+#define P_MTIME_FLAG 1
+#define P_CTIME_FLAG 2
+#define P_MODE_FLAG  3
+
+#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
+#define SetAtimeFlag(pinode)   set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
+#define AtimeFlag(pinode)      test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
+
+#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
+#define SetMtimeFlag(pinode)   set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
+#define MtimeFlag(pinode)      test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
+
+#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
+#define SetCtimeFlag(pinode)   set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
+#define CtimeFlag(pinode)      test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
+
+#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
+#define SetModeFlag(pinode)   set_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
+#define ModeFlag(pinode)      test_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
+
+/* per superblock private orangefs info */
+struct orangefs_sb_info_s {
+	struct orangefs_khandle root_khandle;
+	__s32 fs_id;
+	int id;
+	int flags;
+#define ORANGEFS_OPT_INTR	0x01
+#define ORANGEFS_OPT_LOCAL_LOCK	0x02
+	char devname[ORANGEFS_MAX_SERVER_ADDR_LEN];
+	struct super_block *sb;
+	int mount_pending;
+	struct list_head list;
+};
+
+/*
+ * structure that holds the state of any async I/O operation issued
+ * through the VFS. Needed especially to handle cancellation requests
+ * or even completion notification so that the VFS client-side daemon
+ * can free up its vfs_request slots.
+ */
+struct orangefs_kiocb_s {
+	/* the pointer to the task that initiated the AIO */
+	struct task_struct *tsk;
+
+	/* pointer to the kiocb that kicked this operation */
+	struct kiocb *kiocb;
+
+	/* buffer index that was used for the I/O */
+	struct orangefs_bufmap *bufmap;
+	int buffer_index;
+
+	/* orangefs kernel operation type */
+	struct orangefs_kernel_op_s *op;
+
+	/* The user space buffers from/to which I/O is being staged */
+	struct iovec *iov;
+
+	/* number of elements in the iovector */
+	unsigned long nr_segs;
+
+	/* set to indicate the type of the operation */
+	int rw;
+
+	/* file offset */
+	loff_t offset;
+
+	/* and the count in bytes */
+	size_t bytes_to_be_copied;
+
+	ssize_t bytes_copied;
+	int needs_cleanup;
+};
+
+struct orangefs_stats {
+	unsigned long cache_hits;
+	unsigned long cache_misses;
+	unsigned long reads;
+	unsigned long writes;
+};
+
+extern struct orangefs_stats g_orangefs_stats;
+
+/*
+ * NOTE: See Documentation/filesystems/porting for information
+ * on implementing FOO_I and properly accessing fs private data
+ */
+static inline struct orangefs_inode_s *ORANGEFS_I(struct inode *inode)
+{
+	return container_of(inode, struct orangefs_inode_s, vfs_inode);
+}
+
+static inline struct orangefs_sb_info_s *ORANGEFS_SB(struct super_block *sb)
+{
+	return (struct orangefs_sb_info_s *) sb->s_fs_info;
+}
+
+/* ino_t descends from "unsigned long", 8 bytes, 64 bits. */
+static inline ino_t orangefs_khandle_to_ino(struct orangefs_khandle *khandle)
+{
+	union {
+		unsigned char u[8];
+		__u64 ino;
+	} ihandle;
+
+	ihandle.u[0] = khandle->u[0] ^ khandle->u[4];
+	ihandle.u[1] = khandle->u[1] ^ khandle->u[5];
+	ihandle.u[2] = khandle->u[2] ^ khandle->u[6];
+	ihandle.u[3] = khandle->u[3] ^ khandle->u[7];
+	ihandle.u[4] = khandle->u[12] ^ khandle->u[8];
+	ihandle.u[5] = khandle->u[13] ^ khandle->u[9];
+	ihandle.u[6] = khandle->u[14] ^ khandle->u[10];
+	ihandle.u[7] = khandle->u[15] ^ khandle->u[11];
+
+	return ihandle.ino;
+}
+
+static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
+{
+	return &(ORANGEFS_I(inode)->refn.khandle);
+}
+
+static inline __s32 get_fsid_from_ino(struct inode *inode)
+{
+	return ORANGEFS_I(inode)->refn.fs_id;
+}
+
+static inline ino_t get_ino_from_khandle(struct inode *inode)
+{
+	struct orangefs_khandle *khandle;
+	ino_t ino;
+
+	khandle = get_khandle_from_ino(inode);
+	ino = orangefs_khandle_to_ino(khandle);
+	return ino;
+}
+
+static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry)
+{
+	return get_ino_from_khandle(dentry->d_parent->d_inode);
+}
+
+static inline int is_root_handle(struct inode *inode)
+{
+	gossip_debug(GOSSIP_DCACHE_DEBUG,
+		     "%s: root handle: %pU, this handle: %pU:\n",
+		     __func__,
+		     &ORANGEFS_SB(inode->i_sb)->root_khandle,
+		     get_khandle_from_ino(inode));
+
+	if (ORANGEFS_khandle_cmp(&(ORANGEFS_SB(inode->i_sb)->root_khandle),
+			     get_khandle_from_ino(inode)))
+		return 0;
+	else
+		return 1;
+}
+
+static inline int match_handle(struct orangefs_khandle resp_handle,
+			       struct inode *inode)
+{
+	gossip_debug(GOSSIP_DCACHE_DEBUG,
+		     "%s: one handle: %pU, another handle:%pU:\n",
+		     __func__,
+		     &resp_handle,
+		     get_khandle_from_ino(inode));
+
+	if (ORANGEFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode)))
+		return 0;
+	else
+		return 1;
+}
+
+/*
+ * defined in orangefs-cache.c
+ */
+int op_cache_initialize(void);
+int op_cache_finalize(void);
+struct orangefs_kernel_op_s *op_alloc(__s32 type);
+void orangefs_new_tag(struct orangefs_kernel_op_s *op);
+char *get_opname_string(struct orangefs_kernel_op_s *new_op);
+
+int orangefs_inode_cache_initialize(void);
+int orangefs_inode_cache_finalize(void);
+
+/*
+ * defined in orangefs-mod.c
+ */
+void purge_inprogress_ops(void);
+
+/*
+ * defined in waitqueue.c
+ */
+void purge_waiting_ops(void);
+
+/*
+ * defined in super.c
+ */
+struct dentry *orangefs_mount(struct file_system_type *fst,
+			   int flags,
+			   const char *devname,
+			   void *data);
+
+void orangefs_kill_sb(struct super_block *sb);
+int orangefs_remount(struct orangefs_sb_info_s *);
+
+int fsid_key_table_initialize(void);
+void fsid_key_table_finalize(void);
+
+/*
+ * defined in inode.c
+ */
+__u32 convert_to_orangefs_mask(unsigned long lite_mask);
+struct inode *orangefs_new_inode(struct super_block *sb,
+			      struct inode *dir,
+			      int mode,
+			      dev_t dev,
+			      struct orangefs_object_kref *ref);
+
+int orangefs_setattr(struct dentry *dentry, struct iattr *iattr);
+
+int orangefs_getattr(struct vfsmount *mnt,
+		  struct dentry *dentry,
+		  struct kstat *kstat);
+
+int orangefs_permission(struct inode *inode, int mask);
+
+/*
+ * defined in xattr.c
+ */
+int orangefs_setxattr(struct dentry *dentry,
+		   const char *name,
+		   const void *value,
+		   size_t size,
+		   int flags);
+
+ssize_t orangefs_getxattr(struct dentry *dentry,
+		       const char *name,
+		       void *buffer,
+		       size_t size);
+
+ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+
+/*
+ * defined in namei.c
+ */
+struct inode *orangefs_iget(struct super_block *sb,
+			 struct orangefs_object_kref *ref);
+
+ssize_t orangefs_inode_read(struct inode *inode,
+			    struct iov_iter *iter,
+			    loff_t *offset,
+			    loff_t readahead_size);
+
+/*
+ * defined in devorangefs-req.c
+ */
+int orangefs_dev_init(void);
+void orangefs_dev_cleanup(void);
+int is_daemon_in_service(void);
+bool __is_daemon_in_service(void);
+
+/*
+ * defined in orangefs-utils.c
+ */
+__s32 fsid_of_op(struct orangefs_kernel_op_s *op);
+
+int orangefs_flush_inode(struct inode *inode);
+
+ssize_t orangefs_inode_getxattr(struct inode *inode,
+			     const char *prefix,
+			     const char *name,
+			     void *buffer,
+			     size_t size);
+
+int orangefs_inode_setxattr(struct inode *inode,
+			 const char *prefix,
+			 const char *name,
+			 const void *value,
+			 size_t size,
+			 int flags);
+
+int orangefs_inode_getattr(struct inode *inode, int new, int size);
+
+int orangefs_inode_check_changed(struct inode *inode);
+
+int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
+
+void orangefs_make_bad_inode(struct inode *inode);
+
+int orangefs_unmount_sb(struct super_block *sb);
+
+bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
+
+int orangefs_normalize_to_errno(__s32 error_code);
+
+extern struct mutex devreq_mutex;
+extern struct mutex request_mutex;
+extern int debug;
+extern int op_timeout_secs;
+extern int slot_timeout_secs;
+extern struct list_head orangefs_superblocks;
+extern spinlock_t orangefs_superblocks_lock;
+extern struct list_head orangefs_request_list;
+extern spinlock_t orangefs_request_list_lock;
+extern wait_queue_head_t orangefs_request_list_waitq;
+extern struct list_head *htable_ops_in_progress;
+extern spinlock_t htable_ops_in_progress_lock;
+extern int hash_table_size;
+
+extern const struct address_space_operations orangefs_address_operations;
+extern struct backing_dev_info orangefs_backing_dev_info;
+extern struct inode_operations orangefs_file_inode_operations;
+extern const struct file_operations orangefs_file_operations;
+extern struct inode_operations orangefs_symlink_inode_operations;
+extern struct inode_operations orangefs_dir_inode_operations;
+extern const struct file_operations orangefs_dir_operations;
+extern const struct dentry_operations orangefs_dentry_operations;
+extern const struct file_operations orangefs_devreq_file_operations;
+
+extern wait_queue_head_t orangefs_bufmap_init_waitq;
+
+/*
+ * misc convenience macros
+ */
+
+#define ORANGEFS_OP_INTERRUPTIBLE 1   /* service_operation() is interruptible */
+#define ORANGEFS_OP_PRIORITY      2   /* service_operation() is high priority */
+#define ORANGEFS_OP_CANCELLATION  4   /* this is a cancellation */
+#define ORANGEFS_OP_NO_MUTEX      8   /* don't acquire request_mutex */
+#define ORANGEFS_OP_ASYNC         16  /* Queue it, but don't wait */
+
+int service_operation(struct orangefs_kernel_op_s *op,
+		      const char *op_name,
+		      int flags);
+
+#define get_interruptible_flag(inode) \
+	((ORANGEFS_SB(inode->i_sb)->flags & ORANGEFS_OPT_INTR) ? \
+		ORANGEFS_OP_INTERRUPTIBLE : 0)
+
+#define fill_default_sys_attrs(sys_attr, type, mode)			\
+do {									\
+	sys_attr.owner = from_kuid(current_user_ns(), current_fsuid()); \
+	sys_attr.group = from_kgid(current_user_ns(), current_fsgid()); \
+	sys_attr.perms = ORANGEFS_util_translate_mode(mode);		\
+	sys_attr.mtime = 0;						\
+	sys_attr.atime = 0;						\
+	sys_attr.ctime = 0;						\
+	sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE;			\
+} while (0)
+
+static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+	mutex_lock(&inode->i_mutex);
+#endif
+	i_size_write(inode, i_size);
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+	mutex_unlock(&inode->i_mutex);
+#endif
+}
+
+#endif /* __ORANGEFSKERNEL_H */

diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
new file mode 100644
index 0000000..6f072a8
--- /dev/null
+++ b/fs/orangefs/orangefs-mod.c

@@ -0,0 +1,293 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * Changes by Acxiom Corporation to add proc file handler for pvfs2 client
+ * parameters, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-debugfs.h"
+#include "orangefs-sysfs.h"
+
+/* ORANGEFS_VERSION is a ./configure define */
+#ifndef ORANGEFS_VERSION
+#define ORANGEFS_VERSION "upstream"
+#endif
+
+/*
+ * global variables declared here
+ */
+
+/* array of client debug keyword/mask values */
+struct client_debug_mask *cdm_array;
+int cdm_element_count;
+
+char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN] = "none";
+char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+
+char *debug_help_string;
+int help_string_initialized;
+struct dentry *help_file_dentry;
+struct dentry *client_debug_dentry;
+struct dentry *debug_dir;
+int client_verbose_index;
+int client_all_index;
+struct orangefs_stats g_orangefs_stats;
+
+/* the size of the hash tables for ops in progress */
+int hash_table_size = 509;
+
+static ulong module_parm_debug_mask;
+__u64 gossip_debug_mask;
+struct client_debug_mask client_debug_mask = { NULL, 0, 0 };
+unsigned int kernel_mask_set_mod_init; /* implicitly false */
+int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS;
+int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS;
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ORANGEFS Development Team");
+MODULE_DESCRIPTION("The Linux Kernel VFS interface to ORANGEFS");
+MODULE_PARM_DESC(module_parm_debug_mask, "debugging level (see orangefs-debug.h for values)");
+MODULE_PARM_DESC(op_timeout_secs, "Operation timeout in seconds");
+MODULE_PARM_DESC(slot_timeout_secs, "Slot timeout in seconds");
+MODULE_PARM_DESC(hash_table_size,
+		 "size of hash table for operations in progress");
+
+static struct file_system_type orangefs_fs_type = {
+	.name = "pvfs2",
+	.mount = orangefs_mount,
+	.kill_sb = orangefs_kill_sb,
+	.owner = THIS_MODULE,
+};
+
+module_param(hash_table_size, int, 0);
+module_param(module_parm_debug_mask, ulong, 0644);
+module_param(op_timeout_secs, int, 0);
+module_param(slot_timeout_secs, int, 0);
+
+/* synchronizes the request device file */
+DEFINE_MUTEX(devreq_mutex);
+
+/*
+ * Blocks non-priority requests from being queued for servicing.  This
+ * could be used for protecting the request list data structure, but
+ * for now it's only being used to stall the op addition to the request
+ * list
+ */
+DEFINE_MUTEX(request_mutex);
+
+/* hash table for storing operations waiting for matching downcall */
+struct list_head *htable_ops_in_progress;
+DEFINE_SPINLOCK(htable_ops_in_progress_lock);
+
+/* list for queueing upcall operations */
+LIST_HEAD(orangefs_request_list);
+
+/* used to protect the above orangefs_request_list */
+DEFINE_SPINLOCK(orangefs_request_list_lock);
+
+/* used for incoming request notification */
+DECLARE_WAIT_QUEUE_HEAD(orangefs_request_list_waitq);
+
+static int __init orangefs_init(void)
+{
+	int ret = -1;
+	__u32 i = 0;
+
+	/* convert input debug mask to a 64-bit unsigned integer */
+	gossip_debug_mask = (unsigned long long) module_parm_debug_mask;
+
+	/*
+	 * set the kernel's gossip debug string; invalid mask values will
+	 * be ignored.
+	 */
+	debug_mask_to_string(&gossip_debug_mask, 0);
+
+	/* remove any invalid values from the mask */
+	debug_string_to_mask(kernel_debug_string, &gossip_debug_mask, 0);
+
+	/*
+	 * if the mask has a non-zero value, then indicate that the mask
+	 * was set when the kernel module was loaded.  The orangefs dev ioctl
+	 * command will look at this boolean to determine if the kernel's
+	 * debug mask should be overwritten when the client-core is started.
+	 */
+	if (gossip_debug_mask != 0)
+		kernel_mask_set_mod_init = true;
+
+	pr_info("%s: called with debug mask: :%s: :%llx:\n",
+		__func__,
+		kernel_debug_string,
+		(unsigned long long)gossip_debug_mask);
+
+	ret = bdi_init(&orangefs_backing_dev_info);
+
+	if (ret)
+		return ret;
+
+	if (op_timeout_secs < 0)
+		op_timeout_secs = 0;
+
+	if (slot_timeout_secs < 0)
+		slot_timeout_secs = 0;
+
+	/* initialize global book keeping data structures */
+	ret = op_cache_initialize();
+	if (ret < 0)
+		goto err;
+
+	ret = orangefs_inode_cache_initialize();
+	if (ret < 0)
+		goto cleanup_op;
+
+	htable_ops_in_progress =
+	    kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL);
+	if (!htable_ops_in_progress) {
+		gossip_err("Failed to initialize op hashtable");
+		ret = -ENOMEM;
+		goto cleanup_inode;
+	}
+
+	/* initialize a doubly linked at each hash table index */
+	for (i = 0; i < hash_table_size; i++)
+		INIT_LIST_HEAD(&htable_ops_in_progress[i]);
+
+	ret = fsid_key_table_initialize();
+	if (ret < 0)
+		goto cleanup_progress_table;
+
+	/*
+	 * Build the contents of /sys/kernel/debug/orangefs/debug-help
+	 * from the keywords in the kernel keyword/mask array.
+	 *
+	 * The keywords in the client keyword/mask array are
+	 * unknown at boot time.
+	 *
+	 * orangefs_prepare_debugfs_help_string will be used again
+	 * later to rebuild the debug-help file after the client starts
+	 * and passes along the needed info. The argument signifies
+	 * which time orangefs_prepare_debugfs_help_string is being
+	 * called.
+	 */
+	ret = orangefs_prepare_debugfs_help_string(1);
+	if (ret)
+		goto cleanup_key_table;
+
+	ret = orangefs_debugfs_init();
+	if (ret)
+		goto debugfs_init_failed;
+
+	ret = orangefs_kernel_debug_init();
+	if (ret)
+		goto kernel_debug_init_failed;
+
+	ret = orangefs_sysfs_init();
+	if (ret)
+		goto sysfs_init_failed;
+
+	/* Initialize the orangefsdev subsystem. */
+	ret = orangefs_dev_init();
+	if (ret < 0) {
+		gossip_err("%s: could not initialize device subsystem %d!\n",
+			   __func__,
+			   ret);
+		goto cleanup_device;
+	}
+
+	ret = register_filesystem(&orangefs_fs_type);
+	if (ret == 0) {
+		pr_info("orangefs: module version %s loaded\n", ORANGEFS_VERSION);
+		ret = 0;
+		goto out;
+	}
+
+	orangefs_sysfs_exit();
+
+cleanup_device:
+	orangefs_dev_cleanup();
+
+sysfs_init_failed:
+
+kernel_debug_init_failed:
+
+debugfs_init_failed:
+	orangefs_debugfs_cleanup();
+
+cleanup_key_table:
+	fsid_key_table_finalize();
+
+cleanup_progress_table:
+	kfree(htable_ops_in_progress);
+
+cleanup_inode:
+	orangefs_inode_cache_finalize();
+
+cleanup_op:
+	op_cache_finalize();
+
+err:
+	bdi_destroy(&orangefs_backing_dev_info);
+
+out:
+	return ret;
+}
+
+static void __exit orangefs_exit(void)
+{
+	int i = 0;
+	gossip_debug(GOSSIP_INIT_DEBUG, "orangefs: orangefs_exit called\n");
+
+	unregister_filesystem(&orangefs_fs_type);
+	orangefs_debugfs_cleanup();
+	orangefs_sysfs_exit();
+	fsid_key_table_finalize();
+	orangefs_dev_cleanup();
+	BUG_ON(!list_empty(&orangefs_request_list));
+	for (i = 0; i < hash_table_size; i++)
+		BUG_ON(!list_empty(&htable_ops_in_progress[i]));
+
+	orangefs_inode_cache_finalize();
+	op_cache_finalize();
+
+	kfree(htable_ops_in_progress);
+
+	bdi_destroy(&orangefs_backing_dev_info);
+
+	pr_info("orangefs: module version %s unloaded\n", ORANGEFS_VERSION);
+}
+
+/*
+ * What we do in this function is to walk the list of operations
+ * that are in progress in the hash table and mark them as purged as well.
+ */
+void purge_inprogress_ops(void)
+{
+	int i;
+
+	for (i = 0; i < hash_table_size; i++) {
+		struct orangefs_kernel_op_s *op;
+		struct orangefs_kernel_op_s *next;
+
+		spin_lock(&htable_ops_in_progress_lock);
+		list_for_each_entry_safe(op,
+					 next,
+					 &htable_ops_in_progress[i],
+					 list) {
+			set_op_state_purged(op);
+			gossip_debug(GOSSIP_DEV_DEBUG,
+				     "%s: op:%s: op_state:%d: process:%s:\n",
+				     __func__,
+				     get_opname_string(op),
+				     op->op_state,
+				     current->comm);
+		}
+		spin_unlock(&htable_ops_in_progress_lock);
+	}
+}
+
+module_init(orangefs_init);
+module_exit(orangefs_exit);

diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c
new file mode 100644
index 0000000..5c03113
--- /dev/null
+++ b/fs/orangefs/orangefs-sysfs.c

@@ -0,0 +1,1772 @@
+/*
+ * Documentation/ABI/stable/orangefs-sysfs:
+ *
+ * What:		/sys/fs/orangefs/perf_counter_reset
+ * Date:		June 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			echo a 0 or a 1 into perf_counter_reset to
+ * 			reset all the counters in
+ * 			/sys/fs/orangefs/perf_counters
+ * 			except ones with PINT_PERF_PRESERVE set.
+ *
+ *
+ * What:		/sys/fs/orangefs/perf_counters/...
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			Counters and settings for various caches.
+ * 			Read only.
+ *
+ *
+ * What:		/sys/fs/orangefs/perf_time_interval_secs
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ *			Length of perf counter intervals in
+ *			seconds.
+ *
+ *
+ * What:		/sys/fs/orangefs/perf_history_size
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			The perf_counters cache statistics have N, or
+ * 			perf_history_size, samples. The default is
+ * 			one.
+ *
+ *			Every perf_time_interval_secs the (first)
+ *			samples are reset.
+ *
+ *			If N is greater than one, the "current" set
+ *			of samples is reset, and the samples from the
+ *			other N-1 intervals remain available.
+ *
+ *
+ * What:		/sys/fs/orangefs/op_timeout_secs
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ *			Service operation timeout in seconds.
+ *
+ *
+ * What:		/sys/fs/orangefs/slot_timeout_secs
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ *			"Slot" timeout in seconds. A "slot"
+ *			is an indexed buffer in the shared
+ *			memory segment used for communication
+ *			between the kernel module and userspace.
+ *			Slots are requested and waited for,
+ *			the wait times out after slot_timeout_secs.
+ *
+ *
+ * What:		/sys/fs/orangefs/acache/...
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			Attribute cache configurable settings.
+ *
+ *
+ * What:		/sys/fs/orangefs/ncache/...
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			Name cache configurable settings.
+ *
+ *
+ * What:		/sys/fs/orangefs/capcache/...
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			Capability cache configurable settings.
+ *
+ *
+ * What:		/sys/fs/orangefs/ccache/...
+ * Date:		Jun 2015
+ * Contact:		Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * 			Credential cache configurable settings.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-sysfs.h"
+
+#define ORANGEFS_KOBJ_ID "orangefs"
+#define ACACHE_KOBJ_ID "acache"
+#define CAPCACHE_KOBJ_ID "capcache"
+#define CCACHE_KOBJ_ID "ccache"
+#define NCACHE_KOBJ_ID "ncache"
+#define PC_KOBJ_ID "pc"
+#define STATS_KOBJ_ID "stats"
+
+struct orangefs_obj {
+	struct kobject kobj;
+	int op_timeout_secs;
+	int perf_counter_reset;
+	int perf_history_size;
+	int perf_time_interval_secs;
+	int slot_timeout_secs;
+};
+
+struct acache_orangefs_obj {
+	struct kobject kobj;
+	int hard_limit;
+	int reclaim_percentage;
+	int soft_limit;
+	int timeout_msecs;
+};
+
+struct capcache_orangefs_obj {
+	struct kobject kobj;
+	int hard_limit;
+	int reclaim_percentage;
+	int soft_limit;
+	int timeout_secs;
+};
+
+struct ccache_orangefs_obj {
+	struct kobject kobj;
+	int hard_limit;
+	int reclaim_percentage;
+	int soft_limit;
+	int timeout_secs;
+};
+
+struct ncache_orangefs_obj {
+	struct kobject kobj;
+	int hard_limit;
+	int reclaim_percentage;
+	int soft_limit;
+	int timeout_msecs;
+};
+
+struct pc_orangefs_obj {
+	struct kobject kobj;
+	char *acache;
+	char *capcache;
+	char *ncache;
+};
+
+struct stats_orangefs_obj {
+	struct kobject kobj;
+	int reads;
+	int writes;
+};
+
+struct orangefs_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct orangefs_obj *orangefs_obj,
+			struct orangefs_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct orangefs_obj *orangefs_obj,
+			 struct orangefs_attribute *attr,
+			 const char *buf,
+			 size_t count);
+};
+
+struct acache_orangefs_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct acache_orangefs_obj *acache_orangefs_obj,
+			struct acache_orangefs_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct acache_orangefs_obj *acache_orangefs_obj,
+			 struct acache_orangefs_attribute *attr,
+			 const char *buf,
+			 size_t count);
+};
+
+struct capcache_orangefs_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct capcache_orangefs_obj *capcache_orangefs_obj,
+			struct capcache_orangefs_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct capcache_orangefs_obj *capcache_orangefs_obj,
+			 struct capcache_orangefs_attribute *attr,
+			 const char *buf,
+			 size_t count);
+};
+
+struct ccache_orangefs_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct ccache_orangefs_obj *ccache_orangefs_obj,
+			struct ccache_orangefs_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct ccache_orangefs_obj *ccache_orangefs_obj,
+			 struct ccache_orangefs_attribute *attr,
+			 const char *buf,
+			 size_t count);
+};
+
+struct ncache_orangefs_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct ncache_orangefs_obj *ncache_orangefs_obj,
+			struct ncache_orangefs_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct ncache_orangefs_obj *ncache_orangefs_obj,
+			 struct ncache_orangefs_attribute *attr,
+			 const char *buf,
+			 size_t count);
+};
+
+struct pc_orangefs_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct pc_orangefs_obj *pc_orangefs_obj,
+			struct pc_orangefs_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct pc_orangefs_obj *pc_orangefs_obj,
+			 struct pc_orangefs_attribute *attr,
+			 const char *buf,
+			 size_t count);
+};
+
+struct stats_orangefs_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct stats_orangefs_obj *stats_orangefs_obj,
+			struct stats_orangefs_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct stats_orangefs_obj *stats_orangefs_obj,
+			 struct stats_orangefs_attribute *attr,
+			 const char *buf,
+			 size_t count);
+};
+
+static ssize_t orangefs_attr_show(struct kobject *kobj,
+				  struct attribute *attr,
+				  char *buf)
+{
+	struct orangefs_attribute *attribute;
+	struct orangefs_obj *orangefs_obj;
+	int rc;
+
+	attribute = container_of(attr, struct orangefs_attribute, attr);
+	orangefs_obj = container_of(kobj, struct orangefs_obj, kobj);
+
+	if (!attribute->show) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->show(orangefs_obj, attribute, buf);
+
+out:
+	return rc;
+}
+
+static ssize_t orangefs_attr_store(struct kobject *kobj,
+				   struct attribute *attr,
+				   const char *buf,
+				   size_t len)
+{
+	struct orangefs_attribute *attribute;
+	struct orangefs_obj *orangefs_obj;
+	int rc;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "orangefs_attr_store: start\n");
+
+	attribute = container_of(attr, struct orangefs_attribute, attr);
+	orangefs_obj = container_of(kobj, struct orangefs_obj, kobj);
+
+	if (!attribute->store) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->store(orangefs_obj, attribute, buf, len);
+
+out:
+	return rc;
+}
+
+static const struct sysfs_ops orangefs_sysfs_ops = {
+	.show = orangefs_attr_show,
+	.store = orangefs_attr_store,
+};
+
+static ssize_t acache_orangefs_attr_show(struct kobject *kobj,
+					 struct attribute *attr,
+					 char *buf)
+{
+	struct acache_orangefs_attribute *attribute;
+	struct acache_orangefs_obj *acache_orangefs_obj;
+	int rc;
+
+	attribute = container_of(attr, struct acache_orangefs_attribute, attr);
+	acache_orangefs_obj =
+		container_of(kobj, struct acache_orangefs_obj, kobj);
+
+	if (!attribute->show) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->show(acache_orangefs_obj, attribute, buf);
+
+out:
+	return rc;
+}
+
+static ssize_t acache_orangefs_attr_store(struct kobject *kobj,
+					  struct attribute *attr,
+					  const char *buf,
+					  size_t len)
+{
+	struct acache_orangefs_attribute *attribute;
+	struct acache_orangefs_obj *acache_orangefs_obj;
+	int rc;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "acache_orangefs_attr_store: start\n");
+
+	attribute = container_of(attr, struct acache_orangefs_attribute, attr);
+	acache_orangefs_obj =
+		container_of(kobj, struct acache_orangefs_obj, kobj);
+
+	if (!attribute->store) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->store(acache_orangefs_obj, attribute, buf, len);
+
+out:
+	return rc;
+}
+
+static const struct sysfs_ops acache_orangefs_sysfs_ops = {
+	.show = acache_orangefs_attr_show,
+	.store = acache_orangefs_attr_store,
+};
+
+static ssize_t capcache_orangefs_attr_show(struct kobject *kobj,
+					   struct attribute *attr,
+					   char *buf)
+{
+	struct capcache_orangefs_attribute *attribute;
+	struct capcache_orangefs_obj *capcache_orangefs_obj;
+	int rc;
+
+	attribute =
+		container_of(attr, struct capcache_orangefs_attribute, attr);
+	capcache_orangefs_obj =
+		container_of(kobj, struct capcache_orangefs_obj, kobj);
+
+	if (!attribute->show) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->show(capcache_orangefs_obj, attribute, buf);
+
+out:
+	return rc;
+}
+
+static ssize_t capcache_orangefs_attr_store(struct kobject *kobj,
+					    struct attribute *attr,
+					    const char *buf,
+					    size_t len)
+{
+	struct capcache_orangefs_attribute *attribute;
+	struct capcache_orangefs_obj *capcache_orangefs_obj;
+	int rc;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "capcache_orangefs_attr_store: start\n");
+
+	attribute =
+		container_of(attr, struct capcache_orangefs_attribute, attr);
+	capcache_orangefs_obj =
+		container_of(kobj, struct capcache_orangefs_obj, kobj);
+
+	if (!attribute->store) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->store(capcache_orangefs_obj, attribute, buf, len);
+
+out:
+	return rc;
+}
+
+static const struct sysfs_ops capcache_orangefs_sysfs_ops = {
+	.show = capcache_orangefs_attr_show,
+	.store = capcache_orangefs_attr_store,
+};
+
+static ssize_t ccache_orangefs_attr_show(struct kobject *kobj,
+					 struct attribute *attr,
+					 char *buf)
+{
+	struct ccache_orangefs_attribute *attribute;
+	struct ccache_orangefs_obj *ccache_orangefs_obj;
+	int rc;
+
+	attribute =
+		container_of(attr, struct ccache_orangefs_attribute, attr);
+	ccache_orangefs_obj =
+		container_of(kobj, struct ccache_orangefs_obj, kobj);
+
+	if (!attribute->show) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->show(ccache_orangefs_obj, attribute, buf);
+
+out:
+	return rc;
+}
+
+static ssize_t ccache_orangefs_attr_store(struct kobject *kobj,
+					  struct attribute *attr,
+					  const char *buf,
+					  size_t len)
+{
+	struct ccache_orangefs_attribute *attribute;
+	struct ccache_orangefs_obj *ccache_orangefs_obj;
+	int rc;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "ccache_orangefs_attr_store: start\n");
+
+	attribute =
+		container_of(attr, struct ccache_orangefs_attribute, attr);
+	ccache_orangefs_obj =
+		container_of(kobj, struct ccache_orangefs_obj, kobj);
+
+	if (!attribute->store) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->store(ccache_orangefs_obj, attribute, buf, len);
+
+out:
+	return rc;
+}
+
+static const struct sysfs_ops ccache_orangefs_sysfs_ops = {
+	.show = ccache_orangefs_attr_show,
+	.store = ccache_orangefs_attr_store,
+};
+
+static ssize_t ncache_orangefs_attr_show(struct kobject *kobj,
+					 struct attribute *attr,
+					 char *buf)
+{
+	struct ncache_orangefs_attribute *attribute;
+	struct ncache_orangefs_obj *ncache_orangefs_obj;
+	int rc;
+
+	attribute = container_of(attr, struct ncache_orangefs_attribute, attr);
+	ncache_orangefs_obj =
+		container_of(kobj, struct ncache_orangefs_obj, kobj);
+
+	if (!attribute->show) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->show(ncache_orangefs_obj, attribute, buf);
+
+out:
+	return rc;
+}
+
+static ssize_t ncache_orangefs_attr_store(struct kobject *kobj,
+					  struct attribute *attr,
+					  const char *buf,
+					  size_t len)
+{
+	struct ncache_orangefs_attribute *attribute;
+	struct ncache_orangefs_obj *ncache_orangefs_obj;
+	int rc;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "ncache_orangefs_attr_store: start\n");
+
+	attribute = container_of(attr, struct ncache_orangefs_attribute, attr);
+	ncache_orangefs_obj =
+		container_of(kobj, struct ncache_orangefs_obj, kobj);
+
+	if (!attribute->store) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->store(ncache_orangefs_obj, attribute, buf, len);
+
+out:
+	return rc;
+}
+
+static const struct sysfs_ops ncache_orangefs_sysfs_ops = {
+	.show = ncache_orangefs_attr_show,
+	.store = ncache_orangefs_attr_store,
+};
+
+static ssize_t pc_orangefs_attr_show(struct kobject *kobj,
+				     struct attribute *attr,
+				     char *buf)
+{
+	struct pc_orangefs_attribute *attribute;
+	struct pc_orangefs_obj *pc_orangefs_obj;
+	int rc;
+
+	attribute = container_of(attr, struct pc_orangefs_attribute, attr);
+	pc_orangefs_obj =
+		container_of(kobj, struct pc_orangefs_obj, kobj);
+
+	if (!attribute->show) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->show(pc_orangefs_obj, attribute, buf);
+
+out:
+	return rc;
+}
+
+static const struct sysfs_ops pc_orangefs_sysfs_ops = {
+	.show = pc_orangefs_attr_show,
+};
+
+static ssize_t stats_orangefs_attr_show(struct kobject *kobj,
+					struct attribute *attr,
+					char *buf)
+{
+	struct stats_orangefs_attribute *attribute;
+	struct stats_orangefs_obj *stats_orangefs_obj;
+	int rc;
+
+	attribute = container_of(attr, struct stats_orangefs_attribute, attr);
+	stats_orangefs_obj =
+		container_of(kobj, struct stats_orangefs_obj, kobj);
+
+	if (!attribute->show) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = attribute->show(stats_orangefs_obj, attribute, buf);
+
+out:
+	return rc;
+}
+
+static const struct sysfs_ops stats_orangefs_sysfs_ops = {
+	.show = stats_orangefs_attr_show,
+};
+
+static void orangefs_release(struct kobject *kobj)
+{
+	struct orangefs_obj *orangefs_obj;
+
+	orangefs_obj = container_of(kobj, struct orangefs_obj, kobj);
+	kfree(orangefs_obj);
+}
+
+static void acache_orangefs_release(struct kobject *kobj)
+{
+	struct acache_orangefs_obj *acache_orangefs_obj;
+
+	acache_orangefs_obj =
+		container_of(kobj, struct acache_orangefs_obj, kobj);
+	kfree(acache_orangefs_obj);
+}
+
+static void capcache_orangefs_release(struct kobject *kobj)
+{
+	struct capcache_orangefs_obj *capcache_orangefs_obj;
+
+	capcache_orangefs_obj =
+		container_of(kobj, struct capcache_orangefs_obj, kobj);
+	kfree(capcache_orangefs_obj);
+}
+
+static void ccache_orangefs_release(struct kobject *kobj)
+{
+	struct ccache_orangefs_obj *ccache_orangefs_obj;
+
+	ccache_orangefs_obj =
+		container_of(kobj, struct ccache_orangefs_obj, kobj);
+	kfree(ccache_orangefs_obj);
+}
+
+static void ncache_orangefs_release(struct kobject *kobj)
+{
+	struct ncache_orangefs_obj *ncache_orangefs_obj;
+
+	ncache_orangefs_obj =
+		container_of(kobj, struct ncache_orangefs_obj, kobj);
+	kfree(ncache_orangefs_obj);
+}
+
+static void pc_orangefs_release(struct kobject *kobj)
+{
+	struct pc_orangefs_obj *pc_orangefs_obj;
+
+	pc_orangefs_obj =
+		container_of(kobj, struct pc_orangefs_obj, kobj);
+	kfree(pc_orangefs_obj);
+}
+
+static void stats_orangefs_release(struct kobject *kobj)
+{
+	struct stats_orangefs_obj *stats_orangefs_obj;
+
+	stats_orangefs_obj =
+		container_of(kobj, struct stats_orangefs_obj, kobj);
+	kfree(stats_orangefs_obj);
+}
+
+static ssize_t sysfs_int_show(char *kobj_id, char *buf, void *attr)
+{
+	int rc = -EIO;
+	struct orangefs_attribute *orangefs_attr;
+	struct stats_orangefs_attribute *stats_orangefs_attr;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG, "sysfs_int_show: id:%s:\n", kobj_id);
+
+	if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) {
+		orangefs_attr = (struct orangefs_attribute *)attr;
+
+		if (!strcmp(orangefs_attr->attr.name, "op_timeout_secs")) {
+			rc = scnprintf(buf,
+				       PAGE_SIZE,
+				       "%d\n",
+				       op_timeout_secs);
+			goto out;
+		} else if (!strcmp(orangefs_attr->attr.name,
+				   "slot_timeout_secs")) {
+			rc = scnprintf(buf,
+				       PAGE_SIZE,
+				       "%d\n",
+				       slot_timeout_secs);
+			goto out;
+		} else {
+			goto out;
+		}
+
+	} else if (!strcmp(kobj_id, STATS_KOBJ_ID)) {
+		stats_orangefs_attr = (struct stats_orangefs_attribute *)attr;
+
+		if (!strcmp(stats_orangefs_attr->attr.name, "reads")) {
+			rc = scnprintf(buf,
+				       PAGE_SIZE,
+				       "%lu\n",
+				       g_orangefs_stats.reads);
+			goto out;
+		} else if (!strcmp(stats_orangefs_attr->attr.name, "writes")) {
+			rc = scnprintf(buf,
+				       PAGE_SIZE,
+				       "%lu\n",
+				       g_orangefs_stats.writes);
+			goto out;
+		} else {
+			goto out;
+		}
+	}
+
+out:
+
+	return rc;
+}
+
+static ssize_t int_orangefs_show(struct orangefs_obj *orangefs_obj,
+				 struct orangefs_attribute *attr,
+				 char *buf)
+{
+	int rc;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "int_orangefs_show:start attr->attr.name:%s:\n",
+		     attr->attr.name);
+
+	rc = sysfs_int_show(ORANGEFS_KOBJ_ID, buf, (void *) attr);
+
+	return rc;
+}
+
+static ssize_t int_stats_show(struct stats_orangefs_obj *stats_orangefs_obj,
+			struct stats_orangefs_attribute *attr,
+			char *buf)
+{
+	int rc;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "int_stats_show:start attr->attr.name:%s:\n",
+		     attr->attr.name);
+
+	rc = sysfs_int_show(STATS_KOBJ_ID, buf, (void *) attr);
+
+	return rc;
+}
+
+static ssize_t int_store(struct orangefs_obj *orangefs_obj,
+			 struct orangefs_attribute *attr,
+			 const char *buf,
+			 size_t count)
+{
+	int rc = 0;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "int_store: start attr->attr.name:%s: buf:%s:\n",
+		     attr->attr.name, buf);
+
+	if (!strcmp(attr->attr.name, "op_timeout_secs")) {
+		rc = kstrtoint(buf, 0, &op_timeout_secs);
+		goto out;
+	} else if (!strcmp(attr->attr.name, "slot_timeout_secs")) {
+		rc = kstrtoint(buf, 0, &slot_timeout_secs);
+		goto out;
+	} else {
+		goto out;
+	}
+
+out:
+	if (rc)
+		rc = -EINVAL;
+	else
+		rc = count;
+
+	return rc;
+}
+
+/*
+ * obtain attribute values from userspace with a service operation.
+ */
+static int sysfs_service_op_show(char *kobj_id, char *buf, void *attr)
+{
+	struct orangefs_kernel_op_s *new_op = NULL;
+	int rc = 0;
+	char *ser_op_type = NULL;
+	struct orangefs_attribute *orangefs_attr;
+	struct acache_orangefs_attribute *acache_attr;
+	struct capcache_orangefs_attribute *capcache_attr;
+	struct ccache_orangefs_attribute *ccache_attr;
+	struct ncache_orangefs_attribute *ncache_attr;
+	struct pc_orangefs_attribute *pc_attr;
+	__u32 op_alloc_type;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "sysfs_service_op_show: id:%s:\n",
+		     kobj_id);
+
+	if (strcmp(kobj_id, PC_KOBJ_ID))
+		op_alloc_type = ORANGEFS_VFS_OP_PARAM;
+	else
+		op_alloc_type = ORANGEFS_VFS_OP_PERF_COUNT;
+
+	new_op = op_alloc(op_alloc_type);
+	if (!new_op)
+		return -ENOMEM;
+
+	/* Can't do a service_operation if the client is not running... */
+	rc = is_daemon_in_service();
+	if (rc) {
+		pr_info("%s: Client not running :%d:\n",
+			__func__,
+			is_daemon_in_service());
+		goto out;
+	}
+
+	if (strcmp(kobj_id, PC_KOBJ_ID))
+		new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_GET;
+
+	if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) {
+		orangefs_attr = (struct orangefs_attribute *)attr;
+
+		if (!strcmp(orangefs_attr->attr.name, "perf_history_size"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE;
+		else if (!strcmp(orangefs_attr->attr.name,
+				 "perf_time_interval_secs"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS;
+		else if (!strcmp(orangefs_attr->attr.name,
+				 "perf_counter_reset"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_PERF_RESET;
+
+	} else if (!strcmp(kobj_id, ACACHE_KOBJ_ID)) {
+		acache_attr = (struct acache_orangefs_attribute *)attr;
+
+		if (!strcmp(acache_attr->attr.name, "timeout_msecs"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS;
+
+		if (!strcmp(acache_attr->attr.name, "hard_limit"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT;
+
+		if (!strcmp(acache_attr->attr.name, "soft_limit"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT;
+
+		if (!strcmp(acache_attr->attr.name, "reclaim_percentage"))
+			new_op->upcall.req.param.op =
+			  ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE;
+
+	} else if (!strcmp(kobj_id, CAPCACHE_KOBJ_ID)) {
+		capcache_attr = (struct capcache_orangefs_attribute *)attr;
+
+		if (!strcmp(capcache_attr->attr.name, "timeout_secs"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS;
+
+		if (!strcmp(capcache_attr->attr.name, "hard_limit"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT;
+
+		if (!strcmp(capcache_attr->attr.name, "soft_limit"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT;
+
+		if (!strcmp(capcache_attr->attr.name, "reclaim_percentage"))
+			new_op->upcall.req.param.op =
+			  ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE;
+
+	} else if (!strcmp(kobj_id, CCACHE_KOBJ_ID)) {
+		ccache_attr = (struct ccache_orangefs_attribute *)attr;
+
+		if (!strcmp(ccache_attr->attr.name, "timeout_secs"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS;
+
+		if (!strcmp(ccache_attr->attr.name, "hard_limit"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT;
+
+		if (!strcmp(ccache_attr->attr.name, "soft_limit"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT;
+
+		if (!strcmp(ccache_attr->attr.name, "reclaim_percentage"))
+			new_op->upcall.req.param.op =
+			  ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE;
+
+	} else if (!strcmp(kobj_id, NCACHE_KOBJ_ID)) {
+		ncache_attr = (struct ncache_orangefs_attribute *)attr;
+
+		if (!strcmp(ncache_attr->attr.name, "timeout_msecs"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS;
+
+		if (!strcmp(ncache_attr->attr.name, "hard_limit"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT;
+
+		if (!strcmp(ncache_attr->attr.name, "soft_limit"))
+			new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT;
+
+		if (!strcmp(ncache_attr->attr.name, "reclaim_percentage"))
+			new_op->upcall.req.param.op =
+			  ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE;
+
+	} else if (!strcmp(kobj_id, PC_KOBJ_ID)) {
+		pc_attr = (struct pc_orangefs_attribute *)attr;
+
+		if (!strcmp(pc_attr->attr.name, ACACHE_KOBJ_ID))
+			new_op->upcall.req.perf_count.type =
+				ORANGEFS_PERF_COUNT_REQUEST_ACACHE;
+
+		if (!strcmp(pc_attr->attr.name, CAPCACHE_KOBJ_ID))
+			new_op->upcall.req.perf_count.type =
+				ORANGEFS_PERF_COUNT_REQUEST_CAPCACHE;
+
+		if (!strcmp(pc_attr->attr.name, NCACHE_KOBJ_ID))
+			new_op->upcall.req.perf_count.type =
+				ORANGEFS_PERF_COUNT_REQUEST_NCACHE;
+
+	} else {
+		gossip_err("sysfs_service_op_show: unknown kobj_id:%s:\n",
+			   kobj_id);
+		rc = -EINVAL;
+		goto out;
+	}
+
+
+	if (strcmp(kobj_id, PC_KOBJ_ID))
+		ser_op_type = "orangefs_param";
+	else
+		ser_op_type = "orangefs_perf_count";
+
+	/*
+	 * The service_operation will return an errno return code on
+	 * error, and zero on success.
+	 */
+	rc = service_operation(new_op, ser_op_type, ORANGEFS_OP_INTERRUPTIBLE);
+
+out:
+	if (!rc) {
+		if (strcmp(kobj_id, PC_KOBJ_ID)) {
+			rc = scnprintf(buf,
+				       PAGE_SIZE,
+				       "%d\n",
+				       (int)new_op->downcall.resp.param.value);
+		} else {
+			rc = scnprintf(
+				buf,
+				PAGE_SIZE,
+				"%s",
+				new_op->downcall.resp.perf_count.buffer);
+		}
+	}
+
+	op_release(new_op);
+
+	return rc;
+
+}
+
+static ssize_t service_orangefs_show(struct orangefs_obj *orangefs_obj,
+				     struct orangefs_attribute *attr,
+				     char *buf)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_show(ORANGEFS_KOBJ_ID, buf, (void *)attr);
+
+	return rc;
+}
+
+static ssize_t
+	service_acache_show(struct acache_orangefs_obj *acache_orangefs_obj,
+			    struct acache_orangefs_attribute *attr,
+			    char *buf)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_show(ACACHE_KOBJ_ID, buf, (void *)attr);
+
+	return rc;
+}
+
+static ssize_t service_capcache_show(struct capcache_orangefs_obj
+					*capcache_orangefs_obj,
+				     struct capcache_orangefs_attribute *attr,
+				     char *buf)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_show(CAPCACHE_KOBJ_ID, buf, (void *)attr);
+
+	return rc;
+}
+
+static ssize_t service_ccache_show(struct ccache_orangefs_obj
+					*ccache_orangefs_obj,
+				   struct ccache_orangefs_attribute *attr,
+				   char *buf)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_show(CCACHE_KOBJ_ID, buf, (void *)attr);
+
+	return rc;
+}
+
+static ssize_t
+	service_ncache_show(struct ncache_orangefs_obj *ncache_orangefs_obj,
+			    struct ncache_orangefs_attribute *attr,
+			    char *buf)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_show(NCACHE_KOBJ_ID, buf, (void *)attr);
+
+	return rc;
+}
+
+static ssize_t
+	service_pc_show(struct pc_orangefs_obj *pc_orangefs_obj,
+			    struct pc_orangefs_attribute *attr,
+			    char *buf)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_show(PC_KOBJ_ID, buf, (void *)attr);
+
+	return rc;
+}
+
+/*
+ * pass attribute values back to userspace with a service operation.
+ *
+ * We have to do a memory allocation, an sscanf and a service operation.
+ * And we have to evaluate what the user entered, to make sure the
+ * value is within the range supported by the attribute. So, there's
+ * a lot of return code checking and mapping going on here.
+ *
+ * We want to return 1 if we think everything went OK, and
+ * EINVAL if not.
+ */
+static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr)
+{
+	struct orangefs_kernel_op_s *new_op = NULL;
+	int val = 0;
+	int rc = 0;
+	struct orangefs_attribute *orangefs_attr;
+	struct acache_orangefs_attribute *acache_attr;
+	struct capcache_orangefs_attribute *capcache_attr;
+	struct ccache_orangefs_attribute *ccache_attr;
+	struct ncache_orangefs_attribute *ncache_attr;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG,
+		     "sysfs_service_op_store: id:%s:\n",
+		     kobj_id);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_PARAM);
+	if (!new_op)
+		return -EINVAL; /* sic */
+
+	/* Can't do a service_operation if the client is not running... */
+	rc = is_daemon_in_service();
+	if (rc) {
+		pr_info("%s: Client not running :%d:\n",
+			__func__,
+			is_daemon_in_service());
+		goto out;
+	}
+
+	/*
+	 * The value we want to send back to userspace is in buf.
+	 */
+	rc = kstrtoint(buf, 0, &val);
+	if (rc)
+		goto out;
+
+	if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) {
+		orangefs_attr = (struct orangefs_attribute *)attr;
+
+		if (!strcmp(orangefs_attr->attr.name, "perf_history_size")) {
+			if (val > 0) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(orangefs_attr->attr.name,
+				   "perf_time_interval_secs")) {
+			if (val > 0) {
+				new_op->upcall.req.param.op =
+				ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(orangefs_attr->attr.name,
+				   "perf_counter_reset")) {
+			if ((val == 0) || (val == 1)) {
+				new_op->upcall.req.param.op =
+					ORANGEFS_PARAM_REQUEST_OP_PERF_RESET;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		}
+
+	} else if (!strcmp(kobj_id, ACACHE_KOBJ_ID)) {
+		acache_attr = (struct acache_orangefs_attribute *)attr;
+
+		if (!strcmp(acache_attr->attr.name, "hard_limit")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(acache_attr->attr.name, "soft_limit")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(acache_attr->attr.name,
+				   "reclaim_percentage")) {
+			if ((val > -1) && (val < 101)) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(acache_attr->attr.name, "timeout_msecs")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		}
+
+	} else if (!strcmp(kobj_id, CAPCACHE_KOBJ_ID)) {
+		capcache_attr = (struct capcache_orangefs_attribute *)attr;
+
+		if (!strcmp(capcache_attr->attr.name, "hard_limit")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(capcache_attr->attr.name, "soft_limit")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(capcache_attr->attr.name,
+				   "reclaim_percentage")) {
+			if ((val > -1) && (val < 101)) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(capcache_attr->attr.name, "timeout_secs")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		}
+
+	} else if (!strcmp(kobj_id, CCACHE_KOBJ_ID)) {
+		ccache_attr = (struct ccache_orangefs_attribute *)attr;
+
+		if (!strcmp(ccache_attr->attr.name, "hard_limit")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(ccache_attr->attr.name, "soft_limit")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(ccache_attr->attr.name,
+				   "reclaim_percentage")) {
+			if ((val > -1) && (val < 101)) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(ccache_attr->attr.name, "timeout_secs")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		}
+
+	} else if (!strcmp(kobj_id, NCACHE_KOBJ_ID)) {
+		ncache_attr = (struct ncache_orangefs_attribute *)attr;
+
+		if (!strcmp(ncache_attr->attr.name, "hard_limit")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(ncache_attr->attr.name, "soft_limit")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(ncache_attr->attr.name,
+				   "reclaim_percentage")) {
+			if ((val > -1) && (val < 101)) {
+				new_op->upcall.req.param.op =
+					ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		} else if (!strcmp(ncache_attr->attr.name, "timeout_msecs")) {
+			if (val > -1) {
+				new_op->upcall.req.param.op =
+				  ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS;
+			} else {
+				rc = 0;
+				goto out;
+			}
+		}
+
+	} else {
+		gossip_err("sysfs_service_op_store: unknown kobj_id:%s:\n",
+			   kobj_id);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_SET;
+
+	new_op->upcall.req.param.value = val;
+
+	/*
+	 * The service_operation will return a errno return code on
+	 * error, and zero on success.
+	 */
+	rc = service_operation(new_op, "orangefs_param", ORANGEFS_OP_INTERRUPTIBLE);
+
+	if (rc < 0) {
+		gossip_err("sysfs_service_op_store: service op returned:%d:\n",
+			rc);
+		rc = 0;
+	} else {
+		rc = 1;
+	}
+
+out:
+	op_release(new_op);
+
+	if (rc == -ENOMEM || rc == 0)
+		rc = -EINVAL;
+
+	return rc;
+}
+
+static ssize_t
+	service_orangefs_store(struct orangefs_obj *orangefs_obj,
+			       struct orangefs_attribute *attr,
+			       const char *buf,
+			       size_t count)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_store(ORANGEFS_KOBJ_ID, buf, (void *) attr);
+
+	/* rc should have an errno value if the service_op went bad. */
+	if (rc == 1)
+		rc = count;
+
+	return rc;
+}
+
+static ssize_t
+	service_acache_store(struct acache_orangefs_obj *acache_orangefs_obj,
+			     struct acache_orangefs_attribute *attr,
+			     const char *buf,
+			     size_t count)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_store(ACACHE_KOBJ_ID, buf, (void *) attr);
+
+	/* rc should have an errno value if the service_op went bad. */
+	if (rc == 1)
+		rc = count;
+
+	return rc;
+}
+
+static ssize_t
+	service_capcache_store(struct capcache_orangefs_obj
+				*capcache_orangefs_obj,
+			       struct capcache_orangefs_attribute *attr,
+			       const char *buf,
+			       size_t count)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_store(CAPCACHE_KOBJ_ID, buf, (void *) attr);
+
+	/* rc should have an errno value if the service_op went bad. */
+	if (rc == 1)
+		rc = count;
+
+	return rc;
+}
+
+static ssize_t service_ccache_store(struct ccache_orangefs_obj
+					*ccache_orangefs_obj,
+				    struct ccache_orangefs_attribute *attr,
+				    const char *buf,
+				    size_t count)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_store(CCACHE_KOBJ_ID, buf, (void *) attr);
+
+	/* rc should have an errno value if the service_op went bad. */
+	if (rc == 1)
+		rc = count;
+
+	return rc;
+}
+
+static ssize_t
+	service_ncache_store(struct ncache_orangefs_obj *ncache_orangefs_obj,
+			     struct ncache_orangefs_attribute *attr,
+			     const char *buf,
+			     size_t count)
+{
+	int rc = 0;
+
+	rc = sysfs_service_op_store(NCACHE_KOBJ_ID, buf, (void *) attr);
+
+	/* rc should have an errno value if the service_op went bad. */
+	if (rc == 1)
+		rc = count;
+
+	return rc;
+}
+
+static struct orangefs_attribute op_timeout_secs_attribute =
+	__ATTR(op_timeout_secs, 0664, int_orangefs_show, int_store);
+
+static struct orangefs_attribute slot_timeout_secs_attribute =
+	__ATTR(slot_timeout_secs, 0664, int_orangefs_show, int_store);
+
+static struct orangefs_attribute perf_counter_reset_attribute =
+	__ATTR(perf_counter_reset,
+	       0664,
+	       service_orangefs_show,
+	       service_orangefs_store);
+
+static struct orangefs_attribute perf_history_size_attribute =
+	__ATTR(perf_history_size,
+	       0664,
+	       service_orangefs_show,
+	       service_orangefs_store);
+
+static struct orangefs_attribute perf_time_interval_secs_attribute =
+	__ATTR(perf_time_interval_secs,
+	       0664,
+	       service_orangefs_show,
+	       service_orangefs_store);
+
+static struct attribute *orangefs_default_attrs[] = {
+	&op_timeout_secs_attribute.attr,
+	&slot_timeout_secs_attribute.attr,
+	&perf_counter_reset_attribute.attr,
+	&perf_history_size_attribute.attr,
+	&perf_time_interval_secs_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type orangefs_ktype = {
+	.sysfs_ops = &orangefs_sysfs_ops,
+	.release = orangefs_release,
+	.default_attrs = orangefs_default_attrs,
+};
+
+static struct acache_orangefs_attribute acache_hard_limit_attribute =
+	__ATTR(hard_limit,
+	       0664,
+	       service_acache_show,
+	       service_acache_store);
+
+static struct acache_orangefs_attribute acache_reclaim_percent_attribute =
+	__ATTR(reclaim_percentage,
+	       0664,
+	       service_acache_show,
+	       service_acache_store);
+
+static struct acache_orangefs_attribute acache_soft_limit_attribute =
+	__ATTR(soft_limit,
+	       0664,
+	       service_acache_show,
+	       service_acache_store);
+
+static struct acache_orangefs_attribute acache_timeout_msecs_attribute =
+	__ATTR(timeout_msecs,
+	       0664,
+	       service_acache_show,
+	       service_acache_store);
+
+static struct attribute *acache_orangefs_default_attrs[] = {
+	&acache_hard_limit_attribute.attr,
+	&acache_reclaim_percent_attribute.attr,
+	&acache_soft_limit_attribute.attr,
+	&acache_timeout_msecs_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type acache_orangefs_ktype = {
+	.sysfs_ops = &acache_orangefs_sysfs_ops,
+	.release = acache_orangefs_release,
+	.default_attrs = acache_orangefs_default_attrs,
+};
+
+static struct capcache_orangefs_attribute capcache_hard_limit_attribute =
+	__ATTR(hard_limit,
+	       0664,
+	       service_capcache_show,
+	       service_capcache_store);
+
+static struct capcache_orangefs_attribute capcache_reclaim_percent_attribute =
+	__ATTR(reclaim_percentage,
+	       0664,
+	       service_capcache_show,
+	       service_capcache_store);
+
+static struct capcache_orangefs_attribute capcache_soft_limit_attribute =
+	__ATTR(soft_limit,
+	       0664,
+	       service_capcache_show,
+	       service_capcache_store);
+
+static struct capcache_orangefs_attribute capcache_timeout_secs_attribute =
+	__ATTR(timeout_secs,
+	       0664,
+	       service_capcache_show,
+	       service_capcache_store);
+
+static struct attribute *capcache_orangefs_default_attrs[] = {
+	&capcache_hard_limit_attribute.attr,
+	&capcache_reclaim_percent_attribute.attr,
+	&capcache_soft_limit_attribute.attr,
+	&capcache_timeout_secs_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type capcache_orangefs_ktype = {
+	.sysfs_ops = &capcache_orangefs_sysfs_ops,
+	.release = capcache_orangefs_release,
+	.default_attrs = capcache_orangefs_default_attrs,
+};
+
+static struct ccache_orangefs_attribute ccache_hard_limit_attribute =
+	__ATTR(hard_limit,
+	       0664,
+	       service_ccache_show,
+	       service_ccache_store);
+
+static struct ccache_orangefs_attribute ccache_reclaim_percent_attribute =
+	__ATTR(reclaim_percentage,
+	       0664,
+	       service_ccache_show,
+	       service_ccache_store);
+
+static struct ccache_orangefs_attribute ccache_soft_limit_attribute =
+	__ATTR(soft_limit,
+	       0664,
+	       service_ccache_show,
+	       service_ccache_store);
+
+static struct ccache_orangefs_attribute ccache_timeout_secs_attribute =
+	__ATTR(timeout_secs,
+	       0664,
+	       service_ccache_show,
+	       service_ccache_store);
+
+static struct attribute *ccache_orangefs_default_attrs[] = {
+	&ccache_hard_limit_attribute.attr,
+	&ccache_reclaim_percent_attribute.attr,
+	&ccache_soft_limit_attribute.attr,
+	&ccache_timeout_secs_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type ccache_orangefs_ktype = {
+	.sysfs_ops = &ccache_orangefs_sysfs_ops,
+	.release = ccache_orangefs_release,
+	.default_attrs = ccache_orangefs_default_attrs,
+};
+
+static struct ncache_orangefs_attribute ncache_hard_limit_attribute =
+	__ATTR(hard_limit,
+	       0664,
+	       service_ncache_show,
+	       service_ncache_store);
+
+static struct ncache_orangefs_attribute ncache_reclaim_percent_attribute =
+	__ATTR(reclaim_percentage,
+	       0664,
+	       service_ncache_show,
+	       service_ncache_store);
+
+static struct ncache_orangefs_attribute ncache_soft_limit_attribute =
+	__ATTR(soft_limit,
+	       0664,
+	       service_ncache_show,
+	       service_ncache_store);
+
+static struct ncache_orangefs_attribute ncache_timeout_msecs_attribute =
+	__ATTR(timeout_msecs,
+	       0664,
+	       service_ncache_show,
+	       service_ncache_store);
+
+static struct attribute *ncache_orangefs_default_attrs[] = {
+	&ncache_hard_limit_attribute.attr,
+	&ncache_reclaim_percent_attribute.attr,
+	&ncache_soft_limit_attribute.attr,
+	&ncache_timeout_msecs_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type ncache_orangefs_ktype = {
+	.sysfs_ops = &ncache_orangefs_sysfs_ops,
+	.release = ncache_orangefs_release,
+	.default_attrs = ncache_orangefs_default_attrs,
+};
+
+static struct pc_orangefs_attribute pc_acache_attribute =
+	__ATTR(acache,
+	       0664,
+	       service_pc_show,
+	       NULL);
+
+static struct pc_orangefs_attribute pc_capcache_attribute =
+	__ATTR(capcache,
+	       0664,
+	       service_pc_show,
+	       NULL);
+
+static struct pc_orangefs_attribute pc_ncache_attribute =
+	__ATTR(ncache,
+	       0664,
+	       service_pc_show,
+	       NULL);
+
+static struct attribute *pc_orangefs_default_attrs[] = {
+	&pc_acache_attribute.attr,
+	&pc_capcache_attribute.attr,
+	&pc_ncache_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type pc_orangefs_ktype = {
+	.sysfs_ops = &pc_orangefs_sysfs_ops,
+	.release = pc_orangefs_release,
+	.default_attrs = pc_orangefs_default_attrs,
+};
+
+static struct stats_orangefs_attribute stats_reads_attribute =
+	__ATTR(reads,
+	       0664,
+	       int_stats_show,
+	       NULL);
+
+static struct stats_orangefs_attribute stats_writes_attribute =
+	__ATTR(writes,
+	       0664,
+	       int_stats_show,
+	       NULL);
+
+static struct attribute *stats_orangefs_default_attrs[] = {
+	&stats_reads_attribute.attr,
+	&stats_writes_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type stats_orangefs_ktype = {
+	.sysfs_ops = &stats_orangefs_sysfs_ops,
+	.release = stats_orangefs_release,
+	.default_attrs = stats_orangefs_default_attrs,
+};
+
+static struct orangefs_obj *orangefs_obj;
+static struct acache_orangefs_obj *acache_orangefs_obj;
+static struct capcache_orangefs_obj *capcache_orangefs_obj;
+static struct ccache_orangefs_obj *ccache_orangefs_obj;
+static struct ncache_orangefs_obj *ncache_orangefs_obj;
+static struct pc_orangefs_obj *pc_orangefs_obj;
+static struct stats_orangefs_obj *stats_orangefs_obj;
+
+int orangefs_sysfs_init(void)
+{
+	int rc = -EINVAL;
+
+	gossip_debug(GOSSIP_SYSFS_DEBUG, "orangefs_sysfs_init: start\n");
+
+	/* create /sys/fs/orangefs. */
+	orangefs_obj = kzalloc(sizeof(*orangefs_obj), GFP_KERNEL);
+	if (!orangefs_obj)
+		goto out;
+
+	rc = kobject_init_and_add(&orangefs_obj->kobj,
+				  &orangefs_ktype,
+				  fs_kobj,
+				  ORANGEFS_KOBJ_ID);
+
+	if (rc)
+		goto ofs_obj_bail;
+
+	kobject_uevent(&orangefs_obj->kobj, KOBJ_ADD);
+
+	/* create /sys/fs/orangefs/acache. */
+	acache_orangefs_obj = kzalloc(sizeof(*acache_orangefs_obj), GFP_KERNEL);
+	if (!acache_orangefs_obj) {
+		rc = -EINVAL;
+		goto ofs_obj_bail;
+	}
+
+	rc = kobject_init_and_add(&acache_orangefs_obj->kobj,
+				  &acache_orangefs_ktype,
+				  &orangefs_obj->kobj,
+				  ACACHE_KOBJ_ID);
+
+	if (rc)
+		goto acache_obj_bail;
+
+	kobject_uevent(&acache_orangefs_obj->kobj, KOBJ_ADD);
+
+	/* create /sys/fs/orangefs/capcache. */
+	capcache_orangefs_obj =
+		kzalloc(sizeof(*capcache_orangefs_obj), GFP_KERNEL);
+	if (!capcache_orangefs_obj) {
+		rc = -EINVAL;
+		goto acache_obj_bail;
+	}
+
+	rc = kobject_init_and_add(&capcache_orangefs_obj->kobj,
+				  &capcache_orangefs_ktype,
+				  &orangefs_obj->kobj,
+				  CAPCACHE_KOBJ_ID);
+	if (rc)
+		goto capcache_obj_bail;
+
+	kobject_uevent(&capcache_orangefs_obj->kobj, KOBJ_ADD);
+
+	/* create /sys/fs/orangefs/ccache. */
+	ccache_orangefs_obj =
+		kzalloc(sizeof(*ccache_orangefs_obj), GFP_KERNEL);
+	if (!ccache_orangefs_obj) {
+		rc = -EINVAL;
+		goto capcache_obj_bail;
+	}
+
+	rc = kobject_init_and_add(&ccache_orangefs_obj->kobj,
+				  &ccache_orangefs_ktype,
+				  &orangefs_obj->kobj,
+				  CCACHE_KOBJ_ID);
+	if (rc)
+		goto ccache_obj_bail;
+
+	kobject_uevent(&ccache_orangefs_obj->kobj, KOBJ_ADD);
+
+	/* create /sys/fs/orangefs/ncache. */
+	ncache_orangefs_obj = kzalloc(sizeof(*ncache_orangefs_obj), GFP_KERNEL);
+	if (!ncache_orangefs_obj) {
+		rc = -EINVAL;
+		goto ccache_obj_bail;
+	}
+
+	rc = kobject_init_and_add(&ncache_orangefs_obj->kobj,
+				  &ncache_orangefs_ktype,
+				  &orangefs_obj->kobj,
+				  NCACHE_KOBJ_ID);
+
+	if (rc)
+		goto ncache_obj_bail;
+
+	kobject_uevent(&ncache_orangefs_obj->kobj, KOBJ_ADD);
+
+	/* create /sys/fs/orangefs/perf_counters. */
+	pc_orangefs_obj = kzalloc(sizeof(*pc_orangefs_obj), GFP_KERNEL);
+	if (!pc_orangefs_obj) {
+		rc = -EINVAL;
+		goto ncache_obj_bail;
+	}
+
+	rc = kobject_init_and_add(&pc_orangefs_obj->kobj,
+				  &pc_orangefs_ktype,
+				  &orangefs_obj->kobj,
+				  "perf_counters");
+
+	if (rc)
+		goto pc_obj_bail;
+
+	kobject_uevent(&pc_orangefs_obj->kobj, KOBJ_ADD);
+
+	/* create /sys/fs/orangefs/stats. */
+	stats_orangefs_obj = kzalloc(sizeof(*stats_orangefs_obj), GFP_KERNEL);
+	if (!stats_orangefs_obj) {
+		rc = -EINVAL;
+		goto pc_obj_bail;
+	}
+
+	rc = kobject_init_and_add(&stats_orangefs_obj->kobj,
+				  &stats_orangefs_ktype,
+				  &orangefs_obj->kobj,
+				  STATS_KOBJ_ID);
+
+	if (rc)
+		goto stats_obj_bail;
+
+	kobject_uevent(&stats_orangefs_obj->kobj, KOBJ_ADD);
+	goto out;
+
+stats_obj_bail:
+		kobject_put(&stats_orangefs_obj->kobj);
+
+pc_obj_bail:
+		kobject_put(&pc_orangefs_obj->kobj);
+
+ncache_obj_bail:
+		kobject_put(&ncache_orangefs_obj->kobj);
+
+ccache_obj_bail:
+		kobject_put(&ccache_orangefs_obj->kobj);
+
+capcache_obj_bail:
+		kobject_put(&capcache_orangefs_obj->kobj);
+
+acache_obj_bail:
+		kobject_put(&acache_orangefs_obj->kobj);
+
+ofs_obj_bail:
+		kobject_put(&orangefs_obj->kobj);
+out:
+	return rc;
+}
+
+void orangefs_sysfs_exit(void)
+{
+	gossip_debug(GOSSIP_SYSFS_DEBUG, "orangefs_sysfs_exit: start\n");
+
+	kobject_put(&acache_orangefs_obj->kobj);
+	kobject_put(&capcache_orangefs_obj->kobj);
+	kobject_put(&ccache_orangefs_obj->kobj);
+	kobject_put(&ncache_orangefs_obj->kobj);
+	kobject_put(&pc_orangefs_obj->kobj);
+	kobject_put(&stats_orangefs_obj->kobj);
+
+	kobject_put(&orangefs_obj->kobj);
+}

diff --git a/fs/orangefs/orangefs-sysfs.h b/fs/orangefs/orangefs-sysfs.h
new file mode 100644
index 0000000..f0b7638
--- /dev/null
+++ b/fs/orangefs/orangefs-sysfs.h

@@ -0,0 +1,2 @@
+extern int orangefs_sysfs_init(void);
+extern void orangefs_sysfs_exit(void);

diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
new file mode 100644
index 0000000..8277aba
--- /dev/null
+++ b/fs/orangefs/orangefs-utils.c

@@ -0,0 +1,1048 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-dev-proto.h"
+#include "orangefs-bufmap.h"
+
+__s32 fsid_of_op(struct orangefs_kernel_op_s *op)
+{
+	__s32 fsid = ORANGEFS_FS_ID_NULL;
+
+	if (op) {
+		switch (op->upcall.type) {
+		case ORANGEFS_VFS_OP_FILE_IO:
+			fsid = op->upcall.req.io.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_LOOKUP:
+			fsid = op->upcall.req.lookup.parent_refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_CREATE:
+			fsid = op->upcall.req.create.parent_refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_GETATTR:
+			fsid = op->upcall.req.getattr.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_REMOVE:
+			fsid = op->upcall.req.remove.parent_refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_MKDIR:
+			fsid = op->upcall.req.mkdir.parent_refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_READDIR:
+			fsid = op->upcall.req.readdir.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_SETATTR:
+			fsid = op->upcall.req.setattr.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_SYMLINK:
+			fsid = op->upcall.req.sym.parent_refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_RENAME:
+			fsid = op->upcall.req.rename.old_parent_refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_STATFS:
+			fsid = op->upcall.req.statfs.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_TRUNCATE:
+			fsid = op->upcall.req.truncate.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_MMAP_RA_FLUSH:
+			fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_FS_UMOUNT:
+			fsid = op->upcall.req.fs_umount.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_GETXATTR:
+			fsid = op->upcall.req.getxattr.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_SETXATTR:
+			fsid = op->upcall.req.setxattr.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_LISTXATTR:
+			fsid = op->upcall.req.listxattr.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_REMOVEXATTR:
+			fsid = op->upcall.req.removexattr.refn.fs_id;
+			break;
+		case ORANGEFS_VFS_OP_FSYNC:
+			fsid = op->upcall.req.fsync.refn.fs_id;
+			break;
+		default:
+			break;
+		}
+	}
+	return fsid;
+}
+
+static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
+{
+	int flags = 0;
+	if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
+		flags |= S_IMMUTABLE;
+	else
+		flags &= ~S_IMMUTABLE;
+	if (attrs->flags & ORANGEFS_APPEND_FL)
+		flags |= S_APPEND;
+	else
+		flags &= ~S_APPEND;
+	if (attrs->flags & ORANGEFS_NOATIME_FL)
+		flags |= S_NOATIME;
+	else
+		flags &= ~S_NOATIME;
+	return flags;
+}
+
+static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
+{
+	int perm_mode = 0;
+
+	if (attrs->perms & ORANGEFS_O_EXECUTE)
+		perm_mode |= S_IXOTH;
+	if (attrs->perms & ORANGEFS_O_WRITE)
+		perm_mode |= S_IWOTH;
+	if (attrs->perms & ORANGEFS_O_READ)
+		perm_mode |= S_IROTH;
+
+	if (attrs->perms & ORANGEFS_G_EXECUTE)
+		perm_mode |= S_IXGRP;
+	if (attrs->perms & ORANGEFS_G_WRITE)
+		perm_mode |= S_IWGRP;
+	if (attrs->perms & ORANGEFS_G_READ)
+		perm_mode |= S_IRGRP;
+
+	if (attrs->perms & ORANGEFS_U_EXECUTE)
+		perm_mode |= S_IXUSR;
+	if (attrs->perms & ORANGEFS_U_WRITE)
+		perm_mode |= S_IWUSR;
+	if (attrs->perms & ORANGEFS_U_READ)
+		perm_mode |= S_IRUSR;
+
+	if (attrs->perms & ORANGEFS_G_SGID)
+		perm_mode |= S_ISGID;
+	if (attrs->perms & ORANGEFS_U_SUID)
+		perm_mode |= S_ISUID;
+
+	return perm_mode;
+}
+
+/*
+ * NOTE: in kernel land, we never use the sys_attr->link_target for
+ * anything, so don't bother copying it into the sys_attr object here.
+ */
+static inline int copy_attributes_from_inode(struct inode *inode,
+					     struct ORANGEFS_sys_attr_s *attrs,
+					     struct iattr *iattr)
+{
+	umode_t tmp_mode;
+
+	if (!iattr || !inode || !attrs) {
+		gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
+			   "in copy_attributes_from_inode!\n",
+			   iattr,
+			   inode,
+			   attrs);
+		return -EINVAL;
+	}
+	/*
+	 * We need to be careful to only copy the attributes out of the
+	 * iattr object that we know are valid.
+	 */
+	attrs->mask = 0;
+	if (iattr->ia_valid & ATTR_UID) {
+		attrs->owner = from_kuid(current_user_ns(), iattr->ia_uid);
+		attrs->mask |= ORANGEFS_ATTR_SYS_UID;
+		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
+	}
+	if (iattr->ia_valid & ATTR_GID) {
+		attrs->group = from_kgid(current_user_ns(), iattr->ia_gid);
+		attrs->mask |= ORANGEFS_ATTR_SYS_GID;
+		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
+	}
+
+	if (iattr->ia_valid & ATTR_ATIME) {
+		attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
+		if (iattr->ia_valid & ATTR_ATIME_SET) {
+			attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
+			attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
+		}
+	}
+	if (iattr->ia_valid & ATTR_MTIME) {
+		attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
+		if (iattr->ia_valid & ATTR_MTIME_SET) {
+			attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
+			attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
+		}
+	}
+	if (iattr->ia_valid & ATTR_CTIME)
+		attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
+
+	/*
+	 * ORANGEFS cannot set size with a setattr operation.  Probably not likely
+	 * to be requested through the VFS, but just in case, don't worry about
+	 * ATTR_SIZE
+	 */
+
+	if (iattr->ia_valid & ATTR_MODE) {
+		tmp_mode = iattr->ia_mode;
+		if (tmp_mode & (S_ISVTX)) {
+			if (is_root_handle(inode)) {
+				/*
+				 * allow sticky bit to be set on root (since
+				 * it shows up that way by default anyhow),
+				 * but don't show it to the server
+				 */
+				tmp_mode -= S_ISVTX;
+			} else {
+				gossip_debug(GOSSIP_UTILS_DEBUG,
+					     "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
+				return -EINVAL;
+			}
+		}
+
+		if (tmp_mode & (S_ISUID)) {
+			gossip_debug(GOSSIP_UTILS_DEBUG,
+				     "Attempting to set setuid bit (not supported); returning EINVAL.\n");
+			return -EINVAL;
+		}
+
+		attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
+		attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
+	}
+
+	return 0;
+}
+
+static int orangefs_inode_type(enum orangefs_ds_type objtype)
+{
+	if (objtype == ORANGEFS_TYPE_METAFILE)
+		return S_IFREG;
+	else if (objtype == ORANGEFS_TYPE_DIRECTORY)
+		return S_IFDIR;
+	else if (objtype == ORANGEFS_TYPE_SYMLINK)
+		return S_IFLNK;
+	else
+		return -1;
+}
+
+static int orangefs_inode_is_stale(struct inode *inode, int new,
+    struct ORANGEFS_sys_attr_s *attrs, char *link_target)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	int type = orangefs_inode_type(attrs->objtype);
+	if (!new) {
+		/*
+		 * If the inode type or symlink target have changed then this
+		 * inode is stale.
+		 */
+		if (type == -1 || !(inode->i_mode & type)) {
+			orangefs_make_bad_inode(inode);
+			return 1;
+		}
+		if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
+		    link_target, ORANGEFS_NAME_MAX)) {
+			orangefs_make_bad_inode(inode);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int orangefs_inode_getattr(struct inode *inode, int new, int size)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_kernel_op_s *new_op;
+	loff_t inode_size, rounded_up_size;
+	int ret, type;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
+	    get_khandle_from_ino(inode));
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
+	if (!new_op)
+		return -ENOMEM;
+	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
+	new_op->upcall.req.getattr.mask = size ?
+	    ORANGEFS_ATTR_SYS_ALL_NOHINT : ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE;
+
+	ret = service_operation(new_op, __func__,
+	    get_interruptible_flag(inode));
+	if (ret != 0)
+		goto out;
+
+	type = orangefs_inode_type(new_op->
+	    downcall.resp.getattr.attributes.objtype);
+	ret = orangefs_inode_is_stale(inode, new,
+	    &new_op->downcall.resp.getattr.attributes,
+	    new_op->downcall.resp.getattr.link_target);
+	if (ret) {
+		ret = -ESTALE;
+		goto out;
+	}
+
+	switch (type) {
+	case S_IFREG:
+		inode->i_flags = orangefs_inode_flags(&new_op->
+		    downcall.resp.getattr.attributes);
+		if (size) {
+			inode_size = (loff_t)new_op->
+			    downcall.resp.getattr.attributes.size;
+			rounded_up_size =
+			    (inode_size + (4096 - (inode_size % 4096)));
+			inode->i_size = inode_size;
+			orangefs_inode->blksize =
+			    new_op->downcall.resp.getattr.attributes.blksize;
+			spin_lock(&inode->i_lock);
+			inode->i_bytes = inode_size;
+			inode->i_blocks =
+			    (unsigned long)(rounded_up_size / 512);
+			spin_unlock(&inode->i_lock);
+		}
+		break;
+	case S_IFDIR:
+		inode->i_size = PAGE_SIZE;
+		orangefs_inode->blksize = (1 << inode->i_blkbits);
+		spin_lock(&inode->i_lock);
+		inode_set_bytes(inode, inode->i_size);
+		spin_unlock(&inode->i_lock);
+		set_nlink(inode, 1);
+		break;
+	case S_IFLNK:
+		if (new) {
+			inode->i_size = (loff_t)strlen(new_op->
+			    downcall.resp.getattr.link_target);
+			orangefs_inode->blksize = (1 << inode->i_blkbits);
+			strlcpy(orangefs_inode->link_target,
+			    new_op->downcall.resp.getattr.link_target,
+			    ORANGEFS_NAME_MAX);
+			inode->i_link = orangefs_inode->link_target;
+		}
+		break;
+	}
+
+	inode->i_uid = make_kuid(&init_user_ns, new_op->
+	    downcall.resp.getattr.attributes.owner);
+	inode->i_gid = make_kgid(&init_user_ns, new_op->
+	    downcall.resp.getattr.attributes.group);
+	inode->i_atime.tv_sec = (time64_t)new_op->
+	    downcall.resp.getattr.attributes.atime;
+	inode->i_mtime.tv_sec = (time64_t)new_op->
+	    downcall.resp.getattr.attributes.mtime;
+	inode->i_ctime.tv_sec = (time64_t)new_op->
+	    downcall.resp.getattr.attributes.ctime;
+	inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_nsec = 0;
+	inode->i_ctime.tv_nsec = 0;
+
+	/* special case: mark the root inode as sticky */
+	inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
+	    orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
+
+	ret = 0;
+out:
+	op_release(new_op);
+	return ret;
+}
+
+int orangefs_inode_check_changed(struct inode *inode)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_kernel_op_s *new_op;
+	int ret;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
+	    get_khandle_from_ino(inode));
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
+	if (!new_op)
+		return -ENOMEM;
+	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
+	new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
+	    ORANGEFS_ATTR_SYS_LNK_TARGET;
+
+	ret = service_operation(new_op, __func__,
+	    get_interruptible_flag(inode));
+	if (ret != 0)
+		goto out;
+
+	ret = orangefs_inode_is_stale(inode, 0,
+	    &new_op->downcall.resp.getattr.attributes,
+	    new_op->downcall.resp.getattr.link_target);
+out:
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * issues a orangefs setattr request to make sure the new attribute values
+ * take effect if successful.  returns 0 on success; -errno otherwise
+ */
+int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_kernel_op_s *new_op;
+	int ret;
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.setattr.refn = orangefs_inode->refn;
+	ret = copy_attributes_from_inode(inode,
+		       &new_op->upcall.req.setattr.attributes,
+		       iattr);
+	if (ret >= 0) {
+		ret = service_operation(new_op, __func__,
+				get_interruptible_flag(inode));
+
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "orangefs_inode_setattr: returning %d\n",
+			     ret);
+	}
+
+	op_release(new_op);
+
+	/*
+	 * successful setattr should clear the atime, mtime and
+	 * ctime flags.
+	 */
+	if (ret == 0) {
+		ClearAtimeFlag(orangefs_inode);
+		ClearMtimeFlag(orangefs_inode);
+		ClearCtimeFlag(orangefs_inode);
+		ClearModeFlag(orangefs_inode);
+	}
+
+	return ret;
+}
+
+int orangefs_flush_inode(struct inode *inode)
+{
+	/*
+	 * If it is a dirty inode, this function gets called.
+	 * Gather all the information that needs to be setattr'ed
+	 * Right now, this will only be used for mode, atime, mtime
+	 * and/or ctime.
+	 */
+	struct iattr wbattr;
+	int ret;
+	int mtime_flag;
+	int ctime_flag;
+	int atime_flag;
+	int mode_flag;
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+
+	memset(&wbattr, 0, sizeof(wbattr));
+
+	/*
+	 * check inode flags up front, and clear them if they are set.  This
+	 * will prevent multiple processes from all trying to flush the same
+	 * inode if they call close() simultaneously
+	 */
+	mtime_flag = MtimeFlag(orangefs_inode);
+	ClearMtimeFlag(orangefs_inode);
+	ctime_flag = CtimeFlag(orangefs_inode);
+	ClearCtimeFlag(orangefs_inode);
+	atime_flag = AtimeFlag(orangefs_inode);
+	ClearAtimeFlag(orangefs_inode);
+	mode_flag = ModeFlag(orangefs_inode);
+	ClearModeFlag(orangefs_inode);
+
+	/*  -- Lazy atime,mtime and ctime update --
+	 * Note: all times are dictated by server in the new scheme
+	 * and not by the clients
+	 *
+	 * Also mode updates are being handled now..
+	 */
+
+	if (mtime_flag)
+		wbattr.ia_valid |= ATTR_MTIME;
+	if (ctime_flag)
+		wbattr.ia_valid |= ATTR_CTIME;
+	if (atime_flag)
+		wbattr.ia_valid |= ATTR_ATIME;
+
+	if (mode_flag) {
+		wbattr.ia_mode = inode->i_mode;
+		wbattr.ia_valid |= ATTR_MODE;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "*********** orangefs_flush_inode: %pU "
+		     "(ia_valid %d)\n",
+		     get_khandle_from_ino(inode),
+		     wbattr.ia_valid);
+	if (wbattr.ia_valid == 0) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "orangefs_flush_inode skipping setattr()\n");
+		return 0;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "orangefs_flush_inode (%pU) writing mode %o\n",
+		     get_khandle_from_ino(inode),
+		     inode->i_mode);
+
+	ret = orangefs_inode_setattr(inode, &wbattr);
+
+	return ret;
+}
+
+int orangefs_unmount_sb(struct super_block *sb)
+{
+	int ret = -EINVAL;
+	struct orangefs_kernel_op_s *new_op = NULL;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "orangefs_unmount_sb called on sb %p\n",
+		     sb);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT);
+	if (!new_op)
+		return -ENOMEM;
+	new_op->upcall.req.fs_umount.id = ORANGEFS_SB(sb)->id;
+	new_op->upcall.req.fs_umount.fs_id = ORANGEFS_SB(sb)->fs_id;
+	strncpy(new_op->upcall.req.fs_umount.orangefs_config_server,
+		ORANGEFS_SB(sb)->devname,
+		ORANGEFS_MAX_SERVER_ADDR_LEN);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Attempting ORANGEFS Unmount via host %s\n",
+		     new_op->upcall.req.fs_umount.orangefs_config_server);
+
+	ret = service_operation(new_op, "orangefs_fs_umount", 0);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "orangefs_unmount: got return value of %d\n", ret);
+	if (ret)
+		sb = ERR_PTR(ret);
+	else
+		ORANGEFS_SB(sb)->mount_pending = 1;
+
+	op_release(new_op);
+	return ret;
+}
+
+void orangefs_make_bad_inode(struct inode *inode)
+{
+	if (is_root_handle(inode)) {
+		/*
+		 * if this occurs, the pvfs2-client-core was killed but we
+		 * can't afford to lose the inode operations and such
+		 * associated with the root handle in any case.
+		 */
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "*** NOT making bad root inode %pU\n",
+			     get_khandle_from_ino(inode));
+	} else {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "*** making bad inode %pU\n",
+			     get_khandle_from_ino(inode));
+		make_bad_inode(inode);
+	}
+}
+
+/*
+ * The following is a very dirty hack that is now a permanent part of the
+ * ORANGEFS protocol. See protocol.h for more error definitions.
+ */
+
+/* The order matches include/orangefs-types.h in the OrangeFS source. */
+static int PINT_errno_mapping[] = {
+	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
+	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
+	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
+	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
+	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
+	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
+	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
+	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
+	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
+	EACCES, ECONNRESET, ERANGE
+};
+
+int orangefs_normalize_to_errno(__s32 error_code)
+{
+	__u32 i;
+
+	/* Success */
+	if (error_code == 0) {
+		return 0;
+	/*
+	 * This shouldn't ever happen. If it does it should be fixed on the
+	 * server.
+	 */
+	} else if (error_code > 0) {
+		gossip_err("orangefs: error status receieved.\n");
+		gossip_err("orangefs: assuming error code is inverted.\n");
+		error_code = -error_code;
+	}
+
+	/*
+	 * XXX: This is very bad since error codes from ORANGEFS may not be
+	 * suitable for return into userspace.
+	 */
+
+	/*
+	 * Convert ORANGEFS error values into errno values suitable for return
+	 * from the kernel.
+	 */
+	if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
+		if (((-error_code) &
+		    (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
+		    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
+			/*
+			 * cancellation error codes generally correspond to
+			 * a timeout from the client's perspective
+			 */
+			error_code = -ETIMEDOUT;
+		} else {
+			/* assume a default error code */
+			gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
+			error_code = -EINVAL;
+		}
+
+	/* Convert ORANGEFS encoded errno values into regular errno values. */
+	} else if ((-error_code) & ORANGEFS_ERROR_BIT) {
+		i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
+		if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping))
+			error_code = -PINT_errno_mapping[i];
+		else
+			error_code = -EINVAL;
+
+	/*
+	 * Only ORANGEFS protocol error codes should ever come here. Otherwise
+	 * there is a bug somewhere.
+	 */
+	} else {
+		gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
+	}
+	return error_code;
+}
+
+#define NUM_MODES 11
+__s32 ORANGEFS_util_translate_mode(int mode)
+{
+	int ret = 0;
+	int i = 0;
+	static int modes[NUM_MODES] = {
+		S_IXOTH, S_IWOTH, S_IROTH,
+		S_IXGRP, S_IWGRP, S_IRGRP,
+		S_IXUSR, S_IWUSR, S_IRUSR,
+		S_ISGID, S_ISUID
+	};
+	static int orangefs_modes[NUM_MODES] = {
+		ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
+		ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
+		ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
+		ORANGEFS_G_SGID, ORANGEFS_U_SUID
+	};
+
+	for (i = 0; i < NUM_MODES; i++)
+		if (mode & modes[i])
+			ret |= orangefs_modes[i];
+
+	return ret;
+}
+#undef NUM_MODES
+
+/*
+ * After obtaining a string representation of the client's debug
+ * keywords and their associated masks, this function is called to build an
+ * array of these values.
+ */
+int orangefs_prepare_cdm_array(char *debug_array_string)
+{
+	int i;
+	int rc = -EINVAL;
+	char *cds_head = NULL;
+	char *cds_delimiter = NULL;
+	int keyword_len = 0;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+	/*
+	 * figure out how many elements the cdm_array needs.
+	 */
+	for (i = 0; i < strlen(debug_array_string); i++)
+		if (debug_array_string[i] == '\n')
+			cdm_element_count++;
+
+	if (!cdm_element_count) {
+		pr_info("No elements in client debug array string!\n");
+		goto out;
+	}
+
+	cdm_array =
+		kzalloc(cdm_element_count * sizeof(struct client_debug_mask),
+			GFP_KERNEL);
+	if (!cdm_array) {
+		pr_info("malloc failed for cdm_array!\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	cds_head = debug_array_string;
+
+	for (i = 0; i < cdm_element_count; i++) {
+		cds_delimiter = strchr(cds_head, '\n');
+		*cds_delimiter = '\0';
+
+		keyword_len = strcspn(cds_head, " ");
+
+		cdm_array[i].keyword = kzalloc(keyword_len + 1, GFP_KERNEL);
+		if (!cdm_array[i].keyword) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		sscanf(cds_head,
+		       "%s %llx %llx",
+		       cdm_array[i].keyword,
+		       (unsigned long long *)&(cdm_array[i].mask1),
+		       (unsigned long long *)&(cdm_array[i].mask2));
+
+		if (!strcmp(cdm_array[i].keyword, ORANGEFS_VERBOSE))
+			client_verbose_index = i;
+
+		if (!strcmp(cdm_array[i].keyword, ORANGEFS_ALL))
+			client_all_index = i;
+
+		cds_head = cds_delimiter + 1;
+	}
+
+	rc = cdm_element_count;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+
+out:
+
+	return rc;
+
+}
+
+/*
+ * /sys/kernel/debug/orangefs/debug-help can be catted to
+ * see all the available kernel and client debug keywords.
+ *
+ * When the kernel boots, we have no idea what keywords the
+ * client supports, nor their associated masks.
+ *
+ * We pass through this function once at boot and stamp a
+ * boilerplate "we don't know" message for the client in the
+ * debug-help file. We pass through here again when the client
+ * starts and then we can fill out the debug-help file fully.
+ *
+ * The client might be restarted any number of times between
+ * reboots, we only build the debug-help file the first time.
+ */
+int orangefs_prepare_debugfs_help_string(int at_boot)
+{
+	int rc = -EINVAL;
+	int i;
+	int byte_count = 0;
+	char *client_title = "Client Debug Keywords:\n";
+	char *kernel_title = "Kernel Debug Keywords:\n";
+
+	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+	if (at_boot) {
+		byte_count += strlen(HELP_STRING_UNINITIALIZED);
+		client_title = HELP_STRING_UNINITIALIZED;
+	} else {
+		/*
+		 * fill the client keyword/mask array and remember
+		 * how many elements there were.
+		 */
+		cdm_element_count =
+			orangefs_prepare_cdm_array(client_debug_array_string);
+		if (cdm_element_count <= 0)
+			goto out;
+
+		/* Count the bytes destined for debug_help_string. */
+		byte_count += strlen(client_title);
+
+		for (i = 0; i < cdm_element_count; i++) {
+			byte_count += strlen(cdm_array[i].keyword + 2);
+			if (byte_count >= DEBUG_HELP_STRING_SIZE) {
+				pr_info("%s: overflow 1!\n", __func__);
+				goto out;
+			}
+		}
+
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "%s: cdm_element_count:%d:\n",
+			     __func__,
+			     cdm_element_count);
+	}
+
+	byte_count += strlen(kernel_title);
+	for (i = 0; i < num_kmod_keyword_mask_map; i++) {
+		byte_count +=
+			strlen(s_kmod_keyword_mask_map[i].keyword + 2);
+		if (byte_count >= DEBUG_HELP_STRING_SIZE) {
+			pr_info("%s: overflow 2!\n", __func__);
+			goto out;
+		}
+	}
+
+	/* build debug_help_string. */
+	debug_help_string = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
+	if (!debug_help_string) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	strcat(debug_help_string, client_title);
+
+	if (!at_boot) {
+		for (i = 0; i < cdm_element_count; i++) {
+			strcat(debug_help_string, "\t");
+			strcat(debug_help_string, cdm_array[i].keyword);
+			strcat(debug_help_string, "\n");
+		}
+	}
+
+	strcat(debug_help_string, "\n");
+	strcat(debug_help_string, kernel_title);
+
+	for (i = 0; i < num_kmod_keyword_mask_map; i++) {
+		strcat(debug_help_string, "\t");
+		strcat(debug_help_string, s_kmod_keyword_mask_map[i].keyword);
+		strcat(debug_help_string, "\n");
+	}
+
+	rc = 0;
+
+out:
+
+	return rc;
+
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ */
+void debug_mask_to_string(void *mask, int type)
+{
+	int i;
+	int len = 0;
+	char *debug_string;
+	int element_count = 0;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+	if (type) {
+		debug_string = client_debug_string;
+		element_count = cdm_element_count;
+	} else {
+		debug_string = kernel_debug_string;
+		element_count = num_kmod_keyword_mask_map;
+	}
+
+	memset(debug_string, 0, ORANGEFS_MAX_DEBUG_STRING_LEN);
+
+	/*
+	 * Some keywords, like "all" or "verbose", are amalgams of
+	 * numerous other keywords. Make a special check for those
+	 * before grinding through the whole mask only to find out
+	 * later...
+	 */
+	if (check_amalgam_keyword(mask, type))
+		goto out;
+
+	/* Build the debug string. */
+	for (i = 0; i < element_count; i++)
+		if (type)
+			do_c_string(mask, i);
+		else
+			do_k_string(mask, i);
+
+	len = strlen(debug_string);
+
+	if ((len) && (type))
+		client_debug_string[len - 1] = '\0';
+	else if (len)
+		kernel_debug_string[len - 1] = '\0';
+	else if (type)
+		strcpy(client_debug_string, "none");
+	else
+		strcpy(kernel_debug_string, "none");
+
+out:
+gossip_debug(GOSSIP_UTILS_DEBUG, "%s: string:%s:\n", __func__, debug_string);
+
+	return;
+
+}
+
+void do_k_string(void *k_mask, int index)
+{
+	__u64 *mask = (__u64 *) k_mask;
+
+	if (keyword_is_amalgam((char *) s_kmod_keyword_mask_map[index].keyword))
+		goto out;
+
+	if (*mask & s_kmod_keyword_mask_map[index].mask_val) {
+		if ((strlen(kernel_debug_string) +
+		     strlen(s_kmod_keyword_mask_map[index].keyword))
+			< ORANGEFS_MAX_DEBUG_STRING_LEN - 1) {
+				strcat(kernel_debug_string,
+				       s_kmod_keyword_mask_map[index].keyword);
+				strcat(kernel_debug_string, ",");
+			} else {
+				gossip_err("%s: overflow!\n", __func__);
+				strcpy(kernel_debug_string, ORANGEFS_ALL);
+				goto out;
+			}
+	}
+
+out:
+
+	return;
+}
+
+void do_c_string(void *c_mask, int index)
+{
+	struct client_debug_mask *mask = (struct client_debug_mask *) c_mask;
+
+	if (keyword_is_amalgam(cdm_array[index].keyword))
+		goto out;
+
+	if ((mask->mask1 & cdm_array[index].mask1) ||
+	    (mask->mask2 & cdm_array[index].mask2)) {
+		if ((strlen(client_debug_string) +
+		     strlen(cdm_array[index].keyword) + 1)
+			< ORANGEFS_MAX_DEBUG_STRING_LEN - 2) {
+				strcat(client_debug_string,
+				       cdm_array[index].keyword);
+				strcat(client_debug_string, ",");
+			} else {
+				gossip_err("%s: overflow!\n", __func__);
+				strcpy(client_debug_string, ORANGEFS_ALL);
+				goto out;
+			}
+	}
+out:
+	return;
+}
+
+int keyword_is_amalgam(char *keyword)
+{
+	int rc = 0;
+
+	if ((!strcmp(keyword, ORANGEFS_ALL)) || (!strcmp(keyword, ORANGEFS_VERBOSE)))
+		rc = 1;
+
+	return rc;
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ *
+ * return 1 if we found an amalgam.
+ */
+int check_amalgam_keyword(void *mask, int type)
+{
+	__u64 *k_mask;
+	struct client_debug_mask *c_mask;
+	int k_all_index = num_kmod_keyword_mask_map - 1;
+	int rc = 0;
+
+	if (type) {
+		c_mask = (struct client_debug_mask *) mask;
+
+		if ((c_mask->mask1 == cdm_array[client_all_index].mask1) &&
+		    (c_mask->mask2 == cdm_array[client_all_index].mask2)) {
+			strcpy(client_debug_string, ORANGEFS_ALL);
+			rc = 1;
+			goto out;
+		}
+
+		if ((c_mask->mask1 == cdm_array[client_verbose_index].mask1) &&
+		    (c_mask->mask2 == cdm_array[client_verbose_index].mask2)) {
+			strcpy(client_debug_string, ORANGEFS_VERBOSE);
+			rc = 1;
+			goto out;
+		}
+
+	} else {
+		k_mask = (__u64 *) mask;
+
+		if (*k_mask >= s_kmod_keyword_mask_map[k_all_index].mask_val) {
+			strcpy(kernel_debug_string, ORANGEFS_ALL);
+			rc = 1;
+			goto out;
+		}
+	}
+
+out:
+
+	return rc;
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ */
+void debug_string_to_mask(char *debug_string, void *mask, int type)
+{
+	char *unchecked_keyword;
+	int i;
+	char *strsep_fodder = kstrdup(debug_string, GFP_KERNEL);
+	char *original_pointer;
+	int element_count = 0;
+	struct client_debug_mask *c_mask;
+	__u64 *k_mask;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+	if (type) {
+		c_mask = (struct client_debug_mask *)mask;
+		element_count = cdm_element_count;
+	} else {
+		k_mask = (__u64 *)mask;
+		*k_mask = 0;
+		element_count = num_kmod_keyword_mask_map;
+	}
+
+	original_pointer = strsep_fodder;
+	while ((unchecked_keyword = strsep(&strsep_fodder, ",")))
+		if (strlen(unchecked_keyword)) {
+			for (i = 0; i < element_count; i++)
+				if (type)
+					do_c_mask(i,
+						  unchecked_keyword,
+						  &c_mask);
+				else
+					do_k_mask(i,
+						  unchecked_keyword,
+						  &k_mask);
+		}
+
+	kfree(original_pointer);
+}
+
+void do_c_mask(int i,
+	       char *unchecked_keyword,
+	       struct client_debug_mask **sane_mask)
+{
+
+	if (!strcmp(cdm_array[i].keyword, unchecked_keyword)) {
+		(**sane_mask).mask1 = (**sane_mask).mask1 | cdm_array[i].mask1;
+		(**sane_mask).mask2 = (**sane_mask).mask2 | cdm_array[i].mask2;
+	}
+}
+
+void do_k_mask(int i, char *unchecked_keyword, __u64 **sane_mask)
+{
+
+	if (!strcmp(s_kmod_keyword_mask_map[i].keyword, unchecked_keyword))
+		**sane_mask = (**sane_mask) |
+				s_kmod_keyword_mask_map[i].mask_val;
+}

diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h
new file mode 100644
index 0000000..50578a2
--- /dev/null
+++ b/fs/orangefs/protocol.h

@@ -0,0 +1,452 @@
+#include <linux/types.h>
+#include <linux/spinlock_types.h>
+#include <linux/slab.h>
+#include <linux/ioctl.h>
+
+extern struct client_debug_mask *cdm_array;
+extern char *debug_help_string;
+extern int help_string_initialized;
+extern struct dentry *debug_dir;
+extern struct dentry *help_file_dentry;
+extern struct dentry *client_debug_dentry;
+extern const struct file_operations debug_help_fops;
+extern int client_all_index;
+extern int client_verbose_index;
+extern int cdm_element_count;
+#define DEBUG_HELP_STRING_SIZE 4096
+#define HELP_STRING_UNINITIALIZED \
+	"Client Debug Keywords are unknown until the first time\n" \
+	"the client is started after boot.\n"
+#define ORANGEFS_KMOD_DEBUG_HELP_FILE "debug-help"
+#define ORANGEFS_KMOD_DEBUG_FILE "kernel-debug"
+#define ORANGEFS_CLIENT_DEBUG_FILE "client-debug"
+#define ORANGEFS_VERBOSE "verbose"
+#define ORANGEFS_ALL "all"
+
+/* pvfs2-config.h ***********************************************************/
+#define ORANGEFS_VERSION_MAJOR 2
+#define ORANGEFS_VERSION_MINOR 9
+#define ORANGEFS_VERSION_SUB 0
+
+/* khandle stuff  ***********************************************************/
+
+/*
+ * The 2.9 core will put 64 bit handles in here like this:
+ *    1234 0000 0000 5678
+ * The 3.0 and beyond cores will put 128 bit handles in here like this:
+ *    1234 5678 90AB CDEF
+ * The kernel module will always use the first four bytes and
+ * the last four bytes as an inum.
+ */
+struct orangefs_khandle {
+	unsigned char u[16];
+}  __aligned(8);
+
+/*
+ * kernel version of an object ref.
+ */
+struct orangefs_object_kref {
+	struct orangefs_khandle khandle;
+	__s32 fs_id;
+	__s32 __pad1;
+};
+
+/*
+ * compare 2 khandles assumes little endian thus from large address to
+ * small address
+ */
+static inline int ORANGEFS_khandle_cmp(const struct orangefs_khandle *kh1,
+				   const struct orangefs_khandle *kh2)
+{
+	int i;
+
+	for (i = 15; i >= 0; i--) {
+		if (kh1->u[i] > kh2->u[i])
+			return 1;
+		if (kh1->u[i] < kh2->u[i])
+			return -1;
+	}
+
+	return 0;
+}
+
+static inline void ORANGEFS_khandle_to(const struct orangefs_khandle *kh,
+				   void *p, int size)
+{
+
+	memset(p, 0, size);
+	memcpy(p, kh->u, 16);
+
+}
+
+static inline void ORANGEFS_khandle_from(struct orangefs_khandle *kh,
+				     void *p, int size)
+{
+	memset(kh, 0, 16);
+	memcpy(kh->u, p, 16);
+
+}
+
+/* pvfs2-types.h ************************************************************/
+typedef __u32 ORANGEFS_uid;
+typedef __u32 ORANGEFS_gid;
+typedef __s32 ORANGEFS_fs_id;
+typedef __u32 ORANGEFS_permissions;
+typedef __u64 ORANGEFS_time;
+typedef __s64 ORANGEFS_size;
+typedef __u64 ORANGEFS_flags;
+typedef __u64 ORANGEFS_ds_position;
+typedef __s32 ORANGEFS_error;
+typedef __s64 ORANGEFS_offset;
+
+#define ORANGEFS_SUPER_MAGIC 0x20030528
+
+/*
+ * ORANGEFS error codes are a signed 32-bit integer. Error codes are negative, but
+ * the sign is stripped before decoding.
+ */
+
+/* Bit 31 is not used since it is the sign. */
+
+/*
+ * Bit 30 specifies that this is a ORANGEFS error. A ORANGEFS error is either an
+ * encoded errno value or a ORANGEFS protocol error.
+ */
+#define ORANGEFS_ERROR_BIT (1 << 30)
+
+/*
+ * Bit 29 specifies that this is a ORANGEFS protocol error and not an encoded
+ * errno value.
+ */
+#define ORANGEFS_NON_ERRNO_ERROR_BIT (1 << 29)
+
+/*
+ * Bits 9, 8, and 7 specify the error class, which encodes the section of
+ * server code the error originated in for logging purposes. It is not used
+ * in the kernel except to be masked out.
+ */
+#define ORANGEFS_ERROR_CLASS_BITS 0x380
+
+/* Bits 6 - 0 are reserved for the actual error code. */
+#define ORANGEFS_ERROR_NUMBER_BITS 0x7f
+
+/* Encoded errno values decoded by PINT_errno_mapping in orangefs-utils.c. */
+
+/* Our own ORANGEFS protocol error codes. */
+#define ORANGEFS_ECANCEL    (1|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_EDEVINIT   (2|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_EDETAIL    (3|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_EHOSTNTFD  (4|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_EADDRNTFD  (5|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_ENORECVR   (6|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_ETRYAGAIN  (7|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_ENOTPVFS   (8|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_ESECURITY  (9|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+
+/* permission bits */
+#define ORANGEFS_O_EXECUTE (1 << 0)
+#define ORANGEFS_O_WRITE   (1 << 1)
+#define ORANGEFS_O_READ    (1 << 2)
+#define ORANGEFS_G_EXECUTE (1 << 3)
+#define ORANGEFS_G_WRITE   (1 << 4)
+#define ORANGEFS_G_READ    (1 << 5)
+#define ORANGEFS_U_EXECUTE (1 << 6)
+#define ORANGEFS_U_WRITE   (1 << 7)
+#define ORANGEFS_U_READ    (1 << 8)
+/* no ORANGEFS_U_VTX (sticky bit) */
+#define ORANGEFS_G_SGID    (1 << 10)
+#define ORANGEFS_U_SUID    (1 << 11)
+
+/* definition taken from stdint.h */
+#define INT32_MAX (2147483647)
+#define ORANGEFS_ITERATE_START    (INT32_MAX - 1)
+#define ORANGEFS_ITERATE_END      (INT32_MAX - 2)
+#define ORANGEFS_ITERATE_NEXT     (INT32_MAX - 3)
+#define ORANGEFS_READDIR_START ORANGEFS_ITERATE_START
+#define ORANGEFS_READDIR_END   ORANGEFS_ITERATE_END
+#define ORANGEFS_IMMUTABLE_FL FS_IMMUTABLE_FL
+#define ORANGEFS_APPEND_FL    FS_APPEND_FL
+#define ORANGEFS_NOATIME_FL   FS_NOATIME_FL
+#define ORANGEFS_MIRROR_FL    0x01000000ULL
+#define ORANGEFS_O_EXECUTE (1 << 0)
+#define ORANGEFS_FS_ID_NULL       ((__s32)0)
+#define ORANGEFS_ATTR_SYS_UID                   (1 << 0)
+#define ORANGEFS_ATTR_SYS_GID                   (1 << 1)
+#define ORANGEFS_ATTR_SYS_PERM                  (1 << 2)
+#define ORANGEFS_ATTR_SYS_ATIME                 (1 << 3)
+#define ORANGEFS_ATTR_SYS_CTIME                 (1 << 4)
+#define ORANGEFS_ATTR_SYS_MTIME                 (1 << 5)
+#define ORANGEFS_ATTR_SYS_TYPE                  (1 << 6)
+#define ORANGEFS_ATTR_SYS_ATIME_SET             (1 << 7)
+#define ORANGEFS_ATTR_SYS_MTIME_SET             (1 << 8)
+#define ORANGEFS_ATTR_SYS_SIZE                  (1 << 20)
+#define ORANGEFS_ATTR_SYS_LNK_TARGET            (1 << 24)
+#define ORANGEFS_ATTR_SYS_DFILE_COUNT           (1 << 25)
+#define ORANGEFS_ATTR_SYS_DIRENT_COUNT          (1 << 26)
+#define ORANGEFS_ATTR_SYS_BLKSIZE               (1 << 28)
+#define ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT   (1 << 29)
+#define ORANGEFS_ATTR_SYS_COMMON_ALL	\
+	(ORANGEFS_ATTR_SYS_UID	|	\
+	 ORANGEFS_ATTR_SYS_GID	|	\
+	 ORANGEFS_ATTR_SYS_PERM	|	\
+	 ORANGEFS_ATTR_SYS_ATIME	|	\
+	 ORANGEFS_ATTR_SYS_CTIME	|	\
+	 ORANGEFS_ATTR_SYS_MTIME	|	\
+	 ORANGEFS_ATTR_SYS_TYPE)
+
+#define ORANGEFS_ATTR_SYS_ALL_SETABLE		\
+(ORANGEFS_ATTR_SYS_COMMON_ALL-ORANGEFS_ATTR_SYS_TYPE)
+
+#define ORANGEFS_ATTR_SYS_ALL_NOHINT			\
+	(ORANGEFS_ATTR_SYS_COMMON_ALL		|	\
+	 ORANGEFS_ATTR_SYS_SIZE			|	\
+	 ORANGEFS_ATTR_SYS_LNK_TARGET		|	\
+	 ORANGEFS_ATTR_SYS_DFILE_COUNT		|	\
+	 ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT	|	\
+	 ORANGEFS_ATTR_SYS_DIRENT_COUNT		|	\
+	 ORANGEFS_ATTR_SYS_BLKSIZE)
+
+#define ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE		\
+	(ORANGEFS_ATTR_SYS_COMMON_ALL		|	\
+	 ORANGEFS_ATTR_SYS_LNK_TARGET		|	\
+	 ORANGEFS_ATTR_SYS_DFILE_COUNT		|	\
+	 ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT	|	\
+	 ORANGEFS_ATTR_SYS_DIRENT_COUNT		|	\
+	 ORANGEFS_ATTR_SYS_BLKSIZE)
+
+#define ORANGEFS_XATTR_REPLACE 0x2
+#define ORANGEFS_XATTR_CREATE  0x1
+#define ORANGEFS_MAX_SERVER_ADDR_LEN 256
+#define ORANGEFS_NAME_MAX                256
+/*
+ * max extended attribute name len as imposed by the VFS and exploited for the
+ * upcall request types.
+ * NOTE: Please retain them as multiples of 8 even if you wish to change them
+ * This is *NECESSARY* for supporting 32 bit user-space binaries on a 64-bit
+ * kernel. Due to implementation within DBPF, this really needs to be
+ * ORANGEFS_NAME_MAX, which it was the same value as, but no reason to let it
+ * break if that changes in the future.
+ */
+#define ORANGEFS_MAX_XATTR_NAMELEN   ORANGEFS_NAME_MAX	/* Not the same as
+						 * XATTR_NAME_MAX defined
+						 * by <linux/xattr.h>
+						 */
+#define ORANGEFS_MAX_XATTR_VALUELEN  8192	/* Not the same as XATTR_SIZE_MAX
+					 * defined by <linux/xattr.h>
+					 */
+#define ORANGEFS_MAX_XATTR_LISTLEN   16	/* Not the same as XATTR_LIST_MAX
+					 * defined by <linux/xattr.h>
+					 */
+/*
+ * ORANGEFS I/O operation types, used in both system and server interfaces.
+ */
+enum ORANGEFS_io_type {
+	ORANGEFS_IO_READ = 1,
+	ORANGEFS_IO_WRITE = 2
+};
+
+/*
+ * If this enum is modified the server parameters related to the precreate pool
+ * batch and low threshold sizes may need to be modified  to reflect this
+ * change.
+ */
+enum orangefs_ds_type {
+	ORANGEFS_TYPE_NONE = 0,
+	ORANGEFS_TYPE_METAFILE = (1 << 0),
+	ORANGEFS_TYPE_DATAFILE = (1 << 1),
+	ORANGEFS_TYPE_DIRECTORY = (1 << 2),
+	ORANGEFS_TYPE_SYMLINK = (1 << 3),
+	ORANGEFS_TYPE_DIRDATA = (1 << 4),
+	ORANGEFS_TYPE_INTERNAL = (1 << 5)	/* for the server's private use */
+};
+
+/*
+ * ORANGEFS_certificate simply stores a buffer with the buffer size.
+ * The buffer can be converted to an OpenSSL X509 struct for use.
+ */
+struct ORANGEFS_certificate {
+	__u32 buf_size;
+	unsigned char *buf;
+};
+
+/*
+ * A credential identifies a user and is signed by the client/user
+ * private key.
+ */
+struct ORANGEFS_credential {
+	__u32 userid;	/* user id */
+	__u32 num_groups;	/* length of group_array */
+	__u32 *group_array;	/* groups for which the user is a member */
+	char *issuer;		/* alias of the issuing server */
+	__u64 timeout;	/* seconds after epoch to time out */
+	__u32 sig_size;	/* length of the signature in bytes */
+	unsigned char *signature;	/* digital signature */
+	struct ORANGEFS_certificate certificate;	/* user certificate buffer */
+};
+#define extra_size_ORANGEFS_credential (ORANGEFS_REQ_LIMIT_GROUPS	*	\
+				    sizeof(__u32)		+	\
+				    ORANGEFS_REQ_LIMIT_ISSUER	+	\
+				    ORANGEFS_REQ_LIMIT_SIGNATURE	+	\
+				    extra_size_ORANGEFS_certificate)
+
+/* This structure is used by the VFS-client interaction alone */
+struct ORANGEFS_keyval_pair {
+	char key[ORANGEFS_MAX_XATTR_NAMELEN];
+	__s32 key_sz;	/* __s32 for portable, fixed-size structures */
+	__s32 val_sz;
+	char val[ORANGEFS_MAX_XATTR_VALUELEN];
+};
+
+/* pvfs2-sysint.h ***********************************************************/
+/* Describes attributes for a file, directory, or symlink. */
+struct ORANGEFS_sys_attr_s {
+	__u32 owner;
+	__u32 group;
+	__u32 perms;
+	__u64 atime;
+	__u64 mtime;
+	__u64 ctime;
+	__s64 size;
+
+	/* NOTE: caller must free if valid */
+	char *link_target;
+
+	/* Changed to __s32 so that size of structure does not change */
+	__s32 dfile_count;
+
+	/* Changed to __s32 so that size of structure does not change */
+	__s32 distr_dir_servers_initial;
+
+	/* Changed to __s32 so that size of structure does not change */
+	__s32 distr_dir_servers_max;
+
+	/* Changed to __s32 so that size of structure does not change */
+	__s32 distr_dir_split_size;
+
+	__u32 mirror_copies_count;
+
+	/* NOTE: caller must free if valid */
+	char *dist_name;
+
+	/* NOTE: caller must free if valid */
+	char *dist_params;
+
+	__s64 dirent_count;
+	enum orangefs_ds_type objtype;
+	__u64 flags;
+	__u32 mask;
+	__s64 blksize;
+};
+
+#define ORANGEFS_LOOKUP_LINK_NO_FOLLOW 0
+
+/* pint-dev.h ***************************************************************/
+
+/* parameter structure used in ORANGEFS_DEV_DEBUG ioctl command */
+struct dev_mask_info_s {
+	enum {
+		KERNEL_MASK,
+		CLIENT_MASK,
+	} mask_type;
+	__u64 mask_value;
+};
+
+struct dev_mask2_info_s {
+	__u64 mask1_value;
+	__u64 mask2_value;
+};
+
+/* pvfs2-util.h *************************************************************/
+__s32 ORANGEFS_util_translate_mode(int mode);
+
+/* pvfs2-debug.h ************************************************************/
+#include "orangefs-debug.h"
+
+/* pvfs2-internal.h *********************************************************/
+#define llu(x) (unsigned long long)(x)
+#define lld(x) (long long)(x)
+
+/* pint-dev-shared.h ********************************************************/
+#define ORANGEFS_DEV_MAGIC 'k'
+
+#define ORANGEFS_READDIR_DEFAULT_DESC_COUNT  5
+
+#define DEV_GET_MAGIC           0x1
+#define DEV_GET_MAX_UPSIZE      0x2
+#define DEV_GET_MAX_DOWNSIZE    0x3
+#define DEV_MAP                 0x4
+#define DEV_REMOUNT_ALL         0x5
+#define DEV_DEBUG               0x6
+#define DEV_UPSTREAM            0x7
+#define DEV_CLIENT_MASK         0x8
+#define DEV_CLIENT_STRING       0x9
+#define DEV_MAX_NR              0xa
+
+/* supported ioctls, codes are with respect to user-space */
+enum {
+	ORANGEFS_DEV_GET_MAGIC = _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAGIC, __s32),
+	ORANGEFS_DEV_GET_MAX_UPSIZE =
+	    _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAX_UPSIZE, __s32),
+	ORANGEFS_DEV_GET_MAX_DOWNSIZE =
+	    _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAX_DOWNSIZE, __s32),
+	ORANGEFS_DEV_MAP = _IO(ORANGEFS_DEV_MAGIC, DEV_MAP),
+	ORANGEFS_DEV_REMOUNT_ALL = _IO(ORANGEFS_DEV_MAGIC, DEV_REMOUNT_ALL),
+	ORANGEFS_DEV_DEBUG = _IOR(ORANGEFS_DEV_MAGIC, DEV_DEBUG, __s32),
+	ORANGEFS_DEV_UPSTREAM = _IOW(ORANGEFS_DEV_MAGIC, DEV_UPSTREAM, int),
+	ORANGEFS_DEV_CLIENT_MASK = _IOW(ORANGEFS_DEV_MAGIC,
+				    DEV_CLIENT_MASK,
+				    struct dev_mask2_info_s),
+	ORANGEFS_DEV_CLIENT_STRING = _IOW(ORANGEFS_DEV_MAGIC,
+				      DEV_CLIENT_STRING,
+				      char *),
+	ORANGEFS_DEV_MAXNR = DEV_MAX_NR,
+};
+
+/*
+ * version number for use in communicating between kernel space and user
+ * space. Zero signifies the upstream version of the kernel module.
+ */
+#define ORANGEFS_KERNEL_PROTO_VERSION 0
+#define ORANGEFS_MINIMUM_USERSPACE_VERSION 20903
+
+/*
+ * describes memory regions to map in the ORANGEFS_DEV_MAP ioctl.
+ * NOTE: See devorangefs-req.c for 32 bit compat structure.
+ * Since this structure has a variable-sized layout that is different
+ * on 32 and 64 bit platforms, we need to normalize to a 64 bit layout
+ * on such systems before servicing ioctl calls from user-space binaries
+ * that may be 32 bit!
+ */
+struct ORANGEFS_dev_map_desc {
+	void *ptr;
+	__s32 total_size;
+	__s32 size;
+	__s32 count;
+};
+
+/* gossip.h *****************************************************************/
+
+#ifdef GOSSIP_DISABLE_DEBUG
+#define gossip_debug(mask, format, f...) do {} while (0)
+#else
+extern __u64 gossip_debug_mask;
+extern struct client_debug_mask client_debug_mask;
+
+/* try to avoid function call overhead by checking masks in macro */
+#define gossip_debug(mask, format, f...)			\
+do {								\
+	if (gossip_debug_mask & mask)				\
+		printk(format, ##f);				\
+} while (0)
+#endif /* GOSSIP_DISABLE_DEBUG */
+
+/* do file and line number printouts w/ the GNU preprocessor */
+#define gossip_ldebug(mask, format, f...)				\
+		gossip_debug(mask, "%s: " format, __func__, ##f)
+
+#define gossip_err printk
+#define gossip_lerr(format, f...)					\
+		gossip_err("%s line %d: " format,			\
+			   __FILE__,					\
+			   __LINE__,					\
+			   ##f)

diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
new file mode 100644
index 0000000..b9da9a0
--- /dev/null
+++ b/fs/orangefs/super.c

@@ -0,0 +1,559 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+#include <linux/parser.h>
+
+/* a cache for orangefs-inode objects (i.e. orangefs inode private data) */
+static struct kmem_cache *orangefs_inode_cache;
+
+/* list for storing orangefs specific superblocks in use */
+LIST_HEAD(orangefs_superblocks);
+
+DEFINE_SPINLOCK(orangefs_superblocks_lock);
+
+enum {
+	Opt_intr,
+	Opt_acl,
+	Opt_local_lock,
+
+	Opt_err
+};
+
+static const match_table_t tokens = {
+	{ Opt_acl,		"acl" },
+	{ Opt_intr,		"intr" },
+	{ Opt_local_lock,	"local_lock" },
+	{ Opt_err,	NULL }
+};
+
+
+static int parse_mount_options(struct super_block *sb, char *options,
+		int silent)
+{
+	struct orangefs_sb_info_s *orangefs_sb = ORANGEFS_SB(sb);
+	substring_t args[MAX_OPT_ARGS];
+	char *p;
+
+	/*
+	 * Force any potential flags that might be set from the mount
+	 * to zero, ie, initialize to unset.
+	 */
+	sb->s_flags &= ~MS_POSIXACL;
+	orangefs_sb->flags &= ~ORANGEFS_OPT_INTR;
+	orangefs_sb->flags &= ~ORANGEFS_OPT_LOCAL_LOCK;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_acl:
+			sb->s_flags |= MS_POSIXACL;
+			break;
+		case Opt_intr:
+			orangefs_sb->flags |= ORANGEFS_OPT_INTR;
+			break;
+		case Opt_local_lock:
+			orangefs_sb->flags |= ORANGEFS_OPT_LOCAL_LOCK;
+			break;
+		default:
+			goto fail;
+		}
+	}
+
+	return 0;
+fail:
+	if (!silent)
+		gossip_err("Error: mount option [%s] is not supported.\n", p);
+	return -EINVAL;
+}
+
+static void orangefs_inode_cache_ctor(void *req)
+{
+	struct orangefs_inode_s *orangefs_inode = req;
+
+	inode_init_once(&orangefs_inode->vfs_inode);
+	init_rwsem(&orangefs_inode->xattr_sem);
+
+	orangefs_inode->vfs_inode.i_version = 1;
+}
+
+static struct inode *orangefs_alloc_inode(struct super_block *sb)
+{
+	struct orangefs_inode_s *orangefs_inode;
+
+	orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, GFP_KERNEL);
+	if (orangefs_inode == NULL) {
+		gossip_err("Failed to allocate orangefs_inode\n");
+		return NULL;
+	}
+
+	/*
+	 * We want to clear everything except for rw_semaphore and the
+	 * vfs_inode.
+	 */
+	memset(&orangefs_inode->refn.khandle, 0, 16);
+	orangefs_inode->refn.fs_id = ORANGEFS_FS_ID_NULL;
+	orangefs_inode->last_failed_block_index_read = 0;
+	memset(orangefs_inode->link_target, 0, sizeof(orangefs_inode->link_target));
+	orangefs_inode->pinode_flags = 0;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "orangefs_alloc_inode: allocated %p\n",
+		     &orangefs_inode->vfs_inode);
+	return &orangefs_inode->vfs_inode;
+}
+
+static void orangefs_destroy_inode(struct inode *inode)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+			"%s: deallocated %p destroying inode %pU\n",
+			__func__, orangefs_inode, get_khandle_from_ino(inode));
+
+	kmem_cache_free(orangefs_inode_cache, orangefs_inode);
+}
+
+/*
+ * NOTE: information filled in here is typically reflected in the
+ * output of the system command 'df'
+*/
+static int orangefs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	int ret = -ENOMEM;
+	struct orangefs_kernel_op_s *new_op = NULL;
+	int flags = 0;
+	struct super_block *sb = NULL;
+
+	sb = dentry->d_sb;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "orangefs_statfs: called on sb %p (fs_id is %d)\n",
+		     sb,
+		     (int)(ORANGEFS_SB(sb)->fs_id));
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_STATFS);
+	if (!new_op)
+		return ret;
+	new_op->upcall.req.statfs.fs_id = ORANGEFS_SB(sb)->fs_id;
+
+	if (ORANGEFS_SB(sb)->flags & ORANGEFS_OPT_INTR)
+		flags = ORANGEFS_OP_INTERRUPTIBLE;
+
+	ret = service_operation(new_op, "orangefs_statfs", flags);
+
+	if (new_op->downcall.status < 0)
+		goto out_op_release;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "%s: got %ld blocks available | "
+		     "%ld blocks total | %ld block size | "
+		     "%ld files total | %ld files avail\n",
+		     __func__,
+		     (long)new_op->downcall.resp.statfs.blocks_avail,
+		     (long)new_op->downcall.resp.statfs.blocks_total,
+		     (long)new_op->downcall.resp.statfs.block_size,
+		     (long)new_op->downcall.resp.statfs.files_total,
+		     (long)new_op->downcall.resp.statfs.files_avail);
+
+	buf->f_type = sb->s_magic;
+	memcpy(&buf->f_fsid, &ORANGEFS_SB(sb)->fs_id, sizeof(buf->f_fsid));
+	buf->f_bsize = new_op->downcall.resp.statfs.block_size;
+	buf->f_namelen = ORANGEFS_NAME_MAX;
+
+	buf->f_blocks = (sector_t) new_op->downcall.resp.statfs.blocks_total;
+	buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
+	buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
+	buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total;
+	buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail;
+	buf->f_frsize = sb->s_blocksize;
+
+out_op_release:
+	op_release(new_op);
+	gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_statfs: returning %d\n", ret);
+	return ret;
+}
+
+/*
+ * Remount as initiated by VFS layer.  We just need to reparse the mount
+ * options, no need to signal pvfs2-client-core about it.
+ */
+static int orangefs_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+	gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_remount_fs: called\n");
+	return parse_mount_options(sb, data, 1);
+}
+
+/*
+ * Remount as initiated by pvfs2-client-core on restart.  This is used to
+ * repopulate mount information left from previous pvfs2-client-core.
+ *
+ * the idea here is that given a valid superblock, we're
+ * re-initializing the user space client with the initial mount
+ * information specified when the super block was first initialized.
+ * this is very different than the first initialization/creation of a
+ * superblock.  we use the special service_priority_operation to make
+ * sure that the mount gets ahead of any other pending operation that
+ * is waiting for servicing.  this means that the pvfs2-client won't
+ * fail to start several times for all other pending operations before
+ * the client regains all of the mount information from us.
+ * NOTE: this function assumes that the request_mutex is already acquired!
+ */
+int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb)
+{
+	struct orangefs_kernel_op_s *new_op;
+	int ret = -EINVAL;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_remount: called\n");
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT);
+	if (!new_op)
+		return -ENOMEM;
+	strncpy(new_op->upcall.req.fs_mount.orangefs_config_server,
+		orangefs_sb->devname,
+		ORANGEFS_MAX_SERVER_ADDR_LEN);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Attempting ORANGEFS Remount via host %s\n",
+		     new_op->upcall.req.fs_mount.orangefs_config_server);
+
+	/*
+	 * we assume that the calling function has already acquired the
+	 * request_mutex to prevent other operations from bypassing
+	 * this one
+	 */
+	ret = service_operation(new_op, "orangefs_remount",
+		ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_MUTEX);
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "orangefs_remount: mount got return value of %d\n",
+		     ret);
+	if (ret == 0) {
+		/*
+		 * store the id assigned to this sb -- it's just a
+		 * short-lived mapping that the system interface uses
+		 * to map this superblock to a particular mount entry
+		 */
+		orangefs_sb->id = new_op->downcall.resp.fs_mount.id;
+		orangefs_sb->mount_pending = 0;
+	}
+
+	op_release(new_op);
+	return ret;
+}
+
+int fsid_key_table_initialize(void)
+{
+	return 0;
+}
+
+void fsid_key_table_finalize(void)
+{
+}
+
+/* Called whenever the VFS dirties the inode in response to atime updates */
+static void orangefs_dirty_inode(struct inode *inode, int flags)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "orangefs_dirty_inode: %pU\n",
+		     get_khandle_from_ino(inode));
+	SetAtimeFlag(orangefs_inode);
+}
+
+static const struct super_operations orangefs_s_ops = {
+	.alloc_inode = orangefs_alloc_inode,
+	.destroy_inode = orangefs_destroy_inode,
+	.dirty_inode = orangefs_dirty_inode,
+	.drop_inode = generic_delete_inode,
+	.statfs = orangefs_statfs,
+	.remount_fs = orangefs_remount_fs,
+	.show_options = generic_show_options,
+};
+
+static struct dentry *orangefs_fh_to_dentry(struct super_block *sb,
+				  struct fid *fid,
+				  int fh_len,
+				  int fh_type)
+{
+	struct orangefs_object_kref refn;
+
+	if (fh_len < 5 || fh_type > 2)
+		return NULL;
+
+	ORANGEFS_khandle_from(&(refn.khandle), fid->raw, 16);
+	refn.fs_id = (u32) fid->raw[4];
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "fh_to_dentry: handle %pU, fs_id %d\n",
+		     &refn.khandle,
+		     refn.fs_id);
+
+	return d_obtain_alias(orangefs_iget(sb, &refn));
+}
+
+static int orangefs_encode_fh(struct inode *inode,
+		    __u32 *fh,
+		    int *max_len,
+		    struct inode *parent)
+{
+	int len = parent ? 10 : 5;
+	int type = 1;
+	struct orangefs_object_kref refn;
+
+	if (*max_len < len) {
+		gossip_lerr("fh buffer is too small for encoding\n");
+		*max_len = len;
+		type = 255;
+		goto out;
+	}
+
+	refn = ORANGEFS_I(inode)->refn;
+	ORANGEFS_khandle_to(&refn.khandle, fh, 16);
+	fh[4] = refn.fs_id;
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Encoding fh: handle %pU, fsid %u\n",
+		     &refn.khandle,
+		     refn.fs_id);
+
+
+	if (parent) {
+		refn = ORANGEFS_I(parent)->refn;
+		ORANGEFS_khandle_to(&refn.khandle, (char *) fh + 20, 16);
+		fh[9] = refn.fs_id;
+
+		type = 2;
+		gossip_debug(GOSSIP_SUPER_DEBUG,
+			     "Encoding parent: handle %pU, fsid %u\n",
+			     &refn.khandle,
+			     refn.fs_id);
+	}
+	*max_len = len;
+
+out:
+	return type;
+}
+
+static const struct export_operations orangefs_export_ops = {
+	.encode_fh = orangefs_encode_fh,
+	.fh_to_dentry = orangefs_fh_to_dentry,
+};
+
+static int orangefs_fill_sb(struct super_block *sb,
+		struct orangefs_fs_mount_response *fs_mount,
+		void *data, int silent)
+{
+	int ret = -EINVAL;
+	struct inode *root = NULL;
+	struct dentry *root_dentry = NULL;
+	struct orangefs_object_kref root_object;
+
+	/* alloc and init our private orangefs sb info */
+	sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL);
+	if (!ORANGEFS_SB(sb))
+		return -ENOMEM;
+	ORANGEFS_SB(sb)->sb = sb;
+
+	ORANGEFS_SB(sb)->root_khandle = fs_mount->root_khandle;
+	ORANGEFS_SB(sb)->fs_id = fs_mount->fs_id;
+	ORANGEFS_SB(sb)->id = fs_mount->id;
+
+	if (data) {
+		ret = parse_mount_options(sb, data, silent);
+		if (ret)
+			return ret;
+	}
+
+	/* Hang the xattr handlers off the superblock */
+	sb->s_xattr = orangefs_xattr_handlers;
+	sb->s_magic = ORANGEFS_SUPER_MAGIC;
+	sb->s_op = &orangefs_s_ops;
+	sb->s_d_op = &orangefs_dentry_operations;
+
+	sb->s_blocksize = orangefs_bufmap_size_query();
+	sb->s_blocksize_bits = orangefs_bufmap_shift_query();
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+	root_object.khandle = ORANGEFS_SB(sb)->root_khandle;
+	root_object.fs_id = ORANGEFS_SB(sb)->fs_id;
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "get inode %pU, fsid %d\n",
+		     &root_object.khandle,
+		     root_object.fs_id);
+
+	root = orangefs_iget(sb, &root_object);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Allocated root inode [%p] with mode %x\n",
+		     root,
+		     root->i_mode);
+
+	/* allocates and places root dentry in dcache */
+	root_dentry = d_make_root(root);
+	if (!root_dentry)
+		return -ENOMEM;
+
+	sb->s_export_op = &orangefs_export_ops;
+	sb->s_root = root_dentry;
+	return 0;
+}
+
+struct dentry *orangefs_mount(struct file_system_type *fst,
+			   int flags,
+			   const char *devname,
+			   void *data)
+{
+	int ret = -EINVAL;
+	struct super_block *sb = ERR_PTR(-EINVAL);
+	struct orangefs_kernel_op_s *new_op;
+	struct dentry *d = ERR_PTR(-EINVAL);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "orangefs_mount: called with devname %s\n",
+		     devname);
+
+	if (!devname) {
+		gossip_err("ERROR: device name not specified.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT);
+	if (!new_op)
+		return ERR_PTR(-ENOMEM);
+
+	strncpy(new_op->upcall.req.fs_mount.orangefs_config_server,
+		devname,
+		ORANGEFS_MAX_SERVER_ADDR_LEN);
+
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Attempting ORANGEFS Mount via host %s\n",
+		     new_op->upcall.req.fs_mount.orangefs_config_server);
+
+	ret = service_operation(new_op, "orangefs_mount", 0);
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "orangefs_mount: mount got return value of %d\n", ret);
+	if (ret)
+		goto free_op;
+
+	if (new_op->downcall.resp.fs_mount.fs_id == ORANGEFS_FS_ID_NULL) {
+		gossip_err("ERROR: Retrieved null fs_id\n");
+		ret = -EINVAL;
+		goto free_op;
+	}
+
+	sb = sget(fst, NULL, set_anon_super, flags, NULL);
+
+	if (IS_ERR(sb)) {
+		d = ERR_CAST(sb);
+		goto free_op;
+	}
+
+	ret = orangefs_fill_sb(sb,
+	      &new_op->downcall.resp.fs_mount, data,
+	      flags & MS_SILENT ? 1 : 0);
+
+	if (ret) {
+		d = ERR_PTR(ret);
+		goto free_op;
+	}
+
+	/*
+	 * on successful mount, store the devname and data
+	 * used
+	 */
+	strncpy(ORANGEFS_SB(sb)->devname,
+		devname,
+		ORANGEFS_MAX_SERVER_ADDR_LEN);
+
+	/* mount_pending must be cleared */
+	ORANGEFS_SB(sb)->mount_pending = 0;
+
+	/*
+	 * finally, add this sb to our list of known orangefs
+	 * sb's
+	 */
+	gossip_debug(GOSSIP_SUPER_DEBUG,
+		     "Adding SB %p to orangefs superblocks\n",
+		     ORANGEFS_SB(sb));
+	spin_lock(&orangefs_superblocks_lock);
+	list_add_tail(&ORANGEFS_SB(sb)->list, &orangefs_superblocks);
+	spin_unlock(&orangefs_superblocks_lock);
+	op_release(new_op);
+	return dget(sb->s_root);
+
+free_op:
+	gossip_err("orangefs_mount: mount request failed with %d\n", ret);
+	if (ret == -EINVAL) {
+		gossip_err("Ensure that all orangefs-servers have the same FS configuration files\n");
+		gossip_err("Look at pvfs2-client-core log file (typically /tmp/pvfs2-client.log) for more details\n");
+	}
+
+	op_release(new_op);
+
+	return d;
+}
+
+void orangefs_kill_sb(struct super_block *sb)
+{
+	gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_kill_sb: called\n");
+
+	/* provided sb cleanup */
+	kill_anon_super(sb);
+
+	/*
+	 * issue the unmount to userspace to tell it to remove the
+	 * dynamic mount info it has for this superblock
+	 */
+	 orangefs_unmount_sb(sb);
+
+	/* remove the sb from our list of orangefs specific sb's */
+
+	spin_lock(&orangefs_superblocks_lock);
+	__list_del_entry(&ORANGEFS_SB(sb)->list);	/* not list_del_init */
+	ORANGEFS_SB(sb)->list.prev = NULL;
+	spin_unlock(&orangefs_superblocks_lock);
+
+	/*
+	 * make sure that ORANGEFS_DEV_REMOUNT_ALL loop that might've seen us
+	 * gets completed before we free the dang thing.
+	 */
+	mutex_lock(&request_mutex);
+	mutex_unlock(&request_mutex);
+
+	/* free the orangefs superblock private data */
+	kfree(ORANGEFS_SB(sb));
+}
+
+int orangefs_inode_cache_initialize(void)
+{
+	orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache",
+					      sizeof(struct orangefs_inode_s),
+					      0,
+					      ORANGEFS_CACHE_CREATE_FLAGS,
+					      orangefs_inode_cache_ctor);
+
+	if (!orangefs_inode_cache) {
+		gossip_err("Cannot create orangefs_inode_cache\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+int orangefs_inode_cache_finalize(void)
+{
+	kmem_cache_destroy(orangefs_inode_cache);
+	return 0;
+}

diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c
new file mode 100644
index 0000000..6418dd6
--- /dev/null
+++ b/fs/orangefs/symlink.c

@@ -0,0 +1,19 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+struct inode_operations orangefs_symlink_inode_operations = {
+	.readlink = generic_readlink,
+	.get_link = simple_get_link,
+	.setattr = orangefs_setattr,
+	.getattr = orangefs_getattr,
+	.listxattr = orangefs_listxattr,
+	.setxattr = generic_setxattr,
+	.permission = orangefs_permission,
+};

diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h
new file mode 100644
index 0000000..001b202
--- /dev/null
+++ b/fs/orangefs/upcall.h

@@ -0,0 +1,246 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#ifndef __UPCALL_H
+#define __UPCALL_H
+
+/*
+ * Sanitized this header file to fix
+ * 32-64 bit interaction issues between
+ * client-core and device
+ */
+struct orangefs_io_request_s {
+	__s32 __pad1;
+	__s32 buf_index;
+	__s32 count;
+	__s32 __pad2;
+	__s64 offset;
+	struct orangefs_object_kref refn;
+	enum ORANGEFS_io_type io_type;
+	__s32 readahead_size;
+};
+
+struct orangefs_lookup_request_s {
+	__s32 sym_follow;
+	__s32 __pad1;
+	struct orangefs_object_kref parent_refn;
+	char d_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_create_request_s {
+	struct orangefs_object_kref parent_refn;
+	struct ORANGEFS_sys_attr_s attributes;
+	char d_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_symlink_request_s {
+	struct orangefs_object_kref parent_refn;
+	struct ORANGEFS_sys_attr_s attributes;
+	char entry_name[ORANGEFS_NAME_MAX];
+	char target[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_getattr_request_s {
+	struct orangefs_object_kref refn;
+	__u32 mask;
+	__u32 __pad1;
+};
+
+struct orangefs_setattr_request_s {
+	struct orangefs_object_kref refn;
+	struct ORANGEFS_sys_attr_s attributes;
+};
+
+struct orangefs_remove_request_s {
+	struct orangefs_object_kref parent_refn;
+	char d_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_mkdir_request_s {
+	struct orangefs_object_kref parent_refn;
+	struct ORANGEFS_sys_attr_s attributes;
+	char d_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_readdir_request_s {
+	struct orangefs_object_kref refn;
+	__u64 token;
+	__s32 max_dirent_count;
+	__s32 buf_index;
+};
+
+struct orangefs_readdirplus_request_s {
+	struct orangefs_object_kref refn;
+	__u64 token;
+	__s32 max_dirent_count;
+	__u32 mask;
+	__s32 buf_index;
+	__s32 __pad1;
+};
+
+struct orangefs_rename_request_s {
+	struct orangefs_object_kref old_parent_refn;
+	struct orangefs_object_kref new_parent_refn;
+	char d_old_name[ORANGEFS_NAME_MAX];
+	char d_new_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_statfs_request_s {
+	__s32 fs_id;
+	__s32 __pad1;
+};
+
+struct orangefs_truncate_request_s {
+	struct orangefs_object_kref refn;
+	__s64 size;
+};
+
+struct orangefs_mmap_ra_cache_flush_request_s {
+	struct orangefs_object_kref refn;
+};
+
+struct orangefs_fs_mount_request_s {
+	char orangefs_config_server[ORANGEFS_MAX_SERVER_ADDR_LEN];
+};
+
+struct orangefs_fs_umount_request_s {
+	__s32 id;
+	__s32 fs_id;
+	char orangefs_config_server[ORANGEFS_MAX_SERVER_ADDR_LEN];
+};
+
+struct orangefs_getxattr_request_s {
+	struct orangefs_object_kref refn;
+	__s32 key_sz;
+	__s32 __pad1;
+	char key[ORANGEFS_MAX_XATTR_NAMELEN];
+};
+
+struct orangefs_setxattr_request_s {
+	struct orangefs_object_kref refn;
+	struct ORANGEFS_keyval_pair keyval;
+	__s32 flags;
+	__s32 __pad1;
+};
+
+struct orangefs_listxattr_request_s {
+	struct orangefs_object_kref refn;
+	__s32 requested_count;
+	__s32 __pad1;
+	__u64 token;
+};
+
+struct orangefs_removexattr_request_s {
+	struct orangefs_object_kref refn;
+	__s32 key_sz;
+	__s32 __pad1;
+	char key[ORANGEFS_MAX_XATTR_NAMELEN];
+};
+
+struct orangefs_op_cancel_s {
+	__u64 op_tag;
+};
+
+struct orangefs_fsync_request_s {
+	struct orangefs_object_kref refn;
+};
+
+enum orangefs_param_request_type {
+	ORANGEFS_PARAM_REQUEST_SET = 1,
+	ORANGEFS_PARAM_REQUEST_GET = 2
+};
+
+enum orangefs_param_request_op {
+	ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS = 1,
+	ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT = 2,
+	ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT = 3,
+	ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE = 4,
+	ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS = 5,
+	ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE = 6,
+	ORANGEFS_PARAM_REQUEST_OP_PERF_RESET = 7,
+	ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS = 8,
+	ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT = 9,
+	ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT = 10,
+	ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11,
+	ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS = 12,
+	ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT = 13,
+	ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT = 14,
+	ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE = 15,
+	ORANGEFS_PARAM_REQUEST_OP_CLIENT_DEBUG = 16,
+	ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS = 17,
+	ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT = 18,
+	ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT = 19,
+	ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE = 20,
+	ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS = 21,
+	ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT = 22,
+	ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT = 23,
+	ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE = 24,
+	ORANGEFS_PARAM_REQUEST_OP_TWO_MASK_VALUES = 25,
+};
+
+struct orangefs_param_request_s {
+	enum orangefs_param_request_type type;
+	enum orangefs_param_request_op op;
+	__s64 value;
+	char s_value[ORANGEFS_MAX_DEBUG_STRING_LEN];
+};
+
+enum orangefs_perf_count_request_type {
+	ORANGEFS_PERF_COUNT_REQUEST_ACACHE = 1,
+	ORANGEFS_PERF_COUNT_REQUEST_NCACHE = 2,
+	ORANGEFS_PERF_COUNT_REQUEST_CAPCACHE = 3,
+};
+
+struct orangefs_perf_count_request_s {
+	enum orangefs_perf_count_request_type type;
+	__s32 __pad1;
+};
+
+struct orangefs_fs_key_request_s {
+	__s32 fsid;
+	__s32 __pad1;
+};
+
+struct orangefs_upcall_s {
+	__s32 type;
+	__u32 uid;
+	__u32 gid;
+	int pid;
+	int tgid;
+	/* Trailers unused but must be retained for protocol compatibility. */
+	__s64 trailer_size;
+	char *trailer_buf;
+
+	union {
+		struct orangefs_io_request_s io;
+		struct orangefs_lookup_request_s lookup;
+		struct orangefs_create_request_s create;
+		struct orangefs_symlink_request_s sym;
+		struct orangefs_getattr_request_s getattr;
+		struct orangefs_setattr_request_s setattr;
+		struct orangefs_remove_request_s remove;
+		struct orangefs_mkdir_request_s mkdir;
+		struct orangefs_readdir_request_s readdir;
+		struct orangefs_readdirplus_request_s readdirplus;
+		struct orangefs_rename_request_s rename;
+		struct orangefs_statfs_request_s statfs;
+		struct orangefs_truncate_request_s truncate;
+		struct orangefs_mmap_ra_cache_flush_request_s ra_cache_flush;
+		struct orangefs_fs_mount_request_s fs_mount;
+		struct orangefs_fs_umount_request_s fs_umount;
+		struct orangefs_getxattr_request_s getxattr;
+		struct orangefs_setxattr_request_s setxattr;
+		struct orangefs_listxattr_request_s listxattr;
+		struct orangefs_removexattr_request_s removexattr;
+		struct orangefs_op_cancel_s cancel;
+		struct orangefs_fsync_request_s fsync;
+		struct orangefs_param_request_s param;
+		struct orangefs_perf_count_request_s perf_count;
+		struct orangefs_fs_key_request_s fs_key;
+	} req;
+};
+
+#endif /* __UPCALL_H */

diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
new file mode 100644
index 0000000..31635bc
--- /dev/null
+++ b/fs/orangefs/waitqueue.c

@@ -0,0 +1,357 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ * (C) 2011 Omnibond Systems
+ *
+ * Changes by Acxiom Corporation to implement generic service_operation()
+ * function, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  In-kernel waitqueue operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+static int wait_for_matching_downcall(struct orangefs_kernel_op_s *, long, bool);
+static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *);
+
+/*
+ * What we do in this function is to walk the list of operations that are
+ * present in the request queue and mark them as purged.
+ * NOTE: This is called from the device close after client-core has
+ * guaranteed that no new operations could appear on the list since the
+ * client-core is anyway going to exit.
+ */
+void purge_waiting_ops(void)
+{
+	struct orangefs_kernel_op_s *op;
+
+	spin_lock(&orangefs_request_list_lock);
+	list_for_each_entry(op, &orangefs_request_list, list) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "pvfs2-client-core: purging op tag %llu %s\n",
+			     llu(op->tag),
+			     get_opname_string(op));
+		set_op_state_purged(op);
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "%s: op:%s: op_state:%d: process:%s:\n",
+			     __func__,
+			     get_opname_string(op),
+			     op->op_state,
+			     current->comm);
+	}
+	spin_unlock(&orangefs_request_list_lock);
+}
+
+/*
+ * submits a ORANGEFS operation and waits for it to complete
+ *
+ * Note op->downcall.status will contain the status of the operation (in
+ * errno format), whether provided by pvfs2-client or a result of failure to
+ * service the operation.  If the caller wishes to distinguish, then
+ * op->state can be checked to see if it was serviced or not.
+ *
+ * Returns contents of op->downcall.status for convenience
+ */
+int service_operation(struct orangefs_kernel_op_s *op,
+		      const char *op_name,
+		      int flags)
+{
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+	int ret = 0;
+
+	DEFINE_WAIT(wait_entry);
+
+	op->upcall.tgid = current->tgid;
+	op->upcall.pid = current->pid;
+
+retry_servicing:
+	op->downcall.status = 0;
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "%s: %s op:%p: process:%s: pid:%d:\n",
+		     __func__,
+		     op_name,
+		     op,
+		     current->comm,
+		     current->pid);
+
+	/*
+	 * If ORANGEFS_OP_NO_MUTEX was set in flags, we need to avoid
+	 * acquiring the request_mutex because we're servicing a
+	 * high priority remount operation and the request_mutex is
+	 * already taken.
+	 */
+	if (!(flags & ORANGEFS_OP_NO_MUTEX)) {
+		if (flags & ORANGEFS_OP_INTERRUPTIBLE)
+			ret = mutex_lock_interruptible(&request_mutex);
+		else
+			ret = mutex_lock_killable(&request_mutex);
+		/*
+		 * check to see if we were interrupted while waiting for
+		 * mutex
+		 */
+		if (ret < 0) {
+			op->downcall.status = ret;
+			gossip_debug(GOSSIP_WAIT_DEBUG,
+				     "%s: service_operation interrupted.\n",
+				     __func__);
+			return ret;
+		}
+	}
+
+	/* queue up the operation */
+	spin_lock(&orangefs_request_list_lock);
+	spin_lock(&op->lock);
+	set_op_state_waiting(op);
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "%s: op:%s: op_state:%d: process:%s:\n",
+		     __func__,
+		     get_opname_string(op),
+		     op->op_state,
+		     current->comm);
+	/* add high priority remount op to the front of the line. */
+	if (flags & ORANGEFS_OP_PRIORITY)
+		list_add(&op->list, &orangefs_request_list);
+	else
+		list_add_tail(&op->list, &orangefs_request_list);
+	spin_unlock(&op->lock);
+	wake_up_interruptible(&orangefs_request_list_waitq);
+	if (!__is_daemon_in_service()) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:client core is NOT in service.\n",
+			     __func__);
+		timeout = op_timeout_secs * HZ;
+	}
+	spin_unlock(&orangefs_request_list_lock);
+
+	if (!(flags & ORANGEFS_OP_NO_MUTEX))
+		mutex_unlock(&request_mutex);
+
+	ret = wait_for_matching_downcall(op, timeout,
+					 flags & ORANGEFS_OP_INTERRUPTIBLE);
+
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "%s: wait_for_matching_downcall returned %d for %p\n",
+		     __func__,
+		     ret,
+		     op);
+
+	/* got matching downcall; make sure status is in errno format */
+	if (!ret) {
+		spin_unlock(&op->lock);
+		op->downcall.status =
+		    orangefs_normalize_to_errno(op->downcall.status);
+		ret = op->downcall.status;
+		goto out;
+	}
+
+	/* failed to get matching downcall */
+	if (ret == -ETIMEDOUT) {
+		gossip_err("%s: %s -- wait timed out; aborting attempt.\n",
+			   __func__,
+			   op_name);
+	}
+
+	/*
+	 * remove a waiting op from the request list or
+	 * remove an in-progress op from the in-progress list.
+	 */
+	orangefs_clean_up_interrupted_operation(op);
+
+	op->downcall.status = ret;
+	/* retry if operation has not been serviced and if requested */
+	if (ret == -EAGAIN) {
+		op->attempts++;
+		timeout = op_timeout_secs * HZ;
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "orangefs: tag %llu (%s)"
+			     " -- operation to be retried (%d attempt)\n",
+			     llu(op->tag),
+			     op_name,
+			     op->attempts);
+
+		/*
+		 * io ops (ops that use the shared memory buffer) have
+		 * to be returned to their caller for a retry. Other ops
+		 * can just be recycled here.
+		 */
+		if (!op->uses_shared_memory)
+			goto retry_servicing;
+	}
+
+out:
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "%s: %s returning: %d for %p.\n",
+		     __func__,
+		     op_name,
+		     ret,
+		     op);
+	return ret;
+}
+
+/* This can get called on an I/O op if it had a bad service_operation. */
+bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op)
+{
+	u64 tag = op->tag;
+	if (!op_state_in_progress(op))
+		return false;
+
+	op->slot_to_free = op->upcall.req.io.buf_index;
+	memset(&op->upcall, 0, sizeof(op->upcall));
+	memset(&op->downcall, 0, sizeof(op->downcall));
+	op->upcall.type = ORANGEFS_VFS_OP_CANCEL;
+	op->upcall.req.cancel.op_tag = tag;
+	op->downcall.type = ORANGEFS_VFS_OP_INVALID;
+	op->downcall.status = -1;
+	orangefs_new_tag(op);
+
+	spin_lock(&orangefs_request_list_lock);
+	/* orangefs_request_list_lock is enough of a barrier here */
+	if (!__is_daemon_in_service()) {
+		spin_unlock(&orangefs_request_list_lock);
+		return false;
+	}
+	spin_lock(&op->lock);
+	set_op_state_waiting(op);
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "%s: op:%s: op_state:%d: process:%s:\n",
+		     __func__,
+		     get_opname_string(op),
+		     op->op_state,
+		     current->comm);
+	list_add(&op->list, &orangefs_request_list);
+	spin_unlock(&op->lock);
+	spin_unlock(&orangefs_request_list_lock);
+
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "Attempting ORANGEFS operation cancellation of tag %llu\n",
+		     llu(tag));
+	return true;
+}
+
+/*
+ * Change an op to the "given up" state and remove it from its list.
+ */
+static void
+	orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
+{
+	/*
+	 * handle interrupted cases depending on what state we were in when
+	 * the interruption is detected.
+	 *
+	 * Called with op->lock held.
+	 */
+
+	/*
+	 * List manipulation code elsewhere will ignore ops that
+	 * have been given up upon.
+	 */
+	op->op_state |= OP_VFS_STATE_GIVEN_UP;
+
+	if (list_empty(&op->list)) {
+		/* caught copying to/from daemon */
+		BUG_ON(op_state_serviced(op));
+		spin_unlock(&op->lock);
+		wait_for_completion(&op->waitq);
+	} else if (op_state_waiting(op)) {
+		/*
+		 * upcall hasn't been read; remove op from upcall request
+		 * list.
+		 */
+		spin_unlock(&op->lock);
+		spin_lock(&orangefs_request_list_lock);
+		list_del_init(&op->list);
+		spin_unlock(&orangefs_request_list_lock);
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "Interrupted: Removed op %p from request_list\n",
+			     op);
+	} else if (op_state_in_progress(op)) {
+		/* op must be removed from the in progress htable */
+		spin_unlock(&op->lock);
+		spin_lock(&htable_ops_in_progress_lock);
+		list_del_init(&op->list);
+		spin_unlock(&htable_ops_in_progress_lock);
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "Interrupted: Removed op %p"
+			     " from htable_ops_in_progress\n",
+			     op);
+	} else {
+		spin_unlock(&op->lock);
+		gossip_err("interrupted operation is in a weird state 0x%x\n",
+			   op->op_state);
+	}
+	reinit_completion(&op->waitq);
+}
+
+/*
+ * Sleeps on waitqueue waiting for matching downcall.
+ * If client-core finishes servicing, then we are good to go.
+ * else if client-core exits, we get woken up here, and retry with a timeout
+ *
+ * When this call returns to the caller, the specified op will no
+ * longer be in either the in_progress hash table or on the request list.
+ *
+ * Returns 0 on success and -errno on failure
+ * Errors are:
+ * EAGAIN in case we want the caller to requeue and try again..
+ * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
+ * operation since client-core seems to be exiting too often
+ * or if we were interrupted.
+ *
+ * Returns with op->lock taken.
+ */
+static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op,
+				      long timeout,
+				      bool interruptible)
+{
+	long n;
+
+	/*
+	 * There's a "schedule_timeout" inside of these wait
+	 * primitives, during which the op is out of the hands of the
+	 * user process that needs something done and is being
+	 * manipulated by the client-core process.
+	 */
+	if (interruptible)
+		n = wait_for_completion_interruptible_timeout(&op->waitq,
+							      timeout);
+	else
+		n = wait_for_completion_killable_timeout(&op->waitq, timeout);
+
+	spin_lock(&op->lock);
+
+	if (op_state_serviced(op))
+		return 0;
+
+	if (unlikely(n < 0)) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s: operation interrupted, tag %llu, %p\n",
+			     __func__,
+			     llu(op->tag),
+			     op);
+		return -EINTR;
+	}
+	if (op_state_purged(op)) {
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s: operation purged, tag %llu, %p, %d\n",
+			     __func__,
+			     llu(op->tag),
+			     op,
+			     op->attempts);
+		return (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
+			 -EAGAIN :
+			 -EIO;
+	}
+	/* must have timed out, then... */
+	gossip_debug(GOSSIP_WAIT_DEBUG,
+		     "%s: operation timed out, tag %llu, %p, %d)\n",
+		     __func__,
+		     llu(op->tag),
+		     op,
+		     op->attempts);
+	return -ETIMEDOUT;
+}

diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c
new file mode 100644
index 0000000..ef5da75
--- /dev/null
+++ b/fs/orangefs/xattr.c

@@ -0,0 +1,545 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS extended attribute operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+#define SYSTEM_ORANGEFS_KEY "system.pvfs2."
+#define SYSTEM_ORANGEFS_KEY_LEN 13
+
+/*
+ * this function returns
+ *   0 if the key corresponding to name is not meant to be printed as part
+ *     of a listxattr.
+ *   1 if the key corresponding to name is meant to be returned as part of
+ *     a listxattr.
+ * The ones that start SYSTEM_ORANGEFS_KEY are the ones to avoid printing.
+ */
+static int is_reserved_key(const char *key, size_t size)
+{
+
+	if (size < SYSTEM_ORANGEFS_KEY_LEN)
+		return 1;
+
+	return strncmp(key, SYSTEM_ORANGEFS_KEY, SYSTEM_ORANGEFS_KEY_LEN) ?  1 : 0;
+}
+
+static inline int convert_to_internal_xattr_flags(int setxattr_flags)
+{
+	int internal_flag = 0;
+
+	if (setxattr_flags & XATTR_REPLACE) {
+		/* Attribute must exist! */
+		internal_flag = ORANGEFS_XATTR_REPLACE;
+	} else if (setxattr_flags & XATTR_CREATE) {
+		/* Attribute must not exist */
+		internal_flag = ORANGEFS_XATTR_CREATE;
+	}
+	return internal_flag;
+}
+
+
+/*
+ * Tries to get a specified key's attributes of a given
+ * file into a user-specified buffer. Note that the getxattr
+ * interface allows for the users to probe the size of an
+ * extended attribute by passing in a value of 0 to size.
+ * Thus our return value is always the size of the attribute
+ * unless the key does not exist for the file and/or if
+ * there were errors in fetching the attribute value.
+ */
+ssize_t orangefs_inode_getxattr(struct inode *inode, const char *prefix,
+		const char *name, void *buffer, size_t size)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_kernel_op_s *new_op = NULL;
+	ssize_t ret = -ENOMEM;
+	ssize_t length = 0;
+	int fsuid;
+	int fsgid;
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "%s: prefix %s name %s, buffer_size %zd\n",
+		     __func__, prefix, name, size);
+
+	if (name == NULL || (size > 0 && buffer == NULL)) {
+		gossip_err("orangefs_inode_getxattr: bogus NULL pointers\n");
+		return -EINVAL;
+	}
+	if ((strlen(name) + strlen(prefix)) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+		gossip_err("Invalid key length (%d)\n",
+			   (int)(strlen(name) + strlen(prefix)));
+		return -EINVAL;
+	}
+
+	fsuid = from_kuid(current_user_ns(), current_fsuid());
+	fsgid = from_kgid(current_user_ns(), current_fsgid());
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "getxattr on inode %pU, name %s "
+		     "(uid %o, gid %o)\n",
+		     get_khandle_from_ino(inode),
+		     name,
+		     fsuid,
+		     fsgid);
+
+	down_read(&orangefs_inode->xattr_sem);
+
+	new_op = op_alloc(ORANGEFS_VFS_OP_GETXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	new_op->upcall.req.getxattr.refn = orangefs_inode->refn;
+	ret = snprintf((char *)new_op->upcall.req.getxattr.key,
+		       ORANGEFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name);
+
+	/*
+	 * NOTE: Although keys are meant to be NULL terminated textual
+	 * strings, I am going to explicitly pass the length just in case
+	 * we change this later on...
+	 */
+	new_op->upcall.req.getxattr.key_sz = ret + 1;
+
+	ret = service_operation(new_op, "orangefs_inode_getxattr",
+				get_interruptible_flag(inode));
+	if (ret != 0) {
+		if (ret == -ENOENT) {
+			ret = -ENODATA;
+			gossip_debug(GOSSIP_XATTR_DEBUG,
+				     "orangefs_inode_getxattr: inode %pU key %s"
+				     " does not exist!\n",
+				     get_khandle_from_ino(inode),
+				     (char *)new_op->upcall.req.getxattr.key);
+		}
+		goto out_release_op;
+	}
+
+	/*
+	 * Length returned includes null terminator.
+	 */
+	length = new_op->downcall.resp.getxattr.val_sz;
+
+	/*
+	 * Just return the length of the queried attribute.
+	 */
+	if (size == 0) {
+		ret = length;
+		goto out_release_op;
+	}
+
+	/*
+	 * Check to see if key length is > provided buffer size.
+	 */
+	if (length > size) {
+		ret = -ERANGE;
+		goto out_release_op;
+	}
+
+	memset(buffer, 0, size);
+	memcpy(buffer, new_op->downcall.resp.getxattr.val, length);
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+	     "orangefs_inode_getxattr: inode %pU "
+	     "key %s key_sz %d, val_len %d\n",
+	     get_khandle_from_ino(inode),
+	     (char *)new_op->
+		upcall.req.getxattr.key,
+		     (int)new_op->
+		upcall.req.getxattr.key_sz,
+	     (int)ret);
+
+	ret = length;
+
+out_release_op:
+	op_release(new_op);
+out_unlock:
+	up_read(&orangefs_inode->xattr_sem);
+	return ret;
+}
+
+static int orangefs_inode_removexattr(struct inode *inode,
+			    const char *prefix,
+			    const char *name,
+			    int flags)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_kernel_op_s *new_op = NULL;
+	int ret = -ENOMEM;
+
+	down_write(&orangefs_inode->xattr_sem);
+	new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	new_op->upcall.req.removexattr.refn = orangefs_inode->refn;
+	/*
+	 * NOTE: Although keys are meant to be NULL terminated
+	 * textual strings, I am going to explicitly pass the
+	 * length just in case we change this later on...
+	 */
+	ret = snprintf((char *)new_op->upcall.req.removexattr.key,
+		       ORANGEFS_MAX_XATTR_NAMELEN,
+		       "%s%s",
+		       (prefix ? prefix : ""),
+		       name);
+	new_op->upcall.req.removexattr.key_sz = ret + 1;
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "orangefs_inode_removexattr: key %s, key_sz %d\n",
+		     (char *)new_op->upcall.req.removexattr.key,
+		     (int)new_op->upcall.req.removexattr.key_sz);
+
+	ret = service_operation(new_op,
+				"orangefs_inode_removexattr",
+				get_interruptible_flag(inode));
+	if (ret == -ENOENT) {
+		/*
+		 * Request to replace a non-existent attribute is an error.
+		 */
+		if (flags & XATTR_REPLACE)
+			ret = -ENODATA;
+		else
+			ret = 0;
+	}
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "orangefs_inode_removexattr: returning %d\n", ret);
+
+	op_release(new_op);
+out_unlock:
+	up_write(&orangefs_inode->xattr_sem);
+	return ret;
+}
+
+/*
+ * Tries to set an attribute for a given key on a file.
+ *
+ * Returns a -ve number on error and 0 on success.  Key is text, but value
+ * can be binary!
+ */
+int orangefs_inode_setxattr(struct inode *inode, const char *prefix,
+		const char *name, const void *value, size_t size, int flags)
+{
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_kernel_op_s *new_op;
+	int internal_flag = 0;
+	int ret = -ENOMEM;
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "%s: prefix %s, name %s, buffer_size %zd\n",
+		     __func__, prefix, name, size);
+
+	if (size < 0 ||
+	    size >= ORANGEFS_MAX_XATTR_VALUELEN ||
+	    flags < 0) {
+		gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n",
+			   (int)size,
+			   flags);
+		return -EINVAL;
+	}
+
+	if (name == NULL ||
+	    (size > 0 && value == NULL)) {
+		gossip_err("orangefs_inode_setxattr: bogus NULL pointers!\n");
+		return -EINVAL;
+	}
+
+	internal_flag = convert_to_internal_xattr_flags(flags);
+
+	if (prefix) {
+		if (strlen(name) + strlen(prefix) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+			gossip_err
+			    ("orangefs_inode_setxattr: bogus key size (%d)\n",
+			     (int)(strlen(name) + strlen(prefix)));
+			return -EINVAL;
+		}
+	} else {
+		if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+			gossip_err
+			    ("orangefs_inode_setxattr: bogus key size (%d)\n",
+			     (int)(strlen(name)));
+			return -EINVAL;
+		}
+	}
+
+	/* This is equivalent to a removexattr */
+	if (size == 0 && value == NULL) {
+		gossip_debug(GOSSIP_XATTR_DEBUG,
+			     "removing xattr (%s%s)\n",
+			     prefix,
+			     name);
+		return orangefs_inode_removexattr(inode, prefix, name, flags);
+	}
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "setxattr on inode %pU, name %s\n",
+		     get_khandle_from_ino(inode),
+		     name);
+
+	down_write(&orangefs_inode->xattr_sem);
+	new_op = op_alloc(ORANGEFS_VFS_OP_SETXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+
+	new_op->upcall.req.setxattr.refn = orangefs_inode->refn;
+	new_op->upcall.req.setxattr.flags = internal_flag;
+	/*
+	 * NOTE: Although keys are meant to be NULL terminated textual
+	 * strings, I am going to explicitly pass the length just in
+	 * case we change this later on...
+	 */
+	ret = snprintf((char *)new_op->upcall.req.setxattr.keyval.key,
+		       ORANGEFS_MAX_XATTR_NAMELEN,
+		       "%s%s",
+		       prefix, name);
+	new_op->upcall.req.setxattr.keyval.key_sz = ret + 1;
+	memcpy(new_op->upcall.req.setxattr.keyval.val, value, size);
+	new_op->upcall.req.setxattr.keyval.val_sz = size;
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "orangefs_inode_setxattr: key %s, key_sz %d "
+		     " value size %zd\n",
+		     (char *)new_op->upcall.req.setxattr.keyval.key,
+		     (int)new_op->upcall.req.setxattr.keyval.key_sz,
+		     size);
+
+	ret = service_operation(new_op,
+				"orangefs_inode_setxattr",
+				get_interruptible_flag(inode));
+
+	gossip_debug(GOSSIP_XATTR_DEBUG,
+		     "orangefs_inode_setxattr: returning %d\n",
+		     ret);
+
+	/* when request is serviced properly, free req op struct */
+	op_release(new_op);
+out_unlock:
+	up_write(&orangefs_inode->xattr_sem);
+	return ret;
+}
+
+/*
+ * Tries to get a specified object's keys into a user-specified buffer of a
+ * given size.  Note that like the previous instances of xattr routines, this
+ * also allows you to pass in a NULL pointer and 0 size to probe the size for
+ * subsequent memory allocations. Thus our return value is always the size of
+ * all the keys unless there were errors in fetching the keys!
+ */
+ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+	struct inode *inode = dentry->d_inode;
+	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+	struct orangefs_kernel_op_s *new_op;
+	__u64 token = ORANGEFS_ITERATE_START;
+	ssize_t ret = -ENOMEM;
+	ssize_t total = 0;
+	int count_keys = 0;
+	int key_size;
+	int i = 0;
+	int returned_count = 0;
+
+	if (size > 0 && buffer == NULL) {
+		gossip_err("%s: bogus NULL pointers\n", __func__);
+		return -EINVAL;
+	}
+	if (size < 0) {
+		gossip_err("Invalid size (%d)\n", (int)size);
+		return -EINVAL;
+	}
+
+	down_read(&orangefs_inode->xattr_sem);
+	new_op = op_alloc(ORANGEFS_VFS_OP_LISTXATTR);
+	if (!new_op)
+		goto out_unlock;
+
+	if (buffer && size > 0)
+		memset(buffer, 0, size);
+
+try_again:
+	key_size = 0;
+	new_op->upcall.req.listxattr.refn = orangefs_inode->refn;
+	new_op->upcall.req.listxattr.token = token;
+	new_op->upcall.req.listxattr.requested_count =
+	    (size == 0) ? 0 : ORANGEFS_MAX_XATTR_LISTLEN;
+	ret = service_operation(new_op, __func__,
+				get_interruptible_flag(inode));
+	if (ret != 0)
+		goto done;
+
+	if (size == 0) {
+		/*
+		 * This is a bit of a big upper limit, but I did not want to
+		 * spend too much time getting this correct, since users end
+		 * up allocating memory rather than us...
+		 */
+		total = new_op->downcall.resp.listxattr.returned_count *
+			ORANGEFS_MAX_XATTR_NAMELEN;
+		goto done;
+	}
+
+	returned_count = new_op->downcall.resp.listxattr.returned_count;
+	if (returned_count < 0 ||
+	    returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) {
+		gossip_err("%s: impossible value for returned_count:%d:\n",
+		__func__,
+		returned_count);
+		ret = -EIO;
+		goto done;
+	}
+
+	/*
+	 * Check to see how much can be fit in the buffer. Fit only whole keys.
+	 */
+	for (i = 0; i < returned_count; i++) {
+		if (new_op->downcall.resp.listxattr.lengths[i] < 0 ||
+		    new_op->downcall.resp.listxattr.lengths[i] >
+		    ORANGEFS_MAX_XATTR_NAMELEN) {
+			gossip_err("%s: impossible value for lengths[%d]\n",
+			    __func__,
+			    new_op->downcall.resp.listxattr.lengths[i]);
+			ret = -EIO;
+			goto done;
+		}
+		if (total + new_op->downcall.resp.listxattr.lengths[i] > size)
+			goto done;
+
+		/*
+		 * Since many dumb programs try to setxattr() on our reserved
+		 * xattrs this is a feeble attempt at defeating those by not
+		 * listing them in the output of listxattr.. sigh
+		 */
+		if (is_reserved_key(new_op->downcall.resp.listxattr.key +
+				    key_size,
+				    new_op->downcall.resp.
+					listxattr.lengths[i])) {
+			gossip_debug(GOSSIP_XATTR_DEBUG, "Copying key %d -> %s\n",
+					i, new_op->downcall.resp.listxattr.key +
+						key_size);
+			memcpy(buffer + total,
+				new_op->downcall.resp.listxattr.key + key_size,
+				new_op->downcall.resp.listxattr.lengths[i]);
+			total += new_op->downcall.resp.listxattr.lengths[i];
+			count_keys++;
+		} else {
+			gossip_debug(GOSSIP_XATTR_DEBUG, "[RESERVED] key %d -> %s\n",
+					i, new_op->downcall.resp.listxattr.key +
+						key_size);
+		}
+		key_size += new_op->downcall.resp.listxattr.lengths[i];
+	}
+
+	/*
+	 * Since the buffer was large enough, we might have to continue
+	 * fetching more keys!
+	 */
+	token = new_op->downcall.resp.listxattr.token;
+	if (token != ORANGEFS_ITERATE_END)
+		goto try_again;
+
+done:
+	gossip_debug(GOSSIP_XATTR_DEBUG, "%s: returning %d"
+		     " [size of buffer %ld] (filled in %d keys)\n",
+		     __func__,
+		     ret ? (int)ret : (int)total,
+		     (long)size,
+		     count_keys);
+	op_release(new_op);
+	if (ret == 0)
+		ret = total;
+out_unlock:
+	up_read(&orangefs_inode->xattr_sem);
+	return ret;
+}
+
+static int orangefs_xattr_set_default(const struct xattr_handler *handler,
+				      struct dentry *dentry,
+				      const char *name,
+				      const void *buffer,
+				      size_t size,
+				      int flags)
+{
+	return orangefs_inode_setxattr(dentry->d_inode,
+				    ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+				    name,
+				    buffer,
+				    size,
+				    flags);
+}
+
+static int orangefs_xattr_get_default(const struct xattr_handler *handler,
+				      struct dentry *dentry,
+				      const char *name,
+				      void *buffer,
+				      size_t size)
+{
+	return orangefs_inode_getxattr(dentry->d_inode,
+				    ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+				    name,
+				    buffer,
+				    size);
+
+}
+
+static int orangefs_xattr_set_trusted(const struct xattr_handler *handler,
+				     struct dentry *dentry,
+				     const char *name,
+				     const void *buffer,
+				     size_t size,
+				     int flags)
+{
+	return orangefs_inode_setxattr(dentry->d_inode,
+				    ORANGEFS_XATTR_NAME_TRUSTED_PREFIX,
+				    name,
+				    buffer,
+				    size,
+				    flags);
+}
+
+static int orangefs_xattr_get_trusted(const struct xattr_handler *handler,
+				      struct dentry *dentry,
+				      const char *name,
+				      void *buffer,
+				      size_t size)
+{
+	return orangefs_inode_getxattr(dentry->d_inode,
+				    ORANGEFS_XATTR_NAME_TRUSTED_PREFIX,
+				    name,
+				    buffer,
+				    size);
+}
+
+static struct xattr_handler orangefs_xattr_trusted_handler = {
+	.prefix = ORANGEFS_XATTR_NAME_TRUSTED_PREFIX,
+	.get = orangefs_xattr_get_trusted,
+	.set = orangefs_xattr_set_trusted,
+};
+
+static struct xattr_handler orangefs_xattr_default_handler = {
+	/*
+	 * NOTE: this is set to be the empty string.
+	 * so that all un-prefixed xattrs keys get caught
+	 * here!
+	 */
+	.prefix = ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+	.get = orangefs_xattr_get_default,
+	.set = orangefs_xattr_set_default,
+};
+
+const struct xattr_handler *orangefs_xattr_handlers[] = {
+	&posix_acl_access_xattr_handler,
+	&posix_acl_default_xattr_handler,
+	&orangefs_xattr_trusted_handler,
+	&orangefs_xattr_default_handler,
+	NULL
+};

diff --git a/fs/pipe.c b/fs/pipe.c
index ab8dad3..0d3f516 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c

@@ -134,7 +134,7 @@
 	if (page_count(page) == 1 && !pipe->tmp_page)
 		pipe->tmp_page = page;
 	else
-		page_cache_release(page);
+		put_page(page);
 }
 
 /**
@@ -180,7 +180,7 @@
  */
 void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 {
-	page_cache_get(buf->page);
+	get_page(buf->page);
 }
 EXPORT_SYMBOL(generic_pipe_buf_get);
 
@@ -211,7 +211,7 @@
 void generic_pipe_buf_release(struct pipe_inode_info *pipe,
 			      struct pipe_buffer *buf)
 {
-	page_cache_release(buf->page);
+	put_page(buf->page);
 }
 EXPORT_SYMBOL(generic_pipe_buf_release);
 

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9df4316..229cb54 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c

@@ -553,7 +553,7 @@
 		if (radix_tree_exceptional_entry(page))
 			mss->swap += PAGE_SIZE;
 		else
-			page_cache_release(page);
+			put_page(page);
 
 		return;
 	}

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 55bb57e..8afe10c 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c

@@ -279,12 +279,12 @@
 	if (!page)
 		return VM_FAULT_OOM;
 	if (!PageUptodate(page)) {
-		offset = (loff_t) index << PAGE_CACHE_SHIFT;
+		offset = (loff_t) index << PAGE_SHIFT;
 		buf = __va((page_to_pfn(page) << PAGE_SHIFT));
 		rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0);
 		if (rc < 0) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
 		}
 		SetPageUptodate(page);

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index dc645b6..45d6110 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c

@@ -420,8 +420,8 @@
 	pstore_sb = sb;
 
 	sb->s_maxbytes		= MAX_LFS_FILESIZE;
-	sb->s_blocksize		= PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits	= PAGE_CACHE_SHIFT;
+	sb->s_blocksize		= PAGE_SIZE;
+	sb->s_blocksize_bits	= PAGE_SHIFT;
 	sb->s_magic		= PSTOREFS_MAGIC;
 	sb->s_op		= &pstore_ops;
 	sb->s_time_gran		= 1;

diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index e1f3727..144ceda 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c

@@ -35,9 +35,9 @@
 static unsigned last_entry(struct inode *inode, unsigned long page_nr)
 {
 	unsigned long last_byte = inode->i_size;
-	last_byte -= page_nr << PAGE_CACHE_SHIFT;
-	if (last_byte > PAGE_CACHE_SIZE)
-		last_byte = PAGE_CACHE_SIZE;
+	last_byte -= page_nr << PAGE_SHIFT;
+	if (last_byte > PAGE_SIZE)
+		last_byte = PAGE_SIZE;
 	return last_byte / QNX6_DIR_ENTRY_SIZE;
 }
 
@@ -47,9 +47,9 @@
 {
 	struct qnx6_sb_info *sbi = QNX6_SB(sb);
 	u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */
-	u32 n = s >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* in pages */
+	u32 n = s >> (PAGE_SHIFT - sb->s_blocksize_bits); /* in pages */
 	/* within page */
-	u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_CACHE_MASK;
+	u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_MASK;
 	struct address_space *mapping = sbi->longfile->i_mapping;
 	struct page *page = read_mapping_page(mapping, n, NULL);
 	if (IS_ERR(page))
@@ -115,8 +115,8 @@
 	struct qnx6_sb_info *sbi = QNX6_SB(s);
 	loff_t pos = ctx->pos & ~(QNX6_DIR_ENTRY_SIZE - 1);
 	unsigned long npages = dir_pages(inode);
-	unsigned long n = pos >> PAGE_CACHE_SHIFT;
-	unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
+	unsigned long n = pos >> PAGE_SHIFT;
+	unsigned start = (pos & ~PAGE_MASK) / QNX6_DIR_ENTRY_SIZE;
 	bool done = false;
 
 	ctx->pos = pos;
@@ -131,7 +131,7 @@
 
 		if (IS_ERR(page)) {
 			pr_err("%s(): read failed\n", __func__);
-			ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
+			ctx->pos = (n + 1) << PAGE_SHIFT;
 			return PTR_ERR(page);
 		}
 		de = ((struct qnx6_dir_entry *)page_address(page)) + start;

diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 47bb1de..1192422 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c

@@ -542,8 +542,8 @@
 		iget_failed(inode);
 		return ERR_PTR(-EIO);
 	}
-	n = (ino - 1) >> (PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS);
-	offs = (ino - 1) & (~PAGE_CACHE_MASK >> QNX6_INODE_SIZE_BITS);
+	n = (ino - 1) >> (PAGE_SHIFT - QNX6_INODE_SIZE_BITS);
+	offs = (ino - 1) & (~PAGE_MASK >> QNX6_INODE_SIZE_BITS);
 	mapping = sbi->inodes->i_mapping;
 	page = read_mapping_page(mapping, n, NULL);
 	if (IS_ERR(page)) {

diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index d3fb2b6..f23b5c4 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h

@@ -128,7 +128,7 @@
 static inline void qnx6_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ba827da..ff21980 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c

@@ -2047,11 +2047,20 @@
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int err;
 
-	if (!dqopt->ops[qid->type]->get_next_id)
-		return -ENOSYS;
+	mutex_lock(&dqopt->dqonoff_mutex);
+	if (!sb_has_quota_active(sb, qid->type)) {
+		err = -ESRCH;
+		goto out;
+	}
+	if (!dqopt->ops[qid->type]->get_next_id) {
+		err = -ENOSYS;
+		goto out;
+	}
 	mutex_lock(&dqopt->dqio_mutex);
 	err = dqopt->ops[qid->type]->get_next_id(sb, qid);
 	mutex_unlock(&dqopt->dqio_mutex);
+out:
+	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	return err;
 }

diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 38981b0..1ab6e6c 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c

@@ -223,8 +223,8 @@
 		return err;
 
 	sb->s_maxbytes		= MAX_LFS_FILESIZE;
-	sb->s_blocksize		= PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits	= PAGE_CACHE_SHIFT;
+	sb->s_blocksize		= PAGE_SIZE;
+	sb->s_blocksize_bits	= PAGE_SHIFT;
 	sb->s_magic		= RAMFS_MAGIC;
 	sb->s_op		= &ramfs_ops;
 	sb->s_time_gran		= 1;

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9424a4b..3897737 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c

@@ -180,11 +180,11 @@
 	int partial = 0;
 	unsigned blocksize;
 	struct buffer_head *bh, *head;
-	unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	unsigned long i_size_index = inode->i_size >> PAGE_SHIFT;
 	int new;
 	int logit = reiserfs_file_data_log(inode);
 	struct super_block *s = inode->i_sb;
-	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
+	int bh_per_page = PAGE_SIZE / s->s_blocksize;
 	struct reiserfs_transaction_handle th;
 	int ret = 0;
 

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ae9e5b3..d5c2e9c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c

@@ -386,7 +386,7 @@
 		goto finished;
 	}
 	/* read file tail into part of page */
-	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
+	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_SIZE - 1);
 	copy_item_head(&tmp_ih, ih);
 
 	/*
@@ -587,10 +587,10 @@
 		return -EIO;
 
 	/* always try to read until the end of the block */
-	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
+	tail_start = tail_offset & (PAGE_SIZE - 1);
 	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
 
-	index = tail_offset >> PAGE_CACHE_SHIFT;
+	index = tail_offset >> PAGE_SHIFT;
 	/*
 	 * hole_page can be zero in case of direct_io, we are sure
 	 * that we cannot get here if we write with O_DIRECT into tail page
@@ -629,7 +629,7 @@
 unlock:
 	if (tail_page != hole_page) {
 		unlock_page(tail_page);
-		page_cache_release(tail_page);
+		put_page(tail_page);
 	}
 out:
 	return retval;
@@ -2189,11 +2189,11 @@
 	 * we want the page with the last byte in the file,
 	 * not the page that will hold the next byte for appending
 	 */
-	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+	unsigned long index = (inode->i_size - 1) >> PAGE_SHIFT;
 	unsigned long pos = 0;
 	unsigned long start = 0;
 	unsigned long blocksize = inode->i_sb->s_blocksize;
-	unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
+	unsigned long offset = (inode->i_size) & (PAGE_SIZE - 1);
 	struct buffer_head *bh;
 	struct buffer_head *head;
 	struct page *page;
@@ -2251,7 +2251,7 @@
 
 unlock:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return error;
 }
 
@@ -2265,7 +2265,7 @@
 {
 	struct reiserfs_transaction_handle th;
 	/* we want the offset for the first byte after the end of the file */
-	unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+	unsigned long offset = inode->i_size & (PAGE_SIZE - 1);
 	unsigned blocksize = inode->i_sb->s_blocksize;
 	unsigned length;
 	struct page *page = NULL;
@@ -2345,7 +2345,7 @@
 			}
 		}
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	reiserfs_write_unlock(inode->i_sb);
@@ -2354,7 +2354,7 @@
 out:
 	if (page) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	reiserfs_write_unlock(inode->i_sb);
@@ -2426,7 +2426,7 @@
 	} else if (is_direct_le_ih(ih)) {
 		char *p;
 		p = page_address(bh_result->b_page);
-		p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
+		p += (byte_offset - 1) & (PAGE_SIZE - 1);
 		copy_size = ih_item_len(ih) - pos_in_item;
 
 		fs_gen = get_generation(inode->i_sb);
@@ -2525,7 +2525,7 @@
 				    struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
-	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = inode->i_size >> PAGE_SHIFT;
 	int error = 0;
 	unsigned long block;
 	sector_t last_block;
@@ -2535,7 +2535,7 @@
 	int checked = PageChecked(page);
 	struct reiserfs_transaction_handle th;
 	struct super_block *s = inode->i_sb;
-	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
+	int bh_per_page = PAGE_SIZE / s->s_blocksize;
 	th.t_trans_id = 0;
 
 	/* no logging allowed when nonblocking or from PF_MEMALLOC */
@@ -2564,16 +2564,16 @@
 	if (page->index >= end_index) {
 		unsigned last_offset;
 
-		last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+		last_offset = inode->i_size & (PAGE_SIZE - 1);
 		/* no file contents in this page */
 		if (page->index >= end_index + 1 || !last_offset) {
 			unlock_page(page);
 			return 0;
 		}
-		zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
+		zero_user_segment(page, last_offset, PAGE_SIZE);
 	}
 	bh = head;
-	block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
+	block = page->index << (PAGE_SHIFT - s->s_blocksize_bits);
 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
 	/* first map all the buffers, logging any direct items we find */
 	do {
@@ -2774,7 +2774,7 @@
 		*fsdata = (void *)(unsigned long)flags;
 	}
 
-	index = pos >> PAGE_CACHE_SHIFT;
+	index = pos >> PAGE_SHIFT;
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
@@ -2822,7 +2822,7 @@
 	}
 	if (ret) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		/* Truncate allocated blocks */
 		reiserfs_truncate_failed_write(inode);
 	}
@@ -2909,7 +2909,7 @@
 	else
 		th = NULL;
 
-	start = pos & (PAGE_CACHE_SIZE - 1);
+	start = pos & (PAGE_SIZE - 1);
 	if (unlikely(copied < len)) {
 		if (!PageUptodate(page))
 			copied = 0;
@@ -2974,7 +2974,7 @@
 	if (locked)
 		reiserfs_write_unlock(inode->i_sb);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	if (pos + len > inode->i_size)
 		reiserfs_truncate_failed_write(inode);
@@ -2996,7 +2996,7 @@
 			  unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
+	loff_t pos = ((loff_t) page->index << PAGE_SHIFT) + to;
 	int ret = 0;
 	int update_sd = 0;
 	struct reiserfs_transaction_handle *th = NULL;
@@ -3181,7 +3181,7 @@
 	struct inode *inode = page->mapping->host;
 	unsigned int curr_off = 0;
 	unsigned int stop = offset + length;
-	int partial_page = (offset || length < PAGE_CACHE_SIZE);
+	int partial_page = (offset || length < PAGE_SIZE);
 	int ret = 1;
 
 	BUG_ON(!PageLocked(page));

diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 036a1fc..57045f4 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c

@@ -203,7 +203,7 @@
 	 * __reiserfs_write_begin on that page.  This will force a
 	 * reiserfs_get_block to unpack the tail for us.
 	 */
-	index = inode->i_size >> PAGE_CACHE_SHIFT;
+	index = inode->i_size >> PAGE_SHIFT;
 	mapping = inode->i_mapping;
 	page = grab_cache_page(mapping, index);
 	retval = -ENOMEM;
@@ -221,7 +221,7 @@
 
 out_unlock:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 out:
 	inode_unlock(inode);

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 44c2bdc..2ace90e 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c

@@ -599,18 +599,18 @@
  * This does a check to see if the buffer belongs to one of these
  * lost pages before doing the final put_bh.  If page->mapping was
  * null, it tries to free buffers on the page, which should make the
- * final page_cache_release drop the page from the lru.
+ * final put_page drop the page from the lru.
  */
 static void release_buffer_page(struct buffer_head *bh)
 {
 	struct page *page = bh->b_page;
 	if (!page->mapping && trylock_page(page)) {
-		page_cache_get(page);
+		get_page(page);
 		put_bh(bh);
 		if (!page->mapping)
 			try_to_free_buffers(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	} else {
 		put_bh(bh);
 	}

diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 24cbe01..5feacd6 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c

@@ -1342,7 +1342,7 @@
 		 */
 
 		data = kmap_atomic(un_bh->b_page);
-		off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
+		off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
 		memcpy(data + off,
 		       ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
 		       ret_value);
@@ -1511,7 +1511,7 @@
 
 	if (page) {
 		if (page_has_buffers(page)) {
-			tail_index = pos & (PAGE_CACHE_SIZE - 1);
+			tail_index = pos & (PAGE_SIZE - 1);
 			cur_index = 0;
 			head = page_buffers(page);
 			bh = head;

diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index f41e19b..2d5489b 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c

@@ -151,7 +151,7 @@
 	 */
 	if (up_to_date_bh) {
 		unsigned pgoff =
-		    (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
+		    (tail_offset + total_tail - 1) & (PAGE_SIZE - 1);
 		char *kaddr = kmap_atomic(up_to_date_bh->b_page);
 		memset(kaddr + pgoff, 0, blk_size - total_tail);
 		kunmap_atomic(kaddr);
@@ -271,7 +271,7 @@
 	 * the page was locked and this part of the page was up to date when
 	 * indirect2direct was called, so we know the bytes are still valid
 	 */
-	tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
+	tail = tail + (pos & (PAGE_SIZE - 1));
 
 	PATH_LAST_POSITION(path)++;
 

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 57e0b23..28f5f8b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c

@@ -415,7 +415,7 @@
 static inline void reiserfs_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 static struct page *reiserfs_get_page(struct inode *dir, size_t n)
@@ -427,7 +427,7 @@
 	 * and an unlink/rmdir has just occurred - GFP_NOFS avoids this
 	 */
 	mapping_set_gfp_mask(mapping, GFP_NOFS);
-	page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
+	page = read_mapping_page(mapping, n >> PAGE_SHIFT, NULL);
 	if (!IS_ERR(page)) {
 		kmap(page);
 		if (PageError(page))
@@ -526,10 +526,10 @@
 	while (buffer_pos < buffer_size || buffer_pos == 0) {
 		size_t chunk;
 		size_t skip = 0;
-		size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
+		size_t page_offset = (file_pos & (PAGE_SIZE - 1));
 
-		if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
-			chunk = PAGE_CACHE_SIZE;
+		if (buffer_size - buffer_pos > PAGE_SIZE)
+			chunk = PAGE_SIZE;
 		else
 			chunk = buffer_size - buffer_pos;
 
@@ -546,8 +546,8 @@
 			struct reiserfs_xattr_header *rxh;
 
 			skip = file_pos = sizeof(struct reiserfs_xattr_header);
-			if (chunk + skip > PAGE_CACHE_SIZE)
-				chunk = PAGE_CACHE_SIZE - skip;
+			if (chunk + skip > PAGE_SIZE)
+				chunk = PAGE_SIZE - skip;
 			rxh = (struct reiserfs_xattr_header *)data;
 			rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC);
 			rxh->h_hash = cpu_to_le32(xahash);
@@ -675,8 +675,8 @@
 		char *data;
 		size_t skip = 0;
 
-		if (isize - file_pos > PAGE_CACHE_SIZE)
-			chunk = PAGE_CACHE_SIZE;
+		if (isize - file_pos > PAGE_SIZE)
+			chunk = PAGE_SIZE;
 		else
 			chunk = isize - file_pos;
 

diff --git a/fs/splice.c b/fs/splice.c
index 9947b5c..b018eb4 100644
--- a/fs/splice.c
+++ b/fs/splice.c

@@ -88,7 +88,7 @@
 static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
 					struct pipe_buffer *buf)
 {
-	page_cache_release(buf->page);
+	put_page(buf->page);
 	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
@@ -268,7 +268,7 @@
 
 void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
 {
-	page_cache_release(spd->pages[i]);
+	put_page(spd->pages[i]);
 }
 
 /*
@@ -328,9 +328,9 @@
 	if (splice_grow_spd(pipe, &spd))
 		return -ENOMEM;
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
-	loff = *ppos & ~PAGE_CACHE_MASK;
-	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	index = *ppos >> PAGE_SHIFT;
+	loff = *ppos & ~PAGE_MASK;
+	req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	nr_pages = min(req_pages, spd.nr_pages_max);
 
 	/*
@@ -365,7 +365,7 @@
 			error = add_to_page_cache_lru(page, mapping, index,
 				   mapping_gfp_constraint(mapping, GFP_KERNEL));
 			if (unlikely(error)) {
-				page_cache_release(page);
+				put_page(page);
 				if (error == -EEXIST)
 					continue;
 				break;
@@ -385,7 +385,7 @@
 	 * Now loop over the map and see if we need to start IO on any
 	 * pages, fill in the partial map, etc.
 	 */
-	index = *ppos >> PAGE_CACHE_SHIFT;
+	index = *ppos >> PAGE_SHIFT;
 	nr_pages = spd.nr_pages;
 	spd.nr_pages = 0;
 	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
@@ -397,7 +397,7 @@
 		/*
 		 * this_len is the max we'll use from this page
 		 */
-		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
+		this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
 		page = spd.pages[page_nr];
 
 		if (PageReadahead(page))
@@ -426,7 +426,7 @@
 					error = -ENOMEM;
 					break;
 				}
-				page_cache_release(spd.pages[page_nr]);
+				put_page(spd.pages[page_nr]);
 				spd.pages[page_nr] = page;
 			}
 			/*
@@ -456,7 +456,7 @@
 		 * i_size must be checked after PageUptodate.
 		 */
 		isize = i_size_read(mapping->host);
-		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+		end_index = (isize - 1) >> PAGE_SHIFT;
 		if (unlikely(!isize || index > end_index))
 			break;
 
@@ -470,7 +470,7 @@
 			/*
 			 * max good bytes in this page
 			 */
-			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			plen = ((isize - 1) & ~PAGE_MASK) + 1;
 			if (plen <= loff)
 				break;
 
@@ -494,8 +494,8 @@
 	 * we got, 'nr_pages' is how many pages are in the map.
 	 */
 	while (page_nr < nr_pages)
-		page_cache_release(spd.pages[page_nr++]);
-	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
+		put_page(spd.pages[page_nr++]);
+	in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
 
 	if (spd.nr_pages)
 		error = splice_to_pipe(pipe, &spd);
@@ -636,8 +636,8 @@
 			goto shrink_ret;
 	}
 
-	offset = *ppos & ~PAGE_CACHE_MASK;
-	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_MASK;
+	nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
 	for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
 		struct page *page;
@@ -647,7 +647,7 @@
 		if (!page)
 			goto err;
 
-		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+		this_len = min_t(size_t, len, PAGE_SIZE - offset);
 		vec[i].iov_base = (void __user *) page_address(page);
 		vec[i].iov_len = this_len;
 		spd.pages[i] = page;

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 0cea9b9..2c26184 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c

@@ -181,11 +181,11 @@
 			in = min(bytes, msblk->devblksize - offset);
 			bytes -= in;
 			while (in) {
-				if (pg_offset == PAGE_CACHE_SIZE) {
+				if (pg_offset == PAGE_SIZE) {
 					data = squashfs_next_page(output);
 					pg_offset = 0;
 				}
-				avail = min_t(int, in, PAGE_CACHE_SIZE -
+				avail = min_t(int, in, PAGE_SIZE -
 						pg_offset);
 				memcpy(data + pg_offset, bh[k]->b_data + offset,
 						avail);

diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 1cb70a0..23813c0 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c

@@ -30,7 +30,7 @@
  * access the metadata and fragment caches.
  *
  * To avoid out of memory and fragmentation issues with vmalloc the cache
- * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
+ * uses sequences of kmalloced PAGE_SIZE buffers.
  *
  * It should be noted that the cache is not used for file datablocks, these
  * are decompressed and cached in the page-cache in the normal way.  The
@@ -231,7 +231,7 @@
 /*
  * Initialise cache allocating the specified number of entries, each of
  * size block_size.  To avoid vmalloc fragmentation issues each entry
- * is allocated as a sequence of kmalloced PAGE_CACHE_SIZE buffers.
+ * is allocated as a sequence of kmalloced PAGE_SIZE buffers.
  */
 struct squashfs_cache *squashfs_cache_init(char *name, int entries,
 	int block_size)
@@ -255,7 +255,7 @@
 	cache->unused = entries;
 	cache->entries = entries;
 	cache->block_size = block_size;
-	cache->pages = block_size >> PAGE_CACHE_SHIFT;
+	cache->pages = block_size >> PAGE_SHIFT;
 	cache->pages = cache->pages ? cache->pages : 1;
 	cache->name = name;
 	cache->num_waiters = 0;
@@ -275,7 +275,7 @@
 		}
 
 		for (j = 0; j < cache->pages; j++) {
-			entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+			entry->data[j] = kmalloc(PAGE_SIZE, GFP_KERNEL);
 			if (entry->data[j] == NULL) {
 				ERROR("Failed to allocate %s buffer\n", name);
 				goto cleanup;
@@ -314,10 +314,10 @@
 		return min(length, entry->length - offset);
 
 	while (offset < entry->length) {
-		void *buff = entry->data[offset / PAGE_CACHE_SIZE]
-				+ (offset % PAGE_CACHE_SIZE);
+		void *buff = entry->data[offset / PAGE_SIZE]
+				+ (offset % PAGE_SIZE);
 		int bytes = min_t(int, entry->length - offset,
-				PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE));
+				PAGE_SIZE - (offset % PAGE_SIZE));
 
 		if (bytes >= remaining) {
 			memcpy(buffer, buff, remaining);
@@ -415,7 +415,7 @@
  */
 void *squashfs_read_table(struct super_block *sb, u64 block, int length)
 {
-	int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	int pages = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	int i, res;
 	void *table, *buffer, **data;
 	struct squashfs_page_actor *actor;
@@ -436,7 +436,7 @@
 		goto failed2;
 	}
 
-	for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
+	for (i = 0; i < pages; i++, buffer += PAGE_SIZE)
 		data[i] = buffer;
 
 	res = squashfs_read_data(sb, block, length |

diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index e9034bf..d2bc136 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c

@@ -102,7 +102,7 @@
 	 * Read decompressor specific options from file system if present
 	 */
 	if (SQUASHFS_COMP_OPTS(flags)) {
-		buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+		buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
 		if (buffer == NULL) {
 			comp_opts = ERR_PTR(-ENOMEM);
 			goto out;

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e5c9689..13d8094 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c

@@ -175,7 +175,7 @@
 {
 	int err, i;
 	long long block = 0;
-	__le32 *blist = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+	__le32 *blist = kmalloc(PAGE_SIZE, GFP_KERNEL);
 
 	if (blist == NULL) {
 		ERROR("read_indexes: Failed to allocate block_list\n");
@@ -183,7 +183,7 @@
 	}
 
 	while (n) {
-		int blocks = min_t(int, n, PAGE_CACHE_SIZE >> 2);
+		int blocks = min_t(int, n, PAGE_SIZE >> 2);
 
 		err = squashfs_read_metadata(sb, blist, start_block,
 				offset, blocks << 2);
@@ -377,19 +377,19 @@
 	struct inode *inode = page->mapping->host;
 	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
 	void *pageaddr;
-	int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+	int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
 	int start_index = page->index & ~mask, end_index = start_index | mask;
 
 	/*
 	 * Loop copying datablock into pages.  As the datablock likely covers
-	 * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
+	 * many PAGE_SIZE pages (default block size is 128 KiB) explicitly
 	 * grab the pages from the page cache, except for the page that we've
 	 * been called to fill.
 	 */
 	for (i = start_index; i <= end_index && bytes > 0; i++,
-			bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+			bytes -= PAGE_SIZE, offset += PAGE_SIZE) {
 		struct page *push_page;
-		int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0;
+		int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0;
 
 		TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
 
@@ -404,14 +404,14 @@
 
 		pageaddr = kmap_atomic(push_page);
 		squashfs_copy_data(pageaddr, buffer, offset, avail);
-		memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+		memset(pageaddr + avail, 0, PAGE_SIZE - avail);
 		kunmap_atomic(pageaddr);
 		flush_dcache_page(push_page);
 		SetPageUptodate(push_page);
 skip_page:
 		unlock_page(push_page);
 		if (i != page->index)
-			page_cache_release(push_page);
+			put_page(push_page);
 	}
 }
 
@@ -454,7 +454,7 @@
 {
 	struct inode *inode = page->mapping->host;
 	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-	int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+	int index = page->index >> (msblk->block_log - PAGE_SHIFT);
 	int file_end = i_size_read(inode) >> msblk->block_log;
 	int res;
 	void *pageaddr;
@@ -462,8 +462,8 @@
 	TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
 				page->index, squashfs_i(inode)->start);
 
-	if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
-					PAGE_CACHE_SHIFT))
+	if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >>
+					PAGE_SHIFT))
 		goto out;
 
 	if (index < file_end || squashfs_i(inode)->fragment_block ==
@@ -487,7 +487,7 @@
 	SetPageError(page);
 out:
 	pageaddr = kmap_atomic(page);
-	memset(pageaddr, 0, PAGE_CACHE_SIZE);
+	memset(pageaddr, 0, PAGE_SIZE);
 	kunmap_atomic(pageaddr);
 	flush_dcache_page(page);
 	if (!PageError(page))

diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 43e7a7e..cb485d8 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c

@@ -30,8 +30,8 @@
 	struct inode *inode = target_page->mapping->host;
 	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
 
-	int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
-	int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+	int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+	int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
 	int start_index = target_page->index & ~mask;
 	int end_index = start_index | mask;
 	int i, n, pages, missing_pages, bytes, res = -ENOMEM;
@@ -68,7 +68,7 @@
 
 		if (PageUptodate(page[i])) {
 			unlock_page(page[i]);
-			page_cache_release(page[i]);
+			put_page(page[i]);
 			page[i] = NULL;
 			missing_pages++;
 		}
@@ -96,10 +96,10 @@
 		goto mark_errored;
 
 	/* Last page may have trailing bytes not filled */
-	bytes = res % PAGE_CACHE_SIZE;
+	bytes = res % PAGE_SIZE;
 	if (bytes) {
 		pageaddr = kmap_atomic(page[pages - 1]);
-		memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+		memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
 		kunmap_atomic(pageaddr);
 	}
 
@@ -109,7 +109,7 @@
 		SetPageUptodate(page[i]);
 		unlock_page(page[i]);
 		if (page[i] != target_page)
-			page_cache_release(page[i]);
+			put_page(page[i]);
 	}
 
 	kfree(actor);
@@ -127,7 +127,7 @@
 		flush_dcache_page(page[i]);
 		SetPageError(page[i]);
 		unlock_page(page[i]);
-		page_cache_release(page[i]);
+		put_page(page[i]);
 	}
 
 out:
@@ -153,21 +153,21 @@
 	}
 
 	for (n = 0; n < pages && bytes > 0; n++,
-			bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
-		int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+			bytes -= PAGE_SIZE, offset += PAGE_SIZE) {
+		int avail = min_t(int, bytes, PAGE_SIZE);
 
 		if (page[n] == NULL)
 			continue;
 
 		pageaddr = kmap_atomic(page[n]);
 		squashfs_copy_data(pageaddr, buffer, offset, avail);
-		memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+		memset(pageaddr + avail, 0, PAGE_SIZE - avail);
 		kunmap_atomic(pageaddr);
 		flush_dcache_page(page[n]);
 		SetPageUptodate(page[n]);
 		unlock_page(page[n]);
 		if (page[n] != target_page)
-			page_cache_release(page[n]);
+			put_page(page[n]);
 	}
 
 out:

diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index c31e2bc..ff4468b 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c

@@ -117,13 +117,13 @@
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
-		if (bytes <= PAGE_CACHE_SIZE) {
+		if (bytes <= PAGE_SIZE) {
 			memcpy(data, buff, bytes);
 			break;
 		}
-		memcpy(data, buff, PAGE_CACHE_SIZE);
-		buff += PAGE_CACHE_SIZE;
-		bytes -= PAGE_CACHE_SIZE;
+		memcpy(data, buff, PAGE_SIZE);
+		buff += PAGE_SIZE;
+		bytes -= PAGE_SIZE;
 		data = squashfs_next_page(output);
 	}
 	squashfs_finish_page(output);

diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 244b9fb..934c17e 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c

@@ -102,13 +102,13 @@
 	data = squashfs_first_page(output);
 	buff = stream->output;
 	while (data) {
-		if (bytes <= PAGE_CACHE_SIZE) {
+		if (bytes <= PAGE_SIZE) {
 			memcpy(data, buff, bytes);
 			break;
 		} else {
-			memcpy(data, buff, PAGE_CACHE_SIZE);
-			buff += PAGE_CACHE_SIZE;
-			bytes -= PAGE_CACHE_SIZE;
+			memcpy(data, buff, PAGE_SIZE);
+			buff += PAGE_SIZE;
+			bytes -= PAGE_SIZE;
 			data = squashfs_next_page(output);
 		}
 	}

diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 5a1c11f..9b7b1b6 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c

@@ -48,7 +48,7 @@
 	if (actor == NULL)
 		return NULL;
 
-	actor->length = length ? : pages * PAGE_CACHE_SIZE;
+	actor->length = length ? : pages * PAGE_SIZE;
 	actor->buffer = buffer;
 	actor->pages = pages;
 	actor->next_page = 0;
@@ -88,7 +88,7 @@
 	if (actor == NULL)
 		return NULL;
 
-	actor->length = length ? : pages * PAGE_CACHE_SIZE;
+	actor->length = length ? : pages * PAGE_SIZE;
 	actor->page = page;
 	actor->pages = pages;
 	actor->next_page = 0;

diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 26dd820..98537ea 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h

@@ -24,7 +24,7 @@
 	if (actor == NULL)
 		return NULL;
 
-	actor->length = length ? : pages * PAGE_CACHE_SIZE;
+	actor->length = length ? : pages * PAGE_SIZE;
 	actor->page = page;
 	actor->pages = pages;
 	actor->next_page = 0;

diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5e79bfa..cf01e15 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c

@@ -152,7 +152,7 @@
 	 * Check the system page size is not larger than the filesystem
 	 * block size (by default 128K).  This is currently not supported.
 	 */
-	if (PAGE_CACHE_SIZE > msblk->block_size) {
+	if (PAGE_SIZE > msblk->block_size) {
 		ERROR("Page size > filesystem block size (%d).  This is "
 			"currently not supported!\n", msblk->block_size);
 		goto failed_mount;

diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index dbcc2f5..d688ef4 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c

@@ -48,10 +48,10 @@
 	struct inode *inode = page->mapping->host;
 	struct super_block *sb = inode->i_sb;
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	int index = page->index << PAGE_CACHE_SHIFT;
+	int index = page->index << PAGE_SHIFT;
 	u64 block = squashfs_i(inode)->start;
 	int offset = squashfs_i(inode)->offset;
-	int length = min_t(int, i_size_read(inode) - index, PAGE_CACHE_SIZE);
+	int length = min_t(int, i_size_read(inode) - index, PAGE_SIZE);
 	int bytes, copied;
 	void *pageaddr;
 	struct squashfs_cache_entry *entry;
@@ -94,7 +94,7 @@
 		copied = squashfs_copy_data(pageaddr + bytes, entry, offset,
 								length - bytes);
 		if (copied == length - bytes)
-			memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length);
+			memset(pageaddr + length, 0, PAGE_SIZE - length);
 		else
 			block = entry->next_index;
 		kunmap_atomic(pageaddr);

diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index c609624..6bfaef7 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c

@@ -141,7 +141,7 @@
 	stream->buf.in_pos = 0;
 	stream->buf.in_size = 0;
 	stream->buf.out_pos = 0;
-	stream->buf.out_size = PAGE_CACHE_SIZE;
+	stream->buf.out_size = PAGE_SIZE;
 	stream->buf.out = squashfs_first_page(output);
 
 	do {
@@ -158,7 +158,7 @@
 			stream->buf.out = squashfs_next_page(output);
 			if (stream->buf.out != NULL) {
 				stream->buf.out_pos = 0;
-				total += PAGE_CACHE_SIZE;
+				total += PAGE_SIZE;
 			}
 		}
 

diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 8727cab..2ec24d1 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c

@@ -69,7 +69,7 @@
 	int zlib_err, zlib_init = 0, k = 0;
 	z_stream *stream = strm;
 
-	stream->avail_out = PAGE_CACHE_SIZE;
+	stream->avail_out = PAGE_SIZE;
 	stream->next_out = squashfs_first_page(output);
 	stream->avail_in = 0;
 
@@ -85,7 +85,7 @@
 		if (stream->avail_out == 0) {
 			stream->next_out = squashfs_next_page(output);
 			if (stream->next_out != NULL)
-				stream->avail_out = PAGE_CACHE_SIZE;
+				stream->avail_out = PAGE_SIZE;
 		}
 
 		if (!zlib_init) {

diff --git a/fs/sync.c b/fs/sync.c
index dd5d171..2a54c1f 100644
--- a/fs/sync.c
+++ b/fs/sync.c

@@ -302,7 +302,7 @@
 		goto out;
 
 	if (sizeof(pgoff_t) == 4) {
-		if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
+		if (offset >= (0x100000000ULL << PAGE_SHIFT)) {
 			/*
 			 * The range starts outside a 32 bit machine's
 			 * pagecache addressing capabilities.  Let it "succeed"
@@ -310,7 +310,7 @@
 			ret = 0;
 			goto out;
 		}
-		if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
+		if (endbyte >= (0x100000000ULL << PAGE_SHIFT)) {
 			/*
 			 * Out to EOF
 			 */

diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 63c1bcb..c0f0a3e 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c

@@ -30,7 +30,7 @@
 static inline void dir_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
@@ -73,8 +73,8 @@
 	if (pos >= inode->i_size)
 		return 0;
 
-	offset = pos & ~PAGE_CACHE_MASK;
-	n = pos >> PAGE_CACHE_SHIFT;
+	offset = pos & ~PAGE_MASK;
+	n = pos >> PAGE_SHIFT;
 
 	for ( ; n < npages; n++, offset = 0) {
 		char *kaddr, *limit;
@@ -85,7 +85,7 @@
 			continue;
 		kaddr = (char *)page_address(page);
 		de = (struct sysv_dir_entry *)(kaddr+offset);
-		limit = kaddr + PAGE_CACHE_SIZE - SYSV_DIRSIZE;
+		limit = kaddr + PAGE_SIZE - SYSV_DIRSIZE;
 		for ( ;(char*)de <= limit; de++, ctx->pos += sizeof(*de)) {
 			char *name = de->name;
 
@@ -146,7 +146,7 @@
 		if (!IS_ERR(page)) {
 			kaddr = (char*)page_address(page);
 			de = (struct sysv_dir_entry *) kaddr;
-			kaddr += PAGE_CACHE_SIZE - SYSV_DIRSIZE;
+			kaddr += PAGE_SIZE - SYSV_DIRSIZE;
 			for ( ; (char *) de <= kaddr ; de++) {
 				if (!de->inode)
 					continue;
@@ -190,7 +190,7 @@
 			goto out;
 		kaddr = (char*)page_address(page);
 		de = (struct sysv_dir_entry *)kaddr;
-		kaddr += PAGE_CACHE_SIZE - SYSV_DIRSIZE;
+		kaddr += PAGE_SIZE - SYSV_DIRSIZE;
 		while ((char *)de <= kaddr) {
 			if (!de->inode)
 				goto got_it;
@@ -261,7 +261,7 @@
 	kmap(page);
 
 	base = (char*)page_address(page);
-	memset(base, 0, PAGE_CACHE_SIZE);
+	memset(base, 0, PAGE_SIZE);
 
 	de = (struct sysv_dir_entry *) base;
 	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
@@ -273,7 +273,7 @@
 	kunmap(page);
 	err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE);
 fail:
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 
@@ -296,7 +296,7 @@
 
 		kaddr = (char *)page_address(page);
 		de = (struct sysv_dir_entry *)kaddr;
-		kaddr += PAGE_CACHE_SIZE-SYSV_DIRSIZE;
+		kaddr += PAGE_SIZE-SYSV_DIRSIZE;
 
 		for ( ;(char *)de <= kaddr; de++) {
 			if (!de->inode)

diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 11e83ed..90b60c0 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c

@@ -264,11 +264,11 @@
 out_dir:
 	if (dir_de) {
 		kunmap(dir_page);
-		page_cache_release(dir_page);
+		put_page(dir_page);
 	}
 out_old:
 	kunmap(old_page);
-	page_cache_release(old_page);
+	put_page(old_page);
 out:
 	return err;
 }

diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
index 2c6f0cb..c54a243 100644
--- a/fs/ubifs/Makefile
+++ b/fs/ubifs/Makefile

@@ -4,3 +4,4 @@
 ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
 ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
 ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o xattr.o debug.o
+ubifs-y += misc.o

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 065c88f..446753d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c

@@ -121,7 +121,7 @@
 	if (block >= beyond) {
 		/* Reading beyond inode */
 		SetPageChecked(page);
-		memset(addr, 0, PAGE_CACHE_SIZE);
+		memset(addr, 0, PAGE_SIZE);
 		goto out;
 	}
 
@@ -223,7 +223,7 @@
 {
 	struct inode *inode = mapping->host;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	struct ubifs_budget_req req = { .new_page = 1 };
 	int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
 	struct page *page;
@@ -254,13 +254,13 @@
 	}
 
 	if (!PageUptodate(page)) {
-		if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE)
 			SetPageChecked(page);
 		else {
 			err = do_readpage(page);
 			if (err) {
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 				ubifs_release_budget(c, &req);
 				return err;
 			}
@@ -428,7 +428,7 @@
 	struct inode *inode = mapping->host;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 	struct ubifs_inode *ui = ubifs_inode(inode);
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 	int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
 	int skipped_read = 0;
 	struct page *page;
@@ -446,7 +446,7 @@
 
 	if (!PageUptodate(page)) {
 		/* The page is not loaded from the flash */
-		if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
+		if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
 			/*
 			 * We change whole page so no need to load it. But we
 			 * do not know whether this page exists on the media or
@@ -462,7 +462,7 @@
 			err = do_readpage(page);
 			if (err) {
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 				return err;
 			}
 		}
@@ -494,7 +494,7 @@
 			mutex_unlock(&ui->ui_mutex);
 		}
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 
 		return write_begin_slow(mapping, pos, len, pagep, flags);
 	}
@@ -549,12 +549,12 @@
 	dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
 		inode->i_ino, pos, page->index, len, copied, inode->i_size);
 
-	if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) {
+	if (unlikely(copied < len && len == PAGE_SIZE)) {
 		/*
 		 * VFS copied less data to the page that it intended and
 		 * declared in its '->write_begin()' call via the @len
 		 * argument. If the page was not up-to-date, and @len was
-		 * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did
+		 * @PAGE_SIZE, the 'ubifs_write_begin()' function did
 		 * not load it from the media (for optimization reasons). This
 		 * means that part of the page contains garbage. So read the
 		 * page now.
@@ -593,7 +593,7 @@
 
 out:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return copied;
 }
 
@@ -621,10 +621,10 @@
 
 	addr = zaddr = kmap(page);
 
-	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+	end_index = (i_size - 1) >> PAGE_SHIFT;
 	if (!i_size || page->index > end_index) {
 		hole = 1;
-		memset(addr, 0, PAGE_CACHE_SIZE);
+		memset(addr, 0, PAGE_SIZE);
 		goto out_hole;
 	}
 
@@ -673,7 +673,7 @@
 	}
 
 	if (end_index == page->index) {
-		int len = i_size & (PAGE_CACHE_SIZE - 1);
+		int len = i_size & (PAGE_SIZE - 1);
 
 		if (len && len < read)
 			memset(zaddr + len, 0, read - len);
@@ -773,7 +773,7 @@
 	isize = i_size_read(inode);
 	if (isize == 0)
 		goto out_free;
-	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
+	end_index = ((isize - 1) >> PAGE_SHIFT);
 
 	for (page_idx = 1; page_idx < page_cnt; page_idx++) {
 		pgoff_t page_offset = offset + page_idx;
@@ -788,7 +788,7 @@
 		if (!PageUptodate(page))
 			err = populate_page(c, page, bu, &n);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		if (err)
 			break;
 	}
@@ -905,7 +905,7 @@
 #ifdef UBIFS_DEBUG
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	spin_lock(&ui->ui_lock);
-	ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT);
+	ubifs_assert(page->index <= ui->synced_i_size >> PAGE_SHIFT);
 	spin_unlock(&ui->ui_lock);
 #endif
 
@@ -1001,8 +1001,8 @@
 	struct inode *inode = page->mapping->host;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	loff_t i_size =  i_size_read(inode), synced_i_size;
-	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
-	int err, len = i_size & (PAGE_CACHE_SIZE - 1);
+	pgoff_t end_index = i_size >> PAGE_SHIFT;
+	int err, len = i_size & (PAGE_SIZE - 1);
 	void *kaddr;
 
 	dbg_gen("ino %lu, pg %lu, pg flags %#lx",
@@ -1021,7 +1021,7 @@
 
 	/* Is the page fully inside @i_size? */
 	if (page->index < end_index) {
-		if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
+		if (page->index >= synced_i_size >> PAGE_SHIFT) {
 			err = inode->i_sb->s_op->write_inode(inode, NULL);
 			if (err)
 				goto out_unlock;
@@ -1034,7 +1034,7 @@
 			 * with this.
 			 */
 		}
-		return do_writepage(page, PAGE_CACHE_SIZE);
+		return do_writepage(page, PAGE_SIZE);
 	}
 
 	/*
@@ -1045,7 +1045,7 @@
 	 * writes to that region are not written out to the file."
 	 */
 	kaddr = kmap_atomic(page);
-	memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
+	memset(kaddr + len, 0, PAGE_SIZE - len);
 	flush_dcache_page(page);
 	kunmap_atomic(kaddr);
 
@@ -1138,7 +1138,7 @@
 	truncate_setsize(inode, new_size);
 
 	if (offset) {
-		pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
+		pgoff_t index = new_size >> PAGE_SHIFT;
 		struct page *page;
 
 		page = find_lock_page(inode->i_mapping, index);
@@ -1157,9 +1157,9 @@
 				clear_page_dirty_for_io(page);
 				if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
 					offset = new_size &
-						 (PAGE_CACHE_SIZE - 1);
+						 (PAGE_SIZE - 1);
 				err = do_writepage(page, offset);
-				page_cache_release(page);
+				put_page(page);
 				if (err)
 					goto out_budg;
 				/*
@@ -1173,7 +1173,7 @@
 				 * having to read it.
 				 */
 				unlock_page(page);
-				page_cache_release(page);
+				put_page(page);
 			}
 		}
 	}
@@ -1285,7 +1285,7 @@
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 
 	ubifs_assert(PagePrivate(page));
-	if (offset || length < PAGE_CACHE_SIZE)
+	if (offset || length < PAGE_SIZE)
 		/* Partial page remains dirty */
 		return;
 

diff --git a/fs/ubifs/misc.c b/fs/ubifs/misc.c
new file mode 100644
index 0000000..486a284
--- /dev/null
+++ b/fs/ubifs/misc.c

@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include "ubifs.h"
+
+/* Normal UBIFS messages */
+void ubifs_msg(const struct ubifs_info *c, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_notice("UBIFS (ubi%d:%d): %pV\n",
+		  c->vi.ubi_num, c->vi.vol_id, &vaf);
+
+	va_end(args);
+}								    \
+
+/* UBIFS error messages */
+void ubifs_err(const struct ubifs_info *c, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_err("UBIFS error (ubi%d:%d pid %d): %ps: %pV\n",
+	       c->vi.ubi_num, c->vi.vol_id, current->pid,
+	       __builtin_return_address(0),
+	       &vaf);
+
+	va_end(args);
+}								    \
+
+/* UBIFS warning messages */
+void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_warn("UBIFS warning (ubi%d:%d pid %d): %ps: %pV\n",
+		c->vi.ubi_num, c->vi.vol_id, current->pid,
+		__builtin_return_address(0),
+		&vaf);
+
+	va_end(args);
+}

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index a233ba9..e98c24e 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c

@@ -2237,12 +2237,12 @@
 	BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4);
 
 	/*
-	 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
+	 * We require that PAGE_SIZE is greater-than-or-equal-to
 	 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
 	 */
-	if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
+	if (PAGE_SIZE < UBIFS_BLOCK_SIZE) {
 		pr_err("UBIFS error (pid %d): VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
-		       current->pid, (unsigned int)PAGE_CACHE_SIZE);
+		       current->pid, (unsigned int)PAGE_SIZE);
 		return -EINVAL;
 	}
 

diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a5697de..4cd7e56 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h

@@ -42,36 +42,12 @@
 /* Version of this UBIFS implementation */
 #define UBIFS_VERSION 1
 
-/* Normal UBIFS messages */
-#define ubifs_msg(c, fmt, ...)                                      \
-	pr_notice("UBIFS (ubi%d:%d): " fmt "\n",                    \
-		  (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)
-/* UBIFS error messages */
-#define ubifs_err(c, fmt, ...)                                      \
-	pr_err("UBIFS error (ubi%d:%d pid %d): %s: " fmt "\n",      \
-	       (c)->vi.ubi_num, (c)->vi.vol_id, current->pid,       \
-	       __func__, ##__VA_ARGS__)
-/* UBIFS warning messages */
-#define ubifs_warn(c, fmt, ...)                                     \
-	pr_warn("UBIFS warning (ubi%d:%d pid %d): %s: " fmt "\n",   \
-		(c)->vi.ubi_num, (c)->vi.vol_id, current->pid,      \
-		__func__, ##__VA_ARGS__)
-/*
- * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
- * object as an argument.
- */
-#define ubifs_errc(c, fmt, ...)                                     \
-	do {                                                        \
-		if (!(c)->probing)                                  \
-			ubifs_err(c, fmt, ##__VA_ARGS__);           \
-	} while (0)
-
 /* UBIFS file system VFS magic number */
 #define UBIFS_SUPER_MAGIC 0x24051905
 
 /* Number of UBIFS blocks per VFS page */
-#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE)
-#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT)
+#define UBIFS_BLOCKS_PER_PAGE (PAGE_SIZE / UBIFS_BLOCK_SIZE)
+#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_SHIFT - UBIFS_BLOCK_SHIFT)
 
 /* "File system end of life" sequence number watermark */
 #define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
@@ -1802,4 +1778,21 @@
 #include "misc.h"
 #include "key.h"
 
+/* Normal UBIFS messages */
+__printf(2, 3)
+void ubifs_msg(const struct ubifs_info *c, const char *fmt, ...);
+__printf(2, 3)
+void ubifs_err(const struct ubifs_info *c, const char *fmt, ...);
+__printf(2, 3)
+void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...);
+/*
+ * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
+ * object as an argument.
+ */
+#define ubifs_errc(c, fmt, ...)						\
+do {									\
+	if (!(c)->probing)						\
+		ubifs_err(c, fmt, ##__VA_ARGS__);			\
+} while (0)
+
 #endif /* !__UBIFS_H__ */

diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c7f4d434..b043e04 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c

@@ -59,7 +59,6 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
 
 /*
  * Limit the number of extended attributes per inode so that the total size

diff --git a/fs/udf/file.c b/fs/udf/file.c
index 1af9896..877ba1c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c

@@ -46,7 +46,7 @@
 
 	kaddr = kmap(page);
 	memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
-	memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size);
+	memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size);
 	flush_dcache_page(page);
 	SetPageUptodate(page);
 	kunmap(page);
@@ -87,14 +87,14 @@
 {
 	struct page *page;
 
-	if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE))
+	if (WARN_ON_ONCE(pos >= PAGE_SIZE))
 		return -EIO;
 	page = grab_cache_page_write_begin(mapping, 0, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
 
-	if (!PageUptodate(page) && len != PAGE_CACHE_SIZE)
+	if (!PageUptodate(page) && len != PAGE_SIZE)
 		__udf_adinicb_readpage(page);
 	return 0;
 }

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 166d3ed..2dc461e 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c

@@ -287,7 +287,7 @@
 	if (!PageUptodate(page)) {
 		kaddr = kmap(page);
 		memset(kaddr + iinfo->i_lenAlloc, 0x00,
-		       PAGE_CACHE_SIZE - iinfo->i_lenAlloc);
+		       PAGE_SIZE - iinfo->i_lenAlloc);
 		memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr,
 			iinfo->i_lenAlloc);
 		flush_dcache_page(page);
@@ -319,7 +319,7 @@
 		inode->i_data.a_ops = &udf_adinicb_aops;
 		up_write(&iinfo->i_data_sem);
 	}
-	page_cache_release(page);
+	put_page(page);
 	mark_inode_dirty(inode);
 
 	return err;

diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index dc5fae6..0447b94 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c

@@ -237,7 +237,7 @@
 			       sector_t newb, struct page *locked_page)
 {
 	const unsigned blks_per_page =
-		1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		1 << (PAGE_SHIFT - inode->i_blkbits);
 	const unsigned mask = blks_per_page - 1;
 	struct address_space * const mapping = inode->i_mapping;
 	pgoff_t index, cur_index, last_index;
@@ -255,9 +255,9 @@
 
 	cur_index = locked_page->index;
 	end = count + beg;
-	last_index = end >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	last_index = end >> (PAGE_SHIFT - inode->i_blkbits);
 	for (i = beg; i < end; i = (i | mask) + 1) {
-		index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+		index = i >> (PAGE_SHIFT - inode->i_blkbits);
 
 		if (likely(cur_index != index)) {
 			page = ufs_get_locked_page(mapping, index);

diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 74f2e80..0b14572 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c

@@ -62,7 +62,7 @@
 static inline void ufs_put_page(struct page *page)
 {
 	kunmap(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)
@@ -111,13 +111,13 @@
 	struct super_block *sb = dir->i_sb;
 	char *kaddr = page_address(page);
 	unsigned offs, rec_len;
-	unsigned limit = PAGE_CACHE_SIZE;
+	unsigned limit = PAGE_SIZE;
 	const unsigned chunk_mask = UFS_SB(sb)->s_uspi->s_dirblksize - 1;
 	struct ufs_dir_entry *p;
 	char *error;
 
-	if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
-		limit = dir->i_size & ~PAGE_CACHE_MASK;
+	if ((dir->i_size >> PAGE_SHIFT) == page->index) {
+		limit = dir->i_size & ~PAGE_MASK;
 		if (limit & chunk_mask)
 			goto Ebadsize;
 		if (!limit)
@@ -170,7 +170,7 @@
 bad_entry:
 	ufs_error (sb, "ufs_check_page", "bad entry in directory #%lu: %s - "
 		   "offset=%lu, rec_len=%d, name_len=%d",
-		   dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+		   dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
 		   rec_len, ufs_get_de_namlen(sb, p));
 	goto fail;
 Eend:
@@ -178,7 +178,7 @@
 	ufs_error(sb, __func__,
 		   "entry in directory #%lu spans the page boundary"
 		   "offset=%lu",
-		   dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs);
+		   dir->i_ino, (page->index<<PAGE_SHIFT)+offs);
 fail:
 	SetPageChecked(page);
 	SetPageError(page);
@@ -211,9 +211,9 @@
 {
 	unsigned last_byte = inode->i_size;
 
-	last_byte -= page_nr << PAGE_CACHE_SHIFT;
-	if (last_byte > PAGE_CACHE_SIZE)
-		last_byte = PAGE_CACHE_SIZE;
+	last_byte -= page_nr << PAGE_SHIFT;
+	if (last_byte > PAGE_SIZE)
+		last_byte = PAGE_SIZE;
 	return last_byte;
 }
 
@@ -341,7 +341,7 @@
 		kaddr = page_address(page);
 		dir_end = kaddr + ufs_last_byte(dir, n);
 		de = (struct ufs_dir_entry *)kaddr;
-		kaddr += PAGE_CACHE_SIZE - reclen;
+		kaddr += PAGE_SIZE - reclen;
 		while ((char *)de <= kaddr) {
 			if ((char *)de == dir_end) {
 				/* We hit i_size */
@@ -432,8 +432,8 @@
 	loff_t pos = ctx->pos;
 	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
-	unsigned int offset = pos & ~PAGE_CACHE_MASK;
-	unsigned long n = pos >> PAGE_CACHE_SHIFT;
+	unsigned int offset = pos & ~PAGE_MASK;
+	unsigned long n = pos >> PAGE_SHIFT;
 	unsigned long npages = dir_pages(inode);
 	unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
 	int need_revalidate = file->f_version != inode->i_version;
@@ -454,14 +454,14 @@
 			ufs_error(sb, __func__,
 				  "bad page in #%lu",
 				  inode->i_ino);
-			ctx->pos += PAGE_CACHE_SIZE - offset;
+			ctx->pos += PAGE_SIZE - offset;
 			return -EIO;
 		}
 		kaddr = page_address(page);
 		if (unlikely(need_revalidate)) {
 			if (offset) {
 				offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask);
-				ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset;
+				ctx->pos = (n<<PAGE_SHIFT) + offset;
 			}
 			file->f_version = inode->i_version;
 			need_revalidate = 0;
@@ -574,7 +574,7 @@
 
 	kmap(page);
 	base = (char*)page_address(page);
-	memset(base, 0, PAGE_CACHE_SIZE);
+	memset(base, 0, PAGE_SIZE);
 
 	de = (struct ufs_dir_entry *) base;
 
@@ -594,7 +594,7 @@
 
 	err = ufs_commit_chunk(page, 0, chunk_size);
 fail:
-	page_cache_release(page);
+	put_page(page);
 	return err;
 }
 

diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index d897e16..9f49431 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c

@@ -1051,13 +1051,13 @@
 	lastfrag--;
 
 	lastpage = ufs_get_locked_page(mapping, lastfrag >>
-				       (PAGE_CACHE_SHIFT - inode->i_blkbits));
+				       (PAGE_SHIFT - inode->i_blkbits));
        if (IS_ERR(lastpage)) {
                err = -EIO;
                goto out;
        }
 
-       end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1);
+       end = lastfrag & ((1 << (PAGE_SHIFT - inode->i_blkbits)) - 1);
        bh = page_buffers(lastpage);
        for (i = 0; i < end; ++i)
                bh = bh->b_this_page;

diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index acf4a3b..a1559f7 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c

@@ -305,7 +305,7 @@
 			ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0);
 		else {
 			kunmap(dir_page);
-			page_cache_release(dir_page);
+			put_page(dir_page);
 		}
 		inode_dec_link_count(old_dir);
 	}
@@ -315,11 +315,11 @@
 out_dir:
 	if (dir_de) {
 		kunmap(dir_page);
-		page_cache_release(dir_page);
+		put_page(dir_page);
 	}
 out_old:
 	kunmap(old_page);
-	page_cache_release(old_page);
+	put_page(old_page);
 out:
 	return err;
 }

diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index b6c2f94..a409e3e 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c

@@ -261,14 +261,14 @@
 		if (unlikely(page->mapping == NULL)) {
 			/* Truncate got there first */
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			page = NULL;
 			goto out;
 		}
 
 		if (!PageUptodate(page) || PageError(page)) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 
 			printk(KERN_ERR "ufs_change_blocknr: "
 			       "can not read page: ino %lu, index: %lu\n",

diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 9541759..b7fbf53 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h

@@ -283,7 +283,7 @@
 static inline void ufs_put_locked_page(struct page *page)
 {
        unlock_page(page);
-       page_cache_release(page);
+       put_page(page);
 }
 
 

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index f646391..3542d94 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile

@@ -121,4 +121,5 @@
 xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
 xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
 xfs-$(CONFIG_COMPAT)		+= xfs_ioctl32.o
-xfs-$(CONFIG_NFSD_PNFS)		+= xfs_pnfs.o
+xfs-$(CONFIG_NFSD_BLOCKLAYOUT)	+= xfs_pnfs.o
+xfs-$(CONFIG_NFSD_SCSILAYOUT)	+= xfs_pnfs.o

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 041b694..ce41d7f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c

@@ -3742,11 +3742,11 @@
 		args.prod = align;
 		if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
 			args.mod = (xfs_extlen_t)(args.prod - args.mod);
-	} else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
+	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
 		args.prod = 1;
 		args.mod = 0;
 	} else {
-		args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
+		args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
 		if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
 			args.mod = (xfs_extlen_t)(args.prod - args.mod);
 	}

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index d445a64..e49b240 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c

@@ -704,7 +704,7 @@
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out_invalidate:
-	xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
 	return;
 }
 
@@ -925,9 +925,9 @@
 	 * ---------------------------------^------------------|
 	 */
 	offset = i_size_read(inode);
-	end_index = offset >> PAGE_CACHE_SHIFT;
+	end_index = offset >> PAGE_SHIFT;
 	if (page->index < end_index)
-		end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+		end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
 	else {
 		/*
 		 * Check whether the page to write out is beyond or straddles
@@ -940,7 +940,7 @@
 		 * |				    |      Straddles     |
 		 * ---------------------------------^-----------|--------|
 		 */
-		unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
+		unsigned offset_into_page = offset & (PAGE_SIZE - 1);
 
 		/*
 		 * Skip the page if it is fully outside i_size, e.g. due to a
@@ -971,7 +971,7 @@
 		 * memory is zeroed when mapped, and writes to that region are
 		 * not written out to the file."
 		 */
-		zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+		zero_user_segment(page, offset_into_page, PAGE_SIZE);
 
 		/* Adjust the end_offset to the end of file */
 		end_offset = offset;
@@ -1475,7 +1475,7 @@
 	loff_t			block_offset;
 	loff_t			block_start;
 	loff_t			block_end;
-	loff_t			from = pos & (PAGE_CACHE_SIZE - 1);
+	loff_t			from = pos & (PAGE_SIZE - 1);
 	loff_t			to = from + len;
 	struct buffer_head	*bh, *head;
 	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
@@ -1491,7 +1491,7 @@
 	 * start of the page by using shifts rather than masks the mismatch
 	 * problem.
 	 */
-	block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT;
+	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
 
 	ASSERT(block_offset + from == pos);
 
@@ -1558,12 +1558,12 @@
 	struct page		**pagep,
 	void			**fsdata)
 {
-	pgoff_t			index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t			index = pos >> PAGE_SHIFT;
 	struct page		*page;
 	int			status;
 	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
 
-	ASSERT(len <= PAGE_CACHE_SIZE);
+	ASSERT(len <= PAGE_SIZE);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
@@ -1592,7 +1592,7 @@
 			truncate_pagecache_range(inode, start, pos + len);
 		}
 
-		page_cache_release(page);
+		put_page(page);
 		page = NULL;
 	}
 
@@ -1620,7 +1620,7 @@
 {
 	int			ret;
 
-	ASSERT(len <= PAGE_CACHE_SIZE);
+	ASSERT(len <= PAGE_SIZE);
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
 	if (unlikely(ret < len)) {

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a32c1dc..3b63098 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c

@@ -1237,7 +1237,7 @@
 	/* wait for the completion of any pending DIOs */
 	inode_dio_wait(VFS_I(ip));
 
-	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
 	ioffset = round_down(offset, rounding);
 	iendoffset = round_up(offset + len, rounding) - 1;
 	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
@@ -1466,7 +1466,7 @@
 	if (error)
 		return error;
 	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-					offset >> PAGE_CACHE_SHIFT, -1);
+					offset >> PAGE_SHIFT, -1);
 	if (error)
 		return error;
 

diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 2816d42..a1b2dd8 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c

@@ -246,7 +246,7 @@
 	.fh_to_parent		= xfs_fs_fh_to_parent,
 	.get_parent		= xfs_fs_get_parent,
 	.commit_metadata	= xfs_fs_nfs_commit_metadata,
-#ifdef CONFIG_NFSD_PNFS
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
 	.get_uuid		= xfs_fs_get_uuid,
 	.map_blocks		= xfs_fs_map_blocks,
 	.commit_blocks		= xfs_fs_commit_blocks,

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ac0fd32..569938a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c

@@ -106,8 +106,8 @@
 		unsigned offset, bytes;
 		void *fsdata;
 
-		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-		bytes = PAGE_CACHE_SIZE - offset;
+		offset = (pos & (PAGE_SIZE -1)); /* Within page */
+		bytes = PAGE_SIZE - offset;
 		if (bytes > count)
 			bytes = count;
 
@@ -799,8 +799,8 @@
 	/* see generic_file_direct_write() for why this is necessary */
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
-					      pos >> PAGE_CACHE_SHIFT,
-					      end >> PAGE_CACHE_SHIFT);
+					      pos >> PAGE_SHIFT,
+					      end >> PAGE_SHIFT);
 	}
 
 	if (ret > 0) {
@@ -1207,9 +1207,9 @@
 
 	pagevec_init(&pvec, 0);
 
-	index = startoff >> PAGE_CACHE_SHIFT;
+	index = startoff >> PAGE_SHIFT;
 	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
-	end = endoff >> PAGE_CACHE_SHIFT;
+	end = endoff >> PAGE_SHIFT;
 	do {
 		int		want;
 		unsigned	nr_pages;

diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ec0e239..a8192dc 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h

@@ -135,7 +135,7 @@
  * Size of block device i/o is parameterized here.
  * Currently the system supports page-sized i/o.
  */
-#define	BLKDEV_IOSHIFT		PAGE_CACHE_SHIFT
+#define	BLKDEV_IOSHIFT		PAGE_SHIFT
 #define	BLKDEV_IOSIZE		(1<<BLKDEV_IOSHIFT)
 /* number of BB's per block device block */
 #define	BLKDEV_BB		BTOBB(BLKDEV_IOSIZE)

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 536a0ee..cfd4210 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c

@@ -171,7 +171,7 @@
 	ASSERT(sbp->sb_blocklog >= BBSHIFT);
 
 	/* Limited by ULONG_MAX of page cache index */
-	if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
+	if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
 		return -EFBIG;
 	return 0;
 }

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bac6b34..eafe257 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h

@@ -231,12 +231,12 @@
 xfs_preferred_iosize(xfs_mount_t *mp)
 {
 	if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
-		return PAGE_CACHE_SIZE;
+		return PAGE_SIZE;
 	return (mp->m_swidth ?
 		(mp->m_swidth << mp->m_sb.sb_blocklog) :
 		((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ?
 			(1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) :
-			PAGE_CACHE_SIZE));
+			PAGE_SIZE));
 }
 
 #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\

diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index ade236e..51ddaf2 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c

@@ -293,8 +293,8 @@
 		 * Make sure reads through the pagecache see the new data.
 		 */
 		error = invalidate_inode_pages2_range(inode->i_mapping,
-					start >> PAGE_CACHE_SHIFT,
-					(end - 1) >> PAGE_CACHE_SHIFT);
+					start >> PAGE_SHIFT,
+					(end - 1) >> PAGE_SHIFT);
 		WARN_ON_ONCE(error);
 
 		error = xfs_iomap_write_unwritten(ip, start, length);

diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h
index 8147ac1..93f7485 100644
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h

@@ -1,7 +1,7 @@
 #ifndef _XFS_PNFS_H
 #define _XFS_PNFS_H 1
 
-#ifdef CONFIG_NFSD_PNFS
+#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
 int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
 int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
 		struct iomap *iomap, bool write, u32 *device_generation);

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d760934..187e14b 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c

@@ -556,10 +556,10 @@
 	/* Figure out maximum filesize, on Linux this can depend on
 	 * the filesystem blocksize (on 32 bit platforms).
 	 * __block_write_begin does this in an [unsigned] long...
-	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
+	 *      page->index << (PAGE_SHIFT - bbits)
 	 * So, for page sized blocks (4K on 32 bit platforms),
 	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
-	 *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+	 *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
 	 * but for smaller blocksizes it is less (bbits = log2 bsize).
 	 * Note1: get_block_t takes a long (implicit cast from above)
 	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
@@ -570,10 +570,10 @@
 #if BITS_PER_LONG == 32
 # if defined(CONFIG_LBDAF)
 	ASSERT(sizeof(sector_t) == 8);
-	pagefactor = PAGE_CACHE_SIZE;
+	pagefactor = PAGE_SIZE;
 	bitshift = BITS_PER_LONG;
 # else
-	pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+	pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
 # endif
 #endif
 

diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index c30266e..8ef0ccb 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h

@@ -29,16 +29,16 @@
  * @nr: the bit to set
  * @addr: the address to start counting from
  *
- * This operation is like clear_bit_unlock, however it is not atomic.
- * It does provide release barrier semantics so it can be used to unlock
- * a bit lock, however it would only be used if no other CPU can modify
- * any bits in the memory until the lock is released (a good example is
- * if the bit lock itself protects access to the other bits in the word).
+ * A weaker form of clear_bit_unlock() as used by __bit_lock_unlock(). If all
+ * the bits in the word are protected by this lock some archs can use weaker
+ * ops to safely unlock.
+ *
+ * See for example x86's implementation.
  */
 #define __clear_bit_unlock(nr, addr)	\
 do {					\
-	smp_mb();			\
-	__clear_bit(nr, addr);		\
+	smp_mb__before_atomic();	\
+	clear_bit(nr, addr);		\
 } while (0)
 
 #endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index f90588a..6f96247 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h

@@ -151,7 +151,7 @@
 #endif
 
 #ifndef HAVE_ARCH_BUG_ON
-#define BUG_ON(condition) do { if (condition) ; } while (0)
+#define BUG_ON(condition) do { if (condition) BUG(); } while (0)
 #endif
 
 #ifndef HAVE_ARCH_WARN_ON

diff --git a/include/asm-generic/io-64-nonatomic-hi-lo.h b/include/asm-generic/io-64-nonatomic-hi-lo.h
deleted file mode 100644
index 32b73ab..0000000
--- a/include/asm-generic/io-64-nonatomic-hi-lo.h
+++ /dev/null

@@ -1,2 +0,0 @@
-/* XXX: delete asm-generic/io-64-nonatomic-hi-lo.h after converting new users */
-#include <linux/io-64-nonatomic-hi-lo.h>

diff --git a/include/asm-generic/io-64-nonatomic-lo-hi.h b/include/asm-generic/io-64-nonatomic-lo-hi.h
deleted file mode 100644
index 55a627c..0000000
--- a/include/asm-generic/io-64-nonatomic-lo-hi.h
+++ /dev/null

@@ -1,2 +0,0 @@
-/* XXX: delete asm-generic/io-64-nonatomic-lo-hi.h after converting new users */
-#include <linux/io-64-nonatomic-lo-hi.h>

diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h
index 37d1fe2..67cfb7d 100644
--- a/include/asm-generic/page.h
+++ b/include/asm-generic/page.h

@@ -24,9 +24,6 @@
 
 #ifndef __ASSEMBLY__
 
-#define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
-#define free_user_page(page, addr)	free_page(addr)
-
 #define clear_page(page)	memset((page), 0, PAGE_SIZE)
 #define copy_page(to,from)	memcpy((to), (from), PAGE_SIZE)
 

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8f5a12a..339125b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h

@@ -456,7 +456,7 @@
 		*(.entry.text)						\
 		VMLINUX_SYMBOL(__entry_text_end) = .;
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
 #define IRQENTRY_TEXT							\
 		ALIGN_FUNCTION();					\
 		VMLINUX_SYMBOL(__irqentry_text_start) = .;		\
@@ -466,6 +466,16 @@
 #define IRQENTRY_TEXT
 #endif
 
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+#define SOFTIRQENTRY_TEXT						\
+		ALIGN_FUNCTION();					\
+		VMLINUX_SYMBOL(__softirqentry_text_start) = .;		\
+		*(.softirqentry.text)					\
+		VMLINUX_SYMBOL(__softirqentry_text_end) = .;
+#else
+#define SOFTIRQENTRY_TEXT
+#endif
+
 /* Section used for early init (in .S files) */
 #define HEAD_TEXT  *(.head.text)
 

diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index afae231..055a08d 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h

@@ -92,7 +92,7 @@
  */
 struct ttm_bus_placement {
 	void		*addr;
-	unsigned long	base;
+	phys_addr_t	base;
 	unsigned long	size;
 	unsigned long	offset;
 	bool		is_iomem;

diff --git a/include/dt-bindings/clock/bcm-cygnus.h b/include/dt-bindings/clock/bcm-cygnus.h
index 32fbc47..62ac5d7 100644
--- a/include/dt-bindings/clock/bcm-cygnus.h
+++ b/include/dt-bindings/clock/bcm-cygnus.h

@@ -65,4 +65,10 @@
 #define BCM_CYGNUS_ASIU_ADC_CLK       1
 #define BCM_CYGNUS_ASIU_PWM_CLK       2
 
+/* AUDIO clock ID */
+#define BCM_CYGNUS_AUDIOPLL           0
+#define BCM_CYGNUS_AUDIOPLL_CH0       1
+#define BCM_CYGNUS_AUDIOPLL_CH1       2
+#define BCM_CYGNUS_AUDIOPLL_CH2       3
+
 #endif /* _CLOCK_BCM_CYGNUS_H */

diff --git a/include/dt-bindings/clock/exynos5433.h b/include/dt-bindings/clock/exynos5433.h
index 5bd80d5..8e024fe 100644
--- a/include/dt-bindings/clock/exynos5433.h
+++ b/include/dt-bindings/clock/exynos5433.h

@@ -765,7 +765,12 @@
 #define CLK_SCLK_RGB_VCLK				109
 #define CLK_SCLK_RGB_TV_VCLK				110
 
-#define DISP_NR_CLK					111
+#define CLK_PHYCLK_HDMIPHY_PIXEL_CLKO_PHY		111
+#define CLK_PHYCLK_HDMIPHY_TMDS_CLKO_PHY		112
+
+#define CLK_PCLK_DECON					113
+
+#define DISP_NR_CLK					114
 
 /* CMU_AUD */
 #define CLK_MOUT_AUD_PLL_USER				1
@@ -1298,7 +1303,7 @@
 #define CLK_MOUT_ACLK_LITE_C_B				13
 #define CLK_MOUT_ACLK_LITE_C_A				14
 
-#define CLK_DIV_SCLK_ISP_WPWM				15
+#define CLK_DIV_SCLK_ISP_MPWM				15
 #define CLK_DIV_PCLK_CAM1_83				16
 #define CLK_DIV_PCLK_CAM1_166				17
 #define CLK_DIV_PCLK_DBG_CAM1				18

diff --git a/include/dt-bindings/clock/imx6qdl-clock.h b/include/dt-bindings/clock/imx6qdl-clock.h
index 77985cc..2905033 100644
--- a/include/dt-bindings/clock/imx6qdl-clock.h
+++ b/include/dt-bindings/clock/imx6qdl-clock.h

@@ -255,6 +255,20 @@
 #define IMX6QDL_CLK_CAAM_ACLK			242
 #define IMX6QDL_CLK_CAAM_IPG			243
 #define IMX6QDL_CLK_SPDIF_GCLK			244
-#define IMX6QDL_CLK_END				245
+#define IMX6QDL_CLK_UART_SEL			245
+#define IMX6QDL_CLK_IPG_PER_SEL			246
+#define IMX6QDL_CLK_ECSPI_SEL			247
+#define IMX6QDL_CLK_CAN_SEL			248
+#define IMX6QDL_CLK_MMDC_CH1_AXI_CG		249
+#define IMX6QDL_CLK_PRE0			250
+#define IMX6QDL_CLK_PRE1			251
+#define IMX6QDL_CLK_PRE2			252
+#define IMX6QDL_CLK_PRE3			253
+#define IMX6QDL_CLK_PRG0_AXI			254
+#define IMX6QDL_CLK_PRG1_AXI			255
+#define IMX6QDL_CLK_PRG0_APB			256
+#define IMX6QDL_CLK_PRG1_APB			257
+#define IMX6QDL_CLK_PRE_AXI			258
+#define IMX6QDL_CLK_END				259
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6QDL_H */

diff --git a/include/dt-bindings/clock/imx6ul-clock.h b/include/dt-bindings/clock/imx6ul-clock.h
index c343894..fd8aee8 100644
--- a/include/dt-bindings/clock/imx6ul-clock.h
+++ b/include/dt-bindings/clock/imx6ul-clock.h

@@ -21,13 +21,13 @@
 #define IMX6UL_PLL5_BYPASS_SRC		8
 #define IMX6UL_PLL6_BYPASS_SRC		9
 #define IMX6UL_PLL7_BYPASS_SRC		10
-#define IMX6UL_CLK_PLL1 		11
-#define IMX6UL_CLK_PLL2 		12
-#define IMX6UL_CLK_PLL3 		13
-#define IMX6UL_CLK_PLL4 		14
-#define IMX6UL_CLK_PLL5 		15
-#define IMX6UL_CLK_PLL6 		16
-#define IMX6UL_CLK_PLL7 		17
+#define IMX6UL_CLK_PLL1			11
+#define IMX6UL_CLK_PLL2			12
+#define IMX6UL_CLK_PLL3			13
+#define IMX6UL_CLK_PLL4			14
+#define IMX6UL_CLK_PLL5			15
+#define IMX6UL_CLK_PLL6			16
+#define IMX6UL_CLK_PLL7			17
 #define IMX6UL_PLL1_BYPASS		18
 #define IMX6UL_PLL2_BYPASS		19
 #define IMX6UL_PLL3_BYPASS		20
@@ -37,7 +37,7 @@
 #define IMX6UL_PLL7_BYPASS		24
 #define IMX6UL_CLK_PLL1_SYS		25
 #define IMX6UL_CLK_PLL2_BUS		26
-#define IMX6UL_CLK_PLL3_USB_OTG 	27
+#define IMX6UL_CLK_PLL3_USB_OTG		27
 #define IMX6UL_CLK_PLL4_AUDIO		28
 #define IMX6UL_CLK_PLL5_VIDEO		29
 #define IMX6UL_CLK_PLL6_ENET		30
@@ -66,7 +66,7 @@
 #define IMX6UL_CLK_PLL2_198M		53
 #define IMX6UL_CLK_PLL3_80M		54
 #define IMX6UL_CLK_PLL3_60M		55
-#define IMX6UL_CLK_STEP 		56
+#define IMX6UL_CLK_STEP			56
 #define IMX6UL_CLK_PLL1_SW		57
 #define IMX6UL_CLK_AXI_ALT_SEL		58
 #define IMX6UL_CLK_AXI_SEL		59
@@ -78,7 +78,7 @@
 #define IMX6UL_CLK_USDHC2_SEL		65
 #define IMX6UL_CLK_BCH_SEL		66
 #define IMX6UL_CLK_GPMI_SEL		67
-#define IMX6UL_CLK_EIM_SLOW_SEL 	68
+#define IMX6UL_CLK_EIM_SLOW_SEL		68
 #define IMX6UL_CLK_SPDIF_SEL		69
 #define IMX6UL_CLK_SAI1_SEL		70
 #define IMX6UL_CLK_SAI2_SEL		71
@@ -105,9 +105,9 @@
 #define IMX6UL_CLK_LDB_DI1_DIV_SEL	92
 #define IMX6UL_CLK_ARM			93
 #define IMX6UL_CLK_PERIPH_CLK2		94
-#define IMX6UL_CLK_PERIPH2_CLK2 	95
+#define IMX6UL_CLK_PERIPH2_CLK2		95
 #define IMX6UL_CLK_AHB			96
-#define IMX6UL_CLK_MMDC_PODF 		97
+#define IMX6UL_CLK_MMDC_PODF		97
 #define IMX6UL_CLK_AXI_PODF		98
 #define IMX6UL_CLK_PERCLK		99
 #define IMX6UL_CLK_IPG			100
@@ -133,16 +133,16 @@
 #define IMX6UL_CLK_CAN_PODF		120
 #define IMX6UL_CLK_ECSPI_PODF		121
 #define IMX6UL_CLK_UART_PODF		122
-#define IMX6UL_CLK_ADC1 		123
-#define IMX6UL_CLK_ADC2 		124
+#define IMX6UL_CLK_ADC1			123
+#define IMX6UL_CLK_ADC2			124
 #define IMX6UL_CLK_AIPSTZ1		125
 #define IMX6UL_CLK_AIPSTZ2		126
 #define IMX6UL_CLK_AIPSTZ3		127
 #define IMX6UL_CLK_APBHDMA		128
 #define IMX6UL_CLK_ASRC_IPG		129
 #define IMX6UL_CLK_ASRC_MEM		130
-#define IMX6UL_CLK_GPMI_BCH_APB  	131
-#define IMX6UL_CLK_GPMI_BCH 		132
+#define IMX6UL_CLK_GPMI_BCH_APB		131
+#define IMX6UL_CLK_GPMI_BCH		132
 #define IMX6UL_CLK_GPMI_IO		133
 #define IMX6UL_CLK_GPMI_APB		134
 #define IMX6UL_CLK_CAAM_MEM		135
@@ -154,7 +154,7 @@
 #define IMX6UL_CLK_ECSPI3		141
 #define IMX6UL_CLK_ECSPI4		142
 #define IMX6UL_CLK_EIM			143
-#define IMX6UL_CLK_ENET  		144
+#define IMX6UL_CLK_ENET			144
 #define IMX6UL_CLK_ENET_AHB		145
 #define IMX6UL_CLK_EPIT1		146
 #define IMX6UL_CLK_EPIT2		147
@@ -166,63 +166,63 @@
 #define IMX6UL_CLK_GPT1_SERIAL		153
 #define IMX6UL_CLK_GPT2_BUS		154
 #define IMX6UL_CLK_GPT2_SERIAL		155
-#define IMX6UL_CLK_I2C1 		156
-#define IMX6UL_CLK_I2C2 		157
-#define IMX6UL_CLK_I2C3 		158
-#define IMX6UL_CLK_I2C4 		159
-#define IMX6UL_CLK_IOMUXC 		160
-#define IMX6UL_CLK_LCDIF_APB 		161
-#define IMX6UL_CLK_LCDIF_PIX 		162
-#define IMX6UL_CLK_MMDC_P0_FAST 	163
-#define IMX6UL_CLK_MMDC_P0_IPG  	164
-#define IMX6UL_CLK_OCOTP 		165
-#define IMX6UL_CLK_OCRAM 		166
-#define IMX6UL_CLK_PWM1 		167
-#define IMX6UL_CLK_PWM2 		168
-#define IMX6UL_CLK_PWM3 		169
-#define IMX6UL_CLK_PWM4 		170
-#define IMX6UL_CLK_PWM5 		171
-#define IMX6UL_CLK_PWM6 		172
-#define IMX6UL_CLK_PWM7 		173
-#define IMX6UL_CLK_PWM8 		174
-#define IMX6UL_CLK_PXP  		175
-#define IMX6UL_CLK_QSPI 		176
-#define IMX6UL_CLK_ROM  		177
-#define IMX6UL_CLK_SAI1 		178
-#define IMX6UL_CLK_SAI1_IPG 		179
-#define IMX6UL_CLK_SAI2 		180
-#define IMX6UL_CLK_SAI2_IPG 		181
-#define IMX6UL_CLK_SAI3 		182
-#define IMX6UL_CLK_SAI3_IPG 		183
-#define IMX6UL_CLK_SDMA 		184
-#define IMX6UL_CLK_SIM  		185
-#define IMX6UL_CLK_SIM_S 		186
-#define IMX6UL_CLK_SPBA 		187
-#define IMX6UL_CLK_SPDIF 		188
-#define IMX6UL_CLK_UART1_IPG 		189
-#define IMX6UL_CLK_UART1_SERIAL 	190
-#define IMX6UL_CLK_UART2_IPG 		191
-#define IMX6UL_CLK_UART2_SERIAL 	192
-#define IMX6UL_CLK_UART3_IPG 		193
-#define IMX6UL_CLK_UART3_SERIAL 	194
-#define IMX6UL_CLK_UART4_IPG 		195
-#define IMX6UL_CLK_UART4_SERIAL 	196
-#define IMX6UL_CLK_UART5_IPG 		197
-#define IMX6UL_CLK_UART5_SERIAL 	198
-#define IMX6UL_CLK_UART6_IPG 		199
-#define IMX6UL_CLK_UART6_SERIAL 	200
-#define IMX6UL_CLK_UART7_IPG 		201
-#define IMX6UL_CLK_UART7_SERIAL 	202
-#define IMX6UL_CLK_UART8_IPG 		203
-#define IMX6UL_CLK_UART8_SERIAL 	204
-#define IMX6UL_CLK_USBOH3 		205
-#define IMX6UL_CLK_USDHC1 		206
-#define IMX6UL_CLK_USDHC2 		207
-#define IMX6UL_CLK_WDOG1 		208
-#define IMX6UL_CLK_WDOG2 		209
-#define IMX6UL_CLK_WDOG3 		210
+#define IMX6UL_CLK_I2C1			156
+#define IMX6UL_CLK_I2C2			157
+#define IMX6UL_CLK_I2C3			158
+#define IMX6UL_CLK_I2C4			159
+#define IMX6UL_CLK_IOMUXC		160
+#define IMX6UL_CLK_LCDIF_APB		161
+#define IMX6UL_CLK_LCDIF_PIX		162
+#define IMX6UL_CLK_MMDC_P0_FAST		163
+#define IMX6UL_CLK_MMDC_P0_IPG		164
+#define IMX6UL_CLK_OCOTP		165
+#define IMX6UL_CLK_OCRAM		166
+#define IMX6UL_CLK_PWM1			167
+#define IMX6UL_CLK_PWM2			168
+#define IMX6UL_CLK_PWM3			169
+#define IMX6UL_CLK_PWM4			170
+#define IMX6UL_CLK_PWM5			171
+#define IMX6UL_CLK_PWM6			172
+#define IMX6UL_CLK_PWM7			173
+#define IMX6UL_CLK_PWM8			174
+#define IMX6UL_CLK_PXP			175
+#define IMX6UL_CLK_QSPI			176
+#define IMX6UL_CLK_ROM			177
+#define IMX6UL_CLK_SAI1			178
+#define IMX6UL_CLK_SAI1_IPG		179
+#define IMX6UL_CLK_SAI2			180
+#define IMX6UL_CLK_SAI2_IPG		181
+#define IMX6UL_CLK_SAI3			182
+#define IMX6UL_CLK_SAI3_IPG		183
+#define IMX6UL_CLK_SDMA			184
+#define IMX6UL_CLK_SIM			185
+#define IMX6UL_CLK_SIM_S		186
+#define IMX6UL_CLK_SPBA			187
+#define IMX6UL_CLK_SPDIF		188
+#define IMX6UL_CLK_UART1_IPG		189
+#define IMX6UL_CLK_UART1_SERIAL		190
+#define IMX6UL_CLK_UART2_IPG		191
+#define IMX6UL_CLK_UART2_SERIAL		192
+#define IMX6UL_CLK_UART3_IPG		193
+#define IMX6UL_CLK_UART3_SERIAL		194
+#define IMX6UL_CLK_UART4_IPG		195
+#define IMX6UL_CLK_UART4_SERIAL		196
+#define IMX6UL_CLK_UART5_IPG		197
+#define IMX6UL_CLK_UART5_SERIAL		198
+#define IMX6UL_CLK_UART6_IPG		199
+#define IMX6UL_CLK_UART6_SERIAL		200
+#define IMX6UL_CLK_UART7_IPG		201
+#define IMX6UL_CLK_UART7_SERIAL		202
+#define IMX6UL_CLK_UART8_IPG		203
+#define IMX6UL_CLK_UART8_SERIAL		204
+#define IMX6UL_CLK_USBOH3		205
+#define IMX6UL_CLK_USDHC1		206
+#define IMX6UL_CLK_USDHC2		207
+#define IMX6UL_CLK_WDOG1		208
+#define IMX6UL_CLK_WDOG2		209
+#define IMX6UL_CLK_WDOG3		210
 #define IMX6UL_CLK_LDB_DI0		211
-#define IMX6UL_CLK_AXI  		212
+#define IMX6UL_CLK_AXI			212
 #define IMX6UL_CLK_SPDIF_GCLK		213
 #define IMX6UL_CLK_GPT_3M		214
 #define IMX6UL_CLK_SIM2			215
@@ -234,7 +234,8 @@
 #define IMX6UL_CLK_CSI_SEL		221
 #define IMX6UL_CLK_CSI_PODF		222
 #define IMX6UL_CLK_PLL3_120M		223
+#define IMX6UL_CLK_KPP			224
 
-#define IMX6UL_CLK_END			224
+#define IMX6UL_CLK_END			225
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6UL_H */

diff --git a/include/dt-bindings/clock/lpc32xx-clock.h b/include/dt-bindings/clock/lpc32xx-clock.h
index bcb1c9a..d41b6fe 100644
--- a/include/dt-bindings/clock/lpc32xx-clock.h
+++ b/include/dt-bindings/clock/lpc32xx-clock.h

@@ -47,6 +47,7 @@
 #define LPC32XX_CLK_PWM1	32
 #define LPC32XX_CLK_PWM2	33
 #define LPC32XX_CLK_ADC		34
+#define LPC32XX_CLK_HCLK_PLL	35
 
 /* LPC32XX USB clocks */
 #define LPC32XX_USB_CLK_I2C	1

diff --git a/include/dt-bindings/clock/qcom,gcc-ipq4019.h b/include/dt-bindings/clock/qcom,gcc-ipq4019.h
new file mode 100644
index 0000000..6240e5b
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-ipq4019.h

@@ -0,0 +1,158 @@
+/* Copyright (c) 2015 The Linux Foundation. All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+#ifndef __QCOM_CLK_IPQ4019_H__
+#define __QCOM_CLK_IPQ4019_H__
+
+#define GCC_DUMMY_CLK					0
+#define AUDIO_CLK_SRC					1
+#define BLSP1_QUP1_I2C_APPS_CLK_SRC			2
+#define BLSP1_QUP1_SPI_APPS_CLK_SRC			3
+#define BLSP1_QUP2_I2C_APPS_CLK_SRC			4
+#define BLSP1_QUP2_SPI_APPS_CLK_SRC			5
+#define BLSP1_UART1_APPS_CLK_SRC			6
+#define BLSP1_UART2_APPS_CLK_SRC			7
+#define GCC_USB3_MOCK_UTMI_CLK_SRC			8
+#define GCC_APPS_CLK_SRC				9
+#define GCC_APPS_AHB_CLK_SRC				10
+#define GP1_CLK_SRC					11
+#define GP2_CLK_SRC					12
+#define GP3_CLK_SRC					13
+#define SDCC1_APPS_CLK_SRC				14
+#define FEPHY_125M_DLY_CLK_SRC				15
+#define WCSS2G_CLK_SRC					16
+#define WCSS5G_CLK_SRC					17
+#define GCC_APSS_AHB_CLK				18
+#define GCC_AUDIO_AHB_CLK				19
+#define GCC_AUDIO_PWM_CLK				20
+#define GCC_BLSP1_AHB_CLK				21
+#define GCC_BLSP1_QUP1_I2C_APPS_CLK			22
+#define GCC_BLSP1_QUP1_SPI_APPS_CLK			23
+#define GCC_BLSP1_QUP2_I2C_APPS_CLK			24
+#define GCC_BLSP1_QUP2_SPI_APPS_CLK			25
+#define GCC_BLSP1_UART1_APPS_CLK			26
+#define GCC_BLSP1_UART2_APPS_CLK			27
+#define GCC_DCD_XO_CLK					28
+#define GCC_GP1_CLK					29
+#define GCC_GP2_CLK					30
+#define GCC_GP3_CLK					31
+#define GCC_BOOT_ROM_AHB_CLK				32
+#define GCC_CRYPTO_AHB_CLK				33
+#define GCC_CRYPTO_AXI_CLK				34
+#define GCC_CRYPTO_CLK					35
+#define GCC_ESS_CLK					36
+#define GCC_IMEM_AXI_CLK				37
+#define GCC_IMEM_CFG_AHB_CLK				38
+#define GCC_PCIE_AHB_CLK				39
+#define GCC_PCIE_AXI_M_CLK				40
+#define GCC_PCIE_AXI_S_CLK				41
+#define GCC_PCNOC_AHB_CLK				42
+#define GCC_PRNG_AHB_CLK				43
+#define GCC_QPIC_AHB_CLK				44
+#define GCC_QPIC_CLK					45
+#define GCC_SDCC1_AHB_CLK				46
+#define GCC_SDCC1_APPS_CLK				47
+#define GCC_SNOC_PCNOC_AHB_CLK				48
+#define GCC_SYS_NOC_125M_CLK				49
+#define GCC_SYS_NOC_AXI_CLK				50
+#define GCC_TCSR_AHB_CLK				51
+#define GCC_TLMM_AHB_CLK				52
+#define GCC_USB2_MASTER_CLK				53
+#define GCC_USB2_SLEEP_CLK				54
+#define GCC_USB2_MOCK_UTMI_CLK				55
+#define GCC_USB3_MASTER_CLK				56
+#define GCC_USB3_SLEEP_CLK				57
+#define GCC_USB3_MOCK_UTMI_CLK				58
+#define GCC_WCSS2G_CLK					59
+#define GCC_WCSS2G_REF_CLK				60
+#define GCC_WCSS2G_RTC_CLK				61
+#define GCC_WCSS5G_CLK					62
+#define GCC_WCSS5G_REF_CLK				63
+#define GCC_WCSS5G_RTC_CLK				64
+
+#define WIFI0_CPU_INIT_RESET				0
+#define WIFI0_RADIO_SRIF_RESET				1
+#define WIFI0_RADIO_WARM_RESET				2
+#define WIFI0_RADIO_COLD_RESET				3
+#define WIFI0_CORE_WARM_RESET				4
+#define WIFI0_CORE_COLD_RESET				5
+#define WIFI1_CPU_INIT_RESET				6
+#define WIFI1_RADIO_SRIF_RESET				7
+#define WIFI1_RADIO_WARM_RESET				8
+#define WIFI1_RADIO_COLD_RESET				9
+#define WIFI1_CORE_WARM_RESET				10
+#define WIFI1_CORE_COLD_RESET				11
+#define USB3_UNIPHY_PHY_ARES				12
+#define USB3_HSPHY_POR_ARES				13
+#define USB3_HSPHY_S_ARES				14
+#define USB2_HSPHY_POR_ARES				15
+#define USB2_HSPHY_S_ARES				16
+#define PCIE_PHY_AHB_ARES				17
+#define PCIE_AHB_ARES					18
+#define PCIE_PWR_ARES					19
+#define PCIE_PIPE_STICKY_ARES				20
+#define PCIE_AXI_M_STICKY_ARES				21
+#define PCIE_PHY_ARES					22
+#define PCIE_PARF_XPU_ARES				23
+#define PCIE_AXI_S_XPU_ARES				24
+#define PCIE_AXI_M_VMIDMT_ARES				25
+#define PCIE_PIPE_ARES					26
+#define PCIE_AXI_S_ARES					27
+#define PCIE_AXI_M_ARES					28
+#define ESS_RESET					29
+#define GCC_BLSP1_BCR					30
+#define GCC_BLSP1_QUP1_BCR				31
+#define GCC_BLSP1_UART1_BCR				32
+#define GCC_BLSP1_QUP2_BCR				33
+#define GCC_BLSP1_UART2_BCR				34
+#define GCC_BIMC_BCR					35
+#define GCC_TLMM_BCR					36
+#define GCC_IMEM_BCR					37
+#define GCC_ESS_BCR					38
+#define GCC_PRNG_BCR					39
+#define GCC_BOOT_ROM_BCR				40
+#define GCC_CRYPTO_BCR					41
+#define GCC_SDCC1_BCR					42
+#define GCC_SEC_CTRL_BCR				43
+#define GCC_AUDIO_BCR					44
+#define GCC_QPIC_BCR					45
+#define GCC_PCIE_BCR					46
+#define GCC_USB2_BCR					47
+#define GCC_USB2_PHY_BCR				48
+#define GCC_USB3_BCR					49
+#define GCC_USB3_PHY_BCR				50
+#define GCC_SYSTEM_NOC_BCR				51
+#define GCC_PCNOC_BCR					52
+#define GCC_DCD_BCR					53
+#define GCC_SNOC_BUS_TIMEOUT0_BCR			54
+#define GCC_SNOC_BUS_TIMEOUT1_BCR			55
+#define GCC_SNOC_BUS_TIMEOUT2_BCR			56
+#define GCC_SNOC_BUS_TIMEOUT3_BCR			57
+#define GCC_PCNOC_BUS_TIMEOUT0_BCR			58
+#define GCC_PCNOC_BUS_TIMEOUT1_BCR			59
+#define GCC_PCNOC_BUS_TIMEOUT2_BCR			60
+#define GCC_PCNOC_BUS_TIMEOUT3_BCR			61
+#define GCC_PCNOC_BUS_TIMEOUT4_BCR			62
+#define GCC_PCNOC_BUS_TIMEOUT5_BCR			63
+#define GCC_PCNOC_BUS_TIMEOUT6_BCR			64
+#define GCC_PCNOC_BUS_TIMEOUT7_BCR			65
+#define GCC_PCNOC_BUS_TIMEOUT8_BCR			66
+#define GCC_PCNOC_BUS_TIMEOUT9_BCR			67
+#define GCC_TCSR_BCR					68
+#define GCC_QDSS_BCR					69
+#define GCC_MPM_BCR					70
+#define GCC_SPDM_BCR					71
+
+#endif

diff --git a/include/dt-bindings/clock/qcom,gcc-msm8916.h b/include/dt-bindings/clock/qcom,gcc-msm8916.h
index 257e2fb..28a27a4 100644
--- a/include/dt-bindings/clock/qcom,gcc-msm8916.h
+++ b/include/dt-bindings/clock/qcom,gcc-msm8916.h

@@ -174,6 +174,7 @@
 #define GCC_ULTAUDIO_LPAIF_SEC_I2S_CLK		157
 #define GCC_ULTAUDIO_LPAIF_AUX_I2S_CLK		158
 #define GCC_CODEC_DIGCODEC_CLK			159
+#define GCC_MSS_Q6_BIMC_AXI_CLK			160
 
 /* Indexes for GDSCs */
 #define BIMC_GDSC				0

diff --git a/include/dt-bindings/clock/qcom,gcc-msm8996.h b/include/dt-bindings/clock/qcom,gcc-msm8996.h
index 888e75c..6f814db 100644
--- a/include/dt-bindings/clock/qcom,gcc-msm8996.h
+++ b/include/dt-bindings/clock/qcom,gcc-msm8996.h

@@ -336,4 +336,15 @@
 #define GCC_MSS_Q6_BCR						99
 #define GCC_QREFS_VBG_CAL_BCR					100
 
+/* Indexes for GDSCs */
+#define AGGRE0_NOC_GDSC			0
+#define HLOS1_VOTE_AGGRE0_NOC_GDSC	1
+#define HLOS1_VOTE_LPASS_ADSP_GDSC	2
+#define HLOS1_VOTE_LPASS_CORE_GDSC	3
+#define USB30_GDSC			4
+#define PCIE0_GDSC			5
+#define PCIE1_GDSC			6
+#define PCIE2_GDSC			7
+#define UFS_GDSC			8
+
 #endif

diff --git a/include/dt-bindings/clock/qcom,mmcc-msm8996.h b/include/dt-bindings/clock/qcom,mmcc-msm8996.h
index 9b81ca6..7d3a7fa 100644
--- a/include/dt-bindings/clock/qcom,mmcc-msm8996.h
+++ b/include/dt-bindings/clock/qcom,mmcc-msm8996.h

@@ -282,4 +282,21 @@
 #define FD_BCR						58
 #define MMSS_SPDM_RM_BCR				59
 
+/* Indexes for GDSCs */
+#define MMAGIC_VIDEO_GDSC	0
+#define MMAGIC_MDSS_GDSC	1
+#define MMAGIC_CAMSS_GDSC	2
+#define GPU_GDSC		3
+#define VENUS_GDSC		4
+#define VENUS_CORE0_GDSC	5
+#define VENUS_CORE1_GDSC	6
+#define CAMSS_GDSC		7
+#define VFE0_GDSC		8
+#define VFE1_GDSC		9
+#define JPEG_GDSC		10
+#define CPP_GDSC		11
+#define FD_GDSC			12
+#define MDSS_GDSC		13
+#define GPU_GX_GDSC		14
+
 #endif

diff --git a/include/dt-bindings/clock/rk3228-cru.h b/include/dt-bindings/clock/rk3228-cru.h
index a78dd89..5d43ed9 100644
--- a/include/dt-bindings/clock/rk3228-cru.h
+++ b/include/dt-bindings/clock/rk3228-cru.h

@@ -29,6 +29,7 @@
 #define SCLK_SDMMC		68
 #define SCLK_SDIO		69
 #define SCLK_EMMC		71
+#define SCLK_TSADC		72
 #define SCLK_UART0		77
 #define SCLK_UART1		78
 #define SCLK_UART2		79
@@ -49,10 +50,17 @@
 #define SCLK_SDMMC_SAMPLE	118
 #define SCLK_SDIO_SAMPLE	119
 #define SCLK_EMMC_SAMPLE	121
+#define SCLK_VOP		122
+#define SCLK_HDMI_HDCP		123
+
+/* dclk gates */
+#define DCLK_VOP		190
+#define DCLK_HDMI_PHY		191
 
 /* aclk gates */
 #define ACLK_DMAC		194
 #define ACLK_PERI		210
+#define ACLK_VOP		211
 
 /* pclk gates */
 #define PCLK_GPIO0		320
@@ -68,11 +76,15 @@
 #define PCLK_UART0		341
 #define PCLK_UART1		342
 #define PCLK_UART2		343
+#define PCLK_TSADC		344
 #define PCLK_PWM		350
 #define PCLK_TIMER		353
 #define PCLK_PERI		363
+#define PCLK_HDMI_CTRL		364
+#define PCLK_HDMI_PHY		365
 
 /* hclk gates */
+#define HCLK_VOP		452
 #define HCLK_NANDC		453
 #define HCLK_SDMMC		456
 #define HCLK_SDIO		457

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index df4f369..506c353 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h

@@ -559,25 +559,25 @@
 #endif
 
 /**
- * fetch_or - perform *ptr |= mask and return old value of *ptr
- * @ptr: pointer to value
- * @mask: mask to OR on the value
- *
- * cmpxchg based fetch_or, macro so it works for different integer types
+ * atomic_fetch_or - perform *p |= mask and return old value of *p
+ * @p: pointer to atomic_t
+ * @mask: mask to OR on the atomic_t
  */
-#ifndef fetch_or
-#define fetch_or(ptr, mask)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
-	for (;;) {							\
-		__old = cmpxchg((ptr), __val, __val | (mask));		\
-		if (__old == __val)					\
-			break;						\
-		__val = __old;						\
-	}								\
-	__old;								\
-})
-#endif
+#ifndef atomic_fetch_or
+static inline int atomic_fetch_or(atomic_t *p, int mask)
+{
+	int old, val = atomic_read(p);
 
+	for (;;) {
+		old = atomic_cmpxchg(p, val, val | mask);
+		if (old == val)
+			break;
+		val = old;
+	}
+
+	return old;
+}
+#endif
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 1b4d69f..3f10307 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h

@@ -135,7 +135,7 @@
 
 struct backing_dev_info {
 	struct list_head bdi_list;
-	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
+	unsigned long ra_pages;	/* max readahead in PAGE_SIZE units */
 	unsigned int capabilities; /* Device capabilities */
 	congested_fn *congested_fn; /* Function pointer if device is md/dm */
 	void *congested_data;	/* Pointer to aux data for congested func */

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 88bc64f..6b7481f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h

@@ -41,7 +41,7 @@
 #endif
 
 #define BIO_MAX_PAGES		256
-#define BIO_MAX_SIZE		(BIO_MAX_PAGES << PAGE_CACHE_SHIFT)
+#define BIO_MAX_SIZE		(BIO_MAX_PAGES << PAGE_SHIFT)
 #define BIO_MAX_SECTORS		(BIO_MAX_SIZE >> 9)
 
 /*

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 15a73d4..9ac9799 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h

@@ -263,22 +263,8 @@
 	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
 	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
 
-#define queue_for_each_ctx(q, ctx, i)					\
-	for ((i) = 0; (i) < (q)->nr_queues &&				\
-	     ({ ctx = per_cpu_ptr((q)->queue_ctx, (i)); 1; }); (i)++)
-
 #define hctx_for_each_ctx(hctx, ctx, i)					\
 	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
 	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
 
-#define blk_ctx_sum(q, sum)						\
-({									\
-	struct blk_mq_ctx *__x;						\
-	unsigned int __ret = 0, __i;					\
-									\
-	queue_for_each_ctx((q), __x, __i)				\
-		__ret += sum;						\
-	__ret;								\
-})
-
 #endif

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7e5d7e0..669e419 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h

@@ -1372,7 +1372,7 @@
 
 static inline void put_dev_sector(Sector p)
 {
-	page_cache_release(p.v);
+	put_page(p.v);
 }
 
 static inline bool __bvec_gap_to_prev(struct request_queue *q,

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index f0ba9c2..e3354b7 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h

@@ -24,6 +24,8 @@
 #define PHY_ID_BCM7250			0xae025280
 #define PHY_ID_BCM7364			0xae025260
 #define PHY_ID_BCM7366			0x600d8490
+#define PHY_ID_BCM7346			0x600d8650
+#define PHY_ID_BCM7362			0x600d84b0
 #define PHY_ID_BCM7425			0x600d86b0
 #define PHY_ID_BCM7429			0x600d8730
 #define PHY_ID_BCM7435			0x600d8750

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index c67f052..d48daa3 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h

@@ -43,7 +43,7 @@
 			 */
 };
 
-#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
+#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
 
 struct page;
 struct buffer_head;
@@ -263,7 +263,7 @@
 static inline void attach_page_buffers(struct page *page,
 		struct buffer_head *head)
 {
-	page_cache_get(page);
+	get_page(page);
 	SetPagePrivate(page);
 	set_page_private(page, (unsigned long)head);
 }

diff --git a/include/linux/capability.h b/include/linux/capability.h
index f314275..00690ff 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h

@@ -40,8 +40,6 @@
 struct dentry;
 struct user_namespace;
 
-struct user_namespace *current_user_ns(void);
-
 extern const kernel_cap_t __cap_empty_set;
 extern const kernel_cap_t __cap_init_eff_set;
 

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 15151f3..ae2f668 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h

@@ -105,6 +105,7 @@
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT		\
 	(CEPH_FEATURE_NOSRCADDR |		\
+	 CEPH_FEATURE_SUBSCRIBE2 |		\
 	 CEPH_FEATURE_RECONNECT_SEQ |		\
 	 CEPH_FEATURE_PGID64 |			\
 	 CEPH_FEATURE_PGPOOL3 |			\
@@ -127,6 +128,7 @@
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
+	 CEPH_FEATURE_SUBSCRIBE2 |	 \
 	 CEPH_FEATURE_RECONNECT_SEQ |	 \
 	 CEPH_FEATURE_PGID64 |		 \
 	 CEPH_FEATURE_PGPOOL3 |		 \

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index d7d072a..37f28bf 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h

@@ -198,8 +198,8 @@
 #define CEPH_SUBSCRIBE_ONETIME    1  /* i want only 1 update after have */
 
 struct ceph_mon_subscribe_item {
-	__le64 have_version;    __le64 have;
-	__u8 onetime;
+	__le64 start;
+	__u8 flags;
 } __attribute__ ((packed));
 
 struct ceph_mon_subscribe_ack {
@@ -376,7 +376,8 @@
 		__le32 stripe_count;         /* ... */
 		__le32 object_size;
 		__le32 file_replication;
-		__le32 unused;               /* used to be preferred osd */
+               __le32 mask;                 /* CEPH_CAP_* */
+               __le32 old_size;
 	} __attribute__ ((packed)) open;
 	struct {
 		__le32 flags;

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 3e3799c..db92a8d 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h

@@ -47,7 +47,6 @@
 	unsigned long mount_timeout;		/* jiffies */
 	unsigned long osd_idle_ttl;		/* jiffies */
 	unsigned long osd_keepalive_timeout;	/* jiffies */
-	unsigned long monc_ping_timeout;	/* jiffies */
 
 	/*
 	 * any type that can't be simply compared or doesn't need need
@@ -68,7 +67,12 @@
 #define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
 #define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
 #define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
-#define CEPH_MONC_PING_TIMEOUT_DEFAULT	msecs_to_jiffies(30 * 1000)
+
+#define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
+#define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
+#define CEPH_MONC_PING_TIMEOUT		msecs_to_jiffies(30 * 1000)
+#define CEPH_MONC_HUNT_BACKOFF		2
+#define CEPH_MONC_HUNT_MAX_MULT		10
 
 #define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
 #define CEPH_MSG_MAX_MIDDLE_LEN	(16*1024*1024)
@@ -172,8 +176,8 @@
  */
 static inline int calc_pages_for(u64 off, u64 len)
 {
-	return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
-		(off >> PAGE_CACHE_SHIFT);
+	return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) -
+		(off >> PAGE_SHIFT);
 }
 
 extern struct kmem_cache *ceph_inode_cachep;

diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 81810dc..e230e7e 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h

@@ -68,18 +68,24 @@
 
 	bool hunting;
 	int cur_mon;                       /* last monitor i contacted */
-	unsigned long sub_sent, sub_renew_after;
+	unsigned long sub_renew_after;
+	unsigned long sub_renew_sent;
 	struct ceph_connection con;
 
+	bool had_a_connection;
+	int hunt_mult; /* [1..CEPH_MONC_HUNT_MAX_MULT] */
+
 	/* pending generic requests */
 	struct rb_root generic_request_tree;
 	int num_generic_requests;
 	u64 last_tid;
 
-	/* mds/osd map */
-	int want_mdsmap;
-	int want_next_osdmap; /* 1 = want, 2 = want+asked */
-	u32 have_osdmap, have_mdsmap;
+	/* subs, indexed with CEPH_SUB_* */
+	struct {
+		struct ceph_mon_subscribe_item item;
+		bool want;
+		u32 have; /* epoch */
+	} subs[3];
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs_file;
@@ -93,14 +99,23 @@
 extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
 extern void ceph_monc_stop(struct ceph_mon_client *monc);
 
+enum {
+	CEPH_SUB_MDSMAP = 0,
+	CEPH_SUB_MONMAP,
+	CEPH_SUB_OSDMAP,
+};
+
+extern const char *ceph_sub_str[];
+
 /*
  * The model here is to indicate that we need a new map of at least
- * epoch @want, and also call in when we receive a map.  We will
+ * epoch @epoch, and also call in when we receive a map.  We will
  * periodically rerequest the map from the monitor cluster until we
  * get what we want.
  */
-extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
-extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
+bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch,
+			bool continuous);
+void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch);
 
 extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
 extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 7506b48..4343df8 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h

@@ -43,7 +43,8 @@
 };
 
 
-#define CEPH_OSD_MAX_OP	3
+#define CEPH_OSD_SLAB_OPS	2
+#define CEPH_OSD_MAX_OPS	16
 
 enum ceph_osd_data_type {
 	CEPH_OSD_DATA_TYPE_NONE = 0,
@@ -77,7 +78,10 @@
 struct ceph_osd_req_op {
 	u16 op;           /* CEPH_OSD_OP_* */
 	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
-	u32 payload_len;
+	u32 indata_len;   /* request */
+	u32 outdata_len;  /* reply */
+	s32 rval;
+
 	union {
 		struct ceph_osd_data raw_data_in;
 		struct {
@@ -136,7 +140,6 @@
 
 	/* request osd ops array  */
 	unsigned int		r_num_ops;
-	struct ceph_osd_req_op	r_ops[CEPH_OSD_MAX_OP];
 
 	/* these are updated on each send */
 	__le32           *r_request_osdmap_epoch;
@@ -148,8 +151,6 @@
 	struct ceph_eversion *r_request_reassert_version;
 
 	int               r_result;
-	int               r_reply_op_len[CEPH_OSD_MAX_OP];
-	s32               r_reply_op_result[CEPH_OSD_MAX_OP];
 	int               r_got_reply;
 	int		  r_linger;
 
@@ -174,6 +175,8 @@
 	unsigned long     r_stamp;            /* send OR check time */
 
 	struct ceph_snap_context *r_snapc;    /* snap context for writes */
+
+	struct ceph_osd_req_op r_ops[];
 };
 
 struct ceph_request_redirect {
@@ -263,6 +266,8 @@
 					u64 truncate_size, u32 truncate_seq);
 extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
 					unsigned int which, u64 length);
+extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
+				       unsigned int which, u64 offset_inc);
 
 extern struct ceph_osd_data *osd_req_op_extent_osd_data(
 					struct ceph_osd_request *osd_req,

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 33dc814..da95258 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h

@@ -25,7 +25,7 @@
 #define CLK_SET_PARENT_GATE	BIT(1) /* must be gated across re-parent */
 #define CLK_SET_RATE_PARENT	BIT(2) /* propagate rate change up one level */
 #define CLK_IGNORE_UNUSED	BIT(3) /* do not gate even if unused */
-#define CLK_IS_ROOT		BIT(4) /* root clk, has no parent */
+#define CLK_IS_ROOT		BIT(4) /* Deprecated: Don't use */
 #define CLK_IS_BASIC		BIT(5) /* Basic clk, can't do a to_clk_foo() */
 #define CLK_GET_RATE_NOCACHE	BIT(6) /* do not use the cached clk rate */
 #define CLK_SET_RATE_NO_REPARENT BIT(7) /* don't re-parent on rate change */
@@ -285,7 +285,7 @@
 struct clk *clk_register_fixed_rate_with_accuracy(struct device *dev,
 		const char *name, const char *parent_name, unsigned long flags,
 		unsigned long fixed_rate, unsigned long fixed_accuracy);
-
+void clk_unregister_fixed_rate(struct clk *clk);
 void of_fixed_clk_setup(struct device_node *np);
 
 /**
@@ -498,6 +498,7 @@
 struct clk *clk_register_fixed_factor(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		unsigned int mult, unsigned int div);
+void clk_unregister_fixed_factor(struct clk *clk);
 
 /**
  * struct clk_fractional_divider - adjustable fractional divider clock
@@ -625,8 +626,6 @@
 		const char *parent_name, unsigned gpio, bool active_low,
 		unsigned long flags);
 
-void of_gpio_clk_gate_setup(struct device_node *node);
-
 /**
  * struct clk_gpio_mux - gpio controlled clock multiplexer
  *
@@ -642,8 +641,6 @@
 		const char * const *parent_names, u8 num_parents, unsigned gpio,
 		bool active_low, unsigned long flags);
 
-void of_gpio_mux_clk_setup(struct device_node *node);
-
 /**
  * clk_register - allocate a new clock, register it and return an opaque cookie
  * @dev: device that is registering this clock
@@ -719,7 +716,7 @@
 struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec,
 				  void *data);
 struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data);
-int of_clk_get_parent_count(struct device_node *np);
+unsigned int of_clk_get_parent_count(struct device_node *np);
 int of_clk_parent_fill(struct device_node *np, const char **parents,
 		       unsigned int size);
 const char *of_clk_get_parent_name(struct device_node *np, int index);

diff --git a/include/linux/clk/shmobile.h b/include/linux/clk/renesas.h
similarity index 93%
rename from include/linux/clk/shmobile.h
rename to include/linux/clk/renesas.h
index cb19cc1..7adfd80 100644
--- a/include/linux/clk/shmobile.h
+++ b/include/linux/clk/renesas.h

@@ -11,8 +11,8 @@
  * (at your option) any later version.
  */
 
-#ifndef __LINUX_CLK_SHMOBILE_H_
-#define __LINUX_CLK_SHMOBILE_H_
+#ifndef __LINUX_CLK_RENESAS_H_
+#define __LINUX_CLK_RENESAS_H_
 
 #include <linux/types.h>
 

diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 9a63860..dc5164a 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h

@@ -23,8 +23,8 @@
  * @mult_div1_reg: register containing the DPLL M and N bitfields
  * @mult_mask: mask of the DPLL M bitfield in @mult_div1_reg
  * @div1_mask: mask of the DPLL N bitfield in @mult_div1_reg
- * @clk_bypass: struct clk pointer to the clock's bypass clock input
- * @clk_ref: struct clk pointer to the clock's reference clock input
+ * @clk_bypass: struct clk_hw pointer to the clock's bypass clock input
+ * @clk_ref: struct clk_hw pointer to the clock's reference clock input
  * @control_reg: register containing the DPLL mode bitfield
  * @enable_mask: mask of the DPLL mode bitfield in @control_reg
  * @last_rounded_rate: cache of the last rate result of omap2_dpll_round_rate()
@@ -69,8 +69,8 @@
 	void __iomem		*mult_div1_reg;
 	u32			mult_mask;
 	u32			div1_mask;
-	struct clk		*clk_bypass;
-	struct clk		*clk_ref;
+	struct clk_hw		*clk_bypass;
+	struct clk_hw		*clk_ref;
 	void __iomem		*control_reg;
 	u32			enable_mask;
 	unsigned long		last_rounded_rate;

diff --git a/include/linux/compat.h b/include/linux/compat.h
index fe4ccd0..f964ef7 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h

@@ -5,6 +5,8 @@
  * syscall compatibility layer.
  */
 
+#include <linux/types.h>
+
 #ifdef CONFIG_COMPAT
 
 #include <linux/stat.h>
@@ -719,9 +721,22 @@
 
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
 					    int, const char __user *);
+
+/*
+ * For most but not all architectures, "am I in a compat syscall?" and
+ * "am I a compat task?" are the same question.  For architectures on which
+ * they aren't the same question, arch code can override in_compat_syscall.
+ */
+
+#ifndef in_compat_syscall
+static inline bool in_compat_syscall(void) { return is_compat_task(); }
+#endif
+
 #else
 
 #define is_compat_task() (0)
+static inline bool in_compat_syscall(void) { return false; }
 
 #endif /* CONFIG_COMPAT */
+
 #endif /* _LINUX_COMPAT_H */

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d1e49d5..de17999 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h

@@ -10,3 +10,8 @@
 #undef uninitialized_var
 #define uninitialized_var(x) x = *(&(x))
 #endif
+
+/* same as gcc, this was present in clang-2.6 so we can assume it works
+ * with any version that can compile the kernel
+ */
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 22ab246..eeae401 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h

@@ -199,7 +199,7 @@
 #define unreachable() __builtin_unreachable()
 
 /* Mark a function definition as prohibited from being cloned. */
-#define __noclone	__attribute__((__noclone__))
+#define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
 
 #endif /* GCC_VERSION >= 40500 */
 

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 485fe55..d9d6a9d 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h

@@ -188,7 +188,7 @@
 }
 
 #define CONFIGFS_BIN_ATTR_RO(_pfx, _name, _priv, _maxsz)	\
-static struct configfs_attribute _pfx##attr_##_name = {		\
+static struct configfs_bin_attribute _pfx##attr_##_name = {	\
 	.cb_attr = {						\
 		.ca_name	= __stringify(_name),		\
 		.ca_mode	= S_IRUGO,			\
@@ -200,7 +200,7 @@
 }
 
 #define CONFIGFS_BIN_ATTR_WO(_pfx, _name, _priv, _maxsz)	\
-static struct configfs_attribute _pfx##attr_##_name = {		\
+static struct configfs_bin_attribute _pfx##attr_##_name = {	\
 	.cb_attr = {						\
 		.ca_name	= __stringify(_name),		\
 		.ca_mode	= S_IWUSR,			\

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index fc14275..40cee6b 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h

@@ -607,8 +607,6 @@
 
 /**
  * cpumask_size - size to allocate for a 'struct cpumask' in bytes
- *
- * This will eventually be a runtime variable, depending on nr_cpu_ids.
  */
 static inline size_t cpumask_size(void)
 {

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 8d70e13..257db64 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h

@@ -377,7 +377,10 @@
 #ifdef CONFIG_USER_NS
 #define current_user_ns()	(current_cred_xxx(user_ns))
 #else
-#define current_user_ns()	(&init_user_ns)
+static inline struct user_namespace *current_user_ns(void)
+{
+	return &init_user_ns;
+}
 #endif
 
 

diff --git a/include/linux/device.h b/include/linux/device.h
index 88192d0..002c597 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h

@@ -685,6 +685,18 @@
 int devm_add_action(struct device *dev, void (*action)(void *), void *data);
 void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
 
+static inline int devm_add_action_or_reset(struct device *dev,
+					   void (*action)(void *), void *data)
+{
+	int ret;
+
+	ret = devm_add_action(dev, action, data);
+	if (ret)
+		action(data);
+
+	return ret;
+}
+
 struct device_dma_parameters {
 	/*
 	 * a low level driver may set these to teach IOMMU code about

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 532108e..3fe90d4 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h

@@ -94,7 +94,7 @@
 	void (*release)(struct dma_buf *);
 
 	int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction);
-	void (*end_cpu_access)(struct dma_buf *, enum dma_data_direction);
+	int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction);
 	void *(*kmap_atomic)(struct dma_buf *, unsigned long);
 	void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
 	void *(*kmap)(struct dma_buf *, unsigned long);
@@ -224,8 +224,8 @@
 				enum dma_data_direction);
 int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 			     enum dma_data_direction dir);
-void dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-			    enum dma_data_direction dir);
+int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
+			   enum dma_data_direction dir);
 void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
 void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
 void *dma_buf_kmap(struct dma_buf *, unsigned long);

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 9eb215a..b90e9bd 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h

@@ -262,7 +262,7 @@
 /*
  * For NAT entries
  */
-#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
+#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
 
 struct f2fs_nat_entry {
 	__u8 version;		/* latest version of cached nat entry */
@@ -282,7 +282,7 @@
  * Not allow to change this.
  */
 #define SIT_VBLOCK_MAP_SIZE 64
-#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
+#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry))
 
 /*
  * Note that f2fs_sit_entry->vblocks has the following bit-field information.

diff --git a/include/linux/fence.h b/include/linux/fence.h
index 605bd88..2b17698 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h

@@ -294,7 +294,7 @@
 	if (WARN_ON(f1->context != f2->context))
 		return false;
 
-	return f1->seqno - f2->seqno < INT_MAX;
+	return (int)(f1->seqno - f2->seqno) > 0;
 }
 
 /**

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 43aa1f8..a51a536 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h

@@ -465,10 +465,14 @@
 void bpf_prog_destroy(struct bpf_prog *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
+int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
+		       bool locked);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
 int sk_detach_filter(struct sock *sk);
+int __sk_detach_filter(struct sock *sk, bool locked);
+
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
 

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b2ed231..70e61b5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h

@@ -929,7 +929,7 @@
 /* Page cache limit. The filesystems should put that into their s_maxbytes 
    limits, otherwise bad things can happen in VM. */ 
 #if BITS_PER_LONG==32
-#define MAX_LFS_FILESIZE	(((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 
+#define MAX_LFS_FILESIZE	(((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1)
 #elif BITS_PER_LONG==64
 #define MAX_LFS_FILESIZE 	((loff_t)0x7fffffffffffffffLL)
 #endif
@@ -2077,7 +2077,7 @@
 /* /sys/fs */
 extern struct kobject *fs_kobj;
 
-#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
+#define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
 
 #ifdef CONFIG_MANDATORY_FILE_LOCKING
 extern int locks_mandatory_locked(struct file *);
@@ -2273,7 +2273,7 @@
 extern struct file *file_open_name(struct filename *, int, umode_t);
 extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
-				   const char *, int);
+				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 6d9df3f..dea12a6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h

@@ -811,16 +811,6 @@
  */
 #define __notrace_funcgraph		notrace
 
-/*
- * We want to which function is an entrypoint of a hardirq.
- * That will help us to put a signal on output.
- */
-#define __irq_entry		 __attribute__((__section__(".irqentry.text")))
-
-/* Limits of hardirq entrypoints */
-extern char __irqentry_text_start[];
-extern char __irqentry_text_end[];
-
 #define FTRACE_NOTRACE_DEPTH 65536
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
@@ -857,7 +847,6 @@
 #else /* !CONFIG_FUNCTION_GRAPH_TRACER */
 
 #define __notrace_funcgraph
-#define __irq_entry
 #define INIT_FTRACE_GRAPH
 
 static inline void ftrace_graph_init_task(struct task_struct *t) { }

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 79b0ef6..7008623 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h

@@ -127,7 +127,7 @@
 	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
 		return __pmd_trans_huge_lock(pmd, vma);
 	else
-		return false;
+		return NULL;
 }
 static inline int hpage_nr_pages(struct page *page)
 {

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 358076e..9fcabeb 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h

@@ -683,4 +683,24 @@
 extern int arch_probe_nr_irqs(void);
 extern int arch_early_irq_init(void);
 
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+/*
+ * We want to know which function is an entrypoint of a hardirq or a softirq.
+ */
+#define __irq_entry		 __attribute__((__section__(".irqentry.text")))
+#define __softirq_entry  \
+	__attribute__((__section__(".softirqentry.text")))
+
+/* Limits of hardirq entrypoints */
+extern char __irqentry_text_start[];
+extern char __irqentry_text_end[];
+/* Limits of softirq entrypoints */
+extern char __softirqentry_text_start[];
+extern char __softirqentry_text_end[];
+
+#else
+#define __irq_entry
+#define __softirq_entry
+#endif
+
 #endif

diff --git a/include/linux/io.h b/include/linux/io.h
index 32403b5..e2c8419 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h

@@ -135,6 +135,7 @@
 	/* See memremap() kernel-doc for usage description... */
 	MEMREMAP_WB = 1 << 0,
 	MEMREMAP_WT = 1 << 1,
+	MEMREMAP_WC = 1 << 2,
 };
 
 void *memremap(resource_size_t offset, size_t size, unsigned long flags);

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 0fdc798..737371b 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h

@@ -48,19 +48,28 @@
 void kasan_alloc_pages(struct page *page, unsigned int order);
 void kasan_free_pages(struct page *page, unsigned int order);
 
+void kasan_cache_create(struct kmem_cache *cache, size_t *size,
+			unsigned long *flags);
+
 void kasan_poison_slab(struct page *page);
 void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
 void kasan_poison_object_data(struct kmem_cache *cache, void *object);
 
-void kasan_kmalloc_large(const void *ptr, size_t size);
+void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
 void kasan_kfree_large(const void *ptr);
 void kasan_kfree(void *ptr);
-void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size);
-void kasan_krealloc(const void *object, size_t new_size);
+void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
+		  gfp_t flags);
+void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);
 
-void kasan_slab_alloc(struct kmem_cache *s, void *object);
+void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
 void kasan_slab_free(struct kmem_cache *s, void *object);
 
+struct kasan_cache {
+	int alloc_meta_offset;
+	int free_meta_offset;
+};
+
 int kasan_module_alloc(void *addr, size_t size);
 void kasan_free_shadow(const struct vm_struct *vm);
 
@@ -76,20 +85,26 @@
 static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
 static inline void kasan_free_pages(struct page *page, unsigned int order) {}
 
+static inline void kasan_cache_create(struct kmem_cache *cache,
+				      size_t *size,
+				      unsigned long *flags) {}
+
 static inline void kasan_poison_slab(struct page *page) {}
 static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
 					void *object) {}
 static inline void kasan_poison_object_data(struct kmem_cache *cache,
 					void *object) {}
 
-static inline void kasan_kmalloc_large(void *ptr, size_t size) {}
+static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
 static inline void kasan_kfree_large(const void *ptr) {}
 static inline void kasan_kfree(void *ptr) {}
 static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
-				size_t size) {}
-static inline void kasan_krealloc(const void *object, size_t new_size) {}
+				size_t size, gfp_t flags) {}
+static inline void kasan_krealloc(const void *object, size_t new_size,
+				 gfp_t flags) {}
 
-static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
+static inline void kasan_slab_alloc(struct kmem_cache *s, void *object,
+				   gfp_t flags) {}
 static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
 
 static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
new file mode 100644
index 0000000..2883ac9
--- /dev/null
+++ b/include/linux/kcov.h

@@ -0,0 +1,29 @@
+#ifndef _LINUX_KCOV_H
+#define _LINUX_KCOV_H
+
+#include <uapi/linux/kcov.h>
+
+struct task_struct;
+
+#ifdef CONFIG_KCOV
+
+void kcov_task_init(struct task_struct *t);
+void kcov_task_exit(struct task_struct *t);
+
+enum kcov_mode {
+	/* Coverage collection is not enabled yet. */
+	KCOV_MODE_DISABLED = 0,
+	/*
+	 * Tracing coverage collection mode.
+	 * Covered PCs are collected in a per-task buffer.
+	 */
+	KCOV_MODE_TRACE = 1,
+};
+
+#else
+
+static inline void kcov_task_init(struct task_struct *t) {}
+static inline void kcov_task_exit(struct task_struct *t) {}
+
+#endif /* CONFIG_KCOV */
+#endif /* _LINUX_KCOV_H */

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b82646e..2f7775e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h

@@ -255,7 +255,7 @@
 __printf(1, 2)
 void panic(const char *fmt, ...)
 	__noreturn __cold;
-void nmi_panic_self_stop(struct pt_regs *);
+void nmi_panic(struct pt_regs *regs, const char *msg);
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
@@ -457,25 +457,6 @@
 #define PANIC_CPU_INVALID	-1
 
 /*
- * A variant of panic() called from NMI context. We return if we've already
- * panicked on this CPU. If another CPU already panicked, loop in
- * nmi_panic_self_stop() which can provide architecture dependent code such
- * as saving register state for crash dump.
- */
-#define nmi_panic(regs, fmt, ...)					\
-do {									\
-	int old_cpu, cpu;						\
-									\
-	cpu = raw_smp_processor_id();					\
-	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);	\
-									\
-	if (old_cpu == PANIC_CPU_INVALID)				\
-		panic(fmt, ##__VA_ARGS__);				\
-	else if (old_cpu != cpu)					\
-		nmi_panic_self_stop(regs);				\
-} while (0)
-
-/*
  * Only to be used by arch init code. If the user over-wrote the default
  * CONFIG_PANIC_TIMEOUT, honor it.
  */
@@ -637,7 +618,7 @@
 
 #define do_trace_printk(fmt, args...)					\
 do {									\
-	static const char *trace_printk_fmt				\
+	static const char *trace_printk_fmt __used			\
 		__attribute__((section("__trace_printk_fmt"))) =	\
 		__builtin_constant_p(fmt) ? fmt : NULL;			\
 									\
@@ -681,7 +662,7 @@
  */
 
 #define trace_puts(str) ({						\
-	static const char *trace_printk_fmt				\
+	static const char *trace_printk_fmt __used			\
 		__attribute__((section("__trace_printk_fmt"))) =	\
 		__builtin_constant_p(str) ? str : NULL;			\
 									\
@@ -703,7 +684,7 @@
 #define ftrace_vprintk(fmt, vargs)					\
 do {									\
 	if (__builtin_constant_p(fmt)) {				\
-		static const char *trace_printk_fmt			\
+		static const char *trace_printk_fmt __used		\
 		  __attribute__((section("__trace_printk_fmt"))) =	\
 			__builtin_constant_p(fmt) ? fmt : NULL;		\
 									\

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 473b436..41eb6fd 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h

@@ -401,7 +401,7 @@
 			((typeof(__tmp->type))__kfifo->data) : \
 			(__tmp->buf) \
 			)[__kfifo->in & __tmp->kfifo.mask] = \
-				(typeof(*__tmp->type))__val; \
+				*(typeof(__tmp->type))&__val; \
 			smp_wmb(); \
 			__kfifo->in++; \
 		} \

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index c3c4318..cdcb2cc 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h

@@ -242,6 +242,7 @@
 	uint16_t nr_pages;
 	uint16_t flags;
 
+	u64 ppa_status; /* ppa media status */
 	int error;
 };
 
@@ -346,6 +347,7 @@
 	int nr_luns;
 	unsigned max_pages_per_blk;
 
+	unsigned long *lun_map;
 	void *ppalist_pool;
 
 	struct nvm_id identity;
@@ -355,6 +357,7 @@
 	char name[DISK_NAME_LEN];
 
 	struct mutex mlock;
+	spinlock_t lock;
 };
 
 static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
@@ -465,8 +468,13 @@
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
 								unsigned long);
 typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
+typedef int (nvmm_reserve_lun)(struct nvm_dev *, int);
+typedef void (nvmm_release_lun)(struct nvm_dev *, int);
 typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
 
+typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
+typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
+
 struct nvmm_type {
 	const char *name;
 	unsigned int version[3];
@@ -488,9 +496,15 @@
 
 	/* Configuration management */
 	nvmm_get_lun_fn *get_lun;
+	nvmm_reserve_lun *reserve_lun;
+	nvmm_release_lun *release_lun;
 
 	/* Statistics */
 	nvmm_lun_info_print_fn *lun_info_print;
+
+	nvmm_get_area_fn *get_area;
+	nvmm_put_area_fn *put_area;
+
 	struct list_head list;
 };
 

diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 246a352..ac02c54 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h

@@ -596,7 +596,7 @@
 }
 
 extern void	set_channel_address(struct mISDNchannel *, u_int, u_int);
-extern void	mISDN_clock_update(struct mISDNclock *, int, struct timeval *);
+extern void	mISDN_clock_update(struct mISDNclock *, int, ktime_t *);
 extern unsigned short mISDN_clock_get(void);
 extern const char *mISDNDevName4ch(struct mISDNchannel *);
 

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 02ac300..8156e3c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h

@@ -1236,6 +1236,8 @@
 	MLX5_CAP_FLOW_TABLE,
 	MLX5_CAP_ESWITCH_FLOW_TABLE,
 	MLX5_CAP_ESWITCH,
+	MLX5_CAP_RESERVED,
+	MLX5_CAP_VECTOR_CALC,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1298,6 +1300,10 @@
 #define MLX5_CAP_ODP(mdev, cap)\
 	MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap)
 
+#define MLX5_CAP_VECTOR_CALC(mdev, cap) \
+	MLX5_GET(vector_calc_cap, \
+		 mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap)
+
 enum {
 	MLX5_CMD_STAT_OK			= 0x0,
 	MLX5_CMD_STAT_INT_ERR			= 0x1,

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3a95446..dcd5ac8 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h

@@ -613,7 +613,10 @@
 };
 
 enum port_state_policy {
-	MLX5_AAA_000
+	MLX5_POLICY_DOWN	= 0,
+	MLX5_POLICY_UP		= 1,
+	MLX5_POLICY_FOLLOW	= 2,
+	MLX5_POLICY_INVALID	= 0xffffffff
 };
 
 enum phy_port_state {
@@ -706,8 +709,7 @@
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
 int mlx5_cmd_status_to_err_v2(void *ptr);
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
-		       enum mlx5_cap_mode cap_mode);
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
 int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index e52730e..c15b8a8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h

@@ -618,6 +618,33 @@
 	u8         reserved_at_e0[0x720];
 };
 
+struct mlx5_ifc_calc_op {
+	u8        reserved_at_0[0x10];
+	u8        reserved_at_10[0x9];
+	u8        op_swap_endianness[0x1];
+	u8        op_min[0x1];
+	u8        op_xor[0x1];
+	u8        op_or[0x1];
+	u8        op_and[0x1];
+	u8        op_max[0x1];
+	u8        op_add[0x1];
+};
+
+struct mlx5_ifc_vector_calc_cap_bits {
+	u8         calc_matrix[0x1];
+	u8         reserved_at_1[0x1f];
+	u8         reserved_at_20[0x8];
+	u8         max_vec_count[0x8];
+	u8         reserved_at_30[0xd];
+	u8         max_chunk_size[0x3];
+	struct mlx5_ifc_calc_op calc0;
+	struct mlx5_ifc_calc_op calc1;
+	struct mlx5_ifc_calc_op calc2;
+	struct mlx5_ifc_calc_op calc3;
+
+	u8         reserved_at_e0[0x720];
+};
+
 enum {
 	MLX5_WQ_TYPE_LINKED_LIST  = 0x0,
 	MLX5_WQ_TYPE_CYCLIC       = 0x1,
@@ -784,7 +811,8 @@
 	u8         cd[0x1];
 	u8         reserved_at_22c[0x1];
 	u8         apm[0x1];
-	u8         reserved_at_22e[0x2];
+	u8         vector_calc[0x1];
+	u8         reserved_at_22f[0x1];
 	u8	   imaicl[0x1];
 	u8         reserved_at_231[0x4];
 	u8         qkv[0x1];
@@ -1954,6 +1982,7 @@
 	struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
 	struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
 	struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
+	struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
 	u8         reserved_at_0[0x8000];
 };
 
@@ -3681,6 +3710,12 @@
 	u8         pkey_index[0x10];
 };
 
+enum {
+	MLX5_HCA_VPORT_SEL_PORT_GUID	= 1 << 0,
+	MLX5_HCA_VPORT_SEL_NODE_GUID	= 1 << 1,
+	MLX5_HCA_VPORT_SEL_STATE_POLICY	= 1 << 2,
+};
+
 struct mlx5_ifc_query_hca_vport_gid_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];

diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index a9f2bcc..bd93e63 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h

@@ -93,6 +93,11 @@
 int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
 int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
 int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
-				  u8 port_num, void *out, size_t out_sz);
+				  int vf, u8 port_num, void *out,
+				  size_t out_sz);
+int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
+				       u8 other_vport, u8 port_num,
+				       int vf,
+				       struct mlx5_hca_vport_context *req);
 
 #endif /* __MLX5_VPORT_H__ */

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 450fc97..ffcff53 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h

@@ -623,7 +623,7 @@
  *
  * A page may belong to an inode's memory mapping. In this case, page->mapping
  * is the pointer to the inode, and page->index is the file offset of the page,
- * in units of PAGE_CACHE_SIZE.
+ * in units of PAGE_SIZE.
  *
  * If pagecache pages are not associated with an inode, they are said to be
  * anonymous pages. These may become associated with the swapcache, and in that
@@ -1132,6 +1132,8 @@
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
+	bool ignore_dirty;			/* Ignore dirty pages */
+	bool check_swap_entries;		/* Check also swap entries */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 944b2b3..c2d75b4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h

@@ -341,7 +341,7 @@
 
 	/* Information about our backing store: */
 	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
-					   units, *not* PAGE_CACHE_SIZE */
+					   units */
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
 

diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h
index 36bb6a5..3bf8f95 100644
--- a/include/linux/mtd/bbm.h
+++ b/include/linux/mtd/bbm.h

@@ -166,7 +166,6 @@
 };
 
 /* OneNAND BBT interface */
-extern int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd);
 extern int onenand_default_bbt(struct mtd_info *mtd);
 
 #endif	/* __LINUX_MTD_BBM_H */

diff --git a/include/linux/mtd/inftl.h b/include/linux/mtd/inftl.h
index 02cd5f9..8255118 100644
--- a/include/linux/mtd/inftl.h
+++ b/include/linux/mtd/inftl.h

@@ -44,7 +44,6 @@
 	unsigned int nb_blocks;		/* number of physical blocks */
 	unsigned int nb_boot_blocks;	/* number of blocks used by the bios */
 	struct erase_info instr;
-	struct nand_ecclayout oobinfo;
 };
 
 int INFTL_mount(struct INFTLrecord *s);

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 58f3ba7..5e0eb7c 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h

@@ -240,8 +240,11 @@
 	   If there is no cache to care about this can be set to NULL. */
 	void (*inval_cache)(struct map_info *, unsigned long, ssize_t);
 
-	/* set_vpp() must handle being reentered -- enable, enable, disable
-	   must leave it enabled. */
+	/* This will be called with 1 as parameter when the first map user
+	 * needs VPP, and called with 0 when the last user exits. The map
+	 * core maintains a reference counter, and assumes that VPP is a
+	 * global resource applying to all mapped flash chips on the system.
+	 */
 	void (*set_vpp)(struct map_info *, int);
 
 	unsigned long pfow_base;

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index cc84923..7712721 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h

@@ -105,7 +105,6 @@
 struct nand_ecclayout {
 	__u32 eccbytes;
 	__u32 eccpos[MTD_MAX_ECCPOS_ENTRIES_LARGE];
-	__u32 oobavail;
 	struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES_LARGE];
 };
 
@@ -265,6 +264,11 @@
 	return mtd->dev.of_node;
 }
 
+static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops)
+{
+	return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize;
+}
+
 int mtd_erase(struct mtd_info *mtd, struct erase_info *instr);
 int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
 	      void **virt, resource_size_t *phys);

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index bdd68e2..56574ba 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h

@@ -168,6 +168,12 @@
 /* Device supports subpage reads */
 #define NAND_SUBPAGE_READ	0x00001000
 
+/*
+ * Some MLC NANDs need data scrambling to limit bitflips caused by repeated
+ * patterns.
+ */
+#define NAND_NEED_SCRAMBLING	0x00002000
+
 /* Options valid for Samsung large page devices */
 #define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG
 
@@ -666,7 +672,7 @@
 	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
 	void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len);
 	void (*select_chip)(struct mtd_info *mtd, int chip);
-	int (*block_bad)(struct mtd_info *mtd, loff_t ofs, int getchip);
+	int (*block_bad)(struct mtd_info *mtd, loff_t ofs);
 	int (*block_markbad)(struct mtd_info *mtd, loff_t ofs);
 	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
 	int (*dev_ready)(struct mtd_info *mtd);
@@ -896,7 +902,6 @@
  * @chip_delay:		R/B delay value in us
  * @options:		Option flags, e.g. 16bit buswidth
  * @bbt_options:	BBT option flags, e.g. NAND_BBT_USE_FLASH
- * @ecclayout:		ECC layout info structure
  * @part_probe_types:	NULL-terminated array of probe types
  */
 struct platform_nand_chip {
@@ -904,7 +909,6 @@
 	int chip_offset;
 	int nr_partitions;
 	struct mtd_partition *partitions;
-	struct nand_ecclayout *ecclayout;
 	int chip_delay;
 	unsigned int options;
 	unsigned int bbt_options;

diff --git a/include/linux/mtd/nand_bch.h b/include/linux/mtd/nand_bch.h
index fb0bc34..98f20ef 100644
--- a/include/linux/mtd/nand_bch.h
+++ b/include/linux/mtd/nand_bch.h

@@ -32,9 +32,7 @@
 /*
  * Initialize BCH encoder/decoder
  */
-struct nand_bch_control *
-nand_bch_init(struct mtd_info *mtd, unsigned int eccsize,
-	      unsigned int eccbytes, struct nand_ecclayout **ecclayout);
+struct nand_bch_control *nand_bch_init(struct mtd_info *mtd);
 /*
  * Release BCH encoder/decoder resources
  */
@@ -58,9 +56,7 @@
 	return -ENOTSUPP;
 }
 
-static inline struct nand_bch_control *
-nand_bch_init(struct mtd_info *mtd, unsigned int eccsize,
-	      unsigned int eccbytes, struct nand_ecclayout **ecclayout)
+static inline struct nand_bch_control *nand_bch_init(struct mtd_info *mtd)
 {
 	return NULL;
 }

diff --git a/include/linux/mtd/nftl.h b/include/linux/mtd/nftl.h
index b059629..044daa0 100644
--- a/include/linux/mtd/nftl.h
+++ b/include/linux/mtd/nftl.h

@@ -50,7 +50,6 @@
         unsigned int nb_blocks;		/* number of physical blocks */
         unsigned int nb_boot_blocks;	/* number of blocks used by the bios */
         struct erase_info instr;
-	struct nand_ecclayout oobinfo;
 };
 
 int NFTL_mount(struct NFTLrecord *s);

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 62356d5..3c36113 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h

@@ -85,6 +85,7 @@
 #define SR_BP0			BIT(2)	/* Block protect 0 */
 #define SR_BP1			BIT(3)	/* Block protect 1 */
 #define SR_BP2			BIT(4)	/* Block protect 2 */
+#define SR_TB			BIT(5)	/* Top/Bottom protect */
 #define SR_SRWD			BIT(7)	/* SR write protect */
 
 #define SR_QUAD_EN_MX		BIT(6)	/* Macronix Quad I/O */
@@ -116,6 +117,7 @@
 
 enum spi_nor_option_flags {
 	SNOR_F_USE_FSR		= BIT(0),
+	SNOR_F_HAS_SR_TB	= BIT(1),
 };
 
 /**

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index be693b3..cb0d5d0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h

@@ -81,8 +81,8 @@
  * function. Real network devices commonly used with qdiscs should only return
  * the driver transmit return codes though - when qdiscs are used, the actual
  * transmission happens asynchronously, so the value is not propagated to
- * higher layers. Virtual network devices transmit synchronously, in this case
- * the driver transmit return codes are consumed by dev_queue_xmit(), all
+ * higher layers. Virtual network devices transmit synchronously; in this case
+ * the driver transmit return codes are consumed by dev_queue_xmit(), and all
  * others are propagated to higher layers.
  */
 
@@ -129,7 +129,7 @@
 }
 
 /*
- *	Compute the worst case header length according to the protocols
+ *	Compute the worst-case header length according to the protocols
  *	used.
  */
 
@@ -246,7 +246,7 @@
 	unsigned long	hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
 };
 
-/* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much.
+/* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much.
  * Alternative is:
  *   dev->hard_header_len ? (dev->hard_header_len +
  *                           (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0
@@ -272,7 +272,7 @@
 };
 
 /* These flag bits are private to the generic network queueing
- * layer, they may not be explicitly referenced by any other
+ * layer; they may not be explicitly referenced by any other
  * code.
  */
 
@@ -286,7 +286,7 @@
 
 
 /*
- * This structure holds at boot time configured netdevice settings. They
+ * This structure holds boot-time configured netdevice settings. They
  * are then used in the device probing.
  */
 struct netdev_boot_setup {
@@ -304,7 +304,7 @@
 	/* The poll_list must only be managed by the entity which
 	 * changes the state of the NAPI_STATE_SCHED bit.  This means
 	 * whoever atomically sets that bit can add this napi_struct
-	 * to the per-cpu poll_list, and whoever clears that bit
+	 * to the per-CPU poll_list, and whoever clears that bit
 	 * can remove from the list right before clearing the bit.
 	 */
 	struct list_head	poll_list;
@@ -350,7 +350,7 @@
  * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in
  * case skb->dev was changed by rx_handler.
  * @RX_HANDLER_EXACT: Force exact delivery, no wildcard.
- * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called.
+ * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called.
  *
  * rx_handlers are functions called from inside __netif_receive_skb(), to do
  * special processing of the skb, prior to delivery to protocol handlers.
@@ -365,19 +365,19 @@
  * Upon return, rx_handler is expected to tell __netif_receive_skb() what to
  * do with the skb.
  *
- * If the rx_handler consumed to skb in some way, it should return
+ * If the rx_handler consumed the skb in some way, it should return
  * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for
- * the skb to be delivered in some other ways.
+ * the skb to be delivered in some other way.
  *
  * If the rx_handler changed skb->dev, to divert the skb to another
  * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the
  * new device will be called if it exists.
  *
- * If the rx_handler consider the skb should be ignored, it should return
+ * If the rx_handler decides the skb should be ignored, it should return
  * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that
  * are registered on exact device (ptype->dev == skb->dev).
  *
- * If the rx_handler didn't changed skb->dev, but want the skb to be normally
+ * If the rx_handler didn't change skb->dev, but wants the skb to be normally
  * delivered, it should return RX_HANDLER_PASS.
  *
  * A device without a registered rx_handler will behave as if rx_handler
@@ -402,11 +402,11 @@
 }
 
 /**
- *	napi_schedule_prep - check if napi can be scheduled
- *	@n: napi context
+ *	napi_schedule_prep - check if NAPI can be scheduled
+ *	@n: NAPI context
  *
  * Test if NAPI routine is already running, and if not mark
- * it as running.  This is used as a condition variable
+ * it as running.  This is used as a condition variable to
  * insure only one NAPI poll instance runs.  We also make
  * sure there is no pending NAPI disable.
  */
@@ -418,7 +418,7 @@
 
 /**
  *	napi_schedule - schedule NAPI poll
- *	@n: napi context
+ *	@n: NAPI context
  *
  * Schedule NAPI poll routine to be called if it is not already
  * running.
@@ -431,7 +431,7 @@
 
 /**
  *	napi_schedule_irqoff - schedule NAPI poll
- *	@n: napi context
+ *	@n: NAPI context
  *
  * Variant of napi_schedule(), assuming hard irqs are masked.
  */
@@ -455,7 +455,7 @@
 void napi_complete_done(struct napi_struct *n, int work_done);
 /**
  *	napi_complete - NAPI processing complete
- *	@n: napi context
+ *	@n: NAPI context
  *
  * Mark NAPI processing as complete.
  * Consider using napi_complete_done() instead.
@@ -467,32 +467,32 @@
 
 /**
  *	napi_hash_add - add a NAPI to global hashtable
- *	@napi: napi context
+ *	@napi: NAPI context
  *
- * generate a new napi_id and store a @napi under it in napi_hash
- * Used for busy polling (CONFIG_NET_RX_BUSY_POLL)
+ * Generate a new napi_id and store a @napi under it in napi_hash.
+ * Used for busy polling (CONFIG_NET_RX_BUSY_POLL).
  * Note: This is normally automatically done from netif_napi_add(),
- * so might disappear in a future linux version.
+ * so might disappear in a future Linux version.
  */
 void napi_hash_add(struct napi_struct *napi);
 
 /**
  *	napi_hash_del - remove a NAPI from global table
- *	@napi: napi context
+ *	@napi: NAPI context
  *
- * Warning: caller must observe rcu grace period
+ * Warning: caller must observe RCU grace period
  * before freeing memory containing @napi, if
  * this function returns true.
  * Note: core networking stack automatically calls it
- * from netif_napi_del()
+ * from netif_napi_del().
  * Drivers might want to call this helper to combine all
- * the needed rcu grace periods into a single one.
+ * the needed RCU grace periods into a single one.
  */
 bool napi_hash_del(struct napi_struct *napi);
 
 /**
  *	napi_disable - prevent NAPI from scheduling
- *	@n: napi context
+ *	@n: NAPI context
  *
  * Stop NAPI from being scheduled on this context.
  * Waits till any outstanding processing completes.
@@ -501,7 +501,7 @@
 
 /**
  *	napi_enable - enable NAPI scheduling
- *	@n: napi context
+ *	@n: NAPI context
  *
  * Resume NAPI from being scheduled on this context.
  * Must be paired with napi_disable.
@@ -516,7 +516,7 @@
 
 /**
  *	napi_synchronize - wait until NAPI is not running
- *	@n: napi context
+ *	@n: NAPI context
  *
  * Wait until NAPI is done being scheduled on this context.
  * Waits till any outstanding processing completes but
@@ -559,7 +559,7 @@
 
 struct netdev_queue {
 /*
- * read mostly part
+ * read-mostly part
  */
 	struct net_device	*dev;
 	struct Qdisc __rcu	*qdisc;
@@ -571,7 +571,7 @@
 	int			numa_node;
 #endif
 /*
- * write mostly part
+ * write-mostly part
  */
 	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
 	int			xmit_lock_owner;
@@ -648,11 +648,11 @@
 /*
  * The rps_sock_flow_table contains mappings of flows to the last CPU
  * on which they were processed by the application (set in recvmsg).
- * Each entry is a 32bit value. Upper part is the high order bits
- * of flow hash, lower part is cpu number.
+ * Each entry is a 32bit value. Upper part is the high-order bits
+ * of flow hash, lower part is CPU number.
  * rps_cpu_mask is used to partition the space, depending on number of
- * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
- * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f,
+ * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
+ * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
  * meaning we use 32-6=26 bits for the hash.
  */
 struct rps_sock_flow_table {
@@ -674,7 +674,7 @@
 		unsigned int index = hash & table->mask;
 		u32 val = hash & ~rps_cpu_mask;
 
-		/* We only give a hint, preemption can change cpu under us */
+		/* We only give a hint, preemption can change CPU under us */
 		val |= raw_smp_processor_id();
 
 		if (table->ents[index] != val)
@@ -807,21 +807,21 @@
  * optional and can be filled with a null pointer.
  *
  * int (*ndo_init)(struct net_device *dev);
- *     This function is called once when network device is registered.
- *     The network device can use this to any late stage initializaton
- *     or semantic validattion. It can fail with an error code which will
- *     be propogated back to register_netdev
+ *     This function is called once when a network device is registered.
+ *     The network device can use this for any late stage initialization
+ *     or semantic validation. It can fail with an error code which will
+ *     be propagated back to register_netdev.
  *
  * void (*ndo_uninit)(struct net_device *dev);
  *     This function is called when device is unregistered or when registration
  *     fails. It is not called if init fails.
  *
  * int (*ndo_open)(struct net_device *dev);
- *     This function is called when network device transistions to the up
+ *     This function is called when a network device transitions to the up
  *     state.
  *
  * int (*ndo_stop)(struct net_device *dev);
- *     This function is called when network device transistions to the down
+ *     This function is called when a network device transitions to the down
  *     state.
  *
  * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb,
@@ -832,7 +832,7 @@
  *	corner cases, but the stack really does a non-trivial amount
  *	of useless work if you return NETDEV_TX_BUSY.
  *        (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
- *	Required can not be NULL.
+ *	Required; cannot be NULL.
  *
  * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
  *		netdev_features_t features);
@@ -842,34 +842,34 @@
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
- *	Called to decide which queue to when device supports multiple
+ *	Called to decide which queue to use when device supports multiple
  *	transmit queues.
  *
  * void (*ndo_change_rx_flags)(struct net_device *dev, int flags);
  *	This function is called to allow device receiver to make
- *	changes to configuration when multicast or promiscious is enabled.
+ *	changes to configuration when multicast or promiscuous is enabled.
  *
  * void (*ndo_set_rx_mode)(struct net_device *dev);
  *	This function is called device changes address list filtering.
  *	If driver handles unicast address filtering, it should set
- *	IFF_UNICAST_FLT to its priv_flags.
+ *	IFF_UNICAST_FLT in its priv_flags.
  *
  * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
  *	This function  is called when the Media Access Control address
  *	needs to be changed. If this interface is not defined, the
- *	mac address can not be changed.
+ *	MAC address can not be changed.
  *
  * int (*ndo_validate_addr)(struct net_device *dev);
  *	Test if Media Access Control address is valid for the device.
  *
  * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
- *	Called when a user request an ioctl which can't be handled by
- *	the generic interface code. If not defined ioctl's return
+ *	Called when a user requests an ioctl which can't be handled by
+ *	the generic interface code. If not defined ioctls return
  *	not supported error code.
  *
  * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map);
  *	Used to set network devices bus interface parameters. This interface
- *	is retained for legacy reason, new devices should use the bus
+ *	is retained for legacy reasons; new devices should use the bus
  *	interface (PCI) for low level management.
  *
  * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
@@ -878,7 +878,7 @@
  *	will return an error.
  *
  * void (*ndo_tx_timeout)(struct net_device *dev);
- *	Callback uses when the transmitter has not made any progress
+ *	Callback used when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
  * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
@@ -896,11 +896,11 @@
  *	   neither operation.
  *
  * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid);
- *	If device support VLAN filtering this function is called when a
+ *	If device supports VLAN filtering this function is called when a
  *	VLAN id is registered.
  *
  * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid);
- *	If device support VLAN filtering this function is called when a
+ *	If device supports VLAN filtering this function is called when a
  *	VLAN id is unregistered.
  *
  * void (*ndo_poll_controller)(struct net_device *dev);
@@ -920,7 +920,7 @@
  *
  *      Enable or disable the VF ability to query its RSS Redirection Table and
  *      Hash Key. This is needed since on some devices VF share this information
- *      with PF and querying it may adduce a theoretical security risk.
+ *      with PF and querying it may introduce a theoretical security risk.
  * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
  * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
@@ -1030,20 +1030,20 @@
  *
  * void (*ndo_add_vxlan_port)(struct  net_device *dev,
  *			      sa_family_t sa_family, __be16 port);
- *	Called by vxlan to notiy a driver about the UDP port and socket
- *	address family that vxlan is listnening to. It is called only when
+ *	Called by vxlan to notify a driver about the UDP port and socket
+ *	address family that vxlan is listening to. It is called only when
  *	a new port starts listening. The operation is protected by the
  *	vxlan_net->sock_lock.
  *
  * void (*ndo_add_geneve_port)(struct net_device *dev,
- *			      sa_family_t sa_family, __be16 port);
+ *			       sa_family_t sa_family, __be16 port);
  *	Called by geneve to notify a driver about the UDP port and socket
  *	address family that geneve is listnening to. It is called only when
  *	a new port starts listening. The operation is protected by the
  *	geneve_net->sock_lock.
  *
  * void (*ndo_del_geneve_port)(struct net_device *dev,
- *			      sa_family_t sa_family, __be16 port);
+ *			       sa_family_t sa_family, __be16 port);
  *	Called by geneve to notify the driver about a UDP port and socket
  *	address family that geneve is not listening to anymore. The operation
  *	is protected by the geneve_net->sock_lock.
@@ -1072,9 +1072,9 @@
  *	Callback to use for xmit over the accelerated station. This
  *	is used in place of ndo_start_xmit on accelerated net
  *	devices.
- * netdev_features_t (*ndo_features_check) (struct sk_buff *skb,
- *					    struct net_device *dev
- *					    netdev_features_t features);
+ * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
+ *					   struct net_device *dev
+ *					   netdev_features_t features);
  *	Called by core transmit path to determine if device is capable of
  *	performing offload operations on a given packet. This is to give
  *	the device an opportunity to implement any restrictions that cannot
@@ -1088,7 +1088,7 @@
  * int (*ndo_get_iflink)(const struct net_device *dev);
  *	Called to get the iflink value of this device.
  * void (*ndo_change_proto_down)(struct net_device *dev,
- *				  bool proto_down);
+ *				 bool proto_down);
  *	This function is used to pass protocol port error state information
  *	to the switch driver. The switch driver can react to the proto_down
  *      by doing a phys down on the associated switch port.
@@ -1100,7 +1100,7 @@
  *	This function is used to specify the headroom that the skb must
  *	consider when allocation skb during packet reception. Setting
  *	appropriate rx headroom value allows avoiding skb head copy on
- *	forward. Setting a negative value reset the rx headroom to the
+ *	forward. Setting a negative value resets the rx headroom to the
  *	default value.
  *
  */
@@ -1176,6 +1176,9 @@
 						   struct nlattr *port[]);
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
+	int			(*ndo_set_vf_guid)(struct net_device *dev,
+						   int vf, u64 guid,
+						   int guid_type);
 	int			(*ndo_set_vf_rss_query_en)(
 						   struct net_device *dev,
 						   int vf, bool setting);
@@ -1296,7 +1299,7 @@
  *
  * These are the &struct net_device, they are only set internally
  * by drivers and used in the kernel. These flags are invisible to
- * userspace, this means that the order of these flags can change
+ * userspace; this means that the order of these flags can change
  * during any kernel release.
  *
  * You should have a pretty good reason to be extending these flags.
@@ -1320,6 +1323,10 @@
  * @IFF_LIVE_ADDR_CHANGE: device supports hardware address
  *	change when it's running
  * @IFF_MACVLAN: Macvlan device
+ * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account
+ *	underlying stacked devices
+ * @IFF_IPVLAN_MASTER: IPvlan master device
+ * @IFF_IPVLAN_SLAVE: IPvlan slave device
  * @IFF_L3MDEV_MASTER: device is an L3 master device
  * @IFF_NO_QUEUE: device can run without qdisc attached
  * @IFF_OPENVSWITCH: device is a Open vSwitch master
@@ -1410,10 +1417,12 @@
  *
  *	@state:		Generic network queuing layer state, see netdev_state_t
  *	@dev_list:	The global list of network devices
- *	@napi_list:	List entry, that is used for polling napi devices
- *	@unreg_list:	List entry, that is used, when we are unregistering the
- *			device, see the function unregister_netdev
- *	@close_list:	List entry, that is used, when we are closing the device
+ *	@napi_list:	List entry used for polling NAPI devices
+ *	@unreg_list:	List entry  when we are unregistering the
+ *			device; see the function unregister_netdev
+ *	@close_list:	List entry used when we are closing the device
+ *	@ptype_all:     Device-specific packet handlers for all protocols
+ *	@ptype_specific: Device-specific, protocol-specific packet handlers
  *
  *	@adj_list:	Directly linked devices, like slaves for bonding
  *	@all_adj_list:	All linked devices, *including* neighbours
@@ -1431,7 +1440,7 @@
  *	@mpls_features:	Mask of features inheritable by MPLS
  *
  *	@ifindex:	interface index
- *	@group:		The group, that the device belongs to
+ *	@group:		The group the device belongs to
  *
  *	@stats:		Statistics struct, which was left as a legacy, use
  *			rtnl_link_stats64 instead
@@ -1485,7 +1494,7 @@
  * 	@dev_port:		Used to differentiate devices that share
  * 				the same function
  *	@addr_list_lock:	XXX: need comments on this one
- *	@uc_promisc:		Counter, that indicates, that promiscuous mode
+ *	@uc_promisc:		Counter that indicates promiscuous mode
  *				has been enabled due to the need to listen to
  *				additional unicast addresses in a device that
  *				does not implement ndo_set_rx_mode()
@@ -1493,9 +1502,9 @@
  *	@mc:			multicast mac addresses
  *	@dev_addrs:		list of device hw addresses
  *	@queues_kset:		Group of all Kobjects in the Tx and RX queues
- *	@promiscuity:		Number of times, the NIC is told to work in
- *				Promiscuous mode, if it becomes 0 the NIC will
- *				exit from working in Promiscuous mode
+ *	@promiscuity:		Number of times the NIC is told to work in
+ *				promiscuous mode; if it becomes 0 the NIC will
+ *				exit promiscuous mode
  *	@allmulti:		Counter, enables or disables allmulticast mode
  *
  *	@vlan_info:	VLAN info
@@ -1541,7 +1550,7 @@
  *
  *	@trans_start:		Time (in jiffies) of last Tx
  *	@watchdog_timeo:	Represents the timeout that is used by
- *				the watchdog ( see dev_watchdog() )
+ *				the watchdog (see dev_watchdog())
  *	@watchdog_timer:	List of timers
  *
  *	@pcpu_refcnt:		Number of references to this device
@@ -1658,8 +1667,8 @@
 	atomic_long_t		rx_nohandler;
 
 #ifdef CONFIG_WIRELESS_EXT
-	const struct iw_handler_def *	wireless_handlers;
-	struct iw_public_data *	wireless_data;
+	const struct iw_handler_def *wireless_handlers;
+	struct iw_public_data	*wireless_data;
 #endif
 	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
@@ -1712,7 +1721,7 @@
 	unsigned int		allmulti;
 
 
-	/* Protocol specific pointers */
+	/* Protocol-specific pointers */
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
 	struct vlan_info __rcu	*vlan_info;
@@ -1742,13 +1751,11 @@
 	/* Interface address info used in eth_type_trans() */
 	unsigned char		*dev_addr;
 
-
 #ifdef CONFIG_SYSFS
 	struct netdev_rx_queue	*_rx;
 
 	unsigned int		num_rx_queues;
 	unsigned int		real_num_rx_queues;
-
 #endif
 
 	unsigned long		gro_flush_timeout;
@@ -1840,7 +1847,7 @@
 	struct garp_port __rcu	*garp_port;
 	struct mrp_port __rcu	*mrp_port;
 
-	struct device	dev;
+	struct device		dev;
 	const struct attribute_group *sysfs_groups[4];
 	const struct attribute_group *sysfs_rx_queue_group;
 
@@ -1855,9 +1862,9 @@
 #ifdef CONFIG_DCB
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
-	u8 num_tc;
-	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
-	u8 prio_tc_map[TC_BITMASK + 1];
+	u8			num_tc;
+	struct netdev_tc_txq	tc_to_txq[TC_MAX_QUEUE];
+	u8			prio_tc_map[TC_BITMASK + 1];
 
 #if IS_ENABLED(CONFIG_FCOE)
 	unsigned int		fcoe_ddp_xid;
@@ -1865,9 +1872,9 @@
 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 	struct netprio_map __rcu *priomap;
 #endif
-	struct phy_device *phydev;
-	struct lock_class_key *qdisc_tx_busylock;
-	bool proto_down;
+	struct phy_device	*phydev;
+	struct lock_class_key	*qdisc_tx_busylock;
+	bool			proto_down;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
@@ -2015,7 +2022,7 @@
 
 /* Set the sysfs device type for the network logical device to allow
  * fine-grained identification of different network device types. For
- * example Ethernet, Wirelss LAN, Bluetooth, WiMAX etc.
+ * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc.
  */
 #define SET_NETDEV_DEVTYPE(net, devtype)	((net)->dev.type = (devtype))
 
@@ -2025,22 +2032,22 @@
 #define NAPI_POLL_WEIGHT 64
 
 /**
- *	netif_napi_add - initialize a napi context
+ *	netif_napi_add - initialize a NAPI context
  *	@dev:  network device
- *	@napi: napi context
+ *	@napi: NAPI context
  *	@poll: polling function
  *	@weight: default weight
  *
- * netif_napi_add() must be used to initialize a napi context prior to calling
- * *any* of the other napi related functions.
+ * netif_napi_add() must be used to initialize a NAPI context prior to calling
+ * *any* of the other NAPI-related functions.
  */
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight);
 
 /**
- *	netif_tx_napi_add - initialize a napi context
+ *	netif_tx_napi_add - initialize a NAPI context
  *	@dev:  network device
- *	@napi: napi context
+ *	@napi: NAPI context
  *	@poll: polling function
  *	@weight: default weight
  *
@@ -2058,22 +2065,22 @@
 }
 
 /**
- *  netif_napi_del - remove a napi context
- *  @napi: napi context
+ *  netif_napi_del - remove a NAPI context
+ *  @napi: NAPI context
  *
- *  netif_napi_del() removes a napi context from the network device napi list
+ *  netif_napi_del() removes a NAPI context from the network device NAPI list
  */
 void netif_napi_del(struct napi_struct *napi);
 
 struct napi_gro_cb {
 	/* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */
-	void *frag0;
+	void	*frag0;
 
 	/* Length of frag0. */
 	unsigned int frag0_len;
 
 	/* This indicates where we are processing relative to skb->data. */
-	int data_offset;
+	int	data_offset;
 
 	/* This is non-zero if the packet cannot be merged with the new skb. */
 	u16	flush;
@@ -2096,8 +2103,8 @@
 	/* This is non-zero if the packet may be of the same flow. */
 	u8	same_flow:1;
 
-	/* Used in udp_gro_receive */
-	u8	udp_mark:1;
+	/* Used in tunnel GRO receive */
+	u8	encap_mark:1;
 
 	/* GRO checksum is valid */
 	u8	csum_valid:1;
@@ -2169,7 +2176,7 @@
 	struct udp_offload_callbacks callbacks;
 };
 
-/* often modified stats are per cpu, other are shared (netdev->stats) */
+/* often modified stats are per-CPU, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
 	u64     rx_packets;
 	u64     rx_bytes;
@@ -2266,7 +2273,7 @@
 	struct netdev_notifier_info info; /* must be first */
 	struct net_device *upper_dev; /* new upper dev */
 	bool master; /* is upper dev master */
-	bool linking; /* is the nofication for link or unlink */
+	bool linking; /* is the notification for link or unlink */
 	void *upper_info; /* upper dev info */
 };
 
@@ -2731,7 +2738,7 @@
 #endif /* CONFIG_NET_FLOW_LIMIT */
 
 /*
- * Incoming packets are placed on per-cpu queues
+ * Incoming packets are placed on per-CPU queues
  */
 struct softnet_data {
 	struct list_head	poll_list;
@@ -2901,7 +2908,7 @@
  *	@dev_queue: pointer to transmit queue
  *
  * BQL enabled drivers might use this helper in their ndo_start_xmit(),
- * to give appropriate hint to the cpu.
+ * to give appropriate hint to the CPU.
  */
 static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue)
 {
@@ -2915,7 +2922,7 @@
  *	@dev_queue: pointer to transmit queue
  *
  * BQL enabled drivers might use this helper in their TX completion path,
- * to give appropriate hint to the cpu.
+ * to give appropriate hint to the CPU.
  */
 static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue)
 {
@@ -3054,7 +3061,7 @@
 }
 
 /*
- * Routines to manage the subqueues on a device.  We only need start
+ * Routines to manage the subqueues on a device.  We only need start,
  * stop, and a check if it's stopped.  All other device management is
  * done at the overall netdevice level.
  * Also test the device if we're multiqueue.
@@ -3338,7 +3345,6 @@
  * in a "pending" state, waiting for some external event.  For "on-
  * demand" interfaces, this new state identifies the situation where the
  * interface is waiting for events to place it in the up state.
- *
  */
 static inline void netif_dormant_on(struct net_device *dev)
 {
@@ -3673,7 +3679,7 @@
  *
  *  Add newly added addresses to the interface, and release
  *  addresses that have been deleted.
- **/
+ */
 static inline int __dev_uc_sync(struct net_device *dev,
 				int (*sync)(struct net_device *,
 					    const unsigned char *),
@@ -3689,7 +3695,7 @@
  *  @unsync: function to call if address should be removed
  *
  *  Remove all addresses that were added to the device by dev_uc_sync().
- **/
+ */
 static inline void __dev_uc_unsync(struct net_device *dev,
 				   int (*unsync)(struct net_device *,
 						 const unsigned char *))
@@ -3717,7 +3723,7 @@
  *
  *  Add newly added addresses to the interface, and release
  *  addresses that have been deleted.
- **/
+ */
 static inline int __dev_mc_sync(struct net_device *dev,
 				int (*sync)(struct net_device *,
 					    const unsigned char *),
@@ -3733,7 +3739,7 @@
  *  @unsync: function to call if address should be removed
  *
  *  Remove all addresses that were added to the device by dev_mc_sync().
- **/
+ */
 static inline void __dev_mc_unsync(struct net_device *dev,
 				   int (*unsync)(struct net_device *,
 						 const unsigned char *))

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 0e1f433..f48b8a6 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h

@@ -234,6 +234,10 @@
 	spinlock_t lock;
 	/* References to the set */
 	u32 ref;
+	/* References to the set for netlink events like dump,
+	 * ref can be swapped out by ip_set_swap
+	 */
+	u32 ref_netlink;
 	/* The core set type */
 	struct ip_set_type *type;
 	/* The type variant doing the real job */

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index d6f9b4e..0114334 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h

@@ -529,6 +529,7 @@
 	LAYOUT_OSD2_OBJECTS = 2,
 	LAYOUT_BLOCK_VOLUME = 3,
 	LAYOUT_FLEX_FILES = 4,
+	LAYOUT_SCSI = 5,
 	LAYOUT_TYPE_MAX
 };
 
@@ -555,6 +556,7 @@
 	PNFS_BLOCK_VOLUME_SLICE		= 1,
 	PNFS_BLOCK_VOLUME_CONCAT	= 2,
 	PNFS_BLOCK_VOLUME_STRIPE	= 3,
+	PNFS_BLOCK_VOLUME_SCSI		= 4,
 };
 
 enum pnfs_block_extent_state {
@@ -568,6 +570,23 @@
 #define PNFS_BLOCK_EXTENT_SIZE \
 	(7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE)
 
+/* on the wire size of a scsi commit range */
+#define PNFS_SCSI_RANGE_SIZE \
+	(4 * sizeof(__be32))
+
+enum scsi_code_set {
+	PS_CODE_SET_BINARY	= 1,
+	PS_CODE_SET_ASCII	= 2,
+	PS_CODE_SET_UTF8	= 3
+};
+
+enum scsi_designator_type {
+	PS_DESIGNATOR_T10	= 1,
+	PS_DESIGNATOR_EUI64	= 2,
+	PS_DESIGNATOR_NAA	= 3,
+	PS_DESIGNATOR_NAME	= 8
+};
+
 #define NFL4_UFLG_MASK			0x0000003F
 #define NFL4_UFLG_DENSE			0x00000001
 #define NFL4_UFLG_COMMIT_THRU_MDS	0x00000002

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index f2f650f..957049f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h

@@ -41,8 +41,8 @@
 	struct page		*wb_page;	/* page to read in/write out */
 	struct nfs_open_context	*wb_context;	/* File state context info */
 	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
-	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
-	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+	pgoff_t			wb_index;	/* Offset >> PAGE_SHIFT */
+	unsigned int		wb_offset,	/* Offset & ~PAGE_MASK */
 				wb_pgbase,	/* Start of page data */
 				wb_bytes;	/* Length of request */
 	struct kref		wb_kref;	/* reference count */
@@ -184,7 +184,7 @@
 static inline
 loff_t req_offset(struct nfs_page *req)
 {
-	return (((loff_t)req->wb_index) << PAGE_CACHE_SHIFT) + req->wb_offset;
+	return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset;
 }
 
 #endif /* _LINUX_NFS_PAGE_H */

diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 9abb763..e9fcf90 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h

@@ -331,7 +331,7 @@
 {
 	unsigned len = le16_to_cpu(dlen);
 
-#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536)
+#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536)
 	if (len == NILFS_MAX_REC_LEN)
 		return 1 << 16;
 #endif
@@ -340,7 +340,7 @@
 
 static inline __le16 nilfs_rec_len_to_disk(unsigned len)
 {
-#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536)
+#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536)
 	if (len == (1 << 16))
 		return cpu_to_le16(NILFS_MAX_REC_LEN);
 	else if (len > (1 << 16))

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 7ec5b86..4630eea 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h

@@ -65,7 +65,6 @@
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
-int hw_nmi_is_cpu_stuck(struct pt_regs *);
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
 extern int nmi_watchdog_enabled;
 extern int soft_watchdog_enabled;

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index f798e2a..6f47562 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h

@@ -284,7 +284,7 @@
 		/* ops->db_read_mask			&& */
 		ops->db_set_mask			&&
 		ops->db_clear_mask			&&
-		ops->peer_db_addr			&&
+		/* ops->peer_db_addr			&& */
 		/* ops->peer_db_read			&& */
 		ops->peer_db_set			&&
 		/* ops->peer_db_clear			&& */
@@ -295,7 +295,7 @@
 		ops->spad_count				&&
 		ops->spad_read				&&
 		ops->spad_write				&&
-		ops->peer_spad_addr			&&
+		/* ops->peer_spad_addr			&& */
 		/* ops->peer_spad_read			&& */
 		ops->peer_spad_write			&&
 		1;
@@ -757,6 +757,9 @@
 				   phys_addr_t *db_addr,
 				   resource_size_t *db_size)
 {
+	if (!ntb->ops->peer_db_addr)
+		return -EINVAL;
+
 	return ntb->ops->peer_db_addr(ntb, db_addr, db_size);
 }
 
@@ -948,6 +951,9 @@
 static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
 				     phys_addr_t *spad_addr)
 {
+	if (!ntb->ops->peer_spad_addr)
+		return -EINVAL;
+
 	return ntb->ops->peer_spad_addr(ntb, idx, spad_addr);
 }
 

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 03e6257..628a432 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h

@@ -76,8 +76,6 @@
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
 
-extern int oom_kills_count(void);
-extern void note_oom_kill(void);
 extern void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 			     unsigned int points, unsigned long totalpages,
 			     struct mem_cgroup *memcg, const char *message);
@@ -91,7 +89,7 @@
 
 extern bool out_of_memory(struct oom_control *oc);
 
-extern void exit_oom_victim(void);
+extern void exit_oom_victim(struct task_struct *tsk);
 
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1ebd65c..7e1ab15 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h

@@ -86,21 +86,6 @@
 				(__force unsigned long)mask;
 }
 
-/*
- * The page cache can be done in larger chunks than
- * one page, because it allows for more efficient
- * throughput (it can then be mapped into user
- * space in smaller chunks for same flexibility).
- *
- * Or rather, it _will_ be done in larger chunks.
- */
-#define PAGE_CACHE_SHIFT	PAGE_SHIFT
-#define PAGE_CACHE_SIZE		PAGE_SIZE
-#define PAGE_CACHE_MASK		PAGE_MASK
-#define PAGE_CACHE_ALIGN(addr)	(((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK)
-
-#define page_cache_get(page)		get_page(page)
-#define page_cache_release(page)	put_page(page)
 void release_pages(struct page **pages, int nr, bool cold);
 
 /*
@@ -390,13 +375,13 @@
 		return page->index << compound_order(page);
 
 	if (likely(!PageTransTail(page)))
-		return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+		return page->index;
 
 	/*
 	 *  We don't initialize ->index for tail pages: calculate based on
 	 *  head page
 	 */
-	pgoff = compound_head(page)->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	pgoff = compound_head(page)->index;
 	pgoff += page - compound_head(page);
 	return pgoff;
 }
@@ -406,12 +391,12 @@
  */
 static inline loff_t page_offset(struct page *page)
 {
-	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
+	return ((loff_t)page->index) << PAGE_SHIFT;
 }
 
 static inline loff_t page_file_offset(struct page *page)
 {
-	return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
+	return ((loff_t)page_file_index(page)) << PAGE_SHIFT;
 }
 
 extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
@@ -425,7 +410,7 @@
 		return linear_hugepage_index(vma, address);
 	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
 	pgoff += vma->vm_pgoff;
-	return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	return pgoff;
 }
 
 extern void __lock_page(struct page *page);
@@ -535,8 +520,7 @@
 /*
  * Fault a userspace page into pagetables.  Return non-zero on a fault.
  *
- * This assumes that two userspace pages are always sufficient.  That's
- * not true if PAGE_CACHE_SIZE > PAGE_SIZE.
+ * This assumes that two userspace pages are always sufficient.
  */
 static inline int fault_in_pages_writeable(char __user *uaddr, int size)
 {
@@ -671,8 +655,8 @@
 
 static inline unsigned long dir_pages(struct inode *inode)
 {
-	return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >>
-			       PAGE_CACHE_SHIFT;
+	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
+			       PAGE_SHIFT;
 }
 
 #endif /* _LINUX_PAGEMAP_H */

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 78fda2a..f291275 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h

@@ -121,6 +121,7 @@
 		struct { /* intel_cqm */
 			int			cqm_state;
 			u32			cqm_rmid;
+			int			is_group_event;
 			struct list_head	cqm_events_entry;
 			struct list_head	cqm_groups_entry;
 			struct list_head	cqm_group_entry;
@@ -128,6 +129,10 @@
 		struct { /* itrace */
 			int			itrace_started;
 		};
+		struct { /* amd_power */
+			u64	pwr_acc;
+			u64	ptsc;
+		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
 			/*

diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h
index 36bb921..c55e42e 100644
--- a/include/linux/platform_data/mtd-nand-s3c2410.h
+++ b/include/linux/platform_data/mtd-nand-s3c2410.h

@@ -40,7 +40,6 @@
 	char			*name;
 	int			*nr_map;
 	struct mtd_partition	*partitions;
-	struct nand_ecclayout	*ecc_layout;
 };
 
 struct s3c2410_platform_nand {

diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index 25266c6..308d604 100644
--- a/include/linux/pm_clock.h
+++ b/include/linux/pm_clock.h

@@ -42,7 +42,9 @@
 extern void pm_clk_destroy(struct device *dev);
 extern int pm_clk_add(struct device *dev, const char *con_id);
 extern int pm_clk_add_clk(struct device *dev, struct clk *clk);
+extern int of_pm_clk_add_clks(struct device *dev);
 extern void pm_clk_remove(struct device *dev, const char *con_id);
+extern void pm_clk_remove_clk(struct device *dev, struct clk *clk);
 extern int pm_clk_suspend(struct device *dev);
 extern int pm_clk_resume(struct device *dev);
 #else
@@ -69,11 +71,18 @@
 {
 	return -EINVAL;
 }
+static inline int of_pm_clk_add_clks(struct device *dev)
+{
+	return -EINVAL;
+}
 static inline void pm_clk_remove(struct device *dev, const char *con_id)
 {
 }
 #define pm_clk_suspend	NULL
 #define pm_clk_resume	NULL
+static inline void pm_clk_remove_clk(struct device *dev, struct clk *clk)
+{
+}
 #endif
 
 #ifdef CONFIG_HAVE_CLK

diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 3ec5309..ac6d872 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h

@@ -42,6 +42,13 @@
 	BUG();
 }
 
+static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
+		size_t n)
+{
+	BUG();
+	return -EFAULT;
+}
+
 static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
 		struct iov_iter *i)
 {
@@ -66,14 +73,17 @@
 #endif
 
 /*
- * Architectures that define ARCH_HAS_PMEM_API must provide
- * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
- * and arch_has_wmb_pmem().
+ * memcpy_from_pmem - read from persistent memory with error handling
+ * @dst: destination buffer
+ * @src: source buffer
+ * @size: transfer length
+ *
+ * Returns 0 on success negative error code on failure.
  */
-static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
+static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
+		size_t size)
 {
-	memcpy(dst, (void __force const *) src, size);
+	return arch_memcpy_from_pmem(dst, src, size);
 }
 
 static inline bool arch_has_pmem_api(void)

diff --git a/include/linux/rio.h b/include/linux/rio.h
index cde976e..aa23238 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h

@@ -137,6 +137,13 @@
 	int (*em_handle) (struct rio_dev *dev, u8 swport);
 };
 
+enum rio_device_state {
+	RIO_DEVICE_INITIALIZING,
+	RIO_DEVICE_RUNNING,
+	RIO_DEVICE_GONE,
+	RIO_DEVICE_SHUTDOWN,
+};
+
 /**
  * struct rio_dev - RIO device info
  * @global_list: Node in list of all RIO devices
@@ -165,6 +172,7 @@
  * @destid: Network destination ID (or associated destid for switch)
  * @hopcount: Hopcount to this device
  * @prev: Previous RIO device connected to the current one
+ * @state: device state
  * @rswitch: struct rio_switch (if valid for this device)
  */
 struct rio_dev {
@@ -194,6 +202,7 @@
 	u16 destid;
 	u8 hopcount;
 	struct rio_dev *prev;
+	atomic_t state;
 	struct rio_switch rswitch[0];	/* RIO switch info */
 };
 
@@ -202,6 +211,7 @@
 #define	to_rio_dev(n) container_of(n, struct rio_dev, dev)
 #define sw_to_rio_dev(n) container_of(n, struct rio_dev, rswitch[0])
 #define	to_rio_mport(n) container_of(n, struct rio_mport, dev)
+#define	to_rio_net(n) container_of(n, struct rio_net, dev)
 
 /**
  * struct rio_msg - RIO message event
@@ -235,8 +245,11 @@
 /**
  * struct rio_mport - RIO master port info
  * @dbells: List of doorbell events
+ * @pwrites: List of portwrite events
  * @node: Node in global list of master ports
  * @nnode: Node in network list of master ports
+ * @net: RIO net this mport is attached to
+ * @lock: lock to synchronize lists manipulations
  * @iores: I/O mem resource that this master port interface owns
  * @riores: RIO resources that this master port interfaces owns
  * @inb_msg: RIO inbound message event descriptors
@@ -253,11 +266,16 @@
  * @priv: Master port private data
  * @dma: DMA device associated with mport
  * @nscan: RapidIO network enumeration/discovery operations
+ * @state: mport device state
+ * @pwe_refcnt: port-write enable ref counter to track enable/disable requests
  */
 struct rio_mport {
 	struct list_head dbells;	/* list of doorbell events */
+	struct list_head pwrites;	/* list of portwrite events */
 	struct list_head node;	/* node in global list of ports */
 	struct list_head nnode;	/* node in net list of ports */
+	struct rio_net *net;	/* RIO net this mport is attached to */
+	struct mutex lock;
 	struct resource iores;
 	struct resource riores[RIO_MAX_MPORT_RESOURCES];
 	struct rio_msg inb_msg[RIO_MAX_MBOX];
@@ -280,20 +298,20 @@
 	struct dma_device	dma;
 #endif
 	struct rio_scan *nscan;
+	atomic_t state;
+	unsigned int pwe_refcnt;
 };
 
+static inline int rio_mport_is_running(struct rio_mport *mport)
+{
+	return atomic_read(&mport->state) == RIO_DEVICE_RUNNING;
+}
+
 /*
  * Enumeration/discovery control flags
  */
 #define RIO_SCAN_ENUM_NO_WAIT	0x00000001 /* Do not wait for enum completed */
 
-struct rio_id_table {
-	u16 start;	/* logical minimal id */
-	u32 max;	/* max number of IDs in table */
-	spinlock_t lock;
-	unsigned long *table;
-};
-
 /**
  * struct rio_net - RIO network info
  * @node: Node in global list of RIO networks
@@ -302,7 +320,9 @@
  * @mports: List of master ports accessing this network
  * @hport: Default port for accessing this network
  * @id: RIO network ID
- * @destid_table: destID allocation table
+ * @dev: Device object
+ * @enum_data: private data specific to a network enumerator
+ * @release: enumerator-specific release callback
  */
 struct rio_net {
 	struct list_head node;	/* node in list of networks */
@@ -311,7 +331,53 @@
 	struct list_head mports;	/* list of ports accessing net */
 	struct rio_mport *hport;	/* primary port for accessing net */
 	unsigned char id;	/* RIO network ID */
-	struct rio_id_table destid_table;  /* destID allocation table */
+	struct device dev;
+	void *enum_data;	/* private data for enumerator of the network */
+	void (*release)(struct rio_net *net);
+};
+
+enum rio_link_speed {
+	RIO_LINK_DOWN = 0, /* SRIO Link not initialized */
+	RIO_LINK_125 = 1, /* 1.25 GBaud  */
+	RIO_LINK_250 = 2, /* 2.5 GBaud   */
+	RIO_LINK_312 = 3, /* 3.125 GBaud */
+	RIO_LINK_500 = 4, /* 5.0 GBaud   */
+	RIO_LINK_625 = 5  /* 6.25 GBaud  */
+};
+
+enum rio_link_width {
+	RIO_LINK_1X  = 0,
+	RIO_LINK_1XR = 1,
+	RIO_LINK_2X  = 3,
+	RIO_LINK_4X  = 2,
+	RIO_LINK_8X  = 4,
+	RIO_LINK_16X = 5
+};
+
+enum rio_mport_flags {
+	RIO_MPORT_DMA	 = (1 << 0), /* supports DMA data transfers */
+	RIO_MPORT_DMA_SG = (1 << 1), /* DMA supports HW SG mode */
+	RIO_MPORT_IBSG	 = (1 << 2), /* inbound mapping supports SG */
+};
+
+/**
+ * struct rio_mport_attr - RIO mport device attributes
+ * @flags: mport device capability flags
+ * @link_speed: SRIO link speed value (as defined by RapidIO specification)
+ * @link_width:	SRIO link width value (as defined by RapidIO specification)
+ * @dma_max_sge: number of SG list entries that can be handled by DMA channel(s)
+ * @dma_max_size: max number of bytes in single DMA transfer (SG entry)
+ * @dma_align: alignment shift for DMA operations (as for other DMA operations)
+ */
+struct rio_mport_attr {
+	int flags;
+	int link_speed;
+	int link_width;
+
+	/* DMA capability info: valid only if RIO_MPORT_DMA flag is set */
+	int dma_max_sge;
+	int dma_max_size;
+	int dma_align;
 };
 
 /* Low-level architecture-dependent routines */
@@ -333,6 +399,9 @@
  * @get_inb_message: Callback to get a message from an inbound mailbox queue.
  * @map_inb: Callback to map RapidIO address region into local memory space.
  * @unmap_inb: Callback to unmap RapidIO address region mapped with map_inb().
+ * @query_mport: Callback to query mport device attributes.
+ * @map_outb: Callback to map outbound address region into local memory space.
+ * @unmap_outb: Callback to unmap outbound RapidIO address region.
  */
 struct rio_ops {
 	int (*lcread) (struct rio_mport *mport, int index, u32 offset, int len,
@@ -358,6 +427,11 @@
 	int (*map_inb)(struct rio_mport *mport, dma_addr_t lstart,
 			u64 rstart, u32 size, u32 flags);
 	void (*unmap_inb)(struct rio_mport *mport, dma_addr_t lstart);
+	int (*query_mport)(struct rio_mport *mport,
+			   struct rio_mport_attr *attr);
+	int (*map_outb)(struct rio_mport *mport, u16 destid, u64 rstart,
+			u32 size, u32 flags, dma_addr_t *laddr);
+	void (*unmap_outb)(struct rio_mport *mport, u16 destid, u64 rstart);
 };
 
 #define RIO_RESOURCE_MEM	0x00000100
@@ -376,6 +450,7 @@
  * @id_table: RIO device ids to be associated with this driver
  * @probe: RIO device inserted
  * @remove: RIO device removed
+ * @shutdown: shutdown notification callback
  * @suspend: RIO device suspended
  * @resume: RIO device awakened
  * @enable_wake: RIO device enable wake event
@@ -390,6 +465,7 @@
 	const struct rio_device_id *id_table;
 	int (*probe) (struct rio_dev * dev, const struct rio_device_id * id);
 	void (*remove) (struct rio_dev * dev);
+	void (*shutdown)(struct rio_dev *dev);
 	int (*suspend) (struct rio_dev * dev, u32 state);
 	int (*resume) (struct rio_dev * dev);
 	int (*enable_wake) (struct rio_dev * dev, u32 state, int enable);
@@ -476,10 +552,14 @@
 };
 
 /* Architecture and hardware-specific functions */
+extern int rio_mport_initialize(struct rio_mport *);
 extern int rio_register_mport(struct rio_mport *);
+extern int rio_unregister_mport(struct rio_mport *);
 extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int);
 extern void rio_close_inb_mbox(struct rio_mport *, int);
 extern int rio_open_outb_mbox(struct rio_mport *, void *, int, int);
 extern void rio_close_outb_mbox(struct rio_mport *, int);
+extern int rio_query_mport(struct rio_mport *port,
+			   struct rio_mport_attr *mport_attr);
 
 #endif				/* LINUX_RIO_H */

diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 9fc2f21..0834264 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h

@@ -369,12 +369,24 @@
 extern int rio_map_inb_region(struct rio_mport *mport, dma_addr_t local,
 			u64 rbase, u32 size, u32 rflags);
 extern void rio_unmap_inb_region(struct rio_mport *mport, dma_addr_t lstart);
+extern int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase,
+			u32 size, u32 rflags, dma_addr_t *local);
+extern void rio_unmap_outb_region(struct rio_mport *mport,
+				  u16 destid, u64 rstart);
 
 /* Port-Write management */
 extern int rio_request_inb_pwrite(struct rio_dev *,
 			int (*)(struct rio_dev *, union rio_pw_msg*, int));
 extern int rio_release_inb_pwrite(struct rio_dev *);
-extern int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg);
+extern int rio_add_mport_pw_handler(struct rio_mport *mport, void *dev_id,
+			int (*pwcback)(struct rio_mport *mport, void *dev_id,
+			union rio_pw_msg *msg, int step));
+extern int rio_del_mport_pw_handler(struct rio_mport *mport, void *dev_id,
+			int (*pwcback)(struct rio_mport *mport, void *dev_id,
+			union rio_pw_msg *msg, int step));
+extern int rio_inb_pwrite_handler(struct rio_mport *mport,
+				  union rio_pw_msg *pw_msg);
+extern void rio_pw_enable(struct rio_mport *mport, int enable);
 
 /* LDM support */
 int rio_register_driver(struct rio_driver *);
@@ -435,6 +447,7 @@
 
 /* Misc driver helpers */
 extern u16 rio_local_get_device_id(struct rio_mport *port);
+extern void rio_local_set_device_id(struct rio_mport *port, u16 did);
 extern struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from);
 extern struct rio_dev *rio_get_asm(u16 vid, u16 did, u16 asm_vid, u16 asm_did,
 				   struct rio_dev *from);

diff --git a/include/linux/rio_mport_cdev.h b/include/linux/rio_mport_cdev.h
new file mode 100644
index 0000000..b65d19d
--- /dev/null
+++ b/include/linux/rio_mport_cdev.h

@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2015-2016, Integrated Device Technology Inc.
+ * Copyright (c) 2015, Prodrive Technologies
+ * Copyright (c) 2015, Texas Instruments Incorporated
+ * Copyright (c) 2015, RapidIO Trade Association
+ * All rights reserved.
+ *
+ * This software is available to you under a choice of one of two licenses.
+ * You may choose to be licensed under the terms of the GNU General Public
+ * License(GPL) Version 2, or the BSD-3 Clause license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RIO_MPORT_CDEV_H_
+#define _RIO_MPORT_CDEV_H_
+
+#ifndef __user
+#define __user
+#endif
+
+struct rio_mport_maint_io {
+	uint32_t rioid;		/* destID of remote device */
+	uint32_t hopcount;	/* hopcount to remote device */
+	uint32_t offset;	/* offset in register space */
+	size_t length;		/* length in bytes */
+	void __user *buffer;	/* data buffer */
+};
+
+/*
+ * Definitions for RapidIO data transfers:
+ * - memory mapped (MAPPED)
+ * - packet generation from memory (TRANSFER)
+ */
+#define RIO_TRANSFER_MODE_MAPPED	(1 << 0)
+#define RIO_TRANSFER_MODE_TRANSFER	(1 << 1)
+#define RIO_CAP_DBL_SEND		(1 << 2)
+#define RIO_CAP_DBL_RECV		(1 << 3)
+#define RIO_CAP_PW_SEND			(1 << 4)
+#define RIO_CAP_PW_RECV			(1 << 5)
+#define RIO_CAP_MAP_OUTB		(1 << 6)
+#define RIO_CAP_MAP_INB			(1 << 7)
+
+struct rio_mport_properties {
+	uint16_t hdid;
+	uint8_t id;			/* Physical port ID */
+	uint8_t  index;
+	uint32_t flags;
+	uint32_t sys_size;		/* Default addressing size */
+	uint8_t  port_ok;
+	uint8_t  link_speed;
+	uint8_t  link_width;
+	uint32_t dma_max_sge;
+	uint32_t dma_max_size;
+	uint32_t dma_align;
+	uint32_t transfer_mode;		/* Default transfer mode */
+	uint32_t cap_sys_size;		/* Capable system sizes */
+	uint32_t cap_addr_size;		/* Capable addressing sizes */
+	uint32_t cap_transfer_mode;	/* Capable transfer modes */
+	uint32_t cap_mport;		/* Mport capabilities */
+};
+
+/*
+ * Definitions for RapidIO events;
+ * - incoming port-writes
+ * - incoming doorbells
+ */
+#define RIO_DOORBELL	(1 << 0)
+#define RIO_PORTWRITE	(1 << 1)
+
+struct rio_doorbell {
+	uint32_t rioid;
+	uint16_t payload;
+};
+
+struct rio_doorbell_filter {
+	uint32_t rioid;			/* 0xffffffff to match all ids */
+	uint16_t low;
+	uint16_t high;
+};
+
+
+struct rio_portwrite {
+	uint32_t payload[16];
+};
+
+struct rio_pw_filter {
+	uint32_t mask;
+	uint32_t low;
+	uint32_t high;
+};
+
+/* RapidIO base address for inbound requests set to value defined below
+ * indicates that no specific RIO-to-local address translation is requested
+ * and driver should use direct (one-to-one) address mapping.
+*/
+#define RIO_MAP_ANY_ADDR	(uint64_t)(~((uint64_t) 0))
+
+struct rio_mmap {
+	uint32_t rioid;
+	uint64_t rio_addr;
+	uint64_t length;
+	uint64_t handle;
+	void *address;
+};
+
+struct rio_dma_mem {
+	uint64_t length;		/* length of DMA memory */
+	uint64_t dma_handle;		/* handle associated with this memory */
+	void *buffer;			/* pointer to this memory */
+};
+
+
+struct rio_event {
+	unsigned int header;	/* event type RIO_DOORBELL or RIO_PORTWRITE */
+	union {
+		struct rio_doorbell doorbell;	/* header for RIO_DOORBELL */
+		struct rio_portwrite portwrite; /* header for RIO_PORTWRITE */
+	} u;
+};
+
+enum rio_transfer_sync {
+	RIO_TRANSFER_SYNC,	/* synchronous transfer */
+	RIO_TRANSFER_ASYNC,	/* asynchronous transfer */
+	RIO_TRANSFER_FAF,	/* fire-and-forget transfer */
+};
+
+enum rio_transfer_dir {
+	RIO_TRANSFER_DIR_READ,	/* Read operation */
+	RIO_TRANSFER_DIR_WRITE,	/* Write operation */
+};
+
+/*
+ * RapidIO data exchange transactions are lists of individual transfers. Each
+ * transfer exchanges data between two RapidIO devices by remote direct memory
+ * access and has its own completion code.
+ *
+ * The RapidIO specification defines four types of data exchange requests:
+ * NREAD, NWRITE, SWRITE and NWRITE_R. The RapidIO DMA channel interface allows
+ * to specify the required type of write operation or combination of them when
+ * only the last data packet requires response.
+ *
+ * NREAD:    read up to 256 bytes from remote device memory into local memory
+ * NWRITE:   write up to 256 bytes from local memory to remote device memory
+ *           without confirmation
+ * SWRITE:   as NWRITE, but all addresses and payloads must be 64-bit aligned
+ * NWRITE_R: as NWRITE, but expect acknowledgment from remote device.
+ *
+ * The default exchange is chosen from NREAD and any of the WRITE modes as the
+ * driver sees fit. For write requests the user can explicitly choose between
+ * any of the write modes for each transaction.
+ */
+enum rio_exchange {
+	RIO_EXCHANGE_DEFAULT,	/* Default method */
+	RIO_EXCHANGE_NWRITE,	/* All packets using NWRITE */
+	RIO_EXCHANGE_SWRITE,	/* All packets using SWRITE */
+	RIO_EXCHANGE_NWRITE_R,	/* Last packet NWRITE_R, others NWRITE */
+	RIO_EXCHANGE_SWRITE_R,	/* Last packet NWRITE_R, others SWRITE */
+	RIO_EXCHANGE_NWRITE_R_ALL, /* All packets using NWRITE_R */
+};
+
+struct rio_transfer_io {
+	uint32_t rioid;			/* Target destID */
+	uint64_t rio_addr;		/* Address in target's RIO mem space */
+	enum rio_exchange method;	/* Data exchange method */
+	void __user *loc_addr;
+	uint64_t handle;
+	uint64_t offset;		/* Offset in buffer */
+	uint64_t length;		/* Length in bytes */
+	uint32_t completion_code;	/* Completion code for this transfer */
+};
+
+struct rio_transaction {
+	uint32_t transfer_mode;		/* Data transfer mode */
+	enum rio_transfer_sync sync;	/* Synchronization method */
+	enum rio_transfer_dir dir;	/* Transfer direction */
+	size_t count;			/* Number of transfers */
+	struct rio_transfer_io __user *block;	/* Array of <count> transfers */
+};
+
+struct rio_async_tx_wait {
+	uint32_t token;		/* DMA transaction ID token */
+	uint32_t timeout;	/* Wait timeout in msec, if 0 use default TO */
+};
+
+#define RIO_MAX_DEVNAME_SZ	20
+
+struct rio_rdev_info {
+	uint32_t destid;
+	uint8_t hopcount;
+	uint32_t comptag;
+	char name[RIO_MAX_DEVNAME_SZ + 1];
+};
+
+/* Driver IOCTL codes */
+#define RIO_MPORT_DRV_MAGIC           'm'
+
+#define RIO_MPORT_MAINT_HDID_SET	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 1, uint16_t)
+#define RIO_MPORT_MAINT_COMPTAG_SET	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 2, uint32_t)
+#define RIO_MPORT_MAINT_PORT_IDX_GET	\
+	_IOR(RIO_MPORT_DRV_MAGIC, 3, uint32_t)
+#define RIO_MPORT_GET_PROPERTIES \
+	_IOR(RIO_MPORT_DRV_MAGIC, 4, struct rio_mport_properties)
+#define RIO_MPORT_MAINT_READ_LOCAL \
+	_IOR(RIO_MPORT_DRV_MAGIC, 5, struct rio_mport_maint_io)
+#define RIO_MPORT_MAINT_WRITE_LOCAL \
+	_IOW(RIO_MPORT_DRV_MAGIC, 6, struct rio_mport_maint_io)
+#define RIO_MPORT_MAINT_READ_REMOTE \
+	_IOR(RIO_MPORT_DRV_MAGIC, 7, struct rio_mport_maint_io)
+#define RIO_MPORT_MAINT_WRITE_REMOTE \
+	_IOW(RIO_MPORT_DRV_MAGIC, 8, struct rio_mport_maint_io)
+#define RIO_ENABLE_DOORBELL_RANGE	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 9, struct rio_doorbell_filter)
+#define RIO_DISABLE_DOORBELL_RANGE	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 10, struct rio_doorbell_filter)
+#define RIO_ENABLE_PORTWRITE_RANGE	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 11, struct rio_pw_filter)
+#define RIO_DISABLE_PORTWRITE_RANGE	\
+	_IOW(RIO_MPORT_DRV_MAGIC, 12, struct rio_pw_filter)
+#define RIO_SET_EVENT_MASK		\
+	_IOW(RIO_MPORT_DRV_MAGIC, 13, unsigned int)
+#define RIO_GET_EVENT_MASK		\
+	_IOR(RIO_MPORT_DRV_MAGIC, 14, unsigned int)
+#define RIO_MAP_OUTBOUND \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 15, struct rio_mmap)
+#define RIO_UNMAP_OUTBOUND \
+	_IOW(RIO_MPORT_DRV_MAGIC, 16, struct rio_mmap)
+#define RIO_MAP_INBOUND \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 17, struct rio_mmap)
+#define RIO_UNMAP_INBOUND \
+	_IOW(RIO_MPORT_DRV_MAGIC, 18, uint64_t)
+#define RIO_ALLOC_DMA \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 19, struct rio_dma_mem)
+#define RIO_FREE_DMA \
+	_IOW(RIO_MPORT_DRV_MAGIC, 20, uint64_t)
+#define RIO_TRANSFER \
+	_IOWR(RIO_MPORT_DRV_MAGIC, 21, struct rio_transaction)
+#define RIO_WAIT_FOR_ASYNC \
+	_IOW(RIO_MPORT_DRV_MAGIC, 22, struct rio_async_tx_wait)
+#define RIO_DEV_ADD \
+	_IOW(RIO_MPORT_DRV_MAGIC, 23, struct rio_rdev_info)
+#define RIO_DEV_DEL \
+	_IOW(RIO_MPORT_DRV_MAGIC, 24, struct rio_rdev_info)
+
+#endif /* _RIO_MPORT_CDEV_H_ */

diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h
index 218168a..1063ae3 100644
--- a/include/linux/rio_regs.h
+++ b/include/linux/rio_regs.h

@@ -238,6 +238,8 @@
 #define  RIO_PORT_N_ACK_INBOUND		0x3f000000
 #define  RIO_PORT_N_ACK_OUTSTAND	0x00003f00
 #define  RIO_PORT_N_ACK_OUTBOUND	0x0000003f
+#define RIO_PORT_N_CTL2_CSR(x)		(0x0054 + x*0x20)
+#define  RIO_PORT_N_CTL2_SEL_BAUD	0xf0000000
 #define RIO_PORT_N_ERR_STS_CSR(x)	(0x0058 + x*0x20)
 #define  RIO_PORT_N_ERR_STS_PW_OUT_ES	0x00010000 /* Output Error-stopped */
 #define  RIO_PORT_N_ERR_STS_PW_INP_ES	0x00000100 /* Input Error-stopped */
@@ -249,6 +251,7 @@
 #define  RIO_PORT_N_CTL_PWIDTH		0xc0000000
 #define  RIO_PORT_N_CTL_PWIDTH_1	0x00000000
 #define  RIO_PORT_N_CTL_PWIDTH_4	0x40000000
+#define  RIO_PORT_N_CTL_IPW		0x38000000 /* Initialized Port Width */
 #define  RIO_PORT_N_CTL_P_TYP_SER	0x00000001
 #define  RIO_PORT_N_CTL_LOCKOUT		0x00000002
 #define  RIO_PORT_N_CTL_EN_RX_SER	0x00200000

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 084ed9f..52c4847 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -51,6 +51,7 @@
 #include <linux/resource.h>
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
+#include <linux/kcov.h>
 #include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
 #include <linux/cred.h>
@@ -425,6 +426,7 @@
 extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
+extern signed long schedule_timeout_idle(signed long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
 
@@ -718,7 +720,7 @@
 	struct task_cputime cputime_expires;
 
 #ifdef CONFIG_NO_HZ_FULL
-	unsigned long tick_dep_mask;
+	atomic_t tick_dep_mask;
 #endif
 
 	struct list_head cpu_timers[3];
@@ -1547,7 +1549,7 @@
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-	unsigned long tick_dep_mask;
+	atomic_t tick_dep_mask;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time;		/* monotonic time in nsec */
@@ -1818,6 +1820,16 @@
 	/* bitmask and counter of trace recursion */
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
+#ifdef CONFIG_KCOV
+	/* Coverage collection mode enabled for this task (0 if disabled). */
+	enum kcov_mode kcov_mode;
+	/* Size of the kcov_area. */
+	unsigned	kcov_size;
+	/* Buffer for coverage collection. */
+	void		*kcov_area;
+	/* kcov desciptor wired with this task or NULL. */
+	struct kcov	*kcov;
+#endif
 #ifdef CONFIG_MEMCG
 	struct mem_cgroup *memcg_in_oom;
 	gfp_t memcg_oom_gfp_mask;
@@ -1837,6 +1849,9 @@
 	unsigned long	task_state_change;
 #endif
 	int pagefault_disabled;
+#ifdef CONFIG_MMU
+	struct task_struct *oom_reaper_list;
+#endif
 /* CPU-specific state of this task */
 	struct thread_struct thread;
 /*
@@ -2859,10 +2874,18 @@
 	unsigned long *n = end_of_stack(p);
 
 	do { 	/* Skip over canary */
+# ifdef CONFIG_STACK_GROWSUP
+		n--;
+# else
 		n++;
+# endif
 	} while (!*n);
 
+# ifdef CONFIG_STACK_GROWSUP
+	return (unsigned long)end_of_stack(p) - (unsigned long)n;
+# else
 	return (unsigned long)n - (unsigned long)end_of_stack(p);
+# endif
 }
 #endif
 extern void set_task_stack_end_magic(struct task_struct *tsk);

diff --git a/include/linux/slab.h b/include/linux/slab.h
index e4b5687..508bd82 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h

@@ -92,6 +92,12 @@
 # define SLAB_ACCOUNT		0x00000000UL
 #endif
 
+#ifdef CONFIG_KASAN
+#define SLAB_KASAN		0x08000000UL
+#else
+#define SLAB_KASAN		0x00000000UL
+#endif
+
 /* The following flags affect the page allocator grouping pages by mobility */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
 #define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
@@ -370,7 +376,7 @@
 {
 	void *ret = kmem_cache_alloc(s, flags);
 
-	kasan_kmalloc(s, ret, size);
+	kasan_kmalloc(s, ret, size, flags);
 	return ret;
 }
 
@@ -381,7 +387,7 @@
 {
 	void *ret = kmem_cache_alloc_node(s, gfpflags, node);
 
-	kasan_kmalloc(s, ret, size);
+	kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
 #endif /* CONFIG_TRACING */

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index e878ba3..9edbbf3 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h

@@ -76,8 +76,22 @@
 #ifdef CONFIG_MEMCG
 	struct memcg_cache_params memcg_params;
 #endif
+#ifdef CONFIG_KASAN
+	struct kasan_cache kasan_info;
+#endif
 
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
+static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+				void *x) {
+	void *object = x - (x - page->s_mem) % cache->size;
+	void *last_object = page->s_mem + (cache->num - 1) * cache->size;
+
+	if (unlikely(object > last_object))
+		return last_object;
+	else
+		return object;
+}
+
 #endif	/* _LINUX_SLAB_DEF_H */

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ac5143f..665cd0c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h

@@ -130,4 +130,15 @@
 void object_err(struct kmem_cache *s, struct page *page,
 		u8 *object, char *reason);
 
+static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+				void *x) {
+	void *object = x - (x - page_address(page)) % cache->size;
+	void *last_object = page_address(page) +
+		(page->objects - 1) * cache->size;
+	if (unlikely(object > last_object))
+		return last_object;
+	else
+		return object;
+}
+
 #endif /* _LINUX_SLUB_DEF_H */

diff --git a/include/linux/soc/ti/ti-msgmgr.h b/include/linux/soc/ti/ti-msgmgr.h
new file mode 100644
index 0000000..eac8e0c
--- /dev/null
+++ b/include/linux/soc/ti/ti-msgmgr.h

@@ -0,0 +1,35 @@
+/*
+ * Texas Instruments' Message Manager
+ *
+ * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/
+ *	Nishanth Menon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef TI_MSGMGR_H
+#define TI_MSGMGR_H
+
+/**
+ * struct ti_msgmgr_message - Message Manager structure
+ * @len: Length of data in the Buffer
+ * @buf: Buffer pointer
+ *
+ * This is the structure for data used in mbox_send_message
+ * the length of data buffer used depends on the SoC integration
+ * parameters - each message may be 64, 128 bytes long depending
+ * on SoC. Client is supposed to be aware of this.
+ */
+struct ti_msgmgr_message {
+	size_t len;
+	u8 *buf;
+};
+
+#endif /* TI_MSGMGR_H */

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
new file mode 100644
index 0000000..7978b3e
--- /dev/null
+++ b/include/linux/stackdepot.h

@@ -0,0 +1,32 @@
+/*
+ * A generic stack depot implementation
+ *
+ * Author: Alexander Potapenko <glider@google.com>
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * Based on code by Dmitry Chernenkov.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_STACKDEPOT_H
+#define _LINUX_STACKDEPOT_H
+
+typedef u32 depot_stack_handle_t;
+
+struct stack_trace;
+
+depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags);
+
+void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace);
+
+#endif

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 4bcf5a6..e6bc30a 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h

@@ -108,7 +108,6 @@
 };
 
 struct plat_stmmacenet_data {
-	char *phy_bus_name;
 	int bus_id;
 	int phy_addr;
 	int interface;

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 1ecf13e..6a241a2 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h

@@ -21,10 +21,17 @@
 #include <linux/utsname.h>
 
 /*
+ * Maximum size of AUTH_NONE authentication information, in XDR words.
+ */
+#define NUL_CALLSLACK	(4)
+#define NUL_REPLYSLACK	(2)
+
+/*
  * Size of the nodename buffer. RFC1831 specifies a hard limit of 255 bytes,
  * but Linux hostnames are actually limited to __NEW_UTS_LEN bytes.
  */
 #define UNX_MAXNODENAME	__NEW_UTS_LEN
+#define UNX_CALLSLACK	(21 + XDR_QUADLEN(UNX_MAXNODENAME))
 
 struct rpcsec_gss_info;
 

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 131032f..9a7ddba 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h

@@ -25,6 +25,7 @@
 #include <asm/signal.h>
 #include <linux/path.h>
 #include <net/ipv6.h>
+#include <linux/sunrpc/xprtmultipath.h>
 
 struct rpc_inode;
 
@@ -67,6 +68,7 @@
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	struct dentry		*cl_debugfs;	/* debugfs directory */
 #endif
+	struct rpc_xprt_iter	cl_xpi;
 };
 
 /*
@@ -139,7 +141,6 @@
 					struct rpc_xprt *xprt);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 				const struct rpc_program *, u32);
-void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *,
 				rpc_authflavor_t);
@@ -181,6 +182,21 @@
 const char	*rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 int		rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t);
 
+int 		rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt,
+			int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *),
+			void *data);
+
+int 		rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
+			struct rpc_xprt_switch *xps,
+			struct rpc_xprt *xprt,
+			void *dummy);
+int		rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *,
+			int (*setup)(struct rpc_clnt *,
+				struct rpc_xprt_switch *,
+				struct rpc_xprt *,
+				void *),
+			void *data);
+
 const char *rpc_proc_name(const struct rpc_task *task);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */

diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
index f33c5a4..3b1ff38 100644
--- a/include/linux/sunrpc/rpc_rdma.h
+++ b/include/linux/sunrpc/rpc_rdma.h

@@ -93,6 +93,12 @@
 			__be32 rm_pempty[3];	/* 3 empty chunk lists */
 		} rm_padded;
 
+		struct {
+			__be32 rm_err;
+			__be32 rm_vers_low;
+			__be32 rm_vers_high;
+		} rm_error;
+
 		__be32 rm_chunks[0];	/* read, write and reply chunks */
 
 	} rm_body;
@@ -102,17 +108,13 @@
  * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks
  */
 #define RPCRDMA_HDRLEN_MIN	(sizeof(__be32) * 7)
+#define RPCRDMA_HDRLEN_ERR	(sizeof(__be32) * 5)
 
 enum rpcrdma_errcode {
 	ERR_VERS = 1,
 	ERR_CHUNK = 2
 };
 
-struct rpcrdma_err_vers {
-	uint32_t rdma_vers_low;	/* Version range supported by peer */
-	uint32_t rdma_vers_high;
-};
-
 enum rpcrdma_proc {
 	RDMA_MSG = 0,		/* An RPC call or reply msg */
 	RDMA_NOMSG = 1,		/* An RPC call or reply msg - separate body */

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index d703f0e..05a1809 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h

@@ -42,40 +42,43 @@
  */
 struct rpc_task {
 	atomic_t		tk_count;	/* Reference count */
+	int			tk_status;	/* result of last operation */
 	struct list_head	tk_task;	/* global list of tasks */
-	struct rpc_clnt *	tk_client;	/* RPC client */
-	struct rpc_rqst *	tk_rqstp;	/* RPC request */
-
-	/*
-	 * RPC call state
-	 */
-	struct rpc_message	tk_msg;		/* RPC call info */
 
 	/*
 	 * callback	to be executed after waking up
 	 * action	next procedure for async tasks
-	 * tk_ops	caller callbacks
 	 */
 	void			(*tk_callback)(struct rpc_task *);
 	void			(*tk_action)(struct rpc_task *);
-	const struct rpc_call_ops *tk_ops;
-	void *			tk_calldata;
 
 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
 	unsigned long		tk_runstate;	/* Task run status */
-	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
-						 * be any workqueue
-						 */
+
 	struct rpc_wait_queue 	*tk_waitqueue;	/* RPC wait queue we're on */
 	union {
 		struct work_struct	tk_work;	/* Async task work queue */
 		struct rpc_wait		tk_wait;	/* RPC wait */
 	} u;
 
+	/*
+	 * RPC call state
+	 */
+	struct rpc_message	tk_msg;		/* RPC call info */
+	void *			tk_calldata;	/* Caller private data */
+	const struct rpc_call_ops *tk_ops;	/* Caller callbacks */
+
+	struct rpc_clnt *	tk_client;	/* RPC client */
+	struct rpc_xprt *	tk_xprt;	/* Transport */
+
+	struct rpc_rqst *	tk_rqstp;	/* RPC request */
+
+	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
+						 * be any workqueue
+						 */
 	ktime_t			tk_start;	/* RPC task init timestamp */
 
 	pid_t			tk_owner;	/* Process id for batching tasks */
-	int			tk_status;	/* result of last operation */
 	unsigned short		tk_flags;	/* misc flags */
 	unsigned short		tk_timeouts;	/* maj timeouts */
 
@@ -100,6 +103,7 @@
 struct rpc_task_setup {
 	struct rpc_task *task;
 	struct rpc_clnt *rpc_client;
+	struct rpc_xprt *rpc_xprt;
 	const struct rpc_message *rpc_message;
 	const struct rpc_call_ops *callback_ops;
 	void *callback_data;

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index cc0fc71..7ca44fb 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h

@@ -129,7 +129,7 @@
  *
  * These happen to all be powers of 2, which is not strictly
  * necessary but helps enforce the real limitation, which is
- * that they should be multiples of PAGE_CACHE_SIZE.
+ * that they should be multiples of PAGE_SIZE.
  *
  * For UDP transports, a block plus NFS,RPC, and UDP headers
  * has to fit into the IP datagram limit of 64K.  The largest

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 5322fea..3081339 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h

@@ -75,8 +75,10 @@
 	struct svc_rdma_fastreg_mr *frmr;
 	int hdr_count;
 	struct xdr_buf arg;
+	struct ib_cqe cqe;
+	struct ib_cqe reg_cqe;
+	struct ib_cqe inv_cqe;
 	struct list_head dto_q;
-	enum ib_wr_opcode wr_op;
 	enum ib_wc_status wc_status;
 	u32 byte_len;
 	u32 position;
@@ -174,8 +176,6 @@
 	struct work_struct   sc_work;
 };
 /* sc_flags */
-#define RDMAXPRT_RQ_PENDING	1
-#define RDMAXPRT_SQ_PENDING	2
 #define RDMAXPRT_CONN_PENDING	3
 
 #define RPCRDMA_LISTEN_BACKLOG  10
@@ -199,7 +199,7 @@
 				    struct xdr_buf *rcvbuf);
 
 /* svc_rdma_marshal.c */
-extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
+extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg *, struct svc_rqst *);
 extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
 				     struct rpcrdma_msg *,
 				     enum rpcrdma_errcode, __be32 *);
@@ -224,16 +224,22 @@
 
 /* svc_rdma_sendto.c */
 extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
-			    struct svc_rdma_req_map *);
+			    struct svc_rdma_req_map *, bool);
 extern int svc_rdma_sendto(struct svc_rqst *);
 extern struct rpcrdma_read_chunk *
 	svc_rdma_get_read_chunk(struct rpcrdma_msg *);
+extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
+				int);
 
 /* svc_rdma_transport.c */
+extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *);
+extern void svc_rdma_wc_write(struct ib_cq *, struct ib_wc *);
+extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *);
+extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *);
+extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *);
 extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
-extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
-				enum rpcrdma_errcode);
 extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
+extern int svc_rdma_repost_recv(struct svcxprt_rdma *, gfp_t);
 extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 69ef5b3..fb0d212 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h

@@ -13,6 +13,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/ktime.h>
+#include <linux/kref.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
@@ -166,7 +167,7 @@
 };
 
 struct rpc_xprt {
-	atomic_t		count;		/* Reference count */
+	struct kref		kref;		/* Reference count */
 	struct rpc_xprt_ops *	ops;		/* transport methods */
 
 	const struct rpc_timeout *timeout;	/* timeout parms */
@@ -197,6 +198,11 @@
 	unsigned int		bind_index;	/* bind function index */
 
 	/*
+	 * Multipath
+	 */
+	struct list_head	xprt_switch;
+
+	/*
 	 * Connection of transports
 	 */
 	unsigned long		bind_timeout,
@@ -256,6 +262,7 @@
 	struct dentry		*debugfs;		/* debugfs directory */
 	atomic_t		inject_disconnect;
 #endif
+	struct rcu_head		rcu;
 };
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
@@ -318,24 +325,13 @@
 void			xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_release(struct rpc_task *task);
+struct rpc_xprt *	xprt_get(struct rpc_xprt *xprt);
 void			xprt_put(struct rpc_xprt *xprt);
 struct rpc_xprt *	xprt_alloc(struct net *net, size_t size,
 				unsigned int num_prealloc,
 				unsigned int max_req);
 void			xprt_free(struct rpc_xprt *);
 
-/**
- * xprt_get - return a reference to an RPC transport.
- * @xprt: pointer to the transport
- *
- */
-static inline struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
-{
-	if (atomic_inc_not_zero(&xprt->count))
-		return xprt;
-	return NULL;
-}
-
 static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
 {
 	return p + xprt->tsh_size;

diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h
new file mode 100644
index 0000000..5a9acff
--- /dev/null
+++ b/include/linux/sunrpc/xprtmultipath.h

@@ -0,0 +1,69 @@
+/*
+ * RPC client multipathing definitions
+ *
+ * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved.
+ *
+ * Trond Myklebust <trond.myklebust@primarydata.com>
+ */
+#ifndef _NET_SUNRPC_XPRTMULTIPATH_H
+#define _NET_SUNRPC_XPRTMULTIPATH_H
+
+struct rpc_xprt_iter_ops;
+struct rpc_xprt_switch {
+	spinlock_t		xps_lock;
+	struct kref		xps_kref;
+
+	unsigned int		xps_nxprts;
+	struct list_head	xps_xprt_list;
+
+	struct net *		xps_net;
+
+	const struct rpc_xprt_iter_ops *xps_iter_ops;
+
+	struct rcu_head		xps_rcu;
+};
+
+struct rpc_xprt_iter {
+	struct rpc_xprt_switch __rcu *xpi_xpswitch;
+	struct rpc_xprt *	xpi_cursor;
+
+	const struct rpc_xprt_iter_ops *xpi_ops;
+};
+
+
+struct rpc_xprt_iter_ops {
+	void (*xpi_rewind)(struct rpc_xprt_iter *);
+	struct rpc_xprt *(*xpi_xprt)(struct rpc_xprt_iter *);
+	struct rpc_xprt *(*xpi_next)(struct rpc_xprt_iter *);
+};
+
+extern struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
+		gfp_t gfp_flags);
+
+extern struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps);
+extern void xprt_switch_put(struct rpc_xprt_switch *xps);
+
+extern void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps);
+
+extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
+		struct rpc_xprt *xprt);
+extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
+		struct rpc_xprt *xprt);
+
+extern void xprt_iter_init(struct rpc_xprt_iter *xpi,
+		struct rpc_xprt_switch *xps);
+
+extern void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
+		struct rpc_xprt_switch *xps);
+
+extern void xprt_iter_destroy(struct rpc_xprt_iter *xpi);
+
+extern struct rpc_xprt_switch *xprt_iter_xchg_switch(
+		struct rpc_xprt_iter *xpi,
+		struct rpc_xprt_switch *newswitch);
+
+extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi);
+extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi);
+extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi);
+
+#endif

diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index b7b279b..767190b 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h

@@ -54,8 +54,6 @@
 
 #define RPCRDMA_DEF_INLINE  (1024)	/* default inline max */
 
-#define RPCRDMA_INLINE_PAD_THRESH  (512)/* payload threshold to pad (bytes) */
-
 /* Memory registration strategies, by number.
  * This is part of a kernel / user space API. Do not remove. */
 enum rpcrdma_memreg {

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d18b65c..2b83359 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h

@@ -433,9 +433,9 @@
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
 /* only sparc can not include linux/pagemap.h in this file
- * so leave page_cache_release and release_pages undeclared... */
+ * so leave put_page and release_pages undeclared... */
 #define free_page_and_swap_cache(page) \
-	page_cache_release(page)
+	put_page(page)
 #define free_pages_and_swap_cache(pages, nr) \
 	release_pages((pages), (nr), false);
 

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index e13a1ac..a55d052 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h

@@ -156,6 +156,7 @@
  * @trip_hyst_attrs:	attributes for trip points for sysfs: trip hysteresis
  * @devdata:	private pointer for device private data
  * @trips:	number of trip points the thermal zone supports
+ * @trips_disabled;	bitmap for disabled trips
  * @passive_delay:	number of milliseconds to wait between polls when
  *			performing passive cooling.
  * @polling_delay:	number of milliseconds to wait between polls when
@@ -191,6 +192,7 @@
 	struct thermal_attr *trip_hyst_attrs;
 	void *devdata;
 	int trips;
+	unsigned long trips_disabled;	/* bitmap for disabled trips */
 	int passive_delay;
 	int polling_delay;
 	int temperature;
@@ -362,6 +364,11 @@
 				const struct thermal_zone_of_device_ops *ops);
 void thermal_zone_of_sensor_unregister(struct device *dev,
 				       struct thermal_zone_device *tz);
+struct thermal_zone_device *devm_thermal_zone_of_sensor_register(
+		struct device *dev, int id, void *data,
+		const struct thermal_zone_of_device_ops *ops);
+void devm_thermal_zone_of_sensor_unregister(struct device *dev,
+					    struct thermal_zone_device *tz);
 #else
 static inline struct thermal_zone_device *
 thermal_zone_of_sensor_register(struct device *dev, int id, void *data,
@@ -376,6 +383,19 @@
 {
 }
 
+static inline struct thermal_zone_device *devm_thermal_zone_of_sensor_register(
+		struct device *dev, int id, void *data,
+		const struct thermal_zone_of_device_ops *ops)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline
+void devm_thermal_zone_of_sensor_unregister(struct device *dev,
+					    struct thermal_zone_device *tz)
+{
+}
+
 #endif
 
 #if IS_ENABLED(CONFIG_THERMAL)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 705df7d..0810f81b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h

@@ -420,7 +420,8 @@
 extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
 						   void *rec);
 extern void event_triggers_post_call(struct trace_event_file *file,
-				     enum event_trigger_type tt);
+				     enum event_trigger_type tt,
+				     void *rec);
 
 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
 
@@ -507,7 +508,7 @@
 		trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
 
 	if (tt)
-		event_triggers_post_call(file, tt);
+		event_triggers_post_call(file, tt, entry);
 }
 
 /**
@@ -540,7 +541,7 @@
 						irq_flags, pc, regs);
 
 	if (tt)
-		event_triggers_post_call(file, tt);
+		event_triggers_post_call(file, tt, entry);
 }
 
 #ifdef CONFIG_BPF_EVENTS

diff --git a/include/net/flow.h b/include/net/flow.h
index 83969ee..d47ef4b 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h

@@ -127,7 +127,6 @@
 #define flowi6_oif		__fl_common.flowic_oif
 #define flowi6_iif		__fl_common.flowic_iif
 #define flowi6_mark		__fl_common.flowic_mark
-#define flowi6_tos		__fl_common.flowic_tos
 #define flowi6_scope		__fl_common.flowic_scope
 #define flowi6_proto		__fl_common.flowic_proto
 #define flowi6_flags		__fl_common.flowic_flags
@@ -135,6 +134,7 @@
 #define flowi6_tun_key		__fl_common.flowic_tun_key
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
+	/* Note: flowi6_tos is encoded in flowlabel, too. */
 	__be32			flowlabel;
 	union flowi_uli		uli;
 #define fl6_sport		uli.ports.sport

diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index 064cfbe..954ad6b 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h

@@ -15,7 +15,6 @@
 
 #include <linux/types.h>
 
-struct in6_addr;
 struct inet_bind_bucket;
 struct request_sock;
 struct sk_buff;

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index c35dda9..56050f9 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h

@@ -305,6 +305,22 @@
 
 struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
 
+static inline int iptunnel_pull_offloads(struct sk_buff *skb)
+{
+	if (skb_is_gso(skb)) {
+		int err;
+
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			return err;
+		skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
+					       NETIF_F_GSO_SHIFT);
+	}
+
+	skb->encapsulation = 0;
+	return 0;
+}
+
 static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
 {
 	if (pkt_len > 0) {

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index f3c9857..d0aeb97 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h

@@ -835,6 +835,12 @@
 {
 	return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT;
 }
+
+static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
+{
+	return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
+}
+
 /*
  *	Prototypes exported by ipv6
  */

diff --git a/include/net/ping.h b/include/net/ping.h
index 5fd7cc2..4cd90d6 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h

@@ -79,7 +79,6 @@
 		  int flags, int *addr_len);
 int  ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
 			 void *user_icmph, size_t icmph_len);
-int  ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int  ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 bool ping_rcv(struct sk_buff *skb);
 

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 835aa2e..65521cf 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h

@@ -82,6 +82,11 @@
 #define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT
 #endif
 
+/* Round an int up to the next multiple of 4.  */
+#define WORD_ROUND(s) (((s)+3)&~3)
+/* Truncate to the previous multiple of 4.  */
+#define WORD_TRUNC(s) ((s)&~3)
+
 /*
  * Function declarations.
  */
@@ -426,7 +431,7 @@
 	if (asoc->user_frag)
 		frag = min_t(int, frag, asoc->user_frag);
 
-	frag = min_t(int, frag, SCTP_MAX_CHUNK_LEN);
+	frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
 
 	return frag;
 }
@@ -475,9 +480,6 @@
      (void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\
      pos++)
 
-/* Round an int up to the next multiple of 4.  */
-#define WORD_ROUND(s) (((s)+3)&~3)
-
 /* External references. */
 
 extern struct proto sctp_prot;

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e2ac062..6df1ce7 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h

@@ -1097,7 +1097,7 @@
 			const struct sctp_bind_addr *src,
 			gfp_t gfp);
 int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
-		       __u8 addr_state, gfp_t gfp);
+		       int new_size, __u8 addr_state, gfp_t gfp);
 int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
 int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
 			 struct sctp_sock *);

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index a763c96..73ed2e9 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h

@@ -271,36 +271,36 @@
 static inline __be32 vxlan_vni(__be32 vni_field)
 {
 #if defined(__BIG_ENDIAN)
-	return vni_field >> 8;
+	return (__force __be32)((__force u32)vni_field >> 8);
 #else
-	return (vni_field & VXLAN_VNI_MASK) << 8;
+	return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8);
 #endif
 }
 
 static inline __be32 vxlan_vni_field(__be32 vni)
 {
 #if defined(__BIG_ENDIAN)
-	return vni << 8;
+	return (__force __be32)((__force u32)vni << 8);
 #else
-	return vni >> 8;
+	return (__force __be32)((__force u32)vni >> 8);
 #endif
 }
 
 static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id)
 {
 #if defined(__BIG_ENDIAN)
-	return tun_id;
+	return (__force __be32)tun_id;
 #else
-	return tun_id >> 32;
+	return (__force __be32)((__force u64)tun_id >> 32);
 #endif
 }
 
 static inline __be64 vxlan_vni_to_tun_id(__be32 vni)
 {
 #if defined(__BIG_ENDIAN)
-	return (__be64)vni;
+	return (__force __be64)vni;
 #else
-	return (__be64)vni << 32;
+	return (__force __be64)((u64)(__force u32)vni << 32);
 #endif
 }
 

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3a03c1d..fb2cef4 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h

@@ -56,6 +56,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 
+#include <linux/if_link.h>
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
@@ -97,6 +98,11 @@
 	RDMA_NODE_USNIC_UDP,
 };
 
+enum {
+	/* set the local administered indication */
+	IB_SA_WELL_KNOWN_GUID	= BIT_ULL(57) | 2,
+};
+
 enum rdma_transport_type {
 	RDMA_TRANSPORT_IB,
 	RDMA_TRANSPORT_IWARP,
@@ -213,6 +219,7 @@
 	IB_DEVICE_SIGNATURE_HANDOVER		= (1 << 30),
 	IB_DEVICE_ON_DEMAND_PAGING		= (1 << 31),
 	IB_DEVICE_SG_GAPS_REG			= (1ULL << 32),
+	IB_DEVICE_VIRTUAL_FUNCTION		= ((u64)1 << 33),
 };
 
 enum ib_signature_prot_cap {
@@ -274,7 +281,7 @@
 	u32			hw_ver;
 	int			max_qp;
 	int			max_qp_wr;
-	int			device_cap_flags;
+	u64			device_cap_flags;
 	int			max_sge;
 	int			max_sge_rd;
 	int			max_cq;
@@ -490,6 +497,7 @@
 					| RDMA_CORE_CAP_OPA_MAD)
 
 struct ib_port_attr {
+	u64			subnet_prefix;
 	enum ib_port_state	state;
 	enum ib_mtu		max_mtu;
 	enum ib_mtu		active_mtu;
@@ -509,6 +517,7 @@
 	u8			active_width;
 	u8			active_speed;
 	u8                      phys_state;
+	bool			grh_required;
 };
 
 enum ib_device_modify_flags {
@@ -614,6 +623,7 @@
 };
 
 #define IB_LID_PERMISSIVE	cpu_to_be16(0xFFFF)
+#define IB_MULTICAST_LID_BASE	cpu_to_be16(0xC000)
 
 enum ib_ah_flags {
 	IB_AH_GRH	= 1
@@ -1860,6 +1870,14 @@
 	void			   (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
 	void			   (*drain_rq)(struct ib_qp *qp);
 	void			   (*drain_sq)(struct ib_qp *qp);
+	int			   (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+							int state);
+	int			   (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+						   struct ifla_vf_info *ivf);
+	int			   (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+						   struct ifla_vf_stats *stats);
+	int			   (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+						  int type);
 
 	struct ib_dma_mapping_ops   *dma_ops;
 
@@ -2303,6 +2321,15 @@
 		 u8 port_num, int index, union ib_gid *gid,
 		 struct ib_gid_attr *attr);
 
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+			 int state);
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+		     struct ifla_vf_info *info);
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+		    struct ifla_vf_stats *stats);
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+		   int type);
+
 int ib_query_pkey(struct ib_device *device,
 		  u8 port_num, u16 index, u16 *pkey);
 

diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index a0fa975..2b95c2c 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h

@@ -97,7 +97,7 @@
 #define OPA_LINKDOWN_REASON_WIDTH_POLICY			41
 /* 42-48 reserved */
 #define OPA_LINKDOWN_REASON_DISCONNECTED			49
-#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED		50
+#define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED		50
 #define OPA_LINKDOWN_REASON_NOT_INSTALLED			51
 #define OPA_LINKDOWN_REASON_CHASSIS_CONFIG			52
 /* 53 reserved */

diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
new file mode 100644
index 0000000..a869655
--- /dev/null
+++ b/include/rdma/rdma_vt.h

@@ -0,0 +1,481 @@
+#ifndef DEF_RDMA_VT_H
+#define DEF_RDMA_VT_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Structure that low level drivers will populate in order to register with the
+ * rdmavt layer.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_mr.h>
+#include <rdma/rdmavt_qp.h>
+
+#define RVT_MAX_PKEY_VALUES 16
+
+struct rvt_ibport {
+	struct rvt_qp __rcu *qp[2];
+	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
+	struct rb_root mcast_tree;
+	spinlock_t lock;		/* protect changes in this struct */
+
+	/* non-zero when timer is set */
+	unsigned long mkey_lease_timeout;
+	unsigned long trap_timeout;
+	__be64 gid_prefix;      /* in network order */
+	__be64 mkey;
+	u64 tid;
+	u32 port_cap_flags;
+	u32 pma_sample_start;
+	u32 pma_sample_interval;
+	__be16 pma_counter_select[5];
+	u16 pma_tag;
+	u16 mkey_lease_period;
+	u16 sm_lid;
+	u8 sm_sl;
+	u8 mkeyprot;
+	u8 subnet_timeout;
+	u8 vl_high_limit;
+
+	/*
+	 * Driver is expected to keep these up to date. These
+	 * counters are informational only and not required to be
+	 * completely accurate.
+	 */
+	u64 n_rc_resends;
+	u64 n_seq_naks;
+	u64 n_rdma_seq;
+	u64 n_rnr_naks;
+	u64 n_other_naks;
+	u64 n_loop_pkts;
+	u64 n_pkt_drops;
+	u64 n_vl15_dropped;
+	u64 n_rc_timeouts;
+	u64 n_dmawait;
+	u64 n_unaligned;
+	u64 n_rc_dupreq;
+	u64 n_rc_seqnak;
+	u16 pkey_violations;
+	u16 qkey_violations;
+	u16 mkey_violations;
+
+	/* Hot-path per CPU counters to avoid cacheline trading to update */
+	u64 z_rc_acks;
+	u64 z_rc_qacks;
+	u64 z_rc_delayed_comp;
+	u64 __percpu *rc_acks;
+	u64 __percpu *rc_qacks;
+	u64 __percpu *rc_delayed_comp;
+
+	void *priv; /* driver private data */
+
+	/*
+	 * The pkey table is allocated and maintained by the driver. Drivers
+	 * need to have access to this before registering with rdmav. However
+	 * rdmavt will need access to it so drivers need to proviee this during
+	 * the attach port API call.
+	 */
+	u16 *pkey_table;
+
+	struct rvt_ah *sm_ah;
+};
+
+#define RVT_CQN_MAX 16 /* maximum length of cq name */
+
+/*
+ * Things that are driver specific, module parameters in hfi1 and qib
+ */
+struct rvt_driver_params {
+	struct ib_device_attr props;
+
+	/*
+	 * Anything driver specific that is not covered by props
+	 * For instance special module parameters. Goes here.
+	 */
+	unsigned int lkey_table_size;
+	unsigned int qp_table_size;
+	int qpn_start;
+	int qpn_inc;
+	int qpn_res_start;
+	int qpn_res_end;
+	int nports;
+	int npkeys;
+	u8 qos_shift;
+	char cq_name[RVT_CQN_MAX];
+	int node;
+	int max_rdma_atomic;
+	int psn_mask;
+	int psn_shift;
+	int psn_modify_mask;
+	u32 core_cap_flags;
+	u32 max_mad_size;
+};
+
+/* Protection domain */
+struct rvt_pd {
+	struct ib_pd ibpd;
+	int user;               /* non-zero if created from user space */
+};
+
+/* Address handle */
+struct rvt_ah {
+	struct ib_ah ibah;
+	struct ib_ah_attr attr;
+	atomic_t refcount;
+	u8 vl;
+	u8 log_pmtu;
+};
+
+struct rvt_dev_info;
+struct rvt_swqe;
+struct rvt_driver_provided {
+	/*
+	 * Which functions are required depends on which verbs rdmavt is
+	 * providing and which verbs the driver is overriding. See
+	 * check_support() for details.
+	 */
+
+	/* Passed to ib core registration. Callback to create syfs files */
+	int (*port_callback)(struct ib_device *, u8, struct kobject *);
+
+	/*
+	 * Returns a string to represent the device for which is being
+	 * registered. This is primarily used for error and debug messages on
+	 * the console.
+	 */
+	const char * (*get_card_name)(struct rvt_dev_info *rdi);
+
+	/*
+	 * Returns a pointer to the undelying hardware's PCI device. This is
+	 * used to display information as to what hardware is being referenced
+	 * in an output message
+	 */
+	struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);
+
+	/*
+	 * Allocate a private queue pair data structure for driver specific
+	 * information which is opaque to rdmavt.
+	 */
+	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+				gfp_t gfp);
+
+	/*
+	 * Free the driver's private qp structure.
+	 */
+	void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+
+	/*
+	 * Inform the driver the particular qp in quesiton has been reset so
+	 * that it can clean up anything it needs to.
+	 */
+	void (*notify_qp_reset)(struct rvt_qp *qp);
+
+	/*
+	 * Give the driver a notice that there is send work to do. It is up to
+	 * the driver to generally push the packets out, this just queues the
+	 * work with the driver. There are two variants here. The no_lock
+	 * version requires the s_lock not to be held. The other assumes the
+	 * s_lock is held.
+	 */
+	void (*schedule_send)(struct rvt_qp *qp);
+	void (*schedule_send_no_lock)(struct rvt_qp *qp);
+
+	/*
+	 * Sometimes rdmavt needs to kick the driver's send progress. That is
+	 * done by this call back.
+	 */
+	void (*do_send)(struct rvt_qp *qp);
+
+	/*
+	 * Get a path mtu from the driver based on qp attributes.
+	 */
+	int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+				  struct ib_qp_attr *attr);
+
+	/*
+	 * Notify driver that it needs to flush any outstanding IO requests that
+	 * are waiting on a qp.
+	 */
+	void (*flush_qp_waiters)(struct rvt_qp *qp);
+
+	/*
+	 * Notify driver to stop its queue of sending packets. Nothing else
+	 * should be posted to the queue pair after this has been called.
+	 */
+	void (*stop_send_queue)(struct rvt_qp *qp);
+
+	/*
+	 * Have the drivr drain any in progress operations
+	 */
+	void (*quiesce_qp)(struct rvt_qp *qp);
+
+	/*
+	 * Inform the driver a qp has went to error state.
+	 */
+	void (*notify_error_qp)(struct rvt_qp *qp);
+
+	/*
+	 * Get an MTU for a qp.
+	 */
+	u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			   u32 pmtu);
+	/*
+	 * Convert an mtu to a path mtu
+	 */
+	int (*mtu_to_path_mtu)(u32 mtu);
+
+	/*
+	 * Get the guid of a port in big endian byte order
+	 */
+	int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+			   int guid_index, __be64 *guid);
+
+	/*
+	 * Query driver for the state of the port.
+	 */
+	int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num,
+				struct ib_port_attr *props);
+
+	/*
+	 * Tell driver to shutdown a port
+	 */
+	int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num);
+
+	/* Tell driver to send a trap for changed  port capabilities */
+	void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num);
+
+	/*
+	 * The following functions can be safely ignored completely. Any use of
+	 * these is checked for NULL before blindly calling. Rdmavt should also
+	 * be functional if drivers omit these.
+	 */
+
+	/* Called to inform the driver that all qps should now be freed. */
+	unsigned (*free_all_qps)(struct rvt_dev_info *rdi);
+
+	/* Driver specific AH validation */
+	int (*check_ah)(struct ib_device *, struct ib_ah_attr *);
+
+	/* Inform the driver a new AH has been created */
+	void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *,
+			      struct rvt_ah *);
+
+	/* Let the driver pick the next queue pair number*/
+	int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+			 enum ib_qp_type type, u8 port_num, gfp_t gfp);
+
+	/* Determine if its safe or allowed to modify the qp */
+	int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			       int attr_mask, struct ib_udata *udata);
+
+	/* Driver specific QP modification/notification-of */
+	void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			  int attr_mask, struct ib_udata *udata);
+
+	/* Driver specific work request checking */
+	int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+	/* Notify driver a mad agent has been created */
+	void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
+
+	/* Notify driver a mad agent has been removed */
+	void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
+
+};
+
+struct rvt_dev_info {
+	struct ib_device ibdev; /* Keep this first. Nothing above here */
+
+	/*
+	 * Prior to calling for registration the driver will be responsible for
+	 * allocating space for this structure.
+	 *
+	 * The driver will also be responsible for filling in certain members of
+	 * dparms.props. The driver needs to fill in dparms exactly as it would
+	 * want values reported to a ULP. This will be returned to the caller
+	 * in rdmavt's device. The driver should also therefore refrain from
+	 * modifying this directly after registration with rdmavt.
+	 */
+
+	/* Driver specific properties */
+	struct rvt_driver_params dparms;
+
+	struct rvt_mregion __rcu *dma_mr;
+	struct rvt_lkey_table lkey_table;
+
+	/* Driver specific helper functions */
+	struct rvt_driver_provided driver_f;
+
+	/* Internal use */
+	int n_pds_allocated;
+	spinlock_t n_pds_lock; /* Protect pd allocated count */
+
+	int n_ahs_allocated;
+	spinlock_t n_ahs_lock; /* Protect ah allocated count */
+
+	u32 n_srqs_allocated;
+	spinlock_t n_srqs_lock; /* Protect srqs allocated count */
+
+	int flags;
+	struct rvt_ibport **ports;
+
+	/* QP */
+	struct rvt_qp_ibdev *qp_dev;
+	u32 n_qps_allocated;    /* number of QPs allocated for device */
+	u32 n_rc_qps;		/* number of RC QPs allocated for device */
+	u32 busy_jiffies;	/* timeout scaling based on RC QP count */
+	spinlock_t n_qps_lock;	/* protect qps, rc qps and busy jiffy counts */
+
+	/* memory maps */
+	struct list_head pending_mmaps;
+	spinlock_t mmap_offset_lock; /* protect mmap_offset */
+	u32 mmap_offset;
+	spinlock_t pending_lock; /* protect pending mmap list */
+
+	/* CQ */
+	struct kthread_worker *worker; /* per device cq worker */
+	u32 n_cqs_allocated;    /* number of CQs allocated for device */
+	spinlock_t n_cqs_lock; /* protect count of in use cqs */
+
+	/* Multicast */
+	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+	spinlock_t n_mcast_grps_lock;
+
+};
+
+static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct rvt_pd, ibpd);
+}
+
+static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct rvt_ah, ibah);
+}
+
+static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev)
+{
+	return  container_of(ibdev, struct rvt_dev_info, ibdev);
+}
+
+static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct rvt_srq, ibsrq);
+}
+
+static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct rvt_qp, ibqp);
+}
+
+static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
+{
+	/*
+	 * All ports have same number of pkeys.
+	 */
+	return rdi->dparms.npkeys;
+}
+
+/*
+ * Return the indexed PKEY from the port PKEY table.
+ */
+static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
+			       int port_index,
+			       unsigned index)
+{
+	if (index >= rvt_get_npkeys(rdi))
+		return 0;
+	else
+		return rdi->ports[port_index]->pkey_table[index];
+}
+
+/**
+ * rvt_lookup_qpn - return the QP with the given QPN
+ * @ibp: the ibport
+ * @qpn: the QP number to look up
+ *
+ * The caller must hold the rcu_read_lock(), and keep the lock until
+ * the returned qp is no longer in use.
+ */
+/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */
+static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
+					    struct rvt_ibport *rvp,
+					    u32 qpn) __must_hold(RCU)
+{
+	struct rvt_qp *qp = NULL;
+
+	if (unlikely(qpn <= 1)) {
+		qp = rcu_dereference(rvp->qp[qpn]);
+	} else {
+		u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
+
+		for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
+			qp = rcu_dereference(qp->next))
+			if (qp->ibqp.qp_num == qpn)
+				break;
+	}
+	return qp;
+}
+
+struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
+int rvt_register_device(struct rvt_dev_info *rvd);
+void rvt_unregister_device(struct rvt_dev_info *rvd);
+int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
+		  int port_index, u16 *pkey_table);
+int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
+		u32 len, u64 vaddr, u32 rkey, int acc);
+int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
+		struct rvt_sge *isge, struct ib_sge *sge, int acc);
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid);
+
+#endif          /* DEF_RDMA_VT_H */

diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
new file mode 100644
index 0000000..51fd00b
--- /dev/null
+++ b/include/rdma/rdmavt_cq.h

@@ -0,0 +1,99 @@
+#ifndef DEF_RDMAVT_INCCQ_H
+#define DEF_RDMAVT_INCCQ_H
+
+/*
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/kthread.h>
+#include <rdma/ib_user_verbs.h>
+
+/*
+ * Define an ib_cq_notify value that is not valid so we know when CQ
+ * notifications are armed.
+ */
+#define RVT_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct rvt_cq_wc {
+	u32 head;               /* index of next entry to fill */
+	u32 tail;               /* index of next ib_poll_cq() entry */
+	union {
+		/* these are actually size ibcq.cqe + 1 */
+		struct ib_uverbs_wc uqueue[0];
+		struct ib_wc kqueue[0];
+	};
+};
+
+/*
+ * The completion queue structure.
+ */
+struct rvt_cq {
+	struct ib_cq ibcq;
+	struct kthread_work comptask;
+	spinlock_t lock; /* protect changes in this struct */
+	u8 notify;
+	u8 triggered;
+	struct rvt_dev_info *rdi;
+	struct rvt_cq_wc *queue;
+	struct rvt_mmap_info *ip;
+};
+
+static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct rvt_cq, ibcq);
+}
+
+void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
+
+#endif          /* DEF_RDMAVT_INCCQH */

diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h
new file mode 100644
index 0000000..5edffdc
--- /dev/null
+++ b/include/rdma/rdmavt_mr.h

@@ -0,0 +1,139 @@
+#ifndef DEF_RDMAVT_INCMR_H
+#define DEF_RDMAVT_INCMR_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once
+ * drivers no longer need access to the MR directly.
+ */
+
+/*
+ * A segment is a linear region of low physical memory.
+ * Used by the verbs layer.
+ */
+struct rvt_seg {
+	void *vaddr;
+	size_t length;
+};
+
+/* The number of rvt_segs that fit in a page. */
+#define RVT_SEGSZ     (PAGE_SIZE / sizeof(struct rvt_seg))
+
+struct rvt_segarray {
+	struct rvt_seg segs[RVT_SEGSZ];
+};
+
+struct rvt_mregion {
+	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
+	u64 user_base;          /* User's address for this region */
+	u64 iova;               /* IB start address of this region */
+	size_t length;
+	u32 lkey;
+	u32 offset;             /* offset (bytes) to start of region */
+	int access_flags;
+	u32 max_segs;           /* number of rvt_segs in all the arrays */
+	u32 mapsz;              /* size of the map array */
+	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
+	u8  lkey_published;     /* in global table */
+	struct completion comp; /* complete when refcount goes to zero */
+	atomic_t refcount;
+	struct rvt_segarray *map[0];    /* the segments */
+};
+
+#define RVT_MAX_LKEY_TABLE_BITS 23
+
+struct rvt_lkey_table {
+	spinlock_t lock; /* protect changes in this struct */
+	u32 next;               /* next unused index (speeds search) */
+	u32 gen;                /* generation count */
+	u32 max;                /* size of the table */
+	struct rvt_mregion __rcu **table;
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct rvt_sge {
+	struct rvt_mregion *mr;
+	void *vaddr;            /* kernel virtual address of segment */
+	u32 sge_length;         /* length of the SGE */
+	u32 length;             /* remaining length of the segment */
+	u16 m;                  /* current index: mr->map[m] */
+	u16 n;                  /* current index: mr->map[m]->segs[n] */
+};
+
+struct rvt_sge_state {
+	struct rvt_sge *sg_list;      /* next SGE to be used if any */
+	struct rvt_sge sge;   /* progress state for the current SGE */
+	u32 total_len;
+	u8 num_sge;
+};
+
+static inline void rvt_put_mr(struct rvt_mregion *mr)
+{
+	if (unlikely(atomic_dec_and_test(&mr->refcount)))
+		complete(&mr->comp);
+}
+
+static inline void rvt_get_mr(struct rvt_mregion *mr)
+{
+	atomic_inc(&mr->refcount);
+}
+
+static inline void rvt_put_ss(struct rvt_sge_state *ss)
+{
+	while (ss->num_sge) {
+		rvt_put_mr(ss->sge.mr);
+		if (--ss->num_sge)
+			ss->sge = *ss->sg_list++;
+	}
+}
+
+#endif          /* DEF_RDMAVT_INCMRH */

diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
new file mode 100644
index 0000000..497e590
--- /dev/null
+++ b/include/rdma/rdmavt_qp.h

@@ -0,0 +1,446 @@
+#ifndef DEF_RDMAVT_INCQP_H
+#define DEF_RDMAVT_INCQP_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_verbs.h>
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define RVT_R_WRID_VALID        0
+#define RVT_R_REWIND_SGE        1
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define RVT_R_REUSE_SGE 0x01
+#define RVT_R_RDMAR_SEQ 0x02
+#define RVT_R_RSP_NAK   0x04
+#define RVT_R_RSP_SEND  0x08
+#define RVT_R_COMM_EST  0x10
+
+/*
+ * Bit definitions for s_flags.
+ *
+ * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
+ * RVT_S_BUSY - send tasklet is processing the QP
+ * RVT_S_TIMER - the RC retry timer is active
+ * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
+ * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
+ *                         before processing the next SWQE
+ * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
+ *                         before processing the next SWQE
+ * RVT_S_WAIT_RNR - waiting for RNR timeout
+ * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ *                  next send completion entry not via send DMA
+ * RVT_S_WAIT_PIO - waiting for a send buffer to be available
+ * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets
+ * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available
+ * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
+ * RVT_S_WAIT_KMEM - waiting for kernel memory to be available
+ * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
+ * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
+ * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
+ * RVT_S_ECN - a BECN was queued to the send engine
+ */
+#define RVT_S_SIGNAL_REQ_WR	0x0001
+#define RVT_S_BUSY		0x0002
+#define RVT_S_TIMER		0x0004
+#define RVT_S_RESP_PENDING	0x0008
+#define RVT_S_ACK_PENDING	0x0010
+#define RVT_S_WAIT_FENCE	0x0020
+#define RVT_S_WAIT_RDMAR	0x0040
+#define RVT_S_WAIT_RNR		0x0080
+#define RVT_S_WAIT_SSN_CREDIT	0x0100
+#define RVT_S_WAIT_DMA		0x0200
+#define RVT_S_WAIT_PIO		0x0400
+#define RVT_S_WAIT_PIO_DRAIN    0x0800
+#define RVT_S_WAIT_TX		0x1000
+#define RVT_S_WAIT_DMA_DESC	0x2000
+#define RVT_S_WAIT_KMEM		0x4000
+#define RVT_S_WAIT_PSN		0x8000
+#define RVT_S_WAIT_ACK		0x10000
+#define RVT_S_SEND_ONE		0x20000
+#define RVT_S_UNLIMITED_CREDIT	0x40000
+#define RVT_S_AHG_VALID		0x80000
+#define RVT_S_AHG_CLEAR		0x100000
+#define RVT_S_ECN		0x200000
+
+/*
+ * Wait flags that would prevent any packet type from being sent.
+ */
+#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
+	RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
+
+/*
+ * Wait flags that would prevent send work requests from making progress.
+ */
+#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \
+	RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \
+	RVT_S_WAIT_PSN | RVT_S_WAIT_ACK)
+
+#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
+
+/* Number of bits to pay attention to in the opcode for checking qp type */
+#define RVT_OPCODE_QP_MASK 0xE0
+
+/* Flags for checking QP state (see ib_rvt_state_ops[]) */
+#define RVT_POST_SEND_OK                0x01
+#define RVT_POST_RECV_OK                0x02
+#define RVT_PROCESS_RECV_OK             0x04
+#define RVT_PROCESS_SEND_OK             0x08
+#define RVT_PROCESS_NEXT_SEND_OK        0x10
+#define RVT_FLUSH_SEND			0x20
+#define RVT_FLUSH_RECV			0x40
+#define RVT_PROCESS_OR_FLUSH_SEND \
+	(RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND)
+
+/*
+ * Send work request queue entry.
+ * The size of the sg_list is determined when the QP is created and stored
+ * in qp->s_max_sge.
+ */
+struct rvt_swqe {
+	union {
+		struct ib_send_wr wr;   /* don't use wr.sg_list */
+		struct ib_ud_wr ud_wr;
+		struct ib_reg_wr reg_wr;
+		struct ib_rdma_wr rdma_wr;
+		struct ib_atomic_wr atomic_wr;
+	};
+	u32 psn;                /* first packet sequence number */
+	u32 lpsn;               /* last packet sequence number */
+	u32 ssn;                /* send sequence number */
+	u32 length;             /* total length of data in sg_list */
+	struct rvt_sge sg_list[0];
+};
+
+/*
+ * Receive work request queue entry.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+ */
+struct rvt_rwqe {
+	u64 wr_id;
+	u8 num_sge;
+	struct ib_sge sg_list[0];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct rvt_rwq {
+	u32 head;               /* new work requests posted to the head */
+	u32 tail;               /* receives pull requests from here. */
+	struct rvt_rwqe wq[0];
+};
+
+struct rvt_rq {
+	struct rvt_rwq *wq;
+	u32 size;               /* size of RWQE array */
+	u8 max_sge;
+	/* protect changes in this struct */
+	spinlock_t lock ____cacheline_aligned_in_smp;
+};
+
+/*
+ * This structure is used by rvt_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct rvt_mmap_info {
+	struct list_head pending_mmaps;
+	struct ib_ucontext *context;
+	void *obj;
+	__u64 offset;
+	struct kref ref;
+	unsigned size;
+};
+
+#define RVT_MAX_RDMA_ATOMIC	16
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct rvt_ack_entry {
+	u8 opcode;
+	u8 sent;
+	u32 psn;
+	u32 lpsn;
+	union {
+		struct rvt_sge rdma_sge;
+		u64 atomic_data;
+	};
+};
+
+#define	RC_QP_SCALING_INTERVAL	5
+
+/*
+ * Variables prefixed with s_ are for the requester (sender).
+ * Variables prefixed with r_ are for the responder (receiver).
+ * Variables prefixed with ack_ are for responder replies.
+ *
+ * Common variables are protected by both r_rq.lock and s_lock in that order
+ * which only happens in modify_qp() or changing the QP 'state'.
+ */
+struct rvt_qp {
+	struct ib_qp ibqp;
+	void *priv; /* Driver private data */
+	/* read mostly fields above and below */
+	struct ib_ah_attr remote_ah_attr;
+	struct ib_ah_attr alt_ah_attr;
+	struct rvt_qp __rcu *next;           /* link list for QPN hash table */
+	struct rvt_swqe *s_wq;  /* send work queue */
+	struct rvt_mmap_info *ip;
+
+	unsigned long timeout_jiffies;  /* computed from timeout */
+
+	enum ib_mtu path_mtu;
+	int srate_mbps;		/* s_srate (below) converted to Mbit/s */
+	pid_t pid;		/* pid for user mode QPs */
+	u32 remote_qpn;
+	u32 qkey;               /* QKEY for this QP (for UD or RD) */
+	u32 s_size;             /* send work queue size */
+	u32 s_ahgpsn;           /* set to the psn in the copy of the header */
+
+	u16 pmtu;		/* decoded from path_mtu */
+	u8 log_pmtu;		/* shift for pmtu */
+	u8 state;               /* QP state */
+	u8 allowed_ops;		/* high order bits of allowed opcodes */
+	u8 qp_access_flags;
+	u8 alt_timeout;         /* Alternate path timeout for this QP */
+	u8 timeout;             /* Timeout for this QP */
+	u8 s_srate;
+	u8 s_mig_state;
+	u8 port_num;
+	u8 s_pkey_index;        /* PKEY index to use */
+	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
+	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
+	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
+	u8 s_retry_cnt;         /* number of times to retry */
+	u8 s_rnr_retry_cnt;
+	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
+	u8 s_max_sge;           /* size of s_wq->sg_list */
+	u8 s_draining;
+
+	/* start of read/write fields */
+	atomic_t refcount ____cacheline_aligned_in_smp;
+	wait_queue_head_t wait;
+
+	struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1]
+		____cacheline_aligned_in_smp;
+	struct rvt_sge_state s_rdma_read_sge;
+
+	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
+	u32 r_psn;              /* expected rcv packet sequence number */
+	unsigned long r_aflags;
+	u64 r_wr_id;            /* ID for current receive WQE */
+	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
+	u32 r_len;              /* total length of r_sge */
+	u32 r_rcv_len;          /* receive data len processed */
+	u32 r_msn;              /* message sequence number */
+
+	u8 r_state;             /* opcode of last packet received */
+	u8 r_flags;
+	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
+
+	struct list_head rspwait;       /* link for waiting to respond */
+
+	struct rvt_sge_state r_sge;     /* current receive data */
+	struct rvt_rq r_rq;             /* receive work queue */
+
+	/* post send line */
+	spinlock_t s_hlock ____cacheline_aligned_in_smp;
+	u32 s_head;             /* new entries added here */
+	u32 s_next_psn;         /* PSN for next request */
+	u32 s_avail;            /* number of entries avail */
+	u32 s_ssn;              /* SSN of tail entry */
+
+	spinlock_t s_lock ____cacheline_aligned_in_smp;
+	u32 s_flags;
+	struct rvt_sge_state *s_cur_sge;
+	struct rvt_swqe *s_wqe;
+	struct rvt_sge_state s_sge;     /* current send request data */
+	struct rvt_mregion *s_rdma_mr;
+	u32 s_cur_size;         /* size of send packet in bytes */
+	u32 s_len;              /* total length of s_sge */
+	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
+	u32 s_last_psn;         /* last response PSN processed */
+	u32 s_sending_psn;      /* lowest PSN that is being sent */
+	u32 s_sending_hpsn;     /* highest PSN that is being sent */
+	u32 s_psn;              /* current packet sequence number */
+	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
+	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
+	u32 s_tail;             /* next entry to process */
+	u32 s_cur;              /* current work queue entry */
+	u32 s_acked;            /* last un-ACK'ed entry */
+	u32 s_last;             /* last completed entry */
+	u32 s_lsn;              /* limit sequence number (credit) */
+	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
+	u16 s_rdma_ack_cnt;
+	s8 s_ahgidx;
+	u8 s_state;             /* opcode of last packet sent */
+	u8 s_ack_state;         /* opcode of packet to ACK */
+	u8 s_nak_state;         /* non-zero if NAK is pending */
+	u8 r_nak_state;         /* non-zero if NAK is pending */
+	u8 s_retry;             /* requester retry counter */
+	u8 s_rnr_retry;         /* requester RNR retry counter */
+	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
+	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
+
+	struct rvt_sge_state s_ack_rdma_sge;
+	struct timer_list s_timer;
+
+	/*
+	 * This sge list MUST be last. Do not add anything below here.
+	 */
+	struct rvt_sge r_sg_list[0] /* verified SGEs */
+		____cacheline_aligned_in_smp;
+};
+
+struct rvt_srq {
+	struct ib_srq ibsrq;
+	struct rvt_rq rq;
+	struct rvt_mmap_info *ip;
+	/* send signal when number of RWQEs < limit */
+	u32 limit;
+};
+
+#define RVT_QPN_MAX                 BIT(24)
+#define RVT_QPNMAP_ENTRIES          (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+#define RVT_BITS_PER_PAGE           (PAGE_SIZE * BITS_PER_BYTE)
+#define RVT_BITS_PER_PAGE_MASK      (RVT_BITS_PER_PAGE - 1)
+#define RVT_QPN_MASK		    0xFFFFFF
+
+/*
+ * QPN-map pages start out as NULL, they get allocated upon
+ * first use and are never deallocated. This way,
+ * large bitmaps are not allocated unless large numbers of QPs are used.
+ */
+struct rvt_qpn_map {
+	void *page;
+};
+
+struct rvt_qpn_table {
+	spinlock_t lock; /* protect changes to the qp table */
+	unsigned flags;         /* flags for QP0/1 allocated for each port */
+	u32 last;               /* last QP number allocated */
+	u32 nmaps;              /* size of the map table */
+	u16 limit;
+	u8  incr;
+	/* bit map of free QP numbers other than 0/1 */
+	struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES];
+};
+
+struct rvt_qp_ibdev {
+	u32 qp_table_size;
+	u32 qp_table_bits;
+	struct rvt_qp __rcu **qp_table;
+	spinlock_t qpt_lock; /* qptable lock */
+	struct rvt_qpn_table qpn_table;
+};
+
+/*
+ * There is one struct rvt_mcast for each multicast GID.
+ * All attached QPs are then stored as a list of
+ * struct rvt_mcast_qp.
+ */
+struct rvt_mcast_qp {
+	struct list_head list;
+	struct rvt_qp *qp;
+};
+
+struct rvt_mcast {
+	struct rb_node rb_node;
+	union ib_gid mgid;
+	struct list_head qp_list;
+	wait_queue_head_t wait;
+	atomic_t refcount;
+	int n_attached;
+};
+
+/*
+ * Since struct rvt_swqe is not a fixed size, we can't simply index into
+ * struct rvt_qp.s_wq.  This function does the array index computation.
+ */
+static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp,
+						unsigned n)
+{
+	return (struct rvt_swqe *)((char *)qp->s_wq +
+				     (sizeof(struct rvt_swqe) +
+				      qp->s_max_sge *
+				      sizeof(struct rvt_sge)) * n);
+}
+
+/*
+ * Since struct rvt_rwqe is not a fixed size, we can't simply index into
+ * struct rvt_rwq.wq.  This function does the array index computation.
+ */
+static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n)
+{
+	return (struct rvt_rwqe *)
+		((char *)rq->wq->wq +
+		 (sizeof(struct rvt_rwqe) +
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
+}
+
+extern const int  ib_rvt_state_ops[];
+
+struct rvt_dev_info;
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
+
+#endif          /* DEF_RDMAVT_INCQP_H */

diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 784bc2c..bf66ea6 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h

@@ -28,6 +28,7 @@
 #define SCSI_TRANSPORT_FC_H
 
 #include <linux/sched.h>
+#include <asm/unaligned.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_netlink.h>
 
@@ -797,22 +798,12 @@
 
 static inline u64 wwn_to_u64(u8 *wwn)
 {
-	return (u64)wwn[0] << 56 | (u64)wwn[1] << 48 |
-	    (u64)wwn[2] << 40 | (u64)wwn[3] << 32 |
-	    (u64)wwn[4] << 24 | (u64)wwn[5] << 16 |
-	    (u64)wwn[6] <<  8 | (u64)wwn[7];
+	return get_unaligned_be64(wwn);
 }
 
 static inline void u64_to_wwn(u64 inm, u8 *wwn)
 {
-	wwn[0] = (inm >> 56) & 0xff;
-	wwn[1] = (inm >> 48) & 0xff;
-	wwn[2] = (inm >> 40) & 0xff;
-	wwn[3] = (inm >> 32) & 0xff;
-	wwn[4] = (inm >> 24) & 0xff;
-	wwn[5] = (inm >> 16) & 0xff;
-	wwn[6] = (inm >> 8) & 0xff;
-	wwn[7] = inm & 0xff;
+	put_unaligned_be64(inm, wwn);
 }
 
 /**

diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 685a51a..8ff6d40 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h

@@ -76,6 +76,7 @@
 	struct se_wwn *(*fabric_make_wwn)(struct target_fabric_configfs *,
 				struct config_group *, const char *);
 	void (*fabric_drop_wwn)(struct se_wwn *);
+	void (*add_wwn_groups)(struct se_wwn *);
 	struct se_portal_group *(*fabric_make_tpg)(struct se_wwn *,
 				struct config_group *, const char *);
 	void (*fabric_drop_tpg)(struct se_portal_group *);
@@ -87,7 +88,6 @@
 				struct config_group *, const char *);
 	void (*fabric_drop_np)(struct se_tpg_np *);
 	int (*fabric_init_nodeacl)(struct se_node_acl *, const char *);
-	void (*fabric_cleanup_nodeacl)(struct se_node_acl *);
 
 	struct configfs_attribute **tfc_discovery_attrs;
 	struct configfs_attribute **tfc_wwn_attrs;

diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 4cf6bac..d60096c 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h

@@ -37,7 +37,7 @@
 		__entry->tb_id = tb_id;
 		__entry->oif = flp->flowi6_oif;
 		__entry->iif = flp->flowi6_iif;
-		__entry->tos = flp->flowi6_tos;
+		__entry->tos = ip6_tclass(flp->flowlabel);
 		__entry->scope = flp->flowi6_scope;
 		__entry->flags = flp->flowi6_flags;
 

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index ca72173..6b2e154 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h

@@ -140,42 +140,19 @@
 	TP_ARGS(call_site, ptr)
 );
 
-DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free,
+DEFINE_EVENT(kmem_free, kmem_cache_free,
 
 	TP_PROTO(unsigned long call_site, const void *ptr),
 
-	TP_ARGS(call_site, ptr),
-
-	/*
-	 * This trace can be potentially called from an offlined cpu.
-	 * Since trace points use RCU and RCU should not be used from
-	 * offline cpus, filter such calls out.
-	 * While this trace can be called from a preemptable section,
-	 * it has no impact on the condition since tasks can migrate
-	 * only from online cpus to other online cpus. Thus its safe
-	 * to use raw_smp_processor_id.
-	 */
-	TP_CONDITION(cpu_online(raw_smp_processor_id()))
+	TP_ARGS(call_site, ptr)
 );
 
-TRACE_EVENT_CONDITION(mm_page_free,
+TRACE_EVENT(mm_page_free,
 
 	TP_PROTO(struct page *page, unsigned int order),
 
 	TP_ARGS(page, order),
 
-
-	/*
-	 * This trace can be potentially called from an offlined cpu.
-	 * Since trace points use RCU and RCU should not be used from
-	 * offline cpus, filter such calls out.
-	 * While this trace can be called from a preemptable section,
-	 * it has no impact on the condition since tasks can migrate
-	 * only from online cpus to other online cpus. Thus its safe
-	 * to use raw_smp_processor_id.
-	 */
-	TP_CONDITION(cpu_online(raw_smp_processor_id())),
-
 	TP_STRUCT__entry(
 		__field(	unsigned long,	pfn		)
 		__field(	unsigned int,	order		)
@@ -276,23 +253,12 @@
 	TP_ARGS(page, order, migratetype)
 );
 
-TRACE_EVENT_CONDITION(mm_page_pcpu_drain,
+TRACE_EVENT(mm_page_pcpu_drain,
 
 	TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
 	TP_ARGS(page, order, migratetype),
 
-	/*
-	 * This trace can be potentially called from an offlined cpu.
-	 * Since trace points use RCU and RCU should not be used from
-	 * offline cpus, filter such calls out.
-	 * While this trace can be called from a preemptable section,
-	 * it has no impact on the condition since tasks can migrate
-	 * only from online cpus to other online cpus. Thus its safe
-	 * to use raw_smp_processor_id.
-	 */
-	TP_CONDITION(cpu_online(raw_smp_processor_id())),
-
 	TP_STRUCT__entry(
 		__field(	unsigned long,	pfn		)
 		__field(	unsigned int,	order		)

diff --git a/include/trace/events/page_isolation.h b/include/trace/events/page_isolation.h
index 6fb6440..8738a78 100644
--- a/include/trace/events/page_isolation.h
+++ b/include/trace/events/page_isolation.h

@@ -29,7 +29,7 @@
 
 	TP_printk("start_pfn=0x%lx end_pfn=0x%lx fin_pfn=0x%lx ret=%s",
 		__entry->start_pfn, __entry->end_pfn, __entry->fin_pfn,
-		__entry->end_pfn == __entry->fin_pfn ? "success" : "fail")
+		__entry->end_pfn <= __entry->fin_pfn ? "success" : "fail")
 );
 
 #endif /* _TRACE_PAGE_ISOLATION_H */

diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
index 5738bb3..2b4a8ff 100644
--- a/include/trace/events/thermal.h
+++ b/include/trace/events/thermal.h

@@ -8,6 +8,18 @@
 #include <linux/thermal.h>
 #include <linux/tracepoint.h>
 
+TRACE_DEFINE_ENUM(THERMAL_TRIP_CRITICAL);
+TRACE_DEFINE_ENUM(THERMAL_TRIP_HOT);
+TRACE_DEFINE_ENUM(THERMAL_TRIP_PASSIVE);
+TRACE_DEFINE_ENUM(THERMAL_TRIP_ACTIVE);
+
+#define show_tzt_type(type)					\
+	__print_symbolic(type,					\
+			 { THERMAL_TRIP_CRITICAL, "CRITICAL"},	\
+			 { THERMAL_TRIP_HOT,      "HOT"},	\
+			 { THERMAL_TRIP_PASSIVE,  "PASSIVE"},	\
+			 { THERMAL_TRIP_ACTIVE,   "ACTIVE"})
+
 TRACE_EVENT(thermal_temperature,
 
 	TP_PROTO(struct thermal_zone_device *tz),
@@ -73,9 +85,9 @@
 		__entry->trip_type = trip_type;
 	),
 
-	TP_printk("thermal_zone=%s id=%d trip=%d trip_type=%d",
+	TP_printk("thermal_zone=%s id=%d trip=%d trip_type=%s",
 		__get_str(thermal_zone), __entry->id, __entry->trip,
-		__entry->trip_type)
+		show_tzt_type(__entry->trip_type))
 );
 
 TRACE_EVENT(thermal_power_cpu_get_power,

diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h
index bc8815f..9d14b19 100644
--- a/include/trace/events/tlb.h
+++ b/include/trace/events/tlb.h

@@ -34,13 +34,11 @@
 #define EM(a,b)		{ a, b },
 #define EMe(a,b)	{ a, b }
 
-TRACE_EVENT_CONDITION(tlb_flush,
+TRACE_EVENT(tlb_flush,
 
 	TP_PROTO(int reason, unsigned long pages),
 	TP_ARGS(reason, pages),
 
-	TP_CONDITION(cpu_online(smp_processor_id())),
-
 	TP_STRUCT__entry(
 		__field(	  int, reason)
 		__field(unsigned long,  pages)

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index fff846b..73614ce 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h

@@ -134,58 +134,28 @@
 #ifdef CREATE_TRACE_POINTS
 #ifdef CONFIG_CGROUP_WRITEBACK
 
-static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
 {
-	return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
+	return wb->memcg_css->cgroup->kn->ino;
 }
 
-static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
-{
-	struct cgroup *cgrp = wb->memcg_css->cgroup;
-	char *path;
-
-	path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
-	WARN_ON_ONCE(path != buf);
-}
-
-static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
+static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
 {
 	if (wbc->wb)
-		return __trace_wb_cgroup_size(wbc->wb);
+		return __trace_wb_assign_cgroup(wbc->wb);
 	else
-		return 2;
+		return -1U;
 }
-
-static inline void __trace_wbc_assign_cgroup(char *buf,
-					     struct writeback_control *wbc)
-{
-	if (wbc->wb)
-		__trace_wb_assign_cgroup(buf, wbc->wb);
-	else
-		strcpy(buf, "/");
-}
-
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
-static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
 {
-	return 2;
+	return -1U;
 }
 
-static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
+static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
 {
-	strcpy(buf, "/");
-}
-
-static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
-{
-	return 2;
-}
-
-static inline void __trace_wbc_assign_cgroup(char *buf,
-					     struct writeback_control *wbc)
-{
-	strcpy(buf, "/");
+	return -1U;
 }
 
 #endif	/* CONFIG_CGROUP_WRITEBACK */
@@ -201,7 +171,7 @@
 		__array(char, name, 32)
 		__field(unsigned long, ino)
 		__field(int, sync_mode)
-		__dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
+		__field(unsigned int, cgroup_ino)
 	),
 
 	TP_fast_assign(
@@ -209,14 +179,14 @@
 			dev_name(inode_to_bdi(inode)->dev), 32);
 		__entry->ino		= inode->i_ino;
 		__entry->sync_mode	= wbc->sync_mode;
-		__trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
+		__entry->cgroup_ino	= __trace_wbc_assign_cgroup(wbc);
 	),
 
-	TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
+	TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u",
 		__entry->name,
 		__entry->ino,
 		__entry->sync_mode,
-		__get_str(cgroup)
+		__entry->cgroup_ino
 	)
 );
 
@@ -246,7 +216,7 @@
 		__field(int, range_cyclic)
 		__field(int, for_background)
 		__field(int, reason)
-		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+		__field(unsigned int, cgroup_ino)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name,
@@ -258,10 +228,10 @@
 		__entry->range_cyclic = work->range_cyclic;
 		__entry->for_background	= work->for_background;
 		__entry->reason = work->reason;
-		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
+		__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
 	),
 	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
-		  "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
+		  "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u",
 		  __entry->name,
 		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
 		  __entry->nr_pages,
@@ -270,7 +240,7 @@
 		  __entry->range_cyclic,
 		  __entry->for_background,
 		  __print_symbolic(__entry->reason, WB_WORK_REASON),
-		  __get_str(cgroup)
+		  __entry->cgroup_ino
 	)
 );
 #define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -300,15 +270,15 @@
 	TP_ARGS(wb),
 	TP_STRUCT__entry(
 		__array(char, name, 32)
-		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+		__field(unsigned int, cgroup_ino)
 	),
 	TP_fast_assign(
 		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
-		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
+		__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
 	),
-	TP_printk("bdi %s: cgroup=%s",
+	TP_printk("bdi %s: cgroup_ino=%u",
 		  __entry->name,
-		  __get_str(cgroup)
+		  __entry->cgroup_ino
 	)
 );
 #define DEFINE_WRITEBACK_EVENT(name) \
@@ -347,7 +317,7 @@
 		__field(int, range_cyclic)
 		__field(long, range_start)
 		__field(long, range_end)
-		__dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
+		__field(unsigned int, cgroup_ino)
 	),
 
 	TP_fast_assign(
@@ -361,12 +331,12 @@
 		__entry->range_cyclic	= wbc->range_cyclic;
 		__entry->range_start	= (long)wbc->range_start;
 		__entry->range_end	= (long)wbc->range_end;
-		__trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
+		__entry->cgroup_ino	= __trace_wbc_assign_cgroup(wbc);
 	),
 
 	TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
 		"bgrd=%d reclm=%d cyclic=%d "
-		"start=0x%lx end=0x%lx cgroup=%s",
+		"start=0x%lx end=0x%lx cgroup_ino=%u",
 		__entry->name,
 		__entry->nr_to_write,
 		__entry->pages_skipped,
@@ -377,7 +347,7 @@
 		__entry->range_cyclic,
 		__entry->range_start,
 		__entry->range_end,
-		__get_str(cgroup)
+		__entry->cgroup_ino
 	)
 )
 
@@ -398,7 +368,7 @@
 		__field(long,		age)
 		__field(int,		moved)
 		__field(int,		reason)
-		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+		__field(unsigned int,	cgroup_ino)
 	),
 	TP_fast_assign(
 		unsigned long *older_than_this = work->older_than_this;
@@ -408,15 +378,15 @@
 				  (jiffies - *older_than_this) * 1000 / HZ : -1;
 		__entry->moved	= moved;
 		__entry->reason	= work->reason;
-		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
+		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
 	),
-	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
+	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u",
 		__entry->name,
 		__entry->older,	/* older_than_this in jiffies */
 		__entry->age,	/* older_than_this in relative milliseconds */
 		__entry->moved,
 		__print_symbolic(__entry->reason, WB_WORK_REASON),
-		__get_str(cgroup)
+		__entry->cgroup_ino
 	)
 );
 
@@ -484,7 +454,7 @@
 		__field(unsigned long,	dirty_ratelimit)
 		__field(unsigned long,	task_ratelimit)
 		__field(unsigned long,	balanced_dirty_ratelimit)
-		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+		__field(unsigned int,	cgroup_ino)
 	),
 
 	TP_fast_assign(
@@ -496,13 +466,13 @@
 		__entry->task_ratelimit	= KBps(task_ratelimit);
 		__entry->balanced_dirty_ratelimit =
 					KBps(wb->balanced_dirty_ratelimit);
-		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
+		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
 	),
 
 	TP_printk("bdi %s: "
 		  "write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
 		  "dirty_ratelimit=%lu task_ratelimit=%lu "
-		  "balanced_dirty_ratelimit=%lu cgroup=%s",
+		  "balanced_dirty_ratelimit=%lu cgroup_ino=%u",
 		  __entry->bdi,
 		  __entry->write_bw,		/* write bandwidth */
 		  __entry->avg_write_bw,	/* avg write bandwidth */
@@ -510,7 +480,7 @@
 		  __entry->dirty_ratelimit,	/* base ratelimit */
 		  __entry->task_ratelimit, /* ratelimit with position control */
 		  __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
-		  __get_str(cgroup)
+		  __entry->cgroup_ino
 	)
 );
 
@@ -548,7 +518,7 @@
 		__field(	 long,	pause)
 		__field(unsigned long,	period)
 		__field(	 long,	think)
-		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
+		__field(unsigned int,	cgroup_ino)
 	),
 
 	TP_fast_assign(
@@ -571,7 +541,7 @@
 		__entry->period		= period * 1000 / HZ;
 		__entry->pause		= pause * 1000 / HZ;
 		__entry->paused		= (jiffies - start_time) * 1000 / HZ;
-		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
+		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
 	),
 
 
@@ -580,7 +550,7 @@
 		  "bdi_setpoint=%lu bdi_dirty=%lu "
 		  "dirty_ratelimit=%lu task_ratelimit=%lu "
 		  "dirtied=%u dirtied_pause=%u "
-		  "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
+		  "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u",
 		  __entry->bdi,
 		  __entry->limit,
 		  __entry->setpoint,
@@ -595,7 +565,7 @@
 		  __entry->pause,	/* ms */
 		  __entry->period,	/* ms */
 		  __entry->think,	/* ms */
-		  __get_str(cgroup)
+		  __entry->cgroup_ino
 	  )
 );
 
@@ -609,8 +579,7 @@
 		__field(unsigned long, ino)
 		__field(unsigned long, state)
 		__field(unsigned long, dirtied_when)
-		__dynamic_array(char, cgroup,
-				__trace_wb_cgroup_size(inode_to_wb(inode)))
+		__field(unsigned int, cgroup_ino)
 	),
 
 	TP_fast_assign(
@@ -619,16 +588,16 @@
 		__entry->ino		= inode->i_ino;
 		__entry->state		= inode->i_state;
 		__entry->dirtied_when	= inode->dirtied_when;
-		__trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
+		__entry->cgroup_ino	= __trace_wb_assign_cgroup(inode_to_wb(inode));
 	),
 
-	TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
+	TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u",
 		  __entry->name,
 		  __entry->ino,
 		  show_inode_state(__entry->state),
 		  __entry->dirtied_when,
 		  (jiffies - __entry->dirtied_when) / HZ,
-		  __get_str(cgroup)
+		  __entry->cgroup_ino
 	)
 );
 
@@ -684,7 +653,7 @@
 		__field(unsigned long, writeback_index)
 		__field(long, nr_to_write)
 		__field(unsigned long, wrote)
-		__dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
+		__field(unsigned int, cgroup_ino)
 	),
 
 	TP_fast_assign(
@@ -696,11 +665,11 @@
 		__entry->writeback_index = inode->i_mapping->writeback_index;
 		__entry->nr_to_write	= nr_to_write;
 		__entry->wrote		= nr_to_write - wbc->nr_to_write;
-		__trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
+		__entry->cgroup_ino	= __trace_wbc_assign_cgroup(wbc);
 	),
 
 	TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
-		  "index=%lu to_write=%ld wrote=%lu cgroup=%s",
+		  "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u",
 		  __entry->name,
 		  __entry->ino,
 		  show_inode_state(__entry->state),
@@ -709,7 +678,7 @@
 		  __entry->writeback_index,
 		  __entry->nr_to_write,
 		  __entry->wrote,
-		  __get_str(cgroup)
+		  __entry->cgroup_ino
 	)
 );
 

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 0495884..b71fd0b 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild

@@ -354,6 +354,7 @@
 header-y += reiserfs_xattr.h
 header-y += resource.h
 header-y += rfkill.h
+header-y += rio_mport_cdev.h
 header-y += romfs_fs.h
 header-y += rose.h
 header-y += route.h

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 924f537..23917bb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h

@@ -375,6 +375,7 @@
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
+	__u16 tunnel_ext;
 	__u32 tunnel_label;
 };
 

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 2835b07..9222db8 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h

@@ -1648,9 +1648,9 @@
  *	%ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user
  *	(>= 0); on return, if handshake in progress, negative if
  *	request size unsupported by kernel: absolute value indicates
- *	kernel recommended size and cmd field is 0, as well as all the
- *	other fields; otherwise (handshake completed), strictly
- *	positive to indicate size used by kernel and cmd field is
+ *	kernel expected size and all the other fields but cmd
+ *	are 0; otherwise (handshake completed), strictly positive
+ *	to indicate size used by kernel and cmd field stays
  *	%ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For
  *	%ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive
  *	value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8e3f88f..c488066 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h

@@ -153,6 +153,8 @@
 	IFLA_LINK_NETNSID,
 	IFLA_PHYS_PORT_NAME,
 	IFLA_PROTO_DOWN,
+	IFLA_GSO_MAX_SEGS,
+	IFLA_GSO_MAX_SIZE,
 	__IFLA_MAX
 };
 
@@ -599,6 +601,8 @@
 				 */
 	IFLA_VF_STATS,		/* network device statistics */
 	IFLA_VF_TRUST,		/* Trust VF */
+	IFLA_VF_IB_NODE_GUID,	/* VF Infiniband node GUID */
+	IFLA_VF_IB_PORT_GUID,	/* VF Infiniband port GUID */
 	__IFLA_VF_MAX,
 };
 
@@ -631,6 +635,11 @@
 	__u32 setting;
 };
 
+struct ifla_vf_guid {
+	__u32 vf;
+	__u64 guid;
+};
+
 enum {
 	IFLA_VF_LINK_STATE_AUTO,	/* link state of the uplink */
 	IFLA_VF_LINK_STATE_ENABLE,	/* link always up */

diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
new file mode 100644
index 0000000..574e22e
--- /dev/null
+++ b/include/uapi/linux/kcov.h

@@ -0,0 +1,10 @@
+#ifndef _LINUX_KCOV_IOCTLS_H
+#define _LINUX_KCOV_IOCTLS_H
+
+#include <linux/types.h>
+
+#define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
+#define KCOV_ENABLE			_IO('c', 100)
+#define KCOV_DISABLE			_IO('c', 101)
+
+#endif /* _LINUX_KCOV_IOCTLS_H */

diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index aa9f104..621fa8a 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h

@@ -1 +1,5 @@
 #include <linux/compiler.h>
+
+#ifndef __always_inline
+#define __always_inline inline
+#endif

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index c18264d..4cb65bb 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h

@@ -40,6 +40,8 @@
 #define VIRTIO_CONFIG_S_DRIVER_OK	4
 /* Driver has finished configuring features */
 #define VIRTIO_CONFIG_S_FEATURES_OK	8
+/* Device entered invalid state, driver must reset it */
+#define VIRTIO_CONFIG_S_NEEDS_RESET	0x40
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 

diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index 288694e..a533cec 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h

@@ -66,7 +66,7 @@
  * The major version changes when data structures change in an incompatible
  * way. The driver must be the same for initialization to succeed.
  */
-#define HFI1_USER_SWMAJOR 4
+#define HFI1_USER_SWMAJOR 5
 
 /*
  * Minor version differences are always compatible
@@ -93,7 +93,7 @@
 #define HFI1_CAP_MULTI_PKT_EGR    (1UL <<  7) /* Enable multi-packet Egr buffs*/
 #define HFI1_CAP_NODROP_RHQ_FULL  (1UL <<  8) /* Don't drop on Hdr Q full */
 #define HFI1_CAP_NODROP_EGR_FULL  (1UL <<  9) /* Don't drop on EGR buffs full */
-#define HFI1_CAP_TID_UNMAP        (1UL << 10) /* Enable Expected TID caching */
+#define HFI1_CAP_TID_UNMAP        (1UL << 10) /* Disable Expected TID caching */
 #define HFI1_CAP_PRINT_UNIMPL     (1UL << 11) /* Show for unimplemented feats */
 #define HFI1_CAP_ALLOW_PERM_JKEY  (1UL << 12) /* Allow use of permissive JKEY */
 #define HFI1_CAP_NO_INTEGRITY     (1UL << 13) /* Enable ctxt integrity checks */
@@ -134,6 +134,7 @@
 #define HFI1_CMD_ACK_EVENT       10	/* ack & clear user status bits */
 #define HFI1_CMD_SET_PKEY        11     /* set context's pkey */
 #define HFI1_CMD_CTXT_RESET      12     /* reset context's HW send context */
+#define HFI1_CMD_TID_INVAL_READ  13     /* read TID cache invalidations */
 /* separate EPROM commands from normal PSM commands */
 #define HFI1_CMD_EP_INFO         64      /* read EPROM device ID */
 #define HFI1_CMD_EP_ERASE_CHIP   65      /* erase whole EPROM */
@@ -147,13 +148,15 @@
 #define _HFI1_EVENT_LID_CHANGE_BIT     2
 #define _HFI1_EVENT_LMC_CHANGE_BIT     3
 #define _HFI1_EVENT_SL2VL_CHANGE_BIT   4
-#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_SL2VL_CHANGE_BIT
+#define _HFI1_EVENT_TID_MMU_NOTIFY_BIT 5
+#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_TID_MMU_NOTIFY_BIT
 
 #define HFI1_EVENT_FROZEN            (1UL << _HFI1_EVENT_FROZEN_BIT)
 #define HFI1_EVENT_LINKDOWN          (1UL << _HFI1_EVENT_LINKDOWN_BIT)
 #define HFI1_EVENT_LID_CHANGE        (1UL << _HFI1_EVENT_LID_CHANGE_BIT)
 #define HFI1_EVENT_LMC_CHANGE        (1UL << _HFI1_EVENT_LMC_CHANGE_BIT)
 #define HFI1_EVENT_SL2VL_CHANGE      (1UL << _HFI1_EVENT_SL2VL_CHANGE_BIT)
+#define HFI1_EVENT_TID_MMU_NOTIFY    (1UL << _HFI1_EVENT_TID_MMU_NOTIFY_BIT)
 
 /*
  * These are the status bits readable (in ASCII form, 64bit value)
@@ -238,11 +241,6 @@
 	__u32 tidcnt;
 	/* length of transfer buffer programmed by this request */
 	__u32 length;
-	/*
-	 * pointer to bitmap of TIDs used for this call;
-	 * checked for being large enough at open
-	 */
-	__u64 tidmap;
 };
 
 struct hfi1_cmd {

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index f7d7b6f..6e373d1 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h

@@ -8,6 +8,7 @@
 	RDMA_NL_IWCM,
 	RDMA_NL_RSVD,
 	RDMA_NL_LS,	/* RDMA Local Services */
+	RDMA_NL_I40IW,
 	RDMA_NL_NUM_CLIENTS
 };
 

diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index eeba753..ad66589 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h

@@ -194,8 +194,9 @@
 int ipu_cpmem_set_format_passthrough(struct ipuv3_channel *ch, int width);
 void ipu_cpmem_set_yuv_interleaved(struct ipuv3_channel *ch, u32 pixel_format);
 void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch,
-				   u32 pixel_format, int stride,
-				   int u_offset, int v_offset);
+				   unsigned int uv_stride,
+				   unsigned int u_offset,
+				   unsigned int v_offset);
 void ipu_cpmem_set_yuv_planar(struct ipuv3_channel *ch,
 			      u32 pixel_format, int stride, int height);
 int ipu_cpmem_set_fmt(struct ipuv3_channel *ch, u32 drm_fourcc);
@@ -236,7 +237,7 @@
 int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
 		unsigned long bandwidth_mbs, int burstsize);
 void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc);
-int ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width);
+void ipu_dmfc_config_wait4eot(struct dmfc_channel *dmfc, int width);
 struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipuv3_channel);
 void ipu_dmfc_put(struct dmfc_channel *dmfc);
 

diff --git a/init/Kconfig b/init/Kconfig
index e0d2616..0dfd09d 100644
--- a/init/Kconfig
+++ b/init/Kconfig

@@ -272,8 +272,9 @@
 	  See the man page for more details.
 
 config FHANDLE
-	bool "open by fhandle syscalls"
+	bool "open by fhandle syscalls" if EXPERT
 	select EXPORTFS
+	default y
 	help
 	  If you say Y here, a user level program will be able to map
 	  file names to handle and then later use the handle for

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 781c139..ade739f 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c

@@ -307,8 +307,8 @@
 	struct inode *inode;
 	struct ipc_namespace *ns = data;
 
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = MQUEUE_MAGIC;
 	sb->s_op = &mqueue_super_ops;
 

diff --git a/ipc/sem.c b/ipc/sem.c
index cddd5b5..b3757ea 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c

@@ -92,7 +92,14 @@
 /* One semaphore structure for each semaphore in the system. */
 struct sem {
 	int	semval;		/* current value */
-	int	sempid;		/* pid of last operation */
+	/*
+	 * PID of the process that last modified the semaphore. For
+	 * Linux, specifically these are:
+	 *  - semop
+	 *  - semctl, via SETVAL and SETALL.
+	 *  - at task exit when performing undo adjustments (see exit_sem).
+	 */
+	int	sempid;
 	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
 	struct list_head pending_alter; /* pending single-sop operations */
 					/* that alter the semaphore */
@@ -1444,8 +1451,10 @@
 			goto out_unlock;
 		}
 
-		for (i = 0; i < nsems; i++)
+		for (i = 0; i < nsems; i++) {
 			sma->sem_base[i].semval = sem_io[i];
+			sma->sem_base[i].sempid = task_tgid_vnr(current);
+		}
 
 		ipc_assert_locked_object(&sma->sem_perm);
 		list_for_each_entry(un, &sma->list_id, list_id) {

diff --git a/kernel/Makefile b/kernel/Makefile
index baa55e5..f0c40bf 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile

@@ -18,6 +18,17 @@
 CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE)
 endif
 
+# Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip()
+# in coverage traces.
+KCOV_INSTRUMENT_softirq.o := n
+# These are called from save_stack_trace() on slub debug path,
+# and produce insane amounts of uninteresting coverage.
+KCOV_INSTRUMENT_module.o := n
+KCOV_INSTRUMENT_extable.o := n
+# Don't self-instrument.
+KCOV_INSTRUMENT_kcov.o := n
+KASAN_SANITIZE_kcov.o := n
+
 # cond_syscall is currently not LTO compatible
 CFLAGS_sys_ni.o = $(DISABLE_LTO)
 
@@ -68,6 +79,7 @@
 obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
+obj-$(CONFIG_KCOV) += kcov.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 195ffae..7d0e3cf 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c

@@ -2412,8 +2412,8 @@
 		return;
 	audit_log_task(ab);
 	audit_log_format(ab, " sig=%ld arch=%x syscall=%ld compat=%d ip=0x%lx code=0x%x",
-			 signr, syscall_get_arch(), syscall, is_compat_task(),
-			 KSTK_EIP(current), code);
+			 signr, syscall_get_arch(), syscall,
+			 in_compat_syscall(), KSTK_EIP(current), code);
 	audit_log_end(ab);
 }
 

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2a2efe1..adc5e4b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c

@@ -137,11 +137,13 @@
 		   "map_type:\t%u\n"
 		   "key_size:\t%u\n"
 		   "value_size:\t%u\n"
-		   "max_entries:\t%u\n",
+		   "max_entries:\t%u\n"
+		   "map_flags:\t%#x\n",
 		   map->map_type,
 		   map->key_size,
 		   map->value_size,
-		   map->max_entries);
+		   map->max_entries,
+		   map->map_flags);
 }
 #endif
 

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 712570d..52bedc5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c

@@ -376,8 +376,11 @@
 	u64 tmp = perf_sample_period_ns;
 
 	tmp *= sysctl_perf_cpu_time_max_percent;
-	do_div(tmp, 100);
-	ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
+	tmp = div_u64(tmp, 100);
+	if (!tmp)
+		tmp = 1;
+
+	WRITE_ONCE(perf_sample_allowed_ns, tmp);
 }
 
 static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -409,7 +412,13 @@
 	if (ret || !write)
 		return ret;
 
-	update_perf_cpu_limits();
+	if (sysctl_perf_cpu_time_max_percent == 100) {
+		printk(KERN_WARNING
+		       "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
+		WRITE_ONCE(perf_sample_allowed_ns, 0);
+	} else {
+		update_perf_cpu_limits();
+	}
 
 	return 0;
 }
@@ -423,62 +432,68 @@
 #define NR_ACCUMULATED_SAMPLES 128
 static DEFINE_PER_CPU(u64, running_sample_length);
 
+static u64 __report_avg;
+static u64 __report_allowed;
+
 static void perf_duration_warn(struct irq_work *w)
 {
-	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
-	u64 avg_local_sample_len;
-	u64 local_samples_len;
-
-	local_samples_len = __this_cpu_read(running_sample_length);
-	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
-
 	printk_ratelimited(KERN_WARNING
-			"perf interrupt took too long (%lld > %lld), lowering "
-			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len, allowed_ns >> 1,
-			sysctl_perf_event_sample_rate);
+		"perf: interrupt took too long (%lld > %lld), lowering "
+		"kernel.perf_event_max_sample_rate to %d\n",
+		__report_avg, __report_allowed,
+		sysctl_perf_event_sample_rate);
 }
 
 static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
 
 void perf_sample_event_took(u64 sample_len_ns)
 {
-	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
-	u64 avg_local_sample_len;
-	u64 local_samples_len;
+	u64 max_len = READ_ONCE(perf_sample_allowed_ns);
+	u64 running_len;
+	u64 avg_len;
+	u32 max;
 
-	if (allowed_ns == 0)
+	if (max_len == 0)
 		return;
 
-	/* decay the counter by 1 average sample */
-	local_samples_len = __this_cpu_read(running_sample_length);
-	local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
-	local_samples_len += sample_len_ns;
-	__this_cpu_write(running_sample_length, local_samples_len);
+	/* Decay the counter by 1 average sample. */
+	running_len = __this_cpu_read(running_sample_length);
+	running_len -= running_len/NR_ACCUMULATED_SAMPLES;
+	running_len += sample_len_ns;
+	__this_cpu_write(running_sample_length, running_len);
 
 	/*
-	 * note: this will be biased artifically low until we have
-	 * seen NR_ACCUMULATED_SAMPLES.  Doing it this way keeps us
+	 * Note: this will be biased artifically low until we have
+	 * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
 	 * from having to maintain a count.
 	 */
-	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
-
-	if (avg_local_sample_len <= allowed_ns)
+	avg_len = running_len/NR_ACCUMULATED_SAMPLES;
+	if (avg_len <= max_len)
 		return;
 
-	if (max_samples_per_tick <= 1)
-		return;
+	__report_avg = avg_len;
+	__report_allowed = max_len;
 
-	max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
-	sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
+	/*
+	 * Compute a throttle threshold 25% below the current duration.
+	 */
+	avg_len += avg_len / 4;
+	max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
+	if (avg_len < max)
+		max /= (u32)avg_len;
+	else
+		max = 1;
+
+	WRITE_ONCE(perf_sample_allowed_ns, avg_len);
+	WRITE_ONCE(max_samples_per_tick, max);
+
+	sysctl_perf_event_sample_rate = max * HZ;
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
-	update_perf_cpu_limits();
-
 	if (!irq_work_queue(&perf_duration_work)) {
-		early_printk("perf interrupt took too long (%lld > %lld), lowering "
+		early_printk("perf: interrupt took too long (%lld > %lld), lowering "
 			     "kernel.perf_event_max_sample_rate to %d\n",
-			     avg_local_sample_len, allowed_ns >> 1,
+			     __report_avg, __report_allowed,
 			     sysctl_perf_event_sample_rate);
 	}
 }
@@ -2402,14 +2417,24 @@
 			cpuctx->task_ctx = NULL;
 	}
 
-	is_active ^= ctx->is_active; /* changed bits */
-
+	/*
+	 * Always update time if it was set; not only when it changes.
+	 * Otherwise we can 'forget' to update time for any but the last
+	 * context we sched out. For example:
+	 *
+	 *   ctx_sched_out(.event_type = EVENT_FLEXIBLE)
+	 *   ctx_sched_out(.event_type = EVENT_PINNED)
+	 *
+	 * would only update time for the pinned events.
+	 */
 	if (is_active & EVENT_TIME) {
 		/* update (and stop) ctx time */
 		update_context_time(ctx);
 		update_cgrp_time_from_cpuctx(cpuctx);
 	}
 
+	is_active ^= ctx->is_active; /* changed bits */
+
 	if (!ctx->nr_active || !(is_active & EVENT_ALL))
 		return;
 
@@ -4210,6 +4235,14 @@
 	active = (event->state == PERF_EVENT_STATE_ACTIVE);
 	if (active) {
 		perf_pmu_disable(ctx->pmu);
+		/*
+		 * We could be throttled; unthrottle now to avoid the tick
+		 * trying to unthrottle while we already re-started the event.
+		 */
+		if (event->hw.interrupts == MAX_INTERRUPTS) {
+			event->hw.interrupts = 0;
+			perf_log_throttle(event, 1);
+		}
 		event->pmu->stop(event, PERF_EF_UPDATE);
 	}
 
@@ -8509,6 +8542,7 @@
 					f_flags);
 	if (IS_ERR(event_file)) {
 		err = PTR_ERR(event_file);
+		event_file = NULL;
 		goto err_context;
 	}
 
@@ -9426,10 +9460,29 @@
 	switch (action & ~CPU_TASKS_FROZEN) {
 
 	case CPU_UP_PREPARE:
+		/*
+		 * This must be done before the CPU comes alive, because the
+		 * moment we can run tasks we can encounter (software) events.
+		 *
+		 * Specifically, someone can have inherited events on kthreadd
+		 * or a pre-existing worker thread that gets re-bound.
+		 */
 		perf_event_init_cpu(cpu);
 		break;
 
 	case CPU_DOWN_PREPARE:
+		/*
+		 * This must be done before the CPU dies because after that an
+		 * active event might want to IPI the CPU and that'll not work
+		 * so great for dead CPUs.
+		 *
+		 * XXX smp_call_function_single() return -ENXIO without a warn
+		 * so we could possibly deal with this.
+		 *
+		 * This is safe against new events arriving because
+		 * sys_perf_event_open() serializes against hotplug using
+		 * get_online_cpus().
+		 */
 		perf_event_exit_cpu(cpu);
 		break;
 	default:

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 1faad2cf..c61f0cb 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c

@@ -746,8 +746,10 @@
 
 	rb->user_page = all_buf;
 	rb->data_pages[0] = all_buf + PAGE_SIZE;
-	rb->page_order = ilog2(nr_pages);
-	rb->nr_pages = !!nr_pages;
+	if (nr_pages) {
+		rb->nr_pages = 1;
+		rb->page_order = ilog2(nr_pages);
+	}
 
 	ring_buffer_init(rb, watermark, flags);
 

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 220fc17..7edc95e 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c

@@ -321,7 +321,7 @@
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	ret = __replace_page(vma, vaddr, old_page, new_page);
-	page_cache_release(new_page);
+	put_page(new_page);
 put_old:
 	put_page(old_page);
 
@@ -539,14 +539,14 @@
 	 * see uprobe_register().
 	 */
 	if (mapping->a_ops->readpage)
-		page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
+		page = read_mapping_page(mapping, offset >> PAGE_SHIFT, filp);
 	else
-		page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT);
+		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
 	copy_from_page(page, offset, insn, nbytes);
-	page_cache_release(page);
+	put_page(page);
 
 	return 0;
 }

diff --git a/kernel/exit.c b/kernel/exit.c
index 10e0882..fd90195 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c

@@ -53,6 +53,7 @@
 #include <linux/oom.h>
 #include <linux/writeback.h>
 #include <linux/shm.h>
+#include <linux/kcov.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -434,7 +435,7 @@
 	mm_update_next_owner(mm);
 	mmput(mm);
 	if (test_thread_flag(TIF_MEMDIE))
-		exit_oom_victim();
+		exit_oom_victim(tsk);
 }
 
 static struct task_struct *find_alive_thread(struct task_struct *p)
@@ -655,6 +656,7 @@
 	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
+	kcov_task_exit(tsk);
 
 	WARN_ON(blk_needs_flush_plug(tsk));
 

diff --git a/kernel/fork.c b/kernel/fork.c
index 5b8d1e7..d277e83 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c

@@ -75,6 +75,7 @@
 #include <linux/aio.h>
 #include <linux/compiler.h>
 #include <linux/sysctl.h>
+#include <linux/kcov.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -392,6 +393,8 @@
 
 	account_kernel_stack(ti, 1);
 
+	kcov_task_init(tsk);
+
 	return tsk;
 
 free_ti:

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index e0f90c2..d234022 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c

@@ -185,10 +185,12 @@
 	rcu_read_unlock();
 }
 
-static unsigned long timeout_jiffies(unsigned long timeout)
+static long hung_timeout_jiffies(unsigned long last_checked,
+				 unsigned long timeout)
 {
 	/* timeout of 0 will disable the watchdog */
-	return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+	return timeout ? last_checked - jiffies + timeout * HZ :
+		MAX_SCHEDULE_TIMEOUT;
 }
 
 /*
@@ -224,18 +226,21 @@
  */
 static int watchdog(void *dummy)
 {
+	unsigned long hung_last_checked = jiffies;
+
 	set_user_nice(current, 0);
 
 	for ( ; ; ) {
 		unsigned long timeout = sysctl_hung_task_timeout_secs;
+		long t = hung_timeout_jiffies(hung_last_checked, timeout);
 
-		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
-			timeout = sysctl_hung_task_timeout_secs;
-
-		if (atomic_xchg(&reset_hung_task, 0))
+		if (t <= 0) {
+			if (!atomic_xchg(&reset_hung_task, 0))
+				check_hung_uninterruptible_tasks(timeout);
+			hung_last_checked = jiffies;
 			continue;
-
-		check_hung_uninterruptible_tasks(timeout);
+		}
+		schedule_timeout_interruptible(t);
 	}
 
 	return 0;

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 64731e8..cc1cc64 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c

@@ -1322,8 +1322,8 @@
 
 		if (nmsk != omsk)
 			/* hope the handler works with current  trigger mode */
-			pr_warning("irq %d uses trigger mode %u; requested %u\n",
-				   irq, nmsk, omsk);
+			pr_warn("irq %d uses trigger mode %u; requested %u\n",
+				irq, nmsk, omsk);
 	}
 
 	*old_ptr = new;

diff --git a/kernel/kcov.c b/kernel/kcov.c
new file mode 100644
index 0000000..3efbee0
--- /dev/null
+++ b/kernel/kcov.c

@@ -0,0 +1,273 @@
+#define pr_fmt(fmt) "kcov: " fmt
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/kcov.h>
+
+/*
+ * kcov descriptor (one per opened debugfs file).
+ * State transitions of the descriptor:
+ *  - initial state after open()
+ *  - then there must be a single ioctl(KCOV_INIT_TRACE) call
+ *  - then, mmap() call (several calls are allowed but not useful)
+ *  - then, repeated enable/disable for a task (only one task a time allowed)
+ */
+struct kcov {
+	/*
+	 * Reference counter. We keep one for:
+	 *  - opened file descriptor
+	 *  - task with enabled coverage (we can't unwire it from another task)
+	 */
+	atomic_t		refcount;
+	/* The lock protects mode, size, area and t. */
+	spinlock_t		lock;
+	enum kcov_mode		mode;
+	/* Size of arena (in long's for KCOV_MODE_TRACE). */
+	unsigned		size;
+	/* Coverage buffer shared with user space. */
+	void			*area;
+	/* Task for which we collect coverage, or NULL. */
+	struct task_struct	*t;
+};
+
+/*
+ * Entry point from instrumented code.
+ * This is called once per basic-block/edge.
+ */
+void __sanitizer_cov_trace_pc(void)
+{
+	struct task_struct *t;
+	enum kcov_mode mode;
+
+	t = current;
+	/*
+	 * We are interested in code coverage as a function of a syscall inputs,
+	 * so we ignore code executed in interrupts.
+	 */
+	if (!t || in_interrupt())
+		return;
+	mode = READ_ONCE(t->kcov_mode);
+	if (mode == KCOV_MODE_TRACE) {
+		unsigned long *area;
+		unsigned long pos;
+
+		/*
+		 * There is some code that runs in interrupts but for which
+		 * in_interrupt() returns false (e.g. preempt_schedule_irq()).
+		 * READ_ONCE()/barrier() effectively provides load-acquire wrt
+		 * interrupts, there are paired barrier()/WRITE_ONCE() in
+		 * kcov_ioctl_locked().
+		 */
+		barrier();
+		area = t->kcov_area;
+		/* The first word is number of subsequent PCs. */
+		pos = READ_ONCE(area[0]) + 1;
+		if (likely(pos < t->kcov_size)) {
+			area[pos] = _RET_IP_;
+			WRITE_ONCE(area[0], pos);
+		}
+	}
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
+
+static void kcov_get(struct kcov *kcov)
+{
+	atomic_inc(&kcov->refcount);
+}
+
+static void kcov_put(struct kcov *kcov)
+{
+	if (atomic_dec_and_test(&kcov->refcount)) {
+		vfree(kcov->area);
+		kfree(kcov);
+	}
+}
+
+void kcov_task_init(struct task_struct *t)
+{
+	t->kcov_mode = KCOV_MODE_DISABLED;
+	t->kcov_size = 0;
+	t->kcov_area = NULL;
+	t->kcov = NULL;
+}
+
+void kcov_task_exit(struct task_struct *t)
+{
+	struct kcov *kcov;
+
+	kcov = t->kcov;
+	if (kcov == NULL)
+		return;
+	spin_lock(&kcov->lock);
+	if (WARN_ON(kcov->t != t)) {
+		spin_unlock(&kcov->lock);
+		return;
+	}
+	/* Just to not leave dangling references behind. */
+	kcov_task_init(t);
+	kcov->t = NULL;
+	spin_unlock(&kcov->lock);
+	kcov_put(kcov);
+}
+
+static int kcov_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	int res = 0;
+	void *area;
+	struct kcov *kcov = vma->vm_file->private_data;
+	unsigned long size, off;
+	struct page *page;
+
+	area = vmalloc_user(vma->vm_end - vma->vm_start);
+	if (!area)
+		return -ENOMEM;
+
+	spin_lock(&kcov->lock);
+	size = kcov->size * sizeof(unsigned long);
+	if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 ||
+	    vma->vm_end - vma->vm_start != size) {
+		res = -EINVAL;
+		goto exit;
+	}
+	if (!kcov->area) {
+		kcov->area = area;
+		vma->vm_flags |= VM_DONTEXPAND;
+		spin_unlock(&kcov->lock);
+		for (off = 0; off < size; off += PAGE_SIZE) {
+			page = vmalloc_to_page(kcov->area + off);
+			if (vm_insert_page(vma, vma->vm_start + off, page))
+				WARN_ONCE(1, "vm_insert_page() failed");
+		}
+		return 0;
+	}
+exit:
+	spin_unlock(&kcov->lock);
+	vfree(area);
+	return res;
+}
+
+static int kcov_open(struct inode *inode, struct file *filep)
+{
+	struct kcov *kcov;
+
+	kcov = kzalloc(sizeof(*kcov), GFP_KERNEL);
+	if (!kcov)
+		return -ENOMEM;
+	atomic_set(&kcov->refcount, 1);
+	spin_lock_init(&kcov->lock);
+	filep->private_data = kcov;
+	return nonseekable_open(inode, filep);
+}
+
+static int kcov_close(struct inode *inode, struct file *filep)
+{
+	kcov_put(filep->private_data);
+	return 0;
+}
+
+static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
+			     unsigned long arg)
+{
+	struct task_struct *t;
+	unsigned long size, unused;
+
+	switch (cmd) {
+	case KCOV_INIT_TRACE:
+		/*
+		 * Enable kcov in trace mode and setup buffer size.
+		 * Must happen before anything else.
+		 */
+		if (kcov->mode != KCOV_MODE_DISABLED)
+			return -EBUSY;
+		/*
+		 * Size must be at least 2 to hold current position and one PC.
+		 * Later we allocate size * sizeof(unsigned long) memory,
+		 * that must not overflow.
+		 */
+		size = arg;
+		if (size < 2 || size > INT_MAX / sizeof(unsigned long))
+			return -EINVAL;
+		kcov->size = size;
+		kcov->mode = KCOV_MODE_TRACE;
+		return 0;
+	case KCOV_ENABLE:
+		/*
+		 * Enable coverage for the current task.
+		 * At this point user must have been enabled trace mode,
+		 * and mmapped the file. Coverage collection is disabled only
+		 * at task exit or voluntary by KCOV_DISABLE. After that it can
+		 * be enabled for another task.
+		 */
+		unused = arg;
+		if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED ||
+		    kcov->area == NULL)
+			return -EINVAL;
+		if (kcov->t != NULL)
+			return -EBUSY;
+		t = current;
+		/* Cache in task struct for performance. */
+		t->kcov_size = kcov->size;
+		t->kcov_area = kcov->area;
+		/* See comment in __sanitizer_cov_trace_pc(). */
+		barrier();
+		WRITE_ONCE(t->kcov_mode, kcov->mode);
+		t->kcov = kcov;
+		kcov->t = t;
+		/* This is put either in kcov_task_exit() or in KCOV_DISABLE. */
+		kcov_get(kcov);
+		return 0;
+	case KCOV_DISABLE:
+		/* Disable coverage for the current task. */
+		unused = arg;
+		if (unused != 0 || current->kcov != kcov)
+			return -EINVAL;
+		t = current;
+		if (WARN_ON(kcov->t != t))
+			return -EINVAL;
+		kcov_task_init(t);
+		kcov->t = NULL;
+		kcov_put(kcov);
+		return 0;
+	default:
+		return -ENOTTY;
+	}
+}
+
+static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct kcov *kcov;
+	int res;
+
+	kcov = filep->private_data;
+	spin_lock(&kcov->lock);
+	res = kcov_ioctl_locked(kcov, cmd, arg);
+	spin_unlock(&kcov->lock);
+	return res;
+}
+
+static const struct file_operations kcov_fops = {
+	.open		= kcov_open,
+	.unlocked_ioctl	= kcov_ioctl,
+	.mmap		= kcov_mmap,
+	.release        = kcov_close,
+};
+
+static int __init kcov_init(void)
+{
+	if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) {
+		pr_err("failed to create kcov in debugfs\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+device_initcall(kcov_init);

diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 8e96f6c..31322a4 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile

@@ -1,3 +1,6 @@
+# Any varying coverage in these files is non-deterministic
+# and is generally not a function of system call inputs.
+KCOV_INSTRUMENT		:= n
 
 obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 53ab2f8..2324ba5 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c

@@ -2000,6 +2000,77 @@
 }
 
 /*
+ * Returns the next chain_key iteration
+ */
+static u64 print_chain_key_iteration(int class_idx, u64 chain_key)
+{
+	u64 new_chain_key = iterate_chain_key(chain_key, class_idx);
+
+	printk(" class_idx:%d -> chain_key:%016Lx",
+		class_idx,
+		(unsigned long long)new_chain_key);
+	return new_chain_key;
+}
+
+static void
+print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next)
+{
+	struct held_lock *hlock;
+	u64 chain_key = 0;
+	int depth = curr->lockdep_depth;
+	int i;
+
+	printk("depth: %u\n", depth + 1);
+	for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) {
+		hlock = curr->held_locks + i;
+		chain_key = print_chain_key_iteration(hlock->class_idx, chain_key);
+
+		print_lock(hlock);
+	}
+
+	print_chain_key_iteration(hlock_next->class_idx, chain_key);
+	print_lock(hlock_next);
+}
+
+static void print_chain_keys_chain(struct lock_chain *chain)
+{
+	int i;
+	u64 chain_key = 0;
+	int class_id;
+
+	printk("depth: %u\n", chain->depth);
+	for (i = 0; i < chain->depth; i++) {
+		class_id = chain_hlocks[chain->base + i];
+		chain_key = print_chain_key_iteration(class_id + 1, chain_key);
+
+		print_lock_name(lock_classes + class_id);
+		printk("\n");
+	}
+}
+
+static void print_collision(struct task_struct *curr,
+			struct held_lock *hlock_next,
+			struct lock_chain *chain)
+{
+	printk("\n");
+	printk("======================\n");
+	printk("[chain_key collision ]\n");
+	print_kernel_ident();
+	printk("----------------------\n");
+	printk("%s/%d: ", current->comm, task_pid_nr(current));
+	printk("Hash chain already cached but the contents don't match!\n");
+
+	printk("Held locks:");
+	print_chain_keys_held_locks(curr, hlock_next);
+
+	printk("Locks in cached chain:");
+	print_chain_keys_chain(chain);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+}
+
+/*
  * Checks whether the chain and the current held locks are consistent
  * in depth and also in content. If they are not it most likely means
  * that there was a collision during the calculation of the chain_key.
@@ -2014,14 +2085,18 @@
 
 	i = get_first_held_lock(curr, hlock);
 
-	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
+	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) {
+		print_collision(curr, hlock, chain);
 		return 0;
+	}
 
 	for (j = 0; j < chain->depth - 1; j++, i++) {
 		id = curr->held_locks[i].class_idx - 1;
 
-		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
+		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) {
+			print_collision(curr, hlock, chain);
 			return 0;
+		}
 	}
 #endif
 	return 1;

diff --git a/kernel/memremap.c b/kernel/memremap.c
index 584febd..a6d3823 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c

@@ -41,11 +41,13 @@
  * memremap() - remap an iomem_resource as cacheable memory
  * @offset: iomem resource start address
  * @size: size of remap
- * @flags: either MEMREMAP_WB or MEMREMAP_WT
+ * @flags: any of MEMREMAP_WB, MEMREMAP_WT and MEMREMAP_WC
  *
  * memremap() is "ioremap" for cases where it is known that the resource
  * being mapped does not have i/o side effects and the __iomem
- * annotation is not applicable.
+ * annotation is not applicable. In the case of multiple flags, the different
+ * mapping types will be attempted in the order listed below until one of
+ * them succeeds.
  *
  * MEMREMAP_WB - matches the default mapping for System RAM on
  * the architecture.  This is usually a read-allocate write-back cache.
@@ -57,6 +59,10 @@
  * cache or are written through to memory and never exist in a
  * cache-dirty state with respect to program visibility.  Attempts to
  * map System RAM with this mapping type will fail.
+ *
+ * MEMREMAP_WC - establish a writecombine mapping, whereby writes may
+ * be coalesced together (e.g. in the CPU's write buffers), but is otherwise
+ * uncached. Attempts to map System RAM with this mapping type will fail.
  */
 void *memremap(resource_size_t offset, size_t size, unsigned long flags)
 {
@@ -64,6 +70,9 @@
 				       IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
 	void *addr = NULL;
 
+	if (!flags)
+		return NULL;
+
 	if (is_ram == REGION_MIXED) {
 		WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
 				&offset, (unsigned long) size);
@@ -72,7 +81,6 @@
 
 	/* Try all mapping types requested until one returns non-NULL */
 	if (flags & MEMREMAP_WB) {
-		flags &= ~MEMREMAP_WB;
 		/*
 		 * MEMREMAP_WB is special in that it can be satisifed
 		 * from the direct map.  Some archs depend on the
@@ -86,21 +94,22 @@
 	}
 
 	/*
-	 * If we don't have a mapping yet and more request flags are
-	 * pending then we will be attempting to establish a new virtual
+	 * If we don't have a mapping yet and other request flags are
+	 * present then we will be attempting to establish a new virtual
 	 * address mapping.  Enforce that this mapping is not aliasing
 	 * System RAM.
 	 */
-	if (!addr && is_ram == REGION_INTERSECTS && flags) {
+	if (!addr && is_ram == REGION_INTERSECTS && flags != MEMREMAP_WB) {
 		WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
 				&offset, (unsigned long) size);
 		return NULL;
 	}
 
-	if (!addr && (flags & MEMREMAP_WT)) {
-		flags &= ~MEMREMAP_WT;
+	if (!addr && (flags & MEMREMAP_WT))
 		addr = ioremap_wt(offset, size);
-	}
+
+	if (!addr && (flags & MEMREMAP_WC))
+		addr = ioremap_wc(offset, size);
 
 	return addr;
 }

diff --git a/kernel/panic.c b/kernel/panic.c
index fa40085..535c965 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c

@@ -73,6 +73,26 @@
 
 atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
 
+/*
+ * A variant of panic() called from NMI context. We return if we've already
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
+ */
+void nmi_panic(struct pt_regs *regs, const char *msg)
+{
+	int old_cpu, cpu;
+
+	cpu = raw_smp_processor_id();
+	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);
+
+	if (old_cpu == PANIC_CPU_INVALID)
+		panic("%s", msg);
+	else if (old_cpu != cpu)
+		nmi_panic_self_stop(regs);
+}
+EXPORT_SYMBOL(nmi_panic);
+
 /**
  *	panic - halt the system
  *	@fmt: The text string to print

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index aa0f26b..fca9254 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c

@@ -339,6 +339,7 @@
 	pm_message_t msg;
 	int error;
 
+	pm_suspend_clear_flags();
 	error = platform_begin(platform_mode);
 	if (error)
 		goto Close;

diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 230a772..5b70d64 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c

@@ -473,8 +473,7 @@
 	if (state == PM_SUSPEND_FREEZE) {
 #ifdef CONFIG_PM_DEBUG
 		if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
-			pr_warning("PM: Unsupported test mode for suspend to idle,"
-				   "please choose none/freezer/devices/platform.\n");
+			pr_warn("PM: Unsupported test mode for suspend to idle, please choose none/freezer/devices/platform.\n");
 			return -EAGAIN;
 		}
 #endif

diff --git a/kernel/profile.c b/kernel/profile.c
index 5136969..c2199e9 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c

@@ -44,7 +44,7 @@
 EXPORT_SYMBOL_GPL(prof_on);
 
 static cpumask_var_t prof_cpu_mask;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
 static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
 static DEFINE_PER_CPU(int, cpu_profile_flip);
 static DEFINE_MUTEX(profile_flip_mutex);
@@ -202,7 +202,7 @@
 }
 EXPORT_SYMBOL_GPL(profile_event_unregister);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
 /*
  * Each cpu has a pair of open-addressed hashtables for pending
  * profile hits. read_profile() IPI's all cpus to request them

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 2341efe..d49bfa1 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c

@@ -73,12 +73,11 @@
 {
 	BUG_ON(!child->ptrace);
 
-	child->ptrace = 0;
 	child->parent = child->real_parent;
 	list_del_init(&child->ptrace_entry);
 
 	spin_lock(&child->sighand->siglock);
-
+	child->ptrace = 0;
 	/*
 	 * Clear all pending traps and TRAPPING.  TRAPPING should be
 	 * cleared regardless of JOBCTL_STOP_PENDING.  Do it explicitly.
@@ -681,7 +680,7 @@
 			break;
 
 #ifdef CONFIG_COMPAT
-		if (unlikely(is_compat_task())) {
+		if (unlikely(in_compat_syscall())) {
 			compat_siginfo_t __user *uinfo = compat_ptr(data);
 
 			if (copy_siginfo_to_user32(uinfo, &info) ||

diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 61a1656..032b2c0 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile

@@ -1,3 +1,7 @@
+# Any varying coverage in these files is non-deterministic
+# and is generally not a function of system call inputs.
+KCOV_INSTRUMENT := n
+
 obj-y += update.o sync.o
 obj-$(CONFIG_SRCU) += srcu.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o

diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 302d6eb..414d9c1 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile

@@ -2,6 +2,10 @@
 CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
 endif
 
+# These files are disabled because they produce non-interesting flaky coverage
+# that is not a function of syscall inputs. E.g. involuntary context switches.
+KCOV_INSTRUMENT := n
+
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only.  Why this used to be enabled for all architectures is beyond

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44db0ff..8b489fc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c

@@ -321,6 +321,24 @@
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
+/*
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#define fetch_or(ptr, mask)						\
+	({								\
+		typeof(ptr) _ptr = (ptr);				\
+		typeof(mask) _mask = (mask);				\
+		typeof(*_ptr) _old, _val = *_ptr;			\
+									\
+		for (;;) {						\
+			_old = cmpxchg(_ptr, _val, _val | _mask);	\
+			if (_old == _val)				\
+				break;					\
+			_val = _old;					\
+		}							\
+	_old;								\
+})
+
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -5371,6 +5389,7 @@
 
 	case CPU_UP_PREPARE:
 		rq->calc_load_update = calc_load_update;
+		account_reset_rq(rq);
 		break;
 
 	case CPU_ONLINE:
@@ -7537,7 +7556,7 @@
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
-static void free_sched_group(struct task_group *tg)
+static void sched_free_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
@@ -7563,7 +7582,7 @@
 	return tg;
 
 err:
-	free_sched_group(tg);
+	sched_free_group(tg);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -7583,17 +7602,16 @@
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void free_sched_group_rcu(struct rcu_head *rhp)
+static void sched_free_group_rcu(struct rcu_head *rhp)
 {
 	/* now it should be safe to free those cfs_rqs */
-	free_sched_group(container_of(rhp, struct task_group, rcu));
+	sched_free_group(container_of(rhp, struct task_group, rcu));
 }
 
-/* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
 	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
+	call_rcu(&tg->rcu, sched_free_group_rcu);
 }
 
 void sched_offline_group(struct task_group *tg)
@@ -8052,31 +8070,26 @@
 	if (IS_ERR(tg))
 		return ERR_PTR(-ENOMEM);
 
+	sched_online_group(tg, parent);
+
 	return &tg->css;
 }
 
-static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
-	struct task_group *parent = css_tg(css->parent);
 
-	if (parent)
-		sched_online_group(tg, parent);
-	return 0;
+	sched_offline_group(tg);
 }
 
 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
 
-	sched_destroy_group(tg);
-}
-
-static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
-{
-	struct task_group *tg = css_tg(css);
-
-	sched_offline_group(tg);
+	/*
+	 * Relies on the RCU grace period between css_released() and this.
+	 */
+	sched_free_group(tg);
 }
 
 static void cpu_cgroup_fork(struct task_struct *task)
@@ -8436,9 +8449,8 @@
 
 struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_released	= cpu_cgroup_css_released,
 	.css_free	= cpu_cgroup_css_free,
-	.css_online	= cpu_cgroup_css_online,
-	.css_offline	= cpu_cgroup_css_offline,
 	.fork		= cpu_cgroup_fork,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 2ddaebf..4a81120 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c

@@ -145,13 +145,16 @@
 }
 
 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			  u64 reset)
+			  u64 val)
 {
 	struct cpuacct *ca = css_ca(css);
 	int err = 0;
 	int i;
 
-	if (reset) {
+	/*
+	 * Only allow '0' here to do a reset.
+	 */
+	if (val) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -235,23 +238,10 @@
 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
-	int cpu;
-
-	cpu = task_cpu(tsk);
 
 	rcu_read_lock();
-
-	ca = task_ca(tsk);
-
-	while (true) {
-		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-		*cpuusage += cputime;
-
-		ca = parent_ca(ca);
-		if (!ca)
-			break;
-	}
-
+	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
+		*this_cpu_ptr(ca->cpuusage) += cputime;
 	rcu_read_unlock();
 }
 
@@ -260,18 +250,13 @@
  *
  * Note: it's the caller that updates the account of the root cgroup.
  */
-void cpuacct_account_field(struct task_struct *p, int index, u64 val)
+void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
 {
-	struct kernel_cpustat *kcpustat;
 	struct cpuacct *ca;
 
 	rcu_read_lock();
-	ca = task_ca(p);
-	while (ca != &root_cpuacct) {
-		kcpustat = this_cpu_ptr(ca->cpustat);
-		kcpustat->cpustat[index] += val;
-		ca = parent_ca(ca);
-	}
+	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
+		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
 	rcu_read_unlock();
 }
 

diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
index ed60562..ba72807 100644
--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h

@@ -1,7 +1,7 @@
 #ifdef CONFIG_CGROUP_CPUACCT
 
 extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
-extern void cpuacct_account_field(struct task_struct *p, int index, u64 val);
+extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
 
 #else
 
@@ -10,7 +10,7 @@
 }
 
 static inline void
-cpuacct_account_field(struct task_struct *p, int index, u64 val)
+cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
 {
 }
 

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 46d64e4..0fe30e6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c

@@ -3181,17 +3181,25 @@
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	/*
-	 * Update the normalized vruntime before updating min_vruntime
-	 * through calling update_curr().
-	 */
-	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
-		se->vruntime += cfs_rq->min_vruntime;
+	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
+	bool curr = cfs_rq->curr == se;
 
 	/*
-	 * Update run-time statistics of the 'current'.
+	 * If we're the current task, we must renormalise before calling
+	 * update_curr().
 	 */
+	if (renorm && curr)
+		se->vruntime += cfs_rq->min_vruntime;
+
 	update_curr(cfs_rq);
+
+	/*
+	 * Otherwise, renormalise after, such that we're placed at the current
+	 * moment in time, instead of some random moment in the past.
+	 */
+	if (renorm && !curr)
+		se->vruntime += cfs_rq->min_vruntime;
+
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -3207,7 +3215,7 @@
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
 	}
-	if (se != cfs_rq->curr)
+	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
 
@@ -5071,7 +5079,19 @@
 		return i;
 
 	/*
-	 * Otherwise, iterate the domains and find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an eligible idle cpu.
+	 *
+	 * A completely idle sched group at higher domains is more
+	 * desirable than an idle group at a lower level, because lower
+	 * domains have smaller groups and usually share hardware
+	 * resources which causes tasks to contend on them, e.g. x86
+	 * hyperthread siblings in the lowest domain (SMT) can contend
+	 * on the shared cpu pipeline.
+	 *
+	 * However, while we prefer idle groups at higher domains
+	 * finding an idle cpu at the lowest domain is still better than
+	 * returning 'target', which we've already established, isn't
+	 * idle.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
 	for_each_lower_domain(sd) {
@@ -5081,11 +5101,16 @@
 						tsk_cpus_allowed(p)))
 				goto next;
 
+			/* Ensure the entire group is idle */
 			for_each_cpu(i, sched_group_cpus(sg)) {
 				if (i == target || !idle_cpu(i))
 					goto next;
 			}
 
+			/*
+			 * It doesn't matter which cpu we pick, the
+			 * whole group is idle.
+			 */
 			target = cpumask_first_and(sched_group_cpus(sg),
 					tsk_cpus_allowed(p));
 			goto done;

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 382848a..ec2e8d2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h

@@ -1841,3 +1841,16 @@
 static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
 static inline void cpufreq_trigger_update(u64 time) {}
 #endif /* CONFIG_CPU_FREQ */
+
+static inline void account_reset_rq(struct rq *rq)
+{
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	rq->prev_irq_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT
+	rq->prev_steal_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	rq->prev_steal_time_rq = 0;
+#endif
+}

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 15a1795..e1e5a35 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c

@@ -395,7 +395,7 @@
 	struct seccomp_filter *filter = ERR_PTR(-EFAULT);
 
 #ifdef CONFIG_COMPAT
-	if (is_compat_task()) {
+	if (in_compat_syscall()) {
 		struct compat_sock_fprog fprog32;
 		if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
 			goto out;
@@ -529,7 +529,7 @@
 {
 	int *syscall_whitelist = mode1_syscalls;
 #ifdef CONFIG_COMPAT
-	if (is_compat_task())
+	if (in_compat_syscall())
 		syscall_whitelist = mode1_syscalls_32;
 #endif
 	do {

diff --git a/kernel/signal.c b/kernel/signal.c
index fe8ed29..aa9bf00 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c

@@ -3585,6 +3585,10 @@
 
 void __init signals_init(void)
 {
+	/* If this check fails, the __ARCH_SI_PREAMBLE_SIZE value is wrong! */
+	BUILD_BUG_ON(__ARCH_SI_PREAMBLE_SIZE
+		!= offsetof(struct siginfo, _sifields._pad));
+
 	sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
 }
 

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8aae49d..17caf4b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c

@@ -227,7 +227,7 @@
 static inline void lockdep_softirq_end(bool in_hardirq) { }
 #endif
 
-asmlinkage __visible void __do_softirq(void)
+asmlinkage __visible void __softirq_entry __do_softirq(void)
 {
 	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
 	unsigned long old_flags = current->flags;

diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 7e7746a..10a1d7d 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c

@@ -1321,7 +1321,7 @@
 	}
 
 	mnt = task_active_pid_ns(current)->proc_mnt;
-	file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
+	file = file_open_root(mnt->mnt_root, mnt, pathname, flags, 0);
 	result = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out_putname;

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 195fe7d..58e3310 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c

@@ -157,52 +157,50 @@
 cpumask_var_t tick_nohz_full_mask;
 cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
-static unsigned long tick_dep_mask;
+static atomic_t tick_dep_mask;
 
-static void trace_tick_dependency(unsigned long dep)
+static bool check_tick_dependency(atomic_t *dep)
 {
-	if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+	int val = atomic_read(dep);
+
+	if (val & TICK_DEP_MASK_POSIX_TIMER) {
 		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
-		return;
+		return true;
 	}
 
-	if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+	if (val & TICK_DEP_MASK_PERF_EVENTS) {
 		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
-		return;
+		return true;
 	}
 
-	if (dep & TICK_DEP_MASK_SCHED) {
+	if (val & TICK_DEP_MASK_SCHED) {
 		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
-		return;
+		return true;
 	}
 
-	if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+	if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
 		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+		return true;
+	}
+
+	return false;
 }
 
 static bool can_stop_full_tick(struct tick_sched *ts)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
-	if (tick_dep_mask) {
-		trace_tick_dependency(tick_dep_mask);
+	if (check_tick_dependency(&tick_dep_mask))
 		return false;
-	}
 
-	if (ts->tick_dep_mask) {
-		trace_tick_dependency(ts->tick_dep_mask);
+	if (check_tick_dependency(&ts->tick_dep_mask))
 		return false;
-	}
 
-	if (current->tick_dep_mask) {
-		trace_tick_dependency(current->tick_dep_mask);
+	if (check_tick_dependency(&current->tick_dep_mask))
 		return false;
-	}
 
-	if (current->signal->tick_dep_mask) {
-		trace_tick_dependency(current->signal->tick_dep_mask);
+	if (check_tick_dependency(&current->signal->tick_dep_mask))
 		return false;
-	}
 
 	return true;
 }
@@ -259,12 +257,12 @@
 	preempt_enable();
 }
 
-static void tick_nohz_dep_set_all(unsigned long *dep,
+static void tick_nohz_dep_set_all(atomic_t *dep,
 				  enum tick_dep_bits bit)
 {
-	unsigned long prev;
+	int prev;
 
-	prev = fetch_or(dep, BIT_MASK(bit));
+	prev = atomic_fetch_or(dep, BIT(bit));
 	if (!prev)
 		tick_nohz_full_kick_all();
 }
@@ -280,7 +278,7 @@
 
 void tick_nohz_dep_clear(enum tick_dep_bits bit)
 {
-	clear_bit(bit, &tick_dep_mask);
+	atomic_andnot(BIT(bit), &tick_dep_mask);
 }
 
 /*
@@ -289,12 +287,12 @@
  */
 void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
 {
-	unsigned long prev;
+	int prev;
 	struct tick_sched *ts;
 
 	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 
-	prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
+	prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit));
 	if (!prev) {
 		preempt_disable();
 		/* Perf needs local kick that is NMI safe */
@@ -313,7 +311,7 @@
 {
 	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 
-	clear_bit(bit, &ts->tick_dep_mask);
+	atomic_andnot(BIT(bit), &ts->tick_dep_mask);
 }
 
 /*
@@ -331,7 +329,7 @@
 
 void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
-	clear_bit(bit, &tsk->tick_dep_mask);
+	atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
 }
 
 /*
@@ -345,7 +343,7 @@
 
 void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
 {
-	clear_bit(bit, &sig->tick_dep_mask);
+	atomic_andnot(BIT(bit), &sig->tick_dep_mask);
 }
 
 /*
@@ -366,7 +364,8 @@
 	ts = this_cpu_ptr(&tick_cpu_sched);
 
 	if (ts->tick_stopped) {
-		if (current->tick_dep_mask || current->signal->tick_dep_mask)
+		if (atomic_read(&current->tick_dep_mask) ||
+		    atomic_read(&current->signal->tick_dep_mask))
 			tick_nohz_full_kick();
 	}
 out:
@@ -378,7 +377,7 @@
 {
 	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
 	if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
-		pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
+		pr_warn("NO_HZ: Incorrect nohz_full cpumask\n");
 		free_bootmem_cpumask_var(tick_nohz_full_mask);
 		return 1;
 	}
@@ -446,8 +445,7 @@
 	 * interrupts to avoid circular dependency on the tick
 	 */
 	if (!arch_irq_work_has_interrupt()) {
-		pr_warning("NO_HZ: Can't run full dynticks because arch doesn't "
-			   "support irq work self-IPIs\n");
+		pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
 		cpumask_clear(tick_nohz_full_mask);
 		cpumask_copy(housekeeping_mask, cpu_possible_mask);
 		tick_nohz_full_running = false;
@@ -457,7 +455,8 @@
 	cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
-		pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
+		pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
+			cpu);
 		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
 	}
 

diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index eb4e325..bf38226 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h

@@ -60,7 +60,7 @@
 	u64				next_timer;
 	ktime_t				idle_expires;
 	int				do_timer_last;
-	unsigned long			tick_dep_mask;
+	atomic_t			tick_dep_mask;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d1798fa..73164c3 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c

@@ -1566,6 +1566,17 @@
 }
 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 
+/*
+ * Like schedule_timeout_uninterruptible(), except this task will not contribute
+ * to load average.
+ */
+signed long __sched schedule_timeout_idle(signed long timeout)
+{
+	__set_current_state(TASK_IDLE);
+	return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_idle);
+
 #ifdef CONFIG_HOTPLUG_CPU
 static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
 {

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2aeb6ff..f94e7a2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c

@@ -1437,12 +1437,12 @@
 static int __init init_blk_tracer(void)
 {
 	if (!register_trace_event(&trace_blk_event)) {
-		pr_warning("Warning: could not register block events\n");
+		pr_warn("Warning: could not register block events\n");
 		return 1;
 	}
 
 	if (register_tracer(&blk_tracer) != 0) {
-		pr_warning("Warning: could not register the block tracer\n");
+		pr_warn("Warning: could not register the block tracer\n");
 		unregister_trace_event(&trace_blk_event);
 		return 1;
 	}

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 57a6eea..b1870fb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c

@@ -1030,8 +1030,7 @@
 	for_each_possible_cpu(cpu) {
 		stat = &per_cpu(ftrace_profile_stats, cpu);
 
-		/* allocate enough for function name + cpu number */
-		name = kmalloc(32, GFP_KERNEL);
+		name = kasprintf(GFP_KERNEL, "function%d", cpu);
 		if (!name) {
 			/*
 			 * The files created are permanent, if something happens
@@ -1043,7 +1042,6 @@
 			return;
 		}
 		stat->stat = function_stats;
-		snprintf(name, 32, "function%d", cpu);
 		stat->stat.name = name;
 		ret = register_stat_tracer(&stat->stat);
 		if (ret) {
@@ -1058,8 +1056,7 @@
 	entry = tracefs_create_file("function_profile_enabled", 0644,
 				    d_tracer, NULL, &ftrace_profile_fops);
 	if (!entry)
-		pr_warning("Could not create tracefs "
-			   "'function_profile_enabled' entry\n");
+		pr_warn("Could not create tracefs 'function_profile_enabled' entry\n");
 }
 
 #else /* CONFIG_FUNCTION_PROFILER */
@@ -1610,7 +1607,7 @@
 	return  keep_regs;
 }
 
-static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
+static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
 				     int filter_hash,
 				     bool inc)
 {
@@ -1618,12 +1615,13 @@
 	struct ftrace_hash *other_hash;
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec;
+	bool update = false;
 	int count = 0;
 	int all = 0;
 
 	/* Only update if the ops has been registered */
 	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
-		return;
+		return false;
 
 	/*
 	 * In the filter_hash case:
@@ -1650,7 +1648,7 @@
 		 * then there's nothing to do.
 		 */
 		if (ftrace_hash_empty(hash))
-			return;
+			return false;
 	}
 
 	do_for_each_ftrace_rec(pg, rec) {
@@ -1694,7 +1692,7 @@
 		if (inc) {
 			rec->flags++;
 			if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX))
-				return;
+				return false;
 
 			/*
 			 * If there's only a single callback registered to a
@@ -1720,7 +1718,7 @@
 				rec->flags |= FTRACE_FL_REGS;
 		} else {
 			if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0))
-				return;
+				return false;
 			rec->flags--;
 
 			/*
@@ -1753,22 +1751,28 @@
 			 */
 		}
 		count++;
+
+		/* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
+		update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE;
+
 		/* Shortcut, if we handled all records, we are done. */
 		if (!all && count == hash->count)
-			return;
+			return update;
 	} while_for_each_ftrace_rec();
+
+	return update;
 }
 
-static void ftrace_hash_rec_disable(struct ftrace_ops *ops,
+static bool ftrace_hash_rec_disable(struct ftrace_ops *ops,
 				    int filter_hash)
 {
-	__ftrace_hash_rec_update(ops, filter_hash, 0);
+	return __ftrace_hash_rec_update(ops, filter_hash, 0);
 }
 
-static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
+static bool ftrace_hash_rec_enable(struct ftrace_ops *ops,
 				   int filter_hash)
 {
-	__ftrace_hash_rec_update(ops, filter_hash, 1);
+	return __ftrace_hash_rec_update(ops, filter_hash, 1);
 }
 
 static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops,
@@ -2314,8 +2318,8 @@
 	if (rec->flags & FTRACE_FL_TRAMP_EN) {
 		ops = ftrace_find_tramp_ops_curr(rec);
 		if (FTRACE_WARN_ON(!ops)) {
-			pr_warning("Bad trampoline accounting at: %p (%pS)\n",
-				    (void *)rec->ip, (void *)rec->ip);
+			pr_warn("Bad trampoline accounting at: %p (%pS)\n",
+				(void *)rec->ip, (void *)rec->ip);
 			/* Ftrace is shutting down, return anything */
 			return (unsigned long)FTRACE_ADDR;
 		}
@@ -2644,7 +2648,6 @@
 		return ret;
 
 	ftrace_start_up++;
-	command |= FTRACE_UPDATE_CALLS;
 
 	/*
 	 * Note that ftrace probes uses this to start up
@@ -2665,7 +2668,8 @@
 		return ret;
 	}
 
-	ftrace_hash_rec_enable(ops, 1);
+	if (ftrace_hash_rec_enable(ops, 1))
+		command |= FTRACE_UPDATE_CALLS;
 
 	ftrace_startup_enable(command);
 
@@ -2695,12 +2699,12 @@
 
 	/* Disabling ipmodify never fails */
 	ftrace_hash_ipmodify_disable(ops);
-	ftrace_hash_rec_disable(ops, 1);
+
+	if (ftrace_hash_rec_disable(ops, 1))
+		command |= FTRACE_UPDATE_CALLS;
 
 	ops->flags &= ~FTRACE_OPS_FL_ENABLED;
 
-	command |= FTRACE_UPDATE_CALLS;
-
 	if (saved_ftrace_func != ftrace_trace_function) {
 		saved_ftrace_func = ftrace_trace_function;
 		command |= FTRACE_UPDATE_TRACE_FUNC;

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d929340..a2f0b9f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c

@@ -74,11 +74,6 @@
 	{ }
 };
 
-static struct tracer_flags dummy_tracer_flags = {
-	.val = 0,
-	.opts = dummy_tracer_opt
-};
-
 static int
 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 {
@@ -1258,12 +1253,22 @@
 
 	if (!type->set_flag)
 		type->set_flag = &dummy_set_flag;
-	if (!type->flags)
-		type->flags = &dummy_tracer_flags;
-	else
+	if (!type->flags) {
+		/*allocate a dummy tracer_flags*/
+		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
+		if (!type->flags) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		type->flags->val = 0;
+		type->flags->opts = dummy_tracer_opt;
+	} else
 		if (!type->flags->opts)
 			type->flags->opts = dummy_tracer_opt;
 
+	/* store the tracer for __set_tracer_option */
+	type->flags->trace = type;
+
 	ret = run_tracer_selftest(type);
 	if (ret < 0)
 		goto out;
@@ -1659,6 +1664,7 @@
 #else
 		TRACE_FLAG_IRQS_NOSUPPORT |
 #endif
+		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
@@ -2071,20 +2077,20 @@
 
 	/* trace_printk() is for debug use only. Don't use it in production. */
 
-	pr_warning("\n");
-	pr_warning("**********************************************************\n");
-	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
-	pr_warning("**                                                      **\n");
-	pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
-	pr_warning("**                                                      **\n");
-	pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
-	pr_warning("** unsafe for production use.                           **\n");
-	pr_warning("**                                                      **\n");
-	pr_warning("** If you see this message and you are not debugging    **\n");
-	pr_warning("** the kernel, report this immediately to your vendor!  **\n");
-	pr_warning("**                                                      **\n");
-	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
-	pr_warning("**********************************************************\n");
+	pr_warn("\n");
+	pr_warn("**********************************************************\n");
+	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
+	pr_warn("**                                                      **\n");
+	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
+	pr_warn("**                                                      **\n");
+	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
+	pr_warn("** unsafe for production use.                           **\n");
+	pr_warn("**                                                      **\n");
+	pr_warn("** If you see this message and you are not debugging    **\n");
+	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
+	pr_warn("**                                                      **\n");
+	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
+	pr_warn("**********************************************************\n");
 
 	/* Expand the buffers to set size */
 	tracing_update_buffers();
@@ -3505,7 +3511,7 @@
 			       struct tracer_flags *tracer_flags,
 			       struct tracer_opt *opts, int neg)
 {
-	struct tracer *trace = tr->current_trace;
+	struct tracer *trace = tracer_flags->trace;
 	int ret;
 
 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
@@ -4101,7 +4107,7 @@
 	 */
 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
 	if (!map_array) {
-		pr_warning("Unable to allocate trace enum mapping\n");
+		pr_warn("Unable to allocate trace enum mapping\n");
 		return;
 	}
 
@@ -4949,7 +4955,10 @@
 
 	spd.nr_pages = i;
 
-	ret = splice_to_pipe(pipe, &spd);
+	if (i)
+		ret = splice_to_pipe(pipe, &spd);
+	else
+		ret = 0;
 out:
 	splice_shrink_spd(&spd);
 	return ret;
@@ -6131,7 +6140,7 @@
 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
 	if (!d_cpu) {
-		pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
+		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
 		return;
 	}
 
@@ -6318,7 +6327,7 @@
 
 	ret = tracefs_create_file(name, mode, parent, data, fops);
 	if (!ret)
-		pr_warning("Could not create tracefs '%s' entry\n", name);
+		pr_warn("Could not create tracefs '%s' entry\n", name);
 
 	return ret;
 }
@@ -6337,7 +6346,7 @@
 
 	tr->options = tracefs_create_dir("options", d_tracer);
 	if (!tr->options) {
-		pr_warning("Could not create tracefs directory 'options'\n");
+		pr_warn("Could not create tracefs directory 'options'\n");
 		return NULL;
 	}
 
@@ -6391,11 +6400,8 @@
 		return;
 
 	for (i = 0; i < tr->nr_topts; i++) {
-		/*
-		 * Check if these flags have already been added.
-		 * Some tracers share flags.
-		 */
-		if (tr->topts[i].tracer->flags == tracer->flags)
+		/* Make sure there's no duplicate flags. */
+		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
 			return;
 	}
 
@@ -7248,8 +7254,8 @@
 	if (trace_boot_clock) {
 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
 		if (ret < 0)
-			pr_warning("Trace clock %s not defined, going back to default\n",
-				   trace_boot_clock);
+			pr_warn("Trace clock %s not defined, going back to default\n",
+				trace_boot_clock);
 	}
 
 	/*

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8414fa4..3fff4ad 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h

@@ -125,6 +125,7 @@
 	TRACE_FLAG_HARDIRQ		= 0x08,
 	TRACE_FLAG_SOFTIRQ		= 0x10,
 	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,
+	TRACE_FLAG_NMI			= 0x40,
 };
 
 #define TRACE_BUF_SIZE		1024
@@ -345,6 +346,7 @@
 struct tracer_flags {
 	u32			val;
 	struct tracer_opt	*opts;
+	struct tracer		*trace;
 };
 
 /* Makes more easy to define a tracer opt */
@@ -1111,6 +1113,18 @@
 	unsigned short		right;
 };
 
+static inline bool is_string_field(struct ftrace_event_field *field)
+{
+	return field->filter_type == FILTER_DYN_STRING ||
+	       field->filter_type == FILTER_STATIC_STRING ||
+	       field->filter_type == FILTER_PTR_STRING;
+}
+
+static inline bool is_function_field(struct ftrace_event_field *field)
+{
+	return field->filter_type == FILTER_TRACE_FN;
+}
+
 extern enum regex_type
 filter_parse_regex(char *buff, int len, char **search, int *not);
 extern void print_event_filter(struct trace_event_file *file,
@@ -1159,9 +1173,24 @@
 	struct event_filter __rcu	*filter;
 	char				*filter_str;
 	void				*private_data;
+	bool				paused;
 	struct list_head		list;
 };
 
+extern void trigger_data_free(struct event_trigger_data *data);
+extern int event_trigger_init(struct event_trigger_ops *ops,
+			      struct event_trigger_data *data);
+extern int trace_event_trigger_enable_disable(struct trace_event_file *file,
+					      int trigger_enable);
+extern void update_cond_flag(struct trace_event_file *file);
+extern void unregister_trigger(char *glob, struct event_trigger_ops *ops,
+			       struct event_trigger_data *test,
+			       struct trace_event_file *file);
+extern int set_trigger_filter(char *filter_str,
+			      struct event_trigger_data *trigger_data,
+			      struct trace_event_file *file);
+extern int register_event_command(struct event_command *cmd);
+
 /**
  * struct event_trigger_ops - callbacks for trace event triggers
  *
@@ -1174,7 +1203,8 @@
  * @func: The trigger 'probe' function called when the triggering
  *	event occurs.  The data passed into this callback is the data
  *	that was supplied to the event_command @reg() function that
- *	registered the trigger (see struct event_command).
+ *	registered the trigger (see struct event_command) along with
+ *	the trace record, rec.
  *
  * @init: An optional initialization function called for the trigger
  *	when the trigger is registered (via the event_command reg()
@@ -1199,7 +1229,8 @@
  *	(see trace_event_triggers.c).
  */
 struct event_trigger_ops {
-	void			(*func)(struct event_trigger_data *data);
+	void			(*func)(struct event_trigger_data *data,
+					void *rec);
 	int			(*init)(struct event_trigger_ops *ops,
 					struct event_trigger_data *data);
 	void			(*free)(struct event_trigger_ops *ops,
@@ -1243,27 +1274,10 @@
  *	values are defined by adding new values to the trigger_type
  *	enum in include/linux/trace_events.h.
  *
- * @post_trigger: A flag that says whether or not this command needs
- *	to have its action delayed until after the current event has
- *	been closed.  Some triggers need to avoid being invoked while
- *	an event is currently in the process of being logged, since
- *	the trigger may itself log data into the trace buffer.  Thus
- *	we make sure the current event is committed before invoking
- *	those triggers.  To do that, the trigger invocation is split
- *	in two - the first part checks the filter using the current
- *	trace record; if a command has the @post_trigger flag set, it
- *	sets a bit for itself in the return value, otherwise it
- *	directly invokes the trigger.  Once all commands have been
- *	either invoked or set their return flag, the current record is
- *	either committed or discarded.  At that point, if any commands
- *	have deferred their triggers, those commands are finally
- *	invoked following the close of the current event.  In other
- *	words, if the event_trigger_ops @func() probe implementation
- *	itself logs to the trace buffer, this flag should be set,
- *	otherwise it can be left unspecified.
+ * @flags: See the enum event_command_flags below.
  *
- * All the methods below, except for @set_filter(), must be
- * implemented.
+ * All the methods below, except for @set_filter() and @unreg_all(),
+ * must be implemented.
  *
  * @func: The callback function responsible for parsing and
  *	registering the trigger written to the 'trigger' file by the
@@ -1288,6 +1302,10 @@
  *	This is usually implemented by the generic utility function
  *	@unregister_trigger() (see trace_event_triggers.c).
  *
+ * @unreg_all: An optional function called to remove all the triggers
+ *	from the list of triggers associated with the event.  Called
+ *	when a trigger file is opened in truncate mode.
+ *
  * @set_filter: An optional function called to parse and set a filter
  *	for the trigger.  If no @set_filter() method is set for the
  *	event command, filters set by the user for the command will be
@@ -1301,7 +1319,7 @@
 	struct list_head	list;
 	char			*name;
 	enum event_trigger_type	trigger_type;
-	bool			post_trigger;
+	int			flags;
 	int			(*func)(struct event_command *cmd_ops,
 					struct trace_event_file *file,
 					char *glob, char *cmd, char *params);
@@ -1313,12 +1331,56 @@
 					 struct event_trigger_ops *ops,
 					 struct event_trigger_data *data,
 					 struct trace_event_file *file);
+	void			(*unreg_all)(struct trace_event_file *file);
 	int			(*set_filter)(char *filter_str,
 					      struct event_trigger_data *data,
 					      struct trace_event_file *file);
 	struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param);
 };
 
+/**
+ * enum event_command_flags - flags for struct event_command
+ *
+ * @POST_TRIGGER: A flag that says whether or not this command needs
+ *	to have its action delayed until after the current event has
+ *	been closed.  Some triggers need to avoid being invoked while
+ *	an event is currently in the process of being logged, since
+ *	the trigger may itself log data into the trace buffer.  Thus
+ *	we make sure the current event is committed before invoking
+ *	those triggers.  To do that, the trigger invocation is split
+ *	in two - the first part checks the filter using the current
+ *	trace record; if a command has the @post_trigger flag set, it
+ *	sets a bit for itself in the return value, otherwise it
+ *	directly invokes the trigger.  Once all commands have been
+ *	either invoked or set their return flag, the current record is
+ *	either committed or discarded.  At that point, if any commands
+ *	have deferred their triggers, those commands are finally
+ *	invoked following the close of the current event.  In other
+ *	words, if the event_trigger_ops @func() probe implementation
+ *	itself logs to the trace buffer, this flag should be set,
+ *	otherwise it can be left unspecified.
+ *
+ * @NEEDS_REC: A flag that says whether or not this command needs
+ *	access to the trace record in order to perform its function,
+ *	regardless of whether or not it has a filter associated with
+ *	it (filters make a trigger require access to the trace record
+ *	but are not always present).
+ */
+enum event_command_flags {
+	EVENT_CMD_FL_POST_TRIGGER	= 1,
+	EVENT_CMD_FL_NEEDS_REC		= 2,
+};
+
+static inline bool event_command_post_trigger(struct event_command *cmd_ops)
+{
+	return cmd_ops->flags & EVENT_CMD_FL_POST_TRIGGER;
+}
+
+static inline bool event_command_needs_rec(struct event_command *cmd_ops)
+{
+	return cmd_ops->flags & EVENT_CMD_FL_NEEDS_REC;
+}
+
 extern int trace_event_enable_disable(struct trace_event_file *file,
 				      int enable, int soft_disable);
 extern int tracing_alloc_snapshot(void);
@@ -1365,8 +1427,13 @@
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 void init_ftrace_syscalls(void);
+const char *get_syscall_name(int syscall);
 #else
 static inline void init_ftrace_syscalls(void) { }
+static inline const char *get_syscall_name(int syscall)
+{
+	return NULL;
+}
 #endif
 
 #ifdef CONFIG_EVENT_TRACING

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 6816302..b3f5051 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c

@@ -961,18 +961,6 @@
 	return FILTER_OTHER;
 }
 
-static bool is_function_field(struct ftrace_event_field *field)
-{
-	return field->filter_type == FILTER_TRACE_FN;
-}
-
-static bool is_string_field(struct ftrace_event_field *field)
-{
-	return field->filter_type == FILTER_DYN_STRING ||
-	       field->filter_type == FILTER_STATIC_STRING ||
-	       field->filter_type == FILTER_PTR_STRING;
-}
-
 static bool is_legal_op(struct ftrace_event_field *field, int op)
 {
 	if (is_string_field(field) &&

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index b38f617..d67992f 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c

@@ -28,8 +28,7 @@
 static LIST_HEAD(trigger_commands);
 static DEFINE_MUTEX(trigger_cmd_mutex);
 
-static void
-trigger_data_free(struct event_trigger_data *data)
+void trigger_data_free(struct event_trigger_data *data)
 {
 	if (data->cmd_ops->set_filter)
 		data->cmd_ops->set_filter(NULL, data, NULL);
@@ -73,18 +72,20 @@
 		return tt;
 
 	list_for_each_entry_rcu(data, &file->triggers, list) {
+		if (data->paused)
+			continue;
 		if (!rec) {
-			data->ops->func(data);
+			data->ops->func(data, rec);
 			continue;
 		}
 		filter = rcu_dereference_sched(data->filter);
 		if (filter && !filter_match_preds(filter, rec))
 			continue;
-		if (data->cmd_ops->post_trigger) {
+		if (event_command_post_trigger(data->cmd_ops)) {
 			tt |= data->cmd_ops->trigger_type;
 			continue;
 		}
-		data->ops->func(data);
+		data->ops->func(data, rec);
 	}
 	return tt;
 }
@@ -94,6 +95,7 @@
  * event_triggers_post_call - Call 'post_triggers' for a trace event
  * @file: The trace_event_file associated with the event
  * @tt: enum event_trigger_type containing a set bit for each trigger to invoke
+ * @rec: The trace entry for the event
  *
  * For each trigger associated with an event, invoke the trigger
  * function registered with the associated trigger command, if the
@@ -104,13 +106,16 @@
  */
 void
 event_triggers_post_call(struct trace_event_file *file,
-			 enum event_trigger_type tt)
+			 enum event_trigger_type tt,
+			 void *rec)
 {
 	struct event_trigger_data *data;
 
 	list_for_each_entry_rcu(data, &file->triggers, list) {
+		if (data->paused)
+			continue;
 		if (data->cmd_ops->trigger_type & tt)
-			data->ops->func(data);
+			data->ops->func(data, rec);
 	}
 }
 EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -188,6 +193,19 @@
 		return -ENODEV;
 	}
 
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC)) {
+		struct trace_event_file *event_file;
+		struct event_command *p;
+
+		event_file = event_file_data(file);
+
+		list_for_each_entry(p, &trigger_commands, list) {
+			if (p->unreg_all)
+				p->unreg_all(event_file);
+		}
+	}
+
 	if (file->f_mode & FMODE_READ) {
 		ret = seq_open(file, &event_triggers_seq_ops);
 		if (!ret) {
@@ -306,7 +324,7 @@
  * Currently we only register event commands from __init, so mark this
  * __init too.
  */
-static __init int register_event_command(struct event_command *cmd)
+__init int register_event_command(struct event_command *cmd)
 {
 	struct event_command *p;
 	int ret = 0;
@@ -395,9 +413,8 @@
  *
  * Return: 0 on success, errno otherwise
  */
-static int
-event_trigger_init(struct event_trigger_ops *ops,
-		   struct event_trigger_data *data)
+int event_trigger_init(struct event_trigger_ops *ops,
+		       struct event_trigger_data *data)
 {
 	data->ref++;
 	return 0;
@@ -425,8 +442,8 @@
 		trigger_data_free(data);
 }
 
-static int trace_event_trigger_enable_disable(struct trace_event_file *file,
-					      int trigger_enable)
+int trace_event_trigger_enable_disable(struct trace_event_file *file,
+				       int trigger_enable)
 {
 	int ret = 0;
 
@@ -483,13 +500,14 @@
  * its TRIGGER_COND bit set, otherwise the TRIGGER_COND bit should be
  * cleared.
  */
-static void update_cond_flag(struct trace_event_file *file)
+void update_cond_flag(struct trace_event_file *file)
 {
 	struct event_trigger_data *data;
 	bool set_cond = false;
 
 	list_for_each_entry_rcu(data, &file->triggers, list) {
-		if (data->filter || data->cmd_ops->post_trigger) {
+		if (data->filter || event_command_post_trigger(data->cmd_ops) ||
+		    event_command_needs_rec(data->cmd_ops)) {
 			set_cond = true;
 			break;
 		}
@@ -560,9 +578,9 @@
  * Usually used directly as the @unreg method in event command
  * implementations.
  */
-static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
-			       struct event_trigger_data *test,
-			       struct trace_event_file *file)
+void unregister_trigger(char *glob, struct event_trigger_ops *ops,
+			struct event_trigger_data *test,
+			struct trace_event_file *file)
 {
 	struct event_trigger_data *data;
 	bool unregistered = false;
@@ -696,9 +714,9 @@
  *
  * Return: 0 on success, errno otherwise
  */
-static int set_trigger_filter(char *filter_str,
-			      struct event_trigger_data *trigger_data,
-			      struct trace_event_file *file)
+int set_trigger_filter(char *filter_str,
+		       struct event_trigger_data *trigger_data,
+		       struct trace_event_file *file)
 {
 	struct event_trigger_data *data = trigger_data;
 	struct event_filter *filter = NULL, *tmp;
@@ -747,7 +765,7 @@
 }
 
 static void
-traceon_trigger(struct event_trigger_data *data)
+traceon_trigger(struct event_trigger_data *data, void *rec)
 {
 	if (tracing_is_on())
 		return;
@@ -756,7 +774,7 @@
 }
 
 static void
-traceon_count_trigger(struct event_trigger_data *data)
+traceon_count_trigger(struct event_trigger_data *data, void *rec)
 {
 	if (tracing_is_on())
 		return;
@@ -771,7 +789,7 @@
 }
 
 static void
-traceoff_trigger(struct event_trigger_data *data)
+traceoff_trigger(struct event_trigger_data *data, void *rec)
 {
 	if (!tracing_is_on())
 		return;
@@ -780,7 +798,7 @@
 }
 
 static void
-traceoff_count_trigger(struct event_trigger_data *data)
+traceoff_count_trigger(struct event_trigger_data *data, void *rec)
 {
 	if (!tracing_is_on())
 		return;
@@ -876,13 +894,13 @@
 
 #ifdef CONFIG_TRACER_SNAPSHOT
 static void
-snapshot_trigger(struct event_trigger_data *data)
+snapshot_trigger(struct event_trigger_data *data, void *rec)
 {
 	tracing_snapshot();
 }
 
 static void
-snapshot_count_trigger(struct event_trigger_data *data)
+snapshot_count_trigger(struct event_trigger_data *data, void *rec)
 {
 	if (!data->count)
 		return;
@@ -890,7 +908,7 @@
 	if (data->count != -1)
 		(data->count)--;
 
-	snapshot_trigger(data);
+	snapshot_trigger(data, rec);
 }
 
 static int
@@ -969,13 +987,13 @@
 #define STACK_SKIP 3
 
 static void
-stacktrace_trigger(struct event_trigger_data *data)
+stacktrace_trigger(struct event_trigger_data *data, void *rec)
 {
 	trace_dump_stack(STACK_SKIP);
 }
 
 static void
-stacktrace_count_trigger(struct event_trigger_data *data)
+stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
 {
 	if (!data->count)
 		return;
@@ -983,7 +1001,7 @@
 	if (data->count != -1)
 		(data->count)--;
 
-	stacktrace_trigger(data);
+	stacktrace_trigger(data, rec);
 }
 
 static int
@@ -1017,7 +1035,7 @@
 static struct event_command trigger_stacktrace_cmd = {
 	.name			= "stacktrace",
 	.trigger_type		= ETT_STACKTRACE,
-	.post_trigger		= true,
+	.flags			= EVENT_CMD_FL_POST_TRIGGER,
 	.func			= event_trigger_callback,
 	.reg			= register_trigger,
 	.unreg			= unregister_trigger,
@@ -1054,7 +1072,7 @@
 };
 
 static void
-event_enable_trigger(struct event_trigger_data *data)
+event_enable_trigger(struct event_trigger_data *data, void *rec)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
 
@@ -1065,7 +1083,7 @@
 }
 
 static void
-event_enable_count_trigger(struct event_trigger_data *data)
+event_enable_count_trigger(struct event_trigger_data *data, void *rec)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
 
@@ -1079,7 +1097,7 @@
 	if (data->count != -1)
 		(data->count)--;
 
-	event_enable_trigger(data);
+	event_enable_trigger(data, rec);
 }
 
 static int

diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index fcd41a1..5a095c2 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c

@@ -219,6 +219,8 @@
 	unregister_ftrace_function(tr->ops);
 }
 
+static struct tracer function_trace;
+
 static int
 func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 {
@@ -228,6 +230,10 @@
 		if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
 			break;
 
+		/* We can change this flag when not running. */
+		if (tr->current_trace != &function_trace)
+			break;
+
 		unregister_ftrace_function(tr->ops);
 
 		if (set) {

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a663cbb..3a0244f 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c

@@ -8,6 +8,7 @@
  */
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
+#include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
 
@@ -1350,7 +1351,7 @@
  out_err_free:
 	kfree(data);
  out_err:
-	pr_warning("function graph tracer: not enough memory\n");
+	pr_warn("function graph tracer: not enough memory\n");
 }
 
 void graph_trace_close(struct trace_iterator *iter)
@@ -1468,12 +1469,12 @@
 	max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
 
 	if (!register_trace_event(&graph_trace_entry_event)) {
-		pr_warning("Warning: could not register graph trace events\n");
+		pr_warn("Warning: could not register graph trace events\n");
 		return 1;
 	}
 
 	if (!register_trace_event(&graph_trace_ret_event)) {
-		pr_warning("Warning: could not register graph trace events\n");
+		pr_warn("Warning: could not register graph trace events\n");
 		return 1;
 	}
 

diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index e4e5658..03cdff8 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c

@@ -109,8 +109,12 @@
 		return 0;
 
 	local_save_flags(*flags);
-	/* slight chance to get a false positive on tracing_cpu */
-	if (!irqs_disabled_flags(*flags))
+	/*
+	 * Slight chance to get a false positive on tracing_cpu,
+	 * although I'm starting to think there isn't a chance.
+	 * Leave this for now just to be paranoid.
+	 */
+	if (!irqs_disabled_flags(*flags) && !preempt_count())
 		return 0;
 
 	*data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -622,7 +626,6 @@
 	irqsoff_trace = tr;
 	/* make sure that the tracer is visible */
 	smp_wmb();
-	tracing_reset_online_cpus(&tr->trace_buffer);
 
 	ftrace_init_array_ops(tr, irqsoff_tracer_call);
 

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 21b81a4..919e0dd 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c

@@ -459,16 +459,14 @@
 	if (ret == 0)
 		tk->tp.flags |= TP_FLAG_REGISTERED;
 	else {
-		pr_warning("Could not insert probe at %s+%lu: %d\n",
-			   trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
+		pr_warn("Could not insert probe at %s+%lu: %d\n",
+			trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
 		if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
-			pr_warning("This probe might be able to register after"
-				   "target module is loaded. Continue.\n");
+			pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
 			ret = 0;
 		} else if (ret == -EILSEQ) {
-			pr_warning("Probing address(0x%p) is not an "
-				   "instruction boundary.\n",
-				   tk->rp.kp.addr);
+			pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
+				tk->rp.kp.addr);
 			ret = -EINVAL;
 		}
 	}
@@ -529,7 +527,7 @@
 	/* Register new event */
 	ret = register_kprobe_event(tk);
 	if (ret) {
-		pr_warning("Failed to register probe event(%d)\n", ret);
+		pr_warn("Failed to register probe event(%d)\n", ret);
 		goto end;
 	}
 
@@ -564,10 +562,9 @@
 			__unregister_trace_kprobe(tk);
 			ret = __register_trace_kprobe(tk);
 			if (ret)
-				pr_warning("Failed to re-register probe %s on"
-					   "%s: %d\n",
-					   trace_event_name(&tk->tp.call),
-					   mod->name, ret);
+				pr_warn("Failed to re-register probe %s on %s: %d\n",
+					trace_event_name(&tk->tp.call),
+					mod->name, ret);
 		}
 	}
 	mutex_unlock(&probe_lock);
@@ -1336,16 +1333,14 @@
 
 	/* Event list interface */
 	if (!entry)
-		pr_warning("Could not create tracefs "
-			   "'kprobe_events' entry\n");
+		pr_warn("Could not create tracefs 'kprobe_events' entry\n");
 
 	/* Profile interface */
 	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
 				    NULL, &kprobe_profile_ops);
 
 	if (!entry)
-		pr_warning("Could not create tracefs "
-			   "'kprobe_profile' entry\n");
+		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
 	return 0;
 }
 fs_initcall(init_kprobe_trace);

diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 2be8c4f..68f376c 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c

@@ -146,7 +146,7 @@
 		/* XXX: This is later than where events were lost. */
 		trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n);
 		if (!overrun_detected)
-			pr_warning("mmiotrace has lost events.\n");
+			pr_warn("mmiotrace has lost events\n");
 		overrun_detected = true;
 		goto print_out;
 	}

diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 8bb2071..49f61fe9 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c

@@ -56,7 +56,7 @@
 }
 
 /* It only serves as a signal handler and a callback to
- * accept or refuse tthe setting of a flag.
+ * accept or refuse the setting of a flag.
  * If you don't implement it, then the flag setting will be
  * automatically accepted.
  */
@@ -75,7 +75,7 @@
 
 	if (bit == TRACE_NOP_OPT_REFUSE) {
 		printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
-			"Now cat trace_options to see the result\n",
+			" Now cat trace_options to see the result\n",
 			set);
 		return -EINVAL;
 	}

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 2829821..0bb9cf2 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c

@@ -389,7 +389,9 @@
 	char irqs_off;
 	int hardirq;
 	int softirq;
+	int nmi;
 
+	nmi = entry->flags & TRACE_FLAG_NMI;
 	hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
 	softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
 
@@ -415,10 +417,12 @@
 	}
 
 	hardsoft_irq =
+		(nmi && hardirq)     ? 'Z' :
+		nmi                  ? 'z' :
 		(hardirq && softirq) ? 'H' :
-		hardirq ? 'h' :
-		softirq ? 's' :
-		'.';
+		hardirq              ? 'h' :
+		softirq              ? 's' :
+		                       '.' ;
 
 	trace_seq_printf(s, "%c%c%c",
 			 irqs_off, need_resched, hardsoft_irq);

diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 060df67..f96f038 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c

@@ -296,6 +296,9 @@
 	const char *str = *fmt;
 	int i;
 
+	if (!*fmt)
+		return 0;
+
 	seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
 
 	/*

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1769a81..1d372fa 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c

@@ -636,8 +636,8 @@
 			*tmp = '\0';
 			size = tmp - kbuf + 1;
 		} else if (done + size < count) {
-			pr_warning("Line length is too long: "
-				   "Should be less than %d.", WRITE_BUFSIZE);
+			pr_warn("Line length is too long: Should be less than %d\n",
+				WRITE_BUFSIZE);
 			ret = -EINVAL;
 			goto out;
 		}

diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 6cf9353..413ff10 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c

@@ -281,8 +281,7 @@
 
 	stat_dir = tracefs_create_dir("trace_stat", d_tracing);
 	if (!stat_dir)
-		pr_warning("Could not create tracefs "
-			   "'trace_stat' entry\n");
+		pr_warn("Could not create tracefs 'trace_stat' entry\n");
 	return 0;
 }
 

diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d166308..e78f364 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c

@@ -106,6 +106,17 @@
 	return syscalls_metadata[nr];
 }
 
+const char *get_syscall_name(int syscall)
+{
+	struct syscall_metadata *entry;
+
+	entry = syscall_nr_to_meta(syscall);
+	if (!entry)
+		return NULL;
+
+	return entry->name;
+}
+
 static enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags,
 		    struct trace_event *event)

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d2f6d0b..7915142 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c

@@ -334,7 +334,7 @@
 
 	ret = register_uprobe_event(tu);
 	if (ret) {
-		pr_warning("Failed to register probe event(%d)\n", ret);
+		pr_warn("Failed to register probe event(%d)\n", ret);
 		goto end;
 	}
 

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index ecd536d..d0639d9 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c

@@ -491,7 +491,7 @@
 
 	ret = register_module_notifier(&tracepoint_module_nb);
 	if (ret)
-		pr_warning("Failed to register tracepoint module enter notifier\n");
+		pr_warn("Failed to register tracepoint module enter notifier\n");
 
 	return ret;
 }

diff --git a/lib/Kconfig b/lib/Kconfig
index 133ebc0..3cca122 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig

@@ -536,4 +536,8 @@
 config ARCH_HAS_MMIO_FLUSH
 	bool
 
+config STACKDEPOT
+	bool
+	select STACKTRACE
+
 endmenu

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5a60f45..1e9a607 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug

@@ -558,7 +558,7 @@
 
 config DEBUG_STACK_USAGE
 	bool "Stack utilization instrumentation"
-	depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG
+	depends on DEBUG_KERNEL && !IA64
 	help
 	  Enables the display of the minimum amount of free stack which each
 	  task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -696,6 +696,27 @@
 
 endmenu # "Memory Debugging"
 
+config ARCH_HAS_KCOV
+	bool
+	help
+	  KCOV does not have any arch-specific code, but currently it is enabled
+	  only for x86_64. KCOV requires testing on other archs, and most likely
+	  disabling of instrumentation for some early boot code.
+
+config KCOV
+	bool "Code coverage for fuzzing"
+	depends on ARCH_HAS_KCOV
+	select DEBUG_FS
+	help
+	  KCOV exposes kernel code coverage information in a form suitable
+	  for coverage-guided fuzzing (randomized testing).
+
+	  If RANDOMIZE_BASE is enabled, PC values will not be stable across
+	  different machines and across reboots. If you need stable PC values,
+	  disable RANDOMIZE_BASE.
+
+	  For more details, see Documentation/kcov.txt.
+
 config DEBUG_SHIRQ
 	bool "Debug shared IRQ handlers"
 	depends on DEBUG_KERNEL

diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 0fee5ac..67d8c68 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan

@@ -5,8 +5,9 @@
 
 config KASAN
 	bool "KASan: runtime memory debugger"
-	depends on SLUB_DEBUG
+	depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB)
 	select CONSTRUCTORS
+	select STACKDEPOT if SLAB
 	help
 	  Enables kernel address sanitizer - runtime memory debugger,
 	  designed to find out-of-bounds accesses and use-after-free bugs.
@@ -16,6 +17,8 @@
 	  This feature consumes about 1/8 of available memory and brings about
 	  ~x3 performance slowdown.
 	  For better error detection enable CONFIG_STACKTRACE.
+	  Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB
+	  (the resulting kernel does not boot).
 
 choice
 	prompt "Instrumentation type"

diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index e07c1ba..39494af 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan

@@ -13,6 +13,11 @@
 	bool "Enable instrumentation for the entire kernel"
 	depends on UBSAN
 	depends on ARCH_HAS_UBSAN_SANITIZE_ALL
+
+	# We build with -Wno-maybe-uninitilzed, but we still want to
+	# use -Wmaybe-uninitilized in allmodconfig builds.
+	# So dependsy bellow used to disable this option in allmodconfig
+	depends on !COMPILE_TEST
 	default y
 	help
 	  This option activates instrumentation for the entire kernel.

diff --git a/lib/Makefile b/lib/Makefile
index 4962d14c..7bd6fd4 100644
--- a/lib/Makefile
+++ b/lib/Makefile

@@ -7,6 +7,18 @@
 KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
 endif
 
+# These files are disabled because they produce lots of non-interesting and/or
+# flaky coverage that is not a function of syscall inputs. For example,
+# rbtree can be global and individual rotations don't correlate with inputs.
+KCOV_INSTRUMENT_string.o := n
+KCOV_INSTRUMENT_rbtree.o := n
+KCOV_INSTRUMENT_list_debug.o := n
+KCOV_INSTRUMENT_debugobjects.o := n
+KCOV_INSTRUMENT_dynamic_debug.o := n
+# Kernel does not boot if we instrument this file as it uses custom calling
+# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
+KCOV_INSTRUMENT_hweight.o := n
+
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
@@ -169,6 +181,9 @@
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
 obj-$(CONFIG_IRQ_POLL) += irq_poll.o
 
+obj-$(CONFIG_STACKDEPOT) += stackdepot.o
+KASAN_SANITIZE_stackdepot.o := n
+
 libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
 	       fdt_empty_tree.o
 $(foreach file, $(libfdt_files), \

diff --git a/lib/stackdepot.c b/lib/stackdepot.c
new file mode 100644
index 0000000..654c9d8
--- /dev/null
+++ b/lib/stackdepot.c

@@ -0,0 +1,284 @@
+/*
+ * Generic stack depot for storing stack traces.
+ *
+ * Some debugging tools need to save stack traces of certain events which can
+ * be later presented to the user. For example, KASAN needs to safe alloc and
+ * free stacks for each object, but storing two stack traces per object
+ * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
+ * that).
+ *
+ * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
+ * and free stacks repeat a lot, we save about 100x space.
+ * Stacks are never removed from depot, so we store them contiguously one after
+ * another in a contiguos memory allocation.
+ *
+ * Author: Alexander Potapenko <glider@google.com>
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * Based on code by Dmitry Chernenkov.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <linux/gfp.h>
+#include <linux/jhash.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/stacktrace.h>
+#include <linux/stackdepot.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
+
+#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
+#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
+#define STACK_ALLOC_ALIGN 4
+#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
+					STACK_ALLOC_ALIGN)
+#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - STACK_ALLOC_OFFSET_BITS)
+#define STACK_ALLOC_SLABS_CAP 1024
+#define STACK_ALLOC_MAX_SLABS \
+	(((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
+	 (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
+
+/* The compact structure to store the reference to stacks. */
+union handle_parts {
+	depot_stack_handle_t handle;
+	struct {
+		u32 slabindex : STACK_ALLOC_INDEX_BITS;
+		u32 offset : STACK_ALLOC_OFFSET_BITS;
+	};
+};
+
+struct stack_record {
+	struct stack_record *next;	/* Link in the hashtable */
+	u32 hash;			/* Hash in the hastable */
+	u32 size;			/* Number of frames in the stack */
+	union handle_parts handle;
+	unsigned long entries[1];	/* Variable-sized array of entries. */
+};
+
+static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
+
+static int depot_index;
+static int next_slab_inited;
+static size_t depot_offset;
+static DEFINE_SPINLOCK(depot_lock);
+
+static bool init_stack_slab(void **prealloc)
+{
+	if (!*prealloc)
+		return false;
+	/*
+	 * This smp_load_acquire() pairs with smp_store_release() to
+	 * |next_slab_inited| below and in depot_alloc_stack().
+	 */
+	if (smp_load_acquire(&next_slab_inited))
+		return true;
+	if (stack_slabs[depot_index] == NULL) {
+		stack_slabs[depot_index] = *prealloc;
+	} else {
+		stack_slabs[depot_index + 1] = *prealloc;
+		/*
+		 * This smp_store_release pairs with smp_load_acquire() from
+		 * |next_slab_inited| above and in depot_save_stack().
+		 */
+		smp_store_release(&next_slab_inited, 1);
+	}
+	*prealloc = NULL;
+	return true;
+}
+
+/* Allocation of a new stack in raw storage */
+static struct stack_record *depot_alloc_stack(unsigned long *entries, int size,
+		u32 hash, void **prealloc, gfp_t alloc_flags)
+{
+	int required_size = offsetof(struct stack_record, entries) +
+		sizeof(unsigned long) * size;
+	struct stack_record *stack;
+
+	required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);
+
+	if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
+		if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
+			WARN_ONCE(1, "Stack depot reached limit capacity");
+			return NULL;
+		}
+		depot_index++;
+		depot_offset = 0;
+		/*
+		 * smp_store_release() here pairs with smp_load_acquire() from
+		 * |next_slab_inited| in depot_save_stack() and
+		 * init_stack_slab().
+		 */
+		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
+			smp_store_release(&next_slab_inited, 0);
+	}
+	init_stack_slab(prealloc);
+	if (stack_slabs[depot_index] == NULL)
+		return NULL;
+
+	stack = stack_slabs[depot_index] + depot_offset;
+
+	stack->hash = hash;
+	stack->size = size;
+	stack->handle.slabindex = depot_index;
+	stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
+	memcpy(stack->entries, entries, size * sizeof(unsigned long));
+	depot_offset += required_size;
+
+	return stack;
+}
+
+#define STACK_HASH_ORDER 20
+#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
+#define STACK_HASH_SEED 0x9747b28c
+
+static struct stack_record *stack_table[STACK_HASH_SIZE] = {
+	[0 ...	STACK_HASH_SIZE - 1] = NULL
+};
+
+/* Calculate hash for a stack */
+static inline u32 hash_stack(unsigned long *entries, unsigned int size)
+{
+	return jhash2((u32 *)entries,
+			       size * sizeof(unsigned long) / sizeof(u32),
+			       STACK_HASH_SEED);
+}
+
+/* Find a stack that is equal to the one stored in entries in the hash */
+static inline struct stack_record *find_stack(struct stack_record *bucket,
+					     unsigned long *entries, int size,
+					     u32 hash)
+{
+	struct stack_record *found;
+
+	for (found = bucket; found; found = found->next) {
+		if (found->hash == hash &&
+		    found->size == size &&
+		    !memcmp(entries, found->entries,
+			    size * sizeof(unsigned long))) {
+			return found;
+		}
+	}
+	return NULL;
+}
+
+void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace)
+{
+	union handle_parts parts = { .handle = handle };
+	void *slab = stack_slabs[parts.slabindex];
+	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
+	struct stack_record *stack = slab + offset;
+
+	trace->nr_entries = trace->max_entries = stack->size;
+	trace->entries = stack->entries;
+	trace->skip = 0;
+}
+
+/**
+ * depot_save_stack - save stack in a stack depot.
+ * @trace - the stacktrace to save.
+ * @alloc_flags - flags for allocating additional memory if required.
+ *
+ * Returns the handle of the stack struct stored in depot.
+ */
+depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
+				    gfp_t alloc_flags)
+{
+	u32 hash;
+	depot_stack_handle_t retval = 0;
+	struct stack_record *found = NULL, **bucket;
+	unsigned long flags;
+	struct page *page = NULL;
+	void *prealloc = NULL;
+
+	if (unlikely(trace->nr_entries == 0))
+		goto fast_exit;
+
+	hash = hash_stack(trace->entries, trace->nr_entries);
+	/* Bad luck, we won't store this stack. */
+	if (hash == 0)
+		goto exit;
+
+	bucket = &stack_table[hash & STACK_HASH_MASK];
+
+	/*
+	 * Fast path: look the stack trace up without locking.
+	 * The smp_load_acquire() here pairs with smp_store_release() to
+	 * |bucket| below.
+	 */
+	found = find_stack(smp_load_acquire(bucket), trace->entries,
+			   trace->nr_entries, hash);
+	if (found)
+		goto exit;
+
+	/*
+	 * Check if the current or the next stack slab need to be initialized.
+	 * If so, allocate the memory - we won't be able to do that under the
+	 * lock.
+	 *
+	 * The smp_load_acquire() here pairs with smp_store_release() to
+	 * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
+	 */
+	if (unlikely(!smp_load_acquire(&next_slab_inited))) {
+		/*
+		 * Zero out zone modifiers, as we don't have specific zone
+		 * requirements. Keep the flags related to allocation in atomic
+		 * contexts and I/O.
+		 */
+		alloc_flags &= ~GFP_ZONEMASK;
+		alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
+		page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
+		if (page)
+			prealloc = page_address(page);
+	}
+
+	spin_lock_irqsave(&depot_lock, flags);
+
+	found = find_stack(*bucket, trace->entries, trace->nr_entries, hash);
+	if (!found) {
+		struct stack_record *new =
+			depot_alloc_stack(trace->entries, trace->nr_entries,
+					  hash, &prealloc, alloc_flags);
+		if (new) {
+			new->next = *bucket;
+			/*
+			 * This smp_store_release() pairs with
+			 * smp_load_acquire() from |bucket| above.
+			 */
+			smp_store_release(bucket, new);
+			found = new;
+		}
+	} else if (prealloc) {
+		/*
+		 * We didn't need to store this stack trace, but let's keep
+		 * the preallocated memory for the future.
+		 */
+		WARN_ON(!init_stack_slab(&prealloc));
+	}
+
+	spin_unlock_irqrestore(&depot_lock, flags);
+exit:
+	if (prealloc) {
+		/* Nobody used this memory, ok to free it. */
+		free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
+	}
+	if (found)
+		retval = found->handle.handle;
+fast_exit:
+	return retval;
+}

diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index c32f3b0..82169fb 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c

@@ -65,11 +65,34 @@
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_large_oob_right(void)
+#ifdef CONFIG_SLUB
+static noinline void __init kmalloc_pagealloc_oob_right(void)
 {
 	char *ptr;
 	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
 
+	/* Allocate a chunk that does not fit into a SLUB cache to trigger
+	 * the page allocator fallback.
+	 */
+	pr_info("kmalloc pagealloc allocation: out-of-bounds to right\n");
+	ptr = kmalloc(size, GFP_KERNEL);
+	if (!ptr) {
+		pr_err("Allocation failed\n");
+		return;
+	}
+
+	ptr[size] = 0;
+	kfree(ptr);
+}
+#endif
+
+static noinline void __init kmalloc_large_oob_right(void)
+{
+	char *ptr;
+	size_t size = KMALLOC_MAX_CACHE_SIZE - 256;
+	/* Allocate a chunk that is large enough, but still fits into a slab
+	 * and does not trigger the page allocator fallback in SLUB.
+	 */
 	pr_info("kmalloc large allocation: out-of-bounds to right\n");
 	ptr = kmalloc(size, GFP_KERNEL);
 	if (!ptr) {
@@ -271,6 +294,8 @@
 	}
 
 	ptr1[40] = 'x';
+	if (ptr1 == ptr2)
+		pr_err("Could not detect use-after-free: ptr1 == ptr2\n");
 	kfree(ptr2);
 }
 
@@ -324,6 +349,9 @@
 	kmalloc_oob_right();
 	kmalloc_oob_left();
 	kmalloc_node_oob_right();
+#ifdef CONFIG_SLUB
+	kmalloc_pagealloc_oob_right();
+#endif
 	kmalloc_large_oob_right();
 	kmalloc_oob_krealloc_more();
 	kmalloc_oob_krealloc_less();

diff --git a/mm/Makefile b/mm/Makefile
index 6da300a..deb467e 100644
--- a/mm/Makefile
+++ b/mm/Makefile

@@ -3,8 +3,24 @@
 #
 
 KASAN_SANITIZE_slab_common.o := n
+KASAN_SANITIZE_slab.o := n
 KASAN_SANITIZE_slub.o := n
 
+# These files are disabled because they produce non-interesting and/or
+# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
+# free pages, or a task is migrated between nodes.
+KCOV_INSTRUMENT_slab_common.o := n
+KCOV_INSTRUMENT_slob.o := n
+KCOV_INSTRUMENT_slab.o := n
+KCOV_INSTRUMENT_slub.o := n
+KCOV_INSTRUMENT_page_alloc.o := n
+KCOV_INSTRUMENT_debug-pagealloc.o := n
+KCOV_INSTRUMENT_kmemleak.o := n
+KCOV_INSTRUMENT_kmemcheck.o := n
+KCOV_INSTRUMENT_memcontrol.o := n
+KCOV_INSTRUMENT_mmzone.o := n
+KCOV_INSTRUMENT_vmstat.o := n
+
 mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= gup.o highmem.o memory.o mincore.o \
 			   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \

diff --git a/mm/fadvise.c b/mm/fadvise.c
index b8a5bc6..b8024fa 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c

@@ -97,8 +97,8 @@
 		break;
 	case POSIX_FADV_WILLNEED:
 		/* First and last PARTIAL page! */
-		start_index = offset >> PAGE_CACHE_SHIFT;
-		end_index = endbyte >> PAGE_CACHE_SHIFT;
+		start_index = offset >> PAGE_SHIFT;
+		end_index = endbyte >> PAGE_SHIFT;
 
 		/* Careful about overflow on the "+1" */
 		nrpages = end_index - start_index + 1;
@@ -124,8 +124,8 @@
 		 * preserved on the expectation that it is better to preserve
 		 * needed memory than to discard unneeded memory.
 		 */
-		start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
-		end_index = (endbyte >> PAGE_CACHE_SHIFT);
+		start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
+		end_index = (endbyte >> PAGE_SHIFT);
 
 		if (end_index >= start_index) {
 			unsigned long count = invalidate_mapping_pages(mapping,

diff --git a/mm/filemap.c b/mm/filemap.c
index 7c00f10..f2479af 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -265,7 +265,7 @@
 
 	if (freepage)
 		freepage(page);
-	page_cache_release(page);
+	put_page(page);
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
@@ -352,8 +352,8 @@
 static int __filemap_fdatawait_range(struct address_space *mapping,
 				     loff_t start_byte, loff_t end_byte)
 {
-	pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
-	pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
+	pgoff_t index = start_byte >> PAGE_SHIFT;
+	pgoff_t end = end_byte >> PAGE_SHIFT;
 	struct pagevec pvec;
 	int nr_pages;
 	int ret = 0;
@@ -550,7 +550,7 @@
 		pgoff_t offset = old->index;
 		freepage = mapping->a_ops->freepage;
 
-		page_cache_get(new);
+		get_page(new);
 		new->mapping = mapping;
 		new->index = offset;
 
@@ -572,7 +572,7 @@
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
-		page_cache_release(old);
+		put_page(old);
 	}
 
 	return error;
@@ -651,7 +651,7 @@
 		return error;
 	}
 
-	page_cache_get(page);
+	get_page(page);
 	page->mapping = mapping;
 	page->index = offset;
 
@@ -675,7 +675,7 @@
 	spin_unlock_irq(&mapping->tree_lock);
 	if (!huge)
 		mem_cgroup_cancel_charge(page, memcg, false);
-	page_cache_release(page);
+	put_page(page);
 	return error;
 }
 
@@ -1083,7 +1083,7 @@
 		 * include/linux/pagemap.h for details.
 		 */
 		if (unlikely(page != *pagep)) {
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 	}
@@ -1121,7 +1121,7 @@
 		/* Has the page been truncated? */
 		if (unlikely(page->mapping != mapping)) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 		VM_BUG_ON_PAGE(page->index != offset, page);
@@ -1168,7 +1168,7 @@
 	if (fgp_flags & FGP_LOCK) {
 		if (fgp_flags & FGP_NOWAIT) {
 			if (!trylock_page(page)) {
-				page_cache_release(page);
+				put_page(page);
 				return NULL;
 			}
 		} else {
@@ -1178,7 +1178,7 @@
 		/* Has the page been truncated? */
 		if (unlikely(page->mapping != mapping)) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 		VM_BUG_ON_PAGE(page->index != offset, page);
@@ -1209,7 +1209,7 @@
 		err = add_to_page_cache_lru(page, mapping, offset,
 				gfp_mask & GFP_RECLAIM_MASK);
 		if (unlikely(err)) {
-			page_cache_release(page);
+			put_page(page);
 			page = NULL;
 			if (err == -EEXIST)
 				goto repeat;
@@ -1278,7 +1278,7 @@
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 export:
@@ -1343,7 +1343,7 @@
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 
@@ -1405,7 +1405,7 @@
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 
@@ -1415,7 +1415,7 @@
 		 * negatives, which is just confusing to the caller.
 		 */
 		if (page->mapping == NULL || page->index != iter.index) {
-			page_cache_release(page);
+			put_page(page);
 			break;
 		}
 
@@ -1482,7 +1482,7 @@
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 
@@ -1549,7 +1549,7 @@
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 export:
@@ -1610,11 +1610,11 @@
 	unsigned int prev_offset;
 	int error = 0;
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
-	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
-	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
-	last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-	offset = *ppos & ~PAGE_CACHE_MASK;
+	index = *ppos >> PAGE_SHIFT;
+	prev_index = ra->prev_pos >> PAGE_SHIFT;
+	prev_offset = ra->prev_pos & (PAGE_SIZE-1);
+	last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
+	offset = *ppos & ~PAGE_MASK;
 
 	for (;;) {
 		struct page *page;
@@ -1648,7 +1648,7 @@
 			if (PageUptodate(page))
 				goto page_ok;
 
-			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
+			if (inode->i_blkbits == PAGE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
 			if (!trylock_page(page))
@@ -1672,18 +1672,18 @@
 		 */
 
 		isize = i_size_read(inode);
-		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+		end_index = (isize - 1) >> PAGE_SHIFT;
 		if (unlikely(!isize || index > end_index)) {
-			page_cache_release(page);
+			put_page(page);
 			goto out;
 		}
 
 		/* nr is the maximum number of bytes to copy from this page */
-		nr = PAGE_CACHE_SIZE;
+		nr = PAGE_SIZE;
 		if (index == end_index) {
-			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			nr = ((isize - 1) & ~PAGE_MASK) + 1;
 			if (nr <= offset) {
-				page_cache_release(page);
+				put_page(page);
 				goto out;
 			}
 		}
@@ -1711,11 +1711,11 @@
 
 		ret = copy_page_to_iter(page, offset, nr, iter);
 		offset += ret;
-		index += offset >> PAGE_CACHE_SHIFT;
-		offset &= ~PAGE_CACHE_MASK;
+		index += offset >> PAGE_SHIFT;
+		offset &= ~PAGE_MASK;
 		prev_offset = offset;
 
-		page_cache_release(page);
+		put_page(page);
 		written += ret;
 		if (!iov_iter_count(iter))
 			goto out;
@@ -1735,7 +1735,7 @@
 		/* Did it get truncated before we got the lock? */
 		if (!page->mapping) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			continue;
 		}
 
@@ -1757,7 +1757,7 @@
 
 		if (unlikely(error)) {
 			if (error == AOP_TRUNCATED_PAGE) {
-				page_cache_release(page);
+				put_page(page);
 				error = 0;
 				goto find_page;
 			}
@@ -1774,7 +1774,7 @@
 					 * invalidate_mapping_pages got it
 					 */
 					unlock_page(page);
-					page_cache_release(page);
+					put_page(page);
 					goto find_page;
 				}
 				unlock_page(page);
@@ -1789,7 +1789,7 @@
 
 readpage_error:
 		/* UHHUH! A synchronous read error occurred. Report it */
-		page_cache_release(page);
+		put_page(page);
 		goto out;
 
 no_cached_page:
@@ -1805,7 +1805,7 @@
 		error = add_to_page_cache_lru(page, mapping, index,
 				mapping_gfp_constraint(mapping, GFP_KERNEL));
 		if (error) {
-			page_cache_release(page);
+			put_page(page);
 			if (error == -EEXIST) {
 				error = 0;
 				goto find_page;
@@ -1817,10 +1817,10 @@
 
 out:
 	ra->prev_pos = prev_index;
-	ra->prev_pos <<= PAGE_CACHE_SHIFT;
+	ra->prev_pos <<= PAGE_SHIFT;
 	ra->prev_pos |= prev_offset;
 
-	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
+	*ppos = ((loff_t)index << PAGE_SHIFT) + offset;
 	file_accessed(filp);
 	return written ? written : error;
 }
@@ -1840,15 +1840,16 @@
 	ssize_t retval = 0;
 	loff_t *ppos = &iocb->ki_pos;
 	loff_t pos = *ppos;
+	size_t count = iov_iter_count(iter);
+
+	if (!count)
+		goto out; /* skip atime */
 
 	if (iocb->ki_flags & IOCB_DIRECT) {
 		struct address_space *mapping = file->f_mapping;
 		struct inode *inode = mapping->host;
-		size_t count = iov_iter_count(iter);
 		loff_t size;
 
-		if (!count)
-			goto out; /* skip atime */
 		size = i_size_read(inode);
 		retval = filemap_write_and_wait_range(mapping, pos,
 					pos + count - 1);
@@ -1911,7 +1912,7 @@
 		else if (ret == -EEXIST)
 			ret = 0; /* losing race to add is OK */
 
-		page_cache_release(page);
+		put_page(page);
 
 	} while (ret == AOP_TRUNCATED_PAGE);
 
@@ -2021,8 +2022,8 @@
 	loff_t size;
 	int ret = 0;
 
-	size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
-	if (offset >= size >> PAGE_CACHE_SHIFT)
+	size = round_up(i_size_read(inode), PAGE_SIZE);
+	if (offset >= size >> PAGE_SHIFT)
 		return VM_FAULT_SIGBUS;
 
 	/*
@@ -2048,7 +2049,7 @@
 	}
 
 	if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
-		page_cache_release(page);
+		put_page(page);
 		return ret | VM_FAULT_RETRY;
 	}
 
@@ -2071,10 +2072,10 @@
 	 * Found the page and have a reference on it.
 	 * We must recheck i_size under page lock.
 	 */
-	size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
-	if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) {
+	size = round_up(i_size_read(inode), PAGE_SIZE);
+	if (unlikely(offset >= size >> PAGE_SHIFT)) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -2119,7 +2120,7 @@
 		if (!PageUptodate(page))
 			error = -EIO;
 	}
-	page_cache_release(page);
+	put_page(page);
 
 	if (!error || error == AOP_TRUNCATED_PAGE)
 		goto retry_find;
@@ -2163,7 +2164,7 @@
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			page_cache_release(page);
+			put_page(page);
 			goto repeat;
 		}
 
@@ -2177,8 +2178,8 @@
 		if (page->mapping != mapping || !PageUptodate(page))
 			goto unlock;
 
-		size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE);
-		if (page->index >= size >> PAGE_CACHE_SHIFT)
+		size = round_up(i_size_read(mapping->host), PAGE_SIZE);
+		if (page->index >= size >> PAGE_SHIFT)
 			goto unlock;
 
 		pte = vmf->pte + page->index - vmf->pgoff;
@@ -2194,7 +2195,7 @@
 unlock:
 		unlock_page(page);
 skip:
-		page_cache_release(page);
+		put_page(page);
 next:
 		if (iter.index == vmf->max_pgoff)
 			break;
@@ -2277,7 +2278,7 @@
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
 		if (!PageUptodate(page)) {
-			page_cache_release(page);
+			put_page(page);
 			page = ERR_PTR(-EIO);
 		}
 	}
@@ -2300,7 +2301,7 @@
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, gfp);
 		if (unlikely(err)) {
-			page_cache_release(page);
+			put_page(page);
 			if (err == -EEXIST)
 				goto repeat;
 			/* Presumably ENOMEM for radix tree node */
@@ -2310,7 +2311,7 @@
 filler:
 		err = filler(data, page);
 		if (err < 0) {
-			page_cache_release(page);
+			put_page(page);
 			return ERR_PTR(err);
 		}
 
@@ -2363,7 +2364,7 @@
 	/* Case c or d, restart the operation */
 	if (!page->mapping) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		goto repeat;
 	}
 
@@ -2510,7 +2511,7 @@
 	struct iov_iter data;
 
 	write_len = iov_iter_count(from);
-	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
+	end = (pos + write_len - 1) >> PAGE_SHIFT;
 
 	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
 	if (written)
@@ -2524,7 +2525,7 @@
 	 */
 	if (mapping->nrpages) {
 		written = invalidate_inode_pages2_range(mapping,
-					pos >> PAGE_CACHE_SHIFT, end);
+					pos >> PAGE_SHIFT, end);
 		/*
 		 * If a page can not be invalidated, return 0 to fall back
 		 * to buffered write.
@@ -2549,7 +2550,7 @@
 	 */
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
-					      pos >> PAGE_CACHE_SHIFT, end);
+					      pos >> PAGE_SHIFT, end);
 	}
 
 	if (written > 0) {
@@ -2610,8 +2611,8 @@
 		size_t copied;		/* Bytes copied from user */
 		void *fsdata;
 
-		offset = (pos & (PAGE_CACHE_SIZE - 1));
-		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+		offset = (pos & (PAGE_SIZE - 1));
+		bytes = min_t(unsigned long, PAGE_SIZE - offset,
 						iov_iter_count(i));
 
 again:
@@ -2664,7 +2665,7 @@
 			 * because not all segments in the iov can be copied at
 			 * once without a pagefault.
 			 */
-			bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+			bytes = min_t(unsigned long, PAGE_SIZE - offset,
 						iov_iter_single_seg_count(i));
 			goto again;
 		}
@@ -2751,8 +2752,8 @@
 			iocb->ki_pos = endbyte + 1;
 			written += status;
 			invalidate_mapping_pages(mapping,
-						 pos >> PAGE_CACHE_SHIFT,
-						 endbyte >> PAGE_CACHE_SHIFT);
+						 pos >> PAGE_SHIFT,
+						 endbyte >> PAGE_SHIFT);
 		} else {
 			/*
 			 * We don't know how much we wrote, so just return

diff --git a/mm/gup.c b/mm/gup.c
index 7f1c4fb..fb87aea 100644
--- a/mm/gup.c
+++ b/mm/gup.c

@@ -1107,7 +1107,7 @@
  * @addr: user address
  *
  * Returns struct page pointer of user page pinned for dump,
- * to be freed afterwards by page_cache_release() or put_page().
+ * to be freed afterwards by put_page().
  *
  * Returns NULL on any kind of failure - a hole must then be inserted into
  * the corefile, to preserve alignment with its headers; and also returns

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fbfb1b8..86f9f8b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c

@@ -2578,7 +2578,7 @@
 		}
 		khugepaged_node_load[node]++;
 		if (!PageLRU(page)) {
-			result = SCAN_SCAN_ABORT;
+			result = SCAN_PAGE_LRU;
 			goto out_unmap;
 		}
 		if (PageLocked(page)) {

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 06058ea..19d0d08 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c

@@ -3346,7 +3346,7 @@
 			old_page != pagecache_page)
 		outside_reserve = 1;
 
-	page_cache_get(old_page);
+	get_page(old_page);
 
 	/*
 	 * Drop page table lock as buddy allocator may be called. It will
@@ -3364,7 +3364,7 @@
 		 * may get SIGKILLed if it later faults.
 		 */
 		if (outside_reserve) {
-			page_cache_release(old_page);
+			put_page(old_page);
 			BUG_ON(huge_pte_none(pte));
 			unmap_ref_private(mm, vma, old_page, address);
 			BUG_ON(huge_pte_none(pte));
@@ -3425,9 +3425,9 @@
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out_release_all:
-	page_cache_release(new_page);
+	put_page(new_page);
 out_release_old:
-	page_cache_release(old_page);
+	put_page(old_page);
 
 	spin_lock(ptl); /* Caller expects lock to be held */
 	return ret;

diff --git a/mm/internal.h b/mm/internal.h
index 7449392..b79abb6 100644
--- a/mm/internal.h
+++ b/mm/internal.h

@@ -38,6 +38,11 @@
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
+void unmap_page_range(struct mmu_gather *tlb,
+			     struct vm_area_struct *vma,
+			     unsigned long addr, unsigned long end,
+			     struct zap_details *details);
+
 extern int __do_page_cache_readahead(struct address_space *mapping,
 		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
 		unsigned long lookahead_size);

diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index a61460d..131daad 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile

@@ -1,5 +1,6 @@
 KASAN_SANITIZE := n
 UBSAN_SANITIZE_kasan.o := n
+KCOV_INSTRUMENT := n
 
 CFLAGS_REMOVE_kasan.o = -pg
 # Function splitter causes unnecessary splits in __asan_load1/__asan_store1

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 1ad20ad..38f1dd7 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c

@@ -17,7 +17,9 @@
 #define DISABLE_BRANCH_PROFILING
 
 #include <linux/export.h>
+#include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
 #include <linux/linkage.h>
@@ -32,7 +34,6 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
-#include <linux/kasan.h>
 
 #include "kasan.h"
 #include "../slab.h"
@@ -334,6 +335,59 @@
 				KASAN_FREE_PAGE);
 }
 
+#ifdef CONFIG_SLAB
+/*
+ * Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
+ * For larger allocations larger redzones are used.
+ */
+static size_t optimal_redzone(size_t object_size)
+{
+	int rz =
+		object_size <= 64        - 16   ? 16 :
+		object_size <= 128       - 32   ? 32 :
+		object_size <= 512       - 64   ? 64 :
+		object_size <= 4096      - 128  ? 128 :
+		object_size <= (1 << 14) - 256  ? 256 :
+		object_size <= (1 << 15) - 512  ? 512 :
+		object_size <= (1 << 16) - 1024 ? 1024 : 2048;
+	return rz;
+}
+
+void kasan_cache_create(struct kmem_cache *cache, size_t *size,
+			unsigned long *flags)
+{
+	int redzone_adjust;
+	/* Make sure the adjusted size is still less than
+	 * KMALLOC_MAX_CACHE_SIZE.
+	 * TODO: this check is only useful for SLAB, but not SLUB. We'll need
+	 * to skip it for SLUB when it starts using kasan_cache_create().
+	 */
+	if (*size > KMALLOC_MAX_CACHE_SIZE -
+	    sizeof(struct kasan_alloc_meta) -
+	    sizeof(struct kasan_free_meta))
+		return;
+	*flags |= SLAB_KASAN;
+	/* Add alloc meta. */
+	cache->kasan_info.alloc_meta_offset = *size;
+	*size += sizeof(struct kasan_alloc_meta);
+
+	/* Add free meta. */
+	if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor ||
+	    cache->object_size < sizeof(struct kasan_free_meta)) {
+		cache->kasan_info.free_meta_offset = *size;
+		*size += sizeof(struct kasan_free_meta);
+	}
+	redzone_adjust = optimal_redzone(cache->object_size) -
+		(*size - cache->object_size);
+	if (redzone_adjust > 0)
+		*size += redzone_adjust;
+	*size = min(KMALLOC_MAX_CACHE_SIZE,
+		    max(*size,
+			cache->object_size +
+			optimal_redzone(cache->object_size)));
+}
+#endif
+
 void kasan_poison_slab(struct page *page)
 {
 	kasan_poison_shadow(page_address(page),
@@ -351,11 +405,81 @@
 	kasan_poison_shadow(object,
 			round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE),
 			KASAN_KMALLOC_REDZONE);
+#ifdef CONFIG_SLAB
+	if (cache->flags & SLAB_KASAN) {
+		struct kasan_alloc_meta *alloc_info =
+			get_alloc_info(cache, object);
+		alloc_info->state = KASAN_STATE_INIT;
+	}
+#endif
 }
 
-void kasan_slab_alloc(struct kmem_cache *cache, void *object)
+#ifdef CONFIG_SLAB
+static inline int in_irqentry_text(unsigned long ptr)
 {
-	kasan_kmalloc(cache, object, cache->object_size);
+	return (ptr >= (unsigned long)&__irqentry_text_start &&
+		ptr < (unsigned long)&__irqentry_text_end) ||
+		(ptr >= (unsigned long)&__softirqentry_text_start &&
+		 ptr < (unsigned long)&__softirqentry_text_end);
+}
+
+static inline void filter_irq_stacks(struct stack_trace *trace)
+{
+	int i;
+
+	if (!trace->nr_entries)
+		return;
+	for (i = 0; i < trace->nr_entries; i++)
+		if (in_irqentry_text(trace->entries[i])) {
+			/* Include the irqentry function into the stack. */
+			trace->nr_entries = i + 1;
+			break;
+		}
+}
+
+static inline depot_stack_handle_t save_stack(gfp_t flags)
+{
+	unsigned long entries[KASAN_STACK_DEPTH];
+	struct stack_trace trace = {
+		.nr_entries = 0,
+		.entries = entries,
+		.max_entries = KASAN_STACK_DEPTH,
+		.skip = 0
+	};
+
+	save_stack_trace(&trace);
+	filter_irq_stacks(&trace);
+	if (trace.nr_entries != 0 &&
+	    trace.entries[trace.nr_entries-1] == ULONG_MAX)
+		trace.nr_entries--;
+
+	return depot_save_stack(&trace, flags);
+}
+
+static inline void set_track(struct kasan_track *track, gfp_t flags)
+{
+	track->pid = current->pid;
+	track->stack = save_stack(flags);
+}
+
+struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
+					const void *object)
+{
+	BUILD_BUG_ON(sizeof(struct kasan_alloc_meta) > 32);
+	return (void *)object + cache->kasan_info.alloc_meta_offset;
+}
+
+struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
+				      const void *object)
+{
+	BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
+	return (void *)object + cache->kasan_info.free_meta_offset;
+}
+#endif
+
+void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
+{
+	kasan_kmalloc(cache, object, cache->object_size, flags);
 }
 
 void kasan_slab_free(struct kmem_cache *cache, void *object)
@@ -367,10 +491,22 @@
 	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
 		return;
 
+#ifdef CONFIG_SLAB
+	if (cache->flags & SLAB_KASAN) {
+		struct kasan_free_meta *free_info =
+			get_free_info(cache, object);
+		struct kasan_alloc_meta *alloc_info =
+			get_alloc_info(cache, object);
+		alloc_info->state = KASAN_STATE_FREE;
+		set_track(&free_info->track, GFP_NOWAIT);
+	}
+#endif
+
 	kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
 }
 
-void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
+void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size,
+		   gfp_t flags)
 {
 	unsigned long redzone_start;
 	unsigned long redzone_end;
@@ -386,10 +522,20 @@
 	kasan_unpoison_shadow(object, size);
 	kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
 		KASAN_KMALLOC_REDZONE);
+#ifdef CONFIG_SLAB
+	if (cache->flags & SLAB_KASAN) {
+		struct kasan_alloc_meta *alloc_info =
+			get_alloc_info(cache, object);
+
+		alloc_info->state = KASAN_STATE_ALLOC;
+		alloc_info->alloc_size = size;
+		set_track(&alloc_info->track, flags);
+	}
+#endif
 }
 EXPORT_SYMBOL(kasan_kmalloc);
 
-void kasan_kmalloc_large(const void *ptr, size_t size)
+void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
 {
 	struct page *page;
 	unsigned long redzone_start;
@@ -408,7 +554,7 @@
 		KASAN_PAGE_REDZONE);
 }
 
-void kasan_krealloc(const void *object, size_t size)
+void kasan_krealloc(const void *object, size_t size, gfp_t flags)
 {
 	struct page *page;
 
@@ -418,9 +564,9 @@
 	page = virt_to_head_page(object);
 
 	if (unlikely(!PageSlab(page)))
-		kasan_kmalloc_large(object, size);
+		kasan_kmalloc_large(object, size, flags);
 	else
-		kasan_kmalloc(page->slab_cache, object, size);
+		kasan_kmalloc(page->slab_cache, object, size, flags);
 }
 
 void kasan_kfree(void *ptr)

diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 4f6c62e..30a2f0b 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h

@@ -2,6 +2,7 @@
 #define __MM_KASAN_KASAN_H
 
 #include <linux/kasan.h>
+#include <linux/stackdepot.h>
 
 #define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
 #define KASAN_SHADOW_MASK       (KASAN_SHADOW_SCALE_SIZE - 1)
@@ -54,6 +55,42 @@
 #endif
 };
 
+/**
+ * Structures to keep alloc and free tracks *
+ */
+
+enum kasan_state {
+	KASAN_STATE_INIT,
+	KASAN_STATE_ALLOC,
+	KASAN_STATE_FREE
+};
+
+#define KASAN_STACK_DEPTH 64
+
+struct kasan_track {
+	u32 pid;
+	depot_stack_handle_t stack;
+};
+
+struct kasan_alloc_meta {
+	struct kasan_track track;
+	u32 state : 2;	/* enum kasan_state */
+	u32 alloc_size : 30;
+	u32 reserved;
+};
+
+struct kasan_free_meta {
+	/* Allocator freelist pointer, unused by KASAN. */
+	void **freelist;
+	struct kasan_track track;
+};
+
+struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
+					const void *object);
+struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
+					const void *object);
+
+
 static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
 {
 	return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)

diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 745aa8f..60869a5a 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c

@@ -18,6 +18,7 @@
 #include <linux/printk.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/stackdepot.h>
 #include <linux/stacktrace.h>
 #include <linux/string.h>
 #include <linux/types.h>
@@ -115,6 +116,53 @@
 			sizeof(init_thread_union.stack));
 }
 
+#ifdef CONFIG_SLAB
+static void print_track(struct kasan_track *track)
+{
+	pr_err("PID = %u\n", track->pid);
+	if (track->stack) {
+		struct stack_trace trace;
+
+		depot_fetch_stack(track->stack, &trace);
+		print_stack_trace(&trace, 0);
+	} else {
+		pr_err("(stack is not available)\n");
+	}
+}
+
+static void object_err(struct kmem_cache *cache, struct page *page,
+			void *object, char *unused_reason)
+{
+	struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
+	struct kasan_free_meta *free_info;
+
+	dump_stack();
+	pr_err("Object at %p, in cache %s\n", object, cache->name);
+	if (!(cache->flags & SLAB_KASAN))
+		return;
+	switch (alloc_info->state) {
+	case KASAN_STATE_INIT:
+		pr_err("Object not allocated yet\n");
+		break;
+	case KASAN_STATE_ALLOC:
+		pr_err("Object allocated with size %u bytes.\n",
+		       alloc_info->alloc_size);
+		pr_err("Allocation:\n");
+		print_track(&alloc_info->track);
+		break;
+	case KASAN_STATE_FREE:
+		pr_err("Object freed, allocated with size %u bytes\n",
+		       alloc_info->alloc_size);
+		free_info = get_free_info(cache, object);
+		pr_err("Allocation:\n");
+		print_track(&alloc_info->track);
+		pr_err("Deallocation:\n");
+		print_track(&free_info->track);
+		break;
+	}
+}
+#endif
+
 static void print_address_description(struct kasan_access_info *info)
 {
 	const void *addr = info->access_addr;
@@ -126,17 +174,10 @@
 		if (PageSlab(page)) {
 			void *object;
 			struct kmem_cache *cache = page->slab_cache;
-			void *last_object;
-
-			object = virt_to_obj(cache, page_address(page), addr);
-			last_object = page_address(page) +
-				page->objects * cache->size;
-
-			if (unlikely(object > last_object))
-				object = last_object; /* we hit into padding */
-
+			object = nearest_obj(cache, page,
+						(void *)info->access_addr);
 			object_err(cache, page, object,
-				"kasan: bad access detected");
+					"kasan: bad access detected");
 			return;
 		}
 		dump_page(page, "kasan: bad access detected");
@@ -146,7 +187,6 @@
 		if (!init_task_stack_addr(addr))
 			pr_err("Address belongs to variable %pS\n", addr);
 	}
-
 	dump_stack();
 }
 

diff --git a/mm/madvise.c b/mm/madvise.c
index a011473..07427d3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c

@@ -170,7 +170,7 @@
 		page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
 								vma, index);
 		if (page)
-			page_cache_release(page);
+			put_page(page);
 	}
 
 	return 0;
@@ -204,14 +204,14 @@
 		page = find_get_entry(mapping, index);
 		if (!radix_tree_exceptional_entry(page)) {
 			if (page)
-				page_cache_release(page);
+				put_page(page);
 			continue;
 		}
 		swap = radix_to_swp_entry(page);
 		page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
 								NULL, 0);
 		if (page)
-			page_cache_release(page);
+			put_page(page);
 	}
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5a544c6..78f5f26 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c

@@ -538,7 +538,7 @@
 		/*
 		 * drop the page count elevated by isolate_lru_page()
 		 */
-		page_cache_release(p);
+		put_page(p);
 		return 0;
 	}
 	return -EIO;

diff --git a/mm/memory.c b/mm/memory.c
index 81dca00..93897f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c

@@ -1102,6 +1102,12 @@
 
 			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
+					/*
+					 * oom_reaper cannot tear down dirty
+					 * pages
+					 */
+					if (unlikely(details && details->ignore_dirty))
+						continue;
 					force_flush = 1;
 					set_page_dirty(page);
 				}
@@ -1120,8 +1126,8 @@
 			}
 			continue;
 		}
-		/* If details->check_mapping, we leave swap entries. */
-		if (unlikely(details))
+		/* only check swap_entries if explicitly asked for in details */
+		if (unlikely(details && !details->check_swap_entries))
 			continue;
 
 		entry = pte_to_swp_entry(ptent);
@@ -1226,7 +1232,7 @@
 	return addr;
 }
 
-static void unmap_page_range(struct mmu_gather *tlb,
+void unmap_page_range(struct mmu_gather *tlb,
 			     struct vm_area_struct *vma,
 			     unsigned long addr, unsigned long end,
 			     struct zap_details *details)
@@ -1234,9 +1240,6 @@
 	pgd_t *pgd;
 	unsigned long next;
 
-	if (details && !details->check_mapping)
-		details = NULL;
-
 	BUG_ON(addr >= end);
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
@@ -2051,7 +2054,7 @@
 		VM_BUG_ON_PAGE(PageAnon(page), page);
 		mapping = page->mapping;
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 
 		if ((dirtied || page_mkwrite) && mapping) {
 			/*
@@ -2185,7 +2188,7 @@
 	}
 
 	if (new_page)
-		page_cache_release(new_page);
+		put_page(new_page);
 
 	pte_unmap_unlock(page_table, ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
@@ -2200,14 +2203,14 @@
 				munlock_vma_page(old_page);
 			unlock_page(old_page);
 		}
-		page_cache_release(old_page);
+		put_page(old_page);
 	}
 	return page_copied ? VM_FAULT_WRITE : 0;
 oom_free_new:
-	page_cache_release(new_page);
+	put_page(new_page);
 oom:
 	if (old_page)
-		page_cache_release(old_page);
+		put_page(old_page);
 	return VM_FAULT_OOM;
 }
 
@@ -2255,7 +2258,7 @@
 {
 	int page_mkwrite = 0;
 
-	page_cache_get(old_page);
+	get_page(old_page);
 
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 		int tmp;
@@ -2264,7 +2267,7 @@
 		tmp = do_page_mkwrite(vma, old_page, address);
 		if (unlikely(!tmp || (tmp &
 				      (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
-			page_cache_release(old_page);
+			put_page(old_page);
 			return tmp;
 		}
 		/*
@@ -2278,7 +2281,7 @@
 		if (!pte_same(*page_table, orig_pte)) {
 			unlock_page(old_page);
 			pte_unmap_unlock(page_table, ptl);
-			page_cache_release(old_page);
+			put_page(old_page);
 			return 0;
 		}
 		page_mkwrite = 1;
@@ -2338,7 +2341,7 @@
 	 */
 	if (PageAnon(old_page) && !PageKsm(old_page)) {
 		if (!trylock_page(old_page)) {
-			page_cache_get(old_page);
+			get_page(old_page);
 			pte_unmap_unlock(page_table, ptl);
 			lock_page(old_page);
 			page_table = pte_offset_map_lock(mm, pmd, address,
@@ -2346,10 +2349,10 @@
 			if (!pte_same(*page_table, orig_pte)) {
 				unlock_page(old_page);
 				pte_unmap_unlock(page_table, ptl);
-				page_cache_release(old_page);
+				put_page(old_page);
 				return 0;
 			}
-			page_cache_release(old_page);
+			put_page(old_page);
 		}
 		if (reuse_swap_page(old_page)) {
 			/*
@@ -2372,7 +2375,7 @@
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
-	page_cache_get(old_page);
+	get_page(old_page);
 
 	pte_unmap_unlock(page_table, ptl);
 	return wp_page_copy(mm, vma, address, page_table, pmd,
@@ -2397,7 +2400,6 @@
 
 		vba = vma->vm_pgoff;
 		vea = vba + vma_pages(vma) - 1;
-		/* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */
 		zba = details->first_index;
 		if (zba < vba)
 			zba = vba;
@@ -2432,7 +2434,7 @@
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows)
 {
-	struct zap_details details;
+	struct zap_details details = { };
 	pgoff_t hba = holebegin >> PAGE_SHIFT;
 	pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
@@ -2616,7 +2618,7 @@
 		 * parallel locked swapcache.
 		 */
 		unlock_page(swapcache);
-		page_cache_release(swapcache);
+		put_page(swapcache);
 	}
 
 	if (flags & FAULT_FLAG_WRITE) {
@@ -2638,10 +2640,10 @@
 out_page:
 	unlock_page(page);
 out_release:
-	page_cache_release(page);
+	put_page(page);
 	if (page != swapcache) {
 		unlock_page(swapcache);
-		page_cache_release(swapcache);
+		put_page(swapcache);
 	}
 	return ret;
 }
@@ -2749,7 +2751,7 @@
 	if (userfaultfd_missing(vma)) {
 		pte_unmap_unlock(page_table, ptl);
 		mem_cgroup_cancel_charge(page, memcg, false);
-		page_cache_release(page);
+		put_page(page);
 		return handle_userfault(vma, address, flags,
 					VM_UFFD_MISSING);
 	}
@@ -2768,10 +2770,10 @@
 	return 0;
 release:
 	mem_cgroup_cancel_charge(page, memcg, false);
-	page_cache_release(page);
+	put_page(page);
 	goto unlock;
 oom_free_page:
-	page_cache_release(page);
+	put_page(page);
 oom:
 	return VM_FAULT_OOM;
 }
@@ -2804,7 +2806,7 @@
 	if (unlikely(PageHWPoison(vmf.page))) {
 		if (ret & VM_FAULT_LOCKED)
 			unlock_page(vmf.page);
-		page_cache_release(vmf.page);
+		put_page(vmf.page);
 		return VM_FAULT_HWPOISON;
 	}
 
@@ -2993,7 +2995,7 @@
 	if (unlikely(!pte_same(*pte, orig_pte))) {
 		pte_unmap_unlock(pte, ptl);
 		unlock_page(fault_page);
-		page_cache_release(fault_page);
+		put_page(fault_page);
 		return ret;
 	}
 	do_set_pte(vma, address, fault_page, pte, false, false);
@@ -3021,7 +3023,7 @@
 		return VM_FAULT_OOM;
 
 	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) {
-		page_cache_release(new_page);
+		put_page(new_page);
 		return VM_FAULT_OOM;
 	}
 
@@ -3038,7 +3040,7 @@
 		pte_unmap_unlock(pte, ptl);
 		if (fault_page) {
 			unlock_page(fault_page);
-			page_cache_release(fault_page);
+			put_page(fault_page);
 		} else {
 			/*
 			 * The fault handler has no page to lock, so it holds
@@ -3054,7 +3056,7 @@
 	pte_unmap_unlock(pte, ptl);
 	if (fault_page) {
 		unlock_page(fault_page);
-		page_cache_release(fault_page);
+		put_page(fault_page);
 	} else {
 		/*
 		 * The fault handler has no page to lock, so it holds
@@ -3065,7 +3067,7 @@
 	return ret;
 uncharge_out:
 	mem_cgroup_cancel_charge(new_page, memcg, false);
-	page_cache_release(new_page);
+	put_page(new_page);
 	return ret;
 }
 
@@ -3093,7 +3095,7 @@
 		tmp = do_page_mkwrite(vma, fault_page, address);
 		if (unlikely(!tmp ||
 				(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
-			page_cache_release(fault_page);
+			put_page(fault_page);
 			return tmp;
 		}
 	}
@@ -3102,7 +3104,7 @@
 	if (unlikely(!pte_same(*pte, orig_pte))) {
 		pte_unmap_unlock(pte, ptl);
 		unlock_page(fault_page);
-		page_cache_release(fault_page);
+		put_page(fault_page);
 		return ret;
 	}
 	do_set_pte(vma, address, fault_page, pte, true, false);
@@ -3733,7 +3735,7 @@
 						    buf, maddr + offset, bytes);
 			}
 			kunmap(page);
-			page_cache_release(page);
+			put_page(page);
 		}
 		len -= bytes;
 		buf += bytes;

diff --git a/mm/mempool.c b/mm/mempool.c
index 07c383d..9b7a14a 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c

@@ -112,12 +112,12 @@
 		kasan_free_pages(element, (unsigned long)pool->pool_data);
 }
 
-static void kasan_unpoison_element(mempool_t *pool, void *element)
+static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags)
 {
 	if (pool->alloc == mempool_alloc_slab)
-		kasan_slab_alloc(pool->pool_data, element);
+		kasan_slab_alloc(pool->pool_data, element, flags);
 	if (pool->alloc == mempool_kmalloc)
-		kasan_krealloc(element, (size_t)pool->pool_data);
+		kasan_krealloc(element, (size_t)pool->pool_data, flags);
 	if (pool->alloc == mempool_alloc_pages)
 		kasan_alloc_pages(element, (unsigned long)pool->pool_data);
 }
@@ -130,12 +130,12 @@
 	pool->elements[pool->curr_nr++] = element;
 }
 
-static void *remove_element(mempool_t *pool)
+static void *remove_element(mempool_t *pool, gfp_t flags)
 {
 	void *element = pool->elements[--pool->curr_nr];
 
 	BUG_ON(pool->curr_nr < 0);
-	kasan_unpoison_element(pool, element);
+	kasan_unpoison_element(pool, element, flags);
 	check_element(pool, element);
 	return element;
 }
@@ -154,7 +154,7 @@
 		return;
 
 	while (pool->curr_nr) {
-		void *element = remove_element(pool);
+		void *element = remove_element(pool, GFP_KERNEL);
 		pool->free(element, pool->pool_data);
 	}
 	kfree(pool->elements);
@@ -250,7 +250,7 @@
 	spin_lock_irqsave(&pool->lock, flags);
 	if (new_min_nr <= pool->min_nr) {
 		while (new_min_nr < pool->curr_nr) {
-			element = remove_element(pool);
+			element = remove_element(pool, GFP_KERNEL);
 			spin_unlock_irqrestore(&pool->lock, flags);
 			pool->free(element, pool->pool_data);
 			spin_lock_irqsave(&pool->lock, flags);
@@ -347,7 +347,7 @@
 
 	spin_lock_irqsave(&pool->lock, flags);
 	if (likely(pool->curr_nr)) {
-		element = remove_element(pool);
+		element = remove_element(pool, gfp_temp);
 		spin_unlock_irqrestore(&pool->lock, flags);
 		/* paired with rmb in mempool_free(), read comment there */
 		smp_wmb();

diff --git a/mm/mincore.c b/mm/mincore.c
index 563f320..c0b5ba9 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c

@@ -75,7 +75,7 @@
 #endif
 	if (page) {
 		present = PageUptodate(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	return present;
@@ -211,7 +211,7 @@
  * return values:
  *  zero    - success
  *  -EFAULT - vec points to an illegal address
- *  -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE
+ *  -EINVAL - addr is not a multiple of PAGE_SIZE
  *  -ENOMEM - Addresses in the range [addr, addr + len] are
  *		invalid for the address space of this process, or
  *		specify one or more pages which are not currently
@@ -226,14 +226,14 @@
 	unsigned char *tmp;
 
 	/* Check the start address: needs to be page-aligned.. */
- 	if (start & ~PAGE_CACHE_MASK)
+	if (start & ~PAGE_MASK)
 		return -EINVAL;
 
 	/* ..and we need to be passed a valid user-space range */
 	if (!access_ok(VERIFY_READ, (void __user *) start, len))
 		return -ENOMEM;
 
-	/* This also avoids any overflows on PAGE_CACHE_ALIGN */
+	/* This also avoids any overflows on PAGE_ALIGN */
 	pages = len >> PAGE_SHIFT;
 	pages += (offset_in_page(len)) != 0;
 

diff --git a/mm/mprotect.c b/mm/mprotect.c
index fa37c4c..b650c54 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c

@@ -359,6 +359,9 @@
 	struct vm_area_struct *vma, *prev;
 	int error = -EINVAL;
 	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
+	const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
+				(prot & PROT_READ);
+
 	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
 	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
 		return -EINVAL;
@@ -375,11 +378,6 @@
 		return -EINVAL;
 
 	reqprot = prot;
-	/*
-	 * Does the application expect PROT_READ to imply PROT_EXEC:
-	 */
-	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
-		prot |= PROT_EXEC;
 
 	down_write(&current->mm->mmap_sem);
 
@@ -414,6 +412,10 @@
 
 		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
 
+		/* Does the application expect PROT_READ to imply PROT_EXEC */
+		if (rier && (vma->vm_flags & VM_MAYEXEC))
+			prot |= PROT_EXEC;
+
 		newflags = calc_vm_prot_bits(prot, pkey);
 		newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
 
@@ -445,6 +447,7 @@
 			error = -ENOMEM;
 			goto out;
 		}
+		prot = reqprot;
 	}
 out:
 	up_write(&current->mm->mmap_sem);

diff --git a/mm/nommu.c b/mm/nommu.c
index de8b6b6..102e257 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c

@@ -141,7 +141,7 @@
 		if (pages) {
 			pages[i] = virt_to_page(start);
 			if (pages[i])
-				page_cache_get(pages[i]);
+				get_page(pages[i]);
 		}
 		if (vmas)
 			vmas[i] = vma;

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 06f7e17..8634958 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c

@@ -35,6 +35,11 @@
 #include <linux/freezer.h>
 #include <linux/ftrace.h>
 #include <linux/ratelimit.h>
+#include <linux/kthread.h>
+#include <linux/init.h>
+
+#include <asm/tlb.h>
+#include "internal.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/oom.h>
@@ -405,6 +410,176 @@
 
 bool oom_killer_disabled __read_mostly;
 
+#define K(x) ((x) << (PAGE_SHIFT-10))
+
+#ifdef CONFIG_MMU
+/*
+ * OOM Reaper kernel thread which tries to reap the memory used by the OOM
+ * victim (if that is possible) to help the OOM killer to move on.
+ */
+static struct task_struct *oom_reaper_th;
+static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
+static struct task_struct *oom_reaper_list;
+static DEFINE_SPINLOCK(oom_reaper_lock);
+
+
+static bool __oom_reap_task(struct task_struct *tsk)
+{
+	struct mmu_gather tlb;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct task_struct *p;
+	struct zap_details details = {.check_swap_entries = true,
+				      .ignore_dirty = true};
+	bool ret = true;
+
+	/*
+	 * Make sure we find the associated mm_struct even when the particular
+	 * thread has already terminated and cleared its mm.
+	 * We might have race with exit path so consider our work done if there
+	 * is no mm.
+	 */
+	p = find_lock_task_mm(tsk);
+	if (!p)
+		return true;
+
+	mm = p->mm;
+	if (!atomic_inc_not_zero(&mm->mm_users)) {
+		task_unlock(p);
+		return true;
+	}
+
+	task_unlock(p);
+
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		ret = false;
+		goto out;
+	}
+
+	tlb_gather_mmu(&tlb, mm, 0, -1);
+	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+		if (is_vm_hugetlb_page(vma))
+			continue;
+
+		/*
+		 * mlocked VMAs require explicit munlocking before unmap.
+		 * Let's keep it simple here and skip such VMAs.
+		 */
+		if (vma->vm_flags & VM_LOCKED)
+			continue;
+
+		/*
+		 * Only anonymous pages have a good chance to be dropped
+		 * without additional steps which we cannot afford as we
+		 * are OOM already.
+		 *
+		 * We do not even care about fs backed pages because all
+		 * which are reclaimable have already been reclaimed and
+		 * we do not want to block exit_mmap by keeping mm ref
+		 * count elevated without a good reason.
+		 */
+		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
+			unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
+					 &details);
+	}
+	tlb_finish_mmu(&tlb, 0, -1);
+	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
+			task_pid_nr(tsk), tsk->comm,
+			K(get_mm_counter(mm, MM_ANONPAGES)),
+			K(get_mm_counter(mm, MM_FILEPAGES)),
+			K(get_mm_counter(mm, MM_SHMEMPAGES)));
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Clear TIF_MEMDIE because the task shouldn't be sitting on a
+	 * reasonably reclaimable memory anymore. OOM killer can continue
+	 * by selecting other victim if unmapping hasn't led to any
+	 * improvements. This also means that selecting this task doesn't
+	 * make any sense.
+	 */
+	tsk->signal->oom_score_adj = OOM_SCORE_ADJ_MIN;
+	exit_oom_victim(tsk);
+out:
+	mmput(mm);
+	return ret;
+}
+
+#define MAX_OOM_REAP_RETRIES 10
+static void oom_reap_task(struct task_struct *tsk)
+{
+	int attempts = 0;
+
+	/* Retry the down_read_trylock(mmap_sem) a few times */
+	while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task(tsk))
+		schedule_timeout_idle(HZ/10);
+
+	if (attempts > MAX_OOM_REAP_RETRIES) {
+		pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
+				task_pid_nr(tsk), tsk->comm);
+		debug_show_all_locks();
+	}
+
+	/* Drop a reference taken by wake_oom_reaper */
+	put_task_struct(tsk);
+}
+
+static int oom_reaper(void *unused)
+{
+	set_freezable();
+
+	while (true) {
+		struct task_struct *tsk = NULL;
+
+		wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
+		spin_lock(&oom_reaper_lock);
+		if (oom_reaper_list != NULL) {
+			tsk = oom_reaper_list;
+			oom_reaper_list = tsk->oom_reaper_list;
+		}
+		spin_unlock(&oom_reaper_lock);
+
+		if (tsk)
+			oom_reap_task(tsk);
+	}
+
+	return 0;
+}
+
+static void wake_oom_reaper(struct task_struct *tsk)
+{
+	if (!oom_reaper_th)
+		return;
+
+	/* tsk is already queued? */
+	if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+		return;
+
+	get_task_struct(tsk);
+
+	spin_lock(&oom_reaper_lock);
+	tsk->oom_reaper_list = oom_reaper_list;
+	oom_reaper_list = tsk;
+	spin_unlock(&oom_reaper_lock);
+	wake_up(&oom_reaper_wait);
+}
+
+static int __init oom_init(void)
+{
+	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
+	if (IS_ERR(oom_reaper_th)) {
+		pr_err("Unable to start OOM reaper %ld. Continuing regardless\n",
+				PTR_ERR(oom_reaper_th));
+		oom_reaper_th = NULL;
+	}
+	return 0;
+}
+subsys_initcall(oom_init)
+#else
+static void wake_oom_reaper(struct task_struct *tsk)
+{
+}
+#endif
+
 /**
  * mark_oom_victim - mark the given task as OOM victim
  * @tsk: task to mark
@@ -431,9 +606,10 @@
 /**
  * exit_oom_victim - note the exit of an OOM victim
  */
-void exit_oom_victim(void)
+void exit_oom_victim(struct task_struct *tsk)
 {
-	clear_thread_flag(TIF_MEMDIE);
+	if (!test_and_clear_tsk_thread_flag(tsk, TIF_MEMDIE))
+		return;
 
 	if (!atomic_dec_return(&oom_victims))
 		wake_up_all(&oom_victims_wait);
@@ -494,7 +670,6 @@
 	return false;
 }
 
-#define K(x) ((x) << (PAGE_SHIFT-10))
 /*
  * Must be called while holding a reference to p, which will be released upon
  * returning.
@@ -510,6 +685,7 @@
 	unsigned int victim_points = 0;
 	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
 					      DEFAULT_RATELIMIT_BURST);
+	bool can_oom_reap = true;
 
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -600,17 +776,23 @@
 			continue;
 		if (same_thread_group(p, victim))
 			continue;
-		if (unlikely(p->flags & PF_KTHREAD))
+		if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p) ||
+		    p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+			/*
+			 * We cannot use oom_reaper for the mm shared by this
+			 * process because it wouldn't get killed and so the
+			 * memory might be still used.
+			 */
+			can_oom_reap = false;
 			continue;
-		if (is_global_init(p))
-			continue;
-		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-			continue;
-
+		}
 		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
 	}
 	rcu_read_unlock();
 
+	if (can_oom_reap)
+		wake_oom_reaper(victim);
+
 	mmdrop(mm);
 	put_task_struct(victim);
 }

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 11ff8f7..999792d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c

@@ -2176,8 +2176,8 @@
 			cycled = 0;
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		index = wbc->range_start >> PAGE_SHIFT;
+		end = wbc->range_end >> PAGE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			range_whole = 1;
 		cycled = 1; /* ignore range_cyclic tests */
@@ -2382,14 +2382,14 @@
 		wait_on_page_writeback(page);
 
 	if (clear_page_dirty_for_io(page)) {
-		page_cache_get(page);
+		get_page(page);
 		ret = mapping->a_ops->writepage(page, &wbc);
 		if (ret == 0 && wait) {
 			wait_on_page_writeback(page);
 			if (PageError(page))
 				ret = -EIO;
 		}
-		page_cache_release(page);
+		put_page(page);
 	} else {
 		unlock_page(page);
 	}
@@ -2431,7 +2431,7 @@
 		__inc_zone_page_state(page, NR_DIRTIED);
 		__inc_wb_stat(wb, WB_RECLAIMABLE);
 		__inc_wb_stat(wb, WB_DIRTIED);
-		task_io_account_write(PAGE_CACHE_SIZE);
+		task_io_account_write(PAGE_SIZE);
 		current->nr_dirtied++;
 		this_cpu_inc(bdp_ratelimits);
 	}
@@ -2450,7 +2450,7 @@
 		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
 		dec_zone_page_state(page, NR_FILE_DIRTY);
 		dec_wb_stat(wb, WB_RECLAIMABLE);
-		task_io_account_cancelled_write(PAGE_CACHE_SIZE);
+		task_io_account_cancelled_write(PAGE_SIZE);
 	}
 }
 

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a762be5..59de90d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -692,34 +692,28 @@
 	unsigned long combined_idx;
 	unsigned long uninitialized_var(buddy_idx);
 	struct page *buddy;
-	unsigned int max_order = MAX_ORDER;
+	unsigned int max_order;
+
+	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
 	VM_BUG_ON(!zone_is_initialized(zone));
 	VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
 
 	VM_BUG_ON(migratetype == -1);
-	if (is_migrate_isolate(migratetype)) {
-		/*
-		 * We restrict max order of merging to prevent merge
-		 * between freepages on isolate pageblock and normal
-		 * pageblock. Without this, pageblock isolation
-		 * could cause incorrect freepage accounting.
-		 */
-		max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
-	} else {
+	if (likely(!is_migrate_isolate(migratetype)))
 		__mod_zone_freepage_state(zone, 1 << order, migratetype);
-	}
 
-	page_idx = pfn & ((1 << max_order) - 1);
+	page_idx = pfn & ((1 << MAX_ORDER) - 1);
 
 	VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
 	VM_BUG_ON_PAGE(bad_range(zone, page), page);
 
+continue_merging:
 	while (order < max_order - 1) {
 		buddy_idx = __find_buddy_index(page_idx, order);
 		buddy = page + (buddy_idx - page_idx);
 		if (!page_is_buddy(page, buddy, order))
-			break;
+			goto done_merging;
 		/*
 		 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
 		 * merge with it and move up one order.
@@ -736,6 +730,32 @@
 		page_idx = combined_idx;
 		order++;
 	}
+	if (max_order < MAX_ORDER) {
+		/* If we are here, it means order is >= pageblock_order.
+		 * We want to prevent merge between freepages on isolate
+		 * pageblock and normal pageblock. Without this, pageblock
+		 * isolation could cause incorrect freepage or CMA accounting.
+		 *
+		 * We don't want to hit this code for the more frequent
+		 * low-order merging.
+		 */
+		if (unlikely(has_isolate_pageblock(zone))) {
+			int buddy_mt;
+
+			buddy_idx = __find_buddy_index(page_idx, order);
+			buddy = page + (buddy_idx - page_idx);
+			buddy_mt = get_pageblock_migratetype(buddy);
+
+			if (migratetype != buddy_mt
+					&& (is_migrate_isolate(migratetype) ||
+						is_migrate_isolate(buddy_mt)))
+				goto done_merging;
+		}
+		max_order++;
+		goto continue_merging;
+	}
+
+done_merging:
 	set_page_order(page, order);
 
 	/*

diff --git a/mm/page_io.c b/mm/page_io.c
index ff74e51..cd92e3d 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c

@@ -66,6 +66,54 @@
 	bio_put(bio);
 }
 
+static void swap_slot_free_notify(struct page *page)
+{
+	struct swap_info_struct *sis;
+	struct gendisk *disk;
+
+	/*
+	 * There is no guarantee that the page is in swap cache - the software
+	 * suspend code (at least) uses end_swap_bio_read() against a non-
+	 * swapcache page.  So we must check PG_swapcache before proceeding with
+	 * this optimization.
+	 */
+	if (unlikely(!PageSwapCache(page)))
+		return;
+
+	sis = page_swap_info(page);
+	if (!(sis->flags & SWP_BLKDEV))
+		return;
+
+	/*
+	 * The swap subsystem performs lazy swap slot freeing,
+	 * expecting that the page will be swapped out again.
+	 * So we can avoid an unnecessary write if the page
+	 * isn't redirtied.
+	 * This is good for real swap storage because we can
+	 * reduce unnecessary I/O and enhance wear-leveling
+	 * if an SSD is used as the as swap device.
+	 * But if in-memory swap device (eg zram) is used,
+	 * this causes a duplicated copy between uncompressed
+	 * data in VM-owned memory and compressed data in
+	 * zram-owned memory.  So let's free zram-owned memory
+	 * and make the VM-owned decompressed page *dirty*,
+	 * so the page should be swapped out somewhere again if
+	 * we again wish to reclaim it.
+	 */
+	disk = sis->bdev->bd_disk;
+	if (disk->fops->swap_slot_free_notify) {
+		swp_entry_t entry;
+		unsigned long offset;
+
+		entry.val = page_private(page);
+		offset = swp_offset(entry);
+
+		SetPageDirty(page);
+		disk->fops->swap_slot_free_notify(sis->bdev,
+				offset);
+	}
+}
+
 static void end_swap_bio_read(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
@@ -81,49 +129,7 @@
 	}
 
 	SetPageUptodate(page);
-
-	/*
-	 * There is no guarantee that the page is in swap cache - the software
-	 * suspend code (at least) uses end_swap_bio_read() against a non-
-	 * swapcache page.  So we must check PG_swapcache before proceeding with
-	 * this optimization.
-	 */
-	if (likely(PageSwapCache(page))) {
-		struct swap_info_struct *sis;
-
-		sis = page_swap_info(page);
-		if (sis->flags & SWP_BLKDEV) {
-			/*
-			 * The swap subsystem performs lazy swap slot freeing,
-			 * expecting that the page will be swapped out again.
-			 * So we can avoid an unnecessary write if the page
-			 * isn't redirtied.
-			 * This is good for real swap storage because we can
-			 * reduce unnecessary I/O and enhance wear-leveling
-			 * if an SSD is used as the as swap device.
-			 * But if in-memory swap device (eg zram) is used,
-			 * this causes a duplicated copy between uncompressed
-			 * data in VM-owned memory and compressed data in
-			 * zram-owned memory.  So let's free zram-owned memory
-			 * and make the VM-owned decompressed page *dirty*,
-			 * so the page should be swapped out somewhere again if
-			 * we again wish to reclaim it.
-			 */
-			struct gendisk *disk = sis->bdev->bd_disk;
-			if (disk->fops->swap_slot_free_notify) {
-				swp_entry_t entry;
-				unsigned long offset;
-
-				entry.val = page_private(page);
-				offset = swp_offset(entry);
-
-				SetPageDirty(page);
-				disk->fops->swap_slot_free_notify(sis->bdev,
-						offset);
-			}
-		}
-	}
-
+	swap_slot_free_notify(page);
 out:
 	unlock_page(page);
 	bio_put(bio);
@@ -246,7 +252,7 @@
 
 static sector_t swap_page_sector(struct page *page)
 {
-	return (sector_t)__page_file_index(page) << (PAGE_CACHE_SHIFT - 9);
+	return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
 }
 
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
@@ -347,6 +353,7 @@
 
 	ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
 	if (!ret) {
+		swap_slot_free_notify(page);
 		count_vm_event(PSWPIN);
 		return 0;
 	}

diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 92c4c36..c4f5682 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c

@@ -215,7 +215,7 @@
  * all pages in [start_pfn...end_pfn) must be in the same zone.
  * zone->lock must be held before call this.
  *
- * Returns 1 if all pages in the range are isolated.
+ * Returns the last tested pfn.
  */
 static unsigned long
 __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
@@ -289,11 +289,11 @@
 	 * now as a simple work-around, we use the next node for destination.
 	 */
 	if (PageHuge(page)) {
-		nodemask_t src = nodemask_of_node(page_to_nid(page));
-		nodemask_t dst;
-		nodes_complement(dst, src);
+		int node = next_online_node(page_to_nid(page));
+		if (node == MAX_NUMNODES)
+			node = first_online_node;
 		return alloc_huge_page_node(page_hstate(compound_head(page)),
-					    next_node(page_to_nid(page), dst));
+					    node);
 	}
 
 	if (PageHighMem(page))

diff --git a/mm/readahead.c b/mm/readahead.c
index 20e58e8..40be3ae 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c

@@ -47,11 +47,11 @@
 		if (!trylock_page(page))
 			BUG();
 		page->mapping = mapping;
-		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		do_invalidatepage(page, 0, PAGE_SIZE);
 		page->mapping = NULL;
 		unlock_page(page);
 	}
-	page_cache_release(page);
+	put_page(page);
 }
 
 /*
@@ -93,14 +93,14 @@
 			read_cache_pages_invalidate_page(mapping, page);
 			continue;
 		}
-		page_cache_release(page);
+		put_page(page);
 
 		ret = filler(data, page);
 		if (unlikely(ret)) {
 			read_cache_pages_invalidate_pages(mapping, pages);
 			break;
 		}
-		task_io_account_read(PAGE_CACHE_SIZE);
+		task_io_account_read(PAGE_SIZE);
 	}
 	return ret;
 }
@@ -130,7 +130,7 @@
 				mapping_gfp_constraint(mapping, GFP_KERNEL))) {
 			mapping->a_ops->readpage(filp, page);
 		}
-		page_cache_release(page);
+		put_page(page);
 	}
 	ret = 0;
 
@@ -163,7 +163,7 @@
 	if (isize == 0)
 		goto out;
 
-	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
+	end_index = ((isize - 1) >> PAGE_SHIFT);
 
 	/*
 	 * Preallocate as many pages as we will need.
@@ -216,7 +216,7 @@
 	while (nr_to_read) {
 		int err;
 
-		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
+		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
 
 		if (this_chunk > nr_to_read)
 			this_chunk = nr_to_read;
@@ -425,7 +425,7 @@
 	 * trivial case: (offset - prev_offset) == 1
 	 * unaligned reads: (offset - prev_offset) == 0
 	 */
-	prev_offset = (unsigned long long)ra->prev_pos >> PAGE_CACHE_SHIFT;
+	prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
 	if (offset - prev_offset <= 1UL)
 		goto initial_readahead;
 
@@ -558,8 +558,8 @@
 	if (f.file) {
 		if (f.file->f_mode & FMODE_READ) {
 			struct address_space *mapping = f.file->f_mapping;
-			pgoff_t start = offset >> PAGE_CACHE_SHIFT;
-			pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
+			pgoff_t start = offset >> PAGE_SHIFT;
+			pgoff_t end = (offset + count - 1) >> PAGE_SHIFT;
 			unsigned long len = end - start + 1;
 			ret = do_readahead(mapping, f.file, start, len);
 		}

diff --git a/mm/rmap.c b/mm/rmap.c
index c399a0d..307b555 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c

@@ -569,19 +569,6 @@
 }
 
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void percpu_flush_tlb_batch_pages(void *data)
-{
-	/*
-	 * All TLB entries are flushed on the assumption that it is
-	 * cheaper to flush all TLBs and let them be refilled than
-	 * flushing individual PFNs. Note that we do not track mm's
-	 * to flush as that might simply be multiple full TLB flushes
-	 * for no gain.
-	 */
-	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-	flush_tlb_local();
-}
-
 /*
  * Flush TLB entries for recently unmapped pages from remote CPUs. It is
  * important if a PTE was dirty when it was unmapped that it's flushed
@@ -598,15 +585,14 @@
 
 	cpu = get_cpu();
 
-	trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, -1UL);
-
-	if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask))
-		percpu_flush_tlb_batch_pages(&tlb_ubc->cpumask);
-
-	if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids) {
-		smp_call_function_many(&tlb_ubc->cpumask,
-			percpu_flush_tlb_batch_pages, (void *)tlb_ubc, true);
+	if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) {
+		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+		local_flush_tlb();
+		trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
 	}
+
+	if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids)
+		flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL);
 	cpumask_clear(&tlb_ubc->cpumask);
 	tlb_ubc->flush_required = false;
 	tlb_ubc->writable = false;
@@ -1555,7 +1541,7 @@
 
 discard:
 	page_remove_rmap(page, PageHuge(page));
-	page_cache_release(page);
+	put_page(page);
 
 out_unmap:
 	pte_unmap_unlock(pte, ptl);

diff --git a/mm/shmem.c b/mm/shmem.c
index 9428c51..719bd6b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c

@@ -75,8 +75,8 @@
 
 #include "internal.h"
 
-#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
-#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
+#define BLOCKS_PER_PAGE  (PAGE_SIZE/512)
+#define VM_ACCT(size)    (PAGE_ALIGN(size) >> PAGE_SHIFT)
 
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
@@ -176,13 +176,13 @@
 static inline int shmem_acct_block(unsigned long flags)
 {
 	return (flags & VM_NORESERVE) ?
-		security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
+		security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_SIZE)) : 0;
 }
 
 static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 {
 	if (flags & VM_NORESERVE)
-		vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
+		vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
 }
 
 static const struct super_operations shmem_ops;
@@ -300,7 +300,7 @@
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
-	page_cache_get(page);
+	get_page(page);
 	page->mapping = mapping;
 	page->index = index;
 
@@ -318,7 +318,7 @@
 	} else {
 		page->mapping = NULL;
 		spin_unlock_irq(&mapping->tree_lock);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return error;
 }
@@ -338,7 +338,7 @@
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	__dec_zone_page_state(page, NR_SHMEM);
 	spin_unlock_irq(&mapping->tree_lock);
-	page_cache_release(page);
+	put_page(page);
 	BUG_ON(error);
 }
 
@@ -474,10 +474,10 @@
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
-	unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
-	unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
+	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
+	unsigned int partial_start = lstart & (PAGE_SIZE - 1);
+	unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1);
 	struct pagevec pvec;
 	pgoff_t indices[PAGEVEC_SIZE];
 	long nr_swaps_freed = 0;
@@ -530,7 +530,7 @@
 		struct page *page = NULL;
 		shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
 		if (page) {
-			unsigned int top = PAGE_CACHE_SIZE;
+			unsigned int top = PAGE_SIZE;
 			if (start > end) {
 				top = partial_end;
 				partial_end = 0;
@@ -538,7 +538,7 @@
 			zero_user_segment(page, partial_start, top);
 			set_page_dirty(page);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 		}
 	}
 	if (partial_end) {
@@ -548,7 +548,7 @@
 			zero_user_segment(page, 0, partial_end);
 			set_page_dirty(page);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 		}
 	}
 	if (start >= end)
@@ -833,7 +833,7 @@
 		mem_cgroup_commit_charge(page, memcg, true, false);
 out:
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return error;
 }
 
@@ -1080,7 +1080,7 @@
 	if (!newpage)
 		return -ENOMEM;
 
-	page_cache_get(newpage);
+	get_page(newpage);
 	copy_highpage(newpage, oldpage);
 	flush_dcache_page(newpage);
 
@@ -1120,8 +1120,8 @@
 	set_page_private(oldpage, 0);
 
 	unlock_page(oldpage);
-	page_cache_release(oldpage);
-	page_cache_release(oldpage);
+	put_page(oldpage);
+	put_page(oldpage);
 	return error;
 }
 
@@ -1145,7 +1145,7 @@
 	int once = 0;
 	int alloced = 0;
 
-	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
+	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
 		return -EFBIG;
 repeat:
 	swap.val = 0;
@@ -1156,7 +1156,7 @@
 	}
 
 	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
-	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
 		error = -EINVAL;
 		goto unlock;
 	}
@@ -1169,7 +1169,7 @@
 		if (sgp != SGP_READ)
 			goto clear;
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		page = NULL;
 	}
 	if (page || (sgp == SGP_READ && !swap.val)) {
@@ -1327,7 +1327,7 @@
 
 	/* Perhaps the file has been truncated since we checked */
 	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
-	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
 		if (alloced) {
 			ClearPageDirty(page);
 			delete_from_page_cache(page);
@@ -1355,7 +1355,7 @@
 unlock:
 	if (page) {
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	if (error == -ENOSPC && !once++) {
 		info = SHMEM_I(inode);
@@ -1577,7 +1577,7 @@
 {
 	struct inode *inode = mapping->host;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_SHIFT;
 
 	/* i_mutex is held by caller */
 	if (unlikely(info->seals)) {
@@ -1601,16 +1601,16 @@
 		i_size_write(inode, pos + copied);
 
 	if (!PageUptodate(page)) {
-		if (copied < PAGE_CACHE_SIZE) {
-			unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+		if (copied < PAGE_SIZE) {
+			unsigned from = pos & (PAGE_SIZE - 1);
 			zero_user_segments(page, 0, from,
-					from + copied, PAGE_CACHE_SIZE);
+					from + copied, PAGE_SIZE);
 		}
 		SetPageUptodate(page);
 	}
 	set_page_dirty(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 
 	return copied;
 }
@@ -1635,8 +1635,8 @@
 	if (!iter_is_iovec(to))
 		sgp = SGP_DIRTY;
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
-	offset = *ppos & ~PAGE_CACHE_MASK;
+	index = *ppos >> PAGE_SHIFT;
+	offset = *ppos & ~PAGE_MASK;
 
 	for (;;) {
 		struct page *page = NULL;
@@ -1644,11 +1644,11 @@
 		unsigned long nr, ret;
 		loff_t i_size = i_size_read(inode);
 
-		end_index = i_size >> PAGE_CACHE_SHIFT;
+		end_index = i_size >> PAGE_SHIFT;
 		if (index > end_index)
 			break;
 		if (index == end_index) {
-			nr = i_size & ~PAGE_CACHE_MASK;
+			nr = i_size & ~PAGE_MASK;
 			if (nr <= offset)
 				break;
 		}
@@ -1666,14 +1666,14 @@
 		 * We must evaluate after, since reads (unlike writes)
 		 * are called without i_mutex protection against truncate
 		 */
-		nr = PAGE_CACHE_SIZE;
+		nr = PAGE_SIZE;
 		i_size = i_size_read(inode);
-		end_index = i_size >> PAGE_CACHE_SHIFT;
+		end_index = i_size >> PAGE_SHIFT;
 		if (index == end_index) {
-			nr = i_size & ~PAGE_CACHE_MASK;
+			nr = i_size & ~PAGE_MASK;
 			if (nr <= offset) {
 				if (page)
-					page_cache_release(page);
+					put_page(page);
 				break;
 			}
 		}
@@ -1694,7 +1694,7 @@
 				mark_page_accessed(page);
 		} else {
 			page = ZERO_PAGE(0);
-			page_cache_get(page);
+			get_page(page);
 		}
 
 		/*
@@ -1704,10 +1704,10 @@
 		ret = copy_page_to_iter(page, offset, nr, to);
 		retval += ret;
 		offset += ret;
-		index += offset >> PAGE_CACHE_SHIFT;
-		offset &= ~PAGE_CACHE_MASK;
+		index += offset >> PAGE_SHIFT;
+		offset &= ~PAGE_MASK;
 
-		page_cache_release(page);
+		put_page(page);
 		if (!iov_iter_count(to))
 			break;
 		if (ret < nr) {
@@ -1717,7 +1717,7 @@
 		cond_resched();
 	}
 
-	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+	*ppos = ((loff_t) index << PAGE_SHIFT) + offset;
 	file_accessed(file);
 	return retval ? retval : error;
 }
@@ -1755,9 +1755,9 @@
 	if (splice_grow_spd(pipe, &spd))
 		return -ENOMEM;
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
-	loff = *ppos & ~PAGE_CACHE_MASK;
-	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	index = *ppos >> PAGE_SHIFT;
+	loff = *ppos & ~PAGE_MASK;
+	req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	nr_pages = min(req_pages, spd.nr_pages_max);
 
 	spd.nr_pages = find_get_pages_contig(mapping, index,
@@ -1774,7 +1774,7 @@
 		index++;
 	}
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
+	index = *ppos >> PAGE_SHIFT;
 	nr_pages = spd.nr_pages;
 	spd.nr_pages = 0;
 
@@ -1784,7 +1784,7 @@
 		if (!len)
 			break;
 
-		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
+		this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
 		page = spd.pages[page_nr];
 
 		if (!PageUptodate(page) || page->mapping != mapping) {
@@ -1793,19 +1793,19 @@
 			if (error)
 				break;
 			unlock_page(page);
-			page_cache_release(spd.pages[page_nr]);
+			put_page(spd.pages[page_nr]);
 			spd.pages[page_nr] = page;
 		}
 
 		isize = i_size_read(inode);
-		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+		end_index = (isize - 1) >> PAGE_SHIFT;
 		if (unlikely(!isize || index > end_index))
 			break;
 
 		if (end_index == index) {
 			unsigned int plen;
 
-			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			plen = ((isize - 1) & ~PAGE_MASK) + 1;
 			if (plen <= loff)
 				break;
 
@@ -1822,7 +1822,7 @@
 	}
 
 	while (page_nr < nr_pages)
-		page_cache_release(spd.pages[page_nr++]);
+		put_page(spd.pages[page_nr++]);
 
 	if (spd.nr_pages)
 		error = splice_to_pipe(pipe, &spd);
@@ -1904,10 +1904,10 @@
 	else if (offset >= inode->i_size)
 		offset = -ENXIO;
 	else {
-		start = offset >> PAGE_CACHE_SHIFT;
-		end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		start = offset >> PAGE_SHIFT;
+		end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		new_offset = shmem_seek_hole_data(mapping, start, end, whence);
-		new_offset <<= PAGE_CACHE_SHIFT;
+		new_offset <<= PAGE_SHIFT;
 		if (new_offset > offset) {
 			if (new_offset < inode->i_size)
 				offset = new_offset;
@@ -2203,8 +2203,8 @@
 		goto out;
 	}
 
-	start = offset >> PAGE_CACHE_SHIFT;
-	end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	start = offset >> PAGE_SHIFT;
+	end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	/* Try to avoid a swapstorm if len is impossible to satisfy */
 	if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
 		error = -ENOSPC;
@@ -2237,8 +2237,8 @@
 		if (error) {
 			/* Remove the !PageUptodate pages we added */
 			shmem_undo_range(inode,
-				(loff_t)start << PAGE_CACHE_SHIFT,
-				(loff_t)index << PAGE_CACHE_SHIFT, true);
+				(loff_t)start << PAGE_SHIFT,
+				(loff_t)index << PAGE_SHIFT, true);
 			goto undone;
 		}
 
@@ -2259,7 +2259,7 @@
 		 */
 		set_page_dirty(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 		cond_resched();
 	}
 
@@ -2280,7 +2280,7 @@
 	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
 
 	buf->f_type = TMPFS_MAGIC;
-	buf->f_bsize = PAGE_CACHE_SIZE;
+	buf->f_bsize = PAGE_SIZE;
 	buf->f_namelen = NAME_MAX;
 	if (sbinfo->max_blocks) {
 		buf->f_blocks = sbinfo->max_blocks;
@@ -2523,7 +2523,7 @@
 	struct shmem_inode_info *info;
 
 	len = strlen(symname) + 1;
-	if (len > PAGE_CACHE_SIZE)
+	if (len > PAGE_SIZE)
 		return -ENAMETOOLONG;
 
 	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE);
@@ -2562,7 +2562,7 @@
 		SetPageUptodate(page);
 		set_page_dirty(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	dir->i_size += BOGO_DIRENT_SIZE;
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
@@ -2835,7 +2835,7 @@
 			if (*rest)
 				goto bad_val;
 			sbinfo->max_blocks =
-				DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
+				DIV_ROUND_UP(size, PAGE_SIZE);
 		} else if (!strcmp(this_char,"nr_blocks")) {
 			sbinfo->max_blocks = memparse(value, &rest);
 			if (*rest)
@@ -2940,7 +2940,7 @@
 
 	if (sbinfo->max_blocks != shmem_default_max_blocks())
 		seq_printf(seq, ",size=%luk",
-			sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
+			sbinfo->max_blocks << (PAGE_SHIFT - 10));
 	if (sbinfo->max_inodes != shmem_default_max_inodes())
 		seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
 	if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
@@ -3082,8 +3082,8 @@
 	sbinfo->free_inodes = sbinfo->max_inodes;
 
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = TMPFS_MAGIC;
 	sb->s_op = &shmem_ops;
 	sb->s_time_gran = 1;

diff --git a/mm/slab.c b/mm/slab.c
index e719a5c..17e2848 100644
--- a/mm/slab.c
+++ b/mm/slab.c

@@ -2086,6 +2086,8 @@
 	}
 #endif
 
+	kasan_cache_create(cachep, &size, &flags);
+
 	size = ALIGN(size, cachep->align);
 	/*
 	 * We should restrict the number of objects in a slab to implement
@@ -2387,8 +2389,13 @@
 		 * cache which they are a constructor for.  Otherwise, deadlock.
 		 * They must also be threaded.
 		 */
-		if (cachep->ctor && !(cachep->flags & SLAB_POISON))
+		if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
+			kasan_unpoison_object_data(cachep,
+						   objp + obj_offset(cachep));
 			cachep->ctor(objp + obj_offset(cachep));
+			kasan_poison_object_data(
+				cachep, objp + obj_offset(cachep));
+		}
 
 		if (cachep->flags & SLAB_RED_ZONE) {
 			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
@@ -2409,6 +2416,7 @@
 			    struct page *page)
 {
 	int i;
+	void *objp;
 
 	cache_init_objs_debug(cachep, page);
 
@@ -2419,8 +2427,12 @@
 
 	for (i = 0; i < cachep->num; i++) {
 		/* constructor could break poison info */
-		if (DEBUG == 0 && cachep->ctor)
-			cachep->ctor(index_to_obj(cachep, page, i));
+		if (DEBUG == 0 && cachep->ctor) {
+			objp = index_to_obj(cachep, page, i);
+			kasan_unpoison_object_data(cachep, objp);
+			cachep->ctor(objp);
+			kasan_poison_object_data(cachep, objp);
+		}
 
 		set_free_obj(page, i, i);
 	}
@@ -2550,6 +2562,7 @@
 
 	slab_map_pages(cachep, page, freelist);
 
+	kasan_poison_slab(page);
 	cache_init_objs(cachep, page);
 
 	if (gfpflags_allow_blocking(local_flags))
@@ -3316,6 +3329,8 @@
 {
 	struct array_cache *ac = cpu_cache_get(cachep);
 
+	kasan_slab_free(cachep, objp);
+
 	check_irq_off();
 	kmemleak_free_recursive(objp, cachep->flags);
 	objp = cache_free_debugcheck(cachep, objp, caller);
@@ -3363,6 +3378,7 @@
 {
 	void *ret = slab_alloc(cachep, flags, _RET_IP_);
 
+	kasan_slab_alloc(cachep, ret, flags);
 	trace_kmem_cache_alloc(_RET_IP_, ret,
 			       cachep->object_size, cachep->size, flags);
 
@@ -3428,6 +3444,7 @@
 
 	ret = slab_alloc(cachep, flags, _RET_IP_);
 
+	kasan_kmalloc(cachep, ret, size, flags);
 	trace_kmalloc(_RET_IP_, ret,
 		      size, cachep->size, flags);
 	return ret;
@@ -3451,6 +3468,7 @@
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
 
+	kasan_slab_alloc(cachep, ret, flags);
 	trace_kmem_cache_alloc_node(_RET_IP_, ret,
 				    cachep->object_size, cachep->size,
 				    flags, nodeid);
@@ -3469,6 +3487,7 @@
 
 	ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
 
+	kasan_kmalloc(cachep, ret, size, flags);
 	trace_kmalloc_node(_RET_IP_, ret,
 			   size, cachep->size,
 			   flags, nodeid);
@@ -3481,11 +3500,15 @@
 __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
 {
 	struct kmem_cache *cachep;
+	void *ret;
 
 	cachep = kmalloc_slab(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
-	return kmem_cache_alloc_node_trace(cachep, flags, node, size);
+	ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
+	kasan_kmalloc(cachep, ret, size, flags);
+
+	return ret;
 }
 
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
@@ -3519,6 +3542,7 @@
 		return cachep;
 	ret = slab_alloc(cachep, flags, caller);
 
+	kasan_kmalloc(cachep, ret, size, flags);
 	trace_kmalloc(caller, ret,
 		      size, cachep->size, flags);
 
@@ -4290,10 +4314,18 @@
  */
 size_t ksize(const void *objp)
 {
+	size_t size;
+
 	BUG_ON(!objp);
 	if (unlikely(objp == ZERO_SIZE_PTR))
 		return 0;
 
-	return virt_to_cache(objp)->object_size;
+	size = virt_to_cache(objp)->object_size;
+	/* We assume that ksize callers could use the whole allocated area,
+	 * so we need to unpoison this area.
+	 */
+	kasan_krealloc(objp, size, GFP_NOWAIT);
+
+	return size;
 }
 EXPORT_SYMBOL(ksize);

diff --git a/mm/slab.h b/mm/slab.h
index ff39a8f..5969769 100644
--- a/mm/slab.h
+++ b/mm/slab.h

@@ -405,7 +405,7 @@
 		kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
 		kmemleak_alloc_recursive(object, s->object_size, 1,
 					 s->flags, flags);
-		kasan_slab_alloc(s, object);
+		kasan_slab_alloc(s, object, flags);
 	}
 	memcg_kmem_put_cache(s);
 }

diff --git a/mm/slab_common.c b/mm/slab_common.c
index b2e3796..3239bfd 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c

@@ -35,7 +35,7 @@
  */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
 		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
-		SLAB_FAILSLAB)
+		SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
 			 SLAB_NOTRACK | SLAB_ACCOUNT)
@@ -1013,7 +1013,7 @@
 	page = alloc_kmem_pages(flags, order);
 	ret = page ? page_address(page) : NULL;
 	kmemleak_alloc(ret, size, 1, flags);
-	kasan_kmalloc_large(ret, size);
+	kasan_kmalloc_large(ret, size, flags);
 	return ret;
 }
 EXPORT_SYMBOL(kmalloc_order);
@@ -1192,7 +1192,7 @@
 		ks = ksize(p);
 
 	if (ks >= new_size) {
-		kasan_krealloc((void *)p, new_size);
+		kasan_krealloc((void *)p, new_size, flags);
 		return (void *)p;
 	}
 

diff --git a/mm/slub.c b/mm/slub.c
index 7277413..4dbb109e 100644
--- a/mm/slub.c
+++ b/mm/slub.c

@@ -1313,7 +1313,7 @@
 static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
 {
 	kmemleak_alloc(ptr, size, 1, flags);
-	kasan_kmalloc_large(ptr, size);
+	kasan_kmalloc_large(ptr, size, flags);
 }
 
 static inline void kfree_hook(const void *x)
@@ -2596,7 +2596,7 @@
 {
 	void *ret = slab_alloc(s, gfpflags, _RET_IP_);
 	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
-	kasan_kmalloc(s, ret, size);
+	kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_trace);
@@ -2624,7 +2624,7 @@
 	trace_kmalloc_node(_RET_IP_, ret,
 			   size, s->size, gfpflags, node);
 
-	kasan_kmalloc(s, ret, size);
+	kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
@@ -3182,7 +3182,8 @@
 	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
 	init_tracking(kmem_cache_node, n);
 #endif
-	kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node));
+	kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
+		      GFP_KERNEL);
 	init_kmem_cache_node(n);
 	inc_slabs_node(kmem_cache_node, node, page->objects);
 
@@ -3561,7 +3562,7 @@
 
 	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
 
-	kasan_kmalloc(s, ret, size);
+	kasan_kmalloc(s, ret, size, flags);
 
 	return ret;
 }
@@ -3606,7 +3607,7 @@
 
 	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
 
-	kasan_kmalloc(s, ret, size);
+	kasan_kmalloc(s, ret, size, flags);
 
 	return ret;
 }
@@ -3635,7 +3636,7 @@
 	size_t size = __ksize(object);
 	/* We assume that ksize callers could use whole allocated area,
 	   so we need unpoison this area. */
-	kasan_krealloc(object, size);
+	kasan_krealloc(object, size, GFP_NOWAIT);
 	return size;
 }
 EXPORT_SYMBOL(ksize);

diff --git a/mm/swap.c b/mm/swap.c
index 09fe5e9..a0bc206 100644
--- a/mm/swap.c
+++ b/mm/swap.c

@@ -114,7 +114,7 @@
 
 		victim = list_entry(pages->prev, struct page, lru);
 		list_del(&victim->lru);
-		page_cache_release(victim);
+		put_page(victim);
 	}
 }
 EXPORT_SYMBOL(put_pages_list);
@@ -142,7 +142,7 @@
 			return seg;
 
 		pages[seg] = kmap_to_page(kiov[seg].iov_base);
-		page_cache_get(pages[seg]);
+		get_page(pages[seg]);
 	}
 
 	return seg;
@@ -236,7 +236,7 @@
 		struct pagevec *pvec;
 		unsigned long flags;
 
-		page_cache_get(page);
+		get_page(page);
 		local_irq_save(flags);
 		pvec = this_cpu_ptr(&lru_rotate_pvecs);
 		if (!pagevec_add(pvec, page))
@@ -294,7 +294,7 @@
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
 
-		page_cache_get(page);
+		get_page(page);
 		if (!pagevec_add(pvec, page))
 			pagevec_lru_move_fn(pvec, __activate_page, NULL);
 		put_cpu_var(activate_page_pvecs);
@@ -389,7 +389,7 @@
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
 
-	page_cache_get(page);
+	get_page(page);
 	if (!pagevec_space(pvec))
 		__pagevec_lru_add(pvec);
 	pagevec_add(pvec, page);
@@ -646,7 +646,7 @@
 	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
 		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
 
-		page_cache_get(page);
+		get_page(page);
 		if (!pagevec_add(pvec, page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
 		put_cpu_var(lru_deactivate_pvecs);
@@ -698,7 +698,7 @@
 }
 
 /**
- * release_pages - batched page_cache_release()
+ * release_pages - batched put_page()
  * @pages: array of pages to release
  * @nr: number of pages
  * @cold: whether the pages are cache cold

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 69cb246..366ce35 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c

@@ -85,7 +85,7 @@
 	VM_BUG_ON_PAGE(PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
-	page_cache_get(page);
+	get_page(page);
 	SetPageSwapCache(page);
 	set_page_private(page, entry.val);
 
@@ -109,7 +109,7 @@
 		VM_BUG_ON(error == -EEXIST);
 		set_page_private(page, 0UL);
 		ClearPageSwapCache(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	return error;
@@ -226,7 +226,7 @@
 	spin_unlock_irq(&address_space->tree_lock);
 
 	swapcache_free(entry);
-	page_cache_release(page);
+	put_page(page);
 }
 
 /* 
@@ -252,7 +252,7 @@
 void free_page_and_swap_cache(struct page *page)
 {
 	free_swap_cache(page);
-	page_cache_release(page);
+	put_page(page);
 }
 
 /*
@@ -380,7 +380,7 @@
 	} while (err != -ENOMEM);
 
 	if (new_page)
-		page_cache_release(new_page);
+		put_page(new_page);
 	return found_page;
 }
 
@@ -495,7 +495,7 @@
 			continue;
 		if (offset != entry_offset)
 			SetPageReadahead(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	blk_finish_plug(&plug);
 

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 560ad38..83874ec 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c

@@ -119,7 +119,7 @@
 		ret = try_to_free_swap(page);
 		unlock_page(page);
 	}
-	page_cache_release(page);
+	put_page(page);
 	return ret;
 }
 
@@ -1000,7 +1000,7 @@
 			page = find_get_page(swap_address_space(entry),
 						entry.val);
 			if (page && !trylock_page(page)) {
-				page_cache_release(page);
+				put_page(page);
 				page = NULL;
 			}
 		}
@@ -1017,7 +1017,7 @@
 			SetPageDirty(page);
 		}
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	return p != NULL;
 }
@@ -1518,7 +1518,7 @@
 		}
 		if (retval) {
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 			break;
 		}
 
@@ -1570,7 +1570,7 @@
 		 */
 		SetPageDirty(page);
 		unlock_page(page);
-		page_cache_release(page);
+		put_page(page);
 
 		/*
 		 * Make sure that we aren't completely killing
@@ -2574,7 +2574,7 @@
 out:
 	if (page && !IS_ERR(page)) {
 		kunmap(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	if (name)
 		putname(name);

diff --git a/mm/truncate.c b/mm/truncate.c
index 7598b55..b002728 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c

@@ -118,7 +118,7 @@
 		return -EIO;
 
 	if (page_has_private(page))
-		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+		do_invalidatepage(page, 0, PAGE_SIZE);
 
 	/*
 	 * Some filesystems seem to re-dirty the page even after
@@ -159,8 +159,8 @@
 {
 	if (page_mapped(page)) {
 		unmap_mapping_range(mapping,
-				   (loff_t)page->index << PAGE_CACHE_SHIFT,
-				   PAGE_CACHE_SIZE, 0);
+				   (loff_t)page->index << PAGE_SHIFT,
+				   PAGE_SIZE, 0);
 	}
 	return truncate_complete_page(mapping, page);
 }
@@ -241,8 +241,8 @@
 		return;
 
 	/* Offsets within partial pages */
-	partial_start = lstart & (PAGE_CACHE_SIZE - 1);
-	partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
+	partial_start = lstart & (PAGE_SIZE - 1);
+	partial_end = (lend + 1) & (PAGE_SIZE - 1);
 
 	/*
 	 * 'start' and 'end' always covers the range of pages to be fully
@@ -250,7 +250,7 @@
 	 * start of the range and 'partial_end' at the end of the range.
 	 * Note that 'end' is exclusive while 'lend' is inclusive.
 	 */
-	start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (lend == -1)
 		/*
 		 * lend == -1 indicates end-of-file so we have to set 'end'
@@ -259,7 +259,7 @@
 		 */
 		end = -1;
 	else
-		end = (lend + 1) >> PAGE_CACHE_SHIFT;
+		end = (lend + 1) >> PAGE_SHIFT;
 
 	pagevec_init(&pvec, 0);
 	index = start;
@@ -298,7 +298,7 @@
 	if (partial_start) {
 		struct page *page = find_lock_page(mapping, start - 1);
 		if (page) {
-			unsigned int top = PAGE_CACHE_SIZE;
+			unsigned int top = PAGE_SIZE;
 			if (start > end) {
 				/* Truncation within a single page */
 				top = partial_end;
@@ -311,7 +311,7 @@
 				do_invalidatepage(page, partial_start,
 						  top - partial_start);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 		}
 	}
 	if (partial_end) {
@@ -324,7 +324,7 @@
 				do_invalidatepage(page, 0,
 						  partial_end);
 			unlock_page(page);
-			page_cache_release(page);
+			put_page(page);
 		}
 	}
 	/*
@@ -538,7 +538,7 @@
 	if (mapping->a_ops->freepage)
 		mapping->a_ops->freepage(page);
 
-	page_cache_release(page);	/* pagecache ref */
+	put_page(page);	/* pagecache ref */
 	return 1;
 failed:
 	spin_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -608,18 +608,18 @@
 					 * Zap the rest of the file in one hit.
 					 */
 					unmap_mapping_range(mapping,
-					   (loff_t)index << PAGE_CACHE_SHIFT,
+					   (loff_t)index << PAGE_SHIFT,
 					   (loff_t)(1 + end - index)
-							 << PAGE_CACHE_SHIFT,
-					    0);
+							 << PAGE_SHIFT,
+							 0);
 					did_range_unmap = 1;
 				} else {
 					/*
 					 * Just zap this page
 					 */
 					unmap_mapping_range(mapping,
-					   (loff_t)index << PAGE_CACHE_SHIFT,
-					   PAGE_CACHE_SIZE, 0);
+					   (loff_t)index << PAGE_SHIFT,
+					   PAGE_SIZE, 0);
 				}
 			}
 			BUG_ON(page_mapped(page));
@@ -744,14 +744,14 @@
 
 	WARN_ON(to > inode->i_size);
 
-	if (from >= to || bsize == PAGE_CACHE_SIZE)
+	if (from >= to || bsize == PAGE_SIZE)
 		return;
 	/* Page straddling @from will not have any hole block created? */
 	rounded_from = round_up(from, bsize);
-	if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+	if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
 		return;
 
-	index = from >> PAGE_CACHE_SHIFT;
+	index = from >> PAGE_SHIFT;
 	page = find_lock_page(inode->i_mapping, index);
 	/* Page not cached? Nothing to do */
 	if (!page)
@@ -763,7 +763,7 @@
 	if (page_mkclean(page))
 		set_page_dirty(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 }
 EXPORT_SYMBOL(pagecache_isize_extended);
 

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9f3a029..af817e5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c

@@ -93,7 +93,7 @@
 	pte_unmap_unlock(dst_pte, ptl);
 	mem_cgroup_cancel_charge(page, memcg, false);
 out_release:
-	page_cache_release(page);
+	put_page(page);
 	goto out;
 }
 
@@ -287,7 +287,7 @@
 	up_read(&dst_mm->mmap_sem);
 out:
 	if (page)
-		page_cache_release(page);
+		put_page(page);
 	BUG_ON(copied < 0);
 	BUG_ON(err > 0);
 	BUG_ON(!copied && !err);

diff --git a/mm/zswap.c b/mm/zswap.c
index bf14508..91dad80 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c

@@ -869,7 +869,7 @@
 
 	case ZSWAP_SWAPCACHE_EXIST:
 		/* page is already in the swap cache, ignore for now */
-		page_cache_release(page);
+		put_page(page);
 		ret = -EEXIST;
 		goto fail;
 
@@ -897,7 +897,7 @@
 
 	/* start writeback */
 	__swap_writepage(page, &wbc, end_swap_bio_write);
-	page_cache_release(page);
+	put_page(page);
 	zswap_written_back_pages++;
 
 	spin_lock(&tree->lock);

diff --git a/net/Kconfig b/net/Kconfig
index e134498..a8934d8 100644
--- a/net/Kconfig
+++ b/net/Kconfig

@@ -397,7 +397,7 @@
 	  with light weight tunnel state associated with fib routes.
 
 config DST_CACHE
-	bool "dst cache"
+	bool
 	default n
 
 config NET_DEVLINK

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a73df33..8217aec 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c

@@ -437,6 +437,20 @@
 	return mtu;
 }
 
+static void br_set_gso_limits(struct net_bridge *br)
+{
+	unsigned int gso_max_size = GSO_MAX_SIZE;
+	u16 gso_max_segs = GSO_MAX_SEGS;
+	const struct net_bridge_port *p;
+
+	list_for_each_entry(p, &br->port_list, list) {
+		gso_max_size = min(gso_max_size, p->dev->gso_max_size);
+		gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
+	}
+	br->dev->gso_max_size = gso_max_size;
+	br->dev->gso_max_segs = gso_max_segs;
+}
+
 /*
  * Recomputes features using slave's features
  */
@@ -564,6 +578,7 @@
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
 
 	dev_set_mtu(br->dev, br_min_mtu(br));
+	br_set_gso_limits(br);
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
@@ -610,6 +625,7 @@
 	del_nbp(p);
 
 	dev_set_mtu(br->dev, br_min_mtu(br));
+	br_set_gso_limits(br);
 
 	spin_lock_bh(&br->lock);
 	changed_addr = br_stp_recalculate_bridge_id(br);

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index e234490..9cb7044 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c

@@ -582,7 +582,7 @@
 	int err;
 
 	err = switchdev_port_attr_set(br->dev, &attr);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		return err;
 
 	br->ageing_time = t;

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 67b2e27..8570bc7 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c

@@ -1521,6 +1521,8 @@
 	if (copy_from_user(&tmp, user, sizeof(tmp)))
 		return -EFAULT;
 
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
 	t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
 	if (!t)
 		return ret;
@@ -2332,6 +2334,8 @@
 	if (copy_from_user(&tmp, user, sizeof(tmp)))
 		return -EFAULT;
 
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
 	t = find_table_lock(net, tmp.name, &ret, &ebt_mutex);
 	if (!t)
 		return ret;

diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index adc8d72..77f7e7a 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c

@@ -40,7 +40,8 @@
 /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
  * or the bridge port (NF_BRIDGE PREROUTING).
  */
-static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
+static void nft_reject_br_send_v4_tcp_reset(struct net *net,
+					    struct sk_buff *oldskb,
 					    const struct net_device *dev,
 					    int hook)
 {
@@ -48,7 +49,6 @@
 	struct iphdr *niph;
 	const struct tcphdr *oth;
 	struct tcphdr _oth;
-	struct net *net = sock_net(oldskb->sk);
 
 	if (!nft_bridge_iphdr_validate(oldskb))
 		return;
@@ -75,7 +75,8 @@
 	br_deliver(br_port_get_rcu(dev), nskb);
 }
 
-static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
+static void nft_reject_br_send_v4_unreach(struct net *net,
+					  struct sk_buff *oldskb,
 					  const struct net_device *dev,
 					  int hook, u8 code)
 {
@@ -86,7 +87,6 @@
 	void *payload;
 	__wsum csum;
 	u8 proto;
-	struct net *net = sock_net(oldskb->sk);
 
 	if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
 		return;
@@ -273,17 +273,17 @@
 	case htons(ETH_P_IP):
 		switch (priv->type) {
 		case NFT_REJECT_ICMP_UNREACH:
-			nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
-						      pkt->hook,
+			nft_reject_br_send_v4_unreach(pkt->net, pkt->skb,
+						      pkt->in, pkt->hook,
 						      priv->icmp_code);
 			break;
 		case NFT_REJECT_TCP_RST:
-			nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in,
-							pkt->hook);
+			nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb,
+							pkt->in, pkt->hook);
 			break;
 		case NFT_REJECT_ICMPX_UNREACH:
-			nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
-						      pkt->hook,
+			nft_reject_br_send_v4_unreach(pkt->net, pkt->skb,
+						      pkt->in, pkt->hook,
 						      nft_reject_icmp_code(priv->icmp_code));
 			break;
 		}

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index bcbec33..dcc18c6 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c

@@ -361,7 +361,6 @@
 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
-	opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT;
 
 	/* get mon ip(s) */
 	/* ip1[:port1][,ip2[:port2]...] */
@@ -686,6 +685,9 @@
 			return client->auth_err;
 	}
 
+	pr_info("client%llu fsid %pU\n", ceph_client_id(client), &client->fsid);
+	ceph_debugfs_client_init(client);
+
 	return 0;
 }
 EXPORT_SYMBOL(__ceph_open_session);

diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 593dc2e..b902fbc 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c

@@ -112,15 +112,20 @@
 	struct ceph_mon_generic_request *req;
 	struct ceph_mon_client *monc = &client->monc;
 	struct rb_node *rp;
+	int i;
 
 	mutex_lock(&monc->mutex);
 
-	if (monc->have_mdsmap)
-		seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap);
-	if (monc->have_osdmap)
-		seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap);
-	if (monc->want_next_osdmap)
-		seq_printf(s, "want next osdmap\n");
+	for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+		seq_printf(s, "have %s %u", ceph_sub_str[i],
+			   monc->subs[i].have);
+		if (monc->subs[i].want)
+			seq_printf(s, " want %llu%s",
+				   le64_to_cpu(monc->subs[i].item.start),
+				   (monc->subs[i].item.flags &
+					CEPH_SUBSCRIBE_ONETIME ?  "" : "+"));
+		seq_putc(s, '\n');
+	}
 
 	for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
 		__u16 op;

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9382619..a550289 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c

@@ -235,18 +235,12 @@
 static int ceph_msgr_slab_init(void)
 {
 	BUG_ON(ceph_msg_cache);
-	ceph_msg_cache = kmem_cache_create("ceph_msg",
-					sizeof (struct ceph_msg),
-					__alignof__(struct ceph_msg), 0, NULL);
-
+	ceph_msg_cache = KMEM_CACHE(ceph_msg, 0);
 	if (!ceph_msg_cache)
 		return -ENOMEM;
 
 	BUG_ON(ceph_msg_data_cache);
-	ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
-					sizeof (struct ceph_msg_data),
-					__alignof__(struct ceph_msg_data),
-					0, NULL);
+	ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
 	if (ceph_msg_data_cache)
 		return 0;
 
@@ -275,7 +269,7 @@
 	}
 
 	BUG_ON(zero_page == NULL);
-	page_cache_release(zero_page);
+	put_page(zero_page);
 	zero_page = NULL;
 
 	ceph_msgr_slab_exit();
@@ -288,7 +282,7 @@
 
 	BUG_ON(zero_page != NULL);
 	zero_page = ZERO_PAGE(0);
-	page_cache_get(zero_page);
+	get_page(zero_page);
 
 	/*
 	 * The number of active work items is limited by the number of
@@ -1221,25 +1215,19 @@
 static void prepare_write_message_footer(struct ceph_connection *con)
 {
 	struct ceph_msg *m = con->out_msg;
-	int v = con->out_kvec_left;
 
 	m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
 
 	dout("prepare_write_message_footer %p\n", con);
-	con->out_kvec[v].iov_base = &m->footer;
+	con_out_kvec_add(con, sizeof_footer(con), &m->footer);
 	if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
 		if (con->ops->sign_message)
 			con->ops->sign_message(m);
 		else
 			m->footer.sig = 0;
-		con->out_kvec[v].iov_len = sizeof(m->footer);
-		con->out_kvec_bytes += sizeof(m->footer);
 	} else {
 		m->old_footer.flags = m->footer.flags;
-		con->out_kvec[v].iov_len = sizeof(m->old_footer);
-		con->out_kvec_bytes += sizeof(m->old_footer);
 	}
-	con->out_kvec_left++;
 	con->out_more = m->more_to_follow;
 	con->out_msg_done = true;
 }
@@ -1614,7 +1602,7 @@
 
 	dout("%s %p %d left\n", __func__, con, con->out_skip);
 	while (con->out_skip > 0) {
-		size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
+		size_t size = min(con->out_skip, (int) PAGE_SIZE);
 
 		ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
 		if (ret <= 0)
@@ -2409,11 +2397,7 @@
 	}
 
 	/* footer */
-	if (need_sign)
-		size = sizeof(m->footer);
-	else
-		size = sizeof(m->old_footer);
-
+	size = sizeof_footer(con);
 	end += size;
 	ret = read_partial(con, end, size, &m->footer);
 	if (ret <= 0)
@@ -3089,10 +3073,7 @@
 			con->out_skip += con_out_kvec_skip(con);
 		} else {
 			BUG_ON(!msg->data_length);
-			if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
-				con->out_skip += sizeof(msg->footer);
-			else
-				con->out_skip += sizeof(msg->old_footer);
+			con->out_skip += sizeof_footer(con);
 		}
 		/* data, middle, front */
 		if (msg->data_length)

diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index de85ddd..cf638c0 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c

@@ -122,51 +122,91 @@
 	ceph_msg_revoke(monc->m_subscribe);
 	ceph_msg_revoke_incoming(monc->m_subscribe_ack);
 	ceph_con_close(&monc->con);
-	monc->cur_mon = -1;
+
 	monc->pending_auth = 0;
 	ceph_auth_reset(monc->auth);
 }
 
 /*
- * Open a session with a (new) monitor.
+ * Pick a new monitor at random and set cur_mon.  If we are repicking
+ * (i.e. cur_mon is already set), be sure to pick a different one.
  */
-static int __open_session(struct ceph_mon_client *monc)
+static void pick_new_mon(struct ceph_mon_client *monc)
 {
-	char r;
-	int ret;
+	int old_mon = monc->cur_mon;
 
-	if (monc->cur_mon < 0) {
-		get_random_bytes(&r, 1);
-		monc->cur_mon = r % monc->monmap->num_mon;
-		dout("open_session num=%d r=%d -> mon%d\n",
-		     monc->monmap->num_mon, r, monc->cur_mon);
-		monc->sub_sent = 0;
-		monc->sub_renew_after = jiffies;  /* i.e., expired */
-		monc->want_next_osdmap = !!monc->want_next_osdmap;
+	BUG_ON(monc->monmap->num_mon < 1);
 
-		dout("open_session mon%d opening\n", monc->cur_mon);
-		ceph_con_open(&monc->con,
-			      CEPH_ENTITY_TYPE_MON, monc->cur_mon,
-			      &monc->monmap->mon_inst[monc->cur_mon].addr);
-
-		/* send an initial keepalive to ensure our timestamp is
-		 * valid by the time we are in an OPENED state */
-		ceph_con_keepalive(&monc->con);
-
-		/* initiatiate authentication handshake */
-		ret = ceph_auth_build_hello(monc->auth,
-					    monc->m_auth->front.iov_base,
-					    monc->m_auth->front_alloc_len);
-		__send_prepared_auth_request(monc, ret);
+	if (monc->monmap->num_mon == 1) {
+		monc->cur_mon = 0;
 	} else {
-		dout("open_session mon%d already open\n", monc->cur_mon);
+		int max = monc->monmap->num_mon;
+		int o = -1;
+		int n;
+
+		if (monc->cur_mon >= 0) {
+			if (monc->cur_mon < monc->monmap->num_mon)
+				o = monc->cur_mon;
+			if (o >= 0)
+				max--;
+		}
+
+		n = prandom_u32() % max;
+		if (o >= 0 && n >= o)
+			n++;
+
+		monc->cur_mon = n;
 	}
-	return 0;
+
+	dout("%s mon%d -> mon%d out of %d mons\n", __func__, old_mon,
+	     monc->cur_mon, monc->monmap->num_mon);
 }
 
-static bool __sub_expired(struct ceph_mon_client *monc)
+/*
+ * Open a session with a new monitor.
+ */
+static void __open_session(struct ceph_mon_client *monc)
 {
-	return time_after_eq(jiffies, monc->sub_renew_after);
+	int ret;
+
+	pick_new_mon(monc);
+
+	monc->hunting = true;
+	if (monc->had_a_connection) {
+		monc->hunt_mult *= CEPH_MONC_HUNT_BACKOFF;
+		if (monc->hunt_mult > CEPH_MONC_HUNT_MAX_MULT)
+			monc->hunt_mult = CEPH_MONC_HUNT_MAX_MULT;
+	}
+
+	monc->sub_renew_after = jiffies; /* i.e., expired */
+	monc->sub_renew_sent = 0;
+
+	dout("%s opening mon%d\n", __func__, monc->cur_mon);
+	ceph_con_open(&monc->con, CEPH_ENTITY_TYPE_MON, monc->cur_mon,
+		      &monc->monmap->mon_inst[monc->cur_mon].addr);
+
+	/*
+	 * send an initial keepalive to ensure our timestamp is valid
+	 * by the time we are in an OPENED state
+	 */
+	ceph_con_keepalive(&monc->con);
+
+	/* initiate authentication handshake */
+	ret = ceph_auth_build_hello(monc->auth,
+				    monc->m_auth->front.iov_base,
+				    monc->m_auth->front_alloc_len);
+	BUG_ON(ret <= 0);
+	__send_prepared_auth_request(monc, ret);
+}
+
+static void reopen_session(struct ceph_mon_client *monc)
+{
+	if (!monc->hunting)
+		pr_info("mon%d %s session lost, hunting for new mon\n",
+		    monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr.in_addr));
+
+	__close_session(monc);
+	__open_session(monc);
 }
 
 /*
@@ -174,74 +214,70 @@
  */
 static void __schedule_delayed(struct ceph_mon_client *monc)
 {
-	struct ceph_options *opt = monc->client->options;
 	unsigned long delay;
 
-	if (monc->cur_mon < 0 || __sub_expired(monc)) {
-		delay = 10 * HZ;
-	} else {
-		delay = 20 * HZ;
-		if (opt->monc_ping_timeout > 0)
-			delay = min(delay, opt->monc_ping_timeout / 3);
-	}
+	if (monc->hunting)
+		delay = CEPH_MONC_HUNT_INTERVAL * monc->hunt_mult;
+	else
+		delay = CEPH_MONC_PING_INTERVAL;
+
 	dout("__schedule_delayed after %lu\n", delay);
-	schedule_delayed_work(&monc->delayed_work,
-			      round_jiffies_relative(delay));
+	mod_delayed_work(system_wq, &monc->delayed_work,
+			 round_jiffies_relative(delay));
 }
 
+const char *ceph_sub_str[] = {
+	[CEPH_SUB_MDSMAP] = "mdsmap",
+	[CEPH_SUB_MONMAP] = "monmap",
+	[CEPH_SUB_OSDMAP] = "osdmap",
+};
+
 /*
- * Send subscribe request for mdsmap and/or osdmap.
+ * Send subscribe request for one or more maps, according to
+ * monc->subs.
  */
 static void __send_subscribe(struct ceph_mon_client *monc)
 {
-	dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n",
-	     (unsigned int)monc->sub_sent, __sub_expired(monc),
-	     monc->want_next_osdmap);
-	if ((__sub_expired(monc) && !monc->sub_sent) ||
-	    monc->want_next_osdmap == 1) {
-		struct ceph_msg *msg = monc->m_subscribe;
-		struct ceph_mon_subscribe_item *i;
-		void *p, *end;
-		int num;
+	struct ceph_msg *msg = monc->m_subscribe;
+	void *p = msg->front.iov_base;
+	void *const end = p + msg->front_alloc_len;
+	int num = 0;
+	int i;
 
-		p = msg->front.iov_base;
-		end = p + msg->front_alloc_len;
+	dout("%s sent %lu\n", __func__, monc->sub_renew_sent);
 
-		num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap;
-		ceph_encode_32(&p, num);
+	BUG_ON(monc->cur_mon < 0);
 
-		if (monc->want_next_osdmap) {
-			dout("__send_subscribe to 'osdmap' %u\n",
-			     (unsigned int)monc->have_osdmap);
-			ceph_encode_string(&p, end, "osdmap", 6);
-			i = p;
-			i->have = cpu_to_le64(monc->have_osdmap);
-			i->onetime = 1;
-			p += sizeof(*i);
-			monc->want_next_osdmap = 2;  /* requested */
-		}
-		if (monc->want_mdsmap) {
-			dout("__send_subscribe to 'mdsmap' %u+\n",
-			     (unsigned int)monc->have_mdsmap);
-			ceph_encode_string(&p, end, "mdsmap", 6);
-			i = p;
-			i->have = cpu_to_le64(monc->have_mdsmap);
-			i->onetime = 0;
-			p += sizeof(*i);
-		}
-		ceph_encode_string(&p, end, "monmap", 6);
-		i = p;
-		i->have = 0;
-		i->onetime = 0;
-		p += sizeof(*i);
+	if (!monc->sub_renew_sent)
+		monc->sub_renew_sent = jiffies | 1; /* never 0 */
 
-		msg->front.iov_len = p - msg->front.iov_base;
-		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
-		ceph_msg_revoke(msg);
-		ceph_con_send(&monc->con, ceph_msg_get(msg));
+	msg->hdr.version = cpu_to_le16(2);
 
-		monc->sub_sent = jiffies | 1;  /* never 0 */
+	for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+		if (monc->subs[i].want)
+			num++;
 	}
+	BUG_ON(num < 1); /* monmap sub is always there */
+	ceph_encode_32(&p, num);
+	for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
+		const char *s = ceph_sub_str[i];
+
+		if (!monc->subs[i].want)
+			continue;
+
+		dout("%s %s start %llu flags 0x%x\n", __func__, s,
+		     le64_to_cpu(monc->subs[i].item.start),
+		     monc->subs[i].item.flags);
+		ceph_encode_string(&p, end, s, strlen(s));
+		memcpy(p, &monc->subs[i].item, sizeof(monc->subs[i].item));
+		p += sizeof(monc->subs[i].item);
+	}
+
+	BUG_ON(p != (end - 35 - (ARRAY_SIZE(monc->subs) - num) * 19));
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+	ceph_msg_revoke(msg);
+	ceph_con_send(&monc->con, ceph_msg_get(msg));
 }
 
 static void handle_subscribe_ack(struct ceph_mon_client *monc,
@@ -255,15 +291,16 @@
 	seconds = le32_to_cpu(h->duration);
 
 	mutex_lock(&monc->mutex);
-	if (monc->hunting) {
-		pr_info("mon%d %s session established\n",
-			monc->cur_mon,
-			ceph_pr_addr(&monc->con.peer_addr.in_addr));
-		monc->hunting = false;
+	if (monc->sub_renew_sent) {
+		monc->sub_renew_after = monc->sub_renew_sent +
+					    (seconds >> 1) * HZ - 1;
+		dout("%s sent %lu duration %d renew after %lu\n", __func__,
+		     monc->sub_renew_sent, seconds, monc->sub_renew_after);
+		monc->sub_renew_sent = 0;
+	} else {
+		dout("%s sent %lu renew after %lu, ignoring\n", __func__,
+		     monc->sub_renew_sent, monc->sub_renew_after);
 	}
-	dout("handle_subscribe_ack after %d seconds\n", seconds);
-	monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1;
-	monc->sub_sent = 0;
 	mutex_unlock(&monc->mutex);
 	return;
 bad:
@@ -272,36 +309,82 @@
 }
 
 /*
- * Keep track of which maps we have
+ * Register interest in a map
+ *
+ * @sub: one of CEPH_SUB_*
+ * @epoch: X for "every map since X", or 0 for "just the latest"
  */
-int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
+static bool __ceph_monc_want_map(struct ceph_mon_client *monc, int sub,
+				 u32 epoch, bool continuous)
 {
-	mutex_lock(&monc->mutex);
-	monc->have_mdsmap = got;
-	mutex_unlock(&monc->mutex);
-	return 0;
-}
-EXPORT_SYMBOL(ceph_monc_got_mdsmap);
+	__le64 start = cpu_to_le64(epoch);
+	u8 flags = !continuous ? CEPH_SUBSCRIBE_ONETIME : 0;
 
-int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
+	dout("%s %s epoch %u continuous %d\n", __func__, ceph_sub_str[sub],
+	     epoch, continuous);
+
+	if (monc->subs[sub].want &&
+	    monc->subs[sub].item.start == start &&
+	    monc->subs[sub].item.flags == flags)
+		return false;
+
+	monc->subs[sub].item.start = start;
+	monc->subs[sub].item.flags = flags;
+	monc->subs[sub].want = true;
+
+	return true;
+}
+
+bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch,
+			bool continuous)
+{
+	bool need_request;
+
+	mutex_lock(&monc->mutex);
+	need_request = __ceph_monc_want_map(monc, sub, epoch, continuous);
+	mutex_unlock(&monc->mutex);
+
+	return need_request;
+}
+EXPORT_SYMBOL(ceph_monc_want_map);
+
+/*
+ * Keep track of which maps we have
+ *
+ * @sub: one of CEPH_SUB_*
+ */
+static void __ceph_monc_got_map(struct ceph_mon_client *monc, int sub,
+				u32 epoch)
+{
+	dout("%s %s epoch %u\n", __func__, ceph_sub_str[sub], epoch);
+
+	if (monc->subs[sub].want) {
+		if (monc->subs[sub].item.flags & CEPH_SUBSCRIBE_ONETIME)
+			monc->subs[sub].want = false;
+		else
+			monc->subs[sub].item.start = cpu_to_le64(epoch + 1);
+	}
+
+	monc->subs[sub].have = epoch;
+}
+
+void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch)
 {
 	mutex_lock(&monc->mutex);
-	monc->have_osdmap = got;
-	monc->want_next_osdmap = 0;
+	__ceph_monc_got_map(monc, sub, epoch);
 	mutex_unlock(&monc->mutex);
-	return 0;
 }
+EXPORT_SYMBOL(ceph_monc_got_map);
 
 /*
  * Register interest in the next osdmap
  */
 void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
 {
-	dout("request_next_osdmap have %u\n", monc->have_osdmap);
+	dout("%s have %u\n", __func__, monc->subs[CEPH_SUB_OSDMAP].have);
 	mutex_lock(&monc->mutex);
-	if (!monc->want_next_osdmap)
-		monc->want_next_osdmap = 1;
-	if (monc->want_next_osdmap < 2)
+	if (__ceph_monc_want_map(monc, CEPH_SUB_OSDMAP,
+				 monc->subs[CEPH_SUB_OSDMAP].have + 1, false))
 		__send_subscribe(monc);
 	mutex_unlock(&monc->mutex);
 }
@@ -320,15 +403,15 @@
 	long ret;
 
 	mutex_lock(&monc->mutex);
-	while (monc->have_osdmap < epoch) {
+	while (monc->subs[CEPH_SUB_OSDMAP].have < epoch) {
 		mutex_unlock(&monc->mutex);
 
 		if (timeout && time_after_eq(jiffies, started + timeout))
 			return -ETIMEDOUT;
 
 		ret = wait_event_interruptible_timeout(monc->client->auth_wq,
-						monc->have_osdmap >= epoch,
-						ceph_timeout_jiffies(timeout));
+				     monc->subs[CEPH_SUB_OSDMAP].have >= epoch,
+				     ceph_timeout_jiffies(timeout));
 		if (ret < 0)
 			return ret;
 
@@ -341,11 +424,14 @@
 EXPORT_SYMBOL(ceph_monc_wait_osdmap);
 
 /*
- *
+ * Open a session with a random monitor.  Request monmap and osdmap,
+ * which are waited upon in __ceph_open_session().
  */
 int ceph_monc_open_session(struct ceph_mon_client *monc)
 {
 	mutex_lock(&monc->mutex);
+	__ceph_monc_want_map(monc, CEPH_SUB_MONMAP, 0, true);
+	__ceph_monc_want_map(monc, CEPH_SUB_OSDMAP, 0, false);
 	__open_session(monc);
 	__schedule_delayed(monc);
 	mutex_unlock(&monc->mutex);
@@ -353,29 +439,15 @@
 }
 EXPORT_SYMBOL(ceph_monc_open_session);
 
-/*
- * We require the fsid and global_id in order to initialize our
- * debugfs dir.
- */
-static bool have_debugfs_info(struct ceph_mon_client *monc)
-{
-	dout("have_debugfs_info fsid %d globalid %lld\n",
-	     (int)monc->client->have_fsid, monc->auth->global_id);
-	return monc->client->have_fsid && monc->auth->global_id > 0;
-}
-
 static void ceph_monc_handle_map(struct ceph_mon_client *monc,
 				 struct ceph_msg *msg)
 {
 	struct ceph_client *client = monc->client;
 	struct ceph_monmap *monmap = NULL, *old = monc->monmap;
 	void *p, *end;
-	int had_debugfs_info, init_debugfs = 0;
 
 	mutex_lock(&monc->mutex);
 
-	had_debugfs_info = have_debugfs_info(monc);
-
 	dout("handle_monmap\n");
 	p = msg->front.iov_base;
 	end = p + msg->front.iov_len;
@@ -395,29 +467,11 @@
 	client->monc.monmap = monmap;
 	kfree(old);
 
-	if (!client->have_fsid) {
-		client->have_fsid = true;
-		if (!had_debugfs_info && have_debugfs_info(monc)) {
-			pr_info("client%lld fsid %pU\n",
-				ceph_client_id(monc->client),
-				&monc->client->fsid);
-			init_debugfs = 1;
-		}
-		mutex_unlock(&monc->mutex);
+	__ceph_monc_got_map(monc, CEPH_SUB_MONMAP, monc->monmap->epoch);
+	client->have_fsid = true;
 
-		if (init_debugfs) {
-			/*
-			 * do debugfs initialization without mutex to avoid
-			 * creating a locking dependency
-			 */
-			ceph_debugfs_client_init(monc->client);
-		}
-
-		goto out_unlocked;
-	}
 out:
 	mutex_unlock(&monc->mutex);
-out_unlocked:
 	wake_up_all(&client->auth_wq);
 }
 
@@ -745,18 +799,15 @@
 	dout("monc delayed_work\n");
 	mutex_lock(&monc->mutex);
 	if (monc->hunting) {
-		__close_session(monc);
-		__open_session(monc);  /* continue hunting */
+		dout("%s continuing hunt\n", __func__);
+		reopen_session(monc);
 	} else {
-		struct ceph_options *opt = monc->client->options;
 		int is_auth = ceph_auth_is_authenticated(monc->auth);
 		if (ceph_con_keepalive_expired(&monc->con,
-					       opt->monc_ping_timeout)) {
+					       CEPH_MONC_PING_TIMEOUT)) {
 			dout("monc keepalive timeout\n");
 			is_auth = 0;
-			__close_session(monc);
-			monc->hunting = true;
-			__open_session(monc);
+			reopen_session(monc);
 		}
 
 		if (!monc->hunting) {
@@ -764,8 +815,14 @@
 			__validate_auth(monc);
 		}
 
-		if (is_auth)
-			__send_subscribe(monc);
+		if (is_auth) {
+			unsigned long now = jiffies;
+
+			dout("%s renew subs? now %lu renew after %lu\n",
+			     __func__, now, monc->sub_renew_after);
+			if (time_after_eq(now, monc->sub_renew_after))
+				__send_subscribe(monc);
+		}
 	}
 	__schedule_delayed(monc);
 	mutex_unlock(&monc->mutex);
@@ -852,18 +909,14 @@
 		      &monc->client->msgr);
 
 	monc->cur_mon = -1;
-	monc->hunting = true;
-	monc->sub_renew_after = jiffies;
-	monc->sub_sent = 0;
+	monc->had_a_connection = false;
+	monc->hunt_mult = 1;
 
 	INIT_DELAYED_WORK(&monc->delayed_work, delayed_work);
 	monc->generic_request_tree = RB_ROOT;
 	monc->num_generic_requests = 0;
 	monc->last_tid = 0;
 
-	monc->have_mdsmap = 0;
-	monc->have_osdmap = 0;
-	monc->want_next_osdmap = 1;
 	return 0;
 
 out_auth_reply:
@@ -888,7 +941,7 @@
 
 	mutex_lock(&monc->mutex);
 	__close_session(monc);
-
+	monc->cur_mon = -1;
 	mutex_unlock(&monc->mutex);
 
 	/*
@@ -910,26 +963,40 @@
 }
 EXPORT_SYMBOL(ceph_monc_stop);
 
+static void finish_hunting(struct ceph_mon_client *monc)
+{
+	if (monc->hunting) {
+		dout("%s found mon%d\n", __func__, monc->cur_mon);
+		monc->hunting = false;
+		monc->had_a_connection = true;
+		monc->hunt_mult /= 2; /* reduce by 50% */
+		if (monc->hunt_mult < 1)
+			monc->hunt_mult = 1;
+	}
+}
+
 static void handle_auth_reply(struct ceph_mon_client *monc,
 			      struct ceph_msg *msg)
 {
 	int ret;
 	int was_auth = 0;
-	int had_debugfs_info, init_debugfs = 0;
 
 	mutex_lock(&monc->mutex);
-	had_debugfs_info = have_debugfs_info(monc);
 	was_auth = ceph_auth_is_authenticated(monc->auth);
 	monc->pending_auth = 0;
 	ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
 				     msg->front.iov_len,
 				     monc->m_auth->front.iov_base,
 				     monc->m_auth->front_alloc_len);
+	if (ret > 0) {
+		__send_prepared_auth_request(monc, ret);
+		goto out;
+	}
+
+	finish_hunting(monc);
+
 	if (ret < 0) {
 		monc->client->auth_err = ret;
-		wake_up_all(&monc->client->auth_wq);
-	} else if (ret > 0) {
-		__send_prepared_auth_request(monc, ret);
 	} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
 		dout("authenticated, starting session\n");
 
@@ -939,23 +1006,15 @@
 
 		__send_subscribe(monc);
 		__resend_generic_request(monc);
+
+		pr_info("mon%d %s session established\n", monc->cur_mon,
+			ceph_pr_addr(&monc->con.peer_addr.in_addr));
 	}
 
-	if (!had_debugfs_info && have_debugfs_info(monc)) {
-		pr_info("client%lld fsid %pU\n",
-			ceph_client_id(monc->client),
-			&monc->client->fsid);
-		init_debugfs = 1;
-	}
+out:
 	mutex_unlock(&monc->mutex);
-
-	if (init_debugfs) {
-		/*
-		 * do debugfs initialization without mutex to avoid
-		 * creating a locking dependency
-		 */
-		ceph_debugfs_client_init(monc->client);
-	}
+	if (monc->client->auth_err < 0)
+		wake_up_all(&monc->client->auth_wq);
 }
 
 static int __validate_auth(struct ceph_mon_client *monc)
@@ -1096,29 +1155,17 @@
 {
 	struct ceph_mon_client *monc = con->private;
 
-	if (!monc)
-		return;
-
-	dout("mon_fault\n");
 	mutex_lock(&monc->mutex);
-	if (!con->private)
-		goto out;
-
-	if (!monc->hunting)
-		pr_info("mon%d %s session lost, "
-			"hunting for new mon\n", monc->cur_mon,
-			ceph_pr_addr(&monc->con.peer_addr.in_addr));
-
-	__close_session(monc);
-	if (!monc->hunting) {
-		/* start hunting */
-		monc->hunting = true;
-		__open_session(monc);
-	} else {
-		/* already hunting, let's wait a bit */
-		__schedule_delayed(monc);
+	dout("%s mon%d\n", __func__, monc->cur_mon);
+	if (monc->cur_mon >= 0) {
+		if (!monc->hunting) {
+			dout("%s hunting for new mon\n", __func__);
+			reopen_session(monc);
+			__schedule_delayed(monc);
+		} else {
+			dout("%s already hunting\n", __func__);
+		}
 	}
-out:
 	mutex_unlock(&monc->mutex);
 }
 

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5bc0537..32355d9d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c

@@ -338,9 +338,10 @@
 	ceph_put_snap_context(req->r_snapc);
 	if (req->r_mempool)
 		mempool_free(req, req->r_osdc->req_mempool);
-	else
+	else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS)
 		kmem_cache_free(ceph_osd_request_cache, req);
-
+	else
+		kfree(req);
 }
 
 void ceph_osdc_get_request(struct ceph_osd_request *req)
@@ -369,28 +370,22 @@
 	struct ceph_msg *msg;
 	size_t msg_size;
 
-	BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
-	BUG_ON(num_ops > CEPH_OSD_MAX_OP);
-
-	msg_size = 4 + 4 + 8 + 8 + 4+8;
-	msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
-	msg_size += 1 + 8 + 4 + 4;     /* pg_t */
-	msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
-	msg_size += 2 + num_ops*sizeof(struct ceph_osd_op);
-	msg_size += 8;  /* snapid */
-	msg_size += 8;  /* snap_seq */
-	msg_size += 8 * (snapc ? snapc->num_snaps : 0);  /* snaps */
-	msg_size += 4;
-
 	if (use_mempool) {
+		BUG_ON(num_ops > CEPH_OSD_SLAB_OPS);
 		req = mempool_alloc(osdc->req_mempool, gfp_flags);
-		memset(req, 0, sizeof(*req));
+	} else if (num_ops <= CEPH_OSD_SLAB_OPS) {
+		req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags);
 	} else {
-		req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
+		BUG_ON(num_ops > CEPH_OSD_MAX_OPS);
+		req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]),
+			      gfp_flags);
 	}
-	if (req == NULL)
+	if (unlikely(!req))
 		return NULL;
 
+	/* req only, each op is zeroed in _osd_req_op_init() */
+	memset(req, 0, sizeof(*req));
+
 	req->r_osdc = osdc;
 	req->r_mempool = use_mempool;
 	req->r_num_ops = num_ops;
@@ -408,18 +403,36 @@
 	req->r_base_oloc.pool = -1;
 	req->r_target_oloc.pool = -1;
 
+	msg_size = OSD_OPREPLY_FRONT_LEN;
+	if (num_ops > CEPH_OSD_SLAB_OPS) {
+		/* ceph_osd_op and rval */
+		msg_size += (num_ops - CEPH_OSD_SLAB_OPS) *
+			    (sizeof(struct ceph_osd_op) + 4);
+	}
+
 	/* create reply message */
 	if (use_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
 	else
-		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY,
-				   OSD_OPREPLY_FRONT_LEN, gfp_flags, true);
+		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size,
+				   gfp_flags, true);
 	if (!msg) {
 		ceph_osdc_put_request(req);
 		return NULL;
 	}
 	req->r_reply = msg;
 
+	msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */
+	msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */
+	msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
+	msg_size += 1 + 8 + 4 + 4; /* pgid */
+	msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
+	msg_size += 2 + num_ops * sizeof(struct ceph_osd_op);
+	msg_size += 8; /* snapid */
+	msg_size += 8; /* snap_seq */
+	msg_size += 4 + 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
+	msg_size += 4; /* retry_attempt */
+
 	/* create request message; allow space for oid */
 	if (use_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -498,7 +511,7 @@
 	if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL)
 		payload_len += length;
 
-	op->payload_len = payload_len;
+	op->indata_len = payload_len;
 }
 EXPORT_SYMBOL(osd_req_op_extent_init);
 
@@ -517,10 +530,32 @@
 	BUG_ON(length > previous);
 
 	op->extent.length = length;
-	op->payload_len -= previous - length;
+	op->indata_len -= previous - length;
 }
 EXPORT_SYMBOL(osd_req_op_extent_update);
 
+void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
+				unsigned int which, u64 offset_inc)
+{
+	struct ceph_osd_req_op *op, *prev_op;
+
+	BUG_ON(which + 1 >= osd_req->r_num_ops);
+
+	prev_op = &osd_req->r_ops[which];
+	op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags);
+	/* dup previous one */
+	op->indata_len = prev_op->indata_len;
+	op->outdata_len = prev_op->outdata_len;
+	op->extent = prev_op->extent;
+	/* adjust offset */
+	op->extent.offset += offset_inc;
+	op->extent.length -= offset_inc;
+
+	if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL)
+		op->indata_len -= offset_inc;
+}
+EXPORT_SYMBOL(osd_req_op_extent_dup_last);
+
 void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
 			u16 opcode, const char *class, const char *method)
 {
@@ -554,7 +589,7 @@
 
 	op->cls.argc = 0;	/* currently unused */
 
-	op->payload_len = payload_len;
+	op->indata_len = payload_len;
 }
 EXPORT_SYMBOL(osd_req_op_cls_init);
 
@@ -587,7 +622,7 @@
 	op->xattr.cmp_mode = cmp_mode;
 
 	ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
-	op->payload_len = payload_len;
+	op->indata_len = payload_len;
 	return 0;
 }
 EXPORT_SYMBOL(osd_req_op_xattr_init);
@@ -707,7 +742,7 @@
 			BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
 			dst->cls.indata_len = cpu_to_le32(data_length);
 			ceph_osdc_msg_data_add(req->r_request, osd_data);
-			src->payload_len += data_length;
+			src->indata_len += data_length;
 			request_data_len += data_length;
 		}
 		osd_data = &src->cls.response_data;
@@ -750,7 +785,7 @@
 
 	dst->op = cpu_to_le16(src->op);
 	dst->flags = cpu_to_le32(src->flags);
-	dst->payload_len = cpu_to_le32(src->payload_len);
+	dst->payload_len = cpu_to_le32(src->indata_len);
 
 	return request_data_len;
 }
@@ -1810,7 +1845,7 @@
 
 	ceph_decode_need(&p, end, 4, bad_put);
 	numops = ceph_decode_32(&p);
-	if (numops > CEPH_OSD_MAX_OP)
+	if (numops > CEPH_OSD_MAX_OPS)
 		goto bad_put;
 	if (numops != req->r_num_ops)
 		goto bad_put;
@@ -1821,7 +1856,7 @@
 		int len;
 
 		len = le32_to_cpu(op->payload_len);
-		req->r_reply_op_len[i] = len;
+		req->r_ops[i].outdata_len = len;
 		dout(" op %d has %d bytes\n", i, len);
 		payload_len += len;
 		p += sizeof(*op);
@@ -1836,7 +1871,7 @@
 	ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
 	retry_attempt = ceph_decode_32(&p);
 	for (i = 0; i < numops; i++)
-		req->r_reply_op_result[i] = ceph_decode_32(&p);
+		req->r_ops[i].rval = ceph_decode_32(&p);
 
 	if (le16_to_cpu(msg->hdr.version) >= 6) {
 		p += 8 + 4; /* skip replay_version */
@@ -2187,7 +2222,8 @@
 		goto bad;
 done:
 	downgrade_write(&osdc->map_sem);
-	ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
+	ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
+			  osdc->osdmap->epoch);
 
 	/*
 	 * subscribe to subsequent osdmap updates if full to ensure
@@ -2646,8 +2682,8 @@
 	    round_jiffies_relative(osdc->client->options->osd_idle_ttl));
 
 	err = -ENOMEM;
-	osdc->req_mempool = mempool_create_kmalloc_pool(10,
-					sizeof(struct ceph_osd_request));
+	osdc->req_mempool = mempool_create_slab_pool(10,
+						     ceph_osd_request_cache);
 	if (!osdc->req_mempool)
 		goto out;
 
@@ -2782,11 +2818,12 @@
 
 int ceph_osdc_setup(void)
 {
+	size_t size = sizeof(struct ceph_osd_request) +
+	    CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
+
 	BUG_ON(ceph_osd_request_cache);
-	ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
-					sizeof (struct ceph_osd_request),
-					__alignof__(struct ceph_osd_request),
-					0, NULL);
+	ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size,
+						   0, 0, NULL);
 
 	return ceph_osd_request_cache ? 0 : -ENOMEM;
 }

diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index c7c220a..6864007 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c

@@ -56,7 +56,7 @@
 		size_t bit = pl->room;
 		int ret;
 
-		memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK),
+		memcpy(pl->mapped_tail + (pl->length & ~PAGE_MASK),
 		       buf, bit);
 		pl->length += bit;
 		pl->room -= bit;
@@ -67,7 +67,7 @@
 			return ret;
 	}
 
-	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len);
+	memcpy(pl->mapped_tail + (pl->length & ~PAGE_MASK), buf, len);
 	pl->length += len;
 	pl->room -= len;
 	return 0;

diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 10297f7..00d2601 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c

@@ -95,19 +95,19 @@
 					 loff_t off, size_t len)
 {
 	int i = 0;
-	int po = off & ~PAGE_CACHE_MASK;
+	int po = off & ~PAGE_MASK;
 	int left = len;
 	int l, bad;
 
 	while (left > 0) {
-		l = min_t(int, PAGE_CACHE_SIZE-po, left);
+		l = min_t(int, PAGE_SIZE-po, left);
 		bad = copy_from_user(page_address(pages[i]) + po, data, l);
 		if (bad == l)
 			return -EFAULT;
 		data += l - bad;
 		left -= l - bad;
 		po += l - bad;
-		if (po == PAGE_CACHE_SIZE) {
+		if (po == PAGE_SIZE) {
 			po = 0;
 			i++;
 		}
@@ -121,17 +121,17 @@
 				    loff_t off, size_t len)
 {
 	int i = 0;
-	size_t po = off & ~PAGE_CACHE_MASK;
+	size_t po = off & ~PAGE_MASK;
 	size_t left = len;
 
 	while (left > 0) {
-		size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+		size_t l = min_t(size_t, PAGE_SIZE-po, left);
 
 		memcpy(page_address(pages[i]) + po, data, l);
 		data += l;
 		left -= l;
 		po += l;
-		if (po == PAGE_CACHE_SIZE) {
+		if (po == PAGE_SIZE) {
 			po = 0;
 			i++;
 		}
@@ -144,17 +144,17 @@
 				    loff_t off, size_t len)
 {
 	int i = 0;
-	size_t po = off & ~PAGE_CACHE_MASK;
+	size_t po = off & ~PAGE_MASK;
 	size_t left = len;
 
 	while (left > 0) {
-		size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+		size_t l = min_t(size_t, PAGE_SIZE-po, left);
 
 		memcpy(data, page_address(pages[i]) + po, l);
 		data += l;
 		left -= l;
 		po += l;
-		if (po == PAGE_CACHE_SIZE) {
+		if (po == PAGE_SIZE) {
 			po = 0;
 			i++;
 		}
@@ -168,25 +168,25 @@
  */
 void ceph_zero_page_vector_range(int off, int len, struct page **pages)
 {
-	int i = off >> PAGE_CACHE_SHIFT;
+	int i = off >> PAGE_SHIFT;
 
-	off &= ~PAGE_CACHE_MASK;
+	off &= ~PAGE_MASK;
 
 	dout("zero_page_vector_page %u~%u\n", off, len);
 
 	/* leading partial page? */
 	if (off) {
-		int end = min((int)PAGE_CACHE_SIZE, off + len);
+		int end = min((int)PAGE_SIZE, off + len);
 		dout("zeroing %d %p head from %d\n", i, pages[i],
 		     (int)off);
 		zero_user_segment(pages[i], off, end);
 		len -= (end - off);
 		i++;
 	}
-	while (len >= PAGE_CACHE_SIZE) {
+	while (len >= PAGE_SIZE) {
 		dout("zeroing %d %p len=%d\n", i, pages[i], len);
-		zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
-		len -= PAGE_CACHE_SIZE;
+		zero_user_segment(pages[i], 0, PAGE_SIZE);
+		len -= PAGE_SIZE;
 		i++;
 	}
 	/* trailing partial page? */

diff --git a/net/core/dev.c b/net/core/dev.c
index edb7179..b9bcbe7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c

@@ -4438,7 +4438,7 @@
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
 		NAPI_GRO_CB(skb)->free = 0;
-		NAPI_GRO_CB(skb)->udp_mark = 0;
+		NAPI_GRO_CB(skb)->encap_mark = 0;
 		NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
 
 		/* Setup for GRO checksum validation */
@@ -6445,6 +6445,7 @@
  *	dev_get_phys_port_name - Get device physical port name
  *	@dev: device
  *	@name: port name
+ *	@len: limit of bytes to copy to name
  *
  *	Get device physical port name
  */

diff --git a/net/core/filter.c b/net/core/filter.c
index b7177d0..ca7f832 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c

@@ -1149,7 +1149,8 @@
 }
 EXPORT_SYMBOL_GPL(bpf_prog_destroy);
 
-static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
+static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk,
+			    bool locked)
 {
 	struct sk_filter *fp, *old_fp;
 
@@ -1165,10 +1166,8 @@
 		return -ENOMEM;
 	}
 
-	old_fp = rcu_dereference_protected(sk->sk_filter,
-					   sock_owned_by_user(sk));
+	old_fp = rcu_dereference_protected(sk->sk_filter, locked);
 	rcu_assign_pointer(sk->sk_filter, fp);
-
 	if (old_fp)
 		sk_filter_uncharge(sk, old_fp);
 
@@ -1247,7 +1246,8 @@
  * occurs or there is insufficient memory for the filter a negative
  * errno code is returned. On success the return is zero.
  */
-int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk,
+		       bool locked)
 {
 	struct bpf_prog *prog = __get_filter(fprog, sk);
 	int err;
@@ -1255,7 +1255,7 @@
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	err = __sk_attach_prog(prog, sk);
+	err = __sk_attach_prog(prog, sk, locked);
 	if (err < 0) {
 		__bpf_prog_release(prog);
 		return err;
@@ -1263,7 +1263,12 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(sk_attach_filter);
+EXPORT_SYMBOL_GPL(__sk_attach_filter);
+
+int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+{
+	return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk));
+}
 
 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 {
@@ -1309,7 +1314,7 @@
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	err = __sk_attach_prog(prog, sk);
+	err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk));
 	if (err < 0) {
 		bpf_prog_put(prog);
 		return err;
@@ -1764,6 +1769,7 @@
 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
 		switch (size) {
 		case offsetof(struct bpf_tunnel_key, tunnel_label):
+		case offsetof(struct bpf_tunnel_key, tunnel_ext):
 			goto set_compat;
 		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
 			/* Fixup deprecated structure layouts here, so we have
@@ -1849,6 +1855,7 @@
 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
 		switch (size) {
 		case offsetof(struct bpf_tunnel_key, tunnel_label):
+		case offsetof(struct bpf_tunnel_key, tunnel_ext):
 		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
 			/* Fixup deprecated structure layouts here, so we have
 			 * a common path later on.
@@ -1861,7 +1868,8 @@
 			return -EINVAL;
 		}
 	}
-	if (unlikely(!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label))
+	if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
+		     from->tunnel_ext))
 		return -EINVAL;
 
 	skb_dst_drop(skb);
@@ -2247,7 +2255,7 @@
 }
 late_initcall(register_sk_filter_ops);
 
-int sk_detach_filter(struct sock *sk)
+int __sk_detach_filter(struct sock *sk, bool locked)
 {
 	int ret = -ENOENT;
 	struct sk_filter *filter;
@@ -2255,8 +2263,7 @@
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
 		return -EPERM;
 
-	filter = rcu_dereference_protected(sk->sk_filter,
-					   sock_owned_by_user(sk));
+	filter = rcu_dereference_protected(sk->sk_filter, locked);
 	if (filter) {
 		RCU_INIT_POINTER(sk->sk_filter, NULL);
 		sk_filter_uncharge(sk, filter);
@@ -2265,7 +2272,12 @@
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(sk_detach_filter);
+EXPORT_SYMBOL_GPL(__sk_detach_filter);
+
+int sk_detach_filter(struct sock *sk)
+{
+	return __sk_detach_filter(sk, sock_owned_by_user(sk));
+}
 
 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
 		  unsigned int len)

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 92d886f..4573d81 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c

@@ -191,6 +191,7 @@
 /**
  * gen_new_estimator - create a new rate estimator
  * @bstats: basic statistics
+ * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
  * @stats_lock: statistics lock
  * @opt: rate estimator configuration TLV
@@ -287,6 +288,7 @@
 /**
  * gen_replace_estimator - replace rate estimator configuration
  * @bstats: basic statistics
+ * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
  * @stats_lock: statistics lock
  * @opt: rate estimator configuration TLV

diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1e2f46a..e640462 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c

@@ -140,6 +140,7 @@
 /**
  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
  * @d: dumping handle
+ * @cpu: copy statistic per cpu
  * @b: basic statistics
  *
  * Appends the basic statistics to the top level TLV created by

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 94acfc8..a57bd17 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c

@@ -603,7 +603,6 @@
 	const struct net_device_ops *ops;
 	int err;
 
-	np->dev = ndev;
 	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
 	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
 
@@ -670,6 +669,7 @@
 		goto unlock;
 	}
 	dev_hold(ndev);
+	np->dev = ndev;
 
 	if (netdev_master_upper_dev_get(ndev)) {
 		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
@@ -770,6 +770,7 @@
 	return 0;
 
 put:
+	np->dev = NULL;
 	dev_put(ndev);
 unlock:
 	rtnl_unlock();

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d2d9e5e..a75f7e9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c

@@ -895,6 +895,8 @@
 	       + nla_total_size(4) /* IFLA_PROMISCUITY */
 	       + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
 	       + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
+	       + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */
+	       + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -907,6 +909,7 @@
 	       + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
 	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
 
 }
@@ -1223,6 +1226,8 @@
 	    nla_put_u32(skb, IFLA_GROUP, dev->group) ||
 	    nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
 	    nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
+	    nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
+	    nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
 #ifdef CONFIG_RPS
 	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
 #endif
@@ -1389,6 +1394,8 @@
 	[IFLA_VF_RSS_QUERY_EN]	= { .len = sizeof(struct ifla_vf_rss_query_en) },
 	[IFLA_VF_STATS]		= { .type = NLA_NESTED },
 	[IFLA_VF_TRUST]		= { .len = sizeof(struct ifla_vf_trust) },
+	[IFLA_VF_IB_NODE_GUID]	= { .len = sizeof(struct ifla_vf_guid) },
+	[IFLA_VF_IB_PORT_GUID]	= { .len = sizeof(struct ifla_vf_guid) },
 };
 
 static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1593,6 +1600,22 @@
 	return 0;
 }
 
+static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt,
+				  int guid_type)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type);
+}
+
+static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type)
+{
+	if (dev->type != ARPHRD_INFINIBAND)
+		return -EOPNOTSUPP;
+
+	return handle_infiniband_guid(dev, ivt, guid_type);
+}
+
 static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -1695,6 +1718,24 @@
 			return err;
 	}
 
+	if (tb[IFLA_VF_IB_NODE_GUID]) {
+		struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
+
+		if (!ops->ndo_set_vf_guid)
+			return -EOPNOTSUPP;
+
+		return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
+	}
+
+	if (tb[IFLA_VF_IB_PORT_GUID]) {
+		struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
+
+		if (!ops->ndo_set_vf_guid)
+			return -EOPNOTSUPP;
+
+		return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID);
+	}
+
 	return err;
 }
 

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f044f97..d04c2d1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c

@@ -815,7 +815,7 @@
 	trace_consume_skb(skb);
 
 	/* if SKB is a clone, don't handle this case */
-	if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+	if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
 		__kfree_skb(skb);
 		return;
 	}

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0cc923f..9e48199 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c

@@ -1380,6 +1380,19 @@
 	return pp;
 }
 
+static struct sk_buff **ipip_gro_receive(struct sk_buff **head,
+					 struct sk_buff *skb)
+{
+	if (NAPI_GRO_CB(skb)->encap_mark) {
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+	NAPI_GRO_CB(skb)->encap_mark = 1;
+
+	return inet_gro_receive(head, skb);
+}
+
 #define SECONDS_PER_DAY	86400
 
 /* inet_current_timestamp - Return IP network timestamp
@@ -1402,7 +1415,7 @@
 	msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC;
 
 	/* Convert to network byte order. */
-	return htons(msecs);
+	return htonl(msecs);
 }
 EXPORT_SYMBOL(inet_current_timestamp);
 
@@ -1448,6 +1461,13 @@
 	return err;
 }
 
+static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	skb->encapsulation = 1;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP;
+	return inet_gro_complete(skb, nhoff);
+}
+
 int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 			 unsigned short type, unsigned char protocol,
 			 struct net *net)
@@ -1675,8 +1695,8 @@
 static const struct net_offload ipip_offload = {
 	.callbacks = {
 		.gso_segment	= inet_gso_segment,
-		.gro_receive	= inet_gro_receive,
-		.gro_complete	= inet_gro_complete,
+		.gro_receive	= ipip_gro_receive,
+		.gro_complete	= ipip_gro_complete,
 	},
 };
 

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 21add55..8a9246d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c

@@ -280,7 +280,6 @@
 	struct in_device *in_dev;
 	struct fib_result res;
 	struct rtable *rt;
-	struct flowi4 fl4;
 	struct net *net;
 	int scope;
 
@@ -296,14 +295,13 @@
 
 	scope = RT_SCOPE_UNIVERSE;
 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
-		fl4.flowi4_oif = 0;
-		fl4.flowi4_iif = LOOPBACK_IFINDEX;
-		fl4.daddr = ip_hdr(skb)->saddr;
-		fl4.saddr = 0;
-		fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
-		fl4.flowi4_scope = scope;
-		fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
-		fl4.flowi4_tun_key.tun_id = 0;
+		struct flowi4 fl4 = {
+			.flowi4_iif = LOOPBACK_IFINDEX,
+			.daddr = ip_hdr(skb)->saddr,
+			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+			.flowi4_scope = scope,
+			.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
+		};
 		if (!fib_lookup(net, &fl4, &res, 0))
 			return FIB_RES_PREFSRC(net, res);
 	} else {

diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 7804842..5a94aea 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c

@@ -48,7 +48,7 @@
 	return sk->sk_user_data;
 }
 
-static void fou_recv_pull(struct sk_buff *skb, size_t len)
+static int fou_recv_pull(struct sk_buff *skb, size_t len)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
@@ -59,6 +59,7 @@
 	__skb_pull(skb, len);
 	skb_postpull_rcsum(skb, udp_hdr(skb), len);
 	skb_reset_transport_header(skb);
+	return iptunnel_pull_offloads(skb);
 }
 
 static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
@@ -68,9 +69,14 @@
 	if (!fou)
 		return 1;
 
-	fou_recv_pull(skb, sizeof(struct udphdr));
+	if (fou_recv_pull(skb, sizeof(struct udphdr)))
+		goto drop;
 
 	return -fou->protocol;
+
+drop:
+	kfree_skb(skb);
+	return 0;
 }
 
 static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
@@ -170,6 +176,9 @@
 	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
 	skb_reset_transport_header(skb);
 
+	if (iptunnel_pull_offloads(skb))
+		goto drop;
+
 	return -guehdr->proto_ctype;
 
 drop:
@@ -186,6 +195,14 @@
 	u8 proto = NAPI_GRO_CB(skb)->proto;
 	const struct net_offload **offloads;
 
+	/* We can clear the encap_mark for FOU as we are essentially doing
+	 * one of two possible things.  We are either adding an L4 tunnel
+	 * header to the outer L3 tunnel header, or we are are simply
+	 * treating the GRE tunnel header as though it is a UDP protocol
+	 * specific header such as VXLAN or GENEVE.
+	 */
+	NAPI_GRO_CB(skb)->encap_mark = 0;
+
 	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 	ops = rcu_dereference(offloads[proto]);
@@ -343,6 +360,14 @@
 		}
 	}
 
+	/* We can clear the encap_mark for GUE as we are essentially doing
+	 * one of two possible things.  We are either adding an L4 tunnel
+	 * header to the outer L3 tunnel header, or we are are simply
+	 * treating the GRE tunnel header as though it is a UDP protocol
+	 * specific header such as VXLAN or GENEVE.
+	 */
+	NAPI_GRO_CB(skb)->encap_mark = 0;
+
 	rcu_read_lock();
 	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
 	ops = rcu_dereference(offloads[guehdr->proto_ctype]);

diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 540866d..c47539d 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c

@@ -49,6 +49,7 @@
 
 	/* setup inner skb. */
 	skb->encapsulation = 0;
+	SKB_GSO_CB(skb)->encap_level = 0;
 	__skb_pull(skb, tnl_hlen);
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, skb_inner_network_offset(skb));
@@ -126,6 +127,11 @@
 	struct packet_offload *ptype;
 	__be16 type;
 
+	if (NAPI_GRO_CB(skb)->encap_mark)
+		goto out;
+
+	NAPI_GRO_CB(skb)->encap_mark = 1;
+
 	off = skb_gro_offset(skb);
 	hlen = off + sizeof(*greh);
 	greh = skb_gro_header_fast(skb, off);

diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index d27276f..6165f30 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c

@@ -114,7 +114,8 @@
 	skb->vlan_tci = 0;
 	skb_set_queue_mapping(skb, 0);
 	skb_scrub_packet(skb, xnet);
-	return 0;
+
+	return iptunnel_pull_offloads(skb);
 }
 EXPORT_SYMBOL_GPL(iptunnel_pull_header);
 
@@ -371,8 +372,8 @@
 	if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) ||
 	    nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) ||
 	    nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
-	    nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) ||
-	    nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) ||
+	    nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
+	    nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
 	    nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
 		return -ENOMEM;
 

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index bf08192..4133b0f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c

@@ -359,11 +359,12 @@
 }
 
 /* All zeroes == unconditional rule. */
-static inline bool unconditional(const struct arpt_arp *arp)
+static inline bool unconditional(const struct arpt_entry *e)
 {
 	static const struct arpt_arp uncond;
 
-	return memcmp(arp, &uncond, sizeof(uncond)) == 0;
+	return e->target_offset == sizeof(struct arpt_entry) &&
+	       memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -402,11 +403,10 @@
 				|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
 
 			/* Unconditional return/END. */
-			if ((e->target_offset == sizeof(struct arpt_entry) &&
+			if ((unconditional(e) &&
 			     (strcmp(t->target.u.user.name,
 				     XT_STANDARD_TARGET) == 0) &&
-			     t->verdict < 0 && unconditional(&e->arp)) ||
-			    visited) {
+			     t->verdict < 0) || visited) {
 				unsigned int oldpos, size;
 
 				if ((strcmp(t->target.u.user.name,
@@ -474,14 +474,12 @@
 	return 1;
 }
 
-static inline int check_entry(const struct arpt_entry *e, const char *name)
+static inline int check_entry(const struct arpt_entry *e)
 {
 	const struct xt_entry_target *t;
 
-	if (!arp_checkentry(&e->arp)) {
-		duprintf("arp_tables: arp check failed %p %s.\n", e, name);
+	if (!arp_checkentry(&e->arp))
 		return -EINVAL;
-	}
 
 	if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
 		return -EINVAL;
@@ -522,10 +520,6 @@
 	struct xt_target *target;
 	int ret;
 
-	ret = check_entry(e, name);
-	if (ret)
-		return ret;
-
 	e->counters.pcnt = xt_percpu_counter_alloc();
 	if (IS_ERR_VALUE(e->counters.pcnt))
 		return -ENOMEM;
@@ -557,7 +551,7 @@
 	const struct xt_entry_target *t;
 	unsigned int verdict;
 
-	if (!unconditional(&e->arp))
+	if (!unconditional(e))
 		return false;
 	t = arpt_get_target_c(e);
 	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -576,9 +570,11 @@
 					     unsigned int valid_hooks)
 {
 	unsigned int h;
+	int err;
 
 	if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
-	    (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
+	    (unsigned char *)e + sizeof(struct arpt_entry) >= limit ||
+	    (unsigned char *)e + e->next_offset > limit) {
 		duprintf("Bad offset %p\n", e);
 		return -EINVAL;
 	}
@@ -590,6 +586,10 @@
 		return -EINVAL;
 	}
 
+	err = check_entry(e);
+	if (err)
+		return err;
+
 	/* Check hooks & underflows */
 	for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
 		if (!(valid_hooks & (1 << h)))
@@ -598,9 +598,9 @@
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h]) {
 			if (!check_underflow(e)) {
-				pr_err("Underflows must be unconditional and "
-				       "use the STANDARD target with "
-				       "ACCEPT/DROP\n");
+				pr_debug("Underflows must be unconditional and "
+					 "use the STANDARD target with "
+					 "ACCEPT/DROP\n");
 				return -EINVAL;
 			}
 			newinfo->underflow[h] = underflows[h];
@@ -969,6 +969,7 @@
 			 sizeof(struct arpt_get_entries) + get.size);
 		return -EINVAL;
 	}
+	get.name[sizeof(get.name) - 1] = '\0';
 
 	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
 	if (!IS_ERR_OR_NULL(t)) {
@@ -1233,7 +1234,8 @@
 
 	duprintf("check_compat_entry_size_and_hooks %p\n", e);
 	if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
-	    (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
+	    (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit ||
+	    (unsigned char *)e + e->next_offset > limit) {
 		duprintf("Bad offset %p, limit = %p\n", e, limit);
 		return -EINVAL;
 	}
@@ -1246,7 +1248,7 @@
 	}
 
 	/* For purposes of check_entry casting the compat entry is fine */
-	ret = check_entry((struct arpt_entry *)e, name);
+	ret = check_entry((struct arpt_entry *)e);
 	if (ret)
 		return ret;
 
@@ -1662,6 +1664,7 @@
 			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
+	get.name[sizeof(get.name) - 1] = '\0';
 
 	xt_compat_lock(NFPROTO_ARP);
 	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e53f8d6..631c100 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c

@@ -168,11 +168,12 @@
 
 /* All zeroes == unconditional rule. */
 /* Mildly perf critical (only if packet tracing is on) */
-static inline bool unconditional(const struct ipt_ip *ip)
+static inline bool unconditional(const struct ipt_entry *e)
 {
 	static const struct ipt_ip uncond;
 
-	return memcmp(ip, &uncond, sizeof(uncond)) == 0;
+	return e->target_offset == sizeof(struct ipt_entry) &&
+	       memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
 #undef FWINV
 }
 
@@ -229,11 +230,10 @@
 	} else if (s == e) {
 		(*rulenum)++;
 
-		if (s->target_offset == sizeof(struct ipt_entry) &&
+		if (unconditional(s) &&
 		    strcmp(t->target.u.kernel.target->name,
 			   XT_STANDARD_TARGET) == 0 &&
-		   t->verdict < 0 &&
-		   unconditional(&s->ip)) {
+		   t->verdict < 0) {
 			/* Tail of chains: STANDARD target (return/policy) */
 			*comment = *chainname == hookname
 				? comments[NF_IP_TRACE_COMMENT_POLICY]
@@ -476,11 +476,10 @@
 			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
-			if ((e->target_offset == sizeof(struct ipt_entry) &&
+			if ((unconditional(e) &&
 			     (strcmp(t->target.u.user.name,
 				     XT_STANDARD_TARGET) == 0) &&
-			     t->verdict < 0 && unconditional(&e->ip)) ||
-			    visited) {
+			     t->verdict < 0) || visited) {
 				unsigned int oldpos, size;
 
 				if ((strcmp(t->target.u.user.name,
@@ -569,14 +568,12 @@
 }
 
 static int
-check_entry(const struct ipt_entry *e, const char *name)
+check_entry(const struct ipt_entry *e)
 {
 	const struct xt_entry_target *t;
 
-	if (!ip_checkentry(&e->ip)) {
-		duprintf("ip check failed %p %s.\n", e, name);
+	if (!ip_checkentry(&e->ip))
 		return -EINVAL;
-	}
 
 	if (e->target_offset + sizeof(struct xt_entry_target) >
 	    e->next_offset)
@@ -666,10 +663,6 @@
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
 
-	ret = check_entry(e, name);
-	if (ret)
-		return ret;
-
 	e->counters.pcnt = xt_percpu_counter_alloc();
 	if (IS_ERR_VALUE(e->counters.pcnt))
 		return -ENOMEM;
@@ -721,7 +714,7 @@
 	const struct xt_entry_target *t;
 	unsigned int verdict;
 
-	if (!unconditional(&e->ip))
+	if (!unconditional(e))
 		return false;
 	t = ipt_get_target_c(e);
 	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -741,9 +734,11 @@
 			   unsigned int valid_hooks)
 {
 	unsigned int h;
+	int err;
 
 	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
-	    (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+	    (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
+	    (unsigned char *)e + e->next_offset > limit) {
 		duprintf("Bad offset %p\n", e);
 		return -EINVAL;
 	}
@@ -755,6 +750,10 @@
 		return -EINVAL;
 	}
 
+	err = check_entry(e);
+	if (err)
+		return err;
+
 	/* Check hooks & underflows */
 	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
 		if (!(valid_hooks & (1 << h)))
@@ -763,9 +762,9 @@
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h]) {
 			if (!check_underflow(e)) {
-				pr_err("Underflows must be unconditional and "
-				       "use the STANDARD target with "
-				       "ACCEPT/DROP\n");
+				pr_debug("Underflows must be unconditional and "
+					 "use the STANDARD target with "
+					 "ACCEPT/DROP\n");
 				return -EINVAL;
 			}
 			newinfo->underflow[h] = underflows[h];
@@ -1157,6 +1156,7 @@
 			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
+	get.name[sizeof(get.name) - 1] = '\0';
 
 	t = xt_find_table_lock(net, AF_INET, get.name);
 	if (!IS_ERR_OR_NULL(t)) {
@@ -1493,7 +1493,8 @@
 
 	duprintf("check_compat_entry_size_and_hooks %p\n", e);
 	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
-	    (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+	    (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
+	    (unsigned char *)e + e->next_offset > limit) {
 		duprintf("Bad offset %p, limit = %p\n", e, limit);
 		return -EINVAL;
 	}
@@ -1506,7 +1507,7 @@
 	}
 
 	/* For purposes of check_entry casting the compat entry is fine */
-	ret = check_entry((struct ipt_entry *)e, name);
+	ret = check_entry((struct ipt_entry *)e);
 	if (ret)
 		return ret;
 
@@ -1935,6 +1936,7 @@
 			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
+	get.name[sizeof(get.name) - 1] = '\0';
 
 	xt_compat_lock(AF_INET);
 	t = xt_find_table_lock(net, AF_INET, get.name);

diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 7b8fbb35..db5b875 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c

@@ -18,10 +18,10 @@
 #include <net/netfilter/nf_conntrack_synproxy.h>
 
 static struct iphdr *
-synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
+synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
+		  __be32 daddr)
 {
 	struct iphdr *iph;
-	struct net *net = sock_net(skb->sk);
 
 	skb_reset_network_header(skb);
 	iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
@@ -40,14 +40,12 @@
 }
 
 static void
-synproxy_send_tcp(const struct synproxy_net *snet,
+synproxy_send_tcp(struct net *net,
 		  const struct sk_buff *skb, struct sk_buff *nskb,
 		  struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
 		  struct iphdr *niph, struct tcphdr *nth,
 		  unsigned int tcp_hdr_size)
 {
-	struct net *net = nf_ct_net(snet->tmpl);
-
 	nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
 	nskb->ip_summed   = CHECKSUM_PARTIAL;
 	nskb->csum_start  = (unsigned char *)nth - nskb->head;
@@ -72,7 +70,7 @@
 }
 
 static void
-synproxy_send_client_synack(const struct synproxy_net *snet,
+synproxy_send_client_synack(struct net *net,
 			    const struct sk_buff *skb, const struct tcphdr *th,
 			    const struct synproxy_options *opts)
 {
@@ -91,7 +89,7 @@
 		return;
 	skb_reserve(nskb, MAX_TCP_HEADER);
 
-	niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
 
 	skb_reset_transport_header(nskb);
 	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -109,15 +107,16 @@
 
 	synproxy_build_options(nth, opts);
 
-	synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+	synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
 			  niph, nth, tcp_hdr_size);
 }
 
 static void
-synproxy_send_server_syn(const struct synproxy_net *snet,
+synproxy_send_server_syn(struct net *net,
 			 const struct sk_buff *skb, const struct tcphdr *th,
 			 const struct synproxy_options *opts, u32 recv_seq)
 {
+	struct synproxy_net *snet = synproxy_pernet(net);
 	struct sk_buff *nskb;
 	struct iphdr *iph, *niph;
 	struct tcphdr *nth;
@@ -132,7 +131,7 @@
 		return;
 	skb_reserve(nskb, MAX_TCP_HEADER);
 
-	niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
 
 	skb_reset_transport_header(nskb);
 	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -153,12 +152,12 @@
 
 	synproxy_build_options(nth, opts);
 
-	synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+	synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
 			  niph, nth, tcp_hdr_size);
 }
 
 static void
-synproxy_send_server_ack(const struct synproxy_net *snet,
+synproxy_send_server_ack(struct net *net,
 			 const struct ip_ct_tcp *state,
 			 const struct sk_buff *skb, const struct tcphdr *th,
 			 const struct synproxy_options *opts)
@@ -177,7 +176,7 @@
 		return;
 	skb_reserve(nskb, MAX_TCP_HEADER);
 
-	niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);
 
 	skb_reset_transport_header(nskb);
 	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -193,11 +192,11 @@
 
 	synproxy_build_options(nth, opts);
 
-	synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+	synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
 }
 
 static void
-synproxy_send_client_ack(const struct synproxy_net *snet,
+synproxy_send_client_ack(struct net *net,
 			 const struct sk_buff *skb, const struct tcphdr *th,
 			 const struct synproxy_options *opts)
 {
@@ -215,7 +214,7 @@
 		return;
 	skb_reserve(nskb, MAX_TCP_HEADER);
 
-	niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);
 
 	skb_reset_transport_header(nskb);
 	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -231,15 +230,16 @@
 
 	synproxy_build_options(nth, opts);
 
-	synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+	synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
 			  niph, nth, tcp_hdr_size);
 }
 
 static bool
-synproxy_recv_client_ack(const struct synproxy_net *snet,
+synproxy_recv_client_ack(struct net *net,
 			 const struct sk_buff *skb, const struct tcphdr *th,
 			 struct synproxy_options *opts, u32 recv_seq)
 {
+	struct synproxy_net *snet = synproxy_pernet(net);
 	int mss;
 
 	mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
@@ -255,7 +255,7 @@
 	if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
 		synproxy_check_timestamp_cookie(opts);
 
-	synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+	synproxy_send_server_syn(net, skb, th, opts, recv_seq);
 	return true;
 }
 
@@ -263,7 +263,8 @@
 synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_synproxy_info *info = par->targinfo;
-	struct synproxy_net *snet = synproxy_pernet(par->net);
+	struct net *net = par->net;
+	struct synproxy_net *snet = synproxy_pernet(net);
 	struct synproxy_options opts = {};
 	struct tcphdr *th, _th;
 
@@ -292,12 +293,12 @@
 					  XT_SYNPROXY_OPT_SACK_PERM |
 					  XT_SYNPROXY_OPT_ECN);
 
-		synproxy_send_client_synack(snet, skb, th, &opts);
+		synproxy_send_client_synack(net, skb, th, &opts);
 		return NF_DROP;
 
 	} else if (th->ack && !(th->fin || th->rst || th->syn)) {
 		/* ACK from client */
-		synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+		synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq));
 		return NF_DROP;
 	}
 
@@ -308,7 +309,8 @@
 				       struct sk_buff *skb,
 				       const struct nf_hook_state *nhs)
 {
-	struct synproxy_net *snet = synproxy_pernet(nhs->net);
+	struct net *net = nhs->net;
+	struct synproxy_net *snet = synproxy_pernet(net);
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	struct nf_conn_synproxy *synproxy;
@@ -365,7 +367,7 @@
 			 * therefore we need to add 1 to make the SYN sequence
 			 * number match the one of first SYN.
 			 */
-			if (synproxy_recv_client_ack(snet, skb, th, &opts,
+			if (synproxy_recv_client_ack(net, skb, th, &opts,
 						     ntohl(th->seq) + 1))
 				this_cpu_inc(snet->stats->cookie_retrans);
 
@@ -391,12 +393,12 @@
 				  XT_SYNPROXY_OPT_SACK_PERM);
 
 		swap(opts.tsval, opts.tsecr);
-		synproxy_send_server_ack(snet, state, skb, th, &opts);
+		synproxy_send_server_ack(net, state, skb, th, &opts);
 
 		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
 
 		swap(opts.tsval, opts.tsecr);
-		synproxy_send_client_ack(snet, skb, th, &opts);
+		synproxy_send_client_ack(net, skb, th, &opts);
 
 		consume_skb(skb);
 		return NF_STOLEN;

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 836abe5..08eed5e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c

@@ -2070,10 +2070,14 @@
 		if (!in_dev)
 			return;
 
-		ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
-				       iph->protocol);
-		if (!ours)
-			return;
+		/* we are supposed to accept bcast packets */
+		if (skb->pkt_type == PACKET_MULTICAST) {
+			ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+					       iph->protocol);
+			if (!ours)
+				return;
+		}
+
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
 						   uh->source, iph->saddr, dif);
 	} else if (skb->pkt_type == PACKET_HOST) {

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 8a3405a..0ed2daf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c

@@ -56,6 +56,7 @@
 
 	/* setup inner skb. */
 	skb->encapsulation = 0;
+	SKB_GSO_CB(skb)->encap_level = 0;
 	__skb_pull(skb, tnl_hlen);
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, skb_inner_network_offset(skb));
@@ -311,14 +312,14 @@
 	unsigned int off = skb_gro_offset(skb);
 	int flush = 1;
 
-	if (NAPI_GRO_CB(skb)->udp_mark ||
+	if (NAPI_GRO_CB(skb)->encap_mark ||
 	    (skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 	     !NAPI_GRO_CB(skb)->csum_valid))
 		goto out;
 
-	/* mark that this skb passed once through the udp gro layer */
-	NAPI_GRO_CB(skb)->udp_mark = 1;
+	/* mark that this skb passed once through the tunnel gro layer */
+	NAPI_GRO_CB(skb)->encap_mark = 1;
 
 	rcu_read_lock();
 	uo_priv = rcu_dereference(udp_offload_base);

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index eeca943..82e9f30 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c

@@ -258,6 +258,19 @@
 	return pp;
 }
 
+static struct sk_buff **sit_gro_receive(struct sk_buff **head,
+					struct sk_buff *skb)
+{
+	if (NAPI_GRO_CB(skb)->encap_mark) {
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+	NAPI_GRO_CB(skb)->encap_mark = 1;
+
+	return ipv6_gro_receive(head, skb);
+}
+
 static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
 {
 	const struct net_offload *ops;
@@ -302,7 +315,7 @@
 static const struct net_offload sit_offload = {
 	.callbacks = {
 		.gso_segment	= ipv6_gso_segment,
-		.gro_receive    = ipv6_gro_receive,
+		.gro_receive    = sit_gro_receive,
 		.gro_complete   = sit_gro_complete,
 	},
 };

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 84f9baf..86b67b7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c

@@ -198,11 +198,12 @@
 
 /* All zeroes == unconditional rule. */
 /* Mildly perf critical (only if packet tracing is on) */
-static inline bool unconditional(const struct ip6t_ip6 *ipv6)
+static inline bool unconditional(const struct ip6t_entry *e)
 {
 	static const struct ip6t_ip6 uncond;
 
-	return memcmp(ipv6, &uncond, sizeof(uncond)) == 0;
+	return e->target_offset == sizeof(struct ip6t_entry) &&
+	       memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0;
 }
 
 static inline const struct xt_entry_target *
@@ -258,11 +259,10 @@
 	} else if (s == e) {
 		(*rulenum)++;
 
-		if (s->target_offset == sizeof(struct ip6t_entry) &&
+		if (unconditional(s) &&
 		    strcmp(t->target.u.kernel.target->name,
 			   XT_STANDARD_TARGET) == 0 &&
-		    t->verdict < 0 &&
-		    unconditional(&s->ipv6)) {
+		    t->verdict < 0) {
 			/* Tail of chains: STANDARD target (return/policy) */
 			*comment = *chainname == hookname
 				? comments[NF_IP6_TRACE_COMMENT_POLICY]
@@ -488,11 +488,10 @@
 			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 			/* Unconditional return/END. */
-			if ((e->target_offset == sizeof(struct ip6t_entry) &&
+			if ((unconditional(e) &&
 			     (strcmp(t->target.u.user.name,
 				     XT_STANDARD_TARGET) == 0) &&
-			     t->verdict < 0 &&
-			     unconditional(&e->ipv6)) || visited) {
+			     t->verdict < 0) || visited) {
 				unsigned int oldpos, size;
 
 				if ((strcmp(t->target.u.user.name,
@@ -581,14 +580,12 @@
 }
 
 static int
-check_entry(const struct ip6t_entry *e, const char *name)
+check_entry(const struct ip6t_entry *e)
 {
 	const struct xt_entry_target *t;
 
-	if (!ip6_checkentry(&e->ipv6)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+	if (!ip6_checkentry(&e->ipv6))
 		return -EINVAL;
-	}
 
 	if (e->target_offset + sizeof(struct xt_entry_target) >
 	    e->next_offset)
@@ -679,10 +676,6 @@
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
 
-	ret = check_entry(e, name);
-	if (ret)
-		return ret;
-
 	e->counters.pcnt = xt_percpu_counter_alloc();
 	if (IS_ERR_VALUE(e->counters.pcnt))
 		return -ENOMEM;
@@ -733,7 +726,7 @@
 	const struct xt_entry_target *t;
 	unsigned int verdict;
 
-	if (!unconditional(&e->ipv6))
+	if (!unconditional(e))
 		return false;
 	t = ip6t_get_target_c(e);
 	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -753,9 +746,11 @@
 			   unsigned int valid_hooks)
 {
 	unsigned int h;
+	int err;
 
 	if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
-	    (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
+	    (unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
+	    (unsigned char *)e + e->next_offset > limit) {
 		duprintf("Bad offset %p\n", e);
 		return -EINVAL;
 	}
@@ -767,6 +762,10 @@
 		return -EINVAL;
 	}
 
+	err = check_entry(e);
+	if (err)
+		return err;
+
 	/* Check hooks & underflows */
 	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
 		if (!(valid_hooks & (1 << h)))
@@ -775,9 +774,9 @@
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h]) {
 			if (!check_underflow(e)) {
-				pr_err("Underflows must be unconditional and "
-				       "use the STANDARD target with "
-				       "ACCEPT/DROP\n");
+				pr_debug("Underflows must be unconditional and "
+					 "use the STANDARD target with "
+					 "ACCEPT/DROP\n");
 				return -EINVAL;
 			}
 			newinfo->underflow[h] = underflows[h];
@@ -1169,6 +1168,7 @@
 			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
+	get.name[sizeof(get.name) - 1] = '\0';
 
 	t = xt_find_table_lock(net, AF_INET6, get.name);
 	if (!IS_ERR_OR_NULL(t)) {
@@ -1505,7 +1505,8 @@
 
 	duprintf("check_compat_entry_size_and_hooks %p\n", e);
 	if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
-	    (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
+	    (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
+	    (unsigned char *)e + e->next_offset > limit) {
 		duprintf("Bad offset %p, limit = %p\n", e, limit);
 		return -EINVAL;
 	}
@@ -1518,7 +1519,7 @@
 	}
 
 	/* For purposes of check_entry casting the compat entry is fine */
-	ret = check_entry((struct ip6t_entry *)e, name);
+	ret = check_entry((struct ip6t_entry *)e);
 	if (ret)
 		return ret;
 
@@ -1944,6 +1945,7 @@
 			 *len, sizeof(get) + get.size);
 		return -EINVAL;
 	}
+	get.name[sizeof(get.name) - 1] = '\0';
 
 	xt_compat_lock(AF_INET6);
 	t = xt_find_table_lock(net, AF_INET6, get.name);

diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 263a516..c382db7 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c

@@ -26,35 +26,6 @@
 #include <net/transp_v6.h>
 #include <net/ping.h>
 
-struct proto pingv6_prot = {
-	.name =		"PINGv6",
-	.owner =	THIS_MODULE,
-	.init =		ping_init_sock,
-	.close =	ping_close,
-	.connect =	ip6_datagram_connect_v6_only,
-	.disconnect =	udp_disconnect,
-	.setsockopt =	ipv6_setsockopt,
-	.getsockopt =	ipv6_getsockopt,
-	.sendmsg =	ping_v6_sendmsg,
-	.recvmsg =	ping_recvmsg,
-	.bind =		ping_bind,
-	.backlog_rcv =	ping_queue_rcv_skb,
-	.hash =		ping_hash,
-	.unhash =	ping_unhash,
-	.get_port =	ping_get_port,
-	.obj_size =	sizeof(struct raw6_sock),
-};
-EXPORT_SYMBOL_GPL(pingv6_prot);
-
-static struct inet_protosw pingv6_protosw = {
-	.type =      SOCK_DGRAM,
-	.protocol =  IPPROTO_ICMPV6,
-	.prot =      &pingv6_prot,
-	.ops =       &inet6_dgram_ops,
-	.flags =     INET_PROTOSW_REUSE,
-};
-
-
 /* Compatibility glue so we can support IPv6 when it's compiled as a module */
 static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
 				 int *addr_len)
@@ -77,7 +48,7 @@
 	return 0;
 }
 
-int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -192,6 +163,34 @@
 	return len;
 }
 
+struct proto pingv6_prot = {
+	.name =		"PINGv6",
+	.owner =	THIS_MODULE,
+	.init =		ping_init_sock,
+	.close =	ping_close,
+	.connect =	ip6_datagram_connect_v6_only,
+	.disconnect =	udp_disconnect,
+	.setsockopt =	ipv6_setsockopt,
+	.getsockopt =	ipv6_getsockopt,
+	.sendmsg =	ping_v6_sendmsg,
+	.recvmsg =	ping_recvmsg,
+	.bind =		ping_bind,
+	.backlog_rcv =	ping_queue_rcv_skb,
+	.hash =		ping_hash,
+	.unhash =	ping_unhash,
+	.get_port =	ping_get_port,
+	.obj_size =	sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+	.type =      SOCK_DGRAM,
+	.protocol =  IPPROTO_ICMPV6,
+	.prot =      &pingv6_prot,
+	.ops =       &inet6_dgram_ops,
+	.flags =     INET_PROTOSW_REUSE,
+};
+
 #ifdef CONFIG_PROC_FS
 static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
 {

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index f45b8ff..8338430 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c

@@ -681,14 +681,16 @@
 		skb->mac_header = skb->network_header;
 		skb_reset_network_header(skb);
 		IPCB(skb)->flags = 0;
-		skb->protocol = htons(ETH_P_IPV6);
+		skb->dev = tunnel->dev;
 
 		if (packet_is_spoofed(skb, iph, tunnel)) {
 			tunnel->dev->stats.rx_errors++;
 			goto out;
 		}
 
-		__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+		if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6),
+		    !net_eq(tunnel->net, dev_net(tunnel->dev))))
+			goto out;
 
 		err = IP_ECN_decapsulate(iph, skb);
 		if (unlikely(err)) {

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index fd25e44..8125931 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c

@@ -843,8 +843,8 @@
 		flush_stack(stack, count, skb, count - 1);
 	} else {
 		if (!inner_flushed)
-			UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
-					 proto == IPPROTO_UDPLITE);
+			UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+					  proto == IPPROTO_UDPLITE);
 		consume_skb(skb);
 	}
 	return 0;

diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index b0bc475..2e8e7e5 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h

@@ -95,7 +95,7 @@
 	if (!nested)
 		goto nla_put_failure;
 	if (mtype_do_head(skb, map) ||
-	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 7e6568c..a748b0c 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c

@@ -497,6 +497,26 @@
 	write_unlock_bh(&ip_set_ref_lock);
 }
 
+/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
+ * a separate reference counter
+ */
+static inline void
+__ip_set_get_netlink(struct ip_set *set)
+{
+	write_lock_bh(&ip_set_ref_lock);
+	set->ref_netlink++;
+	write_unlock_bh(&ip_set_ref_lock);
+}
+
+static inline void
+__ip_set_put_netlink(struct ip_set *set)
+{
+	write_lock_bh(&ip_set_ref_lock);
+	BUG_ON(set->ref_netlink == 0);
+	set->ref_netlink--;
+	write_unlock_bh(&ip_set_ref_lock);
+}
+
 /* Add, del and test set entries from kernel.
  *
  * The set behind the index must exist and must be referenced
@@ -1002,7 +1022,7 @@
 	if (!attr[IPSET_ATTR_SETNAME]) {
 		for (i = 0; i < inst->ip_set_max; i++) {
 			s = ip_set(inst, i);
-			if (s && s->ref) {
+			if (s && (s->ref || s->ref_netlink)) {
 				ret = -IPSET_ERR_BUSY;
 				goto out;
 			}
@@ -1024,7 +1044,7 @@
 		if (!s) {
 			ret = -ENOENT;
 			goto out;
-		} else if (s->ref) {
+		} else if (s->ref || s->ref_netlink) {
 			ret = -IPSET_ERR_BUSY;
 			goto out;
 		}
@@ -1171,6 +1191,9 @@
 	      from->family == to->family))
 		return -IPSET_ERR_TYPE_MISMATCH;
 
+	if (from->ref_netlink || to->ref_netlink)
+		return -EBUSY;
+
 	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
 	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
 	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
@@ -1206,7 +1229,7 @@
 		if (set->variant->uref)
 			set->variant->uref(set, cb, false);
 		pr_debug("release set %s\n", set->name);
-		__ip_set_put_byindex(inst, index);
+		__ip_set_put_netlink(set);
 	}
 	return 0;
 }
@@ -1328,7 +1351,7 @@
 		if (!cb->args[IPSET_CB_ARG0]) {
 			/* Start listing: make sure set won't be destroyed */
 			pr_debug("reference set\n");
-			set->ref++;
+			set->ref_netlink++;
 		}
 		write_unlock_bh(&ip_set_ref_lock);
 		nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
@@ -1396,7 +1419,7 @@
 		if (set->variant->uref)
 			set->variant->uref(set, cb, false);
 		pr_debug("release set %s\n", set->name);
-		__ip_set_put_byindex(inst, index);
+		__ip_set_put_netlink(set);
 		cb->args[IPSET_CB_ARG0] = 0;
 	}
 out:

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index e5336ab..d32fd6b 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h

@@ -1082,7 +1082,7 @@
 	if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
 		goto nla_put_failure;
 #endif
-	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))

diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 24c6c19..a2a89e4 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c

@@ -458,7 +458,7 @@
 	if (!nested)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
-	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
 			  htonl(sizeof(*map) + n * set->dsize)))
 		goto nla_put_failure;

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7542999..cb5b630 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c

@@ -582,7 +582,12 @@
 	/* nfnetlink_unicast will either free the nskb or add it to a socket */
 	err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);
 	if (err < 0) {
-		queue->queue_user_dropped++;
+		if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
+			failopen = 1;
+			err = 0;
+		} else {
+			queue->queue_user_dropped++;
+		}
 		goto err_out_unlock;
 	}
 

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c841679..215fc08 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c

@@ -1033,6 +1033,14 @@
 	return 0;
 }
 
+static int netlink_ioctl(struct socket *sock, unsigned int cmd,
+			 unsigned long arg)
+{
+	/* try to hand this ioctl down to the NIC drivers.
+	 */
+	return -ENOIOCTLCMD;
+}
+
 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
 {
 	struct sock *sock;
@@ -2494,7 +2502,7 @@
 	.accept =	sock_no_accept,
 	.getname =	netlink_getname,
 	.poll =		datagram_poll,
-	.ioctl =	sock_no_ioctl,
+	.ioctl =	netlink_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
 	.setsockopt =	netlink_setsockopt,

diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 234a733..ce94729 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig

@@ -7,7 +7,9 @@
 	depends on INET
 	depends on !NF_CONNTRACK || \
 		   (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
-				     (!NF_NAT || NF_NAT)))
+				     (!NF_NAT || NF_NAT) && \
+				     (!NF_NAT_IPV4 || NF_NAT_IPV4) && \
+				     (!NF_NAT_IPV6 || NF_NAT_IPV6)))
 	select LIBCRC32C
 	select MPLS
 	select NET_MPLS_GSO

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index dc5eb29..1b9d286 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c

@@ -535,14 +535,15 @@
 	switch (ctinfo) {
 	case IP_CT_RELATED:
 	case IP_CT_RELATED_REPLY:
-		if (skb->protocol == htons(ETH_P_IP) &&
+		if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
+		    skb->protocol == htons(ETH_P_IP) &&
 		    ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
 							   hooknum))
 				err = NF_DROP;
 			goto push;
-#if IS_ENABLED(CONFIG_NF_NAT_IPV6)
-		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		} else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
+			   skb->protocol == htons(ETH_P_IPV6)) {
 			__be16 frag_off;
 			u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 			int hdrlen = ipv6_skip_exthdr(skb,
@@ -557,7 +558,6 @@
 					err = NF_DROP;
 				goto push;
 			}
-#endif
 		}
 		/* Non-ICMP, fall thru to initialize if needed. */
 	case IP_CT_NEW:
@@ -664,11 +664,12 @@
 
 	/* Determine NAT type.
 	 * Check if the NAT type can be deduced from the tracked connection.
-	 * Make sure expected traffic is NATted only when committing.
+	 * Make sure new expected connections (IP_CT_RELATED) are NATted only
+	 * when committing.
 	 */
 	if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
 	    ct->status & IPS_NAT_MASK &&
-	    (!(ct->status & IPS_EXPECTED_BIT) || info->commit)) {
+	    (ctinfo != IP_CT_RELATED || info->commit)) {
 		/* NAT an established or related connection like before. */
 		if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
 			/* This is the REPLY direction for a connection
@@ -968,7 +969,8 @@
 			break;
 
 		case OVS_NAT_ATTR_IP_MIN:
-			nla_memcpy(&info->range.min_addr, a, nla_len(a));
+			nla_memcpy(&info->range.min_addr, a,
+				   sizeof(info->range.min_addr));
 			info->range.flags |= NF_NAT_RANGE_MAP_IPS;
 			break;
 
@@ -1238,7 +1240,8 @@
 	}
 
 	if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
-		if (info->family == NFPROTO_IPV4) {
+		if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
+		    info->family == NFPROTO_IPV4) {
 			if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
 					    info->range.min_addr.ip) ||
 			    (info->range.max_addr.ip
@@ -1246,8 +1249,8 @@
 			     (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
 					      info->range.max_addr.ip))))
 				return false;
-#if IS_ENABLED(CONFIG_NF_NAT_IPV6)
-		} else if (info->family == NFPROTO_IPV6) {
+		} else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
+			   info->family == NFPROTO_IPV6) {
 			if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
 					     &info->range.min_addr.in6) ||
 			    (memcmp(&info->range.max_addr.in6,
@@ -1256,7 +1259,6 @@
 			     (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX,
 					       &info->range.max_addr.in6))))
 				return false;
-#endif
 		} else {
 			return false;
 		}

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a19b3e6..e1849f3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c

@@ -1406,7 +1406,8 @@
 	list_for_each_entry(t, &asoc->peer.transport_addr_list,
 				transports) {
 		if (t->pmtu_pending && t->dst) {
-			sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst));
+			sctp_transport_update_pmtu(sk, t,
+						   WORD_TRUNC(dst_mtu(t->dst)));
 			t->pmtu_pending = 0;
 		}
 		if (!pmtu || (t->pathmtu < pmtu))

diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 871cdf9..401c607 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c

@@ -111,7 +111,8 @@
 	dest->port = src->port;
 
 	list_for_each_entry(addr, &src->address_list, list) {
-		error = sctp_add_bind_addr(dest, &addr->a, 1, gfp);
+		error = sctp_add_bind_addr(dest, &addr->a, sizeof(addr->a),
+					   1, gfp);
 		if (error < 0)
 			break;
 	}
@@ -150,7 +151,7 @@
 
 /* Add an address to the bind address list in the SCTP_bind_addr structure. */
 int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
-		       __u8 addr_state, gfp_t gfp)
+		       int new_size, __u8 addr_state, gfp_t gfp)
 {
 	struct sctp_sockaddr_entry *addr;
 
@@ -159,7 +160,7 @@
 	if (!addr)
 		return -ENOMEM;
 
-	memcpy(&addr->a, new, sizeof(*new));
+	memcpy(&addr->a, new, min_t(size_t, sizeof(*new), new_size));
 
 	/* Fix up the port if it has not yet been set.
 	 * Both v4 and v6 have the port at the same offset.
@@ -291,7 +292,8 @@
 		}
 
 		af->from_addr_param(&addr, rawaddr, htons(port), 0);
-		retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp);
+		retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
+					    SCTP_ADDR_SRC, gfp);
 		if (retval) {
 			/* Can't finish building the list, clean up. */
 			sctp_bind_addr_clean(bp);
@@ -453,8 +455,8 @@
 		    (((AF_INET6 == addr->sa.sa_family) &&
 		      (flags & SCTP_ADDR6_ALLOWED) &&
 		      (flags & SCTP_ADDR6_PEERSUPP))))
-			error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC,
-						    gfp);
+			error = sctp_add_bind_addr(dest, addr, sizeof(*addr),
+						   SCTP_ADDR_SRC, gfp);
 	}
 
 	return error;

diff --git a/net/sctp/input.c b/net/sctp/input.c
index db76f1a..00b8445 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c

@@ -606,7 +606,8 @@
 
 		/* PMTU discovery (RFC1191) */
 		if (ICMP_FRAG_NEEDED == code) {
-			sctp_icmp_frag_needed(sk, asoc, transport, info);
+			sctp_icmp_frag_needed(sk, asoc, transport,
+					      WORD_TRUNC(info));
 			goto out_unlock;
 		} else {
 			if (ICMP_PROT_UNREACH == code) {

diff --git a/net/sctp/output.c b/net/sctp/output.c
index 736c004..9774535 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c

@@ -401,7 +401,7 @@
 	sk = chunk->skb->sk;
 
 	/* Allocate the new skb.  */
-	nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC);
+	nskb = alloc_skb(packet->size + MAX_HEADER, gfp);
 	if (!nskb)
 		goto nomem;
 
@@ -523,8 +523,8 @@
 	 */
 	if (auth)
 		sctp_auth_calculate_hmac(asoc, nskb,
-					(struct sctp_auth_chunk *)auth,
-					GFP_ATOMIC);
+					 (struct sctp_auth_chunk *)auth,
+					 gfp);
 
 	/* 2) Calculate the Adler-32 checksum of the whole packet,
 	 *    including the SCTP common header and all the

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index f03541d..8d3d362 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c

@@ -978,8 +978,12 @@
 			     (new_transport->state == SCTP_UNCONFIRMED) ||
 			     (new_transport->state == SCTP_PF)))
 				new_transport = asoc->peer.active_path;
-			if (new_transport->state == SCTP_UNCONFIRMED)
+			if (new_transport->state == SCTP_UNCONFIRMED) {
+				WARN_ONCE(1, "Atempt to send packet on unconfirmed path.");
+				sctp_chunk_fail(chunk, 0);
+				sctp_chunk_free(chunk);
 				continue;
+			}
 
 			/* Change packets if necessary.  */
 			if (new_transport != transport) {

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1099e99..d3d50da 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c

@@ -216,6 +216,7 @@
 			      (copy_flags & SCTP_ADDR6_ALLOWED) &&
 			      (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
 				error = sctp_add_bind_addr(bp, &addr->a,
+						    sizeof(addr->a),
 						    SCTP_ADDR_SRC, GFP_ATOMIC);
 				if (error)
 					goto end_copy;

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index e47abf2..7f0bf79 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c

@@ -1849,7 +1849,8 @@
 	/* Also, add the destination address. */
 	if (list_empty(&retval->base.bind_addr.address_list)) {
 		sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
-				SCTP_ADDR_SRC, GFP_ATOMIC);
+				   sizeof(chunk->dest), SCTP_ADDR_SRC,
+				   GFP_ATOMIC);
 	}
 
 	retval->next_tsn = retval->c.initial_tsn;

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 3c22c41..7fe56d0 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c

@@ -215,10 +215,14 @@
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
 	} else {
+		__u32 old_a_rwnd = asoc->a_rwnd;
+
 		asoc->a_rwnd = asoc->rwnd;
 		sack = sctp_make_sack(asoc);
-		if (!sack)
+		if (!sack) {
+			asoc->a_rwnd = old_a_rwnd;
 			goto nomem;
+		}
 
 		asoc->peer.sack_needed = 0;
 		asoc->peer.sack_cnt = 0;

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 96e0811..878d28e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c

@@ -386,7 +386,8 @@
 	/* Add the address to the bind address list.
 	 * Use GFP_ATOMIC since BHs will be disabled.
 	 */
-	ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC);
+	ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len,
+				 SCTP_ADDR_SRC, GFP_ATOMIC);
 
 	/* Copy back into socket for getsockname() use. */
 	if (!ret) {
@@ -577,6 +578,7 @@
 			af = sctp_get_af_specific(addr->v4.sin_family);
 			memcpy(&saveaddr, addr, af->sockaddr_len);
 			retval = sctp_add_bind_addr(bp, &saveaddr,
+						    sizeof(saveaddr),
 						    SCTP_ADDR_NEW, GFP_ATOMIC);
 			addr_buf += af->sockaddr_len;
 		}
@@ -1389,7 +1391,7 @@
 	int err = 0;
 
 #ifdef CONFIG_COMPAT
-	if (is_compat_task()) {
+	if (in_compat_syscall()) {
 		struct compat_sctp_getaddrs_old param32;
 
 		if (len < sizeof(param32))

diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index d517153..9b6b48c 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c

@@ -226,7 +226,7 @@
 	}
 
 	if (transport->dst) {
-		transport->pathmtu = dst_mtu(transport->dst);
+		transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
 	} else
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
@@ -280,7 +280,7 @@
 		return;
 	}
 	if (transport->dst) {
-		transport->pathmtu = dst_mtu(transport->dst);
+		transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
 
 		/* Initialize sk->sk_rcv_saddr, if the transport is the
 		 * association's active path for getsockname().

diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index b512fbd..ea7ffa1 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile

@@ -12,7 +12,8 @@
 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
 	    addr.o rpcb_clnt.o timer.o xdr.o \
 	    sunrpc_syms.o cache.o rpc_pipe.o \
-	    svc_xprt.o
+	    svc_xprt.o \
+	    xprtmultipath.o
 sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
 sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index cabf586..15612ff 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c

@@ -1181,12 +1181,12 @@
 gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	struct gss_auth *gss_auth;
-	struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt);
+	struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
 
 	while (clnt != clnt->cl_parent) {
 		struct rpc_clnt *parent = clnt->cl_parent;
 		/* Find the original parent for this transport */
-		if (rcu_access_pointer(parent->cl_xprt) != xprt)
+		if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps)
 			break;
 		clnt = parent;
 	}
@@ -1728,8 +1728,8 @@
 		return 0;
 	}
 
-	first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
-	last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT;
+	first = snd_buf->page_base >> PAGE_SHIFT;
+	last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT;
 	rqstp->rq_enc_pages_num = last - first + 1 + 1;
 	rqstp->rq_enc_pages
 		= kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *),
@@ -1775,10 +1775,10 @@
 	status = alloc_enc_pages(rqstp);
 	if (status)
 		return status;
-	first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
+	first = snd_buf->page_base >> PAGE_SHIFT;
 	inpages = snd_buf->pages + first;
 	snd_buf->pages = rqstp->rq_enc_pages;
-	snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
+	snd_buf->page_base -= first << PAGE_SHIFT;
 	/*
 	 * Give the tail its own page, in case we need extra space in the
 	 * head when wrapping:

diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index d94a8e1..045e11e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c

@@ -465,7 +465,7 @@
 	page_pos = desc->pos - outbuf->head[0].iov_len;
 	if (page_pos >= 0 && page_pos < outbuf->page_len) {
 		/* pages are not in place: */
-		int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
+		int i = (page_pos + outbuf->page_base) >> PAGE_SHIFT;
 		in_page = desc->pages[i];
 	} else {
 		in_page = sg_page(sg);

diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 765088e4..a737c2d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c

@@ -79,9 +79,9 @@
 		len -= buf->head[0].iov_len;
 	if (len <= buf->page_len) {
 		unsigned int last = (buf->page_base + len - 1)
-					>>PAGE_CACHE_SHIFT;
+					>>PAGE_SHIFT;
 		unsigned int offset = (buf->page_base + len - 1)
-					& (PAGE_CACHE_SIZE - 1);
+					& (PAGE_SIZE - 1);
 		ptr = kmap_atomic(buf->pages[last]);
 		pad = *(ptr + offset);
 		kunmap_atomic(ptr);

diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index c2a2b58..8d9eb4d 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c

@@ -113,8 +113,8 @@
 
 static
 struct rpc_auth null_auth = {
-	.au_cslack	= 4,
-	.au_rslack	= 2,
+	.au_cslack	= NUL_CALLSLACK,
+	.au_rslack	= NUL_REPLYSLACK,
 	.au_ops		= &authnull_ops,
 	.au_flavor	= RPC_AUTH_NULL,
 	.au_count	= ATOMIC_INIT(0),

diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 548240d..0d3dd36 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c

@@ -23,8 +23,6 @@
 };
 #define uc_uid			uc_base.cr_uid
 
-#define UNX_WRITESLACK		(21 + XDR_QUADLEN(UNX_MAXNODENAME))
-
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
@@ -228,8 +226,8 @@
 
 static
 struct rpc_auth		unix_auth = {
-	.au_cslack	= UNX_WRITESLACK,
-	.au_rslack	= 2,			/* assume AUTH_NULL verf */
+	.au_cslack	= UNX_CALLSLACK,
+	.au_rslack	= NUL_REPLYSLACK,
 	.au_ops		= &authunix_ops,
 	.au_flavor	= RPC_AUTH_UNIX,
 	.au_count	= ATOMIC_INIT(0),

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 273bc3a..553bf95 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c

@@ -881,7 +881,7 @@
 	char *kaddr;
 	ssize_t ret = -ENOMEM;
 
-	if (count >= PAGE_CACHE_SIZE)
+	if (count >= PAGE_SIZE)
 		goto out_slow;
 
 	page = find_or_create_page(mapping, 0, GFP_KERNEL);
@@ -892,7 +892,7 @@
 	ret = cache_do_downcall(kaddr, buf, count, cd);
 	kunmap(page);
 	unlock_page(page);
-	page_cache_release(page);
+	put_page(page);
 	return ret;
 out_slow:
 	return cache_slow_downcall(buf, count, cd);
@@ -1182,14 +1182,14 @@
 	}
 
 	crq->q.reader = 0;
-	crq->item = cache_get(h);
 	crq->buf = buf;
 	crq->len = 0;
 	crq->readers = 0;
 	spin_lock(&queue_lock);
-	if (test_bit(CACHE_PENDING, &h->flags))
+	if (test_bit(CACHE_PENDING, &h->flags)) {
+		crq->item = cache_get(h);
 		list_add_tail(&crq->q.list, &detail->queue);
-	else
+	} else
 		/* Lost a race, no longer PENDING, so don't enqueue */
 		ret = -EAGAIN;
 	spin_unlock(&queue_lock);

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b7f2104..7e0c9bf 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c

@@ -354,6 +354,7 @@
 }
 
 static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
+		struct rpc_xprt_switch *xps,
 		struct rpc_xprt *xprt,
 		struct rpc_clnt *parent)
 {
@@ -411,6 +412,8 @@
 	}
 
 	rpc_clnt_set_transport(clnt, xprt, timeout);
+	xprt_iter_init(&clnt->cl_xpi, xps);
+	xprt_switch_put(xps);
 
 	clnt->cl_rtt = &clnt->cl_rtt_default;
 	rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
@@ -438,6 +441,7 @@
 out_err:
 	rpciod_down();
 out_no_rpciod:
+	xprt_switch_put(xps);
 	xprt_put(xprt);
 	return ERR_PTR(err);
 }
@@ -446,8 +450,13 @@
 					struct rpc_xprt *xprt)
 {
 	struct rpc_clnt *clnt = NULL;
+	struct rpc_xprt_switch *xps;
 
-	clnt = rpc_new_client(args, xprt, NULL);
+	xps = xprt_switch_alloc(xprt, GFP_KERNEL);
+	if (xps == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	clnt = rpc_new_client(args, xps, xprt, NULL);
 	if (IS_ERR(clnt))
 		return clnt;
 
@@ -564,6 +573,7 @@
 static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
 					   struct rpc_clnt *clnt)
 {
+	struct rpc_xprt_switch *xps;
 	struct rpc_xprt *xprt;
 	struct rpc_clnt *new;
 	int err;
@@ -571,13 +581,17 @@
 	err = -ENOMEM;
 	rcu_read_lock();
 	xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+	xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
 	rcu_read_unlock();
-	if (xprt == NULL)
+	if (xprt == NULL || xps == NULL) {
+		xprt_put(xprt);
+		xprt_switch_put(xps);
 		goto out_err;
+	}
 	args->servername = xprt->servername;
 	args->nodename = clnt->cl_nodename;
 
-	new = rpc_new_client(args, xprt, clnt);
+	new = rpc_new_client(args, xps, xprt, clnt);
 	if (IS_ERR(new)) {
 		err = PTR_ERR(new);
 		goto out_err;
@@ -657,6 +671,7 @@
 {
 	const struct rpc_timeout *old_timeo;
 	rpc_authflavor_t pseudoflavor;
+	struct rpc_xprt_switch *xps, *oldxps;
 	struct rpc_xprt *xprt, *old;
 	struct rpc_clnt *parent;
 	int err;
@@ -668,10 +683,17 @@
 		return PTR_ERR(xprt);
 	}
 
+	xps = xprt_switch_alloc(xprt, GFP_KERNEL);
+	if (xps == NULL) {
+		xprt_put(xprt);
+		return -ENOMEM;
+	}
+
 	pseudoflavor = clnt->cl_auth->au_flavor;
 
 	old_timeo = clnt->cl_timeout;
 	old = rpc_clnt_set_transport(clnt, xprt, timeout);
+	oldxps = xprt_iter_xchg_switch(&clnt->cl_xpi, xps);
 
 	rpc_unregister_client(clnt);
 	__rpc_clnt_remove_pipedir(clnt);
@@ -697,20 +719,74 @@
 	synchronize_rcu();
 	if (parent != clnt)
 		rpc_release_client(parent);
+	xprt_switch_put(oldxps);
 	xprt_put(old);
 	dprintk("RPC:       replaced xprt for clnt %p\n", clnt);
 	return 0;
 
 out_revert:
+	xps = xprt_iter_xchg_switch(&clnt->cl_xpi, oldxps);
 	rpc_clnt_set_transport(clnt, old, old_timeo);
 	clnt->cl_parent = parent;
 	rpc_client_register(clnt, pseudoflavor, NULL);
+	xprt_switch_put(xps);
 	xprt_put(xprt);
 	dprintk("RPC:       failed to switch xprt for clnt %p\n", clnt);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
 
+static
+int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
+{
+	struct rpc_xprt_switch *xps;
+
+	rcu_read_lock();
+	xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+	rcu_read_unlock();
+	if (xps == NULL)
+		return -EAGAIN;
+	xprt_iter_init_listall(xpi, xps);
+	xprt_switch_put(xps);
+	return 0;
+}
+
+/**
+ * rpc_clnt_iterate_for_each_xprt - Apply a function to all transports
+ * @clnt: pointer to client
+ * @fn: function to apply
+ * @data: void pointer to function data
+ *
+ * Iterates through the list of RPC transports currently attached to the
+ * client and applies the function fn(clnt, xprt, data).
+ *
+ * On error, the iteration stops, and the function returns the error value.
+ */
+int rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt,
+		int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *),
+		void *data)
+{
+	struct rpc_xprt_iter xpi;
+	int ret;
+
+	ret = rpc_clnt_xprt_iter_init(clnt, &xpi);
+	if (ret)
+		return ret;
+	for (;;) {
+		struct rpc_xprt *xprt = xprt_iter_get_next(&xpi);
+
+		if (!xprt)
+			break;
+		ret = fn(clnt, xprt, data);
+		xprt_put(xprt);
+		if (ret < 0)
+			break;
+	}
+	xprt_iter_destroy(&xpi);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_iterate_for_each_xprt);
+
 /*
  * Kill all tasks for the given client.
  * XXX: kill their descendants as well?
@@ -783,6 +859,7 @@
 	rpc_free_iostats(clnt->cl_metrics);
 	clnt->cl_metrics = NULL;
 	xprt_put(rcu_dereference_raw(clnt->cl_xprt));
+	xprt_iter_destroy(&clnt->cl_xpi);
 	rpciod_down();
 	rpc_free_clid(clnt);
 	kfree(clnt);
@@ -868,6 +945,7 @@
 void rpc_task_release_client(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_xprt *xprt = task->tk_xprt;
 
 	if (clnt != NULL) {
 		/* Remove from client task list */
@@ -878,13 +956,22 @@
 
 		rpc_release_client(clnt);
 	}
+
+	if (xprt != NULL) {
+		task->tk_xprt = NULL;
+
+		xprt_put(xprt);
+	}
 }
 
 static
 void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
 {
+
 	if (clnt != NULL) {
 		rpc_task_release_client(task);
+		if (task->tk_xprt == NULL)
+			task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
 		task->tk_client = clnt;
 		atomic_inc(&clnt->cl_count);
 		if (clnt->cl_softrtry)
@@ -900,14 +987,6 @@
 	}
 }
 
-void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
-{
-	rpc_task_release_client(task);
-	rpc_task_set_client(task, clnt);
-}
-EXPORT_SYMBOL_GPL(rpc_task_reset_client);
-
-
 static void
 rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
 {
@@ -2104,11 +2183,9 @@
 	}
 	if (RPC_IS_SOFT(task)) {
 		if (clnt->cl_chatty) {
-			rcu_read_lock();
 			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_program->name,
-				rcu_dereference(clnt->cl_xprt)->servername);
-			rcu_read_unlock();
+				task->tk_xprt->servername);
 		}
 		if (task->tk_flags & RPC_TASK_TIMEOUT)
 			rpc_exit(task, -ETIMEDOUT);
@@ -2120,11 +2197,9 @@
 	if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
 		task->tk_flags |= RPC_CALL_MAJORSEEN;
 		if (clnt->cl_chatty) {
-			rcu_read_lock();
 			printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
 			clnt->cl_program->name,
-			rcu_dereference(clnt->cl_xprt)->servername);
-			rcu_read_unlock();
+			task->tk_xprt->servername);
 		}
 	}
 	rpc_force_rebind(clnt);
@@ -2154,11 +2229,9 @@
 
 	if (task->tk_flags & RPC_CALL_MAJORSEEN) {
 		if (clnt->cl_chatty) {
-			rcu_read_lock();
 			printk(KERN_NOTICE "%s: server %s OK\n",
 				clnt->cl_program->name,
-				rcu_dereference(clnt->cl_xprt)->servername);
-			rcu_read_unlock();
+				task->tk_xprt->servername);
 		}
 		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
 	}
@@ -2312,11 +2385,9 @@
 			task->tk_action = call_bind;
 			goto out_retry;
 		case RPC_AUTH_TOOWEAK:
-			rcu_read_lock();
 			printk(KERN_NOTICE "RPC: server %s requires stronger "
 			       "authentication.\n",
-			       rcu_dereference(clnt->cl_xprt)->servername);
-			rcu_read_unlock();
+			       task->tk_xprt->servername);
 			break;
 		default:
 			dprintk("RPC: %5u %s: unknown auth error: %x\n",
@@ -2341,27 +2412,27 @@
 	case RPC_SUCCESS:
 		return p;
 	case RPC_PROG_UNAVAIL:
-		dprintk_rcu("RPC: %5u %s: program %u is unsupported "
+		dprintk("RPC: %5u %s: program %u is unsupported "
 				"by server %s\n", task->tk_pid, __func__,
 				(unsigned int)clnt->cl_prog,
-				rcu_dereference(clnt->cl_xprt)->servername);
+				task->tk_xprt->servername);
 		error = -EPFNOSUPPORT;
 		goto out_err;
 	case RPC_PROG_MISMATCH:
-		dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported "
+		dprintk("RPC: %5u %s: program %u, version %u unsupported "
 				"by server %s\n", task->tk_pid, __func__,
 				(unsigned int)clnt->cl_prog,
 				(unsigned int)clnt->cl_vers,
-				rcu_dereference(clnt->cl_xprt)->servername);
+				task->tk_xprt->servername);
 		error = -EPROTONOSUPPORT;
 		goto out_err;
 	case RPC_PROC_UNAVAIL:
-		dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, "
+		dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
 				"version %u on server %s\n",
 				task->tk_pid, __func__,
 				rpc_proc_name(task),
 				clnt->cl_prog, clnt->cl_vers,
-				rcu_dereference(clnt->cl_xprt)->servername);
+				task->tk_xprt->servername);
 		error = -EOPNOTSUPP;
 		goto out_err;
 	case RPC_GARBAGE_ARGS:
@@ -2421,7 +2492,10 @@
 	return err;
 }
 
-struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
+static
+struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt, struct rpc_cred *cred, int flags,
+		const struct rpc_call_ops *ops, void *data)
 {
 	struct rpc_message msg = {
 		.rpc_proc = &rpcproc_null,
@@ -2429,14 +2503,140 @@
 	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clnt,
+		.rpc_xprt = xprt,
 		.rpc_message = &msg,
-		.callback_ops = &rpc_default_ops,
+		.callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
+		.callback_data = data,
 		.flags = flags,
 	};
+
 	return rpc_run_task(&task_setup_data);
 }
+
+struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
+{
+	return rpc_call_null_helper(clnt, NULL, cred, flags, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(rpc_call_null);
 
+struct rpc_cb_add_xprt_calldata {
+	struct rpc_xprt_switch *xps;
+	struct rpc_xprt *xprt;
+};
+
+static void rpc_cb_add_xprt_done(struct rpc_task *task, void *calldata)
+{
+	struct rpc_cb_add_xprt_calldata *data = calldata;
+
+	if (task->tk_status == 0)
+		rpc_xprt_switch_add_xprt(data->xps, data->xprt);
+}
+
+static void rpc_cb_add_xprt_release(void *calldata)
+{
+	struct rpc_cb_add_xprt_calldata *data = calldata;
+
+	xprt_put(data->xprt);
+	xprt_switch_put(data->xps);
+	kfree(data);
+}
+
+const static struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
+	.rpc_call_done = rpc_cb_add_xprt_done,
+	.rpc_release = rpc_cb_add_xprt_release,
+};
+
+/**
+ * rpc_clnt_test_and_add_xprt - Test and add a new transport to a rpc_clnt
+ * @clnt: pointer to struct rpc_clnt
+ * @xps: pointer to struct rpc_xprt_switch,
+ * @xprt: pointer struct rpc_xprt
+ * @dummy: unused
+ */
+int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
+		struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
+		void *dummy)
+{
+	struct rpc_cb_add_xprt_calldata *data;
+	struct rpc_cred *cred;
+	struct rpc_task *task;
+
+	data = kmalloc(sizeof(*data), GFP_NOFS);
+	if (!data)
+		return -ENOMEM;
+	data->xps = xprt_switch_get(xps);
+	data->xprt = xprt_get(xprt);
+
+	cred = authnull_ops.lookup_cred(NULL, NULL, 0);
+	task = rpc_call_null_helper(clnt, xprt, cred,
+			RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC,
+			&rpc_cb_add_xprt_call_ops, data);
+	put_rpccred(cred);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
+
+/**
+ * rpc_clnt_add_xprt - Add a new transport to a rpc_clnt
+ * @clnt: pointer to struct rpc_clnt
+ * @xprtargs: pointer to struct xprt_create
+ * @setup: callback to test and/or set up the connection
+ * @data: pointer to setup function data
+ *
+ * Creates a new transport using the parameters set in args and
+ * adds it to clnt.
+ * If ping is set, then test that connectivity succeeds before
+ * adding the new transport.
+ *
+ */
+int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
+		struct xprt_create *xprtargs,
+		int (*setup)(struct rpc_clnt *,
+			struct rpc_xprt_switch *,
+			struct rpc_xprt *,
+			void *),
+		void *data)
+{
+	struct rpc_xprt_switch *xps;
+	struct rpc_xprt *xprt;
+	unsigned char resvport;
+	int ret = 0;
+
+	rcu_read_lock();
+	xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+	xprt = xprt_iter_xprt(&clnt->cl_xpi);
+	if (xps == NULL || xprt == NULL) {
+		rcu_read_unlock();
+		return -EAGAIN;
+	}
+	resvport = xprt->resvport;
+	rcu_read_unlock();
+
+	xprt = xprt_create_transport(xprtargs);
+	if (IS_ERR(xprt)) {
+		ret = PTR_ERR(xprt);
+		goto out_put_switch;
+	}
+	xprt->resvport = resvport;
+
+	rpc_xprt_switch_set_roundrobin(xps);
+	if (setup) {
+		ret = setup(clnt, xps, xprt, data);
+		if (ret != 0)
+			goto out_put_xprt;
+	}
+	rpc_xprt_switch_add_xprt(xps, xprt);
+out_put_xprt:
+	xprt_put(xprt);
+out_put_switch:
+	xprt_switch_put(xps);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
+
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static void rpc_show_header(void)
 {
@@ -2483,57 +2683,39 @@
 #endif
 
 #if IS_ENABLED(CONFIG_SUNRPC_SWAP)
+static int
+rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt,
+		void *dummy)
+{
+	return xprt_enable_swap(xprt);
+}
+
 int
 rpc_clnt_swap_activate(struct rpc_clnt *clnt)
 {
-	int ret = 0;
-	struct rpc_xprt	*xprt;
-
-	if (atomic_inc_return(&clnt->cl_swapper) == 1) {
-retry:
-		rcu_read_lock();
-		xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
-		rcu_read_unlock();
-		if (!xprt) {
-			/*
-			 * If we didn't get a reference, then we likely are
-			 * racing with a migration event. Wait for a grace
-			 * period and try again.
-			 */
-			synchronize_rcu();
-			goto retry;
-		}
-
-		ret = xprt_enable_swap(xprt);
-		xprt_put(xprt);
-	}
-	return ret;
+	if (atomic_inc_return(&clnt->cl_swapper) == 1)
+		return rpc_clnt_iterate_for_each_xprt(clnt,
+				rpc_clnt_swap_activate_callback, NULL);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
 
+static int
+rpc_clnt_swap_deactivate_callback(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt,
+		void *dummy)
+{
+	xprt_disable_swap(xprt);
+	return 0;
+}
+
 void
 rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
 {
-	struct rpc_xprt	*xprt;
-
-	if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) {
-retry:
-		rcu_read_lock();
-		xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
-		rcu_read_unlock();
-		if (!xprt) {
-			/*
-			 * If we didn't get a reference, then we likely are
-			 * racing with a migration event. Wait for a grace
-			 * period and try again.
-			 */
-			synchronize_rcu();
-			goto retry;
-		}
-
-		xprt_disable_swap(xprt);
-		xprt_put(xprt);
-	}
+	if (atomic_dec_if_positive(&clnt->cl_swapper) == 0)
+		rpc_clnt_iterate_for_each_xprt(clnt,
+				rpc_clnt_swap_deactivate_callback, NULL);
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
 #endif /* CONFIG_SUNRPC_SWAP */

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 31789ef..fc48eca 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c

@@ -1390,8 +1390,8 @@
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 	int err;
 
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
 	sb->s_magic = RPCAUTH_GSSMAGIC;
 	sb->s_op = &s_ops;
 	sb->s_d_op = &simple_dentry_operations;

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index cf5770d..5b30603 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c

@@ -648,10 +648,10 @@
 static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
 {
 	struct rpc_clnt *parent = clnt->cl_parent;
-	struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt);
+	struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
 
 	while (parent != clnt) {
-		if (rcu_dereference(parent->cl_xprt) != xprt)
+		if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps)
 			break;
 		if (clnt->cl_autobind)
 			break;
@@ -683,11 +683,9 @@
 	int status;
 
 	rcu_read_lock();
-	do {
-		clnt = rpcb_find_transport_owner(task->tk_client);
-		xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
-	} while (xprt == NULL);
+	clnt = rpcb_find_transport_owner(task->tk_client);
 	rcu_read_unlock();
+	xprt = xprt_get(task->tk_xprt);
 
 	dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
 		task->tk_pid, __func__,

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 73ad57a..fcfd48d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c

@@ -909,6 +909,8 @@
 	/* Initialize workqueue for async tasks */
 	task->tk_workqueue = task_setup_data->workqueue;
 
+	task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
+
 	if (task->tk_ops->rpc_call_prepare != NULL)
 		task->tk_action = rpc_prepare_task;
 

diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 2df87f7..de70c78 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c

@@ -96,8 +96,8 @@
 	if (base || xdr->page_base) {
 		pglen -= base;
 		base += xdr->page_base;
-		ppage += base >> PAGE_CACHE_SHIFT;
-		base &= ~PAGE_CACHE_MASK;
+		ppage += base >> PAGE_SHIFT;
+		base &= ~PAGE_MASK;
 	}
 	do {
 		char *kaddr;
@@ -113,7 +113,7 @@
 			}
 		}
 
-		len = PAGE_CACHE_SIZE;
+		len = PAGE_SIZE;
 		kaddr = kmap_atomic(*ppage);
 		if (base) {
 			len -= base;

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 4439ac4..6bdb386 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c

@@ -164,7 +164,7 @@
  * Note: the addresses pgto_base and pgfrom_base are both calculated in
  *       the same way:
  *            if a memory area starts at byte 'base' in page 'pages[i]',
- *            then its address is given as (i << PAGE_CACHE_SHIFT) + base
+ *            then its address is given as (i << PAGE_SHIFT) + base
  * Also note: pgfrom_base must be < pgto_base, but the memory areas
  * 	they point to may overlap.
  */
@@ -181,20 +181,20 @@
 	pgto_base += len;
 	pgfrom_base += len;
 
-	pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT);
-	pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT);
+	pgto = pages + (pgto_base >> PAGE_SHIFT);
+	pgfrom = pages + (pgfrom_base >> PAGE_SHIFT);
 
-	pgto_base &= ~PAGE_CACHE_MASK;
-	pgfrom_base &= ~PAGE_CACHE_MASK;
+	pgto_base &= ~PAGE_MASK;
+	pgfrom_base &= ~PAGE_MASK;
 
 	do {
 		/* Are any pointers crossing a page boundary? */
 		if (pgto_base == 0) {
-			pgto_base = PAGE_CACHE_SIZE;
+			pgto_base = PAGE_SIZE;
 			pgto--;
 		}
 		if (pgfrom_base == 0) {
-			pgfrom_base = PAGE_CACHE_SIZE;
+			pgfrom_base = PAGE_SIZE;
 			pgfrom--;
 		}
 
@@ -236,11 +236,11 @@
 	char *vto;
 	size_t copy;
 
-	pgto = pages + (pgbase >> PAGE_CACHE_SHIFT);
-	pgbase &= ~PAGE_CACHE_MASK;
+	pgto = pages + (pgbase >> PAGE_SHIFT);
+	pgbase &= ~PAGE_MASK;
 
 	for (;;) {
-		copy = PAGE_CACHE_SIZE - pgbase;
+		copy = PAGE_SIZE - pgbase;
 		if (copy > len)
 			copy = len;
 
@@ -253,7 +253,7 @@
 			break;
 
 		pgbase += copy;
-		if (pgbase == PAGE_CACHE_SIZE) {
+		if (pgbase == PAGE_SIZE) {
 			flush_dcache_page(*pgto);
 			pgbase = 0;
 			pgto++;
@@ -280,11 +280,11 @@
 	char *vfrom;
 	size_t copy;
 
-	pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT);
-	pgbase &= ~PAGE_CACHE_MASK;
+	pgfrom = pages + (pgbase >> PAGE_SHIFT);
+	pgbase &= ~PAGE_MASK;
 
 	do {
-		copy = PAGE_CACHE_SIZE - pgbase;
+		copy = PAGE_SIZE - pgbase;
 		if (copy > len)
 			copy = len;
 
@@ -293,7 +293,7 @@
 		kunmap_atomic(vfrom);
 
 		pgbase += copy;
-		if (pgbase == PAGE_CACHE_SIZE) {
+		if (pgbase == PAGE_SIZE) {
 			pgbase = 0;
 			pgfrom++;
 		}
@@ -1038,8 +1038,8 @@
 	if (base < buf->page_len) {
 		subbuf->page_len = min(buf->page_len - base, len);
 		base += buf->page_base;
-		subbuf->page_base = base & ~PAGE_CACHE_MASK;
-		subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT];
+		subbuf->page_base = base & ~PAGE_MASK;
+		subbuf->pages = &buf->pages[base >> PAGE_SHIFT];
 		len -= subbuf->page_len;
 		base = 0;
 	} else {
@@ -1297,9 +1297,9 @@
 		todo -= avail_here;
 
 		base += buf->page_base;
-		ppages = buf->pages + (base >> PAGE_CACHE_SHIFT);
-		base &= ~PAGE_CACHE_MASK;
-		avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base,
+		ppages = buf->pages + (base >> PAGE_SHIFT);
+		base &= ~PAGE_MASK;
+		avail_page = min_t(unsigned int, PAGE_SIZE - base,
 					avail_here);
 		c = kmap(*ppages) + base;
 
@@ -1383,7 +1383,7 @@
 			}
 
 			avail_page = min(avail_here,
-				 (unsigned int) PAGE_CACHE_SIZE);
+				 (unsigned int) PAGE_SIZE);
 		}
 		base = buf->page_len;  /* align to start of tail */
 	}
@@ -1479,9 +1479,9 @@
 		if (page_len > len)
 			page_len = len;
 		len -= page_len;
-		page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
-		i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
-		thislen = PAGE_CACHE_SIZE - page_offset;
+		page_offset = (offset + buf->page_base) & (PAGE_SIZE - 1);
+		i = (offset + buf->page_base) >> PAGE_SHIFT;
+		thislen = PAGE_SIZE - page_offset;
 		do {
 			if (thislen > page_len)
 				thislen = page_len;
@@ -1492,7 +1492,7 @@
 			page_len -= thislen;
 			i++;
 			page_offset = 0;
-			thislen = PAGE_CACHE_SIZE;
+			thislen = PAGE_SIZE;
 		} while (page_len != 0);
 		offset = 0;
 	}

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 37edea6..216a138 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c

@@ -48,6 +48,7 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/metrics.h>
 #include <linux/sunrpc/bc_xprt.h>
+#include <linux/rcupdate.h>
 
 #include <trace/events/sunrpc.h>
 
@@ -1166,7 +1167,7 @@
 {
 	put_net(xprt->xprt_net);
 	xprt_free_all_slots(xprt);
-	kfree(xprt);
+	kfree_rcu(xprt, rcu);
 }
 EXPORT_SYMBOL_GPL(xprt_free);
 
@@ -1180,7 +1181,7 @@
  */
 void xprt_reserve(struct rpc_task *task)
 {
-	struct rpc_xprt	*xprt;
+	struct rpc_xprt *xprt = task->tk_xprt;
 
 	task->tk_status = 0;
 	if (task->tk_rqstp != NULL)
@@ -1188,11 +1189,8 @@
 
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	rcu_read_lock();
-	xprt = rcu_dereference(task->tk_client->cl_xprt);
 	if (!xprt_throttle_congested(xprt, task))
 		xprt->ops->alloc_slot(xprt, task);
-	rcu_read_unlock();
 }
 
 /**
@@ -1206,7 +1204,7 @@
  */
 void xprt_retry_reserve(struct rpc_task *task)
 {
-	struct rpc_xprt	*xprt;
+	struct rpc_xprt *xprt = task->tk_xprt;
 
 	task->tk_status = 0;
 	if (task->tk_rqstp != NULL)
@@ -1214,10 +1212,7 @@
 
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	rcu_read_lock();
-	xprt = rcu_dereference(task->tk_client->cl_xprt);
 	xprt->ops->alloc_slot(xprt, task);
-	rcu_read_unlock();
 }
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -1264,11 +1259,9 @@
 
 	if (req == NULL) {
 		if (task->tk_client) {
-			rcu_read_lock();
-			xprt = rcu_dereference(task->tk_client->cl_xprt);
+			xprt = task->tk_xprt;
 			if (xprt->snd_task == task)
 				xprt_release_write(xprt, task);
-			rcu_read_unlock();
 		}
 		return;
 	}
@@ -1307,7 +1300,7 @@
 
 static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 {
-	atomic_set(&xprt->count, 1);
+	kref_init(&xprt->kref);
 
 	spin_lock_init(&xprt->transport_lock);
 	spin_lock_init(&xprt->reserve_lock);
@@ -1318,6 +1311,7 @@
 	spin_lock_init(&xprt->bc_pa_lock);
 	INIT_LIST_HEAD(&xprt->bc_pa_list);
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
+	INIT_LIST_HEAD(&xprt->xprt_switch);
 
 	xprt->last_used = jiffies;
 	xprt->cwnd = RPC_INITCWND;
@@ -1415,6 +1409,24 @@
 	xprt->ops->destroy(xprt);
 }
 
+static void xprt_destroy_kref(struct kref *kref)
+{
+	xprt_destroy(container_of(kref, struct rpc_xprt, kref));
+}
+
+/**
+ * xprt_get - return a reference to an RPC transport.
+ * @xprt: pointer to the transport
+ *
+ */
+struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
+{
+	if (xprt != NULL && kref_get_unless_zero(&xprt->kref))
+		return xprt;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xprt_get);
+
 /**
  * xprt_put - release a reference to an RPC transport.
  * @xprt: pointer to the transport
@@ -1422,7 +1434,7 @@
  */
 void xprt_put(struct rpc_xprt *xprt)
 {
-	if (atomic_dec_and_test(&xprt->count))
-		xprt_destroy(xprt);
+	if (xprt != NULL)
+		kref_put(&xprt->kref, xprt_destroy_kref);
 }
 EXPORT_SYMBOL_GPL(xprt_put);

diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
new file mode 100644
index 0000000..e7fd769
--- /dev/null
+++ b/net/sunrpc/xprtmultipath.c

@@ -0,0 +1,475 @@
+/*
+ * Multipath support for RPC
+ *
+ * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved.
+ *
+ * Trond Myklebust <trond.myklebust@primarydata.com>
+ *
+ */
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <asm/cmpxchg.h>
+#include <linux/spinlock.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/xprtmultipath.h>
+
+typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head,
+		const struct rpc_xprt *cur);
+
+static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
+static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin;
+static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall;
+
+static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
+		struct rpc_xprt *xprt)
+{
+	if (unlikely(xprt_get(xprt) == NULL))
+		return;
+	list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list);
+	smp_wmb();
+	if (xps->xps_nxprts == 0)
+		xps->xps_net = xprt->xprt_net;
+	xps->xps_nxprts++;
+}
+
+/**
+ * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch
+ * @xps: pointer to struct rpc_xprt_switch
+ * @xprt: pointer to struct rpc_xprt
+ *
+ * Adds xprt to the end of the list of struct rpc_xprt in xps.
+ */
+void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
+		struct rpc_xprt *xprt)
+{
+	if (xprt == NULL)
+		return;
+	spin_lock(&xps->xps_lock);
+	if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
+		xprt_switch_add_xprt_locked(xps, xprt);
+	spin_unlock(&xps->xps_lock);
+}
+
+static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
+		struct rpc_xprt *xprt)
+{
+	if (unlikely(xprt == NULL))
+		return;
+	xps->xps_nxprts--;
+	if (xps->xps_nxprts == 0)
+		xps->xps_net = NULL;
+	smp_wmb();
+	list_del_rcu(&xprt->xprt_switch);
+}
+
+/**
+ * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch
+ * @xps: pointer to struct rpc_xprt_switch
+ * @xprt: pointer to struct rpc_xprt
+ *
+ * Removes xprt from the list of struct rpc_xprt in xps.
+ */
+void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
+		struct rpc_xprt *xprt)
+{
+	spin_lock(&xps->xps_lock);
+	xprt_switch_remove_xprt_locked(xps, xprt);
+	spin_unlock(&xps->xps_lock);
+	xprt_put(xprt);
+}
+
+/**
+ * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch
+ * @xprt: pointer to struct rpc_xprt
+ * @gfp_flags: allocation flags
+ *
+ * On success, returns an initialised struct rpc_xprt_switch, containing
+ * the entry xprt. Returns NULL on failure.
+ */
+struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
+		gfp_t gfp_flags)
+{
+	struct rpc_xprt_switch *xps;
+
+	xps = kmalloc(sizeof(*xps), gfp_flags);
+	if (xps != NULL) {
+		spin_lock_init(&xps->xps_lock);
+		kref_init(&xps->xps_kref);
+		xps->xps_nxprts = 0;
+		INIT_LIST_HEAD(&xps->xps_xprt_list);
+		xps->xps_iter_ops = &rpc_xprt_iter_singular;
+		xprt_switch_add_xprt_locked(xps, xprt);
+	}
+
+	return xps;
+}
+
+static void xprt_switch_free_entries(struct rpc_xprt_switch *xps)
+{
+	spin_lock(&xps->xps_lock);
+	while (!list_empty(&xps->xps_xprt_list)) {
+		struct rpc_xprt *xprt;
+
+		xprt = list_first_entry(&xps->xps_xprt_list,
+				struct rpc_xprt, xprt_switch);
+		xprt_switch_remove_xprt_locked(xps, xprt);
+		spin_unlock(&xps->xps_lock);
+		xprt_put(xprt);
+		spin_lock(&xps->xps_lock);
+	}
+	spin_unlock(&xps->xps_lock);
+}
+
+static void xprt_switch_free(struct kref *kref)
+{
+	struct rpc_xprt_switch *xps = container_of(kref,
+			struct rpc_xprt_switch, xps_kref);
+
+	xprt_switch_free_entries(xps);
+	kfree_rcu(xps, xps_rcu);
+}
+
+/**
+ * xprt_switch_get - Return a reference to a rpc_xprt_switch
+ * @xps: pointer to struct rpc_xprt_switch
+ *
+ * Returns a reference to xps unless the refcount is already zero.
+ */
+struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps)
+{
+	if (xps != NULL && kref_get_unless_zero(&xps->xps_kref))
+		return xps;
+	return NULL;
+}
+
+/**
+ * xprt_switch_put - Release a reference to a rpc_xprt_switch
+ * @xps: pointer to struct rpc_xprt_switch
+ *
+ * Release the reference to xps, and free it once the refcount is zero.
+ */
+void xprt_switch_put(struct rpc_xprt_switch *xps)
+{
+	if (xps != NULL)
+		kref_put(&xps->xps_kref, xprt_switch_free);
+}
+
+/**
+ * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch
+ * @xps: pointer to struct rpc_xprt_switch
+ *
+ * Sets a round-robin default policy for iterators acting on xps.
+ */
+void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps)
+{
+	if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin)
+		WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin);
+}
+
+static
+const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi)
+{
+	if (xpi->xpi_ops != NULL)
+		return xpi->xpi_ops;
+	return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops;
+}
+
+static
+void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi)
+{
+}
+
+static
+void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
+{
+	WRITE_ONCE(xpi->xpi_cursor, NULL);
+}
+
+static
+struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
+{
+	return list_first_or_null_rcu(head, struct rpc_xprt, xprt_switch);
+}
+
+static
+struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi)
+{
+	struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
+
+	if (xps == NULL)
+		return NULL;
+	return xprt_switch_find_first_entry(&xps->xps_xprt_list);
+}
+
+static
+struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
+		const struct rpc_xprt *cur)
+{
+	struct rpc_xprt *pos;
+
+	list_for_each_entry_rcu(pos, head, xprt_switch) {
+		if (cur == pos)
+			return pos;
+	}
+	return NULL;
+}
+
+static
+struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
+{
+	struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
+	struct list_head *head;
+
+	if (xps == NULL)
+		return NULL;
+	head = &xps->xps_xprt_list;
+	if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2)
+		return xprt_switch_find_first_entry(head);
+	return xprt_switch_find_current_entry(head, xpi->xpi_cursor);
+}
+
+static
+struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
+		const struct rpc_xprt *cur)
+{
+	struct rpc_xprt *pos, *prev = NULL;
+
+	list_for_each_entry_rcu(pos, head, xprt_switch) {
+		if (cur == prev)
+			return pos;
+		prev = pos;
+	}
+	return NULL;
+}
+
+static
+struct rpc_xprt *xprt_switch_set_next_cursor(struct list_head *head,
+		struct rpc_xprt **cursor,
+		xprt_switch_find_xprt_t find_next)
+{
+	struct rpc_xprt *cur, *pos, *old;
+
+	cur = READ_ONCE(*cursor);
+	for (;;) {
+		old = cur;
+		pos = find_next(head, old);
+		if (pos == NULL)
+			break;
+		cur = cmpxchg_relaxed(cursor, old, pos);
+		if (cur == old)
+			break;
+	}
+	return pos;
+}
+
+static
+struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi,
+		xprt_switch_find_xprt_t find_next)
+{
+	struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
+	struct list_head *head;
+
+	if (xps == NULL)
+		return NULL;
+	head = &xps->xps_xprt_list;
+	if (xps->xps_nxprts < 2)
+		return xprt_switch_find_first_entry(head);
+	return xprt_switch_set_next_cursor(head, &xpi->xpi_cursor, find_next);
+}
+
+static
+struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
+		const struct rpc_xprt *cur)
+{
+	struct rpc_xprt *ret;
+
+	ret = xprt_switch_find_next_entry(head, cur);
+	if (ret != NULL)
+		return ret;
+	return xprt_switch_find_first_entry(head);
+}
+
+static
+struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
+{
+	return xprt_iter_next_entry_multiple(xpi,
+			xprt_switch_find_next_entry_roundrobin);
+}
+
+static
+struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
+{
+	return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry);
+}
+
+/*
+ * xprt_iter_rewind - Resets the xprt iterator
+ * @xpi: pointer to rpc_xprt_iter
+ *
+ * Resets xpi to ensure that it points to the first entry in the list
+ * of transports.
+ */
+static
+void xprt_iter_rewind(struct rpc_xprt_iter *xpi)
+{
+	rcu_read_lock();
+	xprt_iter_ops(xpi)->xpi_rewind(xpi);
+	rcu_read_unlock();
+}
+
+static void __xprt_iter_init(struct rpc_xprt_iter *xpi,
+		struct rpc_xprt_switch *xps,
+		const struct rpc_xprt_iter_ops *ops)
+{
+	rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps));
+	xpi->xpi_cursor = NULL;
+	xpi->xpi_ops = ops;
+}
+
+/**
+ * xprt_iter_init - Initialise an xprt iterator
+ * @xpi: pointer to rpc_xprt_iter
+ * @xps: pointer to rpc_xprt_switch
+ *
+ * Initialises the iterator to use the default iterator ops
+ * as set in xps. This function is mainly intended for internal
+ * use in the rpc_client.
+ */
+void xprt_iter_init(struct rpc_xprt_iter *xpi,
+		struct rpc_xprt_switch *xps)
+{
+	__xprt_iter_init(xpi, xps, NULL);
+}
+
+/**
+ * xprt_iter_init_listall - Initialise an xprt iterator
+ * @xpi: pointer to rpc_xprt_iter
+ * @xps: pointer to rpc_xprt_switch
+ *
+ * Initialises the iterator to iterate once through the entire list
+ * of entries in xps.
+ */
+void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
+		struct rpc_xprt_switch *xps)
+{
+	__xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall);
+}
+
+/**
+ * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
+ * @xpi: pointer to rpc_xprt_iter
+ * @xps: pointer to a new rpc_xprt_switch or NULL
+ *
+ * Swaps out the existing xpi->xpi_xpswitch with a new value.
+ */
+struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi,
+		struct rpc_xprt_switch *newswitch)
+{
+	struct rpc_xprt_switch __rcu *oldswitch;
+
+	/* Atomically swap out the old xpswitch */
+	oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch));
+	if (newswitch != NULL)
+		xprt_iter_rewind(xpi);
+	return rcu_dereference_protected(oldswitch, true);
+}
+
+/**
+ * xprt_iter_destroy - Destroys the xprt iterator
+ * @xpi pointer to rpc_xprt_iter
+ */
+void xprt_iter_destroy(struct rpc_xprt_iter *xpi)
+{
+	xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL));
+}
+
+/**
+ * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor
+ * @xpi: pointer to rpc_xprt_iter
+ *
+ * Returns a pointer to the struct rpc_xprt that is currently
+ * pointed to by the cursor.
+ * Caller must be holding rcu_read_lock().
+ */
+struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return xprt_iter_ops(xpi)->xpi_xprt(xpi);
+}
+
+static
+struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi,
+		struct rpc_xprt *(*fn)(struct rpc_xprt_iter *))
+{
+	struct rpc_xprt *ret;
+
+	do {
+		ret = fn(xpi);
+		if (ret == NULL)
+			break;
+		ret = xprt_get(ret);
+	} while (ret == NULL);
+	return ret;
+}
+
+/**
+ * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor
+ * @xpi: pointer to rpc_xprt_iter
+ *
+ * Returns a reference to the struct rpc_xprt that is currently
+ * pointed to by the cursor.
+ */
+struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi)
+{
+	struct rpc_xprt *xprt;
+
+	rcu_read_lock();
+	xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt);
+	rcu_read_unlock();
+	return xprt;
+}
+
+/**
+ * xprt_iter_get_next - Returns the next rpc_xprt following the cursor
+ * @xpi: pointer to rpc_xprt_iter
+ *
+ * Returns a reference to the struct rpc_xprt that immediately follows the
+ * entry pointed to by the cursor.
+ */
+struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi)
+{
+	struct rpc_xprt *xprt;
+
+	rcu_read_lock();
+	xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next);
+	rcu_read_unlock();
+	return xprt;
+}
+
+/* Policy for always returning the first entry in the rpc_xprt_switch */
+static
+const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = {
+	.xpi_rewind = xprt_iter_no_rewind,
+	.xpi_xprt = xprt_iter_first_entry,
+	.xpi_next = xprt_iter_first_entry,
+};
+
+/* Policy for round-robin iteration of entries in the rpc_xprt_switch */
+static
+const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = {
+	.xpi_rewind = xprt_iter_default_rewind,
+	.xpi_xprt = xprt_iter_current_entry,
+	.xpi_next = xprt_iter_next_entry_roundrobin,
+};
+
+/* Policy for once-through iteration of entries in the rpc_xprt_switch */
+static
+const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = {
+	.xpi_rewind = xprt_iter_default_rewind,
+	.xpi_xprt = xprt_iter_current_entry,
+	.xpi_next = xprt_iter_next_entry_all,
+};

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index c14f3a4..b289e10 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c

@@ -80,13 +80,13 @@
 		if (!r)
 			goto out;
 
-		r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
-					     sizeof(u64), GFP_KERNEL);
-		if (!r->r.fmr.physaddrs)
+		r->fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
+					   sizeof(u64), GFP_KERNEL);
+		if (!r->fmr.physaddrs)
 			goto out_free;
 
-		r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
-		if (IS_ERR(r->r.fmr.fmr))
+		r->fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
+		if (IS_ERR(r->fmr.fmr))
 			goto out_fmr_err;
 
 		list_add(&r->mw_list, &buf->rb_mws);
@@ -95,9 +95,9 @@
 	return 0;
 
 out_fmr_err:
-	rc = PTR_ERR(r->r.fmr.fmr);
+	rc = PTR_ERR(r->fmr.fmr);
 	dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
-	kfree(r->r.fmr.physaddrs);
+	kfree(r->fmr.physaddrs);
 out_free:
 	kfree(r);
 out:
@@ -109,7 +109,7 @@
 {
 	LIST_HEAD(l);
 
-	list_add(&r->r.fmr.fmr->list, &l);
+	list_add(&r->fmr.fmr->list, &l);
 	return ib_unmap_fmr(&l);
 }
 
@@ -148,7 +148,7 @@
 		nsegs = RPCRDMA_MAX_FMR_SGES;
 	for (i = 0; i < nsegs;) {
 		rpcrdma_map_one(device, seg, direction);
-		mw->r.fmr.physaddrs[i] = seg->mr_dma;
+		mw->fmr.physaddrs[i] = seg->mr_dma;
 		len += seg->mr_len;
 		++seg;
 		++i;
@@ -158,13 +158,13 @@
 			break;
 	}
 
-	rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs,
+	rc = ib_map_phys_fmr(mw->fmr.fmr, mw->fmr.physaddrs,
 			     i, seg1->mr_dma);
 	if (rc)
 		goto out_maperr;
 
 	seg1->rl_mw = mw;
-	seg1->mr_rkey = mw->r.fmr.fmr->rkey;
+	seg1->mr_rkey = mw->fmr.fmr->rkey;
 	seg1->mr_base = seg1->mr_dma + pageoff;
 	seg1->mr_nsegs = i;
 	seg1->mr_len = len;
@@ -219,7 +219,7 @@
 		seg = &req->rl_segments[i];
 		mw = seg->rl_mw;
 
-		list_add(&mw->r.fmr.fmr->list, &unmap_list);
+		list_add(&mw->fmr.fmr->list, &unmap_list);
 
 		i += seg->mr_nsegs;
 	}
@@ -281,9 +281,9 @@
 	while (!list_empty(&buf->rb_all)) {
 		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
 		list_del(&r->mw_all);
-		kfree(r->r.fmr.physaddrs);
+		kfree(r->fmr.physaddrs);
 
-		rc = ib_dealloc_fmr(r->r.fmr.fmr);
+		rc = ib_dealloc_fmr(r->fmr.fmr);
 		if (rc)
 			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
 				__func__, rc);

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index e165673..c250924 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c

@@ -109,20 +109,20 @@
 __frwr_recovery_worker(struct work_struct *work)
 {
 	struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
-					    r.frmr.fr_work);
-	struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt;
+					    frmr.fr_work);
+	struct rpcrdma_xprt *r_xprt = r->frmr.fr_xprt;
 	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
 	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
 
-	if (ib_dereg_mr(r->r.frmr.fr_mr))
+	if (ib_dereg_mr(r->frmr.fr_mr))
 		goto out_fail;
 
-	r->r.frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
-	if (IS_ERR(r->r.frmr.fr_mr))
+	r->frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
+	if (IS_ERR(r->frmr.fr_mr))
 		goto out_fail;
 
 	dprintk("RPC:       %s: recovered FRMR %p\n", __func__, r);
-	r->r.frmr.fr_state = FRMR_IS_INVALID;
+	r->frmr.fr_state = FRMR_IS_INVALID;
 	rpcrdma_put_mw(r_xprt, r);
 	return;
 
@@ -137,15 +137,15 @@
 static void
 __frwr_queue_recovery(struct rpcrdma_mw *r)
 {
-	INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker);
-	queue_work(frwr_recovery_wq, &r->r.frmr.fr_work);
+	INIT_WORK(&r->frmr.fr_work, __frwr_recovery_worker);
+	queue_work(frwr_recovery_wq, &r->frmr.fr_work);
 }
 
 static int
 __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
 	    unsigned int depth)
 {
-	struct rpcrdma_frmr *f = &r->r.frmr;
+	struct rpcrdma_frmr *f = &r->frmr;
 	int rc;
 
 	f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
@@ -158,6 +158,8 @@
 
 	sg_init_table(f->sg, depth);
 
+	init_completion(&f->fr_linv_done);
+
 	return 0;
 
 out_mr_err:
@@ -179,11 +181,11 @@
 {
 	int rc;
 
-	rc = ib_dereg_mr(r->r.frmr.fr_mr);
+	rc = ib_dereg_mr(r->frmr.fr_mr);
 	if (rc)
 		dprintk("RPC:       %s: ib_dereg_mr status %i\n",
 			__func__, rc);
-	kfree(r->r.frmr.sg);
+	kfree(r->frmr.sg);
 }
 
 static int
@@ -244,39 +246,76 @@
 		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
 }
 
-/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
- * to be reset.
- *
- * WARNING: Only wr_id and status are reliable at this point
- */
 static void
-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
+__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
+			    const char *wr)
 {
-	if (likely(wc->status == IB_WC_SUCCESS))
-		return;
-
-	/* WARNING: Only wr_id and status are reliable at this point */
-	r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-	if (wc->status == IB_WC_WR_FLUSH_ERR)
-		dprintk("RPC:       %s: frmr %p flushed\n", __func__, r);
-	else
-		pr_warn("RPC:       %s: frmr %p error, status %s (%d)\n",
-			__func__, r, ib_wc_status_msg(wc->status), wc->status);
-
-	r->r.frmr.fr_state = FRMR_IS_STALE;
+	frmr->fr_state = FRMR_IS_STALE;
+	if (wc->status != IB_WC_WR_FLUSH_ERR)
+		pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
+		       wr, ib_wc_status_msg(wc->status),
+		       wc->status, wc->vendor_err);
 }
 
+/**
+ * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ */
 static void
-frwr_sendcompletion(struct ib_wc *wc)
+frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-	struct rpcrdma_frmr *f = &r->r.frmr;
+	struct rpcrdma_frmr *frmr;
+	struct ib_cqe *cqe;
 
-	if (unlikely(wc->status != IB_WC_SUCCESS))
-		__frwr_sendcompletion_flush(wc, r);
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	if (wc->status != IB_WC_SUCCESS) {
+		cqe = wc->wr_cqe;
+		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+		__frwr_sendcompletion_flush(wc, frmr, "fastreg");
+	}
+}
 
-	if (f->fr_waiter)
-		complete(&f->fr_linv_done);
+/**
+ * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ */
+static void
+frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct rpcrdma_frmr *frmr;
+	struct ib_cqe *cqe;
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	if (wc->status != IB_WC_SUCCESS) {
+		cqe = wc->wr_cqe;
+		frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+		__frwr_sendcompletion_flush(wc, frmr, "localinv");
+	}
+}
+
+/**
+ * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ * Awaken anyone waiting for an MR to finish being fenced.
+ */
+static void
+frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct rpcrdma_frmr *frmr;
+	struct ib_cqe *cqe;
+
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	cqe = wc->wr_cqe;
+	frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
+	if (wc->status != IB_WC_SUCCESS)
+		__frwr_sendcompletion_flush(wc, frmr, "localinv");
+	complete_all(&frmr->fr_linv_done);
 }
 
 static int
@@ -313,8 +352,7 @@
 
 		list_add(&r->mw_list, &buf->rb_mws);
 		list_add(&r->mw_all, &buf->rb_all);
-		r->mw_sendcompletion = frwr_sendcompletion;
-		r->r.frmr.fr_xprt = r_xprt;
+		r->frmr.fr_xprt = r_xprt;
 	}
 
 	return 0;
@@ -347,10 +385,9 @@
 		mw = rpcrdma_get_mw(r_xprt);
 		if (!mw)
 			return -ENOMEM;
-	} while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
-	frmr = &mw->r.frmr;
+	} while (mw->frmr.fr_state != FRMR_IS_INVALID);
+	frmr = &mw->frmr;
 	frmr->fr_state = FRMR_IS_VALID;
-	frmr->fr_waiter = false;
 	mr = frmr->fr_mr;
 	reg_wr = &frmr->fr_regwr;
 
@@ -400,7 +437,8 @@
 
 	reg_wr->wr.next = NULL;
 	reg_wr->wr.opcode = IB_WR_REG_MR;
-	reg_wr->wr.wr_id = (uintptr_t)mw;
+	frmr->fr_cqe.done = frwr_wc_fastreg;
+	reg_wr->wr.wr_cqe = &frmr->fr_cqe;
 	reg_wr->wr.num_sge = 0;
 	reg_wr->wr.send_flags = 0;
 	reg_wr->mr = mr;
@@ -434,15 +472,15 @@
 __frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
 {
 	struct rpcrdma_mw *mw = seg->rl_mw;
-	struct rpcrdma_frmr *f = &mw->r.frmr;
+	struct rpcrdma_frmr *f = &mw->frmr;
 	struct ib_send_wr *invalidate_wr;
 
-	f->fr_waiter = false;
 	f->fr_state = FRMR_IS_INVALID;
 	invalidate_wr = &f->fr_invwr;
 
 	memset(invalidate_wr, 0, sizeof(*invalidate_wr));
-	invalidate_wr->wr_id = (unsigned long)(void *)mw;
+	f->fr_cqe.done = frwr_wc_localinv;
+	invalidate_wr->wr_cqe = &f->fr_cqe;
 	invalidate_wr->opcode = IB_WR_LOCAL_INV;
 	invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
 
@@ -455,7 +493,7 @@
 {
 	struct ib_device *device = r_xprt->rx_ia.ri_device;
 	struct rpcrdma_mw *mw = seg->rl_mw;
-	struct rpcrdma_frmr *f = &mw->r.frmr;
+	struct rpcrdma_frmr *f = &mw->frmr;
 
 	seg->rl_mw = NULL;
 
@@ -504,15 +542,15 @@
 
 		i += seg->mr_nsegs;
 	}
-	f = &seg->rl_mw->r.frmr;
+	f = &seg->rl_mw->frmr;
 
 	/* Strong send queue ordering guarantees that when the
 	 * last WR in the chain completes, all WRs in the chain
 	 * are complete.
 	 */
 	f->fr_invwr.send_flags = IB_SEND_SIGNALED;
-	f->fr_waiter = true;
-	init_completion(&f->fr_linv_done);
+	f->fr_cqe.done = frwr_wc_localinv_wake;
+	reinit_completion(&f->fr_linv_done);
 	INIT_CQCOUNT(&r_xprt->rx_ep);
 
 	/* Transport disconnect drains the receive CQ before it
@@ -520,14 +558,18 @@
 	 * unless ri_id->qp is a valid pointer.
 	 */
 	rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
-	if (rc)
+	if (rc) {
 		pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
+		rdma_disconnect(ia->ri_id);
+		goto unmap;
+	}
 
 	wait_for_completion(&f->fr_linv_done);
 
 	/* ORDER: Now DMA unmap all of the req's MRs, and return
 	 * them to the free MW list.
 	 */
+unmap:
 	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
 		seg = &req->rl_segments[i];
 
@@ -549,7 +591,7 @@
 	struct rpcrdma_mr_seg *seg1 = seg;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_mw *mw = seg1->rl_mw;
-	struct rpcrdma_frmr *frmr = &mw->r.frmr;
+	struct rpcrdma_frmr *frmr = &mw->frmr;
 	struct ib_send_wr *invalidate_wr, *bad_wr;
 	int rc, nsegs = seg->mr_nsegs;
 
@@ -557,10 +599,11 @@
 
 	seg1->rl_mw = NULL;
 	frmr->fr_state = FRMR_IS_INVALID;
-	invalidate_wr = &mw->r.frmr.fr_invwr;
+	invalidate_wr = &mw->frmr.fr_invwr;
 
 	memset(invalidate_wr, 0, sizeof(*invalidate_wr));
-	invalidate_wr->wr_id = (uintptr_t)mw;
+	frmr->fr_cqe.done = frwr_wc_localinv;
+	invalidate_wr->wr_cqe = &frmr->fr_cqe;
 	invalidate_wr->opcode = IB_WR_LOCAL_INV;
 	invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
 	DECR_CQCOUNT(&r_xprt->rx_ep);

diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index dbb302e..481b9b6 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c

@@ -68,7 +68,6 @@
 	rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing));
 	seg->mr_rkey = ia->ri_dma_mr->rkey;
 	seg->mr_base = seg->mr_dma;
-	seg->mr_nsegs = 1;
 	return 1;
 }
 

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 0f28f2d..888823b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c

@@ -132,6 +132,33 @@
 	return tlen;
 }
 
+/* Split "vec" on page boundaries into segments. FMR registers pages,
+ * not a byte range. Other modes coalesce these segments into a single
+ * MR when they can.
+ */
+static int
+rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
+		     int n, int nsegs)
+{
+	size_t page_offset;
+	u32 remaining;
+	char *base;
+
+	base = vec->iov_base;
+	page_offset = offset_in_page(base);
+	remaining = vec->iov_len;
+	while (remaining && n < nsegs) {
+		seg[n].mr_page = NULL;
+		seg[n].mr_offset = base;
+		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
+		remaining -= seg[n].mr_len;
+		base += seg[n].mr_len;
+		++n;
+		page_offset = 0;
+	}
+	return n;
+}
+
 /*
  * Chunk assembly from upper layer xdr_buf.
  *
@@ -150,11 +177,10 @@
 	int page_base;
 	struct page **ppages;
 
-	if (pos == 0 && xdrbuf->head[0].iov_len) {
-		seg[n].mr_page = NULL;
-		seg[n].mr_offset = xdrbuf->head[0].iov_base;
-		seg[n].mr_len = xdrbuf->head[0].iov_len;
-		++n;
+	if (pos == 0) {
+		n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n, nsegs);
+		if (n == nsegs)
+			return -EIO;
 	}
 
 	len = xdrbuf->page_len;
@@ -192,13 +218,9 @@
 		 * xdr pad bytes, saving the server an RDMA operation. */
 		if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
 			return n;
+		n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n, nsegs);
 		if (n == nsegs)
-			/* Tail remains, but we're out of segments */
 			return -EIO;
-		seg[n].mr_page = NULL;
-		seg[n].mr_offset = xdrbuf->tail[0].iov_base;
-		seg[n].mr_len = xdrbuf->tail[0].iov_len;
-		++n;
 	}
 
 	return n;
@@ -773,20 +795,17 @@
 	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 	__be32 *iptr;
-	int rdmalen, status;
+	int rdmalen, status, rmerr;
 	unsigned long cwnd;
-	u32 credits;
 
 	dprintk("RPC:       %s: incoming rep %p\n", __func__, rep);
 
 	if (rep->rr_len == RPCRDMA_BAD_LEN)
 		goto out_badstatus;
-	if (rep->rr_len < RPCRDMA_HDRLEN_MIN)
+	if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
 		goto out_shortreply;
 
 	headerp = rdmab_to_msg(rep->rr_rdmabuf);
-	if (headerp->rm_vers != rpcrdma_version)
-		goto out_badversion;
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	if (rpcrdma_is_bcall(headerp))
 		goto out_bcall;
@@ -809,15 +828,16 @@
 	 */
 	list_del_init(&rqst->rq_list);
 	spin_unlock_bh(&xprt->transport_lock);
-	dprintk("RPC:       %s: reply 0x%p completes request 0x%p\n"
-		"                   RPC request 0x%p xid 0x%08x\n",
-			__func__, rep, req, rqst,
-			be32_to_cpu(headerp->rm_xid));
+	dprintk("RPC:       %s: reply %p completes request %p (xid 0x%08x)\n",
+		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
 
 	/* from here on, the reply is no longer an orphan */
 	req->rl_reply = rep;
 	xprt->reestablish_timeout = 0;
 
+	if (headerp->rm_vers != rpcrdma_version)
+		goto out_badversion;
+
 	/* check for expected message types */
 	/* The order of some of these tests is important. */
 	switch (headerp->rm_type) {
@@ -878,6 +898,9 @@
 		status = rdmalen;
 		break;
 
+	case rdma_error:
+		goto out_rdmaerr;
+
 badheader:
 	default:
 		dprintk("%s: invalid rpcrdma reply header (type %d):"
@@ -893,6 +916,7 @@
 		break;
 	}
 
+out:
 	/* Invalidate and flush the data payloads before waking the
 	 * waiting application. This guarantees the memory region is
 	 * properly fenced from the server before the application
@@ -903,15 +927,9 @@
 	if (req->rl_nchunks)
 		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
 
-	credits = be32_to_cpu(headerp->rm_credit);
-	if (credits == 0)
-		credits = 1;	/* don't deadlock */
-	else if (credits > r_xprt->rx_buf.rb_max_requests)
-		credits = r_xprt->rx_buf.rb_max_requests;
-
 	spin_lock_bh(&xprt->transport_lock);
 	cwnd = xprt->cwnd;
-	xprt->cwnd = credits << RPC_CWNDSHIFT;
+	xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
 	if (xprt->cwnd > cwnd)
 		xprt_release_rqst_cong(rqst->rq_task);
 
@@ -935,13 +953,43 @@
 	return;
 #endif
 
-out_shortreply:
-	dprintk("RPC:       %s: short/invalid reply\n", __func__);
-	goto repost;
-
+/* If the incoming reply terminated a pending RPC, the next
+ * RPC call will post a replacement receive buffer as it is
+ * being marshaled.
+ */
 out_badversion:
 	dprintk("RPC:       %s: invalid version %d\n",
 		__func__, be32_to_cpu(headerp->rm_vers));
+	status = -EIO;
+	r_xprt->rx_stats.bad_reply_count++;
+	goto out;
+
+out_rdmaerr:
+	rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err);
+	switch (rmerr) {
+	case ERR_VERS:
+		pr_err("%s: server reports header version error (%u-%u)\n",
+		       __func__,
+		       be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low),
+		       be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high));
+		break;
+	case ERR_CHUNK:
+		pr_err("%s: server reports header decoding error\n",
+		       __func__);
+		break;
+	default:
+		pr_err("%s: server reports unknown error %d\n",
+		       __func__, rmerr);
+	}
+	status = -EREMOTEIO;
+	r_xprt->rx_stats.bad_reply_count++;
+	goto out;
+
+/* If no pending RPC transaction was matched, post a replacement
+ * receive buffer before returning.
+ */
+out_shortreply:
+	dprintk("RPC:       %s: short/invalid reply\n", __func__);
 	goto repost;
 
 out_nomatch:

diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 65a7c23..a2a7519 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c

@@ -107,26 +107,18 @@
 	int ret;
 
 	vec = svc_rdma_get_req_map(rdma);
-	ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
+	ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
 	if (ret)
 		goto out_err;
 
-	/* Post a recv buffer to handle the reply for this request. */
-	ret = svc_rdma_post_recv(rdma, GFP_NOIO);
-	if (ret) {
-		pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
-		       ret);
-		pr_err("svcrdma: closing transport %p.\n", rdma);
-		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
-		ret = -ENOTCONN;
+	ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
+	if (ret)
 		goto out_err;
-	}
 
 	ctxt = svc_rdma_get_context(rdma);
 	ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
 	ctxt->count = 1;
 
-	ctxt->wr_op = IB_WR_SEND;
 	ctxt->direction = DMA_TO_DEVICE;
 	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
 	ctxt->sge[0].length = sndbuf->len;
@@ -140,7 +132,8 @@
 	atomic_inc(&rdma->sc_dma_used);
 
 	memset(&send_wr, 0, sizeof(send_wr));
-	send_wr.wr_id = (unsigned long)ctxt;
+	ctxt->cqe.done = svc_rdma_wc_send;
+	send_wr.wr_cqe = &ctxt->cqe;
 	send_wr.sg_list = ctxt->sge;
 	send_wr.num_sge = 1;
 	send_wr.opcode = IB_WR_SEND;

diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index e2fca76..765bca4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c

@@ -145,29 +145,44 @@
 	return (__be32 *)&ary->wc_array[nchunks];
 }
 
-int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
-			    struct svc_rqst *rqstp)
+int svc_rdma_xdr_decode_req(struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp)
 {
-	struct rpcrdma_msg *rmsgp = NULL;
 	__be32 *va, *vaend;
+	unsigned int len;
 	u32 hdr_len;
 
-	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-
 	/* Verify that there's enough bytes for header + something */
-	if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
+	if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_ERR) {
 		dprintk("svcrdma: header too short = %d\n",
 			rqstp->rq_arg.len);
 		return -EINVAL;
 	}
 
-	if (rmsgp->rm_vers != rpcrdma_version)
-		return -ENOSYS;
+	if (rmsgp->rm_vers != rpcrdma_version) {
+		dprintk("%s: bad version %u\n", __func__,
+			be32_to_cpu(rmsgp->rm_vers));
+		return -EPROTONOSUPPORT;
+	}
 
-	/* Pull in the extra for the padded case and bump our pointer */
-	if (rmsgp->rm_type == rdma_msgp) {
-		int hdrlen;
+	switch (be32_to_cpu(rmsgp->rm_type)) {
+	case RDMA_MSG:
+	case RDMA_NOMSG:
+		break;
 
+	case RDMA_DONE:
+		/* Just drop it */
+		dprintk("svcrdma: dropping RDMA_DONE message\n");
+		return 0;
+
+	case RDMA_ERROR:
+		/* Possible if this is a backchannel reply.
+		 * XXX: We should cancel this XID, though.
+		 */
+		dprintk("svcrdma: dropping RDMA_ERROR message\n");
+		return 0;
+
+	case RDMA_MSGP:
+		/* Pull in the extra for the padded case, bump our pointer */
 		rmsgp->rm_body.rm_padded.rm_align =
 			be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
 		rmsgp->rm_body.rm_padded.rm_thresh =
@@ -175,11 +190,15 @@
 
 		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
 		rqstp->rq_arg.head[0].iov_base = va;
-		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
-		rqstp->rq_arg.head[0].iov_len -= hdrlen;
-		if (hdrlen > rqstp->rq_arg.len)
+		len = (u32)((unsigned long)va - (unsigned long)rmsgp);
+		rqstp->rq_arg.head[0].iov_len -= len;
+		if (len > rqstp->rq_arg.len)
 			return -EINVAL;
-		return hdrlen;
+		return len;
+	default:
+		dprintk("svcrdma: bad rdma procedure (%u)\n",
+			be32_to_cpu(rmsgp->rm_type));
+		return -EINVAL;
 	}
 
 	/* The chunk list may contain either a read chunk list or a write
@@ -188,20 +207,25 @@
 	va = &rmsgp->rm_body.rm_chunks[0];
 	vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
 	va = decode_read_list(va, vaend);
-	if (!va)
+	if (!va) {
+		dprintk("svcrdma: failed to decode read list\n");
 		return -EINVAL;
+	}
 	va = decode_write_list(va, vaend);
-	if (!va)
+	if (!va) {
+		dprintk("svcrdma: failed to decode write list\n");
 		return -EINVAL;
+	}
 	va = decode_reply_array(va, vaend);
-	if (!va)
+	if (!va) {
+		dprintk("svcrdma: failed to decode reply chunk\n");
 		return -EINVAL;
+	}
 
 	rqstp->rq_arg.head[0].iov_base = va;
 	hdr_len = (unsigned long)va - (unsigned long)rmsgp;
 	rqstp->rq_arg.head[0].iov_len -= hdr_len;
 
-	*rdma_req = rmsgp;
 	return hdr_len;
 }
 

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c8b8a8b..3b24a64 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

@@ -180,9 +180,9 @@
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
 	memset(&read_wr, 0, sizeof(read_wr));
-	read_wr.wr.wr_id = (unsigned long)ctxt;
+	ctxt->cqe.done = svc_rdma_wc_read;
+	read_wr.wr.wr_cqe = &ctxt->cqe;
 	read_wr.wr.opcode = IB_WR_RDMA_READ;
-	ctxt->wr_op = read_wr.wr.opcode;
 	read_wr.wr.send_flags = IB_SEND_SIGNALED;
 	read_wr.rkey = rs_handle;
 	read_wr.remote_addr = rs_offset;
@@ -299,8 +299,9 @@
 	ctxt->read_hdr = head;
 
 	/* Prepare REG WR */
+	ctxt->reg_cqe.done = svc_rdma_wc_reg;
+	reg_wr.wr.wr_cqe = &ctxt->reg_cqe;
 	reg_wr.wr.opcode = IB_WR_REG_MR;
-	reg_wr.wr.wr_id = 0;
 	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
 	reg_wr.wr.num_sge = 0;
 	reg_wr.mr = frmr->mr;
@@ -310,6 +311,8 @@
 
 	/* Prepare RDMA_READ */
 	memset(&read_wr, 0, sizeof(read_wr));
+	ctxt->cqe.done = svc_rdma_wc_read;
+	read_wr.wr.wr_cqe = &ctxt->cqe;
 	read_wr.wr.send_flags = IB_SEND_SIGNALED;
 	read_wr.rkey = rs_handle;
 	read_wr.remote_addr = rs_offset;
@@ -317,19 +320,18 @@
 	read_wr.wr.num_sge = 1;
 	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
 		read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
-		read_wr.wr.wr_id = (unsigned long)ctxt;
 		read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
 	} else {
 		read_wr.wr.opcode = IB_WR_RDMA_READ;
 		read_wr.wr.next = &inv_wr;
 		/* Prepare invalidate */
 		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.wr_id = (unsigned long)ctxt;
+		ctxt->inv_cqe.done = svc_rdma_wc_inv;
+		inv_wr.wr_cqe = &ctxt->inv_cqe;
 		inv_wr.opcode = IB_WR_LOCAL_INV;
 		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
 		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
 	}
-	ctxt->wr_op = read_wr.wr.opcode;
 
 	/* Post the chain */
 	ret = svc_rdma_send(xprt, &reg_wr.wr);
@@ -612,7 +614,6 @@
 	struct svc_rdma_op_ctxt *ctxt = NULL;
 	struct rpcrdma_msg *rmsgp;
 	int ret = 0;
-	int len;
 
 	dprintk("svcrdma: rqstp=%p\n", rqstp);
 
@@ -642,8 +643,7 @@
 		 * transport list
 		 */
 		if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
-			goto close_out;
-
+			goto defer;
 		goto out;
 	}
 	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -654,15 +654,13 @@
 	rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
 
 	/* Decode the RDMA header. */
-	len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
-	rqstp->rq_xprt_hlen = len;
-
-	/* If the request is invalid, reply with an error */
-	if (len < 0) {
-		if (len == -ENOSYS)
-			svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
-		goto close_out;
-	}
+	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+	ret = svc_rdma_xdr_decode_req(rmsgp, rqstp);
+	if (ret < 0)
+		goto out_err;
+	if (ret == 0)
+		goto out_drop;
+	rqstp->rq_xprt_hlen = ret;
 
 	if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
 		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
@@ -698,26 +696,16 @@
 	svc_xprt_copy_addrs(rqstp, xprt);
 	return ret;
 
- close_out:
-	if (ctxt)
-		svc_rdma_put_context(ctxt, 1);
-	dprintk("svcrdma: transport %p is closing\n", xprt);
-	/*
-	 * Set the close bit and enqueue it. svc_recv will see the
-	 * close bit and call svc_xprt_delete
-	 */
-	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+out_err:
+	svc_rdma_send_error(rdma_xprt, rmsgp, ret);
+	svc_rdma_put_context(ctxt, 0);
+	return 0;
+
 defer:
 	return 0;
 
+out_drop:
+	svc_rdma_put_context(ctxt, 1);
 repost:
-	ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
-	if (ret) {
-		pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
-		       ret);
-		pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
-		set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
-		ret = -ENOTCONN;
-	}
-	return ret;
+	return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL);
 }

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index df57f3c..4f1b1c4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c

@@ -50,9 +50,15 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+static u32 xdr_padsize(u32 len)
+{
+	return (len & 3) ? (4 - (len & 3)) : 0;
+}
+
 int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
 		     struct xdr_buf *xdr,
-		     struct svc_rdma_req_map *vec)
+		     struct svc_rdma_req_map *vec,
+		     bool write_chunk_present)
 {
 	int sge_no;
 	u32 sge_bytes;
@@ -92,9 +98,20 @@
 
 	/* Tail SGE */
 	if (xdr->tail[0].iov_len) {
-		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
-		vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
-		sge_no++;
+		unsigned char *base = xdr->tail[0].iov_base;
+		size_t len = xdr->tail[0].iov_len;
+		u32 xdr_pad = xdr_padsize(xdr->page_len);
+
+		if (write_chunk_present && xdr_pad) {
+			base += xdr_pad;
+			len -= xdr_pad;
+		}
+
+		if (len) {
+			vec->sge[sge_no].iov_base = base;
+			vec->sge[sge_no].iov_len = len;
+			sge_no++;
+		}
 	}
 
 	dprintk("svcrdma: %s: sge_no %d page_no %d "
@@ -166,10 +183,10 @@
  * reply array is present
  */
 static struct rpcrdma_write_array *
-svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
+svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
+			 struct rpcrdma_write_array *wr_ary)
 {
 	struct rpcrdma_read_chunk *rch;
-	struct rpcrdma_write_array *wr_ary;
 	struct rpcrdma_write_array *rp_ary;
 
 	/* XXX: Need to fix when reply chunk may occur with read list
@@ -191,7 +208,6 @@
 		goto found_it;
 	}
 
-	wr_ary = svc_rdma_get_write_array(rmsgp);
 	if (wr_ary) {
 		int chunk = be32_to_cpu(wr_ary->wc_nchunks);
 
@@ -281,8 +297,8 @@
 
 	/* Prepare WRITE WR */
 	memset(&write_wr, 0, sizeof write_wr);
-	ctxt->wr_op = IB_WR_RDMA_WRITE;
-	write_wr.wr.wr_id = (unsigned long)ctxt;
+	ctxt->cqe.done = svc_rdma_wc_write;
+	write_wr.wr.wr_cqe = &ctxt->cqe;
 	write_wr.wr.sg_list = &sge[0];
 	write_wr.wr.num_sge = sge_no;
 	write_wr.wr.opcode = IB_WR_RDMA_WRITE;
@@ -298,41 +314,37 @@
  err:
 	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_context(ctxt, 0);
-	/* Fatal error, close transport */
 	return -EIO;
 }
 
+noinline
 static int send_write_chunks(struct svcxprt_rdma *xprt,
-			     struct rpcrdma_msg *rdma_argp,
+			     struct rpcrdma_write_array *wr_ary,
 			     struct rpcrdma_msg *rdma_resp,
 			     struct svc_rqst *rqstp,
 			     struct svc_rdma_req_map *vec)
 {
-	u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
+	u32 xfer_len = rqstp->rq_res.page_len;
 	int write_len;
 	u32 xdr_off;
 	int chunk_off;
 	int chunk_no;
 	int nchunks;
-	struct rpcrdma_write_array *arg_ary;
 	struct rpcrdma_write_array *res_ary;
 	int ret;
 
-	arg_ary = svc_rdma_get_write_array(rdma_argp);
-	if (!arg_ary)
-		return 0;
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[1];
 
 	/* Write chunks start at the pagelist */
-	nchunks = be32_to_cpu(arg_ary->wc_nchunks);
+	nchunks = be32_to_cpu(wr_ary->wc_nchunks);
 	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
 	     xfer_len && chunk_no < nchunks;
 	     chunk_no++) {
 		struct rpcrdma_segment *arg_ch;
 		u64 rs_offset;
 
-		arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
+		arg_ch = &wr_ary->wc_array[chunk_no].wc_target;
 		write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
 
 		/* Prepare the response chunk given the length actually
@@ -350,11 +362,8 @@
 					 xdr_off,
 					 write_len,
 					 vec);
-			if (ret <= 0) {
-				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
-					ret);
-				return -EIO;
-			}
+			if (ret <= 0)
+				goto out_err;
 			chunk_off += ret;
 			xdr_off += ret;
 			xfer_len -= ret;
@@ -364,11 +373,16 @@
 	/* Update the req with the number of chunks actually used */
 	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
 
-	return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
+	return rqstp->rq_res.page_len;
+
+out_err:
+	pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret);
+	return -EIO;
 }
 
+noinline
 static int send_reply_chunks(struct svcxprt_rdma *xprt,
-			     struct rpcrdma_msg *rdma_argp,
+			     struct rpcrdma_write_array *rp_ary,
 			     struct rpcrdma_msg *rdma_resp,
 			     struct svc_rqst *rqstp,
 			     struct svc_rdma_req_map *vec)
@@ -380,25 +394,21 @@
 	int chunk_off;
 	int nchunks;
 	struct rpcrdma_segment *ch;
-	struct rpcrdma_write_array *arg_ary;
 	struct rpcrdma_write_array *res_ary;
 	int ret;
 
-	arg_ary = svc_rdma_get_reply_array(rdma_argp);
-	if (!arg_ary)
-		return 0;
 	/* XXX: need to fix when reply lists occur with read-list and or
 	 * write-list */
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[2];
 
 	/* xdr offset starts at RPC message */
-	nchunks = be32_to_cpu(arg_ary->wc_nchunks);
+	nchunks = be32_to_cpu(rp_ary->wc_nchunks);
 	for (xdr_off = 0, chunk_no = 0;
 	     xfer_len && chunk_no < nchunks;
 	     chunk_no++) {
 		u64 rs_offset;
-		ch = &arg_ary->wc_array[chunk_no].wc_target;
+		ch = &rp_ary->wc_array[chunk_no].wc_target;
 		write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
 
 		/* Prepare the reply chunk given the length actually
@@ -415,11 +425,8 @@
 					 xdr_off,
 					 write_len,
 					 vec);
-			if (ret <= 0) {
-				dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
-					ret);
-				return -EIO;
-			}
+			if (ret <= 0)
+				goto out_err;
 			chunk_off += ret;
 			xdr_off += ret;
 			xfer_len -= ret;
@@ -430,6 +437,10 @@
 	svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
 
 	return rqstp->rq_res.len;
+
+out_err:
+	pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret);
+	return -EIO;
 }
 
 /* This function prepares the portion of the RPCRDMA message to be
@@ -464,13 +475,8 @@
 	int pages;
 	int ret;
 
-	/* Post a recv buffer to handle another request. */
-	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
+	ret = svc_rdma_repost_recv(rdma, GFP_KERNEL);
 	if (ret) {
-		printk(KERN_INFO
-		       "svcrdma: could not post a receive buffer, err=%d."
-		       "Closing transport %p.\n", ret, rdma);
-		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
 		svc_rdma_put_context(ctxt, 0);
 		return -ENOTCONN;
 	}
@@ -543,8 +549,8 @@
 		goto err;
 	}
 	memset(&send_wr, 0, sizeof send_wr);
-	ctxt->wr_op = IB_WR_SEND;
-	send_wr.wr_id = (unsigned long)ctxt;
+	ctxt->cqe.done = svc_rdma_wc_send;
+	send_wr.wr_cqe = &ctxt->cqe;
 	send_wr.sg_list = ctxt->sge;
 	send_wr.num_sge = sge_no;
 	send_wr.opcode = IB_WR_SEND;
@@ -559,6 +565,7 @@
  err:
 	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_context(ctxt, 1);
+	pr_err("svcrdma: failed to send reply, rc=%d\n", ret);
 	return -EIO;
 }
 
@@ -573,7 +580,7 @@
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 	struct rpcrdma_msg *rdma_argp;
 	struct rpcrdma_msg *rdma_resp;
-	struct rpcrdma_write_array *reply_ary;
+	struct rpcrdma_write_array *wr_ary, *rp_ary;
 	enum rpcrdma_proc reply_type;
 	int ret;
 	int inline_bytes;
@@ -587,12 +594,14 @@
 	 * places this at the start of page 0.
 	 */
 	rdma_argp = page_address(rqstp->rq_pages[0]);
+	wr_ary = svc_rdma_get_write_array(rdma_argp);
+	rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
 
 	/* Build an req vec for the XDR */
 	ctxt = svc_rdma_get_context(rdma);
 	ctxt->direction = DMA_TO_DEVICE;
 	vec = svc_rdma_get_req_map(rdma);
-	ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
+	ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
 	if (ret)
 		goto err0;
 	inline_bytes = rqstp->rq_res.len;
@@ -603,8 +612,7 @@
 	if (!res_page)
 		goto err0;
 	rdma_resp = page_address(res_page);
-	reply_ary = svc_rdma_get_reply_array(rdma_argp);
-	if (reply_ary)
+	if (rp_ary)
 		reply_type = RDMA_NOMSG;
 	else
 		reply_type = RDMA_MSG;
@@ -612,27 +620,26 @@
 					 rdma_resp, reply_type);
 
 	/* Send any write-chunk data and build resp write-list */
-	ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
-				rqstp, vec);
-	if (ret < 0) {
-		printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
-		       ret);
-		goto err1;
+	if (wr_ary) {
+		ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec);
+		if (ret < 0)
+			goto err1;
+		inline_bytes -= ret + xdr_padsize(ret);
 	}
-	inline_bytes -= ret;
 
 	/* Send any reply-list data and update resp reply-list */
-	ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
-				rqstp, vec);
-	if (ret < 0) {
-		printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
-		       ret);
-		goto err1;
+	if (rp_ary) {
+		ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec);
+		if (ret < 0)
+			goto err1;
+		inline_bytes -= ret;
 	}
-	inline_bytes -= ret;
 
 	ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
 			 inline_bytes);
+	if (ret < 0)
+		goto err1;
+
 	svc_rdma_put_req_map(rdma, vec);
 	dprintk("svcrdma: send_reply returns %d\n", ret);
 	return ret;
@@ -642,5 +649,68 @@
  err0:
 	svc_rdma_put_req_map(rdma, vec);
 	svc_rdma_put_context(ctxt, 0);
-	return ret;
+	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+	return -ENOTCONN;
+}
+
+void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
+			 int status)
+{
+	struct ib_send_wr err_wr;
+	struct page *p;
+	struct svc_rdma_op_ctxt *ctxt;
+	enum rpcrdma_errcode err;
+	__be32 *va;
+	int length;
+	int ret;
+
+	ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
+	if (ret)
+		return;
+
+	p = alloc_page(GFP_KERNEL);
+	if (!p)
+		return;
+	va = page_address(p);
+
+	/* XDR encode an error reply */
+	err = ERR_CHUNK;
+	if (status == -EPROTONOSUPPORT)
+		err = ERR_VERS;
+	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
+
+	ctxt = svc_rdma_get_context(xprt);
+	ctxt->direction = DMA_TO_DEVICE;
+	ctxt->count = 1;
+	ctxt->pages[0] = p;
+
+	/* Prepare SGE for local address */
+	ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
+	ctxt->sge[0].length = length;
+	ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
+					    p, 0, length, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
+		dprintk("svcrdma: Error mapping buffer for protocol error\n");
+		svc_rdma_put_context(ctxt, 1);
+		return;
+	}
+	atomic_inc(&xprt->sc_dma_used);
+
+	/* Prepare SEND WR */
+	memset(&err_wr, 0, sizeof(err_wr));
+	ctxt->cqe.done = svc_rdma_wc_send;
+	err_wr.wr_cqe = &ctxt->cqe;
+	err_wr.sg_list = ctxt->sge;
+	err_wr.num_sge = 1;
+	err_wr.opcode = IB_WR_SEND;
+	err_wr.send_flags = IB_SEND_SIGNALED;
+
+	/* Post It */
+	ret = svc_rdma_send(xprt, &err_wr);
+	if (ret) {
+		dprintk("svcrdma: Error %d posting send for protocol error\n",
+			ret);
+		svc_rdma_unmap_dma(ctxt);
+		svc_rdma_put_context(ctxt, 1);
+	}
 }

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5763825..9066896 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c

@@ -63,17 +63,10 @@
 					int flags);
 static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
 static void svc_rdma_release_rqst(struct svc_rqst *);
-static void dto_tasklet_func(unsigned long data);
 static void svc_rdma_detach(struct svc_xprt *xprt);
 static void svc_rdma_free(struct svc_xprt *xprt);
 static int svc_rdma_has_wspace(struct svc_xprt *xprt);
 static int svc_rdma_secure_port(struct svc_rqst *);
-static void rq_cq_reap(struct svcxprt_rdma *xprt);
-static void sq_cq_reap(struct svcxprt_rdma *xprt);
-
-static DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL);
-static DEFINE_SPINLOCK(dto_lock);
-static LIST_HEAD(dto_xprt_q);
 
 static struct svc_xprt_ops svc_rdma_ops = {
 	.xpo_create = svc_rdma_create,
@@ -352,15 +345,6 @@
 	}
 }
 
-/* ib_cq event handler */
-static void cq_event_handler(struct ib_event *event, void *context)
-{
-	struct svc_xprt *xprt = context;
-	dprintk("svcrdma: received CQ event %s (%d), context=%p\n",
-		ib_event_msg(event->event), event->event, context);
-	set_bit(XPT_CLOSE, &xprt->xpt_flags);
-}
-
 /* QP event handler */
 static void qp_event_handler(struct ib_event *event, void *context)
 {
@@ -392,251 +376,171 @@
 	}
 }
 
-/*
- * Data Transfer Operation Tasklet
+/**
+ * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
+ * @cq:        completion queue
+ * @wc:        completed WR
  *
- * Walks a list of transports with I/O pending, removing entries as
- * they are added to the server's I/O pending list. Two bits indicate
- * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave
- * spinlock that serializes access to the transport list with the RQ
- * and SQ interrupt handlers.
  */
-static void dto_tasklet_func(unsigned long data)
+static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct svcxprt_rdma *xprt;
-	unsigned long flags;
+	struct svcxprt_rdma *xprt = cq->cq_context;
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_op_ctxt *ctxt;
 
-	spin_lock_irqsave(&dto_lock, flags);
-	while (!list_empty(&dto_xprt_q)) {
-		xprt = list_entry(dto_xprt_q.next,
-				  struct svcxprt_rdma, sc_dto_q);
-		list_del_init(&xprt->sc_dto_q);
-		spin_unlock_irqrestore(&dto_lock, flags);
-
-		rq_cq_reap(xprt);
-		sq_cq_reap(xprt);
-
-		svc_xprt_put(&xprt->sc_xprt);
-		spin_lock_irqsave(&dto_lock, flags);
-	}
-	spin_unlock_irqrestore(&dto_lock, flags);
-}
-
-/*
- * Receive Queue Completion Handler
- *
- * Since an RQ completion handler is called on interrupt context, we
- * need to defer the handling of the I/O to a tasklet
- */
-static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
-{
-	struct svcxprt_rdma *xprt = cq_context;
-	unsigned long flags;
-
-	/* Guard against unconditional flush call for destroyed QP */
-	if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
-		return;
-
-	/*
-	 * Set the bit regardless of whether or not it's on the list
-	 * because it may be on the list already due to an SQ
-	 * completion.
-	 */
-	set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags);
-
-	/*
-	 * If this transport is not already on the DTO transport queue,
-	 * add it
-	 */
-	spin_lock_irqsave(&dto_lock, flags);
-	if (list_empty(&xprt->sc_dto_q)) {
-		svc_xprt_get(&xprt->sc_xprt);
-		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
-	}
-	spin_unlock_irqrestore(&dto_lock, flags);
-
-	/* Tasklet does all the work to avoid irqsave locks. */
-	tasklet_schedule(&dto_tasklet);
-}
-
-/*
- * rq_cq_reap - Process the RQ CQ.
- *
- * Take all completing WC off the CQE and enqueue the associated DTO
- * context on the dto_q for the transport.
- *
- * Note that caller must hold a transport reference.
- */
-static void rq_cq_reap(struct svcxprt_rdma *xprt)
-{
-	int ret;
-	struct ib_wc wc;
-	struct svc_rdma_op_ctxt *ctxt = NULL;
-
-	if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
-		return;
-
-	ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
-	atomic_inc(&rdma_stat_rq_poll);
-
-	while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
-		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-		ctxt->wc_status = wc.status;
-		ctxt->byte_len = wc.byte_len;
-		svc_rdma_unmap_dma(ctxt);
-		if (wc.status != IB_WC_SUCCESS) {
-			/* Close the transport */
-			dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
-			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-			svc_rdma_put_context(ctxt, 1);
-			svc_xprt_put(&xprt->sc_xprt);
-			continue;
-		}
-		spin_lock_bh(&xprt->sc_rq_dto_lock);
-		list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
-		spin_unlock_bh(&xprt->sc_rq_dto_lock);
-		svc_xprt_put(&xprt->sc_xprt);
-	}
-
-	if (ctxt)
-		atomic_inc(&rdma_stat_rq_prod);
-
-	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-	/*
-	 * If data arrived before established event,
-	 * don't enqueue. This defers RPC I/O until the
-	 * RDMA connection is complete.
-	 */
-	if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
-		svc_xprt_enqueue(&xprt->sc_xprt);
-}
-
-/*
- * Process a completion context
- */
-static void process_context(struct svcxprt_rdma *xprt,
-			    struct svc_rdma_op_ctxt *ctxt)
-{
-	struct svc_rdma_op_ctxt *read_hdr;
-	int free_pages = 0;
-
+	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
+	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
+	ctxt->wc_status = wc->status;
 	svc_rdma_unmap_dma(ctxt);
 
-	switch (ctxt->wr_op) {
-	case IB_WR_SEND:
-		free_pages = 1;
-		break;
+	if (wc->status != IB_WC_SUCCESS)
+		goto flushed;
 
-	case IB_WR_RDMA_WRITE:
-		break;
+	/* All wc fields are now known to be valid */
+	ctxt->byte_len = wc->byte_len;
+	spin_lock(&xprt->sc_rq_dto_lock);
+	list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
+	spin_unlock(&xprt->sc_rq_dto_lock);
 
-	case IB_WR_RDMA_READ:
-	case IB_WR_RDMA_READ_WITH_INV:
-		svc_rdma_put_frmr(xprt, ctxt->frmr);
+	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+	if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
+		goto out;
+	svc_xprt_enqueue(&xprt->sc_xprt);
+	goto out;
 
-		if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
-			break;
+flushed:
+	if (wc->status != IB_WC_WR_FLUSH_ERR)
+		pr_warn("svcrdma: receive: %s (%u/0x%x)\n",
+			ib_wc_status_msg(wc->status),
+			wc->status, wc->vendor_err);
+	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+	svc_rdma_put_context(ctxt, 1);
+
+out:
+	svc_xprt_put(&xprt->sc_xprt);
+}
+
+static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
+				    struct ib_wc *wc,
+				    const char *opname)
+{
+	if (wc->status != IB_WC_SUCCESS)
+		goto err;
+
+out:
+	atomic_dec(&xprt->sc_sq_count);
+	wake_up(&xprt->sc_send_wait);
+	return;
+
+err:
+	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+	if (wc->status != IB_WC_WR_FLUSH_ERR)
+		pr_err("svcrdma: %s: %s (%u/0x%x)\n",
+		       opname, ib_wc_status_msg(wc->status),
+		       wc->status, wc->vendor_err);
+	goto out;
+}
+
+static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
+					const char *opname)
+{
+	struct svcxprt_rdma *xprt = cq->cq_context;
+
+	svc_rdma_send_wc_common(xprt, wc, opname);
+	svc_xprt_put(&xprt->sc_xprt);
+}
+
+/**
+ * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
+ * @cq:        completion queue
+ * @wc:        completed WR
+ *
+ */
+void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_op_ctxt *ctxt;
+
+	svc_rdma_send_wc_common_put(cq, wc, "send");
+
+	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 1);
+}
+
+/**
+ * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC
+ * @cq:        completion queue
+ * @wc:        completed WR
+ *
+ */
+void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_op_ctxt *ctxt;
+
+	svc_rdma_send_wc_common_put(cq, wc, "write");
+
+	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 0);
+}
+
+/**
+ * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
+ * @cq:        completion queue
+ * @wc:        completed WR
+ *
+ */
+void svc_rdma_wc_reg(struct ib_cq *cq, struct ib_wc *wc)
+{
+	svc_rdma_send_wc_common_put(cq, wc, "fastreg");
+}
+
+/**
+ * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC
+ * @cq:        completion queue
+ * @wc:        completed WR
+ *
+ */
+void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct svcxprt_rdma *xprt = cq->cq_context;
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_op_ctxt *ctxt;
+
+	svc_rdma_send_wc_common(xprt, wc, "read");
+
+	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_frmr(xprt, ctxt->frmr);
+
+	if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+		struct svc_rdma_op_ctxt *read_hdr;
 
 		read_hdr = ctxt->read_hdr;
-		svc_rdma_put_context(ctxt, 0);
-
-		spin_lock_bh(&xprt->sc_rq_dto_lock);
-		set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+		spin_lock(&xprt->sc_rq_dto_lock);
 		list_add_tail(&read_hdr->dto_q,
 			      &xprt->sc_read_complete_q);
-		spin_unlock_bh(&xprt->sc_rq_dto_lock);
+		spin_unlock(&xprt->sc_rq_dto_lock);
+
+		set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 		svc_xprt_enqueue(&xprt->sc_xprt);
-		return;
-
-	default:
-		dprintk("svcrdma: unexpected completion opcode=%d\n",
-			ctxt->wr_op);
-		break;
 	}
 
-	svc_rdma_put_context(ctxt, free_pages);
+	svc_rdma_put_context(ctxt, 0);
+	svc_xprt_put(&xprt->sc_xprt);
 }
 
-/*
- * Send Queue Completion Handler - potentially called on interrupt context.
+/**
+ * svc_rdma_wc_inv - Invoked by RDMA provider for each polled LOCAL_INV WC
+ * @cq:        completion queue
+ * @wc:        completed WR
  *
- * Note that caller must hold a transport reference.
  */
-static void sq_cq_reap(struct svcxprt_rdma *xprt)
+void svc_rdma_wc_inv(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct ib_wc wc_a[6];
-	struct ib_wc *wc;
-	struct ib_cq *cq = xprt->sc_sq_cq;
-	int ret;
-
-	memset(wc_a, 0, sizeof(wc_a));
-
-	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
-		return;
-
-	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
-	atomic_inc(&rdma_stat_sq_poll);
-	while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
-		int i;
-
-		for (i = 0; i < ret; i++) {
-			wc = &wc_a[i];
-			if (wc->status != IB_WC_SUCCESS) {
-				dprintk("svcrdma: sq wc err status %s (%d)\n",
-					ib_wc_status_msg(wc->status),
-					wc->status);
-
-				/* Close the transport */
-				set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-			}
-
-			/* Decrement used SQ WR count */
-			atomic_dec(&xprt->sc_sq_count);
-			wake_up(&xprt->sc_send_wait);
-
-			ctxt = (struct svc_rdma_op_ctxt *)
-				(unsigned long)wc->wr_id;
-			if (ctxt)
-				process_context(xprt, ctxt);
-
-			svc_xprt_put(&xprt->sc_xprt);
-		}
-	}
-
-	if (ctxt)
-		atomic_inc(&rdma_stat_sq_prod);
-}
-
-static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
-{
-	struct svcxprt_rdma *xprt = cq_context;
-	unsigned long flags;
-
-	/* Guard against unconditional flush call for destroyed QP */
-	if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
-		return;
-
-	/*
-	 * Set the bit regardless of whether or not it's on the list
-	 * because it may be on the list already due to an RQ
-	 * completion.
-	 */
-	set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
-
-	/*
-	 * If this transport is not already on the DTO transport queue,
-	 * add it
-	 */
-	spin_lock_irqsave(&dto_lock, flags);
-	if (list_empty(&xprt->sc_dto_q)) {
-		svc_xprt_get(&xprt->sc_xprt);
-		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
-	}
-	spin_unlock_irqrestore(&dto_lock, flags);
-
-	/* Tasklet does all the work to avoid irqsave locks. */
-	tasklet_schedule(&dto_tasklet);
+	svc_rdma_send_wc_common_put(cq, wc, "localInv");
 }
 
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
@@ -681,6 +585,7 @@
 	ctxt = svc_rdma_get_context(xprt);
 	buflen = 0;
 	ctxt->direction = DMA_FROM_DEVICE;
+	ctxt->cqe.done = svc_rdma_wc_receive;
 	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
 		if (sge_no >= xprt->sc_max_sge) {
 			pr_err("svcrdma: Too many sges (%d)\n", sge_no);
@@ -705,7 +610,7 @@
 	recv_wr.next = NULL;
 	recv_wr.sg_list = &ctxt->sge[0];
 	recv_wr.num_sge = ctxt->count;
-	recv_wr.wr_id = (u64)(unsigned long)ctxt;
+	recv_wr.wr_cqe = &ctxt->cqe;
 
 	svc_xprt_get(&xprt->sc_xprt);
 	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
@@ -722,6 +627,21 @@
 	return -ENOMEM;
 }
 
+int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
+{
+	int ret = 0;
+
+	ret = svc_rdma_post_recv(xprt, flags);
+	if (ret) {
+		pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
+		       ret);
+		pr_err("svcrdma: closing transport %p.\n", xprt);
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		ret = -ENOTCONN;
+	}
+	return ret;
+}
+
 /*
  * This function handles the CONNECT_REQUEST event on a listening
  * endpoint. It is passed the cma_id for the _new_ connection. The context in
@@ -1011,7 +931,6 @@
 	struct svcxprt_rdma *listen_rdma;
 	struct svcxprt_rdma *newxprt = NULL;
 	struct rdma_conn_param conn_param;
-	struct ib_cq_init_attr cq_attr = {};
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device *dev;
 	unsigned int i;
@@ -1069,22 +988,14 @@
 		dprintk("svcrdma: error creating PD for connect request\n");
 		goto errout;
 	}
-	cq_attr.cqe = newxprt->sc_sq_depth;
-	newxprt->sc_sq_cq = ib_create_cq(dev,
-					 sq_comp_handler,
-					 cq_event_handler,
-					 newxprt,
-					 &cq_attr);
+	newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
+					0, IB_POLL_SOFTIRQ);
 	if (IS_ERR(newxprt->sc_sq_cq)) {
 		dprintk("svcrdma: error creating SQ CQ for connect request\n");
 		goto errout;
 	}
-	cq_attr.cqe = newxprt->sc_rq_depth;
-	newxprt->sc_rq_cq = ib_create_cq(dev,
-					 rq_comp_handler,
-					 cq_event_handler,
-					 newxprt,
-					 &cq_attr);
+	newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
+					0, IB_POLL_SOFTIRQ);
 	if (IS_ERR(newxprt->sc_rq_cq)) {
 		dprintk("svcrdma: error creating RQ CQ for connect request\n");
 		goto errout;
@@ -1173,13 +1084,6 @@
 	/* Swap out the handler */
 	newxprt->sc_cm_id->event_handler = rdma_cma_handler;
 
-	/*
-	 * Arm the CQs for the SQ and RQ before accepting so we can't
-	 * miss the first message
-	 */
-	ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
-	ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
-
 	/* Accept Connection */
 	set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
 	memset(&conn_param, 0, sizeof conn_param);
@@ -1319,10 +1223,10 @@
 		ib_destroy_qp(rdma->sc_qp);
 
 	if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
-		ib_destroy_cq(rdma->sc_sq_cq);
+		ib_free_cq(rdma->sc_sq_cq);
 
 	if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
-		ib_destroy_cq(rdma->sc_rq_cq);
+		ib_free_cq(rdma->sc_rq_cq);
 
 	if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
 		ib_dealloc_pd(rdma->sc_pd);
@@ -1383,9 +1287,6 @@
 			spin_unlock_bh(&xprt->sc_lock);
 			atomic_inc(&rdma_stat_sq_starve);
 
-			/* See if we can opportunistically reap SQ WR to make room */
-			sq_cq_reap(xprt);
-
 			/* Wait until SQ WR available if SQ still full */
 			wait_event(xprt->sc_send_wait,
 				   atomic_read(&xprt->sc_sq_count) <
@@ -1418,57 +1319,3 @@
 	}
 	return ret;
 }
-
-void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
-			 enum rpcrdma_errcode err)
-{
-	struct ib_send_wr err_wr;
-	struct page *p;
-	struct svc_rdma_op_ctxt *ctxt;
-	__be32 *va;
-	int length;
-	int ret;
-
-	p = alloc_page(GFP_KERNEL);
-	if (!p)
-		return;
-	va = page_address(p);
-
-	/* XDR encode error */
-	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
-
-	ctxt = svc_rdma_get_context(xprt);
-	ctxt->direction = DMA_FROM_DEVICE;
-	ctxt->count = 1;
-	ctxt->pages[0] = p;
-
-	/* Prepare SGE for local address */
-	ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
-					    p, 0, length, DMA_FROM_DEVICE);
-	if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
-		put_page(p);
-		svc_rdma_put_context(ctxt, 1);
-		return;
-	}
-	atomic_inc(&xprt->sc_dma_used);
-	ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
-	ctxt->sge[0].length = length;
-
-	/* Prepare SEND WR */
-	memset(&err_wr, 0, sizeof err_wr);
-	ctxt->wr_op = IB_WR_SEND;
-	err_wr.wr_id = (unsigned long)ctxt;
-	err_wr.sg_list = ctxt->sge;
-	err_wr.num_sge = 1;
-	err_wr.opcode = IB_WR_SEND;
-	err_wr.send_flags = IB_SEND_SIGNALED;
-
-	/* Post It */
-	ret = svc_rdma_send(xprt, &err_wr);
-	if (ret) {
-		dprintk("svcrdma: Error %d posting send for protocol error\n",
-			ret);
-		svc_rdma_unmap_dma(ctxt);
-		svc_rdma_put_context(ctxt, 1);
-	}
-}

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 878f1bf..f5ed9f9 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c

@@ -112,73 +112,20 @@
 	}
 }
 
-static void
-rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
-{
-	struct rpcrdma_ep *ep = context;
-
-	pr_err("RPC:       %s: %s on device %s ep %p\n",
-	       __func__, ib_event_msg(event->event),
-		event->device->name, context);
-	if (ep->rep_connected == 1) {
-		ep->rep_connected = -EIO;
-		rpcrdma_conn_func(ep);
-		wake_up_all(&ep->rep_connect_wait);
-	}
-}
-
-static void
-rpcrdma_sendcq_process_wc(struct ib_wc *wc)
-{
-	/* WARNING: Only wr_id and status are reliable at this point */
-	if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) {
-		if (wc->status != IB_WC_SUCCESS &&
-		    wc->status != IB_WC_WR_FLUSH_ERR)
-			pr_err("RPC:       %s: SEND: %s\n",
-			       __func__, ib_wc_status_msg(wc->status));
-	} else {
-		struct rpcrdma_mw *r;
-
-		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-		r->mw_sendcompletion(wc);
-	}
-}
-
-/* The common case is a single send completion is waiting. By
- * passing two WC entries to ib_poll_cq, a return code of 1
- * means there is exactly one WC waiting and no more. We don't
- * have to invoke ib_poll_cq again to know that the CQ has been
- * properly drained.
+/**
+ * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
  */
 static void
-rpcrdma_sendcq_poll(struct ib_cq *cq)
+rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct ib_wc *pos, wcs[2];
-	int count, rc;
-
-	do {
-		pos = wcs;
-
-		rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
-		if (rc < 0)
-			break;
-
-		count = rc;
-		while (count-- > 0)
-			rpcrdma_sendcq_process_wc(pos++);
-	} while (rc == ARRAY_SIZE(wcs));
-	return;
-}
-
-/* Handle provider send completion upcalls.
- */
-static void
-rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
-{
-	do {
-		rpcrdma_sendcq_poll(cq);
-	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
-				  IB_CQ_REPORT_MISSED_EVENTS) > 0);
+	/* WARNING: Only wr_cqe and status are reliable at this point */
+	if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
+		pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
+		       ib_wc_status_msg(wc->status),
+		       wc->status, wc->vendor_err);
 }
 
 static void
@@ -190,11 +137,40 @@
 	rpcrdma_reply_handler(rep);
 }
 
+/* Perform basic sanity checking to avoid using garbage
+ * to update the credit grant value.
+ */
 static void
-rpcrdma_recvcq_process_wc(struct ib_wc *wc)
+rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
 {
-	struct rpcrdma_rep *rep =
-			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;
+	struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
+	struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
+	u32 credits;
+
+	if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
+		return;
+
+	credits = be32_to_cpu(rmsgp->rm_credit);
+	if (credits == 0)
+		credits = 1;	/* don't deadlock */
+	else if (credits > buffer->rb_max_requests)
+		credits = buffer->rb_max_requests;
+
+	atomic_set(&buffer->rb_credits, credits);
+}
+
+/**
+ * rpcrdma_receive_wc - Invoked by RDMA provider for each polled Receive WC
+ * @cq:	completion queue (ignored)
+ * @wc:	completed WR
+ *
+ */
+static void
+rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
+					       rr_cqe);
 
 	/* WARNING: Only wr_id and status are reliable at this point */
 	if (wc->status != IB_WC_SUCCESS)
@@ -211,7 +187,8 @@
 	ib_dma_sync_single_for_cpu(rep->rr_device,
 				   rdmab_addr(rep->rr_rdmabuf),
 				   rep->rr_len, DMA_FROM_DEVICE);
-	prefetch(rdmab_to_msg(rep->rr_rdmabuf));
+
+	rpcrdma_update_granted_credits(rep);
 
 out_schedule:
 	queue_work(rpcrdma_receive_wq, &rep->rr_work);
@@ -219,57 +196,20 @@
 
 out_fail:
 	if (wc->status != IB_WC_WR_FLUSH_ERR)
-		pr_err("RPC:       %s: rep %p: %s\n",
-		       __func__, rep, ib_wc_status_msg(wc->status));
+		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
+		       ib_wc_status_msg(wc->status),
+		       wc->status, wc->vendor_err);
 	rep->rr_len = RPCRDMA_BAD_LEN;
 	goto out_schedule;
 }
 
-/* The wc array is on stack: automatic memory is always CPU-local.
- *
- * struct ib_wc is 64 bytes, making the poll array potentially
- * large. But this is at the bottom of the call chain. Further
- * substantial work is done in another thread.
- */
-static void
-rpcrdma_recvcq_poll(struct ib_cq *cq)
-{
-	struct ib_wc *pos, wcs[4];
-	int count, rc;
-
-	do {
-		pos = wcs;
-
-		rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
-		if (rc < 0)
-			break;
-
-		count = rc;
-		while (count-- > 0)
-			rpcrdma_recvcq_process_wc(pos++);
-	} while (rc == ARRAY_SIZE(wcs));
-}
-
-/* Handle provider receive completion upcalls.
- */
-static void
-rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
-{
-	do {
-		rpcrdma_recvcq_poll(cq);
-	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
-				  IB_CQ_REPORT_MISSED_EVENTS) > 0);
-}
-
 static void
 rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
 {
 	struct ib_wc wc;
 
 	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
-		rpcrdma_recvcq_process_wc(&wc);
-	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
-		rpcrdma_sendcq_process_wc(&wc);
+		rpcrdma_receive_wc(NULL, &wc);
 }
 
 static int
@@ -330,6 +270,7 @@
 connected:
 		dprintk("RPC:       %s: %sconnected\n",
 					__func__, connstate > 0 ? "" : "dis");
+		atomic_set(&xprt->rx_buf.rb_credits, 1);
 		ep->rep_connected = connstate;
 		rpcrdma_conn_func(ep);
 		wake_up_all(&ep->rep_connect_wait);
@@ -560,9 +501,8 @@
 				struct rpcrdma_create_data_internal *cdata)
 {
 	struct ib_cq *sendcq, *recvcq;
-	struct ib_cq_init_attr cq_attr = {};
 	unsigned int max_qp_wr;
-	int rc, err;
+	int rc;
 
 	if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
 		dprintk("RPC:       %s: insufficient sge's available\n",
@@ -614,9 +554,9 @@
 	init_waitqueue_head(&ep->rep_connect_wait);
 	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
 
-	cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1;
-	sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
-			      rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
+	sendcq = ib_alloc_cq(ia->ri_device, NULL,
+			     ep->rep_attr.cap.max_send_wr + 1,
+			     0, IB_POLL_SOFTIRQ);
 	if (IS_ERR(sendcq)) {
 		rc = PTR_ERR(sendcq);
 		dprintk("RPC:       %s: failed to create send CQ: %i\n",
@@ -624,16 +564,9 @@
 		goto out1;
 	}
 
-	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
-	if (rc) {
-		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
-			__func__, rc);
-		goto out2;
-	}
-
-	cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
-	recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
-			      rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
+	recvcq = ib_alloc_cq(ia->ri_device, NULL,
+			     ep->rep_attr.cap.max_recv_wr + 1,
+			     0, IB_POLL_SOFTIRQ);
 	if (IS_ERR(recvcq)) {
 		rc = PTR_ERR(recvcq);
 		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
@@ -641,14 +574,6 @@
 		goto out2;
 	}
 
-	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
-	if (rc) {
-		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
-			__func__, rc);
-		ib_destroy_cq(recvcq);
-		goto out2;
-	}
-
 	ep->rep_attr.send_cq = sendcq;
 	ep->rep_attr.recv_cq = recvcq;
 
@@ -673,10 +598,7 @@
 	return 0;
 
 out2:
-	err = ib_destroy_cq(sendcq);
-	if (err)
-		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
-			__func__, err);
+	ib_free_cq(sendcq);
 out1:
 	if (ia->ri_dma_mr)
 		ib_dereg_mr(ia->ri_dma_mr);
@@ -711,15 +633,8 @@
 		ia->ri_id->qp = NULL;
 	}
 
-	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
-	if (rc)
-		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
-			__func__, rc);
-
-	rc = ib_destroy_cq(ep->rep_attr.send_cq);
-	if (rc)
-		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
-			__func__, rc);
+	ib_free_cq(ep->rep_attr.recv_cq);
+	ib_free_cq(ep->rep_attr.send_cq);
 
 	if (ia->ri_dma_mr) {
 		rc = ib_dereg_mr(ia->ri_dma_mr);
@@ -898,6 +813,7 @@
 	spin_lock(&buffer->rb_reqslock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
 	spin_unlock(&buffer->rb_reqslock);
+	req->rl_cqe.done = rpcrdma_wc_send;
 	req->rl_buffer = &r_xprt->rx_buf;
 	return req;
 }
@@ -923,6 +839,7 @@
 	}
 
 	rep->rr_device = ia->ri_device;
+	rep->rr_cqe.done = rpcrdma_receive_wc;
 	rep->rr_rxprt = r_xprt;
 	INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);
 	return rep;
@@ -943,6 +860,7 @@
 	buf->rb_max_requests = r_xprt->rx_data.max_requests;
 	buf->rb_bc_srv_max_requests = 0;
 	spin_lock_init(&buf->rb_lock);
+	atomic_set(&buf->rb_credits, 1);
 
 	rc = ia->ri_ops->ro_init(r_xprt);
 	if (rc)
@@ -1259,7 +1177,7 @@
 	}
 
 	send_wr.next = NULL;
-	send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
+	send_wr.wr_cqe = &req->rl_cqe;
 	send_wr.sg_list = iov;
 	send_wr.num_sge = req->rl_niovs;
 	send_wr.opcode = IB_WR_SEND;
@@ -1297,7 +1215,7 @@
 	int rc;
 
 	recv_wr.next = NULL;
-	recv_wr.wr_id = (u64) (unsigned long) rep;
+	recv_wr.wr_cqe = &rep->rr_cqe;
 	recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
 	recv_wr.num_sge = 1;
 

diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 38fe11b..2ebc743 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h

@@ -95,10 +95,6 @@
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 
-/* Force completion handler to ignore the signal
- */
-#define RPCRDMA_IGNORE_COMPLETION	(0ULL)
-
 /* Pre-allocate extra Work Requests for handling backward receives
  * and sends. This is a fixed value because the Work Queues are
  * allocated when the forward channel is set up.
@@ -171,6 +167,7 @@
 struct rpcrdma_buffer;
 
 struct rpcrdma_rep {
+	struct ib_cqe		rr_cqe;
 	unsigned int		rr_len;
 	struct ib_device	*rr_device;
 	struct rpcrdma_xprt	*rr_rxprt;
@@ -204,11 +201,11 @@
 	struct scatterlist		*sg;
 	int				sg_nents;
 	struct ib_mr			*fr_mr;
+	struct ib_cqe			fr_cqe;
 	enum rpcrdma_frmr_state		fr_state;
+	struct completion		fr_linv_done;
 	struct work_struct		fr_work;
 	struct rpcrdma_xprt		*fr_xprt;
-	bool				fr_waiter;
-	struct completion		fr_linv_done;;
 	union {
 		struct ib_reg_wr	fr_regwr;
 		struct ib_send_wr	fr_invwr;
@@ -224,8 +221,7 @@
 	union {
 		struct rpcrdma_fmr	fmr;
 		struct rpcrdma_frmr	frmr;
-	} r;
-	void			(*mw_sendcompletion)(struct ib_wc *);
+	};
 	struct list_head	mw_list;
 	struct list_head	mw_all;
 };
@@ -281,6 +277,7 @@
 	struct rpcrdma_regbuf	*rl_sendbuf;
 	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
 
+	struct ib_cqe		rl_cqe;
 	struct list_head	rl_all;
 	bool			rl_backchannel;
 };
@@ -311,6 +308,7 @@
 	struct list_head	rb_send_bufs;
 	struct list_head	rb_recv_bufs;
 	u32			rb_max_requests;
+	atomic_t		rb_credits;	/* most recent credit grant */
 
 	u32			rb_bc_srv_max_requests;
 	spinlock_t		rb_reqslock;	/* protect rb_allreqs */

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fde2138..65e7595 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c

@@ -1844,9 +1844,7 @@
  */
 static void xs_local_rpcbind(struct rpc_task *task)
 {
-	rcu_read_lock();
-	xprt_set_bound(rcu_dereference(task->tk_client->cl_xprt));
-	rcu_read_unlock();
+	xprt_set_bound(task->tk_xprt);
 }
 
 static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8b5833c..2b9b98f 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c

@@ -1079,7 +1079,7 @@
  *	@filter_dev: filter device
  *	@idx:
  *
- *	Delete FDB entry from switch device.
+ *	Dump FDB entries from switch device.
  */
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct net_device *dev,

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index bbe65dc..3dce53e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c

@@ -1209,10 +1209,14 @@
 
 		if (signal_pending(current)) {
 			err = sock_intr_errno(timeout);
-			goto out_wait_error;
+			sk->sk_state = SS_UNCONNECTED;
+			sock->state = SS_UNCONNECTED;
+			goto out_wait;
 		} else if (timeout == 0) {
 			err = -ETIMEDOUT;
-			goto out_wait_error;
+			sk->sk_state = SS_UNCONNECTED;
+			sock->state = SS_UNCONNECTED;
+			goto out_wait;
 		}
 
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -1220,20 +1224,17 @@
 
 	if (sk->sk_err) {
 		err = -sk->sk_err;
-		goto out_wait_error;
-	} else
+		sk->sk_state = SS_UNCONNECTED;
+		sock->state = SS_UNCONNECTED;
+	} else {
 		err = 0;
+	}
 
 out_wait:
 	finish_wait(sk_sleep(sk), &wait);
 out:
 	release_sock(sk);
 	return err;
-
-out_wait_error:
-	sk->sk_state = SS_UNCONNECTED;
-	sock->state = SS_UNCONNECTED;
-	goto out_wait;
 }
 
 static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
@@ -1270,18 +1271,20 @@
 	       listener->sk_err == 0) {
 		release_sock(listener);
 		timeout = schedule_timeout(timeout);
+		finish_wait(sk_sleep(listener), &wait);
 		lock_sock(listener);
 
 		if (signal_pending(current)) {
 			err = sock_intr_errno(timeout);
-			goto out_wait;
+			goto out;
 		} else if (timeout == 0) {
 			err = -EAGAIN;
-			goto out_wait;
+			goto out;
 		}
 
 		prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
 	}
+	finish_wait(sk_sleep(listener), &wait);
 
 	if (listener->sk_err)
 		err = -listener->sk_err;
@@ -1301,19 +1304,15 @@
 		 */
 		if (err) {
 			vconnected->rejected = true;
-			release_sock(connected);
-			sock_put(connected);
-			goto out_wait;
+		} else {
+			newsock->state = SS_CONNECTED;
+			sock_graft(connected, newsock);
 		}
 
-		newsock->state = SS_CONNECTED;
-		sock_graft(connected, newsock);
 		release_sock(connected);
 		sock_put(connected);
 	}
 
-out_wait:
-	finish_wait(sk_sleep(listener), &wait);
 out:
 	release_sock(listener);
 	return err;
@@ -1557,9 +1556,11 @@
 	if (err < 0)
 		goto out;
 
+
 	while (total_written < len) {
 		ssize_t written;
 
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		while (vsock_stream_has_space(vsk) == 0 &&
 		       sk->sk_err == 0 &&
 		       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1568,27 +1569,33 @@
 			/* Don't wait for non-blocking sockets. */
 			if (timeout == 0) {
 				err = -EAGAIN;
-				goto out_wait;
+				finish_wait(sk_sleep(sk), &wait);
+				goto out_err;
 			}
 
 			err = transport->notify_send_pre_block(vsk, &send_data);
-			if (err < 0)
-				goto out_wait;
+			if (err < 0) {
+				finish_wait(sk_sleep(sk), &wait);
+				goto out_err;
+			}
 
 			release_sock(sk);
-			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 			timeout = schedule_timeout(timeout);
-			finish_wait(sk_sleep(sk), &wait);
 			lock_sock(sk);
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeout);
-				goto out_wait;
+				finish_wait(sk_sleep(sk), &wait);
+				goto out_err;
 			} else if (timeout == 0) {
 				err = -EAGAIN;
-				goto out_wait;
+				finish_wait(sk_sleep(sk), &wait);
+				goto out_err;
 			}
 
+			prepare_to_wait(sk_sleep(sk), &wait,
+					TASK_INTERRUPTIBLE);
 		}
+		finish_wait(sk_sleep(sk), &wait);
 
 		/* These checks occur both as part of and after the loop
 		 * conditional since we need to check before and after
@@ -1596,16 +1603,16 @@
 		 */
 		if (sk->sk_err) {
 			err = -sk->sk_err;
-			goto out_wait;
+			goto out_err;
 		} else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
 			   (vsk->peer_shutdown & RCV_SHUTDOWN)) {
 			err = -EPIPE;
-			goto out_wait;
+			goto out_err;
 		}
 
 		err = transport->notify_send_pre_enqueue(vsk, &send_data);
 		if (err < 0)
-			goto out_wait;
+			goto out_err;
 
 		/* Note that enqueue will only write as many bytes as are free
 		 * in the produce queue, so we don't need to ensure len is
@@ -1618,7 +1625,7 @@
 				len - total_written);
 		if (written < 0) {
 			err = -ENOMEM;
-			goto out_wait;
+			goto out_err;
 		}
 
 		total_written += written;
@@ -1626,11 +1633,11 @@
 		err = transport->notify_send_post_enqueue(
 				vsk, written, &send_data);
 		if (err < 0)
-			goto out_wait;
+			goto out_err;
 
 	}
 
-out_wait:
+out_err:
 	if (total_written > 0)
 		err = total_written;
 out:
@@ -1715,18 +1722,59 @@
 
 
 	while (1) {
-		s64 ready = vsock_stream_has_data(vsk);
+		s64 ready;
 
-		if (ready < 0) {
-			/* Invalid queue pair content. XXX This should be
-			 * changed to a connection reset in a later change.
-			 */
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		ready = vsock_stream_has_data(vsk);
 
-			err = -ENOMEM;
-			goto out;
-		} else if (ready > 0) {
+		if (ready == 0) {
+			if (sk->sk_err != 0 ||
+			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
+			    (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			}
+			/* Don't wait for non-blocking sockets. */
+			if (timeout == 0) {
+				err = -EAGAIN;
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			}
+
+			err = transport->notify_recv_pre_block(
+					vsk, target, &recv_data);
+			if (err < 0) {
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			}
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				finish_wait(sk_sleep(sk), &wait);
+				break;
+			}
+		} else {
 			ssize_t read;
 
+			finish_wait(sk_sleep(sk), &wait);
+
+			if (ready < 0) {
+				/* Invalid queue pair content. XXX This should
+				* be changed to a connection reset in a later
+				* change.
+				*/
+
+				err = -ENOMEM;
+				goto out;
+			}
+
 			err = transport->notify_recv_pre_dequeue(
 					vsk, target, &recv_data);
 			if (err < 0)
@@ -1752,35 +1800,6 @@
 				break;
 
 			target -= read;
-		} else {
-			if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
-			    || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
-				break;
-			}
-			/* Don't wait for non-blocking sockets. */
-			if (timeout == 0) {
-				err = -EAGAIN;
-				break;
-			}
-
-			err = transport->notify_recv_pre_block(
-					vsk, target, &recv_data);
-			if (err < 0)
-				break;
-
-			release_sock(sk);
-			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-			timeout = schedule_timeout(timeout);
-			finish_wait(sk_sleep(sk), &wait);
-			lock_sock(sk);
-
-			if (signal_pending(current)) {
-				err = sock_intr_errno(timeout);
-				break;
-			} else if (timeout == 0) {
-				err = -EAGAIN;
-				break;
-			}
 		}
 	}
 

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ad7f5b3..1c4ad47 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c

@@ -292,12 +292,15 @@
 		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
 
 		skb_dst_force(skb);
+		dev_hold(skb->dev);
 
 		nexthdr = x->type->input(x, skb);
 
 		if (nexthdr == -EINPROGRESS)
 			return 0;
 resume:
+		dev_put(skb->dev);
+
 		spin_lock(&x->lock);
 		if (nexthdr <= 0) {
 			if (nexthdr == -EBADMSG) {

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 805681a..2cc7af8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c

@@ -2449,7 +2449,7 @@
 	int type, err;
 
 #ifdef CONFIG_COMPAT
-	if (is_compat_task())
+	if (in_compat_syscall())
 		return -ENOTSUPP;
 #endif
 

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 1db6d73..b2ab2a9 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include

@@ -251,7 +251,7 @@
 if_changed = $(if $(strip $(any-prereq) $(arg-check)),                       \
 	@set -e;                                                             \
 	$(echo-cmd) $(cmd_$(1));                                             \
-	printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd)
+	printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
 
 # Execute the command and also postprocess generated .d dependencies file.
 if_changed_dep = $(if $(strip $(any-prereq) $(arg-check) ),                  \
@@ -259,14 +259,14 @@
 	$(echo-cmd) $(cmd_$(1));                                             \
 	scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp;\
 	rm -f $(depfile);                                                    \
-	mv -f $(dot-target).tmp $(dot-target).cmd)
+	mv -f $(dot-target).tmp $(dot-target).cmd, @:)
 
 # Usage: $(call if_changed_rule,foo)
 # Will check if $(cmd_foo) or any of the prerequisites changed,
 # and if so will execute $(rule_foo).
 if_changed_rule = $(if $(strip $(any-prereq) $(arg-check) ),                 \
 	@set -e;                                                             \
-	$(rule_$(1)))
+	$(rule_$(1)), @:)
 
 ###
 # why - tell why a a target got build

diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst
index 1c15717..a1be75d 100644
--- a/scripts/Makefile.dtbinst
+++ b/scripts/Makefile.dtbinst

@@ -23,8 +23,6 @@
 PHONY += __dtbs_install_prep
 __dtbs_install_prep:
 ifeq ("$(dtbinst-root)", "$(obj)")
-	$(Q)if [ -d $(INSTALL_DTBS_PATH).old ]; then rm -rf $(INSTALL_DTBS_PATH).old; fi
-	$(Q)if [ -d $(INSTALL_DTBS_PATH) ]; then mv $(INSTALL_DTBS_PATH) $(INSTALL_DTBS_PATH).old; fi
 	$(Q)mkdir -p $(INSTALL_DTBS_PATH)
 endif
 

diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index ad50d58..ddf83d0 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib

@@ -136,6 +136,12 @@
 		$(CFLAGS_UBSAN))
 endif
 
+ifeq ($(CONFIG_KCOV),y)
+_c_flags += $(if $(patsubst n%,, \
+	$(KCOV_INSTRUMENT_$(basetarget).o)$(KCOV_INSTRUMENT)y), \
+	$(CFLAGS_KCOV))
+endif
+
 # If building the kernel in a separate objtree expand all occurrences
 # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').
 

diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 8ab6867..77ce538 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan

@@ -14,4 +14,8 @@
 ifdef CONFIG_UBSAN_ALIGNMENT
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment)
 endif
+
+      # -fsanitize=* options makes GCC less smart than usual and
+      # increase number of 'maybe-uninitialized false-positives
+      CFLAGS_UBSAN += $(call cc-option, -Wno-maybe-uninitialized)
 endif

diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 5b327c6..caef815 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c

@@ -274,7 +274,11 @@
 		perror(filename);
 		exit(2);
 	}
-	fstat(fd, &st);
+	if (fstat(fd, &st) < 0) {
+		fprintf(stderr, "fixdep: error fstat'ing config file: ");
+		perror(filename);
+		exit(2);
+	}
 	if (st.st_size == 0) {
 		close(fd);
 		return;

diff --git a/scripts/coccinelle/api/pm_runtime.cocci b/scripts/coccinelle/api/pm_runtime.cocci
index b7042d0..89b98a2 100644
--- a/scripts/coccinelle/api/pm_runtime.cocci
+++ b/scripts/coccinelle/api/pm_runtime.cocci

@@ -78,7 +78,7 @@
 //  For org and report mode
 //----------------------------------------------------------
 
-@r depends on runtime_bad_err_handle exists@
+@r depends on runtime_bad_err_handle && (org || report) exists@
 position p1, p2;
 identifier pm_runtime_api;
 expression ret;

diff --git a/scripts/coccinelle/api/setup_timer.cocci b/scripts/coccinelle/api/setup_timer.cocci
new file mode 100644
index 0000000..8ee0ac3
--- /dev/null
+++ b/scripts/coccinelle/api/setup_timer.cocci

@@ -0,0 +1,199 @@
+/// Use setup_timer function instead of initializing timer with the function
+/// and data fields
+// Confidence: High
+// Copyright: (C) 2016 Vaishali Thakkar, Oracle. GPLv2
+// Options: --no-includes --include-headers
+// Keywords: init_timer, setup_timer
+
+virtual patch
+virtual context
+virtual org
+virtual report
+
+@match_immediate_function_data_after_init_timer
+depends on patch && !context && !org && !report@
+expression e, func, da;
+@@
+
+-init_timer (&e);
++setup_timer (&e, func, da);
+
+(
+-e.function = func;
+-e.data = da;
+|
+-e.data = da;
+-e.function = func;
+)
+
+@match_function_and_data_after_init_timer
+depends on patch && !context && !org && !report@
+expression e1, e2, e3, e4, e5, a, b;
+@@
+
+-init_timer (&e1);
++setup_timer (&e1, a, b);
+
+... when != a = e2
+    when != b = e3
+(
+-e1.function = a;
+... when != b = e4
+-e1.data = b;
+|
+-e1.data = b;
+... when != a = e5
+-e1.function = a;
+)
+
+@r1 exists@
+identifier f;
+position p;
+@@
+
+f(...) { ... when any
+  init_timer@p(...)
+  ... when any
+}
+
+@r2 exists@
+identifier g != r1.f;
+struct timer_list t;
+expression e8;
+@@
+
+g(...) { ... when any
+  t.data = e8
+  ... when any
+}
+
+// It is dangerous to use setup_timer if data field is initialized
+// in another function.
+
+@script:python depends on r2@
+p << r1.p;
+@@
+
+cocci.include_match(False)
+
+@r3 depends on patch && !context && !org && !report@
+expression e6, e7, c;
+position r1.p;
+@@
+
+-init_timer@p (&e6);
++setup_timer (&e6, c, 0UL);
+... when != c = e7
+-e6.function = c;
+
+// ----------------------------------------------------------------------------
+
+@match_immediate_function_data_after_init_timer_context
+depends on !patch && (context || org || report)@
+expression da, e, func;
+position j0, j1, j2;
+@@
+
+* init_timer@j0 (&e);
+(
+* e@j1.function = func;
+* e@j2.data = da;
+|
+* e@j1.data = da;
+* e@j2.function = func;
+)
+
+@match_function_and_data_after_init_timer_context
+depends on !patch &&
+!match_immediate_function_data_after_init_timer_context &&
+(context || org || report)@
+expression a, b, e1, e2, e3, e4, e5;
+position j0, j1, j2;
+@@
+
+* init_timer@j0 (&e1);
+... when != a = e2
+    when != b = e3
+(
+* e1@j1.function = a;
+... when != b = e4
+* e1@j2.data = b;
+|
+* e1@j1.data = b;
+... when != a = e5
+* e1@j2.function = a;
+)
+
+@r3_context depends on !patch &&
+!match_immediate_function_data_after_init_timer_context &&
+!match_function_and_data_after_init_timer_context &&
+(context || org || report)@
+expression c, e6, e7;
+position r1.p;
+position j0, j1;
+@@
+
+* init_timer@j0@p (&e6);
+... when != c = e7
+* e6@j1.function = c;
+
+// ----------------------------------------------------------------------------
+
+@script:python match_immediate_function_data_after_init_timer_org
+depends on org@
+j0 << match_immediate_function_data_after_init_timer_context.j0;
+j1 << match_immediate_function_data_after_init_timer_context.j1;
+j2 << match_immediate_function_data_after_init_timer_context.j2;
+@@
+
+msg = "Use setup_timer function."
+coccilib.org.print_todo(j0[0], msg)
+coccilib.org.print_link(j1[0], "")
+coccilib.org.print_link(j2[0], "")
+
+@script:python match_function_and_data_after_init_timer_org depends on org@
+j0 << match_function_and_data_after_init_timer_context.j0;
+j1 << match_function_and_data_after_init_timer_context.j1;
+j2 << match_function_and_data_after_init_timer_context.j2;
+@@
+
+msg = "Use setup_timer function."
+coccilib.org.print_todo(j0[0], msg)
+coccilib.org.print_link(j1[0], "")
+coccilib.org.print_link(j2[0], "")
+
+@script:python r3_org depends on org@
+j0 << r3_context.j0;
+j1 << r3_context.j1;
+@@
+
+msg = "Use setup_timer function."
+coccilib.org.print_todo(j0[0], msg)
+coccilib.org.print_link(j1[0], "")
+
+// ----------------------------------------------------------------------------
+
+@script:python match_immediate_function_data_after_init_timer_report
+depends on report@
+j0 << match_immediate_function_data_after_init_timer_context.j0;
+j1 << match_immediate_function_data_after_init_timer_context.j1;
+@@
+
+msg = "Use setup_timer function for function on line %s." % (j1[0].line)
+coccilib.report.print_report(j0[0], msg)
+
+@script:python match_function_and_data_after_init_timer_report depends on report@
+j0 << match_function_and_data_after_init_timer_context.j0;
+j1 << match_function_and_data_after_init_timer_context.j1;
+@@
+
+msg = "Use setup_timer function for function on line %s." % (j1[0].line)
+coccilib.report.print_report(j0[0], msg)
+
+@script:python r3_report depends on report@
+j0 << r3_context.j0;
+j1 << r3_context.j1;
+@@
+
+msg = "Use setup_timer function for function on line %s." % (j1[0].line)
+coccilib.report.print_report(j0[0], msg)

diff --git a/scripts/coccinelle/iterators/use_after_iter.cocci b/scripts/coccinelle/iterators/use_after_iter.cocci
index f085f59..ce8cc9c 100644
--- a/scripts/coccinelle/iterators/use_after_iter.cocci
+++ b/scripts/coccinelle/iterators/use_after_iter.cocci

@@ -123,7 +123,7 @@
 |
 sizeof(<+...c...+>)
 |
-&c->member
+ &c->member
 |
 c = E
 |

diff --git a/scripts/coccinelle/misc/array_size.cocci b/scripts/coccinelle/misc/array_size.cocci
index 81e279c..6ec0571 100644
--- a/scripts/coccinelle/misc/array_size.cocci
+++ b/scripts/coccinelle/misc/array_size.cocci

@@ -59,7 +59,7 @@
 //  For org and report mode
 //----------------------------------------------------------
 
-@r@
+@r depends on (org || report)@
 type T;
 T[] E;
 position p;

diff --git a/scripts/coccinelle/misc/badty.cocci b/scripts/coccinelle/misc/badty.cocci
index 2fc06fc..481cf30 100644
--- a/scripts/coccinelle/misc/badty.cocci
+++ b/scripts/coccinelle/misc/badty.cocci

@@ -50,7 +50,7 @@
 //  For org and report mode
 //----------------------------------------------------------
 
-@r disable sizeof_type_expr@
+@r depends on (org || report) disable sizeof_type_expr@
 type T;
 T **x;
 position p;

diff --git a/scripts/coccinelle/misc/bugon.cocci b/scripts/coccinelle/misc/bugon.cocci
index 27c97f1..7415860 100644
--- a/scripts/coccinelle/misc/bugon.cocci
+++ b/scripts/coccinelle/misc/bugon.cocci

@@ -40,7 +40,7 @@
 //  For org and report mode
 //----------------------------------------------------------
 
-@r@
+@r depends on (org || report)@
 expression e;
 position p;
 @@

diff --git a/scripts/gdb/linux/modules.py b/scripts/gdb/linux/modules.py
index 25db8cf..0a35d6d 100644
--- a/scripts/gdb/linux/modules.py
+++ b/scripts/gdb/linux/modules.py

@@ -73,10 +73,11 @@
                 "        " if utils.get_long_type().sizeof == 8 else ""))
 
         for module in module_list():
+            layout = module['core_layout']
             gdb.write("{address} {name:<19} {size:>8}  {ref}".format(
-                address=str(module['module_core']).split()[0],
+                address=str(layout['base']).split()[0],
                 name=module['name'].string(),
-                size=str(module['core_size']),
+                size=str(layout['size']),
                 ref=str(module['refcnt']['counter'])))
 
             source_list = module['source_list']

diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py
new file mode 100644
index 0000000..6e6709c
--- /dev/null
+++ b/scripts/gdb/linux/proc.py

@@ -0,0 +1,41 @@
+#
+# gdb helper commands and functions for Linux kernel debugging
+#
+#  Kernel proc information reader
+#
+# Copyright (c) 2016 Linaro Ltd
+#
+# Authors:
+#  Kieran Bingham <kieran.bingham@linaro.org>
+#
+# This work is licensed under the terms of the GNU GPL version 2.
+#
+
+import gdb
+
+
+class LxCmdLine(gdb.Command):
+    """ Report the Linux Commandline used in the current kernel.
+        Equivalent to cat /proc/cmdline on a running target"""
+
+    def __init__(self):
+        super(LxCmdLine, self).__init__("lx-cmdline", gdb.COMMAND_DATA)
+
+    def invoke(self, arg, from_tty):
+        gdb.write(gdb.parse_and_eval("saved_command_line").string() + "\n")
+
+LxCmdLine()
+
+
+class LxVersion(gdb.Command):
+    """ Report the Linux Version of the current kernel.
+        Equivalent to cat /proc/version on a running target"""
+
+    def __init__(self):
+        super(LxVersion, self).__init__("lx-version", gdb.COMMAND_DATA)
+
+    def invoke(self, arg, from_tty):
+        # linux_banner should contain a newline
+        gdb.write(gdb.parse_and_eval("linux_banner").string())
+
+LxVersion()

diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py
index 627750c..9a0f892 100644
--- a/scripts/gdb/linux/symbols.py
+++ b/scripts/gdb/linux/symbols.py

@@ -108,7 +108,7 @@
 
     def load_module_symbols(self, module):
         module_name = module['name'].string()
-        module_addr = str(module['module_core']).split()[0]
+        module_addr = str(module['core_layout']['base']).split()[0]
 
         module_file = self._get_module_file(module_name)
         if not module_file and not self.module_files_updated:

diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py
index ce82bf5..d5943ec 100644
--- a/scripts/gdb/vmlinux-gdb.py
+++ b/scripts/gdb/vmlinux-gdb.py

@@ -29,3 +29,4 @@
     import linux.tasks
     import linux.cpus
     import linux.lists
+    import linux.proc

diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index d79cba4..ebced77 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile

@@ -96,13 +96,15 @@
 defconfig: $(obj)/conf
 ifeq ($(KBUILD_DEFCONFIG),)
 	$< $(silent) --defconfig $(Kconfig)
-else ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)),)
+else
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)),)
 	@$(kecho) "*** Default configuration is based on '$(KBUILD_DEFCONFIG)'"
 	$(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG) $(Kconfig)
 else
 	@$(kecho) "*** Default configuration is based on target '$(KBUILD_DEFCONFIG)'"
 	$(Q)$(MAKE) -f $(srctree)/Makefile $(KBUILD_DEFCONFIG)
 endif
+endif
 
 %_defconfig: $(obj)/conf
 	$(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$@ $(Kconfig)

diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 0b7dc2f..dd243d2 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c

@@ -267,10 +267,8 @@
 		if (in)
 			goto load;
 		sym_add_change_count(1);
-		if (!sym_defconfig_list) {
-			sym_calc_value(modules_sym);
+		if (!sym_defconfig_list)
 			return 1;
-		}
 
 		for_all_defaults(sym_defconfig_list, prop) {
 			if (expr_calc_value(prop->visible.expr) == no ||
@@ -403,7 +401,6 @@
 	}
 	free(line);
 	fclose(in);
-	sym_calc_value(modules_sym);
 	return 0;
 }
 
@@ -414,8 +411,12 @@
 
 	sym_set_change_count(0);
 
-	if (conf_read_simple(name, S_DEF_USER))
+	if (conf_read_simple(name, S_DEF_USER)) {
+		sym_calc_value(modules_sym);
 		return 1;
+	}
+
+	sym_calc_value(modules_sym);
 
 	for_all_symbols(i, sym) {
 		sym_calc_value(sym);
@@ -846,6 +847,7 @@
 
 	name = conf_get_autoconfig_name();
 	conf_read_simple(name, S_DEF_AUTO);
+	sym_calc_value(modules_sym);
 
 	if (chdir("include/config"))
 		return 1;

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 453ede9..49d61ad 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh

@@ -97,9 +97,10 @@
 	local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL}               \
 		      ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}"
 
-	${NM} -n ${1} | \
-		scripts/kallsyms ${kallsymopt} | \
-		${CC} ${aflags} -c -o ${2} -x assembler-with-cpp -
+	local afile="`basename ${2} .o`.S"
+
+	${NM} -n ${1} | scripts/kallsyms ${kallsymopt} > ${afile}
+	${CC} ${aflags} -c -o ${2} ${afile}
 }
 
 # Create map file with all symbols from ${1}

diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 71004da..b6de63c 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec

@@ -131,11 +131,16 @@
 echo ""
 echo "%post"
 echo "if [ -x /sbin/installkernel -a -r /boot/vmlinuz-$KERNELRELEASE -a -r /boot/System.map-$KERNELRELEASE ]; then"
-echo "cp /boot/vmlinuz-$KERNELRELEASE /boot/vmlinuz-$KERNELRELEASE-rpm"
-echo "cp /boot/System.map-$KERNELRELEASE /boot/System.map-$KERNELRELEASE-rpm"
+echo "cp /boot/vmlinuz-$KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm"
+echo "cp /boot/System.map-$KERNELRELEASE /boot/.System.map-$KERNELRELEASE-rpm"
 echo "rm -f /boot/vmlinuz-$KERNELRELEASE /boot/System.map-$KERNELRELEASE"
-echo "/sbin/installkernel $KERNELRELEASE /boot/vmlinuz-$KERNELRELEASE-rpm /boot/System.map-$KERNELRELEASE-rpm"
-echo "rm -f /boot/vmlinuz-$KERNELRELEASE-rpm /boot/System.map-$KERNELRELEASE-rpm"
+echo "/sbin/installkernel $KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm"
+echo "rm -f /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm"
+echo "fi"
+echo ""
+echo "%preun"
+echo "if [ -x /sbin/new-kernel-pkg ]; then"
+echo "new-kernel-pkg --remove $KERNELRELEASE --rminitrd --initrdfile=/boot/initramfs-$KERNELRELEASE.img"
 echo "fi"
 echo ""
 echo "%files"

diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index 62a1822e0..f453b7c 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c

@@ -315,6 +315,7 @@
 
 	case EM_S390:
 	case EM_AARCH64:
+	case EM_PARISC:
 		custom_sort = sort_relative_table;
 		break;
 	case EM_ARCOMPACT:

diff --git a/scripts/tags.sh b/scripts/tags.sh
index 23ba1c6..f72f48f 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh

@@ -163,6 +163,8 @@
 	'/^TRACE_EVENT(\([[:alnum:]_]*\).*/trace_\1_rcuidle/'
 	'/^DEFINE_EVENT([^,)]*, *\([[:alnum:]_]*\).*/trace_\1/'
 	'/^DEFINE_EVENT([^,)]*, *\([[:alnum:]_]*\).*/trace_\1_rcuidle/'
+	'/^DEFINE_INSN_CACHE_OPS(\([[:alnum:]_]*\).*/get_\1_slot/'
+	'/^DEFINE_INSN_CACHE_OPS(\([[:alnum:]_]*\).*/free_\1_slot/'
 	'/^PAGEFLAG(\([[:alnum:]_]*\).*/Page\1/'
 	'/^PAGEFLAG(\([[:alnum:]_]*\).*/SetPage\1/'
 	'/^PAGEFLAG(\([[:alnum:]_]*\).*/ClearPage\1/'

diff --git a/sound/core/timer.c b/sound/core/timer.c
index aa1b15c..6469bed 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c

@@ -1019,8 +1019,8 @@
 		njiff += timer->sticks - priv->correction;
 		priv->correction = 0;
 	}
-	priv->last_expires = priv->tlist.expires = njiff;
-	add_timer(&priv->tlist);
+	priv->last_expires = njiff;
+	mod_timer(&priv->tlist, njiff);
 	return 0;
 }
 
@@ -1502,17 +1502,13 @@
 	return err;
 }
 
-static int snd_timer_user_gparams(struct file *file,
-				  struct snd_timer_gparams __user *_gparams)
+static int timer_set_gparams(struct snd_timer_gparams *gparams)
 {
-	struct snd_timer_gparams gparams;
 	struct snd_timer *t;
 	int err;
 
-	if (copy_from_user(&gparams, _gparams, sizeof(gparams)))
-		return -EFAULT;
 	mutex_lock(&register_mutex);
-	t = snd_timer_find(&gparams.tid);
+	t = snd_timer_find(&gparams->tid);
 	if (!t) {
 		err = -ENODEV;
 		goto _error;
@@ -1525,12 +1521,22 @@
 		err = -ENOSYS;
 		goto _error;
 	}
-	err = t->hw.set_period(t, gparams.period_num, gparams.period_den);
+	err = t->hw.set_period(t, gparams->period_num, gparams->period_den);
 _error:
 	mutex_unlock(&register_mutex);
 	return err;
 }
 
+static int snd_timer_user_gparams(struct file *file,
+				  struct snd_timer_gparams __user *_gparams)
+{
+	struct snd_timer_gparams gparams;
+
+	if (copy_from_user(&gparams, _gparams, sizeof(gparams)))
+		return -EFAULT;
+	return timer_set_gparams(&gparams);
+}
+
 static int snd_timer_user_gstatus(struct file *file,
 				  struct snd_timer_gstatus __user *_gstatus)
 {

diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c
index 2e90822..6a437eb 100644
--- a/sound/core/timer_compat.c
+++ b/sound/core/timer_compat.c

@@ -22,6 +22,19 @@
 
 #include <linux/compat.h>
 
+/*
+ * ILP32/LP64 has different size for 'long' type. Additionally, the size
+ * of storage alignment differs depending on architectures. Here, '__packed'
+ * qualifier is used so that the size of this structure is multiple of 4 and
+ * it fits to any architectures with 32 bit storage alignment.
+ */
+struct snd_timer_gparams32 {
+	struct snd_timer_id tid;
+	u32 period_num;
+	u32 period_den;
+	unsigned char reserved[32];
+} __packed;
+
 struct snd_timer_info32 {
 	u32 flags;
 	s32 card;
@@ -32,6 +45,19 @@
 	unsigned char reserved[64];
 };
 
+static int snd_timer_user_gparams_compat(struct file *file,
+					struct snd_timer_gparams32 __user *user)
+{
+	struct snd_timer_gparams gparams;
+
+	if (copy_from_user(&gparams.tid, &user->tid, sizeof(gparams.tid)) ||
+	    get_user(gparams.period_num, &user->period_num) ||
+	    get_user(gparams.period_den, &user->period_den))
+		return -EFAULT;
+
+	return timer_set_gparams(&gparams);
+}
+
 static int snd_timer_user_info_compat(struct file *file,
 				      struct snd_timer_info32 __user *_info)
 {
@@ -99,6 +125,7 @@
  */
 
 enum {
+	SNDRV_TIMER_IOCTL_GPARAMS32 = _IOW('T', 0x04, struct snd_timer_gparams32),
 	SNDRV_TIMER_IOCTL_INFO32 = _IOR('T', 0x11, struct snd_timer_info32),
 	SNDRV_TIMER_IOCTL_STATUS32 = _IOW('T', 0x14, struct snd_timer_status32),
 #ifdef CONFIG_X86_X32
@@ -114,7 +141,6 @@
 	case SNDRV_TIMER_IOCTL_PVERSION:
 	case SNDRV_TIMER_IOCTL_TREAD:
 	case SNDRV_TIMER_IOCTL_GINFO:
-	case SNDRV_TIMER_IOCTL_GPARAMS:
 	case SNDRV_TIMER_IOCTL_GSTATUS:
 	case SNDRV_TIMER_IOCTL_SELECT:
 	case SNDRV_TIMER_IOCTL_PARAMS:
@@ -128,6 +154,8 @@
 	case SNDRV_TIMER_IOCTL_PAUSE_OLD:
 	case SNDRV_TIMER_IOCTL_NEXT_DEVICE:
 		return snd_timer_user_ioctl(file, cmd, (unsigned long)argp);
+	case SNDRV_TIMER_IOCTL_GPARAMS32:
+		return snd_timer_user_gparams_compat(file, argp);
 	case SNDRV_TIMER_IOCTL_INFO32:
 		return snd_timer_user_info_compat(file, argp);
 	case SNDRV_TIMER_IOCTL_STATUS32:

diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c
index 845d5e5..ec4db3a 100644
--- a/sound/firewire/dice/dice-stream.c
+++ b/sound/firewire/dice/dice-stream.c

@@ -446,18 +446,12 @@
 
 void snd_dice_stream_destroy_duplex(struct snd_dice *dice)
 {
-	struct reg_params tx_params, rx_params;
+	unsigned int i;
 
-	snd_dice_transaction_clear_enable(dice);
-
-	if (get_register_params(dice, &tx_params, &rx_params) == 0) {
-		stop_streams(dice, AMDTP_IN_STREAM, &tx_params);
-		stop_streams(dice, AMDTP_OUT_STREAM, &rx_params);
+	for (i = 0; i < MAX_STREAMS; i++) {
+		destroy_stream(dice, AMDTP_IN_STREAM, i);
+		destroy_stream(dice, AMDTP_OUT_STREAM, i);
 	}
-
-	release_resources(dice);
-
-	dice->substreams_counter = 0;
 }
 
 void snd_dice_stream_update_duplex(struct snd_dice *dice)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 2624cfe..b680be0 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c

@@ -2361,6 +2361,10 @@
 	  .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
 	{ PCI_DEVICE(0x1002, 0xaae8),
 	  .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
+	{ PCI_DEVICE(0x1002, 0xaae0),
+	  .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
+	{ PCI_DEVICE(0x1002, 0xaaf0),
+	  .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
 	/* VIA VT8251/VT8237A */
 	{ PCI_DEVICE(0x1106, 0x3288), .driver_data = AZX_DRIVER_VIA },
 	/* VIA GFX VT7122/VX900 */

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 4f5ca0b..fefe83f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c

@@ -4759,6 +4759,7 @@
 	ALC255_FIXUP_DELL_SPK_NOISE,
 	ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
 	ALC280_FIXUP_HP_HEADSET_MIC,
+	ALC221_FIXUP_HP_FRONT_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5401,6 +5402,13 @@
 		.chained = true,
 		.chain_id = ALC269_FIXUP_HEADSET_MIC,
 	},
+	[ALC221_FIXUP_HP_FRONT_MIC] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x02a19020 }, /* Front Mic */
+			{ }
+		},
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5506,6 +5514,7 @@
 	SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
+	SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -6406,6 +6415,7 @@
 	ALC668_FIXUP_AUTO_MUTE,
 	ALC668_FIXUP_DELL_DISABLE_AAMIX,
 	ALC668_FIXUP_DELL_XPS13,
+	ALC662_FIXUP_ASUS_Nx50,
 };
 
 static const struct hda_fixup alc662_fixups[] = {
@@ -6646,6 +6656,12 @@
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc_fixup_bass_chmap,
 	},
+	[ALC662_FIXUP_ASUS_Nx50] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_auto_mute_via_amp,
+		.chained = true,
+		.chain_id = ALC662_FIXUP_BASS_1A
+	},
 };
 
 static const struct snd_pci_quirk alc662_fixup_tbl[] = {
@@ -6668,8 +6684,9 @@
 	SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
-	SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A),
+	SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
+	SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
 	SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
 	SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),

diff --git a/sound/usb/Kconfig b/sound/usb/Kconfig
index d14bf41..a452ad7 100644
--- a/sound/usb/Kconfig
+++ b/sound/usb/Kconfig

@@ -15,7 +15,6 @@
 	select SND_RAWMIDI
 	select SND_PCM
 	select BITREVERSE
-	select SND_USB_AUDIO_USE_MEDIA_CONTROLLER if MEDIA_CONTROLLER && (MEDIA_SUPPORT=y || MEDIA_SUPPORT=SND_USB_AUDIO)
 	help
 	  Say Y here to include support for USB audio and USB MIDI
 	  devices.
@@ -23,9 +22,6 @@
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-usb-audio.
 
-config SND_USB_AUDIO_USE_MEDIA_CONTROLLER
-	bool
-
 config SND_USB_UA101
 	tristate "Edirol UA-101/UA-1000 driver"
 	select SND_PCM

diff --git a/sound/usb/Makefile b/sound/usb/Makefile
index 8dca3c4..2d2d122 100644
--- a/sound/usb/Makefile
+++ b/sound/usb/Makefile

@@ -15,8 +15,6 @@
 			quirks.o \
 			stream.o
 
-snd-usb-audio-$(CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER) += media.o
-
 snd-usbmidi-lib-objs := midi.o
 
 # Toplevel Module Dependency

diff --git a/sound/usb/card.c b/sound/usb/card.c
index 63244bb..3fc6358 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c

@@ -66,7 +66,6 @@
 #include "format.h"
 #include "power.h"
 #include "stream.h"
-#include "media.h"
 
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("USB Audio");
@@ -612,11 +611,6 @@
 	if (err < 0)
 		goto __error;
 
-	if (quirk->media_device) {
-		/* don't want to fail when media_snd_device_create() fails */
-		media_snd_device_create(chip, intf);
-	}
-
 	usb_chip[chip->index] = chip;
 	chip->num_interfaces++;
 	usb_set_intfdata(intf, chip);
@@ -673,14 +667,6 @@
 		list_for_each(p, &chip->midi_list) {
 			snd_usbmidi_disconnect(p);
 		}
-		/*
-		 * Nice to check quirk && quirk->media_device
-		 * need some special handlings. Doesn't look like
-		 * we have access to quirk here
-		 * Acceses mixer_list
-		*/
-		media_snd_device_delete(chip);
-
 		/* release mixer resources */
 		list_for_each_entry(mixer, &chip->mixer_list, list) {
 			snd_usb_mixer_disconnect(mixer);

diff --git a/sound/usb/card.h b/sound/usb/card.h
index 34a0898..71778ca 100644
--- a/sound/usb/card.h
+++ b/sound/usb/card.h

@@ -105,8 +105,6 @@
 	struct list_head list;
 };
 
-struct media_ctl;
-
 struct snd_usb_substream {
 	struct snd_usb_stream *stream;
 	struct usb_device *dev;
@@ -158,7 +156,6 @@
 	} dsd_dop;
 
 	bool trigger_tstamp_pending_update; /* trigger timestamp being updated from initial estimate */
-	struct media_ctl *media_ctl;
 };
 
 struct snd_usb_stream {

diff --git a/sound/usb/media.c b/sound/usb/media.c
deleted file mode 100644
index 93a50d01..0000000
--- a/sound/usb/media.c
+++ /dev/null

@@ -1,318 +0,0 @@
-/*
- * media.c - Media Controller specific ALSA driver code
- *
- * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
- * Copyright (c) 2016 Samsung Electronics Co., Ltd.
- *
- * This file is released under the GPLv2.
- */
-
-/*
- * This file adds Media Controller support to ALSA driver
- * to use the Media Controller API to share tuner with DVB
- * and V4L2 drivers that control media device. Media device
- * is created based on existing quirks framework. Using this
- * approach, the media controller API usage can be added for
- * a specific device.
-*/
-
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/usb.h>
-
-#include <sound/pcm.h>
-#include <sound/core.h>
-
-#include "usbaudio.h"
-#include "card.h"
-#include "mixer.h"
-#include "media.h"
-
-static int media_snd_enable_source(struct media_ctl *mctl)
-{
-	if (mctl && mctl->media_dev->enable_source)
-		return mctl->media_dev->enable_source(&mctl->media_entity,
-						      &mctl->media_pipe);
-	return 0;
-}
-
-static void media_snd_disable_source(struct media_ctl *mctl)
-{
-	if (mctl && mctl->media_dev->disable_source)
-		mctl->media_dev->disable_source(&mctl->media_entity);
-}
-
-int media_snd_stream_init(struct snd_usb_substream *subs, struct snd_pcm *pcm,
-			int stream)
-{
-	struct media_device *mdev;
-	struct media_ctl *mctl;
-	struct device *pcm_dev = &pcm->streams[stream].dev;
-	u32 intf_type;
-	int ret = 0;
-	u16 mixer_pad;
-	struct media_entity *entity;
-
-	mdev = subs->stream->chip->media_dev;
-	if (!mdev)
-		return -ENODEV;
-
-	if (subs->media_ctl)
-		return 0;
-
-	/* allocate media_ctl */
-	mctl = kzalloc(sizeof(*mctl), GFP_KERNEL);
-	if (!mctl)
-		return -ENOMEM;
-
-	mctl->media_dev = mdev;
-	if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		intf_type = MEDIA_INTF_T_ALSA_PCM_PLAYBACK;
-		mctl->media_entity.function = MEDIA_ENT_F_AUDIO_PLAYBACK;
-		mctl->media_pad.flags = MEDIA_PAD_FL_SOURCE;
-		mixer_pad = 1;
-	} else {
-		intf_type = MEDIA_INTF_T_ALSA_PCM_CAPTURE;
-		mctl->media_entity.function = MEDIA_ENT_F_AUDIO_CAPTURE;
-		mctl->media_pad.flags = MEDIA_PAD_FL_SINK;
-		mixer_pad = 2;
-	}
-	mctl->media_entity.name = pcm->name;
-	media_entity_pads_init(&mctl->media_entity, 1, &mctl->media_pad);
-	ret =  media_device_register_entity(mctl->media_dev,
-					    &mctl->media_entity);
-	if (ret)
-		goto free_mctl;
-
-	mctl->intf_devnode = media_devnode_create(mdev, intf_type, 0,
-						  MAJOR(pcm_dev->devt),
-						  MINOR(pcm_dev->devt));
-	if (!mctl->intf_devnode) {
-		ret = -ENOMEM;
-		goto unregister_entity;
-	}
-	mctl->intf_link = media_create_intf_link(&mctl->media_entity,
-						 &mctl->intf_devnode->intf,
-						 MEDIA_LNK_FL_ENABLED);
-	if (!mctl->intf_link) {
-		ret = -ENOMEM;
-		goto devnode_remove;
-	}
-
-	/* create link between mixer and audio */
-	media_device_for_each_entity(entity, mdev) {
-		switch (entity->function) {
-		case MEDIA_ENT_F_AUDIO_MIXER:
-			ret = media_create_pad_link(entity, mixer_pad,
-						    &mctl->media_entity, 0,
-						    MEDIA_LNK_FL_ENABLED);
-			if (ret)
-				goto remove_intf_link;
-			break;
-		}
-	}
-
-	subs->media_ctl = mctl;
-	return 0;
-
-remove_intf_link:
-	media_remove_intf_link(mctl->intf_link);
-devnode_remove:
-	media_devnode_remove(mctl->intf_devnode);
-unregister_entity:
-	media_device_unregister_entity(&mctl->media_entity);
-free_mctl:
-	kfree(mctl);
-	return ret;
-}
-
-void media_snd_stream_delete(struct snd_usb_substream *subs)
-{
-	struct media_ctl *mctl = subs->media_ctl;
-
-	if (mctl && mctl->media_dev) {
-		struct media_device *mdev;
-
-		mdev = subs->stream->chip->media_dev;
-		if (mdev && media_devnode_is_registered(&mdev->devnode)) {
-			media_devnode_remove(mctl->intf_devnode);
-			media_device_unregister_entity(&mctl->media_entity);
-			media_entity_cleanup(&mctl->media_entity);
-		}
-		kfree(mctl);
-		subs->media_ctl = NULL;
-	}
-}
-
-int media_snd_start_pipeline(struct snd_usb_substream *subs)
-{
-	struct media_ctl *mctl = subs->media_ctl;
-
-	if (mctl)
-		return media_snd_enable_source(mctl);
-	return 0;
-}
-
-void media_snd_stop_pipeline(struct snd_usb_substream *subs)
-{
-	struct media_ctl *mctl = subs->media_ctl;
-
-	if (mctl)
-		media_snd_disable_source(mctl);
-}
-
-int media_snd_mixer_init(struct snd_usb_audio *chip)
-{
-	struct device *ctl_dev = &chip->card->ctl_dev;
-	struct media_intf_devnode *ctl_intf;
-	struct usb_mixer_interface *mixer;
-	struct media_device *mdev = chip->media_dev;
-	struct media_mixer_ctl *mctl;
-	u32 intf_type = MEDIA_INTF_T_ALSA_CONTROL;
-	int ret;
-
-	if (!mdev)
-		return -ENODEV;
-
-	ctl_intf = chip->ctl_intf_media_devnode;
-	if (!ctl_intf) {
-		ctl_intf = media_devnode_create(mdev, intf_type, 0,
-						MAJOR(ctl_dev->devt),
-						MINOR(ctl_dev->devt));
-		if (!ctl_intf)
-			return -ENOMEM;
-		chip->ctl_intf_media_devnode = ctl_intf;
-	}
-
-	list_for_each_entry(mixer, &chip->mixer_list, list) {
-
-		if (mixer->media_mixer_ctl)
-			continue;
-
-		/* allocate media_mixer_ctl */
-		mctl = kzalloc(sizeof(*mctl), GFP_KERNEL);
-		if (!mctl)
-			return -ENOMEM;
-
-		mctl->media_dev = mdev;
-		mctl->media_entity.function = MEDIA_ENT_F_AUDIO_MIXER;
-		mctl->media_entity.name = chip->card->mixername;
-		mctl->media_pad[0].flags = MEDIA_PAD_FL_SINK;
-		mctl->media_pad[1].flags = MEDIA_PAD_FL_SOURCE;
-		mctl->media_pad[2].flags = MEDIA_PAD_FL_SOURCE;
-		media_entity_pads_init(&mctl->media_entity, MEDIA_MIXER_PAD_MAX,
-				  mctl->media_pad);
-		ret =  media_device_register_entity(mctl->media_dev,
-						    &mctl->media_entity);
-		if (ret) {
-			kfree(mctl);
-			return ret;
-		}
-
-		mctl->intf_link = media_create_intf_link(&mctl->media_entity,
-							 &ctl_intf->intf,
-							 MEDIA_LNK_FL_ENABLED);
-		if (!mctl->intf_link) {
-			media_device_unregister_entity(&mctl->media_entity);
-			media_entity_cleanup(&mctl->media_entity);
-			kfree(mctl);
-			return -ENOMEM;
-		}
-		mctl->intf_devnode = ctl_intf;
-		mixer->media_mixer_ctl = mctl;
-	}
-	return 0;
-}
-
-static void media_snd_mixer_delete(struct snd_usb_audio *chip)
-{
-	struct usb_mixer_interface *mixer;
-	struct media_device *mdev = chip->media_dev;
-
-	if (!mdev)
-		return;
-
-	list_for_each_entry(mixer, &chip->mixer_list, list) {
-		struct media_mixer_ctl *mctl;
-
-		mctl = mixer->media_mixer_ctl;
-		if (!mixer->media_mixer_ctl)
-			continue;
-
-		if (media_devnode_is_registered(&mdev->devnode)) {
-			media_device_unregister_entity(&mctl->media_entity);
-			media_entity_cleanup(&mctl->media_entity);
-		}
-		kfree(mctl);
-		mixer->media_mixer_ctl = NULL;
-	}
-	if (media_devnode_is_registered(&mdev->devnode))
-		media_devnode_remove(chip->ctl_intf_media_devnode);
-	chip->ctl_intf_media_devnode = NULL;
-}
-
-int media_snd_device_create(struct snd_usb_audio *chip,
-			struct usb_interface *iface)
-{
-	struct media_device *mdev;
-	struct usb_device *usbdev = interface_to_usbdev(iface);
-	int ret;
-
-	mdev = media_device_get_devres(&usbdev->dev);
-	if (!mdev)
-		return -ENOMEM;
-	if (!mdev->dev) {
-		/* register media device */
-		mdev->dev = &usbdev->dev;
-		if (usbdev->product)
-			strlcpy(mdev->model, usbdev->product,
-				sizeof(mdev->model));
-		if (usbdev->serial)
-			strlcpy(mdev->serial, usbdev->serial,
-				sizeof(mdev->serial));
-		strcpy(mdev->bus_info, usbdev->devpath);
-		mdev->hw_revision = le16_to_cpu(usbdev->descriptor.bcdDevice);
-		media_device_init(mdev);
-	}
-	if (!media_devnode_is_registered(&mdev->devnode)) {
-		ret = media_device_register(mdev);
-		if (ret) {
-			dev_err(&usbdev->dev,
-				"Couldn't register media device. Error: %d\n",
-				ret);
-			return ret;
-		}
-	}
-
-	/* save media device - avoid lookups */
-	chip->media_dev = mdev;
-
-	/* Create media entities for mixer and control dev */
-	ret = media_snd_mixer_init(chip);
-	if (ret) {
-		dev_err(&usbdev->dev,
-			"Couldn't create media mixer entities. Error: %d\n",
-			ret);
-
-		/* clear saved media_dev */
-		chip->media_dev = NULL;
-
-		return ret;
-	}
-	return 0;
-}
-
-void media_snd_device_delete(struct snd_usb_audio *chip)
-{
-	struct media_device *mdev = chip->media_dev;
-
-	media_snd_mixer_delete(chip);
-
-	if (mdev) {
-		if (media_devnode_is_registered(&mdev->devnode))
-			media_device_unregister(mdev);
-		chip->media_dev = NULL;
-	}
-}

diff --git a/sound/usb/media.h b/sound/usb/media.h
deleted file mode 100644
index 1dcdcdc..0000000
--- a/sound/usb/media.h
+++ /dev/null

@@ -1,72 +0,0 @@
-/*
- * media.h - Media Controller specific ALSA driver code
- *
- * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
- * Copyright (c) 2016 Samsung Electronics Co., Ltd.
- *
- * This file is released under the GPLv2.
- */
-
-/*
- * This file adds Media Controller support to ALSA driver
- * to use the Media Controller API to share tuner with DVB
- * and V4L2 drivers that control media device. Media device
- * is created based on existing quirks framework. Using this
- * approach, the media controller API usage can be added for
- * a specific device.
-*/
-#ifndef __MEDIA_H
-
-#ifdef CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER
-
-#include <media/media-device.h>
-#include <media/media-entity.h>
-#include <sound/asound.h>
-
-struct media_ctl {
-	struct media_device *media_dev;
-	struct media_entity media_entity;
-	struct media_intf_devnode *intf_devnode;
-	struct media_link *intf_link;
-	struct media_pad media_pad;
-	struct media_pipeline media_pipe;
-};
-
-/*
- * One source pad each for SNDRV_PCM_STREAM_CAPTURE and
- * SNDRV_PCM_STREAM_PLAYBACK. One for sink pad to link
- * to AUDIO Source
-*/
-#define MEDIA_MIXER_PAD_MAX    (SNDRV_PCM_STREAM_LAST + 2)
-
-struct media_mixer_ctl {
-	struct media_device *media_dev;
-	struct media_entity media_entity;
-	struct media_intf_devnode *intf_devnode;
-	struct media_link *intf_link;
-	struct media_pad media_pad[MEDIA_MIXER_PAD_MAX];
-	struct media_pipeline media_pipe;
-};
-
-int media_snd_device_create(struct snd_usb_audio *chip,
-			    struct usb_interface *iface);
-void media_snd_device_delete(struct snd_usb_audio *chip);
-int media_snd_stream_init(struct snd_usb_substream *subs, struct snd_pcm *pcm,
-			  int stream);
-void media_snd_stream_delete(struct snd_usb_substream *subs);
-int media_snd_start_pipeline(struct snd_usb_substream *subs);
-void media_snd_stop_pipeline(struct snd_usb_substream *subs);
-#else
-static inline int media_snd_device_create(struct snd_usb_audio *chip,
-					  struct usb_interface *iface)
-						{ return 0; }
-static inline void media_snd_device_delete(struct snd_usb_audio *chip) { }
-static inline int media_snd_stream_init(struct snd_usb_substream *subs,
-					struct snd_pcm *pcm, int stream)
-						{ return 0; }
-static inline void media_snd_stream_delete(struct snd_usb_substream *subs) { }
-static inline int media_snd_start_pipeline(struct snd_usb_substream *subs)
-					{ return 0; }
-static inline void media_snd_stop_pipeline(struct snd_usb_substream *subs) { }
-#endif
-#endif /* __MEDIA_H */

diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h
index f378944..3417ef3 100644
--- a/sound/usb/mixer.h
+++ b/sound/usb/mixer.h

@@ -3,8 +3,6 @@
 
 #include <sound/info.h>
 
-struct media_mixer_ctl;
-
 struct usb_mixer_interface {
 	struct snd_usb_audio *chip;
 	struct usb_host_interface *hostif;
@@ -24,7 +22,6 @@
 	struct urb *rc_urb;
 	struct usb_ctrlrequest *rc_setup_packet;
 	u8 rc_buffer[6];
-	struct media_mixer_ctl *media_mixer_ctl;
 };
 
 #define MAX_CHANNELS	16	/* max logical channels */

diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 0e4e0640..44d178e 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c

@@ -35,7 +35,6 @@
 #include "pcm.h"
 #include "clock.h"
 #include "power.h"
-#include "media.h"
 
 #define SUBSTREAM_FLAG_DATA_EP_STARTED	0
 #define SUBSTREAM_FLAG_SYNC_EP_STARTED	1
@@ -718,14 +717,10 @@
 	struct audioformat *fmt;
 	int ret;
 
-	ret = media_snd_start_pipeline(subs);
-	if (ret)
-		return ret;
-
 	ret = snd_pcm_lib_alloc_vmalloc_buffer(substream,
 					       params_buffer_bytes(hw_params));
 	if (ret < 0)
-		goto err_ret;
+		return ret;
 
 	subs->pcm_format = params_format(hw_params);
 	subs->period_bytes = params_period_bytes(hw_params);
@@ -739,27 +734,22 @@
 		dev_dbg(&subs->dev->dev,
 			"cannot set format: format = %#x, rate = %d, channels = %d\n",
 			   subs->pcm_format, subs->cur_rate, subs->channels);
-		ret = -EINVAL;
-		goto err_ret;
+		return -EINVAL;
 	}
 
 	ret = snd_usb_lock_shutdown(subs->stream->chip);
 	if (ret < 0)
-		goto err_ret;
+		return ret;
 	ret = set_format(subs, fmt);
 	snd_usb_unlock_shutdown(subs->stream->chip);
 	if (ret < 0)
-		goto err_ret;
+		return ret;
 
 	subs->interface = fmt->iface;
 	subs->altset_idx = fmt->altset_idx;
 	subs->need_setup_ep = true;
 
 	return 0;
-
-err_ret:
-	media_snd_stop_pipeline(subs);
-	return ret;
 }
 
 /*
@@ -771,7 +761,6 @@
 {
 	struct snd_usb_substream *subs = substream->runtime->private_data;
 
-	media_snd_stop_pipeline(subs);
 	subs->cur_audiofmt = NULL;
 	subs->cur_rate = 0;
 	subs->period_bytes = 0;
@@ -1232,7 +1221,6 @@
 	struct snd_usb_stream *as = snd_pcm_substream_chip(substream);
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_usb_substream *subs = &as->substream[direction];
-	int ret;
 
 	subs->interface = -1;
 	subs->altset_idx = 0;
@@ -1246,12 +1234,7 @@
 	subs->dsd_dop.channel = 0;
 	subs->dsd_dop.marker = 1;
 
-	ret = setup_hw_info(runtime, subs);
-	if (ret == 0)
-		ret = media_snd_stream_init(subs, as->pcm, direction);
-	if (ret)
-		snd_usb_autosuspend(subs->stream->chip);
-	return ret;
+	return setup_hw_info(runtime, subs);
 }
 
 static int snd_usb_pcm_close(struct snd_pcm_substream *substream, int direction)
@@ -1260,7 +1243,6 @@
 	struct snd_usb_substream *subs = &as->substream[direction];
 
 	stop_endpoints(subs, true);
-	media_snd_stop_pipeline(subs);
 
 	if (subs->interface >= 0 &&
 	    !snd_usb_lock_shutdown(subs->stream->chip)) {

diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 9d087b1..c60a776 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h

@@ -2886,7 +2886,6 @@
 		.product_name = pname, \
 		.ifnum = QUIRK_ANY_INTERFACE, \
 		.type = QUIRK_AUDIO_ALIGN_TRANSFER, \
-		.media_device = 1, \
 	} \
 }
 

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index fb62bce..6178bb5 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c

@@ -150,6 +150,7 @@
 		usb_audio_err(chip, "cannot memdup\n");
 		return -ENOMEM;
 	}
+	INIT_LIST_HEAD(&fp->list);
 	if (fp->nr_rates > MAX_NR_RATES) {
 		kfree(fp);
 		return -EINVAL;
@@ -193,6 +194,7 @@
 	return 0;
 
  error:
+	list_del(&fp->list); /* unlink for avoiding double-free */
 	kfree(fp);
 	kfree(rate_table);
 	return err;
@@ -469,6 +471,7 @@
 	fp->ep_attr = get_endpoint(alts, 0)->bmAttributes;
 	fp->datainterval = 0;
 	fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize);
+	INIT_LIST_HEAD(&fp->list);
 
 	switch (fp->maxpacksize) {
 	case 0x120:
@@ -492,6 +495,7 @@
 		? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK;
 	err = snd_usb_add_audio_stream(chip, stream, fp);
 	if (err < 0) {
+		list_del(&fp->list); /* unlink for avoiding double-free */
 		kfree(fp);
 		return err;
 	}

diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index 51258a1..8e9548bc 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c

@@ -36,7 +36,6 @@
 #include "format.h"
 #include "clock.h"
 #include "stream.h"
-#include "media.h"
 
 /*
  * free a substream
@@ -53,7 +52,6 @@
 		kfree(fp);
 	}
 	kfree(subs->rate_list.list);
-	media_snd_stream_delete(subs);
 }
 
 
@@ -316,7 +314,9 @@
 /*
  * add this endpoint to the chip instance.
  * if a stream with the same endpoint already exists, append to it.
- * if not, create a new pcm stream.
+ * if not, create a new pcm stream. note, fp is added to the substream
+ * fmt_list and will be freed on the chip instance release. do not free
+ * fp or do remove it from the substream fmt_list to avoid double-free.
  */
 int snd_usb_add_audio_stream(struct snd_usb_audio *chip,
 			     int stream,
@@ -677,6 +677,7 @@
 					* (fp->maxpacksize & 0x7ff);
 		fp->attributes = parse_uac_endpoint_attributes(chip, alts, protocol, iface_no);
 		fp->clock = clock;
+		INIT_LIST_HEAD(&fp->list);
 
 		/* some quirks for attributes here */
 
@@ -725,6 +726,7 @@
 		dev_dbg(&dev->dev, "%u:%d: add audio endpoint %#x\n", iface_no, altno, fp->endpoint);
 		err = snd_usb_add_audio_stream(chip, stream, fp);
 		if (err < 0) {
+			list_del(&fp->list); /* unlink for avoiding double-free */
 			kfree(fp->rate_table);
 			kfree(fp->chmap);
 			kfree(fp);

diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index a161c7c..b665d85 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h

@@ -30,9 +30,6 @@
  *
  */
 
-struct media_device;
-struct media_intf_devnode;
-
 struct snd_usb_audio {
 	int index;
 	struct usb_device *dev;
@@ -63,8 +60,6 @@
 	bool autoclock;			/* from the 'autoclock' module param */
 
 	struct usb_host_interface *ctrl_intf;	/* the audio control interface */
-	struct media_device *media_dev;
-	struct media_intf_devnode *ctl_intf_media_devnode;
 };
 
 #define usb_audio_err(chip, fmt, args...) \
@@ -115,7 +110,6 @@
 	const char *product_name;
 	int16_t ifnum;
 	uint16_t type;
-	bool media_device;
 	const void *data;
 };
 

diff --git a/tools/include/linux/stringify.h b/tools/include/linux/stringify.h
new file mode 100644
index 0000000..841cec8
--- /dev/null
+++ b/tools/include/linux/stringify.h

@@ -0,0 +1,12 @@
+#ifndef __LINUX_STRINGIFY_H
+#define __LINUX_STRINGIFY_H
+
+/* Indirect stringification.  Doing two levels allows the parameter to be a
+ * macro itself.  For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...)	#x
+#define __stringify(x...)	__stringify_1(x)
+
+#endif	/* !__LINUX_STRINGIFY_H */

diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index bbc82c6..316f308 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile

@@ -1,5 +1,5 @@
 include ../../scripts/Makefile.include
-include ../../perf/config/utilities.mak		# QUIET_CLEAN
+include ../../scripts/utilities.mak		# QUIET_CLEAN
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(shell pwd)))

diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index 5334ad9..1069d96 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh

@@ -3,7 +3,7 @@
 make &> /dev/null
 
 for i in `ls tests/*.c`; do
-	testname=$(basename -s .c "$i")
+	testname=$(basename "$i" .c)
 	gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
 	echo -ne "$testname... "
 	if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then
@@ -11,11 +11,13 @@
 	else
 		echo "FAILED!"
 	fi
-	rm tests/$testname
+	if [ -f "tests/$testname" ]; then
+		rm tests/$testname
+	fi
 done
 
 for i in `ls tests/*.c`; do
-	testname=$(basename -s .c "$i")
+	testname=$(basename "$i" .c)
 	gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null
 	echo -ne "(PRELOAD) $testname... "
 	if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then
@@ -23,5 +25,7 @@
 	else
 		echo "FAILED!"
 	fi
-	rm tests/$testname
+	if [ -f "tests/$testname" ]; then
+		rm tests/$testname
+	fi
 done

diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index 1faecb8..a810370 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile

@@ -1,5 +1,5 @@
 include ../../scripts/Makefile.include
-include ../../perf/config/utilities.mak		# QUIET_CLEAN
+include ../../scripts/utilities.mak		# QUIET_CLEAN
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(shell pwd)))

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 190cc88..a8b6357 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c

@@ -5427,10 +5427,8 @@
 	}
 
 	if (pevent->latency_format) {
-		trace_seq_printf(s, " %3d", record->cpu);
 		pevent_data_lat_fmt(pevent, s, record);
-	} else
-		trace_seq_printf(s, " [%03d]", record->cpu);
+	}
 
 	if (use_usec_format) {
 		if (pevent->flags & PEVENT_NSEC_OUTPUT) {

diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 3ba1c0b..098cfb9 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile

@@ -1,5 +1,5 @@
 include ../../scripts/Makefile.include
-include ../config/utilities.mak
+include ../../scripts/utilities.mak
 
 MAN1_TXT= \
 	$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 79483f4..ec723d0 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt

@@ -40,10 +40,12 @@
  0 - SAMPLE_IP can have arbitrary skid
  1 - SAMPLE_IP must have constant skid
  2 - SAMPLE_IP requested to have 0 skid
- 3 - SAMPLE_IP must have 0 skid
+ 3 - SAMPLE_IP must have 0 skid, or uses randomization to avoid
+     sample shadowing effects.
 
 For Intel systems precise event sampling is implemented with PEBS
-which supports up to precise-level 2.
+which supports up to precise-level 2, and precise level 3 for
+some special cases
 
 On AMD systems it is implemented using IBS (up to precise-level 2).
 The precise modifier works with event types 0x76 (cpu-cycles, CPU

diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 2e1fa23..8c8c6b9 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST

@@ -74,6 +74,7 @@
 arch/*/include/uapi/asm/perf_regs.h
 arch/*/lib/memcpy*.S
 arch/*/lib/memset*.S
+arch/*/include/asm/*features.h
 include/linux/poison.h
 include/linux/hw_breakpoint.h
 include/uapi/linux/perf_event.h

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4a4fad4..000ea21 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf

@@ -3,7 +3,7 @@
 # The default target of this Makefile is...
 all:
 
-include config/utilities.mak
+include ../scripts/utilities.mak
 
 # Define V to have a more verbose compile.
 #

diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 6c1b8a7..f8ccee1 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c

@@ -3,9 +3,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-
-#include "../../util/header.h"
-#include "../../util/util.h"
+#include <linux/stringify.h>
+#include "header.h"
+#include "util.h"
 
 #define mfspr(rn)       ({unsigned long rval; \
 			 asm volatile("mfspr %0," __stringify(rn) \

diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index a50df86..579a592 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h

@@ -25,19 +25,17 @@
 # endif
 #endif
 
-extern int bench_numa(int argc, const char **argv, const char *prefix);
-extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
-extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
-extern int bench_mem_memcpy(int argc, const char **argv,
-			    const char *prefix __maybe_unused);
-extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
-extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
-extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
-extern int bench_futex_wake_parallel(int argc, const char **argv,
-				     const char *prefix);
-extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
+int bench_numa(int argc, const char **argv, const char *prefix);
+int bench_sched_messaging(int argc, const char **argv, const char *prefix);
+int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+int bench_mem_memcpy(int argc, const char **argv, const char *prefix);
+int bench_mem_memset(int argc, const char **argv, const char *prefix);
+int bench_futex_hash(int argc, const char **argv, const char *prefix);
+int bench_futex_wake(int argc, const char **argv, const char *prefix);
+int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix);
+int bench_futex_requeue(int argc, const char **argv, const char *prefix);
 /* pi futexes */
-extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
+int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
 #define BENCH_FORMAT_DEFAULT		0

diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index 57b4ed8..5aad2a9 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h

@@ -2,7 +2,7 @@
 #ifdef HAVE_ARCH_X86_64_SUPPORT
 
 #define MEMCPY_FN(fn, name, desc)		\
-	extern void *fn(void *, const void *, size_t);
+	void *fn(void *, const void *, size_t);
 
 #include "mem-memcpy-x86-64-asm-def.h"
 

diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index 633800c..0d15786 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h

@@ -2,7 +2,7 @@
 #ifdef HAVE_ARCH_X86_64_SUPPORT
 
 #define MEMSET_FN(fn, name, desc)		\
-	extern void *fn(void *, int, size_t);
+	void *fn(void *, int, size_t);
 
 #include "mem-memset-x86-64-asm-def.h"
 

diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5049d63..7500d95 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c

@@ -293,7 +293,7 @@
 	if (node == -1)
 		return;
 
-	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask));
+	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
 	nodemask = 1L << node;
 
 	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index cfe3663..8141583 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c

@@ -94,7 +94,7 @@
 	struct addr_location al;
 	int ret = 0;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 4d72359..8053a8c 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c

@@ -330,7 +330,7 @@
 	struct hists *hists = evsel__hists(evsel);
 	int ret = -1;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;

diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 49d55e2..bc1de9b 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c

@@ -106,12 +106,14 @@
 
 	if (!check_emacsclient_version()) {
 		/* This works only with emacsclient version >= 22. */
-		struct strbuf man_page = STRBUF_INIT;
+		char *man_page;
 
 		if (!path)
 			path = "emacsclient";
-		strbuf_addf(&man_page, "(woman \"%s\")", page);
-		execlp(path, "emacsclient", "-e", man_page.buf, NULL);
+		if (asprintf(&man_page, "(woman \"%s\")", page) > 0) {
+			execlp(path, "emacsclient", "-e", man_page, NULL);
+			free(man_page);
+		}
 		warning("failed to exec '%s': %s", path,
 			strerror_r(errno, sbuf, sizeof(sbuf)));
 	}
@@ -122,7 +124,7 @@
 	const char *display = getenv("DISPLAY");
 
 	if (display && *display) {
-		struct strbuf man_page = STRBUF_INIT;
+		char *man_page;
 		const char *filename = "kfmclient";
 		char sbuf[STRERR_BUFSIZE];
 
@@ -141,8 +143,10 @@
 				filename = file;
 		} else
 			path = "kfmclient";
-		strbuf_addf(&man_page, "man:%s(1)", page);
-		execlp(path, filename, "newTab", man_page.buf, NULL);
+		if (asprintf(&man_page, "man:%s(1)", page) > 0) {
+			execlp(path, filename, "newTab", man_page, NULL);
+			free(man_page);
+		}
 		warning("failed to exec '%s': %s", path,
 			strerror_r(errno, sbuf, sizeof(sbuf)));
 	}
@@ -161,11 +165,13 @@
 
 static void exec_man_cmd(const char *cmd, const char *page)
 {
-	struct strbuf shell_cmd = STRBUF_INIT;
 	char sbuf[STRERR_BUFSIZE];
+	char *shell_cmd;
 
-	strbuf_addf(&shell_cmd, "%s %s", cmd, page);
-	execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL);
+	if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) {
+		execl("/bin/sh", "sh", "-c", shell_cmd, NULL);
+		free(shell_cmd);
+	}
 	warning("failed to exec '%s': %s", cmd,
 		strerror_r(errno, sbuf, sizeof(sbuf)));
 }
@@ -299,43 +305,33 @@
 		is_in_cmdlist(&other_cmds, s);
 }
 
-static const char *prepend(const char *prefix, const char *cmd)
-{
-	size_t pre_len = strlen(prefix);
-	size_t cmd_len = strlen(cmd);
-	char *p = malloc(pre_len + cmd_len + 1);
-	memcpy(p, prefix, pre_len);
-	strcpy(p + pre_len, cmd);
-	return p;
-}
-
 static const char *cmd_to_page(const char *perf_cmd)
 {
+	char *s;
+
 	if (!perf_cmd)
 		return "perf";
 	else if (!prefixcmp(perf_cmd, "perf"))
 		return perf_cmd;
-	else
-		return prepend("perf-", perf_cmd);
+
+	return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s;
 }
 
 static void setup_man_path(void)
 {
-	struct strbuf new_path = STRBUF_INIT;
+	char *new_path;
 	const char *old_path = getenv("MANPATH");
 
 	/* We should always put ':' after our path. If there is no
 	 * old_path, the ':' at the end will let 'man' to try
 	 * system-wide paths after ours to find the manual page. If
 	 * there is old_path, we need ':' as delimiter. */
-	strbuf_addstr(&new_path, system_path(PERF_MAN_PATH));
-	strbuf_addch(&new_path, ':');
-	if (old_path)
-		strbuf_addstr(&new_path, old_path);
-
-	setenv("MANPATH", new_path.buf, 1);
-
-	strbuf_release(&new_path);
+	if (asprintf(&new_path, "%s:%s", system_path(PERF_MAN_PATH), old_path ?: "") > 0) {
+		setenv("MANPATH", new_path, 1);
+		free(new_path);
+	} else {
+		error("Unable to setup man path");
+	}
 }
 
 static void exec_viewer(const char *name, const char *page)
@@ -380,7 +376,7 @@
 	return -1;
 }
 
-static int get_html_page_path(struct strbuf *page_path, const char *page)
+static int get_html_page_path(char **page_path, const char *page)
 {
 	struct stat st;
 	const char *html_path = system_path(PERF_HTML_PATH);
@@ -392,10 +388,7 @@
 		return -1;
 	}
 
-	strbuf_init(page_path, 0);
-	strbuf_addf(page_path, "%s/%s.html", html_path, page);
-
-	return 0;
+	return asprintf(page_path, "%s/%s.html", html_path, page);
 }
 
 /*
@@ -413,12 +406,12 @@
 static int show_html_page(const char *perf_cmd)
 {
 	const char *page = cmd_to_page(perf_cmd);
-	struct strbuf page_path; /* it leaks but we exec bellow */
+	char *page_path; /* it leaks but we exec bellow */
 
-	if (get_html_page_path(&page_path, page) != 0)
+	if (get_html_page_path(&page_path, page) < 0)
 		return -1;
 
-	open_html(page_path.buf);
+	open_html(page_path);
 
 	return 0;
 }

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 7fa6866..d1a2d10 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c

@@ -131,8 +131,7 @@
 
 static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
 				       union perf_event *event,
-				       struct perf_session *session
-				       __maybe_unused)
+				       struct perf_session *session)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject,
 						  tool);
@@ -417,9 +416,6 @@
 {
 	struct addr_location al;
 	struct thread *thread;
-	u8 cpumode;
-
-	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
 	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
 	if (thread == NULL) {
@@ -428,7 +424,7 @@
 		goto repipe;
 	}
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
 
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {

diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 88aeac9..85db3be 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c

@@ -131,7 +131,7 @@
 	struct addr_location al;
 	const char *fmt;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 				event->header.type);
 		return -1;

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 7eea49f..160ea23 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c

@@ -41,6 +41,7 @@
 
 #include <dlfcn.h>
 #include <linux/bitmap.h>
+#include <linux/stringify.h>
 
 struct report {
 	struct perf_tool	tool;
@@ -154,7 +155,7 @@
 	};
 	int ret = 0;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
 		return -1;

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 57f9a7e..3770c3d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c

@@ -405,9 +405,7 @@
 	return 0;
 }
 
-static void print_sample_iregs(union perf_event *event __maybe_unused,
-			  struct perf_sample *sample,
-			  struct thread *thread __maybe_unused,
+static void print_sample_iregs(struct perf_sample *sample,
 			  struct perf_event_attr *attr)
 {
 	struct regs_dump *regs = &sample->intr_regs;
@@ -476,10 +474,7 @@
 	return br->flags.predicted ? 'P' : 'M';
 }
 
-static void print_sample_brstack(union perf_event *event __maybe_unused,
-			  struct perf_sample *sample,
-			  struct thread *thread __maybe_unused,
-			  struct perf_event_attr *attr __maybe_unused)
+static void print_sample_brstack(struct perf_sample *sample)
 {
 	struct branch_stack *br = sample->branch_stack;
 	u64 i;
@@ -498,14 +493,11 @@
 	}
 }
 
-static void print_sample_brstacksym(union perf_event *event __maybe_unused,
-			  struct perf_sample *sample,
-			  struct thread *thread __maybe_unused,
-			  struct perf_event_attr *attr __maybe_unused)
+static void print_sample_brstacksym(struct perf_sample *sample,
+				    struct thread *thread)
 {
 	struct branch_stack *br = sample->branch_stack;
 	struct addr_location alf, alt;
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	u64 i, from, to;
 
 	if (!(br && br->nr))
@@ -518,11 +510,11 @@
 		from = br->entries[i].from;
 		to   = br->entries[i].to;
 
-		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf);
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
 		if (alf.map)
 			alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
 
-		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt);
+		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
 		if (alt.map)
 			alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
 
@@ -538,8 +530,7 @@
 }
 
 
-static void print_sample_addr(union perf_event *event,
-			  struct perf_sample *sample,
+static void print_sample_addr(struct perf_sample *sample,
 			  struct thread *thread,
 			  struct perf_event_attr *attr)
 {
@@ -550,7 +541,7 @@
 	if (!sample_addr_correlates_sym(attr))
 		return;
 
-	perf_event__preprocess_sample_addr(event, sample, thread, &al);
+	thread__resolve(thread, &al, sample);
 
 	if (PRINT_FIELD(SYM)) {
 		printf(" ");
@@ -567,8 +558,7 @@
 	}
 }
 
-static void print_sample_bts(union perf_event *event,
-			     struct perf_sample *sample,
+static void print_sample_bts(struct perf_sample *sample,
 			     struct perf_evsel *evsel,
 			     struct thread *thread,
 			     struct addr_location *al)
@@ -598,7 +588,7 @@
 	    ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
 	     !output[attr->type].user_set)) {
 		printf(" => ");
-		print_sample_addr(event, sample, thread, attr);
+		print_sample_addr(sample, thread, attr);
 	}
 
 	if (print_srcline_last)
@@ -747,7 +737,7 @@
 	return printf("%-*s", maxlen, out);
 }
 
-static void process_event(struct perf_script *script, union perf_event *event,
+static void process_event(struct perf_script *script,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al)
 {
@@ -776,7 +766,7 @@
 		print_sample_flags(sample->flags);
 
 	if (is_bts_event(attr)) {
-		print_sample_bts(event, sample, evsel, thread, al);
+		print_sample_bts(sample, evsel, thread, al);
 		return;
 	}
 
@@ -784,7 +774,7 @@
 		event_format__print(evsel->tp_format, sample->cpu,
 				    sample->raw_data, sample->raw_size);
 	if (PRINT_FIELD(ADDR))
-		print_sample_addr(event, sample, thread, attr);
+		print_sample_addr(sample, thread, attr);
 
 	if (PRINT_FIELD(DATA_SRC))
 		data_src__printf(sample->data_src);
@@ -804,12 +794,12 @@
 	}
 
 	if (PRINT_FIELD(IREGS))
-		print_sample_iregs(event, sample, thread, attr);
+		print_sample_iregs(sample, attr);
 
 	if (PRINT_FIELD(BRSTACK))
-		print_sample_brstack(event, sample, thread, attr);
+		print_sample_brstack(sample);
 	else if (PRINT_FIELD(BRSTACKSYM))
-		print_sample_brstacksym(event, sample, thread, attr);
+		print_sample_brstacksym(sample, thread);
 
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
@@ -905,7 +895,7 @@
 		return 0;
 	}
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		pr_err("problem processing %d event, skipping it.\n",
 		       event->header.type);
 		return -1;
@@ -920,7 +910,7 @@
 	if (scripting_ops)
 		scripting_ops->process_event(event, sample, evsel, &al);
 	else
-		process_event(scr, event, sample, evsel, &al);
+		process_event(scr, sample, evsel, &al);
 
 out_put:
 	addr_location__put(&al);

diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index bd7a775..40cc9bb 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c

@@ -489,7 +489,7 @@
 	if (!chain)
 		goto exit;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+	if (machine__resolve(machine, &al, sample) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 			event->header.type);
 		goto exit;

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 94af190..8332149 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c

@@ -67,6 +67,7 @@
 #include <sys/utsname.h>
 #include <sys/mman.h>
 
+#include <linux/stringify.h>
 #include <linux/types.h>
 
 static volatile int done;
@@ -728,7 +729,7 @@
 	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
 		top->exact_samples++;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0)
+	if (machine__resolve(machine, &al, sample) < 0)
 		return;
 
 	if (!top->kptr_restrict_warned &&
@@ -809,7 +810,6 @@
 	struct perf_session *session = top->session;
 	union perf_event *event;
 	struct machine *machine;
-	u8 origin;
 	int ret;
 
 	while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
@@ -822,12 +822,10 @@
 		evsel = perf_evlist__id2evsel(session->evlist, sample.id);
 		assert(evsel != NULL);
 
-		origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
 		if (event->header.type == PERF_RECORD_SAMPLE)
 			++top->samples;
 
-		switch (origin) {
+		switch (sample.cpumode) {
 		case PERF_RECORD_MISC_USER:
 			++top->us_samples;
 			if (top->hide_user_symbols)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8dc98c5..93ac724 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c

@@ -2256,11 +2256,10 @@
 
 static int trace__pgfault(struct trace *trace,
 			  struct perf_evsel *evsel,
-			  union perf_event *event,
+			  union perf_event *event __maybe_unused,
 			  struct perf_sample *sample)
 {
 	struct thread *thread;
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct addr_location al;
 	char map_type = 'd';
 	struct thread_trace *ttrace;
@@ -2279,7 +2278,7 @@
 	if (trace->summary_only)
 		goto out;
 
-	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
 			      sample->ip, &al);
 
 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
@@ -2292,11 +2291,11 @@
 
 	fprintf(trace->output, "] => ");
 
-	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
+	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
 				   sample->addr, &al);
 
 	if (!al.map) {
-		thread__find_addr_location(thread, cpumode,
+		thread__find_addr_location(thread, sample->cpumode,
 					   MAP__FUNCTION, sample->addr, &al);
 
 		if (al.map)

diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 3f871b5..41c24010 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h

@@ -7,38 +7,38 @@
 extern const char perf_usage_string[];
 extern const char perf_more_info_string[];
 
-extern void list_common_cmds_help(void);
-extern const char *help_unknown_cmd(const char *cmd);
-extern void prune_packed_objects(int);
-extern int read_line_with_nul(char *buf, int size, FILE *file);
-extern int check_pager_config(const char *cmd);
+void list_common_cmds_help(void);
+const char *help_unknown_cmd(const char *cmd);
+void prune_packed_objects(int);
+int read_line_with_nul(char *buf, int size, FILE *file);
+int check_pager_config(const char *cmd);
 
-extern int cmd_annotate(int argc, const char **argv, const char *prefix);
-extern int cmd_bench(int argc, const char **argv, const char *prefix);
-extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
-extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
-extern int cmd_config(int argc, const char **argv, const char *prefix);
-extern int cmd_diff(int argc, const char **argv, const char *prefix);
-extern int cmd_evlist(int argc, const char **argv, const char *prefix);
-extern int cmd_help(int argc, const char **argv, const char *prefix);
-extern int cmd_sched(int argc, const char **argv, const char *prefix);
-extern int cmd_list(int argc, const char **argv, const char *prefix);
-extern int cmd_record(int argc, const char **argv, const char *prefix);
-extern int cmd_report(int argc, const char **argv, const char *prefix);
-extern int cmd_stat(int argc, const char **argv, const char *prefix);
-extern int cmd_timechart(int argc, const char **argv, const char *prefix);
-extern int cmd_top(int argc, const char **argv, const char *prefix);
-extern int cmd_script(int argc, const char **argv, const char *prefix);
-extern int cmd_version(int argc, const char **argv, const char *prefix);
-extern int cmd_probe(int argc, const char **argv, const char *prefix);
-extern int cmd_kmem(int argc, const char **argv, const char *prefix);
-extern int cmd_lock(int argc, const char **argv, const char *prefix);
-extern int cmd_kvm(int argc, const char **argv, const char *prefix);
-extern int cmd_test(int argc, const char **argv, const char *prefix);
-extern int cmd_trace(int argc, const char **argv, const char *prefix);
-extern int cmd_inject(int argc, const char **argv, const char *prefix);
-extern int cmd_mem(int argc, const char **argv, const char *prefix);
-extern int cmd_data(int argc, const char **argv, const char *prefix);
+int cmd_annotate(int argc, const char **argv, const char *prefix);
+int cmd_bench(int argc, const char **argv, const char *prefix);
+int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
+int cmd_buildid_list(int argc, const char **argv, const char *prefix);
+int cmd_config(int argc, const char **argv, const char *prefix);
+int cmd_diff(int argc, const char **argv, const char *prefix);
+int cmd_evlist(int argc, const char **argv, const char *prefix);
+int cmd_help(int argc, const char **argv, const char *prefix);
+int cmd_sched(int argc, const char **argv, const char *prefix);
+int cmd_list(int argc, const char **argv, const char *prefix);
+int cmd_record(int argc, const char **argv, const char *prefix);
+int cmd_report(int argc, const char **argv, const char *prefix);
+int cmd_stat(int argc, const char **argv, const char *prefix);
+int cmd_timechart(int argc, const char **argv, const char *prefix);
+int cmd_top(int argc, const char **argv, const char *prefix);
+int cmd_script(int argc, const char **argv, const char *prefix);
+int cmd_version(int argc, const char **argv, const char *prefix);
+int cmd_probe(int argc, const char **argv, const char *prefix);
+int cmd_kmem(int argc, const char **argv, const char *prefix);
+int cmd_lock(int argc, const char **argv, const char *prefix);
+int cmd_kvm(int argc, const char **argv, const char *prefix);
+int cmd_test(int argc, const char **argv, const char *prefix);
+int cmd_trace(int argc, const char **argv, const char *prefix);
+int cmd_inject(int argc, const char **argv, const char *prefix);
+int cmd_mem(int argc, const char **argv, const char *prefix);
+int cmd_data(int argc, const char **argv, const char *prefix);
 
-extern int find_scripts(char **scripts_array, char **scripts_path_array);
+int find_scripts(char **scripts_array, char **scripts_path_array);
 #endif

diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index eca6a91..f7d7f5a 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile

@@ -109,7 +109,7 @@
   CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
 endif
 
-include $(src-perf)/config/utilities.mak
+include $(srctree)/tools/scripts/utilities.mak
 
 ifeq ($(call get-executable,$(FLEX)),)
   dummy := $(error Error: $(FLEX) is missing on this system, please install it)

diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index afc9ad0..abd3f0e 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c

@@ -293,7 +293,6 @@
 {
 	struct perf_sample sample;
 	struct thread *thread;
-	u8 cpumode;
 	int ret;
 
 	if (perf_evlist__parse_sample(evlist, event, &sample)) {
@@ -307,9 +306,7 @@
 		return -1;
 	}
 
-	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-	ret = read_object_code(sample.ip, READLEN, cpumode, thread, state);
+	ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
 	thread__put(thread);
 	return ret;
 }

diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 1c5c022..8f6eb85 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c

@@ -20,10 +20,10 @@
 
 static int mmap_handler(struct perf_tool *tool __maybe_unused,
 			union perf_event *event,
-			struct perf_sample *sample __maybe_unused,
+			struct perf_sample *sample,
 			struct machine *machine)
 {
-	return machine__process_mmap2_event(machine, event, NULL);
+	return machine__process_mmap2_event(machine, event, sample);
 }
 
 static int init_live_machine(struct machine *machine)

diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index 071a8b5..f55f4bd 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c

@@ -100,9 +100,11 @@
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
+		struct perf_sample sample = {
+			.cpumode = PERF_RECORD_MISC_USER,
+		};
 		union perf_event fake_mmap_event = {
 			.mmap = {
-				.header = { .misc = PERF_RECORD_MISC_USER, },
 				.pid = fake_mmap_info[i].pid,
 				.tid = fake_mmap_info[i].pid,
 				.start = fake_mmap_info[i].start,
@@ -114,7 +116,7 @@
 		strcpy(fake_mmap_event.mmap.filename,
 		       fake_mmap_info[i].filename);
 
-		machine__process_mmap_event(machine, &fake_mmap_event, NULL);
+		machine__process_mmap_event(machine, &fake_mmap_event, &sample);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {

diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index ecf136c..ed5aa9e 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c

@@ -81,11 +81,6 @@
 	size_t i;
 
 	for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
-		const union perf_event event = {
-			.header = {
-				.misc = PERF_RECORD_MISC_USER,
-			},
-		};
 		struct hist_entry_iter iter = {
 			.evsel = evsel,
 			.sample	= &sample,
@@ -97,13 +92,13 @@
 		else
 			iter.ops = &hist_iter_normal;
 
+		sample.cpumode = PERF_RECORD_MISC_USER;
 		sample.pid = fake_samples[i].pid;
 		sample.tid = fake_samples[i].pid;
 		sample.ip = fake_samples[i].ip;
 		sample.callchain = (struct ip_callchain *)fake_callchains[i];
 
-		if (perf_event__preprocess_sample(&event, machine, &al,
-						  &sample) < 0)
+		if (machine__resolve(machine, &al, &sample) < 0)
 			goto out;
 
 		if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,

diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 34b945a..b825d24 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c

@@ -58,11 +58,6 @@
 	 */
 	evlist__for_each(evlist, evsel) {
 		for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
-			const union perf_event event = {
-				.header = {
-					.misc = PERF_RECORD_MISC_USER,
-				},
-			};
 			struct hist_entry_iter iter = {
 				.evsel = evsel,
 				.sample = &sample,
@@ -76,12 +71,12 @@
 			hists->dso_filter = NULL;
 			hists->symbol_filter_str = NULL;
 
+			sample.cpumode = PERF_RECORD_MISC_USER;
 			sample.pid = fake_samples[i].pid;
 			sample.tid = fake_samples[i].pid;
 			sample.ip = fake_samples[i].ip;
 
-			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample) < 0)
+			if (machine__resolve(machine, &al, &sample) < 0)
 				goto out;
 
 			al.socket = fake_samples[i].socket;

diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 64b257d..358324e 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c

@@ -76,17 +76,12 @@
 		struct hists *hists = evsel__hists(evsel);
 
 		for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
-			const union perf_event event = {
-				.header = {
-					.misc = PERF_RECORD_MISC_USER,
-				},
-			};
-
+			sample.cpumode = PERF_RECORD_MISC_USER;
 			sample.pid = fake_common_samples[k].pid;
 			sample.tid = fake_common_samples[k].pid;
 			sample.ip = fake_common_samples[k].ip;
-			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample) < 0)
+
+			if (machine__resolve(machine, &al, &sample) < 0)
 				goto out;
 
 			he = __hists__add_entry(hists, &al, NULL,
@@ -102,17 +97,10 @@
 		}
 
 		for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {
-			const union perf_event event = {
-				.header = {
-					.misc = PERF_RECORD_MISC_USER,
-				},
-			};
-
 			sample.pid = fake_samples[i][k].pid;
 			sample.tid = fake_samples[i][k].pid;
 			sample.ip = fake_samples[i][k].ip;
-			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample) < 0)
+			if (machine__resolve(machine, &al, &sample) < 0)
 				goto out;
 
 			he = __hists__add_entry(hists, &al, NULL,

diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index 23cce67..d3556fb 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c

@@ -51,11 +51,6 @@
 	size_t i;
 
 	for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
-		const union perf_event event = {
-			.header = {
-				.misc = PERF_RECORD_MISC_USER,
-			},
-		};
 		struct hist_entry_iter iter = {
 			.evsel = evsel,
 			.sample = &sample,
@@ -63,13 +58,13 @@
 			.hide_unresolved = false,
 		};
 
+		sample.cpumode = PERF_RECORD_MISC_USER;
 		sample.cpu = fake_samples[i].cpu;
 		sample.pid = fake_samples[i].pid;
 		sample.tid = fake_samples[i].pid;
 		sample.ip = fake_samples[i].ip;
 
-		if (perf_event__preprocess_sample(&event, machine, &al,
-						  &sample) < 0)
+		if (machine__resolve(machine, &al, &sample) < 0)
 			goto out;
 
 		if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,

diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg
index 238aa39..f2d9c5f 100755
--- a/tools/perf/tests/perf-targz-src-pkg
+++ b/tools/perf/tests/perf-targz-src-pkg

@@ -15,7 +15,7 @@
 tar xf ${TARBALL} -C $TMP_DEST
 rm -f ${TARBALL}
 cd - > /dev/null
-make -C $TMP_DEST/perf*/tools/perf > /dev/null 2>&1
+make -C $TMP_DEST/perf*/tools/perf > /dev/null
 RC=$?
 rm -rf ${TMP_DEST}
 exit $RC

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 4b98165..2a83414 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c

@@ -337,7 +337,7 @@
 	chain = list_entry(node->val.next, struct callchain_list, list);
 	chain->has_children = has_sibling;
 
-	if (node->val.next != node->val.prev) {
+	if (!list_empty(&node->val)) {
 		chain = list_entry(node->val.prev, struct callchain_list, list);
 		chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
 	}

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index bd9bf7e..2aa45b6 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c

@@ -55,7 +55,7 @@
 	return he->stat_acc->_field;						\
 }										\
 										\
-static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt,		\
 				       struct perf_hpp *hpp,			\
 				       struct hist_entry *he)			\
 {										\

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index eea25e2..da48fd8 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build

@@ -1,4 +1,3 @@
-libperf-y += abspath.o
 libperf-y += alias.o
 libperf-y += annotate.o
 libperf-y += build-id.o

diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c
deleted file mode 100644
index 0e76aff..0000000
--- a/tools/perf/util/abspath.c
+++ /dev/null

@@ -1,37 +0,0 @@
-#include "cache.h"
-
-static const char *get_pwd_cwd(void)
-{
-	static char cwd[PATH_MAX + 1];
-	char *pwd;
-	struct stat cwd_stat, pwd_stat;
-	if (getcwd(cwd, PATH_MAX) == NULL)
-		return NULL;
-	pwd = getenv("PWD");
-	if (pwd && strcmp(pwd, cwd)) {
-		stat(cwd, &cwd_stat);
-		if (!stat(pwd, &pwd_stat) &&
-		    pwd_stat.st_dev == cwd_stat.st_dev &&
-		    pwd_stat.st_ino == cwd_stat.st_ino) {
-			strlcpy(cwd, pwd, PATH_MAX);
-		}
-	}
-	return cwd;
-}
-
-const char *make_nonrelative_path(const char *path)
-{
-	static char buf[PATH_MAX + 1];
-
-	if (is_absolute_path(path)) {
-		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
-			die("Too long path: %.*s", 60, path);
-	} else {
-		const char *cwd = get_pwd_cwd();
-		if (!cwd)
-			die("Cannot determine the current working directory");
-		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
-			die("Too long path: %.*s", 60, path);
-	}
-	return buf;
-}

diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index cea323d..9241f8c 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h

@@ -158,7 +158,7 @@
 
 int hist_entry__annotate(struct hist_entry *he, size_t privsize);
 
-int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym);
+int symbol__annotate_init(struct map *map, struct symbol *sym);
 int symbol__annotate_printf(struct symbol *sym, struct map *map,
 			    struct perf_evsel *evsel, bool full_paths,
 			    int min_pcnt, int max_lines, int context);

diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e5a8e2d..57ff31e 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h

@@ -517,7 +517,7 @@
 
 static inline struct auxtrace_record *
 auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
-		      int *err __maybe_unused)
+		      int *err)
 {
 	*err = 0;
 	return NULL;

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index f1479ee..0573c2e 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c

@@ -28,7 +28,6 @@
 			   struct machine *machine)
 {
 	struct addr_location al;
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
 							sample->tid);
 
@@ -38,7 +37,7 @@
 		return -1;
 	}
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
 
 	if (al.map != NULL)
 		al.map->dso->hit = 1;

diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 3ca453f..1f5a93c 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h

@@ -26,14 +26,14 @@
 extern const char *config_exclusive_filename;
 
 typedef int (*config_fn_t)(const char *, const char *, void *);
-extern int perf_default_config(const char *, const char *, void *);
-extern int perf_config(config_fn_t fn, void *);
-extern int perf_config_int(const char *, const char *);
-extern u64 perf_config_u64(const char *, const char *);
-extern int perf_config_bool(const char *, const char *);
-extern int config_error_nonbool(const char *);
-extern const char *perf_config_dirname(const char *, const char *);
-extern const char *perf_etc_perfconfig(void);
+int perf_default_config(const char *, const char *, void *);
+int perf_config(config_fn_t fn, void *);
+int perf_config_int(const char *, const char *);
+u64 perf_config_u64(const char *, const char *);
+int perf_config_bool(const char *, const char *);
+int config_error_nonbool(const char *);
+const char *perf_config_dirname(const char *, const char *);
+const char *perf_etc_perfconfig(void);
 
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
@@ -64,13 +64,9 @@
 	return path[0] == '/';
 }
 
-const char *make_nonrelative_path(const char *path);
 char *strip_path_suffix(const char *path, const char *suffix);
 
-extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-
-extern char *perf_pathdup(const char *fmt, ...)
-	__attribute__((format (printf, 1, 2)));
+char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
 
 #endif /* __PERF_CACHE_H */

diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 18dd222..d2a9e69 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h

@@ -220,7 +220,7 @@
 			bool hide_unresolved);
 
 extern const char record_callchain_help[];
-extern int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record(const char *arg, struct callchain_param *param);
 int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
 int parse_callchain_report_opt(const char *arg);
 int parse_callchain_top_opt(const char *arg);
@@ -236,7 +236,7 @@
 }
 
 #ifdef HAVE_SKIP_CALLCHAIN_IDX
-extern int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
 #else
 static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
 			struct ip_callchain *chain __maybe_unused)

diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index b4b8cb4..31f8dcd 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h

@@ -13,7 +13,7 @@
 
 
 extern int nr_cgroups; /* number of explicit cgroups defined */
-extern void close_cgroup(struct cgroup_sel *cgrp);
-extern int parse_cgroups(const struct option *opt, const char *str, int unset);
+void close_cgroup(struct cgroup_sel *cgrp);
+int parse_cgroups(const struct option *opt, const char *str, int unset);
 
 #endif /* __CGROUP_H__ */

diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
index 3bee677..d0d4659 100644
--- a/tools/perf/util/cloexec.h
+++ b/tools/perf/util/cloexec.h

@@ -5,7 +5,7 @@
 
 #ifdef __GLIBC_PREREQ
 #if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__)
-extern int sched_getcpu(void) __THROW;
+int sched_getcpu(void) __THROW;
 #endif
 #endif
 

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 811af89..bbf69d2 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c

@@ -632,7 +632,7 @@
 }
 
 static int process_sample_event(struct perf_tool *tool,
-				union perf_event *_event __maybe_unused,
+				union perf_event *_event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
 				struct machine *machine __maybe_unused)

diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 1c9689e..049438d 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c

@@ -333,7 +333,7 @@
 	    sample_addr_correlates_sym(&evsel->attr)) {
 		struct addr_location addr_al;
 
-		perf_event__preprocess_sample_addr(event, sample, thread, &addr_al);
+		thread__resolve(thread, &addr_al, sample);
 		err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
 				     &es.addr_sym_db_id, &es.addr_offset);
 		if (err)

diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 45ec4d0..0953280 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h

@@ -162,6 +162,7 @@
 	u8		 loaded;
 	u8		 rel;
 	u8		 build_id[BUILD_ID_SIZE];
+	u64		 text_offset;
 	const char	 *short_name;
 	const char	 *long_name;
 	u16		 long_name_len;
@@ -301,7 +302,7 @@
  * TODO
 */
 int dso__data_get_fd(struct dso *dso, struct machine *machine);
-void dso__data_put_fd(struct dso *dso __maybe_unused);
+void dso__data_put_fd(struct dso *dso);
 void dso__data_close(struct dso *dso);
 
 off_t dso__data_size(struct dso *dso, struct machine *machine);

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index a509aa84..577e600 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c

@@ -915,7 +915,7 @@
 		tmp = "*";
 	else if (tag == DW_TAG_subroutine_type) {
 		/* Function pointer */
-		strbuf_addf(buf, "(function_type)");
+		strbuf_add(buf, "(function_type)", 15);
 		return 0;
 	} else {
 		if (!dwarf_diename(&type))
@@ -932,7 +932,7 @@
 	}
 	ret = die_get_typename(&type, buf);
 	if (ret == 0)
-		strbuf_addf(buf, "%s", tmp);
+		strbuf_addstr(buf, tmp);
 
 	return ret;
 }
@@ -951,7 +951,7 @@
 	ret = die_get_typename(vr_die, buf);
 	if (ret < 0) {
 		pr_debug("Failed to get type, make it unknown.\n");
-		strbuf_addf(buf, "(unknown_type)");
+		strbuf_add(buf, " (unknown_type)", 14);
 	}
 
 	strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
@@ -1013,7 +1013,7 @@
 	}
 
 	if (!first)
-		strbuf_addf(buf, "]>");
+		strbuf_add(buf, "]>", 2);
 
 out:
 	free(scopes);
@@ -1076,7 +1076,7 @@
 	}
 
 	if (!first)
-		strbuf_addf(buf, "]>");
+		strbuf_add(buf, "]>", 2);
 
 	return ret;
 }

diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index c42ec36..dc0ce1a 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h

@@ -25,48 +25,48 @@
 #include <elfutils/version.h>
 
 /* Find the realpath of the target file */
-extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
 
 /* Get DW_AT_comp_dir (should be NULL with older gcc) */
-extern const char *cu_get_comp_dir(Dwarf_Die *cu_die);
+const char *cu_get_comp_dir(Dwarf_Die *cu_die);
 
 /* Get a line number and file name for given address */
-extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
-			    const char **fname, int *lineno);
+int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
+		     const char **fname, int *lineno);
 
 /* Walk on funcitons at given address */
-extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
-			int (*callback)(Dwarf_Die *, void *), void *data);
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			 int (*callback)(Dwarf_Die *, void *), void *data);
 
 /* Ensure that this DIE is a subprogram and definition (not declaration) */
-extern bool die_is_func_def(Dwarf_Die *dw_die);
+bool die_is_func_def(Dwarf_Die *dw_die);
 
 /* Ensure that this DIE is an instance of a subprogram */
-extern bool die_is_func_instance(Dwarf_Die *dw_die);
+bool die_is_func_instance(Dwarf_Die *dw_die);
 
 /* Compare diename and tname */
-extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
 
 /* Matching diename with glob pattern */
-extern bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+bool die_match_name(Dwarf_Die *dw_die, const char *glob);
 
 /* Get callsite line number of inline-function instance */
-extern int die_get_call_lineno(Dwarf_Die *in_die);
+int die_get_call_lineno(Dwarf_Die *in_die);
 
 /* Get callsite file name of inlined function instance */
-extern const char *die_get_call_file(Dwarf_Die *in_die);
+const char *die_get_call_file(Dwarf_Die *in_die);
 
 /* Get type die */
-extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
 
 /* Get a type die, but skip qualifiers and typedef */
-extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
 
 /* Check whether the DIE is signed or not */
-extern bool die_is_signed_type(Dwarf_Die *tp_die);
+bool die_is_signed_type(Dwarf_Die *tp_die);
 
 /* Get data_member_location offset */
-extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
 
 /* Return values for die_find_child() callbacks */
 enum {
@@ -77,29 +77,29 @@
 };
 
 /* Search child DIEs */
-extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
-				 int (*callback)(Dwarf_Die *, void *),
-				 void *data, Dwarf_Die *die_mem);
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+			 int (*callback)(Dwarf_Die *, void *),
+			 void *data, Dwarf_Die *die_mem);
 
 /* Search a non-inlined function including given address */
-extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
-				    Dwarf_Die *die_mem);
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			     Dwarf_Die *die_mem);
 
 /* Search a non-inlined function with tail call at given address */
 Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
 				    Dwarf_Die *die_mem);
 
 /* Search the top inlined function including given address */
-extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
-					  Dwarf_Die *die_mem);
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+				   Dwarf_Die *die_mem);
 
 /* Search the deepest inlined function including given address */
-extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
-				      Dwarf_Die *die_mem);
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+			       Dwarf_Die *die_mem);
 
 /* Walk on the instances of given DIE */
-extern int die_walk_instances(Dwarf_Die *in_die,
-			      int (*callback)(Dwarf_Die *, void *), void *data);
+int die_walk_instances(Dwarf_Die *in_die,
+		       int (*callback)(Dwarf_Die *, void *), void *data);
 
 /* Walker on lines (Note: line number will not be sorted) */
 typedef int (* line_walk_callback_t) (const char *fname, int lineno,
@@ -109,22 +109,20 @@
  * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
  * the lines inside the subprogram, otherwise the DIE must be a CU DIE.
  */
-extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback,
-			  void *data);
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data);
 
 /* Find a variable called 'name' at given address */
-extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
-				       Dwarf_Addr addr, Dwarf_Die *die_mem);
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+				Dwarf_Addr addr, Dwarf_Die *die_mem);
 
 /* Find a member called 'name' */
-extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
-				  Dwarf_Die *die_mem);
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+			   Dwarf_Die *die_mem);
 
 /* Get the name of given variable DIE */
-extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Get the name and type of given variable DIE, stored as "type\tname" */
-extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
-extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
-			struct strbuf *buf);
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
 #endif

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 7bad5c3..dad55d0 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c

@@ -56,13 +56,22 @@
 	return perf_event__names[id];
 }
 
-static struct perf_sample synth_sample = {
+static int perf_tool__process_synth_event(struct perf_tool *tool,
+					  union perf_event *event,
+					  struct machine *machine,
+					  perf_event__handler_t process)
+{
+	struct perf_sample synth_sample = {
 	.pid	   = -1,
 	.tid	   = -1,
 	.time	   = -1,
 	.stream_id = -1,
 	.cpu	   = -1,
 	.period	   = 1,
+	.cpumode   = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+	};
+
+	return process(tool, event, &synth_sample, machine);
 };
 
 /*
@@ -186,7 +195,7 @@
 	if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
 		return -1;
 
-	if (process(tool, event, &synth_sample, machine) != 0)
+	if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
 		return -1;
 
 	return tgid;
@@ -218,7 +227,7 @@
 
 	event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
 
-	if (process(tool, event, &synth_sample, machine) != 0)
+	if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
 		return -1;
 
 	return 0;
@@ -344,7 +353,7 @@
 		event->mmap2.pid = tgid;
 		event->mmap2.tid = pid;
 
-		if (process(tool, event, &synth_sample, machine) != 0) {
+		if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
 			rc = -1;
 			break;
 		}
@@ -402,7 +411,7 @@
 
 		memcpy(event->mmap.filename, pos->dso->long_name,
 		       pos->dso->long_name_len + 1);
-		if (process(tool, event, &synth_sample, machine) != 0) {
+		if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
 			rc = -1;
 			break;
 		}
@@ -472,7 +481,7 @@
 		/*
 		 * Send the prepared comm event
 		 */
-		if (process(tool, comm_event, &synth_sample, machine) != 0)
+		if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
 			break;
 
 		rc = 0;
@@ -701,7 +710,7 @@
 	event->mmap.len   = map->end - event->mmap.start;
 	event->mmap.pid   = machine->pid;
 
-	err = process(tool, event, &synth_sample, machine);
+	err = perf_tool__process_synth_event(tool, event, machine, process);
 	free(event);
 
 	return err;
@@ -1295,12 +1304,9 @@
  * Callers need to drop the reference to al->thread, obtained in
  * machine__findnew_thread()
  */
-int perf_event__preprocess_sample(const union perf_event *event,
-				  struct machine *machine,
-				  struct addr_location *al,
-				  struct perf_sample *sample)
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample)
 {
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
 							sample->tid);
 
@@ -1315,11 +1321,11 @@
 	 * events, but for older perf.data files there was no such thing, so do
 	 * it now.
 	 */
-	if (cpumode == PERF_RECORD_MISC_KERNEL &&
+	if (sample->cpumode == PERF_RECORD_MISC_KERNEL &&
 	    machine__kernel_map(machine) == NULL)
 		machine__create_kernel_maps(machine);
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -1395,16 +1401,12 @@
 	return false;
 }
 
-void perf_event__preprocess_sample_addr(union perf_event *event,
-					struct perf_sample *sample,
-					struct thread *thread,
-					struct addr_location *al)
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample)
 {
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->addr, al);
+	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al);
 	if (!al->map)
-		thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
+		thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE,
 				      sample->addr, al);
 
 	al->cpu = sample->cpu;

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b7ffb7e..6bb1c92 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h

@@ -192,6 +192,7 @@
 	u64 data_src;
 	u32 flags;
 	u16 insn_len;
+	u8  cpumode;
 	void *raw_data;
 	struct ip_callchain *callchain;
 	struct branch_stack *branch_stack;
@@ -597,10 +598,8 @@
 
 struct addr_location;
 
-int perf_event__preprocess_sample(const union perf_event *event,
-				  struct machine *machine,
-				  struct addr_location *al,
-				  struct perf_sample *sample);
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample);
 
 void addr_location__put(struct addr_location *al);
 
@@ -608,10 +607,8 @@
 
 bool is_bts_event(struct perf_event_attr *attr);
 bool sample_addr_correlates_sym(struct perf_event_attr *attr);
-void perf_event__preprocess_sample_addr(union perf_event *event,
-					struct perf_sample *sample,
-					struct thread *thread,
-					struct addr_location *al);
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample);
 
 const char *perf_event__name(unsigned int id);
 

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0902fe4..738ce22 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c

@@ -1643,6 +1643,7 @@
 	data->stream_id = data->id = data->time = -1ULL;
 	data->period = evsel->attr.sample_period;
 	data->weight = 0;
+	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
 	if (event->header.type != PERF_RECORD_SAMPLE) {
 		if (!evsel->attr.sample_id_all)

diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 45bf9c6..2fbeb59 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h

@@ -2,45 +2,39 @@
 #define __GENELF_H__
 
 /* genelf.c */
-extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
-			 const void *code, int csize,
-			 void *debug, int nr_debug_entries);
+int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+		  const void *code, int csize, void *debug, int nr_debug_entries);
 /* genelf_debug.c */
-extern int jit_add_debug_info(Elf *e, uint64_t code_addr,
-			      void *debug, int nr_debug_entries);
+int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
 
 #if   defined(__arm__)
 #define GEN_ELF_ARCH	EM_ARM
-#define GEN_ELF_ENDIAN	ELFDATA2LSB
 #define GEN_ELF_CLASS	ELFCLASS32
 #elif defined(__aarch64__)
 #define GEN_ELF_ARCH	EM_AARCH64
-#define GEN_ELF_ENDIAN	ELFDATA2LSB
 #define GEN_ELF_CLASS	ELFCLASS64
 #elif defined(__x86_64__)
 #define GEN_ELF_ARCH	EM_X86_64
-#define GEN_ELF_ENDIAN	ELFDATA2LSB
 #define GEN_ELF_CLASS	ELFCLASS64
 #elif defined(__i386__)
 #define GEN_ELF_ARCH	EM_386
-#define GEN_ELF_ENDIAN	ELFDATA2LSB
 #define GEN_ELF_CLASS	ELFCLASS32
-#elif defined(__ppcle__)
-#define GEN_ELF_ARCH	EM_PPC
-#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#elif defined(__powerpc64__)
+#define GEN_ELF_ARCH	EM_PPC64
 #define GEN_ELF_CLASS	ELFCLASS64
 #elif defined(__powerpc__)
-#define GEN_ELF_ARCH	EM_PPC64
-#define GEN_ELF_ENDIAN	ELFDATA2MSB
-#define GEN_ELF_CLASS	ELFCLASS64
-#elif defined(__powerpcle__)
-#define GEN_ELF_ARCH	EM_PPC64
-#define GEN_ELF_ENDIAN	ELFDATA2LSB
-#define GEN_ELF_CLASS	ELFCLASS64
+#define GEN_ELF_ARCH	EM_PPC
+#define GEN_ELF_CLASS	ELFCLASS32
 #else
 #error "unsupported architecture"
 #endif
 
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define GEN_ELF_ENDIAN	ELFDATA2MSB
+#else
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#endif
+
 #if GEN_ELF_CLASS == ELFCLASS64
 #define elf_newehdr	elf64_newehdr
 #define elf_getshdr	elf64_getshdr

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 73e38e4..90680ec 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c

@@ -1872,11 +1872,6 @@
 		if (ph->needs_swap)
 			nr = bswap_32(nr);
 
-		if (nr > (u32)cpu_nr) {
-			pr_debug("core_id number is too big."
-				 "You may need to upgrade the perf tool.\n");
-			goto free_cpu;
-		}
 		ph->env.cpu[i].core_id = nr;
 
 		ret = readn(fd, &nr, sizeof(nr));

diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 3d87ca8..d306ca1 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h

@@ -121,7 +121,7 @@
 					     perf_event__handler_t process);
 int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
 			     struct perf_evlist **pevlist);
-int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+int perf_event__process_event_update(struct perf_tool *tool,
 				     union perf_event *event,
 				     struct perf_evlist **pevlist);
 size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 290b3cb..31c4641 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c

@@ -670,7 +670,7 @@
 }
 
 static int
-iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
+iter_add_single_branch_entry(struct hist_entry_iter *iter,
 			     struct addr_location *al __maybe_unused)
 {
 	/* to avoid calling callback function */

diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ead18c8..bec0cd6 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h

@@ -433,8 +433,7 @@
 			  struct perf_sample *sample, bool nonany_branch_mode);
 
 struct option;
-int parse_filter_percentage(const struct option *opt __maybe_unused,
-			    const char *arg, int unset __maybe_unused);
+int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
 int perf_hist_config(const char *var, const char *value);
 
 void perf_hpp_list__init(struct perf_hpp_list *list);

diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index eb0e7f8..abf1366 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c

@@ -279,6 +279,7 @@
 	event.sample.header.misc = PERF_RECORD_MISC_USER;
 	event.sample.header.size = sizeof(struct perf_event_header);
 
+	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.ip = le64_to_cpu(branch->from);
 	sample.pid = btsq->pid;
 	sample.tid = btsq->tid;
@@ -678,7 +679,7 @@
 	return 0;
 }
 
-static int intel_bts_flush(struct perf_session *session __maybe_unused,
+static int intel_bts_flush(struct perf_session *session,
 			   struct perf_tool *tool __maybe_unused)
 {
 	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 05d8158..407f11b 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c

@@ -979,6 +979,7 @@
 	if (!pt->timeless_decoding)
 		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 
+	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.ip = ptq->state->from_ip;
 	sample.pid = ptq->pid;
 	sample.tid = ptq->tid;
@@ -1035,6 +1036,7 @@
 	if (!pt->timeless_decoding)
 		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 
+	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.ip = ptq->state->from_ip;
 	sample.pid = ptq->pid;
 	sample.tid = ptq->tid;
@@ -1092,6 +1094,7 @@
 	if (!pt->timeless_decoding)
 		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 
+	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.ip = ptq->state->from_ip;
 	sample.pid = ptq->pid;
 	sample.tid = ptq->tid;

diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
index a1e99da..3f42ee4 100644
--- a/tools/perf/util/jit.h
+++ b/tools/perf/util/jit.h

@@ -3,13 +3,9 @@
 
 #include <data.h>
 
-extern int jit_process(struct perf_session *session,
-		       struct perf_data_file *output,
-		       struct machine *machine,
-		       char *filename,
-		       pid_t pid,
-		       u64 *nbytes);
+int jit_process(struct perf_session *session, struct perf_data_file *output,
+		struct machine *machine, char *filename, pid_t pid, u64 *nbytes);
 
-extern int jit_inject_record(const char *filename);
+int jit_inject_record(const char *filename);
 
 #endif /* __JIT_H__ */

diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index cd272cc..ad0c0bb 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c

@@ -417,6 +417,7 @@
 	 * use first address as sample address
 	 */
 	memset(&sample, 0, sizeof(sample));
+	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.pid  = pid;
 	sample.tid  = tid;
 	sample.time = id->time;
@@ -505,6 +506,7 @@
 	 * use first address as sample address
 	 */
 	memset(&sample, 0, sizeof(sample));
+	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.pid  = pid;
 	sample.tid  = tid;
 	sample.time = id->time;

diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 00724d4..33071d6 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c

@@ -3,11 +3,11 @@
  * Copyright (C) 2015, Huawei Inc.
  */
 
+#include <limits.h>
 #include <stdio.h>
-#include "util.h"
+#include <stdlib.h>
 #include "debug.h"
 #include "llvm-utils.h"
-#include "cache.h"
 
 #define CLANG_BPF_CMD_DEFAULT_TEMPLATE				\
 		"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
@@ -98,11 +98,12 @@
 	void *buf = NULL;
 	FILE *file = NULL;
 	size_t read_sz = 0, buf_sz = 0;
+	char serr[STRERR_BUFSIZE];
 
 	file = popen(cmd, "r");
 	if (!file) {
 		pr_err("ERROR: unable to popen cmd: %s\n",
-		       strerror(errno));
+		       strerror_r(errno, serr, sizeof(serr)));
 		return -EINVAL;
 	}
 
@@ -136,7 +137,7 @@
 
 	if (ferror(file)) {
 		pr_err("ERROR: error occurred when reading from pipe: %s\n",
-		       strerror(errno));
+		       strerror_r(errno, serr, sizeof(serr)));
 		err = -EIO;
 		goto errout;
 	}
@@ -334,10 +335,18 @@
 	unsigned int kernel_version;
 	char linux_version_code_str[64];
 	const char *clang_opt = llvm_param.clang_opt;
-	char clang_path[PATH_MAX], nr_cpus_avail_str[64];
+	char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
+	char serr[STRERR_BUFSIZE];
 	char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
 	const char *template = llvm_param.clang_bpf_cmd_template;
 
+	if (path[0] != '-' && realpath(path, abspath) == NULL) {
+		err = errno;
+		pr_err("ERROR: problems with path %s: %s\n",
+		       path, strerror_r(err, serr, sizeof(serr)));
+		return -err;
+	}
+
 	if (!template)
 		template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
 
@@ -362,7 +371,7 @@
 	if (nr_cpus_avail <= 0) {
 		pr_err(
 "WARNING:\tunable to get available CPUs in this system: %s\n"
-"        \tUse 128 instead.\n", strerror(errno));
+"        \tUse 128 instead.\n", strerror_r(errno, serr, sizeof(serr)));
 		nr_cpus_avail = 128;
 	}
 	snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d",
@@ -387,8 +396,7 @@
 	 * stdin to be source file (testing).
 	 */
 	force_set_env("CLANG_SOURCE",
-		      (path[0] == '-') ? path :
-		      make_nonrelative_path(path));
+		      (path[0] == '-') ? path : abspath);
 
 	pr_debug("llvm compiling command template: %s\n", template);
 	err = read_from_pipe(template, &obj_buf, &obj_buf_sz);

diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
index 5b3cf1c..23b9a74 100644
--- a/tools/perf/util/llvm-utils.h
+++ b/tools/perf/util/llvm-utils.h

@@ -39,11 +39,10 @@
 };
 
 extern struct llvm_param llvm_param;
-extern int perf_llvm_config(const char *var, const char *value);
+int perf_llvm_config(const char *var, const char *value);
 
-extern int llvm__compile_bpf(const char *path, void **p_obj_buf,
-			     size_t *p_obj_buf_sz);
+int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz);
 
 /* This function is for test__llvm() use only */
-extern int llvm__search_clang(void);
+int llvm__search_clang(void);
 #endif

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ad79297..80b9b6a 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c

@@ -1301,9 +1301,8 @@
 
 int machine__process_mmap2_event(struct machine *machine,
 				 union perf_event *event,
-				 struct perf_sample *sample __maybe_unused)
+				 struct perf_sample *sample)
 {
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread;
 	struct map *map;
 	enum map_type type;
@@ -1312,8 +1311,8 @@
 	if (dump_trace)
 		perf_event__fprintf_mmap2(event, stdout);
 
-	if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
-	    cpumode == PERF_RECORD_MISC_KERNEL) {
+	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
 		ret = machine__process_kernel_mmap_event(machine, event);
 		if (ret < 0)
 			goto out_problem;
@@ -1355,9 +1354,8 @@
 }
 
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
-				struct perf_sample *sample __maybe_unused)
+				struct perf_sample *sample)
 {
-	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread;
 	struct map *map;
 	enum map_type type;
@@ -1366,8 +1364,8 @@
 	if (dump_trace)
 		perf_event__fprintf_mmap(event, stdout);
 
-	if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
-	    cpumode == PERF_RECORD_MISC_KERNEL) {
+	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
 		ret = machine__process_kernel_mmap_event(machine, event);
 		if (ret < 0)
 			goto out_problem;

diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 1a3e45b..8499db2 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h

@@ -94,7 +94,7 @@
 			       union perf_event *event);
 int machine__process_itrace_start_event(struct machine *machine,
 					union perf_event *event);
-int machine__process_switch_event(struct machine *machine __maybe_unused,
+int machine__process_switch_event(struct machine *machine,
 				  union perf_event *event);
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample);

diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 67e4930..d740c3c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h

@@ -22,19 +22,18 @@
 	struct tracepoint_path *next;
 };
 
-extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
-extern struct tracepoint_path *tracepoint_name_to_path(const char *name);
-extern bool have_tracepoints(struct list_head *evlist);
+struct tracepoint_path *tracepoint_id_to_path(u64 config);
+struct tracepoint_path *tracepoint_name_to_path(const char *name);
+bool have_tracepoints(struct list_head *evlist);
 
 const char *event_type(int type);
 
-extern int parse_events_option(const struct option *opt, const char *str,
-			       int unset);
-extern int parse_events(struct perf_evlist *evlist, const char *str,
-			struct parse_events_error *error);
-extern int parse_events_terms(struct list_head *terms, const char *str);
-extern int parse_filter(const struct option *opt, const char *str, int unset);
-extern int exclude_perf(const struct option *opt, const char *arg, int unset);
+int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events(struct perf_evlist *evlist, const char *str,
+		 struct parse_events_error *error);
+int parse_events_terms(struct list_head *terms, const char *str);
+int parse_filter(const struct option *opt, const char *str, int unset);
+int exclude_perf(const struct option *opt, const char *arg, int unset);
 
 #define EVENTS_HELP_MAX (128*1024)
 
@@ -183,7 +182,7 @@
 void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
 			     bool name_only);
 int print_hwcache_events(const char *event_glob, bool name_only);
-extern int is_valid_tracepoint(const char *event_string);
+int is_valid_tracepoint(const char *event_string);
 
 int valid_event_mount(const char *eventfs);
 char *parse_events_formats_error_string(char *additional_terms);

diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 3654d96..3bf6bf8 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c

@@ -41,36 +41,6 @@
 	return path;
 }
 
-static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
-{
-	const char *perf_dir = get_perf_dir();
-	size_t len;
-
-	len = strlen(perf_dir);
-	if (n < len + 1)
-		goto bad;
-	memcpy(buf, perf_dir, len);
-	if (len && !is_dir_sep(perf_dir[len-1]))
-		buf[len++] = '/';
-	len += vsnprintf(buf + len, n - len, fmt, args);
-	if (len >= n)
-		goto bad;
-	return cleanup_path(buf);
-bad:
-	strlcpy(buf, bad_path, n);
-	return buf;
-}
-
-char *perf_pathdup(const char *fmt, ...)
-{
-	char path[PATH_MAX];
-	va_list args;
-	va_start(args, fmt);
-	(void)perf_vsnpath(path, sizeof(path), fmt, args);
-	va_end(args);
-	return xstrdup(path);
-}
-
 char *mkpath(const char *fmt, ...)
 {
 	va_list args;

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 93996ec..8319fbb 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c

@@ -2179,7 +2179,7 @@
 		strbuf_addf(result, " in %s", module);
 
 	if (pev->nargs > 0) {
-		strbuf_addstr(result, " with");
+		strbuf_add(result, " with", 5);
 		for (i = 0; i < pev->nargs; i++) {
 			ret = synthesize_perf_probe_arg(&pev->args[i],
 							buf, 128);

diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index ba926c3..e54e7b0 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h

@@ -114,49 +114,44 @@
 void exit_probe_symbol_maps(void);
 
 /* Command string to events */
-extern int parse_perf_probe_command(const char *cmd,
-				    struct perf_probe_event *pev);
-extern int parse_probe_trace_command(const char *cmd,
-				     struct probe_trace_event *tev);
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev);
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
 
 /* Events to command string */
-extern char *synthesize_perf_probe_command(struct perf_probe_event *pev);
-extern char *synthesize_probe_trace_command(struct probe_trace_event *tev);
-extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf,
-				     size_t len);
+char *synthesize_perf_probe_command(struct perf_probe_event *pev);
+char *synthesize_probe_trace_command(struct probe_trace_event *tev);
+int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len);
 
 /* Check the perf_probe_event needs debuginfo */
-extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
 
 /* Release event contents */
-extern void clear_perf_probe_event(struct perf_probe_event *pev);
-extern void clear_probe_trace_event(struct probe_trace_event *tev);
+void clear_perf_probe_event(struct perf_probe_event *pev);
+void clear_probe_trace_event(struct probe_trace_event *tev);
 
 /* Command string to line-range */
-extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
+int parse_line_range_desc(const char *cmd, struct line_range *lr);
 
 /* Release line range members */
-extern void line_range__clear(struct line_range *lr);
+void line_range__clear(struct line_range *lr);
 
 /* Initialize line range */
-extern int line_range__init(struct line_range *lr);
+int line_range__init(struct line_range *lr);
 
-extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int del_perf_probe_events(struct strfilter *filter);
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int del_perf_probe_events(struct strfilter *filter);
 
-extern int show_perf_probe_event(const char *group, const char *event,
-				 struct perf_probe_event *pev,
-				 const char *module, bool use_stdout);
-extern int show_perf_probe_events(struct strfilter *filter);
-extern int show_line_range(struct line_range *lr, const char *module,
-			   bool user);
-extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
-			       struct strfilter *filter);
-extern int show_available_funcs(const char *module, struct strfilter *filter,
-				bool user);
+int show_perf_probe_event(const char *group, const char *event,
+			  struct perf_probe_event *pev,
+			  const char *module, bool use_stdout);
+int show_perf_probe_events(struct strfilter *filter);
+int show_line_range(struct line_range *lr, const char *module, bool user);
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+			struct strfilter *filter);
+int show_available_funcs(const char *module, struct strfilter *filter, bool user);
 bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 			     struct probe_trace_event *tev, struct map *map);

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 4ce5c5e..b3bd0fb 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c

@@ -1314,18 +1314,18 @@
 			if (probe_conf.show_location_range) {
 				if (!externs) {
 					if (ret)
-						strbuf_addf(&buf, "[INV]\t");
+						strbuf_add(&buf, "[INV]\t", 6);
 					else
-						strbuf_addf(&buf, "[VAL]\t");
+						strbuf_add(&buf, "[VAL]\t", 6);
 				} else
-					strbuf_addf(&buf, "[EXT]\t");
+					strbuf_add(&buf, "[EXT]\t", 6);
 			}
 
 			ret2 = die_get_varname(die_mem, &buf);
 
 			if (!ret2 && probe_conf.show_location_range &&
 				!externs) {
-				strbuf_addf(&buf, "\t");
+				strbuf_addch(&buf, '\t');
 				ret2 = die_get_var_range(&af->pf.sp_die,
 							die_mem, &buf);
 			}

diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 0aec770..51137fc 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h

@@ -34,27 +34,25 @@
 };
 
 /* This also tries to open distro debuginfo */
-extern struct debuginfo *debuginfo__new(const char *path);
-extern void debuginfo__delete(struct debuginfo *dbg);
+struct debuginfo *debuginfo__new(const char *path);
+void debuginfo__delete(struct debuginfo *dbg);
 
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
-extern int debuginfo__find_trace_events(struct debuginfo *dbg,
-					struct perf_probe_event *pev,
-					struct probe_trace_event **tevs);
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+				 struct perf_probe_event *pev,
+				 struct probe_trace_event **tevs);
 
 /* Find a perf_probe_point from debuginfo */
-extern int debuginfo__find_probe_point(struct debuginfo *dbg,
-				       unsigned long addr,
-				       struct perf_probe_point *ppt);
+int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
+				struct perf_probe_point *ppt);
 
 /* Find a line range */
-extern int debuginfo__find_line_range(struct debuginfo *dbg,
-				      struct line_range *lr);
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
 
 /* Find available variables */
-extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
-					     struct perf_probe_event *pev,
-					     struct variable_list **vls);
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+				      struct perf_probe_event *pev,
+				      struct variable_list **vls);
 
 /* Find a src file from a DWARF tag path */
 int get_real_path(const char *raw_path, const char *comp_dir,

diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
index 172889e..3340c9c 100644
--- a/tools/perf/util/quote.h
+++ b/tools/perf/util/quote.h

@@ -24,6 +24,6 @@
  * sq_quote() in a real application.
  */
 
-extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
 
 #endif /* __PERF_QUOTE_H */

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 60b3593..4abd85c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c

@@ -1107,12 +1107,11 @@
 					       union perf_event *event,
 					       struct perf_sample *sample)
 {
-	const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct machine *machine;
 
 	if (perf_guest &&
-	    ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
-	     (cpumode == PERF_RECORD_MISC_GUEST_USER))) {
+	    ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
+	     (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
 		u32 pid;
 
 		if (event->header.type == PERF_RECORD_MMAP

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 93fa136..47966a1 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c

@@ -2225,7 +2225,7 @@
 }
 
 static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
-			       struct perf_evlist *evlist __maybe_unused,
+			       struct perf_evlist *evlist,
 			       int level)
 {
 	unsigned int i;

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index b33ffb2..fdb7196 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c

@@ -152,8 +152,7 @@
 }
 
 static void print_stalled_cycles_frontend(int cpu,
-					  struct perf_evsel *evsel
-					  __maybe_unused, double avg,
+					  struct perf_evsel *evsel, double avg,
 					  struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
@@ -175,8 +174,7 @@
 }
 
 static void print_stalled_cycles_backend(int cpu,
-					 struct perf_evsel *evsel
-					 __maybe_unused, double avg,
+					 struct perf_evsel *evsel, double avg,
 					 struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
@@ -194,7 +192,7 @@
 }
 
 static void print_branch_misses(int cpu,
-				struct perf_evsel *evsel __maybe_unused,
+				struct perf_evsel *evsel,
 				double avg,
 				struct perf_stat_output_ctx *out)
 {
@@ -213,7 +211,7 @@
 }
 
 static void print_l1_dcache_misses(int cpu,
-				   struct perf_evsel *evsel __maybe_unused,
+				   struct perf_evsel *evsel,
 				   double avg,
 				   struct perf_stat_output_ctx *out)
 {
@@ -232,7 +230,7 @@
 }
 
 static void print_l1_icache_misses(int cpu,
-				   struct perf_evsel *evsel __maybe_unused,
+				   struct perf_evsel *evsel,
 				   double avg,
 				   struct perf_stat_output_ctx *out)
 {
@@ -250,7 +248,7 @@
 }
 
 static void print_dtlb_cache_misses(int cpu,
-				    struct perf_evsel *evsel __maybe_unused,
+				    struct perf_evsel *evsel,
 				    double avg,
 				    struct perf_stat_output_ctx *out)
 {
@@ -268,7 +266,7 @@
 }
 
 static void print_itlb_cache_misses(int cpu,
-				    struct perf_evsel *evsel __maybe_unused,
+				    struct perf_evsel *evsel,
 				    double avg,
 				    struct perf_stat_output_ctx *out)
 {
@@ -286,7 +284,7 @@
 }
 
 static void print_ll_cache_misses(int cpu,
-				  struct perf_evsel *evsel __maybe_unused,
+				  struct perf_evsel *evsel,
 				  double avg,
 				  struct perf_stat_output_ctx *out)
 {

diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index d3d2792..8fb7329 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c

@@ -51,6 +51,13 @@
 	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
 }
 
+void strbuf_addch(struct strbuf *sb, int c)
+{
+	strbuf_grow(sb, 1);
+	sb->buf[sb->len++] = c;
+	sb->buf[sb->len] = '\0';
+}
+
 void strbuf_add(struct strbuf *sb, const void *data, size_t len)
 {
 	strbuf_grow(sb, len);
@@ -58,7 +65,7 @@
 	strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+static void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
 {
 	int len;
 	va_list ap_saved;

diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index 7a32c83..ab9be0fb 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h

@@ -51,16 +51,16 @@
 #define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
 
 /*----- strbuf life cycle -----*/
-extern void strbuf_init(struct strbuf *buf, ssize_t hint);
-extern void strbuf_release(struct strbuf *);
-extern char *strbuf_detach(struct strbuf *, size_t *);
+void strbuf_init(struct strbuf *buf, ssize_t hint);
+void strbuf_release(struct strbuf *buf);
+char *strbuf_detach(struct strbuf *buf, size_t *);
 
 /*----- strbuf size related -----*/
 static inline ssize_t strbuf_avail(const struct strbuf *sb) {
 	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
 }
 
-extern void strbuf_grow(struct strbuf *, size_t);
+void strbuf_grow(struct strbuf *buf, size_t);
 
 static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
 	if (!sb->alloc)
@@ -71,22 +71,17 @@
 }
 
 /*----- add data in your buffer -----*/
-static inline void strbuf_addch(struct strbuf *sb, int c) {
-	strbuf_grow(sb, 1);
-	sb->buf[sb->len++] = c;
-	sb->buf[sb->len] = '\0';
-}
+void strbuf_addch(struct strbuf *sb, int c);
 
-extern void strbuf_add(struct strbuf *, const void *, size_t);
+void strbuf_add(struct strbuf *buf, const void *, size_t);
 static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
 	strbuf_add(sb, s, strlen(s));
 }
 
 __attribute__((format(printf,2,3)))
-extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
-extern void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap);
+void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
 
 /* XXX: if read fails, any partial read is undone */
-extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
+ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
 
 #endif /* __PERF_STRBUF_H */

diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
index 9292a52..946fdf2 100644
--- a/tools/perf/util/svghelper.h
+++ b/tools/perf/util/svghelper.h

@@ -3,32 +3,31 @@
 
 #include <linux/types.h>
 
-extern void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
-extern void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_box(int Yslot, u64 start, u64 end, const char *type);
-extern void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_box(int Yslot, u64 start, u64 end, const char *type);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
 
 
-extern void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
-extern void svg_cstate(int cpu, u64 start, u64 end, int type);
-extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
+void svg_cstate(int cpu, u64 start, u64 end, int type);
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
 
 
-extern void svg_time_grid(double min_thickness);
-extern void svg_io_legenda(void);
-extern void svg_legenda(void);
-extern void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
-extern void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
-extern void svg_interrupt(u64 start, int row, const char *backtrace);
-extern void svg_text(int Yslot, u64 start, const char *text);
-extern void svg_close(void);
-extern int svg_build_topology_map(char *sib_core, int sib_core_nr,
-				  char *sib_thr, int sib_thr_nr);
+void svg_time_grid(double min_thickness);
+void svg_io_legenda(void);
+void svg_legenda(void);
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
+void svg_interrupt(u64 start, int row, const char *backtrace);
+void svg_text(int Yslot, u64 start, const char *text);
+void svg_close(void);
+int svg_build_topology_map(char *sib_core, int sib_core_nr, char *sib_thr, int sib_thr_nr);
 
 extern int svg_page_width;
 extern u64 svg_highlight;

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index b1dd68f..bc229a7 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c

@@ -793,6 +793,7 @@
 	uint32_t idx;
 	GElf_Ehdr ehdr;
 	GElf_Shdr shdr;
+	GElf_Shdr tshdr;
 	Elf_Data *syms, *opddata = NULL;
 	GElf_Sym sym;
 	Elf_Scn *sec, *sec_strndx;
@@ -832,6 +833,9 @@
 	sec = syms_ss->symtab;
 	shdr = syms_ss->symshdr;
 
+	if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+		dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
+
 	if (runtime_ss->opdsec)
 		opddata = elf_rawdata(runtime_ss->opdsec, NULL);
 
@@ -880,12 +884,8 @@
 	 * Handle any relocation of vdso necessary because older kernels
 	 * attempted to prelink vdso to its virtual address.
 	 */
-	if (dso__is_vdso(dso)) {
-		GElf_Shdr tshdr;
-
-		if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
-			map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset;
-	}
+	if (dso__is_vdso(dso))
+		map->reloc = map->start - dso->text_offset;
 
 	dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
 	/*

diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index a937053..c8b7544 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h

@@ -34,8 +34,8 @@
 #endif
 
 #ifdef HAVE_LIBELF_SUPPORT
-extern Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
-				GElf_Shdr *shp, const char *name, size_t *idx);
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+			     GElf_Shdr *shp, const char *name, size_t *idx);
 #endif
 
 #ifndef DMGL_PARAMS

diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index 6adfa18..996046a 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c

@@ -41,15 +41,9 @@
 /* If we are in a dlopen()ed .so write to a global variable would segfault
  * (ugh), so keep things static. */
 static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
-static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
 static void (*error_routine)(const char *err, va_list params) = error_builtin;
 static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
 
-void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
-{
-	die_routine = routine;
-}
-
 void set_warning_routine(void (*routine)(const char *err, va_list params))
 {
 	warn_routine = routine;
@@ -65,7 +59,7 @@
 	va_list params;
 
 	va_start(params, err);
-	die_routine(err, params);
+	die_builtin(err, params);
 	va_end(params);
 }
 

diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index d0d50ce..8298d60 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h

@@ -133,25 +133,15 @@
 #define PERF_GTK_DSO  "libperf-gtk.so"
 
 /* General helper functions */
-extern void usage(const char *err) NORETURN;
-extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
-extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
+void usage(const char *err) NORETURN;
+void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
+int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
+void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
 
-#include "../../../include/linux/stringify.h"
+void set_warning_routine(void (*routine)(const char *err, va_list params));
 
-#define DIE_IF(cnd)	\
-	do { if (cnd)	\
-		die(" at (" __FILE__ ":" __stringify(__LINE__) "): "	\
-		    __stringify(cnd) "\n");				\
-	} while (0)
-
-
-extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
-extern void set_warning_routine(void (*routine)(const char *err, va_list params));
-
-extern int prefixcmp(const char *str, const char *prefix);
-extern void set_buildid_dir(const char *dir);
+int prefixcmp(const char *str, const char *prefix);
+void set_buildid_dir(const char *dir);
 
 #ifdef __GLIBC_PREREQ
 #if __GLIBC_PREREQ(2, 1)
@@ -172,8 +162,7 @@
 /*
  * Wrappers:
  */
-extern char *xstrdup(const char *str);
-extern void *xrealloc(void *ptr, size_t size) __attribute__((weak));
+void *xrealloc(void *ptr, size_t size) __attribute__((weak));
 
 
 static inline void *zalloc(size_t size)

diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
index 19f15b6..5f1a07c 100644
--- a/tools/perf/util/wrapper.c
+++ b/tools/perf/util/wrapper.c

@@ -12,18 +12,6 @@
 {
 }
 
-char *xstrdup(const char *str)
-{
-	char *ret = strdup(str);
-	if (!ret) {
-		release_pack_memory(strlen(str) + 1, -1);
-		ret = strdup(str);
-		if (!ret)
-			die("Out of memory, strdup failed");
-	}
-	return ret;
-}
-
 void *xrealloc(void *ptr, size_t size)
 {
 	void *ret = realloc(ptr, size);

diff --git a/tools/perf/config/utilities.mak b/tools/scripts/utilities.mak
similarity index 100%
rename from tools/perf/config/utilities.mak
rename to tools/scripts/utilities.mak


diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index b9453b8..150829d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c

@@ -1497,15 +1497,15 @@
 #define SECCOMP_SET_MODE_FILTER 1
 #endif
 
-#ifndef SECCOMP_FLAG_FILTER_TSYNC
-#define SECCOMP_FLAG_FILTER_TSYNC 1
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
 
 #ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
+int seccomp(unsigned int op, unsigned int flags, void *args)
 {
 	errno = 0;
-	return syscall(__NR_seccomp, op, flags, filter);
+	return syscall(__NR_seccomp, op, flags, args);
 }
 #endif
 
@@ -1613,7 +1613,7 @@
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &prog);
 	ASSERT_NE(ENOSYS, errno) {
 		TH_LOG("Kernel does not support seccomp syscall!");
@@ -1831,7 +1831,7 @@
 		self->sibling_count++;
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Could install filter on all threads!");
@@ -1892,7 +1892,7 @@
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_NE(ENOSYS, errno) {
 		TH_LOG("Kernel does not support seccomp syscall!");
@@ -1940,7 +1940,7 @@
 		self->sibling_count++;
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
 		TH_LOG("Did not fail on diverged sibling.");
@@ -1992,7 +1992,7 @@
 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
 	}
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
 		TH_LOG("Did not fail on diverged sibling.");
@@ -2021,7 +2021,7 @@
 	/* Switch to the remaining sibling */
 	sib = !sib;
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Expected the remaining sibling to sync");
@@ -2044,7 +2044,7 @@
 	while (!kill(self->sibling[sib].system_tid, 0))
 		sleep(0.1);
 
-	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
 		      &self->apply_prog);
 	ASSERT_EQ(0, ret);  /* just us chickens */
 }

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d5ce7d7..b47ebd1 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile

@@ -5,7 +5,7 @@
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
-			check_initial_reg_state sigreturn ldt_gdt
+			check_initial_reg_state sigreturn ldt_gdt iopl
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer

diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
new file mode 100644
index 0000000..c496ca9
--- /dev/null
+++ b/tools/testing/selftests/x86/iopl.c

@@ -0,0 +1,135 @@
+/*
+ * iopl.c - Test case for a Linux on Xen 64-bit bug
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+	siglongjmp(jmpbuf, 1);
+}
+
+int main(void)
+{
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		err(1, "sched_setaffinity to CPU 0");
+
+	/* Probe for iopl support.  Note that iopl(0) works even as nonroot. */
+	if (iopl(3) != 0) {
+		printf("[OK]\tiopl(3) failed (%d) -- try running as root\n",
+		       errno);
+		return 0;
+	}
+
+	/* Restore our original state prior to starting the test. */
+	if (iopl(0) != 0)
+		err(1, "iopl(0)");
+
+	pid_t child = fork();
+	if (child == -1)
+		err(1, "fork");
+
+	if (child == 0) {
+		printf("\tchild: set IOPL to 3\n");
+		if (iopl(3) != 0)
+			err(1, "iopl");
+
+		printf("[RUN]\tchild: write to 0x80\n");
+		asm volatile ("outb %%al, $0x80" : : "a" (0));
+
+		return 0;
+	} else {
+		int status;
+		if (waitpid(child, &status, 0) != child ||
+		    !WIFEXITED(status)) {
+			printf("[FAIL]\tChild died\n");
+			nerrs++;
+		} else if (WEXITSTATUS(status) != 0) {
+			printf("[FAIL]\tChild failed\n");
+			nerrs++;
+		} else {
+			printf("[OK]\tChild succeeded\n");
+		}
+	}
+
+	printf("[RUN]\tparent: write to 0x80 (should fail)\n");
+
+	sethandler(SIGSEGV, sigsegv, 0);
+	if (sigsetjmp(jmpbuf, 1) != 0) {
+		printf("[OK]\twrite was denied\n");
+	} else {
+		asm volatile ("outb %%al, $0x80" : : "a" (0));
+		printf("[FAIL]\twrite was allowed\n");
+		nerrs++;
+	}
+
+	/* Test the capability checks. */
+	printf("\tiopl(3)\n");
+	if (iopl(3) != 0)
+		err(1, "iopl(3)");
+
+	printf("\tDrop privileges\n");
+	if (setresuid(1, 1, 1) != 0) {
+		printf("[WARN]\tDropping privileges failed\n");
+		goto done;
+	}
+
+	printf("[RUN]\tiopl(3) unprivileged but with IOPL==3\n");
+	if (iopl(3) != 0) {
+		printf("[FAIL]\tiopl(3) should work if iopl is already 3 even if unprivileged\n");
+		nerrs++;
+	}
+
+	printf("[RUN]\tiopl(0) unprivileged\n");
+	if (iopl(0) != 0) {
+		printf("[FAIL]\tiopl(0) should work if iopl is already 3 even if unprivileged\n");
+		nerrs++;
+	}
+
+	printf("[RUN]\tiopl(3) unprivileged\n");
+	if (iopl(3) == 0) {
+		printf("[FAIL]\tiopl(3) should fail if when unprivileged if iopl==0\n");
+		nerrs++;
+	} else {
+		printf("[OK]\tFailed as expected\n");
+	}
+
+done:
+	return nerrs ? 1 : 0;
+}
+

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 99ee4b1..4fd482f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c

@@ -170,8 +170,8 @@
 		kvm_make_request(req, vcpu);
 		cpu = vcpu->cpu;
 
-		/* Set ->requests bit before we read ->mode */
-		smp_mb();
+		/* Set ->requests bit before we read ->mode. */
+		smp_mb__after_atomic();
 
 		if (cpus != NULL && cpu != -1 && cpu != me &&
 		      kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
@@ -191,9 +191,23 @@
 #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	long dirty_count = kvm->tlbs_dirty;
+	/*
+	 * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
+	 * kvm_make_all_cpus_request.
+	 */
+	long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
 
-	smp_mb();
+	/*
+	 * We want to publish modifications to the page tables before reading
+	 * mode. Pairs with a memory barrier in arch-specific code.
+	 * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest
+	 * and smp_mb in walk_shadow_page_lockless_begin/end.
+	 * - powerpc: smp_mb in kvmppc_prepare_to_enter.
+	 *
+	 * There is already an smp_mb__after_atomic() before
+	 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
+	 * barrier here.
+	 */
 	if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
@@ -536,6 +550,16 @@
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
+	spin_lock_init(&kvm->mmu_lock);
+	atomic_inc(&current->mm->mm_count);
+	kvm->mm = current->mm;
+	kvm_eventfd_init(kvm);
+	mutex_init(&kvm->lock);
+	mutex_init(&kvm->irq_lock);
+	mutex_init(&kvm->slots_lock);
+	atomic_set(&kvm->users_count, 1);
+	INIT_LIST_HEAD(&kvm->devices);
+
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
 		goto out_err_no_disable;
@@ -568,16 +592,6 @@
 			goto out_err;
 	}
 
-	spin_lock_init(&kvm->mmu_lock);
-	kvm->mm = current->mm;
-	atomic_inc(&kvm->mm->mm_count);
-	kvm_eventfd_init(kvm);
-	mutex_init(&kvm->lock);
-	mutex_init(&kvm->irq_lock);
-	mutex_init(&kvm->slots_lock);
-	atomic_set(&kvm->users_count, 1);
-	INIT_LIST_HEAD(&kvm->devices);
-
 	r = kvm_init_mmu_notifier(kvm);
 	if (r)
 		goto out_err;
@@ -602,6 +616,7 @@
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
 		kvm_free_memslots(kvm, kvm->memslots[i]);
 	kvm_arch_free_vm(kvm);
+	mmdrop(current->mm);
 	return ERR_PTR(r);
 }
commit	93061f390f107c37bad7e3bf9eb07bda58a4a99f	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Thu Apr 07 17:22:20 2016 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Thu Apr 07 17:22:20 2016 -0700
tree	6cd3ad605900fa7145a700c22062fabe36ccff95
parent	1c915b3ac4ecf6ff67573eed24458d862e842cb6 [diff]
parent	c325a67c72903e1cc30e990a15ce745bda0dbfde [diff]