Merge tag 'tegra-for-4.3-memory' of git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux into next/drivers

ARM: tegra: Memory controller updates for v4.3-rc1

Adds support for Tegra210, which allows the SMMU to be used on this new
SoC generation.

* tag 'tegra-for-4.3-memory' of git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux:
  memory: tegra: Add Tegra210 support
  memory: tegra: Add support for a variable-size client ID bitfield
  memory: tegra: Expose supported rates via debugfs

Signed-off-by: Olof Johansson <olof@lixom.net>
diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi/README
index 1fe2d7f..5e38e158 100644
--- a/Documentation/arm/sunxi/README
+++ b/Documentation/arm/sunxi/README
@@ -36,7 +36,7 @@
         + User Manual
           http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf
 
-      - Allwinner A23
+      - Allwinner A23 (sun8i)
         + Datasheet
           http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf
         + User Manual
@@ -55,7 +55,23 @@
         + User Manual
           http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf
 
+      - Allwinner A33 (sun8i)
+        + Datasheet
+          http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf
+        + User Manual
+          http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf
+
+      - Allwinner H3 (sun8i)
+        + Datasheet
+          http://dl.linux-sunxi.org/H3/Allwinner_H3_Datasheet_V1.0.pdf
+
     * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs
       - Allwinner A80
         + Datasheet
 	  http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf
+
+    * Octa ARM Cortex-A7 based SoCs
+      - Allwinner A83T
+        + Not Supported
+        + Datasheet
+          http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf
diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt
index 42941fd..67da205 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.txt
+++ b/Documentation/devicetree/bindings/arm/sunxi.txt
@@ -9,4 +9,6 @@
   allwinner,sun6i-a31
   allwinner,sun7i-a20
   allwinner,sun8i-a23
+  allwinner,sun8i-a33
+  allwinner,sun8i-h3
   allwinner,sun9i-a80
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt
index 2f3747f..e4cdaf1 100644
--- a/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt
@@ -1,7 +1,9 @@
 * Renesas R8A7778 Clock Pulse Generator (CPG)
 
 The CPG generates core clocks for the R8A7778. It includes two PLLs and
-several fixed ratio dividers
+several fixed ratio dividers.
+The CPG also provides a Clock Domain for SoC devices, in combination with the
+CPG Module Stop (MSTP) Clocks.
 
 Required Properties:
 
@@ -10,10 +12,18 @@
   - #clock-cells: Must be 1
   - clock-output-names: The names of the clocks. Supported clocks are
     "plla", "pllb", "b", "out", "p", "s", and "s1".
+  - #power-domain-cells: Must be 0
+
+SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
+through an MSTP clock should refer to the CPG device node in their
+"power-domains" property, as documented by the generic PM domain bindings in
+Documentation/devicetree/bindings/power/power_domain.txt.
 
 
-Example
--------
+Examples
+--------
+
+  - CPG device node:
 
 	cpg_clocks: cpg_clocks@ffc80000 {
 		compatible = "renesas,r8a7778-cpg-clocks";
@@ -22,4 +32,17 @@
 		clocks = <&extal_clk>;
 		clock-output-names = "plla", "pllb", "b",
 				     "out", "p", "s", "s1";
+		#power-domain-cells = <0>;
+	};
+
+
+  - CPG/MSTP Clock Domain member device node:
+
+	sdhi0: sd@ffe4c000 {
+		compatible = "renesas,sdhi-r8a7778";
+		reg = <0xffe4c000 0x100>;
+		interrupts = <0 87 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&mstp3_clks R8A7778_CLK_SDHI0>;
+		power-domains = <&cpg_clocks>;
+		status = "disabled";
 	};
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
index ed3c8cb..8c81547 100644
--- a/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
@@ -1,7 +1,9 @@
 * Renesas R8A7779 Clock Pulse Generator (CPG)
 
 The CPG generates core clocks for the R8A7779. It includes one PLL and
-several fixed ratio dividers
+several fixed ratio dividers.
+The CPG also provides a Clock Domain for SoC devices, in combination with the
+CPG Module Stop (MSTP) Clocks.
 
 Required Properties:
 
@@ -12,16 +14,36 @@
   - #clock-cells: Must be 1
   - clock-output-names: The names of the clocks. Supported clocks are "plla",
     "z", "zs", "s", "s1", "p", "b", "out".
+  - #power-domain-cells: Must be 0
+
+SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
+through an MSTP clock should refer to the CPG device node in their
+"power-domains" property, as documented by the generic PM domain bindings in
+Documentation/devicetree/bindings/power/power_domain.txt.
 
 
-Example
--------
+Examples
+--------
+
+  - CPG device node:
 
 	cpg_clocks: cpg_clocks@ffc80000 {
 		compatible = "renesas,r8a7779-cpg-clocks";
-		reg = <0 0xffc80000 0 0x30>;
+		reg = <0xffc80000 0x30>;
 		clocks = <&extal_clk>;
 		#clock-cells = <1>;
 		clock-output-names = "plla", "z", "zs", "s", "s1", "p",
 		                     "b", "out";
+		#power-domain-cells = <0>;
+	};
+
+
+  - CPG/MSTP Clock Domain member device node:
+
+	sata: sata@fc600000 {
+		compatible = "renesas,sata-r8a7779", "renesas,rcar-sata";
+		reg = <0xfc600000 0x2000>;
+		interrupts = <0 100 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&mstp1_clks R8A7779_CLK_SATA>;
+		power-domains = <&cpg_clocks>;
 	};
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
index 56f111b..2a9a8ed 100644
--- a/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
@@ -2,6 +2,8 @@
 
 The CPG generates core clocks for the R-Car Gen2 SoCs. It includes three PLLs
 and several fixed ratio dividers.
+The CPG also provides a Clock Domain for SoC devices, in combination with the
+CPG Module Stop (MSTP) Clocks.
 
 Required Properties:
 
@@ -20,10 +22,18 @@
   - clock-output-names: The names of the clocks. Supported clocks are "main",
     "pll0", "pll1", "pll3", "lb", "qspi", "sdh", "sd0", "sd1", "z", "rcan", and
     "adsp"
+  - #power-domain-cells: Must be 0
+
+SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
+through an MSTP clock should refer to the CPG device node in their
+"power-domains" property, as documented by the generic PM domain bindings in
+Documentation/devicetree/bindings/power/power_domain.txt.
 
 
-Example
--------
+Examples
+--------
+
+  - CPG device node:
 
 	cpg_clocks: cpg_clocks@e6150000 {
 		compatible = "renesas,r8a7790-cpg-clocks",
@@ -34,4 +44,16 @@
 		clock-output-names = "main", "pll0, "pll1", "pll3",
 				     "lb", "qspi", "sdh", "sd0", "sd1", "z",
 				     "rcan", "adsp";
+		#power-domain-cells = <0>;
+	};
+
+
+  - CPG/MSTP Clock Domain member device node:
+
+	thermal@e61f0000 {
+		compatible = "renesas,thermal-r8a7790", "renesas,rcar-thermal";
+		reg = <0 0xe61f0000 0 0x14>, <0 0xe61f0100 0 0x38>;
+		interrupts = <0 69 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&mstp5_clks R8A7790_CLK_THERMAL>;
+		power-domains = <&cpg_clocks>;
 	};
diff --git a/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt
index b0f7ddb..bb51a33 100644
--- a/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt
@@ -2,6 +2,8 @@
 
 The CPG generates core clocks for the RZ SoCs. It includes the PLL, variable
 CPU and GPU clocks, and several fixed ratio dividers.
+The CPG also provides a Clock Domain for SoC devices, in combination with the
+CPG Module Stop (MSTP) Clocks.
 
 Required Properties:
 
@@ -14,10 +16,18 @@
   - #clock-cells: Must be 1
   - clock-output-names: The names of the clocks. Supported clocks are "pll",
     "i", and "g"
+  - #power-domain-cells: Must be 0
+
+SoC devices that are part of the CPG/MSTP Clock Domain and can be power-managed
+through an MSTP clock should refer to the CPG device node in their
+"power-domains" property, as documented by the generic PM domain bindings in
+Documentation/devicetree/bindings/power/power_domain.txt.
 
 
-Example
--------
+Examples
+--------
+
+  - CPG device node:
 
 	cpg_clocks: cpg_clocks@fcfe0000 {
 		#clock-cells = <1>;
@@ -26,4 +36,19 @@
 		reg = <0xfcfe0000 0x18>;
 		clocks = <&extal_clk>, <&usb_x1_clk>;
 		clock-output-names = "pll", "i", "g";
+		#power-domain-cells = <0>;
+	};
+
+
+  - CPG/MSTP Clock Domain member device node:
+
+	mtu2: timer@fcff0000 {
+		compatible = "renesas,mtu2-r7s72100", "renesas,mtu2";
+		reg = <0xfcff0000 0x400>;
+		interrupts = <0 107 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "tgi0a";
+		clocks = <&mstp3_clks R7S72100_CLK_MTU2>;
+		clock-names = "fck";
+		power-domains = <&cpg_clocks>;
+		status = "disabled";
 	};
diff --git a/Documentation/devicetree/bindings/cpufreq/tegra124-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/tegra124-cpufreq.txt
new file mode 100644
index 0000000..b1669fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/tegra124-cpufreq.txt
@@ -0,0 +1,44 @@
+Tegra124 CPU frequency scaling driver bindings
+----------------------------------------------
+
+Both required and optional properties listed below must be defined
+under node /cpus/cpu@0.
+
+Required properties:
+- clocks: Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+  - cpu_g: Clock mux for the fast CPU cluster.
+  - cpu_lp: Clock mux for the low-power CPU cluster.
+  - pll_x: Fast PLL clocksource.
+  - pll_p: Auxiliary PLL used during fast PLL rate changes.
+  - dfll: Fast DFLL clocksource that also automatically scales CPU voltage.
+- vdd-cpu-supply: Regulator for CPU voltage
+
+Optional properties:
+- clock-latency: Specify the possible maximum transition latency for clock,
+  in unit of nanoseconds.
+
+Example:
+--------
+cpus {
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	cpu@0 {
+		device_type = "cpu";
+		compatible = "arm,cortex-a15";
+		reg = <0>;
+
+		clocks = <&tegra_car TEGRA124_CLK_CCLK_G>,
+			 <&tegra_car TEGRA124_CLK_CCLK_LP>,
+			 <&tegra_car TEGRA124_CLK_PLL_X>,
+			 <&tegra_car TEGRA124_CLK_PLL_P>,
+			 <&dfll>;
+		clock-names = "cpu_g", "cpu_lp", "pll_x", "pll_p", "dfll";
+		clock-latency = <300000>;
+		vdd-cpu-supply: <&vdd_cpu>;
+	};
+
+	<...>
+};
diff --git a/Documentation/devicetree/bindings/memory-controllers/arm,pl172.txt b/Documentation/devicetree/bindings/memory-controllers/arm,pl172.txt
new file mode 100644
index 0000000..e6df32f
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/arm,pl172.txt
@@ -0,0 +1,125 @@
+* Device tree bindings for ARM PL172 MultiPort Memory Controller
+
+Required properties:
+
+- compatible:		"arm,pl172", "arm,primecell"
+
+- reg:			Must contains offset/length value for controller.
+
+- #address-cells:	Must be 2. The partition number has to be encoded in the
+			first address cell and it may accept values 0..N-1
+			(N - total number of partitions). The second cell is the
+			offset into the partition.
+
+- #size-cells:		Must be set to 1.
+
+- ranges:		Must contain one or more chip select memory regions.
+
+- clocks:		Must contain references to controller clocks.
+
+- clock-names:		Must contain "mpmcclk" and "apb_pclk".
+
+- clock-ranges:		Empty property indicating that child nodes can inherit
+			named clocks. Required only if clock tree data present
+			in device tree.
+			See clock-bindings.txt
+
+Child chip-select (cs) nodes contain the memory devices nodes connected to
+such as NOR (e.g. cfi-flash) and NAND.
+
+Required child cs node properties:
+
+- #address-cells:	Must be 2.
+
+- #size-cells:		Must be 1.
+
+- ranges:		Empty property indicating that child nodes can inherit
+			memory layout.
+
+- clock-ranges:		Empty property indicating that child nodes can inherit
+			named clocks. Required only if clock tree data present
+			in device tree.
+
+- mpmc,cs:		Chip select number. Indicates to the pl0172 driver
+			which chipselect is used for accessing the memory.
+
+- mpmc,memory-width:	Width of the chip select memory. Must be equal to
+			either 8, 16 or 32.
+
+Optional child cs node config properties:
+
+- mpmc,async-page-mode:	Enable asynchronous page mode.
+
+- mpmc,cs-active-high:	Set chip select polarity to active high.
+
+- mpmc,byte-lane-low:	Set byte lane state to low.
+
+- mpmc,extended-wait:	Enable extended wait.
+
+- mpmc,buffer-enable:	Enable write buffer.
+
+- mpmc,write-protect:	Enable write protect.
+
+Optional child cs node timing properties:
+
+- mpmc,write-enable-delay:	Delay from chip select assertion to write
+				enable (WE signal) in nano seconds.
+
+- mpmc,output-enable-delay:	Delay from chip select assertion to output
+				enable (OE signal) in nano seconds.
+
+- mpmc,write-access-delay:	Delay from chip select assertion to write
+				access in nano seconds.
+
+- mpmc,read-access-delay:	Delay from chip select assertion to read
+				access in nano seconds.
+
+- mpmc,page-mode-read-delay:	Delay for asynchronous page mode sequential
+				accesses in nano seconds.
+
+- mpmc,turn-round-delay:	Delay between access to memory banks in nano
+				seconds.
+
+If any of the above timing parameters are absent, current parameter value will
+be taken from the corresponding HW reg.
+
+Example for pl172 with nor flash on chip select 0 shown below.
+
+emc: memory-controller@40005000 {
+	compatible = "arm,pl172", "arm,primecell";
+	reg = <0x40005000 0x1000>;
+	clocks = <&ccu1 CLK_CPU_EMCDIV>, <&ccu1 CLK_CPU_EMC>;
+	clock-names = "mpmcclk", "apb_pclk";
+	#address-cells = <2>;
+	#size-cells = <1>;
+	ranges = <0 0 0x1c000000 0x1000000
+		  1 0 0x1d000000 0x1000000
+		  2 0 0x1e000000 0x1000000
+		  3 0 0x1f000000 0x1000000>;
+
+	cs0 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges;
+
+		mpmc,cs = <0>;
+		mpmc,memory-width = <16>;
+		mpmc,byte-lane-low;
+		mpmc,write-enable-delay = <0>;
+		mpmc,output-enable-delay = <0>;
+		mpmc,read-enable-delay = <70>;
+		mpmc,page-mode-read-delay = <70>;
+
+		flash@0,0 {
+			compatible = "sst,sst39vf320", "cfi-flash";
+			reg = <0 0 0x400000>;
+			bank-width = <2>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			partition@0 {
+				label = "data";
+				reg = <0 0x400000>;
+			};
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
index 938f8e1..0db6047 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
@@ -8,6 +8,7 @@
 Required properties:
 - compatible	: Should be of the form "ti,emif-<ip-rev>" where <ip-rev>
   is the IP revision of the specific EMIF instance.
+		  For am437x should be ti,emif-am4372.
 
 - phy-type	: <u32> indicating the DDR phy type. Following are the
   allowed values
diff --git a/Documentation/devicetree/bindings/reset/ath79-reset.txt b/Documentation/devicetree/bindings/reset/ath79-reset.txt
new file mode 100644
index 0000000..4c56330
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/ath79-reset.txt
@@ -0,0 +1,20 @@
+Binding for Qualcomm Atheros AR7xxx/AR9XXX reset controller
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required Properties:
+- compatible: has to be "qca,<soctype>-reset", "qca,ar7100-reset"
+              as fallback
+- reg: Base address and size of the controllers memory area
+- #reset-cells : Specifies the number of cells needed to encode reset
+                 line, should be 1
+
+Example:
+
+	reset-controller@1806001c {
+		compatible = "qca,ar9132-reset", "qca,ar7100-reset";
+		reg = <0x1806001c 0x4>;
+
+		#reset-cells = <1>;
+	};
diff --git a/Documentation/devicetree/bindings/reset/nxp,lpc1850-rgu.txt b/Documentation/devicetree/bindings/reset/nxp,lpc1850-rgu.txt
new file mode 100644
index 0000000..b4e96a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/nxp,lpc1850-rgu.txt
@@ -0,0 +1,84 @@
+NXP LPC1850  Reset Generation Unit (RGU)
+========================================
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required properties:
+- compatible: Should be "nxp,lpc1850-rgu"
+- reg: register base and length
+- clocks: phandle and clock specifier to RGU clocks
+- clock-names: should contain "delay" and "reg"
+- #reset-cells: should be 1
+
+See table below for valid peripheral reset numbers. Numbers not
+in the table below are either reserved or not applicable for
+normal operation.
+
+Reset	Peripheral
+  9	System control unit (SCU)
+ 12	ARM Cortex-M0 subsystem core (LPC43xx only)
+ 13	CPU core
+ 16	LCD controller
+ 17	USB0
+ 18	USB1
+ 19	DMA
+ 20	SDIO
+ 21	External memory controller (EMC)
+ 22	Ethernet
+ 25	Flash bank A
+ 27	EEPROM
+ 28	GPIO
+ 29	Flash bank B
+ 32	Timer0
+ 33	Timer1
+ 34	Timer2
+ 35	Timer3
+ 36	Repetitive Interrupt timer (RIT)
+ 37	State Configurable Timer (SCT)
+ 38	Motor control PWM (MCPWM)
+ 39	QEI
+ 40	ADC0
+ 41	ADC1
+ 42	DAC
+ 44	USART0
+ 45	UART1
+ 46	USART2
+ 47	USART3
+ 48	I2C0
+ 49	I2C1
+ 50	SSP0
+ 51	SSP1
+ 52	I2S0 and I2S1
+ 53	Serial Flash Interface (SPIFI)
+ 54	C_CAN1
+ 55	C_CAN0
+ 56	ARM Cortex-M0 application core (LPC4370 only)
+ 57	SGPIO (LPC43xx only)
+ 58	SPI (LPC43xx only)
+ 60	ADCHS (12-bit ADC) (LPC4370 only)
+
+Refer to NXP LPC18xx or LPC43xx user manual for more details about
+the reset signals and the connected block/peripheral.
+
+Reset provider example:
+rgu: reset-controller@40053000 {
+	compatible = "nxp,lpc1850-rgu";
+	reg = <0x40053000 0x1000>;
+	clocks = <&cgu BASE_SAFE_CLK>, <&ccu1 CLK_CPU_BUS>;
+	clock-names = "delay", "reg";
+	#reset-cells = <1>;
+};
+
+Reset consumer example:
+mac: ethernet@40010000 {
+	compatible = "nxp,lpc1850-dwmac", "snps,dwmac-3.611", "snps,dwmac";
+	reg = <0x40010000 0x2000>;
+	interrupts = <5>;
+	interrupt-names = "macirq";
+	clocks = <&ccu1 CLK_CPU_ETHERNET>;
+	clock-names = "stmmaceth";
+	resets = <&rgu 22>;
+	reset-names = "stmmaceth";
+	status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt b/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt
index 54ae9f7..9ca2776 100644
--- a/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt
+++ b/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt
@@ -39,4 +39,4 @@
 	};
 
 Macro definitions for the supported reset channels can be found in:
-include/dt-bindings/reset-controller/stih407-resets.h
+include/dt-bindings/reset/stih407-resets.h
diff --git a/Documentation/devicetree/bindings/reset/st,sti-powerdown.txt b/Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
index 5ab26b7..1cfd21d 100644
--- a/Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+++ b/Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
@@ -43,5 +43,5 @@
 
 Macro definitions for the supported reset channels can be found in:
 
-include/dt-bindings/reset-controller/stih415-resets.h
-include/dt-bindings/reset-controller/stih416-resets.h
+include/dt-bindings/reset/stih415-resets.h
+include/dt-bindings/reset/stih416-resets.h
diff --git a/Documentation/devicetree/bindings/reset/st,sti-softreset.txt b/Documentation/devicetree/bindings/reset/st,sti-softreset.txt
index a8d3d3c..891a2fd 100644
--- a/Documentation/devicetree/bindings/reset/st,sti-softreset.txt
+++ b/Documentation/devicetree/bindings/reset/st,sti-softreset.txt
@@ -42,5 +42,5 @@
 
 Macro definitions for the supported reset channels can be found in:
 
-include/dt-bindings/reset-controller/stih415-resets.h
-include/dt-bindings/reset-controller/stih416-resets.h
+include/dt-bindings/reset/stih415-resets.h
+include/dt-bindings/reset/stih416-resets.h
diff --git a/Documentation/devicetree/bindings/reset/zynq-reset.txt b/Documentation/devicetree/bindings/reset/zynq-reset.txt
new file mode 100644
index 0000000..5860120
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/zynq-reset.txt
@@ -0,0 +1,68 @@
+Xilinx Zynq Reset Manager
+
+The Zynq AP-SoC has several different resets.
+
+See Chapter 26 of the Zynq TRM (UG585) for more information about Zynq resets.
+
+Required properties:
+- compatible: "xlnx,zynq-reset"
+- reg: SLCR offset and size taken via syscon <0x200 0x48>
+- syscon: <&slcr>
+  This should be a phandle to the Zynq's SLCR registers.
+- #reset-cells: Must be 1
+
+The Zynq Reset Manager needs to be a childnode of the SLCR.
+
+Example:
+	rstc: rstc@200 {
+		compatible = "xlnx,zynq-reset";
+		reg = <0x200 0x48>;
+		#reset-cells = <1>;
+		syscon = <&slcr>;
+	};
+
+Reset outputs:
+ 0  : soft reset
+ 32 : ddr reset
+ 64 : topsw reset
+ 96 : dmac reset
+ 128: usb0 reset
+ 129: usb1 reset
+ 160: gem0 reset
+ 161: gem1 reset
+ 164: gem0 rx reset
+ 165: gem1 rx reset
+ 166: gem0 ref reset
+ 167: gem1 ref reset
+ 192: sdio0 reset
+ 193: sdio1 reset
+ 196: sdio0 ref reset
+ 197: sdio1 ref reset
+ 224: spi0 reset
+ 225: spi1 reset
+ 226: spi0 ref reset
+ 227: spi1 ref reset
+ 256: can0 reset
+ 257: can1 reset
+ 258: can0 ref reset
+ 259: can1 ref reset
+ 288: i2c0 reset
+ 289: i2c1 reset
+ 320: uart0 reset
+ 321: uart1 reset
+ 322: uart0 ref reset
+ 323: uart1 ref reset
+ 352: gpio reset
+ 384: lqspi reset
+ 385: qspi ref reset
+ 416: smc reset
+ 417: smc ref reset
+ 448: ocm reset
+ 512: fpga0 out reset
+ 513: fpga1 out reset
+ 514: fpga2 out reset
+ 515: fpga3 out reset
+ 544: a9 reset 0
+ 545: a9 reset 1
+ 552: peri reset
+
diff --git a/Documentation/devicetree/bindings/soc/qcom,smd-rpm.txt b/Documentation/devicetree/bindings/soc/qcom,smd-rpm.txt
new file mode 100644
index 0000000..e27f5c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/qcom,smd-rpm.txt
@@ -0,0 +1,117 @@
+Qualcomm Resource Power Manager (RPM) over SMD
+
+This driver is used to interface with the Resource Power Manager (RPM) found in
+various Qualcomm platforms. The RPM allows each component in the system to vote
+for state of the system resources, such as clocks, regulators and bus
+frequencies.
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: must be one of:
+		    "qcom,rpm-msm8974"
+
+- qcom,smd-channels:
+	Usage: required
+	Value type: <stringlist>
+	Definition: Shared Memory channel used for communication with the RPM
+
+= SUBDEVICES
+
+The RPM exposes resources to its subnodes. The below bindings specify the set
+of valid subnodes that can operate on these resources.
+
+== Regulators
+
+Regulator nodes are identified by their compatible:
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: must be one of:
+		    "qcom,rpm-pm8841-regulators"
+		    "qcom,rpm-pm8941-regulators"
+
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
+- vdd_s5-supply:
+- vdd_s6-supply:
+- vdd_s7-supply:
+- vdd_s8-supply:
+	Usage: optional (pm8841 only)
+	Value type: <phandle>
+	Definition: reference to regulator supplying the input pin, as
+		    described in the data sheet
+
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_l1_l3-supply:
+- vdd_l2_lvs1_2_3-supply:
+- vdd_l4_l11-supply:
+- vdd_l5_l7-supply:
+- vdd_l6_l12_l14_l15-supply:
+- vdd_l8_l16_l18_l19-supply:
+- vdd_l9_l10_l17_l22-supply:
+- vdd_l13_l20_l23_l24-supply:
+- vdd_l21-supply:
+- vin_5vs-supply:
+	Usage: optional (pm8941 only)
+	Value type: <phandle>
+	Definition: reference to regulator supplying the input pin, as
+		    described in the data sheet
+
+The regulator node houses sub-nodes for each regulator within the device. Each
+sub-node is identified using the node's name, with valid values listed for each
+of the pmics below.
+
+pm8841:
+	s1, s2, s3, s4, s5, s6, s7, s8
+
+pm8941:
+	s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13,
+	l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2,
+	lvs3, 5vs1, 5vs2
+
+The content of each sub-node is defined by the standard binding for regulators -
+see regulator.txt.
+
+= EXAMPLE
+
+	smd {
+		compatible = "qcom,smd";
+
+		rpm {
+			interrupts = <0 168 1>;
+			qcom,ipc = <&apcs 8 0>;
+			qcom,smd-edge = <15>;
+
+			rpm_requests {
+				compatible = "qcom,rpm-msm8974";
+				qcom,smd-channels = "rpm_requests";
+
+				pm8941-regulators {
+					compatible = "qcom,rpm-pm8941-regulators";
+					vdd_l13_l20_l23_l24-supply = <&pm8941_boost>;
+
+					pm8941_s3: s3 {
+						regulator-min-microvolt = <1800000>;
+						regulator-max-microvolt = <1800000>;
+					};
+
+					pm8941_boost: s4 {
+						regulator-min-microvolt = <5000000>;
+						regulator-max-microvolt = <5000000>;
+					};
+
+					pm8941_l20: l20 {
+						regulator-min-microvolt = <2950000>;
+						regulator-max-microvolt = <2950000>;
+					};
+				};
+			};
+		};
+	};
+
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smd.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,smd.txt
new file mode 100644
index 0000000..f65c76d
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smd.txt
@@ -0,0 +1,79 @@
+Qualcomm Shared Memory Driver (SMD) binding
+
+This binding describes the Qualcomm Shared Memory Driver, a fifo based
+communication channel for sending data between the various subsystems in
+Qualcomm platforms.
+
+- compatible:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "qcom,smd"
+
+= EDGES
+
+Each subnode of the SMD node represents a remote subsystem or a remote
+processor of some sort - or in SMD language an "edge". The name of the edges
+are not important.
+The edge is described by the following properties:
+
+- interrupts:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: should specify the IRQ used by the remote processor to
+		    signal this processor about communication related updates
+
+- qcom,ipc:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: three entries specifying the outgoing ipc bit used for
+		    signaling the remote processor:
+		    - phandle to a syscon node representing the apcs registers
+		    - u32 representing offset to the register within the syscon
+		    - u32 representing the ipc bit within the register
+
+- qcom,smd-edge:
+	Usage: required
+	Value type: <u32>
+	Definition: the identifier of the remote processor in the smd channel
+		    allocation table
+
+= SMD DEVICES
+
+In turn, subnodes of the "edges" represent devices tied to SMD channels on that
+"edge". The names of the devices are not important. The properties of these
+nodes are defined by the individual bindings for the SMD devices - but must
+contain the following property:
+
+- qcom,smd-channels:
+	Usage: required
+	Value type: <stringlist>
+	Definition: a list of channels tied to this device, used for matching
+		    the device to channels
+
+= EXAMPLE
+
+The following example represents a smd node, with one edge representing the
+"rpm" subsystem. For the "rpm" subsystem we have a device tied to the
+"rpm_request" channel.
+
+	apcs: syscon@f9011000 {
+		compatible = "syscon";
+		reg = <0xf9011000 0x1000>;
+	};
+
+	smd {
+		compatible = "qcom,smd";
+
+		rpm {
+			interrupts = <0 168 1>;
+			qcom,ipc = <&apcs 8 0>;
+			qcom,smd-edge = <15>;
+
+			rpm_requests {
+				compatible = "qcom,rpm-msm8974";
+				qcom,smd-channels = "rpm_requests";
+
+				...
+			};
+		};
+	};
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index f732a83..8cc17ca 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -410,8 +410,17 @@
 
 Q: Can I suspend-to-disk using a swap partition under LVM?
 
-A: No. You can suspend successfully, but you'll not be able to
-resume. uswsusp should be able to work with LVM. See suspend.sf.net.
+A: Yes and No.  You can suspend successfully, but the kernel will not be able
+to resume on its own.  You need an initramfs that can recognize the resume
+situation, activate the logical volume containing the swap volume (but not
+touch any filesystems!), and eventually call
+
+echo -n "$major:$minor" > /sys/power/resume
+
+where $major and $minor are the respective major and minor device numbers of
+the swap volume.
+
+uswsusp works with LVM, too.  See http://suspend.sourceforge.net/
 
 Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
 compiled with the similar configuration files. Anyway I found that
diff --git a/MAINTAINERS b/MAINTAINERS
index 8133cef..e44c23f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1614,6 +1614,7 @@
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/boot/dts/vexpress*
+F:	arch/arm64/boot/dts/arm/vexpress*
 F:	arch/arm/mach-vexpress/
 F:	*/*/vexpress*
 F:	*/*/*/vexpress*
@@ -2562,19 +2563,31 @@
 F:	arch/powerpc/oprofile/*cell*
 F:	arch/powerpc/platforms/cell/
 
-CEPH DISTRIBUTED FILE SYSTEM CLIENT
+CEPH COMMON CODE (LIBCEPH)
+M:	Ilya Dryomov <idryomov@gmail.com>
 M:	"Yan, Zheng" <zyan@redhat.com>
 M:	Sage Weil <sage@redhat.com>
 L:	ceph-devel@vger.kernel.org
 W:	http://ceph.com/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+T:	git git://github.com/ceph/ceph-client.git
 S:	Supported
-F:	Documentation/filesystems/ceph.txt
-F:	fs/ceph/
 F:	net/ceph/
 F:	include/linux/ceph/
 F:	include/linux/crush/
 
+CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
+M:	"Yan, Zheng" <zyan@redhat.com>
+M:	Sage Weil <sage@redhat.com>
+M:	Ilya Dryomov <idryomov@gmail.com>
+L:	ceph-devel@vger.kernel.org
+W:	http://ceph.com/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+T:	git git://github.com/ceph/ceph-client.git
+S:	Supported
+F:	Documentation/filesystems/ceph.txt
+F:	fs/ceph/
+
 CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
 L:	linux-usb@vger.kernel.org
 S:	Orphan
@@ -6147,6 +6160,7 @@
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/pmem.c
+F:	include/linux/pmem.h
 
 LINUX FOR IBM pSERIES (RS/6000)
 M:	Paul Mackerras <paulus@au.ibm.com>
@@ -6161,7 +6175,7 @@
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@lists.ozlabs.org
 Q:	http://patchwork.ozlabs.org/project/linuxppc-dev/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
 S:	Supported
 F:	Documentation/powerpc/
 F:	arch/powerpc/
@@ -8366,10 +8380,12 @@
 M:	Ilya Dryomov <idryomov@gmail.com>
 M:	Sage Weil <sage@redhat.com>
 M:	Alex Elder <elder@kernel.org>
-M:	ceph-devel@vger.kernel.org
+L:	ceph-devel@vger.kernel.org
 W:	http://ceph.com/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+T:	git git://github.com/ceph/ceph-client.git
 S:	Supported
+F:	Documentation/ABI/testing/sysfs-bus-rbd
 F:	drivers/block/rbd.c
 F:	drivers/block/rbd_types.h
 
@@ -8537,6 +8553,7 @@
 S:	Maintained
 F:	drivers/reset/
 F:	Documentation/devicetree/bindings/reset/
+F:	include/dt-bindings/reset/
 F:	include/linux/reset.h
 F:	include/linux/reset-controller.h
 
diff --git a/Makefile b/Makefile
index 13270c0..257ef58 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 2
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a750c14..1c50210 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1693,6 +1693,12 @@
 config HIGHPTE
 	bool "Allocate 2nd-level pagetables from highmem"
 	depends on HIGHMEM
+	help
+	  The VM uses one page of physical memory for each page table.
+	  For systems with a lot of processes, this can use a lot of
+	  precious low memory, eventually leading to low memory being
+	  consumed by page tables.  Setting this option will allow
+	  user-space 2nd level page tables to reside in high memory.
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index f1b1579..a2e16f9 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -1635,7 +1635,7 @@
 
 config DEBUG_SET_MODULE_RONX
 	bool "Set loadable kernel module data as NX and text as RO"
-	depends on MODULES
+	depends on MODULES && MMU
 	---help---
 	  This option helps catch unintended modifications to loadable
 	  kernel module's text and read-only data. It also prevents execution
diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts
index 901739f..5c42d25 100644
--- a/arch/arm/boot/dts/am335x-boneblack.dts
+++ b/arch/arm/boot/dts/am335x-boneblack.dts
@@ -80,3 +80,7 @@
 		status = "okay";
 	};
 };
+
+&rtc {
+	system-power-controller;
+};
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index c80a3e2..ade28c79 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -132,6 +132,12 @@
 			};
 		};
 
+		emif: emif@4c000000 {
+			compatible = "ti,emif-am4372";
+			reg = <0x4c000000 0x1000000>;
+			ti,hwmods = "emif";
+		};
+
 		edma: edma@49000000 {
 			compatible = "ti,edma3";
 			ti,hwmods = "tpcc", "tptc0", "tptc1", "tptc2";
@@ -941,6 +947,7 @@
 				ti,hwmods = "dss_rfbi";
 				clocks = <&disp_clk>;
 				clock-names = "fck";
+				status = "disabled";
 			};
 		};
 
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index a42cc37..a63bf78 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -605,6 +605,10 @@
 	phy-supply = <&ldousb_reg>;
 };
 
+&usb2_phy2 {
+	phy-supply = <&ldousb_reg>;
+};
+
 &usb1 {
 	dr_mode = "host";
 	pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/atlas7.dtsi b/arch/arm/boot/dts/atlas7.dtsi
index 5dfd3a4..3e21311 100644
--- a/arch/arm/boot/dts/atlas7.dtsi
+++ b/arch/arm/boot/dts/atlas7.dtsi
@@ -135,6 +135,1025 @@
 			compatible = "sirf,atlas7-ioc";
 			reg = <0x18880000 0x1000>,
 				<0x10E40000 0x1000>;
+
+			audio_ac97_pmx: audio_ac97@0 {
+				audio_ac97 {
+					groups = "audio_ac97_grp";
+					function = "audio_ac97";
+				};
+			};
+
+			audio_func_dbg_pmx: audio_func_dbg@0 {
+				audio_func_dbg {
+					groups = "audio_func_dbg_grp";
+					function = "audio_func_dbg";
+				};
+			};
+
+			audio_i2s_pmx: audio_i2s@0 {
+				audio_i2s {
+					groups = "audio_i2s_grp";
+					function = "audio_i2s";
+				};
+			};
+
+			audio_i2s_2ch_pmx: audio_i2s_2ch@0 {
+				audio_i2s_2ch {
+					groups = "audio_i2s_2ch_grp";
+					function = "audio_i2s_2ch";
+				};
+			};
+
+			audio_i2s_extclk_pmx: audio_i2s_extclk@0 {
+				audio_i2s_extclk {
+					groups = "audio_i2s_extclk_grp";
+					function = "audio_i2s_extclk";
+				};
+			};
+
+			audio_uart0_pmx: audio_uart0@0 {
+				audio_uart0 {
+					groups = "audio_uart0_grp";
+					function = "audio_uart0";
+				};
+			};
+
+			audio_uart1_pmx: audio_uart1@0 {
+				audio_uart1 {
+					groups = "audio_uart1_grp";
+					function = "audio_uart1";
+				};
+			};
+
+			audio_uart2_pmx0: audio_uart2@0 {
+				audio_uart2_0 {
+					groups = "audio_uart2_grp0";
+					function = "audio_uart2_m0";
+				};
+			};
+
+			audio_uart2_pmx1: audio_uart2@1 {
+				audio_uart2_1 {
+					groups = "audio_uart2_grp1";
+					function = "audio_uart2_m1";
+				};
+			};
+
+			c_can_trnsvr_pmx: c_can_trnsvr@0 {
+				c_can_trnsvr {
+					groups = "c_can_trnsvr_grp";
+					function = "c_can_trnsvr";
+				};
+			};
+
+			c0_can_pmx0: c0_can@0 {
+				c0_can_0 {
+					groups = "c0_can_grp0";
+					function = "c0_can_m0";
+				};
+			};
+
+			c0_can_pmx1: c0_can@1 {
+				c0_can_1 {
+					groups = "c0_can_grp1";
+					function = "c0_can_m1";
+				};
+			};
+
+			c1_can_pmx0: c1_can@0 {
+				c1_can_0 {
+					groups = "c1_can_grp0";
+					function = "c1_can_m0";
+				};
+			};
+
+			c1_can_pmx1: c1_can@1 {
+				c1_can_1 {
+					groups = "c1_can_grp1";
+					function = "c1_can_m1";
+				};
+			};
+
+			c1_can_pmx2: c1_can@2 {
+				c1_can_2 {
+					groups = "c1_can_grp2";
+					function = "c1_can_m2";
+				};
+			};
+
+			ca_audio_lpc_pmx: ca_audio_lpc@0 {
+				ca_audio_lpc {
+					groups = "ca_audio_lpc_grp";
+					function = "ca_audio_lpc";
+				};
+			};
+
+			ca_bt_lpc_pmx: ca_bt_lpc@0 {
+				ca_bt_lpc {
+					groups = "ca_bt_lpc_grp";
+					function = "ca_bt_lpc";
+				};
+			};
+
+			ca_coex_pmx: ca_coex@0 {
+				ca_coex {
+					groups = "ca_coex_grp";
+					function = "ca_coex";
+				};
+			};
+
+			ca_curator_lpc_pmx: ca_curator_lpc@0 {
+				ca_curator_lpc {
+					groups = "ca_curator_lpc_grp";
+					function = "ca_curator_lpc";
+				};
+			};
+
+			ca_pcm_debug_pmx: ca_pcm_debug@0 {
+				ca_pcm_debug {
+					groups = "ca_pcm_debug_grp";
+					function = "ca_pcm_debug";
+				};
+			};
+
+			ca_pio_pmx: ca_pio@0 {
+				ca_pio {
+					groups = "ca_pio_grp";
+					function = "ca_pio";
+				};
+			};
+
+			ca_sdio_debug_pmx: ca_sdio_debug@0 {
+				ca_sdio_debug {
+					groups = "ca_sdio_debug_grp";
+					function = "ca_sdio_debug";
+				};
+			};
+
+			ca_spi_pmx: ca_spi@0 {
+				ca_spi {
+					groups = "ca_spi_grp";
+					function = "ca_spi";
+				};
+			};
+
+			ca_trb_pmx: ca_trb@0 {
+				ca_trb {
+					groups = "ca_trb_grp";
+					function = "ca_trb";
+				};
+			};
+
+			ca_uart_debug_pmx: ca_uart_debug@0 {
+				ca_uart_debug {
+					groups = "ca_uart_debug_grp";
+					function = "ca_uart_debug";
+				};
+			};
+
+			clkc_pmx0: clkc@0 {
+				clkc_0 {
+					groups = "clkc_grp0";
+					function = "clkc_m0";
+				};
+			};
+
+			clkc_pmx1: clkc@1 {
+				clkc_1 {
+					groups = "clkc_grp1";
+					function = "clkc_m1";
+				};
+			};
+
+			gn_gnss_i2c_pmx: gn_gnss_i2c@0 {
+				gn_gnss_i2c {
+					groups = "gn_gnss_i2c_grp";
+					function = "gn_gnss_i2c";
+				};
+			};
+
+			gn_gnss_uart_nopause_pmx: gn_gnss_uart_nopause@0 {
+				gn_gnss_uart_nopause {
+					groups = "gn_gnss_uart_nopause_grp";
+					function = "gn_gnss_uart_nopause";
+				};
+			};
+
+			gn_gnss_uart_pmx: gn_gnss_uart@0 {
+				gn_gnss_uart {
+					groups = "gn_gnss_uart_grp";
+					function = "gn_gnss_uart";
+				};
+			};
+
+			gn_trg_spi_pmx0: gn_trg_spi@0 {
+				gn_trg_spi_0 {
+					groups = "gn_trg_spi_grp0";
+					function = "gn_trg_spi_m0";
+				};
+			};
+
+			gn_trg_spi_pmx1: gn_trg_spi@1 {
+				gn_trg_spi_1 {
+					groups = "gn_trg_spi_grp1";
+					function = "gn_trg_spi_m1";
+				};
+			};
+
+			cvbs_dbg_pmx: cvbs_dbg@0 {
+				cvbs_dbg {
+					groups = "cvbs_dbg_grp";
+					function = "cvbs_dbg";
+				};
+			};
+
+			cvbs_dbg_test_pmx0: cvbs_dbg_test@0 {
+				cvbs_dbg_test_0 {
+					groups = "cvbs_dbg_test_grp0";
+					function = "cvbs_dbg_test_m0";
+				};
+			};
+
+			cvbs_dbg_test_pmx1: cvbs_dbg_test@1 {
+				cvbs_dbg_test_1 {
+					groups = "cvbs_dbg_test_grp1";
+					function = "cvbs_dbg_test_m1";
+				};
+			};
+
+			cvbs_dbg_test_pmx2: cvbs_dbg_test@2 {
+				cvbs_dbg_test_2 {
+					groups = "cvbs_dbg_test_grp2";
+					function = "cvbs_dbg_test_m2";
+				};
+			};
+
+			cvbs_dbg_test_pmx3: cvbs_dbg_test@3 {
+				cvbs_dbg_test_3 {
+					groups = "cvbs_dbg_test_grp3";
+					function = "cvbs_dbg_test_m3";
+				};
+			};
+
+			cvbs_dbg_test_pmx4: cvbs_dbg_test@4 {
+				cvbs_dbg_test_4 {
+					groups = "cvbs_dbg_test_grp4";
+					function = "cvbs_dbg_test_m4";
+				};
+			};
+
+			cvbs_dbg_test_pmx5: cvbs_dbg_test@5 {
+				cvbs_dbg_test_5 {
+					groups = "cvbs_dbg_test_grp5";
+					function = "cvbs_dbg_test_m5";
+				};
+			};
+
+			cvbs_dbg_test_pmx6: cvbs_dbg_test@6 {
+				cvbs_dbg_test_6 {
+					groups = "cvbs_dbg_test_grp6";
+					function = "cvbs_dbg_test_m6";
+				};
+			};
+
+			cvbs_dbg_test_pmx7: cvbs_dbg_test@7 {
+				cvbs_dbg_test_7 {
+					groups = "cvbs_dbg_test_grp7";
+					function = "cvbs_dbg_test_m7";
+				};
+			};
+
+			cvbs_dbg_test_pmx8: cvbs_dbg_test@8 {
+				cvbs_dbg_test_8 {
+					groups = "cvbs_dbg_test_grp8";
+					function = "cvbs_dbg_test_m8";
+				};
+			};
+
+			cvbs_dbg_test_pmx9: cvbs_dbg_test@9 {
+				cvbs_dbg_test_9 {
+					groups = "cvbs_dbg_test_grp9";
+					function = "cvbs_dbg_test_m9";
+				};
+			};
+
+			cvbs_dbg_test_pmx10: cvbs_dbg_test@10 {
+				cvbs_dbg_test_10 {
+					groups = "cvbs_dbg_test_grp10";
+					function = "cvbs_dbg_test_m10";
+				};
+			};
+
+			cvbs_dbg_test_pmx11: cvbs_dbg_test@11 {
+				cvbs_dbg_test_11 {
+					groups = "cvbs_dbg_test_grp11";
+					function = "cvbs_dbg_test_m11";
+				};
+			};
+
+			cvbs_dbg_test_pmx12: cvbs_dbg_test@12 {
+				cvbs_dbg_test_12 {
+					groups = "cvbs_dbg_test_grp12";
+					function = "cvbs_dbg_test_m12";
+				};
+			};
+
+			cvbs_dbg_test_pmx13: cvbs_dbg_test@13 {
+				cvbs_dbg_test_13 {
+					groups = "cvbs_dbg_test_grp13";
+					function = "cvbs_dbg_test_m13";
+				};
+			};
+
+			cvbs_dbg_test_pmx14: cvbs_dbg_test@14 {
+				cvbs_dbg_test_14 {
+					groups = "cvbs_dbg_test_grp14";
+					function = "cvbs_dbg_test_m14";
+				};
+			};
+
+			cvbs_dbg_test_pmx15: cvbs_dbg_test@15 {
+				cvbs_dbg_test_15 {
+					groups = "cvbs_dbg_test_grp15";
+					function = "cvbs_dbg_test_m15";
+				};
+			};
+
+			gn_gnss_power_pmx: gn_gnss_power@0 {
+				gn_gnss_power {
+					groups = "gn_gnss_power_grp";
+					function = "gn_gnss_power";
+				};
+			};
+
+			gn_gnss_sw_status_pmx: gn_gnss_sw_status@0 {
+				gn_gnss_sw_status {
+					groups = "gn_gnss_sw_status_grp";
+					function = "gn_gnss_sw_status";
+				};
+			};
+
+			gn_gnss_eclk_pmx: gn_gnss_eclk@0 {
+				gn_gnss_eclk {
+					groups = "gn_gnss_eclk_grp";
+					function = "gn_gnss_eclk";
+				};
+			};
+
+			gn_gnss_irq1_pmx0: gn_gnss_irq1@0 {
+				gn_gnss_irq1_0 {
+					groups = "gn_gnss_irq1_grp0";
+					function = "gn_gnss_irq1_m0";
+				};
+			};
+
+			gn_gnss_irq2_pmx0: gn_gnss_irq2@0 {
+				gn_gnss_irq2_0 {
+					groups = "gn_gnss_irq2_grp0";
+					function = "gn_gnss_irq2_m0";
+				};
+			};
+
+			gn_gnss_tm_pmx: gn_gnss_tm@0 {
+				gn_gnss_tm {
+					groups = "gn_gnss_tm_grp";
+					function = "gn_gnss_tm";
+				};
+			};
+
+			gn_gnss_tsync_pmx: gn_gnss_tsync@0 {
+				gn_gnss_tsync {
+					groups = "gn_gnss_tsync_grp";
+					function = "gn_gnss_tsync";
+				};
+			};
+
+			gn_io_gnsssys_sw_cfg_pmx: gn_io_gnsssys_sw_cfg@0 {
+				gn_io_gnsssys_sw_cfg {
+					groups = "gn_io_gnsssys_sw_cfg_grp";
+					function = "gn_io_gnsssys_sw_cfg";
+				};
+			};
+
+			gn_trg_pmx0: gn_trg@0 {
+				gn_trg_0 {
+					groups = "gn_trg_grp0";
+					function = "gn_trg_m0";
+				};
+			};
+
+			gn_trg_pmx1: gn_trg@1 {
+				gn_trg_1 {
+					groups = "gn_trg_grp1";
+					function = "gn_trg_m1";
+				};
+			};
+
+			gn_trg_shutdown_pmx0: gn_trg_shutdown@0 {
+				gn_trg_shutdown_0 {
+					groups = "gn_trg_shutdown_grp0";
+					function = "gn_trg_shutdown_m0";
+				};
+			};
+
+			gn_trg_shutdown_pmx1: gn_trg_shutdown@1 {
+				gn_trg_shutdown_1 {
+					groups = "gn_trg_shutdown_grp1";
+					function = "gn_trg_shutdown_m1";
+				};
+			};
+
+			gn_trg_shutdown_pmx2: gn_trg_shutdown@2 {
+				gn_trg_shutdown_2 {
+					groups = "gn_trg_shutdown_grp2";
+					function = "gn_trg_shutdown_m2";
+				};
+			};
+
+			gn_trg_shutdown_pmx3: gn_trg_shutdown@3 {
+				gn_trg_shutdown_3 {
+					groups = "gn_trg_shutdown_grp3";
+					function = "gn_trg_shutdown_m3";
+				};
+			};
+
+			i2c0_pmx: i2c0@0 {
+				i2c0 {
+					groups = "i2c0_grp";
+					function = "i2c0";
+				};
+			};
+
+			i2c1_pmx: i2c1@0 {
+				i2c1 {
+					groups = "i2c1_grp";
+					function = "i2c1";
+				};
+			};
+
+			jtag_pmx0: jtag@0 {
+				jtag_0 {
+					groups = "jtag_grp0";
+					function = "jtag_m0";
+				};
+			};
+
+			ks_kas_spi_pmx0: ks_kas_spi@0 {
+				ks_kas_spi_0 {
+					groups = "ks_kas_spi_grp0";
+					function = "ks_kas_spi_m0";
+				};
+			};
+
+			ld_ldd_pmx: ld_ldd@0 {
+				ld_ldd {
+					groups = "ld_ldd_grp";
+					function = "ld_ldd";
+				};
+			};
+
+			ld_ldd_16bit_pmx: ld_ldd_16bit@0 {
+				ld_ldd_16bit {
+					groups = "ld_ldd_16bit_grp";
+					function = "ld_ldd_16bit";
+				};
+			};
+
+			ld_ldd_fck_pmx: ld_ldd_fck@0 {
+				ld_ldd_fck {
+					groups = "ld_ldd_fck_grp";
+					function = "ld_ldd_fck";
+				};
+			};
+
+			ld_ldd_lck_pmx: ld_ldd_lck@0 {
+				ld_ldd_lck {
+					groups = "ld_ldd_lck_grp";
+					function = "ld_ldd_lck";
+				};
+			};
+
+			lr_lcdrom_pmx: lr_lcdrom@0 {
+				lr_lcdrom {
+					groups = "lr_lcdrom_grp";
+					function = "lr_lcdrom";
+				};
+			};
+
+			lvds_analog_pmx: lvds_analog@0 {
+				lvds_analog {
+					groups = "lvds_analog_grp";
+					function = "lvds_analog";
+				};
+			};
+
+			nd_df_pmx: nd_df@0 {
+				nd_df {
+					groups = "nd_df_grp";
+					function = "nd_df";
+				};
+			};
+
+			nd_df_nowp_pmx: nd_df_nowp@0 {
+				nd_df_nowp {
+					groups = "nd_df_nowp_grp";
+					function = "nd_df_nowp";
+				};
+			};
+
+			ps_pmx: ps@0 {
+				ps {
+					groups = "ps_grp";
+					function = "ps";
+				};
+			};
+
+			pwc_core_on_pmx: pwc_core_on@0 {
+				pwc_core_on {
+					groups = "pwc_core_on_grp";
+					function = "pwc_core_on";
+				};
+			};
+
+			pwc_ext_on_pmx: pwc_ext_on@0 {
+				pwc_ext_on {
+					groups = "pwc_ext_on_grp";
+					function = "pwc_ext_on";
+				};
+			};
+
+			pwc_gpio3_clk_pmx: pwc_gpio3_clk@0 {
+				pwc_gpio3_clk {
+					groups = "pwc_gpio3_clk_grp";
+					function = "pwc_gpio3_clk";
+				};
+			};
+
+			pwc_io_on_pmx: pwc_io_on@0 {
+				pwc_io_on {
+					groups = "pwc_io_on_grp";
+					function = "pwc_io_on";
+				};
+			};
+
+			pwc_lowbatt_b_pmx0: pwc_lowbatt_b@0 {
+				pwc_lowbatt_b_0 {
+					groups = "pwc_lowbatt_b_grp0";
+					function = "pwc_lowbatt_b_m0";
+				};
+			};
+
+			pwc_mem_on_pmx: pwc_mem_on@0 {
+				pwc_mem_on {
+					groups = "pwc_mem_on_grp";
+					function = "pwc_mem_on";
+				};
+			};
+
+			pwc_on_key_b_pmx0: pwc_on_key_b@0 {
+				pwc_on_key_b_0 {
+					groups = "pwc_on_key_b_grp0";
+					function = "pwc_on_key_b_m0";
+				};
+			};
+
+			pwc_wakeup_src0_pmx: pwc_wakeup_src0@0 {
+				pwc_wakeup_src0 {
+					groups = "pwc_wakeup_src0_grp";
+					function = "pwc_wakeup_src0";
+				};
+			};
+
+			pwc_wakeup_src1_pmx: pwc_wakeup_src1@0 {
+				pwc_wakeup_src1 {
+					groups = "pwc_wakeup_src1_grp";
+					function = "pwc_wakeup_src1";
+				};
+			};
+
+			pwc_wakeup_src2_pmx: pwc_wakeup_src2@0 {
+				pwc_wakeup_src2 {
+					groups = "pwc_wakeup_src2_grp";
+					function = "pwc_wakeup_src2";
+				};
+			};
+
+			pwc_wakeup_src3_pmx: pwc_wakeup_src3@0 {
+				pwc_wakeup_src3 {
+					groups = "pwc_wakeup_src3_grp";
+					function = "pwc_wakeup_src3";
+				};
+			};
+
+			pw_cko0_pmx0: pw_cko0@0 {
+				pw_cko0_0 {
+					groups = "pw_cko0_grp0";
+					function = "pw_cko0_m0";
+				};
+			};
+
+			pw_cko0_pmx1: pw_cko0@1 {
+				pw_cko0_1 {
+					groups = "pw_cko0_grp1";
+					function = "pw_cko0_m1";
+				};
+			};
+
+			pw_cko0_pmx2: pw_cko0@2 {
+				pw_cko0_2 {
+					groups = "pw_cko0_grp2";
+					function = "pw_cko0_m2";
+				};
+			};
+
+			pw_cko1_pmx0: pw_cko1@0 {
+				pw_cko1_0 {
+					groups = "pw_cko1_grp0";
+					function = "pw_cko1_m0";
+				};
+			};
+
+			pw_cko1_pmx1: pw_cko1@1 {
+				pw_cko1_1 {
+					groups = "pw_cko1_grp1";
+					function = "pw_cko1_m1";
+				};
+			};
+
+			pw_i2s01_clk_pmx0: pw_i2s01_clk@0 {
+				pw_i2s01_clk_0 {
+					groups = "pw_i2s01_clk_grp0";
+					function = "pw_i2s01_clk_m0";
+				};
+			};
+
+			pw_i2s01_clk_pmx1: pw_i2s01_clk@1 {
+				pw_i2s01_clk_1 {
+					groups = "pw_i2s01_clk_grp1";
+					function = "pw_i2s01_clk_m1";
+				};
+			};
+
+			pw_pwm0_pmx: pw_pwm0@0 {
+				pw_pwm0 {
+					groups = "pw_pwm0_grp";
+					function = "pw_pwm0";
+				};
+			};
+
+			pw_pwm1_pmx: pw_pwm1@0 {
+				pw_pwm1 {
+					groups = "pw_pwm1_grp";
+					function = "pw_pwm1";
+				};
+			};
+
+			pw_pwm2_pmx0: pw_pwm2@0 {
+				pw_pwm2_0 {
+					groups = "pw_pwm2_grp0";
+					function = "pw_pwm2_m0";
+				};
+			};
+
+			pw_pwm2_pmx1: pw_pwm2@1 {
+				pw_pwm2_1 {
+					groups = "pw_pwm2_grp1";
+					function = "pw_pwm2_m1";
+				};
+			};
+
+			pw_pwm3_pmx0: pw_pwm3@0 {
+				pw_pwm3_0 {
+					groups = "pw_pwm3_grp0";
+					function = "pw_pwm3_m0";
+				};
+			};
+
+			pw_pwm3_pmx1: pw_pwm3@1 {
+				pw_pwm3_1 {
+					groups = "pw_pwm3_grp1";
+					function = "pw_pwm3_m1";
+				};
+			};
+
+			pw_pwm_cpu_vol_pmx0: pw_pwm_cpu_vol@0 {
+				pw_pwm_cpu_vol_0 {
+					groups = "pw_pwm_cpu_vol_grp0";
+					function = "pw_pwm_cpu_vol_m0";
+				};
+			};
+
+			pw_pwm_cpu_vol_pmx1: pw_pwm_cpu_vol@1 {
+				pw_pwm_cpu_vol_1 {
+					groups = "pw_pwm_cpu_vol_grp1";
+					function = "pw_pwm_cpu_vol_m1";
+				};
+			};
+
+			pw_backlight_pmx0: pw_backlight@0 {
+				pw_backlight_0 {
+					groups = "pw_backlight_grp0";
+					function = "pw_backlight_m0";
+				};
+			};
+
+			pw_backlight_pmx1: pw_backlight@1 {
+				pw_backlight_1 {
+					groups = "pw_backlight_grp1";
+					function = "pw_backlight_m1";
+				};
+			};
+
+			rg_eth_mac_pmx: rg_eth_mac@0 {
+				rg_eth_mac {
+					groups = "rg_eth_mac_grp";
+					function = "rg_eth_mac";
+				};
+			};
+
+			rg_gmac_phy_intr_n_pmx: rg_gmac_phy_intr_n@0 {
+				rg_gmac_phy_intr_n {
+					groups = "rg_gmac_phy_intr_n_grp";
+					function = "rg_gmac_phy_intr_n";
+				};
+			};
+
+			rg_rgmii_mac_pmx: rg_rgmii_mac@0 {
+				rg_rgmii_mac {
+					groups = "rg_rgmii_mac_grp";
+					function = "rg_rgmii_mac";
+				};
+			};
+
+			rg_rgmii_phy_ref_clk_pmx0: rg_rgmii_phy_ref_clk@0 {
+				rg_rgmii_phy_ref_clk_0 {
+					groups =
+						"rg_rgmii_phy_ref_clk_grp0";
+					function =
+						"rg_rgmii_phy_ref_clk_m0";
+				};
+			};
+
+			rg_rgmii_phy_ref_clk_pmx1: rg_rgmii_phy_ref_clk@1 {
+				rg_rgmii_phy_ref_clk_1 {
+					groups =
+						"rg_rgmii_phy_ref_clk_grp1";
+					function =
+						"rg_rgmii_phy_ref_clk_m1";
+				};
+			};
+
+			sd0_pmx: sd0@0 {
+				sd0 {
+					groups = "sd0_grp";
+					function = "sd0";
+				};
+			};
+
+			sd0_4bit_pmx: sd0_4bit@0 {
+				sd0_4bit {
+					groups = "sd0_4bit_grp";
+					function = "sd0_4bit";
+				};
+			};
+
+			sd1_pmx: sd1@0 {
+				sd1 {
+					groups = "sd1_grp";
+					function = "sd1";
+				};
+			};
+
+			sd1_4bit_pmx0: sd1_4bit@0 {
+				sd1_4bit_0 {
+					groups = "sd1_4bit_grp0";
+					function = "sd1_4bit_m0";
+				};
+			};
+
+			sd1_4bit_pmx1: sd1_4bit@1 {
+				sd1_4bit_1 {
+					groups = "sd1_4bit_grp1";
+					function = "sd1_4bit_m1";
+				};
+			};
+
+			sd2_pmx0: sd2@0 {
+				sd2_0 {
+					groups = "sd2_grp0";
+					function = "sd2_m0";
+				};
+			};
+
+			sd2_no_cdb_pmx0: sd2_no_cdb@0 {
+				sd2_no_cdb_0 {
+					groups = "sd2_no_cdb_grp0";
+					function = "sd2_no_cdb_m0";
+				};
+			};
+
+			sd3_pmx: sd3@0 {
+				sd3 {
+					groups = "sd3_grp";
+					function = "sd3";
+				};
+			};
+
+			sd5_pmx: sd5@0 {
+				sd5 {
+					groups = "sd5_grp";
+					function = "sd5";
+				};
+			};
+
+			sd6_pmx0: sd6@0 {
+				sd6_0 {
+					groups = "sd6_grp0";
+					function = "sd6_m0";
+				};
+			};
+
+			sd6_pmx1: sd6@1 {
+				sd6_1 {
+					groups = "sd6_grp1";
+					function = "sd6_m1";
+				};
+			};
+
+			sp0_ext_ldo_on_pmx: sp0_ext_ldo_on@0 {
+				sp0_ext_ldo_on {
+					groups = "sp0_ext_ldo_on_grp";
+					function = "sp0_ext_ldo_on";
+				};
+			};
+
+			sp0_qspi_pmx: sp0_qspi@0 {
+				sp0_qspi {
+					groups = "sp0_qspi_grp";
+					function = "sp0_qspi";
+				};
+			};
+
+			sp1_spi_pmx: sp1_spi@0 {
+				sp1_spi {
+					groups = "sp1_spi_grp";
+					function = "sp1_spi";
+				};
+			};
+
+			tpiu_trace_pmx: tpiu_trace@0 {
+				tpiu_trace {
+					groups = "tpiu_trace_grp";
+					function = "tpiu_trace";
+				};
+			};
+
+			uart0_pmx: uart0@0 {
+				uart0 {
+					groups = "uart0_grp";
+					function = "uart0";
+				};
+			};
+
+			uart0_nopause_pmx: uart0_nopause@0 {
+				uart0_nopause {
+					groups = "uart0_nopause_grp";
+					function = "uart0_nopause";
+				};
+			};
+
+			uart1_pmx: uart1@0 {
+				uart1 {
+					groups = "uart1_grp";
+					function = "uart1";
+				};
+			};
+
+			uart2_pmx: uart2@0 {
+				uart2 {
+					groups = "uart2_grp";
+					function = "uart2";
+				};
+			};
+
+			uart3_pmx0: uart3@0 {
+				uart3_0 {
+					groups = "uart3_grp0";
+					function = "uart3_m0";
+				};
+			};
+
+			uart3_pmx1: uart3@1 {
+				uart3_1 {
+					groups = "uart3_grp1";
+					function = "uart3_m1";
+				};
+			};
+
+			uart3_pmx2: uart3@2 {
+				uart3_2 {
+					groups = "uart3_grp2";
+					function = "uart3_m2";
+				};
+			};
+
+			uart3_pmx3: uart3@3 {
+				uart3_3 {
+					groups = "uart3_grp3";
+					function = "uart3_m3";
+				};
+			};
+
+			uart3_nopause_pmx0: uart3_nopause@0 {
+				uart3_nopause_0 {
+					groups = "uart3_nopause_grp0";
+					function = "uart3_nopause_m0";
+				};
+			};
+
+			uart3_nopause_pmx1: uart3_nopause@1 {
+				uart3_nopause_1 {
+					groups = "uart3_nopause_grp1";
+					function = "uart3_nopause_m1";
+				};
+			};
+
+			uart4_pmx0: uart4@0 {
+				uart4_0 {
+					groups = "uart4_grp0";
+					function = "uart4_m0";
+				};
+			};
+
+			uart4_pmx1: uart4@1 {
+				uart4_1 {
+					groups = "uart4_grp1";
+					function = "uart4_m1";
+				};
+			};
+
+			uart4_pmx2: uart4@2 {
+				uart4_2 {
+					groups = "uart4_grp2";
+					function = "uart4_m2";
+				};
+			};
+
+			uart4_nopause_pmx: uart4_nopause@0 {
+				uart4_nopause {
+					groups = "uart4_nopause_grp";
+					function = "uart4_nopause";
+				};
+			};
+
+			usb0_drvvbus_pmx: usb0_drvvbus@0 {
+				usb0_drvvbus {
+					groups = "usb0_drvvbus_grp";
+					function = "usb0_drvvbus";
+				};
+			};
+
+			usb1_drvvbus_pmx: usb1_drvvbus@0 {
+				usb1_drvvbus {
+					groups = "usb1_drvvbus_grp";
+					function = "usb1_drvvbus";
+				};
+			};
+
+			visbus_dout_pmx: visbus_dout@0 {
+				visbus_dout {
+					groups = "visbus_dout_grp";
+					function = "visbus_dout";
+				};
+			};
+
+			vi_vip1_pmx: vi_vip1@0 {
+				vi_vip1 {
+					groups = "vi_vip1_grp";
+					function = "vi_vip1";
+				};
+			};
+
+			vi_vip1_ext_pmx: vi_vip1_ext@0 {
+				vi_vip1_ext {
+					groups = "vi_vip1_ext_grp";
+					function = "vi_vip1_ext";
+				};
+			};
+
+			vi_vip1_low8bit_pmx: vi_vip1_low8bit@0 {
+				vi_vip1_low8bit {
+					groups = "vi_vip1_low8bit_grp";
+					function = "vi_vip1_low8bit";
+				};
+			};
+
+			vi_vip1_high8bit_pmx: vi_vip1_high8bit@0 {
+				vi_vip1_high8bit {
+					groups = "vi_vip1_high8bit_grp";
+					function = "vi_vip1_high8bit";
+				};
+			};
 		};
 
 		pmipc {
@@ -356,6 +1375,12 @@
 				clock-names = "gpio0_io";
 				gpio-controller;
 				interrupt-controller;
+
+				gpio-banks = <2>;
+				gpio-ranges = <&pinctrl 0 0 0>,
+						<&pinctrl 32 0 0>;
+				gpio-ranges-group-names = "lvds_gpio_grp",
+							"uart_nand_gpio_grp";
 			};
 
 			nand@17050000 {
@@ -461,11 +1486,22 @@
 				#interrupt-cells = <2>;
 				compatible = "sirf,atlas7-gpio";
 				reg = <0x13300000 0x1000>;
-				interrupts = <0 43 0>, <0 44 0>, <0 45 0>;
+				interrupts = <0 43 0>, <0 44 0>,
+						<0 45 0>, <0 46 0>;
 				clocks = <&car 84>;
 				clock-names = "gpio1_io";
 				gpio-controller;
 				interrupt-controller;
+
+				gpio-banks = <4>;
+				gpio-ranges = <&pinctrl 0 0 0>,
+						<&pinctrl 32 0 0>,
+						<&pinctrl 64 0 0>,
+						<&pinctrl 96 0 0>;
+				gpio-ranges-group-names = "gnss_gpio_grp",
+							"lcd_vip_gpio_grp",
+							"sdio_i2s_gpio_grp",
+							"sp_rgmii_gpio_grp";
 			};
 
 			sd2: sdhci@14200000 {
@@ -744,6 +1780,10 @@
 				interrupts = <0 47 0>;
 				gpio-controller;
 				interrupt-controller;
+
+				gpio-banks = <1>;
+				gpio-ranges = <&pinctrl 0 0 0>;
+				gpio-ranges-group-names = "rtc_gpio_grp";
 			};
 
 			rtc-iobg@18840000 {
diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi
index 838b812..eab3477 100644
--- a/arch/arm/boot/dts/stih407-family.dtsi
+++ b/arch/arm/boot/dts/stih407-family.dtsi
@@ -9,7 +9,7 @@
 #include "stih407-pinctrl.dtsi"
 #include <dt-bindings/mfd/st-lpc.h>
 #include <dt-bindings/phy/phy.h>
-#include <dt-bindings/reset-controller/stih407-resets.h>
+#include <dt-bindings/reset/stih407-resets.h>
 #include <dt-bindings/interrupt-controller/irq-st.h>
 / {
 	#address-cells = <1>;
diff --git a/arch/arm/boot/dts/stih415.dtsi b/arch/arm/boot/dts/stih415.dtsi
index 19b019b..12427e6 100644
--- a/arch/arm/boot/dts/stih415.dtsi
+++ b/arch/arm/boot/dts/stih415.dtsi
@@ -10,7 +10,7 @@
 #include "stih415-clock.dtsi"
 #include "stih415-pinctrl.dtsi"
 #include <dt-bindings/interrupt-controller/arm-gic.h>
-#include <dt-bindings/reset-controller/stih415-resets.h>
+#include <dt-bindings/reset/stih415-resets.h>
 / {
 
 	L2: cache-controller {
diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi
index 9dca173..9e3170c 100644
--- a/arch/arm/boot/dts/stih416.dtsi
+++ b/arch/arm/boot/dts/stih416.dtsi
@@ -12,7 +12,7 @@
 
 #include <dt-bindings/phy/phy.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
-#include <dt-bindings/reset-controller/stih416-resets.h>
+#include <dt-bindings/reset/stih416-resets.h>
 #include <dt-bindings/interrupt-controller/irq-st.h>
 / {
 	L2: cache-controller {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 107395c..17f63f7 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -150,6 +150,16 @@
 			interface-type = "ace";
 			reg = <0x5000 0x1000>;
 		};
+
+		pmu@9000 {
+			 compatible = "arm,cci-400-pmu,r0";
+			 reg = <0x9000 0x5000>;
+			 interrupts = <0 105 4>,
+				      <0 101 4>,
+				      <0 102 4>,
+				      <0 103 4>,
+				      <0 104 4>;
+		};
 	};
 
 	memory-controller@7ffd0000 {
@@ -187,11 +197,22 @@
 			     <1 10 0xf08>;
 	};
 
-	pmu {
+	pmu_a15 {
 		compatible = "arm,cortex-a15-pmu";
 		interrupts = <0 68 4>,
 			     <0 69 4>;
-		interrupt-affinity = <&cpu0>, <&cpu1>;
+		interrupt-affinity = <&cpu0>,
+				     <&cpu1>;
+	};
+
+	pmu_a7 {
+		compatible = "arm,cortex-a7-pmu";
+		interrupts = <0 128 4>,
+			     <0 129 4>,
+			     <0 130 4>;
+		interrupt-affinity = <&cpu2>,
+				     <&cpu3>,
+				     <&cpu4>;
 	};
 
 	oscclk6a: oscclk6a {
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 6d83a1b..5fd8df6 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -353,7 +353,6 @@
 CONFIG_POWER_RESET_GPIO=y
 CONFIG_POWER_RESET_GPIO_RESTART=y
 CONFIG_POWER_RESET_KEYSTONE=y
-CONFIG_POWER_RESET_SUN6I=y
 CONFIG_POWER_RESET_RMOBILE=y
 CONFIG_SENSORS_LM90=y
 CONFIG_SENSORS_LM95245=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 8ecba00..7ebc346b 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -2,6 +2,7 @@
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_PERF_EVENTS=y
+CONFIG_MODULES=y
 CONFIG_ARCH_SUNXI=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
@@ -77,7 +78,6 @@
 CONFIG_GPIO_SYSFS=y
 CONFIG_POWER_SUPPLY=y
 CONFIG_POWER_RESET=y
-CONFIG_POWER_RESET_SUN6I=y
 CONFIG_THERMAL=y
 CONFIG_CPU_THERMAL=y
 CONFIG_WATCHDOG=y
@@ -87,6 +87,10 @@
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_REGULATOR_AXP20X=y
 CONFIG_REGULATOR_GPIO=y
+CONFIG_FB=y
+CONFIG_FB_SIMPLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 1c3938f..4859820 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -140,16 +140,11 @@
  * The _caller variety takes a __builtin_return_address(0) value for
  * /proc/vmalloc to use - and should only be used in non-inline functions.
  */
-extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long,
-	size_t, unsigned int, void *);
 extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int,
 	void *);
-
 extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
-extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int);
 extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached);
 extern void __iounmap(volatile void __iomem *addr);
-extern void __arm_iounmap(volatile void __iomem *addr);
 
 extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
 	unsigned int, void *);
@@ -321,21 +316,24 @@
 static inline void memset_io(volatile void __iomem *dst, unsigned c,
 	size_t count)
 {
-	memset((void __force *)dst, c, count);
+	extern void mmioset(void *, unsigned int, size_t);
+	mmioset((void __force *)dst, c, count);
 }
 #define memset_io(dst,c,count) memset_io(dst,c,count)
 
 static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
 	size_t count)
 {
-	memcpy(to, (const void __force *)from, count);
+	extern void mmiocpy(void *, const void *, size_t);
+	mmiocpy(to, (const void __force *)from, count);
 }
 #define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count)
 
 static inline void memcpy_toio(volatile void __iomem *to, const void *from,
 	size_t count)
 {
-	memcpy((void __force *)to, from, count);
+	extern void mmiocpy(void *, const void *, size_t);
+	mmiocpy((void __force *)to, from, count);
 }
 #define memcpy_toio(to,from,count) memcpy_toio(to,from,count)
 
@@ -348,18 +346,61 @@
 #endif	/* readl */
 
 /*
- * ioremap and friends.
+ * ioremap() and friends.
  *
- * ioremap takes a PCI memory address, as specified in
- * Documentation/io-mapping.txt.
+ * ioremap() takes a resource address, and size.  Due to the ARM memory
+ * types, it is important to use the correct ioremap() function as each
+ * mapping has specific properties.
  *
+ * Function		Memory type	Cacheability	Cache hint
+ * ioremap()		Device		n/a		n/a
+ * ioremap_nocache()	Device		n/a		n/a
+ * ioremap_cache()	Normal		Writeback	Read allocate
+ * ioremap_wc()		Normal		Non-cacheable	n/a
+ * ioremap_wt()		Normal		Non-cacheable	n/a
+ *
+ * All device mappings have the following properties:
+ * - no access speculation
+ * - no repetition (eg, on return from an exception)
+ * - number, order and size of accesses are maintained
+ * - unaligned accesses are "unpredictable"
+ * - writes may be delayed before they hit the endpoint device
+ *
+ * ioremap_nocache() is the same as ioremap() as there are too many device
+ * drivers using this for device registers, and documentation which tells
+ * people to use it for such for this to be any different.  This is not a
+ * safe fallback for memory-like mappings, or memory regions where the
+ * compiler may generate unaligned accesses - eg, via inlining its own
+ * memcpy.
+ *
+ * All normal memory mappings have the following properties:
+ * - reads can be repeated with no side effects
+ * - repeated reads return the last value written
+ * - reads can fetch additional locations without side effects
+ * - writes can be repeated (in certain cases) with no side effects
+ * - writes can be merged before accessing the target
+ * - unaligned accesses can be supported
+ * - ordering is not guaranteed without explicit dependencies or barrier
+ *   instructions
+ * - writes may be delayed before they hit the endpoint memory
+ *
+ * The cache hint is only a performance hint: CPUs may alias these hints.
+ * Eg, a CPU not implementing read allocate but implementing write allocate
+ * will provide a write allocate mapping instead.
  */
-#define ioremap(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE)
-#define ioremap_nocache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE)
-#define ioremap_cache(cookie,size)	__arm_ioremap((cookie), (size), MT_DEVICE_CACHED)
-#define ioremap_wc(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE_WC)
-#define ioremap_wt(cookie,size)		__arm_ioremap((cookie), (size), MT_DEVICE)
-#define iounmap				__arm_iounmap
+void __iomem *ioremap(resource_size_t res_cookie, size_t size);
+#define ioremap ioremap
+#define ioremap_nocache ioremap
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size);
+#define ioremap_cache ioremap_cache
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size);
+#define ioremap_wc ioremap_wc
+#define ioremap_wt ioremap_wc
+
+void iounmap(volatile void __iomem *iomem_cookie);
+#define iounmap iounmap
 
 /*
  * io{read,write}{16,32}be() macros
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 3a72d69..6f225ac 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -275,7 +275,7 @@
  */
 #define __pa(x)			__virt_to_phys((unsigned long)(x))
 #define __va(x)			((void *)__phys_to_virt((phys_addr_t)(x)))
-#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+#define pfn_to_kaddr(pfn)	__va((phys_addr_t)(pfn) << PAGE_SHIFT)
 
 extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x);
 
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index bfd662e..aeddd28 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -129,7 +129,36 @@
 
 /*
  * These are the memory types, defined to be compatible with
- * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
+ * pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B
+ * ARMv6+ without TEX remapping, they are a table index.
+ * ARMv6+ with TEX remapping, they correspond to n/a,TEX(0),C,B
+ *
+ * MT type		Pre-ARMv6	ARMv6+ type / cacheable status
+ * UNCACHED		Uncached	Strongly ordered
+ * BUFFERABLE		Bufferable	Normal memory / non-cacheable
+ * WRITETHROUGH		Writethrough	Normal memory / write through
+ * WRITEBACK		Writeback	Normal memory / write back, read alloc
+ * MINICACHE		Minicache	N/A
+ * WRITEALLOC		Writeback	Normal memory / write back, write alloc
+ * DEV_SHARED		Uncached	Device memory (shared)
+ * DEV_NONSHARED	Uncached	Device memory (non-shared)
+ * DEV_WC		Bufferable	Normal memory / non-cacheable
+ * DEV_CACHED		Writeback	Normal memory / write back, read alloc
+ * VECTORS		Variable	Normal memory / variable
+ *
+ * All normal memory mappings have the following properties:
+ * - reads can be repeated with no side effects
+ * - repeated reads return the last value written
+ * - reads can fetch additional locations without side effects
+ * - writes can be repeated (in certain cases) with no side effects
+ * - writes can be merged before accessing the target
+ * - unaligned accesses can be supported
+ *
+ * All device mappings have the following properties:
+ * - no access speculation
+ * - no repetition (eg, on return from an exception)
+ * - number, order and size of accesses are maintained
+ * - unaligned accesses are "unpredictable"
  */
 #define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
 #define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index a88671c..5e5a51a 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -50,6 +50,9 @@
 
 extern void fpundefinstr(void);
 
+void mmioset(void *, unsigned int, size_t);
+void mmiocpy(void *, const void *, size_t);
+
 	/* platform dependent support */
 EXPORT_SYMBOL(arm_delay_ops);
 
@@ -88,6 +91,9 @@
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(__memzero);
 
+EXPORT_SYMBOL(mmioset);
+EXPORT_SYMBOL(mmiocpy);
+
 #ifdef CONFIG_MMU
 EXPORT_SYMBOL(copy_page);
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7dac308..cb4fb1e 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -410,7 +410,7 @@
 	zero_fp
 
 	.if	\trace
-#ifdef CONFIG_IRQSOFF_TRACER
+#ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
 	ct_user_exit save = 0
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 90dfbed..3d6b782 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -578,7 +578,7 @@
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	if ((unsigned)ipinr < NR_IPI) {
-		trace_ipi_entry(ipi_types[ipinr]);
+		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
 		__inc_irq_stat(cpu, ipi_irqs[ipinr]);
 	}
 
@@ -637,7 +637,7 @@
 	}
 
 	if ((unsigned)ipinr < NR_IPI)
-		trace_ipi_exit(ipi_types[ipinr]);
+		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
 	set_irq_regs(old_regs);
 }
 
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index 7797e81..64111bd 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -61,8 +61,10 @@
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
+ENTRY(mmiocpy)
 ENTRY(memcpy)
 
 #include "copy_template.S"
 
 ENDPROC(memcpy)
+ENDPROC(mmiocpy)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index a4ee97b..3c65e3b 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -16,6 +16,7 @@
 	.text
 	.align	5
 
+ENTRY(mmioset)
 ENTRY(memset)
 UNWIND( .fnstart         )
 	ands	r3, r0, #3		@ 1 unaligned?
@@ -133,3 +134,4 @@
 	b	1b
 UNWIND( .fnend   )
 ENDPROC(memset)
+ENDPROC(mmioset)
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 9747316..c86a5a0 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -96,6 +96,7 @@
 	select MACH_MVEBU_ANY
 	select ORION_IRQCHIP
 	select ORION_TIMER
+	select PM_GENERIC_DOMAINS if PM
 	select PINCTRL_DOVE
 	help
 	  Say 'Y' here if you want your kernel to support the
diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c
index e46e9ea..44eedf3 100644
--- a/arch/arm/mach-mvebu/coherency.c
+++ b/arch/arm/mach-mvebu/coherency.c
@@ -65,18 +65,6 @@
 int ll_enable_coherency(void);
 void ll_add_cpu_to_smp_group(void);
 
-int set_cpu_coherent(void)
-{
-	if (!coherency_base) {
-		pr_warn("Can't make current CPU cache coherent.\n");
-		pr_warn("Coherency fabric is not initialized\n");
-		return 1;
-	}
-
-	ll_add_cpu_to_smp_group();
-	return ll_enable_coherency();
-}
-
 static int mvebu_hwcc_notifier(struct notifier_block *nb,
 			       unsigned long event, void *__dev)
 {
@@ -206,6 +194,23 @@
 	return type;
 }
 
+int set_cpu_coherent(void)
+{
+	int type = coherency_type();
+
+	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP) {
+		if (!coherency_base) {
+			pr_warn("Can't make current CPU cache coherent.\n");
+			pr_warn("Coherency fabric is not initialized\n");
+			return 1;
+		}
+		ll_add_cpu_to_smp_group();
+		return ll_enable_coherency();
+	}
+
+	return 0;
+}
+
 int coherency_available(void)
 {
 	return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
diff --git a/arch/arm/mach-mvebu/common.h b/arch/arm/mach-mvebu/common.h
index 3e0aca1..6b77549 100644
--- a/arch/arm/mach-mvebu/common.h
+++ b/arch/arm/mach-mvebu/common.h
@@ -25,6 +25,6 @@
 
 void __iomem *mvebu_get_scu_base(void);
 
-int mvebu_pm_init(void (*board_pm_enter)(void __iomem *sdram_reg, u32 srcmd));
-
+int mvebu_pm_suspend_init(void (*board_pm_enter)(void __iomem *sdram_reg,
+							u32 srcmd));
 #endif
diff --git a/arch/arm/mach-mvebu/dove.c b/arch/arm/mach-mvebu/dove.c
index 5a17415..1aebb82 100644
--- a/arch/arm/mach-mvebu/dove.c
+++ b/arch/arm/mach-mvebu/dove.c
@@ -12,6 +12,7 @@
 #include <linux/mbus.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/soc/dove/pmu.h>
 #include <asm/hardware/cache-tauros2.h>
 #include <asm/mach/arch.h>
 #include "common.h"
@@ -24,6 +25,7 @@
 	tauros2_init(0);
 #endif
 	BUG_ON(mvebu_mbus_dt_init(false));
+	dove_init_pmu();
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c
index 301ab38..db17121 100644
--- a/arch/arm/mach-mvebu/pm-board.c
+++ b/arch/arm/mach-mvebu/pm-board.c
@@ -1,7 +1,7 @@
 /*
  * Board-level suspend/resume support.
  *
- * Copyright (C) 2014 Marvell
+ * Copyright (C) 2014-2015 Marvell
  *
  * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
  *
@@ -20,27 +20,27 @@
 #include <linux/slab.h>
 #include "common.h"
 
-#define ARMADA_XP_GP_PIC_NR_GPIOS 3
+#define ARMADA_PIC_NR_GPIOS 3
 
 static void __iomem *gpio_ctrl;
-static int pic_gpios[ARMADA_XP_GP_PIC_NR_GPIOS];
-static int pic_raw_gpios[ARMADA_XP_GP_PIC_NR_GPIOS];
+static int pic_gpios[ARMADA_PIC_NR_GPIOS];
+static int pic_raw_gpios[ARMADA_PIC_NR_GPIOS];
 
-static void mvebu_armada_xp_gp_pm_enter(void __iomem *sdram_reg, u32 srcmd)
+static void mvebu_armada_pm_enter(void __iomem *sdram_reg, u32 srcmd)
 {
 	u32 reg, ackcmd;
 	int i;
 
 	/* Put 001 as value on the GPIOs */
 	reg = readl(gpio_ctrl);
-	for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++)
+	for (i = 0; i < ARMADA_PIC_NR_GPIOS; i++)
 		reg &= ~BIT(pic_raw_gpios[i]);
 	reg |= BIT(pic_raw_gpios[0]);
 	writel(reg, gpio_ctrl);
 
 	/* Prepare writing 111 to the GPIOs */
 	ackcmd = readl(gpio_ctrl);
-	for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++)
+	for (i = 0; i < ARMADA_PIC_NR_GPIOS; i++)
 		ackcmd |= BIT(pic_raw_gpios[i]);
 
 	srcmd = cpu_to_le32(srcmd);
@@ -76,7 +76,7 @@
 		  [ackcmd] "r" (ackcmd), [gpio_ctrl] "r" (gpio_ctrl) : "r1");
 }
 
-static int mvebu_armada_xp_gp_pm_init(void)
+static int __init mvebu_armada_pm_init(void)
 {
 	struct device_node *np;
 	struct device_node *gpio_ctrl_np;
@@ -89,7 +89,7 @@
 	if (!np)
 		return -ENODEV;
 
-	for (i = 0; i < ARMADA_XP_GP_PIC_NR_GPIOS; i++) {
+	for (i = 0; i < ARMADA_PIC_NR_GPIOS; i++) {
 		char *name;
 		struct of_phandle_args args;
 
@@ -134,11 +134,19 @@
 	if (!gpio_ctrl)
 		return -ENOMEM;
 
-	mvebu_pm_init(mvebu_armada_xp_gp_pm_enter);
+	mvebu_pm_suspend_init(mvebu_armada_pm_enter);
 
 out:
 	of_node_put(np);
 	return ret;
 }
 
-late_initcall(mvebu_armada_xp_gp_pm_init);
+/*
+ * Registering the mvebu_board_pm_enter callback must be done before
+ * the platform_suspend_ops will be registered. In the same time we
+ * also need to have the gpio devices registered. That's why we use a
+ * device_initcall_sync which is called after all the device_initcall
+ * (used by the gpio device) but before the late_initcall (used to
+ * register the platform_suspend_ops)
+ */
+device_initcall_sync(mvebu_armada_pm_init);
diff --git a/arch/arm/mach-mvebu/pm.c b/arch/arm/mach-mvebu/pm.c
index 6573a8f..8d32bf7 100644
--- a/arch/arm/mach-mvebu/pm.c
+++ b/arch/arm/mach-mvebu/pm.c
@@ -105,12 +105,10 @@
 	return of_translate_address(np, in_addr);
 }
 
-static void mvebu_pm_store_bootinfo(void)
+static void mvebu_pm_store_armadaxp_bootinfo(u32 *store_addr)
 {
-	u32 *store_addr;
 	phys_addr_t resume_pc;
 
-	store_addr = phys_to_virt(BOOT_INFO_ADDR);
 	resume_pc = virt_to_phys(armada_370_xp_cpu_resume);
 
 	/*
@@ -151,14 +149,30 @@
 	writel(BOOT_MAGIC_LIST_END, store_addr);
 }
 
-static int mvebu_pm_enter(suspend_state_t state)
+static int mvebu_pm_store_bootinfo(void)
 {
-	if (state != PM_SUSPEND_MEM)
-		return -EINVAL;
+	u32 *store_addr;
+
+	store_addr = phys_to_virt(BOOT_INFO_ADDR);
+
+	if (of_machine_is_compatible("marvell,armadaxp"))
+		mvebu_pm_store_armadaxp_bootinfo(store_addr);
+	else
+		return -ENODEV;
+
+	return 0;
+}
+
+static int mvebu_enter_suspend(void)
+{
+	int ret;
+
+	ret = mvebu_pm_store_bootinfo();
+	if (ret)
+		return ret;
 
 	cpu_pm_enter();
 
-	mvebu_pm_store_bootinfo();
 	cpu_suspend(0, mvebu_pm_powerdown);
 
 	outer_resume();
@@ -168,23 +182,62 @@
 	set_cpu_coherent();
 
 	cpu_pm_exit();
+	return 0;
+}
+
+static int mvebu_pm_enter(suspend_state_t state)
+{
+	switch (state) {
+	case PM_SUSPEND_STANDBY:
+		cpu_do_idle();
+		break;
+	case PM_SUSPEND_MEM:
+		pr_warn("Entering suspend to RAM. Only special wake-up sources will resume the system\n");
+		return mvebu_enter_suspend();
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int mvebu_pm_valid(suspend_state_t state)
+{
+	if (state == PM_SUSPEND_STANDBY)
+		return 1;
+
+	if (state == PM_SUSPEND_MEM && mvebu_board_pm_enter != NULL)
+		return 1;
 
 	return 0;
 }
 
 static const struct platform_suspend_ops mvebu_pm_ops = {
 	.enter = mvebu_pm_enter,
-	.valid = suspend_valid_only_mem,
+	.valid = mvebu_pm_valid,
 };
 
-int mvebu_pm_init(void (*board_pm_enter)(void __iomem *sdram_reg, u32 srcmd))
+static int __init mvebu_pm_init(void)
+{
+	if (!of_machine_is_compatible("marvell,armadaxp") &&
+	    !of_machine_is_compatible("marvell,armada370") &&
+	    !of_machine_is_compatible("marvell,armada380") &&
+	    !of_machine_is_compatible("marvell,armada390"))
+		return -ENODEV;
+
+	suspend_set_ops(&mvebu_pm_ops);
+
+	return 0;
+}
+
+
+late_initcall(mvebu_pm_init);
+
+int __init mvebu_pm_suspend_init(void (*board_pm_enter)(void __iomem *sdram_reg,
+							u32 srcmd))
 {
 	struct device_node *np;
 	struct resource res;
 
-	if (!of_machine_is_compatible("marvell,armadaxp"))
-		return -ENODEV;
-
 	np = of_find_compatible_node(NULL, NULL,
 				     "marvell,armada-xp-sdram-controller");
 	if (!np)
@@ -212,7 +265,5 @@
 
 	mvebu_board_pm_enter = board_pm_enter;
 
-	suspend_set_ops(&mvebu_pm_ops);
-
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c
index e1a56d8..1ed4be1 100644
--- a/arch/arm/mach-omap2/dma.c
+++ b/arch/arm/mach-omap2/dma.c
@@ -117,7 +117,6 @@
 	u8 revision = dma_read(REVISION, 0) & 0xff;
 	printk(KERN_INFO "OMAP DMA hardware revision %d.%d\n",
 				revision >> 4, revision & 0xf);
-	return;
 }
 
 static unsigned configure_dma_errata(void)
diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig
index e03d8b5..9ab8932 100644
--- a/arch/arm/mach-prima2/Kconfig
+++ b/arch/arm/mach-prima2/Kconfig
@@ -4,6 +4,7 @@
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_IRQ_CHIP
 	select NO_IOPORT_MAP
+	select REGMAP
 	select PINCTRL
 	select PINCTRL_SIRF
 	help
diff --git a/arch/arm/mach-prima2/rtciobrg.c b/arch/arm/mach-prima2/rtciobrg.c
index 8f66d8f..d4852d2 100644
--- a/arch/arm/mach-prima2/rtciobrg.c
+++ b/arch/arm/mach-prima2/rtciobrg.c
@@ -1,5 +1,5 @@
 /*
- * RTC I/O Bridge interfaces for CSR SiRFprimaII
+ * RTC I/O Bridge interfaces for CSR SiRFprimaII/atlas7
  * ARM access the registers of SYSRTC, GPSRTC and PWRC through this module
  *
  * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/regmap.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
@@ -66,6 +67,7 @@
 {
 	unsigned long flags, val;
 
+	/* TODO: add hwspinlock to sync with M3 */
 	spin_lock_irqsave(&rtciobrg_lock, flags);
 
 	val = __sirfsoc_rtc_iobrg_readl(addr);
@@ -90,6 +92,7 @@
 {
 	unsigned long flags;
 
+	 /* TODO: add hwspinlock to sync with M3 */
 	spin_lock_irqsave(&rtciobrg_lock, flags);
 
 	sirfsoc_rtc_iobrg_pre_writel(val, addr);
@@ -102,6 +105,45 @@
 }
 EXPORT_SYMBOL_GPL(sirfsoc_rtc_iobrg_writel);
 
+
+static int regmap_iobg_regwrite(void *context, unsigned int reg,
+				   unsigned int val)
+{
+	sirfsoc_rtc_iobrg_writel(val, reg);
+	return 0;
+}
+
+static int regmap_iobg_regread(void *context, unsigned int reg,
+				  unsigned int *val)
+{
+	*val = (u32)sirfsoc_rtc_iobrg_readl(reg);
+	return 0;
+}
+
+static struct regmap_bus regmap_iobg = {
+	.reg_write = regmap_iobg_regwrite,
+	.reg_read = regmap_iobg_regread,
+};
+
+/**
+ * devm_regmap_init_iobg(): Initialise managed register map
+ *
+ * @iobg: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+struct regmap *devm_regmap_init_iobg(struct device *dev,
+				    const struct regmap_config *config)
+{
+	const struct regmap_bus *bus = &regmap_iobg;
+
+	return devm_regmap_init(dev, bus, dev, config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_iobg);
+
 static const struct of_device_id rtciobrg_ids[] = {
 	{ .compatible = "sirf,prima2-rtciobg" },
 	{}
@@ -132,7 +174,7 @@
 }
 postcore_initcall(sirfsoc_rtciobrg_init);
 
-MODULE_AUTHOR("Zhiwu Song <zhiwu.song@csr.com>, "
-		"Barry Song <baohua.song@csr.com>");
+MODULE_AUTHOR("Zhiwu Song <zhiwu.song@csr.com>");
+MODULE_AUTHOR("Barry Song <baohua.song@csr.com>");
 MODULE_DESCRIPTION("CSR SiRFprimaII rtc io bridge");
 MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index 4500647..34eac88 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -4,6 +4,7 @@
 
 config PM_RCAR
 	bool
+	select PM_GENERIC_DOMAINS if PM
 
 config PM_RMOBILE
 	bool
@@ -50,6 +51,7 @@
 
 config ARCH_R7S72100
 	bool "RZ/A1H (R7S72100)"
+	select PM_GENERIC_DOMAINS if PM
 	select SYS_SUPPORTS_SH_MTU2
 
 config ARCH_R8A73A4
diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index 81502b9..4efe2d4 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -35,7 +35,7 @@
 	select SUN5I_HSTIMER
 
 config MACH_SUN8I
-	bool "Allwinner A23 (sun8i) SoCs support"
+	bool "Allwinner sun8i Family SoCs support"
 	default ARCH_SUNXI
 	select ARM_GIC
 	select MFD_SUN6I_PRCM
diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index 1bc811a..65bab28 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -67,10 +67,13 @@
 
 static const char * const sun8i_board_dt_compat[] = {
 	"allwinner,sun8i-a23",
+	"allwinner,sun8i-a33",
+	"allwinner,sun8i-h3",
 	NULL,
 };
 
-DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i (A23) Family")
+DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family")
+	.init_time	= sun6i_timer_init,
 	.dt_compat	= sun8i_board_dt_compat,
 	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c
index 155807f..9157546 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra114.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra114.c
@@ -24,6 +24,7 @@
 #include <asm/cpuidle.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
+#include <asm/psci.h>
 
 #include "pm.h"
 #include "sleep.h"
@@ -44,16 +45,12 @@
 	tegra_set_cpu_in_lp2();
 	cpu_pm_enter();
 
-	tick_broadcast_enter();
-
 	call_firmware_op(prepare_idle);
 
 	/* Do suspend by ourselves if the firmware does not implement it */
 	if (call_firmware_op(do_idle, 0) == -ENOSYS)
 		cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-	tick_broadcast_exit();
-
 	cpu_pm_exit();
 	tegra_clear_cpu_in_lp2();
 
@@ -61,6 +58,13 @@
 
 	return index;
 }
+
+static void tegra114_idle_enter_freeze(struct cpuidle_device *dev,
+				       struct cpuidle_driver *drv,
+				       int index)
+{
+       tegra114_idle_power_down(dev, drv, index);
+}
 #endif
 
 static struct cpuidle_driver tegra_idle_driver = {
@@ -72,8 +76,10 @@
 #ifdef CONFIG_PM_SLEEP
 		[1] = {
 			.enter			= tegra114_idle_power_down,
+			.enter_freeze		= tegra114_idle_enter_freeze,
 			.exit_latency		= 500,
 			.target_residency	= 1000,
+			.flags			= CPUIDLE_FLAG_TIMER_STOP,
 			.power_usage		= 0,
 			.name			= "powered-down",
 			.desc			= "CPU power gated",
@@ -84,5 +90,8 @@
 
 int __init tegra114_cpuidle_init(void)
 {
-	return cpuidle_register(&tegra_idle_driver, NULL);
+	if (!psci_smp_available())
+		return cpuidle_register(&tegra_idle_driver, NULL);
+
+	return 0;
 }
diff --git a/arch/arm/mach-tegra/iomap.h b/arch/arm/mach-tegra/iomap.h
index 81dc950..9e5b2f8 100644
--- a/arch/arm/mach-tegra/iomap.h
+++ b/arch/arm/mach-tegra/iomap.h
@@ -82,9 +82,6 @@
 #define TEGRA_EMC_BASE			0x7000F400
 #define TEGRA_EMC_SIZE			SZ_1K
 
-#define TEGRA_FUSE_BASE			0x7000F800
-#define TEGRA_FUSE_SIZE			SZ_1K
-
 #define TEGRA_EMC0_BASE			0x7001A000
 #define TEGRA_EMC0_SIZE			SZ_2K
 
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index d1e5ad7..0c81056 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -255,7 +255,7 @@
 }
 #endif
 
-void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
+static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
 	unsigned long offset, size_t size, unsigned int mtype, void *caller)
 {
 	const struct mem_type *type;
@@ -363,7 +363,7 @@
 		  unsigned int mtype)
 {
 	return __arm_ioremap_pfn_caller(pfn, offset, size, mtype,
-			__builtin_return_address(0));
+					__builtin_return_address(0));
 }
 EXPORT_SYMBOL(__arm_ioremap_pfn);
 
@@ -371,13 +371,26 @@
 				      unsigned int, void *) =
 	__arm_ioremap_caller;
 
-void __iomem *
-__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype)
+void __iomem *ioremap(resource_size_t res_cookie, size_t size)
 {
-	return arch_ioremap_caller(phys_addr, size, mtype,
-		__builtin_return_address(0));
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE,
+				   __builtin_return_address(0));
 }
-EXPORT_SYMBOL(__arm_ioremap);
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
 
 /*
  * Remap an arbitrary physical address space into the kernel virtual
@@ -431,11 +444,11 @@
 
 void (*arch_iounmap)(volatile void __iomem *) = __iounmap;
 
-void __arm_iounmap(volatile void __iomem *io_addr)
+void iounmap(volatile void __iomem *cookie)
 {
-	arch_iounmap(io_addr);
+	arch_iounmap(cookie);
 }
-EXPORT_SYMBOL(__arm_iounmap);
+EXPORT_SYMBOL(iounmap);
 
 #ifdef CONFIG_PCI
 static int pci_ioremap_mem_type = MT_DEVICE;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 6ca7d9a..870838a 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1072,6 +1072,7 @@
 	int highmem = 0;
 	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
 	struct memblock_region *reg;
+	bool should_use_highmem = false;
 
 	for_each_memblock(memory, reg) {
 		phys_addr_t block_start = reg->base;
@@ -1090,6 +1091,7 @@
 				pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
 					  &block_start, &block_end);
 				memblock_remove(reg->base, reg->size);
+				should_use_highmem = true;
 				continue;
 			}
 
@@ -1100,6 +1102,7 @@
 					  &block_start, &block_end, &vmalloc_limit);
 				memblock_remove(vmalloc_limit, overlap_size);
 				block_end = vmalloc_limit;
+				should_use_highmem = true;
 			}
 		}
 
@@ -1134,6 +1137,9 @@
 		}
 	}
 
+	if (should_use_highmem)
+		pr_notice("Consider using a HIGHMEM enabled kernel.\n");
+
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
 
 	/*
@@ -1494,6 +1500,7 @@
 	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();
+	memblock_set_current_limit(arm_lowmem_limit);
 	dma_contiguous_remap();
 	devicemaps_init(mdesc);
 	kmap_init();
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index afd7e05..1dd1093 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -351,30 +351,43 @@
 }
 EXPORT_SYMBOL(__arm_ioremap_pfn);
 
-void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset,
-			   size_t size, unsigned int mtype, void *caller)
-{
-	return __arm_ioremap_pfn(pfn, offset, size, mtype);
-}
-
-void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size,
-			    unsigned int mtype)
-{
-	return (void __iomem *)phys_addr;
-}
-EXPORT_SYMBOL(__arm_ioremap);
-
-void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
-
 void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
 				   unsigned int mtype, void *caller)
 {
-	return __arm_ioremap(phys_addr, size, mtype);
+	return (void __iomem *)phys_addr;
 }
 
+void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
+
+void __iomem *ioremap(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iounmap(volatile void __iomem *addr)
+{
+}
+EXPORT_SYMBOL(__iounmap);
+
 void (*arch_iounmap)(volatile void __iomem *);
 
-void __arm_iounmap(volatile void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
 {
 }
-EXPORT_SYMBOL(__arm_iounmap);
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c
index 9005b07..aedec81 100644
--- a/arch/arm/vdso/vdsomunge.c
+++ b/arch/arm/vdso/vdsomunge.c
@@ -45,13 +45,11 @@
  * it does.
  */
 
-#define _GNU_SOURCE
-
 #include <byteswap.h>
 #include <elf.h>
 #include <errno.h>
-#include <error.h>
 #include <fcntl.h>
+#include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -82,11 +80,25 @@
 #define EF_ARM_ABI_FLOAT_HARD 0x400
 #endif
 
+static int failed;
+static const char *argv0;
 static const char *outfile;
 
+static void fail(const char *fmt, ...)
+{
+	va_list ap;
+
+	failed = 1;
+	fprintf(stderr, "%s: ", argv0);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(EXIT_FAILURE);
+}
+
 static void cleanup(void)
 {
-	if (error_message_count > 0 && outfile != NULL)
+	if (failed && outfile != NULL)
 		unlink(outfile);
 }
 
@@ -119,68 +131,66 @@
 	int infd;
 
 	atexit(cleanup);
+	argv0 = argv[0];
 
 	if (argc != 3)
-		error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]);
+		fail("Usage: %s [infile] [outfile]\n", argv[0]);
 
 	infile = argv[1];
 	outfile = argv[2];
 
 	infd = open(infile, O_RDONLY);
 	if (infd < 0)
-		error(EXIT_FAILURE, errno, "Cannot open %s", infile);
+		fail("Cannot open %s: %s\n", infile, strerror(errno));
 
 	if (fstat(infd, &stat) != 0)
-		error(EXIT_FAILURE, errno, "Failed stat for %s", infile);
+		fail("Failed stat for %s: %s\n", infile, strerror(errno));
 
 	inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0);
 	if (inbuf == MAP_FAILED)
-		error(EXIT_FAILURE, errno, "Failed to map %s", infile);
+		fail("Failed to map %s: %s\n", infile, strerror(errno));
 
 	close(infd);
 
 	inhdr = inbuf;
 
 	if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0)
-		error(EXIT_FAILURE, 0, "Not an ELF file");
+		fail("Not an ELF file\n");
 
 	if (inhdr->e_ident[EI_CLASS] != ELFCLASS32)
-		error(EXIT_FAILURE, 0, "Unsupported ELF class");
+		fail("Unsupported ELF class\n");
 
 	swap = inhdr->e_ident[EI_DATA] != HOST_ORDER;
 
 	if (read_elf_half(inhdr->e_type, swap) != ET_DYN)
-		error(EXIT_FAILURE, 0, "Not a shared object");
+		fail("Not a shared object\n");
 
-	if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) {
-		error(EXIT_FAILURE, 0, "Unsupported architecture %#x",
-		      inhdr->e_machine);
-	}
+	if (read_elf_half(inhdr->e_machine, swap) != EM_ARM)
+		fail("Unsupported architecture %#x\n", inhdr->e_machine);
 
 	e_flags = read_elf_word(inhdr->e_flags, swap);
 
 	if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) {
-		error(EXIT_FAILURE, 0, "Unsupported EABI version %#x",
-		      EF_ARM_EABI_VERSION(e_flags));
+		fail("Unsupported EABI version %#x\n",
+		     EF_ARM_EABI_VERSION(e_flags));
 	}
 
 	if (e_flags & EF_ARM_ABI_FLOAT_HARD)
-		error(EXIT_FAILURE, 0,
-		      "Unexpected hard-float flag set in e_flags");
+		fail("Unexpected hard-float flag set in e_flags\n");
 
 	clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT);
 
 	outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
 	if (outfd < 0)
-		error(EXIT_FAILURE, errno, "Cannot open %s", outfile);
+		fail("Cannot open %s: %s\n", outfile, strerror(errno));
 
 	if (ftruncate(outfd, stat.st_size) != 0)
-		error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile);
+		fail("Cannot truncate %s: %s\n", outfile, strerror(errno));
 
 	outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 		      outfd, 0);
 	if (outbuf == MAP_FAILED)
-		error(EXIT_FAILURE, errno, "Failed to map %s", outfile);
+		fail("Failed to map %s: %s\n", outfile, strerror(errno));
 
 	close(outfd);
 
@@ -195,7 +205,7 @@
 	}
 
 	if (msync(outbuf, stat.st_size, MS_SYNC) != 0)
-		error(EXIT_FAILURE, errno, "Failed to sync %s", outfile);
+		fail("Failed to sync %s: %s\n", outfile, strerror(errno));
 
 	return EXIT_SUCCESS;
 }
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0f6edb1..318175f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -23,9 +23,9 @@
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select EDAC_SUPPORT
 	select CPU_PM if (SUSPEND || CPU_IDLE)
 	select DCACHE_WORD_ACCESS
+	select EDAC_SUPPORT
 	select GENERIC_ALLOCATOR
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts
index 83578e7..4c55833 100644
--- a/arch/arm64/boot/dts/apm/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -23,6 +23,16 @@
 		device_type = "memory";
 		reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */
 	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+		button@1 {
+			label = "POWER";
+			linux,code = <116>;
+			linux,input-type = <0x1>;
+			interrupts = <0x0 0x2d 0x1>;
+		};
+	};
 };
 
 &pcie0clk {
diff --git a/arch/arm64/boot/dts/arm/Makefile b/arch/arm64/boot/dts/arm/Makefile
index c5c98b9..bb3c072 100644
--- a/arch/arm64/boot/dts/arm/Makefile
+++ b/arch/arm64/boot/dts/arm/Makefile
@@ -1,6 +1,7 @@
 dtb-$(CONFIG_ARCH_VEXPRESS) += foundation-v8.dtb
 dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb juno-r1.dtb
 dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2f-1xv7-ca53x2.dtb
 
 always		:= $(dtb-y)
 subdir-y	:= $(dts-dirs)
diff --git a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts
new file mode 100644
index 0000000..5b1d018
--- /dev/null
+++ b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts
@@ -0,0 +1,191 @@
+/*
+ * ARM Ltd. Versatile Express
+ *
+ * LogicTile Express 20MG
+ * V2F-1XV7
+ *
+ * Cortex-A53 (2 cores) Soft Macrocell Model
+ *
+ * HBI-0247C
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+	model = "V2F-1XV7 Cortex-A53x2 SMM";
+	arm,hbi = <0x247>;
+	arm,vexpress,site = <0xf>;
+	compatible = "arm,vexpress,v2f-1xv7,ca53x2", "arm,vexpress,v2f-1xv7", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	chosen {
+		stdout-path = "serial0:38400n8";
+	};
+
+	aliases {
+		serial0 = &v2m_serial0;
+		serial1 = &v2m_serial1;
+		serial2 = &v2m_serial2;
+		serial3 = &v2m_serial3;
+		i2c0 = &v2m_i2c_dvi;
+		i2c1 = &v2m_i2c_pcie;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0 0>;
+			next-level-cache = <&L2_0>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0 1>;
+			next-level-cache = <&L2_0>;
+		};
+
+		L2_0: l2-cache0 {
+			compatible = "cache";
+		};
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x80000000>; /* 2GB @ 2GB */
+	};
+
+	gic: interrupt-controller@2c001000 {
+		compatible = "arm,gic-400";
+		#interrupt-cells = <3>;
+		#address-cells = <0>;
+		interrupt-controller;
+		reg = <0 0x2c001000 0 0x1000>,
+		      <0 0x2c002000 0 0x2000>,
+		      <0 0x2c004000 0 0x2000>,
+		      <0 0x2c006000 0 0x2000>;
+		interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>;
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	dcc {
+		compatible = "arm,vexpress,config-bus";
+		arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+		smbclk: osc@4 {
+			/* SMC clock */
+			compatible = "arm,vexpress-osc";
+			arm,vexpress-sysreg,func = <1 4>;
+			freq-range = <40000000 40000000>;
+			#clock-cells = <0>;
+			clock-output-names = "smclk";
+		};
+
+		volt@0 {
+			/* VIO to expansion board above */
+			compatible = "arm,vexpress-volt";
+			arm,vexpress-sysreg,func = <2 0>;
+			regulator-name = "VIO_UP";
+			regulator-min-microvolt = <800000>;
+			regulator-max-microvolt = <1800000>;
+			regulator-always-on;
+		};
+
+		volt@1 {
+			/* 12V from power connector J6 */
+			compatible = "arm,vexpress-volt";
+			arm,vexpress-sysreg,func = <2 1>;
+			regulator-name = "12";
+			regulator-always-on;
+		};
+
+		temp@0 {
+			/* FPGA temperature */
+			compatible = "arm,vexpress-temp";
+			arm,vexpress-sysreg,func = <4 0>;
+			label = "FPGA";
+		};
+	};
+
+	smb {
+		compatible = "simple-bus";
+
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x08000000 0x04000000>,
+			 <1 0 0 0x14000000 0x04000000>,
+			 <2 0 0 0x18000000 0x04000000>,
+			 <3 0 0 0x1c000000 0x04000000>,
+			 <4 0 0 0x0c000000 0x04000000>,
+			 <5 0 0 0x10000000 0x04000000>;
+
+		#interrupt-cells = <1>;
+		interrupt-map-mask = <0 0 63>;
+		interrupt-map = <0 0  0 &gic GIC_SPI  0 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  1 &gic GIC_SPI  1 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  2 &gic GIC_SPI  2 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  3 &gic GIC_SPI  3 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  4 &gic GIC_SPI  4 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  5 &gic GIC_SPI  5 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  6 &gic GIC_SPI  6 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  7 &gic GIC_SPI  7 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  8 &gic GIC_SPI  8 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0  9 &gic GIC_SPI  9 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 10 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 11 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 12 &gic GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 13 &gic GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 14 &gic GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 15 &gic GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 16 &gic GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 17 &gic GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 18 &gic GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 19 &gic GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 20 &gic GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 21 &gic GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 22 &gic GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 23 &gic GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 24 &gic GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 25 &gic GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 26 &gic GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 27 &gic GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 28 &gic GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 29 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 30 &gic GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 31 &gic GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 32 &gic GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 33 &gic GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 34 &gic GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 35 &gic GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 36 &gic GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 37 &gic GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 38 &gic GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 39 &gic GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 40 &gic GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 41 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+				<0 0 42 &gic GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+
+		/include/ "../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi"
+	};
+};
diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
index d8c0bdc..9cb7cf9 100644
--- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
+++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
@@ -376,10 +376,19 @@
 		gic0: interrupt-controller@8010,00000000 {
 			compatible = "arm,gic-v3";
 			#interrupt-cells = <3>;
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
 			interrupt-controller;
 			reg = <0x8010 0x00000000 0x0 0x010000>, /* GICD */
 			      <0x8010 0x80000000 0x0 0x600000>; /* GICR */
 			interrupts = <1 9 0xf04>;
+
+			its: gic-its@8010,00020000 {
+				compatible = "arm,gic-v3-its";
+				msi-controller;
+				reg = <0x8010 0x20000 0x0 0x200000>;
+			};
 		};
 
 		uaa0: serial@87e0,24000000 {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f38c94f..4e17e7e 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -83,6 +83,7 @@
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_CEVA=y
 CONFIG_AHCI_XGENE=y
 CONFIG_PATA_PLATFORM=y
 CONFIG_PATA_OF_PLATFORM=y
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 39248d3..406485e 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -19,6 +19,14 @@
 #include <asm/psci.h>
 #include <asm/smp_plat.h>
 
+/* Macros for consistency checks of the GICC subtable of MADT */
+#define ACPI_MADT_GICC_LENGTH	\
+	(acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
+
+#define BAD_MADT_GICC_ENTRY(entry, end)						\
+	(!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) ||	\
+	 (entry)->header.length != ACPI_MADT_GICC_LENGTH)
+
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
 /* ACPI table mapping after acpi_gbl_permanent_mmap is set */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a7691a3..f860bfd 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -352,8 +352,8 @@
 	// TODO: add support for undefined instructions in kernel mode
 	enable_dbg
 	mov	x0, sp
+	mov	x2, x1
 	mov	x1, #BAD_SYNC
-	mrs	x2, esr_el1
 	b	bad_mode
 ENDPROC(el1_sync)
 
@@ -553,7 +553,7 @@
 	ct_user_exit
 	mov	x0, sp
 	mov	x1, #BAD_SYNC
-	mrs	x2, esr_el1
+	mov	x2, x25
 	bl	bad_mode
 	b	ret_to_user
 ENDPROC(el0_sync)
diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S
index bd9bfaa..f332d5d 100644
--- a/arch/arm64/kernel/entry32.S
+++ b/arch/arm64/kernel/entry32.S
@@ -32,13 +32,11 @@
 
 ENTRY(compat_sys_sigreturn_wrapper)
 	mov	x0, sp
-	mov	x27, #0		// prevent syscall restart handling (why)
 	b	compat_sys_sigreturn
 ENDPROC(compat_sys_sigreturn_wrapper)
 
 ENTRY(compat_sys_rt_sigreturn_wrapper)
 	mov	x0, sp
-	mov	x27, #0		// prevent syscall restart handling (why)
 	b	compat_sys_rt_sigreturn
 ENDPROC(compat_sys_rt_sigreturn_wrapper)
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 695801a..50fb469 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -438,7 +438,7 @@
 	struct acpi_madt_generic_interrupt *processor;
 
 	processor = (struct acpi_madt_generic_interrupt *)header;
-	if (BAD_MADT_ENTRY(processor, end))
+	if (BAD_MADT_GICC_ENTRY(processor, end))
 		return -EINVAL;
 
 	acpi_table_print_madt_entry(header);
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 9d84feb..773d37a 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,5 +4,3 @@
 				   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_ARM64_PTDUMP)	+= dump.o
-
-CFLAGS_mmu.o			:= -I$(srctree)/scripts/dtc/libfdt/
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index 4dda9bd..e989cee 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -1464,7 +1464,7 @@
 		if (port->write_ts_idx == NBR_IN_DESCR)
 			port->write_ts_idx = 0;
 		idx = port->write_ts_idx++;
-		do_posix_clock_monotonic_gettime(&port->timestamp[idx]);
+		ktime_get_ts(&port->timestamp[idx]);
 		port->in_buffer_len += port->inbufchunk;
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2a14585..3a3548f 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -118,6 +118,7 @@
 
 config ATH79
 	bool "Atheros AR71XX/AR724X/AR913X based boards"
+	select ARCH_HAS_RESET_CONTROLLER
 	select ARCH_REQUIRE_GPIOLIB
 	select BOOT_RAW
 	select CEVT_R4K
@@ -2231,7 +2232,7 @@
 
 config MIPS_CPS
 	bool "MIPS Coherent Processing System support"
-	depends on SYS_SUPPORTS_MIPS_CPS && !64BIT
+	depends on SYS_SUPPORTS_MIPS_CPS
 	select MIPS_CM
 	select MIPS_CPC
 	select MIPS_CPS_PM if HOTPLUG_CPU
diff --git a/arch/mips/boot/dts/qca/ar9132.dtsi b/arch/mips/boot/dts/qca/ar9132.dtsi
index 4759cff..fb7734e 100644
--- a/arch/mips/boot/dts/qca/ar9132.dtsi
+++ b/arch/mips/boot/dts/qca/ar9132.dtsi
@@ -115,6 +115,14 @@
 				interrupt-controller;
 				#interrupt-cells = <1>;
 			};
+
+			rst: reset-controller@1806001c {
+				compatible = "qca,ar9132-reset",
+						"qca,ar7100-reset";
+				reg = <0x1806001c 0x4>;
+
+				#reset-cells = <1>;
+			};
 		};
 
 		spi@1f000000 {
diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h
index 37c08a2..c9f7e23 100644
--- a/arch/mips/include/asm/mach-loongson64/mmzone.h
+++ b/arch/mips/include/asm/mach-loongson64/mmzone.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Loongson Inc. & Lemote Inc. &
- *                    Insititute of Computing Technology
+ *                    Institute of Computing Technology
  * Author:  Xiang Gao, gaoxiang@ict.ac.cn
  *          Huacai Chen, chenhc@lemote.com
  *          Xiaofu Meng, Shuangshuang Zhang
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 2b25d1b..16f1ea9 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -23,6 +23,7 @@
 extern int smp_num_siblings;
 extern cpumask_t cpu_sibling_map[];
 extern cpumask_t cpu_core_map[];
+extern cpumask_t cpu_foreign_map;
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index c0c5e59..d8f9b35 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -600,7 +600,7 @@
 		break;
 
 	case blezl_op: /* not really i_format */
-		if (NO_R6EMU)
+		if (!insn.i_format.rt && NO_R6EMU)
 			goto sigill_r6;
 	case blez_op:
 		/*
@@ -635,7 +635,7 @@
 		break;
 
 	case bgtzl_op:
-		if (NO_R6EMU)
+		if (!insn.i_format.rt && NO_R6EMU)
 			goto sigill_r6;
 	case bgtz_op:
 		/*
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index 55b759a..1b6ca63 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -60,7 +60,7 @@
 	 nop
 
 	/* This is an NMI */
-	la	k0, nmi_handler
+	PTR_LA	k0, nmi_handler
 	jr	k0
 	 nop
 
@@ -107,10 +107,10 @@
 	mul	t1, t1, t0
 	mul	t1, t1, t2
 
-	li	a0, KSEG0
-	add	a1, a0, t1
+	li	a0, CKSEG0
+	PTR_ADD	a1, a0, t1
 1:	cache	Index_Store_Tag_I, 0(a0)
-	add	a0, a0, t0
+	PTR_ADD	a0, a0, t0
 	bne	a0, a1, 1b
 	 nop
 icache_done:
@@ -134,12 +134,12 @@
 	mul	t1, t1, t0
 	mul	t1, t1, t2
 
-	li	a0, KSEG0
-	addu	a1, a0, t1
-	subu	a1, a1, t0
+	li	a0, CKSEG0
+	PTR_ADDU a1, a0, t1
+	PTR_SUBU a1, a1, t0
 1:	cache	Index_Store_Tag_D, 0(a0)
 	bne	a0, a1, 1b
-	 add	a0, a0, t0
+	 PTR_ADD a0, a0, t0
 dcache_done:
 
 	/* Set Kseg0 CCA to that in s0 */
@@ -152,11 +152,11 @@
 
 	/* Enter the coherent domain */
 	li	t0, 0xff
-	sw	t0, GCR_CL_COHERENCE_OFS(v1)
+	PTR_S	t0, GCR_CL_COHERENCE_OFS(v1)
 	ehb
 
 	/* Jump to kseg0 */
-	la	t0, 1f
+	PTR_LA	t0, 1f
 	jr	t0
 	 nop
 
@@ -178,9 +178,9 @@
 	 nop
 
 	/* Off we go! */
-	lw	t1, VPEBOOTCFG_PC(v0)
-	lw	gp, VPEBOOTCFG_GP(v0)
-	lw	sp, VPEBOOTCFG_SP(v0)
+	PTR_L	t1, VPEBOOTCFG_PC(v0)
+	PTR_L	gp, VPEBOOTCFG_GP(v0)
+	PTR_L	sp, VPEBOOTCFG_SP(v0)
 	jr	t1
 	 nop
 	END(mips_cps_core_entry)
@@ -217,7 +217,7 @@
 
 .org 0x480
 LEAF(excep_ejtag)
-	la	k0, ejtag_debug_handler
+	PTR_LA	k0, ejtag_debug_handler
 	jr	k0
 	 nop
 	END(excep_ejtag)
@@ -229,7 +229,7 @@
 	 nop
 
 	.set	push
-	.set	mips32r2
+	.set	mips64r2
 	.set	mt
 
 	/* Only allow 1 TC per VPE to execute... */
@@ -237,7 +237,7 @@
 
 	/* ...and for the moment only 1 VPE */
 	dvpe
-	la	t1, 1f
+	PTR_LA	t1, 1f
 	jr.hb	t1
 	 nop
 
@@ -250,25 +250,25 @@
 	mfc0	t0, CP0_MVPCONF0
 	srl	t0, t0, MVPCONF0_PVPE_SHIFT
 	andi	t0, t0, (MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT)
-	addiu	t7, t0, 1
+	addiu	ta3, t0, 1
 
 	/* If there's only 1, we're done */
 	beqz	t0, 2f
 	 nop
 
 	/* Loop through each VPE within this core */
-	li	t5, 1
+	li	ta1, 1
 
 1:	/* Operate on the appropriate TC */
-	mtc0	t5, CP0_VPECONTROL
+	mtc0	ta1, CP0_VPECONTROL
 	ehb
 
 	/* Bind TC to VPE (1:1 TC:VPE mapping) */
-	mttc0	t5, CP0_TCBIND
+	mttc0	ta1, CP0_TCBIND
 
 	/* Set exclusive TC, non-active, master */
 	li	t0, VPECONF0_MVP
-	sll	t1, t5, VPECONF0_XTC_SHIFT
+	sll	t1, ta1, VPECONF0_XTC_SHIFT
 	or	t0, t0, t1
 	mttc0	t0, CP0_VPECONF0
 
@@ -280,8 +280,8 @@
 	mttc0	t0, CP0_TCHALT
 
 	/* Next VPE */
-	addiu	t5, t5, 1
-	slt	t0, t5, t7
+	addiu	ta1, ta1, 1
+	slt	t0, ta1, ta3
 	bnez	t0, 1b
 	 nop
 
@@ -298,19 +298,19 @@
 
 LEAF(mips_cps_boot_vpes)
 	/* Retrieve CM base address */
-	la	t0, mips_cm_base
-	lw	t0, 0(t0)
+	PTR_LA	t0, mips_cm_base
+	PTR_L	t0, 0(t0)
 
 	/* Calculate a pointer to this cores struct core_boot_config */
-	lw	t0, GCR_CL_ID_OFS(t0)
+	PTR_L	t0, GCR_CL_ID_OFS(t0)
 	li	t1, COREBOOTCFG_SIZE
 	mul	t0, t0, t1
-	la	t1, mips_cps_core_bootcfg
-	lw	t1, 0(t1)
-	addu	t0, t0, t1
+	PTR_LA	t1, mips_cps_core_bootcfg
+	PTR_L	t1, 0(t1)
+	PTR_ADDU t0, t0, t1
 
 	/* Calculate this VPEs ID. If the core doesn't support MT use 0 */
-	has_mt	t6, 1f
+	has_mt	ta2, 1f
 	 li	t9, 0
 
 	/* Find the number of VPEs present in the core */
@@ -334,24 +334,24 @@
 1:	/* Calculate a pointer to this VPEs struct vpe_boot_config */
 	li	t1, VPEBOOTCFG_SIZE
 	mul	v0, t9, t1
-	lw	t7, COREBOOTCFG_VPECONFIG(t0)
-	addu	v0, v0, t7
+	PTR_L	ta3, COREBOOTCFG_VPECONFIG(t0)
+	PTR_ADDU v0, v0, ta3
 
 #ifdef CONFIG_MIPS_MT
 
 	/* If the core doesn't support MT then return */
-	bnez	t6, 1f
+	bnez	ta2, 1f
 	 nop
 	jr	ra
 	 nop
 
 	.set	push
-	.set	mips32r2
+	.set	mips64r2
 	.set	mt
 
 1:	/* Enter VPE configuration state */
 	dvpe
-	la	t1, 1f
+	PTR_LA	t1, 1f
 	jr.hb	t1
 	 nop
 1:	mfc0	t1, CP0_MVPCONTROL
@@ -360,12 +360,12 @@
 	ehb
 
 	/* Loop through each VPE */
-	lw	t6, COREBOOTCFG_VPEMASK(t0)
-	move	t8, t6
-	li	t5, 0
+	PTR_L	ta2, COREBOOTCFG_VPEMASK(t0)
+	move	t8, ta2
+	li	ta1, 0
 
 	/* Check whether the VPE should be running. If not, skip it */
-1:	andi	t0, t6, 1
+1:	andi	t0, ta2, 1
 	beqz	t0, 2f
 	 nop
 
@@ -373,7 +373,7 @@
 	mfc0	t0, CP0_VPECONTROL
 	ori	t0, t0, VPECONTROL_TARGTC
 	xori	t0, t0, VPECONTROL_TARGTC
-	or	t0, t0, t5
+	or	t0, t0, ta1
 	mtc0	t0, CP0_VPECONTROL
 	ehb
 
@@ -384,8 +384,8 @@
 
 	/* Calculate a pointer to the VPEs struct vpe_boot_config */
 	li	t0, VPEBOOTCFG_SIZE
-	mul	t0, t0, t5
-	addu	t0, t0, t7
+	mul	t0, t0, ta1
+	addu	t0, t0, ta3
 
 	/* Set the TC restart PC */
 	lw	t1, VPEBOOTCFG_PC(t0)
@@ -423,9 +423,9 @@
 	mttc0	t0, CP0_VPECONF0
 
 	/* Next VPE */
-2:	srl	t6, t6, 1
-	addiu	t5, t5, 1
-	bnez	t6, 1b
+2:	srl	ta2, ta2, 1
+	addiu	ta1, ta1, 1
+	bnez	ta2, 1b
 	 nop
 
 	/* Leave VPE configuration state */
@@ -445,7 +445,7 @@
 	/* This VPE should be offline, halt the TC */
 	li	t0, TCHALT_H
 	mtc0	t0, CP0_TCHALT
-	la	t0, 1f
+	PTR_LA	t0, 1f
 1:	jr.hb	t0
 	 nop
 
@@ -466,10 +466,10 @@
 	.set	noat
 	lw	$1, TI_CPU(gp)
 	sll	$1, $1, LONGLOG
-	la	\dest, __per_cpu_offset
+	PTR_LA	\dest, __per_cpu_offset
 	addu	$1, $1, \dest
 	lw	$1, 0($1)
-	la	\dest, cps_cpu_state
+	PTR_LA	\dest, cps_cpu_state
 	addu	\dest, \dest, $1
 	.set	pop
 	.endm
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 6e8de80..4cc1350 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -73,10 +73,11 @@
 	.set    noreorder
 	.set	nomacro
 
-1:	user_lw(t5, 16(t0))		# argument #5 from usp
-4:	user_lw(t6, 20(t0))		# argument #6 from usp
-3:	user_lw(t7, 24(t0))		# argument #7 from usp
-2:	user_lw(t8, 28(t0))		# argument #8 from usp
+load_a4: user_lw(t5, 16(t0))		# argument #5 from usp
+load_a5: user_lw(t6, 20(t0))		# argument #6 from usp
+load_a6: user_lw(t7, 24(t0))		# argument #7 from usp
+load_a7: user_lw(t8, 28(t0))		# argument #8 from usp
+loads_done:
 
 	sw	t5, 16(sp)		# argument #5 to ksp
 	sw	t6, 20(sp)		# argument #6 to ksp
@@ -85,10 +86,10 @@
 	.set	pop
 
 	.section __ex_table,"a"
-	PTR	1b,bad_stack
-	PTR	2b,bad_stack
-	PTR	3b,bad_stack
-	PTR	4b,bad_stack
+	PTR	load_a4, bad_stack_a4
+	PTR	load_a5, bad_stack_a5
+	PTR	load_a6, bad_stack_a6
+	PTR	load_a7, bad_stack_a7
 	.previous
 
 	lw	t0, TI_FLAGS($28)	# syscall tracing enabled?
@@ -153,8 +154,8 @@
 /* ------------------------------------------------------------------------ */
 
 	/*
-	 * The stackpointer for a call with more than 4 arguments is bad.
-	 * We probably should handle this case a bit more drastic.
+	 * Our open-coded access area sanity test for the stack pointer
+	 * failed. We probably should handle this case a bit more drastic.
 	 */
 bad_stack:
 	li	v0, EFAULT
@@ -163,6 +164,22 @@
 	sw	t0, PT_R7(sp)
 	j	o32_syscall_exit
 
+bad_stack_a4:
+	li	t5, 0
+	b	load_a5
+
+bad_stack_a5:
+	li	t6, 0
+	b	load_a6
+
+bad_stack_a6:
+	li	t7, 0
+	b	load_a7
+
+bad_stack_a7:
+	li	t8, 0
+	b	loads_done
+
 	/*
 	 * The system call does not exist in this kernel
 	 */
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index d07b210..f543ff4 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -69,16 +69,17 @@
 	daddu	t1, t0, 32
 	bltz	t1, bad_stack
 
-1:	lw	a4, 16(t0)		# argument #5 from usp
-2:	lw	a5, 20(t0)		# argument #6 from usp
-3:	lw	a6, 24(t0)		# argument #7 from usp
-4:	lw	a7, 28(t0)		# argument #8 from usp (for indirect syscalls)
+load_a4: lw	a4, 16(t0)		# argument #5 from usp
+load_a5: lw	a5, 20(t0)		# argument #6 from usp
+load_a6: lw	a6, 24(t0)		# argument #7 from usp
+load_a7: lw	a7, 28(t0)		# argument #8 from usp
+loads_done:
 
 	.section __ex_table,"a"
-	PTR	1b, bad_stack
-	PTR	2b, bad_stack
-	PTR	3b, bad_stack
-	PTR	4b, bad_stack
+	PTR	load_a4, bad_stack_a4
+	PTR	load_a5, bad_stack_a5
+	PTR	load_a6, bad_stack_a6
+	PTR	load_a7, bad_stack_a7
 	.previous
 
 	li	t1, _TIF_WORK_SYSCALL_ENTRY
@@ -167,6 +168,22 @@
 	sd	t0, PT_R7(sp)
 	j	o32_syscall_exit
 
+bad_stack_a4:
+	li	a4, 0
+	b	load_a5
+
+bad_stack_a5:
+	li	a5, 0
+	b	load_a6
+
+bad_stack_a6:
+	li	a6, 0
+	b	load_a7
+
+bad_stack_a7:
+	li	a7, 0
+	b	loads_done
+
 not_o32_scall:
 	/*
 	 * This is not an o32 compatibility syscall, pass it on
@@ -383,7 +400,7 @@
 	PTR	sys_connect			/* 4170 */
 	PTR	sys_getpeername
 	PTR	sys_getsockname
-	PTR	sys_getsockopt
+	PTR	compat_sys_getsockopt
 	PTR	sys_listen
 	PTR	compat_sys_recv			/* 4175 */
 	PTR	compat_sys_recvfrom
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index be73c49..008b337 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -337,6 +337,11 @@
 			min_low_pfn = start;
 		if (end <= reserved_end)
 			continue;
+#ifdef CONFIG_BLK_DEV_INITRD
+		/* mapstart should be after initrd_end */
+		if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end)))
+			continue;
+#endif
 		if (start >= mapstart)
 			continue;
 		mapstart = max(reserved_end, start);
@@ -366,14 +371,6 @@
 		max_low_pfn = PFN_DOWN(HIGHMEM_START);
 	}
 
-#ifdef CONFIG_BLK_DEV_INITRD
-	/*
-	 * mapstart should be after initrd_end
-	 */
-	if (initrd_end)
-		mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end)));
-#endif
-
 	/*
 	 * Initialize the boot-time allocator with low memory only.
 	 */
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 4251d39..c889377 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -133,7 +133,7 @@
 	/*
 	 * Patch the start of mips_cps_core_entry to provide:
 	 *
-	 * v0 = CM base address
+	 * v1 = CM base address
 	 * s0 = kseg0 CCA
 	 */
 	entry_code = (u32 *)&mips_cps_core_entry;
@@ -369,7 +369,7 @@
 
 static void wait_for_sibling_halt(void *ptr_cpu)
 {
-	unsigned cpu = (unsigned)ptr_cpu;
+	unsigned cpu = (unsigned long)ptr_cpu;
 	unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
 	unsigned halted;
 	unsigned long flags;
@@ -430,7 +430,7 @@
 		 */
 		err = smp_call_function_single(cpu_death_sibling,
 					       wait_for_sibling_halt,
-					       (void *)cpu, 1);
+					       (void *)(unsigned long)cpu, 1);
 		if (err)
 			panic("Failed to call remote sibling CPU\n");
 	}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index faa46eb..d0744cc 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -63,6 +63,13 @@
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
+/*
+ * A logcal cpu mask containing only one VPE per core to
+ * reduce the number of IPIs on large MT systems.
+ */
+cpumask_t cpu_foreign_map __read_mostly;
+EXPORT_SYMBOL(cpu_foreign_map);
+
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
@@ -103,6 +110,29 @@
 	}
 }
 
+/*
+ * Calculate a new cpu_foreign_map mask whenever a
+ * new cpu appears or disappears.
+ */
+static inline void calculate_cpu_foreign_map(void)
+{
+	int i, k, core_present;
+	cpumask_t temp_foreign_map;
+
+	/* Re-calculate the mask */
+	for_each_online_cpu(i) {
+		core_present = 0;
+		for_each_cpu(k, &temp_foreign_map)
+			if (cpu_data[i].package == cpu_data[k].package &&
+			    cpu_data[i].core == cpu_data[k].core)
+				core_present = 1;
+		if (!core_present)
+			cpumask_set_cpu(i, &temp_foreign_map);
+	}
+
+	cpumask_copy(&cpu_foreign_map, &temp_foreign_map);
+}
+
 struct plat_smp_ops *mp_ops;
 EXPORT_SYMBOL(mp_ops);
 
@@ -146,6 +176,8 @@
 	set_cpu_sibling_map(cpu);
 	set_cpu_core_map(cpu);
 
+	calculate_cpu_foreign_map();
+
 	cpumask_set_cpu(cpu, &cpu_callin_map);
 
 	synchronise_count_slave(cpu);
@@ -173,9 +205,18 @@
 static void stop_this_cpu(void *dummy)
 {
 	/*
-	 * Remove this CPU:
+	 * Remove this CPU. Be a bit slow here and
+	 * set the bits for every online CPU so we don't miss
+	 * any IPI whilst taking this VPE down.
 	 */
+
+	cpumask_copy(&cpu_foreign_map, cpu_online_mask);
+
+	/* Make it visible to every other CPU */
+	smp_mb();
+
 	set_cpu_online(smp_processor_id(), false);
+	calculate_cpu_foreign_map();
 	local_irq_disable();
 	while (1);
 }
@@ -197,6 +238,7 @@
 	mp_ops->prepare_cpus(max_cpus);
 	set_cpu_sibling_map(0);
 	set_cpu_core_map(0);
+	calculate_cpu_foreign_map();
 #ifndef CONFIG_HOTPLUG_CPU
 	init_cpu_present(cpu_possible_mask);
 #endif
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 2a7b38e..e207a43 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2130,10 +2130,10 @@
 	BUG_ON(current->mm);
 	enter_lazy_tlb(&init_mm, current);
 
-		/* Boot CPU's cache setup in setup_arch(). */
-		if (!is_boot_cpu)
-			cpu_cache_init();
-		tlb_init();
+	/* Boot CPU's cache setup in setup_arch(). */
+	if (!is_boot_cpu)
+		cpu_cache_init();
+	tlb_init();
 	TLBMISS_HANDLER_SETUP();
 }
 
diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson64/common/bonito-irq.c
index cc0e4fd..4e116d2 100644
--- a/arch/mips/loongson64/common/bonito-irq.c
+++ b/arch/mips/loongson64/common/bonito-irq.c
@@ -3,7 +3,7 @@
  * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
  * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org)
  *
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  *  This program is free software; you can redistribute	 it and/or modify it
diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c
index 72fed00..01fbed1 100644
--- a/arch/mips/loongson64/common/cmdline.c
+++ b/arch/mips/loongson64/common/cmdline.c
@@ -6,7 +6,7 @@
  * Copyright 2003 ICT CAS
  * Author: Michael Guo <guoyi@ict.ac.cn>
  *
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  * Copyright (C) 2009 Lemote Inc.
diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
index 12c75db..8750370 100644
--- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
+++ b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c
@@ -1,7 +1,7 @@
 /*
  * CS5536 General timer functions
  *
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Yanhua, yanh@lemote.com
  *
  * Copyright (C) 2009 Lemote Inc.
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
index 22f04ca..f6c44dd 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/common/env.c
@@ -6,7 +6,7 @@
  * Copyright 2003 ICT CAS
  * Author: Michael Guo <guoyi@ict.ac.cn>
  *
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  * Copyright (C) 2009 Lemote Inc.
diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson64/common/irq.c
index 687003b..d36d969 100644
--- a/arch/mips/loongson64/common/irq.c
+++ b/arch/mips/loongson64/common/irq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  *  This program is free software; you can redistribute	 it and/or modify it
diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c
index d477dd6..2dc5122 100644
--- a/arch/mips/loongson64/common/setup.c
+++ b/arch/mips/loongson64/common/setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  *  This program is free software; you can redistribute	 it and/or modify it
diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson64/fuloong-2e/irq.c
index ef5ec8f..892963f8 100644
--- a/arch/mips/loongson64/fuloong-2e/irq.c
+++ b/arch/mips/loongson64/fuloong-2e/irq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology
  * Author: Fuxin Zhang, zhangfx@lemote.com
  *
  *  This program is free software; you can redistribute	 it and/or modify it
diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c
index 462e34d..a78fb65 100644
--- a/arch/mips/loongson64/lemote-2f/clock.c
+++ b/arch/mips/loongson64/lemote-2f/clock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology
  * Author: Yanhua, yanh@lemote.com
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -15,7 +15,7 @@
 #include <linux/spinlock.h>
 
 #include <asm/clock.h>
-#include <asm/mach-loongson/loongson.h>
+#include <asm/mach-loongson64/loongson.h>
 
 static LIST_HEAD(clock_list);
 static DEFINE_SPINLOCK(clock_lock);
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
index 12d14ed..6f9e010 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Loongson Inc. & Lemote Inc. &
- *                    Insititute of Computing Technology
+ *                    Institute of Computing Technology
  * Author:  Xiang Gao, gaoxiang@ict.ac.cn
  *          Huacai Chen, chenhc@lemote.com
  *          Xiaofu Meng, Shuangshuang Zhang
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 22b9b2c..712f17a 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -451,7 +451,7 @@
 			/* Fall through */
 		case jr_op:
 			/* For R6, JR already emulated in jalr_op */
-			if (NO_R6EMU && insn.r_format.opcode == jr_op)
+			if (NO_R6EMU && insn.r_format.func == jr_op)
 				break;
 			*contpc = regs->regs[insn.r_format.rs];
 			return 1;
@@ -551,7 +551,7 @@
 				dec_insn.next_pc_inc;
 		return 1;
 	case blezl_op:
-		if (NO_R6EMU)
+		if (!insn.i_format.rt && NO_R6EMU)
 			break;
 	case blez_op:
 
@@ -588,7 +588,7 @@
 				dec_insn.next_pc_inc;
 		return 1;
 	case bgtzl_op:
-		if (NO_R6EMU)
+		if (!insn.i_format.rt && NO_R6EMU)
 			break;
 	case bgtz_op:
 		/*
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 7f660dc..fbea443 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -37,6 +37,7 @@
 #include <asm/cacheflush.h> /* for run_uncached() */
 #include <asm/traps.h>
 #include <asm/dma-coherence.h>
+#include <asm/mips-cm.h>
 
 /*
  * Special Variant of smp_call_function for use by cache functions:
@@ -51,9 +52,16 @@
 {
 	preempt_disable();
 
-#ifndef CONFIG_MIPS_MT_SMP
-	smp_call_function(func, info, 1);
-#endif
+	/*
+	 * The Coherent Manager propagates address-based cache ops to other
+	 * cores but not index-based ops. However, r4k_on_each_cpu is used
+	 * in both cases so there is no easy way to tell what kind of op is
+	 * executed to the other cores. The best we can probably do is
+	 * to restrict that call when a CM is not present because both
+	 * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
+	 */
+	if (!mips_cm_present())
+		smp_call_function_many(&cpu_foreign_map, func, info, 1);
 	func(info);
 	preempt_enable();
 }
@@ -937,7 +945,9 @@
 }
 
 static char *way_string[] = { NULL, "direct mapped", "2-way",
-	"3-way", "4-way", "5-way", "6-way", "7-way", "8-way"
+	"3-way", "4-way", "5-way", "6-way", "7-way", "8-way",
+	"9-way", "10-way", "11-way", "12-way",
+	"13-way", "14-way", "15-way", "16-way",
 };
 
 static void probe_pcache(void)
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 185e682..5625b19 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -119,18 +119,24 @@
 
 int get_c0_fdc_int(void)
 {
-	int mips_cpu_fdc_irq;
+	/*
+	 * Some cores claim the FDC is routable through the GIC, but it doesn't
+	 * actually seem to be connected for those Malta bitstreams.
+	 */
+	switch (current_cpu_type()) {
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
+		return -1;
+	};
 
 	if (cpu_has_veic)
-		mips_cpu_fdc_irq = -1;
+		return -1;
 	else if (gic_present)
-		mips_cpu_fdc_irq = gic_get_c0_fdc_int();
+		return gic_get_c0_fdc_int();
 	else if (cp0_fdc_irq >= 0)
-		mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq;
+		return MIPS_CPU_IRQ_BASE + cp0_fdc_irq;
 	else
-		mips_cpu_fdc_irq = -1;
-
-	return mips_cpu_fdc_irq;
+		return -1;
 }
 
 int get_c0_perfcount_int(void)
diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c
index d2dc836..8bd8ebb 100644
--- a/arch/mips/pistachio/init.c
+++ b/arch/mips/pistachio/init.c
@@ -63,13 +63,19 @@
 	plat_setup_iocoherency();
 }
 
-#define DEFAULT_CPC_BASE_ADDR 0x1bde0000
+#define DEFAULT_CPC_BASE_ADDR	0x1bde0000
+#define DEFAULT_CDMM_BASE_ADDR	0x1bdd0000
 
 phys_addr_t mips_cpc_default_phys_base(void)
 {
 	return DEFAULT_CPC_BASE_ADDR;
 }
 
+phys_addr_t mips_cdmm_phys_base(void)
+{
+	return DEFAULT_CDMM_BASE_ADDR;
+}
+
 static void __init mips_nmi_setup(void)
 {
 	void *base;
diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c
index 67889fc..7c73fcb 100644
--- a/arch/mips/pistachio/time.c
+++ b/arch/mips/pistachio/time.c
@@ -27,6 +27,11 @@
 	return gic_get_c0_perfcount_int();
 }
 
+int get_c0_fdc_int(void)
+{
+	return gic_get_c0_fdc_int();
+}
+
 void __init plat_time_init(void)
 {
 	struct device_node *np;
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 0a18375..f93c4a4 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -16,7 +16,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-extern spinlock_t pa_dbit_lock;
+extern spinlock_t pa_tlb_lock;
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -33,6 +33,19 @@
  */
 #define kern_addr_valid(addr)	(1)
 
+/* Purge data and instruction TLB entries.  Must be called holding
+ * the pa_tlb_lock.  The TLB purge instructions are slow on SMP
+ * machines since the purge must be broadcast to all CPUs.
+ */
+
+static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+{
+	mtsp(mm->context, 1);
+	pdtlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
+}
+
 /* Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
@@ -42,15 +55,20 @@
                 *(pteptr) = (pteval);                           \
         } while(0)
 
-extern void purge_tlb_entries(struct mm_struct *, unsigned long);
+#define pte_inserted(x)						\
+	((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED))		\
+	 == (_PAGE_PRESENT|_PAGE_ACCESSED))
 
-#define set_pte_at(mm, addr, ptep, pteval)                      \
-	do {                                                    \
+#define set_pte_at(mm, addr, ptep, pteval)			\
+	do {							\
+		pte_t old_pte;					\
 		unsigned long flags;				\
-		spin_lock_irqsave(&pa_dbit_lock, flags);	\
-		set_pte(ptep, pteval);                          \
-		purge_tlb_entries(mm, addr);                    \
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);	\
+		spin_lock_irqsave(&pa_tlb_lock, flags);		\
+		old_pte = *ptep;				\
+		set_pte(ptep, pteval);				\
+		if (pte_inserted(old_pte))			\
+			purge_tlb_entries(mm, addr);		\
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);	\
 	} while (0)
 
 #endif /* !__ASSEMBLY__ */
@@ -268,7 +286,7 @@
 
 #define pte_none(x)     (pte_val(x) == 0)
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm,addr,xp)	do { pte_val(*(xp)) = 0; } while (0)
+#define pte_clear(mm, addr, xp)  set_pte_at(mm, addr, xp, __pte(0))
 
 #define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
 #define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
@@ -435,15 +453,15 @@
 	if (!pte_young(*ptep))
 		return 0;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	pte = *ptep;
 	if (!pte_young(pte)) {
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);
 		return 0;
 	}
 	set_pte(ptep, pte_mkold(pte));
 	purge_tlb_entries(vma->vm_mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 	return 1;
 }
 
@@ -453,11 +471,12 @@
 	pte_t old_pte;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	old_pte = *ptep;
-	pte_clear(mm,addr,ptep);
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	set_pte(ptep, __pte(0));
+	if (pte_inserted(old_pte))
+		purge_tlb_entries(mm, addr);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 
 	return old_pte;
 }
@@ -465,10 +484,10 @@
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	set_pte(ptep, pte_wrprotect(*ptep));
 	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 }
 
 #define pte_same(A,B)	(pte_val(A) == pte_val(B))
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index 9d086a5..e84b964 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -13,6 +13,9 @@
  * active at any one time on the Merced bus.  This tlb purge
  * synchronisation is fairly lightweight and harmless so we activate
  * it on all systems not just the N class.
+
+ * It is also used to ensure PTE updates are atomic and consistent
+ * with the TLB.
  */
 extern spinlock_t pa_tlb_lock;
 
@@ -24,20 +27,24 @@
 
 #define smp_flush_tlb_all()	flush_tlb_all()
 
+int __flush_tlb_range(unsigned long sid,
+	unsigned long start, unsigned long end);
+
+#define flush_tlb_range(vma, start, end) \
+	__flush_tlb_range((vma)->vm_mm->context, start, end)
+
+#define flush_tlb_kernel_range(start, end) \
+	__flush_tlb_range(0, start, end)
+
 /*
  * flush_tlb_mm()
  *
- * XXX This code is NOT valid for HP-UX compatibility processes,
- * (although it will probably work 99% of the time). HP-UX
- * processes are free to play with the space id's and save them
- * over long periods of time, etc. so we have to preserve the
- * space and just flush the entire tlb. We need to check the
- * personality in order to do that, but the personality is not
- * currently being set correctly.
- *
- * Of course, Linux processes could do the same thing, but
- * we don't support that (and the compilers, dynamic linker,
- * etc. do not do that).
+ * The code to switch to a new context is NOT valid for processes
+ * which play with the space id's.  Thus, we have to preserve the
+ * space and just flush the entire tlb.  However, the compilers,
+ * dynamic linker, etc, do not manipulate space id's, so there
+ * could be a significant performance benefit in switching contexts
+ * and not flushing the whole tlb.
  */
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
@@ -45,10 +52,18 @@
 	BUG_ON(mm == &init_mm); /* Should never happen */
 
 #if 1 || defined(CONFIG_SMP)
+	/* Except for very small threads, flushing the whole TLB is
+	 * faster than using __flush_tlb_range.  The pdtlb and pitlb
+	 * instructions are very slow because of the TLB broadcast.
+	 * It might be faster to do local range flushes on all CPUs
+	 * on PA 2.0 systems.
+	 */
 	flush_tlb_all();
 #else
 	/* FIXME: currently broken, causing space id and protection ids
-	 *  to go out of sync, resulting in faults on userspace accesses.
+	 * to go out of sync, resulting in faults on userspace accesses.
+	 * This approach needs further investigation since running many
+	 * small applications (e.g., GCC testsuite) is faster on HP-UX.
 	 */
 	if (mm) {
 		if (mm->context != 0)
@@ -65,22 +80,12 @@
 {
 	unsigned long flags, sid;
 
-	/* For one page, it's not worth testing the split_tlb variable */
-
-	mb();
 	sid = vma->vm_mm->context;
 	purge_tlb_start(flags);
 	mtsp(sid, 1);
 	pdtlb(addr);
-	pitlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
 	purge_tlb_end(flags);
 }
-
-void __flush_tlb_range(unsigned long sid,
-	unsigned long start, unsigned long end);
-
-#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end)
-
-#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end)
-
 #endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index f6448c7..cda6dbb 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -342,12 +342,15 @@
 EXPORT_SYMBOL(flush_kernel_icache_range_asm);
 
 #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */
-int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+
+#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */
+static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD;
 
 void __init parisc_setup_cache_timing(void)
 {
 	unsigned long rangetime, alltime;
-	unsigned long size;
+	unsigned long size, start;
 
 	alltime = mfctl(16);
 	flush_data_cache();
@@ -364,14 +367,43 @@
 	/* Racy, but if we see an intermediate value, it's ok too... */
 	parisc_cache_flush_threshold = size * alltime / rangetime;
 
-	parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); 
+	parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
 	if (!parisc_cache_flush_threshold)
 		parisc_cache_flush_threshold = FLUSH_THRESHOLD;
 
 	if (parisc_cache_flush_threshold > cache_info.dc_size)
 		parisc_cache_flush_threshold = cache_info.dc_size;
 
-	printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus());
+	printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
+		parisc_cache_flush_threshold/1024);
+
+	/* calculate TLB flush threshold */
+
+	alltime = mfctl(16);
+	flush_tlb_all();
+	alltime = mfctl(16) - alltime;
+
+	size = PAGE_SIZE;
+	start = (unsigned long) _text;
+	rangetime = mfctl(16);
+	while (start < (unsigned long) _end) {
+		flush_tlb_kernel_range(start, start + PAGE_SIZE);
+		start += PAGE_SIZE;
+		size += PAGE_SIZE;
+	}
+	rangetime = mfctl(16) - rangetime;
+
+	printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
+		alltime, size, rangetime);
+
+	parisc_tlb_flush_threshold = size * alltime / rangetime;
+	parisc_tlb_flush_threshold *= num_online_cpus();
+	parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
+	if (!parisc_tlb_flush_threshold)
+		parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
+
+	printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+		parisc_tlb_flush_threshold/1024);
 }
 
 extern void purge_kernel_dcache_page_asm(unsigned long);
@@ -403,48 +435,45 @@
 }
 EXPORT_SYMBOL(copy_user_page);
 
-void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+/* __flush_tlb_range()
+ *
+ * returns 1 if all TLBs were flushed.
+ */
+int __flush_tlb_range(unsigned long sid, unsigned long start,
+		      unsigned long end)
 {
-	unsigned long flags;
+	unsigned long flags, size;
 
-	/* Note: purge_tlb_entries can be called at startup with
-	   no context.  */
-
-	purge_tlb_start(flags);
-	mtsp(mm->context, 1);
-	pdtlb(addr);
-	pitlb(addr);
-	purge_tlb_end(flags);
-}
-EXPORT_SYMBOL(purge_tlb_entries);
-
-void __flush_tlb_range(unsigned long sid, unsigned long start,
-		       unsigned long end)
-{
-	unsigned long npages;
-
-	npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-	if (npages >= 512)  /* 2MB of space: arbitrary, should be tuned */
+	size = (end - start);
+	if (size >= parisc_tlb_flush_threshold) {
 		flush_tlb_all();
-	else {
-		unsigned long flags;
+		return 1;
+	}
 
+	/* Purge TLB entries for small ranges using the pdtlb and
+	   pitlb instructions.  These instructions execute locally
+	   but cause a purge request to be broadcast to other TLBs.  */
+	if (likely(!split_tlb)) {
+		while (start < end) {
+			purge_tlb_start(flags);
+			mtsp(sid, 1);
+			pdtlb(start);
+			purge_tlb_end(flags);
+			start += PAGE_SIZE;
+		}
+		return 0;
+	}
+
+	/* split TLB case */
+	while (start < end) {
 		purge_tlb_start(flags);
 		mtsp(sid, 1);
-		if (split_tlb) {
-			while (npages--) {
-				pdtlb(start);
-				pitlb(start);
-				start += PAGE_SIZE;
-			}
-		} else {
-			while (npages--) {
-				pdtlb(start);
-				start += PAGE_SIZE;
-			}
-		}
+		pdtlb(start);
+		pitlb(start);
 		purge_tlb_end(flags);
+		start += PAGE_SIZE;
 	}
+	return 0;
 }
 
 static void cacheflush_h_tmp_function(void *dummy)
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 7581961..c5ef408 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -45,7 +45,7 @@
 	.level 2.0
 #endif
 
-	.import         pa_dbit_lock,data
+	.import		pa_tlb_lock,data
 
 	/* space_to_prot macro creates a prot id from a space id */
 
@@ -420,8 +420,8 @@
 	SHLREG		%r9,PxD_VALUE_SHIFT,\pmd
 	extru		\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
 	dep		%r0,31,PAGE_SHIFT,\pmd  /* clear offset */
-	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd
-	LDREG		%r0(\pmd),\pte		/* pmd is now pte */
+	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
+	LDREG		%r0(\pmd),\pte
 	bb,>=,n		\pte,_PAGE_PRESENT_BIT,\fault
 	.endm
 
@@ -453,57 +453,53 @@
 	L2_ptep		\pgd,\pte,\index,\va,\fault
 	.endm
 
-	/* Acquire pa_dbit_lock lock. */
-	.macro		dbit_lock	spc,tmp,tmp1
+	/* Acquire pa_tlb_lock lock and recheck page is still present. */
+	.macro		tlb_lock	spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
 	cmpib,COND(=),n	0,\spc,2f
-	load32		PA(pa_dbit_lock),\tmp
+	load32		PA(pa_tlb_lock),\tmp
 1:	LDCW		0(\tmp),\tmp1
 	cmpib,COND(=)	0,\tmp1,1b
 	nop
+	LDREG		0(\ptp),\pte
+	bb,<,n		\pte,_PAGE_PRESENT_BIT,2f
+	b		\fault
+	stw		 \spc,0(\tmp)
 2:
 #endif
 	.endm
 
-	/* Release pa_dbit_lock lock without reloading lock address. */
-	.macro		dbit_unlock0	spc,tmp
+	/* Release pa_tlb_lock lock without reloading lock address. */
+	.macro		tlb_unlock0	spc,tmp
 #ifdef CONFIG_SMP
 	or,COND(=)	%r0,\spc,%r0
 	stw             \spc,0(\tmp)
 #endif
 	.endm
 
-	/* Release pa_dbit_lock lock. */
-	.macro		dbit_unlock1	spc,tmp
+	/* Release pa_tlb_lock lock. */
+	.macro		tlb_unlock1	spc,tmp
 #ifdef CONFIG_SMP
-	load32		PA(pa_dbit_lock),\tmp
-	dbit_unlock0	\spc,\tmp
+	load32		PA(pa_tlb_lock),\tmp
+	tlb_unlock0	\spc,\tmp
 #endif
 	.endm
 
 	/* Set the _PAGE_ACCESSED bit of the PTE.  Be clever and
 	 * don't needlessly dirty the cache line if it was already set */
-	.macro		update_ptep	spc,ptep,pte,tmp,tmp1
-#ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
-	LDREG		0(\ptep),\pte
-#endif
+	.macro		update_accessed	ptp,pte,tmp,tmp1
 	ldi		_PAGE_ACCESSED,\tmp1
 	or		\tmp1,\pte,\tmp
 	and,COND(<>)	\tmp1,\pte,%r0
-	STREG		\tmp,0(\ptep)
+	STREG		\tmp,0(\ptp)
 	.endm
 
 	/* Set the dirty bit (and accessed bit).  No need to be
 	 * clever, this is only used from the dirty fault */
-	.macro		update_dirty	spc,ptep,pte,tmp
-#ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
-	LDREG		0(\ptep),\pte
-#endif
+	.macro		update_dirty	ptp,pte,tmp
 	ldi		_PAGE_ACCESSED|_PAGE_DIRTY,\tmp
 	or		\tmp,\pte,\pte
-	STREG		\pte,0(\ptep)
+	STREG		\pte,0(\ptp)
 	.endm
 
 	/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
@@ -1148,14 +1144,14 @@
 
 	L3_ptep		ptp,pte,t0,va,dtlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 	
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1174,14 +1170,14 @@
 
 	L3_ptep		ptp,pte,t0,va,nadtlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1202,20 +1198,20 @@
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1235,21 +1231,20 @@
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1269,16 +1264,16 @@
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1297,16 +1292,16 @@
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 	
-        idtlbt          pte,prot
-	dbit_unlock1	spc,t0
+	idtlbt		pte,prot
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1406,14 +1401,14 @@
 
 	L3_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 	
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1430,14 +1425,14 @@
 
 	L3_ptep		ptp,pte,t0,va,naitlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1458,20 +1453,20 @@
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	iitlba		pte,(%sr1,va)
 	iitlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1482,20 +1477,20 @@
 
 	L2_ptep		ptp,pte,t0,va,naitlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	iitlba		pte,(%sr1,va)
 	iitlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1516,16 +1511,16 @@
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0	
+	f_extend	pte,t1
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1536,16 +1531,16 @@
 
 	L2_ptep		ptp,pte,t0,va,naitlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1568,14 +1563,14 @@
 
 	L3_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb	spc,pte,prot
 		
 	idtlbt          pte,prot
-	dbit_unlock0	spc,t0
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 #else
@@ -1588,8 +1583,8 @@
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
@@ -1600,8 +1595,8 @@
 	idtlbp		prot,(%sr1,va)
 
 	mtsp            t1, %sr1     /* Restore sr1 */
-	dbit_unlock0	spc,t0
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 
@@ -1612,16 +1607,16 @@
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	f_extend	pte,t1
 	
-        idtlbt          pte,prot
-	dbit_unlock0	spc,t0
+	idtlbt		pte,prot
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 #endif
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 6548fd1..b99b39f 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -43,10 +43,6 @@
 
 #include "../math-emu/math-emu.h"	/* for handle_fpe() */
 
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-DEFINE_SPINLOCK(pa_dbit_lock);
-#endif
-
 static void parisc_show_stack(struct task_struct *task, unsigned long *sp,
 	struct pt_regs *regs);
 
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index ccde8f0..112ccf4 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -52,6 +52,22 @@
 	.text
 
 /*
+ * Used by threads when the lock bit of core_idle_state is set.
+ * Threads will spin in HMT_LOW until the lock bit is cleared.
+ * r14 - pointer to core_idle_state
+ * r15 - used to load contents of core_idle_state
+ */
+
+core_idle_lock_held:
+	HMT_LOW
+3:	lwz	r15,0(r14)
+	andi.   r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	3b
+	HMT_MEDIUM
+	lwarx	r15,0,r14
+	blr
+
+/*
  * Pass requested state in r3:
  *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
@@ -150,6 +166,10 @@
 	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
 lwarx_loop1:
 	lwarx	r15,0,r14
+
+	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bnel	core_idle_lock_held
+
 	andc	r15,r15,r7			/* Clear thread bit */
 
 	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
@@ -294,7 +314,7 @@
 	 * workaround undo code or resyncing timebase or restoring context
 	 * In either case loop until the lock bit is cleared.
 	 */
-	bne	core_idle_lock_held
+	bnel	core_idle_lock_held
 
 	cmpwi	cr2,r15,0
 	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
@@ -319,15 +339,6 @@
 	isync
 	b	common_exit
 
-core_idle_lock_held:
-	HMT_LOW
-core_idle_lock_loop:
-	lwz	r15,0(14)
-	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
-	bne	core_idle_lock_loop
-	HMT_MEDIUM
-	b	lwarx_loop2
-
 first_thread_in_subcore:
 	/* First thread in subcore to wakeup */
 	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 6530f1b..37de90f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -297,6 +297,8 @@
 
 	__this_cpu_inc(irq_stat.mce_exceptions);
 
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
 	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 		handled = cur_cpu_spec->machine_check_early(regs);
 	return handled;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6d53597..a67c6d7 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -529,6 +529,10 @@
 		printk(KERN_ALERT "Unable to handle kernel paging request for "
 			"instruction fetch\n");
 		break;
+	case 0x600:
+		printk(KERN_ALERT "Unable to handle kernel paging request for "
+			"unaligned access at address 0x%08lx\n", regs->dar);
+		break;
 	default:
 		printk(KERN_ALERT "Unable to handle kernel paging request for "
 			"unknown fault\n");
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index ec2eb20..df95629 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -320,6 +320,8 @@
 	if (!attr)
 		return NULL;
 
+	sysfs_attr_init(&attr->attr.attr);
+
 	attr->var = str;
 	attr->attr.attr.name = name;
 	attr->attr.attr.mode = 0444;
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 4949ef0..37f959b 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -237,7 +237,7 @@
 	return elog;
 }
 
-static void elog_work_fn(struct work_struct *work)
+static irqreturn_t elog_event(int irq, void *data)
 {
 	__be64 size;
 	__be64 id;
@@ -251,7 +251,7 @@
 	rc = opal_get_elog_size(&id, &size, &type);
 	if (rc != OPAL_SUCCESS) {
 		pr_err("ELOG: OPAL log info read failed\n");
-		return;
+		return IRQ_HANDLED;
 	}
 
 	elog_size = be64_to_cpu(size);
@@ -270,16 +270,10 @@
 	 * entries.
 	 */
 	if (kset_find_obj(elog_kset, name))
-		return;
+		return IRQ_HANDLED;
 
 	create_elog_obj(log_id, elog_size, elog_type);
-}
 
-static DECLARE_WORK(elog_work, elog_work_fn);
-
-static irqreturn_t elog_event(int irq, void *data)
-{
-	schedule_work(&elog_work);
 	return IRQ_HANDLED;
 }
 
@@ -304,8 +298,8 @@
 		return irq;
 	}
 
-	rc = request_irq(irq, elog_event,
-			IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL);
+	rc = request_threaded_irq(irq, NULL, elog_event,
+			IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL);
 	if (rc) {
 		pr_err("%s: Can't request OPAL event irq (%d)\n",
 		       __func__, rc);
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 46cb3fe..4ece8e4 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -112,6 +112,7 @@
 static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	size_t addr, size;
+	pgprot_t page_prot;
 	int rc;
 
 	pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
@@ -125,13 +126,11 @@
 	if (!opal_prd_range_is_valid(addr, size))
 		return -EINVAL;
 
-	vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file,
-						vma->vm_pgoff,
-						 size, vma->vm_page_prot))
-					| _PAGE_SPECIAL);
+	page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
+					 size, vma->vm_page_prot);
 
 	rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size,
-			vma->vm_page_prot);
+				page_prot);
 
 	return rc;
 }
diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
index 2bc3367..87f9623 100644
--- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
+++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
@@ -18,6 +18,7 @@
 #include <linux/pci.h>
 #include <linux/semaphore.h>
 #include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
 
 struct ppc4xx_hsta_msi {
 	struct device *dev;
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 88c7016..97bbb60 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -28,7 +28,7 @@
 #define _ST(p, inst, v)						\
 	({							\
 		asm("1: " #inst " %0, %1;"			\
-		    ".pushsection .coldtext.memcpy,\"ax\";"	\
+		    ".pushsection .coldtext,\"ax\";"	\
 		    "2: { move r0, %2; jrp lr };"		\
 		    ".section __ex_table,\"a\";"		\
 		    ".align 8;"					\
@@ -41,7 +41,7 @@
 	({							\
 		unsigned long __v;				\
 		asm("1: " #inst " %0, %1;"			\
-		    ".pushsection .coldtext.memcpy,\"ax\";"	\
+		    ".pushsection .coldtext,\"ax\";"	\
 		    "2: { move r0, %2; jrp lr };"		\
 		    ".section __ex_table,\"a\";"		\
 		    ".align 8;"					\
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 55bced1..3dbb7e7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -254,6 +254,11 @@
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config KASAN_SHADOW_OFFSET
+	hex
+	depends on KASAN
+	default 0xdffffc0000000000
+
 config HAVE_INTEL_TXT
 	def_bool y
 	depends on INTEL_IOMMU && ACPI
@@ -2015,7 +2020,7 @@
 
 	  To compile command line arguments into the kernel,
 	  set this option to 'Y', then fill in the
-	  the boot arguments in CONFIG_CMDLINE.
+	  boot arguments in CONFIG_CMDLINE.
 
 	  Systems with fully functional boot loaders (i.e. non-embedded)
 	  should leave this option set to 'N'.
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 99efebb..ca3ce9a 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -9,7 +9,7 @@
 DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
-extern void init_espfix_ap(void);
+extern void init_espfix_ap(int cpu);
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 8b22422..74a2a8d 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -14,15 +14,11 @@
 
 #ifndef __ASSEMBLY__
 
-extern pte_t kasan_zero_pte[];
-extern pte_t kasan_zero_pmd[];
-extern pte_t kasan_zero_pud[];
-
 #ifdef CONFIG_KASAN
-void __init kasan_map_early_shadow(pgd_t *pgd);
+void __init kasan_early_init(void);
 void __init kasan_init(void);
 #else
-static inline void kasan_map_early_shadow(pgd_t *pgd) { }
+static inline void kasan_early_init(void) { }
 static inline void kasan_init(void) { }
 #endif
 
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 28eba2d..f813261 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -409,12 +409,6 @@
 	int irq, vector;
 	struct apic_chip_data *data;
 
-	/*
-	 * vector_lock will make sure that we don't run into irq vector
-	 * assignments that might be happening on another cpu in parallel,
-	 * while we setup our initial vector to irq mappings.
-	 */
-	raw_spin_lock(&vector_lock);
 	/* Mark the inuse vectors */
 	for_each_active_irq(irq) {
 		data = apic_chip_data(irq_get_irq_data(irq));
@@ -436,16 +430,16 @@
 		if (!cpumask_test_cpu(cpu, data->domain))
 			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
 	}
-	raw_spin_unlock(&vector_lock);
 }
 
 /*
- * Setup the vector to irq mappings.
+ * Setup the vector to irq mappings. Must be called with vector_lock held.
  */
 void setup_vector_irq(int cpu)
 {
 	int irq;
 
+	lockdep_assert_held(&vector_lock);
 	/*
 	 * On most of the platforms, legacy PIC delivers the interrupts on the
 	 * boot cpu. But there are certain platforms where PIC interrupts are
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 89427d8..eec40f5 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -175,7 +175,9 @@
 	}
 
 	if (*s) {
-		if (kstrtoul(s, 0, &baud) < 0 || baud == 0)
+		baud = simple_strtoull(s, &e, 0);
+
+		if (baud == 0 || s == e)
 			baud = DEFAULT_BAUD;
 	}
 
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index f5d0730..ce95676 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -131,25 +131,24 @@
 	init_espfix_random();
 
 	/* The rest is the same as for any other processor */
-	init_espfix_ap();
+	init_espfix_ap(0);
 }
 
-void init_espfix_ap(void)
+void init_espfix_ap(int cpu)
 {
-	unsigned int cpu, page;
+	unsigned int page;
 	unsigned long addr;
 	pud_t pud, *pud_p;
 	pmd_t pmd, *pmd_p;
 	pte_t pte, *pte_p;
-	int n;
+	int n, node;
 	void *stack_page;
 	pteval_t ptemask;
 
 	/* We only have to do this once... */
-	if (likely(this_cpu_read(espfix_stack)))
+	if (likely(per_cpu(espfix_stack, cpu)))
 		return;		/* Already initialized */
 
-	cpu = smp_processor_id();
 	addr = espfix_base_addr(cpu);
 	page = cpu/ESPFIX_STACKS_PER_PAGE;
 
@@ -165,12 +164,15 @@
 	if (stack_page)
 		goto unlock_done;
 
+	node = cpu_to_node(cpu);
 	ptemask = __supported_pte_mask;
 
 	pud_p = &espfix_pud_page[pud_index(addr)];
 	pud = *pud_p;
 	if (!pud_present(pud)) {
-		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pmd_p = (pmd_t *)page_address(page);
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
@@ -180,7 +182,9 @@
 	pmd_p = pmd_offset(&pud, addr);
 	pmd = *pmd_p;
 	if (!pmd_present(pmd)) {
-		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+		struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0);
+
+		pte_p = (pte_t *)page_address(page);
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
 		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
@@ -188,7 +192,7 @@
 	}
 
 	pte_p = pte_offset_kernel(&pmd, addr);
-	stack_page = (void *)__get_free_page(GFP_KERNEL);
+	stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
@@ -199,7 +203,7 @@
 unlock_done:
 	mutex_unlock(&espfix_init_mutex);
 done:
-	this_cpu_write(espfix_stack, addr);
-	this_cpu_write(espfix_waddr, (unsigned long)stack_page
-		       + (addr & ~PAGE_MASK));
+	per_cpu(espfix_stack, cpu) = addr;
+	per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
+				      + (addr & ~PAGE_MASK);
 }
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5a46681..f129a9a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -161,11 +161,12 @@
 	/* Kill off the identity-map trampoline */
 	reset_early_page_tables();
 
-	kasan_map_early_shadow(early_level4_pgt);
-
-	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
+	clear_page(init_level4_pgt);
+
+	kasan_early_init();
+
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
 		set_intr_gate(i, early_idt_handler_array[i]);
 	load_idt((const struct desc_ptr *)&idt_descr);
@@ -177,12 +178,9 @@
 	 */
 	load_ucode_bsp();
 
-	clear_page(init_level4_pgt);
 	/* set init_level4_pgt kernel high mapping*/
 	init_level4_pgt[511] = early_level4_pgt[511];
 
-	kasan_map_early_shadow(init_level4_pgt);
-
 	x86_64_start_reservations(real_mode_data);
 }
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index e5c27f7..1d40ca8 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -516,38 +516,9 @@
 	/* This must match the first entry in level2_kernel_pgt */
 	.quad   0x0000000000000000
 
-#ifdef CONFIG_KASAN
-#define FILL(VAL, COUNT)				\
-	.rept (COUNT) ;					\
-	.quad	(VAL) ;					\
-	.endr
-
-NEXT_PAGE(kasan_zero_pte)
-	FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512)
-NEXT_PAGE(kasan_zero_pmd)
-	FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512)
-NEXT_PAGE(kasan_zero_pud)
-	FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512)
-
-#undef FILL
-#endif
-
-
 #include "../../x86/xen/xen-head.S"
 	
 	__PAGE_ALIGNED_BSS
 NEXT_PAGE(empty_zero_page)
 	.skip PAGE_SIZE
 
-#ifdef CONFIG_KASAN
-/*
- * This page used as early shadow. We don't use empty_zero_page
- * at early stages, stack instrumentation could write some garbage
- * to this page.
- * Latter we reuse it as zero shadow for large ranges of memory
- * that allowed to access, but not instrumented by kasan
- * (vmalloc/vmemmap ...).
- */
-NEXT_PAGE(kasan_zero_page)
-	.skip PAGE_SIZE
-#endif
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 88b36648..c7dfe1b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -347,14 +347,22 @@
 			if (!desc)
 				continue;
 
+			/*
+			 * Protect against concurrent action removal,
+			 * affinity changes etc.
+			 */
+			raw_spin_lock(&desc->lock);
 			data = irq_desc_get_irq_data(desc);
 			cpumask_copy(&affinity_new, data->affinity);
 			cpumask_clear_cpu(this_cpu, &affinity_new);
 
 			/* Do not count inactive or per-cpu irqs. */
-			if (!irq_has_action(irq) || irqd_is_per_cpu(data))
+			if (!irq_has_action(irq) || irqd_is_per_cpu(data)) {
+				raw_spin_unlock(&desc->lock);
 				continue;
+			}
 
+			raw_spin_unlock(&desc->lock);
 			/*
 			 * A single irq may be mapped to multiple
 			 * cpu's vector_irq[] (for example IOAPIC cluster
@@ -385,6 +393,9 @@
 		 * vector. If the vector is marked in the used vectors
 		 * bitmap or an irq is assigned to it, we don't count
 		 * it as available.
+		 *
+		 * As this is an inaccurate snapshot anyway, we can do
+		 * this w/o holding vector_lock.
 		 */
 		for (vector = FIRST_EXTERNAL_VECTOR;
 		     vector < first_system_vector; vector++) {
@@ -486,6 +497,11 @@
 	 */
 	mdelay(1);
 
+	/*
+	 * We can walk the vector array of this cpu without holding
+	 * vector_lock because the cpu is already marked !online, so
+	 * nothing else will touch it.
+	 */
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
 		unsigned int irr;
 
@@ -497,9 +513,9 @@
 			irq = __this_cpu_read(vector_irq[vector]);
 
 			desc = irq_to_desc(irq);
+			raw_spin_lock(&desc->lock);
 			data = irq_desc_get_irq_data(desc);
 			chip = irq_data_get_irq_chip(data);
-			raw_spin_lock(&desc->lock);
 			if (chip->irq_retrigger) {
 				chip->irq_retrigger(data);
 				__this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8add66b..d3010aa 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -171,11 +171,6 @@
 	apic_ap_setup();
 
 	/*
-	 * Need to setup vector mappings before we enable interrupts.
-	 */
-	setup_vector_irq(smp_processor_id());
-
-	/*
 	 * Save our processor parameters. Note: this information
 	 * is needed for clock calibration.
 	 */
@@ -239,18 +234,13 @@
 	check_tsc_sync_target();
 
 	/*
-	 * Enable the espfix hack for this CPU
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	init_espfix_ap();
-#endif
-
-	/*
-	 * We need to hold vector_lock so there the set of online cpus
-	 * does not change while we are assigning vectors to cpus.  Holding
-	 * this lock ensures we don't half assign or remove an irq from a cpu.
+	 * Lock vector_lock and initialize the vectors on this cpu
+	 * before setting the cpu online. We must set it online with
+	 * vector_lock held to prevent a concurrent setup/teardown
+	 * from seeing a half valid vector space.
 	 */
 	lock_vector_lock();
+	setup_vector_irq(smp_processor_id());
 	set_cpu_online(smp_processor_id(), true);
 	unlock_vector_lock();
 	cpu_set_state_online(smp_processor_id());
@@ -854,6 +844,13 @@
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
 
+	/*
+	 * Enable the espfix hack for this CPU
+	*/
+#ifdef CONFIG_X86_ESPFIX64
+	init_espfix_ap(cpu);
+#endif
+
 	/* So we see what's up */
 	announce_cpu(cpu, apicid);
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 5054497..7437b41 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -598,10 +598,19 @@
 			if (!pit_expect_msb(0xff-i, &delta, &d2))
 				break;
 
+			delta -= tsc;
+
+			/*
+			 * Extrapolate the error and fail fast if the error will
+			 * never be below 500 ppm.
+			 */
+			if (i == 1 &&
+			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
+				return 0;
+
 			/*
 			 * Iterate until the error is less than 500 ppm
 			 */
-			delta -= tsc;
 			if (d1+d2 >= delta >> 11)
 				continue;
 
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index ddf9ecb..e342586 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -20,7 +20,7 @@
 	unsigned long ret;
 
 	if (__range_not_ok(from, n, TASK_SIZE))
-		return 0;
+		return n;
 
 	/*
 	 * Even though this function is typically called from NMI/IRQ context
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 4860906..e1840f3 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,3 +1,4 @@
+#define pr_fmt(fmt) "kasan: " fmt
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
 #include <linux/kdebug.h>
@@ -11,7 +12,19 @@
 extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_X_MAX];
 
-extern unsigned char kasan_zero_page[PAGE_SIZE];
+static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss;
+
+/*
+ * This page used as early shadow. We don't use empty_zero_page
+ * at early stages, stack instrumentation could write some garbage
+ * to this page.
+ * Latter we reuse it as zero shadow for large ranges of memory
+ * that allowed to access, but not instrumented by kasan
+ * (vmalloc/vmemmap ...).
+ */
+static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
 
 static int __init map_range(struct range *range)
 {
@@ -36,7 +49,7 @@
 		pgd_clear(pgd_offset_k(start));
 }
 
-void __init kasan_map_early_shadow(pgd_t *pgd)
+static void __init kasan_map_early_shadow(pgd_t *pgd)
 {
 	int i;
 	unsigned long start = KASAN_SHADOW_START;
@@ -73,7 +86,7 @@
 	while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) {
 		WARN_ON(!pmd_none(*pmd));
 		set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte)
-					| __PAGE_KERNEL_RO));
+					| _KERNPG_TABLE));
 		addr += PMD_SIZE;
 		pmd = pmd_offset(pud, addr);
 	}
@@ -99,7 +112,7 @@
 	while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) {
 		WARN_ON(!pud_none(*pud));
 		set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd)
-					| __PAGE_KERNEL_RO));
+					| _KERNPG_TABLE));
 		addr += PUD_SIZE;
 		pud = pud_offset(pgd, addr);
 	}
@@ -124,7 +137,7 @@
 	while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) {
 		WARN_ON(!pgd_none(*pgd));
 		set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud)
-					| __PAGE_KERNEL_RO));
+					| _KERNPG_TABLE));
 		addr += PGDIR_SIZE;
 		pgd = pgd_offset_k(addr);
 	}
@@ -166,6 +179,26 @@
 };
 #endif
 
+void __init kasan_early_init(void)
+{
+	int i;
+	pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL;
+	pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE;
+	pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
+
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		kasan_zero_pte[i] = __pte(pte_val);
+
+	for (i = 0; i < PTRS_PER_PMD; i++)
+		kasan_zero_pmd[i] = __pmd(pmd_val);
+
+	for (i = 0; i < PTRS_PER_PUD; i++)
+		kasan_zero_pud[i] = __pud(pud_val);
+
+	kasan_map_early_shadow(early_level4_pgt);
+	kasan_map_early_shadow(init_level4_pgt);
+}
+
 void __init kasan_init(void)
 {
 	int i;
@@ -176,6 +209,7 @@
 
 	memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt));
 	load_cr3(early_level4_pgt);
+	__flush_tlb_all();
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
@@ -202,5 +236,8 @@
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 
 	load_cr3(init_level4_pgt);
+	__flush_tlb_all();
 	init_task.kasan_depth = 0;
+
+	pr_info("Kernel address sanitizer initialized\n");
 }
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 569ee09..46b58ab 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -352,13 +352,16 @@
 				pdata->mmio_size = resource_size(rentry->res);
 			pdata->mmio_base = ioremap(rentry->res->start,
 						   pdata->mmio_size);
-			if (!pdata->mmio_base)
-				goto err_out;
 			break;
 		}
 
 	acpi_dev_free_resource_list(&resource_list);
 
+	if (!pdata->mmio_base) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+
 	pdata->dev_desc = dev_desc;
 
 	if (dev_desc->setup)
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 2161fa1..628a42c 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/acpi.h>
 #include <linux/sort.h>
+#include <linux/pmem.h>
 #include <linux/io.h>
 #include "nfit.h"
 
@@ -305,6 +306,23 @@
 	return true;
 }
 
+static bool add_flush(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_flush_address *flush)
+{
+	struct device *dev = acpi_desc->dev;
+	struct nfit_flush *nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush),
+			GFP_KERNEL);
+
+	if (!nfit_flush)
+		return false;
+	INIT_LIST_HEAD(&nfit_flush->list);
+	nfit_flush->flush = flush;
+	list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
+	dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+			flush->device_handle, flush->hint_count);
+	return true;
+}
+
 static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table,
 		const void *end)
 {
@@ -338,7 +356,8 @@
 			return err;
 		break;
 	case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
-		dev_dbg(dev, "%s: flush\n", __func__);
+		if (!add_flush(acpi_desc, table))
+			return err;
 		break;
 	case ACPI_NFIT_TYPE_SMBIOS:
 		dev_dbg(dev, "%s: smbios\n", __func__);
@@ -389,6 +408,7 @@
 {
 	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
 	struct nfit_memdev *nfit_memdev;
+	struct nfit_flush *nfit_flush;
 	struct nfit_dcr *nfit_dcr;
 	struct nfit_bdw *nfit_bdw;
 	struct nfit_idt *nfit_idt;
@@ -442,6 +462,14 @@
 			nfit_mem->idt_bdw = nfit_idt->idt;
 			break;
 		}
+
+		list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
+			if (nfit_flush->flush->device_handle !=
+					nfit_memdev->memdev->device_handle)
+				continue;
+			nfit_mem->nfit_flush = nfit_flush;
+			break;
+		}
 		break;
 	}
 
@@ -978,6 +1006,24 @@
 	return mmio->base_offset + line_offset + table_offset + sub_line_offset;
 }
 
+static void wmb_blk(struct nfit_blk *nfit_blk)
+{
+
+	if (nfit_blk->nvdimm_flush) {
+		/*
+		 * The first wmb() is needed to 'sfence' all previous writes
+		 * such that they are architecturally visible for the platform
+		 * buffer flush.  Note that we've already arranged for pmem
+		 * writes to avoid the cache via arch_memcpy_to_pmem().  The
+		 * final wmb() ensures ordering for the NVDIMM flush write.
+		 */
+		wmb();
+		writeq(1, nfit_blk->nvdimm_flush);
+		wmb();
+	} else
+		wmb_pmem();
+}
+
 static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
 {
 	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
@@ -1012,7 +1058,10 @@
 		offset = to_interleave_offset(offset, mmio);
 
 	writeq(cmd, mmio->base + offset);
-	/* FIXME: conditionally perform read-back if mandated by firmware */
+	wmb_blk(nfit_blk);
+
+	if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH)
+		readq(mmio->base + offset);
 }
 
 static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
@@ -1026,7 +1075,6 @@
 
 	base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
 		+ lane * mmio->size;
-	/* TODO: non-temporal access, flush hints, cache management etc... */
 	write_blk_ctl(nfit_blk, lane, dpa, len, rw);
 	while (len) {
 		unsigned int c;
@@ -1045,13 +1093,19 @@
 		}
 
 		if (rw)
-			memcpy(mmio->aperture + offset, iobuf + copied, c);
+			memcpy_to_pmem(mmio->aperture + offset,
+					iobuf + copied, c);
 		else
-			memcpy(iobuf + copied, mmio->aperture + offset, c);
+			memcpy_from_pmem(iobuf + copied,
+					mmio->aperture + offset, c);
 
 		copied += c;
 		len -= c;
 	}
+
+	if (rw)
+		wmb_blk(nfit_blk);
+
 	rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
 	return rc;
 }
@@ -1124,7 +1178,7 @@
 }
 
 static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
-		struct acpi_nfit_system_address *spa)
+		struct acpi_nfit_system_address *spa, enum spa_map_type type)
 {
 	resource_size_t start = spa->address;
 	resource_size_t n = spa->length;
@@ -1152,8 +1206,15 @@
 	if (!res)
 		goto err_mem;
 
-	/* TODO: cacheability based on the spa type */
-	spa_map->iomem = ioremap_nocache(start, n);
+	if (type == SPA_MAP_APERTURE) {
+		/*
+		 * TODO: memremap_pmem() support, but that requires cache
+		 * flushing when the aperture is moved.
+		 */
+		spa_map->iomem = ioremap_wc(start, n);
+	} else
+		spa_map->iomem = ioremap_nocache(start, n);
+
 	if (!spa_map->iomem)
 		goto err_map;
 
@@ -1171,6 +1232,7 @@
  * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
  * @nvdimm_bus: NFIT-bus that provided the spa table entry
  * @nfit_spa: spa table to map
+ * @type: aperture or control region
  *
  * In the case where block-data-window apertures and
  * dimm-control-regions are interleaved they will end up sharing a
@@ -1180,12 +1242,12 @@
  * unbound.
  */
 static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
-		struct acpi_nfit_system_address *spa)
+		struct acpi_nfit_system_address *spa, enum spa_map_type type)
 {
 	void __iomem *iomem;
 
 	mutex_lock(&acpi_desc->spa_map_mutex);
-	iomem = __nfit_spa_map(acpi_desc, spa);
+	iomem = __nfit_spa_map(acpi_desc, spa, type);
 	mutex_unlock(&acpi_desc->spa_map_mutex);
 
 	return iomem;
@@ -1206,12 +1268,35 @@
 	return 0;
 }
 
+static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
+		struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
+{
+	struct nd_cmd_dimm_flags flags;
+	int rc;
+
+	memset(&flags, 0, sizeof(flags));
+	rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
+			sizeof(flags));
+
+	if (rc >= 0 && flags.status == 0)
+		nfit_blk->dimm_flags = flags.flags;
+	else if (rc == -ENOTTY) {
+		/* fall back to a conservative default */
+		nfit_blk->dimm_flags = ND_BLK_DCR_LATCH;
+		rc = 0;
+	} else
+		rc = -ENXIO;
+
+	return rc;
+}
+
 static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 		struct device *dev)
 {
 	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
 	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
+	struct nfit_flush *nfit_flush;
 	struct nfit_blk_mmio *mmio;
 	struct nfit_blk *nfit_blk;
 	struct nfit_mem *nfit_mem;
@@ -1223,8 +1308,8 @@
 	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
 		dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
 				nfit_mem ? "" : " nfit_mem",
-				nfit_mem->dcr ? "" : " dcr",
-				nfit_mem->bdw ? "" : " bdw");
+				(nfit_mem && nfit_mem->dcr) ? "" : " dcr",
+				(nfit_mem && nfit_mem->bdw) ? "" : " bdw");
 		return -ENXIO;
 	}
 
@@ -1237,7 +1322,8 @@
 	/* map block aperture memory */
 	nfit_blk->bdw_offset = nfit_mem->bdw->offset;
 	mmio = &nfit_blk->mmio[BDW];
-	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw);
+	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
+			SPA_MAP_APERTURE);
 	if (!mmio->base) {
 		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
 				nvdimm_name(nvdimm));
@@ -1259,7 +1345,8 @@
 	nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
 	nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
 	mmio = &nfit_blk->mmio[DCR];
-	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr);
+	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
+			SPA_MAP_CONTROL);
 	if (!mmio->base) {
 		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
 				nvdimm_name(nvdimm));
@@ -1277,6 +1364,24 @@
 		return rc;
 	}
 
+	rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
+	if (rc < 0) {
+		dev_dbg(dev, "%s: %s failed get DIMM flags\n",
+				__func__, nvdimm_name(nvdimm));
+		return rc;
+	}
+
+	nfit_flush = nfit_mem->nfit_flush;
+	if (nfit_flush && nfit_flush->flush->hint_count != 0) {
+		nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev,
+				nfit_flush->flush->hint_address[0], 8);
+		if (!nfit_blk->nvdimm_flush)
+			return -ENOMEM;
+	}
+
+	if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush)
+		dev_warn(dev, "unable to guarantee persistence of writes\n");
+
 	if (mmio->line_size == 0)
 		return 0;
 
@@ -1459,6 +1564,7 @@
 	INIT_LIST_HEAD(&acpi_desc->dcrs);
 	INIT_LIST_HEAD(&acpi_desc->bdws);
 	INIT_LIST_HEAD(&acpi_desc->idts);
+	INIT_LIST_HEAD(&acpi_desc->flushes);
 	INIT_LIST_HEAD(&acpi_desc->memdevs);
 	INIT_LIST_HEAD(&acpi_desc->dimms);
 	mutex_init(&acpi_desc->spa_map_mutex);
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 81f2e8c..79b6d83 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -40,6 +40,10 @@
 	NFIT_UUID_MAX,
 };
 
+enum {
+	ND_BLK_DCR_LATCH = 2,
+};
+
 struct nfit_spa {
 	struct acpi_nfit_system_address *spa;
 	struct list_head list;
@@ -60,6 +64,11 @@
 	struct list_head list;
 };
 
+struct nfit_flush {
+	struct acpi_nfit_flush_address *flush;
+	struct list_head list;
+};
+
 struct nfit_memdev {
 	struct acpi_nfit_memory_map *memdev;
 	struct list_head list;
@@ -77,6 +86,7 @@
 	struct acpi_nfit_system_address *spa_bdw;
 	struct acpi_nfit_interleave *idt_dcr;
 	struct acpi_nfit_interleave *idt_bdw;
+	struct nfit_flush *nfit_flush;
 	struct list_head list;
 	struct acpi_device *adev;
 	unsigned long dsm_mask;
@@ -88,6 +98,7 @@
 	struct mutex spa_map_mutex;
 	struct list_head spa_maps;
 	struct list_head memdevs;
+	struct list_head flushes;
 	struct list_head dimms;
 	struct list_head spas;
 	struct list_head dcrs;
@@ -109,7 +120,7 @@
 	struct nfit_blk_mmio {
 		union {
 			void __iomem *base;
-			void *aperture;
+			void __pmem  *aperture;
 		};
 		u64 size;
 		u64 base_offset;
@@ -123,6 +134,13 @@
 	u64 bdw_offset; /* post interleave offset */
 	u64 stat_offset;
 	u64 cmd_offset;
+	void __iomem *nvdimm_flush;
+	u32 dimm_flags;
+};
+
+enum spa_map_type {
+	SPA_MAP_CONTROL,
+	SPA_MAP_APERTURE,
 };
 
 struct nfit_spa_mapping {
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index c262e4a..3b8963f 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -175,10 +175,14 @@
 	if (!addr || !length)
 		return;
 
-	acpi_reserve_region(addr, length, gas->space_id, 0, desc);
+	/* Resources are never freed */
+	if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO)
+		request_region(addr, length, desc);
+	else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
+		request_mem_region(addr, length, desc);
 }
 
-static void __init acpi_reserve_resources(void)
+static int __init acpi_reserve_resources(void)
 {
 	acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length,
 		"ACPI PM1a_EVT_BLK");
@@ -207,7 +211,10 @@
 	if (!(acpi_gbl_FADT.gpe1_block_length & 0x1))
 		acpi_request_region(&acpi_gbl_FADT.xgpe1_block,
 			       acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK");
+
+	return 0;
 }
+fs_initcall_sync(acpi_reserve_resources);
 
 void acpi_os_printf(const char *fmt, ...)
 {
@@ -1862,7 +1869,6 @@
 
 acpi_status __init acpi_os_initialize1(void)
 {
-	acpi_reserve_resources();
 	kacpid_wq = alloc_workqueue("kacpid", 0, 1);
 	kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1);
 	kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0);
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 10561ce..8244f01 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -26,7 +26,6 @@
 #include <linux/device.h>
 #include <linux/export.h>
 #include <linux/ioport.h>
-#include <linux/list.h>
 #include <linux/slab.h>
 
 #ifdef CONFIG_X86
@@ -622,164 +621,3 @@
 	return (type & types) ? 0 : 1;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type);
-
-struct reserved_region {
-	struct list_head node;
-	u64 start;
-	u64 end;
-};
-
-static LIST_HEAD(reserved_io_regions);
-static LIST_HEAD(reserved_mem_regions);
-
-static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags,
-			 char *desc)
-{
-	unsigned int length = end - start + 1;
-	struct resource *res;
-
-	res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ?
-		request_region(start, length, desc) :
-		request_mem_region(start, length, desc);
-	if (!res)
-		return -EIO;
-
-	res->flags &= ~flags;
-	return 0;
-}
-
-static int add_region_before(u64 start, u64 end, u8 space_id,
-			     unsigned long flags, char *desc,
-			     struct list_head *head)
-{
-	struct reserved_region *reg;
-	int error;
-
-	reg = kmalloc(sizeof(*reg), GFP_KERNEL);
-	if (!reg)
-		return -ENOMEM;
-
-	error = request_range(start, end, space_id, flags, desc);
-	if (error) {
-		kfree(reg);
-		return error;
-	}
-
-	reg->start = start;
-	reg->end = end;
-	list_add_tail(&reg->node, head);
-	return 0;
-}
-
-/**
- * acpi_reserve_region - Reserve an I/O or memory region as a system resource.
- * @start: Starting address of the region.
- * @length: Length of the region.
- * @space_id: Identifier of address space to reserve the region from.
- * @flags: Resource flags to clear for the region after requesting it.
- * @desc: Region description (for messages).
- *
- * Reserve an I/O or memory region as a system resource to prevent others from
- * using it.  If the new region overlaps with one of the regions (in the given
- * address space) already reserved by this routine, only the non-overlapping
- * parts of it will be reserved.
- *
- * Returned is either 0 (success) or a negative error code indicating a resource
- * reservation problem.  It is the code of the first encountered error, but the
- * routine doesn't abort until it has attempted to request all of the parts of
- * the new region that don't overlap with other regions reserved previously.
- *
- * The resources requested by this routine are never released.
- */
-int acpi_reserve_region(u64 start, unsigned int length, u8 space_id,
-			unsigned long flags, char *desc)
-{
-	struct list_head *regions;
-	struct reserved_region *reg;
-	u64 end = start + length - 1;
-	int ret = 0, error = 0;
-
-	if (space_id == ACPI_ADR_SPACE_SYSTEM_IO)
-		regions = &reserved_io_regions;
-	else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
-		regions = &reserved_mem_regions;
-	else
-		return -EINVAL;
-
-	if (list_empty(regions))
-		return add_region_before(start, end, space_id, flags, desc, regions);
-
-	list_for_each_entry(reg, regions, node)
-		if (reg->start == end + 1) {
-			/* The new region can be prepended to this one. */
-			ret = request_range(start, end, space_id, flags, desc);
-			if (!ret)
-				reg->start = start;
-
-			return ret;
-		} else if (reg->start > end) {
-			/* No overlap.  Add the new region here and get out. */
-			return add_region_before(start, end, space_id, flags,
-						 desc, &reg->node);
-		} else if (reg->end == start - 1) {
-			goto combine;
-		} else if (reg->end >= start) {
-			goto overlap;
-		}
-
-	/* The new region goes after the last existing one. */
-	return add_region_before(start, end, space_id, flags, desc, regions);
-
- overlap:
-	/*
-	 * The new region overlaps an existing one.
-	 *
-	 * The head part of the new region immediately preceding the existing
-	 * overlapping one can be combined with it right away.
-	 */
-	if (reg->start > start) {
-		error = request_range(start, reg->start - 1, space_id, flags, desc);
-		if (error)
-			ret = error;
-		else
-			reg->start = start;
-	}
-
- combine:
-	/*
-	 * The new region is adjacent to an existing one.  If it extends beyond
-	 * that region all the way to the next one, it is possible to combine
-	 * all three of them.
-	 */
-	while (reg->end < end) {
-		struct reserved_region *next = NULL;
-		u64 a = reg->end + 1, b = end;
-
-		if (!list_is_last(&reg->node, regions)) {
-			next = list_next_entry(reg, node);
-			if (next->start <= end)
-				b = next->start - 1;
-		}
-		error = request_range(a, b, space_id, flags, desc);
-		if (!error) {
-			if (next && next->start == b + 1) {
-				reg->end = next->end;
-				list_del(&next->node);
-				kfree(next);
-			} else {
-				reg->end = end;
-				break;
-			}
-		} else if (next) {
-			if (!ret)
-				ret = error;
-
-			reg = next;
-		} else {
-			break;
-		}
-	}
-
-	return ret ? ret : error;
-}
-EXPORT_SYMBOL_GPL(acpi_reserve_region);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 2649a06..ec25635 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1019,6 +1019,29 @@
 	return false;
 }
 
+static bool __acpi_match_device_cls(const struct acpi_device_id *id,
+				    struct acpi_hardware_id *hwid)
+{
+	int i, msk, byte_shift;
+	char buf[3];
+
+	if (!id->cls)
+		return false;
+
+	/* Apply class-code bitmask, before checking each class-code byte */
+	for (i = 1; i <= 3; i++) {
+		byte_shift = 8 * (3 - i);
+		msk = (id->cls_msk >> byte_shift) & 0xFF;
+		if (!msk)
+			continue;
+
+		sprintf(buf, "%02x", (id->cls >> byte_shift) & msk);
+		if (strncmp(buf, &hwid->id[(i - 1) * 2], 2))
+			return false;
+	}
+	return true;
+}
+
 static const struct acpi_device_id *__acpi_match_device(
 	struct acpi_device *device,
 	const struct acpi_device_id *ids,
@@ -1036,9 +1059,12 @@
 
 	list_for_each_entry(hwid, &device->pnp.ids, list) {
 		/* First, check the ACPI/PNP IDs provided by the caller. */
-		for (id = ids; id->id[0]; id++)
-			if (!strcmp((char *) id->id, hwid->id))
+		for (id = ids; id->id[0] || id->cls; id++) {
+			if (id->id[0] && !strcmp((char *) id->id, hwid->id))
 				return id;
+			else if (id->cls && __acpi_match_device_cls(id, hwid))
+				return id;
+		}
 
 		/*
 		 * Next, check ACPI_DT_NAMESPACE_HID and try to match the
@@ -2101,6 +2127,8 @@
 		if (info->valid & ACPI_VALID_UID)
 			pnp->unique_id = kstrdup(info->unique_id.string,
 							GFP_KERNEL);
+		if (info->valid & ACPI_VALID_CLS)
+			acpi_add_id(pnp, info->class_code.string);
 
 		kfree(info);
 
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 6d17a3b..15e40ee 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -48,7 +48,7 @@
 
 config ATA_ACPI
 	bool "ATA ACPI Support"
-	depends on ACPI && PCI
+	depends on ACPI
 	default y
 	help
 	  This option adds support for ATA-related ACPI objects.
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 614c78f..1befb11 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -20,6 +20,8 @@
 #include <linux/platform_device.h>
 #include <linux/libata.h>
 #include <linux/ahci_platform.h>
+#include <linux/acpi.h>
+#include <linux/pci_ids.h>
 #include "ahci.h"
 
 #define DRV_NAME "ahci"
@@ -79,12 +81,19 @@
 };
 MODULE_DEVICE_TABLE(of, ahci_of_match);
 
+static const struct acpi_device_id ahci_acpi_match[] = {
+	{ ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, ahci_acpi_match);
+
 static struct platform_driver ahci_driver = {
 	.probe = ahci_probe,
 	.remove = ata_platform_remove_one,
 	.driver = {
 		.name = DRV_NAME,
 		.of_match_table = ahci_of_match,
+		.acpi_match_table = ahci_acpi_match,
 		.pm = &ahci_pm_ops,
 	},
 };
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 9c42883..894bda1 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -563,10 +563,8 @@
 	kfree(fw_priv);
 }
 
-static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int do_firmware_uevent(struct firmware_priv *fw_priv, struct kobj_uevent_env *env)
 {
-	struct firmware_priv *fw_priv = to_firmware_priv(dev);
-
 	if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id))
 		return -ENOMEM;
 	if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout))
@@ -577,6 +575,18 @@
 	return 0;
 }
 
+static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct firmware_priv *fw_priv = to_firmware_priv(dev);
+	int err = 0;
+
+	mutex_lock(&fw_lock);
+	if (fw_priv->buf)
+		err = do_firmware_uevent(fw_priv, env);
+	mutex_unlock(&fw_lock);
+	return err;
+}
+
 static struct class firmware_class = {
 	.name		= "firmware",
 	.class_attrs	= firmware_class_attrs,
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index cdd547b..0ee43c1 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -6,6 +6,7 @@
  * This file is released under the GPLv2.
  */
 
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
@@ -19,6 +20,8 @@
 #include <linux/suspend.h>
 #include <linux/export.h>
 
+#define GENPD_RETRY_MAX_MS	250		/* Approximate */
+
 #define GENPD_DEV_CALLBACK(genpd, type, callback, dev)		\
 ({								\
 	type (*__routine)(struct device *__d); 			\
@@ -2131,6 +2134,7 @@
 static void genpd_dev_pm_detach(struct device *dev, bool power_off)
 {
 	struct generic_pm_domain *pd;
+	unsigned int i;
 	int ret = 0;
 
 	pd = pm_genpd_lookup_dev(dev);
@@ -2139,10 +2143,12 @@
 
 	dev_dbg(dev, "removing from PM domain %s\n", pd->name);
 
-	while (1) {
+	for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
 		ret = pm_genpd_remove_device(pd, dev);
 		if (ret != -EAGAIN)
 			break;
+
+		mdelay(i);
 		cond_resched();
 	}
 
@@ -2183,6 +2189,7 @@
 {
 	struct of_phandle_args pd_args;
 	struct generic_pm_domain *pd;
+	unsigned int i;
 	int ret;
 
 	if (!dev->of_node)
@@ -2218,10 +2225,12 @@
 
 	dev_dbg(dev, "adding to PM domain %s\n", pd->name);
 
-	while (1) {
+	for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
 		ret = pm_genpd_add_device(pd, dev);
 		if (ret != -EAGAIN)
 			break;
+
+		mdelay(i);
 		cond_resched();
 	}
 
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 7470004..eb6e674 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -45,14 +45,12 @@
 		return -EEXIST;
 	}
 
-	dev->power.wakeirq = wirq;
-	spin_unlock_irqrestore(&dev->power.lock, flags);
-
 	err = device_wakeup_attach_irq(dev, wirq);
-	if (err)
-		return err;
+	if (!err)
+		dev->power.wakeirq = wirq;
 
-	return 0;
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+	return err;
 }
 
 /**
@@ -105,10 +103,10 @@
 		return;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
+	device_wakeup_detach_irq(dev);
 	dev->power.wakeirq = NULL;
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 
-	device_wakeup_detach_irq(dev);
 	if (wirq->dedicated_irq)
 		free_irq(wirq->irq, wirq);
 	kfree(wirq);
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 40f7160..51f15bc 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -281,32 +281,25 @@
  * Attach a device wakeirq to the wakeup source so the device
  * wake IRQ can be configured automatically for suspend and
  * resume.
+ *
+ * Call under the device's power.lock lock.
  */
 int device_wakeup_attach_irq(struct device *dev,
 			     struct wake_irq *wakeirq)
 {
 	struct wakeup_source *ws;
-	int ret = 0;
 
-	spin_lock_irq(&dev->power.lock);
 	ws = dev->power.wakeup;
 	if (!ws) {
 		dev_err(dev, "forgot to call call device_init_wakeup?\n");
-		ret = -EINVAL;
-		goto unlock;
+		return -EINVAL;
 	}
 
-	if (ws->wakeirq) {
-		ret = -EEXIST;
-		goto unlock;
-	}
+	if (ws->wakeirq)
+		return -EEXIST;
 
 	ws->wakeirq = wakeirq;
-
-unlock:
-	spin_unlock_irq(&dev->power.lock);
-
-	return ret;
+	return 0;
 }
 
 /**
@@ -314,20 +307,16 @@
  * @dev: Device to handle
  *
  * Removes a device wakeirq from the wakeup source.
+ *
+ * Call under the device's power.lock lock.
  */
 void device_wakeup_detach_irq(struct device *dev)
 {
 	struct wakeup_source *ws;
 
-	spin_lock_irq(&dev->power.lock);
 	ws = dev->power.wakeup;
-	if (!ws)
-		goto unlock;
-
-	ws->wakeirq = NULL;
-
-unlock:
-	spin_unlock_irq(&dev->power.lock);
+	if (ws)
+		ws->wakeirq = NULL;
 }
 
 /**
diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c
index 152dcb3..61566bc 100644
--- a/drivers/clk/at91/clk-h32mx.c
+++ b/drivers/clk/at91/clk-h32mx.c
@@ -116,8 +116,10 @@
 	h32mxclk->pmc = pmc;
 
 	clk = clk_register(NULL, &h32mxclk->hw);
-	if (!clk)
+	if (!clk) {
+		kfree(h32mxclk);
 		return;
+	}
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c
index c240045..27dfa96 100644
--- a/drivers/clk/at91/clk-main.c
+++ b/drivers/clk/at91/clk-main.c
@@ -171,8 +171,10 @@
 	irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
 	ret = request_irq(osc->irq, clk_main_osc_irq_handler,
 			  IRQF_TRIGGER_HIGH, name, osc);
-	if (ret)
+	if (ret) {
+		kfree(osc);
 		return ERR_PTR(ret);
+	}
 
 	if (bypass)
 		pmc_write(pmc, AT91_CKGR_MOR,
diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c
index f98eafe..5b3ded5 100644
--- a/drivers/clk/at91/clk-master.c
+++ b/drivers/clk/at91/clk-master.c
@@ -165,12 +165,16 @@
 	irq_set_status_flags(master->irq, IRQ_NOAUTOEN);
 	ret = request_irq(master->irq, clk_master_irq_handler,
 			  IRQF_TRIGGER_HIGH, "clk-master", master);
-	if (ret)
+	if (ret) {
+		kfree(master);
 		return ERR_PTR(ret);
+	}
 
 	clk = clk_register(NULL, &master->hw);
-	if (IS_ERR(clk))
+	if (IS_ERR(clk)) {
+		free_irq(master->irq, master);
 		kfree(master);
+	}
 
 	return clk;
 }
diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c
index cbbe403..18b60f4 100644
--- a/drivers/clk/at91/clk-pll.c
+++ b/drivers/clk/at91/clk-pll.c
@@ -346,12 +346,16 @@
 	irq_set_status_flags(pll->irq, IRQ_NOAUTOEN);
 	ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH,
 			  id ? "clk-pllb" : "clk-plla", pll);
-	if (ret)
+	if (ret) {
+		kfree(pll);
 		return ERR_PTR(ret);
+	}
 
 	clk = clk_register(NULL, &pll->hw);
-	if (IS_ERR(clk))
+	if (IS_ERR(clk)) {
+		free_irq(pll->irq, pll);
 		kfree(pll);
+	}
 
 	return clk;
 }
diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c
index a76d03f..58008b3 100644
--- a/drivers/clk/at91/clk-system.c
+++ b/drivers/clk/at91/clk-system.c
@@ -130,13 +130,17 @@
 		irq_set_status_flags(sys->irq, IRQ_NOAUTOEN);
 		ret = request_irq(sys->irq, clk_system_irq_handler,
 				IRQF_TRIGGER_HIGH, name, sys);
-		if (ret)
+		if (ret) {
+			kfree(sys);
 			return ERR_PTR(ret);
+		}
 	}
 
 	clk = clk_register(NULL, &sys->hw);
-	if (IS_ERR(clk))
+	if (IS_ERR(clk)) {
+		free_irq(sys->irq, sys);
 		kfree(sys);
+	}
 
 	return clk;
 }
diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c
index ae3263b..30dd697 100644
--- a/drivers/clk/at91/clk-utmi.c
+++ b/drivers/clk/at91/clk-utmi.c
@@ -118,12 +118,16 @@
 	irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN);
 	ret = request_irq(utmi->irq, clk_utmi_irq_handler,
 			  IRQF_TRIGGER_HIGH, "clk-utmi", utmi);
-	if (ret)
+	if (ret) {
+		kfree(utmi);
 		return ERR_PTR(ret);
+	}
 
 	clk = clk_register(NULL, &utmi->hw);
-	if (IS_ERR(clk))
+	if (IS_ERR(clk)) {
+		free_irq(utmi->irq, utmi);
 		kfree(utmi);
+	}
 
 	return clk;
 }
diff --git a/drivers/clk/bcm/clk-iproc-asiu.c b/drivers/clk/bcm/clk-iproc-asiu.c
index e19c09c..f630e1b 100644
--- a/drivers/clk/bcm/clk-iproc-asiu.c
+++ b/drivers/clk/bcm/clk-iproc-asiu.c
@@ -222,10 +222,6 @@
 		struct iproc_asiu_clk *asiu_clk;
 		const char *clk_name;
 
-		clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL);
-		if (WARN_ON(!clk_name))
-			goto err_clk_register;
-
 		ret = of_property_read_string_index(node, "clock-output-names",
 						    i, &clk_name);
 		if (WARN_ON(ret))
@@ -259,7 +255,7 @@
 
 err_clk_register:
 	for (i = 0; i < num_clks; i++)
-		kfree(asiu->clks[i].name);
+		clk_unregister(asiu->clk_data.clks[i]);
 	iounmap(asiu->gate_base);
 
 err_iomap_gate:
diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c
index 46fb84b..2dda4e8 100644
--- a/drivers/clk/bcm/clk-iproc-pll.c
+++ b/drivers/clk/bcm/clk-iproc-pll.c
@@ -366,7 +366,7 @@
 	val = readl(pll->pll_base + ctrl->ndiv_int.offset);
 	ndiv_int = (val >> ctrl->ndiv_int.shift) &
 		bit_mask(ctrl->ndiv_int.width);
-	ndiv = ndiv_int << ctrl->ndiv_int.shift;
+	ndiv = (u64)ndiv_int << ctrl->ndiv_int.shift;
 
 	if (ctrl->flags & IPROC_CLK_PLL_HAS_NDIV_FRAC) {
 		val = readl(pll->pll_base + ctrl->ndiv_frac.offset);
@@ -374,7 +374,8 @@
 			bit_mask(ctrl->ndiv_frac.width);
 
 		if (ndiv_frac != 0)
-			ndiv = (ndiv_int << ctrl->ndiv_int.shift) | ndiv_frac;
+			ndiv = ((u64)ndiv_int << ctrl->ndiv_int.shift) |
+				ndiv_frac;
 	}
 
 	val = readl(pll->pll_base + ctrl->pdiv.offset);
@@ -655,10 +656,6 @@
 		memset(&init, 0, sizeof(init));
 		parent_name = node->name;
 
-		clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL);
-		if (WARN_ON(!clk_name))
-			goto err_clk_register;
-
 		ret = of_property_read_string_index(node, "clock-output-names",
 						    i, &clk_name);
 		if (WARN_ON(ret))
@@ -690,10 +687,8 @@
 	return;
 
 err_clk_register:
-	for (i = 0; i < num_clks; i++) {
-		kfree(pll->clks[i].name);
+	for (i = 0; i < num_clks; i++)
 		clk_unregister(pll->clk_data.clks[i]);
-	}
 
 err_pll_register:
 	if (pll->asiu_base)
diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index b9b12a7..3f6f7ad 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -268,7 +268,7 @@
 	memcpy(table, stm32f42xx_gate_map, sizeof(table));
 
 	/* only bits set in table can be used as indices */
-	if (WARN_ON(secondary > 8 * sizeof(table) ||
+	if (WARN_ON(secondary >= BITS_PER_BYTE * sizeof(table) ||
 		    0 == (table[BIT_ULL_WORD(secondary)] &
 			  BIT_ULL_MASK(secondary))))
 		return -EINVAL;
diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c
index 4b9e04c..8b6523d 100644
--- a/drivers/clk/mediatek/clk-mt8173.c
+++ b/drivers/clk/mediatek/clk-mt8173.c
@@ -700,6 +700,22 @@
 	MUX(CLK_PERI_UART3_SEL, "uart3_ck_sel", uart_ck_sel_parents, 0x40c, 3, 1),
 };
 
+static struct clk_onecell_data *mt8173_top_clk_data __initdata;
+static struct clk_onecell_data *mt8173_pll_clk_data __initdata;
+
+static void __init mtk_clk_enable_critical(void)
+{
+	if (!mt8173_top_clk_data || !mt8173_pll_clk_data)
+		return;
+
+	clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA15PLL]);
+	clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA7PLL]);
+	clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_MEM_SEL]);
+	clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_DDRPHYCFG_SEL]);
+	clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_CCI400_SEL]);
+	clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_RTC_SEL]);
+}
+
 static void __init mtk_topckgen_init(struct device_node *node)
 {
 	struct clk_onecell_data *clk_data;
@@ -712,19 +728,19 @@
 		return;
 	}
 
-	clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK);
+	mt8173_top_clk_data = clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK);
 
 	mtk_clk_register_factors(root_clk_alias, ARRAY_SIZE(root_clk_alias), clk_data);
 	mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data);
 	mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base,
 			&mt8173_clk_lock, clk_data);
 
-	clk_prepare_enable(clk_data->clks[CLK_TOP_CCI400_SEL]);
-
 	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
 	if (r)
 		pr_err("%s(): could not register clock provider: %d\n",
 			__func__, r);
+
+	mtk_clk_enable_critical();
 }
 CLK_OF_DECLARE(mtk_topckgen, "mediatek,mt8173-topckgen", mtk_topckgen_init);
 
@@ -818,13 +834,13 @@
 {
 	struct clk_onecell_data *clk_data;
 
-	clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK);
+	mt8173_pll_clk_data = clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK);
 	if (!clk_data)
 		return;
 
 	mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data);
 
-	clk_prepare_enable(clk_data->clks[CLK_APMIXED_ARMCA15PLL]);
+	mtk_clk_enable_critical();
 }
 CLK_OF_DECLARE(mtk_apmixedsys, "mediatek,mt8173-apmixedsys",
 		mtk_apmixedsys_init);
diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
index b95d17f..92936f0 100644
--- a/drivers/clk/qcom/clk-rcg2.c
+++ b/drivers/clk/qcom/clk-rcg2.c
@@ -530,19 +530,16 @@
 	struct clk_rcg2 *rcg = to_clk_rcg2(hw);
 	struct freq_tbl f = *rcg->freq_tbl;
 	const struct frac_entry *frac = frac_table_pixel;
-	unsigned long request, src_rate;
+	unsigned long request;
 	int delta = 100000;
 	u32 mask = BIT(rcg->hid_width) - 1;
 	u32 hid_div;
-	int index = qcom_find_src_index(hw, rcg->parent_map, f.src);
-	struct clk *parent = clk_get_parent_by_index(hw->clk, index);
 
 	for (; frac->num; frac++) {
 		request = (rate * frac->den) / frac->num;
 
-		src_rate = __clk_round_rate(parent, request);
-		if ((src_rate < (request - delta)) ||
-			(src_rate > (request + delta)))
+		if ((parent_rate < (request - delta)) ||
+			(parent_rate > (request + delta)))
 			continue;
 
 		regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG,
diff --git a/drivers/clk/shmobile/clk-mstp.c b/drivers/clk/shmobile/clk-mstp.c
index 2d2fe77..b1df7b2 100644
--- a/drivers/clk/shmobile/clk-mstp.c
+++ b/drivers/clk/shmobile/clk-mstp.c
@@ -2,6 +2,7 @@
  * R-Car MSTP clocks
  *
  * Copyright (C) 2013 Ideas On Board SPRL
+ * Copyright (C) 2015 Glider bvba
  *
  * Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  *
@@ -10,11 +11,16 @@
  * the Free Software Foundation; version 2 of the License.
  */
 
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
+#include <linux/clk/shmobile.h>
+#include <linux/device.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/pm_clock.h>
+#include <linux/pm_domain.h>
 #include <linux/spinlock.h>
 
 /*
@@ -236,3 +242,84 @@
 	of_clk_add_provider(np, of_clk_src_onecell_get, &group->data);
 }
 CLK_OF_DECLARE(cpg_mstp_clks, "renesas,cpg-mstp-clocks", cpg_mstp_clocks_init);
+
+
+#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
+int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args clkspec;
+	struct clk *clk;
+	int i = 0;
+	int error;
+
+	while (!of_parse_phandle_with_args(np, "clocks", "#clock-cells", i,
+					   &clkspec)) {
+		if (of_device_is_compatible(clkspec.np,
+					    "renesas,cpg-mstp-clocks"))
+			goto found;
+
+		of_node_put(clkspec.np);
+		i++;
+	}
+
+	return 0;
+
+found:
+	clk = of_clk_get_from_provider(&clkspec);
+	of_node_put(clkspec.np);
+
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	error = pm_clk_create(dev);
+	if (error) {
+		dev_err(dev, "pm_clk_create failed %d\n", error);
+		goto fail_put;
+	}
+
+	error = pm_clk_add_clk(dev, clk);
+	if (error) {
+		dev_err(dev, "pm_clk_add_clk %pC failed %d\n", clk, error);
+		goto fail_destroy;
+	}
+
+	return 0;
+
+fail_destroy:
+	pm_clk_destroy(dev);
+fail_put:
+	clk_put(clk);
+	return error;
+}
+
+void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev)
+{
+	if (!list_empty(&dev->power.subsys_data->clock_list))
+		pm_clk_destroy(dev);
+}
+
+void __init cpg_mstp_add_clk_domain(struct device_node *np)
+{
+	struct generic_pm_domain *pd;
+	u32 ncells;
+
+	if (of_property_read_u32(np, "#power-domain-cells", &ncells)) {
+		pr_warn("%s lacks #power-domain-cells\n", np->full_name);
+		return;
+	}
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return;
+
+	pd->name = np->name;
+
+	pd->flags = GENPD_FLAG_PM_CLK;
+	pm_genpd_init(pd, &simple_qos_governor, false);
+	pd->attach_dev = cpg_mstp_attach_dev;
+	pd->detach_dev = cpg_mstp_detach_dev;
+
+	of_genpd_add_provider_simple(np, pd);
+}
+#endif /* !CONFIG_PM_GENERIC_DOMAINS_OF */
diff --git a/drivers/clk/shmobile/clk-r8a7778.c b/drivers/clk/shmobile/clk-r8a7778.c
index cb33b57..fa45684 100644
--- a/drivers/clk/shmobile/clk-r8a7778.c
+++ b/drivers/clk/shmobile/clk-r8a7778.c
@@ -124,6 +124,8 @@
 	}
 
 	of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
+
+	cpg_mstp_add_clk_domain(np);
 }
 
 CLK_OF_DECLARE(r8a7778_cpg_clks, "renesas,r8a7778-cpg-clocks",
diff --git a/drivers/clk/shmobile/clk-r8a7779.c b/drivers/clk/shmobile/clk-r8a7779.c
index 652ecac..e42a63a 100644
--- a/drivers/clk/shmobile/clk-r8a7779.c
+++ b/drivers/clk/shmobile/clk-r8a7779.c
@@ -168,6 +168,8 @@
 	}
 
 	of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
+
+	cpg_mstp_add_clk_domain(np);
 }
 CLK_OF_DECLARE(r8a7779_cpg_clks, "renesas,r8a7779-cpg-clocks",
 	       r8a7779_cpg_clocks_init);
diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/shmobile/clk-rcar-gen2.c
index acfb6d7..f2c457f 100644
--- a/drivers/clk/shmobile/clk-rcar-gen2.c
+++ b/drivers/clk/shmobile/clk-rcar-gen2.c
@@ -415,6 +415,8 @@
 	}
 
 	of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
+
+	cpg_mstp_add_clk_domain(np);
 }
 CLK_OF_DECLARE(rcar_gen2_cpg_clks, "renesas,rcar-gen2-cpg-clocks",
 	       rcar_gen2_cpg_clocks_init);
diff --git a/drivers/clk/shmobile/clk-rz.c b/drivers/clk/shmobile/clk-rz.c
index 7e68e86..9766e3c 100644
--- a/drivers/clk/shmobile/clk-rz.c
+++ b/drivers/clk/shmobile/clk-rz.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/clk-provider.h>
+#include <linux/clk/shmobile.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
@@ -99,5 +100,7 @@
 	}
 
 	of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
+
+	cpg_mstp_add_clk_domain(np);
 }
 CLK_OF_DECLARE(rz_cpg_clks, "renesas,rz-cpg-clocks", rz_cpg_clocks_init);
diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c
index 657ca14..8dd8cce 100644
--- a/drivers/clk/st/clk-flexgen.c
+++ b/drivers/clk/st/clk-flexgen.c
@@ -190,7 +190,7 @@
 
 	init.name = name;
 	init.ops = &flexgen_ops;
-	init.flags = CLK_IS_BASIC | flexgen_flags;
+	init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE | flexgen_flags;
 	init.parent_names = parent_names;
 	init.num_parents = num_parents;
 
@@ -303,6 +303,8 @@
 	if (!rlock)
 		goto err;
 
+	spin_lock_init(rlock);
+
 	for (i = 0; i < clk_data->clk_num; i++) {
 		struct clk *clk;
 		const char *clk_name;
diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c
index e94197f..d9eb2e1 100644
--- a/drivers/clk/st/clkgen-fsyn.c
+++ b/drivers/clk/st/clkgen-fsyn.c
@@ -340,7 +340,7 @@
 		    CLKGEN_FIELD(0x30c, 0xf, 20),
 		    CLKGEN_FIELD(0x310, 0xf, 20) },
 	.lockstatus_present = true,
-	.lock_status = CLKGEN_FIELD(0x2A0, 0x1, 24),
+	.lock_status = CLKGEN_FIELD(0x2f0, 0x1, 24),
 	.powerup_polarity = 1,
 	.standby_polarity = 1,
 	.pll_ops	= &st_quadfs_pll_c32_ops,
@@ -489,7 +489,7 @@
 	struct st_clk_quadfs_pll *pll = to_quadfs_pll(hw);
 	u32 npda = CLKGEN_READ(pll, npda);
 
-	return !!npda;
+	return pll->data->powerup_polarity ? !npda : !!npda;
 }
 
 static int clk_fs660c32_vco_get_rate(unsigned long input, struct stm_fs *fs,
@@ -635,7 +635,7 @@
 
 	init.name = name;
 	init.ops = quadfs->pll_ops;
-	init.flags = CLK_IS_BASIC;
+	init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE;
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
 
@@ -774,7 +774,7 @@
 	if (fs->lock)
 		spin_lock_irqsave(fs->lock, flags);
 
-	CLKGEN_WRITE(fs, nsb[fs->chan], !fs->data->standby_polarity);
+	CLKGEN_WRITE(fs, nsb[fs->chan], fs->data->standby_polarity);
 
 	if (fs->lock)
 		spin_unlock_irqrestore(fs->lock, flags);
@@ -1082,10 +1082,6 @@
 		.compatible = "st,stih407-quadfs660-D",
 		.data = &st_fs660c32_D_407
 	},
-	{
-		.compatible = "st,stih407-quadfs660-D",
-		.data = (void *)&st_fs660c32_D_407
-	},
 	{}
 };
 
diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c
index 4fbe6e0..717c4a9 100644
--- a/drivers/clk/st/clkgen-mux.c
+++ b/drivers/clk/st/clkgen-mux.c
@@ -237,7 +237,7 @@
 
 	init.name = name;
 	init.ops = &clkgena_divmux_ops;
-	init.flags = CLK_IS_BASIC;
+	init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE;
 	init.parent_names = parent_names;
 	init.num_parents = num_parents;
 
@@ -513,7 +513,8 @@
 					  0, &clk_name))
 		return;
 
-	clk = clk_register_divider_table(NULL, clk_name, parent_name, 0,
+	clk = clk_register_divider_table(NULL, clk_name, parent_name,
+					 CLK_GET_RATE_NOCACHE,
 					 reg + data->offset, data->shift, 1,
 					 0, data->table, NULL);
 	if (IS_ERR(clk))
@@ -582,7 +583,7 @@
 };
 static struct clkgen_mux_data stih407_a9_mux_data = {
 	.offset = 0x1a4,
-	.shift = 1,
+	.shift = 0,
 	.width = 2,
 };
 
@@ -786,7 +787,8 @@
 					     &mux->hw, &clk_mux_ops,
 					     &div->hw, &clk_divider_ops,
 					     &gate->hw, &clk_gate_ops,
-					     data->clk_flags);
+					     data->clk_flags |
+					     CLK_GET_RATE_NOCACHE);
 		if (IS_ERR(clk)) {
 			kfree(gate);
 			kfree(div);
diff --git a/drivers/clk/st/clkgen-pll.c b/drivers/clk/st/clkgen-pll.c
index 1065322..72d1c27 100644
--- a/drivers/clk/st/clkgen-pll.c
+++ b/drivers/clk/st/clkgen-pll.c
@@ -406,7 +406,7 @@
 	init.name = clk_name;
 	init.ops = pll_data->ops;
 
-	init.flags = CLK_IS_BASIC;
+	init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE;
 	init.parent_names = &parent_name;
 	init.num_parents  = 1;
 
diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index 9a82f17..abf7b37 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -1391,6 +1391,7 @@
 CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks);
 CLK_OF_DECLARE(sun6i_a31s_clk_init, "allwinner,sun6i-a31s", sun6i_init_clocks);
 CLK_OF_DECLARE(sun8i_a23_clk_init, "allwinner,sun8i-a23", sun6i_init_clocks);
+CLK_OF_DECLARE(sun8i_a33_clk_init, "allwinner,sun8i-a33", sun6i_init_clocks);
 
 static void __init sun9i_init_clocks(struct device_node *node)
 {
diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c
index 879c784..2d59038 100644
--- a/drivers/clocksource/timer-imx-gpt.c
+++ b/drivers/clocksource/timer-imx-gpt.c
@@ -529,6 +529,7 @@
 
 CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt);
 CLOCKSOURCE_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt);
+CLOCKSOURCE_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt);
 CLOCKSOURCE_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt);
 CLOCKSOURCE_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt);
 CLOCKSOURCE_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt);
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index cc8a71c..24e5c66 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -247,12 +247,19 @@
 	help
 	  This adds the CPUFreq driver support for SPEAr SOCs.
 
-config ARM_TEGRA_CPUFREQ
-	bool "TEGRA CPUFreq support"
+config ARM_TEGRA20_CPUFREQ
+	bool "Tegra20 CPUFreq support"
 	depends on ARCH_TEGRA
 	default y
 	help
-	  This adds the CPUFreq driver support for TEGRA SOCs.
+	  This adds the CPUFreq driver support for Tegra20 SOCs.
+
+config ARM_TEGRA124_CPUFREQ
+	tristate "Tegra124 CPUFreq support"
+	depends on ARCH_TEGRA && CPUFREQ_DT
+	default y
+	help
+	  This adds the CPUFreq driver support for Tegra124 SOCs.
 
 config ARM_PXA2xx_CPUFREQ
 	tristate "Intel PXA2xx CPUfreq driver"
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 2169bf7..032745d 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -76,7 +76,8 @@
 obj-$(CONFIG_ARM_SA1100_CPUFREQ)	+= sa1100-cpufreq.o
 obj-$(CONFIG_ARM_SA1110_CPUFREQ)	+= sa1110-cpufreq.o
 obj-$(CONFIG_ARM_SPEAR_CPUFREQ)		+= spear-cpufreq.o
-obj-$(CONFIG_ARM_TEGRA_CPUFREQ)		+= tegra-cpufreq.o
+obj-$(CONFIG_ARM_TEGRA20_CPUFREQ)	+= tegra20-cpufreq.o
+obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 
 ##################################################################################
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index fc897ba..e362860 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -3,7 +3,7 @@
  *
  * The 2E revision of loongson processor not support this feature.
  *
- * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology
+ * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology
  * Author: Yanhua, yanh@lemote.com
  *
  * This file is subject to the terms and conditions of the GNU General Public
diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c
new file mode 100644
index 0000000..20bcceb
--- /dev/null
+++ b/drivers/cpufreq/tegra124-cpufreq.c
@@ -0,0 +1,214 @@
+/*
+ * Tegra 124 cpufreq driver
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/clk.h>
+#include <linux/cpufreq-dt.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regulator/consumer.h>
+#include <linux/types.h>
+
+struct tegra124_cpufreq_priv {
+	struct regulator *vdd_cpu_reg;
+	struct clk *cpu_clk;
+	struct clk *pllp_clk;
+	struct clk *pllx_clk;
+	struct clk *dfll_clk;
+	struct platform_device *cpufreq_dt_pdev;
+};
+
+static int tegra124_cpu_switch_to_dfll(struct tegra124_cpufreq_priv *priv)
+{
+	struct clk *orig_parent;
+	int ret;
+
+	ret = clk_set_rate(priv->dfll_clk, clk_get_rate(priv->cpu_clk));
+	if (ret)
+		return ret;
+
+	orig_parent = clk_get_parent(priv->cpu_clk);
+	clk_set_parent(priv->cpu_clk, priv->pllp_clk);
+
+	ret = clk_prepare_enable(priv->dfll_clk);
+	if (ret)
+		goto out;
+
+	clk_set_parent(priv->cpu_clk, priv->dfll_clk);
+
+	return 0;
+
+out:
+	clk_set_parent(priv->cpu_clk, orig_parent);
+
+	return ret;
+}
+
+static void tegra124_cpu_switch_to_pllx(struct tegra124_cpufreq_priv *priv)
+{
+	clk_set_parent(priv->cpu_clk, priv->pllp_clk);
+	clk_disable_unprepare(priv->dfll_clk);
+	regulator_sync_voltage(priv->vdd_cpu_reg);
+	clk_set_parent(priv->cpu_clk, priv->pllx_clk);
+}
+
+static struct cpufreq_dt_platform_data cpufreq_dt_pd = {
+	.independent_clocks = false,
+};
+
+static int tegra124_cpufreq_probe(struct platform_device *pdev)
+{
+	struct tegra124_cpufreq_priv *priv;
+	struct device_node *np;
+	struct device *cpu_dev;
+	struct platform_device_info cpufreq_dt_devinfo = {};
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	cpu_dev = get_cpu_device(0);
+	if (!cpu_dev)
+		return -ENODEV;
+
+	np = of_cpu_device_node_get(0);
+	if (!np)
+		return -ENODEV;
+
+	priv->vdd_cpu_reg = regulator_get(cpu_dev, "vdd-cpu");
+	if (IS_ERR(priv->vdd_cpu_reg)) {
+		ret = PTR_ERR(priv->vdd_cpu_reg);
+		goto out_put_np;
+	}
+
+	priv->cpu_clk = of_clk_get_by_name(np, "cpu_g");
+	if (IS_ERR(priv->cpu_clk)) {
+		ret = PTR_ERR(priv->cpu_clk);
+		goto out_put_vdd_cpu_reg;
+	}
+
+	priv->dfll_clk = of_clk_get_by_name(np, "dfll");
+	if (IS_ERR(priv->dfll_clk)) {
+		ret = PTR_ERR(priv->dfll_clk);
+		goto out_put_cpu_clk;
+	}
+
+	priv->pllx_clk = of_clk_get_by_name(np, "pll_x");
+	if (IS_ERR(priv->pllx_clk)) {
+		ret = PTR_ERR(priv->pllx_clk);
+		goto out_put_dfll_clk;
+	}
+
+	priv->pllp_clk = of_clk_get_by_name(np, "pll_p");
+	if (IS_ERR(priv->pllp_clk)) {
+		ret = PTR_ERR(priv->pllp_clk);
+		goto out_put_pllx_clk;
+	}
+
+	ret = tegra124_cpu_switch_to_dfll(priv);
+	if (ret)
+		goto out_put_pllp_clk;
+
+	cpufreq_dt_devinfo.name = "cpufreq-dt";
+	cpufreq_dt_devinfo.parent = &pdev->dev;
+	cpufreq_dt_devinfo.data = &cpufreq_dt_pd;
+	cpufreq_dt_devinfo.size_data = sizeof(cpufreq_dt_pd);
+
+	priv->cpufreq_dt_pdev =
+		platform_device_register_full(&cpufreq_dt_devinfo);
+	if (IS_ERR(priv->cpufreq_dt_pdev)) {
+		ret = PTR_ERR(priv->cpufreq_dt_pdev);
+		goto out_switch_to_pllx;
+	}
+
+	platform_set_drvdata(pdev, priv);
+
+	return 0;
+
+out_switch_to_pllx:
+	tegra124_cpu_switch_to_pllx(priv);
+out_put_pllp_clk:
+	clk_put(priv->pllp_clk);
+out_put_pllx_clk:
+	clk_put(priv->pllx_clk);
+out_put_dfll_clk:
+	clk_put(priv->dfll_clk);
+out_put_cpu_clk:
+	clk_put(priv->cpu_clk);
+out_put_vdd_cpu_reg:
+	regulator_put(priv->vdd_cpu_reg);
+out_put_np:
+	of_node_put(np);
+
+	return ret;
+}
+
+static int tegra124_cpufreq_remove(struct platform_device *pdev)
+{
+	struct tegra124_cpufreq_priv *priv = platform_get_drvdata(pdev);
+
+	platform_device_unregister(priv->cpufreq_dt_pdev);
+	tegra124_cpu_switch_to_pllx(priv);
+
+	clk_put(priv->pllp_clk);
+	clk_put(priv->pllx_clk);
+	clk_put(priv->dfll_clk);
+	clk_put(priv->cpu_clk);
+	regulator_put(priv->vdd_cpu_reg);
+
+	return 0;
+}
+
+static struct platform_driver tegra124_cpufreq_platdrv = {
+	.driver.name	= "cpufreq-tegra124",
+	.probe		= tegra124_cpufreq_probe,
+	.remove		= tegra124_cpufreq_remove,
+};
+
+static int __init tegra_cpufreq_init(void)
+{
+	int ret;
+	struct platform_device *pdev;
+
+	if (!of_machine_is_compatible("nvidia,tegra124"))
+		return -ENODEV;
+
+	/*
+	 * Platform driver+device required for handling EPROBE_DEFER with
+	 * the regulator and the DFLL clock
+	 */
+	ret = platform_driver_register(&tegra124_cpufreq_platdrv);
+	if (ret)
+		return ret;
+
+	pdev = platform_device_register_simple("cpufreq-tegra124", -1, NULL, 0);
+	if (IS_ERR(pdev)) {
+		platform_driver_unregister(&tegra124_cpufreq_platdrv);
+		return PTR_ERR(pdev);
+	}
+
+	return 0;
+}
+module_init(tegra_cpufreq_init);
+
+MODULE_AUTHOR("Tuomas Tynkkynen <ttynkkynen@nvidia.com>");
+MODULE_DESCRIPTION("cpufreq driver for NVIDIA Tegra124");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/tegra-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c
similarity index 100%
rename from drivers/cpufreq/tegra-cpufreq.c
rename to drivers/cpufreq/tegra20-cpufreq.c
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 975edb1..ae43b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -352,7 +352,7 @@
 	if (((int64_t)timeout_ns) < 0)
 		return MAX_SCHEDULE_TIMEOUT;
 
-	timeout = ktime_sub_ns(ktime_get(), timeout_ns);
+	timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
 	if (ktime_to_ns(timeout) < 0)
 		return 0;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 5cde635..6e77964 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -3403,19 +3403,25 @@
 
 	switch (entry->src_data) {
 	case 0: /* vblank */
-		if (disp_int & interrupt_status_offsets[crtc].vblank) {
+		if (disp_int & interrupt_status_offsets[crtc].vblank)
 			dce_v10_0_crtc_vblank_int_ack(adev, crtc);
-			if (amdgpu_irq_enabled(adev, source, irq_type)) {
-				drm_handle_vblank(adev->ddev, crtc);
-			}
-			DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+		else
+			DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+		if (amdgpu_irq_enabled(adev, source, irq_type)) {
+			drm_handle_vblank(adev->ddev, crtc);
 		}
+		DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+
 		break;
 	case 1: /* vline */
-		if (disp_int & interrupt_status_offsets[crtc].vline) {
+		if (disp_int & interrupt_status_offsets[crtc].vline)
 			dce_v10_0_crtc_vline_int_ack(adev, crtc);
-			DRM_DEBUG("IH: D%d vline\n", crtc + 1);
-		}
+		else
+			DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+		DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+
 		break;
 	default:
 		DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 95efd98..7f7abb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -3402,19 +3402,25 @@
 
 	switch (entry->src_data) {
 	case 0: /* vblank */
-		if (disp_int & interrupt_status_offsets[crtc].vblank) {
+		if (disp_int & interrupt_status_offsets[crtc].vblank)
 			dce_v11_0_crtc_vblank_int_ack(adev, crtc);
-			if (amdgpu_irq_enabled(adev, source, irq_type)) {
-				drm_handle_vblank(adev->ddev, crtc);
-			}
-			DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+		else
+			DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+		if (amdgpu_irq_enabled(adev, source, irq_type)) {
+			drm_handle_vblank(adev->ddev, crtc);
 		}
+		DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+
 		break;
 	case 1: /* vline */
-		if (disp_int & interrupt_status_offsets[crtc].vline) {
+		if (disp_int & interrupt_status_offsets[crtc].vline)
 			dce_v11_0_crtc_vline_int_ack(adev, crtc);
-			DRM_DEBUG("IH: D%d vline\n", crtc + 1);
-		}
+		else
+			DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+		DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+
 		break;
 	default:
 		DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index aaca8d6..08387df 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -3237,19 +3237,25 @@
 
 	switch (entry->src_data) {
 	case 0: /* vblank */
-		if (disp_int & interrupt_status_offsets[crtc].vblank) {
+		if (disp_int & interrupt_status_offsets[crtc].vblank)
 			WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK);
-			if (amdgpu_irq_enabled(adev, source, irq_type)) {
-				drm_handle_vblank(adev->ddev, crtc);
-			}
-			DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+		else
+			DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+		if (amdgpu_irq_enabled(adev, source, irq_type)) {
+			drm_handle_vblank(adev->ddev, crtc);
 		}
+		DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+
 		break;
 	case 1: /* vline */
-		if (disp_int & interrupt_status_offsets[crtc].vline) {
+		if (disp_int & interrupt_status_offsets[crtc].vline)
 			WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], LB_VLINE_STATUS__VLINE_ACK_MASK);
-			DRM_DEBUG("IH: D%d vline\n", crtc + 1);
-		}
+		else
+			DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+		DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+
 		break;
 	default:
 		DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 8a1f999..9be0070 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -420,6 +420,12 @@
 	pqm_uninit(&p->pqm);
 
 	pdd = kfd_get_process_device_data(dev, p);
+
+	if (!pdd) {
+		mutex_unlock(&p->mutex);
+		return;
+	}
+
 	if (pdd->reset_wavefronts) {
 		dbgdev_wave_reset_wavefronts(pdd->dev, p);
 		pdd->reset_wavefronts = false;
@@ -431,8 +437,7 @@
 	 * We don't call amd_iommu_unbind_pasid() here
 	 * because the IOMMU called us.
 	 */
-	if (pdd)
-		pdd->bound = false;
+	pdd->bound = false;
 
 	mutex_unlock(&p->mutex);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8867818..d65cbe6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -157,9 +157,7 @@
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	obj = i915_gem_object_create_stolen(dev, size);
-	if (obj == NULL)
-		obj = i915_gem_alloc_object(dev, size);
+	obj = i915_gem_alloc_object(dev, size);
 	if (obj == NULL)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9daa288..dcc6a88 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2546,6 +2546,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
+	struct i915_vma *vma;
+	bool flush;
 
 	i915_check_and_clear_faults(dev);
 
@@ -2555,17 +2557,24 @@
 				       dev_priv->gtt.base.total,
 				       true);
 
+	/* Cache flush objects bound into GGTT and rebind them. */
+	vm = &dev_priv->gtt.base;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
-							   &dev_priv->gtt.base);
-		if (!vma)
-			continue;
+		flush = false;
+		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+			if (vma->vm != vm)
+				continue;
 
-		i915_gem_clflush_object(obj, obj->pin_display);
-		WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE));
+			WARN_ON(i915_vma_bind(vma, obj->cache_level,
+					      PIN_UPDATE));
+
+			flush = true;
+		}
+
+		if (flush)
+			i915_gem_clflush_object(obj, obj->pin_display);
 	}
 
-
 	if (INTEL_INFO(dev)->gen >= 8) {
 		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
 			chv_setup_private_ppat(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 633bd1f..d61e74a 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -183,8 +183,18 @@
 		if (IS_GEN4(dev)) {
 			uint32_t ddc2 = I915_READ(DCC2);
 
-			if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
+			if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) {
+				/* Since the swizzling may vary within an
+				 * object, we have no idea what the swizzling
+				 * is for any page in particular. Thus we
+				 * cannot migrate tiled pages using the GPU,
+				 * nor can we tell userspace what the exact
+				 * swizzling is for any object.
+				 */
 				dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+				swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+				swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
+			}
 		}
 
 		if (dcc == 0xffffffff) {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 1b61f98..647b140 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4854,6 +4854,9 @@
 	struct intel_plane *intel_plane;
 	int pipe = intel_crtc->pipe;
 
+	if (!intel_crtc->active)
+		return;
+
 	intel_crtc_wait_for_pending_flips(crtc);
 
 	intel_pre_disable_primary(crtc);
@@ -7887,7 +7890,7 @@
 	int pipe = pipe_config->cpu_transcoder;
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
 	intel_clock_t clock;
-	u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2;
+	u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3;
 	int refclk = 100000;
 
 	mutex_lock(&dev_priv->sb_lock);
@@ -7895,10 +7898,13 @@
 	pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
 	pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
 	pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
+	pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port));
 	mutex_unlock(&dev_priv->sb_lock);
 
 	clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
-	clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff);
+	clock.m2 = (pll_dw0 & 0xff) << 22;
+	if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN)
+		clock.m2 |= pll_dw2 & 0x3fffff;
 	clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf;
 	clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7;
 	clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f;
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index f2daad8..7841970 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -285,7 +285,7 @@
 
 	if (wait) {
 		if (!wait_for_completion_timeout(&engine->compl,
-				msecs_to_jiffies(1))) {
+				msecs_to_jiffies(100))) {
 			dev_err(dmm->dev, "timed out waiting for done\n");
 			ret = -ETIMEDOUT;
 		}
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index ae2df41..12081e6 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -177,7 +177,7 @@
 		struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
 struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p);
 int omap_framebuffer_pin(struct drm_framebuffer *fb);
-int omap_framebuffer_unpin(struct drm_framebuffer *fb);
+void omap_framebuffer_unpin(struct drm_framebuffer *fb);
 void omap_framebuffer_update_scanout(struct drm_framebuffer *fb,
 		struct omap_drm_window *win, struct omap_overlay_info *info);
 struct drm_connector *omap_framebuffer_get_next_connector(
@@ -211,7 +211,7 @@
 		enum dma_data_direction dir);
 int omap_gem_get_paddr(struct drm_gem_object *obj,
 		dma_addr_t *paddr, bool remap);
-int omap_gem_put_paddr(struct drm_gem_object *obj);
+void omap_gem_put_paddr(struct drm_gem_object *obj);
 int omap_gem_get_pages(struct drm_gem_object *obj, struct page ***pages,
 		bool remap);
 int omap_gem_put_pages(struct drm_gem_object *obj);
@@ -236,7 +236,7 @@
 	/* PVR needs alignment to 8 pixels.. right now that is the most
 	 * restrictive stride requirement..
 	 */
-	return ALIGN(pitch, 8 * bytespp);
+	return roundup(pitch, 8 * bytespp);
 }
 
 /* map crtc to vblank mask */
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 0b967e7..51b1219 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -287,10 +287,10 @@
 }
 
 /* unpin, no longer being scanned out: */
-int omap_framebuffer_unpin(struct drm_framebuffer *fb)
+void omap_framebuffer_unpin(struct drm_framebuffer *fb)
 {
 	struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb);
-	int ret, i, n = drm_format_num_planes(fb->pixel_format);
+	int i, n = drm_format_num_planes(fb->pixel_format);
 
 	mutex_lock(&omap_fb->lock);
 
@@ -298,24 +298,16 @@
 
 	if (omap_fb->pin_count > 0) {
 		mutex_unlock(&omap_fb->lock);
-		return 0;
+		return;
 	}
 
 	for (i = 0; i < n; i++) {
 		struct plane *plane = &omap_fb->planes[i];
-		ret = omap_gem_put_paddr(plane->bo);
-		if (ret)
-			goto fail;
+		omap_gem_put_paddr(plane->bo);
 		plane->paddr = 0;
 	}
 
 	mutex_unlock(&omap_fb->lock);
-
-	return 0;
-
-fail:
-	mutex_unlock(&omap_fb->lock);
-	return ret;
 }
 
 struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p)
diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c
index 23b5a84..720d16b 100644
--- a/drivers/gpu/drm/omapdrm/omap_fbdev.c
+++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c
@@ -135,7 +135,7 @@
 	fbdev->ywrap_enabled = priv->has_dmm && ywrap_enabled;
 	if (fbdev->ywrap_enabled) {
 		/* need to align pitch to page size if using DMM scrolling */
-		mode_cmd.pitches[0] = ALIGN(mode_cmd.pitches[0], PAGE_SIZE);
+		mode_cmd.pitches[0] = PAGE_ALIGN(mode_cmd.pitches[0]);
 	}
 
 	/* allocate backing bo */
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 2ab7780..7ed08fdc 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -808,10 +808,10 @@
 /* Release physical address, when DMA is no longer being performed.. this
  * could potentially unpin and unmap buffers from TILER
  */
-int omap_gem_put_paddr(struct drm_gem_object *obj)
+void omap_gem_put_paddr(struct drm_gem_object *obj)
 {
 	struct omap_gem_object *omap_obj = to_omap_bo(obj);
-	int ret = 0;
+	int ret;
 
 	mutex_lock(&obj->dev->struct_mutex);
 	if (omap_obj->paddr_cnt > 0) {
@@ -821,7 +821,6 @@
 			if (ret) {
 				dev_err(obj->dev->dev,
 					"could not unpin pages: %d\n", ret);
-				goto fail;
 			}
 			ret = tiler_release(omap_obj->block);
 			if (ret) {
@@ -832,9 +831,8 @@
 			omap_obj->block = NULL;
 		}
 	}
-fail:
+
 	mutex_unlock(&obj->dev->struct_mutex);
-	return ret;
 }
 
 /* Get rotated scanout address (only valid if already pinned), at the
@@ -1378,11 +1376,7 @@
 
 	omap_obj = kzalloc(sizeof(*omap_obj), GFP_KERNEL);
 	if (!omap_obj)
-		goto fail;
-
-	spin_lock(&priv->list_lock);
-	list_add(&omap_obj->mm_list, &priv->obj_list);
-	spin_unlock(&priv->list_lock);
+		return NULL;
 
 	obj = &omap_obj->base;
 
@@ -1392,11 +1386,19 @@
 		 */
 		omap_obj->vaddr =  dma_alloc_writecombine(dev->dev, size,
 				&omap_obj->paddr, GFP_KERNEL);
-		if (omap_obj->vaddr)
-			flags |= OMAP_BO_DMA;
+		if (!omap_obj->vaddr) {
+			kfree(omap_obj);
 
+			return NULL;
+		}
+
+		flags |= OMAP_BO_DMA;
 	}
 
+	spin_lock(&priv->list_lock);
+	list_add(&omap_obj->mm_list, &priv->obj_list);
+	spin_unlock(&priv->list_lock);
+
 	omap_obj->flags = flags;
 
 	if (flags & OMAP_BO_TILED) {
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c
index cfa8276..0989046 100644
--- a/drivers/gpu/drm/omapdrm/omap_plane.c
+++ b/drivers/gpu/drm/omapdrm/omap_plane.c
@@ -17,6 +17,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
 
@@ -153,9 +154,34 @@
 	dispc_ovl_enable(omap_plane->id, false);
 }
 
+static int omap_plane_atomic_check(struct drm_plane *plane,
+				   struct drm_plane_state *state)
+{
+	struct drm_crtc_state *crtc_state;
+
+	if (!state->crtc)
+		return 0;
+
+	crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
+
+	if (state->crtc_x < 0 || state->crtc_y < 0)
+		return -EINVAL;
+
+	if (state->crtc_x + state->crtc_w > crtc_state->adjusted_mode.hdisplay)
+		return -EINVAL;
+
+	if (state->crtc_y + state->crtc_h > crtc_state->adjusted_mode.vdisplay)
+		return -EINVAL;
+
+	return 0;
+}
+
 static const struct drm_plane_helper_funcs omap_plane_helper_funcs = {
 	.prepare_fb = omap_plane_prepare_fb,
 	.cleanup_fb = omap_plane_cleanup_fb,
+	.atomic_check = omap_plane_atomic_check,
 	.atomic_update = omap_plane_atomic_update,
 	.atomic_disable = omap_plane_atomic_disable,
 };
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 4ecf5ca..248953d 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -7964,23 +7964,27 @@
 		case 1: /* D1 vblank/vline */
 			switch (src_data) {
 			case 0: /* D1 vblank */
-				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[0]) {
-						drm_handle_vblank(rdev->ddev, 0);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[0]))
-						radeon_crtc_handle_vblank(rdev, 0);
-					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D1 vblank\n");
+				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[0]) {
+					drm_handle_vblank(rdev->ddev, 0);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[0]))
+					radeon_crtc_handle_vblank(rdev, 0);
+				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D1 vblank\n");
+
 				break;
 			case 1: /* D1 vline */
-				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D1 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D1 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -7990,23 +7994,27 @@
 		case 2: /* D2 vblank/vline */
 			switch (src_data) {
 			case 0: /* D2 vblank */
-				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[1]) {
-						drm_handle_vblank(rdev->ddev, 1);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[1]))
-						radeon_crtc_handle_vblank(rdev, 1);
-					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D2 vblank\n");
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[1]) {
+					drm_handle_vblank(rdev->ddev, 1);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[1]))
+					radeon_crtc_handle_vblank(rdev, 1);
+				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D2 vblank\n");
+
 				break;
 			case 1: /* D2 vline */
-				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D2 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D2 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -8016,23 +8024,27 @@
 		case 3: /* D3 vblank/vline */
 			switch (src_data) {
 			case 0: /* D3 vblank */
-				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[2]) {
-						drm_handle_vblank(rdev->ddev, 2);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[2]))
-						radeon_crtc_handle_vblank(rdev, 2);
-					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D3 vblank\n");
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[2]) {
+					drm_handle_vblank(rdev->ddev, 2);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[2]))
+					radeon_crtc_handle_vblank(rdev, 2);
+				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D3 vblank\n");
+
 				break;
 			case 1: /* D3 vline */
-				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D3 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D3 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -8042,23 +8054,27 @@
 		case 4: /* D4 vblank/vline */
 			switch (src_data) {
 			case 0: /* D4 vblank */
-				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[3]) {
-						drm_handle_vblank(rdev->ddev, 3);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[3]))
-						radeon_crtc_handle_vblank(rdev, 3);
-					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D4 vblank\n");
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[3]) {
+					drm_handle_vblank(rdev->ddev, 3);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[3]))
+					radeon_crtc_handle_vblank(rdev, 3);
+				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D4 vblank\n");
+
 				break;
 			case 1: /* D4 vline */
-				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D4 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D4 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -8068,23 +8084,27 @@
 		case 5: /* D5 vblank/vline */
 			switch (src_data) {
 			case 0: /* D5 vblank */
-				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[4]) {
-						drm_handle_vblank(rdev->ddev, 4);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[4]))
-						radeon_crtc_handle_vblank(rdev, 4);
-					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D5 vblank\n");
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[4]) {
+					drm_handle_vblank(rdev->ddev, 4);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[4]))
+					radeon_crtc_handle_vblank(rdev, 4);
+				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D5 vblank\n");
+
 				break;
 			case 1: /* D5 vline */
-				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D5 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D5 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -8094,23 +8114,27 @@
 		case 6: /* D6 vblank/vline */
 			switch (src_data) {
 			case 0: /* D6 vblank */
-				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[5]) {
-						drm_handle_vblank(rdev->ddev, 5);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[5]))
-						radeon_crtc_handle_vblank(rdev, 5);
-					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D6 vblank\n");
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[5]) {
+					drm_handle_vblank(rdev->ddev, 5);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[5]))
+					radeon_crtc_handle_vblank(rdev, 5);
+				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D6 vblank\n");
+
 				break;
 			case 1: /* D6 vline */
-				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D6 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D6 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -8130,88 +8154,112 @@
 		case 42: /* HPD hotplug */
 			switch (src_data) {
 			case 0:
-				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD1\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD1\n");
+
 				break;
 			case 1:
-				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD2\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD2\n");
+
 				break;
 			case 2:
-				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD3\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD3\n");
+
 				break;
 			case 3:
-				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD4\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD4\n");
+
 				break;
 			case 4:
-				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD5\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD5\n");
+
 				break;
 			case 5:
-				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD6\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD6\n");
+
 				break;
 			case 6:
-				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 1\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 1\n");
+
 				break;
 			case 7:
-				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 2\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 2\n");
+
 				break;
 			case 8:
-				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 3\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 3\n");
+
 				break;
 			case 9:
-				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 4\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 4\n");
+
 				break;
 			case 10:
-				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 5\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 5\n");
+
 				break;
 			case 11:
-				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 6\n");
-				}
+				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 6\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 3a6d483..0acde19 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -4924,7 +4924,7 @@
 		return IRQ_NONE;
 
 	rptr = rdev->ih.rptr;
-	DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
+	DRM_DEBUG("evergreen_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
 
 	/* Order reading of wptr vs. reading of IH ring data */
 	rmb();
@@ -4942,23 +4942,27 @@
 		case 1: /* D1 vblank/vline */
 			switch (src_data) {
 			case 0: /* D1 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[0]) {
-						drm_handle_vblank(rdev->ddev, 0);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[0]))
-						radeon_crtc_handle_vblank(rdev, 0);
-					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D1 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[0]) {
+					drm_handle_vblank(rdev->ddev, 0);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[0]))
+					radeon_crtc_handle_vblank(rdev, 0);
+				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D1 vblank\n");
+
 				break;
 			case 1: /* D1 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D1 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D1 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -4968,23 +4972,27 @@
 		case 2: /* D2 vblank/vline */
 			switch (src_data) {
 			case 0: /* D2 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[1]) {
-						drm_handle_vblank(rdev->ddev, 1);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[1]))
-						radeon_crtc_handle_vblank(rdev, 1);
-					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D2 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[1]) {
+					drm_handle_vblank(rdev->ddev, 1);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[1]))
+					radeon_crtc_handle_vblank(rdev, 1);
+				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D2 vblank\n");
+
 				break;
 			case 1: /* D2 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D2 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D2 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -4994,23 +5002,27 @@
 		case 3: /* D3 vblank/vline */
 			switch (src_data) {
 			case 0: /* D3 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[2]) {
-						drm_handle_vblank(rdev->ddev, 2);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[2]))
-						radeon_crtc_handle_vblank(rdev, 2);
-					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D3 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: D3 vblank - IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[2]) {
+					drm_handle_vblank(rdev->ddev, 2);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[2]))
+					radeon_crtc_handle_vblank(rdev, 2);
+				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D3 vblank\n");
+
 				break;
 			case 1: /* D3 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D3 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: D3 vline - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D3 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -5020,23 +5032,27 @@
 		case 4: /* D4 vblank/vline */
 			switch (src_data) {
 			case 0: /* D4 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[3]) {
-						drm_handle_vblank(rdev->ddev, 3);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[3]))
-						radeon_crtc_handle_vblank(rdev, 3);
-					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D4 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: D4 vblank - IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[3]) {
+					drm_handle_vblank(rdev->ddev, 3);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[3]))
+					radeon_crtc_handle_vblank(rdev, 3);
+				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D4 vblank\n");
+
 				break;
 			case 1: /* D4 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D4 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: D4 vline - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D4 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -5046,23 +5062,27 @@
 		case 5: /* D5 vblank/vline */
 			switch (src_data) {
 			case 0: /* D5 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[4]) {
-						drm_handle_vblank(rdev->ddev, 4);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[4]))
-						radeon_crtc_handle_vblank(rdev, 4);
-					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D5 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: D5 vblank - IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[4]) {
+					drm_handle_vblank(rdev->ddev, 4);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[4]))
+					radeon_crtc_handle_vblank(rdev, 4);
+				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D5 vblank\n");
+
 				break;
 			case 1: /* D5 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D5 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: D5 vline - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D5 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -5072,23 +5092,27 @@
 		case 6: /* D6 vblank/vline */
 			switch (src_data) {
 			case 0: /* D6 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[5]) {
-						drm_handle_vblank(rdev->ddev, 5);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[5]))
-						radeon_crtc_handle_vblank(rdev, 5);
-					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D6 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: D6 vblank - IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[5]) {
+					drm_handle_vblank(rdev->ddev, 5);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[5]))
+					radeon_crtc_handle_vblank(rdev, 5);
+				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D6 vblank\n");
+
 				break;
 			case 1: /* D6 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D6 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: D6 vline - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D6 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -5108,88 +5132,100 @@
 		case 42: /* HPD hotplug */
 			switch (src_data) {
 			case 0:
-				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD1\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD1\n");
 				break;
 			case 1:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD2\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD2\n");
 				break;
 			case 2:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD3\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD3\n");
 				break;
 			case 3:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD4\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD4\n");
 				break;
 			case 4:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD5\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD5\n");
 				break;
 			case 5:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD6\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD6\n");
 				break;
 			case 6:
-				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 1\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 1\n");
 				break;
 			case 7:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 2\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 2\n");
 				break;
 			case 8:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 3\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 3\n");
 				break;
 			case 9:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 4\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 4\n");
 				break;
 			case 10:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 5\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 5\n");
 				break;
 			case 11:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 6\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 6\n");
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -5199,46 +5235,52 @@
 		case 44: /* hdmi */
 			switch (src_data) {
 			case 0:
-				if (rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG) {
-					rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG;
-					queue_hdmi = true;
-					DRM_DEBUG("IH: HDMI0\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG;
+				queue_hdmi = true;
+				DRM_DEBUG("IH: HDMI0\n");
 				break;
 			case 1:
-				if (rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG) {
-					rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG;
-					queue_hdmi = true;
-					DRM_DEBUG("IH: HDMI1\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG;
+				queue_hdmi = true;
+				DRM_DEBUG("IH: HDMI1\n");
 				break;
 			case 2:
-				if (rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG) {
-					rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG;
-					queue_hdmi = true;
-					DRM_DEBUG("IH: HDMI2\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG;
+				queue_hdmi = true;
+				DRM_DEBUG("IH: HDMI2\n");
 				break;
 			case 3:
-				if (rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG) {
-					rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG;
-					queue_hdmi = true;
-					DRM_DEBUG("IH: HDMI3\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG;
+				queue_hdmi = true;
+				DRM_DEBUG("IH: HDMI3\n");
 				break;
 			case 4:
-				if (rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG) {
-					rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG;
-					queue_hdmi = true;
-					DRM_DEBUG("IH: HDMI4\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG;
+				queue_hdmi = true;
+				DRM_DEBUG("IH: HDMI4\n");
 				break;
 			case 5:
-				if (rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG) {
-					rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG;
-					queue_hdmi = true;
-					DRM_DEBUG("IH: HDMI5\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG;
+				queue_hdmi = true;
+				DRM_DEBUG("IH: HDMI5\n");
 				break;
 			default:
 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 8e5aeeb..158872e 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -2162,18 +2162,20 @@
 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
 	}
 
-	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
-	if (ring->ring_size)
-		r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
+	if (rdev->family == CHIP_ARUBA) {
+		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+		if (ring->ring_size)
+			r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
 
-	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
-	if (ring->ring_size)
-		r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
+		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+		if (ring->ring_size)
+			r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
 
-	if (!r)
-		r = vce_v1_0_init(rdev);
-	else if (r != -ENOENT)
-		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+		if (!r)
+			r = vce_v1_0_init(rdev);
+		if (r)
+			DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+	}
 
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
@@ -2396,7 +2398,8 @@
 	radeon_irq_kms_fini(rdev);
 	uvd_v1_0_fini(rdev);
 	radeon_uvd_fini(rdev);
-	radeon_vce_fini(rdev);
+	if (rdev->family == CHIP_ARUBA)
+		radeon_vce_fini(rdev);
 	cayman_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 35dafd7..4ea5b10 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -4086,23 +4086,27 @@
 		case 1: /* D1 vblank/vline */
 			switch (src_data) {
 			case 0: /* D1 vblank */
-				if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[0]) {
-						drm_handle_vblank(rdev->ddev, 0);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[0]))
-						radeon_crtc_handle_vblank(rdev, 0);
-					rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D1 vblank\n");
+				if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[0]) {
+					drm_handle_vblank(rdev->ddev, 0);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[0]))
+					radeon_crtc_handle_vblank(rdev, 0);
+				rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D1 vblank\n");
+
 				break;
 			case 1: /* D1 vline */
-				if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D1 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT))
+				    DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D1 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -4112,23 +4116,27 @@
 		case 5: /* D2 vblank/vline */
 			switch (src_data) {
 			case 0: /* D2 vblank */
-				if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[1]) {
-						drm_handle_vblank(rdev->ddev, 1);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[1]))
-						radeon_crtc_handle_vblank(rdev, 1);
-					rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D2 vblank\n");
+				if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[1]) {
+					drm_handle_vblank(rdev->ddev, 1);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[1]))
+					radeon_crtc_handle_vblank(rdev, 1);
+				rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D2 vblank\n");
+
 				break;
 			case 1: /* D1 vline */
-				if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D2 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D2 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -4148,46 +4156,53 @@
 		case 19: /* HPD/DAC hotplug */
 			switch (src_data) {
 			case 0:
-				if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) {
-					rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD1\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT))
+					DRM_DEBUG("IH: HPD1 - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD1\n");
 				break;
 			case 1:
-				if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) {
-					rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD2\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT))
+					DRM_DEBUG("IH: HPD2 - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD2\n");
 				break;
 			case 4:
-				if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) {
-					rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD3\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT))
+					DRM_DEBUG("IH: HPD3 - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD3\n");
 				break;
 			case 5:
-				if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) {
-					rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD4\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT))
+					DRM_DEBUG("IH: HPD4 - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD4\n");
 				break;
 			case 10:
-				if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) {
-					rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD5\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT))
+					DRM_DEBUG("IH: HPD5 - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD5\n");
 				break;
 			case 12:
-				if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) {
-					rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD6\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT))
+					DRM_DEBUG("IH: HPD6 - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD6\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -4197,18 +4212,22 @@
 		case 21: /* hdmi */
 			switch (src_data) {
 			case 4:
-				if (rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG) {
-					rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG;
-					queue_hdmi = true;
-					DRM_DEBUG("IH: HDMI0\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG))
+					DRM_DEBUG("IH: HDMI0 - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG;
+				queue_hdmi = true;
+				DRM_DEBUG("IH: HDMI0\n");
+
 				break;
 			case 5:
-				if (rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG) {
-					rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG;
-					queue_hdmi = true;
-					DRM_DEBUG("IH: HDMI1\n");
-				}
+				if (!(rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG))
+					DRM_DEBUG("IH: HDMI1 - IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG;
+				queue_hdmi = true;
+				DRM_DEBUG("IH: HDMI1\n");
+
 				break;
 			default:
 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 09e3f39..98f9ada 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -2483,7 +2483,7 @@
 	struct drm_buf *buf;
 	u32 *buffer;
 	const u8 __user *data;
-	int size, pass_size;
+	unsigned int size, pass_size;
 	u64 src_offset, dst_offset;
 
 	if (!radeon_check_offset(dev_priv, tex->offset)) {
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 45e5406..afaf346 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -91,15 +91,34 @@
 	struct radeon_device *rdev = crtc->dev->dev_private;
 
 	if (ASIC_IS_DCE4(rdev)) {
+		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
+		       upper_32_bits(radeon_crtc->cursor_addr));
+		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+		       lower_32_bits(radeon_crtc->cursor_addr));
 		WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset);
 		WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN |
 		       EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
 		       EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
 	} else if (ASIC_IS_AVIVO(rdev)) {
+		if (rdev->family >= CHIP_RV770) {
+			if (radeon_crtc->crtc_id)
+				WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH,
+				       upper_32_bits(radeon_crtc->cursor_addr));
+			else
+				WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH,
+				       upper_32_bits(radeon_crtc->cursor_addr));
+		}
+
+		WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+		       lower_32_bits(radeon_crtc->cursor_addr));
 		WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset);
 		WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN |
 		       (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
 	} else {
+		/* offset is from DISP(2)_BASE_ADDRESS */
+		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset,
+		       radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr);
+
 		switch (radeon_crtc->crtc_id) {
 		case 0:
 			WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL);
@@ -205,8 +224,9 @@
 			| (x << 16)
 			| y));
 		/* offset is from DISP(2)_BASE_ADDRESS */
-		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset +
-								      (yorigin * 256)));
+		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset,
+		       radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr +
+		       yorigin * 256);
 	}
 
 	radeon_crtc->cursor_x = x;
@@ -227,53 +247,6 @@
 	return ret;
 }
 
-static int radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj)
-{
-	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-	struct radeon_device *rdev = crtc->dev->dev_private;
-	struct radeon_bo *robj = gem_to_radeon_bo(obj);
-	uint64_t gpu_addr;
-	int ret;
-
-	ret = radeon_bo_reserve(robj, false);
-	if (unlikely(ret != 0))
-		goto fail;
-	/* Only 27 bit offset for legacy cursor */
-	ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM,
-				       ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27,
-				       &gpu_addr);
-	radeon_bo_unreserve(robj);
-	if (ret)
-		goto fail;
-
-	if (ASIC_IS_DCE4(rdev)) {
-		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
-		       upper_32_bits(gpu_addr));
-		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
-		       gpu_addr & 0xffffffff);
-	} else if (ASIC_IS_AVIVO(rdev)) {
-		if (rdev->family >= CHIP_RV770) {
-			if (radeon_crtc->crtc_id)
-				WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
-			else
-				WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
-		}
-		WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
-		       gpu_addr & 0xffffffff);
-	} else {
-		radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr;
-		/* offset is from DISP(2)_BASE_ADDRESS */
-		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset);
-	}
-
-	return 0;
-
-fail:
-	drm_gem_object_unreference_unlocked(obj);
-
-	return ret;
-}
-
 int radeon_crtc_cursor_set2(struct drm_crtc *crtc,
 			    struct drm_file *file_priv,
 			    uint32_t handle,
@@ -283,7 +256,9 @@
 			    int32_t hot_y)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_device *rdev = crtc->dev->dev_private;
 	struct drm_gem_object *obj;
+	struct radeon_bo *robj;
 	int ret;
 
 	if (!handle) {
@@ -305,6 +280,23 @@
 		return -ENOENT;
 	}
 
+	robj = gem_to_radeon_bo(obj);
+	ret = radeon_bo_reserve(robj, false);
+	if (ret != 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		return ret;
+	}
+	/* Only 27 bit offset for legacy cursor */
+	ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM,
+				       ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27,
+				       &radeon_crtc->cursor_addr);
+	radeon_bo_unreserve(robj);
+	if (ret) {
+		DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
+		drm_gem_object_unreference_unlocked(obj);
+		return ret;
+	}
+
 	radeon_crtc->cursor_width = width;
 	radeon_crtc->cursor_height = height;
 
@@ -323,13 +315,7 @@
 		radeon_crtc->cursor_hot_y = hot_y;
 	}
 
-	ret = radeon_set_cursor(crtc, obj);
-
-	if (ret)
-		DRM_ERROR("radeon_set_cursor returned %d, not changing cursor\n",
-			  ret);
-	else
-		radeon_show_cursor(crtc);
+	radeon_show_cursor(crtc);
 
 	radeon_lock_cursor(crtc, false);
 
@@ -341,8 +327,7 @@
 			radeon_bo_unpin(robj);
 			radeon_bo_unreserve(robj);
 		}
-		if (radeon_crtc->cursor_bo != obj)
-			drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo);
+		drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo);
 	}
 
 	radeon_crtc->cursor_bo = obj;
@@ -360,7 +345,6 @@
 void radeon_cursor_reset(struct drm_crtc *crtc)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-	int ret;
 
 	if (radeon_crtc->cursor_bo) {
 		radeon_lock_cursor(crtc, true);
@@ -368,12 +352,7 @@
 		radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x,
 					  radeon_crtc->cursor_y);
 
-		ret = radeon_set_cursor(crtc, radeon_crtc->cursor_bo);
-		if (ret)
-			DRM_ERROR("radeon_set_cursor returned %d, not showing "
-				  "cursor\n", ret);
-		else
-			radeon_show_cursor(crtc);
+		radeon_show_cursor(crtc);
 
 		radeon_lock_cursor(crtc, false);
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 2593b11..d8319da 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1080,6 +1080,22 @@
 }
 
 /**
+ * Determine a sensible default GART size according to ASIC family.
+ *
+ * @family ASIC family name
+ */
+static int radeon_gart_size_auto(enum radeon_family family)
+{
+	/* default to a larger gart size on newer asics */
+	if (family >= CHIP_TAHITI)
+		return 2048;
+	else if (family >= CHIP_RV770)
+		return 1024;
+	else
+		return 512;
+}
+
+/**
  * radeon_check_arguments - validate module params
  *
  * @rdev: radeon_device pointer
@@ -1097,27 +1113,17 @@
 	}
 
 	if (radeon_gart_size == -1) {
-		/* default to a larger gart size on newer asics */
-		if (rdev->family >= CHIP_RV770)
-			radeon_gart_size = 1024;
-		else
-			radeon_gart_size = 512;
+		radeon_gart_size = radeon_gart_size_auto(rdev->family);
 	}
 	/* gtt size must be power of two and greater or equal to 32M */
 	if (radeon_gart_size < 32) {
 		dev_warn(rdev->dev, "gart size (%d) too small\n",
 				radeon_gart_size);
-		if (rdev->family >= CHIP_RV770)
-			radeon_gart_size = 1024;
-		else
-			radeon_gart_size = 512;
+		radeon_gart_size = radeon_gart_size_auto(rdev->family);
 	} else if (!radeon_check_pot_argument(radeon_gart_size)) {
 		dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n",
 				radeon_gart_size);
-		if (rdev->family >= CHIP_RV770)
-			radeon_gart_size = 1024;
-		else
-			radeon_gart_size = 512;
+		radeon_gart_size = radeon_gart_size_auto(rdev->family);
 	}
 	rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20;
 
@@ -1572,11 +1578,21 @@
 		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
 	}
 
-	/* unpin the front buffers */
+	/* unpin the front buffers and cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 		struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb);
 		struct radeon_bo *robj;
 
+		if (radeon_crtc->cursor_bo) {
+			struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo);
+			r = radeon_bo_reserve(robj, false);
+			if (r == 0) {
+				radeon_bo_unpin(robj);
+				radeon_bo_unreserve(robj);
+			}
+		}
+
 		if (rfb == NULL || rfb->obj == NULL) {
 			continue;
 		}
@@ -1639,6 +1655,7 @@
 {
 	struct drm_connector *connector;
 	struct radeon_device *rdev = dev->dev_private;
+	struct drm_crtc *crtc;
 	int r;
 
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
@@ -1678,6 +1695,27 @@
 
 	radeon_restore_bios_scratch_regs(rdev);
 
+	/* pin cursors */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+
+		if (radeon_crtc->cursor_bo) {
+			struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo);
+			r = radeon_bo_reserve(robj, false);
+			if (r == 0) {
+				/* Only 27 bit offset for legacy cursor */
+				r = radeon_bo_pin_restricted(robj,
+							     RADEON_GEM_DOMAIN_VRAM,
+							     ASIC_IS_AVIVO(rdev) ?
+							     0 : 1 << 27,
+							     &radeon_crtc->cursor_addr);
+				if (r != 0)
+					DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
+				radeon_bo_unreserve(robj);
+			}
+		}
+	}
+
 	/* init dig PHYs, disp eng pll */
 	if (rdev->is_atom_bios) {
 		radeon_atom_encoder_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 634793e..aeb6767 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -257,6 +257,7 @@
 	}
 
 	info->par = rfbdev;
+	info->skip_vt_switch = true;
 
 	ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
 	if (ret) {
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ac3c131..013ec71 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -428,7 +428,6 @@
 int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp)
 {
-	struct radeon_device *rdev = dev->dev_private;
 	struct drm_radeon_gem_busy *args = data;
 	struct drm_gem_object *gobj;
 	struct radeon_bo *robj;
@@ -440,10 +439,16 @@
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
-	r = radeon_bo_wait(robj, &cur_placement, true);
+
+	r = reservation_object_test_signaled_rcu(robj->tbo.resv, true);
+	if (r == 0)
+		r = -EBUSY;
+	else
+		r = 0;
+
+	cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type);
 	args->domain = radeon_mem_type_to_domain(cur_placement);
 	drm_gem_object_unreference_unlocked(gobj);
-	r = radeon_gem_handle_lockup(rdev, r);
 	return r;
 }
 
@@ -471,6 +476,7 @@
 		r = ret;
 
 	/* Flush HDP cache via MMIO if necessary */
+	cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type);
 	if (rdev->asic->mmio_hdp_flush &&
 	    radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM)
 		robj->rdev->asic->mmio_hdp_flush(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 6de5459..07909d8 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -343,7 +343,6 @@
 	int max_cursor_width;
 	int max_cursor_height;
 	uint32_t legacy_display_base_addr;
-	uint32_t legacy_cursor_offset;
 	enum radeon_rmx_type rmx_type;
 	u8 h_border;
 	u8 v_border;
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index ec10533..48d97c0 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -493,38 +493,35 @@
 	}
 
 	if (bo_va->it.start || bo_va->it.last) {
-		spin_lock(&vm->status_lock);
-		if (list_empty(&bo_va->vm_status)) {
-			/* add a clone of the bo_va to clear the old address */
-			struct radeon_bo_va *tmp;
-			spin_unlock(&vm->status_lock);
-			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
-			if (!tmp) {
-				mutex_unlock(&vm->mutex);
-				r = -ENOMEM;
-				goto error_unreserve;
-			}
-			tmp->it.start = bo_va->it.start;
-			tmp->it.last = bo_va->it.last;
-			tmp->vm = vm;
-			tmp->bo = radeon_bo_ref(bo_va->bo);
-			spin_lock(&vm->status_lock);
-			list_add(&tmp->vm_status, &vm->freed);
+		/* add a clone of the bo_va to clear the old address */
+		struct radeon_bo_va *tmp;
+		tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+		if (!tmp) {
+			mutex_unlock(&vm->mutex);
+			r = -ENOMEM;
+			goto error_unreserve;
 		}
-		spin_unlock(&vm->status_lock);
+		tmp->it.start = bo_va->it.start;
+		tmp->it.last = bo_va->it.last;
+		tmp->vm = vm;
+		tmp->bo = radeon_bo_ref(bo_va->bo);
 
 		interval_tree_remove(&bo_va->it, &vm->va);
+		spin_lock(&vm->status_lock);
 		bo_va->it.start = 0;
 		bo_va->it.last = 0;
+		list_del_init(&bo_va->vm_status);
+		list_add(&tmp->vm_status, &vm->freed);
+		spin_unlock(&vm->status_lock);
 	}
 
 	if (soffset || eoffset) {
+		spin_lock(&vm->status_lock);
 		bo_va->it.start = soffset;
 		bo_va->it.last = eoffset - 1;
-		interval_tree_insert(&bo_va->it, &vm->va);
-		spin_lock(&vm->status_lock);
 		list_add(&bo_va->vm_status, &vm->cleared);
 		spin_unlock(&vm->status_lock);
+		interval_tree_insert(&bo_va->it, &vm->va);
 	}
 
 	bo_va->flags = flags;
@@ -1158,7 +1155,8 @@
 
 	list_for_each_entry(bo_va, &bo->va, bo_list) {
 		spin_lock(&bo_va->vm->status_lock);
-		if (list_empty(&bo_va->vm_status))
+		if (list_empty(&bo_va->vm_status) &&
+		    (bo_va->it.start || bo_va->it.last))
 			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
 		spin_unlock(&bo_va->vm->status_lock);
 	}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 26388b5..07037e3 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -6466,23 +6466,27 @@
 		case 1: /* D1 vblank/vline */
 			switch (src_data) {
 			case 0: /* D1 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[0]) {
-						drm_handle_vblank(rdev->ddev, 0);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[0]))
-						radeon_crtc_handle_vblank(rdev, 0);
-					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D1 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[0]) {
+					drm_handle_vblank(rdev->ddev, 0);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[0]))
+					radeon_crtc_handle_vblank(rdev, 0);
+				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D1 vblank\n");
+
 				break;
 			case 1: /* D1 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D1 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D1 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -6492,23 +6496,27 @@
 		case 2: /* D2 vblank/vline */
 			switch (src_data) {
 			case 0: /* D2 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[1]) {
-						drm_handle_vblank(rdev->ddev, 1);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[1]))
-						radeon_crtc_handle_vblank(rdev, 1);
-					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D2 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[1]) {
+					drm_handle_vblank(rdev->ddev, 1);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[1]))
+					radeon_crtc_handle_vblank(rdev, 1);
+				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D2 vblank\n");
+
 				break;
 			case 1: /* D2 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D2 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D2 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -6518,23 +6526,27 @@
 		case 3: /* D3 vblank/vline */
 			switch (src_data) {
 			case 0: /* D3 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[2]) {
-						drm_handle_vblank(rdev->ddev, 2);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[2]))
-						radeon_crtc_handle_vblank(rdev, 2);
-					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D3 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[2]) {
+					drm_handle_vblank(rdev->ddev, 2);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[2]))
+					radeon_crtc_handle_vblank(rdev, 2);
+				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D3 vblank\n");
+
 				break;
 			case 1: /* D3 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D3 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D3 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -6544,23 +6556,27 @@
 		case 4: /* D4 vblank/vline */
 			switch (src_data) {
 			case 0: /* D4 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[3]) {
-						drm_handle_vblank(rdev->ddev, 3);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[3]))
-						radeon_crtc_handle_vblank(rdev, 3);
-					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D4 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[3]) {
+					drm_handle_vblank(rdev->ddev, 3);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[3]))
+					radeon_crtc_handle_vblank(rdev, 3);
+				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D4 vblank\n");
+
 				break;
 			case 1: /* D4 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D4 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D4 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -6570,23 +6586,27 @@
 		case 5: /* D5 vblank/vline */
 			switch (src_data) {
 			case 0: /* D5 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[4]) {
-						drm_handle_vblank(rdev->ddev, 4);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[4]))
-						radeon_crtc_handle_vblank(rdev, 4);
-					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D5 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[4]) {
+					drm_handle_vblank(rdev->ddev, 4);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[4]))
+					radeon_crtc_handle_vblank(rdev, 4);
+				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D5 vblank\n");
+
 				break;
 			case 1: /* D5 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D5 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D5 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -6596,23 +6616,27 @@
 		case 6: /* D6 vblank/vline */
 			switch (src_data) {
 			case 0: /* D6 vblank */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
-					if (rdev->irq.crtc_vblank_int[5]) {
-						drm_handle_vblank(rdev->ddev, 5);
-						rdev->pm.vblank_sync = true;
-						wake_up(&rdev->irq.vblank_queue);
-					}
-					if (atomic_read(&rdev->irq.pflip[5]))
-						radeon_crtc_handle_vblank(rdev, 5);
-					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
-					DRM_DEBUG("IH: D6 vblank\n");
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				if (rdev->irq.crtc_vblank_int[5]) {
+					drm_handle_vblank(rdev->ddev, 5);
+					rdev->pm.vblank_sync = true;
+					wake_up(&rdev->irq.vblank_queue);
 				}
+				if (atomic_read(&rdev->irq.pflip[5]))
+					radeon_crtc_handle_vblank(rdev, 5);
+				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
+				DRM_DEBUG("IH: D6 vblank\n");
+
 				break;
 			case 1: /* D6 vline */
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
-					DRM_DEBUG("IH: D6 vline\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
+				DRM_DEBUG("IH: D6 vline\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
@@ -6632,88 +6656,112 @@
 		case 42: /* HPD hotplug */
 			switch (src_data) {
 			case 0:
-				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD1\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD1\n");
+
 				break;
 			case 1:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD2\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD2\n");
+
 				break;
 			case 2:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD3\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD3\n");
+
 				break;
 			case 3:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD4\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD4\n");
+
 				break;
 			case 4:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD5\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD5\n");
+
 				break;
 			case 5:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
-					queue_hotplug = true;
-					DRM_DEBUG("IH: HPD6\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
+				queue_hotplug = true;
+				DRM_DEBUG("IH: HPD6\n");
+
 				break;
 			case 6:
-				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 1\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 1\n");
+
 				break;
 			case 7:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 2\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 2\n");
+
 				break;
 			case 8:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 3\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 3\n");
+
 				break;
 			case 9:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 4\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 4\n");
+
 				break;
 			case 10:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 5\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 5\n");
+
 				break;
 			case 11:
-				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
-					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
-					queue_dp = true;
-					DRM_DEBUG("IH: HPD_RX 6\n");
-				}
+				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
+					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
+				queue_dp = true;
+				DRM_DEBUG("IH: HPD_RX 6\n");
+
 				break;
 			default:
 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 35ac237..577d58d 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -633,6 +633,7 @@
 config I2C_MT65XX
 	tristate "MediaTek I2C adapter"
 	depends on ARCH_MEDIATEK || COMPILE_TEST
+	depends on HAS_DMA
 	help
 	  This selects the MediaTek(R) Integrated Inter Circuit bus driver
 	  for MT65xx and MT81xx.
diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c
index 19b2d68..f325663 100644
--- a/drivers/i2c/busses/i2c-jz4780.c
+++ b/drivers/i2c/busses/i2c-jz4780.c
@@ -764,12 +764,15 @@
 	if (IS_ERR(i2c->clk))
 		return PTR_ERR(i2c->clk);
 
-	clk_prepare_enable(i2c->clk);
+	ret = clk_prepare_enable(i2c->clk);
+	if (ret)
+		return ret;
 
-	if (of_property_read_u32(pdev->dev.of_node, "clock-frequency",
-				 &clk_freq)) {
+	ret = of_property_read_u32(pdev->dev.of_node, "clock-frequency",
+				   &clk_freq);
+	if (ret) {
 		dev_err(&pdev->dev, "clock-frequency not specified in DT");
-		return clk_freq;
+		goto err;
 	}
 
 	i2c->speed = clk_freq / 1000;
@@ -790,10 +793,8 @@
 	i2c->irq = platform_get_irq(pdev, 0);
 	ret = devm_request_irq(&pdev->dev, i2c->irq, jz4780_i2c_irq, 0,
 			       dev_name(&pdev->dev), i2c);
-	if (ret) {
-		ret = -ENODEV;
+	if (ret)
 		goto err;
-	}
 
 	ret = i2c_add_adapter(&i2c->adap);
 	if (ret < 0) {
diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c
index dcca707..1c9cb65 100644
--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
+++ b/drivers/i2c/busses/i2c-xgene-slimpro.c
@@ -419,6 +419,7 @@
 	rc = i2c_add_adapter(adapter);
 	if (rc) {
 		dev_err(&pdev->dev, "Adapter registeration failed\n");
+		mbox_free_channel(ctx->mbox_chan);
 		return rc;
 	}
 
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 069a41f..e6d4935 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1012,6 +1012,8 @@
  */
 void i2c_unregister_device(struct i2c_client *client)
 {
+	if (client->dev.of_node)
+		of_node_clear_flag(client->dev.of_node, OF_POPULATED);
 	device_unregister(&client->dev);
 }
 EXPORT_SYMBOL_GPL(i2c_unregister_device);
@@ -1320,8 +1322,11 @@
 
 	dev_dbg(&adap->dev, "of_i2c: walking child nodes\n");
 
-	for_each_available_child_of_node(adap->dev.of_node, node)
+	for_each_available_child_of_node(adap->dev.of_node, node) {
+		if (of_node_test_and_set_flag(node, OF_POPULATED))
+			continue;
 		of_i2c_register_device(adap, node);
+	}
 }
 
 static int of_dev_node_match(struct device *dev, void *data)
@@ -1853,6 +1858,11 @@
 		if (adap == NULL)
 			return NOTIFY_OK;	/* not for us */
 
+		if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) {
+			put_device(&adap->dev);
+			return NOTIFY_OK;
+		}
+
 		client = of_i2c_register_device(adap, rd->dn);
 		put_device(&adap->dev);
 
@@ -1863,6 +1873,10 @@
 		}
 		break;
 	case OF_RECONFIG_CHANGE_REMOVE:
+		/* already depopulated? */
+		if (!of_node_check_flag(rd->dn, OF_POPULATED))
+			return NOTIFY_OK;
+
 		/* find our device by node */
 		client = of_find_i2c_device_by_node(rd->dn);
 		if (client == NULL)
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 62641f2..5b5f403 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -771,7 +771,7 @@
  */
 static void elan_report_contact(struct elan_tp_data *data,
 				int contact_num, bool contact_valid,
-				bool hover_event, u8 *finger_data)
+				u8 *finger_data)
 {
 	struct input_dev *input = data->input;
 	unsigned int pos_x, pos_y;
@@ -815,9 +815,7 @@
 		input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
 		input_report_abs(input, ABS_MT_POSITION_X, pos_x);
 		input_report_abs(input, ABS_MT_POSITION_Y, data->max_y - pos_y);
-		input_report_abs(input, ABS_MT_DISTANCE, hover_event);
-		input_report_abs(input, ABS_MT_PRESSURE,
-				 hover_event ? 0 : scaled_pressure);
+		input_report_abs(input, ABS_MT_PRESSURE, scaled_pressure);
 		input_report_abs(input, ABS_TOOL_WIDTH, mk_x);
 		input_report_abs(input, ABS_MT_TOUCH_MAJOR, major);
 		input_report_abs(input, ABS_MT_TOUCH_MINOR, minor);
@@ -839,14 +837,14 @@
 	hover_event = hover_info & 0x40;
 	for (i = 0; i < ETP_MAX_FINGERS; i++) {
 		contact_valid = tp_info & (1U << (3 + i));
-		elan_report_contact(data, i, contact_valid, hover_event,
-				    finger_data);
+		elan_report_contact(data, i, contact_valid, finger_data);
 
 		if (contact_valid)
 			finger_data += ETP_FINGER_DATA_LEN;
 	}
 
 	input_report_key(input, BTN_LEFT, tp_info & 0x01);
+	input_report_abs(input, ABS_DISTANCE, hover_event != 0);
 	input_mt_report_pointer_emulation(input, true);
 	input_sync(input);
 }
@@ -922,6 +920,7 @@
 	input_abs_set_res(input, ABS_Y, data->y_res);
 	input_set_abs_params(input, ABS_PRESSURE, 0, ETP_MAX_PRESSURE, 0, 0);
 	input_set_abs_params(input, ABS_TOOL_WIDTH, 0, ETP_FINGER_WIDTH, 0, 0);
+	input_set_abs_params(input, ABS_DISTANCE, 0, 1, 0, 0);
 
 	/* And MT parameters */
 	input_set_abs_params(input, ABS_MT_POSITION_X, 0, data->max_x, 0, 0);
@@ -934,7 +933,6 @@
 			     ETP_FINGER_WIDTH * max_width, 0, 0);
 	input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0,
 			     ETP_FINGER_WIDTH * min_width, 0, 0);
-	input_set_abs_params(input, ABS_MT_DISTANCE, 0, 1, 0, 0);
 
 	data->input = input;
 
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 35c8d0c..3a32caf 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -1199,7 +1199,7 @@
 					ABS_MT_POSITION_Y);
 		/* Image sensors can report per-contact pressure */
 		input_set_abs_params(dev, ABS_MT_PRESSURE, 0, 255, 0, 0);
-		input_mt_init_slots(dev, 3, INPUT_MT_POINTER | INPUT_MT_TRACK);
+		input_mt_init_slots(dev, 2, INPUT_MT_POINTER | INPUT_MT_TRACK);
 
 		/* Image sensors can signal 4 and 5 finger clicks */
 		__set_bit(BTN_TOOL_QUADTAP, dev->keybit);
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 8d7e1c8..4dd8826 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1055,7 +1055,7 @@
 
 	processor = (struct acpi_madt_generic_interrupt *)header;
 
-	if (BAD_MADT_ENTRY(processor, end))
+	if (BAD_MADT_GICC_ENTRY(processor, end))
 		return -EINVAL;
 
 	/*
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 4400edd..b7d54d4 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -257,16 +257,6 @@
 		return MIPS_CPU_IRQ_BASE + cp0_fdc_irq;
 	}
 
-	/*
-	 * Some cores claim the FDC is routable but it doesn't actually seem to
-	 * be connected.
-	 */
-	switch (current_cpu_type()) {
-	case CPU_INTERAPTIV:
-	case CPU_PROAPTIV:
-		return -1;
-	}
-
 	return irq_create_mapping(gic_irq_domain,
 				  GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC));
 }
diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index 8406c668..c6a644b 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -7,6 +7,14 @@
 
 if MEMORY
 
+config ARM_PL172_MPMC
+	tristate "ARM PL172 MPMC driver"
+	depends on ARM_AMBA && OF
+	help
+	  This selects the ARM PrimeCell PL172 MultiPort Memory Controller.
+	  If you have an embedded system with an AMBA bus and a PL172
+	  controller, say Y or M here.
+
 config ATMEL_SDRAMC
 	bool "Atmel (Multi-port DDR-)SDRAM Controller"
 	default y
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index b670441..1c46af5 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -5,6 +5,7 @@
 ifeq ($(CONFIG_DDR),y)
 obj-$(CONFIG_OF)		+= of_memory.o
 endif
+obj-$(CONFIG_ARM_PL172_MPMC)	+= pl172.o
 obj-$(CONFIG_ATMEL_SDRAMC)	+= atmel-sdramc.o
 obj-$(CONFIG_TI_AEMIF)		+= ti-aemif.o
 obj-$(CONFIG_TI_EMIF)		+= emif.o
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 8911e51..3a27a84 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -2074,14 +2074,8 @@
 			ret = gpmc_probe_nand_child(pdev, child);
 		else if (of_node_cmp(child->name, "onenand") == 0)
 			ret = gpmc_probe_onenand_child(pdev, child);
-		else if (of_node_cmp(child->name, "ethernet") == 0 ||
-			 of_node_cmp(child->name, "nor") == 0 ||
-			 of_node_cmp(child->name, "uart") == 0)
+		else
 			ret = gpmc_probe_generic_child(pdev, child);
-
-		if (WARN(ret < 0, "%s: probing gpmc child %s failed\n",
-			 __func__, child->full_name))
-			of_node_put(child);
 	}
 
 	return 0;
diff --git a/drivers/memory/pl172.c b/drivers/memory/pl172.c
new file mode 100644
index 0000000..b2ef607
--- /dev/null
+++ b/drivers/memory/pl172.c
@@ -0,0 +1,301 @@
+/*
+ * Memory controller driver for ARM PrimeCell PL172
+ * PrimeCell MultiPort Memory Controller (PL172)
+ *
+ * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
+ *
+ * Based on:
+ * TI AEMIF driver, Copyright (C) 2010 - 2013 Texas Instruments Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/time.h>
+
+#define MPMC_STATIC_CFG(n)		(0x200 + 0x20 * n)
+#define  MPMC_STATIC_CFG_MW_8BIT	0x0
+#define  MPMC_STATIC_CFG_MW_16BIT	0x1
+#define  MPMC_STATIC_CFG_MW_32BIT	0x2
+#define  MPMC_STATIC_CFG_PM		BIT(3)
+#define  MPMC_STATIC_CFG_PC		BIT(6)
+#define  MPMC_STATIC_CFG_PB		BIT(7)
+#define  MPMC_STATIC_CFG_EW		BIT(8)
+#define  MPMC_STATIC_CFG_B		BIT(19)
+#define  MPMC_STATIC_CFG_P		BIT(20)
+#define MPMC_STATIC_WAIT_WEN(n)		(0x204 + 0x20 * n)
+#define  MPMC_STATIC_WAIT_WEN_MAX	0x0f
+#define MPMC_STATIC_WAIT_OEN(n)		(0x208 + 0x20 * n)
+#define  MPMC_STATIC_WAIT_OEN_MAX	0x0f
+#define MPMC_STATIC_WAIT_RD(n)		(0x20c + 0x20 * n)
+#define  MPMC_STATIC_WAIT_RD_MAX	0x1f
+#define MPMC_STATIC_WAIT_PAGE(n)	(0x210 + 0x20 * n)
+#define  MPMC_STATIC_WAIT_PAGE_MAX	0x1f
+#define MPMC_STATIC_WAIT_WR(n)		(0x214 + 0x20 * n)
+#define  MPMC_STATIC_WAIT_WR_MAX	0x1f
+#define MPMC_STATIC_WAIT_TURN(n)	(0x218 + 0x20 * n)
+#define  MPMC_STATIC_WAIT_TURN_MAX	0x0f
+
+/* Maximum number of static chip selects */
+#define PL172_MAX_CS		4
+
+struct pl172_data {
+	void __iomem *base;
+	unsigned long rate;
+	struct clk *clk;
+};
+
+static int pl172_timing_prop(struct amba_device *adev,
+			     const struct device_node *np, const char *name,
+			     u32 reg_offset, u32 max, int start)
+{
+	struct pl172_data *pl172 = amba_get_drvdata(adev);
+	int cycles;
+	u32 val;
+
+	if (!of_property_read_u32(np, name, &val)) {
+		cycles = DIV_ROUND_UP(val * pl172->rate, NSEC_PER_MSEC) - start;
+		if (cycles < 0) {
+			cycles = 0;
+		} else if (cycles > max) {
+			dev_err(&adev->dev, "%s timing too tight\n", name);
+			return -EINVAL;
+		}
+
+		writel(cycles, pl172->base + reg_offset);
+	}
+
+	dev_dbg(&adev->dev, "%s: %u cycle(s)\n", name, start +
+				readl(pl172->base + reg_offset));
+
+	return 0;
+}
+
+static int pl172_setup_static(struct amba_device *adev,
+			      struct device_node *np, u32 cs)
+{
+	struct pl172_data *pl172 = amba_get_drvdata(adev);
+	u32 cfg;
+	int ret;
+
+	/* MPMC static memory configuration */
+	if (!of_property_read_u32(np, "mpmc,memory-width", &cfg)) {
+		if (cfg == 8) {
+			cfg = MPMC_STATIC_CFG_MW_8BIT;
+		} else if (cfg == 16) {
+			cfg = MPMC_STATIC_CFG_MW_16BIT;
+		} else if (cfg == 32) {
+			cfg = MPMC_STATIC_CFG_MW_32BIT;
+		} else {
+			dev_err(&adev->dev, "invalid memory width cs%u\n", cs);
+			return -EINVAL;
+		}
+	} else {
+		dev_err(&adev->dev, "memory-width property required\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_bool(np, "mpmc,async-page-mode"))
+		cfg |= MPMC_STATIC_CFG_PM;
+
+	if (of_property_read_bool(np, "mpmc,cs-active-high"))
+		cfg |= MPMC_STATIC_CFG_PC;
+
+	if (of_property_read_bool(np, "mpmc,byte-lane-low"))
+		cfg |= MPMC_STATIC_CFG_PB;
+
+	if (of_property_read_bool(np, "mpmc,extended-wait"))
+		cfg |= MPMC_STATIC_CFG_EW;
+
+	if (of_property_read_bool(np, "mpmc,buffer-enable"))
+		cfg |= MPMC_STATIC_CFG_B;
+
+	if (of_property_read_bool(np, "mpmc,write-protect"))
+		cfg |= MPMC_STATIC_CFG_P;
+
+	writel(cfg, pl172->base + MPMC_STATIC_CFG(cs));
+	dev_dbg(&adev->dev, "mpmc static config cs%u: 0x%08x\n", cs, cfg);
+
+	/* MPMC static memory timing */
+	ret = pl172_timing_prop(adev, np, "mpmc,write-enable-delay",
+				MPMC_STATIC_WAIT_WEN(cs),
+				MPMC_STATIC_WAIT_WEN_MAX, 1);
+	if (ret)
+		goto fail;
+
+	ret = pl172_timing_prop(adev, np, "mpmc,output-enable-delay",
+				MPMC_STATIC_WAIT_OEN(cs),
+				MPMC_STATIC_WAIT_OEN_MAX, 0);
+	if (ret)
+		goto fail;
+
+	ret = pl172_timing_prop(adev, np, "mpmc,read-access-delay",
+				MPMC_STATIC_WAIT_RD(cs),
+				MPMC_STATIC_WAIT_RD_MAX, 1);
+	if (ret)
+		goto fail;
+
+	ret = pl172_timing_prop(adev, np, "mpmc,page-mode-read-delay",
+				MPMC_STATIC_WAIT_PAGE(cs),
+				MPMC_STATIC_WAIT_PAGE_MAX, 1);
+	if (ret)
+		goto fail;
+
+	ret = pl172_timing_prop(adev, np, "mpmc,write-access-delay",
+				MPMC_STATIC_WAIT_WR(cs),
+				MPMC_STATIC_WAIT_WR_MAX, 2);
+	if (ret)
+		goto fail;
+
+	ret = pl172_timing_prop(adev, np, "mpmc,turn-round-delay",
+				MPMC_STATIC_WAIT_TURN(cs),
+				MPMC_STATIC_WAIT_TURN_MAX, 1);
+	if (ret)
+		goto fail;
+
+	return 0;
+fail:
+	dev_err(&adev->dev, "failed to configure cs%u\n", cs);
+	return ret;
+}
+
+static int pl172_parse_cs_config(struct amba_device *adev,
+				 struct device_node *np)
+{
+	u32 cs;
+
+	if (!of_property_read_u32(np, "mpmc,cs", &cs)) {
+		if (cs >= PL172_MAX_CS) {
+			dev_err(&adev->dev, "cs%u invalid\n", cs);
+			return -EINVAL;
+		}
+
+		return pl172_setup_static(adev, np, cs);
+	}
+
+	dev_err(&adev->dev, "cs property required\n");
+
+	return -EINVAL;
+}
+
+static const char * const pl172_revisions[] = {"r1", "r2", "r2p3", "r2p4"};
+
+static int pl172_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	struct device_node *child_np, *np = adev->dev.of_node;
+	struct device *dev = &adev->dev;
+	static const char *rev = "?";
+	struct pl172_data *pl172;
+	int ret;
+
+	if (amba_part(adev) == 0x172) {
+		if (amba_rev(adev) < ARRAY_SIZE(pl172_revisions))
+			rev = pl172_revisions[amba_rev(adev)];
+	}
+
+	dev_info(dev, "ARM PL%x revision %s\n", amba_part(adev), rev);
+
+	pl172 = devm_kzalloc(dev, sizeof(*pl172), GFP_KERNEL);
+	if (!pl172)
+		return -ENOMEM;
+
+	pl172->clk = devm_clk_get(dev, "mpmcclk");
+	if (IS_ERR(pl172->clk)) {
+		dev_err(dev, "no mpmcclk provided clock\n");
+		return PTR_ERR(pl172->clk);
+	}
+
+	ret = clk_prepare_enable(pl172->clk);
+	if (ret) {
+		dev_err(dev, "unable to mpmcclk enable clock\n");
+		return ret;
+	}
+
+	pl172->rate = clk_get_rate(pl172->clk) / MSEC_PER_SEC;
+	if (!pl172->rate) {
+		dev_err(dev, "unable to get mpmcclk clock rate\n");
+		ret = -EINVAL;
+		goto err_clk_enable;
+	}
+
+	ret = amba_request_regions(adev, NULL);
+	if (ret) {
+		dev_err(dev, "unable to request AMBA regions\n");
+		goto err_clk_enable;
+	}
+
+	pl172->base = devm_ioremap(dev, adev->res.start,
+				   resource_size(&adev->res));
+	if (!pl172->base) {
+		dev_err(dev, "ioremap failed\n");
+		ret = -ENOMEM;
+		goto err_no_ioremap;
+	}
+
+	amba_set_drvdata(adev, pl172);
+
+	/*
+	 * Loop through each child node, which represent a chip select, and
+	 * configure parameters and timing. If successful; populate devices
+	 * under that node.
+	 */
+	for_each_available_child_of_node(np, child_np) {
+		ret = pl172_parse_cs_config(adev, child_np);
+		if (ret)
+			continue;
+
+		of_platform_populate(child_np, NULL, NULL, dev);
+	}
+
+	return 0;
+
+err_no_ioremap:
+	amba_release_regions(adev);
+err_clk_enable:
+	clk_disable_unprepare(pl172->clk);
+	return ret;
+}
+
+static int pl172_remove(struct amba_device *adev)
+{
+	struct pl172_data *pl172 = amba_get_drvdata(adev);
+
+	clk_disable_unprepare(pl172->clk);
+	amba_release_regions(adev);
+
+	return 0;
+}
+
+static const struct amba_id pl172_ids[] = {
+	{
+		.id	= 0x07341172,
+		.mask	= 0xffffffff,
+	},
+	{ 0, 0 },
+};
+MODULE_DEVICE_TABLE(amba, pl172_ids);
+
+static struct amba_driver pl172_driver = {
+	.drv = {
+		.name	= "memory-pl172",
+	},
+	.probe		= pl172_probe,
+	.remove		= pl172_remove,
+	.id_table	= pl172_ids,
+};
+module_amba_driver(pl172_driver);
+
+MODULE_AUTHOR("Joachim Eastwood <manabian@gmail.com>");
+MODULE_DESCRIPTION("PL172 Memory Controller Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 0c77240..729e085 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -23,6 +23,7 @@
 
 	afu = cxl_pci_to_afu(dev);
 
+	get_device(&afu->dev);
 	ctx = cxl_context_alloc();
 	if (IS_ERR(ctx))
 		return ctx;
@@ -31,6 +32,7 @@
 	rc = cxl_context_init(ctx, afu, false, NULL);
 	if (rc) {
 		kfree(ctx);
+		put_device(&afu->dev);
 		return ERR_PTR(-ENOMEM);
 	}
 	cxl_assign_psn_space(ctx);
@@ -60,6 +62,8 @@
 	if (ctx->status != CLOSED)
 		return -EBUSY;
 
+	put_device(&ctx->afu->dev);
+
 	cxl_context_free(ctx);
 
 	return 0;
@@ -159,7 +163,6 @@
 	}
 
 	ctx->status = STARTED;
-	get_device(&ctx->afu->dev);
 out:
 	mutex_unlock(&ctx->status_mutex);
 	return rc;
@@ -175,12 +178,7 @@
 /* Stop a context.  Returns 0 on success, otherwise -Errno */
 int cxl_stop_context(struct cxl_context *ctx)
 {
-	int rc;
-
-	rc = __detach_context(ctx);
-	if (!rc)
-		put_device(&ctx->afu->dev);
-	return rc;
+	return __detach_context(ctx);
 }
 EXPORT_SYMBOL_GPL(cxl_stop_context);
 
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 2a4c80a..1287148 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -113,11 +113,11 @@
 
 	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
 		area = ctx->afu->psn_phys;
-		if (offset > ctx->afu->adapter->ps_size)
+		if (offset >= ctx->afu->adapter->ps_size)
 			return VM_FAULT_SIGBUS;
 	} else {
 		area = ctx->psn_phys;
-		if (offset > ctx->psn_size)
+		if (offset >= ctx->psn_size)
 			return VM_FAULT_SIGBUS;
 	}
 
@@ -145,8 +145,16 @@
  */
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
 {
+	u64 start = vma->vm_pgoff << PAGE_SHIFT;
 	u64 len = vma->vm_end - vma->vm_start;
-	len = min(len, ctx->psn_size);
+
+	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+		if (start + len > ctx->afu->adapter->ps_size)
+			return -EINVAL;
+	} else {
+		if (start + len > ctx->psn_size)
+			return -EINVAL;
+	}
 
 	if (ctx->afu->current_mode != CXL_MODE_DEDICATED) {
 		/* make sure there is a valid per process space for this AFU */
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 833348e..4a164ab 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -73,7 +73,7 @@
 		spin_lock(&adapter->afu_list_lock);
 		for (slice = 0; slice < adapter->slices; slice++) {
 			afu = adapter->afu[slice];
-			if (!afu->enabled)
+			if (!afu || !afu->enabled)
 				continue;
 			rcu_read_lock();
 			idr_for_each_entry(&afu->contexts_idr, ctx, id)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index c68ef58..32ad097 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -539,7 +539,7 @@
 
 static void cxl_unmap_slice_regs(struct cxl_afu *afu)
 {
-	if (afu->p1n_mmio)
+	if (afu->p2n_mmio)
 		iounmap(afu->p2n_mmio);
 	if (afu->p1n_mmio)
 		iounmap(afu->p1n_mmio);
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index b1d1983a..2eba002 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -112,9 +112,10 @@
 	unsigned long addr;
 
 	phb = pci_bus_to_host(bus);
-	afu = (struct cxl_afu *)phb->private_data;
 	if (phb == NULL)
 		return PCIBIOS_DEVICE_NOT_FOUND;
+	afu = (struct cxl_afu *)phb->private_data;
+
 	if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 	if (offset >= (unsigned long)phb->cfg_data)
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 357b6ae..458aa5a 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -552,22 +552,6 @@
 	schedule_work(&device->event_work);
 }
 
-void mei_cl_bus_remove_devices(struct mei_device *dev)
-{
-	struct mei_cl *cl, *next;
-
-	mutex_lock(&dev->device_lock);
-	list_for_each_entry_safe(cl, next, &dev->device_list, device_link) {
-		if (cl->device)
-			mei_cl_remove_device(cl->device);
-
-		list_del(&cl->device_link);
-		mei_cl_unlink(cl);
-		kfree(cl);
-	}
-	mutex_unlock(&dev->device_lock);
-}
-
 int __init mei_cl_bus_init(void)
 {
 	return bus_register(&mei_cl_bus_type);
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index 94514b2..00c3865 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -333,8 +333,6 @@
 
 	mei_nfc_host_exit(dev);
 
-	mei_cl_bus_remove_devices(dev);
-
 	mutex_lock(&dev->device_lock);
 
 	mei_wd_stop(dev);
diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c
index b983c4e..290ef30 100644
--- a/drivers/misc/mei/nfc.c
+++ b/drivers/misc/mei/nfc.c
@@ -402,11 +402,12 @@
 
 	cldev->priv_data = NULL;
 
-	mutex_lock(&dev->device_lock);
 	/* Need to remove the device here
 	 * since mei_nfc_free will unlink the clients
 	 */
 	mei_cl_remove_device(cldev);
+
+	mutex_lock(&dev->device_lock);
 	mei_nfc_free(ndev);
 	mutex_unlock(&dev->device_lock);
 }
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 8eb22c0..7e2c43f 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -535,8 +535,6 @@
 					__func__, dimm_name, cmd_name, i);
 			return -ENXIO;
 		}
-		if (!access_ok(VERIFY_READ, p + in_len, in_size))
-			return -EFAULT;
 		if (in_len < sizeof(in_env))
 			copy = min_t(u32, sizeof(in_env) - in_len, in_size);
 		else
@@ -557,8 +555,6 @@
 					__func__, dimm_name, cmd_name, i);
 			return -EFAULT;
 		}
-		if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size))
-			return -EFAULT;
 		if (out_len < sizeof(out_env))
 			copy = min_t(u32, sizeof(out_env) - out_len, out_size);
 		else
@@ -570,9 +566,6 @@
 	}
 
 	buf_len = out_len + in_len;
-	if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len)))
-		return -EFAULT;
-
 	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
 		dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__,
 				dimm_name, cmd_name, buf_len,
@@ -706,8 +699,10 @@
 	nvdimm_major = rc;
 
 	nd_class = class_create(THIS_MODULE, "nd");
-	if (IS_ERR(nd_class))
+	if (IS_ERR(nd_class)) {
+		rc = PTR_ERR(nd_class);
 		goto err_class;
+	}
 
 	return 0;
 
diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c
index 515f338..49c1720 100644
--- a/drivers/pnp/system.c
+++ b/drivers/pnp/system.c
@@ -7,7 +7,6 @@
  *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  */
 
-#include <linux/acpi.h>
 #include <linux/pnp.h>
 #include <linux/device.h>
 #include <linux/init.h>
@@ -23,41 +22,25 @@
 	{"", 0}
 };
 
-#ifdef CONFIG_ACPI
-static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc)
-{
-	u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY;
-	return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc);
-}
-#else
-static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc)
-{
-	struct resource *res;
-
-	res = io ? request_region(start, length, desc) :
-		request_mem_region(start, length, desc);
-	if (res) {
-		res->flags &= ~IORESOURCE_BUSY;
-		return true;
-	}
-	return false;
-}
-#endif
-
 static void reserve_range(struct pnp_dev *dev, struct resource *r, int port)
 {
 	char *regionid;
 	const char *pnpid = dev_name(&dev->dev);
 	resource_size_t start = r->start, end = r->end;
-	bool reserved;
+	struct resource *res;
 
 	regionid = kmalloc(16, GFP_KERNEL);
 	if (!regionid)
 		return;
 
 	snprintf(regionid, 16, "pnp %s", pnpid);
-	reserved = __reserve_range(start, end - start + 1, !!port, regionid);
-	if (!reserved)
+	if (port)
+		res = request_region(start, end - start + 1, regionid);
+	else
+		res = request_mem_region(start, end - start + 1, regionid);
+	if (res)
+		res->flags &= ~IORESOURCE_BUSY;
+	else
 		kfree(regionid);
 
 	/*
@@ -66,7 +49,7 @@
 	 * have double reservations.
 	 */
 	dev_info(&dev->dev, "%pR %s reserved\n", r,
-		 reserved ? "has been" : "could not be");
+		 res ? "has been" : "could not be");
 }
 
 static void reserve_resources_of_dev(struct pnp_dev *dev)
diff --git a/drivers/reset/Makefile b/drivers/reset/Makefile
index 157d421..85d5904 100644
--- a/drivers/reset/Makefile
+++ b/drivers/reset/Makefile
@@ -1,5 +1,8 @@
 obj-$(CONFIG_RESET_CONTROLLER) += core.o
+obj-$(CONFIG_ARCH_LPC18XX) += reset-lpc18xx.o
 obj-$(CONFIG_ARCH_SOCFPGA) += reset-socfpga.o
 obj-$(CONFIG_ARCH_BERLIN) += reset-berlin.o
 obj-$(CONFIG_ARCH_SUNXI) += reset-sunxi.o
 obj-$(CONFIG_ARCH_STI) += sti/
+obj-$(CONFIG_ARCH_ZYNQ) += reset-zynq.o
+obj-$(CONFIG_ATH79) += reset-ath79.o
diff --git a/drivers/reset/reset-ath79.c b/drivers/reset/reset-ath79.c
new file mode 100644
index 0000000..d2d2904
--- /dev/null
+++ b/drivers/reset/reset-ath79.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2015 Alban Bedel <albeu@free.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/reset-controller.h>
+
+struct ath79_reset {
+	struct reset_controller_dev rcdev;
+	void __iomem *base;
+	spinlock_t lock;
+};
+
+static int ath79_reset_update(struct reset_controller_dev *rcdev,
+			unsigned long id, bool assert)
+{
+	struct ath79_reset *ath79_reset =
+		container_of(rcdev, struct ath79_reset, rcdev);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&ath79_reset->lock, flags);
+	val = readl(ath79_reset->base);
+	if (assert)
+		val |= BIT(id);
+	else
+		val &= ~BIT(id);
+	writel(val, ath79_reset->base);
+	spin_unlock_irqrestore(&ath79_reset->lock, flags);
+
+	return 0;
+}
+
+static int ath79_reset_assert(struct reset_controller_dev *rcdev,
+			unsigned long id)
+{
+	return ath79_reset_update(rcdev, id, true);
+}
+
+static int ath79_reset_deassert(struct reset_controller_dev *rcdev,
+				unsigned long id)
+{
+	return ath79_reset_update(rcdev, id, false);
+}
+
+static int ath79_reset_status(struct reset_controller_dev *rcdev,
+			unsigned long id)
+{
+	struct ath79_reset *ath79_reset =
+		container_of(rcdev, struct ath79_reset, rcdev);
+	u32 val;
+
+	val = readl(ath79_reset->base);
+
+	return !!(val & BIT(id));
+}
+
+static struct reset_control_ops ath79_reset_ops = {
+	.assert = ath79_reset_assert,
+	.deassert = ath79_reset_deassert,
+	.status = ath79_reset_status,
+};
+
+static int ath79_reset_probe(struct platform_device *pdev)
+{
+	struct ath79_reset *ath79_reset;
+	struct resource *res;
+
+	ath79_reset = devm_kzalloc(&pdev->dev,
+				sizeof(*ath79_reset), GFP_KERNEL);
+	if (!ath79_reset)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ath79_reset);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ath79_reset->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ath79_reset->base))
+		return PTR_ERR(ath79_reset->base);
+
+	ath79_reset->rcdev.ops = &ath79_reset_ops;
+	ath79_reset->rcdev.owner = THIS_MODULE;
+	ath79_reset->rcdev.of_node = pdev->dev.of_node;
+	ath79_reset->rcdev.of_reset_n_cells = 1;
+	ath79_reset->rcdev.nr_resets = 32;
+
+	return reset_controller_register(&ath79_reset->rcdev);
+}
+
+static int ath79_reset_remove(struct platform_device *pdev)
+{
+	struct ath79_reset *ath79_reset = platform_get_drvdata(pdev);
+
+	reset_controller_unregister(&ath79_reset->rcdev);
+
+	return 0;
+}
+
+static const struct of_device_id ath79_reset_dt_ids[] = {
+	{ .compatible = "qca,ar7100-reset", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ath79_reset_dt_ids);
+
+static struct platform_driver ath79_reset_driver = {
+	.probe	= ath79_reset_probe,
+	.remove = ath79_reset_remove,
+	.driver = {
+		.name		= "ath79-reset",
+		.of_match_table	= ath79_reset_dt_ids,
+	},
+};
+module_platform_driver(ath79_reset_driver);
+
+MODULE_AUTHOR("Alban Bedel <albeu@free.fr>");
+MODULE_DESCRIPTION("AR71xx Reset Controller Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/reset/reset-lpc18xx.c b/drivers/reset/reset-lpc18xx.c
new file mode 100644
index 0000000..70922e9
--- /dev/null
+++ b/drivers/reset/reset-lpc18xx.c
@@ -0,0 +1,258 @@
+/*
+ * Reset driver for NXP LPC18xx/43xx Reset Generation Unit (RGU).
+ *
+ * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/reset-controller.h>
+#include <linux/spinlock.h>
+
+/* LPC18xx RGU registers */
+#define LPC18XX_RGU_CTRL0		0x100
+#define LPC18XX_RGU_CTRL1		0x104
+#define LPC18XX_RGU_ACTIVE_STATUS0	0x150
+#define LPC18XX_RGU_ACTIVE_STATUS1	0x154
+
+#define LPC18XX_RGU_RESETS_PER_REG	32
+
+/* Internal reset outputs */
+#define LPC18XX_RGU_CORE_RST	0
+#define LPC43XX_RGU_M0SUB_RST	12
+#define LPC43XX_RGU_M0APP_RST	56
+
+struct lpc18xx_rgu_data {
+	struct reset_controller_dev rcdev;
+	struct clk *clk_delay;
+	struct clk *clk_reg;
+	void __iomem *base;
+	spinlock_t lock;
+	u32 delay_us;
+};
+
+#define to_rgu_data(p) container_of(p, struct lpc18xx_rgu_data, rcdev)
+
+static void __iomem *rgu_base;
+
+static int lpc18xx_rgu_restart(struct notifier_block *this, unsigned long mode,
+			       void *cmd)
+{
+	writel(BIT(LPC18XX_RGU_CORE_RST), rgu_base + LPC18XX_RGU_CTRL0);
+	mdelay(2000);
+
+	pr_emerg("%s: unable to restart system\n", __func__);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block lpc18xx_rgu_restart_nb = {
+	.notifier_call = lpc18xx_rgu_restart,
+	.priority = 192,
+};
+
+/*
+ * The LPC18xx RGU has mostly self-deasserting resets except for the
+ * two reset lines going to the internal Cortex-M0 cores.
+ *
+ * To prevent the M0 core resets from accidentally getting deasserted
+ * status register must be check and bits in control register set to
+ * preserve the state.
+ */
+static int lpc18xx_rgu_setclear_reset(struct reset_controller_dev *rcdev,
+				      unsigned long id, bool set)
+{
+	struct lpc18xx_rgu_data *rc = to_rgu_data(rcdev);
+	u32 stat_offset = LPC18XX_RGU_ACTIVE_STATUS0;
+	u32 ctrl_offset = LPC18XX_RGU_CTRL0;
+	unsigned long flags;
+	u32 stat, rst_bit;
+
+	stat_offset += (id / LPC18XX_RGU_RESETS_PER_REG) * sizeof(u32);
+	ctrl_offset += (id / LPC18XX_RGU_RESETS_PER_REG) * sizeof(u32);
+	rst_bit = 1 << (id % LPC18XX_RGU_RESETS_PER_REG);
+
+	spin_lock_irqsave(&rc->lock, flags);
+	stat = ~readl(rc->base + stat_offset);
+	if (set)
+		writel(stat | rst_bit, rc->base + ctrl_offset);
+	else
+		writel(stat & ~rst_bit, rc->base + ctrl_offset);
+	spin_unlock_irqrestore(&rc->lock, flags);
+
+	return 0;
+}
+
+static int lpc18xx_rgu_assert(struct reset_controller_dev *rcdev,
+			      unsigned long id)
+{
+	return lpc18xx_rgu_setclear_reset(rcdev, id, true);
+}
+
+static int lpc18xx_rgu_deassert(struct reset_controller_dev *rcdev,
+				unsigned long id)
+{
+	return lpc18xx_rgu_setclear_reset(rcdev, id, false);
+}
+
+/* Only M0 cores require explicit reset deassert */
+static int lpc18xx_rgu_reset(struct reset_controller_dev *rcdev,
+			     unsigned long id)
+{
+	struct lpc18xx_rgu_data *rc = to_rgu_data(rcdev);
+
+	lpc18xx_rgu_assert(rcdev, id);
+	udelay(rc->delay_us);
+
+	switch (id) {
+	case LPC43XX_RGU_M0SUB_RST:
+	case LPC43XX_RGU_M0APP_RST:
+		lpc18xx_rgu_setclear_reset(rcdev, id, false);
+	}
+
+	return 0;
+}
+
+static int lpc18xx_rgu_status(struct reset_controller_dev *rcdev,
+			      unsigned long id)
+{
+	struct lpc18xx_rgu_data *rc = to_rgu_data(rcdev);
+	u32 bit, offset = LPC18XX_RGU_ACTIVE_STATUS0;
+
+	offset += (id / LPC18XX_RGU_RESETS_PER_REG) * sizeof(u32);
+	bit = 1 << (id % LPC18XX_RGU_RESETS_PER_REG);
+
+	return !(readl(rc->base + offset) & bit);
+}
+
+static struct reset_control_ops lpc18xx_rgu_ops = {
+	.reset		= lpc18xx_rgu_reset,
+	.assert		= lpc18xx_rgu_assert,
+	.deassert	= lpc18xx_rgu_deassert,
+	.status		= lpc18xx_rgu_status,
+};
+
+static int lpc18xx_rgu_probe(struct platform_device *pdev)
+{
+	struct lpc18xx_rgu_data *rc;
+	struct resource *res;
+	u32 fcclk, firc;
+	int ret;
+
+	rc = devm_kzalloc(&pdev->dev, sizeof(*rc), GFP_KERNEL);
+	if (!rc)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(rc->base))
+		return PTR_ERR(rc->base);
+
+	rc->clk_reg = devm_clk_get(&pdev->dev, "reg");
+	if (IS_ERR(rc->clk_reg)) {
+		dev_err(&pdev->dev, "reg clock not found\n");
+		return PTR_ERR(rc->clk_reg);
+	}
+
+	rc->clk_delay = devm_clk_get(&pdev->dev, "delay");
+	if (IS_ERR(rc->clk_delay)) {
+		dev_err(&pdev->dev, "delay clock not found\n");
+		return PTR_ERR(rc->clk_delay);
+	}
+
+	ret = clk_prepare_enable(rc->clk_reg);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to enable reg clock\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(rc->clk_delay);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to enable delay clock\n");
+		goto dis_clk_reg;
+	}
+
+	fcclk = clk_get_rate(rc->clk_reg) / USEC_PER_SEC;
+	firc = clk_get_rate(rc->clk_delay) / USEC_PER_SEC;
+	if (fcclk == 0 || firc == 0)
+		rc->delay_us = 2;
+	else
+		rc->delay_us = DIV_ROUND_UP(fcclk, firc * firc);
+
+	spin_lock_init(&rc->lock);
+
+	rc->rcdev.owner = THIS_MODULE;
+	rc->rcdev.nr_resets = 64;
+	rc->rcdev.ops = &lpc18xx_rgu_ops;
+	rc->rcdev.of_node = pdev->dev.of_node;
+
+	platform_set_drvdata(pdev, rc);
+
+	ret = reset_controller_register(&rc->rcdev);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register device\n");
+		goto dis_clks;
+	}
+
+	rgu_base = rc->base;
+	ret = register_restart_handler(&lpc18xx_rgu_restart_nb);
+	if (ret)
+		dev_warn(&pdev->dev, "failed to register restart handler\n");
+
+	return 0;
+
+dis_clks:
+	clk_disable_unprepare(rc->clk_delay);
+dis_clk_reg:
+	clk_disable_unprepare(rc->clk_reg);
+
+	return ret;
+}
+
+static int lpc18xx_rgu_remove(struct platform_device *pdev)
+{
+	struct lpc18xx_rgu_data *rc = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = unregister_restart_handler(&lpc18xx_rgu_restart_nb);
+	if (ret)
+		dev_warn(&pdev->dev, "failed to unregister restart handler\n");
+
+	reset_controller_unregister(&rc->rcdev);
+
+	clk_disable_unprepare(rc->clk_delay);
+	clk_disable_unprepare(rc->clk_reg);
+
+	return 0;
+}
+
+static const struct of_device_id lpc18xx_rgu_match[] = {
+	{ .compatible = "nxp,lpc1850-rgu" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, lpc18xx_rgu_match);
+
+static struct platform_driver lpc18xx_rgu_driver = {
+	.probe	= lpc18xx_rgu_probe,
+	.remove	= lpc18xx_rgu_remove,
+	.driver	= {
+		.name		= "lpc18xx-reset",
+		.of_match_table	= lpc18xx_rgu_match,
+	},
+};
+module_platform_driver(lpc18xx_rgu_driver);
+
+MODULE_AUTHOR("Joachim Eastwood <manabian@gmail.com>");
+MODULE_DESCRIPTION("Reset driver for LPC18xx/43xx RGU");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c
index 0a8def3..1a6c5d6 100644
--- a/drivers/reset/reset-socfpga.c
+++ b/drivers/reset/reset-socfpga.c
@@ -24,11 +24,11 @@
 #include <linux/types.h>
 
 #define NR_BANKS		4
-#define OFFSET_MODRST		0x10
 
 struct socfpga_reset_data {
 	spinlock_t			lock;
 	void __iomem			*membase;
+	u32				modrst_offset;
 	struct reset_controller_dev	rcdev;
 };
 
@@ -45,8 +45,8 @@
 
 	spin_lock_irqsave(&data->lock, flags);
 
-	reg = readl(data->membase + OFFSET_MODRST + (bank * NR_BANKS));
-	writel(reg | BIT(offset), data->membase + OFFSET_MODRST +
+	reg = readl(data->membase + data->modrst_offset + (bank * NR_BANKS));
+	writel(reg | BIT(offset), data->membase + data->modrst_offset +
 				 (bank * NR_BANKS));
 	spin_unlock_irqrestore(&data->lock, flags);
 
@@ -67,8 +67,8 @@
 
 	spin_lock_irqsave(&data->lock, flags);
 
-	reg = readl(data->membase + OFFSET_MODRST + (bank * NR_BANKS));
-	writel(reg & ~BIT(offset), data->membase + OFFSET_MODRST +
+	reg = readl(data->membase + data->modrst_offset + (bank * NR_BANKS));
+	writel(reg & ~BIT(offset), data->membase + data->modrst_offset +
 				  (bank * NR_BANKS));
 
 	spin_unlock_irqrestore(&data->lock, flags);
@@ -85,7 +85,7 @@
 	int offset = id % BITS_PER_LONG;
 	u32 reg;
 
-	reg = readl(data->membase + OFFSET_MODRST + (bank * NR_BANKS));
+	reg = readl(data->membase + data->modrst_offset + (bank * NR_BANKS));
 
 	return !(reg & BIT(offset));
 }
@@ -100,6 +100,8 @@
 {
 	struct socfpga_reset_data *data;
 	struct resource *res;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
 
 	/*
 	 * The binding was mainlined without the required property.
@@ -120,6 +122,11 @@
 	if (IS_ERR(data->membase))
 		return PTR_ERR(data->membase);
 
+	if (of_property_read_u32(np, "altr,modrst-offset", &data->modrst_offset)) {
+		dev_warn(dev, "missing altr,modrst-offset property, assuming 0x10!\n");
+		data->modrst_offset = 0x10;
+	}
+
 	spin_lock_init(&data->lock);
 
 	data->rcdev.owner = THIS_MODULE;
diff --git a/drivers/reset/reset-zynq.c b/drivers/reset/reset-zynq.c
new file mode 100644
index 0000000..89318a5
--- /dev/null
+++ b/drivers/reset/reset-zynq.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015, National Instruments Corp.
+ *
+ * Xilinx Zynq Reset controller driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset-controller.h>
+#include <linux/regmap.h>
+#include <linux/types.h>
+
+struct zynq_reset_data {
+	struct regmap *slcr;
+	struct reset_controller_dev rcdev;
+	u32 offset;
+};
+
+#define to_zynq_reset_data(p)		\
+	container_of((p), struct zynq_reset_data, rcdev)
+
+static int zynq_reset_assert(struct reset_controller_dev *rcdev,
+			     unsigned long id)
+{
+	struct zynq_reset_data *priv = to_zynq_reset_data(rcdev);
+
+	int bank = id / BITS_PER_LONG;
+	int offset = id % BITS_PER_LONG;
+
+	pr_debug("%s: %s reset bank %u offset %u\n", KBUILD_MODNAME, __func__,
+		 bank, offset);
+
+	return regmap_update_bits(priv->slcr,
+				  priv->offset + (bank * 4),
+				  BIT(offset),
+				  BIT(offset));
+}
+
+static int zynq_reset_deassert(struct reset_controller_dev *rcdev,
+			       unsigned long id)
+{
+	struct zynq_reset_data *priv = to_zynq_reset_data(rcdev);
+
+	int bank = id / BITS_PER_LONG;
+	int offset = id % BITS_PER_LONG;
+
+	pr_debug("%s: %s reset bank %u offset %u\n", KBUILD_MODNAME, __func__,
+		 bank, offset);
+
+	return regmap_update_bits(priv->slcr,
+				  priv->offset + (bank * 4),
+				  BIT(offset),
+				  ~BIT(offset));
+}
+
+static int zynq_reset_status(struct reset_controller_dev *rcdev,
+			     unsigned long id)
+{
+	struct zynq_reset_data *priv = to_zynq_reset_data(rcdev);
+
+	int bank = id / BITS_PER_LONG;
+	int offset = id % BITS_PER_LONG;
+	int ret;
+	u32 reg;
+
+	pr_debug("%s: %s reset bank %u offset %u\n", KBUILD_MODNAME, __func__,
+		 bank, offset);
+
+	ret = regmap_read(priv->slcr, priv->offset + (bank * 4), &reg);
+	if (ret)
+		return ret;
+
+	return !!(reg & BIT(offset));
+}
+
+static struct reset_control_ops zynq_reset_ops = {
+	.assert		= zynq_reset_assert,
+	.deassert	= zynq_reset_deassert,
+	.status		= zynq_reset_status,
+};
+
+static int zynq_reset_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct zynq_reset_data *priv;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, priv);
+
+	priv->slcr = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+						     "syscon");
+	if (IS_ERR(priv->slcr)) {
+		dev_err(&pdev->dev, "unable to get zynq-slcr regmap");
+		return PTR_ERR(priv->slcr);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "missing IO resource\n");
+		return -ENODEV;
+	}
+
+	priv->offset = res->start;
+
+	priv->rcdev.owner = THIS_MODULE;
+	priv->rcdev.nr_resets = resource_size(res) / 4 * BITS_PER_LONG;
+	priv->rcdev.ops = &zynq_reset_ops;
+	priv->rcdev.of_node = pdev->dev.of_node;
+	reset_controller_register(&priv->rcdev);
+
+	return 0;
+}
+
+static int zynq_reset_remove(struct platform_device *pdev)
+{
+	struct zynq_reset_data *priv = platform_get_drvdata(pdev);
+
+	reset_controller_unregister(&priv->rcdev);
+
+	return 0;
+}
+
+static const struct of_device_id zynq_reset_dt_ids[] = {
+	{ .compatible = "xlnx,zynq-reset", },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver zynq_reset_driver = {
+	.probe	= zynq_reset_probe,
+	.remove	= zynq_reset_remove,
+	.driver = {
+		.name		= KBUILD_MODNAME,
+		.of_match_table	= zynq_reset_dt_ids,
+	},
+};
+module_platform_driver(zynq_reset_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Moritz Fischer <moritz.fischer@ettus.com>");
+MODULE_DESCRIPTION("Zynq Reset Controller Driver");
diff --git a/drivers/reset/sti/reset-stih407.c b/drivers/reset/sti/reset-stih407.c
index d83db5d7..827eb3d 100644
--- a/drivers/reset/sti/reset-stih407.c
+++ b/drivers/reset/sti/reset-stih407.c
@@ -11,7 +11,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
-#include <dt-bindings/reset-controller/stih407-resets.h>
+#include <dt-bindings/reset/stih407-resets.h>
 #include "reset-syscfg.h"
 
 /* STiH407 Peripheral powerdown definitions. */
@@ -126,7 +126,7 @@
 	.channels = stih407_picophyresets,
 };
 
-static struct of_device_id stih407_reset_match[] = {
+static const struct of_device_id stih407_reset_match[] = {
 	{
 		.compatible = "st,stih407-powerdown",
 		.data = &stih407_powerdown_controller,
diff --git a/drivers/reset/sti/reset-stih415.c b/drivers/reset/sti/reset-stih415.c
index 8dad603..6f220cd 100644
--- a/drivers/reset/sti/reset-stih415.c
+++ b/drivers/reset/sti/reset-stih415.c
@@ -13,7 +13,7 @@
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 
-#include <dt-bindings/reset-controller/stih415-resets.h>
+#include <dt-bindings/reset/stih415-resets.h>
 
 #include "reset-syscfg.h"
 
@@ -89,7 +89,7 @@
 	.channels = stih415_softresets,
 };
 
-static struct of_device_id stih415_reset_match[] = {
+static const struct of_device_id stih415_reset_match[] = {
 	{ .compatible = "st,stih415-powerdown",
 	  .data = &stih415_powerdown_controller, },
 	{ .compatible = "st,stih415-softreset",
diff --git a/drivers/reset/sti/reset-stih416.c b/drivers/reset/sti/reset-stih416.c
index 79aed70..c581d60 100644
--- a/drivers/reset/sti/reset-stih416.c
+++ b/drivers/reset/sti/reset-stih416.c
@@ -13,7 +13,7 @@
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 
-#include <dt-bindings/reset-controller/stih416-resets.h>
+#include <dt-bindings/reset/stih416-resets.h>
 
 #include "reset-syscfg.h"
 
@@ -120,7 +120,7 @@
 	.channels = stih416_softresets,
 };
 
-static struct of_device_id stih416_reset_match[] = {
+static const struct of_device_id stih416_reset_match[] = {
 	{ .compatible = "st,stih416-powerdown",
 	  .data = &stih416_powerdown_controller, },
 	{ .compatible = "st,stih416-softreset",
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 7dc7c0d..0b12d77 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -2,6 +2,7 @@
 # Makefile for the Linux Kernel SOC specific device drivers.
 #
 
+obj-$(CONFIG_MACH_DOVE)		+= dove/
 obj-$(CONFIG_ARCH_MEDIATEK)	+= mediatek/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-$(CONFIG_ARCH_SUNXI)	+= sunxi/
diff --git a/drivers/soc/dove/Makefile b/drivers/soc/dove/Makefile
new file mode 100644
index 0000000..2db8e65
--- /dev/null
+++ b/drivers/soc/dove/Makefile
@@ -0,0 +1 @@
+obj-y		+= pmu.o
diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c
new file mode 100644
index 0000000..6792aae
--- /dev/null
+++ b/drivers/soc/dove/pmu.c
@@ -0,0 +1,412 @@
+/*
+ * Marvell Dove PMU support
+ */
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/reset.h>
+#include <linux/reset-controller.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/soc/dove/pmu.h>
+#include <linux/spinlock.h>
+
+#define NR_PMU_IRQS		7
+
+#define PMC_SW_RST		0x30
+#define PMC_IRQ_CAUSE		0x50
+#define PMC_IRQ_MASK		0x54
+
+#define PMU_PWR			0x10
+#define PMU_ISO			0x58
+
+struct pmu_data {
+	spinlock_t lock;
+	struct device_node *of_node;
+	void __iomem *pmc_base;
+	void __iomem *pmu_base;
+	struct irq_chip_generic *irq_gc;
+	struct irq_domain *irq_domain;
+#ifdef CONFIG_RESET_CONTROLLER
+	struct reset_controller_dev reset;
+#endif
+};
+
+/*
+ * The PMU contains a register to reset various subsystems within the
+ * SoC.  Export this as a reset controller.
+ */
+#ifdef CONFIG_RESET_CONTROLLER
+#define rcdev_to_pmu(rcdev) container_of(rcdev, struct pmu_data, reset)
+
+static int pmu_reset_reset(struct reset_controller_dev *rc, unsigned long id)
+{
+	struct pmu_data *pmu = rcdev_to_pmu(rc);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+	val = readl_relaxed(pmu->pmc_base + PMC_SW_RST);
+	writel_relaxed(val & ~BIT(id), pmu->pmc_base + PMC_SW_RST);
+	writel_relaxed(val | BIT(id), pmu->pmc_base + PMC_SW_RST);
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return 0;
+}
+
+static int pmu_reset_assert(struct reset_controller_dev *rc, unsigned long id)
+{
+	struct pmu_data *pmu = rcdev_to_pmu(rc);
+	unsigned long flags;
+	u32 val = ~BIT(id);
+
+	spin_lock_irqsave(&pmu->lock, flags);
+	val &= readl_relaxed(pmu->pmc_base + PMC_SW_RST);
+	writel_relaxed(val, pmu->pmc_base + PMC_SW_RST);
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return 0;
+}
+
+static int pmu_reset_deassert(struct reset_controller_dev *rc, unsigned long id)
+{
+	struct pmu_data *pmu = rcdev_to_pmu(rc);
+	unsigned long flags;
+	u32 val = BIT(id);
+
+	spin_lock_irqsave(&pmu->lock, flags);
+	val |= readl_relaxed(pmu->pmc_base + PMC_SW_RST);
+	writel_relaxed(val, pmu->pmc_base + PMC_SW_RST);
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return 0;
+}
+
+static struct reset_control_ops pmu_reset_ops = {
+	.reset = pmu_reset_reset,
+	.assert = pmu_reset_assert,
+	.deassert = pmu_reset_deassert,
+};
+
+static struct reset_controller_dev pmu_reset __initdata = {
+	.ops = &pmu_reset_ops,
+	.owner = THIS_MODULE,
+	.nr_resets = 32,
+};
+
+static void __init pmu_reset_init(struct pmu_data *pmu)
+{
+	int ret;
+
+	pmu->reset = pmu_reset;
+	pmu->reset.of_node = pmu->of_node;
+
+	ret = reset_controller_register(&pmu->reset);
+	if (ret)
+		pr_err("pmu: %s failed: %d\n", "reset_controller_register", ret);
+}
+#else
+static void __init pmu_reset_init(struct pmu_data *pmu)
+{
+}
+#endif
+
+struct pmu_domain {
+	struct pmu_data *pmu;
+	u32 pwr_mask;
+	u32 rst_mask;
+	u32 iso_mask;
+	struct generic_pm_domain base;
+};
+
+#define to_pmu_domain(dom) container_of(dom, struct pmu_domain, base)
+
+/*
+ * This deals with the "old" Marvell sequence of bringing a power domain
+ * down/up, which is: apply power, release reset, disable isolators.
+ *
+ * Later devices apparantly use a different sequence: power up, disable
+ * isolators, assert repair signal, enable SRMA clock, enable AXI clock,
+ * enable module clock, deassert reset.
+ *
+ * Note: reading the assembly, it seems that the IO accessors have an
+ * unfortunate side-effect - they cause memory already read into registers
+ * for the if () to be re-read for the bit-set or bit-clear operation.
+ * The code is written to avoid this.
+ */
+static int pmu_domain_power_off(struct generic_pm_domain *domain)
+{
+	struct pmu_domain *pmu_dom = to_pmu_domain(domain);
+	struct pmu_data *pmu = pmu_dom->pmu;
+	unsigned long flags;
+	unsigned int val;
+	void __iomem *pmu_base = pmu->pmu_base;
+	void __iomem *pmc_base = pmu->pmc_base;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	/* Enable isolators */
+	if (pmu_dom->iso_mask) {
+		val = ~pmu_dom->iso_mask;
+		val &= readl_relaxed(pmu_base + PMU_ISO);
+		writel_relaxed(val, pmu_base + PMU_ISO);
+	}
+
+	/* Reset unit */
+	if (pmu_dom->rst_mask) {
+		val = ~pmu_dom->rst_mask;
+		val &= readl_relaxed(pmc_base + PMC_SW_RST);
+		writel_relaxed(val, pmc_base + PMC_SW_RST);
+	}
+
+	/* Power down */
+	val = readl_relaxed(pmu_base + PMU_PWR) | pmu_dom->pwr_mask;
+	writel_relaxed(val, pmu_base + PMU_PWR);
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return 0;
+}
+
+static int pmu_domain_power_on(struct generic_pm_domain *domain)
+{
+	struct pmu_domain *pmu_dom = to_pmu_domain(domain);
+	struct pmu_data *pmu = pmu_dom->pmu;
+	unsigned long flags;
+	unsigned int val;
+	void __iomem *pmu_base = pmu->pmu_base;
+	void __iomem *pmc_base = pmu->pmc_base;
+
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	/* Power on */
+	val = ~pmu_dom->pwr_mask & readl_relaxed(pmu_base + PMU_PWR);
+	writel_relaxed(val, pmu_base + PMU_PWR);
+
+	/* Release reset */
+	if (pmu_dom->rst_mask) {
+		val = pmu_dom->rst_mask;
+		val |= readl_relaxed(pmc_base + PMC_SW_RST);
+		writel_relaxed(val, pmc_base + PMC_SW_RST);
+	}
+
+	/* Disable isolators */
+	if (pmu_dom->iso_mask) {
+		val = pmu_dom->iso_mask;
+		val |= readl_relaxed(pmu_base + PMU_ISO);
+		writel_relaxed(val, pmu_base + PMU_ISO);
+	}
+
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
+	return 0;
+}
+
+static void __pmu_domain_register(struct pmu_domain *domain,
+	struct device_node *np)
+{
+	unsigned int val = readl_relaxed(domain->pmu->pmu_base + PMU_PWR);
+
+	domain->base.power_off = pmu_domain_power_off;
+	domain->base.power_on = pmu_domain_power_on;
+
+	pm_genpd_init(&domain->base, NULL, !(val & domain->pwr_mask));
+
+	if (np)
+		of_genpd_add_provider_simple(np, &domain->base);
+}
+
+/* PMU IRQ controller */
+static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc)
+{
+	struct pmu_data *pmu = irq_get_handler_data(irq);
+	struct irq_chip_generic *gc = pmu->irq_gc;
+	struct irq_domain *domain = pmu->irq_domain;
+	void __iomem *base = gc->reg_base;
+	u32 stat = readl_relaxed(base + PMC_IRQ_CAUSE) & gc->mask_cache;
+	u32 done = ~0;
+
+	if (stat == 0) {
+		handle_bad_irq(irq, desc);
+		return;
+	}
+
+	while (stat) {
+		u32 hwirq = fls(stat) - 1;
+
+		stat &= ~(1 << hwirq);
+		done &= ~(1 << hwirq);
+
+		generic_handle_irq(irq_find_mapping(domain, hwirq));
+	}
+
+	/*
+	 * The PMU mask register is not RW0C: it is RW.  This means that
+	 * the bits take whatever value is written to them; if you write
+	 * a '1', you will set the interrupt.
+	 *
+	 * Unfortunately this means there is NO race free way to clear
+	 * these interrupts.
+	 *
+	 * So, let's structure the code so that the window is as small as
+	 * possible.
+	 */
+	irq_gc_lock(gc);
+	done &= readl_relaxed(base + PMC_IRQ_CAUSE);
+	writel_relaxed(done, base + PMC_IRQ_CAUSE);
+	irq_gc_unlock(gc);
+}
+
+static int __init dove_init_pmu_irq(struct pmu_data *pmu, int irq)
+{
+	const char *name = "pmu_irq";
+	struct irq_chip_generic *gc;
+	struct irq_domain *domain;
+	int ret;
+
+	/* mask and clear all interrupts */
+	writel(0, pmu->pmc_base + PMC_IRQ_MASK);
+	writel(0, pmu->pmc_base + PMC_IRQ_CAUSE);
+
+	domain = irq_domain_add_linear(pmu->of_node, NR_PMU_IRQS,
+				       &irq_generic_chip_ops, NULL);
+	if (!domain) {
+		pr_err("%s: unable to add irq domain\n", name);
+		return -ENOMEM;
+	}
+
+	ret = irq_alloc_domain_generic_chips(domain, NR_PMU_IRQS, 1, name,
+					     handle_level_irq,
+					     IRQ_NOREQUEST | IRQ_NOPROBE, 0,
+					     IRQ_GC_INIT_MASK_CACHE);
+	if (ret) {
+		pr_err("%s: unable to alloc irq domain gc: %d\n", name, ret);
+		irq_domain_remove(domain);
+		return ret;
+	}
+
+	gc = irq_get_domain_generic_chip(domain, 0);
+	gc->reg_base = pmu->pmc_base;
+	gc->chip_types[0].regs.mask = PMC_IRQ_MASK;
+	gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit;
+	gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit;
+
+	pmu->irq_domain = domain;
+	pmu->irq_gc = gc;
+
+	irq_set_handler_data(irq, pmu);
+	irq_set_chained_handler(irq, pmu_irq_handler);
+
+	return 0;
+}
+
+/*
+ * pmu: power-manager@d0000 {
+ *	compatible = "marvell,dove-pmu";
+ *	reg = <0xd0000 0x8000> <0xd8000 0x8000>;
+ *	interrupts = <33>;
+ *	interrupt-controller;
+ *	#reset-cells = 1;
+ *	vpu_domain: vpu-domain {
+ *		#power-domain-cells = <0>;
+ *		marvell,pmu_pwr_mask = <0x00000008>;
+ *		marvell,pmu_iso_mask = <0x00000001>;
+ *		resets = <&pmu 16>;
+ *	};
+ *	gpu_domain: gpu-domain {
+ *		#power-domain-cells = <0>;
+ *		marvell,pmu_pwr_mask = <0x00000004>;
+ *		marvell,pmu_iso_mask = <0x00000002>;
+ *		resets = <&pmu 18>;
+ *	};
+ * };
+ */
+int __init dove_init_pmu(void)
+{
+	struct device_node *np_pmu, *domains_node, *np;
+	struct pmu_data *pmu;
+	int ret, parent_irq;
+
+	/* Lookup the PMU node */
+	np_pmu = of_find_compatible_node(NULL, NULL, "marvell,dove-pmu");
+	if (!np_pmu)
+		return 0;
+
+	domains_node = of_get_child_by_name(np_pmu, "domains");
+	if (!domains_node) {
+		pr_err("%s: failed to find domains sub-node\n", np_pmu->name);
+		return 0;
+	}
+
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	spin_lock_init(&pmu->lock);
+	pmu->of_node = np_pmu;
+	pmu->pmc_base = of_iomap(pmu->of_node, 0);
+	pmu->pmu_base = of_iomap(pmu->of_node, 1);
+	if (!pmu->pmc_base || !pmu->pmu_base) {
+		pr_err("%s: failed to map PMU\n", np_pmu->name);
+		iounmap(pmu->pmu_base);
+		iounmap(pmu->pmc_base);
+		kfree(pmu);
+		return -ENOMEM;
+	}
+
+	pmu_reset_init(pmu);
+
+	for_each_available_child_of_node(domains_node, np) {
+		struct of_phandle_args args;
+		struct pmu_domain *domain;
+
+		domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+		if (!domain)
+			break;
+
+		domain->pmu = pmu;
+		domain->base.name = kstrdup(np->name, GFP_KERNEL);
+		if (!domain->base.name) {
+			kfree(domain);
+			break;
+		}
+
+		of_property_read_u32(np, "marvell,pmu_pwr_mask",
+				     &domain->pwr_mask);
+		of_property_read_u32(np, "marvell,pmu_iso_mask",
+				     &domain->iso_mask);
+
+		/*
+		 * We parse the reset controller property directly here
+		 * to ensure that we can operate when the reset controller
+		 * support is not configured into the kernel.
+		 */
+		ret = of_parse_phandle_with_args(np, "resets", "#reset-cells",
+						 0, &args);
+		if (ret == 0) {
+			if (args.np == pmu->of_node)
+				domain->rst_mask = BIT(args.args[0]);
+			of_node_put(args.np);
+		}
+
+		__pmu_domain_register(domain, np);
+	}
+	pm_genpd_poweroff_unused();
+
+	/* Loss of the interrupt controller is not a fatal error. */
+	parent_irq = irq_of_parse_and_map(pmu->of_node, 0);
+	if (!parent_irq) {
+		pr_err("%s: no interrupt specified\n", np_pmu->name);
+	} else {
+		ret = dove_init_pmu_irq(pmu, parent_irq);
+		if (ret)
+			pr_err("dove_init_pmu_irq() failed: %d\n", ret);
+	}
+
+	return 0;
+}
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 5eea374..ba47b70 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -13,7 +13,38 @@
 config QCOM_PM
 	bool "Qualcomm Power Management"
 	depends on ARCH_QCOM && !ARM64
+	select QCOM_SCM
 	help
 	  QCOM Platform specific power driver to manage cores and L2 low power
 	  modes. It interface with various system drivers to put the cores in
 	  low power modes.
+
+config QCOM_SMD
+	tristate "Qualcomm Shared Memory Driver (SMD)"
+	depends on QCOM_SMEM
+	help
+	  Say y here to enable support for the Qualcomm Shared Memory Driver
+	  providing communication channels to remote processors in Qualcomm
+	  platforms.
+
+config QCOM_SMD_RPM
+	tristate "Qualcomm Resource Power Manager (RPM) over SMD"
+	depends on QCOM_SMD && OF
+	help
+	  If you say yes to this option, support will be included for the
+	  Resource Power Manager system found in the Qualcomm 8974 based
+	  devices.
+
+	  This is required to access many regulators, clocks and bus
+	  frequencies controlled by the RPM on these devices.
+
+	  Say M here if you want to include support for the Qualcomm RPM as a
+	  module. This will build a module called "qcom-smd-rpm".
+
+config QCOM_SMEM
+	tristate "Qualcomm Shared Memory Manager (SMEM)"
+	depends on ARCH_QCOM
+	help
+	  Say y here to enable support for the Qualcomm Shared Memory Manager.
+	  The driver provides an interface to items in a heap shared among all
+	  processors in a Qualcomm platform.
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 931d385..10a93d1 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -1,2 +1,5 @@
 obj-$(CONFIG_QCOM_GSBI)	+=	qcom_gsbi.o
 obj-$(CONFIG_QCOM_PM)	+=	spm.o
+obj-$(CONFIG_QCOM_SMD) +=	smd.o
+obj-$(CONFIG_QCOM_SMD_RPM)	+= smd-rpm.o
+obj-$(CONFIG_QCOM_SMEM) +=	smem.o
diff --git a/drivers/soc/qcom/smd-rpm.c b/drivers/soc/qcom/smd-rpm.c
new file mode 100644
index 0000000..1392ccf
--- /dev/null
+++ b/drivers/soc/qcom/smd-rpm.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2015, Sony Mobile Communications AB.
+ * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+
+#include <linux/soc/qcom/smd.h>
+#include <linux/soc/qcom/smd-rpm.h>
+
+#define RPM_REQUEST_TIMEOUT     (5 * HZ)
+
+/**
+ * struct qcom_smd_rpm - state of the rpm device driver
+ * @rpm_channel:	reference to the smd channel
+ * @ack:		completion for acks
+ * @lock:		mutual exclusion around the send/complete pair
+ * @ack_status:		result of the rpm request
+ */
+struct qcom_smd_rpm {
+	struct qcom_smd_channel *rpm_channel;
+
+	struct completion ack;
+	struct mutex lock;
+	int ack_status;
+};
+
+/**
+ * struct qcom_rpm_header - header for all rpm requests and responses
+ * @service_type:	identifier of the service
+ * @length:		length of the payload
+ */
+struct qcom_rpm_header {
+	u32 service_type;
+	u32 length;
+};
+
+/**
+ * struct qcom_rpm_request - request message to the rpm
+ * @msg_id:	identifier of the outgoing message
+ * @flags:	active/sleep state flags
+ * @type:	resource type
+ * @id:		resource id
+ * @data_len:	length of the payload following this header
+ */
+struct qcom_rpm_request {
+	u32 msg_id;
+	u32 flags;
+	u32 type;
+	u32 id;
+	u32 data_len;
+};
+
+/**
+ * struct qcom_rpm_message - response message from the rpm
+ * @msg_type:	indicator of the type of message
+ * @length:	the size of this message, including the message header
+ * @msg_id:	message id
+ * @message:	textual message from the rpm
+ *
+ * Multiple of these messages can be stacked in an rpm message.
+ */
+struct qcom_rpm_message {
+	u32 msg_type;
+	u32 length;
+	union {
+		u32 msg_id;
+		u8 message[0];
+	};
+};
+
+#define RPM_SERVICE_TYPE_REQUEST	0x00716572 /* "req\0" */
+
+#define RPM_MSG_TYPE_ERR		0x00727265 /* "err\0" */
+#define RPM_MSG_TYPE_MSG_ID		0x2367736d /* "msg#" */
+
+/**
+ * qcom_rpm_smd_write - write @buf to @type:@id
+ * @rpm:	rpm handle
+ * @type:	resource type
+ * @id:		resource identifier
+ * @buf:	the data to be written
+ * @count:	number of bytes in @buf
+ */
+int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm,
+		       int state,
+		       u32 type, u32 id,
+		       void *buf,
+		       size_t count)
+{
+	static unsigned msg_id = 1;
+	int left;
+	int ret;
+
+	struct {
+		struct qcom_rpm_header hdr;
+		struct qcom_rpm_request req;
+		u8 payload[count];
+	} pkt;
+
+	/* SMD packets to the RPM may not exceed 256 bytes */
+	if (WARN_ON(sizeof(pkt) >= 256))
+		return -EINVAL;
+
+	mutex_lock(&rpm->lock);
+
+	pkt.hdr.service_type = RPM_SERVICE_TYPE_REQUEST;
+	pkt.hdr.length = sizeof(struct qcom_rpm_request) + count;
+
+	pkt.req.msg_id = msg_id++;
+	pkt.req.flags = BIT(state);
+	pkt.req.type = type;
+	pkt.req.id = id;
+	pkt.req.data_len = count;
+	memcpy(pkt.payload, buf, count);
+
+	ret = qcom_smd_send(rpm->rpm_channel, &pkt, sizeof(pkt));
+	if (ret)
+		goto out;
+
+	left = wait_for_completion_timeout(&rpm->ack, RPM_REQUEST_TIMEOUT);
+	if (!left)
+		ret = -ETIMEDOUT;
+	else
+		ret = rpm->ack_status;
+
+out:
+	mutex_unlock(&rpm->lock);
+	return ret;
+}
+EXPORT_SYMBOL(qcom_rpm_smd_write);
+
+static int qcom_smd_rpm_callback(struct qcom_smd_device *qsdev,
+				 const void *data,
+				 size_t count)
+{
+	const struct qcom_rpm_header *hdr = data;
+	const struct qcom_rpm_message *msg;
+	struct qcom_smd_rpm *rpm = dev_get_drvdata(&qsdev->dev);
+	const u8 *buf = data + sizeof(struct qcom_rpm_header);
+	const u8 *end = buf + hdr->length;
+	char msgbuf[32];
+	int status = 0;
+	u32 len;
+
+	if (hdr->service_type != RPM_SERVICE_TYPE_REQUEST ||
+	    hdr->length < sizeof(struct qcom_rpm_message)) {
+		dev_err(&qsdev->dev, "invalid request\n");
+		return 0;
+	}
+
+	while (buf < end) {
+		msg = (struct qcom_rpm_message *)buf;
+		switch (msg->msg_type) {
+		case RPM_MSG_TYPE_MSG_ID:
+			break;
+		case RPM_MSG_TYPE_ERR:
+			len = min_t(u32, ALIGN(msg->length, 4), sizeof(msgbuf));
+			memcpy_fromio(msgbuf, msg->message, len);
+			msgbuf[len - 1] = 0;
+
+			if (!strcmp(msgbuf, "resource does not exist"))
+				status = -ENXIO;
+			else
+				status = -EINVAL;
+			break;
+		}
+
+		buf = PTR_ALIGN(buf + 2 * sizeof(u32) + msg->length, 4);
+	}
+
+	rpm->ack_status = status;
+	complete(&rpm->ack);
+	return 0;
+}
+
+static int qcom_smd_rpm_probe(struct qcom_smd_device *sdev)
+{
+	struct qcom_smd_rpm *rpm;
+
+	rpm = devm_kzalloc(&sdev->dev, sizeof(*rpm), GFP_KERNEL);
+	if (!rpm)
+		return -ENOMEM;
+
+	mutex_init(&rpm->lock);
+	init_completion(&rpm->ack);
+
+	rpm->rpm_channel = sdev->channel;
+
+	dev_set_drvdata(&sdev->dev, rpm);
+
+	return of_platform_populate(sdev->dev.of_node, NULL, NULL, &sdev->dev);
+}
+
+static void qcom_smd_rpm_remove(struct qcom_smd_device *sdev)
+{
+	of_platform_depopulate(&sdev->dev);
+}
+
+static const struct of_device_id qcom_smd_rpm_of_match[] = {
+	{ .compatible = "qcom,rpm-msm8974" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, qcom_smd_rpm_of_match);
+
+static struct qcom_smd_driver qcom_smd_rpm_driver = {
+	.probe = qcom_smd_rpm_probe,
+	.remove = qcom_smd_rpm_remove,
+	.callback = qcom_smd_rpm_callback,
+	.driver  = {
+		.name  = "qcom_smd_rpm",
+		.owner = THIS_MODULE,
+		.of_match_table = qcom_smd_rpm_of_match,
+	},
+};
+
+static int __init qcom_smd_rpm_init(void)
+{
+	return qcom_smd_driver_register(&qcom_smd_rpm_driver);
+}
+arch_initcall(qcom_smd_rpm_init);
+
+static void __exit qcom_smd_rpm_exit(void)
+{
+	qcom_smd_driver_unregister(&qcom_smd_rpm_driver);
+}
+module_exit(qcom_smd_rpm_exit);
+
+MODULE_AUTHOR("Bjorn Andersson <bjorn.andersson@sonymobile.com>");
+MODULE_DESCRIPTION("Qualcomm SMD backed RPM driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c
new file mode 100644
index 0000000..327adcf
--- /dev/null
+++ b/drivers/soc/qcom/smd.c
@@ -0,0 +1,1319 @@
+/*
+ * Copyright (c) 2015, Sony Mobile Communications AB.
+ * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/smd.h>
+#include <linux/soc/qcom/smem.h>
+#include <linux/wait.h>
+
+/*
+ * The Qualcomm Shared Memory communication solution provides point-to-point
+ * channels for clients to send and receive streaming or packet based data.
+ *
+ * Each channel consists of a control item (channel info) and a ring buffer
+ * pair. The channel info carry information related to channel state, flow
+ * control and the offsets within the ring buffer.
+ *
+ * All allocated channels are listed in an allocation table, identifying the
+ * pair of items by name, type and remote processor.
+ *
+ * Upon creating a new channel the remote processor allocates channel info and
+ * ring buffer items from the smem heap and populate the allocation table. An
+ * interrupt is sent to the other end of the channel and a scan for new
+ * channels should be done. A channel never goes away, it will only change
+ * state.
+ *
+ * The remote processor signals it intent for bring up the communication
+ * channel by setting the state of its end of the channel to "opening" and
+ * sends out an interrupt. We detect this change and register a smd device to
+ * consume the channel. Upon finding a consumer we finish the handshake and the
+ * channel is up.
+ *
+ * Upon closing a channel, the remote processor will update the state of its
+ * end of the channel and signal us, we will then unregister any attached
+ * device and close our end of the channel.
+ *
+ * Devices attached to a channel can use the qcom_smd_send function to push
+ * data to the channel, this is done by copying the data into the tx ring
+ * buffer, updating the pointers in the channel info and signaling the remote
+ * processor.
+ *
+ * The remote processor does the equivalent when it transfer data and upon
+ * receiving the interrupt we check the channel info for new data and delivers
+ * this to the attached device. If the device is not ready to receive the data
+ * we leave it in the ring buffer for now.
+ */
+
+struct smd_channel_info;
+struct smd_channel_info_word;
+
+#define SMD_ALLOC_TBL_COUNT	2
+#define SMD_ALLOC_TBL_SIZE	64
+
+/*
+ * This lists the various smem heap items relevant for the allocation table and
+ * smd channel entries.
+ */
+static const struct {
+	unsigned alloc_tbl_id;
+	unsigned info_base_id;
+	unsigned fifo_base_id;
+} smem_items[SMD_ALLOC_TBL_COUNT] = {
+	{
+		.alloc_tbl_id = 13,
+		.info_base_id = 14,
+		.fifo_base_id = 338
+	},
+	{
+		.alloc_tbl_id = 14,
+		.info_base_id = 266,
+		.fifo_base_id = 202,
+	},
+};
+
+/**
+ * struct qcom_smd_edge - representing a remote processor
+ * @smd:		handle to qcom_smd
+ * @of_node:		of_node handle for information related to this edge
+ * @edge_id:		identifier of this edge
+ * @irq:		interrupt for signals on this edge
+ * @ipc_regmap:		regmap handle holding the outgoing ipc register
+ * @ipc_offset:		offset within @ipc_regmap of the register for ipc
+ * @ipc_bit:		bit in the register at @ipc_offset of @ipc_regmap
+ * @channels:		list of all channels detected on this edge
+ * @channels_lock:	guard for modifications of @channels
+ * @allocated:		array of bitmaps representing already allocated channels
+ * @need_rescan:	flag that the @work needs to scan smem for new channels
+ * @smem_available:	last available amount of smem triggering a channel scan
+ * @work:		work item for edge house keeping
+ */
+struct qcom_smd_edge {
+	struct qcom_smd *smd;
+	struct device_node *of_node;
+	unsigned edge_id;
+
+	int irq;
+
+	struct regmap *ipc_regmap;
+	int ipc_offset;
+	int ipc_bit;
+
+	struct list_head channels;
+	spinlock_t channels_lock;
+
+	DECLARE_BITMAP(allocated[SMD_ALLOC_TBL_COUNT], SMD_ALLOC_TBL_SIZE);
+
+	bool need_rescan;
+	unsigned smem_available;
+
+	struct work_struct work;
+};
+
+/*
+ * SMD channel states.
+ */
+enum smd_channel_state {
+	SMD_CHANNEL_CLOSED,
+	SMD_CHANNEL_OPENING,
+	SMD_CHANNEL_OPENED,
+	SMD_CHANNEL_FLUSHING,
+	SMD_CHANNEL_CLOSING,
+	SMD_CHANNEL_RESET,
+	SMD_CHANNEL_RESET_OPENING
+};
+
+/**
+ * struct qcom_smd_channel - smd channel struct
+ * @edge:		qcom_smd_edge this channel is living on
+ * @qsdev:		reference to a associated smd client device
+ * @name:		name of the channel
+ * @state:		local state of the channel
+ * @remote_state:	remote state of the channel
+ * @tx_info:		byte aligned outgoing channel info
+ * @rx_info:		byte aligned incoming channel info
+ * @tx_info_word:	word aligned outgoing channel info
+ * @rx_info_word:	word aligned incoming channel info
+ * @tx_lock:		lock to make writes to the channel mutually exclusive
+ * @fblockread_event:	wakeup event tied to tx fBLOCKREADINTR
+ * @tx_fifo:		pointer to the outgoing ring buffer
+ * @rx_fifo:		pointer to the incoming ring buffer
+ * @fifo_size:		size of each ring buffer
+ * @bounce_buffer:	bounce buffer for reading wrapped packets
+ * @cb:			callback function registered for this channel
+ * @recv_lock:		guard for rx info modifications and cb pointer
+ * @pkt_size:		size of the currently handled packet
+ * @list:		lite entry for @channels in qcom_smd_edge
+ */
+struct qcom_smd_channel {
+	struct qcom_smd_edge *edge;
+
+	struct qcom_smd_device *qsdev;
+
+	char *name;
+	enum smd_channel_state state;
+	enum smd_channel_state remote_state;
+
+	struct smd_channel_info *tx_info;
+	struct smd_channel_info *rx_info;
+
+	struct smd_channel_info_word *tx_info_word;
+	struct smd_channel_info_word *rx_info_word;
+
+	struct mutex tx_lock;
+	wait_queue_head_t fblockread_event;
+
+	void *tx_fifo;
+	void *rx_fifo;
+	int fifo_size;
+
+	void *bounce_buffer;
+	int (*cb)(struct qcom_smd_device *, const void *, size_t);
+
+	spinlock_t recv_lock;
+
+	int pkt_size;
+
+	struct list_head list;
+};
+
+/**
+ * struct qcom_smd - smd struct
+ * @dev:	device struct
+ * @num_edges:	number of entries in @edges
+ * @edges:	array of edges to be handled
+ */
+struct qcom_smd {
+	struct device *dev;
+
+	unsigned num_edges;
+	struct qcom_smd_edge edges[0];
+};
+
+/*
+ * Format of the smd_info smem items, for byte aligned channels.
+ */
+struct smd_channel_info {
+	u32 state;
+	u8  fDSR;
+	u8  fCTS;
+	u8  fCD;
+	u8  fRI;
+	u8  fHEAD;
+	u8  fTAIL;
+	u8  fSTATE;
+	u8  fBLOCKREADINTR;
+	u32 tail;
+	u32 head;
+};
+
+/*
+ * Format of the smd_info smem items, for word aligned channels.
+ */
+struct smd_channel_info_word {
+	u32 state;
+	u32 fDSR;
+	u32 fCTS;
+	u32 fCD;
+	u32 fRI;
+	u32 fHEAD;
+	u32 fTAIL;
+	u32 fSTATE;
+	u32 fBLOCKREADINTR;
+	u32 tail;
+	u32 head;
+};
+
+#define GET_RX_CHANNEL_INFO(channel, param) \
+	(channel->rx_info_word ? \
+		channel->rx_info_word->param : \
+		channel->rx_info->param)
+
+#define SET_RX_CHANNEL_INFO(channel, param, value) \
+	(channel->rx_info_word ? \
+		(channel->rx_info_word->param = value) : \
+		(channel->rx_info->param = value))
+
+#define GET_TX_CHANNEL_INFO(channel, param) \
+	(channel->tx_info_word ? \
+		channel->tx_info_word->param : \
+		channel->tx_info->param)
+
+#define SET_TX_CHANNEL_INFO(channel, param, value) \
+	(channel->tx_info_word ? \
+		(channel->tx_info_word->param = value) : \
+		(channel->tx_info->param = value))
+
+/**
+ * struct qcom_smd_alloc_entry - channel allocation entry
+ * @name:	channel name
+ * @cid:	channel index
+ * @flags:	channel flags and edge id
+ * @ref_count:	reference count of the channel
+ */
+struct qcom_smd_alloc_entry {
+	u8 name[20];
+	u32 cid;
+	u32 flags;
+	u32 ref_count;
+} __packed;
+
+#define SMD_CHANNEL_FLAGS_EDGE_MASK	0xff
+#define SMD_CHANNEL_FLAGS_STREAM	BIT(8)
+#define SMD_CHANNEL_FLAGS_PACKET	BIT(9)
+
+/*
+ * Each smd packet contains a 20 byte header, with the first 4 being the length
+ * of the packet.
+ */
+#define SMD_PACKET_HEADER_LEN	20
+
+/*
+ * Signal the remote processor associated with 'channel'.
+ */
+static void qcom_smd_signal_channel(struct qcom_smd_channel *channel)
+{
+	struct qcom_smd_edge *edge = channel->edge;
+
+	regmap_write(edge->ipc_regmap, edge->ipc_offset, BIT(edge->ipc_bit));
+}
+
+/*
+ * Initialize the tx channel info
+ */
+static void qcom_smd_channel_reset(struct qcom_smd_channel *channel)
+{
+	SET_TX_CHANNEL_INFO(channel, state, SMD_CHANNEL_CLOSED);
+	SET_TX_CHANNEL_INFO(channel, fDSR, 0);
+	SET_TX_CHANNEL_INFO(channel, fCTS, 0);
+	SET_TX_CHANNEL_INFO(channel, fCD, 0);
+	SET_TX_CHANNEL_INFO(channel, fRI, 0);
+	SET_TX_CHANNEL_INFO(channel, fHEAD, 0);
+	SET_TX_CHANNEL_INFO(channel, fTAIL, 0);
+	SET_TX_CHANNEL_INFO(channel, fSTATE, 1);
+	SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 0);
+	SET_TX_CHANNEL_INFO(channel, head, 0);
+	SET_TX_CHANNEL_INFO(channel, tail, 0);
+
+	qcom_smd_signal_channel(channel);
+
+	channel->state = SMD_CHANNEL_CLOSED;
+	channel->pkt_size = 0;
+}
+
+/*
+ * Calculate the amount of data available in the rx fifo
+ */
+static size_t qcom_smd_channel_get_rx_avail(struct qcom_smd_channel *channel)
+{
+	unsigned head;
+	unsigned tail;
+
+	head = GET_RX_CHANNEL_INFO(channel, head);
+	tail = GET_RX_CHANNEL_INFO(channel, tail);
+
+	return (head - tail) & (channel->fifo_size - 1);
+}
+
+/*
+ * Set tx channel state and inform the remote processor
+ */
+static void qcom_smd_channel_set_state(struct qcom_smd_channel *channel,
+				       int state)
+{
+	struct qcom_smd_edge *edge = channel->edge;
+	bool is_open = state == SMD_CHANNEL_OPENED;
+
+	if (channel->state == state)
+		return;
+
+	dev_dbg(edge->smd->dev, "set_state(%s, %d)\n", channel->name, state);
+
+	SET_TX_CHANNEL_INFO(channel, fDSR, is_open);
+	SET_TX_CHANNEL_INFO(channel, fCTS, is_open);
+	SET_TX_CHANNEL_INFO(channel, fCD, is_open);
+
+	SET_TX_CHANNEL_INFO(channel, state, state);
+	SET_TX_CHANNEL_INFO(channel, fSTATE, 1);
+
+	channel->state = state;
+	qcom_smd_signal_channel(channel);
+}
+
+/*
+ * Copy count bytes of data using 32bit accesses, if that's required.
+ */
+static void smd_copy_to_fifo(void __iomem *_dst,
+			     const void *_src,
+			     size_t count,
+			     bool word_aligned)
+{
+	u32 *dst = (u32 *)_dst;
+	u32 *src = (u32 *)_src;
+
+	if (word_aligned) {
+		count /= sizeof(u32);
+		while (count--)
+			writel_relaxed(*src++, dst++);
+	} else {
+		memcpy_toio(_dst, _src, count);
+	}
+}
+
+/*
+ * Copy count bytes of data using 32bit accesses, if that is required.
+ */
+static void smd_copy_from_fifo(void *_dst,
+			       const void __iomem *_src,
+			       size_t count,
+			       bool word_aligned)
+{
+	u32 *dst = (u32 *)_dst;
+	u32 *src = (u32 *)_src;
+
+	if (word_aligned) {
+		count /= sizeof(u32);
+		while (count--)
+			*dst++ = readl_relaxed(src++);
+	} else {
+		memcpy_fromio(_dst, _src, count);
+	}
+}
+
+/*
+ * Read count bytes of data from the rx fifo into buf, but don't advance the
+ * tail.
+ */
+static size_t qcom_smd_channel_peek(struct qcom_smd_channel *channel,
+				    void *buf, size_t count)
+{
+	bool word_aligned;
+	unsigned tail;
+	size_t len;
+
+	word_aligned = channel->rx_info_word != NULL;
+	tail = GET_RX_CHANNEL_INFO(channel, tail);
+
+	len = min_t(size_t, count, channel->fifo_size - tail);
+	if (len) {
+		smd_copy_from_fifo(buf,
+				   channel->rx_fifo + tail,
+				   len,
+				   word_aligned);
+	}
+
+	if (len != count) {
+		smd_copy_from_fifo(buf + len,
+				   channel->rx_fifo,
+				   count - len,
+				   word_aligned);
+	}
+
+	return count;
+}
+
+/*
+ * Advance the rx tail by count bytes.
+ */
+static void qcom_smd_channel_advance(struct qcom_smd_channel *channel,
+				     size_t count)
+{
+	unsigned tail;
+
+	tail = GET_RX_CHANNEL_INFO(channel, tail);
+	tail += count;
+	tail &= (channel->fifo_size - 1);
+	SET_RX_CHANNEL_INFO(channel, tail, tail);
+}
+
+/*
+ * Read out a single packet from the rx fifo and deliver it to the device
+ */
+static int qcom_smd_channel_recv_single(struct qcom_smd_channel *channel)
+{
+	struct qcom_smd_device *qsdev = channel->qsdev;
+	unsigned tail;
+	size_t len;
+	void *ptr;
+	int ret;
+
+	if (!channel->cb)
+		return 0;
+
+	tail = GET_RX_CHANNEL_INFO(channel, tail);
+
+	/* Use bounce buffer if the data wraps */
+	if (tail + channel->pkt_size >= channel->fifo_size) {
+		ptr = channel->bounce_buffer;
+		len = qcom_smd_channel_peek(channel, ptr, channel->pkt_size);
+	} else {
+		ptr = channel->rx_fifo + tail;
+		len = channel->pkt_size;
+	}
+
+	ret = channel->cb(qsdev, ptr, len);
+	if (ret < 0)
+		return ret;
+
+	/* Only forward the tail if the client consumed the data */
+	qcom_smd_channel_advance(channel, len);
+
+	channel->pkt_size = 0;
+
+	return 0;
+}
+
+/*
+ * Per channel interrupt handling
+ */
+static bool qcom_smd_channel_intr(struct qcom_smd_channel *channel)
+{
+	bool need_state_scan = false;
+	int remote_state;
+	u32 pktlen;
+	int avail;
+	int ret;
+
+	/* Handle state changes */
+	remote_state = GET_RX_CHANNEL_INFO(channel, state);
+	if (remote_state != channel->remote_state) {
+		channel->remote_state = remote_state;
+		need_state_scan = true;
+	}
+	/* Indicate that we have seen any state change */
+	SET_RX_CHANNEL_INFO(channel, fSTATE, 0);
+
+	/* Signal waiting qcom_smd_send() about the interrupt */
+	if (!GET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR))
+		wake_up_interruptible(&channel->fblockread_event);
+
+	/* Don't consume any data until we've opened the channel */
+	if (channel->state != SMD_CHANNEL_OPENED)
+		goto out;
+
+	/* Indicate that we've seen the new data */
+	SET_RX_CHANNEL_INFO(channel, fHEAD, 0);
+
+	/* Consume data */
+	for (;;) {
+		avail = qcom_smd_channel_get_rx_avail(channel);
+
+		if (!channel->pkt_size && avail >= SMD_PACKET_HEADER_LEN) {
+			qcom_smd_channel_peek(channel, &pktlen, sizeof(pktlen));
+			qcom_smd_channel_advance(channel, SMD_PACKET_HEADER_LEN);
+			channel->pkt_size = pktlen;
+		} else if (channel->pkt_size && avail >= channel->pkt_size) {
+			ret = qcom_smd_channel_recv_single(channel);
+			if (ret)
+				break;
+		} else {
+			break;
+		}
+	}
+
+	/* Indicate that we have seen and updated tail */
+	SET_RX_CHANNEL_INFO(channel, fTAIL, 1);
+
+	/* Signal the remote that we've consumed the data (if requested) */
+	if (!GET_RX_CHANNEL_INFO(channel, fBLOCKREADINTR)) {
+		/* Ensure ordering of channel info updates */
+		wmb();
+
+		qcom_smd_signal_channel(channel);
+	}
+
+out:
+	return need_state_scan;
+}
+
+/*
+ * The edge interrupts are triggered by the remote processor on state changes,
+ * channel info updates or when new channels are created.
+ */
+static irqreturn_t qcom_smd_edge_intr(int irq, void *data)
+{
+	struct qcom_smd_edge *edge = data;
+	struct qcom_smd_channel *channel;
+	unsigned available;
+	bool kick_worker = false;
+
+	/*
+	 * Handle state changes or data on each of the channels on this edge
+	 */
+	spin_lock(&edge->channels_lock);
+	list_for_each_entry(channel, &edge->channels, list) {
+		spin_lock(&channel->recv_lock);
+		kick_worker |= qcom_smd_channel_intr(channel);
+		spin_unlock(&channel->recv_lock);
+	}
+	spin_unlock(&edge->channels_lock);
+
+	/*
+	 * Creating a new channel requires allocating an smem entry, so we only
+	 * have to scan if the amount of available space in smem have changed
+	 * since last scan.
+	 */
+	available = qcom_smem_get_free_space(edge->edge_id);
+	if (available != edge->smem_available) {
+		edge->smem_available = available;
+		edge->need_rescan = true;
+		kick_worker = true;
+	}
+
+	if (kick_worker)
+		schedule_work(&edge->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Delivers any outstanding packets in the rx fifo, can be used after probe of
+ * the clients to deliver any packets that wasn't delivered before the client
+ * was setup.
+ */
+static void qcom_smd_channel_resume(struct qcom_smd_channel *channel)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&channel->recv_lock, flags);
+	qcom_smd_channel_intr(channel);
+	spin_unlock_irqrestore(&channel->recv_lock, flags);
+}
+
+/*
+ * Calculate how much space is available in the tx fifo.
+ */
+static size_t qcom_smd_get_tx_avail(struct qcom_smd_channel *channel)
+{
+	unsigned head;
+	unsigned tail;
+	unsigned mask = channel->fifo_size - 1;
+
+	head = GET_TX_CHANNEL_INFO(channel, head);
+	tail = GET_TX_CHANNEL_INFO(channel, tail);
+
+	return mask - ((head - tail) & mask);
+}
+
+/*
+ * Write count bytes of data into channel, possibly wrapping in the ring buffer
+ */
+static int qcom_smd_write_fifo(struct qcom_smd_channel *channel,
+			       const void *data,
+			       size_t count)
+{
+	bool word_aligned;
+	unsigned head;
+	size_t len;
+
+	word_aligned = channel->tx_info_word != NULL;
+	head = GET_TX_CHANNEL_INFO(channel, head);
+
+	len = min_t(size_t, count, channel->fifo_size - head);
+	if (len) {
+		smd_copy_to_fifo(channel->tx_fifo + head,
+				 data,
+				 len,
+				 word_aligned);
+	}
+
+	if (len != count) {
+		smd_copy_to_fifo(channel->tx_fifo,
+				 data + len,
+				 count - len,
+				 word_aligned);
+	}
+
+	head += count;
+	head &= (channel->fifo_size - 1);
+	SET_TX_CHANNEL_INFO(channel, head, head);
+
+	return count;
+}
+
+/**
+ * qcom_smd_send - write data to smd channel
+ * @channel:	channel handle
+ * @data:	buffer of data to write
+ * @len:	number of bytes to write
+ *
+ * This is a blocking write of len bytes into the channel's tx ring buffer and
+ * signal the remote end. It will sleep until there is enough space available
+ * in the tx buffer, utilizing the fBLOCKREADINTR signaling mechanism to avoid
+ * polling.
+ */
+int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len)
+{
+	u32 hdr[5] = {len,};
+	int tlen = sizeof(hdr) + len;
+	int ret;
+
+	/* Word aligned channels only accept word size aligned data */
+	if (channel->rx_info_word != NULL && len % 4)
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&channel->tx_lock);
+	if (ret)
+		return ret;
+
+	while (qcom_smd_get_tx_avail(channel) < tlen) {
+		if (channel->state != SMD_CHANNEL_OPENED) {
+			ret = -EPIPE;
+			goto out;
+		}
+
+		SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 1);
+
+		ret = wait_event_interruptible(channel->fblockread_event,
+				       qcom_smd_get_tx_avail(channel) >= tlen ||
+				       channel->state != SMD_CHANNEL_OPENED);
+		if (ret)
+			goto out;
+
+		SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 0);
+	}
+
+	SET_TX_CHANNEL_INFO(channel, fTAIL, 0);
+
+	qcom_smd_write_fifo(channel, hdr, sizeof(hdr));
+	qcom_smd_write_fifo(channel, data, len);
+
+	SET_TX_CHANNEL_INFO(channel, fHEAD, 1);
+
+	/* Ensure ordering of channel info updates */
+	wmb();
+
+	qcom_smd_signal_channel(channel);
+
+out:
+	mutex_unlock(&channel->tx_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(qcom_smd_send);
+
+static struct qcom_smd_device *to_smd_device(struct device *dev)
+{
+	return container_of(dev, struct qcom_smd_device, dev);
+}
+
+static struct qcom_smd_driver *to_smd_driver(struct device *dev)
+{
+	struct qcom_smd_device *qsdev = to_smd_device(dev);
+
+	return container_of(qsdev->dev.driver, struct qcom_smd_driver, driver);
+}
+
+static int qcom_smd_dev_match(struct device *dev, struct device_driver *drv)
+{
+	return of_driver_match_device(dev, drv);
+}
+
+/*
+ * Probe the smd client.
+ *
+ * The remote side have indicated that it want the channel to be opened, so
+ * complete the state handshake and probe our client driver.
+ */
+static int qcom_smd_dev_probe(struct device *dev)
+{
+	struct qcom_smd_device *qsdev = to_smd_device(dev);
+	struct qcom_smd_driver *qsdrv = to_smd_driver(dev);
+	struct qcom_smd_channel *channel = qsdev->channel;
+	size_t bb_size;
+	int ret;
+
+	/*
+	 * Packets are maximum 4k, but reduce if the fifo is smaller
+	 */
+	bb_size = min(channel->fifo_size, SZ_4K);
+	channel->bounce_buffer = kmalloc(bb_size, GFP_KERNEL);
+	if (!channel->bounce_buffer)
+		return -ENOMEM;
+
+	channel->cb = qsdrv->callback;
+
+	qcom_smd_channel_set_state(channel, SMD_CHANNEL_OPENING);
+
+	qcom_smd_channel_set_state(channel, SMD_CHANNEL_OPENED);
+
+	ret = qsdrv->probe(qsdev);
+	if (ret)
+		goto err;
+
+	qcom_smd_channel_resume(channel);
+
+	return 0;
+
+err:
+	dev_err(&qsdev->dev, "probe failed\n");
+
+	channel->cb = NULL;
+	kfree(channel->bounce_buffer);
+	channel->bounce_buffer = NULL;
+
+	qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSED);
+	return ret;
+}
+
+/*
+ * Remove the smd client.
+ *
+ * The channel is going away, for some reason, so remove the smd client and
+ * reset the channel state.
+ */
+static int qcom_smd_dev_remove(struct device *dev)
+{
+	struct qcom_smd_device *qsdev = to_smd_device(dev);
+	struct qcom_smd_driver *qsdrv = to_smd_driver(dev);
+	struct qcom_smd_channel *channel = qsdev->channel;
+	unsigned long flags;
+
+	qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSING);
+
+	/*
+	 * Make sure we don't race with the code receiving data.
+	 */
+	spin_lock_irqsave(&channel->recv_lock, flags);
+	channel->cb = NULL;
+	spin_unlock_irqrestore(&channel->recv_lock, flags);
+
+	/* Wake up any sleepers in qcom_smd_send() */
+	wake_up_interruptible(&channel->fblockread_event);
+
+	/*
+	 * We expect that the client might block in remove() waiting for any
+	 * outstanding calls to qcom_smd_send() to wake up and finish.
+	 */
+	if (qsdrv->remove)
+		qsdrv->remove(qsdev);
+
+	/*
+	 * The client is now gone, cleanup and reset the channel state.
+	 */
+	channel->qsdev = NULL;
+	kfree(channel->bounce_buffer);
+	channel->bounce_buffer = NULL;
+
+	qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSED);
+
+	qcom_smd_channel_reset(channel);
+
+	return 0;
+}
+
+static struct bus_type qcom_smd_bus = {
+	.name = "qcom_smd",
+	.match = qcom_smd_dev_match,
+	.probe = qcom_smd_dev_probe,
+	.remove = qcom_smd_dev_remove,
+};
+
+/*
+ * Release function for the qcom_smd_device object.
+ */
+static void qcom_smd_release_device(struct device *dev)
+{
+	struct qcom_smd_device *qsdev = to_smd_device(dev);
+
+	kfree(qsdev);
+}
+
+/*
+ * Finds the device_node for the smd child interested in this channel.
+ */
+static struct device_node *qcom_smd_match_channel(struct device_node *edge_node,
+						  const char *channel)
+{
+	struct device_node *child;
+	const char *name;
+	const char *key;
+	int ret;
+
+	for_each_available_child_of_node(edge_node, child) {
+		key = "qcom,smd-channels";
+		ret = of_property_read_string(child, key, &name);
+		if (ret) {
+			of_node_put(child);
+			continue;
+		}
+
+		if (strcmp(name, channel) == 0)
+			return child;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create a smd client device for channel that is being opened.
+ */
+static int qcom_smd_create_device(struct qcom_smd_channel *channel)
+{
+	struct qcom_smd_device *qsdev;
+	struct qcom_smd_edge *edge = channel->edge;
+	struct device_node *node;
+	struct qcom_smd *smd = edge->smd;
+	int ret;
+
+	if (channel->qsdev)
+		return -EEXIST;
+
+	node = qcom_smd_match_channel(edge->of_node, channel->name);
+	if (!node) {
+		dev_dbg(smd->dev, "no match for '%s'\n", channel->name);
+		return -ENXIO;
+	}
+
+	dev_dbg(smd->dev, "registering '%s'\n", channel->name);
+
+	qsdev = kzalloc(sizeof(*qsdev), GFP_KERNEL);
+	if (!qsdev)
+		return -ENOMEM;
+
+	dev_set_name(&qsdev->dev, "%s.%s", edge->of_node->name, node->name);
+	qsdev->dev.parent = smd->dev;
+	qsdev->dev.bus = &qcom_smd_bus;
+	qsdev->dev.release = qcom_smd_release_device;
+	qsdev->dev.of_node = node;
+
+	qsdev->channel = channel;
+
+	channel->qsdev = qsdev;
+
+	ret = device_register(&qsdev->dev);
+	if (ret) {
+		dev_err(smd->dev, "device_register failed: %d\n", ret);
+		put_device(&qsdev->dev);
+	}
+
+	return ret;
+}
+
+/*
+ * Destroy a smd client device for a channel that's going away.
+ */
+static void qcom_smd_destroy_device(struct qcom_smd_channel *channel)
+{
+	struct device *dev;
+
+	BUG_ON(!channel->qsdev);
+
+	dev = &channel->qsdev->dev;
+
+	device_unregister(dev);
+	of_node_put(dev->of_node);
+	put_device(dev);
+}
+
+/**
+ * qcom_smd_driver_register - register a smd driver
+ * @qsdrv:	qcom_smd_driver struct
+ */
+int qcom_smd_driver_register(struct qcom_smd_driver *qsdrv)
+{
+	qsdrv->driver.bus = &qcom_smd_bus;
+	return driver_register(&qsdrv->driver);
+}
+EXPORT_SYMBOL(qcom_smd_driver_register);
+
+/**
+ * qcom_smd_driver_unregister - unregister a smd driver
+ * @qsdrv:	qcom_smd_driver struct
+ */
+void qcom_smd_driver_unregister(struct qcom_smd_driver *qsdrv)
+{
+	driver_unregister(&qsdrv->driver);
+}
+EXPORT_SYMBOL(qcom_smd_driver_unregister);
+
+/*
+ * Allocate the qcom_smd_channel object for a newly found smd channel,
+ * retrieving and validating the smem items involved.
+ */
+static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *edge,
+							unsigned smem_info_item,
+							unsigned smem_fifo_item,
+							char *name)
+{
+	struct qcom_smd_channel *channel;
+	struct qcom_smd *smd = edge->smd;
+	size_t fifo_size;
+	size_t info_size;
+	void *fifo_base;
+	void *info;
+	int ret;
+
+	channel = devm_kzalloc(smd->dev, sizeof(*channel), GFP_KERNEL);
+	if (!channel)
+		return ERR_PTR(-ENOMEM);
+
+	channel->edge = edge;
+	channel->name = devm_kstrdup(smd->dev, name, GFP_KERNEL);
+	if (!channel->name)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&channel->tx_lock);
+	spin_lock_init(&channel->recv_lock);
+	init_waitqueue_head(&channel->fblockread_event);
+
+	ret = qcom_smem_get(edge->edge_id, smem_info_item, (void **)&info, &info_size);
+	if (ret)
+		goto free_name_and_channel;
+
+	/*
+	 * Use the size of the item to figure out which channel info struct to
+	 * use.
+	 */
+	if (info_size == 2 * sizeof(struct smd_channel_info_word)) {
+		channel->tx_info_word = info;
+		channel->rx_info_word = info + sizeof(struct smd_channel_info_word);
+	} else if (info_size == 2 * sizeof(struct smd_channel_info)) {
+		channel->tx_info = info;
+		channel->rx_info = info + sizeof(struct smd_channel_info);
+	} else {
+		dev_err(smd->dev,
+			"channel info of size %zu not supported\n", info_size);
+		ret = -EINVAL;
+		goto free_name_and_channel;
+	}
+
+	ret = qcom_smem_get(edge->edge_id, smem_fifo_item, &fifo_base, &fifo_size);
+	if (ret)
+		goto free_name_and_channel;
+
+	/* The channel consist of a rx and tx fifo of equal size */
+	fifo_size /= 2;
+
+	dev_dbg(smd->dev, "new channel '%s' info-size: %zu fifo-size: %zu\n",
+			  name, info_size, fifo_size);
+
+	channel->tx_fifo = fifo_base;
+	channel->rx_fifo = fifo_base + fifo_size;
+	channel->fifo_size = fifo_size;
+
+	qcom_smd_channel_reset(channel);
+
+	return channel;
+
+free_name_and_channel:
+	devm_kfree(smd->dev, channel->name);
+	devm_kfree(smd->dev, channel);
+
+	return ERR_PTR(ret);
+}
+
+/*
+ * Scans the allocation table for any newly allocated channels, calls
+ * qcom_smd_create_channel() to create representations of these and add
+ * them to the edge's list of channels.
+ */
+static void qcom_discover_channels(struct qcom_smd_edge *edge)
+{
+	struct qcom_smd_alloc_entry *alloc_tbl;
+	struct qcom_smd_alloc_entry *entry;
+	struct qcom_smd_channel *channel;
+	struct qcom_smd *smd = edge->smd;
+	unsigned long flags;
+	unsigned fifo_id;
+	unsigned info_id;
+	int ret;
+	int tbl;
+	int i;
+
+	for (tbl = 0; tbl < SMD_ALLOC_TBL_COUNT; tbl++) {
+		ret = qcom_smem_get(edge->edge_id,
+				    smem_items[tbl].alloc_tbl_id,
+				    (void **)&alloc_tbl,
+				    NULL);
+		if (ret < 0)
+			continue;
+
+		for (i = 0; i < SMD_ALLOC_TBL_SIZE; i++) {
+			entry = &alloc_tbl[i];
+			if (test_bit(i, edge->allocated[tbl]))
+				continue;
+
+			if (entry->ref_count == 0)
+				continue;
+
+			if (!entry->name[0])
+				continue;
+
+			if (!(entry->flags & SMD_CHANNEL_FLAGS_PACKET))
+				continue;
+
+			if ((entry->flags & SMD_CHANNEL_FLAGS_EDGE_MASK) != edge->edge_id)
+				continue;
+
+			info_id = smem_items[tbl].info_base_id + entry->cid;
+			fifo_id = smem_items[tbl].fifo_base_id + entry->cid;
+
+			channel = qcom_smd_create_channel(edge, info_id, fifo_id, entry->name);
+			if (IS_ERR(channel))
+				continue;
+
+			spin_lock_irqsave(&edge->channels_lock, flags);
+			list_add(&channel->list, &edge->channels);
+			spin_unlock_irqrestore(&edge->channels_lock, flags);
+
+			dev_dbg(smd->dev, "new channel found: '%s'\n", channel->name);
+			set_bit(i, edge->allocated[tbl]);
+		}
+	}
+
+	schedule_work(&edge->work);
+}
+
+/*
+ * This per edge worker scans smem for any new channels and register these. It
+ * then scans all registered channels for state changes that should be handled
+ * by creating or destroying smd client devices for the registered channels.
+ *
+ * LOCKING: edge->channels_lock is not needed to be held during the traversal
+ * of the channels list as it's done synchronously with the only writer.
+ */
+static void qcom_channel_state_worker(struct work_struct *work)
+{
+	struct qcom_smd_channel *channel;
+	struct qcom_smd_edge *edge = container_of(work,
+						  struct qcom_smd_edge,
+						  work);
+	unsigned remote_state;
+
+	/*
+	 * Rescan smem if we have reason to belive that there are new channels.
+	 */
+	if (edge->need_rescan) {
+		edge->need_rescan = false;
+		qcom_discover_channels(edge);
+	}
+
+	/*
+	 * Register a device for any closed channel where the remote processor
+	 * is showing interest in opening the channel.
+	 */
+	list_for_each_entry(channel, &edge->channels, list) {
+		if (channel->state != SMD_CHANNEL_CLOSED)
+			continue;
+
+		remote_state = GET_RX_CHANNEL_INFO(channel, state);
+		if (remote_state != SMD_CHANNEL_OPENING &&
+		    remote_state != SMD_CHANNEL_OPENED)
+			continue;
+
+		qcom_smd_create_device(channel);
+	}
+
+	/*
+	 * Unregister the device for any channel that is opened where the
+	 * remote processor is closing the channel.
+	 */
+	list_for_each_entry(channel, &edge->channels, list) {
+		if (channel->state != SMD_CHANNEL_OPENING &&
+		    channel->state != SMD_CHANNEL_OPENED)
+			continue;
+
+		remote_state = GET_RX_CHANNEL_INFO(channel, state);
+		if (remote_state == SMD_CHANNEL_OPENING ||
+		    remote_state == SMD_CHANNEL_OPENED)
+			continue;
+
+		qcom_smd_destroy_device(channel);
+	}
+}
+
+/*
+ * Parses an of_node describing an edge.
+ */
+static int qcom_smd_parse_edge(struct device *dev,
+			       struct device_node *node,
+			       struct qcom_smd_edge *edge)
+{
+	struct device_node *syscon_np;
+	const char *key;
+	int irq;
+	int ret;
+
+	INIT_LIST_HEAD(&edge->channels);
+	spin_lock_init(&edge->channels_lock);
+
+	INIT_WORK(&edge->work, qcom_channel_state_worker);
+
+	edge->of_node = of_node_get(node);
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (irq < 0) {
+		dev_err(dev, "required smd interrupt missing\n");
+		return -EINVAL;
+	}
+
+	ret = devm_request_irq(dev, irq,
+			       qcom_smd_edge_intr, IRQF_TRIGGER_RISING,
+			       node->name, edge);
+	if (ret) {
+		dev_err(dev, "failed to request smd irq\n");
+		return ret;
+	}
+
+	edge->irq = irq;
+
+	key = "qcom,smd-edge";
+	ret = of_property_read_u32(node, key, &edge->edge_id);
+	if (ret) {
+		dev_err(dev, "edge missing %s property\n", key);
+		return -EINVAL;
+	}
+
+	syscon_np = of_parse_phandle(node, "qcom,ipc", 0);
+	if (!syscon_np) {
+		dev_err(dev, "no qcom,ipc node\n");
+		return -ENODEV;
+	}
+
+	edge->ipc_regmap = syscon_node_to_regmap(syscon_np);
+	if (IS_ERR(edge->ipc_regmap))
+		return PTR_ERR(edge->ipc_regmap);
+
+	key = "qcom,ipc";
+	ret = of_property_read_u32_index(node, key, 1, &edge->ipc_offset);
+	if (ret < 0) {
+		dev_err(dev, "no offset in %s\n", key);
+		return -EINVAL;
+	}
+
+	ret = of_property_read_u32_index(node, key, 2, &edge->ipc_bit);
+	if (ret < 0) {
+		dev_err(dev, "no bit in %s\n", key);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int qcom_smd_probe(struct platform_device *pdev)
+{
+	struct qcom_smd_edge *edge;
+	struct device_node *node;
+	struct qcom_smd *smd;
+	size_t array_size;
+	int num_edges;
+	int ret;
+	int i = 0;
+
+	/* Wait for smem */
+	ret = qcom_smem_get(QCOM_SMEM_HOST_ANY, smem_items[0].alloc_tbl_id, NULL, NULL);
+	if (ret == -EPROBE_DEFER)
+		return ret;
+
+	num_edges = of_get_available_child_count(pdev->dev.of_node);
+	array_size = sizeof(*smd) + num_edges * sizeof(struct qcom_smd_edge);
+	smd = devm_kzalloc(&pdev->dev, array_size, GFP_KERNEL);
+	if (!smd)
+		return -ENOMEM;
+	smd->dev = &pdev->dev;
+
+	smd->num_edges = num_edges;
+	for_each_available_child_of_node(pdev->dev.of_node, node) {
+		edge = &smd->edges[i++];
+		edge->smd = smd;
+
+		ret = qcom_smd_parse_edge(&pdev->dev, node, edge);
+		if (ret)
+			continue;
+
+		edge->need_rescan = true;
+		schedule_work(&edge->work);
+	}
+
+	platform_set_drvdata(pdev, smd);
+
+	return 0;
+}
+
+/*
+ * Shut down all smd clients by making sure that each edge stops processing
+ * events and scanning for new channels, then call destroy on the devices.
+ */
+static int qcom_smd_remove(struct platform_device *pdev)
+{
+	struct qcom_smd_channel *channel;
+	struct qcom_smd_edge *edge;
+	struct qcom_smd *smd = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < smd->num_edges; i++) {
+		edge = &smd->edges[i];
+
+		disable_irq(edge->irq);
+		cancel_work_sync(&edge->work);
+
+		list_for_each_entry(channel, &edge->channels, list) {
+			if (!channel->qsdev)
+				continue;
+
+			qcom_smd_destroy_device(channel);
+		}
+	}
+
+	return 0;
+}
+
+static const struct of_device_id qcom_smd_of_match[] = {
+	{ .compatible = "qcom,smd" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, qcom_smd_of_match);
+
+static struct platform_driver qcom_smd_driver = {
+	.probe = qcom_smd_probe,
+	.remove = qcom_smd_remove,
+	.driver = {
+		.name = "qcom-smd",
+		.of_match_table = qcom_smd_of_match,
+	},
+};
+
+static int __init qcom_smd_init(void)
+{
+	int ret;
+
+	ret = bus_register(&qcom_smd_bus);
+	if (ret) {
+		pr_err("failed to register smd bus: %d\n", ret);
+		return ret;
+	}
+
+	return platform_driver_register(&qcom_smd_driver);
+}
+postcore_initcall(qcom_smd_init);
+
+static void __exit qcom_smd_exit(void)
+{
+	platform_driver_unregister(&qcom_smd_driver);
+	bus_unregister(&qcom_smd_bus);
+}
+module_exit(qcom_smd_exit);
+
+MODULE_AUTHOR("Bjorn Andersson <bjorn.andersson@sonymobile.com>");
+MODULE_DESCRIPTION("Qualcomm Shared Memory Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c
new file mode 100644
index 0000000..7c2c324c
--- /dev/null
+++ b/drivers/soc/qcom/smem.c
@@ -0,0 +1,775 @@
+/*
+ * Copyright (c) 2015, Sony Mobile Communications AB.
+ * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/hwspinlock.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/smem.h>
+
+/*
+ * The Qualcomm shared memory system is a allocate only heap structure that
+ * consists of one of more memory areas that can be accessed by the processors
+ * in the SoC.
+ *
+ * All systems contains a global heap, accessible by all processors in the SoC,
+ * with a table of contents data structure (@smem_header) at the beginning of
+ * the main shared memory block.
+ *
+ * The global header contains meta data for allocations as well as a fixed list
+ * of 512 entries (@smem_global_entry) that can be initialized to reference
+ * parts of the shared memory space.
+ *
+ *
+ * In addition to this global heap a set of "private" heaps can be set up at
+ * boot time with access restrictions so that only certain processor pairs can
+ * access the data.
+ *
+ * These partitions are referenced from an optional partition table
+ * (@smem_ptable), that is found 4kB from the end of the main smem region. The
+ * partition table entries (@smem_ptable_entry) lists the involved processors
+ * (or hosts) and their location in the main shared memory region.
+ *
+ * Each partition starts with a header (@smem_partition_header) that identifies
+ * the partition and holds properties for the two internal memory regions. The
+ * two regions are cached and non-cached memory respectively. Each region
+ * contain a link list of allocation headers (@smem_private_entry) followed by
+ * their data.
+ *
+ * Items in the non-cached region are allocated from the start of the partition
+ * while items in the cached region are allocated from the end. The free area
+ * is hence the region between the cached and non-cached offsets.
+ *
+ *
+ * To synchronize allocations in the shared memory heaps a remote spinlock must
+ * be held - currently lock number 3 of the sfpb or tcsr is used for this on all
+ * platforms.
+ *
+ */
+
+/*
+ * Item 3 of the global heap contains an array of versions for the various
+ * software components in the SoC. We verify that the boot loader version is
+ * what the expected version (SMEM_EXPECTED_VERSION) as a sanity check.
+ */
+#define SMEM_ITEM_VERSION	3
+#define  SMEM_MASTER_SBL_VERSION_INDEX	7
+#define  SMEM_EXPECTED_VERSION		11
+
+/*
+ * The first 8 items are only to be allocated by the boot loader while
+ * initializing the heap.
+ */
+#define SMEM_ITEM_LAST_FIXED	8
+
+/* Highest accepted item number, for both global and private heaps */
+#define SMEM_ITEM_COUNT		512
+
+/* Processor/host identifier for the application processor */
+#define SMEM_HOST_APPS		0
+
+/* Max number of processors/hosts in a system */
+#define SMEM_HOST_COUNT		9
+
+/**
+  * struct smem_proc_comm - proc_comm communication struct (legacy)
+  * @command:	current command to be executed
+  * @status:	status of the currently requested command
+  * @params:	parameters to the command
+  */
+struct smem_proc_comm {
+	u32 command;
+	u32 status;
+	u32 params[2];
+};
+
+/**
+ * struct smem_global_entry - entry to reference smem items on the heap
+ * @allocated:	boolean to indicate if this entry is used
+ * @offset:	offset to the allocated space
+ * @size:	size of the allocated space, 8 byte aligned
+ * @aux_base:	base address for the memory region used by this unit, or 0 for
+ *		the default region. bits 0,1 are reserved
+ */
+struct smem_global_entry {
+	u32 allocated;
+	u32 offset;
+	u32 size;
+	u32 aux_base; /* bits 1:0 reserved */
+};
+#define AUX_BASE_MASK		0xfffffffc
+
+/**
+ * struct smem_header - header found in beginning of primary smem region
+ * @proc_comm:		proc_comm communication interface (legacy)
+ * @version:		array of versions for the various subsystems
+ * @initialized:	boolean to indicate that smem is initialized
+ * @free_offset:	index of the first unallocated byte in smem
+ * @available:		number of bytes available for allocation
+ * @reserved:		reserved field, must be 0
+ * toc:			array of references to items
+ */
+struct smem_header {
+	struct smem_proc_comm proc_comm[4];
+	u32 version[32];
+	u32 initialized;
+	u32 free_offset;
+	u32 available;
+	u32 reserved;
+	struct smem_global_entry toc[SMEM_ITEM_COUNT];
+};
+
+/**
+ * struct smem_ptable_entry - one entry in the @smem_ptable list
+ * @offset:	offset, within the main shared memory region, of the partition
+ * @size:	size of the partition
+ * @flags:	flags for the partition (currently unused)
+ * @host0:	first processor/host with access to this partition
+ * @host1:	second processor/host with access to this partition
+ * @reserved:	reserved entries for later use
+ */
+struct smem_ptable_entry {
+	u32 offset;
+	u32 size;
+	u32 flags;
+	u16 host0;
+	u16 host1;
+	u32 reserved[8];
+};
+
+/**
+ * struct smem_ptable - partition table for the private partitions
+ * @magic:	magic number, must be SMEM_PTABLE_MAGIC
+ * @version:	version of the partition table
+ * @num_entries: number of partitions in the table
+ * @reserved:	for now reserved entries
+ * @entry:	list of @smem_ptable_entry for the @num_entries partitions
+ */
+struct smem_ptable {
+	u32 magic;
+	u32 version;
+	u32 num_entries;
+	u32 reserved[5];
+	struct smem_ptable_entry entry[];
+};
+#define SMEM_PTABLE_MAGIC	0x434f5424 /* "$TOC" */
+
+/**
+ * struct smem_partition_header - header of the partitions
+ * @magic:	magic number, must be SMEM_PART_MAGIC
+ * @host0:	first processor/host with access to this partition
+ * @host1:	second processor/host with access to this partition
+ * @size:	size of the partition
+ * @offset_free_uncached: offset to the first free byte of uncached memory in
+ *		this partition
+ * @offset_free_cached: offset to the first free byte of cached memory in this
+ *		partition
+ * @reserved:	for now reserved entries
+ */
+struct smem_partition_header {
+	u32 magic;
+	u16 host0;
+	u16 host1;
+	u32 size;
+	u32 offset_free_uncached;
+	u32 offset_free_cached;
+	u32 reserved[3];
+};
+#define SMEM_PART_MAGIC		0x54525024 /* "$PRT" */
+
+/**
+ * struct smem_private_entry - header of each item in the private partition
+ * @canary:	magic number, must be SMEM_PRIVATE_CANARY
+ * @item:	identifying number of the smem item
+ * @size:	size of the data, including padding bytes
+ * @padding_data: number of bytes of padding of data
+ * @padding_hdr: number of bytes of padding between the header and the data
+ * @reserved:	for now reserved entry
+ */
+struct smem_private_entry {
+	u16 canary;
+	u16 item;
+	u32 size; /* includes padding bytes */
+	u16 padding_data;
+	u16 padding_hdr;
+	u32 reserved;
+};
+#define SMEM_PRIVATE_CANARY	0xa5a5
+
+/**
+ * struct smem_region - representation of a chunk of memory used for smem
+ * @aux_base:	identifier of aux_mem base
+ * @virt_base:	virtual base address of memory with this aux_mem identifier
+ * @size:	size of the memory region
+ */
+struct smem_region {
+	u32 aux_base;
+	void __iomem *virt_base;
+	size_t size;
+};
+
+/**
+ * struct qcom_smem - device data for the smem device
+ * @dev:	device pointer
+ * @hwlock:	reference to a hwspinlock
+ * @partitions:	list of pointers to partitions affecting the current
+ *		processor/host
+ * @num_regions: number of @regions
+ * @regions:	list of the memory regions defining the shared memory
+ */
+struct qcom_smem {
+	struct device *dev;
+
+	struct hwspinlock *hwlock;
+
+	struct smem_partition_header *partitions[SMEM_HOST_COUNT];
+
+	unsigned num_regions;
+	struct smem_region regions[0];
+};
+
+/* Pointer to the one and only smem handle */
+static struct qcom_smem *__smem;
+
+/* Timeout (ms) for the trylock of remote spinlocks */
+#define HWSPINLOCK_TIMEOUT	1000
+
+static int qcom_smem_alloc_private(struct qcom_smem *smem,
+				   unsigned host,
+				   unsigned item,
+				   size_t size)
+{
+	struct smem_partition_header *phdr;
+	struct smem_private_entry *hdr;
+	size_t alloc_size;
+	void *p;
+
+	/* We're not going to find it if there's no matching partition */
+	if (host >= SMEM_HOST_COUNT || !smem->partitions[host])
+		return -ENOENT;
+
+	phdr = smem->partitions[host];
+
+	p = (void *)phdr + sizeof(*phdr);
+	while (p < (void *)phdr + phdr->offset_free_uncached) {
+		hdr = p;
+
+		if (hdr->canary != SMEM_PRIVATE_CANARY) {
+			dev_err(smem->dev,
+				"Found invalid canary in host %d partition\n",
+				host);
+			return -EINVAL;
+		}
+
+		if (hdr->item == item)
+			return -EEXIST;
+
+		p += sizeof(*hdr) + hdr->padding_hdr + hdr->size;
+	}
+
+	/* Check that we don't grow into the cached region */
+	alloc_size = sizeof(*hdr) + ALIGN(size, 8);
+	if (p + alloc_size >= (void *)phdr + phdr->offset_free_cached) {
+		dev_err(smem->dev, "Out of memory\n");
+		return -ENOSPC;
+	}
+
+	hdr = p;
+	hdr->canary = SMEM_PRIVATE_CANARY;
+	hdr->item = item;
+	hdr->size = ALIGN(size, 8);
+	hdr->padding_data = hdr->size - size;
+	hdr->padding_hdr = 0;
+
+	/*
+	 * Ensure the header is written before we advance the free offset, so
+	 * that remote processors that does not take the remote spinlock still
+	 * gets a consistent view of the linked list.
+	 */
+	wmb();
+	phdr->offset_free_uncached += alloc_size;
+
+	return 0;
+}
+
+static int qcom_smem_alloc_global(struct qcom_smem *smem,
+				  unsigned item,
+				  size_t size)
+{
+	struct smem_header *header;
+	struct smem_global_entry *entry;
+
+	if (WARN_ON(item >= SMEM_ITEM_COUNT))
+		return -EINVAL;
+
+	header = smem->regions[0].virt_base;
+	entry = &header->toc[item];
+	if (entry->allocated)
+		return -EEXIST;
+
+	size = ALIGN(size, 8);
+	if (WARN_ON(size > header->available))
+		return -ENOMEM;
+
+	entry->offset = header->free_offset;
+	entry->size = size;
+
+	/*
+	 * Ensure the header is consistent before we mark the item allocated,
+	 * so that remote processors will get a consistent view of the item
+	 * even though they do not take the spinlock on read.
+	 */
+	wmb();
+	entry->allocated = 1;
+
+	header->free_offset += size;
+	header->available -= size;
+
+	return 0;
+}
+
+/**
+ * qcom_smem_alloc() - allocate space for a smem item
+ * @host:	remote processor id, or -1
+ * @item:	smem item handle
+ * @size:	number of bytes to be allocated
+ *
+ * Allocate space for a given smem item of size @size, given that the item is
+ * not yet allocated.
+ */
+int qcom_smem_alloc(unsigned host, unsigned item, size_t size)
+{
+	unsigned long flags;
+	int ret;
+
+	if (!__smem)
+		return -EPROBE_DEFER;
+
+	if (item < SMEM_ITEM_LAST_FIXED) {
+		dev_err(__smem->dev,
+			"Rejecting allocation of static entry %d\n", item);
+		return -EINVAL;
+	}
+
+	ret = hwspin_lock_timeout_irqsave(__smem->hwlock,
+					  HWSPINLOCK_TIMEOUT,
+					  &flags);
+	if (ret)
+		return ret;
+
+	ret = qcom_smem_alloc_private(__smem, host, item, size);
+	if (ret == -ENOENT)
+		ret = qcom_smem_alloc_global(__smem, item, size);
+
+	hwspin_unlock_irqrestore(__smem->hwlock, &flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(qcom_smem_alloc);
+
+static int qcom_smem_get_global(struct qcom_smem *smem,
+				unsigned item,
+				void **ptr,
+				size_t *size)
+{
+	struct smem_header *header;
+	struct smem_region *area;
+	struct smem_global_entry *entry;
+	u32 aux_base;
+	unsigned i;
+
+	if (WARN_ON(item >= SMEM_ITEM_COUNT))
+		return -EINVAL;
+
+	header = smem->regions[0].virt_base;
+	entry = &header->toc[item];
+	if (!entry->allocated)
+		return -ENXIO;
+
+	if (ptr != NULL) {
+		aux_base = entry->aux_base & AUX_BASE_MASK;
+
+		for (i = 0; i < smem->num_regions; i++) {
+			area = &smem->regions[i];
+
+			if (area->aux_base == aux_base || !aux_base) {
+				*ptr = area->virt_base + entry->offset;
+				break;
+			}
+		}
+	}
+	if (size != NULL)
+		*size = entry->size;
+
+	return 0;
+}
+
+static int qcom_smem_get_private(struct qcom_smem *smem,
+				 unsigned host,
+				 unsigned item,
+				 void **ptr,
+				 size_t *size)
+{
+	struct smem_partition_header *phdr;
+	struct smem_private_entry *hdr;
+	void *p;
+
+	/* We're not going to find it if there's no matching partition */
+	if (host >= SMEM_HOST_COUNT || !smem->partitions[host])
+		return -ENOENT;
+
+	phdr = smem->partitions[host];
+
+	p = (void *)phdr + sizeof(*phdr);
+	while (p < (void *)phdr + phdr->offset_free_uncached) {
+		hdr = p;
+
+		if (hdr->canary != SMEM_PRIVATE_CANARY) {
+			dev_err(smem->dev,
+				"Found invalid canary in host %d partition\n",
+				host);
+			return -EINVAL;
+		}
+
+		if (hdr->item == item) {
+			if (ptr != NULL)
+				*ptr = p + sizeof(*hdr) + hdr->padding_hdr;
+
+			if (size != NULL)
+				*size = hdr->size - hdr->padding_data;
+
+			return 0;
+		}
+
+		p += sizeof(*hdr) + hdr->padding_hdr + hdr->size;
+	}
+
+	return -ENOENT;
+}
+
+/**
+ * qcom_smem_get() - resolve ptr of size of a smem item
+ * @host:	the remote processor, or -1
+ * @item:	smem item handle
+ * @ptr:	pointer to be filled out with address of the item
+ * @size:	pointer to be filled out with size of the item
+ *
+ * Looks up pointer and size of a smem item.
+ */
+int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size)
+{
+	unsigned long flags;
+	int ret;
+
+	if (!__smem)
+		return -EPROBE_DEFER;
+
+	ret = hwspin_lock_timeout_irqsave(__smem->hwlock,
+					  HWSPINLOCK_TIMEOUT,
+					  &flags);
+	if (ret)
+		return ret;
+
+	ret = qcom_smem_get_private(__smem, host, item, ptr, size);
+	if (ret == -ENOENT)
+		ret = qcom_smem_get_global(__smem, item, ptr, size);
+
+	hwspin_unlock_irqrestore(__smem->hwlock, &flags);
+	return ret;
+
+}
+EXPORT_SYMBOL(qcom_smem_get);
+
+/**
+ * qcom_smem_get_free_space() - retrieve amount of free space in a partition
+ * @host:	the remote processor identifying a partition, or -1
+ *
+ * To be used by smem clients as a quick way to determine if any new
+ * allocations has been made.
+ */
+int qcom_smem_get_free_space(unsigned host)
+{
+	struct smem_partition_header *phdr;
+	struct smem_header *header;
+	unsigned ret;
+
+	if (!__smem)
+		return -EPROBE_DEFER;
+
+	if (host < SMEM_HOST_COUNT && __smem->partitions[host]) {
+		phdr = __smem->partitions[host];
+		ret = phdr->offset_free_cached - phdr->offset_free_uncached;
+	} else {
+		header = __smem->regions[0].virt_base;
+		ret = header->available;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(qcom_smem_get_free_space);
+
+static int qcom_smem_get_sbl_version(struct qcom_smem *smem)
+{
+	unsigned *versions;
+	size_t size;
+	int ret;
+
+	ret = qcom_smem_get_global(smem, SMEM_ITEM_VERSION,
+				   (void **)&versions, &size);
+	if (ret < 0) {
+		dev_err(smem->dev, "Unable to read the version item\n");
+		return -ENOENT;
+	}
+
+	if (size < sizeof(unsigned) * SMEM_MASTER_SBL_VERSION_INDEX) {
+		dev_err(smem->dev, "Version item is too small\n");
+		return -EINVAL;
+	}
+
+	return versions[SMEM_MASTER_SBL_VERSION_INDEX];
+}
+
+static int qcom_smem_enumerate_partitions(struct qcom_smem *smem,
+					  unsigned local_host)
+{
+	struct smem_partition_header *header;
+	struct smem_ptable_entry *entry;
+	struct smem_ptable *ptable;
+	unsigned remote_host;
+	int i;
+
+	ptable = smem->regions[0].virt_base + smem->regions[0].size - SZ_4K;
+	if (ptable->magic != SMEM_PTABLE_MAGIC)
+		return 0;
+
+	if (ptable->version != 1) {
+		dev_err(smem->dev,
+			"Unsupported partition header version %d\n",
+			ptable->version);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ptable->num_entries; i++) {
+		entry = &ptable->entry[i];
+
+		if (entry->host0 != local_host && entry->host1 != local_host)
+			continue;
+
+		if (!entry->offset)
+			continue;
+
+		if (!entry->size)
+			continue;
+
+		if (entry->host0 == local_host)
+			remote_host = entry->host1;
+		else
+			remote_host = entry->host0;
+
+		if (remote_host >= SMEM_HOST_COUNT) {
+			dev_err(smem->dev,
+				"Invalid remote host %d\n",
+				remote_host);
+			return -EINVAL;
+		}
+
+		if (smem->partitions[remote_host]) {
+			dev_err(smem->dev,
+				"Already found a partition for host %d\n",
+				remote_host);
+			return -EINVAL;
+		}
+
+		header = smem->regions[0].virt_base + entry->offset;
+
+		if (header->magic != SMEM_PART_MAGIC) {
+			dev_err(smem->dev,
+				"Partition %d has invalid magic\n", i);
+			return -EINVAL;
+		}
+
+		if (header->host0 != local_host && header->host1 != local_host) {
+			dev_err(smem->dev,
+				"Partition %d hosts are invalid\n", i);
+			return -EINVAL;
+		}
+
+		if (header->host0 != remote_host && header->host1 != remote_host) {
+			dev_err(smem->dev,
+				"Partition %d hosts are invalid\n", i);
+			return -EINVAL;
+		}
+
+		if (header->size != entry->size) {
+			dev_err(smem->dev,
+				"Partition %d has invalid size\n", i);
+			return -EINVAL;
+		}
+
+		if (header->offset_free_uncached > header->size) {
+			dev_err(smem->dev,
+				"Partition %d has invalid free pointer\n", i);
+			return -EINVAL;
+		}
+
+		smem->partitions[remote_host] = header;
+	}
+
+	return 0;
+}
+
+static int qcom_smem_count_mem_regions(struct platform_device *pdev)
+{
+	struct resource *res;
+	int num_regions = 0;
+	int i;
+
+	for (i = 0; i < pdev->num_resources; i++) {
+		res = &pdev->resource[i];
+
+		if (resource_type(res) == IORESOURCE_MEM)
+			num_regions++;
+	}
+
+	return num_regions;
+}
+
+static int qcom_smem_probe(struct platform_device *pdev)
+{
+	struct smem_header *header;
+	struct device_node *np;
+	struct qcom_smem *smem;
+	struct resource *res;
+	struct resource r;
+	size_t array_size;
+	int num_regions = 0;
+	int hwlock_id;
+	u32 version;
+	int ret;
+	int i;
+
+	num_regions = qcom_smem_count_mem_regions(pdev) + 1;
+
+	array_size = num_regions * sizeof(struct smem_region);
+	smem = devm_kzalloc(&pdev->dev, sizeof(*smem) + array_size, GFP_KERNEL);
+	if (!smem)
+		return -ENOMEM;
+
+	smem->dev = &pdev->dev;
+	smem->num_regions = num_regions;
+
+	np = of_parse_phandle(pdev->dev.of_node, "memory-region", 0);
+	if (!np) {
+		dev_err(&pdev->dev, "No memory-region specified\n");
+		return -EINVAL;
+	}
+
+	ret = of_address_to_resource(np, 0, &r);
+	of_node_put(np);
+	if (ret)
+		return ret;
+
+	smem->regions[0].aux_base = (u32)r.start;
+	smem->regions[0].size = resource_size(&r);
+	smem->regions[0].virt_base = devm_ioremap_nocache(&pdev->dev,
+							  r.start,
+							  resource_size(&r));
+	if (!smem->regions[0].virt_base)
+		return -ENOMEM;
+
+	for (i = 1; i < num_regions; i++) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, i - 1);
+
+		smem->regions[i].aux_base = (u32)res->start;
+		smem->regions[i].size = resource_size(res);
+		smem->regions[i].virt_base = devm_ioremap_nocache(&pdev->dev,
+								  res->start,
+								  resource_size(res));
+		if (!smem->regions[i].virt_base)
+			return -ENOMEM;
+	}
+
+	header = smem->regions[0].virt_base;
+	if (header->initialized != 1 || header->reserved) {
+		dev_err(&pdev->dev, "SMEM is not initialized by SBL\n");
+		return -EINVAL;
+	}
+
+	version = qcom_smem_get_sbl_version(smem);
+	if (version >> 16 != SMEM_EXPECTED_VERSION) {
+		dev_err(&pdev->dev, "Unsupported SMEM version 0x%x\n", version);
+		return -EINVAL;
+	}
+
+	ret = qcom_smem_enumerate_partitions(smem, SMEM_HOST_APPS);
+	if (ret < 0)
+		return ret;
+
+	hwlock_id = of_hwspin_lock_get_id(pdev->dev.of_node, 0);
+	if (hwlock_id < 0) {
+		dev_err(&pdev->dev, "failed to retrieve hwlock\n");
+		return hwlock_id;
+	}
+
+	smem->hwlock = hwspin_lock_request_specific(hwlock_id);
+	if (!smem->hwlock)
+		return -ENXIO;
+
+	__smem = smem;
+
+	return 0;
+}
+
+static int qcom_smem_remove(struct platform_device *pdev)
+{
+	__smem = NULL;
+	hwspin_lock_free(__smem->hwlock);
+
+	return 0;
+}
+
+static const struct of_device_id qcom_smem_of_match[] = {
+	{ .compatible = "qcom,smem" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, qcom_smem_of_match);
+
+static struct platform_driver qcom_smem_driver = {
+	.probe = qcom_smem_probe,
+	.remove = qcom_smem_remove,
+	.driver  = {
+		.name = "qcom-smem",
+		.of_match_table = qcom_smem_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+
+static int __init qcom_smem_init(void)
+{
+	return platform_driver_register(&qcom_smem_driver);
+}
+arch_initcall(qcom_smem_init);
+
+static void __exit qcom_smem_exit(void)
+{
+	platform_driver_unregister(&qcom_smem_driver);
+}
+module_exit(qcom_smem_exit)
+
+MODULE_AUTHOR("Bjorn Andersson <bjorn.andersson@sonymobile.com>");
+MODULE_DESCRIPTION("Qualcomm Shared Memory Manager");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile
index cdaad9d..ae857ff 100644
--- a/drivers/soc/tegra/Makefile
+++ b/drivers/soc/tegra/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_ARCH_TEGRA) += fuse/
+obj-y += fuse/
 
-obj-$(CONFIG_ARCH_TEGRA) += common.o
-obj-$(CONFIG_ARCH_TEGRA) += pmc.o
+obj-y += common.o
+obj-y += pmc.o
diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c
index a71cb74..cd8f413 100644
--- a/drivers/soc/tegra/common.c
+++ b/drivers/soc/tegra/common.c
@@ -15,6 +15,8 @@
 	{ .compatible = "nvidia,tegra30", },
 	{ .compatible = "nvidia,tegra114", },
 	{ .compatible = "nvidia,tegra124", },
+	{ .compatible = "nvidia,tegra132", },
+	{ .compatible = "nvidia,tegra210", },
 	{ }
 };
 
diff --git a/drivers/soc/tegra/fuse/Makefile b/drivers/soc/tegra/fuse/Makefile
index 3af357d..21bc275 100644
--- a/drivers/soc/tegra/fuse/Makefile
+++ b/drivers/soc/tegra/fuse/Makefile
@@ -6,3 +6,5 @@
 obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= speedo-tegra30.o
 obj-$(CONFIG_ARCH_TEGRA_114_SOC)	+= speedo-tegra114.o
 obj-$(CONFIG_ARCH_TEGRA_124_SOC)	+= speedo-tegra124.o
+obj-$(CONFIG_ARCH_TEGRA_132_SOC)	+= speedo-tegra124.o
+obj-$(CONFIG_ARCH_TEGRA_210_SOC)	+= speedo-tegra210.o
diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c
index c0d660f..de2c1bf 100644
--- a/drivers/soc/tegra/fuse/fuse-tegra.c
+++ b/drivers/soc/tegra/fuse/fuse-tegra.c
@@ -15,9 +15,10 @@
  *
  */
 
+#include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/kobject.h>
-#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -28,8 +29,6 @@
 
 #include "fuse.h"
 
-static u32 (*fuse_readl)(const unsigned int offset);
-static int fuse_size;
 struct tegra_sku_info tegra_sku_info;
 EXPORT_SYMBOL(tegra_sku_info);
 
@@ -42,11 +41,11 @@
 	[TEGRA_REVISION_A04]     = "A04",
 };
 
-static u8 fuse_readb(const unsigned int offset)
+static u8 fuse_readb(struct tegra_fuse *fuse, unsigned int offset)
 {
 	u32 val;
 
-	val = fuse_readl(round_down(offset, 4));
+	val = fuse->read(fuse, round_down(offset, 4));
 	val >>= (offset % 4) * 8;
 	val &= 0xff;
 
@@ -54,19 +53,21 @@
 }
 
 static ssize_t fuse_read(struct file *fd, struct kobject *kobj,
-			struct bin_attribute *attr, char *buf,
-			loff_t pos, size_t size)
+			 struct bin_attribute *attr, char *buf,
+			 loff_t pos, size_t size)
 {
+	struct device *dev = kobj_to_dev(kobj);
+	struct tegra_fuse *fuse = dev_get_drvdata(dev);
 	int i;
 
-	if (pos < 0 || pos >= fuse_size)
+	if (pos < 0 || pos >= attr->size)
 		return 0;
 
-	if (size > fuse_size - pos)
-		size = fuse_size - pos;
+	if (size > attr->size - pos)
+		size = attr->size - pos;
 
 	for (i = 0; i < size; i++)
-		buf[i] = fuse_readb(pos + i);
+		buf[i] = fuse_readb(fuse, pos + i);
 
 	return i;
 }
@@ -76,15 +77,122 @@
 	.read = fuse_read,
 };
 
+static int tegra_fuse_create_sysfs(struct device *dev, unsigned int size,
+				   const struct tegra_fuse_info *info)
+{
+	fuse_bin_attr.size = size;
+
+	return device_create_bin_file(dev, &fuse_bin_attr);
+}
+
 static const struct of_device_id car_match[] __initconst = {
 	{ .compatible = "nvidia,tegra20-car", },
 	{ .compatible = "nvidia,tegra30-car", },
 	{ .compatible = "nvidia,tegra114-car", },
 	{ .compatible = "nvidia,tegra124-car", },
 	{ .compatible = "nvidia,tegra132-car", },
+	{ .compatible = "nvidia,tegra210-car", },
 	{},
 };
 
+static struct tegra_fuse *fuse = &(struct tegra_fuse) {
+	.base = NULL,
+	.soc = NULL,
+};
+
+static const struct of_device_id tegra_fuse_match[] = {
+#ifdef CONFIG_ARCH_TEGRA_210_SOC
+	{ .compatible = "nvidia,tegra210-efuse", .data = &tegra210_fuse_soc },
+#endif
+#ifdef CONFIG_ARCH_TEGRA_132_SOC
+	{ .compatible = "nvidia,tegra132-efuse", .data = &tegra124_fuse_soc },
+#endif
+#ifdef CONFIG_ARCH_TEGRA_124_SOC
+	{ .compatible = "nvidia,tegra124-efuse", .data = &tegra124_fuse_soc },
+#endif
+#ifdef CONFIG_ARCH_TEGRA_114_SOC
+	{ .compatible = "nvidia,tegra114-efuse", .data = &tegra114_fuse_soc },
+#endif
+#ifdef CONFIG_ARCH_TEGRA_3x_SOC
+	{ .compatible = "nvidia,tegra30-efuse", .data = &tegra30_fuse_soc },
+#endif
+#ifdef CONFIG_ARCH_TEGRA_2x_SOC
+	{ .compatible = "nvidia,tegra20-efuse", .data = &tegra20_fuse_soc },
+#endif
+	{ /* sentinel */ }
+};
+
+static int tegra_fuse_probe(struct platform_device *pdev)
+{
+	void __iomem *base = fuse->base;
+	struct resource *res;
+	int err;
+
+	/* take over the memory region from the early initialization */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fuse->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fuse->base))
+		return PTR_ERR(fuse->base);
+
+	fuse->clk = devm_clk_get(&pdev->dev, "fuse");
+	if (IS_ERR(fuse->clk)) {
+		dev_err(&pdev->dev, "failed to get FUSE clock: %ld",
+			PTR_ERR(fuse->clk));
+		return PTR_ERR(fuse->clk);
+	}
+
+	platform_set_drvdata(pdev, fuse);
+	fuse->dev = &pdev->dev;
+
+	if (fuse->soc->probe) {
+		err = fuse->soc->probe(fuse);
+		if (err < 0)
+			return err;
+	}
+
+	if (tegra_fuse_create_sysfs(&pdev->dev, fuse->soc->info->size,
+				    fuse->soc->info))
+		return -ENODEV;
+
+	/* release the early I/O memory mapping */
+	iounmap(base);
+
+	return 0;
+}
+
+static struct platform_driver tegra_fuse_driver = {
+	.driver = {
+		.name = "tegra-fuse",
+		.of_match_table = tegra_fuse_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = tegra_fuse_probe,
+};
+module_platform_driver(tegra_fuse_driver);
+
+bool __init tegra_fuse_read_spare(unsigned int spare)
+{
+	unsigned int offset = fuse->soc->info->spare + spare * 4;
+
+	return fuse->read_early(fuse, offset) & 1;
+}
+
+u32 __init tegra_fuse_read_early(unsigned int offset)
+{
+	return fuse->read_early(fuse, offset);
+}
+
+int tegra_fuse_readl(unsigned long offset, u32 *value)
+{
+	if (!fuse->read)
+		return -EPROBE_DEFER;
+
+	*value = fuse->read(fuse, offset);
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_fuse_readl);
+
 static void tegra_enable_fuse_clk(void __iomem *base)
 {
 	u32 reg;
@@ -102,63 +210,106 @@
 	writel(reg, base + 0x14);
 }
 
-int tegra_fuse_readl(unsigned long offset, u32 *value)
-{
-	if (!fuse_readl)
-		return -EPROBE_DEFER;
-
-	*value = fuse_readl(offset);
-
-	return 0;
-}
-EXPORT_SYMBOL(tegra_fuse_readl);
-
-int tegra_fuse_create_sysfs(struct device *dev, int size,
-		     u32 (*readl)(const unsigned int offset))
-{
-	if (fuse_size)
-		return -ENODEV;
-
-	fuse_bin_attr.size = size;
-	fuse_bin_attr.read = fuse_read;
-
-	fuse_size = size;
-	fuse_readl = readl;
-
-	return device_create_bin_file(dev, &fuse_bin_attr);
-}
-
 static int __init tegra_init_fuse(void)
 {
+	const struct of_device_id *match;
 	struct device_node *np;
-	void __iomem *car_base;
-
-	if (!soc_is_tegra())
-		return 0;
+	struct resource regs;
 
 	tegra_init_apbmisc();
 
-	np = of_find_matching_node(NULL, car_match);
-	car_base = of_iomap(np, 0);
-	if (car_base) {
-		tegra_enable_fuse_clk(car_base);
-		iounmap(car_base);
+	np = of_find_matching_node_and_match(NULL, tegra_fuse_match, &match);
+	if (!np) {
+		/*
+		 * Fall back to legacy initialization for 32-bit ARM only. All
+		 * 64-bit ARM device tree files for Tegra are required to have
+		 * a FUSE node.
+		 *
+		 * This is for backwards-compatibility with old device trees
+		 * that didn't contain a FUSE node.
+		 */
+		if (IS_ENABLED(CONFIG_ARM) && soc_is_tegra()) {
+			u8 chip = tegra_get_chip_id();
+
+			regs.start = 0x7000f800;
+			regs.end = 0x7000fbff;
+			regs.flags = IORESOURCE_MEM;
+
+			switch (chip) {
+#ifdef CONFIG_ARCH_TEGRA_2x_SOC
+			case TEGRA20:
+				fuse->soc = &tegra20_fuse_soc;
+				break;
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_3x_SOC
+			case TEGRA30:
+				fuse->soc = &tegra30_fuse_soc;
+				break;
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_114_SOC
+			case TEGRA114:
+				fuse->soc = &tegra114_fuse_soc;
+				break;
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_124_SOC
+			case TEGRA124:
+				fuse->soc = &tegra124_fuse_soc;
+				break;
+#endif
+
+			default:
+				pr_warn("Unsupported SoC: %02x\n", chip);
+				break;
+			}
+		} else {
+			/*
+			 * At this point we're not running on Tegra, so play
+			 * nice with multi-platform kernels.
+			 */
+			return 0;
+		}
 	} else {
-		pr_err("Could not enable fuse clk. ioremap tegra car failed.\n");
+		/*
+		 * Extract information from the device tree if we've found a
+		 * matching node.
+		 */
+		if (of_address_to_resource(np, 0, &regs) < 0) {
+			pr_err("failed to get FUSE register\n");
+			return -ENXIO;
+		}
+
+		fuse->soc = match->data;
+	}
+
+	np = of_find_matching_node(NULL, car_match);
+	if (np) {
+		void __iomem *base = of_iomap(np, 0);
+		if (base) {
+			tegra_enable_fuse_clk(base);
+			iounmap(base);
+		} else {
+			pr_err("failed to map clock registers\n");
+			return -ENXIO;
+		}
+	}
+
+	fuse->base = ioremap_nocache(regs.start, resource_size(&regs));
+	if (!fuse->base) {
+		pr_err("failed to map FUSE registers\n");
 		return -ENXIO;
 	}
 
-	if (tegra_get_chip_id() == TEGRA20)
-		tegra20_init_fuse_early();
-	else
-		tegra30_init_fuse_early();
+	fuse->soc->init(fuse);
 
-	pr_info("Tegra Revision: %s SKU: %d CPU Process: %d Core Process: %d\n",
+	pr_info("Tegra Revision: %s SKU: %d CPU Process: %d SoC Process: %d\n",
 		tegra_revision_name[tegra_sku_info.revision],
 		tegra_sku_info.sku_id, tegra_sku_info.cpu_process_id,
-		tegra_sku_info.core_process_id);
-	pr_debug("Tegra CPU Speedo ID %d, Soc Speedo ID %d\n",
-		tegra_sku_info.cpu_speedo_id, tegra_sku_info.soc_speedo_id);
+		tegra_sku_info.soc_process_id);
+	pr_debug("Tegra CPU Speedo ID %d, SoC Speedo ID %d\n",
+		 tegra_sku_info.cpu_speedo_id, tegra_sku_info.soc_speedo_id);
 
 	return 0;
 }
diff --git a/drivers/soc/tegra/fuse/fuse-tegra20.c b/drivers/soc/tegra/fuse/fuse-tegra20.c
index 6acc2c4..294413a 100644
--- a/drivers/soc/tegra/fuse/fuse-tegra20.c
+++ b/drivers/soc/tegra/fuse/fuse-tegra20.c
@@ -34,159 +34,107 @@
 #include "fuse.h"
 
 #define FUSE_BEGIN	0x100
-#define FUSE_SIZE	0x1f8
 #define FUSE_UID_LOW	0x08
 #define FUSE_UID_HIGH	0x0c
 
-static phys_addr_t fuse_phys;
-static struct clk *fuse_clk;
-static void __iomem __initdata *fuse_base;
-
-static DEFINE_MUTEX(apb_dma_lock);
-static DECLARE_COMPLETION(apb_dma_wait);
-static struct dma_chan *apb_dma_chan;
-static struct dma_slave_config dma_sconfig;
-static u32 *apb_buffer;
-static dma_addr_t apb_buffer_phys;
+static u32 tegra20_fuse_read_early(struct tegra_fuse *fuse, unsigned int offset)
+{
+	return readl_relaxed(fuse->base + FUSE_BEGIN + offset);
+}
 
 static void apb_dma_complete(void *args)
 {
-	complete(&apb_dma_wait);
+	struct tegra_fuse *fuse = args;
+
+	complete(&fuse->apbdma.wait);
 }
 
-static u32 tegra20_fuse_readl(const unsigned int offset)
+static u32 tegra20_fuse_read(struct tegra_fuse *fuse, unsigned int offset)
 {
-	int ret;
-	u32 val = 0;
+	unsigned long flags = DMA_PREP_INTERRUPT | DMA_CTRL_ACK;
 	struct dma_async_tx_descriptor *dma_desc;
 	unsigned long time_left;
+	u32 value = 0;
+	int err;
 
-	mutex_lock(&apb_dma_lock);
+	mutex_lock(&fuse->apbdma.lock);
 
-	dma_sconfig.src_addr = fuse_phys + FUSE_BEGIN + offset;
-	ret = dmaengine_slave_config(apb_dma_chan, &dma_sconfig);
-	if (ret)
+	fuse->apbdma.config.src_addr = fuse->apbdma.phys + FUSE_BEGIN + offset;
+
+	err = dmaengine_slave_config(fuse->apbdma.chan, &fuse->apbdma.config);
+	if (err)
 		goto out;
 
-	dma_desc = dmaengine_prep_slave_single(apb_dma_chan, apb_buffer_phys,
-			sizeof(u32), DMA_DEV_TO_MEM,
-			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	dma_desc = dmaengine_prep_slave_single(fuse->apbdma.chan,
+					       fuse->apbdma.phys,
+					       sizeof(u32), DMA_DEV_TO_MEM,
+					       flags);
 	if (!dma_desc)
 		goto out;
 
 	dma_desc->callback = apb_dma_complete;
-	dma_desc->callback_param = NULL;
+	dma_desc->callback_param = fuse;
 
-	reinit_completion(&apb_dma_wait);
+	reinit_completion(&fuse->apbdma.wait);
 
-	clk_prepare_enable(fuse_clk);
+	clk_prepare_enable(fuse->clk);
 
 	dmaengine_submit(dma_desc);
-	dma_async_issue_pending(apb_dma_chan);
-	time_left = wait_for_completion_timeout(&apb_dma_wait,
+	dma_async_issue_pending(fuse->apbdma.chan);
+	time_left = wait_for_completion_timeout(&fuse->apbdma.wait,
 						msecs_to_jiffies(50));
 
 	if (WARN(time_left == 0, "apb read dma timed out"))
-		dmaengine_terminate_all(apb_dma_chan);
+		dmaengine_terminate_all(fuse->apbdma.chan);
 	else
-		val = *apb_buffer;
+		value = *fuse->apbdma.virt;
 
-	clk_disable_unprepare(fuse_clk);
+	clk_disable_unprepare(fuse->clk);
+
 out:
-	mutex_unlock(&apb_dma_lock);
-
-	return val;
+	mutex_unlock(&fuse->apbdma.lock);
+	return value;
 }
 
-static const struct of_device_id tegra20_fuse_of_match[] = {
-	{ .compatible = "nvidia,tegra20-efuse" },
-	{},
-};
-
-static int apb_dma_init(void)
+static int tegra20_fuse_probe(struct tegra_fuse *fuse)
 {
 	dma_cap_mask_t mask;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
-	apb_dma_chan = dma_request_channel(mask, NULL, NULL);
-	if (!apb_dma_chan)
+
+	fuse->apbdma.chan = dma_request_channel(mask, NULL, NULL);
+	if (!fuse->apbdma.chan)
 		return -EPROBE_DEFER;
 
-	apb_buffer = dma_alloc_coherent(NULL, sizeof(u32), &apb_buffer_phys,
-					GFP_KERNEL);
-	if (!apb_buffer) {
-		dma_release_channel(apb_dma_chan);
+	fuse->apbdma.virt = dma_alloc_coherent(fuse->dev, sizeof(u32),
+					       &fuse->apbdma.phys,
+					       GFP_KERNEL);
+	if (!fuse->apbdma.virt) {
+		dma_release_channel(fuse->apbdma.chan);
 		return -ENOMEM;
 	}
 
-	dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	dma_sconfig.src_maxburst = 1;
-	dma_sconfig.dst_maxburst = 1;
+	fuse->apbdma.config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	fuse->apbdma.config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	fuse->apbdma.config.src_maxburst = 1;
+	fuse->apbdma.config.dst_maxburst = 1;
+
+	init_completion(&fuse->apbdma.wait);
+	mutex_init(&fuse->apbdma.lock);
+	fuse->read = tegra20_fuse_read;
 
 	return 0;
 }
 
-static int tegra20_fuse_probe(struct platform_device *pdev)
-{
-	struct resource *res;
-	int err;
-
-	fuse_clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(fuse_clk)) {
-		dev_err(&pdev->dev, "missing clock");
-		return PTR_ERR(fuse_clk);
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-	fuse_phys = res->start;
-
-	err = apb_dma_init();
-	if (err)
-		return err;
-
-	if (tegra_fuse_create_sysfs(&pdev->dev, FUSE_SIZE, tegra20_fuse_readl))
-		return -ENODEV;
-
-	dev_dbg(&pdev->dev, "loaded\n");
-
-	return 0;
-}
-
-static struct platform_driver tegra20_fuse_driver = {
-	.probe = tegra20_fuse_probe,
-	.driver = {
-		.name = "tegra20_fuse",
-		.of_match_table = tegra20_fuse_of_match,
-	}
+static const struct tegra_fuse_info tegra20_fuse_info = {
+	.read = tegra20_fuse_read,
+	.size = 0x1f8,
+	.spare = 0x100,
 };
 
-static int __init tegra20_fuse_init(void)
-{
-	return platform_driver_register(&tegra20_fuse_driver);
-}
-postcore_initcall(tegra20_fuse_init);
-
 /* Early boot code. This code is called before the devices are created */
 
-u32 __init tegra20_fuse_early(const unsigned int offset)
-{
-	return readl_relaxed(fuse_base + FUSE_BEGIN + offset);
-}
-
-bool __init tegra20_spare_fuse_early(int spare_bit)
-{
-	u32 offset = spare_bit * 4;
-	bool value;
-
-	value = tegra20_fuse_early(offset + 0x100);
-
-	return value;
-}
-
 static void __init tegra20_fuse_add_randomness(void)
 {
 	u32 randomness[7];
@@ -195,22 +143,27 @@
 	randomness[1] = tegra_read_straps();
 	randomness[2] = tegra_read_chipid();
 	randomness[3] = tegra_sku_info.cpu_process_id << 16;
-	randomness[3] |= tegra_sku_info.core_process_id;
+	randomness[3] |= tegra_sku_info.soc_process_id;
 	randomness[4] = tegra_sku_info.cpu_speedo_id << 16;
 	randomness[4] |= tegra_sku_info.soc_speedo_id;
-	randomness[5] = tegra20_fuse_early(FUSE_UID_LOW);
-	randomness[6] = tegra20_fuse_early(FUSE_UID_HIGH);
+	randomness[5] = tegra_fuse_read_early(FUSE_UID_LOW);
+	randomness[6] = tegra_fuse_read_early(FUSE_UID_HIGH);
 
 	add_device_randomness(randomness, sizeof(randomness));
 }
 
-void __init tegra20_init_fuse_early(void)
+static void __init tegra20_fuse_init(struct tegra_fuse *fuse)
 {
-	fuse_base = ioremap(TEGRA_FUSE_BASE, TEGRA_FUSE_SIZE);
+	fuse->read_early = tegra20_fuse_read_early;
 
 	tegra_init_revision();
-	tegra20_init_speedo_data(&tegra_sku_info);
+	fuse->soc->speedo_init(&tegra_sku_info);
 	tegra20_fuse_add_randomness();
-
-	iounmap(fuse_base);
 }
+
+const struct tegra_fuse_soc tegra20_fuse_soc = {
+	.init = tegra20_fuse_init,
+	.speedo_init = tegra20_init_speedo_data,
+	.probe = tegra20_fuse_probe,
+	.info = &tegra20_fuse_info,
+};
diff --git a/drivers/soc/tegra/fuse/fuse-tegra30.c b/drivers/soc/tegra/fuse/fuse-tegra30.c
index 4d2f71b..882607b 100644
--- a/drivers/soc/tegra/fuse/fuse-tegra30.c
+++ b/drivers/soc/tegra/fuse/fuse-tegra30.c
@@ -42,114 +42,34 @@
 
 #define FUSE_HAS_REVISION_INFO	BIT(0)
 
-enum speedo_idx {
-	SPEEDO_TEGRA30 = 0,
-	SPEEDO_TEGRA114,
-	SPEEDO_TEGRA124,
-};
-
-struct tegra_fuse_info {
-	int		size;
-	int		spare_bit;
-	enum speedo_idx	speedo_idx;
-};
-
-static void __iomem *fuse_base;
-static struct clk *fuse_clk;
-static const struct tegra_fuse_info *fuse_info;
-
-u32 tegra30_fuse_readl(const unsigned int offset)
+#if defined(CONFIG_ARCH_TEGRA_3x_SOC) || \
+    defined(CONFIG_ARCH_TEGRA_114_SOC) || \
+    defined(CONFIG_ARCH_TEGRA_124_SOC) || \
+    defined(CONFIG_ARCH_TEGRA_132_SOC) || \
+    defined(CONFIG_ARCH_TEGRA_210_SOC)
+static u32 tegra30_fuse_read_early(struct tegra_fuse *fuse, unsigned int offset)
 {
-	u32 val;
-
-	/*
-	 * early in the boot, the fuse clock will be enabled by
-	 * tegra_init_fuse()
-	 */
-
-	if (fuse_clk)
-		clk_prepare_enable(fuse_clk);
-
-	val = readl_relaxed(fuse_base + FUSE_BEGIN + offset);
-
-	if (fuse_clk)
-		clk_disable_unprepare(fuse_clk);
-
-	return val;
+	return readl_relaxed(fuse->base + FUSE_BEGIN + offset);
 }
 
-static const struct tegra_fuse_info tegra30_info = {
-	.size			= 0x2a4,
-	.spare_bit		= 0x144,
-	.speedo_idx		= SPEEDO_TEGRA30,
-};
-
-static const struct tegra_fuse_info tegra114_info = {
-	.size			= 0x2a0,
-	.speedo_idx		= SPEEDO_TEGRA114,
-};
-
-static const struct tegra_fuse_info tegra124_info = {
-	.size			= 0x300,
-	.speedo_idx		= SPEEDO_TEGRA124,
-};
-
-static const struct of_device_id tegra30_fuse_of_match[] = {
-	{ .compatible = "nvidia,tegra30-efuse", .data = &tegra30_info },
-	{ .compatible = "nvidia,tegra114-efuse", .data = &tegra114_info },
-	{ .compatible = "nvidia,tegra124-efuse", .data = &tegra124_info },
-	{},
-};
-
-static int tegra30_fuse_probe(struct platform_device *pdev)
+static u32 tegra30_fuse_read(struct tegra_fuse *fuse, unsigned int offset)
 {
-	const struct of_device_id *of_dev_id;
+	u32 value;
+	int err;
 
-	of_dev_id = of_match_device(tegra30_fuse_of_match, &pdev->dev);
-	if (!of_dev_id)
-		return -ENODEV;
-
-	fuse_clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(fuse_clk)) {
-		dev_err(&pdev->dev, "missing clock");
-		return PTR_ERR(fuse_clk);
+	err = clk_prepare_enable(fuse->clk);
+	if (err < 0) {
+		dev_err(fuse->dev, "failed to enable FUSE clock: %d\n", err);
+		return 0;
 	}
 
-	platform_set_drvdata(pdev, NULL);
+	value = readl_relaxed(fuse->base + FUSE_BEGIN + offset);
 
-	if (tegra_fuse_create_sysfs(&pdev->dev, fuse_info->size,
-				    tegra30_fuse_readl))
-		return -ENODEV;
+	clk_disable_unprepare(fuse->clk);
 
-	dev_dbg(&pdev->dev, "loaded\n");
-
-	return 0;
+	return value;
 }
 
-static struct platform_driver tegra30_fuse_driver = {
-	.probe = tegra30_fuse_probe,
-	.driver = {
-		.name = "tegra_fuse",
-		.of_match_table = tegra30_fuse_of_match,
-	}
-};
-
-static int __init tegra30_fuse_init(void)
-{
-	return platform_driver_register(&tegra30_fuse_driver);
-}
-postcore_initcall(tegra30_fuse_init);
-
-/* Early boot code. This code is called before the devices are created */
-
-typedef void (*speedo_f)(struct tegra_sku_info *sku_info);
-
-static speedo_f __initdata speedo_tbl[] = {
-	[SPEEDO_TEGRA30]	= tegra30_init_speedo_data,
-	[SPEEDO_TEGRA114]	= tegra114_init_speedo_data,
-	[SPEEDO_TEGRA124]	= tegra124_init_speedo_data,
-};
-
 static void __init tegra30_fuse_add_randomness(void)
 {
 	u32 randomness[12];
@@ -158,67 +78,83 @@
 	randomness[1] = tegra_read_straps();
 	randomness[2] = tegra_read_chipid();
 	randomness[3] = tegra_sku_info.cpu_process_id << 16;
-	randomness[3] |= tegra_sku_info.core_process_id;
+	randomness[3] |= tegra_sku_info.soc_process_id;
 	randomness[4] = tegra_sku_info.cpu_speedo_id << 16;
 	randomness[4] |= tegra_sku_info.soc_speedo_id;
-	randomness[5] = tegra30_fuse_readl(FUSE_VENDOR_CODE);
-	randomness[6] = tegra30_fuse_readl(FUSE_FAB_CODE);
-	randomness[7] = tegra30_fuse_readl(FUSE_LOT_CODE_0);
-	randomness[8] = tegra30_fuse_readl(FUSE_LOT_CODE_1);
-	randomness[9] = tegra30_fuse_readl(FUSE_WAFER_ID);
-	randomness[10] = tegra30_fuse_readl(FUSE_X_COORDINATE);
-	randomness[11] = tegra30_fuse_readl(FUSE_Y_COORDINATE);
+	randomness[5] = tegra_fuse_read_early(FUSE_VENDOR_CODE);
+	randomness[6] = tegra_fuse_read_early(FUSE_FAB_CODE);
+	randomness[7] = tegra_fuse_read_early(FUSE_LOT_CODE_0);
+	randomness[8] = tegra_fuse_read_early(FUSE_LOT_CODE_1);
+	randomness[9] = tegra_fuse_read_early(FUSE_WAFER_ID);
+	randomness[10] = tegra_fuse_read_early(FUSE_X_COORDINATE);
+	randomness[11] = tegra_fuse_read_early(FUSE_Y_COORDINATE);
 
 	add_device_randomness(randomness, sizeof(randomness));
 }
 
-static void __init legacy_fuse_init(void)
+static void __init tegra30_fuse_init(struct tegra_fuse *fuse)
 {
-	switch (tegra_get_chip_id()) {
-	case TEGRA30:
-		fuse_info = &tegra30_info;
-		break;
-	case TEGRA114:
-		fuse_info = &tegra114_info;
-		break;
-	case TEGRA124:
-	case TEGRA132:
-		fuse_info = &tegra124_info;
-		break;
-	default:
-		return;
-	}
-
-	fuse_base = ioremap(TEGRA_FUSE_BASE, TEGRA_FUSE_SIZE);
-}
-
-bool __init tegra30_spare_fuse(int spare_bit)
-{
-	u32 offset = fuse_info->spare_bit + spare_bit * 4;
-
-	return tegra30_fuse_readl(offset) & 1;
-}
-
-void __init tegra30_init_fuse_early(void)
-{
-	struct device_node *np;
-	const struct of_device_id *of_match;
-
-	np = of_find_matching_node_and_match(NULL, tegra30_fuse_of_match,
-						&of_match);
-	if (np) {
-		fuse_base = of_iomap(np, 0);
-		fuse_info = (struct tegra_fuse_info *)of_match->data;
-	} else
-		legacy_fuse_init();
-
-	if (!fuse_base) {
-		pr_warn("fuse DT node missing and unknown chip id: 0x%02x\n",
-			tegra_get_chip_id());
-		return;
-	}
+	fuse->read_early = tegra30_fuse_read_early;
+	fuse->read = tegra30_fuse_read;
 
 	tegra_init_revision();
-	speedo_tbl[fuse_info->speedo_idx](&tegra_sku_info);
+	fuse->soc->speedo_init(&tegra_sku_info);
 	tegra30_fuse_add_randomness();
 }
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_3x_SOC
+static const struct tegra_fuse_info tegra30_fuse_info = {
+	.read = tegra30_fuse_read,
+	.size = 0x2a4,
+	.spare = 0x144,
+};
+
+const struct tegra_fuse_soc tegra30_fuse_soc = {
+	.init = tegra30_fuse_init,
+	.speedo_init = tegra30_init_speedo_data,
+	.info = &tegra30_fuse_info,
+};
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_114_SOC
+static const struct tegra_fuse_info tegra114_fuse_info = {
+	.read = tegra30_fuse_read,
+	.size = 0x2a0,
+	.spare = 0x180,
+};
+
+const struct tegra_fuse_soc tegra114_fuse_soc = {
+	.init = tegra30_fuse_init,
+	.speedo_init = tegra114_init_speedo_data,
+	.info = &tegra114_fuse_info,
+};
+#endif
+
+#if defined(CONFIG_ARCH_TEGRA_124_SOC) || defined(CONFIG_ARCH_TEGRA_132_SOC)
+static const struct tegra_fuse_info tegra124_fuse_info = {
+	.read = tegra30_fuse_read,
+	.size = 0x300,
+	.spare = 0x200,
+};
+
+const struct tegra_fuse_soc tegra124_fuse_soc = {
+	.init = tegra30_fuse_init,
+	.speedo_init = tegra124_init_speedo_data,
+	.info = &tegra124_fuse_info,
+};
+#endif
+
+#if defined(CONFIG_ARCH_TEGRA_210_SOC)
+static const struct tegra_fuse_info tegra210_fuse_info = {
+	.read = tegra30_fuse_read,
+	.size = 0x300,
+	.spare = 0x280,
+};
+
+const struct tegra_fuse_soc tegra210_fuse_soc = {
+	.init = tegra30_fuse_init,
+	.speedo_init = tegra210_init_speedo_data,
+	.info = &tegra210_fuse_info,
+};
+#endif
diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h
index 3a398bf3..10c2076 100644
--- a/drivers/soc/tegra/fuse/fuse.h
+++ b/drivers/soc/tegra/fuse/fuse.h
@@ -19,53 +19,90 @@
 #ifndef __DRIVERS_MISC_TEGRA_FUSE_H
 #define __DRIVERS_MISC_TEGRA_FUSE_H
 
-#define TEGRA_FUSE_BASE	0x7000f800
-#define TEGRA_FUSE_SIZE	0x400
+#include <linux/dmaengine.h>
+#include <linux/types.h>
 
-int tegra_fuse_create_sysfs(struct device *dev, int size,
-		     u32 (*readl)(const unsigned int offset));
+struct tegra_fuse;
 
-bool tegra30_spare_fuse(int bit);
-u32 tegra30_fuse_readl(const unsigned int offset);
-void tegra30_init_fuse_early(void);
+struct tegra_fuse_info {
+	u32 (*read)(struct tegra_fuse *fuse, unsigned int offset);
+	unsigned int size;
+	unsigned int spare;
+};
+
+struct tegra_fuse_soc {
+	void (*init)(struct tegra_fuse *fuse);
+	void (*speedo_init)(struct tegra_sku_info *info);
+	int (*probe)(struct tegra_fuse *fuse);
+
+	const struct tegra_fuse_info *info;
+};
+
+struct tegra_fuse {
+	struct device *dev;
+	void __iomem *base;
+	phys_addr_t phys;
+	struct clk *clk;
+
+	u32 (*read_early)(struct tegra_fuse *fuse, unsigned int offset);
+	u32 (*read)(struct tegra_fuse *fuse, unsigned int offset);
+	const struct tegra_fuse_soc *soc;
+
+	/* APBDMA on Tegra20 */
+	struct {
+		struct mutex lock;
+		struct completion wait;
+		struct dma_chan *chan;
+		struct dma_slave_config config;
+		dma_addr_t phys;
+		u32 *virt;
+	} apbdma;
+};
+
 void tegra_init_revision(void);
 void tegra_init_apbmisc(void);
 
+bool __init tegra_fuse_read_spare(unsigned int spare);
+u32 __init tegra_fuse_read_early(unsigned int offset);
+
 #ifdef CONFIG_ARCH_TEGRA_2x_SOC
 void tegra20_init_speedo_data(struct tegra_sku_info *sku_info);
-bool tegra20_spare_fuse_early(int spare_bit);
-void tegra20_init_fuse_early(void);
-u32 tegra20_fuse_early(const unsigned int offset);
-#else
-static inline void tegra20_init_speedo_data(struct tegra_sku_info *sku_info) {}
-static inline bool tegra20_spare_fuse_early(int spare_bit)
-{
-	return false;
-}
-static inline void tegra20_init_fuse_early(void) {}
-static inline u32 tegra20_fuse_early(const unsigned int offset)
-{
-	return 0;
-}
 #endif
 
-
 #ifdef CONFIG_ARCH_TEGRA_3x_SOC
 void tegra30_init_speedo_data(struct tegra_sku_info *sku_info);
-#else
-static inline void tegra30_init_speedo_data(struct tegra_sku_info *sku_info) {}
 #endif
 
 #ifdef CONFIG_ARCH_TEGRA_114_SOC
 void tegra114_init_speedo_data(struct tegra_sku_info *sku_info);
-#else
-static inline void tegra114_init_speedo_data(struct tegra_sku_info *sku_info) {}
 #endif
 
-#ifdef CONFIG_ARCH_TEGRA_124_SOC
+#if defined(CONFIG_ARCH_TEGRA_124_SOC) || defined(CONFIG_ARCH_TEGRA_132_SOC)
 void tegra124_init_speedo_data(struct tegra_sku_info *sku_info);
-#else
-static inline void tegra124_init_speedo_data(struct tegra_sku_info *sku_info) {}
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_210_SOC
+void tegra210_init_speedo_data(struct tegra_sku_info *sku_info);
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_2x_SOC
+extern const struct tegra_fuse_soc tegra20_fuse_soc;
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_3x_SOC
+extern const struct tegra_fuse_soc tegra30_fuse_soc;
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_114_SOC
+extern const struct tegra_fuse_soc tegra114_fuse_soc;
+#endif
+
+#if defined(CONFIG_ARCH_TEGRA_124_SOC) || defined(CONFIG_ARCH_TEGRA_132_SOC)
+extern const struct tegra_fuse_soc tegra124_fuse_soc;
+#endif
+
+#ifdef CONFIG_ARCH_TEGRA_210_SOC
+extern const struct tegra_fuse_soc tegra210_fuse_soc;
 #endif
 
 #endif
diff --git a/drivers/soc/tegra/fuse/speedo-tegra114.c b/drivers/soc/tegra/fuse/speedo-tegra114.c
index 2a6ca03..1ba41eb 100644
--- a/drivers/soc/tegra/fuse/speedo-tegra114.c
+++ b/drivers/soc/tegra/fuse/speedo-tegra114.c
@@ -22,7 +22,7 @@
 
 #include "fuse.h"
 
-#define CORE_PROCESS_CORNERS	2
+#define SOC_PROCESS_CORNERS	2
 #define CPU_PROCESS_CORNERS	2
 
 enum {
@@ -31,7 +31,7 @@
 	THRESHOLD_INDEX_COUNT,
 };
 
-static const u32 __initconst core_process_speedos[][CORE_PROCESS_CORNERS] = {
+static const u32 __initconst soc_process_speedos[][SOC_PROCESS_CORNERS] = {
 	{1123,     UINT_MAX},
 	{0,        UINT_MAX},
 };
@@ -74,8 +74,8 @@
 	}
 
 	if (rev == TEGRA_REVISION_A01) {
-		tmp = tegra30_fuse_readl(0x270) << 1;
-		tmp |= tegra30_fuse_readl(0x26c);
+		tmp = tegra_fuse_read_early(0x270) << 1;
+		tmp |= tegra_fuse_read_early(0x26c);
 		if (!tmp)
 			sku_info->cpu_speedo_id = 0;
 	}
@@ -84,27 +84,27 @@
 void __init tegra114_init_speedo_data(struct tegra_sku_info *sku_info)
 {
 	u32 cpu_speedo_val;
-	u32 core_speedo_val;
+	u32 soc_speedo_val;
 	int threshold;
 	int i;
 
 	BUILD_BUG_ON(ARRAY_SIZE(cpu_process_speedos) !=
 			THRESHOLD_INDEX_COUNT);
-	BUILD_BUG_ON(ARRAY_SIZE(core_process_speedos) !=
+	BUILD_BUG_ON(ARRAY_SIZE(soc_process_speedos) !=
 			THRESHOLD_INDEX_COUNT);
 
 	rev_sku_to_speedo_ids(sku_info, &threshold);
 
-	cpu_speedo_val = tegra30_fuse_readl(0x12c) + 1024;
-	core_speedo_val = tegra30_fuse_readl(0x134);
+	cpu_speedo_val = tegra_fuse_read_early(0x12c) + 1024;
+	soc_speedo_val = tegra_fuse_read_early(0x134);
 
 	for (i = 0; i < CPU_PROCESS_CORNERS; i++)
 		if (cpu_speedo_val < cpu_process_speedos[threshold][i])
 			break;
 	sku_info->cpu_process_id = i;
 
-	for (i = 0; i < CORE_PROCESS_CORNERS; i++)
-		if (core_speedo_val < core_process_speedos[threshold][i])
+	for (i = 0; i < SOC_PROCESS_CORNERS; i++)
+		if (soc_speedo_val < soc_process_speedos[threshold][i])
 			break;
-	sku_info->core_process_id = i;
+	sku_info->soc_process_id = i;
 }
diff --git a/drivers/soc/tegra/fuse/speedo-tegra124.c b/drivers/soc/tegra/fuse/speedo-tegra124.c
index 4636238..a63a134 100644
--- a/drivers/soc/tegra/fuse/speedo-tegra124.c
+++ b/drivers/soc/tegra/fuse/speedo-tegra124.c
@@ -24,7 +24,7 @@
 
 #define CPU_PROCESS_CORNERS	2
 #define GPU_PROCESS_CORNERS	2
-#define CORE_PROCESS_CORNERS	2
+#define SOC_PROCESS_CORNERS	2
 
 #define FUSE_CPU_SPEEDO_0	0x14
 #define FUSE_CPU_SPEEDO_1	0x2c
@@ -53,7 +53,7 @@
 	{0,	UINT_MAX},
 };
 
-static const u32 __initconst core_process_speedos[][CORE_PROCESS_CORNERS] = {
+static const u32 __initconst soc_process_speedos[][SOC_PROCESS_CORNERS] = {
 	{2101,	UINT_MAX},
 	{0,	UINT_MAX},
 };
@@ -119,19 +119,19 @@
 			THRESHOLD_INDEX_COUNT);
 	BUILD_BUG_ON(ARRAY_SIZE(gpu_process_speedos) !=
 			THRESHOLD_INDEX_COUNT);
-	BUILD_BUG_ON(ARRAY_SIZE(core_process_speedos) !=
+	BUILD_BUG_ON(ARRAY_SIZE(soc_process_speedos) !=
 			THRESHOLD_INDEX_COUNT);
 
-	cpu_speedo_0_value = tegra30_fuse_readl(FUSE_CPU_SPEEDO_0);
+	cpu_speedo_0_value = tegra_fuse_read_early(FUSE_CPU_SPEEDO_0);
 
 	/* GPU Speedo is stored in CPU_SPEEDO_2 */
-	sku_info->gpu_speedo_value = tegra30_fuse_readl(FUSE_CPU_SPEEDO_2);
+	sku_info->gpu_speedo_value = tegra_fuse_read_early(FUSE_CPU_SPEEDO_2);
 
-	soc_speedo_0_value = tegra30_fuse_readl(FUSE_SOC_SPEEDO_0);
+	soc_speedo_0_value = tegra_fuse_read_early(FUSE_SOC_SPEEDO_0);
 
-	cpu_iddq_value = tegra30_fuse_readl(FUSE_CPU_IDDQ);
-	soc_iddq_value = tegra30_fuse_readl(FUSE_SOC_IDDQ);
-	gpu_iddq_value = tegra30_fuse_readl(FUSE_GPU_IDDQ);
+	cpu_iddq_value = tegra_fuse_read_early(FUSE_CPU_IDDQ);
+	soc_iddq_value = tegra_fuse_read_early(FUSE_SOC_IDDQ);
+	gpu_iddq_value = tegra_fuse_read_early(FUSE_GPU_IDDQ);
 
 	sku_info->cpu_speedo_value = cpu_speedo_0_value;
 
@@ -143,7 +143,7 @@
 
 	rev_sku_to_speedo_ids(sku_info, &threshold);
 
-	sku_info->cpu_iddq_value = tegra30_fuse_readl(FUSE_CPU_IDDQ);
+	sku_info->cpu_iddq_value = tegra_fuse_read_early(FUSE_CPU_IDDQ);
 
 	for (i = 0; i < GPU_PROCESS_CORNERS; i++)
 		if (sku_info->gpu_speedo_value <
@@ -157,11 +157,11 @@
 				break;
 	sku_info->cpu_process_id = i;
 
-	for (i = 0; i < CORE_PROCESS_CORNERS; i++)
+	for (i = 0; i < SOC_PROCESS_CORNERS; i++)
 		if (soc_speedo_0_value <
-			core_process_speedos[threshold][i])
+			soc_process_speedos[threshold][i])
 			break;
-	sku_info->core_process_id = i;
+	sku_info->soc_process_id = i;
 
 	pr_debug("Tegra GPU Speedo ID=%d, Speedo Value=%d\n",
 		 sku_info->gpu_speedo_id, sku_info->gpu_speedo_value);
diff --git a/drivers/soc/tegra/fuse/speedo-tegra20.c b/drivers/soc/tegra/fuse/speedo-tegra20.c
index eff1b63..5f7818b 100644
--- a/drivers/soc/tegra/fuse/speedo-tegra20.c
+++ b/drivers/soc/tegra/fuse/speedo-tegra20.c
@@ -28,11 +28,11 @@
 #define CPU_SPEEDO_REDUND_MSBIT		39
 #define CPU_SPEEDO_REDUND_OFFS	(CPU_SPEEDO_REDUND_MSBIT - CPU_SPEEDO_MSBIT)
 
-#define CORE_SPEEDO_LSBIT		40
-#define CORE_SPEEDO_MSBIT		47
-#define CORE_SPEEDO_REDUND_LSBIT	48
-#define CORE_SPEEDO_REDUND_MSBIT	55
-#define CORE_SPEEDO_REDUND_OFFS	(CORE_SPEEDO_REDUND_MSBIT - CORE_SPEEDO_MSBIT)
+#define SOC_SPEEDO_LSBIT		40
+#define SOC_SPEEDO_MSBIT		47
+#define SOC_SPEEDO_REDUND_LSBIT		48
+#define SOC_SPEEDO_REDUND_MSBIT		55
+#define SOC_SPEEDO_REDUND_OFFS	(SOC_SPEEDO_REDUND_MSBIT - SOC_SPEEDO_MSBIT)
 
 #define SPEEDO_MULT			4
 
@@ -56,7 +56,7 @@
 	{316, 331, 383, UINT_MAX},
 };
 
-static const u32 __initconst core_process_speedos[][PROCESS_CORNERS_NUM] = {
+static const u32 __initconst soc_process_speedos[][PROCESS_CORNERS_NUM] = {
 	{165, 195, 224, UINT_MAX},
 	{165, 195, 224, UINT_MAX},
 	{165, 195, 224, UINT_MAX},
@@ -69,7 +69,7 @@
 	int i;
 
 	BUILD_BUG_ON(ARRAY_SIZE(cpu_process_speedos) != SPEEDO_ID_COUNT);
-	BUILD_BUG_ON(ARRAY_SIZE(core_process_speedos) != SPEEDO_ID_COUNT);
+	BUILD_BUG_ON(ARRAY_SIZE(soc_process_speedos) != SPEEDO_ID_COUNT);
 
 	if (SPEEDO_ID_SELECT_0(sku_info->revision))
 		sku_info->soc_speedo_id = SPEEDO_ID_0;
@@ -80,8 +80,8 @@
 
 	val = 0;
 	for (i = CPU_SPEEDO_MSBIT; i >= CPU_SPEEDO_LSBIT; i--) {
-		reg = tegra20_spare_fuse_early(i) |
-			tegra20_spare_fuse_early(i + CPU_SPEEDO_REDUND_OFFS);
+		reg = tegra_fuse_read_spare(i) |
+			tegra_fuse_read_spare(i + CPU_SPEEDO_REDUND_OFFS);
 		val = (val << 1) | (reg & 0x1);
 	}
 	val = val * SPEEDO_MULT;
@@ -94,17 +94,17 @@
 	sku_info->cpu_process_id = i;
 
 	val = 0;
-	for (i = CORE_SPEEDO_MSBIT; i >= CORE_SPEEDO_LSBIT; i--) {
-		reg = tegra20_spare_fuse_early(i) |
-			tegra20_spare_fuse_early(i + CORE_SPEEDO_REDUND_OFFS);
+	for (i = SOC_SPEEDO_MSBIT; i >= SOC_SPEEDO_LSBIT; i--) {
+		reg = tegra_fuse_read_spare(i) |
+			tegra_fuse_read_spare(i + SOC_SPEEDO_REDUND_OFFS);
 		val = (val << 1) | (reg & 0x1);
 	}
 	val = val * SPEEDO_MULT;
 	pr_debug("Core speedo value %u\n", val);
 
 	for (i = 0; i < (PROCESS_CORNERS_NUM - 1); i++) {
-		if (val <= core_process_speedos[sku_info->soc_speedo_id][i])
+		if (val <= soc_process_speedos[sku_info->soc_speedo_id][i])
 			break;
 	}
-	sku_info->core_process_id = i;
+	sku_info->soc_process_id = i;
 }
diff --git a/drivers/soc/tegra/fuse/speedo-tegra210.c b/drivers/soc/tegra/fuse/speedo-tegra210.c
new file mode 100644
index 0000000..5373f4c
--- /dev/null
+++ b/drivers/soc/tegra/fuse/speedo-tegra210.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2013-2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+
+#include <soc/tegra/fuse.h>
+
+#include "fuse.h"
+
+#define CPU_PROCESS_CORNERS	2
+#define GPU_PROCESS_CORNERS	2
+#define SOC_PROCESS_CORNERS	3
+
+#define FUSE_CPU_SPEEDO_0	0x014
+#define FUSE_CPU_SPEEDO_1	0x02c
+#define FUSE_CPU_SPEEDO_2	0x030
+#define FUSE_SOC_SPEEDO_0	0x034
+#define FUSE_SOC_SPEEDO_1	0x038
+#define FUSE_SOC_SPEEDO_2	0x03c
+#define FUSE_CPU_IDDQ		0x018
+#define FUSE_SOC_IDDQ		0x040
+#define FUSE_GPU_IDDQ		0x128
+#define FUSE_FT_REV		0x028
+
+enum {
+	THRESHOLD_INDEX_0,
+	THRESHOLD_INDEX_1,
+	THRESHOLD_INDEX_COUNT,
+};
+
+static const u32 __initconst cpu_process_speedos[][CPU_PROCESS_CORNERS] = {
+	{ 2119, UINT_MAX },
+	{ 2119, UINT_MAX },
+};
+
+static const u32 __initconst gpu_process_speedos[][GPU_PROCESS_CORNERS] = {
+	{ UINT_MAX, UINT_MAX },
+	{ UINT_MAX, UINT_MAX },
+};
+
+static const u32 __initconst soc_process_speedos[][SOC_PROCESS_CORNERS] = {
+	{ 1950, 2100, UINT_MAX },
+	{ 1950, 2100, UINT_MAX },
+};
+
+static u8 __init get_speedo_revision(void)
+{
+	return tegra_fuse_read_spare(4) << 2 |
+	       tegra_fuse_read_spare(3) << 1 |
+	       tegra_fuse_read_spare(2) << 0;
+}
+
+static void __init rev_sku_to_speedo_ids(struct tegra_sku_info *sku_info,
+					 u8 speedo_rev, int *threshold)
+{
+	int sku = sku_info->sku_id;
+
+	/* Assign to default */
+	sku_info->cpu_speedo_id = 0;
+	sku_info->soc_speedo_id = 0;
+	sku_info->gpu_speedo_id = 0;
+	*threshold = THRESHOLD_INDEX_0;
+
+	switch (sku) {
+	case 0x00: /* Engineering SKU */
+	case 0x01: /* Engineering SKU */
+	case 0x07:
+	case 0x17:
+	case 0x27:
+		if (speedo_rev >= 2)
+			sku_info->gpu_speedo_id = 1;
+		break;
+
+	case 0x13:
+		if (speedo_rev >= 2)
+			sku_info->gpu_speedo_id = 1;
+
+		sku_info->cpu_speedo_id = 1;
+		break;
+
+	default:
+		pr_err("Tegra210: unknown SKU %#04x\n", sku);
+		/* Using the default for the error case */
+		break;
+	}
+}
+
+static int get_process_id(int value, const u32 *speedos, unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++)
+		if (value < speedos[num])
+			return i;
+
+	return -EINVAL;
+}
+
+void __init tegra210_init_speedo_data(struct tegra_sku_info *sku_info)
+{
+	int cpu_speedo[3], soc_speedo[3], cpu_iddq, gpu_iddq, soc_iddq;
+	unsigned int index;
+	u8 speedo_revision;
+
+	BUILD_BUG_ON(ARRAY_SIZE(cpu_process_speedos) !=
+			THRESHOLD_INDEX_COUNT);
+	BUILD_BUG_ON(ARRAY_SIZE(gpu_process_speedos) !=
+			THRESHOLD_INDEX_COUNT);
+	BUILD_BUG_ON(ARRAY_SIZE(soc_process_speedos) !=
+			THRESHOLD_INDEX_COUNT);
+
+	/* Read speedo/IDDQ fuses */
+	cpu_speedo[0] = tegra_fuse_read_early(FUSE_CPU_SPEEDO_0);
+	cpu_speedo[1] = tegra_fuse_read_early(FUSE_CPU_SPEEDO_1);
+	cpu_speedo[2] = tegra_fuse_read_early(FUSE_CPU_SPEEDO_2);
+
+	soc_speedo[0] = tegra_fuse_read_early(FUSE_SOC_SPEEDO_0);
+	soc_speedo[1] = tegra_fuse_read_early(FUSE_SOC_SPEEDO_1);
+	soc_speedo[2] = tegra_fuse_read_early(FUSE_CPU_SPEEDO_2);
+
+	cpu_iddq = tegra_fuse_read_early(FUSE_CPU_IDDQ) * 4;
+	soc_iddq = tegra_fuse_read_early(FUSE_SOC_IDDQ) * 4;
+	gpu_iddq = tegra_fuse_read_early(FUSE_GPU_IDDQ) * 5;
+
+	/*
+	 * Determine CPU, GPU and SoC speedo values depending on speedo fusing
+	 * revision. Note that GPU speedo value is fused in CPU_SPEEDO_2.
+	 */
+	speedo_revision = get_speedo_revision();
+	pr_info("Speedo Revision %u\n", speedo_revision);
+
+	if (speedo_revision >= 3) {
+		sku_info->cpu_speedo_value = cpu_speedo[0];
+		sku_info->gpu_speedo_value = cpu_speedo[2];
+		sku_info->soc_speedo_value = soc_speedo[0];
+	} else if (speedo_revision == 2) {
+		sku_info->cpu_speedo_value = (-1938 + (1095 * cpu_speedo[0] / 100)) / 10;
+		sku_info->gpu_speedo_value = (-1662 + (1082 * cpu_speedo[2] / 100)) / 10;
+		sku_info->soc_speedo_value = ( -705 + (1037 * soc_speedo[0] / 100)) / 10;
+	} else {
+		sku_info->cpu_speedo_value = 2100;
+		sku_info->gpu_speedo_value = cpu_speedo[2] - 75;
+		sku_info->soc_speedo_value = 1900;
+	}
+
+	if ((sku_info->cpu_speedo_value <= 0) ||
+	    (sku_info->gpu_speedo_value <= 0) ||
+	    (sku_info->soc_speedo_value <= 0)) {
+		WARN(1, "speedo value not fused\n");
+		return;
+	}
+
+	rev_sku_to_speedo_ids(sku_info, speedo_revision, &index);
+
+	sku_info->gpu_process_id = get_process_id(sku_info->gpu_speedo_value,
+						  gpu_process_speedos[index],
+						  GPU_PROCESS_CORNERS);
+
+	sku_info->cpu_process_id = get_process_id(sku_info->cpu_speedo_value,
+						  cpu_process_speedos[index],
+						  CPU_PROCESS_CORNERS);
+
+	sku_info->soc_process_id = get_process_id(sku_info->soc_speedo_value,
+						  soc_process_speedos[index],
+						  SOC_PROCESS_CORNERS);
+
+	pr_debug("Tegra GPU Speedo ID=%d, Speedo Value=%d\n",
+		 sku_info->gpu_speedo_id, sku_info->gpu_speedo_value);
+}
diff --git a/drivers/soc/tegra/fuse/speedo-tegra30.c b/drivers/soc/tegra/fuse/speedo-tegra30.c
index b17f0dc..9b010b3 100644
--- a/drivers/soc/tegra/fuse/speedo-tegra30.c
+++ b/drivers/soc/tegra/fuse/speedo-tegra30.c
@@ -22,7 +22,7 @@
 
 #include "fuse.h"
 
-#define CORE_PROCESS_CORNERS	1
+#define SOC_PROCESS_CORNERS	1
 #define CPU_PROCESS_CORNERS	6
 
 #define FUSE_SPEEDO_CALIB_0	0x14
@@ -54,7 +54,7 @@
 	THRESHOLD_INDEX_COUNT,
 };
 
-static const u32 __initconst core_process_speedos[][CORE_PROCESS_CORNERS] = {
+static const u32 __initconst soc_process_speedos[][SOC_PROCESS_CORNERS] = {
 	{180},
 	{170},
 	{195},
@@ -93,25 +93,25 @@
 	int bit_minus1;
 	int bit_minus2;
 
-	reg = tegra30_fuse_readl(FUSE_SPEEDO_CALIB_0);
+	reg = tegra_fuse_read_early(FUSE_SPEEDO_CALIB_0);
 
 	*speedo_lp = (reg & 0xFFFF) * 4;
 	*speedo_g = ((reg >> 16) & 0xFFFF) * 4;
 
-	ate_ver = tegra30_fuse_readl(FUSE_TEST_PROG_VER);
+	ate_ver = tegra_fuse_read_early(FUSE_TEST_PROG_VER);
 	pr_debug("Tegra ATE prog ver %d.%d\n", ate_ver/10, ate_ver%10);
 
 	if (ate_ver >= 26) {
-		bit_minus1 = tegra30_spare_fuse(LP_SPEEDO_BIT_MINUS1);
-		bit_minus1 |= tegra30_spare_fuse(LP_SPEEDO_BIT_MINUS1_R);
-		bit_minus2 = tegra30_spare_fuse(LP_SPEEDO_BIT_MINUS2);
-		bit_minus2 |= tegra30_spare_fuse(LP_SPEEDO_BIT_MINUS2_R);
+		bit_minus1 = tegra_fuse_read_spare(LP_SPEEDO_BIT_MINUS1);
+		bit_minus1 |= tegra_fuse_read_spare(LP_SPEEDO_BIT_MINUS1_R);
+		bit_minus2 = tegra_fuse_read_spare(LP_SPEEDO_BIT_MINUS2);
+		bit_minus2 |= tegra_fuse_read_spare(LP_SPEEDO_BIT_MINUS2_R);
 		*speedo_lp |= (bit_minus1 << 1) | bit_minus2;
 
-		bit_minus1 = tegra30_spare_fuse(G_SPEEDO_BIT_MINUS1);
-		bit_minus1 |= tegra30_spare_fuse(G_SPEEDO_BIT_MINUS1_R);
-		bit_minus2 = tegra30_spare_fuse(G_SPEEDO_BIT_MINUS2);
-		bit_minus2 |= tegra30_spare_fuse(G_SPEEDO_BIT_MINUS2_R);
+		bit_minus1 = tegra_fuse_read_spare(G_SPEEDO_BIT_MINUS1);
+		bit_minus1 |= tegra_fuse_read_spare(G_SPEEDO_BIT_MINUS1_R);
+		bit_minus2 = tegra_fuse_read_spare(G_SPEEDO_BIT_MINUS2);
+		bit_minus2 |= tegra_fuse_read_spare(G_SPEEDO_BIT_MINUS2_R);
 		*speedo_g |= (bit_minus1 << 1) | bit_minus2;
 	} else {
 		*speedo_lp |= 0x3;
@@ -121,7 +121,7 @@
 
 static void __init rev_sku_to_speedo_ids(struct tegra_sku_info *sku_info)
 {
-	int package_id = tegra30_fuse_readl(FUSE_PACKAGE_INFO) & 0x0F;
+	int package_id = tegra_fuse_read_early(FUSE_PACKAGE_INFO) & 0x0F;
 
 	switch (sku_info->revision) {
 	case TEGRA_REVISION_A01:
@@ -246,19 +246,19 @@
 void __init tegra30_init_speedo_data(struct tegra_sku_info *sku_info)
 {
 	u32 cpu_speedo_val;
-	u32 core_speedo_val;
+	u32 soc_speedo_val;
 	int i;
 
 	BUILD_BUG_ON(ARRAY_SIZE(cpu_process_speedos) !=
 			THRESHOLD_INDEX_COUNT);
-	BUILD_BUG_ON(ARRAY_SIZE(core_process_speedos) !=
+	BUILD_BUG_ON(ARRAY_SIZE(soc_process_speedos) !=
 			THRESHOLD_INDEX_COUNT);
 
 
 	rev_sku_to_speedo_ids(sku_info);
-	fuse_speedo_calib(&cpu_speedo_val, &core_speedo_val);
+	fuse_speedo_calib(&cpu_speedo_val, &soc_speedo_val);
 	pr_debug("Tegra CPU speedo value %u\n", cpu_speedo_val);
-	pr_debug("Tegra Core speedo value %u\n", core_speedo_val);
+	pr_debug("Tegra Core speedo value %u\n", soc_speedo_val);
 
 	for (i = 0; i < CPU_PROCESS_CORNERS; i++) {
 		if (cpu_speedo_val < cpu_process_speedos[threshold_index][i])
@@ -273,16 +273,16 @@
 		sku_info->cpu_speedo_id = 1;
 	}
 
-	for (i = 0; i < CORE_PROCESS_CORNERS; i++) {
-		if (core_speedo_val < core_process_speedos[threshold_index][i])
+	for (i = 0; i < SOC_PROCESS_CORNERS; i++) {
+		if (soc_speedo_val < soc_process_speedos[threshold_index][i])
 			break;
 	}
-	sku_info->core_process_id = i - 1;
+	sku_info->soc_process_id = i - 1;
 
-	if (sku_info->core_process_id == -1) {
-		pr_warn("Tegra CORE speedo value %3d out of range",
-				 core_speedo_val);
-		sku_info->core_process_id = 0;
+	if (sku_info->soc_process_id == -1) {
+		pr_warn("Tegra SoC speedo value %3d out of range",
+			soc_speedo_val);
+		sku_info->soc_process_id = 0;
 		sku_info->soc_speedo_id = 1;
 	}
 }
diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c
index 73fad05..5b18f6f 100644
--- a/drivers/soc/tegra/fuse/tegra-apbmisc.c
+++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c
@@ -21,11 +21,10 @@
 #include <linux/io.h>
 
 #include <soc/tegra/fuse.h>
+#include <soc/tegra/common.h>
 
 #include "fuse.h"
 
-#define APBMISC_BASE	0x70000800
-#define APBMISC_SIZE	0x64
 #define FUSE_SKU_INFO	0x10
 
 #define PMC_STRAPPING_OPT_A_RAM_CODE_SHIFT	4
@@ -95,8 +94,8 @@
 		rev = TEGRA_REVISION_A02;
 		break;
 	case 3:
-		if (chip_id == TEGRA20 && (tegra20_spare_fuse_early(18) ||
-					   tegra20_spare_fuse_early(19)))
+		if (chip_id == TEGRA20 && (tegra_fuse_read_spare(18) ||
+					   tegra_fuse_read_spare(19)))
 			rev = TEGRA_REVISION_A03p;
 		else
 			rev = TEGRA_REVISION_A03;
@@ -110,27 +109,74 @@
 
 	tegra_sku_info.revision = rev;
 
-	if (chip_id == TEGRA20)
-		tegra_sku_info.sku_id = tegra20_fuse_early(FUSE_SKU_INFO);
-	else
-		tegra_sku_info.sku_id = tegra30_fuse_readl(FUSE_SKU_INFO);
+	tegra_sku_info.sku_id = tegra_fuse_read_early(FUSE_SKU_INFO);
 }
 
 void __init tegra_init_apbmisc(void)
 {
+	struct resource apbmisc, straps;
 	struct device_node *np;
 
 	np = of_find_matching_node(NULL, apbmisc_match);
-	apbmisc_base = of_iomap(np, 0);
-	if (!apbmisc_base) {
-		pr_warn("ioremap tegra apbmisc failed. using %08x instead\n",
-			APBMISC_BASE);
-		apbmisc_base = ioremap(APBMISC_BASE, APBMISC_SIZE);
+	if (!np) {
+		/*
+		 * Fall back to legacy initialization for 32-bit ARM only. All
+		 * 64-bit ARM device tree files for Tegra are required to have
+		 * an APBMISC node.
+		 *
+		 * This is for backwards-compatibility with old device trees
+		 * that didn't contain an APBMISC node.
+		 */
+		if (IS_ENABLED(CONFIG_ARM) && soc_is_tegra()) {
+			/* APBMISC registers (chip revision, ...) */
+			apbmisc.start = 0x70000800;
+			apbmisc.end = 0x70000863;
+			apbmisc.flags = IORESOURCE_MEM;
+
+			/* strapping options */
+			if (tegra_get_chip_id() == TEGRA124) {
+				straps.start = 0x7000e864;
+				straps.end = 0x7000e867;
+			} else {
+				straps.start = 0x70000008;
+				straps.end = 0x7000000b;
+			}
+
+			straps.flags = IORESOURCE_MEM;
+
+			pr_warn("Using APBMISC region %pR\n", &apbmisc);
+			pr_warn("Using strapping options registers %pR\n",
+				&straps);
+		} else {
+			/*
+			 * At this point we're not running on Tegra, so play
+			 * nice with multi-platform kernels.
+			 */
+			return;
+		}
+	} else {
+		/*
+		 * Extract information from the device tree if we've found a
+		 * matching node.
+		 */
+		if (of_address_to_resource(np, 0, &apbmisc) < 0) {
+			pr_err("failed to get APBMISC registers\n");
+			return;
+		}
+
+		if (of_address_to_resource(np, 1, &straps) < 0) {
+			pr_err("failed to get strapping options registers\n");
+			return;
+		}
 	}
 
-	strapping_base = of_iomap(np, 1);
+	apbmisc_base = ioremap_nocache(apbmisc.start, resource_size(&apbmisc));
+	if (!apbmisc_base)
+		pr_err("failed to map APBMISC registers\n");
+
+	strapping_base = ioremap_nocache(straps.start, resource_size(&straps));
 	if (!strapping_base)
-		pr_err("ioremap tegra strapping_base failed\n");
+		pr_err("failed to map strapping options registers\n");
 
 	long_ram_code = of_property_read_bool(np, "nvidia,long-ram-code");
 }
diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index 75d0457..bc34cf7 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -17,6 +17,8 @@
  *
  */
 
+#define pr_fmt(fmt) "tegra-pmc: " fmt
+
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/clk/tegra.h>
@@ -457,7 +459,6 @@
 				 unsigned long *status, unsigned int *bit)
 {
 	unsigned long rate, value;
-	struct clk *clk;
 
 	*bit = id % 32;
 
@@ -476,12 +477,7 @@
 		*request = IO_DPD2_REQ;
 	}
 
-	clk = clk_get_sys(NULL, "pclk");
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
-
-	rate = clk_get_rate(clk);
-	clk_put(clk);
+	rate = clk_get_rate(pmc->clk);
 
 	tegra_pmc_writel(DPD_SAMPLE_ENABLE, DPD_SAMPLE);
 
@@ -535,8 +531,10 @@
 	tegra_pmc_writel(value, request);
 
 	err = tegra_io_rail_poll(status, mask, 0, 250);
-	if (err < 0)
+	if (err < 0) {
+		pr_info("tegra_io_rail_poll() failed: %d\n", err);
 		return err;
+	}
 
 	tegra_io_rail_unprepare();
 
@@ -551,8 +549,10 @@
 	int err;
 
 	err = tegra_io_rail_prepare(id, &request, &status, &bit);
-	if (err < 0)
+	if (err < 0) {
+		pr_info("tegra_io_rail_prepare() failed: %d\n", err);
 		return err;
+	}
 
 	mask = 1 << bit;
 
@@ -736,12 +736,12 @@
 	u32 value, checksum;
 
 	if (!pmc->soc->has_tsense_reset)
-		goto out;
+		return;
 
 	np = of_find_node_by_name(pmc->dev->of_node, "i2c-thermtrip");
 	if (!np) {
 		dev_warn(dev, "i2c-thermtrip node not found, %s.\n", disabled);
-		goto out;
+		return;
 	}
 
 	if (of_property_read_u32(np, "nvidia,i2c-controller-id", &ctrl_id)) {
@@ -801,7 +801,6 @@
 
 out:
 	of_node_put(np);
-	return;
 }
 
 static int tegra_pmc_probe(struct platform_device *pdev)
@@ -1002,7 +1001,56 @@
 	.has_gpu_clamps = true,
 };
 
+static const char * const tegra210_powergates[] = {
+	[TEGRA_POWERGATE_CPU] = "crail",
+	[TEGRA_POWERGATE_3D] = "3d",
+	[TEGRA_POWERGATE_VENC] = "venc",
+	[TEGRA_POWERGATE_PCIE] = "pcie",
+	[TEGRA_POWERGATE_L2] = "l2",
+	[TEGRA_POWERGATE_MPE] = "mpe",
+	[TEGRA_POWERGATE_HEG] = "heg",
+	[TEGRA_POWERGATE_SATA] = "sata",
+	[TEGRA_POWERGATE_CPU1] = "cpu1",
+	[TEGRA_POWERGATE_CPU2] = "cpu2",
+	[TEGRA_POWERGATE_CPU3] = "cpu3",
+	[TEGRA_POWERGATE_CELP] = "celp",
+	[TEGRA_POWERGATE_CPU0] = "cpu0",
+	[TEGRA_POWERGATE_C0NC] = "c0nc",
+	[TEGRA_POWERGATE_C1NC] = "c1nc",
+	[TEGRA_POWERGATE_SOR] = "sor",
+	[TEGRA_POWERGATE_DIS] = "dis",
+	[TEGRA_POWERGATE_DISB] = "disb",
+	[TEGRA_POWERGATE_XUSBA] = "xusba",
+	[TEGRA_POWERGATE_XUSBB] = "xusbb",
+	[TEGRA_POWERGATE_XUSBC] = "xusbc",
+	[TEGRA_POWERGATE_VIC] = "vic",
+	[TEGRA_POWERGATE_IRAM] = "iram",
+	[TEGRA_POWERGATE_NVDEC] = "nvdec",
+	[TEGRA_POWERGATE_NVJPG] = "nvjpg",
+	[TEGRA_POWERGATE_AUD] = "aud",
+	[TEGRA_POWERGATE_DFD] = "dfd",
+	[TEGRA_POWERGATE_VE2] = "ve2",
+};
+
+static const u8 tegra210_cpu_powergates[] = {
+	TEGRA_POWERGATE_CPU0,
+	TEGRA_POWERGATE_CPU1,
+	TEGRA_POWERGATE_CPU2,
+	TEGRA_POWERGATE_CPU3,
+};
+
+static const struct tegra_pmc_soc tegra210_pmc_soc = {
+	.num_powergates = ARRAY_SIZE(tegra210_powergates),
+	.powergates = tegra210_powergates,
+	.num_cpu_powergates = ARRAY_SIZE(tegra210_cpu_powergates),
+	.cpu_powergates = tegra210_cpu_powergates,
+	.has_tsense_reset = true,
+	.has_gpu_clamps = true,
+};
+
 static const struct of_device_id tegra_pmc_match[] = {
+	{ .compatible = "nvidia,tegra210-pmc", .data = &tegra210_pmc_soc },
+	{ .compatible = "nvidia,tegra132-pmc", .data = &tegra124_pmc_soc },
 	{ .compatible = "nvidia,tegra124-pmc", .data = &tegra124_pmc_soc },
 	{ .compatible = "nvidia,tegra114-pmc", .data = &tegra114_pmc_soc },
 	{ .compatible = "nvidia,tegra30-pmc", .data = &tegra30_pmc_soc },
@@ -1035,25 +1083,44 @@
 	bool invert;
 	u32 value;
 
-	if (!soc_is_tegra())
-		return 0;
-
 	np = of_find_matching_node_and_match(NULL, tegra_pmc_match, &match);
 	if (!np) {
-		pr_warn("PMC device node not found, disabling powergating\n");
+		/*
+		 * Fall back to legacy initialization for 32-bit ARM only. All
+		 * 64-bit ARM device tree files for Tegra are required to have
+		 * a PMC node.
+		 *
+		 * This is for backwards-compatibility with old device trees
+		 * that didn't contain a PMC node. Note that in this case the
+		 * SoC data can't be matched and therefore powergating is
+		 * disabled.
+		 */
+		if (IS_ENABLED(CONFIG_ARM) && soc_is_tegra()) {
+			pr_warn("DT node not found, powergating disabled\n");
 
-		regs.start = 0x7000e400;
-		regs.end = 0x7000e7ff;
-		regs.flags = IORESOURCE_MEM;
+			regs.start = 0x7000e400;
+			regs.end = 0x7000e7ff;
+			regs.flags = IORESOURCE_MEM;
 
-		pr_warn("Using memory region %pR\n", &regs);
+			pr_warn("Using memory region %pR\n", &regs);
+		} else {
+			/*
+			 * At this point we're not running on Tegra, so play
+			 * nice with multi-platform kernels.
+			 */
+			return 0;
+		}
 	} else {
-		pmc->soc = match->data;
-	}
+		/*
+		 * Extract information from the device tree if we've found a
+		 * matching node.
+		 */
+		if (of_address_to_resource(np, 0, &regs) < 0) {
+			pr_err("failed to get PMC registers\n");
+			return -ENXIO;
+		}
 
-	if (of_address_to_resource(np, 0, &regs) < 0) {
-		pr_err("failed to get PMC registers\n");
-		return -ENXIO;
+		pmc->soc = match->data;
 	}
 
 	pmc->base = ioremap_nocache(regs.start, resource_size(&regs));
@@ -1064,6 +1131,10 @@
 
 	mutex_init(&pmc->powergates_lock);
 
+	/*
+	 * Invert the interrupt polarity if a PMC device tree node exists and
+	 * contains the nvidia,invert-interrupt property.
+	 */
 	invert = of_property_read_bool(np, "nvidia,invert-interrupt");
 
 	value = tegra_pmc_readl(PMC_CNTRL);
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 86621fa..735355b 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -121,6 +121,7 @@
 #define REG_3		0x0004a0
 #define REG_4		0x000600
 #define REG_6		0x000800
+#define REG_7		0x000804
 #define REG_8		0x000820
 #define REG_9		0x000a04
 #define REG_10		0x018000
@@ -135,6 +136,8 @@
 #define REG_21		0x200218
 #define REG_22		0x0005a0
 #define REG_23		0x0005c0
+#define REG_24		0x000808
+#define REG_25		0x000b00
 #define REG_26		0x200118
 #define REG_27		0x200308
 #define REG_32		0x21003c
@@ -429,6 +432,9 @@
 #define SET_LENXY_START_RECFILL(fb, lenxy) \
 	WRITE_WORD(lenxy, fb, REG_9)
 
+#define SETUP_COPYAREA(fb) \
+	WRITE_BYTE(0, fb, REG_16b1)
+
 static void
 HYPER_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable)
 {
@@ -1004,6 +1010,36 @@
 	return 0;
 }
 
+static void
+stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+{
+	struct stifb_info *fb = container_of(info, struct stifb_info, info);
+
+	SETUP_COPYAREA(fb);
+
+	SETUP_HW(fb);
+	if (fb->info.var.bits_per_pixel == 32) {
+		WRITE_WORD(0xBBA0A000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff);
+	} else {
+		WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff);
+	}
+
+	NGLE_QUICK_SET_IMAGE_BITMAP_OP(fb,
+		IBOvals(RopSrc, MaskAddrOffset(0),
+		BitmapExtent08, StaticReg(1),
+		DataDynamic, MaskOtc, BGx(0), FGx(0)));
+
+	WRITE_WORD(((area->sx << 16) | area->sy), fb, REG_24);
+	WRITE_WORD(((area->width << 16) | area->height), fb, REG_7);
+	WRITE_WORD(((area->dx << 16) | area->dy), fb, REG_25);
+
+	SETUP_FB(fb);
+}
+
 static void __init
 stifb_init_display(struct stifb_info *fb)
 {
@@ -1069,7 +1105,7 @@
 	.fb_setcolreg	= stifb_setcolreg,
 	.fb_blank	= stifb_blank,
 	.fb_fillrect	= cfb_fillrect,
-	.fb_copyarea	= cfb_copyarea,
+	.fb_copyarea	= stifb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
 };
 
@@ -1258,7 +1294,7 @@
 	info->fbops = &stifb_ops;
 	info->screen_base = ioremap_nocache(REGION_BASE(fb,1), fix->smem_len);
 	info->screen_size = fix->smem_len;
-	info->flags = FBINFO_DEFAULT;
+	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA;
 	info->pseudo_palette = &fb->pseudo_palette;
 
 	/* This has to be done !!! */
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 510040b..b1dc518 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -540,8 +540,7 @@
 	unlock_new_inode(inode);
 	return inode;
 error:
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ERR_PTR(retval);
 
 }
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 09e44337..e8aa57d 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -149,8 +149,7 @@
 	unlock_new_inode(inode);
 	return inode;
 error:
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ERR_PTR(retval);
 
 }
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0ef5cc1..81220b2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,6 +44,8 @@
 #define BTRFS_INODE_IN_DELALLOC_LIST		9
 #define BTRFS_INODE_READDIO_NEED_LOCK		10
 #define BTRFS_INODE_HAS_PROPS		        11
+/* DIO is ready to submit */
+#define BTRFS_INODE_DIO_READY		        12
 /*
  * The following 3 bits are meant only for the btree inode.
  * When any of them is set, it means an error happened while writing an
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80a9aef..aac314e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1778,6 +1778,7 @@
 	spinlock_t unused_bgs_lock;
 	struct list_head unused_bgs;
 	struct mutex unused_bg_unpin_mutex;
+	struct mutex delete_unused_bgs_mutex;
 
 	/* For btrfs to record security options */
 	struct security_mnt_opts security_opts;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3f43bfea..a9aadb2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1751,6 +1751,7 @@
 {
 	struct btrfs_root *root = arg;
 	int again;
+	struct btrfs_trans_handle *trans;
 
 	do {
 		again = 0;
@@ -1772,7 +1773,6 @@
 		}
 
 		btrfs_run_delayed_iputs(root);
-		btrfs_delete_unused_bgs(root->fs_info);
 		again = btrfs_clean_one_deleted_snapshot(root);
 		mutex_unlock(&root->fs_info->cleaner_mutex);
 
@@ -1781,6 +1781,16 @@
 		 * needn't do anything special here.
 		 */
 		btrfs_run_defrag_inodes(root->fs_info);
+
+		/*
+		 * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+		 * with relocation (btrfs_relocate_chunk) and relocation
+		 * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
+		 * after acquiring fs_info->delete_unused_bgs_mutex. So we
+		 * can't hold, nor need to, fs_info->cleaner_mutex when deleting
+		 * unused block groups.
+		 */
+		btrfs_delete_unused_bgs(root->fs_info);
 sleep:
 		if (!try_to_freeze() && !again) {
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -1789,6 +1799,34 @@
 			__set_current_state(TASK_RUNNING);
 		}
 	} while (!kthread_should_stop());
+
+	/*
+	 * Transaction kthread is stopped before us and wakes us up.
+	 * However we might have started a new transaction and COWed some
+	 * tree blocks when deleting unused block groups for example. So
+	 * make sure we commit the transaction we started to have a clean
+	 * shutdown when evicting the btree inode - if it has dirty pages
+	 * when we do the final iput() on it, eviction will trigger a
+	 * writeback for it which will fail with null pointer dereferences
+	 * since work queues and other resources were already released and
+	 * destroyed by the time the iput/eviction/writeback is made.
+	 */
+	trans = btrfs_attach_transaction(root);
+	if (IS_ERR(trans)) {
+		if (PTR_ERR(trans) != -ENOENT)
+			btrfs_err(root->fs_info,
+				  "cleaner transaction attach returned %ld",
+				  PTR_ERR(trans));
+	} else {
+		int ret;
+
+		ret = btrfs_commit_transaction(trans, root);
+		if (ret)
+			btrfs_err(root->fs_info,
+				  "cleaner open transaction commit returned %d",
+				  ret);
+	}
+
 	return 0;
 }
 
@@ -2492,6 +2530,7 @@
 	spin_lock_init(&fs_info->unused_bgs_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->unused_bg_unpin_mutex);
+	mutex_init(&fs_info->delete_unused_bgs_mutex);
 	mutex_init(&fs_info->reloc_mutex);
 	mutex_init(&fs_info->delalloc_root_mutex);
 	seqlock_init(&fs_info->profiles_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 38b76cc..1c2bd17 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9889,6 +9889,8 @@
 		}
 		spin_unlock(&fs_info->unused_bgs_lock);
 
+		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
+
 		/* Don't want to race with allocators so take the groups_sem */
 		down_write(&space_info->groups_sem);
 		spin_lock(&block_group->lock);
@@ -9983,6 +9985,7 @@
 end_trans:
 		btrfs_end_transaction(trans, root);
 next:
+		mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		btrfs_put_block_group(block_group);
 		spin_lock(&fs_info->unused_bgs_lock);
 	}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index f6a596d..d4a582a 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -246,6 +246,7 @@
 {
 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 	struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
+	spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
 	struct btrfs_free_space *info;
 	struct rb_node *n;
 	u64 count;
@@ -254,24 +255,30 @@
 		return;
 
 	while (1) {
+		bool add_to_ctl = true;
+
+		spin_lock(rbroot_lock);
 		n = rb_first(rbroot);
-		if (!n)
+		if (!n) {
+			spin_unlock(rbroot_lock);
 			break;
+		}
 
 		info = rb_entry(n, struct btrfs_free_space, offset_index);
 		BUG_ON(info->bitmap); /* Logic error */
 
 		if (info->offset > root->ino_cache_progress)
-			goto free;
+			add_to_ctl = false;
 		else if (info->offset + info->bytes > root->ino_cache_progress)
 			count = root->ino_cache_progress - info->offset + 1;
 		else
 			count = info->bytes;
 
-		__btrfs_add_free_space(ctl, info->offset, count);
-free:
 		rb_erase(&info->offset_index, rbroot);
-		kfree(info);
+		spin_unlock(rbroot_lock);
+		if (add_to_ctl)
+			__btrfs_add_free_space(ctl, info->offset, count);
+		kmem_cache_free(btrfs_free_space_cachep, info);
 	}
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 855935f..b33c0cf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4989,8 +4989,9 @@
 	/*
 	 * Keep looping until we have no more ranges in the io tree.
 	 * We can have ongoing bios started by readpages (called from readahead)
-	 * that didn't get their end io callbacks called yet or they are still
-	 * in progress ((extent_io.c:end_bio_extent_readpage()). This means some
+	 * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+	 * still in progress (unlocked the pages in the bio but did not yet
+	 * unlocked the ranges in the io tree). Therefore this means some
 	 * ranges can still be locked and eviction started because before
 	 * submitting those bios, which are executed by a separate task (work
 	 * queue kthread), inode references (inode->i_count) were not taken
@@ -7546,6 +7547,7 @@
 
 		current->journal_info = outstanding_extents;
 		btrfs_free_reserved_data_space(inode, len);
+		set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
 	}
 
 	/*
@@ -7871,8 +7873,6 @@
 	struct bio *dio_bio;
 	int ret;
 
-	if (err)
-		goto out_done;
 again:
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
@@ -7895,7 +7895,6 @@
 		ordered = NULL;
 		goto again;
 	}
-out_done:
 	dio_bio = dip->dio_bio;
 
 	kfree(dip);
@@ -8163,9 +8162,8 @@
 static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 				struct inode *inode, loff_t file_offset)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_dio_private *dip;
-	struct bio *io_bio;
+	struct btrfs_dio_private *dip = NULL;
+	struct bio *io_bio = NULL;
 	struct btrfs_io_bio *btrfs_bio;
 	int skip_sum;
 	int write = rw & REQ_WRITE;
@@ -8182,7 +8180,7 @@
 	dip = kzalloc(sizeof(*dip), GFP_NOFS);
 	if (!dip) {
 		ret = -ENOMEM;
-		goto free_io_bio;
+		goto free_ordered;
 	}
 
 	dip->private = dio_bio->bi_private;
@@ -8210,25 +8208,55 @@
 
 	if (btrfs_bio->end_io)
 		btrfs_bio->end_io(btrfs_bio, ret);
-free_io_bio:
-	bio_put(io_bio);
 
 free_ordered:
 	/*
-	 * If this is a write, we need to clean up the reserved space and kill
-	 * the ordered extent.
+	 * If we arrived here it means either we failed to submit the dip
+	 * or we either failed to clone the dio_bio or failed to allocate the
+	 * dip. If we cloned the dio_bio and allocated the dip, we can just
+	 * call bio_endio against our io_bio so that we get proper resource
+	 * cleanup if we fail to submit the dip, otherwise, we must do the
+	 * same as btrfs_endio_direct_[write|read] because we can't call these
+	 * callbacks - they require an allocated dip and a clone of dio_bio.
 	 */
-	if (write) {
-		struct btrfs_ordered_extent *ordered;
-		ordered = btrfs_lookup_ordered_extent(inode, file_offset);
-		if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
-			btrfs_free_reserved_extent(root, ordered->start,
-						   ordered->disk_len, 1);
-		btrfs_put_ordered_extent(ordered);
-		btrfs_put_ordered_extent(ordered);
+	if (io_bio && dip) {
+		bio_endio(io_bio, ret);
+		/*
+		 * The end io callbacks free our dip, do the final put on io_bio
+		 * and all the cleanup and final put for dio_bio (through
+		 * dio_end_io()).
+		 */
+		dip = NULL;
+		io_bio = NULL;
+	} else {
+		if (write) {
+			struct btrfs_ordered_extent *ordered;
+
+			ordered = btrfs_lookup_ordered_extent(inode,
+							      file_offset);
+			set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+			/*
+			 * Decrements our ref on the ordered extent and removes
+			 * the ordered extent from the inode's ordered tree,
+			 * doing all the proper resource cleanup such as for the
+			 * reserved space and waking up any waiters for this
+			 * ordered extent (through btrfs_remove_ordered_extent).
+			 */
+			btrfs_finish_ordered_io(ordered);
+		} else {
+			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+			      file_offset + dio_bio->bi_iter.bi_size - 1);
+		}
+		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+		/*
+		 * Releases and cleans up our dio_bio, no need to bio_put()
+		 * nor bio_endio()/bio_io_error() against dio_bio.
+		 */
+		dio_end_io(dio_bio, ret);
 	}
-	bio_endio(dio_bio, ret);
+	if (io_bio)
+		bio_put(io_bio);
+	kfree(dip);
 }
 
 static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
@@ -8330,9 +8358,18 @@
 				   btrfs_submit_direct, flags);
 	if (iov_iter_rw(iter) == WRITE) {
 		current->journal_info = NULL;
-		if (ret < 0 && ret != -EIOCBQUEUED)
-			btrfs_delalloc_release_space(inode, count);
-		else if (ret >= 0 && (size_t)ret < count)
+		if (ret < 0 && ret != -EIOCBQUEUED) {
+			/*
+			 * If the error comes from submitting stage,
+			 * btrfs_get_blocsk_direct() has free'd data space,
+			 * and metadata space will be handled by
+			 * finish_ordered_fn, don't do that again to make
+			 * sure bytes_may_use is correct.
+			 */
+			if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
+				     &BTRFS_I(inode)->runtime_flags))
+				btrfs_delalloc_release_space(inode, count);
+		} else if (ret >= 0 && (size_t)ret < count)
 			btrfs_delalloc_release_space(inode,
 						     count - (size_t)ret);
 	}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c86b835..5d91776 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -87,7 +87,8 @@
 
 
 static int btrfs_clone(struct inode *src, struct inode *inode,
-		       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
+		       int no_time_update);
 
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -2765,14 +2766,11 @@
 	return ret;
 }
 
-static struct page *extent_same_get_page(struct inode *inode, u64 off)
+static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
 {
 	struct page *page;
-	pgoff_t index;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
-	index = off >> PAGE_CACHE_SHIFT;
-
 	page = grab_cache_page(inode->i_mapping, index);
 	if (!page)
 		return NULL;
@@ -2793,6 +2791,20 @@
 	return page;
 }
 
+static int gather_extent_pages(struct inode *inode, struct page **pages,
+			       int num_pages, u64 off)
+{
+	int i;
+	pgoff_t index = off >> PAGE_CACHE_SHIFT;
+
+	for (i = 0; i < num_pages; i++) {
+		pages[i] = extent_same_get_page(inode, index + i);
+		if (!pages[i])
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
 {
 	/* do any pending delalloc/csum calc on src, one way or
@@ -2818,52 +2830,120 @@
 	}
 }
 
-static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
-				struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
 {
-	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
-	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
-
 	mutex_unlock(&inode1->i_mutex);
 	mutex_unlock(&inode2->i_mutex);
 }
 
-static void btrfs_double_lock(struct inode *inode1, u64 loff1,
-			      struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
+{
+	if (inode1 < inode2)
+		swap(inode1, inode2);
+
+	mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+	if (inode1 != inode2)
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+}
+
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+				      struct inode *inode2, u64 loff2, u64 len)
+{
+	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+				     struct inode *inode2, u64 loff2, u64 len)
 {
 	if (inode1 < inode2) {
 		swap(inode1, inode2);
 		swap(loff1, loff2);
 	}
-
-	mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
 	lock_extent_range(inode1, loff1, len);
-	if (inode1 != inode2) {
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+	if (inode1 != inode2)
 		lock_extent_range(inode2, loff2, len);
+}
+
+struct cmp_pages {
+	int		num_pages;
+	struct page	**src_pages;
+	struct page	**dst_pages;
+};
+
+static void btrfs_cmp_data_free(struct cmp_pages *cmp)
+{
+	int i;
+	struct page *pg;
+
+	for (i = 0; i < cmp->num_pages; i++) {
+		pg = cmp->src_pages[i];
+		if (pg)
+			page_cache_release(pg);
+		pg = cmp->dst_pages[i];
+		if (pg)
+			page_cache_release(pg);
 	}
+	kfree(cmp->src_pages);
+	kfree(cmp->dst_pages);
+}
+
+static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
+				  struct inode *dst, u64 dst_loff,
+				  u64 len, struct cmp_pages *cmp)
+{
+	int ret;
+	int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+	struct page **src_pgarr, **dst_pgarr;
+
+	/*
+	 * We must gather up all the pages before we initiate our
+	 * extent locking. We use an array for the page pointers. Size
+	 * of the array is bounded by len, which is in turn bounded by
+	 * BTRFS_MAX_DEDUPE_LEN.
+	 */
+	src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+	dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+	if (!src_pgarr || !dst_pgarr) {
+		kfree(src_pgarr);
+		kfree(dst_pgarr);
+		return -ENOMEM;
+	}
+	cmp->num_pages = num_pages;
+	cmp->src_pages = src_pgarr;
+	cmp->dst_pages = dst_pgarr;
+
+	ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
+	if (ret)
+		goto out;
+
+	ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
+
+out:
+	if (ret)
+		btrfs_cmp_data_free(cmp);
+	return 0;
 }
 
 static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
-			  u64 dst_loff, u64 len)
+			  u64 dst_loff, u64 len, struct cmp_pages *cmp)
 {
 	int ret = 0;
+	int i;
 	struct page *src_page, *dst_page;
 	unsigned int cmp_len = PAGE_CACHE_SIZE;
 	void *addr, *dst_addr;
 
+	i = 0;
 	while (len) {
 		if (len < PAGE_CACHE_SIZE)
 			cmp_len = len;
 
-		src_page = extent_same_get_page(src, loff);
-		if (!src_page)
-			return -EINVAL;
-		dst_page = extent_same_get_page(dst, dst_loff);
-		if (!dst_page) {
-			page_cache_release(src_page);
-			return -EINVAL;
-		}
+		BUG_ON(i >= cmp->num_pages);
+
+		src_page = cmp->src_pages[i];
+		dst_page = cmp->dst_pages[i];
+
 		addr = kmap_atomic(src_page);
 		dst_addr = kmap_atomic(dst_page);
 
@@ -2875,15 +2955,12 @@
 
 		kunmap_atomic(addr);
 		kunmap_atomic(dst_addr);
-		page_cache_release(src_page);
-		page_cache_release(dst_page);
 
 		if (ret)
 			break;
 
-		loff += cmp_len;
-		dst_loff += cmp_len;
 		len -= cmp_len;
+		i++;
 	}
 
 	return ret;
@@ -2914,27 +2991,62 @@
 {
 	int ret;
 	u64 len = olen;
+	struct cmp_pages cmp;
+	int same_inode = 0;
+	u64 same_lock_start = 0;
+	u64 same_lock_len = 0;
 
-	/*
-	 * btrfs_clone() can't handle extents in the same file
-	 * yet. Once that works, we can drop this check and replace it
-	 * with a check for the same inode, but overlapping extents.
-	 */
 	if (src == dst)
-		return -EINVAL;
+		same_inode = 1;
 
 	if (len == 0)
 		return 0;
 
-	btrfs_double_lock(src, loff, dst, dst_loff, len);
+	if (same_inode) {
+		mutex_lock(&src->i_mutex);
 
-	ret = extent_same_check_offsets(src, loff, &len, olen);
-	if (ret)
-		goto out_unlock;
+		ret = extent_same_check_offsets(src, loff, &len, olen);
+		if (ret)
+			goto out_unlock;
 
-	ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
-	if (ret)
-		goto out_unlock;
+		/*
+		 * Single inode case wants the same checks, except we
+		 * don't want our length pushed out past i_size as
+		 * comparing that data range makes no sense.
+		 *
+		 * extent_same_check_offsets() will do this for an
+		 * unaligned length at i_size, so catch it here and
+		 * reject the request.
+		 *
+		 * This effectively means we require aligned extents
+		 * for the single-inode case, whereas the other cases
+		 * allow an unaligned length so long as it ends at
+		 * i_size.
+		 */
+		if (len != olen) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* Check for overlapping ranges */
+		if (dst_loff + len > loff && dst_loff < loff + len) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		same_lock_start = min_t(u64, loff, dst_loff);
+		same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
+	} else {
+		btrfs_double_inode_lock(src, dst);
+
+		ret = extent_same_check_offsets(src, loff, &len, olen);
+		if (ret)
+			goto out_unlock;
+
+		ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
+		if (ret)
+			goto out_unlock;
+	}
 
 	/* don't make the dst file partly checksummed */
 	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
@@ -2943,12 +3055,32 @@
 		goto out_unlock;
 	}
 
-	ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
-	if (ret == 0)
-		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
+	ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+	if (ret)
+		goto out_unlock;
 
+	if (same_inode)
+		lock_extent_range(src, same_lock_start, same_lock_len);
+	else
+		btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+
+	/* pass original length for comparison so we stay within i_size */
+	ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
+	if (ret == 0)
+		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
+
+	if (same_inode)
+		unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
+			      same_lock_start + same_lock_len - 1);
+	else
+		btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
+
+	btrfs_cmp_data_free(&cmp);
 out_unlock:
-	btrfs_double_unlock(src, loff, dst, dst_loff, len);
+	if (same_inode)
+		mutex_unlock(&src->i_mutex);
+	else
+		btrfs_double_inode_unlock(src, dst);
 
 	return ret;
 }
@@ -3100,13 +3232,15 @@
 				     struct inode *inode,
 				     u64 endoff,
 				     const u64 destoff,
-				     const u64 olen)
+				     const u64 olen,
+				     int no_time_update)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret;
 
 	inode_inc_iversion(inode);
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	if (!no_time_update)
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	/*
 	 * We round up to the block size at eof when determining which
 	 * extents to clone above, but shouldn't round up the file size.
@@ -3191,13 +3325,13 @@
  * @inode: Inode to clone to
  * @off: Offset within source to start clone from
  * @olen: Original length, passed by user, of range to clone
- * @olen_aligned: Block-aligned value of olen, extent_same uses
- *               identical values here
+ * @olen_aligned: Block-aligned value of olen
  * @destoff: Offset within @inode to start clone
+ * @no_time_update: Whether to update mtime/ctime on the target inode
  */
 static int btrfs_clone(struct inode *src, struct inode *inode,
 		       const u64 off, const u64 olen, const u64 olen_aligned,
-		       const u64 destoff)
+		       const u64 destoff, int no_time_update)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path = NULL;
@@ -3521,7 +3655,8 @@
 					      root->sectorsize);
 			ret = clone_finish_inode_update(trans, inode,
 							last_dest_end,
-							destoff, olen);
+							destoff, olen,
+							no_time_update);
 			if (ret)
 				goto out;
 			if (new_key.offset + datal >= destoff + len)
@@ -3559,7 +3694,7 @@
 		clone_update_extent_map(inode, trans, NULL, last_dest_end,
 					destoff + len - last_dest_end);
 		ret = clone_finish_inode_update(trans, inode, destoff + len,
-						destoff, olen);
+						destoff, olen, no_time_update);
 	}
 
 out:
@@ -3696,7 +3831,7 @@
 		lock_extent_range(inode, destoff, len);
 	}
 
-	ret = btrfs_clone(src, inode, off, olen, len, destoff);
+	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
 
 	if (same_inode) {
 		u64 lock_start = min_t(u64, off, destoff);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 89656d7..52170cf 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -552,6 +552,10 @@
 	trace_btrfs_ordered_extent_put(entry->inode, entry);
 
 	if (atomic_dec_and_test(&entry->refs)) {
+		ASSERT(list_empty(&entry->log_list));
+		ASSERT(list_empty(&entry->trans_list));
+		ASSERT(list_empty(&entry->root_extent_list));
+		ASSERT(RB_EMPTY_NODE(&entry->rb_node));
 		if (entry->inode)
 			btrfs_add_delayed_iput(entry->inode);
 		while (!list_empty(&entry->list)) {
@@ -579,6 +583,7 @@
 	spin_lock_irq(&tree->lock);
 	node = &entry->rb_node;
 	rb_erase(node, &tree->tree);
+	RB_CLEAR_NODE(node);
 	if (tree->last == node)
 		tree->last = NULL;
 	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d5f1f03..e9ace09 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1349,6 +1349,11 @@
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
+	/* Sometimes we would want to clear the limit on this qgroup.
+	 * To meet this requirement, we treat the -1 as a special value
+	 * which tell kernel to clear the limit on this qgroup.
+	 */
+	const u64 CLEAR_VALUE = -1;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	quota_root = fs_info->quota_root;
@@ -1364,14 +1369,42 @@
 	}
 
 	spin_lock(&fs_info->qgroup_lock);
-	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
-		qgroup->max_rfer = limit->max_rfer;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
-		qgroup->max_excl = limit->max_excl;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
-		qgroup->rsv_rfer = limit->rsv_rfer;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
-		qgroup->rsv_excl = limit->rsv_excl;
+	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
+		if (limit->max_rfer == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+			qgroup->max_rfer = 0;
+		} else {
+			qgroup->max_rfer = limit->max_rfer;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
+		if (limit->max_excl == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+			qgroup->max_excl = 0;
+		} else {
+			qgroup->max_excl = limit->max_excl;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
+		if (limit->rsv_rfer == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+			qgroup->rsv_rfer = 0;
+		} else {
+			qgroup->rsv_rfer = limit->rsv_rfer;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
+		if (limit->rsv_excl == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+			qgroup->rsv_excl = 0;
+		} else {
+			qgroup->rsv_excl = limit->rsv_excl;
+		}
+	}
 	qgroup->lim_flags |= limit->flags;
 
 	spin_unlock(&fs_info->qgroup_lock);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 827951f..88cbb59 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4049,7 +4049,7 @@
 	if (trans && progress && err == -ENOSPC) {
 		ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
 					      rc->block_group->flags);
-		if (ret == 0) {
+		if (ret == 1) {
 			err = 0;
 			progress = 0;
 			goto restart;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9f2feab..94db0fa 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3571,7 +3571,6 @@
 static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 						int is_dev_replace)
 {
-	int ret = 0;
 	unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
 	int max_active = fs_info->thread_pool_size;
 
@@ -3584,34 +3583,36 @@
 			fs_info->scrub_workers =
 				btrfs_alloc_workqueue("btrfs-scrub", flags,
 						      max_active, 4);
-		if (!fs_info->scrub_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_workers)
+			goto fail_scrub_workers;
+
 		fs_info->scrub_wr_completion_workers =
 			btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
 					      max_active, 2);
-		if (!fs_info->scrub_wr_completion_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_wr_completion_workers)
+			goto fail_scrub_wr_completion_workers;
+
 		fs_info->scrub_nocow_workers =
 			btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
-		if (!fs_info->scrub_nocow_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_nocow_workers)
+			goto fail_scrub_nocow_workers;
 		fs_info->scrub_parity_workers =
 			btrfs_alloc_workqueue("btrfs-scrubparity", flags,
 					      max_active, 2);
-		if (!fs_info->scrub_parity_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_parity_workers)
+			goto fail_scrub_parity_workers;
 	}
 	++fs_info->scrub_workers_refcnt;
-out:
-	return ret;
+	return 0;
+
+fail_scrub_parity_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+fail_scrub_nocow_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+fail_scrub_wr_completion_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_workers);
+fail_scrub_workers:
+	return -ENOMEM;
 }
 
 static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1ce80c1..9c45431 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4117,6 +4117,187 @@
 	return 0;
 }
 
+/*
+ * At the moment we always log all xattrs. This is to figure out at log replay
+ * time which xattrs must have their deletion replayed. If a xattr is missing
+ * in the log tree and exists in the fs/subvol tree, we delete it. This is
+ * because if a xattr is deleted, the inode is fsynced and a power failure
+ * happens, causing the log to be replayed the next time the fs is mounted,
+ * we want the xattr to not exist anymore (same behaviour as other filesystems
+ * with a journal, ext3/4, xfs, f2fs, etc).
+ */
+static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct inode *inode,
+				struct btrfs_path *path,
+				struct btrfs_path *dst_path)
+{
+	int ret;
+	struct btrfs_key key;
+	const u64 ino = btrfs_ino(inode);
+	int ins_nr = 0;
+	int start_slot = 0;
+
+	key.objectid = ino;
+	key.type = BTRFS_XATTR_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	while (true) {
+		int slot = path->slots[0];
+		struct extent_buffer *leaf = path->nodes[0];
+		int nritems = btrfs_header_nritems(leaf);
+
+		if (slot >= nritems) {
+			if (ins_nr > 0) {
+				u64 last_extent = 0;
+
+				ret = copy_items(trans, inode, dst_path, path,
+						 &last_extent, start_slot,
+						 ins_nr, 1, 0);
+				/* can't be 1, extent items aren't processed */
+				ASSERT(ret <= 0);
+				if (ret < 0)
+					return ret;
+				ins_nr = 0;
+			}
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				return ret;
+			else if (ret > 0)
+				break;
+			continue;
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
+			break;
+
+		if (ins_nr == 0)
+			start_slot = slot;
+		ins_nr++;
+		path->slots[0]++;
+		cond_resched();
+	}
+	if (ins_nr > 0) {
+		u64 last_extent = 0;
+
+		ret = copy_items(trans, inode, dst_path, path,
+				 &last_extent, start_slot,
+				 ins_nr, 1, 0);
+		/* can't be 1, extent items aren't processed */
+		ASSERT(ret <= 0);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * If the no holes feature is enabled we need to make sure any hole between the
+ * last extent and the i_size of our inode is explicitly marked in the log. This
+ * is to make sure that doing something like:
+ *
+ *      1) create file with 128Kb of data
+ *      2) truncate file to 64Kb
+ *      3) truncate file to 256Kb
+ *      4) fsync file
+ *      5) <crash/power failure>
+ *      6) mount fs and trigger log replay
+ *
+ * Will give us a file with a size of 256Kb, the first 64Kb of data match what
+ * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
+ * file correspond to a hole. The presence of explicit holes in a log tree is
+ * what guarantees that log replay will remove/adjust file extent items in the
+ * fs/subvol tree.
+ *
+ * Here we do not need to care about holes between extents, that is already done
+ * by copy_items(). We also only need to do this in the full sync path, where we
+ * lookup for extents from the fs/subvol tree only. In the fast path case, we
+ * lookup the list of modified extent maps and if any represents a hole, we
+ * insert a corresponding extent representing a hole in the log tree.
+ */
+static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct inode *inode,
+				   struct btrfs_path *path)
+{
+	int ret;
+	struct btrfs_key key;
+	u64 hole_start;
+	u64 hole_size;
+	struct extent_buffer *leaf;
+	struct btrfs_root *log = root->log_root;
+	const u64 ino = btrfs_ino(inode);
+	const u64 i_size = i_size_read(inode);
+
+	if (!btrfs_fs_incompat(root->fs_info, NO_HOLES))
+		return 0;
+
+	key.objectid = ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	ASSERT(ret != 0);
+	if (ret < 0)
+		return ret;
+
+	ASSERT(path->slots[0] > 0);
+	path->slots[0]--;
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+	if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
+		/* inode does not have any extents */
+		hole_start = 0;
+		hole_size = i_size;
+	} else {
+		struct btrfs_file_extent_item *extent;
+		u64 len;
+
+		/*
+		 * If there's an extent beyond i_size, an explicit hole was
+		 * already inserted by copy_items().
+		 */
+		if (key.offset >= i_size)
+			return 0;
+
+		extent = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, extent) ==
+		    BTRFS_FILE_EXTENT_INLINE) {
+			len = btrfs_file_extent_inline_len(leaf,
+							   path->slots[0],
+							   extent);
+			ASSERT(len == i_size);
+			return 0;
+		}
+
+		len = btrfs_file_extent_num_bytes(leaf, extent);
+		/* Last extent goes beyond i_size, no need to log a hole. */
+		if (key.offset + len > i_size)
+			return 0;
+		hole_start = key.offset + len;
+		hole_size = i_size - hole_start;
+	}
+	btrfs_release_path(path);
+
+	/* Last extent ends at i_size. */
+	if (hole_size == 0)
+		return 0;
+
+	hole_size = ALIGN(hole_size, root->sectorsize);
+	ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
+				       hole_size, 0, hole_size, 0, 0, 0);
+	return ret;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4155,6 +4336,7 @@
 	u64 ino = btrfs_ino(inode);
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	u64 logged_isize = 0;
+	bool need_log_inode_item = true;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -4263,11 +4445,6 @@
 		} else {
 			if (inode_only == LOG_INODE_ALL)
 				fast_search = true;
-			ret = log_inode_item(trans, log, dst_path, inode);
-			if (ret) {
-				err = ret;
-				goto out_unlock;
-			}
 			goto log_extents;
 		}
 
@@ -4290,6 +4467,28 @@
 		if (min_key.type > max_key.type)
 			break;
 
+		if (min_key.type == BTRFS_INODE_ITEM_KEY)
+			need_log_inode_item = false;
+
+		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+		if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
+			if (ins_nr == 0)
+				goto next_slot;
+			ret = copy_items(trans, inode, dst_path, path,
+					 &last_extent, ins_start_slot,
+					 ins_nr, inode_only, logged_isize);
+			if (ret < 0) {
+				err = ret;
+				goto out_unlock;
+			}
+			ins_nr = 0;
+			if (ret) {
+				btrfs_release_path(path);
+				continue;
+			}
+			goto next_slot;
+		}
+
 		src = path->nodes[0];
 		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
 			ins_nr++;
@@ -4357,9 +4556,26 @@
 		ins_nr = 0;
 	}
 
+	btrfs_release_path(path);
+	btrfs_release_path(dst_path);
+	err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+	if (err)
+		goto out_unlock;
+	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+		btrfs_release_path(path);
+		btrfs_release_path(dst_path);
+		err = btrfs_log_trailing_hole(trans, root, inode, path);
+		if (err)
+			goto out_unlock;
+	}
 log_extents:
 	btrfs_release_path(path);
 	btrfs_release_path(dst_path);
+	if (need_log_inode_item) {
+		err = log_inode_item(trans, log, dst_path, inode);
+		if (err)
+			goto out_unlock;
+	}
 	if (fast_search) {
 		/*
 		 * Some ordered extents started by fsync might have completed
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4b438b4..fbe7c10 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2766,6 +2766,20 @@
 	root = root->fs_info->chunk_root;
 	extent_root = root->fs_info->extent_root;
 
+	/*
+	 * Prevent races with automatic removal of unused block groups.
+	 * After we relocate and before we remove the chunk with offset
+	 * chunk_offset, automatic removal of the block group can kick in,
+	 * resulting in a failure when calling btrfs_remove_chunk() below.
+	 *
+	 * Make sure to acquire this mutex before doing a tree search (dev
+	 * or chunk trees) to find chunks. Otherwise the cleaner kthread might
+	 * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
+	 * we release the path used to search the chunk/dev tree and before
+	 * the current task acquires this mutex and calls us.
+	 */
+	ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
+
 	ret = btrfs_can_relocate(extent_root, chunk_offset);
 	if (ret)
 		return -ENOSPC;
@@ -2814,13 +2828,18 @@
 	key.type = BTRFS_CHUNK_ITEM_KEY;
 
 	while (1) {
+		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
 		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-		if (ret < 0)
+		if (ret < 0) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			goto error;
+		}
 		BUG_ON(ret == 0); /* Corruption */
 
 		ret = btrfs_previous_item(chunk_root, path, key.objectid,
 					  key.type);
+		if (ret)
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		if (ret < 0)
 			goto error;
 		if (ret > 0)
@@ -2843,6 +2862,7 @@
 			else
 				BUG_ON(ret);
 		}
+		mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 
 		if (found_key.offset == 0)
 			break;
@@ -3299,9 +3319,12 @@
 			goto error;
 		}
 
+		mutex_lock(&fs_info->delete_unused_bgs_mutex);
 		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-		if (ret < 0)
+		if (ret < 0) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			goto error;
+		}
 
 		/*
 		 * this shouldn't happen, it means the last relocate
@@ -3313,6 +3336,7 @@
 		ret = btrfs_previous_item(chunk_root, path, 0,
 					  BTRFS_CHUNK_ITEM_KEY);
 		if (ret) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			ret = 0;
 			break;
 		}
@@ -3321,8 +3345,10 @@
 		slot = path->slots[0];
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
-		if (found_key.objectid != key.objectid)
+		if (found_key.objectid != key.objectid) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			break;
+		}
 
 		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 
@@ -3335,10 +3361,13 @@
 		ret = should_balance_chunk(chunk_root, leaf, chunk,
 					   found_key.offset);
 		btrfs_release_path(path);
-		if (!ret)
+		if (!ret) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			goto loop;
+		}
 
 		if (counting) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			spin_lock(&fs_info->balance_lock);
 			bctl->stat.expected++;
 			spin_unlock(&fs_info->balance_lock);
@@ -3348,6 +3377,7 @@
 		ret = btrfs_relocate_chunk(chunk_root,
 					   found_key.objectid,
 					   found_key.offset);
+		mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 		if (ret && ret != -ENOSPC)
 			goto error;
 		if (ret == -ENOSPC) {
@@ -4087,11 +4117,16 @@
 	key.type = BTRFS_DEV_EXTENT_KEY;
 
 	do {
+		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-		if (ret < 0)
+		if (ret < 0) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			goto done;
+		}
 
 		ret = btrfs_previous_item(root, path, 0, key.type);
+		if (ret)
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		if (ret < 0)
 			goto done;
 		if (ret) {
@@ -4105,6 +4140,7 @@
 		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
 		if (key.objectid != device->devid) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			btrfs_release_path(path);
 			break;
 		}
@@ -4113,6 +4149,7 @@
 		length = btrfs_dev_extent_length(l, dev_extent);
 
 		if (key.offset + length <= new_size) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			btrfs_release_path(path);
 			break;
 		}
@@ -4122,6 +4159,7 @@
 		btrfs_release_path(path);
 
 		ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
+		mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		if (ret && ret != -ENOSPC)
 			goto done;
 		if (ret == -ENOSPC)
@@ -5715,7 +5753,6 @@
 static void btrfs_end_bio(struct bio *bio, int err)
 {
 	struct btrfs_bio *bbio = bio->bi_private;
-	struct btrfs_device *dev = bbio->stripes[0].dev;
 	int is_orig_bio = 0;
 
 	if (err) {
@@ -5723,6 +5760,7 @@
 		if (err == -EIO || err == -EREMOTEIO) {
 			unsigned int stripe_index =
 				btrfs_io_bio(bio)->stripe_index;
+			struct btrfs_device *dev;
 
 			BUG_ON(stripe_index >= bbio->num_stripes);
 			dev = bbio->stripes[stripe_index].dev;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6b8e2f0..48851f6 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -896,6 +896,7 @@
 /* 'X' - originally XFS but some now in the VFS */
 COMPATIBLE_IOCTL(FIFREEZE)
 COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FITRIM)
 COMPATIBLE_IOCTL(KDGETKEYCODE)
 COMPATIBLE_IOCTL(KDSETKEYCODE)
 COMPATIBLE_IOCTL(KDGKBTYPE)
diff --git a/fs/dcache.c b/fs/dcache.c
index 7a3f3e5..5c8ea15 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -642,7 +642,7 @@
 
 	/*
 	 * If we have a d_op->d_delete() operation, we sould not
-	 * let the dentry count go to zero, so use "put__or_lock".
+	 * let the dentry count go to zero, so use "put_or_lock".
 	 */
 	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
 		return lockref_put_or_lock(&dentry->d_lockref);
@@ -697,7 +697,7 @@
 	 */
 	smp_rmb();
 	d_flags = ACCESS_ONCE(dentry->d_flags);
-	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST;
+	d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
 
 	/* Nothing to do? Dropping the reference was all we needed? */
 	if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
@@ -776,6 +776,9 @@
 	if (unlikely(d_unhashed(dentry)))
 		goto kill_it;
 
+	if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+		goto kill_it;
+
 	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
 		if (dentry->d_op->d_delete(dentry))
 			goto kill_it;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 72afcc6..feef8a9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -325,7 +325,6 @@
 		return rc;
 
 	switch (cmd) {
-	case FITRIM:
 	case FS_IOC32_GETFLAGS:
 	case FS_IOC32_SETFLAGS:
 	case FS_IOC32_GETVERSION:
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index aadb728..2553aa8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -504,7 +504,7 @@
 	struct buffer_head		*bh;
 	int				err;
 
-	bh = sb_getblk(inode->i_sb, pblk);
+	bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS);
 	if (unlikely(!bh))
 		return ERR_PTR(-ENOMEM);
 
@@ -1089,7 +1089,7 @@
 		err = -EIO;
 		goto cleanup;
 	}
-	bh = sb_getblk(inode->i_sb, newblock);
+	bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
 	if (unlikely(!bh)) {
 		err = -ENOMEM;
 		goto cleanup;
@@ -1283,7 +1283,7 @@
 	if (newblock == 0)
 		return err;
 
-	bh = sb_getblk(inode->i_sb, newblock);
+	bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
 	if (unlikely(!bh))
 		return -ENOMEM;
 	lock_buffer(bh);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 41f8e55..cecf9aa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1323,7 +1323,7 @@
 					     unsigned int offset,
 					     unsigned int length)
 {
-	int to_release = 0;
+	int to_release = 0, contiguous_blks = 0;
 	struct buffer_head *head, *bh;
 	unsigned int curr_off = 0;
 	struct inode *inode = page->mapping->host;
@@ -1344,14 +1344,23 @@
 
 		if ((offset <= curr_off) && (buffer_delay(bh))) {
 			to_release++;
+			contiguous_blks++;
 			clear_buffer_delay(bh);
+		} else if (contiguous_blks) {
+			lblk = page->index <<
+			       (PAGE_CACHE_SHIFT - inode->i_blkbits);
+			lblk += (curr_off >> inode->i_blkbits) -
+				contiguous_blks;
+			ext4_es_remove_extent(inode, lblk, contiguous_blks);
+			contiguous_blks = 0;
 		}
 		curr_off = next_off;
 	} while ((bh = bh->b_this_page) != head);
 
-	if (to_release) {
+	if (contiguous_blks) {
 		lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-		ext4_es_remove_extent(inode, lblk, to_release);
+		lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
+		ext4_es_remove_extent(inode, lblk, contiguous_blks);
 	}
 
 	/* If we have released all the blocks belonging to a cluster, then we
@@ -4344,7 +4353,12 @@
 	int inode_size = EXT4_INODE_SIZE(sb);
 
 	oi.orig_ino = orig_ino;
-	ino = (orig_ino & ~(inodes_per_block - 1)) + 1;
+	/*
+	 * Calculate the first inode in the inode table block.  Inode
+	 * numbers are one-based.  That is, the first inode in a block
+	 * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
+	 */
+	ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
 	for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
 		if (ino == orig_ino)
 			continue;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index cb84512..1346cfa 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -755,7 +755,6 @@
 		return err;
 	}
 	case EXT4_IOC_MOVE_EXT:
-	case FITRIM:
 	case EXT4_IOC_RESIZE_FS:
 	case EXT4_IOC_PRECACHE_EXTENTS:
 	case EXT4_IOC_SET_ENCRYPTION_POLICY:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f6aedf8..34b610e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4816,18 +4816,12 @@
 		/*
 		 * blocks being freed are metadata. these blocks shouldn't
 		 * be used until this transaction is committed
+		 *
+		 * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
+		 * to fail.
 		 */
-	retry:
-		new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
-		if (!new_entry) {
-			/*
-			 * We use a retry loop because
-			 * ext4_free_blocks() is not allowed to fail.
-			 */
-			cond_resched();
-			congestion_wait(BLK_RW_ASYNC, HZ/50);
-			goto retry;
-		}
+		new_entry = kmem_cache_alloc(ext4_free_data_cachep,
+				GFP_NOFS|__GFP_NOFAIL);
 		new_entry->efd_start_cluster = bit;
 		new_entry->efd_group = block_group;
 		new_entry->efd_count = count_clusters;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b52374e..6163ad2 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -620,6 +620,7 @@
 	struct ext4_inode_info		*ei = EXT4_I(inode);
 	struct ext4_extent		*ex;
 	unsigned int			i, len;
+	ext4_lblk_t			start, end;
 	ext4_fsblk_t			blk;
 	handle_t			*handle;
 	int				ret;
@@ -633,6 +634,14 @@
 				       EXT4_FEATURE_RO_COMPAT_BIGALLOC))
 		return -EOPNOTSUPP;
 
+	/*
+	 * In order to get correct extent info, force all delayed allocation
+	 * blocks to be allocated, otherwise delayed allocation blocks may not
+	 * be reflected and bypass the checks on extent header.
+	 */
+	if (test_opt(inode->i_sb, DELALLOC))
+		ext4_alloc_da_blocks(inode);
+
 	handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -650,11 +659,13 @@
 		goto errout;
 	}
 	if (eh->eh_entries == 0)
-		blk = len = 0;
+		blk = len = start = end = 0;
 	else {
 		len = le16_to_cpu(ex->ee_len);
 		blk = ext4_ext_pblock(ex);
-		if (len > EXT4_NDIR_BLOCKS) {
+		start = le32_to_cpu(ex->ee_block);
+		end = start + len - 1;
+		if (end >= EXT4_NDIR_BLOCKS) {
 			ret = -EOPNOTSUPP;
 			goto errout;
 		}
@@ -662,7 +673,7 @@
 
 	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
 	memset(ei->i_data, 0, sizeof(ei->i_data));
-	for (i=0; i < len; i++)
+	for (i = start; i <= end; i++)
 		ei->i_data[i] = cpu_to_le32(blk++);
 	ext4_mark_inode_dirty(handle, inode);
 errout:
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index f005046..d6a4b55 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -484,3 +484,98 @@
 	a->btree.first_free = cpu_to_le16(8);
 	return a;
 }
+
+static unsigned find_run(__le32 *bmp, unsigned *idx)
+{
+	unsigned len;
+	while (tstbits(bmp, *idx, 1)) {
+		(*idx)++;
+		if (unlikely(*idx >= 0x4000))
+			return 0;
+	}
+	len = 1;
+	while (!tstbits(bmp, *idx + len, 1))
+		len++;
+	return len;
+}
+
+static int do_trim(struct super_block *s, secno start, unsigned len, secno limit_start, secno limit_end, unsigned minlen, unsigned *result)
+{
+	int err;
+	secno end;
+	if (fatal_signal_pending(current))
+		return -EINTR;
+	end = start + len;
+	if (start < limit_start)
+		start = limit_start;
+	if (end > limit_end)
+		end = limit_end;
+	if (start >= end)
+		return 0;
+	if (end - start < minlen)
+		return 0;
+	err = sb_issue_discard(s, start, end - start, GFP_NOFS, 0);
+	if (err)
+		return err;
+	*result += end - start;
+	return 0;
+}
+
+int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned *result)
+{
+	int err = 0;
+	struct hpfs_sb_info *sbi = hpfs_sb(s);
+	unsigned idx, len, start_bmp, end_bmp;
+	__le32 *bmp;
+	struct quad_buffer_head qbh;
+
+	*result = 0;
+	if (!end || end > sbi->sb_fs_size)
+		end = sbi->sb_fs_size;
+	if (start >= sbi->sb_fs_size)
+		return 0;
+	if (minlen > 0x4000)
+		return 0;
+	if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) {
+		hpfs_lock(s);
+		if (s->s_flags & MS_RDONLY) {
+			err = -EROFS;
+			goto unlock_1;
+		}
+		if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
+			err = -EIO;
+			goto unlock_1;
+		}
+		idx = 0;
+		while ((len = find_run(bmp, &idx)) && !err) {
+			err = do_trim(s, sbi->sb_dirband_start + idx * 4, len * 4, start, end, minlen, result);
+			idx += len;
+		}
+		hpfs_brelse4(&qbh);
+unlock_1:
+		hpfs_unlock(s);
+	}
+	start_bmp = start >> 14;
+	end_bmp = (end + 0x3fff) >> 14;
+	while (start_bmp < end_bmp && !err) {
+		hpfs_lock(s);
+		if (s->s_flags & MS_RDONLY) {
+			err = -EROFS;
+			goto unlock_2;
+		}
+		if (!(bmp = hpfs_map_bitmap(s, start_bmp, &qbh, "trim"))) {
+			err = -EIO;
+			goto unlock_2;
+		}
+		idx = 0;
+		while ((len = find_run(bmp, &idx)) && !err) {
+			err = do_trim(s, (start_bmp << 14) + idx, len, start, end, minlen, result);
+			idx += len;
+		}
+		hpfs_brelse4(&qbh);
+unlock_2:
+		hpfs_unlock(s);
+		start_bmp++;
+	}
+	return err;
+}
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 2a8e074..dc540bf 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -327,4 +327,5 @@
 	.iterate	= hpfs_readdir,
 	.release	= hpfs_dir_release,
 	.fsync		= hpfs_file_fsync,
+	.unlocked_ioctl	= hpfs_ioctl,
 };
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 6d8cfe9..7ca28d6 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -203,6 +203,7 @@
 	.release	= hpfs_file_release,
 	.fsync		= hpfs_file_fsync,
 	.splice_read	= generic_file_splice_read,
+	.unlocked_ioctl	= hpfs_ioctl,
 };
 
 const struct inode_operations hpfs_file_iops =
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index bb04b58..c4867b5 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -18,6 +18,8 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
 #include <asm/unaligned.h>
 
 #include "hpfs.h"
@@ -200,6 +202,7 @@
 struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
 struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
 struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
+int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *);
 
 /* anode.c */
 
@@ -318,6 +321,7 @@
 void hpfs_error(struct super_block *, const char *, ...);
 int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *);
 unsigned hpfs_get_free_dnodes(struct super_block *);
+long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
 /*
  * local time (HPFS) to GMT (Unix)
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 7cd00d3..68a9bed 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -52,17 +52,20 @@
 }
 
 /* Filesystem error... */
-static char err_buf[1024];
-
 void hpfs_error(struct super_block *s, const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_err("filesystem error: %pV", &vaf);
+
 	va_end(args);
 
-	pr_err("filesystem error: %s", err_buf);
 	if (!hpfs_sb(s)->sb_was_error) {
 		if (hpfs_sb(s)->sb_err == 2) {
 			pr_cont("; crashing the system because you wanted it\n");
@@ -196,12 +199,39 @@
 	return 0;
 }
 
+
+long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+	switch (cmd) {
+		case FITRIM: {
+			struct fstrim_range range;
+			secno n_trimmed;
+			int r;
+			if (!capable(CAP_SYS_ADMIN))
+				return -EPERM;
+			if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
+				return -EFAULT;
+			r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed);
+			if (r)
+				return r;
+			range.len = (u64)n_trimmed << 9;
+			if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range)))
+				return -EFAULT;
+			return 0;
+		}
+		default: {
+			return -ENOIOCTLCMD;
+		}
+	}
+}
+
+
 static struct kmem_cache * hpfs_inode_cachep;
 
 static struct inode *hpfs_alloc_inode(struct super_block *sb)
 {
 	struct hpfs_inode_info *ei;
-	ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
+	ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
@@ -424,11 +454,14 @@
 	int o;
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
 	char *new_opts = kstrdup(data, GFP_KERNEL);
-	
+
+	if (!new_opts)
+		return -ENOMEM;
+
 	sync_filesystem(s);
 
 	*flags |= MS_NOATIME;
-	
+
 	hpfs_lock(s);
 	uid = sbi->sb_uid; gid = sbi->sb_gid;
 	umask = 0777 & ~sbi->sb_mode;
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 93a1232..8db8b7d 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -180,9 +180,6 @@
 	case JFS_IOC_SETFLAGS32:
 		cmd = JFS_IOC_SETFLAGS;
 		break;
-	case FITRIM:
-		cmd = FITRIM;
-		break;
 	}
 	return jfs_ioctl(filp, cmd, arg);
 }
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 9a20e51..aba4381 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1369,7 +1369,6 @@
 	case NILFS_IOCTL_SYNC:
 	case NILFS_IOCTL_RESIZE:
 	case NILFS_IOCTL_SET_ALLOC_RANGE:
-	case FITRIM:
 		break;
 	default:
 		return -ENOIOCTLCMD;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 53e6c40..3cb097c 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -980,7 +980,6 @@
 	case OCFS2_IOC_GROUP_EXTEND:
 	case OCFS2_IOC_GROUP_ADD:
 	case OCFS2_IOC_GROUP_ADD64:
-	case FITRIM:
 		break;
 	case OCFS2_IOC_REFLINK:
 		if (copy_from_user(&args, argp, sizeof(args)))
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f140e3db..d9da5a4 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -343,6 +343,9 @@
 	struct path realpath;
 	enum ovl_path_type type;
 
+	if (d_is_dir(dentry))
+		return d_backing_inode(dentry);
+
 	type = ovl_path_real(dentry, &realpath);
 	if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
 		err = ovl_want_write(dentry);
diff --git a/include/dt-bindings/reset-controller/stih407-resets.h b/include/dt-bindings/reset/stih407-resets.h
similarity index 100%
rename from include/dt-bindings/reset-controller/stih407-resets.h
rename to include/dt-bindings/reset/stih407-resets.h
diff --git a/include/dt-bindings/reset-controller/stih415-resets.h b/include/dt-bindings/reset/stih415-resets.h
similarity index 100%
rename from include/dt-bindings/reset-controller/stih415-resets.h
rename to include/dt-bindings/reset/stih415-resets.h
diff --git a/include/dt-bindings/reset-controller/stih416-resets.h b/include/dt-bindings/reset/stih416-resets.h
similarity index 100%
rename from include/dt-bindings/reset-controller/stih416-resets.h
rename to include/dt-bindings/reset/stih416-resets.h
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c471dfc..d2445fa 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -58,6 +58,19 @@
 	acpi_fwnode_handle(adev) : NULL)
 #define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
 
+/**
+ * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with
+ * the PCI-defined class-code information
+ *
+ * @_cls : the class, subclass, prog-if triple for this device
+ * @_msk : the class mask for this device
+ *
+ * This macro is used to create a struct acpi_device_id that matches a
+ * specific PCI class. The .id and .driver_data fields will be left
+ * initialized with the default value.
+ */
+#define ACPI_DEVICE_CLASS(_cls, _msk)	.cls = (_cls), .cls_msk = (_msk),
+
 static inline bool has_acpi_companion(struct device *dev)
 {
 	return is_acpi_node(dev->fwnode);
@@ -309,9 +322,6 @@
 
 int acpi_resources_are_enforced(void);
 
-int acpi_reserve_region(u64 start, unsigned int length, u8 space_id,
-			unsigned long flags, char *desc);
-
 #ifdef CONFIG_HIBERNATION
 void __init acpi_no_s4_hw_signature(void);
 #endif
@@ -446,6 +456,7 @@
 #define ACPI_COMPANION(dev)		(NULL)
 #define ACPI_COMPANION_SET(dev, adev)	do { } while (0)
 #define ACPI_HANDLE(dev)		(NULL)
+#define ACPI_DEVICE_CLASS(_cls, _msk)	.cls = (0), .cls_msk = (0),
 
 struct fwnode_handle;
 
@@ -507,13 +518,6 @@
 	return 0;
 }
 
-static inline int acpi_reserve_region(u64 start, unsigned int length,
-				      u8 space_id, unsigned long flags,
-				      char *desc)
-{
-	return -ENXIO;
-}
-
 struct acpi_table_header;
 static inline int acpi_table_parse(char *id,
 				int (*handler)(struct acpi_table_header *))
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 73b4522..e6797de 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -317,6 +317,13 @@
 	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
 }
 
+
+static inline struct buffer_head *
+sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp)
+{
+	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp);
+}
+
 static inline struct buffer_head *
 sb_find_get_block(struct super_block *sb, sector_t block)
 {
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index e154994..3775327 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -8,6 +8,7 @@
 #include <linux/radix-tree.h>
 #include <linux/uio.h>
 #include <linux/workqueue.h>
+#include <net/net_namespace.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/buffer.h>
@@ -56,6 +57,7 @@
 	struct ceph_entity_addr my_enc_addr;
 
 	atomic_t stopping;
+	possible_net_t net;
 	bool nocrc;
 	bool tcp_nodelay;
 
@@ -267,6 +269,7 @@
 			u64 required_features,
 			bool nocrc,
 			bool tcp_nodelay);
+extern void ceph_messenger_fini(struct ceph_messenger *msgr);
 
 extern void ceph_con_init(struct ceph_connection *con, void *private,
 			const struct ceph_connection_operations *ops,
diff --git a/include/linux/clk/shmobile.h b/include/linux/clk/shmobile.h
index 63a8159..cb19cc1 100644
--- a/include/linux/clk/shmobile.h
+++ b/include/linux/clk/shmobile.h
@@ -16,8 +16,20 @@
 
 #include <linux/types.h>
 
+struct device;
+struct device_node;
+struct generic_pm_domain;
+
 void r8a7778_clocks_init(u32 mode);
 void r8a7779_clocks_init(u32 mode);
 void rcar_gen2_clocks_init(u32 mode);
 
+#ifdef CONFIG_PM_GENERIC_DOMAINS_OF
+void cpg_mstp_add_clk_domain(struct device_node *np);
+int cpg_mstp_attach_dev(struct generic_pm_domain *domain, struct device *dev);
+void cpg_mstp_detach_dev(struct generic_pm_domain *domain, struct device *dev);
+#else
+static inline void cpg_mstp_add_clk_domain(struct device_node *np) {}
+#endif
+
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 7f8ad95..e08a6ae 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -17,11 +17,11 @@
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu	__attribute__((noderef, address_space(3)))
+# define __pmem		__attribute__((noderef, address_space(5)))
 #ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
 #else
 # define __rcu
-# define __pmem		__attribute__((noderef, address_space(5)))
 #endif
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 624a668..fcea4e4 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -87,7 +87,12 @@
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-#ifndef CONFIG_SPARSE_IRQ
+#ifdef CONFIG_SPARSE_IRQ
+extern void irq_lock_sparse(void);
+extern void irq_unlock_sparse(void);
+#else
+static inline void irq_lock_sparse(void) { }
+static inline void irq_unlock_sparse(void) { }
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
 
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 8183d66..34f25b7 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -189,6 +189,8 @@
 struct acpi_device_id {
 	__u8 id[ACPI_ID_LEN];
 	kernel_ulong_t driver_data;
+	__u32 cls;
+	__u32 cls_msk;
 };
 
 #define PNP_ID_LEN	8
diff --git a/include/linux/rtc/sirfsoc_rtciobrg.h b/include/linux/rtc/sirfsoc_rtciobrg.h
index 2c92e1c..aefd997 100644
--- a/include/linux/rtc/sirfsoc_rtciobrg.h
+++ b/include/linux/rtc/sirfsoc_rtciobrg.h
@@ -9,10 +9,14 @@
 #ifndef _SIRFSOC_RTC_IOBRG_H_
 #define _SIRFSOC_RTC_IOBRG_H_
 
+struct regmap_config;
+
 extern void sirfsoc_rtc_iobrg_besyncing(void);
 
 extern u32 sirfsoc_rtc_iobrg_readl(u32 addr);
 
 extern void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr);
+struct regmap *devm_regmap_init_iobg(struct device *dev,
+				    const struct regmap_config *config);
 
 #endif
diff --git a/include/linux/soc/dove/pmu.h b/include/linux/soc/dove/pmu.h
new file mode 100644
index 0000000..9c99f84
--- /dev/null
+++ b/include/linux/soc/dove/pmu.h
@@ -0,0 +1,6 @@
+#ifndef LINUX_SOC_DOVE_PMU_H
+#define LINUX_SOC_DOVE_PMU_H
+
+int dove_init_pmu(void);
+
+#endif
diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h
new file mode 100644
index 0000000..2a53dca
--- /dev/null
+++ b/include/linux/soc/qcom/smd-rpm.h
@@ -0,0 +1,35 @@
+#ifndef __QCOM_SMD_RPM_H__
+#define __QCOM_SMD_RPM_H__
+
+struct qcom_smd_rpm;
+
+#define QCOM_SMD_RPM_ACTIVE_STATE        0
+#define QCOM_SMD_RPM_SLEEP_STATE         1
+
+/*
+ * Constants used for addressing resources in the RPM.
+ */
+#define QCOM_SMD_RPM_BOOST	0x61747362
+#define QCOM_SMD_RPM_BUS_CLK	0x316b6c63
+#define QCOM_SMD_RPM_BUS_MASTER	0x73616d62
+#define QCOM_SMD_RPM_BUS_SLAVE	0x766c7362
+#define QCOM_SMD_RPM_CLK_BUF_A	0x616B6C63
+#define QCOM_SMD_RPM_LDOA	0x616f646c
+#define QCOM_SMD_RPM_LDOB	0x626F646C
+#define QCOM_SMD_RPM_MEM_CLK	0x326b6c63
+#define QCOM_SMD_RPM_MISC_CLK	0x306b6c63
+#define QCOM_SMD_RPM_NCPA	0x6170636E
+#define QCOM_SMD_RPM_NCPB	0x6270636E
+#define QCOM_SMD_RPM_OCMEM_PWR	0x706d636f
+#define QCOM_SMD_RPM_QPIC_CLK	0x63697071
+#define QCOM_SMD_RPM_SMPA	0x61706d73
+#define QCOM_SMD_RPM_SMPB	0x62706d73
+#define QCOM_SMD_RPM_SPDM	0x63707362
+#define QCOM_SMD_RPM_VSA	0x00617376
+
+int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm,
+		       int state,
+		       u32 resource_type, u32 resource_id,
+		       void *buf, size_t count);
+
+#endif
diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h
new file mode 100644
index 0000000..d7e50aa
--- /dev/null
+++ b/include/linux/soc/qcom/smd.h
@@ -0,0 +1,46 @@
+#ifndef __QCOM_SMD_H__
+#define __QCOM_SMD_H__
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+
+struct qcom_smd;
+struct qcom_smd_channel;
+struct qcom_smd_lookup;
+
+/**
+ * struct qcom_smd_device - smd device struct
+ * @dev:	the device struct
+ * @channel:	handle to the smd channel for this device
+ */
+struct qcom_smd_device {
+	struct device dev;
+	struct qcom_smd_channel *channel;
+};
+
+/**
+ * struct qcom_smd_driver - smd driver struct
+ * @driver:	underlying device driver
+ * @probe:	invoked when the smd channel is found
+ * @remove:	invoked when the smd channel is closed
+ * @callback:	invoked when an inbound message is received on the channel,
+ *		should return 0 on success or -EBUSY if the data cannot be
+ *		consumed at this time
+ */
+struct qcom_smd_driver {
+	struct device_driver driver;
+	int (*probe)(struct qcom_smd_device *dev);
+	void (*remove)(struct qcom_smd_device *dev);
+	int (*callback)(struct qcom_smd_device *, const void *, size_t);
+};
+
+int qcom_smd_driver_register(struct qcom_smd_driver *drv);
+void qcom_smd_driver_unregister(struct qcom_smd_driver *drv);
+
+#define module_qcom_smd_driver(__smd_driver) \
+	module_driver(__smd_driver, qcom_smd_driver_register, \
+		      qcom_smd_driver_unregister)
+
+int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len);
+
+#endif
diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h
new file mode 100644
index 0000000..bc9630d
--- /dev/null
+++ b/include/linux/soc/qcom/smem.h
@@ -0,0 +1,11 @@
+#ifndef __QCOM_SMEM_H__
+#define __QCOM_SMEM_H__
+
+#define QCOM_SMEM_HOST_ANY -1
+
+int qcom_smem_alloc(unsigned host, unsigned item, size_t size);
+int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size);
+
+int qcom_smem_get_free_space(unsigned host);
+
+#endif
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 3741ba1..edbfc9a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -67,10 +67,13 @@
 static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
 #endif /* BROADCAST */
 
-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
 #else
-static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; }
+static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+	return 0;
+}
 #endif
 
 static inline void tick_broadcast_enable(void)
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 3aa72e6..6e191e4 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -145,7 +145,6 @@
 }
 #endif
 
-#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
 #define ktime_get_real_ts64(ts)	getnstimeofday64(ts)
 
 /*
diff --git a/include/soc/tegra/fuse.h b/include/soc/tegra/fuse.h
index b019e34..961b821 100644
--- a/include/soc/tegra/fuse.h
+++ b/include/soc/tegra/fuse.h
@@ -22,6 +22,7 @@
 #define TEGRA114	0x35
 #define TEGRA124	0x40
 #define TEGRA132	0x13
+#define TEGRA210	0x21
 
 #define TEGRA_FUSE_SKU_CALIB_0	0xf0
 #define TEGRA30_FUSE_SATA_CALIB	0x124
@@ -47,10 +48,11 @@
 	int cpu_speedo_id;
 	int cpu_speedo_value;
 	int cpu_iddq_value;
-	int core_process_id;
+	int soc_process_id;
 	int soc_speedo_id;
-	int gpu_speedo_id;
+	int soc_speedo_value;
 	int gpu_process_id;
+	int gpu_speedo_id;
 	int gpu_speedo_value;
 	enum tegra_revision revision;
 };
diff --git a/include/soc/tegra/pmc.h b/include/soc/tegra/pmc.h
index f5c0de4..d18efe4 100644
--- a/include/soc/tegra/pmc.h
+++ b/include/soc/tegra/pmc.h
@@ -67,6 +67,11 @@
 #define TEGRA_POWERGATE_XUSBC	22
 #define TEGRA_POWERGATE_VIC	23
 #define TEGRA_POWERGATE_IRAM	24
+#define TEGRA_POWERGATE_NVDEC	25
+#define TEGRA_POWERGATE_NVJPG	26
+#define TEGRA_POWERGATE_AUD	27
+#define TEGRA_POWERGATE_DFD	28
+#define TEGRA_POWERGATE_VE2	29
 
 #define TEGRA_POWERGATE_3D0	TEGRA_POWERGATE_3D
 
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 09c6564..e85bdfd 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1021,8 +1021,7 @@
 	 * for strings that are too long, we should not have created
 	 * any.
 	 */
-	if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) {
-		WARN_ON(1);
+	if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) {
 		send_sig(SIGKILL, current, 0);
 		return -1;
 	}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9c9c9fa..6a37454 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -21,6 +21,7 @@
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
 #include <linux/tick.h>
+#include <linux/irq.h>
 #include <trace/events/power.h>
 
 #include "smpboot.h"
@@ -392,13 +393,19 @@
 	smpboot_park_threads(cpu);
 
 	/*
+	 * Prevent irq alloc/free while the dying cpu reorganizes the
+	 * interrupt affinities.
+	 */
+	irq_lock_sparse();
+
+	/*
 	 * So now all preempt/rcu users must observe !cpu_active().
 	 */
-
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
+		irq_unlock_sparse();
 		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
@@ -415,6 +422,9 @@
 	smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
 	per_cpu(cpu_dead_idle, cpu) = false;
 
+	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
+	irq_unlock_sparse();
+
 	hotplug_cpu__broadcast_tick_pull(cpu);
 	/* This actually kills the CPU. */
 	__cpu_die(cpu);
@@ -517,8 +527,18 @@
 		goto out_notify;
 	}
 
+	/*
+	 * Some architectures have to walk the irq descriptors to
+	 * setup the vector space for the cpu which comes online.
+	 * Prevent irq alloc/free across the bringup.
+	 */
+	irq_lock_sparse();
+
 	/* Arch-specific enabling code. */
 	ret = __cpu_up(cpu, idle);
+
+	irq_unlock_sparse();
+
 	if (ret != 0)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e965cfa..d3dae34 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4358,14 +4358,6 @@
 	rcu_read_unlock();
 }
 
-static void rb_free_rcu(struct rcu_head *rcu_head)
-{
-	struct ring_buffer *rb;
-
-	rb = container_of(rcu_head, struct ring_buffer, rcu_head);
-	rb_free(rb);
-}
-
 struct ring_buffer *ring_buffer_get(struct perf_event *event)
 {
 	struct ring_buffer *rb;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2deb24c..2bbad9c 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,6 +11,7 @@
 struct ring_buffer {
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
+	struct irq_work			irq_work;
 #ifdef CONFIG_PERF_USE_VMALLOC
 	struct work_struct		work;
 	int				page_order;	/* allocation order  */
@@ -55,6 +56,15 @@
 };
 
 extern void rb_free(struct ring_buffer *rb);
+
+static inline void rb_free_rcu(struct rcu_head *rcu_head)
+{
+	struct ring_buffer *rb;
+
+	rb = container_of(rcu_head, struct ring_buffer, rcu_head);
+	rb_free(rb);
+}
+
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 9647282..b2be01b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -221,6 +221,8 @@
 	rcu_read_unlock();
 }
 
+static void rb_irq_work(struct irq_work *work);
+
 static void
 ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 {
@@ -241,6 +243,16 @@
 
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
+	init_irq_work(&rb->irq_work, rb_irq_work);
+}
+
+static void ring_buffer_put_async(struct ring_buffer *rb)
+{
+	if (!atomic_dec_and_test(&rb->refcount))
+		return;
+
+	rb->rcu_head.next = (void *)rb;
+	irq_work_queue(&rb->irq_work);
 }
 
 /*
@@ -319,7 +331,7 @@
 	rb_free_aux(rb);
 
 err:
-	ring_buffer_put(rb);
+	ring_buffer_put_async(rb);
 	handle->event = NULL;
 
 	return NULL;
@@ -370,7 +382,7 @@
 
 	local_set(&rb->aux_nest, 0);
 	rb_free_aux(rb);
-	ring_buffer_put(rb);
+	ring_buffer_put_async(rb);
 }
 
 /*
@@ -557,7 +569,18 @@
 void rb_free_aux(struct ring_buffer *rb)
 {
 	if (atomic_dec_and_test(&rb->aux_refcount))
+		irq_work_queue(&rb->irq_work);
+}
+
+static void rb_irq_work(struct irq_work *work)
+{
+	struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
+
+	if (!atomic_read(&rb->aux_refcount))
 		__rb_free_aux(rb);
+
+	if (rb->rcu_head.next == (void *)rb)
+		call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4834ee8..61008b8 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -76,12 +76,8 @@
 
 #ifdef CONFIG_SPARSE_IRQ
 static inline void irq_mark_irq(unsigned int irq) { }
-extern void irq_lock_sparse(void);
-extern void irq_unlock_sparse(void);
 #else
 extern void irq_mark_irq(unsigned int irq);
-static inline void irq_lock_sparse(void) { }
-static inline void irq_unlock_sparse(void) { }
 #endif
 
 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
diff --git a/kernel/module.c b/kernel/module.c
index 3e0e197..4d2b82e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3557,6 +3557,7 @@
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	mod_tree_remove(mod);
 	wake_up_all(&module_wq);
 	/* Wait for RCU-sched synchronizing before releasing mod->list. */
 	synchronize_sched();
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 08ccc3d..50eb107 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -120,19 +120,25 @@
 		/* The clockevent device is getting replaced. Shut it down. */
 
 	case CLOCK_EVT_STATE_SHUTDOWN:
-		return dev->set_state_shutdown(dev);
+		if (dev->set_state_shutdown)
+			return dev->set_state_shutdown(dev);
+		return 0;
 
 	case CLOCK_EVT_STATE_PERIODIC:
 		/* Core internal bug */
 		if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
 			return -ENOSYS;
-		return dev->set_state_periodic(dev);
+		if (dev->set_state_periodic)
+			return dev->set_state_periodic(dev);
+		return 0;
 
 	case CLOCK_EVT_STATE_ONESHOT:
 		/* Core internal bug */
 		if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
 			return -ENOSYS;
-		return dev->set_state_oneshot(dev);
+		if (dev->set_state_oneshot)
+			return dev->set_state_oneshot(dev);
+		return 0;
 
 	case CLOCK_EVT_STATE_ONESHOT_STOPPED:
 		/* Core internal bug */
@@ -471,18 +477,6 @@
 	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
 		return 0;
 
-	/* New state-specific callbacks */
-	if (!dev->set_state_shutdown)
-		return -EINVAL;
-
-	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
-	    !dev->set_state_periodic)
-		return -EINVAL;
-
-	if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
-	    !dev->set_state_oneshot)
-		return -EINVAL;
-
 	return 0;
 }
 
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index d39f32c..52b9e19 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -159,7 +159,7 @@
 {
 	struct clock_event_device *bc = tick_broadcast_device.evtdev;
 	unsigned long flags;
-	int ret;
+	int ret = 0;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -221,13 +221,14 @@
 			 * If we kept the cpu in the broadcast mask,
 			 * tell the caller to leave the per cpu device
 			 * in shutdown state. The periodic interrupt
-			 * is delivered by the broadcast device.
+			 * is delivered by the broadcast device, if
+			 * the broadcast device exists and is not
+			 * hrtimer based.
 			 */
-			ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
+			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
+				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
 			break;
 		default:
-			/* Nothing to do */
-			ret = 0;
 			break;
 		}
 	}
@@ -265,8 +266,22 @@
 	 * Check, if the current cpu is in the mask
 	 */
 	if (cpumask_test_cpu(cpu, mask)) {
+		struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
 		cpumask_clear_cpu(cpu, mask);
-		local = true;
+		/*
+		 * We only run the local handler, if the broadcast
+		 * device is not hrtimer based. Otherwise we run into
+		 * a hrtimer recursion.
+		 *
+		 * local timer_interrupt()
+		 *   local_handler()
+		 *     expire_hrtimers()
+		 *       bc_handler()
+		 *         local_handler()
+		 *	     expire_hrtimers()
+		 */
+		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
 	}
 
 	if (!cpumask_empty(mask)) {
@@ -301,6 +316,13 @@
 	bool bc_local;
 
 	raw_spin_lock(&tick_broadcast_lock);
+
+	/* Handle spurious interrupts gracefully */
+	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
+		raw_spin_unlock(&tick_broadcast_lock);
+		return;
+	}
+
 	bc_local = tick_do_periodic_broadcast();
 
 	if (clockevent_state_oneshot(dev)) {
@@ -359,8 +381,16 @@
 	case TICK_BROADCAST_ON:
 		cpumask_set_cpu(cpu, tick_broadcast_on);
 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
-			if (tick_broadcast_device.mode ==
-			    TICKDEV_MODE_PERIODIC)
+			/*
+			 * Only shutdown the cpu local device, if:
+			 *
+			 * - the broadcast device exists
+			 * - the broadcast device is not a hrtimer based one
+			 * - the broadcast device is in periodic mode to
+			 *   avoid a hickup during switch to oneshot mode
+			 */
+			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
+			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
 		}
 		break;
@@ -379,14 +409,16 @@
 		break;
 	}
 
-	if (cpumask_empty(tick_broadcast_mask)) {
-		if (!bc_stopped)
-			clockevents_shutdown(bc);
-	} else if (bc_stopped) {
-		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
-			tick_broadcast_start_periodic(bc);
-		else
-			tick_broadcast_setup_oneshot(bc);
+	if (bc) {
+		if (cpumask_empty(tick_broadcast_mask)) {
+			if (!bc_stopped)
+				clockevents_shutdown(bc);
+		} else if (bc_stopped) {
+			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+				tick_broadcast_start_periodic(bc);
+			else
+				tick_broadcast_setup_oneshot(bc);
+		}
 	}
 	raw_spin_unlock(&tick_broadcast_lock);
 }
@@ -662,71 +694,82 @@
 	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-/**
- * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
- * @state:	The target state (enter/exit)
- *
- * The system enters/leaves a state, where affected devices might stop
- * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
- *
- * Called with interrupts disabled, so clockevents_lock is not
- * required here because the local clock event device cannot go away
- * under us.
- */
-int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 {
 	struct clock_event_device *bc, *dev;
-	struct tick_device *td;
 	int cpu, ret = 0;
 	ktime_t now;
 
 	/*
-	 * Periodic mode does not care about the enter/exit of power
-	 * states
+	 * If there is no broadcast device, tell the caller not to go
+	 * into deep idle.
 	 */
-	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
-		return 0;
+	if (!tick_broadcast_device.evtdev)
+		return -EBUSY;
 
-	/*
-	 * We are called with preemtion disabled from the depth of the
-	 * idle code, so we can't be moved away.
-	 */
-	td = this_cpu_ptr(&tick_cpu_device);
-	dev = td->evtdev;
-
-	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
-		return 0;
+	dev = this_cpu_ptr(&tick_cpu_device)->evtdev;
 
 	raw_spin_lock(&tick_broadcast_lock);
 	bc = tick_broadcast_device.evtdev;
 	cpu = smp_processor_id();
 
 	if (state == TICK_BROADCAST_ENTER) {
+		/*
+		 * If the current CPU owns the hrtimer broadcast
+		 * mechanism, it cannot go deep idle and we do not add
+		 * the CPU to the broadcast mask. We don't have to go
+		 * through the EXIT path as the local timer is not
+		 * shutdown.
+		 */
+		ret = broadcast_needs_cpu(bc, cpu);
+		if (ret)
+			goto out;
+
+		/*
+		 * If the broadcast device is in periodic mode, we
+		 * return.
+		 */
+		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
+			/* If it is a hrtimer based broadcast, return busy */
+			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
+				ret = -EBUSY;
+			goto out;
+		}
+
 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
+
+			/* Conditionally shut down the local timer. */
 			broadcast_shutdown_local(bc, dev);
+
 			/*
 			 * We only reprogram the broadcast timer if we
 			 * did not mark ourself in the force mask and
 			 * if the cpu local event is earlier than the
 			 * broadcast event. If the current CPU is in
 			 * the force mask, then we are going to be
-			 * woken by the IPI right away.
+			 * woken by the IPI right away; we return
+			 * busy, so the CPU does not try to go deep
+			 * idle.
 			 */
-			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
-			    dev->next_event.tv64 < bc->next_event.tv64)
+			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
+				ret = -EBUSY;
+			} else if (dev->next_event.tv64 < bc->next_event.tv64) {
 				tick_broadcast_set_event(bc, cpu, dev->next_event);
+				/*
+				 * In case of hrtimer broadcasts the
+				 * programming might have moved the
+				 * timer to this cpu. If yes, remove
+				 * us from the broadcast mask and
+				 * return busy.
+				 */
+				ret = broadcast_needs_cpu(bc, cpu);
+				if (ret) {
+					cpumask_clear_cpu(cpu,
+						tick_broadcast_oneshot_mask);
+				}
+			}
 		}
-		/*
-		 * If the current CPU owns the hrtimer broadcast
-		 * mechanism, it cannot go deep idle and we remove the
-		 * CPU from the broadcast mask. We don't have to go
-		 * through the EXIT path as the local timer is not
-		 * shutdown.
-		 */
-		ret = broadcast_needs_cpu(bc, cpu);
-		if (ret)
-			cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 	} else {
 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
@@ -938,6 +981,16 @@
 	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
 }
 
+#else
+int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+	struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
+	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
+		return -EBUSY;
+
+	return 0;
+}
 #endif
 
 void __init tick_broadcast_init(void)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 76446cb..55e13ef 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -343,6 +343,27 @@
 	tick_install_broadcast_device(newdev);
 }
 
+/**
+ * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
+ * @state:	The target state (enter/exit)
+ *
+ * The system enters/leaves a state, where affected devices might stop
+ * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
+ */
+int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+
+	if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP))
+		return 0;
+
+	return __tick_broadcast_oneshot_control(state);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Transfer the do_timer job away from a dying cpu.
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 42fdf49..a4a8d4e 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -71,4 +71,14 @@
 static inline void tick_cancel_sched_timer(int cpu) { }
 #endif
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern int __tick_broadcast_oneshot_control(enum tick_broadcast_state state);
+#else
+static inline int
+__tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+	return -EBUSY;
+}
+#endif
+
 #endif
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 777eda7..39f24d6 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -18,10 +18,6 @@
 	  For better error detection enable CONFIG_STACKTRACE,
 	  and add slub_debug=U to boot cmdline.
 
-config KASAN_SHADOW_OFFSET
-	hex
-	default 0xdffffc0000000000 if X86_64
-
 choice
 	prompt "Instrumentation type"
 	depends on KASAN
diff --git a/mm/memory.c b/mm/memory.c
index a84fbb7..388dcf9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2670,6 +2670,10 @@
 
 	pte_unmap(page_table);
 
+	/* File mapping without ->vm_ops ? */
+	if (vma->vm_flags & VM_SHARED)
+		return VM_FAULT_SIGBUS;
+
 	/* Check if we need to add a guard page to the stack */
 	if (check_stack_guard_page(vma, address) < 0)
 		return VM_FAULT_SIGSEGV;
@@ -3099,6 +3103,9 @@
 			- vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 
 	pte_unmap(page_table);
+	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+	if (!vma->vm_ops->fault)
+		return VM_FAULT_SIGBUS;
 	if (!(flags & FAULT_FLAG_WRITE))
 		return do_read_fault(mm, vma, address, pmd, pgoff, flags,
 				orig_pte);
@@ -3244,13 +3251,12 @@
 	barrier();
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (vma->vm_ops) {
-				if (likely(vma->vm_ops->fault))
-					return do_fault(mm, vma, address, pte,
-							pmd, flags, entry);
-			}
-			return do_anonymous_page(mm, vma, address,
-						 pte, pmd, flags);
+			if (vma->vm_ops)
+				return do_fault(mm, vma, address, pte, pmd,
+						flags, entry);
+
+			return do_anonymous_page(mm, vma, address, pte, pmd,
+					flags);
 		}
 		return do_swap_page(mm, vma, address,
 					pte, pmd, flags, entry);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index cb7db32..f30329f 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -9,6 +9,7 @@
 #include <keys/ceph-type.h>
 #include <linux/module.h>
 #include <linux/mount.h>
+#include <linux/nsproxy.h>
 #include <linux/parser.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
@@ -16,8 +17,6 @@
 #include <linux/statfs.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
 
 
 #include <linux/ceph/ceph_features.h>
@@ -131,6 +130,13 @@
 	int i;
 	int ret;
 
+	/*
+	 * Don't bother comparing options if network namespaces don't
+	 * match.
+	 */
+	if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net)))
+		return -1;
+
 	ret = memcmp(opt1, opt2, ofs);
 	if (ret)
 		return ret;
@@ -335,9 +341,6 @@
 	int err = -ENOMEM;
 	substring_t argstr[MAX_OPT_ARGS];
 
-	if (current->nsproxy->net_ns != &init_net)
-		return ERR_PTR(-EINVAL);
-
 	opt = kzalloc(sizeof(*opt), GFP_KERNEL);
 	if (!opt)
 		return ERR_PTR(-ENOMEM);
@@ -608,6 +611,7 @@
 fail_monc:
 	ceph_monc_stop(&client->monc);
 fail:
+	ceph_messenger_fini(&client->msgr);
 	kfree(client);
 	return ERR_PTR(err);
 }
@@ -621,8 +625,8 @@
 
 	/* unmount */
 	ceph_osdc_stop(&client->osdc);
-
 	ceph_monc_stop(&client->monc);
+	ceph_messenger_fini(&client->msgr);
 
 	ceph_debugfs_client_cleanup(client);
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1679f47..e3be1d2 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -6,6 +6,7 @@
 #include <linux/inet.h>
 #include <linux/kthread.h>
 #include <linux/net.h>
+#include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
@@ -479,7 +480,7 @@
 	int ret;
 
 	BUG_ON(con->sock);
-	ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family,
+	ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
 			       SOCK_STREAM, IPPROTO_TCP, &sock);
 	if (ret)
 		return ret;
@@ -1731,17 +1732,17 @@
 
 static bool addr_is_blank(struct sockaddr_storage *ss)
 {
+	struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr;
+	struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr;
+
 	switch (ss->ss_family) {
 	case AF_INET:
-		return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0;
+		return addr->s_addr == htonl(INADDR_ANY);
 	case AF_INET6:
-		return
-		     ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 &&
-		     ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 &&
-		     ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 &&
-		     ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0;
+		return ipv6_addr_any(addr6);
+	default:
+		return true;
 	}
-	return false;
 }
 
 static int addr_port(struct sockaddr_storage *ss)
@@ -2944,11 +2945,18 @@
 	msgr->tcp_nodelay = tcp_nodelay;
 
 	atomic_set(&msgr->stopping, 0);
+	write_pnet(&msgr->net, get_net(current->nsproxy->net_ns));
 
 	dout("%s %p\n", __func__, msgr);
 }
 EXPORT_SYMBOL(ceph_messenger_init);
 
+void ceph_messenger_fini(struct ceph_messenger *msgr)
+{
+	put_net(read_pnet(&msgr->net));
+}
+EXPORT_SYMBOL(ceph_messenger_fini);
+
 static void clear_standby(struct ceph_connection *con)
 {
 	/* come back from STANDBY? */
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index eff7de1..e70fcd1 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -63,6 +63,8 @@
 
 	DEVID(acpi_device_id);
 	DEVID_FIELD(acpi_device_id, id);
+	DEVID_FIELD(acpi_device_id, cls);
+	DEVID_FIELD(acpi_device_id, cls_msk);
 
 	DEVID(pnp_device_id);
 	DEVID_FIELD(pnp_device_id, id);
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 84c86f3..5f20882 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -523,12 +523,40 @@
 }
 ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry);
 
-/* looks like: "acpi:ACPI0003 or acpi:PNP0C0B" or "acpi:LNXVIDEO" */
+/* looks like: "acpi:ACPI0003" or "acpi:PNP0C0B" or "acpi:LNXVIDEO" or
+ *             "acpi:bbsspp" (bb=base-class, ss=sub-class, pp=prog-if)
+ *
+ * NOTE: Each driver should use one of the following : _HID, _CIDs
+ *       or _CLS. Also, bb, ss, and pp can be substituted with ??
+ *       as don't care byte.
+ */
 static int do_acpi_entry(const char *filename,
 			void *symval, char *alias)
 {
 	DEF_FIELD_ADDR(symval, acpi_device_id, id);
-	sprintf(alias, "acpi*:%s:*", *id);
+	DEF_FIELD_ADDR(symval, acpi_device_id, cls);
+	DEF_FIELD_ADDR(symval, acpi_device_id, cls_msk);
+
+	if (id && strlen((const char *)*id))
+		sprintf(alias, "acpi*:%s:*", *id);
+	else if (cls) {
+		int i, byte_shift, cnt = 0;
+		unsigned int msk;
+
+		sprintf(&alias[cnt], "acpi*:");
+		cnt = 6;
+		for (i = 1; i <= 3; i++) {
+			byte_shift = 8 * (3-i);
+			msk = (*cls_msk >> byte_shift) & 0xFF;
+			if (msk)
+				sprintf(&alias[cnt], "%02x",
+					(*cls >> byte_shift) & 0xFF);
+			else
+				sprintf(&alias[cnt], "??");
+			cnt += 2;
+		}
+		sprintf(&alias[cnt], ":*");
+	}
 	return 1;
 }
 ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry);
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 91ee1b2..12d3db3 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -886,7 +886,8 @@
 #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \
 		".kprobes.text"
 #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
-		".fixup", ".entry.text", ".exception.text", ".text.*"
+		".fixup", ".entry.text", ".exception.text", ".text.*", \
+		".coldtext"
 
 #define INIT_SECTIONS      ".init.*"
 #define MEM_INIT_SECTIONS  ".meminit.*"
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6231081..564079c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3283,7 +3283,8 @@
 	int rc = 0;
 
 	if (default_noexec &&
-	    (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) {
+	    (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) ||
+				   (!shared && (prot & PROT_WRITE)))) {
 		/*
 		 * We are making executable an anonymous mapping or a
 		 * private file mapping that will also be writable.
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index afe6a26..57644b1 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -153,6 +153,12 @@
 		if (offset == (u32)-1)
 			return 0;
 
+		/* don't waste ebitmap space if the netlabel bitmap is empty */
+		if (bitmap == 0) {
+			offset += EBITMAP_UNIT_SIZE;
+			continue;
+		}
+
 		if (e_iter == NULL ||
 		    offset >= e_iter->startbit + EBITMAP_SIZE) {
 			e_prev = e_iter;
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index f0e7267..9098083 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -41,4 +41,62 @@
 
 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
 
+#include <linux/types.h>
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
+	case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
+	case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
+	case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)res, (const void *)p, size);
+		barrier();
+	}
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
+	case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
+	case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
+	case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)p, (const void *)res, size);
+		barrier();
+	}
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering.  One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
+ * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+	({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+#define WRITE_ONCE(x, val) \
+	({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
 #endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h
deleted file mode 100644
index d07e586..0000000
--- a/tools/include/linux/export.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _TOOLS_LINUX_EXPORT_H_
-#define _TOOLS_LINUX_EXPORT_H_
-
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
-
-#endif
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
new file mode 100644
index 0000000..1125822
--- /dev/null
+++ b/tools/include/linux/rbtree.h
@@ -0,0 +1,104 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search cores.
+  This will avoid us to use callbacks and to drop drammatically performances.
+  I know it's not the cleaner way,  but in C (not in C++) to get
+  performances and genericity...
+
+  See Documentation/rbtree.txt for documentation and samples.
+*/
+
+#ifndef __TOOLS_LINUX_PERF_RBTREE_H
+#define __TOOLS_LINUX_PERF_RBTREE_H
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+
+struct rb_node {
+	unsigned long  __rb_parent_color;
+	struct rb_node *rb_right;
+	struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+    /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root {
+	struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r)   ((struct rb_node *)((r)->__rb_parent_color & ~3))
+
+#define RB_ROOT	(struct rb_root) { NULL, }
+#define	rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root)  ((root)->rb_node == NULL)
+
+/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
+#define RB_EMPTY_NODE(node)  \
+	((node)->__rb_parent_color == (unsigned long)(node))
+#define RB_CLEAR_NODE(node)  \
+	((node)->__rb_parent_color = (unsigned long)(node))
+
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Postorder iteration - always visit the parent after its children */
+extern struct rb_node *rb_first_postorder(const struct rb_root *);
+extern struct rb_node *rb_next_postorder(const struct rb_node *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+			    struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
+				struct rb_node **rb_link)
+{
+	node->__rb_parent_color = (unsigned long)parent;
+	node->rb_left = node->rb_right = NULL;
+
+	*rb_link = node;
+}
+
+#define rb_entry_safe(ptr, type, member) \
+	({ typeof(ptr) ____ptr = (ptr); \
+	   ____ptr ? rb_entry(____ptr, type, member) : NULL; \
+	})
+
+
+/*
+ * Handy for checking that we are not deleting an entry that is
+ * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
+ * probably should be moved to lib/rbtree.c...
+ */
+static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+	rb_erase(n, root);
+	RB_CLEAR_NODE(n);
+}
+#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
new file mode 100644
index 0000000..43be941
--- /dev/null
+++ b/tools/include/linux/rbtree_augmented.h
@@ -0,0 +1,245 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  tools/linux/include/linux/rbtree_augmented.h
+
+  Copied from:
+  linux/include/linux/rbtree_augmented.h
+*/
+
+#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H
+#define _TOOLS_LINUX_RBTREE_AUGMENTED_H
+
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+
+/*
+ * Please note - only struct rb_augment_callbacks and the prototypes for
+ * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
+ * The rest are implementation details you are not expected to depend on.
+ *
+ * See Documentation/rbtree.txt for documentation and samples.
+ */
+
+struct rb_augment_callbacks {
+	void (*propagate)(struct rb_node *node, struct rb_node *stop);
+	void (*copy)(struct rb_node *old, struct rb_node *new);
+	void (*rotate)(struct rb_node *old, struct rb_node *new);
+};
+
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+/*
+ * Fixup the rbtree and update the augmented information when rebalancing.
+ *
+ * On insertion, the user must update the augmented information on the path
+ * leading to the inserted node, then call rb_link_node() as usual and
+ * rb_augment_inserted() instead of the usual rb_insert_color() call.
+ * If rb_augment_inserted() rebalances the rbtree, it will callback into
+ * a user provided function to update the augmented information on the
+ * affected subtrees.
+ */
+static inline void
+rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+		    const struct rb_augment_callbacks *augment)
+{
+	__rb_insert_augmented(node, root, augment->rotate);
+}
+
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	\
+			     rbtype, rbaugmented, rbcompute)		\
+static inline void							\
+rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
+{									\
+	while (rb != stop) {						\
+		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	\
+		rbtype augmented = rbcompute(node);			\
+		if (node->rbaugmented == augmented)			\
+			break;						\
+		node->rbaugmented = augmented;				\
+		rb = rb_parent(&node->rbfield);				\
+	}								\
+}									\
+static inline void							\
+rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)		\
+{									\
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
+	new->rbaugmented = old->rbaugmented;				\
+}									\
+static void								\
+rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
+{									\
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
+	new->rbaugmented = old->rbaugmented;				\
+	old->rbaugmented = rbcompute(old);				\
+}									\
+rbstatic const struct rb_augment_callbacks rbname = {			\
+	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	\
+};
+
+
+#define	RB_RED		0
+#define	RB_BLACK	1
+
+#define __rb_parent(pc)    ((struct rb_node *)(pc & ~3))
+
+#define __rb_color(pc)     ((pc) & 1)
+#define __rb_is_black(pc)  __rb_color(pc)
+#define __rb_is_red(pc)    (!__rb_color(pc))
+#define rb_color(rb)       __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb)      __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb)    __rb_is_black((rb)->__rb_parent_color)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+}
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+				       struct rb_node *p, int color)
+{
+	rb->__rb_parent_color = (unsigned long)p | color;
+}
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+		  struct rb_node *parent, struct rb_root *root)
+{
+	if (parent) {
+		if (parent->rb_left == old)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else
+		root->rb_node = new;
+}
+
+extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
+static __always_inline struct rb_node *
+__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		     const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+	struct rb_node *parent, *rebalance;
+	unsigned long pc;
+
+	if (!tmp) {
+		/*
+		 * Case 1: node to erase has no more than 1 child (easy!)
+		 *
+		 * Note that if there is one child it must be red due to 5)
+		 * and node must be black due to 4). We adjust colors locally
+		 * so as to bypass __rb_erase_color() later on.
+		 */
+		pc = node->__rb_parent_color;
+		parent = __rb_parent(pc);
+		__rb_change_child(node, child, parent, root);
+		if (child) {
+			child->__rb_parent_color = pc;
+			rebalance = NULL;
+		} else
+			rebalance = __rb_is_black(pc) ? parent : NULL;
+		tmp = parent;
+	} else if (!child) {
+		/* Still case 1, but this time the child is node->rb_left */
+		tmp->__rb_parent_color = pc = node->__rb_parent_color;
+		parent = __rb_parent(pc);
+		__rb_change_child(node, tmp, parent, root);
+		rebalance = NULL;
+		tmp = parent;
+	} else {
+		struct rb_node *successor = child, *child2;
+		tmp = child->rb_left;
+		if (!tmp) {
+			/*
+			 * Case 2: node's successor is its right child
+			 *
+			 *    (n)          (s)
+			 *    / \          / \
+			 *  (x) (s)  ->  (x) (c)
+			 *        \
+			 *        (c)
+			 */
+			parent = successor;
+			child2 = successor->rb_right;
+			augment->copy(node, successor);
+		} else {
+			/*
+			 * Case 3: node's successor is leftmost under
+			 * node's right child subtree
+			 *
+			 *    (n)          (s)
+			 *    / \          / \
+			 *  (x) (y)  ->  (x) (y)
+			 *      /            /
+			 *    (p)          (p)
+			 *    /            /
+			 *  (s)          (c)
+			 *    \
+			 *    (c)
+			 */
+			do {
+				parent = successor;
+				successor = tmp;
+				tmp = tmp->rb_left;
+			} while (tmp);
+			parent->rb_left = child2 = successor->rb_right;
+			successor->rb_right = child;
+			rb_set_parent(child, successor);
+			augment->copy(node, successor);
+			augment->propagate(parent, successor);
+		}
+
+		successor->rb_left = tmp = node->rb_left;
+		rb_set_parent(tmp, successor);
+
+		pc = node->__rb_parent_color;
+		tmp = __rb_parent(pc);
+		__rb_change_child(node, successor, tmp, root);
+		if (child2) {
+			successor->__rb_parent_color = pc;
+			rb_set_parent_color(child2, parent, RB_BLACK);
+			rebalance = NULL;
+		} else {
+			unsigned long pc2 = successor->__rb_parent_color;
+			successor->__rb_parent_color = pc;
+			rebalance = __rb_is_black(pc2) ? parent : NULL;
+		}
+		tmp = successor;
+	}
+
+	augment->propagate(tmp, NULL);
+	return rebalance;
+}
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		   const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
+	if (rebalance)
+		__rb_erase_color(rebalance, root, augment->rotate);
+}
+
+#endif	/* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
new file mode 100644
index 0000000..17c2b59
--- /dev/null
+++ b/tools/lib/rbtree.c
@@ -0,0 +1,548 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/lib/rbtree.c
+*/
+
+#include <linux/rbtree_augmented.h>
+
+/*
+ * red-black trees properties:  http://en.wikipedia.org/wiki/Rbtree
+ *
+ *  1) A node is either red or black
+ *  2) The root is black
+ *  3) All leaves (NULL) are black
+ *  4) Both children of every red node are black
+ *  5) Every simple path from root to leaves contains the same number
+ *     of black nodes.
+ *
+ *  4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
+ *  consecutive red nodes in a path and every red node is therefore followed by
+ *  a black. So if B is the number of black nodes on every simple path (as per
+ *  5), then the longest possible path due to 4 is 2B.
+ *
+ *  We shall indicate color with case, where black nodes are uppercase and red
+ *  nodes will be lowercase. Unknown color nodes shall be drawn as red within
+ *  parentheses and have some accompanying text comment.
+ */
+
+static inline void rb_set_black(struct rb_node *rb)
+{
+	rb->__rb_parent_color |= RB_BLACK;
+}
+
+static inline struct rb_node *rb_red_parent(struct rb_node *red)
+{
+	return (struct rb_node *)red->__rb_parent_color;
+}
+
+/*
+ * Helper function for rotations:
+ * - old's parent and color get assigned to new
+ * - old gets assigned new as a parent and 'color' as a color.
+ */
+static inline void
+__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
+			struct rb_root *root, int color)
+{
+	struct rb_node *parent = rb_parent(old);
+	new->__rb_parent_color = old->__rb_parent_color;
+	rb_set_parent_color(old, new, color);
+	__rb_change_child(old, new, parent, root);
+}
+
+static __always_inline void
+__rb_insert(struct rb_node *node, struct rb_root *root,
+	    void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
+
+	while (true) {
+		/*
+		 * Loop invariant: node is red
+		 *
+		 * If there is a black parent, we are done.
+		 * Otherwise, take some corrective action as we don't
+		 * want a red root or two consecutive red nodes.
+		 */
+		if (!parent) {
+			rb_set_parent_color(node, NULL, RB_BLACK);
+			break;
+		} else if (rb_is_black(parent))
+			break;
+
+		gparent = rb_red_parent(parent);
+
+		tmp = gparent->rb_right;
+		if (parent != tmp) {	/* parent == gparent->rb_left */
+			if (tmp && rb_is_red(tmp)) {
+				/*
+				 * Case 1 - color flips
+				 *
+				 *       G            g
+				 *      / \          / \
+				 *     p   u  -->   P   U
+				 *    /            /
+				 *   n            n
+				 *
+				 * However, since g's parent might be red, and
+				 * 4) does not allow this, we need to recurse
+				 * at g.
+				 */
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+				rb_set_parent_color(parent, gparent, RB_BLACK);
+				node = gparent;
+				parent = rb_parent(node);
+				rb_set_parent_color(node, parent, RB_RED);
+				continue;
+			}
+
+			tmp = parent->rb_right;
+			if (node == tmp) {
+				/*
+				 * Case 2 - left rotate at parent
+				 *
+				 *      G             G
+				 *     / \           / \
+				 *    p   U  -->    n   U
+				 *     \           /
+				 *      n         p
+				 *
+				 * This still leaves us in violation of 4), the
+				 * continuation into Case 3 will fix that.
+				 */
+				parent->rb_right = tmp = node->rb_left;
+				node->rb_left = parent;
+				if (tmp)
+					rb_set_parent_color(tmp, parent,
+							    RB_BLACK);
+				rb_set_parent_color(parent, node, RB_RED);
+				augment_rotate(parent, node);
+				parent = node;
+				tmp = node->rb_right;
+			}
+
+			/*
+			 * Case 3 - right rotate at gparent
+			 *
+			 *        G           P
+			 *       / \         / \
+			 *      p   U  -->  n   g
+			 *     /                 \
+			 *    n                   U
+			 */
+			gparent->rb_left = tmp;  /* == parent->rb_right */
+			parent->rb_right = gparent;
+			if (tmp)
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+			__rb_rotate_set_parents(gparent, parent, root, RB_RED);
+			augment_rotate(gparent, parent);
+			break;
+		} else {
+			tmp = gparent->rb_left;
+			if (tmp && rb_is_red(tmp)) {
+				/* Case 1 - color flips */
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+				rb_set_parent_color(parent, gparent, RB_BLACK);
+				node = gparent;
+				parent = rb_parent(node);
+				rb_set_parent_color(node, parent, RB_RED);
+				continue;
+			}
+
+			tmp = parent->rb_left;
+			if (node == tmp) {
+				/* Case 2 - right rotate at parent */
+				parent->rb_left = tmp = node->rb_right;
+				node->rb_right = parent;
+				if (tmp)
+					rb_set_parent_color(tmp, parent,
+							    RB_BLACK);
+				rb_set_parent_color(parent, node, RB_RED);
+				augment_rotate(parent, node);
+				parent = node;
+				tmp = node->rb_left;
+			}
+
+			/* Case 3 - left rotate at gparent */
+			gparent->rb_right = tmp;  /* == parent->rb_left */
+			parent->rb_left = gparent;
+			if (tmp)
+				rb_set_parent_color(tmp, gparent, RB_BLACK);
+			__rb_rotate_set_parents(gparent, parent, root, RB_RED);
+			augment_rotate(gparent, parent);
+			break;
+		}
+	}
+}
+
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
+
+	while (true) {
+		/*
+		 * Loop invariants:
+		 * - node is black (or NULL on first iteration)
+		 * - node is not the root (parent is not NULL)
+		 * - All leaf paths going through parent and node have a
+		 *   black node count that is 1 lower than other leaf paths.
+		 */
+		sibling = parent->rb_right;
+		if (node != sibling) {	/* node == parent->rb_left */
+			if (rb_is_red(sibling)) {
+				/*
+				 * Case 1 - left rotate at parent
+				 *
+				 *     P               S
+				 *    / \             / \
+				 *   N   s    -->    p   Sr
+				 *      / \         / \
+				 *     Sl  Sr      N   Sl
+				 */
+				parent->rb_right = tmp1 = sibling->rb_left;
+				sibling->rb_left = parent;
+				rb_set_parent_color(tmp1, parent, RB_BLACK);
+				__rb_rotate_set_parents(parent, sibling, root,
+							RB_RED);
+				augment_rotate(parent, sibling);
+				sibling = tmp1;
+			}
+			tmp1 = sibling->rb_right;
+			if (!tmp1 || rb_is_black(tmp1)) {
+				tmp2 = sibling->rb_left;
+				if (!tmp2 || rb_is_black(tmp2)) {
+					/*
+					 * Case 2 - sibling color flip
+					 * (p could be either color here)
+					 *
+					 *    (p)           (p)
+					 *    / \           / \
+					 *   N   S    -->  N   s
+					 *      / \           / \
+					 *     Sl  Sr        Sl  Sr
+					 *
+					 * This leaves us violating 5) which
+					 * can be fixed by flipping p to black
+					 * if it was red, or by recursing at p.
+					 * p is red when coming from Case 1.
+					 */
+					rb_set_parent_color(sibling, parent,
+							    RB_RED);
+					if (rb_is_red(parent))
+						rb_set_black(parent);
+					else {
+						node = parent;
+						parent = rb_parent(node);
+						if (parent)
+							continue;
+					}
+					break;
+				}
+				/*
+				 * Case 3 - right rotate at sibling
+				 * (p could be either color here)
+				 *
+				 *   (p)           (p)
+				 *   / \           / \
+				 *  N   S    -->  N   Sl
+				 *     / \             \
+				 *    sl  Sr            s
+				 *                       \
+				 *                        Sr
+				 */
+				sibling->rb_left = tmp1 = tmp2->rb_right;
+				tmp2->rb_right = sibling;
+				parent->rb_right = tmp2;
+				if (tmp1)
+					rb_set_parent_color(tmp1, sibling,
+							    RB_BLACK);
+				augment_rotate(sibling, tmp2);
+				tmp1 = sibling;
+				sibling = tmp2;
+			}
+			/*
+			 * Case 4 - left rotate at parent + color flips
+			 * (p and sl could be either color here.
+			 *  After rotation, p becomes black, s acquires
+			 *  p's color, and sl keeps its color)
+			 *
+			 *      (p)             (s)
+			 *      / \             / \
+			 *     N   S     -->   P   Sr
+			 *        / \         / \
+			 *      (sl) sr      N  (sl)
+			 */
+			parent->rb_right = tmp2 = sibling->rb_left;
+			sibling->rb_left = parent;
+			rb_set_parent_color(tmp1, sibling, RB_BLACK);
+			if (tmp2)
+				rb_set_parent(tmp2, parent);
+			__rb_rotate_set_parents(parent, sibling, root,
+						RB_BLACK);
+			augment_rotate(parent, sibling);
+			break;
+		} else {
+			sibling = parent->rb_left;
+			if (rb_is_red(sibling)) {
+				/* Case 1 - right rotate at parent */
+				parent->rb_left = tmp1 = sibling->rb_right;
+				sibling->rb_right = parent;
+				rb_set_parent_color(tmp1, parent, RB_BLACK);
+				__rb_rotate_set_parents(parent, sibling, root,
+							RB_RED);
+				augment_rotate(parent, sibling);
+				sibling = tmp1;
+			}
+			tmp1 = sibling->rb_left;
+			if (!tmp1 || rb_is_black(tmp1)) {
+				tmp2 = sibling->rb_right;
+				if (!tmp2 || rb_is_black(tmp2)) {
+					/* Case 2 - sibling color flip */
+					rb_set_parent_color(sibling, parent,
+							    RB_RED);
+					if (rb_is_red(parent))
+						rb_set_black(parent);
+					else {
+						node = parent;
+						parent = rb_parent(node);
+						if (parent)
+							continue;
+					}
+					break;
+				}
+				/* Case 3 - right rotate at sibling */
+				sibling->rb_right = tmp1 = tmp2->rb_left;
+				tmp2->rb_left = sibling;
+				parent->rb_left = tmp2;
+				if (tmp1)
+					rb_set_parent_color(tmp1, sibling,
+							    RB_BLACK);
+				augment_rotate(sibling, tmp2);
+				tmp1 = sibling;
+				sibling = tmp2;
+			}
+			/* Case 4 - left rotate at parent + color flips */
+			parent->rb_left = tmp2 = sibling->rb_right;
+			sibling->rb_right = parent;
+			rb_set_parent_color(tmp1, sibling, RB_BLACK);
+			if (tmp2)
+				rb_set_parent(tmp2, parent);
+			__rb_rotate_set_parents(parent, sibling, root,
+						RB_BLACK);
+			augment_rotate(parent, sibling);
+			break;
+		}
+	}
+}
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	____rb_erase_color(parent, root, augment_rotate);
+}
+
+/*
+ * Non-augmented rbtree manipulation functions.
+ *
+ * We use dummy augmented callbacks here, and have the compiler optimize them
+ * out of the rb_insert_color() and rb_erase() function definitions.
+ */
+
+static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
+static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
+static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
+
+static const struct rb_augment_callbacks dummy_callbacks = {
+	dummy_propagate, dummy_copy, dummy_rotate
+};
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+	__rb_insert(node, root, dummy_rotate);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+	struct rb_node *rebalance;
+	rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+	if (rebalance)
+		____rb_erase_color(rebalance, root, dummy_rotate);
+}
+
+/*
+ * Augmented rbtree manipulation functions.
+ *
+ * This instantiates the same __always_inline functions as in the non-augmented
+ * case, but this time with user-defined callbacks.
+ */
+
+void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	__rb_insert(node, root, augment_rotate);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_left)
+		n = n->rb_left;
+	return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+	struct rb_node	*n;
+
+	n = root->rb_node;
+	if (!n)
+		return NULL;
+	while (n->rb_right)
+		n = n->rb_right;
+	return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * If we have a right-hand child, go down and then left as far
+	 * as we can.
+	 */
+	if (node->rb_right) {
+		node = node->rb_right;
+		while (node->rb_left)
+			node=node->rb_left;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No right-hand children. Everything down and left is smaller than us,
+	 * so any 'next' node must be in the general direction of our parent.
+	 * Go up the tree; any time the ancestor is a right-hand child of its
+	 * parent, keep going up. First time it's a left-hand child of its
+	 * parent, said parent is our 'next' node.
+	 */
+	while ((parent = rb_parent(node)) && node == parent->rb_right)
+		node = parent;
+
+	return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+	struct rb_node *parent;
+
+	if (RB_EMPTY_NODE(node))
+		return NULL;
+
+	/*
+	 * If we have a left-hand child, go down and then right as far
+	 * as we can.
+	 */
+	if (node->rb_left) {
+		node = node->rb_left;
+		while (node->rb_right)
+			node=node->rb_right;
+		return (struct rb_node *)node;
+	}
+
+	/*
+	 * No left-hand children. Go up till we find an ancestor which
+	 * is a right-hand child of its parent.
+	 */
+	while ((parent = rb_parent(node)) && node == parent->rb_left)
+		node = parent;
+
+	return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+		     struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
+
+	/* Set the surrounding nodes to point to the replacement */
+	__rb_change_child(victim, new, parent, root);
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
+
+static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
+{
+	for (;;) {
+		if (node->rb_left)
+			node = node->rb_left;
+		else if (node->rb_right)
+			node = node->rb_right;
+		else
+			return (struct rb_node *)node;
+	}
+}
+
+struct rb_node *rb_next_postorder(const struct rb_node *node)
+{
+	const struct rb_node *parent;
+	if (!node)
+		return NULL;
+	parent = rb_parent(node);
+
+	/* If we're sitting on node, we've already seen our children */
+	if (parent && node == parent->rb_left && parent->rb_right) {
+		/* If we are the parent's left node, go to the parent's right
+		 * node then all the way down to the left */
+		return rb_left_deepest_node(parent->rb_right);
+	} else
+		/* Otherwise we are the parent's right node, and the parent
+		 * should be next */
+		return (struct rb_node *)parent;
+}
+
+struct rb_node *rb_first_postorder(const struct rb_root *root)
+{
+	if (!root->rb_node)
+		return NULL;
+
+	return rb_left_deepest_node(root->rb_node);
+}
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index fe50a1b..09dc0aa 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -18,6 +18,7 @@
 tools/arch/x86/include/asm/rmwcc.h
 tools/lib/traceevent
 tools/lib/api
+tools/lib/rbtree.c
 tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
 tools/lib/util/find_next_bit.c
@@ -44,6 +45,8 @@
 tools/include/linux/list.h
 tools/include/linux/log2.h
 tools/include/linux/poison.h
+tools/include/linux/rbtree.h
+tools/include/linux/rbtree_augmented.h
 tools/include/linux/types.h
 include/asm-generic/bitops/arch_hweight.h
 include/asm-generic/bitops/const_hweight.h
@@ -51,12 +54,10 @@
 include/asm-generic/bitops/__fls.h
 include/asm-generic/bitops/fls.h
 include/linux/perf_event.h
-include/linux/rbtree.h
 include/linux/list.h
 include/linux/hash.h
 include/linux/stringify.h
 lib/hweight.c
-lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
 arch/*/include/uapi/asm/unistd*.h
@@ -65,7 +66,6 @@
 arch/*/lib/memset*.S
 include/linux/poison.h
 include/linux/hw_breakpoint.h
-include/linux/rbtree_augmented.h
 include/uapi/linux/perf_event.h
 include/uapi/linux/const.h
 include/uapi/linux/swab.h
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 586a59d..601d114 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -139,7 +139,7 @@
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_o_c)
 
-$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE
+$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_o_c)
 
diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h
deleted file mode 100644
index f06d89f..0000000
--- a/tools/perf/util/include/linux/rbtree.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __TOOLS_LINUX_PERF_RBTREE_H
-#define __TOOLS_LINUX_PERF_RBTREE_H
-#include <stdbool.h>
-#include "../../../../include/linux/rbtree.h"
-
-/*
- * Handy for checking that we are not deleting an entry that is
- * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
- * probably should be moved to lib/rbtree.c...
- */
-static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
-{
-	rb_erase(n, root);
-	RB_CLEAR_NODE(n);
-}
-#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h
deleted file mode 100644
index 9d6fcdf..0000000
--- a/tools/perf/util/include/linux/rbtree_augmented.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <stdbool.h>
-#include "../../../../include/linux/rbtree_augmented.h"
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 8e9b645..f56914c 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -1,3 +1,6 @@
+ldflags-y += --wrap=ioremap_wt
+ldflags-y += --wrap=ioremap_wc
+ldflags-y += --wrap=devm_ioremap_nocache
 ldflags-y += --wrap=ioremap_cache
 ldflags-y += --wrap=ioremap_nocache
 ldflags-y += --wrap=iounmap
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index c85a6f6..64bfaa5 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -65,6 +65,21 @@
 	return fallback_fn(offset, size);
 }
 
+void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
+		resource_size_t offset, unsigned long size)
+{
+	struct nfit_test_resource *nfit_res;
+
+	rcu_read_lock();
+	nfit_res = get_nfit_res(offset);
+	rcu_read_unlock();
+	if (nfit_res)
+		return (void __iomem *) nfit_res->buf + offset
+			- nfit_res->res->start;
+	return devm_ioremap_nocache(dev, offset, size);
+}
+EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
+
 void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size)
 {
 	return __nfit_test_ioremap(offset, size, ioremap_cache);
@@ -77,6 +92,18 @@
 }
 EXPORT_SYMBOL(__wrap_ioremap_nocache);
 
+void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size)
+{
+	return __nfit_test_ioremap(offset, size, ioremap_wt);
+}
+EXPORT_SYMBOL(__wrap_ioremap_wt);
+
+void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size)
+{
+	return __nfit_test_ioremap(offset, size, ioremap_wc);
+}
+EXPORT_SYMBOL(__wrap_ioremap_wc);
+
 void __wrap_iounmap(volatile void __iomem *addr)
 {
 	struct nfit_test_resource *nfit_res;
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 4b69b83..d0bdae4 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -128,6 +128,8 @@
 	int num_pm;
 	void **dimm;
 	dma_addr_t *dimm_dma;
+	void **flush;
+	dma_addr_t *flush_dma;
 	void **label;
 	dma_addr_t *label_dma;
 	void **spa_set;
@@ -155,7 +157,7 @@
 	int i, rc;
 
 	if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask))
-		return -ENXIO;
+		return -ENOTTY;
 
 	/* lookup label space for the given dimm */
 	for (i = 0; i < ARRAY_SIZE(handle); i++)
@@ -331,7 +333,8 @@
 			+ sizeof(struct acpi_nfit_system_address) * NUM_SPA
 			+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
 			+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
-			+ sizeof(struct acpi_nfit_data_region) * NUM_BDW;
+			+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
+			+ sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
 	int i;
 
 	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@@ -356,6 +359,10 @@
 		if (!t->label[i])
 			return -ENOMEM;
 		sprintf(t->label[i], "label%d", i);
+
+		t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]);
+		if (!t->flush[i])
+			return -ENOMEM;
 	}
 
 	for (i = 0; i < NUM_DCR; i++) {
@@ -408,6 +415,7 @@
 	struct acpi_nfit_system_address *spa;
 	struct acpi_nfit_control_region *dcr;
 	struct acpi_nfit_data_region *bdw;
+	struct acpi_nfit_flush_address *flush;
 	unsigned int offset;
 
 	nfit_test_init_header(nfit_buf, size);
@@ -831,6 +839,39 @@
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
 
+	offset = offset + sizeof(struct acpi_nfit_data_region) * 4;
+	/* flush0 (dimm0) */
+	flush = nfit_buf + offset;
+	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->device_handle = handle[0];
+	flush->hint_count = 1;
+	flush->hint_address[0] = t->flush_dma[0];
+
+	/* flush1 (dimm1) */
+	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1;
+	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->device_handle = handle[1];
+	flush->hint_count = 1;
+	flush->hint_address[0] = t->flush_dma[1];
+
+	/* flush2 (dimm2) */
+	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2;
+	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->device_handle = handle[2];
+	flush->hint_count = 1;
+	flush->hint_address[0] = t->flush_dma[2];
+
+	/* flush3 (dimm3) */
+	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3;
+	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->device_handle = handle[3];
+	flush->hint_count = 1;
+	flush->hint_address[0] = t->flush_dma[3];
+
 	acpi_desc = &t->acpi_desc;
 	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en);
 	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
@@ -933,6 +974,10 @@
 				GFP_KERNEL);
 		nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
 				GFP_KERNEL);
+		nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *),
+				GFP_KERNEL);
+		nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+				GFP_KERNEL);
 		nfit_test->label = devm_kcalloc(dev, num, sizeof(void *),
 				GFP_KERNEL);
 		nfit_test->label_dma = devm_kcalloc(dev, num,
@@ -943,7 +988,8 @@
 				sizeof(dma_addr_t), GFP_KERNEL);
 		if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
 				&& nfit_test->label_dma && nfit_test->dcr
-				&& nfit_test->dcr_dma)
+				&& nfit_test->dcr_dma && nfit_test->flush
+				&& nfit_test->flush_dma)
 			/* pass */;
 		else
 			return -ENOMEM;